mirror of
https://github.com/huggingface/lerobot.git
synced 2026-05-20 19:19:56 +00:00
feat(eval): add --push_to_hub to upload eval results, videos, and model card to Hub
Adds a push_to_hub flag to lerobot-eval that uploads eval_info.json, rollout videos, and appends an evaluation results table to the model card on Hugging Face. Also declares missing LIBERO-plus runtime deps in pyproject.toml and adds an asset validation check for libero_plus. Made-with: Cursor
This commit is contained in:
@@ -180,6 +180,14 @@ libero_plus = [
|
|||||||
"hf-egl-probe>=1.0.1; sys_platform == 'linux'",
|
"hf-egl-probe>=1.0.1; sys_platform == 'linux'",
|
||||||
"egl_probe>=1.0.1; sys_platform == 'linux'",
|
"egl_probe>=1.0.1; sys_platform == 'linux'",
|
||||||
"libero @ git+https://github.com/sylvestf/LIBERO-plus.git@main ; sys_platform == 'linux'",
|
"libero @ git+https://github.com/sylvestf/LIBERO-plus.git@main ; sys_platform == 'linux'",
|
||||||
|
# LIBERO-plus setup.py has empty install_requires; declare its runtime deps here.
|
||||||
|
"robosuite>=1.4.0,<1.5.0; sys_platform == 'linux'",
|
||||||
|
"bddl>=1.0.1,<2.0.0; sys_platform == 'linux'",
|
||||||
|
"robomimic>=0.2.0,<0.3.0; sys_platform == 'linux'",
|
||||||
|
"easydict>=1.9; sys_platform == 'linux'",
|
||||||
|
"wand; sys_platform == 'linux'",
|
||||||
|
"scikit-image>=0.20.0; sys_platform == 'linux'",
|
||||||
|
"gym>=0.25.0,<0.27.0; sys_platform == 'linux'",
|
||||||
"lerobot[scipy-dep]",
|
"lerobot[scipy-dep]",
|
||||||
]
|
]
|
||||||
libero-plus = ["lerobot[libero_plus]"]
|
libero-plus = ["lerobot[libero_plus]"]
|
||||||
|
|||||||
@@ -40,6 +40,8 @@ class EvalPipelineConfig:
|
|||||||
rename_map: dict[str, str] = field(default_factory=dict)
|
rename_map: dict[str, str] = field(default_factory=dict)
|
||||||
# Explicit consent to execute remote code from the Hub (required for hub environments).
|
# Explicit consent to execute remote code from the Hub (required for hub environments).
|
||||||
trust_remote_code: bool = False
|
trust_remote_code: bool = False
|
||||||
|
# Push eval results (metrics JSON, rollout videos, model card update) to the model's Hub repo.
|
||||||
|
push_to_hub: bool = False
|
||||||
|
|
||||||
def __post_init__(self) -> None:
|
def __post_init__(self) -> None:
|
||||||
# HACK: We parse again the cli args here to get the pretrained path if there was one.
|
# HACK: We parse again the cli args here to get the pretrained path if there was one.
|
||||||
|
|||||||
@@ -191,6 +191,11 @@ def make_env(
|
|||||||
if cfg.task is None:
|
if cfg.task is None:
|
||||||
raise ValueError("LiberoEnv requires a task to be specified")
|
raise ValueError("LiberoEnv requires a task to be specified")
|
||||||
|
|
||||||
|
if cfg.type == "libero_plus":
|
||||||
|
from lerobot.envs.libero import _check_libero_plus_assets
|
||||||
|
|
||||||
|
_check_libero_plus_assets()
|
||||||
|
|
||||||
return create_libero_envs(
|
return create_libero_envs(
|
||||||
task=cfg.task,
|
task=cfg.task,
|
||||||
n_envs=n_envs,
|
n_envs=n_envs,
|
||||||
|
|||||||
@@ -77,6 +77,32 @@ from libero.libero.envs import OffScreenRenderEnv
|
|||||||
|
|
||||||
from lerobot.processor import RobotObservation
|
from lerobot.processor import RobotObservation
|
||||||
|
|
||||||
|
_ASSET_DOWNLOAD_INSTRUCTIONS = """\
|
||||||
|
LIBERO-plus assets not found at: {assets_dir}
|
||||||
|
|
||||||
|
The LIBERO-plus benchmark requires ~6 GB of scene/texture/object assets that
|
||||||
|
are hosted separately on Hugging Face. To download and install them:
|
||||||
|
|
||||||
|
python -c "
|
||||||
|
from huggingface_hub import hf_hub_download
|
||||||
|
hf_hub_download('Sylvest/LIBERO-plus', 'assets.zip',
|
||||||
|
repo_type='dataset', local_dir='/tmp/libero-plus-assets')
|
||||||
|
"
|
||||||
|
unzip /tmp/libero-plus-assets/assets.zip -d /tmp/libero-plus-assets-unzipped
|
||||||
|
# The zip contains a deeply nested path; move the assets directory:
|
||||||
|
mv /tmp/libero-plus-assets-unzipped/inspire/*/assets {assets_dir}
|
||||||
|
rm -rf /tmp/libero-plus-assets /tmp/libero-plus-assets-unzipped
|
||||||
|
|
||||||
|
See https://huggingface.co/datasets/Sylvest/LIBERO-plus for details.
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
def _check_libero_plus_assets() -> None:
    """Fail fast when the LIBERO-plus asset tree has not been installed.

    Looks for a ``scenes`` directory under ``<benchmark_root>/assets``, where
    ``benchmark_root`` is resolved through ``get_libero_path``.

    Raises:
        FileNotFoundError: If ``<benchmark_root>/assets/scenes`` is not a
            directory. The message carries the download instructions with the
            expected assets location filled in.
    """
    benchmark_root = Path(get_libero_path("benchmark_root"))
    assets_dir = benchmark_root / "assets"
    scenes_dir = assets_dir / "scenes"

    # Guard clause: nothing to do when the scenes directory is already there.
    if scenes_dir.is_dir():
        return

    message = _ASSET_DOWNLOAD_INSTRUCTIONS.format(assets_dir=assets_dir)
    raise FileNotFoundError(message)
|
||||||
|
|
||||||
|
|
||||||
def _parse_camera_names(camera_name: str | Sequence[str]) -> list[str]:
|
def _parse_camera_names(camera_name: str | Sequence[str]) -> list[str]:
|
||||||
"""Normalize camera_name into a non-empty list of strings."""
|
"""Normalize camera_name into a non-empty list of strings."""
|
||||||
|
|||||||
@@ -49,6 +49,7 @@ You can learn about the CLI options for this script in the `EvalPipelineConfig`
|
|||||||
import concurrent.futures as cf
|
import concurrent.futures as cf
|
||||||
import json
|
import json
|
||||||
import logging
|
import logging
|
||||||
|
import re
|
||||||
import threading
|
import threading
|
||||||
import time
|
import time
|
||||||
from collections import defaultdict
|
from collections import defaultdict
|
||||||
@@ -502,6 +503,126 @@ def _compile_episode_data(
|
|||||||
return data_dict
|
return data_dict
|
||||||
|
|
||||||
|
|
||||||
|
def push_eval_to_hub(
    repo_id: str,
    output_dir: Path,
    info: dict,
    env_type: str,
) -> str:
    """Upload eval results, videos, and an updated model card to the Hub.

    The repo is created if it does not exist yet. Artifacts are stored under
    ``eval/<env_type>/`` in the repo, and the model card README gets an eval
    results section for ``env_type``.

    Args:
        repo_id: HF model repo (e.g. "user/my_policy").
        output_dir: Local directory containing eval_info.json and videos/.
        info: The eval results dict (as returned by eval_policy_all).
        env_type: Environment type string (e.g. "libero_plus", "pusht").

    Returns:
        The result of the most recent artifact upload performed here: the
        videos commit when ``videos/`` exists, otherwise the ``eval_info.json``
        commit. An empty string is returned when neither artifact exists
        locally. The subsequent model card commit is not reflected in the
        return value.
    """
    from huggingface_hub import HfApi

    api = HfApi()
    api.create_repo(repo_id=repo_id, exist_ok=True)

    remote_root = f"eval/{env_type}"
    commit_url = ""

    # 1. Upload eval_info.json
    eval_json_path = output_dir / "eval_info.json"
    if eval_json_path.exists():
        commit_url = api.upload_file(
            path_or_fileobj=str(eval_json_path),
            path_in_repo=f"{remote_root}/eval_info.json",
            repo_id=repo_id,
            commit_message=f"Upload eval results for {env_type}",
        )

    # 2. Upload rollout videos
    videos_dir = output_dir / "videos"
    if videos_dir.is_dir():
        # Keep the result so the return value reflects the latest upload,
        # including the case where only videos are present.
        commit_url = api.upload_folder(
            folder_path=str(videos_dir),
            path_in_repo=f"{remote_root}/videos",
            repo_id=repo_id,
            commit_message=f"Upload eval rollout videos for {env_type}",
        )

    # 3. Update the model card with an eval results table
    _update_model_card_with_eval(api, repo_id, info, env_type)

    logging.info("Eval results pushed to https://huggingface.co/%s", repo_id)
    return commit_url
|
||||||
|
|
||||||
|
|
||||||
|
def _format_eval_table(info: dict, env_type: str) -> str:
    """Build a markdown table from eval results.

    Args:
        info: Eval results dict. ``info["per_group"]`` maps a suite name to a
            stats dict and ``info["overall"]`` is a stats dict; both are
            optional. The stats keys read are ``pc_success``,
            ``avg_sum_reward`` and ``n_episodes``; ``overall`` may also carry
            ``video_paths``.
        env_type: Environment type string, shown in the section heading.

    Returns:
        Markdown text: a heading, one table row per suite (sorted by name), an
        optional bold "Overall" row, and an optional collapsible list naming up
        to 10 rollout videos.
    """

    def _metric(value: object, spec: str) -> str:
        # A None or non-numeric metric must not abort the whole model card
        # update, so it is rendered as a placeholder instead of raising.
        try:
            return format(float(value), spec)
        except (TypeError, ValueError):
            return "n/a"

    def _cell(value: object) -> str:
        # A raw "|" would be read as a column separator by Markdown.
        return str(value).replace("|", "\\|")

    nan = float("nan")
    lines = [
        f"### Evaluation: `{env_type}`\n",
        "| Suite | Success Rate (%) | Avg Sum Reward | Episodes |",
        "|-------|-----------------|----------------|----------|",
    ]

    per_group = info.get("per_group") or {}
    for group_name, stats in sorted(per_group.items()):
        sr = _metric(stats.get("pc_success", nan), ".1f")
        reward = _metric(stats.get("avg_sum_reward", nan), ".2f")
        n_ep = stats.get("n_episodes", 0)
        lines.append(f"| {_cell(group_name)} | {sr} | {reward} | {_cell(n_ep)} |")

    overall = info.get("overall") or {}
    if overall:
        sr = _metric(overall.get("pc_success", nan), ".1f")
        reward = _metric(overall.get("avg_sum_reward", nan), ".2f")
        n_ep = overall.get("n_episodes", 0)
        lines.append(f"| **Overall** | **{sr}** | **{reward}** | **{_cell(n_ep)}** |")

    video_paths = overall.get("video_paths") or []
    if video_paths:
        lines.append("")
        lines.append("<details><summary>Rollout videos</summary>\n")
        # Only the first 10 videos are listed to keep the card short.
        for vp in video_paths[:10]:
            video_path = Path(vp)
            lines.append(f"**{video_path.parent.name}/{video_path.name}**\n")
            # Blank spacer between entries.
            # NOTE(review): only the file name is listed; no link or embed for
            # the uploaded video is emitted here - confirm that is intended.
            lines.append("\n")
        lines.append("</details>")

    return "\n".join(lines)
|
||||||
|
|
||||||
|
|
||||||
|
def _update_model_card_with_eval(api: Any, repo_id: str, info: dict, env_type: str) -> None:
    """Append or replace the eval section in the model card README.

    The section for ``env_type`` is delimited by HTML comment markers so that
    a later run for the same environment replaces it in place. When no
    complete marker pair is found, the section is appended, preceded by a
    "## Evaluation Results" heading if the card does not have one yet.

    Args:
        api: Hub API client. Currently unused here; the card is loaded and
            pushed through ``ModelCard``.
        repo_id: HF model repo whose README is updated.
        info: The eval results dict passed on to ``_format_eval_table``.
        env_type: Environment type string; part of the section markers.

    Raises:
        Exception: Any error from loading the card other than a missing
            README is propagated, so that a transient failure cannot lead to
            pushing a card that lacks the existing README content.
    """
    from huggingface_hub import ModelCard
    from huggingface_hub.errors import EntryNotFoundError

    try:
        card = ModelCard.load(repo_id)
    except EntryNotFoundError:
        # The repo has no README.md yet: start from an empty card.
        card = ModelCard("")

    content = card.content or ""

    eval_table = _format_eval_table(info, env_type)

    section_marker_start = f"<!-- eval-results-{env_type}-start -->"
    section_marker_end = f"<!-- eval-results-{env_type}-end -->"
    new_section = f"{section_marker_start}\n{eval_table}\n{section_marker_end}"

    section_pattern = rf"{re.escape(section_marker_start)}.*?{re.escape(section_marker_end)}"
    # A callable replacement inserts the text literally; a plain string would
    # be treated as a template in which backslashes and group references from
    # the generated table are interpreted.
    content, n_replaced = re.subn(
        section_pattern,
        lambda _match: new_section,
        content,
        flags=re.DOTALL,
    )

    if n_replaced == 0:
        # No complete start/end pair (including a start marker whose end
        # marker is missing): append instead of silently dropping the update.
        if "## Evaluation Results" not in content:
            content += "\n## Evaluation Results\n\n"
        content += f"\n{new_section}\n"

    card.content = content
    card.push_to_hub(repo_id, commit_message=f"Update eval results for {env_type}")
|
||||||
|
|
||||||
|
|
||||||
@parser.wrap()
|
@parser.wrap()
|
||||||
def eval_main(cfg: EvalPipelineConfig):
|
def eval_main(cfg: EvalPipelineConfig):
|
||||||
logging.info(pformat(asdict(cfg)))
|
logging.info(pformat(asdict(cfg)))
|
||||||
@@ -573,9 +694,20 @@ def eval_main(cfg: EvalPipelineConfig):
|
|||||||
close_envs(envs)
|
close_envs(envs)
|
||||||
|
|
||||||
# Save info
|
# Save info
|
||||||
with open(Path(cfg.output_dir) / "eval_info.json", "w") as f:
|
output_dir = Path(cfg.output_dir)
|
||||||
|
output_dir.mkdir(parents=True, exist_ok=True)
|
||||||
|
with open(output_dir / "eval_info.json", "w") as f:
|
||||||
json.dump(info, f, indent=2)
|
json.dump(info, f, indent=2)
|
||||||
|
|
||||||
|
if cfg.push_to_hub:
|
||||||
|
repo_id = str(cfg.policy.pretrained_path)
|
||||||
|
push_eval_to_hub(
|
||||||
|
repo_id=repo_id,
|
||||||
|
output_dir=output_dir,
|
||||||
|
info=info,
|
||||||
|
env_type=cfg.env.type,
|
||||||
|
)
|
||||||
|
|
||||||
logging.info("End of eval")
|
logging.info("End of eval")
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user