feat(eval): add --push_to_hub to upload eval results, videos, and model card to Hub

Adds a `--push_to_hub` flag to lerobot-eval that uploads eval_info.json and rollout videos, and appends an evaluation results table to the model card on Hugging Face. Also declares the missing LIBERO-plus runtime deps in pyproject.toml and adds an asset validation check for libero_plus.

Made-with: Cursor
2026-07-25 02:36:11 +00:00 · 2026-03-16 02:39:24 +01:00
parent c9cfc88602
commit 89f9bd78ab
5 changed files with 174 additions and 1 deletions
@@ -180,6 +180,14 @@ libero_plus = [
    "hf-egl-probe>=1.0.1; sys_platform == 'linux'",
    "egl_probe>=1.0.1; sys_platform == 'linux'",
    "libero @ git+https://github.com/sylvestf/LIBERO-plus.git@main ; sys_platform == 'linux'",
+    # LIBERO-plus setup.py has empty install_requires; declare its runtime deps here.
+    "robosuite>=1.4.0,<1.5.0; sys_platform == 'linux'",
+    "bddl>=1.0.1,<2.0.0; sys_platform == 'linux'",
+    "robomimic>=0.2.0,<0.3.0; sys_platform == 'linux'",
+    "easydict>=1.9; sys_platform == 'linux'",
+    "wand; sys_platform == 'linux'",
+    "scikit-image>=0.20.0; sys_platform == 'linux'",
+    "gym>=0.25.0,<0.27.0; sys_platform == 'linux'",
    "lerobot[scipy-dep]",
 ]
 libero-plus = ["lerobot[libero_plus]"]
@@ -40,6 +40,8 @@ class EvalPipelineConfig:
    rename_map: dict[str, str] = field(default_factory=dict)
    # Explicit consent to execute remote code from the Hub (required for hub environments).
    trust_remote_code: bool = False
+    # Push eval results (metrics JSON, rollout videos, model card update) to the model's Hub repo.
+    push_to_hub: bool = False

    def __post_init__(self) -> None:
        # HACK: We parse again the cli args here to get the pretrained path if there was one.
@@ -191,6 +191,11 @@ def make_env(
        if cfg.task is None:
            raise ValueError("LiberoEnv requires a task to be specified")

+        if cfg.type == "libero_plus":
+            from lerobot.envs.libero import _check_libero_plus_assets
+
+            _check_libero_plus_assets()
+
        return create_libero_envs(
            task=cfg.task,
            n_envs=n_envs,
@@ -77,6 +77,32 @@ from libero.libero.envs import OffScreenRenderEnv

 from lerobot.processor import RobotObservation

+# Error-message template used by `_check_libero_plus_assets` when the asset tree is missing.
+# `{assets_dir}` is filled in via `str.format` with the directory the assets are expected in.
+_ASSET_DOWNLOAD_INSTRUCTIONS = """\
+LIBERO-plus assets not found at: {assets_dir}
+
+The LIBERO-plus benchmark requires ~6 GB of scene/texture/object assets that
+are hosted separately on Hugging Face.  To download and install them:
+
+    python -c "
+from huggingface_hub import hf_hub_download
+hf_hub_download('Sylvest/LIBERO-plus', 'assets.zip',
+                repo_type='dataset', local_dir='/tmp/libero-plus-assets')
+"
+    unzip /tmp/libero-plus-assets/assets.zip -d /tmp/libero-plus-assets-unzipped
+    # The zip contains a deeply nested path; move the assets directory:
+    mv /tmp/libero-plus-assets-unzipped/inspire/*/assets {assets_dir}
+    rm -rf /tmp/libero-plus-assets /tmp/libero-plus-assets-unzipped
+
+See https://huggingface.co/datasets/Sylvest/LIBERO-plus for details.
+"""
+
+
+def _check_libero_plus_assets() -> None:
+    """Fail fast when the LIBERO-plus asset tree is not installed.
+
+    The check looks for a ``scenes`` directory inside ``<benchmark_root>/assets``, where
+    ``benchmark_root`` is resolved through ``get_libero_path``.
+
+    Raises:
+        FileNotFoundError: If the ``scenes`` directory is missing. The message contains the
+            download instructions with the expected assets directory filled in.
+    """
+    benchmark_root = Path(get_libero_path("benchmark_root"))
+    assets_dir = benchmark_root / "assets"
+    scenes_dir = assets_dir / "scenes"
+    if scenes_dir.is_dir():
+        return
+
+    message = _ASSET_DOWNLOAD_INSTRUCTIONS.format(assets_dir=assets_dir)
+    raise FileNotFoundError(message)
+

 def _parse_camera_names(camera_name: str | Sequence[str]) -> list[str]:
    """Normalize camera_name into a non-empty list of strings."""
@@ -49,6 +49,7 @@ You can learn about the CLI options for this script in the `EvalPipelineConfig`
 import concurrent.futures as cf
 import json
 import logging
+import re
 import threading
 import time
 from collections import defaultdict
@@ -502,6 +503,126 @@ def _compile_episode_data(
    return data_dict


+def push_eval_to_hub(
+    repo_id: str,
+    output_dir: Path,
+    info: dict,
+    env_type: str,
+) -> str:
+    """Upload eval results, videos, and an updated model card to the Hub.
+
+    Artifacts are written under ``eval/{env_type}/`` in the model repo. Each step
+    (metrics JSON, videos folder, model card) is pushed as its own commit.
+
+    Args:
+        repo_id: HF model repo (e.g. "user/my_policy").
+        output_dir: Local directory containing eval_info.json and videos/.
+        info: The eval results dict (as returned by eval_policy_all).
+        env_type: Environment type string (e.g. "libero_plus", "pusht").
+
+    Returns:
+        URL of the most recent artifact-upload commit: the videos commit when a
+        ``videos/`` directory exists, otherwise the ``eval_info.json`` commit. Empty
+        string when neither artifact exists. The model card update is a separate
+        commit whose URL is not returned.
+    """
+    from huggingface_hub import HfApi
+
+    api = HfApi()
+    # exist_ok=True makes this a no-op when the repo already exists.
+    api.create_repo(repo_id=repo_id, exist_ok=True)
+
+    repo_prefix = f"eval/{env_type}"
+    commit_url = ""
+
+    # 1. Upload eval_info.json
+    eval_json_path = output_dir / "eval_info.json"
+    if eval_json_path.exists():
+        json_commit = api.upload_file(
+            path_or_fileobj=str(eval_json_path),
+            path_in_repo=f"{repo_prefix}/eval_info.json",
+            repo_id=repo_id,
+            commit_message=f"Upload eval results for {env_type}",
+        )
+        # The Hub client may return either a plain URL string or a commit-info object.
+        commit_url = str(getattr(json_commit, "commit_url", json_commit))
+    else:
+        logging.warning(f"No eval_info.json found in {output_dir}; skipping metrics upload.")
+
+    # 2. Upload rollout videos
+    videos_dir = output_dir / "videos"
+    if videos_dir.is_dir():
+        videos_commit = api.upload_folder(
+            folder_path=str(videos_dir),
+            path_in_repo=f"{repo_prefix}/videos",
+            repo_id=repo_id,
+            commit_message=f"Upload eval rollout videos for {env_type}",
+        )
+        # Track the latest upload so the returned URL matches the documented contract.
+        commit_url = str(getattr(videos_commit, "commit_url", videos_commit))
+
+    # 3. Update the model card with an eval results table
+    _update_model_card_with_eval(api, repo_id, info, env_type)
+
+    logging.info(f"Eval results pushed to https://huggingface.co/{repo_id}")
+    return commit_url
+
+
+def _format_eval_table(info: dict, env_type: str) -> str:
+    """Render the eval results for ``env_type`` as a markdown section.
+
+    The section holds a heading, one table row per entry of ``info["per_group"]``
+    (sorted by group name), a bold "Overall" row when ``info["overall"]`` is non-empty,
+    and a collapsible list of at most ten rollout videos taken from
+    ``info["overall"]["video_paths"]``. Missing metrics render as ``nan`` and a
+    missing episode count as ``0``.
+
+    Args:
+        info: Eval results dict with optional "per_group" and "overall" entries.
+        env_type: Environment type string, used in the heading and in video links.
+
+    Returns:
+        The markdown text, lines joined with newlines.
+    """
+
+    def _metrics(stats: dict) -> tuple:
+        # (success rate, average summed reward, episode count) with the table defaults.
+        return (
+            stats.get("pc_success", float("nan")),
+            stats.get("avg_sum_reward", float("nan")),
+            stats.get("n_episodes", 0),
+        )
+
+    rows = [
+        f"### Evaluation: `{env_type}`\n",
+        "| Suite | Success Rate (%) | Avg Sum Reward | Episodes |",
+        "|-------|-----------------|----------------|----------|",
+    ]
+
+    groups = info.get("per_group", {})
+    for name in sorted(groups):
+        success, reward, episodes = _metrics(groups[name])
+        rows.append(f"| {name} | {success:.1f} | {reward:.2f} | {episodes} |")
+
+    overall = info.get("overall", {})
+    if overall:
+        success, reward, episodes = _metrics(overall)
+        rows.append(f"| **Overall** | **{success:.1f}** | **{reward:.2f}** | **{episodes}** |")
+
+    videos = overall.get("video_paths", [])
+    if videos:
+        rows.extend(["", "<details><summary>Rollout videos</summary>\n"])
+        # Only the first ten videos are listed in the card.
+        for raw_path in videos[:10]:
+            video = Path(raw_path)
+            relative = f"{video.parent.name}/{video.name}"
+            rows.append(f"**{relative}**\n")
+            rows.append(f"![{video.name}](eval/{env_type}/videos/{relative})\n")
+        rows.append("</details>")
+
+    return "\n".join(rows)
+
+
+def _update_model_card_with_eval(api: Any, repo_id: str, info: dict, env_type: str) -> None:
+    """Append or replace the eval section for ``env_type`` in the model card README.
+
+    The section is delimited by HTML comment markers specific to ``env_type``. When a
+    complete marker pair is already present, the text between the markers is replaced.
+    Otherwise the section is appended, preceded by an "## Evaluation Results" heading
+    if the card does not contain one yet. The card is then pushed to ``repo_id``.
+
+    Args:
+        api: Hub client; accepted for interface compatibility and not used here.
+        repo_id: HF model repo whose README is updated.
+        info: The eval results dict rendered into the table.
+        env_type: Environment type string; part of the section markers and heading.
+
+    Raises:
+        Exception: Any error other than a missing README while loading the card is
+            propagated, so that an existing card is never overwritten by an empty one.
+    """
+    from huggingface_hub import ModelCard
+    from huggingface_hub.utils import EntryNotFoundError
+
+    try:
+        card = ModelCard.load(repo_id)
+    except EntryNotFoundError:
+        # The repo has no README.md yet: start from an empty card. Other failures
+        # (network, auth, malformed metadata) must not fall back to an empty card,
+        # because pushing it would wipe the existing README.
+        logging.warning(f"No model card found for {repo_id}; creating a new one.")
+        card = ModelCard("")
+
+    content = card.content or ""
+
+    eval_table = _format_eval_table(info, env_type)
+
+    section_marker_start = f"<!-- eval-results-{env_type}-start -->"
+    section_marker_end = f"<!-- eval-results-{env_type}-end -->"
+    new_section = f"{section_marker_start}\n{eval_table}\n{section_marker_end}"
+
+    section_pattern = re.compile(
+        rf"{re.escape(section_marker_start)}.*?{re.escape(section_marker_end)}",
+        flags=re.DOTALL,
+    )
+    # A callable replacement inserts the text verbatim; a plain string would have its
+    # backslashes (e.g. in group or video names) interpreted as regex escapes.
+    content, n_replaced = section_pattern.subn(lambda _match: new_section, content)
+
+    if n_replaced == 0:
+        # No complete marker pair (first push, or a card with a dangling marker):
+        # append the section instead of silently dropping the new results.
+        eval_header = "\n## Evaluation Results\n\n"
+        if "## Evaluation Results" not in content:
+            content += eval_header
+        content += f"\n{new_section}\n"
+
+    card.content = content
+    card.push_to_hub(repo_id, commit_message=f"Update eval results for {env_type}")
+
+
@parser.wrap()
 def eval_main(cfg: EvalPipelineConfig):
    logging.info(pformat(asdict(cfg)))
@@ -573,9 +694,20 @@ def eval_main(cfg: EvalPipelineConfig):
    close_envs(envs)

    # Save info
-    with open(Path(cfg.output_dir) / "eval_info.json", "w") as f:
+    output_dir = Path(cfg.output_dir)
+    output_dir.mkdir(parents=True, exist_ok=True)
+    with open(output_dir / "eval_info.json", "w") as f:
        json.dump(info, f, indent=2)

+    if cfg.push_to_hub:
+        repo_id = str(cfg.policy.pretrained_path)
+        push_eval_to_hub(
+            repo_id=repo_id,
+            output_dir=output_dir,
+            info=info,
+            env_type=cfg.env.type,
+        )
+
    logging.info("End of eval")