From 89f9bd78abbe9ea503b2ae57c1bc73d4db5cd0b4 Mon Sep 17 00:00:00 2001
From: Pepijn Kooijmans
Date: Mon, 16 Mar 2026 02:39:24 +0100
Subject: [PATCH] feat(eval): add --push_to_hub to upload eval results, videos,
 and model card to Hub

Adds a push_to_hub flag to lerobot-eval that uploads eval_info.json and
rollout videos, and appends an evaluation results table to the model
card on Hugging Face.

Also declares missing LIBERO-plus runtime deps in pyproject.toml and adds
an asset validation check for libero_plus.

Made-with: Cursor
---
 pyproject.toml                      |   8 ++
 src/lerobot/configs/eval.py         |   2 +
 src/lerobot/envs/factory.py         |   5 ++
 src/lerobot/envs/libero.py          |  26 ++++++
 src/lerobot/scripts/lerobot_eval.py | 148 +++++++++++++++++++++++++++-
 5 files changed, 188 insertions(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index 28eb59bf2..6d284d5ff 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -180,6 +180,14 @@ libero_plus = [
     "hf-egl-probe>=1.0.1; sys_platform == 'linux'",
     "egl_probe>=1.0.1; sys_platform == 'linux'",
     "libero @ git+https://github.com/sylvestf/LIBERO-plus.git@main ; sys_platform == 'linux'",
+    # LIBERO-plus setup.py has empty install_requires; declare its runtime deps here.
+    "robosuite>=1.4.0,<1.5.0; sys_platform == 'linux'",
+    "bddl>=1.0.1,<2.0.0; sys_platform == 'linux'",
+    "robomimic>=0.2.0,<0.3.0; sys_platform == 'linux'",
+    "easydict>=1.9; sys_platform == 'linux'",
+    "wand; sys_platform == 'linux'",
+    "scikit-image>=0.20.0; sys_platform == 'linux'",
+    "gym>=0.25.0,<0.27.0; sys_platform == 'linux'",
     "lerobot[scipy-dep]",
 ]
 libero-plus = ["lerobot[libero_plus]"]
diff --git a/src/lerobot/configs/eval.py b/src/lerobot/configs/eval.py
index da8bee6b2..f2d2e22f4 100644
--- a/src/lerobot/configs/eval.py
+++ b/src/lerobot/configs/eval.py
@@ -40,6 +40,8 @@ class EvalPipelineConfig:
     rename_map: dict[str, str] = field(default_factory=dict)
     # Explicit consent to execute remote code from the Hub (required for hub environments).
     trust_remote_code: bool = False
+    # Push eval results (metrics JSON, rollout videos, model card update) to the model's Hub repo.
+    push_to_hub: bool = False
 
     def __post_init__(self) -> None:
         # HACK: We parse again the cli args here to get the pretrained path if there was one.
diff --git a/src/lerobot/envs/factory.py b/src/lerobot/envs/factory.py
index 2810e4025..7471d57b9 100644
--- a/src/lerobot/envs/factory.py
+++ b/src/lerobot/envs/factory.py
@@ -191,6 +191,11 @@ def make_env(
         if cfg.task is None:
             raise ValueError("LiberoEnv requires a task to be specified")
 
+        if cfg.type == "libero_plus":
+            from lerobot.envs.libero import _check_libero_plus_assets
+
+            _check_libero_plus_assets()
+
         return create_libero_envs(
             task=cfg.task,
             n_envs=n_envs,
diff --git a/src/lerobot/envs/libero.py b/src/lerobot/envs/libero.py
index f5f639891..a289728b5 100644
--- a/src/lerobot/envs/libero.py
+++ b/src/lerobot/envs/libero.py
@@ -77,6 +77,32 @@ from libero.libero.envs import OffScreenRenderEnv
 
 from lerobot.processor import RobotObservation
 
+_ASSET_DOWNLOAD_INSTRUCTIONS = """\
+LIBERO-plus assets not found at: {assets_dir}
+
+The LIBERO-plus benchmark requires ~6 GB of scene/texture/object assets that
+are hosted separately on Hugging Face. To download and install them:
+
+    python -c "
+from huggingface_hub import hf_hub_download
+hf_hub_download('Sylvest/LIBERO-plus', 'assets.zip',
+                repo_type='dataset', local_dir='/tmp/libero-plus-assets')
+"
+    unzip /tmp/libero-plus-assets/assets.zip -d /tmp/libero-plus-assets-unzipped
+    # The zip contains a deeply nested path; move the assets directory:
+    mv /tmp/libero-plus-assets-unzipped/inspire/*/assets {assets_dir}
+    rm -rf /tmp/libero-plus-assets /tmp/libero-plus-assets-unzipped
+
+See https://huggingface.co/datasets/Sylvest/LIBERO-plus for details.
+"""
+
+
+def _check_libero_plus_assets() -> None:
+    """Validate that LIBERO-plus scene assets are present."""
+    assets_dir = Path(get_libero_path("benchmark_root")) / "assets"
+    if not (assets_dir / "scenes").is_dir():
+        raise FileNotFoundError(_ASSET_DOWNLOAD_INSTRUCTIONS.format(assets_dir=assets_dir))
+
 
 def _parse_camera_names(camera_name: str | Sequence[str]) -> list[str]:
     """Normalize camera_name into a non-empty list of strings."""
diff --git a/src/lerobot/scripts/lerobot_eval.py b/src/lerobot/scripts/lerobot_eval.py
index e32b80404..19e241bc2 100644
--- a/src/lerobot/scripts/lerobot_eval.py
+++ b/src/lerobot/scripts/lerobot_eval.py
@@ -49,6 +49,7 @@ You can learn about the CLI options for this script in the `EvalPipelineConfig`
 import concurrent.futures as cf
 import json
 import logging
+import re
 import threading
 import time
 from collections import defaultdict
@@ -502,6 +503,140 @@ def _compile_episode_data(
     return data_dict
 
 
+def push_eval_to_hub(
+    repo_id: str,
+    output_dir: Path,
+    info: dict,
+    env_type: str,
+) -> str:
+    """Upload eval results, videos, and an updated model card to the Hub.
+
+    Args:
+        repo_id: HF model repo (e.g. "user/my_policy").
+        output_dir: Local directory containing eval_info.json and videos/.
+        info: The eval results dict (as returned by eval_policy_all).
+        env_type: Environment type string (e.g. "libero_plus", "pusht").
+
+    Returns:
+        URL of the commit that uploaded eval_info.json, or "" if that file is missing.
+    """
+    from huggingface_hub import HfApi
+
+    api = HfApi()
+    api.create_repo(repo_id=repo_id, exist_ok=True)
+
+    # 1. Upload eval_info.json
+    eval_json_path = output_dir / "eval_info.json"
+    commit_url = ""
+    if eval_json_path.exists():
+        commit_url = api.upload_file(
+            path_or_fileobj=str(eval_json_path),
+            path_in_repo=f"eval/{env_type}/eval_info.json",
+            repo_id=repo_id,
+            commit_message=f"Upload eval results for {env_type}",
+        )
+
+    # 2. Upload rollout videos
+    videos_dir = output_dir / "videos"
+    if videos_dir.is_dir():
+        api.upload_folder(
+            folder_path=str(videos_dir),
+            path_in_repo=f"eval/{env_type}/videos",
+            repo_id=repo_id,
+            commit_message=f"Upload eval rollout videos for {env_type}",
+        )
+
+    # 3. Update the model card with an eval results table
+    _update_model_card_with_eval(api, repo_id, info, env_type)
+
+    logging.info(f"Eval results pushed to https://huggingface.co/{repo_id}")
+    return commit_url
+
+
+def _format_eval_table(info: dict, env_type: str) -> str:
+    """Build a markdown table from eval results.
+
+    One row is emitted per entry of ``info["per_group"]`` plus a bold "Overall"
+    row; up to 10 entries of ``info["overall"]["video_paths"]`` are linked below.
+    """
+    lines = [
+        f"### Evaluation: `{env_type}`\n",
+        "| Suite | Success Rate (%) | Avg Sum Reward | Episodes |",
+        "|-------|-----------------|----------------|----------|",
+    ]
+
+    per_group = info.get("per_group", {})
+    for group_name, stats in sorted(per_group.items()):
+        sr = stats.get("pc_success", float("nan"))
+        reward = stats.get("avg_sum_reward", float("nan"))
+        n_ep = stats.get("n_episodes", 0)
+        lines.append(f"| {group_name} | {sr:.1f} | {reward:.2f} | {n_ep} |")
+
+    overall = info.get("overall", {})
+    if overall:
+        sr = overall.get("pc_success", float("nan"))
+        reward = overall.get("avg_sum_reward", float("nan"))
+        n_ep = overall.get("n_episodes", 0)
+        lines.append(f"| **Overall** | **{sr:.1f}** | **{reward:.2f}** | **{n_ep}** |")
+
+    video_paths = overall.get("video_paths", [])
+    if video_paths:
+        lines.append("")
+        lines.append("<details><summary>Rollout videos</summary>\n")
+        for vp in video_paths[:10]:
+            video_name = Path(vp).name
+            parent = Path(vp).parent.name
+            lines.append(f"**{parent}/{video_name}**\n")
+            lines.append(f"![{video_name}](eval/{env_type}/videos/{parent}/{video_name})\n")
+        lines.append("</details>")
+
+    return "\n".join(lines)
+
+
+def _update_model_card_with_eval(api: Any, repo_id: str, info: dict, env_type: str) -> None:
+    """Append or replace the eval section in the model card README.
+
+    The section is wrapped in per-env HTML comment markers so that a later run
+    for the same ``env_type`` replaces it instead of appending a duplicate.
+    """
+    from huggingface_hub import ModelCard
+    from huggingface_hub.errors import EntryNotFoundError
+
+    try:
+        card = ModelCard.load(repo_id)
+    except EntryNotFoundError:
+        # No README.md yet: start from an empty card. Other failures (network, auth)
+        # propagate so an existing card is never overwritten with a blank one.
+        card = ModelCard("")
+
+    content = card.content or ""
+
+    eval_table = _format_eval_table(info, env_type)
+
+    # Markers must be non-empty: an empty marker is "in" every string and would
+    # make the regex below match at every position.
+    section_marker_start = f"<!-- eval-results-{env_type}-start -->"
+    section_marker_end = f"<!-- eval-results-{env_type}-end -->"
+    new_section = f"{section_marker_start}\n{eval_table}\n{section_marker_end}"
+
+    if section_marker_start in content:
+        content = re.sub(
+            rf"{re.escape(section_marker_start)}.*?{re.escape(section_marker_end)}",
+            # Callable replacement: backslashes in the table are inserted literally.
+            lambda _match: new_section,
+            content,
+            flags=re.DOTALL,
+        )
+    else:
+        eval_header = "\n## Evaluation Results\n\n"
+        if "## Evaluation Results" not in content:
+            content += eval_header
+        content += f"\n{new_section}\n"
+
+    card.content = content
+    card.push_to_hub(repo_id, commit_message=f"Update eval results for {env_type}")
+
+
 @parser.wrap()
 def eval_main(cfg: EvalPipelineConfig):
     logging.info(pformat(asdict(cfg)))
@@ -573,9 +708,20 @@ def eval_main(cfg: EvalPipelineConfig):
     close_envs(envs)
 
     # Save info
-    with open(Path(cfg.output_dir) / "eval_info.json", "w") as f:
+    output_dir = Path(cfg.output_dir)
+    output_dir.mkdir(parents=True, exist_ok=True)
+    with open(output_dir / "eval_info.json", "w") as f:
         json.dump(info, f, indent=2)
 
+    if cfg.push_to_hub:
+        repo_id = str(cfg.policy.pretrained_path)
+        push_eval_to_hub(
+            repo_id=repo_id,
+            output_dir=output_dir,
+            info=info,
+            env_type=cfg.env.type,
+        )
+
     logging.info("End of eval")
 
 