feat(eval): add --push_to_hub to upload eval results, videos, and model card to Hub

Adds a push_to_hub flag to lerobot-eval that uploads eval_info.json and
rollout videos, and appends an evaluation results table to the model
card on Hugging Face. Also declares the missing LIBERO-plus runtime deps
in pyproject.toml and adds an asset validation check for libero_plus.

Made-with: Cursor
This commit is contained in:
Pepijn Kooijmans
2026-03-16 02:39:24 +01:00
parent c9cfc88602
commit 89f9bd78ab
5 changed files with 174 additions and 1 deletions
+8
View File
@@ -180,6 +180,14 @@ libero_plus = [
"hf-egl-probe>=1.0.1; sys_platform == 'linux'", "hf-egl-probe>=1.0.1; sys_platform == 'linux'",
"egl_probe>=1.0.1; sys_platform == 'linux'", "egl_probe>=1.0.1; sys_platform == 'linux'",
"libero @ git+https://github.com/sylvestf/LIBERO-plus.git@main ; sys_platform == 'linux'", "libero @ git+https://github.com/sylvestf/LIBERO-plus.git@main ; sys_platform == 'linux'",
# LIBERO-plus setup.py has empty install_requires; declare its runtime deps here.
"robosuite>=1.4.0,<1.5.0; sys_platform == 'linux'",
"bddl>=1.0.1,<2.0.0; sys_platform == 'linux'",
"robomimic>=0.2.0,<0.3.0; sys_platform == 'linux'",
"easydict>=1.9; sys_platform == 'linux'",
"wand; sys_platform == 'linux'",
"scikit-image>=0.20.0; sys_platform == 'linux'",
"gym>=0.25.0,<0.27.0; sys_platform == 'linux'",
"lerobot[scipy-dep]", "lerobot[scipy-dep]",
] ]
libero-plus = ["lerobot[libero_plus]"] libero-plus = ["lerobot[libero_plus]"]
+2
View File
@@ -40,6 +40,8 @@ class EvalPipelineConfig:
rename_map: dict[str, str] = field(default_factory=dict) rename_map: dict[str, str] = field(default_factory=dict)
# Explicit consent to execute remote code from the Hub (required for hub environments). # Explicit consent to execute remote code from the Hub (required for hub environments).
trust_remote_code: bool = False trust_remote_code: bool = False
# Push eval results (metrics JSON, rollout videos, model card update) to the model's Hub repo.
push_to_hub: bool = False
def __post_init__(self) -> None: def __post_init__(self) -> None:
# HACK: We parse again the cli args here to get the pretrained path if there was one. # HACK: We parse again the cli args here to get the pretrained path if there was one.
+5
View File
@@ -191,6 +191,11 @@ def make_env(
if cfg.task is None: if cfg.task is None:
raise ValueError("LiberoEnv requires a task to be specified") raise ValueError("LiberoEnv requires a task to be specified")
if cfg.type == "libero_plus":
from lerobot.envs.libero import _check_libero_plus_assets
_check_libero_plus_assets()
return create_libero_envs( return create_libero_envs(
task=cfg.task, task=cfg.task,
n_envs=n_envs, n_envs=n_envs,
+26
View File
@@ -77,6 +77,32 @@ from libero.libero.envs import OffScreenRenderEnv
from lerobot.processor import RobotObservation from lerobot.processor import RobotObservation
# Error text raised by `_check_libero_plus_assets` when the LIBERO-plus assets
# are missing. It is a `str.format` template: `{assets_dir}` is replaced with the
# expected assets directory, so any literal brace added here must be doubled.
_ASSET_DOWNLOAD_INSTRUCTIONS = """\
LIBERO-plus assets not found at: {assets_dir}
The LIBERO-plus benchmark requires ~6 GB of scene/texture/object assets that
are hosted separately on Hugging Face. To download and install them:
python -c "
from huggingface_hub import hf_hub_download
hf_hub_download('Sylvest/LIBERO-plus', 'assets.zip',
repo_type='dataset', local_dir='/tmp/libero-plus-assets')
"
unzip /tmp/libero-plus-assets/assets.zip -d /tmp/libero-plus-assets-unzipped
# The zip contains a deeply nested path; move the assets directory:
mv /tmp/libero-plus-assets-unzipped/inspire/*/assets {assets_dir}
rm -rf /tmp/libero-plus-assets /tmp/libero-plus-assets-unzipped
See https://huggingface.co/datasets/Sylvest/LIBERO-plus for details.
"""
def _check_libero_plus_assets() -> None:
    """Ensure the LIBERO-plus ``assets/scenes`` directory exists.

    The assets directory is looked up as ``<benchmark_root>/assets``, where
    ``benchmark_root`` comes from ``get_libero_path``.

    Raises:
        FileNotFoundError: If ``<benchmark_root>/assets/scenes`` is not a
            directory. The message carries the download instructions.
    """
    benchmark_root = Path(get_libero_path("benchmark_root"))
    assets_dir = benchmark_root / "assets"
    scenes_dir = assets_dir / "scenes"
    if scenes_dir.is_dir():
        return
    message = _ASSET_DOWNLOAD_INSTRUCTIONS.format(assets_dir=assets_dir)
    raise FileNotFoundError(message)
def _parse_camera_names(camera_name: str | Sequence[str]) -> list[str]: def _parse_camera_names(camera_name: str | Sequence[str]) -> list[str]:
"""Normalize camera_name into a non-empty list of strings.""" """Normalize camera_name into a non-empty list of strings."""
+133 -1
View File
@@ -49,6 +49,7 @@ You can learn about the CLI options for this script in the `EvalPipelineConfig`
import concurrent.futures as cf import concurrent.futures as cf
import json import json
import logging import logging
import re
import threading import threading
import time import time
from collections import defaultdict from collections import defaultdict
@@ -502,6 +503,126 @@ def _compile_episode_data(
return data_dict return data_dict
def push_eval_to_hub(
    repo_id: str,
    output_dir: Path,
    info: dict,
    env_type: str,
) -> str:
    """Upload eval results, videos, and an updated model card to the Hub.

    The repo is created if it does not exist yet. Artifacts are stored under
    ``eval/{env_type}/`` so results for different environments do not
    overwrite each other.

    Args:
        repo_id: HF model repo (e.g. "user/my_policy").
        output_dir: Local directory containing eval_info.json and videos/.
        info: The eval results dict (as returned by eval_policy_all).
        env_type: Environment type string (e.g. "libero_plus", "pusht").

    Returns:
        Commit reference returned by the most recent artifact upload made here
        (the videos upload when ``videos/`` exists, otherwise the
        ``eval_info.json`` upload), or ``""`` when neither exists locally.
        The model-card commit is made separately and is not reflected here.
    """
    from huggingface_hub import HfApi

    api = HfApi()
    api.create_repo(repo_id=repo_id, exist_ok=True)

    hub_prefix = f"eval/{env_type}"
    commit_url = ""

    # 1. Upload eval_info.json
    eval_json_path = output_dir / "eval_info.json"
    if eval_json_path.exists():
        commit_url = api.upload_file(
            path_or_fileobj=str(eval_json_path),
            path_in_repo=f"{hub_prefix}/eval_info.json",
            repo_id=repo_id,
            commit_message=f"Upload eval results for {env_type}",
        )

    # 2. Upload rollout videos. Keep the returned commit so the function
    #    reports the latest upload rather than only the first one.
    videos_dir = output_dir / "videos"
    if videos_dir.is_dir():
        commit_url = api.upload_folder(
            folder_path=str(videos_dir),
            path_in_repo=f"{hub_prefix}/videos",
            repo_id=repo_id,
            commit_message=f"Upload eval rollout videos for {env_type}",
        )

    # 3. Update the model card with an eval results table
    _update_model_card_with_eval(api, repo_id, info, env_type)

    logging.info(f"Eval results pushed to https://huggingface.co/{repo_id}")
    return commit_url
def _format_eval_table(info: dict, env_type: str) -> str:
"""Build a markdown table from eval results."""
lines = [
f"### Evaluation: `{env_type}`\n",
"| Suite | Success Rate (%) | Avg Sum Reward | Episodes |",
"|-------|-----------------|----------------|----------|",
]
per_group = info.get("per_group", {})
for group_name, stats in sorted(per_group.items()):
sr = stats.get("pc_success", float("nan"))
reward = stats.get("avg_sum_reward", float("nan"))
n_ep = stats.get("n_episodes", 0)
lines.append(f"| {group_name} | {sr:.1f} | {reward:.2f} | {n_ep} |")
overall = info.get("overall", {})
if overall:
sr = overall.get("pc_success", float("nan"))
reward = overall.get("avg_sum_reward", float("nan"))
n_ep = overall.get("n_episodes", 0)
lines.append(f"| **Overall** | **{sr:.1f}** | **{reward:.2f}** | **{n_ep}** |")
video_paths = overall.get("video_paths", [])
if video_paths:
lines.append("")
lines.append("<details><summary>Rollout videos</summary>\n")
for vp in video_paths[:10]:
video_name = Path(vp).name
parent = Path(vp).parent.name
lines.append(f"**{parent}/{video_name}**\n")
lines.append(f"![{video_name}](eval/{env_type}/videos/{parent}/{video_name})\n")
lines.append("</details>")
return "\n".join(lines)
def _update_model_card_with_eval(api: Any, repo_id: str, info: dict, env_type: str) -> None:
    """Append or replace the eval section for ``env_type`` in the model card README.

    The section is delimited by HTML comment markers specific to ``env_type``,
    so re-running an eval replaces the previous table for that environment
    and leaves the rest of the card untouched.

    Args:
        api: Hub API client. Currently unused; kept for interface stability.
        repo_id: HF model repo whose README is updated.
        info: Eval results dict passed on to ``_format_eval_table``.
        env_type: Environment type; selects the section markers.

    Raises:
        Any error from loading the card other than a missing README is
        propagated, so a transient failure can never cause the existing README
        to be overwritten with an empty card.
    """
    from huggingface_hub import ModelCard
    from huggingface_hub.utils import EntryNotFoundError

    try:
        card = ModelCard.load(repo_id)
    except EntryNotFoundError:
        # The repo has no README.md yet: start from an empty card.
        card = ModelCard("")

    content = card.content or ""
    eval_table = _format_eval_table(info, env_type)

    section_marker_start = f"<!-- eval-results-{env_type}-start -->"
    section_marker_end = f"<!-- eval-results-{env_type}-end -->"
    new_section = f"{section_marker_start}\n{eval_table}\n{section_marker_end}"

    section_pattern = re.compile(
        rf"{re.escape(section_marker_start)}.*?{re.escape(section_marker_end)}",
        flags=re.DOTALL,
    )

    if section_pattern.search(content):
        # A callable replacement inserts the section verbatim; a plain string
        # would have backslashes and group references interpreted by `re`.
        content = section_pattern.sub(lambda _match: new_section, content)
    else:
        # Also reached when only the start marker is present (no end marker):
        # append a fresh section instead of silently leaving the card unchanged.
        eval_header = "\n## Evaluation Results\n\n"
        if "## Evaluation Results" not in content:
            content += eval_header
        content += f"\n{new_section}\n"

    card.content = content
    card.push_to_hub(repo_id, commit_message=f"Update eval results for {env_type}")
@parser.wrap() @parser.wrap()
def eval_main(cfg: EvalPipelineConfig): def eval_main(cfg: EvalPipelineConfig):
logging.info(pformat(asdict(cfg))) logging.info(pformat(asdict(cfg)))
@@ -573,9 +694,20 @@ def eval_main(cfg: EvalPipelineConfig):
close_envs(envs) close_envs(envs)
# Save info # Save info
with open(Path(cfg.output_dir) / "eval_info.json", "w") as f: output_dir = Path(cfg.output_dir)
output_dir.mkdir(parents=True, exist_ok=True)
with open(output_dir / "eval_info.json", "w") as f:
json.dump(info, f, indent=2) json.dump(info, f, indent=2)
if cfg.push_to_hub:
repo_id = str(cfg.policy.pretrained_path)
push_eval_to_hub(
repo_id=repo_id,
output_dir=output_dir,
info=info,
env_type=cfg.env.type,
)
logging.info("End of eval") logging.info("End of eval")