feat(eval): add --push_to_hub to upload eval results, videos, and model card to Hub

Adds a push_to_hub flag to lerobot-eval that uploads eval_info.json,
rollout videos, and appends an evaluation results table to the model
card on Hugging Face. Also declares missing LIBERO-plus runtime deps
in pyproject.toml and adds an asset validation check for libero_plus.

Made-with: Cursor
This commit is contained in:
Pepijn Kooijmans
2026-03-16 02:39:24 +01:00
parent c9cfc88602
commit 89f9bd78ab
5 changed files with 174 additions and 1 deletions
+8
View File
@@ -180,6 +180,14 @@ libero_plus = [
"hf-egl-probe>=1.0.1; sys_platform == 'linux'",
"egl_probe>=1.0.1; sys_platform == 'linux'",
"libero @ git+https://github.com/sylvestf/LIBERO-plus.git@main ; sys_platform == 'linux'",
# LIBERO-plus setup.py has empty install_requires; declare its runtime deps here.
"robosuite>=1.4.0,<1.5.0; sys_platform == 'linux'",
"bddl>=1.0.1,<2.0.0; sys_platform == 'linux'",
"robomimic>=0.2.0,<0.3.0; sys_platform == 'linux'",
"easydict>=1.9; sys_platform == 'linux'",
"wand; sys_platform == 'linux'",
"scikit-image>=0.20.0; sys_platform == 'linux'",
"gym>=0.25.0,<0.27.0; sys_platform == 'linux'",
"lerobot[scipy-dep]",
]
libero-plus = ["lerobot[libero_plus]"]
+2
View File
@@ -40,6 +40,8 @@ class EvalPipelineConfig:
rename_map: dict[str, str] = field(default_factory=dict)
# Explicit consent to execute remote code from the Hub (required for hub environments).
trust_remote_code: bool = False
# Push eval results (metrics JSON, rollout videos, model card update) to the model's Hub repo.
push_to_hub: bool = False
def __post_init__(self) -> None:
# HACK: We parse again the cli args here to get the pretrained path if there was one.
+5
View File
@@ -191,6 +191,11 @@ def make_env(
if cfg.task is None:
raise ValueError("LiberoEnv requires a task to be specified")
if cfg.type == "libero_plus":
from lerobot.envs.libero import _check_libero_plus_assets
_check_libero_plus_assets()
return create_libero_envs(
task=cfg.task,
n_envs=n_envs,
+26
View File
@@ -77,6 +77,32 @@ from libero.libero.envs import OffScreenRenderEnv
from lerobot.processor import RobotObservation
_ASSET_DOWNLOAD_INSTRUCTIONS = """\
LIBERO-plus assets not found at: {assets_dir}
The LIBERO-plus benchmark requires ~6 GB of scene/texture/object assets that
are hosted separately on Hugging Face. To download and install them:
python -c "
from huggingface_hub import hf_hub_download
hf_hub_download('Sylvest/LIBERO-plus', 'assets.zip',
repo_type='dataset', local_dir='/tmp/libero-plus-assets')
"
unzip /tmp/libero-plus-assets/assets.zip -d /tmp/libero-plus-assets-unzipped
# The zip contains a deeply nested path; move the assets directory:
mv /tmp/libero-plus-assets-unzipped/inspire/*/assets {assets_dir}
rm -rf /tmp/libero-plus-assets /tmp/libero-plus-assets-unzipped
See https://huggingface.co/datasets/Sylvest/LIBERO-plus for details.
"""
def _check_libero_plus_assets() -> None:
"""Validate that LIBERO-plus scene assets are present."""
assets_dir = Path(get_libero_path("benchmark_root")) / "assets"
if not (assets_dir / "scenes").is_dir():
raise FileNotFoundError(_ASSET_DOWNLOAD_INSTRUCTIONS.format(assets_dir=assets_dir))
def _parse_camera_names(camera_name: str | Sequence[str]) -> list[str]:
"""Normalize camera_name into a non-empty list of strings."""
+133 -1
View File
@@ -49,6 +49,7 @@ You can learn about the CLI options for this script in the `EvalPipelineConfig`
import concurrent.futures as cf
import json
import logging
import re
import threading
import time
from collections import defaultdict
@@ -502,6 +503,126 @@ def _compile_episode_data(
return data_dict
def push_eval_to_hub(
repo_id: str,
output_dir: Path,
info: dict,
env_type: str,
) -> str:
"""Upload eval results, videos, and an updated model card to the Hub.
Args:
repo_id: HF model repo (e.g. "user/my_policy").
output_dir: Local directory containing eval_info.json and videos/.
info: The eval results dict (as returned by eval_policy_all).
env_type: Environment type string (e.g. "libero_plus", "pusht").
Returns:
URL of the last Hub commit.
"""
from huggingface_hub import HfApi
api = HfApi()
api.create_repo(repo_id=repo_id, exist_ok=True)
# 1. Upload eval_info.json
eval_json_path = output_dir / "eval_info.json"
commit_url = ""
if eval_json_path.exists():
commit_url = api.upload_file(
path_or_fileobj=str(eval_json_path),
path_in_repo=f"eval/{env_type}/eval_info.json",
repo_id=repo_id,
commit_message=f"Upload eval results for {env_type}",
)
# 2. Upload rollout videos
videos_dir = output_dir / "videos"
if videos_dir.is_dir():
api.upload_folder(
folder_path=str(videos_dir),
path_in_repo=f"eval/{env_type}/videos",
repo_id=repo_id,
commit_message=f"Upload eval rollout videos for {env_type}",
)
# 3. Update the model card with an eval results table
_update_model_card_with_eval(api, repo_id, info, env_type)
logging.info(f"Eval results pushed to https://huggingface.co/{repo_id}")
return commit_url
def _format_eval_table(info: dict, env_type: str) -> str:
"""Build a markdown table from eval results."""
lines = [
f"### Evaluation: `{env_type}`\n",
"| Suite | Success Rate (%) | Avg Sum Reward | Episodes |",
"|-------|-----------------|----------------|----------|",
]
per_group = info.get("per_group", {})
for group_name, stats in sorted(per_group.items()):
sr = stats.get("pc_success", float("nan"))
reward = stats.get("avg_sum_reward", float("nan"))
n_ep = stats.get("n_episodes", 0)
lines.append(f"| {group_name} | {sr:.1f} | {reward:.2f} | {n_ep} |")
overall = info.get("overall", {})
if overall:
sr = overall.get("pc_success", float("nan"))
reward = overall.get("avg_sum_reward", float("nan"))
n_ep = overall.get("n_episodes", 0)
lines.append(f"| **Overall** | **{sr:.1f}** | **{reward:.2f}** | **{n_ep}** |")
video_paths = overall.get("video_paths", [])
if video_paths:
lines.append("")
lines.append("<details><summary>Rollout videos</summary>\n")
for vp in video_paths[:10]:
video_name = Path(vp).name
parent = Path(vp).parent.name
lines.append(f"**{parent}/{video_name}**\n")
lines.append(f"![{video_name}](eval/{env_type}/videos/{parent}/{video_name})\n")
lines.append("</details>")
return "\n".join(lines)
def _update_model_card_with_eval(api: Any, repo_id: str, info: dict, env_type: str) -> None:
"""Append or replace the eval section in the model card README."""
from huggingface_hub import ModelCard
try:
card = ModelCard.load(repo_id)
except Exception:
card = ModelCard("")
content = card.content or ""
eval_table = _format_eval_table(info, env_type)
section_marker_start = f"<!-- eval-results-{env_type}-start -->"
section_marker_end = f"<!-- eval-results-{env_type}-end -->"
new_section = f"{section_marker_start}\n{eval_table}\n{section_marker_end}"
if section_marker_start in content:
content = re.sub(
rf"{re.escape(section_marker_start)}.*?{re.escape(section_marker_end)}",
new_section,
content,
flags=re.DOTALL,
)
else:
eval_header = "\n## Evaluation Results\n\n"
if "## Evaluation Results" not in content:
content += eval_header
content += f"\n{new_section}\n"
card.content = content
card.push_to_hub(repo_id, commit_message=f"Update eval results for {env_type}")
@parser.wrap()
def eval_main(cfg: EvalPipelineConfig):
logging.info(pformat(asdict(cfg)))
@@ -573,9 +694,20 @@ def eval_main(cfg: EvalPipelineConfig):
close_envs(envs)
# Save info
with open(Path(cfg.output_dir) / "eval_info.json", "w") as f:
output_dir = Path(cfg.output_dir)
output_dir.mkdir(parents=True, exist_ok=True)
with open(output_dir / "eval_info.json", "w") as f:
json.dump(info, f, indent=2)
if cfg.push_to_hub:
repo_id = str(cfg.policy.pretrained_path)
push_eval_to_hub(
repo_id=repo_id,
output_dir=output_dir,
info=info,
env_type=cfg.env.type,
)
logging.info("End of eval")