#!/usr/bin/env python3

# Copyright 2025 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Extract natural-language task descriptions for a benchmark suite.

Runs inside the benchmark Docker container (where the env library is
installed) immediately after lerobot-eval, writing a JSON file that
parse_eval_metrics.py picks up and embeds in metrics.json.

Output format:
    {"<task>_<task_id>": "<description>", ...}

Usage:
    python scripts/ci/extract_task_descriptions.py \\
        --env libero --task libero_spatial \\
        --output /tmp/eval-artifacts/task_descriptions.json
"""

from __future__ import annotations

import argparse
import json
import re
import sys
from pathlib import Path

# LIBERO-plus derives task.language by space-joining the perturbation-variant
# filename (grab_language_from_filename in libero/libero/benchmark/__init__.py),
# so non-_language_ variants inherit a trailing metadata blob like
# "view 0 0 100 0 0 initstate 0 noise 45" or "add 16". Strip those tokens so
# the description matches the base instruction used in the training dataset.
_LIBERO_PERTURBATION_TAIL_RE = re.compile(
    r"(?:\s(?:view|initstate|noise|add|tb|table|light|level)(?:\s\d+)+)+$"
)


def _strip_libero_perturbation_tail(instruction: str) -> str:
    """Return *instruction* without a trailing perturbation-metadata blob.

    Only a run of ``<keyword> <int> [<int> ...]`` groups anchored at the very
    end of the string is removed; instructions without such a tail are
    returned unchanged apart from surrounding whitespace.

    >>> _strip_libero_perturbation_tail("open the drawer add 16")
    'open the drawer'
    """
    return _LIBERO_PERTURBATION_TAIL_RE.sub("", instruction).strip()


def _libero_descriptions(task_suite: str) -> dict[str, str]:
    """Return ``{"<task_suite>_<i>": instruction}`` for every task in a LIBERO suite.

    The ``libero`` package is imported inside the function, so it is only
    needed when a LIBERO env is actually requested. An unknown suite name is
    reported on stderr and yields an empty mapping.
    """
    from libero.libero import benchmark  # type: ignore[import-untyped]

    suite_dict = benchmark.get_benchmark_dict()
    if task_suite not in suite_dict:
        print(
            f"[extract_task_descriptions] Unknown LIBERO suite '{task_suite}'. "
            f"Available: {list(suite_dict.keys())}",
            file=sys.stderr,
        )
        return {}
    suite = suite_dict[task_suite]()
    return {
        f"{task_suite}_{i}": _strip_libero_perturbation_tail(suite.get_task(i).language)
        for i in range(suite.n_tasks)
    }


def _metaworld_descriptions(task_name: str) -> dict[str, str]:
    """Return a single ``{"<task_name>_0": label}`` entry for a MetaWorld task."""
    # MetaWorld tasks don't expose a separate NL description attribute;
    # use a cleaned version of the task name as the description.
    label = task_name.removeprefix("metaworld-").replace("-", " ").strip()
    return {f"{task_name}_0": label}


def _robotwin_descriptions(task_names: str) -> dict[str, str]:
    """Return descriptions for each requested RoboTwin task.

    Reads `description/task_instruction/<task_name>.json` relative to the
    current working directory (the RoboTwin clone; cwd is /opt/robotwin in
    CI). Falls back to the task name, with underscores turned into spaces,
    when the file is missing, unreadable, not valid JSON, or has no usable
    `full_description` string.

    Args:
        task_names: Comma-separated task names; blank entries are ignored.

    Returns:
        Mapping of ``"<task_name>_0"`` to the description.
    """
    out: dict[str, str] = {}
    root = Path("description/task_instruction")
    for name in (t.strip() for t in task_names.split(",") if t.strip()):
        desc_file = root / f"{name}.json"
        desc = name.replace("_", " ")
        if desc_file.is_file():
            try:
                data = json.loads(desc_file.read_text(encoding="utf-8"))
            except (OSError, ValueError) as exc:
                # One bad file must not discard the descriptions of the other
                # tasks: warn and keep the name-derived fallback for this one.
                print(
                    f"[extract_task_descriptions] Warning: cannot read {desc_file}: {exc}",
                    file=sys.stderr,
                )
                data = None
            full = data.get("full_description") if isinstance(data, dict) else None
            if not isinstance(full, str) or not full:
                full = desc
            # Drop literal angle brackets from the instruction text; whatever
            # they enclosed is kept so the sentence stays readable.
            desc = full.replace("<", "").replace(">", "")
        out[f"{name}_0"] = desc
    return out


def _robocasa_descriptions(task_spec: str) -> dict[str, str]:
    """For each task in the comma-separated list, emit a cleaned-name label.

    RoboCasa episodes carry their language instruction in the env's
    `ep_meta['lang']`, populated per reset. Pulling it requires spinning up
    the full kitchen env per task (~seconds each); we use the task name as
    the key here and let the eval's episode info carry the actual instruction.
    """
    out: dict[str, str] = {}
    for task in (t.strip() for t in task_spec.split(",") if t.strip()):
        # Split CamelCase into words: "CloseFridge" → "close fridge".
        label = "".join(f" {c.lower()}" if c.isupper() else c for c in task).strip()
        out[f"{task}_0"] = label or task
    return out


# Hand-written one-line descriptions, keyed by RoboMME task name.
_ROBOMME_DESCRIPTIONS = {
    "BinFill": "Fill the target bin with the correct number of cubes",
    "PickXtimes": "Pick the indicated cube the specified number of times",
    "SwingXtimes": "Swing the object the specified number of times",
    "StopCube": "Grasp and stop the moving cube",
    "VideoUnmask": "Pick the cube shown in the reference video",
    "VideoUnmaskSwap": "Pick the cube matching the reference video after a swap",
    "ButtonUnmask": "Press the button indicated by the reference",
    "ButtonUnmaskSwap": "Press the correct button after objects are swapped",
    "PickHighlight": "Pick the highlighted cube",
    "VideoRepick": "Repick the cube shown in the reference video",
    "VideoPlaceButton": "Place the cube on the button shown in the video",
    "VideoPlaceOrder": "Place cubes in the order shown in the video",
    "MoveCube": "Move the cube to the target location",
    "InsertPeg": "Insert the peg into the target hole",
    "PatternLock": "Unlock the pattern by pressing buttons in sequence",
    "RouteStick": "Route the stick through the required waypoints",
}


def _robomme_descriptions(task_names: str, task_ids: list[int] | None = None) -> dict[str, str]:
    """Return descriptions for each requested RoboMME task.

    Keys match the video filename pattern `<task>_<task_id>` used by the eval
    script. Task names missing from `_ROBOMME_DESCRIPTIONS` are described by
    their own name.

    Args:
        task_names: Comma-separated task names; blank entries are ignored.
        task_ids: Task IDs to emit a key for; `None` means `[0]`.
    """
    if task_ids is None:
        task_ids = [0]
    out: dict[str, str] = {}
    for name in (t.strip() for t in task_names.split(",") if t.strip()):
        desc = _ROBOMME_DESCRIPTIONS.get(name, name)
        for tid in task_ids:
            out[f"{name}_{tid}"] = desc
    return out


def _vlabench_descriptions(task_spec: str) -> dict[str, str]:
    """For each task in the comma-separated list, emit a cleaned-name label.

    VLABench tasks carry language instructions on their dm_control task
    object, but pulling them requires loading the full env per task
    (~seconds each). The CI smoke-eval already captures the instruction
    inside its episode info; this mapping is just enough to key
    `metrics.json` by `<task>_0`.
    """
    out: dict[str, str] = {}
    for task in (t.strip() for t in task_spec.split(",") if t.strip()):
        out[f"{task}_0"] = task.replace("_", " ").strip()
    return out


def main() -> int:
    """Parse CLI arguments, extract descriptions and write them as JSON.

    Extraction is best-effort: an unsupported env or any exception raised by
    an extractor is reported on stderr and results in an empty mapping being
    written, so the output file always exists.

    Returns:
        Always 0. Malformed ``--task-ids`` exits via ``parser.error``.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("--env", required=True, help="Environment family (libero, metaworld, ...)")
    parser.add_argument("--task", required=True, help="Task/suite name (e.g. libero_spatial)")
    parser.add_argument(
        "--task-ids",
        type=str,
        default=None,
        help="Comma-separated task IDs (e.g. '0,1,2'). Default: [0]",
    )
    parser.add_argument("--output", required=True, help="Path to write task_descriptions.json")
    args = parser.parse_args()

    task_ids: list[int] | None = None
    if args.task_ids:
        try:
            # Skip empty tokens so "0,1," or "0,,1" do not reach int("").
            parsed = [int(tok) for tok in (p.strip() for p in args.task_ids.split(",")) if tok]
        except ValueError:
            parser.error(f"--task-ids must be comma-separated integers, got {args.task_ids!r}")
        # Nothing but separators: behave as if the flag was not given.
        task_ids = parsed or None

    descriptions: dict[str, str] = {}
    try:
        # Membership test: comparing the string to the tuple with `==` is
        # always False and would skip both LIBERO env families.
        if args.env in ("libero", "libero_plus"):
            descriptions = _libero_descriptions(args.task)
        elif args.env == "metaworld":
            descriptions = _metaworld_descriptions(args.task)
        elif args.env == "robotwin":
            descriptions = _robotwin_descriptions(args.task)
        elif args.env == "robocasa":
            descriptions = _robocasa_descriptions(args.task)
        elif args.env == "robomme":
            descriptions = _robomme_descriptions(args.task, task_ids=task_ids)
        elif args.env == "vlabench":
            descriptions = _vlabench_descriptions(args.task)
        else:
            print(
                f"[extract_task_descriptions] No description extractor for env '{args.env}'.",
                file=sys.stderr,
            )
    except Exception as exc:
        # Top-level boundary: an extractor failure (including a missing env
        # library) is downgraded to a warning and an empty file is written.
        print(f"[extract_task_descriptions] Warning: {exc}", file=sys.stderr)

    out_path = Path(args.output)
    out_path.parent.mkdir(parents=True, exist_ok=True)
    out_path.write_text(json.dumps(descriptions, indent=2), encoding="utf-8")
    print(f"[extract_task_descriptions] {len(descriptions)} descriptions → {out_path}")
    return 0


if __name__ == "__main__":
    sys.exit(main())