diff --git a/.github/workflows/benchmark_tests.yml b/.github/workflows/benchmark_tests.yml index 558569c57..00806f990 100644 --- a/.github/workflows/benchmark_tests.yml +++ b/.github/workflows/benchmark_tests.yml @@ -118,7 +118,7 @@ jobs: bash -c " hf auth login --token \"\$HF_USER_TOKEN\" --add-to-git-credential 2>/dev/null || true lerobot-eval \ - --policy.path=pepijn223/smolvla_libero \ + --policy.path=lerobot/smolvla_libero \ --env.type=libero \ --env.task=libero_spatial \ --eval.batch_size=1 \ @@ -147,7 +147,7 @@ jobs: --artifacts-dir /tmp/libero-artifacts \ --env libero \ --task libero_spatial \ - --policy pepijn223/smolvla_libero + --policy lerobot/smolvla_libero - name: Upload Libero rollout video if: always() @@ -270,7 +270,7 @@ jobs: bash -c " hf auth login --token \"\$HF_USER_TOKEN\" --add-to-git-credential 2>/dev/null || true lerobot-eval \ - --policy.path=pepijn223/smolvla_metaworld \ + --policy.path=lerobot/smolvla_metaworld \ --env.type=metaworld \ --env.task=metaworld-push-v3 \ --eval.batch_size=1 \ @@ -299,7 +299,7 @@ jobs: --artifacts-dir /tmp/metaworld-artifacts \ --env metaworld \ --task metaworld-push-v3 \ - --policy pepijn223/smolvla_metaworld + --policy lerobot/smolvla_metaworld - name: Upload MetaWorld rollout video if: always() @@ -317,6 +317,115 @@ jobs: path: /tmp/metaworld-artifacts/metrics.json if-no-files-found: warn + # ── ROBOTWIN 2.0 ────────────────────────────────────────────────────────── + # Isolated image: full RoboTwin 2.0 stack — SAPIEN, mplib, CuRobo, + # pytorch3d, + simulation assets (~4 GB). + # Build takes ~20 min on first run; subsequent runs hit the layer cache. + # Requires an NVIDIA GPU runner with CUDA 12.1 drivers. 
+ robotwin-integration-test: + name: RoboTwin 2.0 — build image + 1-episode eval + runs-on: + group: aws-g6-4xlarge-plus + env: + HF_USER_TOKEN: ${{ secrets.LEROBOT_HF_USER }} + ROBOTWIN_POLICY: lerobot/smolvla_robotwin + ROBOTWIN_TASKS: beat_block_hammer,click_bell,handover_block,stack_blocks_two,click_alarmclock,open_microwave,adjust_bottle,lift_pot,stamp_seal,turn_switch + + steps: + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + persist-credentials: false + lfs: true + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 # zizmor: ignore[unpinned-uses] + with: + cache-binary: false + + - name: Login to Docker Hub + if: ${{ env.DOCKERHUB_USERNAME != '' }} + uses: docker/login-action@v3 # zizmor: ignore[unpinned-uses] + with: + username: ${{ secrets.DOCKERHUB_LEROBOT_USERNAME }} + password: ${{ secrets.DOCKERHUB_LEROBOT_PASSWORD }} + env: + DOCKERHUB_USERNAME: ${{ secrets.DOCKERHUB_LEROBOT_USERNAME }} + + # Build the full-install image: SAPIEN, mplib, CuRobo, pytorch3d + + # simulation assets (~4 GB). Layer cache lives in the runner's local + # Docker daemon — reused across re-runs on the same machine. + - name: Build RoboTwin 2.0 benchmark image + uses: docker/build-push-action@v6 # zizmor: ignore[unpinned-uses] + with: + context: . + file: docker/Dockerfile.benchmark.robotwin + push: false + load: true + tags: lerobot-benchmark-robotwin:ci + cache-from: type=local,src=/tmp/.buildx-cache-robotwin + cache-to: type=local,dest=/tmp/.buildx-cache-robotwin,mode=max + + - name: Run RoboTwin 2.0 smoke eval (10 tasks, 1 episode each) + if: env.HF_USER_TOKEN != '' + run: | + # Named container (no --rm) so we can docker cp artifacts out. 
+ docker run --name robotwin-eval --gpus all \ + --shm-size=4g \ + -e HF_HOME=/tmp/hf \ + -e HF_USER_TOKEN="${HF_USER_TOKEN}" \ + -e ROBOTWIN_POLICY="${ROBOTWIN_POLICY}" \ + -e ROBOTWIN_TASKS="${ROBOTWIN_TASKS}" \ + lerobot-benchmark-robotwin:ci \ + bash -c " + hf auth login --token \"\$HF_USER_TOKEN\" --add-to-git-credential 2>/dev/null || true + cd /opt/robotwin && lerobot-eval \ + --policy.path=\"\$ROBOTWIN_POLICY\" \ + --env.type=robotwin \ + --env.task=\"\$ROBOTWIN_TASKS\" \ + --eval.batch_size=1 \ + --eval.n_episodes=1 \ + --eval.use_async_envs=false \ + --policy.device=cuda \ + '--rename_map={\"observation.images.head_camera\": \"observation.images.camera1\", \"observation.images.left_camera\": \"observation.images.camera2\", \"observation.images.right_camera\": \"observation.images.camera3\"}' \ + --output_dir=/tmp/eval-artifacts + python /lerobot/scripts/ci/extract_task_descriptions.py \ + --env robotwin \ + --task \"\$ROBOTWIN_TASKS\" \ + --output /tmp/eval-artifacts/task_descriptions.json + " + + - name: Copy RoboTwin artifacts from container + if: always() + run: | + mkdir -p /tmp/robotwin-artifacts + docker cp robotwin-eval:/tmp/eval-artifacts/. 
/tmp/robotwin-artifacts/ 2>/dev/null || true + docker rm -f robotwin-eval || true + + - name: Parse RoboTwin eval metrics + if: always() + run: | + python3 scripts/ci/parse_eval_metrics.py \ + --artifacts-dir /tmp/robotwin-artifacts \ + --env robotwin \ + --task "${ROBOTWIN_TASKS}" \ + --policy "${ROBOTWIN_POLICY}" + + - name: Upload RoboTwin rollout video + if: always() + uses: actions/upload-artifact@v4 + with: + name: robotwin-rollout-video + path: /tmp/robotwin-artifacts/videos/ + if-no-files-found: warn + + - name: Upload RoboTwin eval metrics + if: always() + uses: actions/upload-artifact@v4 + with: + name: robotwin-metrics + path: /tmp/robotwin-artifacts/metrics.json + if-no-files-found: warn + # ── ROBOCASA365 ────────────────────────────────────────────────────────── # Isolated image: robocasa + robosuite installed manually as editable # clones (no `lerobot[robocasa]` extra — robocasa's setup.py pins diff --git a/docker/Dockerfile.benchmark.robotwin b/docker/Dockerfile.benchmark.robotwin new file mode 100644 index 000000000..423854c31 --- /dev/null +++ b/docker/Dockerfile.benchmark.robotwin @@ -0,0 +1,122 @@ +# Copyright 2025 The HuggingFace Inc. team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Benchmark image for RoboTwin 2.0 integration tests. 
+# Extends the nightly GPU image with the RoboTwin simulator stack: +# sapien/mplib/pytorch3d + NVlabs CuRobo + embodiments.zip + objects.zip +# (~3.96 GB of assets; background_texture.zip ~11 GB skipped for smoke eval). +# +# Build: docker build -f docker/Dockerfile.benchmark.robotwin -t lerobot-benchmark-robotwin . +# Run: docker run --gpus all --rm lerobot-benchmark-robotwin \ +# lerobot-eval --env.type=robotwin --env.task=beat_block_hammer ... + +FROM huggingface/lerobot-gpu:latest + +ENV NVIDIA_DRIVER_CAPABILITIES=all \ + VK_ICD_FILENAMES=/usr/share/vulkan/icd.d/nvidia_icd.json \ + ROBOTWIN_ROOT=/opt/robotwin + +# The nightly base is CUDA -base (no compiler, no Vulkan loader). CuRobo's +# `pip install -e .` runs nvcc, and SAPIEN renders via Vulkan — add both. +USER root +# Pinned upstream SHA for reproducible benchmark runs. Bump when we need +# an upstream fix; don't rely on `main` drift. +ARG ROBOTWIN_SHA=0aeea2d669c0f8516f4d5785f0aa33ba812c14b4 +RUN apt-get update \ + && apt-get install -y --no-install-recommends \ + cuda-nvcc-12-4 cuda-cudart-dev-12-4 \ + libvulkan1 vulkan-tools \ + && mkdir -p /usr/share/vulkan/icd.d \ + && echo '{"file_format_version":"1.0.0","ICD":{"library_path":"libGLX_nvidia.so.0","api_version":"1.3.0"}}' \ + > /usr/share/vulkan/icd.d/nvidia_icd.json \ + && git clone https://github.com/RoboTwin-Platform/RoboTwin.git ${ROBOTWIN_ROOT} \ + && git -C ${ROBOTWIN_ROOT} checkout ${ROBOTWIN_SHA} \ + && chown -R user_lerobot:user_lerobot ${ROBOTWIN_ROOT} \ + && apt-get clean && rm -rf /var/lib/apt/lists/* +USER user_lerobot + +# RoboTwin runtime deps (av is already in the base via [av-dep]). +RUN uv pip install --no-cache \ + "sapien==3.0.0b1" "mplib==0.2.1" "transforms3d==0.4.2" "trimesh==4.4.3" \ + "open3d==0.19.0" "imageio==2.34.2" termcolor zarr pydantic h5py + +# pytorch3d has no universal wheel; must be built from source (~10 min, cached). 
+RUN uv pip install --no-cache --no-build-isolation \ + "git+https://github.com/facebookresearch/pytorch3d.git@stable" + +# CuRobo — NVlabs motion generator; TORCH_CUDA_ARCH_LIST must be set or the +# build aborts on an empty arch list. Pinned SHA for reproducibility. +ARG CUROBO_SHA=ca941586c33b8482ed9c0e74d60f23efd64b516a +RUN cd ${ROBOTWIN_ROOT}/envs \ + && git clone https://github.com/NVlabs/curobo.git \ + && git -C curobo checkout ${CUROBO_SHA} \ + && cd curobo \ + && TORCH_CUDA_ARCH_LIST="7.0;7.5;8.0;8.6;8.9;9.0" \ + uv pip install -e . --no-build-isolation --no-cache + +# Upstream patches (mirror RoboTwin's script/_install.sh). +# These patches target the exact versions pinned above; re-check when upgrading. +# mplib==0.2.1: drop a broken `or collide` clause in planner.py. +# Safe to remove once mplib > 0.2.1 ships with the fix upstream. +# sapien==3.0.0b1: fix URDF loader encoding + .srdf extension check. +# Safe to remove once sapien > 3.0.0b1 ships with the fix upstream. +RUN python - <<'EOF' +import pathlib, re, site +for d in site.getsitepackages(): + p = pathlib.Path(d) / "mplib" / "planner.py" + if p.exists(): + p.write_text(re.sub(r"\bor collide\b", "", p.read_text(), count=1)) + print(f"mplib patch applied: {p}") + p = pathlib.Path(d) / "sapien" / "wrapper" / "urdf_loader.py" + if p.exists(): + src = p.read_text().replace( + "with open(srdf_path) as f:", 'with open(srdf_path, encoding="utf-8") as f:' + ).replace('"srdf"', '".srdf"') + p.write_text(src) + print(f"sapien patch applied: {p}") +EOF + +# Simulation assets from TianxingChen/RoboTwin2.0: embodiments (~220 MB) + +# objects (~3.74 GB). background_texture (~11 GB) is intentionally skipped. +# The dataset is public — no auth token needed. 
+RUN python - <<'EOF' +import os, pathlib, zipfile +from huggingface_hub import hf_hub_download + +assets_dir = pathlib.Path(os.environ["ROBOTWIN_ROOT"]) / "assets" +assets_dir.mkdir(parents=True, exist_ok=True) +for fname in ("embodiments.zip", "objects.zip"): + local = hf_hub_download( + repo_id="TianxingChen/RoboTwin2.0", + repo_type="dataset", + filename=fname, + local_dir=str(assets_dir), + ) + with zipfile.ZipFile(local, "r") as z: + z.extractall(str(assets_dir)) + pathlib.Path(local).unlink() +EOF + +WORKDIR ${ROBOTWIN_ROOT} +RUN python script/update_embodiment_config_path.py + +ENV PYTHONPATH="${ROBOTWIN_ROOT}:${PYTHONPATH}" + +# Return to the lerobot source directory (set by base image) before overlaying. +WORKDIR /lerobot + +# Overlay the PR's source code on top of the nightly image. +COPY --chown=user_lerobot:user_lerobot . . + +CMD ["/bin/bash"] diff --git a/docs/source/_toctree.yml b/docs/source/_toctree.yml index b48bbf757..bb0dad1bf 100644 --- a/docs/source/_toctree.yml +++ b/docs/source/_toctree.yml @@ -79,6 +79,8 @@ title: LIBERO - local: metaworld title: Meta-World + - local: robotwin + title: RoboTwin 2.0 - local: robocasa title: RoboCasa365 - local: envhub_isaaclab_arena diff --git a/docs/source/robotwin.mdx b/docs/source/robotwin.mdx new file mode 100644 index 000000000..ad1db766f --- /dev/null +++ b/docs/source/robotwin.mdx @@ -0,0 +1,223 @@ +# RoboTwin 2.0 + +RoboTwin 2.0 is a **large-scale dual-arm manipulation benchmark** built on the SAPIEN physics engine. It provides a standardized evaluation protocol for bimanual robotic policies across 50 tasks (as of upstream `main`) with strong domain randomization (clutter, lighting, background, tabletop height, and language instructions). 
+ +- Paper: [RoboTwin 2.0: A Scalable Data Generator and Benchmark with Strong Domain Randomization for Robust Bimanual Robotic Manipulation](https://arxiv.org/abs/2506.18088) +- GitHub: [RoboTwin-Platform/RoboTwin](https://github.com/RoboTwin-Platform/RoboTwin) +- Leaderboard: [robotwin-platform.github.io/leaderboard](https://robotwin-platform.github.io/leaderboard) +- Dataset: [lerobot/robotwin_unified](https://huggingface.co/datasets/lerobot/robotwin_unified) + +![RoboTwin 2.0 benchmark overview](https://www.aitntnews.com/pictures/2025/7/8/9a7f79cb-5ba9-11f0-8581-fa163e47d677.png) + +## Overview + +| Property | Value | +| ------------- | -------------------------------------------------------- | +| Tasks | 50 dual-arm manipulation tasks | +| Robot | Aloha-AgileX bimanual (14 DOF, 7 per arm) | +| Action space | 14-dim joint-space, continuous in `[-1, 1]` | +| Cameras | `head_camera`, `left_camera`, `right_camera` | +| Simulator | SAPIEN (not MuJoCo) | +| Eval protocol | 100 episodes/task, 50 demo_clean demonstrations | +| Eval settings | **Easy** (`demo_clean`) and **Hard** (`demo_randomized`) | + +## Available tasks + +RoboTwin 2.0 ships 50 dual-arm manipulation tasks in its upstream `envs/` directory. The canonical list is the `ROBOTWIN_TASKS` tuple in `src/lerobot/envs/robotwin.py`, mirrored verbatim from the upstream repo. Example tasks: + +| Task | CLI name | Category | +| ------------------------ | ------------------------ | ----------------- | +| Beat block with hammer | `beat_block_hammer` | Tool use | +| Click bell / alarm clock | `click_bell` | Precision press | +| Stack blocks (2 / 3) | `stack_blocks_two/three` | Stacking | +| Stack bowls (2 / 3) | `stack_bowls_two/three` | Stacking | +| Handover block / mic | `handover_block` | Bimanual coord. 
|
+| Lift pot                 | `lift_pot`               | Bimanual lift     |
+| Shake bottle             | `shake_bottle`           | Continuous motion |
+| Turn switch              | `turn_switch`            | Articulated obj   |
+| Stamp seal               | `stamp_seal`             | Precision place   |
+| Scan object              | `scan_object`            | Mobile manip.     |
+
+Pass a comma-separated list to `--env.task` to run multiple tasks in a single eval sweep.
+
+<Tip warning={true}>
+  `open_laptop` is currently broken upstream (its `check_success()` uses
+  `self.arm_tag`, which is only set inside the scripted-expert `play_once()`
+  path and therefore unavailable during normal policy eval). Avoid it until the
+  upstream bug is fixed, or patch the task to default `self.arm_tag = "left"` in
+  `load_actors()`.
+</Tip>
+
+## Dataset
+
+The RoboTwin 2.0 dataset is available in **LeRobot v3.0 format** on the Hugging Face Hub:
+
+```
+lerobot/robotwin_unified
+```
+
+It contains over 100,000 pre-collected trajectories across all 50 tasks (79.6 GB, Apache 2.0 license). No format conversion is needed — it is already in the correct LeRobot v3.0 schema with video observations and action labels.
+
+You can load it directly with the HF Datasets library:
+
+```python
+from datasets import load_dataset
+
+ds = load_dataset("lerobot/robotwin_unified", split="train")
+```
+
+## Installation
+
+RoboTwin 2.0 requires **Linux** with an NVIDIA GPU (CUDA 12.1 recommended). Installation takes approximately 20 minutes.
+
+### 1. Create a conda environment
+
+```bash
+conda create -n robotwin python=3.10 -y
+conda activate robotwin
+```
+
+### 2. Install LeRobot
+
+```bash
+git clone https://github.com/huggingface/lerobot.git
+cd lerobot
+pip install -e "."
+```
+
+### 3. Install RoboTwin 2.0
+
+```bash
+git clone https://github.com/RoboTwin-Platform/RoboTwin.git
+cd RoboTwin
+bash script/_install.sh
+bash script/_download_assets.sh
+```
+
+The install script handles all Python dependencies including SAPIEN, CuRobo, mplib, and pytorch3d.
+
+<Tip>
+
+If the automated install fails, install manually:
+
+```bash
+pip install -r requirements.txt
+pip install "git+https://github.com/facebookresearch/pytorch3d.git@stable"
+cd envs && git clone https://github.com/NVlabs/curobo.git && cd curobo
+pip install -e . --no-build-isolation
+```
+
+Then apply the required mplib fix: in `mplib/planner.py` line 807, remove `or collide` from the conditional.
+
+</Tip>
+
+### 4. Add RoboTwin to PYTHONPATH
+
+The RoboTwin task modules must be importable by LeRobot. From within the `RoboTwin/` directory:
+
+```bash
+export PYTHONPATH="${PYTHONPATH}:$(pwd)"
+```
+
+Add this to your shell profile to make it permanent.
+
+## Evaluation
+
+### Standard evaluation (recommended)
+
+Evaluate a policy on a single task with the official protocol (100 episodes):
+
+```bash
+lerobot-eval \
+  --policy.path="your-hf-policy-id" \
+  --env.type=robotwin \
+  --env.task=beat_block_hammer \
+  --eval.batch_size=1 \
+  --eval.n_episodes=100
+```
+
+### Single-task quick check
+
+```bash
+lerobot-eval \
+  --policy.path="your-hf-policy-id" \
+  --env.type=robotwin \
+  --env.task=beat_block_hammer \
+  --eval.batch_size=1 \
+  --eval.n_episodes=5
+```
+
+### Multi-task sweep
+
+Evaluate on several tasks in one run:
+
+```bash
+lerobot-eval \
+  --policy.path="your-hf-policy-id" \
+  --env.type=robotwin \
+  --env.task=beat_block_hammer,click_bell,handover_block,stack_blocks_two \
+  --eval.batch_size=1 \
+  --eval.n_episodes=100
+```
+
+### Full benchmark (all 50 tasks)
+
+```bash
+lerobot-eval \
+  --policy.path="your-hf-policy-id" \
+  --env.type=robotwin \
+  --env.task=adjust_bottle,beat_block_hammer,blocks_ranking_rgb,blocks_ranking_size,click_alarmclock,click_bell,dump_bin_bigbin,grab_roller,handover_block,handover_mic,hanging_mug,lift_pot,move_can_pot,move_pillbottle_pad,move_playingcard_away,move_stapler_pad,open_microwave,pick_diverse_bottles,pick_dual_bottles,place_a2b_left,place_a2b_right,place_bread_basket,place_bread_skillet,place_burger_fries,place_can_basket,place_cans_plasticbox,place_container_plate,place_dual_shoes,place_empty_cup,place_fan,place_mouse_pad,place_object_basket,place_object_scale,place_object_stand,place_phone_stand,place_shoe,press_stapler,put_bottles_dustbin,put_object_cabinet,rotate_qrcode,scan_object,shake_bottle,shake_bottle_horizontally,stack_blocks_three,stack_blocks_two,stack_bowls_three,stack_bowls_two,stamp_seal,turn_switch \
+  --eval.batch_size=1 \
+  --eval.n_episodes=100
+```
+
+<Tip>
+  `open_laptop` is intentionally omitted above because of the upstream
+  `self.arm_tag` bug (see the **Available tasks** section). Re-add it once the
+  upstream fix lands.
+</Tip>
+ + +## Camera configuration + +By default, all three cameras are included: + +| Camera key | Description | +| -------------- | ------------------------------ | +| `head_camera` | Torso-mounted overhead view | +| `left_camera` | Left arm wrist-mounted camera | +| `right_camera` | Right arm wrist-mounted camera | + +To use a subset of cameras, override `--env.camera_names`: + +```bash +lerobot-eval \ + --policy.path="your-hf-policy-id" \ + --env.type=robotwin \ + --env.task=beat_block_hammer \ + --env.camera_names="head_camera,left_camera" \ + --eval.batch_size=1 \ + --eval.n_episodes=10 +``` + +## Environment config reference + +Key parameters for `RoboTwinEnvConfig`: + +| Parameter | Default | Description | +| -------------------- | ---------------------------------------- | ---------------------------------- | +| `task` | `"beat_block_hammer"` | Comma-separated task name(s) | +| `fps` | `25` | Simulation FPS | +| `episode_length` | `300` | Max steps per episode | +| `obs_type` | `"pixels_agent_pos"` | `"pixels"` or `"pixels_agent_pos"` | +| `camera_names` | `"head_camera,left_camera,right_camera"` | Comma-separated active cameras | +| `observation_height` | `240` | Camera pixel height | +| `observation_width` | `320` | Camera pixel width | + +## Leaderboard submission + +Results can be submitted to the [RoboTwin 2.0 leaderboard](https://robotwin-platform.github.io/leaderboard). The official protocol requires: + +- Training on 50 `demo_clean` demonstrations per task +- Evaluating 100 episodes per task +- Reporting success rate separately for **Easy** (`demo_clean`) and **Hard** (`demo_randomized`) settings + +For submission instructions, refer to the [RoboTwin 2.0 documentation](https://robotwin-platform.github.io/doc/). 
diff --git a/scripts/ci/extract_task_descriptions.py b/scripts/ci/extract_task_descriptions.py index 73ec92196..c9216e02d 100644 --- a/scripts/ci/extract_task_descriptions.py +++ b/scripts/ci/extract_task_descriptions.py @@ -57,6 +57,24 @@ def _metaworld_descriptions(task_name: str) -> dict[str, str]: return {f"{task_name}_0": label} +def _robotwin_descriptions(task_names: str) -> dict[str, str]: + """Return descriptions for each requested RoboTwin task. Reads + `description/task_instruction/.json` from the RoboTwin clone + (cwd is /opt/robotwin in CI). Falls back to the task name if missing.""" + out: dict[str, str] = {} + root = Path("description/task_instruction") + for name in (t.strip() for t in task_names.split(",") if t.strip()): + desc_file = root / f"{name}.json" + desc = name.replace("_", " ") + if desc_file.is_file(): + data = json.loads(desc_file.read_text()) + full = data.get("full_description") or desc + # Strip the schema placeholders ({A}, {a}) — keep the sentence readable. + desc = full.replace("<", "").replace(">", "") + out[f"{name}_0"] = desc + return out + + def _robocasa_descriptions(task_spec: str) -> dict[str, str]: """For each task in the comma-separated list, emit a cleaned-name label. @@ -87,6 +105,8 @@ def main() -> int: descriptions = _libero_descriptions(args.task) elif args.env == "metaworld": descriptions = _metaworld_descriptions(args.task) + elif args.env == "robotwin": + descriptions = _robotwin_descriptions(args.task) elif args.env == "robocasa": descriptions = _robocasa_descriptions(args.task) else: diff --git a/src/lerobot/envs/configs.py b/src/lerobot/envs/configs.py index 9b5b3ab94..4f93a26a7 100644 --- a/src/lerobot/envs/configs.py +++ b/src/lerobot/envs/configs.py @@ -649,3 +649,90 @@ class IsaaclabArenaEnv(HubEnvConfig): ), PolicyProcessorPipeline(steps=[]), ) + + +@EnvConfig.register_subclass("robotwin") +@dataclass +class RoboTwinEnvConfig(EnvConfig): + """Configuration for RoboTwin 2.0 benchmark environments. 
+ + RoboTwin 2.0 is a dual-arm manipulation benchmark with 50 tasks built on the + SAPIEN simulator. The robot is an Aloha-AgileX bimanual platform with 14 DOF + (7 per arm). All three cameras are enabled by default. + + See: https://robotwin-platform.github.io + Dataset: https://huggingface.co/datasets/lerobot/robotwin_unified + """ + + task: str = "beat_block_hammer" # single task or comma-separated list + fps: int = 25 + episode_length: int = 300 + obs_type: str = "pixels_agent_pos" + render_mode: str = "rgb_array" + # Available cameras from RoboTwin's aloha-agilex embodiment: head_camera + # (torso-mounted) + left_camera / right_camera (wrists). + camera_names: str = "head_camera,left_camera,right_camera" + # Match the D435 dims in task_config/demo_clean.yml (_camera_config.yml). + # Gym's vector-env concatenate pre-allocates buffers of this shape, so it + # must equal what SAPIEN actually renders. + observation_height: int = 240 + observation_width: int = 320 + features: dict[str, PolicyFeature] = field( + default_factory=lambda: { + ACTION: PolicyFeature(type=FeatureType.ACTION, shape=(14,)), + } + ) + features_map: dict[str, str] = field( + default_factory=lambda: { + ACTION: ACTION, + "pixels/head_camera": f"{OBS_IMAGES}.head_camera", + "pixels/left_camera": f"{OBS_IMAGES}.left_camera", + "pixels/right_camera": f"{OBS_IMAGES}.right_camera", + "agent_pos": OBS_STATE, + } + ) + + def __post_init__(self): + cam_list = [c.strip() for c in self.camera_names.split(",") if c.strip()] + for cam in cam_list: + self.features[f"pixels/{cam}"] = PolicyFeature( + type=FeatureType.VISUAL, + shape=(self.observation_height, self.observation_width, 3), + ) + # Keep features_map entry if already set (default_factory); add if missing. 
+ key = f"pixels/{cam}" + if key not in self.features_map: + self.features_map[key] = f"{OBS_IMAGES}.{cam}" + + if self.obs_type == "pixels_agent_pos": + self.features["agent_pos"] = PolicyFeature( + type=FeatureType.STATE, + shape=(14,), # 14 DOF: 7 per arm + ) + elif self.obs_type != "pixels": + raise ValueError( + f"Unsupported obs_type '{self.obs_type}'. " + "RoboTwinEnvConfig supports 'pixels' and 'pixels_agent_pos'." + ) + + @property + def gym_kwargs(self) -> dict: + return {} + + def create_envs(self, n_envs: int, use_async_envs: bool = True): + from lerobot.envs.robotwin import create_robotwin_envs + + if not self.task: + raise ValueError("RoboTwinEnvConfig requires `task` to be specified.") + + env_cls = _make_vec_env_cls(use_async_envs, n_envs) + cam_list = [c.strip() for c in self.camera_names.split(",") if c.strip()] + return create_robotwin_envs( + task=self.task, + n_envs=n_envs, + env_cls=env_cls, + camera_names=cam_list, + observation_height=self.observation_height, + observation_width=self.observation_width, + episode_length=self.episode_length, + ) diff --git a/src/lerobot/envs/robotwin.py b/src/lerobot/envs/robotwin.py new file mode 100644 index 000000000..823f14fa0 --- /dev/null +++ b/src/lerobot/envs/robotwin.py @@ -0,0 +1,488 @@ +#!/usr/bin/env python + +# Copyright 2025 The HuggingFace Inc. team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+from __future__ import annotations + +import importlib +import logging +from collections import defaultdict +from collections.abc import Callable, Sequence +from functools import partial +from typing import Any + +import gymnasium as gym +import numpy as np +import torch +from gymnasium import spaces + +from lerobot.types import RobotObservation + +from .utils import _LazyAsyncVectorEnv + +logger = logging.getLogger(__name__) + +# Camera names as used by RoboTwin 2.0. The wrapper appends "_rgb" when looking +# up keys in get_obs() output (e.g. "head_camera" → "head_camera_rgb"). +ROBOTWIN_CAMERA_NAMES: tuple[str, ...] = ( + "head_camera", + "left_camera", + "right_camera", +) + +ACTION_DIM = 14 # 7 DOF × 2 arms +ACTION_LOW = -1.0 +ACTION_HIGH = 1.0 +DEFAULT_EPISODE_LENGTH = 300 +# D435 dims from task_config/_camera_config.yml (what demo_clean.yml selects). +DEFAULT_CAMERA_H = 240 +DEFAULT_CAMERA_W = 320 + +# Task list from RoboTwin 2.0's `envs/` directory — mirrors upstream exactly +# (50 tasks as of main; earlier revisions had 60 with a different split). +# Keep this in sync with: +# gh api /repos/RoboTwin-Platform/RoboTwin/contents/envs --paginate \ +# | jq -r '.[].name' | grep -E '\.py$' | grep -v '^_' | sed 's/\.py$//' +ROBOTWIN_TASKS: tuple[str, ...] 
= ( + "adjust_bottle", + "beat_block_hammer", + "blocks_ranking_rgb", + "blocks_ranking_size", + "click_alarmclock", + "click_bell", + "dump_bin_bigbin", + "grab_roller", + "handover_block", + "handover_mic", + "hanging_mug", + "lift_pot", + "move_can_pot", + "move_pillbottle_pad", + "move_playingcard_away", + "move_stapler_pad", + "open_laptop", + "open_microwave", + "pick_diverse_bottles", + "pick_dual_bottles", + "place_a2b_left", + "place_a2b_right", + "place_bread_basket", + "place_bread_skillet", + "place_burger_fries", + "place_can_basket", + "place_cans_plasticbox", + "place_container_plate", + "place_dual_shoes", + "place_empty_cup", + "place_fan", + "place_mouse_pad", + "place_object_basket", + "place_object_scale", + "place_object_stand", + "place_phone_stand", + "place_shoe", + "press_stapler", + "put_bottles_dustbin", + "put_object_cabinet", + "rotate_qrcode", + "scan_object", + "shake_bottle", + "shake_bottle_horizontally", + "stack_blocks_three", + "stack_blocks_two", + "stack_bowls_three", + "stack_bowls_two", + "stamp_seal", + "turn_switch", +) + + +_ROBOTWIN_SETUP_CACHE: dict[str, dict[str, Any]] = {} + + +def _load_robotwin_setup_kwargs(task_name: str) -> dict[str, Any]: + """Build the kwargs dict RoboTwin's setup_demo expects. + + Mirrors the config loading done by RoboTwin's ``script/eval_policy.py``: + reads ``task_config/demo_clean.yml``, resolves the embodiment file from + ``_embodiment_config.yml``, loads the robot's own ``config.yml``, and + reads camera dimensions from ``_camera_config.yml``. + + Uses ``aloha-agilex`` single-robot dual-arm by default (the only embodiment + used by beat_block_hammer and most smoke-test tasks). 
+ """ + if task_name in _ROBOTWIN_SETUP_CACHE: + return dict(_ROBOTWIN_SETUP_CACHE[task_name]) + + import os + + import yaml # type: ignore[import-untyped] + from envs import CONFIGS_PATH # type: ignore[import-not-found] + + task_config = "demo_clean" + with open(os.path.join(CONFIGS_PATH, f"{task_config}.yml"), encoding="utf-8") as f: + args = yaml.safe_load(f) + + # Resolve embodiment — demo_clean.yml uses [aloha-agilex] (dual-arm single robot) + with open(os.path.join(CONFIGS_PATH, "_embodiment_config.yml"), encoding="utf-8") as f: + embodiment_types = yaml.safe_load(f) + embodiment = args.get("embodiment", ["aloha-agilex"]) + if len(embodiment) == 1: + robot_file = embodiment_types[embodiment[0]]["file_path"] + args["left_robot_file"] = robot_file + args["right_robot_file"] = robot_file + args["dual_arm_embodied"] = True + elif len(embodiment) == 3: + args["left_robot_file"] = embodiment_types[embodiment[0]]["file_path"] + args["right_robot_file"] = embodiment_types[embodiment[1]]["file_path"] + args["embodiment_dis"] = embodiment[2] + args["dual_arm_embodied"] = False + else: + raise ValueError(f"embodiment must have 1 or 3 items, got {len(embodiment)}") + + with open(os.path.join(args["left_robot_file"], "config.yml"), encoding="utf-8") as f: + args["left_embodiment_config"] = yaml.safe_load(f) + with open(os.path.join(args["right_robot_file"], "config.yml"), encoding="utf-8") as f: + args["right_embodiment_config"] = yaml.safe_load(f) + + # Camera dimensions + with open(os.path.join(CONFIGS_PATH, "_camera_config.yml"), encoding="utf-8") as f: + camera_config = yaml.safe_load(f) + head_cam = args["camera"]["head_camera_type"] + args["head_camera_h"] = camera_config[head_cam]["h"] + args["head_camera_w"] = camera_config[head_cam]["w"] + + # Headless overrides + args["render_freq"] = 0 + args["task_name"] = task_name + args["task_config"] = task_config + + _ROBOTWIN_SETUP_CACHE[task_name] = args + return dict(args) + + +def _load_robotwin_task(task_name: str) 
-> type: + """Dynamically import and return a RoboTwin 2.0 task class. + + RoboTwin tasks live in ``envs/.py`` relative to the repository + root and are expected to be on ``sys.path`` after installation. + """ + try: + module = importlib.import_module(f"envs.{task_name}") + except ModuleNotFoundError as e: + raise ModuleNotFoundError( + f"Could not import RoboTwin task '{task_name}'. " + "Ensure RoboTwin 2.0 is installed and its 'envs/' directory is on PYTHONPATH. " + "See the RoboTwin installation guide: https://robotwin-platform.github.io/doc/usage/robotwin-install.html" + ) from e + task_cls = getattr(module, task_name, None) + if task_cls is None: + raise AttributeError(f"Task class '{task_name}' not found in envs/{task_name}.py") + return task_cls + + +class RoboTwinEnv(gym.Env): + """Gymnasium wrapper around a single RoboTwin 2.0 task. + + RoboTwin uses a custom SAPIEN-based API (``setup_demo`` / ``get_obs`` / + ``take_action`` / ``check_success``) rather than the standard gym interface. + This class bridges that API to Gymnasium so that ``lerobot-eval`` can drive + RoboTwin exactly like LIBERO or Meta-World. + + The underlying SAPIEN environment is created lazily on the first ``reset()`` + call *inside the worker process*. This is required for + ``gym.vector.AsyncVectorEnv`` compatibility: SAPIEN allocates EGL/GPU + contexts that must not be forked from the parent process. + + Observations + ------------ + The ``pixels`` dict uses the raw RoboTwin camera names as keys (e.g. + ``"head_camera"``, ``"left_camera"``). ``preprocess_observation`` in + ``envs/utils.py`` then converts these to ``observation.images.``. + + Actions + ------- + 14-dim float32 array in ``[-1, 1]`` (joint-space, 7 DOF per arm). + + Autograd + -------- + ``setup_demo`` and ``take_action`` drive CuRobo's Newton trajectory + optimizer, which calls ``cost.backward()`` internally. lerobot_eval wraps + the rollout in ``torch.no_grad()``, so both call sites re-enable grad. 
    """

    metadata = {"render_modes": ["rgb_array"], "render_fps": 25}

    def __init__(
        self,
        task_name: str,
        episode_index: int = 0,
        n_envs: int = 1,
        camera_names: Sequence[str] = ROBOTWIN_CAMERA_NAMES,
        observation_height: int | None = None,
        observation_width: int | None = None,
        episode_length: int = DEFAULT_EPISODE_LENGTH,
        render_mode: str = "rgb_array",
    ):
        super().__init__()
        self.task_name = task_name
        self.task = task_name  # used by add_envs_task() in utils.py
        self.task_description = task_name.replace("_", " ")
        self.episode_index = episode_index
        # Each reset advances episode_index by n_envs so parallel workers
        # (worker i starts at episode_index=i) never replay the same seed.
        self._reset_stride = n_envs
        self.camera_names = list(camera_names)
        # Default to D435 dims (the camera type baked into task_config/demo_clean.yml).
        # The YAML-driven lookup is deferred to reset() so construction doesn't
        # import RoboTwin's `envs` module — fast-tests run without RoboTwin installed.
        self.observation_height = observation_height or DEFAULT_CAMERA_H
        self.observation_width = observation_width or DEFAULT_CAMERA_W
        self.episode_length = episode_length
        self._max_episode_steps = episode_length  # lerobot_eval.rollout reads this
        self.render_mode = render_mode

        self._env: Any | None = None  # deferred — created on first reset() inside worker
        self._step_count: int = 0
        # Reusable placeholder frame for cameras missing from get_obs().
        self._black_frame = np.zeros((self.observation_height, self.observation_width, 3), dtype=np.uint8)

        image_spaces = {
            cam: spaces.Box(
                low=0,
                high=255,
                shape=(self.observation_height, self.observation_width, 3),
                dtype=np.uint8,
            )
            for cam in self.camera_names
        }
        self.observation_space = spaces.Dict(
            {
                "pixels": spaces.Dict(image_spaces),
                "agent_pos": spaces.Box(low=-np.inf, high=np.inf, shape=(ACTION_DIM,), dtype=np.float32),
            }
        )
        self.action_space = spaces.Box(
            low=ACTION_LOW, high=ACTION_HIGH, shape=(ACTION_DIM,), dtype=np.float32
        )

    def _ensure_env(self) -> None:
        """Create the SAPIEN environment on first use.

        Called inside the worker subprocess after fork(), so each worker gets
        its own EGL/GPU context rather than inheriting a stale one from the
        parent process (which causes crashes with AsyncVectorEnv).
        """
        if self._env is not None:
            return
        task_cls = _load_robotwin_task(self.task_name)
        self._env = task_cls()

    def _get_obs(self) -> RobotObservation:
        """Convert RoboTwin's nested get_obs() dict into the gym observation.

        Missing cameras are filled with a shared black frame; the joint-state
        vector is truncated/zero-padded to ACTION_DIM.
        """
        assert self._env is not None, "_get_obs called before _ensure_env()"
        raw = self._env.get_obs()
        cameras_raw = raw.get("observation", {})

        images: dict[str, np.ndarray] = {}
        for cam in self.camera_names:
            cam_data = cameras_raw.get(cam)
            img = cam_data.get("rgb") if cam_data else None
            if img is None:
                images[cam] = self._black_frame
                continue
            img = np.asarray(img, dtype=np.uint8)
            if img.ndim == 2:
                # Grayscale (H, W) -> replicate to 3 channels.
                img = np.stack([img, img, img], axis=-1)
            elif img.shape[-1] != 3:
                # NOTE(review): this truncates RGBA -> RGB, but an (H, W, 1)
                # frame would remain single-channel and violate the declared
                # observation space — confirm upstream cameras never emit that.
                img = img[..., :3]
            images[cam] = img

        ja = raw.get("joint_action") or {}
        vec = ja.get("vector")
        if vec is not None:
            arr = np.asarray(vec, dtype=np.float32).ravel()
            joint_state = (
                arr[:ACTION_DIM] if arr.size >= ACTION_DIM else np.zeros(ACTION_DIM, dtype=np.float32)
            )
        else:
            joint_state = np.zeros(ACTION_DIM, dtype=np.float32)

        return {"pixels": images, "agent_pos": joint_state}

    def reset(self, seed: int | None = None, **kwargs) -> tuple[RobotObservation, dict]:
        self._ensure_env()
        super().reset(seed=seed)
        assert self._env is not None  # set by _ensure_env() above

        # Explicit seed wins; otherwise use the rolling episode_index so each
        # reset (and each worker) gets a distinct scene.
        actual_seed = self.episode_index if seed is None else seed
        setup_kwargs = _load_robotwin_setup_kwargs(self.task_name)
        setup_kwargs.update(seed=actual_seed, is_test=True)
        # setup_demo runs CuRobo's optimizer, which needs autograd even though
        # lerobot_eval calls us under torch.no_grad().
        with torch.enable_grad():
            self._env.setup_demo(**setup_kwargs)
        self.episode_index += self._reset_stride
        self._step_count = 0

        obs = self._get_obs()
        return obs, {"is_success": False, "task": self.task_name}

    def step(self, action: np.ndarray) -> tuple[RobotObservation, float, bool, bool, dict[str, Any]]:
        assert self._env is not None, "step() called before reset()"
        if action.ndim != 1 or action.shape[0] != ACTION_DIM:
            raise ValueError(f"Expected 1-D action of shape ({ACTION_DIM},), got {action.shape}")

        # take_action drives CuRobo (needs autograd); fall back to a plain
        # step() for task classes that expose the gym-style API instead.
        with torch.enable_grad():
            if hasattr(self._env, "take_action"):
                self._env.take_action(action)
            else:
                self._env.step(action)

        self._step_count += 1

        is_success = bool(getattr(self._env, "eval_success", False))
        if not is_success and hasattr(self._env, "check_success"):
            is_success = bool(self._env.check_success())

        obs = self._get_obs()
        reward = float(is_success)  # sparse reward: 1.0 only on success
        terminated = is_success
        truncated = self._step_count >= self.episode_length

        info: dict[str, Any] = {
            "task": self.task_name,
            "is_success": is_success,
            "step": self._step_count,
        }
        if terminated or truncated:
            info["final_info"] = {
                "task": self.task_name,
                "is_success": is_success,
            }
            # NOTE(review): self-resetting inside step() mimics autoreset; the
            # returned `obs` is from *before* this reset. Confirm the vector-env
            # wrapper used by lerobot-eval does not autoreset again on top.
            self.reset()

        return obs, reward, terminated, truncated, info

    def render(self) -> np.ndarray:
        self._ensure_env()
        obs = self._get_obs()
        # Prefer head camera for rendering; fall back to first available.
        if "head_camera" in obs["pixels"]:
            return obs["pixels"]["head_camera"]
        return next(iter(obs["pixels"].values()))

    def close(self) -> None:
        # Release the SAPIEN environment if one was created.
        if self._env is not None:
            if hasattr(self._env, "close_env"):
                import contextlib

                # NOTE(review): only TypeError is suppressed (signature drift
                # across RoboTwin versions, presumably) — other exceptions from
                # close_env() still propagate; confirm that is intended.
                with contextlib.suppress(TypeError):
                    self._env.close_env()
            self._env = None


# ---- Multi-task factory --------------------------------------------------------


def _make_env_fns(
    *,
    task_name: str,
    n_envs: int,
    camera_names: list[str],
    observation_height: int,
    observation_width: int,
    episode_length: int,
) -> list[Callable[[], RoboTwinEnv]]:
    """Return n_envs factory callables for a single task.

    Factory i starts at episode_index=i; combined with the env's
    reset stride of n_envs, workers never replay the same seed.
    """

    def _make_one(episode_index: int) -> RoboTwinEnv:
        return RoboTwinEnv(
            task_name=task_name,
            episode_index=episode_index,
            n_envs=n_envs,
            camera_names=camera_names,
            observation_height=observation_height,
            observation_width=observation_width,
            episode_length=episode_length,
        )

    return [partial(_make_one, i) for i in range(n_envs)]


def create_robotwin_envs(
    task: str,
    n_envs: int,
    env_cls: Callable[[Sequence[Callable[[], Any]]], Any] | None = None,
    camera_names: Sequence[str] = ROBOTWIN_CAMERA_NAMES,
    observation_height: int = DEFAULT_CAMERA_H,
    observation_width: int = DEFAULT_CAMERA_W,
    episode_length: int = DEFAULT_EPISODE_LENGTH,
) -> dict[str, dict[int, Any]]:
    """Create vectorized RoboTwin 2.0 environments.

    Returns:
        ``dict[task_name][0] -> VectorEnv`` — one entry per task, each wrapping
        ``n_envs`` parallel rollouts.

    Args:
        task: Comma-separated list of task names (e.g. ``"beat_block_hammer"``
            or ``"beat_block_hammer,click_bell"``).
        n_envs: Number of parallel rollouts per task.
        env_cls: Vector env constructor (e.g. ``gym.vector.AsyncVectorEnv``).
        camera_names: Cameras to include in observations.
        observation_height: Pixel height for all cameras.
        observation_width: Pixel width for all cameras.
        episode_length: Max steps before truncation.

    Raises:
        ValueError: if ``env_cls`` is not callable, ``n_envs`` is not a
            positive int, or ``task`` names an unknown RoboTwin task.
    """
    if env_cls is None or not callable(env_cls):
        raise ValueError("env_cls must be callable (e.g. gym.vector.AsyncVectorEnv).")
    if not isinstance(n_envs, int) or n_envs <= 0:
        raise ValueError(f"n_envs must be a positive int; got {n_envs}.")

    task_names = [t.strip() for t in str(task).split(",") if t.strip()]
    if not task_names:
        raise ValueError("`task` must contain at least one RoboTwin task name.")

    unknown = [t for t in task_names if t not in ROBOTWIN_TASKS]
    if unknown:
        raise ValueError(f"Unknown RoboTwin tasks: {unknown}. Available tasks: {sorted(ROBOTWIN_TASKS)}")

    logger.info(
        "Creating RoboTwin envs | tasks=%s | n_envs(per task)=%d",
        task_names,
        n_envs,
    )

    is_async = env_cls is gym.vector.AsyncVectorEnv
    # Spaces/metadata from the first lazy env are cached and reused so only
    # one env pays the probe cost.
    cached_obs_space: spaces.Space | None = None
    cached_act_space: spaces.Space | None = None
    cached_metadata: dict[str, Any] | None = None

    out: dict[str, dict[int, Any]] = defaultdict(dict)
    for task_name in task_names:
        fns = _make_env_fns(
            task_name=task_name,
            n_envs=n_envs,
            camera_names=list(camera_names),
            observation_height=observation_height,
            observation_width=observation_width,
            episode_length=episode_length,
        )
        if is_async:
            lazy = _LazyAsyncVectorEnv(fns, cached_obs_space, cached_act_space, cached_metadata)
            if cached_obs_space is None:
                cached_obs_space = lazy.observation_space
                cached_act_space = lazy.action_space
                cached_metadata = lazy.metadata
            out[task_name][0] = lazy
        else:
            out[task_name][0] = env_cls(fns)
        logger.info("Built vec env | task=%s | n_envs=%d", task_name, n_envs)

    return {k: dict(v) for k, v in out.items()}
diff --git a/tests/envs/test_robotwin.py b/tests/envs/test_robotwin.py
new file mode 100644
index 000000000..fcd45adbf
--- /dev/null
+++ b/tests/envs/test_robotwin.py
@@ -0,0 +1,282 @@
#!/usr/bin/env python

# Copyright 2025 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Unit tests for the RoboTwin 2.0 Gymnasium wrapper.

These tests mock out the SAPIEN-based RoboTwin runtime (task modules +
YAML config loader) so they run without the full RoboTwin installation
(SAPIEN, CuRobo, mplib, asset downloads, etc.).
"""

from __future__ import annotations

from contextlib import contextmanager
from unittest.mock import MagicMock, patch

import gymnasium as gym
import numpy as np
import pytest

from lerobot.envs.robotwin import (
    ACTION_DIM,
    ROBOTWIN_CAMERA_NAMES,
    ROBOTWIN_TASKS,
    RoboTwinEnv,
    create_robotwin_envs,
)

# ---------------------------------------------------------------------------
# Fixtures / helpers
# ---------------------------------------------------------------------------


def _make_mock_task_env(
    height: int = 240,
    width: int = 320,
    cameras: tuple[str, ...] = ROBOTWIN_CAMERA_NAMES,
) -> MagicMock:
    """Return a mock that mimics the RoboTwin task class API.

    RoboTwin's real get_obs returns
    {"observation": {cam: {"rgb": img}}, "joint_action": {"vector": np.ndarray}, ...}
    so the mock follows the same nested shape.
    """
    obs_dict = {
        "observation": {cam: {"rgb": np.zeros((height, width, 3), dtype=np.uint8)} for cam in cameras},
        "joint_action": {"vector": np.zeros(ACTION_DIM, dtype=np.float32)},
        "endpose": {},
    }

    mock = MagicMock()
    mock.get_obs.return_value = obs_dict
    mock.setup_demo.return_value = None
    mock.take_action.return_value = None
    mock.eval_success = False
    mock.check_success.return_value = False
    mock.close_env.return_value = None
    return mock


@contextmanager
def _patch_runtime(mock_task_instance: MagicMock):
    """Patch both the task-class loader and the YAML config loader so the
    env can construct + reset without a real RoboTwin install."""
    task_cls = MagicMock(return_value=mock_task_instance)
    # Minimal stand-in for the YAML-derived setup kwargs normally produced by
    # _load_robotwin_setup_kwargs().
    fake_setup = {
        "head_camera_h": 240,
        "head_camera_w": 320,
        "left_embodiment_config": {},
        "right_embodiment_config": {},
        "left_robot_file": "",
        "right_robot_file": "",
        "dual_arm_embodied": True,
        "render_freq": 0,
        "task_name": "beat_block_hammer",
        "task_config": "demo_clean",
    }
    with (
        patch("lerobot.envs.robotwin._load_robotwin_task", return_value=task_cls),
        patch("lerobot.envs.robotwin._load_robotwin_setup_kwargs", return_value=fake_setup),
    ):
        yield


# ---------------------------------------------------------------------------
# RoboTwinEnv unit tests
# ---------------------------------------------------------------------------


class TestRoboTwinEnv:
    def test_observation_space_shape(self):
        """observation_space should have the configured h×w×3 for every camera."""
        h, w = 240, 320
        env = RoboTwinEnv(
            task_name="beat_block_hammer",
            observation_height=h,
            observation_width=w,
            camera_names=["head_camera", "left_camera"],
        )
        pixels_space = env.observation_space["pixels"]
        assert pixels_space["head_camera"].shape == (h, w, 3)
        assert pixels_space["left_camera"].shape == (h, w, 3)
        assert "right_camera" not in pixels_space

    def test_action_space(self):
        env = RoboTwinEnv(task_name="beat_block_hammer")
        assert env.action_space.shape == (ACTION_DIM,)
        assert env.action_space.dtype == np.float32

    def test_reset_returns_correct_obs_keys(self):
        mock_task = _make_mock_task_env()
        env = RoboTwinEnv(task_name="beat_block_hammer")
        with _patch_runtime(mock_task):
            obs, info = env.reset()

        assert "pixels" in obs
        for cam in ROBOTWIN_CAMERA_NAMES:
            assert cam in obs["pixels"], f"Missing camera '{cam}' in obs"
        assert "agent_pos" in obs
        assert obs["agent_pos"].shape == (ACTION_DIM,)
        assert info["is_success"] is False

    def test_reset_calls_setup_demo(self):
        mock_task = _make_mock_task_env()
        env = RoboTwinEnv(task_name="beat_block_hammer")
        with _patch_runtime(mock_task):
            env.reset(seed=42)
        # setup_demo receives the full YAML-derived kwargs plus seed + is_test;
        # we only assert the caller-provided bits.
        assert mock_task.setup_demo.call_count == 1
        call_kwargs = mock_task.setup_demo.call_args.kwargs
        assert call_kwargs["seed"] == 42
        assert call_kwargs["is_test"] is True

    def test_step_returns_correct_types(self):
        mock_task = _make_mock_task_env()
        env = RoboTwinEnv(task_name="beat_block_hammer")
        action = np.zeros(ACTION_DIM, dtype=np.float32)
        with _patch_runtime(mock_task):
            env.reset()
            obs, reward, terminated, truncated, info = env.step(action)

        assert isinstance(obs, dict)
        assert isinstance(reward, float)
        assert isinstance(terminated, bool)
        assert isinstance(truncated, bool)
        assert isinstance(info, dict)

    def test_step_wrong_action_shape_raises(self):
        mock_task = _make_mock_task_env()
        env = RoboTwinEnv(task_name="beat_block_hammer")
        bad_action = np.zeros(7, dtype=np.float32)  # wrong dim
        with _patch_runtime(mock_task):
            env.reset()
            with pytest.raises(ValueError, match="Expected 1-D action"):
                env.step(bad_action)

    def test_success_terminates_episode(self):
        mock_task = _make_mock_task_env()
        mock_task.check_success.return_value = True
        env = RoboTwinEnv(task_name="beat_block_hammer")
        action = np.zeros(ACTION_DIM, dtype=np.float32)
        with _patch_runtime(mock_task):
            env.reset()
            _, _, terminated, _, info = env.step(action)
        assert terminated is True
        assert info["is_success"] is True

    def test_truncation_after_episode_length(self):
        mock_task = _make_mock_task_env()
        env = RoboTwinEnv(task_name="beat_block_hammer", episode_length=2)
        action = np.zeros(ACTION_DIM, dtype=np.float32)
        with _patch_runtime(mock_task):
            env.reset()
            env.step(action)  # step 1
            _, _, _, truncated, _ = env.step(action)  # step 2 → truncated
        assert truncated is True

    def test_close_calls_close_env(self):
        mock_task = _make_mock_task_env()
        env = RoboTwinEnv(task_name="beat_block_hammer")
        with _patch_runtime(mock_task):
            env.reset()
            env.close()
        mock_task.close_env.assert_called_once()

    def test_black_frame_for_missing_camera(self):
        """If a camera key is absent from get_obs(), a black frame is returned."""
        # Mock exposes only head_camera; we ask for both head_camera + left_camera.
        mock_task = _make_mock_task_env(height=10, width=10, cameras=("head_camera",))
        env = RoboTwinEnv(
            task_name="beat_block_hammer",
            camera_names=["head_camera", "left_camera"],
            observation_height=10,
            observation_width=10,
        )
        with _patch_runtime(mock_task):
            obs, _ = env.reset()
        assert obs["pixels"]["left_camera"].shape == (10, 10, 3)
        assert obs["pixels"]["left_camera"].sum() == 0

    def test_task_and_task_description_attributes(self):
        env = RoboTwinEnv(task_name="beat_block_hammer")
        assert env.task == "beat_block_hammer"
        assert isinstance(env.task_description, str)

    def test_deferred_init_env_is_none_before_reset(self):
        # Construction must not touch the SAPIEN runtime (lazy init).
        env = RoboTwinEnv(task_name="beat_block_hammer")
        assert env._env is None  # noqa: SLF001 (testing internal state)


# ---------------------------------------------------------------------------
# create_robotwin_envs tests
# ---------------------------------------------------------------------------


class TestCreateRoboTwinEnvs:
    def test_returns_correct_structure(self):
        mock_task = _make_mock_task_env()
        with _patch_runtime(mock_task):
            envs = create_robotwin_envs(
                task="beat_block_hammer",
                n_envs=1,
                env_cls=gym.vector.SyncVectorEnv,
            )
        assert "beat_block_hammer" in envs
        assert 0 in envs["beat_block_hammer"]
        assert isinstance(envs["beat_block_hammer"][0], gym.vector.SyncVectorEnv)

    def test_multi_task(self):
        mock_task = _make_mock_task_env()
        with _patch_runtime(mock_task):
            envs = create_robotwin_envs(
                task="beat_block_hammer,click_bell",
                n_envs=1,
                env_cls=gym.vector.SyncVectorEnv,
            )
        assert set(envs.keys()) == {"beat_block_hammer", "click_bell"}

    def test_unknown_task_raises(self):
        with pytest.raises(ValueError, match="Unknown RoboTwin tasks"):
            create_robotwin_envs(
                task="not_a_real_task",
                n_envs=1,
                env_cls=gym.vector.SyncVectorEnv,
            )

    def test_invalid_n_envs_raises(self):
        with pytest.raises(ValueError, match="n_envs must be a positive int"):
            create_robotwin_envs(
                task="beat_block_hammer",
                n_envs=0,
                env_cls=gym.vector.SyncVectorEnv,
            )


# ---------------------------------------------------------------------------
# ROBOTWIN_TASKS list
# ---------------------------------------------------------------------------


def test_task_list_not_empty():
    assert len(ROBOTWIN_TASKS) >= 50


def test_all_tasks_are_strings():
    assert all(isinstance(t, str) and t for t in ROBOTWIN_TASKS)


def test_no_duplicate_tasks():
    assert len(ROBOTWIN_TASKS) == len(set(ROBOTWIN_TASKS))