diff --git a/.github/workflows/benchmark_tests.yml b/.github/workflows/benchmark_tests.yml index 79d5614b2..74234ed5b 100644 --- a/.github/workflows/benchmark_tests.yml +++ b/.github/workflows/benchmark_tests.yml @@ -310,3 +310,94 @@ jobs: name: metaworld-metrics path: /tmp/metaworld-artifacts/metrics.json if-no-files-found: warn + + # ── ROBOTWIN 2.0 ────────────────────────────────────────────────────────── + # Isolated image: full RoboTwin 2.0 stack — SAPIEN, mplib, CuRobo, + # pytorch3d, + simulation assets (~4 GB). + # Build takes ~20 min on first run; subsequent runs hit the layer cache. + # Requires an NVIDIA GPU runner with CUDA 12.1 drivers. + robotwin-integration-test: + name: RoboTwin 2.0 — build image + 1-episode eval + runs-on: + group: aws-g6-4xlarge-plus + env: + HF_USER_TOKEN: ${{ secrets.LEROBOT_HF_USER }} + + steps: + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + persist-credentials: false + lfs: true + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 # zizmor: ignore[unpinned-uses] + with: + cache-binary: false + + # Build the full-install image: SAPIEN, mplib, CuRobo, pytorch3d + + # simulation assets (~4 GB). Layer cache lives in the runner's local + # Docker daemon — reused across re-runs on the same machine. + - name: Build RoboTwin 2.0 benchmark image + uses: docker/build-push-action@v6 # zizmor: ignore[unpinned-uses] + with: + context: . + file: docker/Dockerfile.benchmark.robotwin + push: false + load: true + tags: lerobot-benchmark-robotwin:ci + cache-from: type=local,src=/tmp/.buildx-cache-robotwin + cache-to: type=local,dest=/tmp/.buildx-cache-robotwin,mode=max + + - name: Run RoboTwin 2.0 smoke eval (1 episode) + run: | + # Named container (no --rm) so we can docker cp artifacts out. + docker run --name robotwin-eval --gpus all \ + --shm-size=4g \ + -e HF_HOME=/tmp/hf \ + -e HF_USER_TOKEN="${HF_USER_TOKEN}" \ + lerobot-benchmark-robotwin:ci \ + bash -c " + hf auth login --token \"\$HF_USER_TOKEN\" --add-to-git-credential 2>/dev/null || true + cd /opt/robotwin && lerobot-eval \ + --policy.path=pepijn223/smolvla_robotwin \ + --env.type=robotwin \ + --env.task=beat_block_hammer \ + --eval.batch_size=1 \ + --eval.n_episodes=1 \ + --eval.use_async_envs=false \ + --policy.device=cuda \ + '--rename_map={\"observation.images.head_camera\": \"observation.images.camera1\", \"observation.images.front_camera\": \"observation.images.camera2\", \"observation.images.left_wrist\": \"observation.images.camera3\"}' \ + --output_dir=/tmp/eval-artifacts + " + + - name: Copy RoboTwin artifacts from container + if: always() + run: | + mkdir -p /tmp/robotwin-artifacts + docker cp robotwin-eval:/tmp/eval-artifacts/. 
/tmp/robotwin-artifacts/ 2>/dev/null || true + docker rm -f robotwin-eval || true + + - name: Parse RoboTwin eval metrics + if: always() + run: | + python3 scripts/ci/parse_eval_metrics.py \ + --artifacts-dir /tmp/robotwin-artifacts \ + --env robotwin \ + --task beat_block_hammer \ + --policy pepijn223/smolvla_robotwin + + - name: Upload RoboTwin rollout video + if: always() + uses: actions/upload-artifact@v4 + with: + name: robotwin-rollout-video + path: /tmp/robotwin-artifacts/videos/ + if-no-files-found: warn + + - name: Upload RoboTwin eval metrics + if: always() + uses: actions/upload-artifact@v4 + with: + name: robotwin-metrics + path: /tmp/robotwin-artifacts/metrics.json + if-no-files-found: warn diff --git a/docker/Dockerfile.benchmark.robotwin b/docker/Dockerfile.benchmark.robotwin new file mode 100644 index 000000000..2393899c6 --- /dev/null +++ b/docker/Dockerfile.benchmark.robotwin @@ -0,0 +1,199 @@ +# Copyright 2025 The HuggingFace Inc. team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Isolated benchmark image for RoboTwin 2.0 integration tests. +# Full installation: SAPIEN, mplib, CuRobo, pytorch3d + simulation assets. +# +# RoboTwin install guide: https://robotwin-platform.github.io/doc/usage/robotwin-install.html +# Assets from: https://huggingface.co/datasets/TianxingChen/RoboTwin2.0 +# - embodiments.zip ~220 MB +# - objects.zip ~3.74 GB +# (background_texture.zip ~11 GB is skipped — not required for a smoke eval) +# +# Build: docker build -f docker/Dockerfile.benchmark.robotwin -t lerobot-benchmark-robotwin . +# Run: docker run --gpus all --rm lerobot-benchmark-robotwin \ +# lerobot-eval --env.type=robotwin --env.task=beat_block_hammer ... + +# RoboTwin requires CUDA devel image for CuRobo compilation (nvcc needed). +# Python 3.12 required by lerobot (type alias syntax). open3d 0.19.0 has cp312 wheels. +ARG CUDA_VERSION=12.1.1 +ARG OS_VERSION=22.04 +FROM nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${OS_VERSION} + +ARG PYTHON_VERSION=3.12 + +ENV DEBIAN_FRONTEND=noninteractive \ + PATH=/lerobot/.venv/bin:$PATH \ + CUDA_VISIBLE_DEVICES=0 \ + DEVICE=cuda \ + # NVIDIA Container Toolkit: expose all driver capabilities (includes Vulkan) + NVIDIA_DRIVER_CAPABILITIES=all \ + # SAPIEN uses Vulkan; point at the NVIDIA ICD we create below + VK_ICD_FILENAMES=/usr/share/vulkan/icd.d/nvidia_icd.json \ + # RoboTwin task modules live in /envs/; add to PYTHONPATH after clone. + ROBOTWIN_ROOT=/opt/robotwin + +# System deps — extended with cmake/ninja for CuRobo and pytorch3d compilation. 
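+# The same RUN below also writes a minimal NVIDIA Vulkan ICD manifest into
+# /usr/share/vulkan/icd.d: SAPIEN renders through Vulkan, and the CUDA base
+# image ships no ICD of its own, so headless rendering fails inside the
+# container without it. The api_version value in the manifest is an
+# assumption; adjust it if vulkaninfo reports a different driver API version.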
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    software-properties-common build-essential git curl \
+    libglib2.0-0 libgl1-mesa-glx libegl1-mesa ffmpeg \
+    libusb-1.0-0-dev speech-dispatcher libgeos-dev portaudio19-dev \
+    cmake pkg-config ninja-build \
+    libvulkan1 vulkan-tools \
+    && add-apt-repository -y ppa:deadsnakes/ppa \
+    && apt-get update \
+    && apt-get install -y --no-install-recommends \
+    python${PYTHON_VERSION} \
+    python${PYTHON_VERSION}-venv \
+    python${PYTHON_VERSION}-dev \
+    && curl -LsSf https://astral.sh/uv/0.8.0/install.sh | sh \
+    && mv /root/.local/bin/uv /usr/local/bin/uv \
+    && mkdir -p /usr/share/vulkan/icd.d \
+    && echo '{"file_format_version":"1.0.0","ICD":{"library_path":"libGLX_nvidia.so.0","api_version":"1.3.0"}}' \
+    > /usr/share/vulkan/icd.d/nvidia_icd.json \
+    && useradd --create-home --shell /bin/bash user_lerobot \
+    && usermod -aG sudo user_lerobot \
+    && apt-get clean && rm -rf /var/lib/apt/lists/*
+
+WORKDIR /lerobot
+RUN chown -R user_lerobot:user_lerobot /lerobot
+USER user_lerobot
+
+ENV HOME=/home/user_lerobot \
+    HF_HOME=/home/user_lerobot/.cache/huggingface \
+    HF_LEROBOT_HOME=/home/user_lerobot/.cache/huggingface/lerobot \
+    TORCH_HOME=/home/user_lerobot/.cache/torch \
+    TRITON_CACHE_DIR=/home/user_lerobot/.cache/triton
+
+RUN uv venv --python python${PYTHON_VERSION}
+
+# ── 1. Install lerobot ─────────────────────────────────────────────────────
+COPY --chown=user_lerobot:user_lerobot setup.py pyproject.toml uv.lock README.md MANIFEST.in ./
+COPY --chown=user_lerobot:user_lerobot src/ src/
+
+# Install lerobot with the smolvla extra — it pulls in the transformers
+# dependency needed by the SmolVLA policy. No benchmark extras: the RoboTwin
+# stack itself is installed from source below.
+RUN uv pip install --no-cache -e ".[smolvla]"
+
+# ── 2. Install RoboTwin 2.0 simulator stack ────────────────────────────────
+# Shallow clone of the upstream default branch. Pin a commit SHA here if
+# bit-for-bit reproducibility is needed.
+USER root
+RUN git clone --depth=1 https://github.com/RoboTwin-Platform/RoboTwin.git ${ROBOTWIN_ROOT} \
+    && chown -R user_lerobot:user_lerobot ${ROBOTWIN_ROOT}
+USER user_lerobot
+
+# Install RoboTwin-specific packages on top of the lerobot venv, using
+# --python to pin resolution to the venv's Python 3.12 rather than uv's
+# default interpreter.
+# We intentionally skip: torch/torchvision (already in lerobot), gymnasium
+# (lerobot uses 1.x; RoboTwin's API is wrapped so 0.29 is not needed), scipy
+# (version-pinned by lerobot), huggingface_hub (pinned by lerobot), wandb,
+# azure, openai, pyglet (UI only).
+RUN uv pip install --no-cache --python .venv/bin/python \
+    "sapien==3.0.0b1" \
+    "mplib==0.2.1" \
+    "transforms3d==0.4.2" \
+    "trimesh==4.4.3" \
+    "open3d==0.19.0" \
+    "imageio==2.34.2" \
+    "termcolor" \
+    "zarr" \
+    "pydantic" \
+    "h5py"
+
+# pytorch3d — must be built from source (no universal wheel available).
+# This is the slowest step (~10 min); cached in subsequent builds.
+RUN uv pip install --no-cache --no-build-isolation --python .venv/bin/python \
+    "git+https://github.com/facebookresearch/pytorch3d.git@stable"
+
+# CuRobo — NVIDIA motion generation library; requires nvcc (devel image).
+# TORCH_CUDA_ARCH_LIST must be set or the build fails with an empty arch list.
+RUN cd ${ROBOTWIN_ROOT}/envs \
+    && git clone --depth=1 https://github.com/NVlabs/curobo.git \
+    && cd curobo \
+    && TORCH_CUDA_ARCH_LIST="7.0;7.5;8.0;8.6;8.9;9.0" \
+       uv pip install -e . --no-build-isolation --no-cache
+
+# ── 3. Apply upstream patches (mirrors script/_install.sh) ─────────────────
+# patch 1: mplib — remove `or collide` from planner.py line 807.
+RUN python - <<'EOF'
+import re, pathlib, site
+for d in site.getsitepackages():
+    p = pathlib.Path(d) / "mplib" / "planner.py"
+    if p.exists():
+        src = p.read_text()
+        patched = re.sub(r"\bor collide\b", "", src, count=1)
+        p.write_text(patched)
+        print(f"mplib patch applied: {p}")
+        break
+EOF
+
+# patch 2: sapien URDF loader — add UTF-8 encoding + fix .srdf extension.
+RUN python - <<'EOF'
+import pathlib, site
+for d in site.getsitepackages():
+    p = pathlib.Path(d) / "sapien" / "wrapper" / "urdf_loader.py"
+    if p.exists():
+        src = p.read_text()
+        # add encoding='utf-8' to the srdf open() call and fix the extension
+        patched = src.replace(
+            'with open(srdf_path) as f:', 'with open(srdf_path, encoding="utf-8") as f:'
+        ).replace('"srdf"', '".srdf"')
+        p.write_text(patched)
+        print(f"sapien patch applied: {p}")
+        break
+EOF
+
+# ── 4. Download simulation assets from HuggingFace ─────────────────────────
+# embodiments.zip (~220 MB) + objects.zip (~3.74 GB).
+# background_texture.zip (~11 GB) is skipped — not required for a smoke eval.
+# Set HF_TOKEN (passed as --build-arg or via --secret) for authenticated access.
+ARG HF_TOKEN=""
+RUN python - <<'EOF'
+import os, pathlib, zipfile
+from huggingface_hub import hf_hub_download
+
+token = os.environ.get("HF_TOKEN") or None
+assets_dir = pathlib.Path(os.environ["ROBOTWIN_ROOT"]) / "assets"
+assets_dir.mkdir(parents=True, exist_ok=True)
+
+for fname in ("embodiments.zip", "objects.zip"):
+    print(f"Downloading {fname} ...")
+    local = hf_hub_download(
+        repo_id="TianxingChen/RoboTwin2.0",
+        repo_type="dataset",
+        filename=fname,
+        token=token,
+        local_dir=str(assets_dir),
+    )
+    print(f"Extracting {fname} ...")
+    with zipfile.ZipFile(local, "r") as z:
+        z.extractall(str(assets_dir))
+    pathlib.Path(local).unlink()  # remove zip after extraction
+    print(f"{fname} done.")
+EOF
+
+# Update embodiment config paths to reflect the installation directory.
+RUN cd ${ROBOTWIN_ROOT} \
+    && python script/update_embodiment_config_path.py
+
+# ── 5. Finalise ─────────────────────────────────────────────────────────────
+# Expose RoboTwin task modules on PYTHONPATH so `import envs.` works.
+ENV PYTHONPATH="${ROBOTWIN_ROOT}:${PYTHONPATH}"
+
+COPY --chown=user_lerobot:user_lerobot . .
+
+RUN chmod +x /lerobot/.venv/lib/python${PYTHON_VERSION}/site-packages/triton/backends/nvidia/bin/ptxas 2>/dev/null || true
+
+CMD ["/bin/bash"]
diff --git a/docs/source/robotwin.mdx b/docs/source/robotwin.mdx
new file mode 100644
index 000000000..4850bc233
--- /dev/null
+++ b/docs/source/robotwin.mdx
@@ -0,0 +1,206 @@
+# RoboTwin 2.0
+
+RoboTwin 2.0 is a **large-scale dual-arm manipulation benchmark** built on the SAPIEN physics engine. It provides a standardized evaluation protocol for bimanual robotic policies across 60 tasks with strong domain randomization (clutter, lighting, background, tabletop height, and language instructions).
+
+- Paper: [RoboTwin 2.0: A Scalable Data Generator and Benchmark with Strong Domain Randomization for Robust Bimanual Robotic Manipulation](https://robotwin-platform.github.io)
+- GitHub: [RoboTwin-Platform/RoboTwin](https://github.com/RoboTwin-Platform/RoboTwin)
+- Leaderboard: [robotwin-platform.github.io/leaderboard](https://robotwin-platform.github.io/leaderboard)
+- Dataset: [hxma/RoboTwin-LeRobot-v3.0](https://huggingface.co/datasets/hxma/RoboTwin-LeRobot-v3.0)
+
+## Overview
+
+| Property      | Value                                                       |
+| ------------- | ----------------------------------------------------------- |
+| Tasks         | 60 dual-arm manipulation tasks                              |
+| Robot         | Aloha-AgileX bimanual (14 DOF, 7 per arm)                   |
+| Action space  | 14-dim joint-space, continuous in `[-1, 1]`                 |
+| Cameras       | `head_camera`, `front_camera`, `left_wrist`, `right_wrist`  |
+| Simulator     | SAPIEN (not MuJoCo)                                         |
+| Eval protocol | 100 episodes/task, 50 demo_clean demonstrations             |
+| Eval settings | **Easy** (`demo_clean`) and **Hard** (`demo_randomized`)    |
+
+## Available tasks
+
+RoboTwin 2.0 ships with 60 dual-arm manipulation tasks. The full list appears on the [leaderboard](https://robotwin-platform.github.io/leaderboard). Example tasks:
+
+| Task                   | CLI name                 | Category         |
+| ---------------------- | ------------------------ | ---------------- |
+| Beat block with hammer | `beat_block_hammer`      | Tool use         |
+| Open / close laptop    | `open_laptop`            | Articulated obj  |
+| Stack blocks (2 / 3)   | `stack_blocks_two/three` | Stacking         |
+| Pour water             | `pour_water`             | Deformable/fluid |
+| Fold cloth             | `fold_cloth`             | Deformable       |
+| Handover block         | `handover_block`         | Bimanual coord.  |
+| Place shoes            | `place_shoes_left/right` | Precision place  |
+| Scan object            | `scan_object`            | Mobile manip.    |
+
+Pass a comma-separated list to `--env.task` to run multiple tasks in a single eval sweep.
+
+## Dataset
+
+The RoboTwin 2.0 dataset is available in **LeRobot v3.0 format** on the Hugging Face Hub:
+
+```
+hxma/RoboTwin-LeRobot-v3.0
+```
+
+It contains over 100,000 pre-collected trajectories across all 60 tasks (79.6 GB, Apache 2.0 license). No format conversion is needed — it is already in the correct LeRobot v3.0 schema with video observations and action labels.
+
+You can load it directly with the HF Datasets library:
+
+```python
+from datasets import load_dataset
+
+ds = load_dataset("hxma/RoboTwin-LeRobot-v3.0", split="train")
+```
+
+## Installation
+
+RoboTwin 2.0 requires **Linux** with an NVIDIA GPU (CUDA 12.1 recommended). Installation takes approximately 20 minutes.
+
+### 1. Create a conda environment
+
+```bash
+conda create -n robotwin python=3.12 -y
+conda activate robotwin
+```
+
+### 2. Install LeRobot
+
+```bash
+git clone https://github.com/huggingface/lerobot.git
+cd lerobot
+pip install -e "."
+```
+
+### 3. Install RoboTwin 2.0
+
+```bash
+git clone https://github.com/RoboTwin-Platform/RoboTwin.git
+cd RoboTwin
+bash script/_install.sh
+bash script/_download_assets.sh
+```
+
+The install script handles all Python dependencies including SAPIEN, CuRobo, mplib, and pytorch3d.
+
+If the automated install fails, install manually:
+
+```bash
+pip install -r requirements.txt
+pip install "git+https://github.com/facebookresearch/pytorch3d.git@stable"
+cd envs && git clone https://github.com/NVlabs/curobo.git && cd curobo
+pip install -e . --no-build-isolation
+```
+
+Then apply the required mplib fix: in `mplib/planner.py` line 807, remove `or collide` from the conditional.
+
+### 4. 
Add RoboTwin to PYTHONPATH + +The RoboTwin task modules must be importable by LeRobot. From within the `RoboTwin/` directory: + +```bash +export PYTHONPATH="${PYTHONPATH}:$(pwd)" +``` + +Add this to your shell profile to make it permanent. + +## Evaluation + +### Standard evaluation (recommended) + +Evaluate a policy on a single task with the official protocol (100 episodes): + +```bash +lerobot-eval \ + --policy.path="your-hf-policy-id" \ + --env.type=robotwin \ + --env.task=beat_block_hammer \ + --eval.batch_size=1 \ + --eval.n_episodes=100 +``` + +### Single-task quick check + +```bash +lerobot-eval \ + --policy.path="your-hf-policy-id" \ + --env.type=robotwin \ + --env.task=beat_block_hammer \ + --eval.batch_size=1 \ + --eval.n_episodes=5 +``` + +### Multi-task sweep + +Evaluate on several tasks in one run: + +```bash +lerobot-eval \ + --policy.path="your-hf-policy-id" \ + --env.type=robotwin \ + --env.task=beat_block_hammer,click_bell,handover_block,stack_blocks_two \ + --eval.batch_size=1 \ + --eval.n_episodes=100 +``` + +### Full benchmark (all 60 tasks) + +```bash +lerobot-eval \ + --policy.path="your-hf-policy-id" \ + --env.type=robotwin \ + --env.task=adjust_bottle,beat_block_hammer,blocks_ranking_rgb,blocks_ranking_size,click_alarmclock,click_bell,close_laptop,close_microwave,dump_bin,grab_roller,handover_block,handover_cup,handover_diverse_bottles,handover_mic,hanging_mug,insert_pin,lift_pot,make_tea,open_laptop,open_microwave,pick_diverse_bottles,pick_dual_bottles,place_basket,place_block,place_cable,place_can,place_chopsticks,place_cloth,place_container,place_cup,place_diverse_bottles,place_dual_bottles,place_fork,place_knife,place_object_basket,place_ring,place_ruler,place_shoes_left,place_shoes_right,place_spoon,place_toy,pour_water,press_stapler,put_bottles_dustbin,put_object_cabinet,put_shoes_box,rotate_qrcode,scan_object,shake_bottle,shake_bottle_horizontally,stack_blocks_three,stack_blocks_two,stack_bowls_three,stack_bowls_two,stamp_seal,turn_switch,wipe_board,arrange_tools,build_tower,fold_cloth \ + --eval.batch_size=1 \ + --eval.n_episodes=100 +``` + +## Camera configuration + +By default, all four cameras are included: + +| Camera key | Description | +| -------------- | ------------------------------ | +| `head_camera` | Overhead / third-person view | +| `front_camera` | Front-facing static camera | +| `left_wrist` | Left arm wrist-mounted camera | +| `right_wrist` | Right arm wrist-mounted camera | + +To use a subset of cameras, override `--env.camera_names`: + +```bash +lerobot-eval \ + --policy.path="your-hf-policy-id" \ + --env.type=robotwin \ + --env.task=beat_block_hammer \ + --env.camera_names="head_camera,left_wrist,right_wrist" \ + --eval.batch_size=1 \ + --eval.n_episodes=10 +``` + +## Environment config reference + +Key parameters for `RoboTwinEnvConfig`: + +| Parameter | Default | Description | +| -------------------- | --------------------------------------------------- | ---------------------------------- | +| `task` | `"beat_block_hammer"` | Comma-separated task name(s) | +| `fps` | `25` | Simulation FPS | +| `episode_length` | `300` | Max steps per episode | +| `obs_type` | `"pixels_agent_pos"` | `"pixels"` or `"pixels_agent_pos"` | +| `camera_names` | `"head_camera,front_camera,left_wrist,right_wrist"` | Comma-separated active cameras | +| `observation_height` | `480` | Camera pixel height | +| `observation_width` | `640` | Camera pixel width | + +## Leaderboard submission + +Results can be submitted to the [RoboTwin 2.0 
leaderboard](https://robotwin-platform.github.io/leaderboard). The official protocol requires: + +- Training on 50 `demo_clean` demonstrations per task +- Evaluating 100 episodes per task +- Reporting success rate separately for **Easy** (`demo_clean`) and **Hard** (`demo_randomized`) settings + +For submission instructions, refer to the [RoboTwin 2.0 documentation](https://robotwin-platform.github.io/doc/). diff --git a/src/lerobot/envs/configs.py b/src/lerobot/envs/configs.py index 2a7c52d45..17edeb903 100644 --- a/src/lerobot/envs/configs.py +++ b/src/lerobot/envs/configs.py @@ -574,3 +574,97 @@ class IsaaclabArenaEnv(HubEnvConfig): ), PolicyProcessorPipeline(steps=[]), ) + + +@EnvConfig.register_subclass("robotwin") +@dataclass +class RoboTwinEnvConfig(EnvConfig): + """Configuration for RoboTwin 2.0 benchmark environments. + + RoboTwin 2.0 is a dual-arm manipulation benchmark with 60 tasks built on the + SAPIEN simulator. The robot is an Aloha-AgileX bimanual platform with 14 DOF + (7 per arm). All four cameras are enabled by default. + + See: https://robotwin-platform.github.io + Dataset: https://huggingface.co/datasets/hxma/RoboTwin-LeRobot-v3.0 + """ + + task: str = "beat_block_hammer" # single task or comma-separated list + fps: int = 25 + episode_length: int = 300 + obs_type: str = "pixels_agent_pos" + render_mode: str = "rgb_array" + # Comma-separated list of cameras to include in observations. + # Available: head_camera, front_camera, left_wrist, right_wrist + camera_names: str = "head_camera,front_camera,left_wrist,right_wrist" + observation_height: int = 480 + observation_width: int = 640 + features: dict[str, PolicyFeature] = field( + default_factory=lambda: { + ACTION: PolicyFeature(type=FeatureType.ACTION, shape=(14,)), + } + ) + features_map: dict[str, str] = field( + default_factory=lambda: { + ACTION: ACTION, + "pixels/head_camera": f"{OBS_IMAGES}.head_camera", + "pixels/front_camera": f"{OBS_IMAGES}.front_camera", + "pixels/left_wrist": f"{OBS_IMAGES}.left_wrist", + "pixels/right_wrist": f"{OBS_IMAGES}.right_wrist", + "agent_pos": OBS_STATE, + } + ) + + def __post_init__(self): + cam_list = [c.strip() for c in self.camera_names.split(",") if c.strip()] + for cam in cam_list: + self.features[f"pixels/{cam}"] = PolicyFeature( + type=FeatureType.VISUAL, + shape=(self.observation_height, self.observation_width, 3), + ) + # Keep features_map entry if already set (default_factory); add if missing. + key = f"pixels/{cam}" + if key not in self.features_map: + self.features_map[key] = f"{OBS_IMAGES}.{cam}" + + if self.obs_type == "pixels_agent_pos": + self.features["agent_pos"] = PolicyFeature( + type=FeatureType.STATE, + shape=(14,), # 14 DOF: 7 per arm + ) + elif self.obs_type != "pixels": + raise ValueError( + f"Unsupported obs_type '{self.obs_type}'. " + "RoboTwinEnvConfig supports 'pixels' and 'pixels_agent_pos'." 
+ ) + + @property + def gym_kwargs(self) -> dict: + return {} + + def create_envs(self, n_envs: int, use_async_envs: bool = True): + from lerobot.envs.robotwin import create_robotwin_envs + + if not self.task: + raise ValueError("RoboTwinEnvConfig requires `task` to be specified.") + + env_cls = gym.vector.AsyncVectorEnv if (use_async_envs and n_envs > 1) else gym.vector.SyncVectorEnv + cam_list = [c.strip() for c in self.camera_names.split(",") if c.strip()] + return create_robotwin_envs( + task=self.task, + n_envs=n_envs, + env_cls=env_cls, + camera_names=cam_list, + observation_height=self.observation_height, + observation_width=self.observation_width, + episode_length=self.episode_length, + ) + + def get_env_processors(self): + from lerobot.processor.env_processor import RoboTwinProcessorStep + from lerobot.processor.pipeline import PolicyProcessorPipeline + + return ( + PolicyProcessorPipeline(steps=[RoboTwinProcessorStep()]), + PolicyProcessorPipeline(steps=[]), + ) diff --git a/src/lerobot/envs/robotwin.py b/src/lerobot/envs/robotwin.py new file mode 100644 index 000000000..05889fd7d --- /dev/null +++ b/src/lerobot/envs/robotwin.py @@ -0,0 +1,465 @@ +#!/usr/bin/env python + +# Copyright 2025 The HuggingFace Inc. team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from __future__ import annotations + +import importlib +from collections import defaultdict +from collections.abc import Callable, Sequence +from functools import partial +from typing import Any + +import gymnasium as gym +import numpy as np +from gymnasium import spaces + +from lerobot.types import RobotObservation + +# Camera names as used by RoboTwin 2.0. The wrapper appends "_rgb" when looking +# up keys in get_obs() output (e.g. "head_camera" → "head_camera_rgb"). +ROBOTWIN_CAMERA_NAMES: tuple[str, ...] = ( + "head_camera", + "front_camera", + "left_wrist", + "right_wrist", +) + +ACTION_DIM = 14 # 7 DOF × 2 arms +ACTION_LOW = -1.0 +ACTION_HIGH = 1.0 +DEFAULT_EPISODE_LENGTH = 300 + +# Complete task list from RoboTwin 2.0 (60 tasks, as listed on the leaderboard). +ROBOTWIN_TASKS: tuple[str, ...] 
= ( + "adjust_bottle", + "beat_block_hammer", + "blocks_ranking_rgb", + "blocks_ranking_size", + "click_alarmclock", + "click_bell", + "close_laptop", + "close_microwave", + "dump_bin", + "grab_roller", + "handover_block", + "handover_cup", + "handover_diverse_bottles", + "handover_mic", + "hanging_mug", + "insert_pin", + "lift_pot", + "make_tea", + "open_laptop", + "open_microwave", + "pick_diverse_bottles", + "pick_dual_bottles", + "place_basket", + "place_block", + "place_cable", + "place_can", + "place_chopsticks", + "place_cloth", + "place_container", + "place_cup", + "place_diverse_bottles", + "place_dual_bottles", + "place_fork", + "place_knife", + "place_object_basket", + "place_ring", + "place_ruler", + "place_shoes_left", + "place_shoes_right", + "place_spoon", + "place_toy", + "pour_water", + "press_stapler", + "put_bottles_dustbin", + "put_object_cabinet", + "put_shoes_box", + "rotate_qrcode", + "scan_object", + "shake_bottle", + "shake_bottle_horizontally", + "stack_blocks_three", + "stack_blocks_two", + "stack_bowls_three", + "stack_bowls_two", + "stamp_seal", + "turn_switch", + "wipe_board", + "arrange_tools", + "build_tower", + "fold_cloth", +) + + +_ROBOTWIN_SETUP_CACHE: dict[str, dict[str, Any]] = {} + + +def _load_robotwin_setup_kwargs(task_name: str) -> dict[str, Any]: + """Build the kwargs dict RoboTwin's setup_demo expects. + + Mirrors the config loading done by RoboTwin's ``script/eval_policy.py``: + reads ``task_config/demo_clean.yml``, resolves the embodiment file from + ``_embodiment_config.yml``, loads the robot's own ``config.yml``, and + reads camera dimensions from ``_camera_config.yml``. + + Uses ``aloha-agilex`` single-robot dual-arm by default (the only embodiment + used by beat_block_hammer and most smoke-test tasks). 
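+
+    The returned dict is exactly what ``setup_demo(**kwargs)`` consumes;
+    ``reset()`` additionally injects ``seed`` and ``is_test`` before the call.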
+ """ + if task_name in _ROBOTWIN_SETUP_CACHE: + return dict(_ROBOTWIN_SETUP_CACHE[task_name]) + + import os + + import yaml # type: ignore[import-untyped] + from envs import CONFIGS_PATH # type: ignore[import-not-found] + + task_config = "demo_clean" + with open(f"./task_config/{task_config}.yml", encoding="utf-8") as f: + args = yaml.safe_load(f) + + # Resolve embodiment — demo_clean.yml uses [aloha-agilex] (dual-arm single robot) + with open(os.path.join(CONFIGS_PATH, "_embodiment_config.yml"), encoding="utf-8") as f: + embodiment_types = yaml.safe_load(f) + embodiment = args.get("embodiment", ["aloha-agilex"]) + if len(embodiment) == 1: + robot_file = embodiment_types[embodiment[0]]["file_path"] + args["left_robot_file"] = robot_file + args["right_robot_file"] = robot_file + args["dual_arm_embodied"] = True + elif len(embodiment) == 3: + args["left_robot_file"] = embodiment_types[embodiment[0]]["file_path"] + args["right_robot_file"] = embodiment_types[embodiment[1]]["file_path"] + args["embodiment_dis"] = embodiment[2] + args["dual_arm_embodied"] = False + else: + raise ValueError(f"embodiment must have 1 or 3 items, got {len(embodiment)}") + + with open(os.path.join(args["left_robot_file"], "config.yml"), encoding="utf-8") as f: + args["left_embodiment_config"] = yaml.safe_load(f) + with open(os.path.join(args["right_robot_file"], "config.yml"), encoding="utf-8") as f: + args["right_embodiment_config"] = yaml.safe_load(f) + + # Camera dimensions + with open(os.path.join(CONFIGS_PATH, "_camera_config.yml"), encoding="utf-8") as f: + camera_config = yaml.safe_load(f) + head_cam = args["camera"]["head_camera_type"] + args["head_camera_h"] = camera_config[head_cam]["h"] + args["head_camera_w"] = camera_config[head_cam]["w"] + + # Headless overrides + args["render_freq"] = 0 + args["task_name"] = task_name + args["task_config"] = task_config + + _ROBOTWIN_SETUP_CACHE[task_name] = args + return dict(args) + + +def _load_robotwin_task(task_name: str) -> type: + """Dynamically import and return a RoboTwin 2.0 task class. + + RoboTwin tasks live in ``envs/.py`` relative to the repository + root and are expected to be on ``sys.path`` after installation. + """ + try: + module = importlib.import_module(f"envs.{task_name}") + except ModuleNotFoundError as e: + raise ModuleNotFoundError( + f"Could not import RoboTwin task '{task_name}'. " + "Ensure RoboTwin 2.0 is installed and its 'envs/' directory is on PYTHONPATH. " + "See the RoboTwin installation guide: https://robotwin-platform.github.io/doc/usage/robotwin-install.html" + ) from e + task_cls = getattr(module, task_name, None) + if task_cls is None: + raise AttributeError(f"Task class '{task_name}' not found in envs/{task_name}.py") + return task_cls + + +class RoboTwinEnv(gym.Env): + """Gymnasium wrapper around a single RoboTwin 2.0 task. + + RoboTwin uses a custom SAPIEN-based API (``setup_demo`` / ``get_obs`` / + ``take_action`` / ``check_success``) rather than the standard gym interface. + This class bridges that API to Gymnasium so that ``lerobot-eval`` can drive + RoboTwin exactly like LIBERO or Meta-World. + + The underlying SAPIEN environment is created lazily on the first ``reset()`` + call *inside the worker process*. This is required for + ``gym.vector.AsyncVectorEnv`` compatibility: SAPIEN allocates EGL/GPU + contexts that must not be forked from the parent process. + + Observations + ------------ + The ``pixels`` dict uses the raw RoboTwin camera names as keys (e.g. + ``"head_camera"``, ``"left_wrist"``). 
``preprocess_observation`` in + ``envs/utils.py`` then converts these to ``observation.images.``. + + Actions + ------- + 14-dim float32 array in ``[-1, 1]`` (joint-space, 7 DOF per arm). + """ + + metadata = {"render_modes": ["rgb_array"], "render_fps": 25} + + def __init__( + self, + task_name: str, + episode_index: int = 0, + n_envs: int = 1, + camera_names: Sequence[str] = ROBOTWIN_CAMERA_NAMES, + observation_height: int = 480, + observation_width: int = 640, + episode_length: int = DEFAULT_EPISODE_LENGTH, + render_mode: str = "rgb_array", + ): + super().__init__() + self.task_name = task_name + self.task = task_name # used by add_envs_task() in utils.py + self.task_description = task_name.replace("_", " ") + self.episode_index = episode_index + self._reset_stride = n_envs + self.camera_names = list(camera_names) + self.observation_height = observation_height + self.observation_width = observation_width + self.episode_length = episode_length + self.render_mode = render_mode + + self._env: Any | None = None # deferred — created on first reset() inside worker + self._step_count: int = 0 + + image_spaces = { + cam: spaces.Box( + low=0, + high=255, + shape=(observation_height, observation_width, 3), + dtype=np.uint8, + ) + for cam in self.camera_names + } + self.observation_space = spaces.Dict( + { + "pixels": spaces.Dict(image_spaces), + "agent_pos": spaces.Box(low=-np.inf, high=np.inf, shape=(ACTION_DIM,), dtype=np.float64), + } + ) + self.action_space = spaces.Box( + low=ACTION_LOW, high=ACTION_HIGH, shape=(ACTION_DIM,), dtype=np.float32 + ) + + def _ensure_env(self) -> None: + """Create the SAPIEN environment on first use. + + Called inside the worker subprocess after fork(), so each worker gets + its own EGL/GPU context rather than inheriting a stale one from the + parent process (which causes crashes with AsyncVectorEnv). + """ + if self._env is not None: + return + task_cls = _load_robotwin_task(self.task_name) + self._env = task_cls() + + def _get_obs(self) -> RobotObservation: + assert self._env is not None, "_get_obs called before _ensure_env()" + raw = self._env.get_obs() + + images: dict[str, np.ndarray] = {} + for cam in self.camera_names: + # RoboTwin 2.0 camera keys follow the "_rgb" convention. + # Fall back to the bare name if the suffixed key is absent. + key = f"{cam}_rgb" if f"{cam}_rgb" in raw else cam + if key in raw: + img = np.asarray(raw[key], dtype=np.uint8) + # Ensure HWC and exactly 3 channels + if img.ndim == 2: + img = np.stack([img, img, img], axis=-1) + elif img.shape[-1] != 3: + img = img[..., :3] + if img.shape[0] != self.observation_height or img.shape[1] != self.observation_width: + # Resize is best done outside hot-loop; return as-is and let + # the processor handle it if needed. + pass + images[cam] = img + else: + # Camera not exposed by this task — return a black frame so the + # observation shape stays consistent across all tasks. + images[cam] = np.zeros((self.observation_height, self.observation_width, 3), dtype=np.uint8) + + # Joint state: try common key names used by RoboTwin base_task. 
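+        # Which key is present varies across RoboTwin versions and forks, so we
+        # probe a few known spellings and keep the first array wide enough to
+        # cover both arms. Zero-filling when nothing matches keeps agent_pos
+        # shape-stable, at the cost of silently masking missing proprioception.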
+ joint_state: np.ndarray | None = None + for key in ("joint_action", "qpos", "robot_qpos", "joint_state", "obs"): + if key in raw: + arr = np.asarray(raw[key], dtype=np.float64).flatten() + if arr.size >= ACTION_DIM: + joint_state = arr[:ACTION_DIM] + break + if joint_state is None: + joint_state = np.zeros(ACTION_DIM, dtype=np.float64) + + return {"pixels": images, "agent_pos": joint_state} + + def reset(self, seed: int | None = None, **kwargs) -> tuple[RobotObservation, dict]: + self._ensure_env() + super().reset(seed=seed) + assert self._env is not None # set by _ensure_env() above + + actual_seed = self.episode_index if seed is None else seed + setup_kwargs = _load_robotwin_setup_kwargs(self.task_name) + setup_kwargs.update(seed=actual_seed, is_test=True) + self._env.setup_demo(**setup_kwargs) + self.episode_index += self._reset_stride + self._step_count = 0 + + obs = self._get_obs() + return obs, {"is_success": False, "task": self.task_name} + + def step(self, action: np.ndarray) -> tuple[RobotObservation, float, bool, bool, dict[str, Any]]: + assert self._env is not None, "step() called before reset()" + if action.ndim != 1 or action.shape[0] != ACTION_DIM: + raise ValueError(f"Expected 1-D action of shape ({ACTION_DIM},), got {action.shape}") + + # RoboTwin 2.0 uses take_action(); fall back to step() for older forks. + if hasattr(self._env, "take_action"): + self._env.take_action(action) + else: + self._env.step(action) + + self._step_count += 1 + + is_success = bool(getattr(self._env, "eval_success", False)) + if not is_success and hasattr(self._env, "check_success"): + is_success = bool(self._env.check_success()) + + obs = self._get_obs() + reward = float(is_success) + terminated = is_success + truncated = self._step_count >= self.episode_length + + info: dict[str, Any] = { + "task": self.task_name, + "is_success": is_success, + "step": self._step_count, + } + if terminated or truncated: + info["final_info"] = { + "task": self.task_name, + "is_success": is_success, + } + self.reset() + + return obs, reward, terminated, truncated, info + + def render(self) -> np.ndarray: + self._ensure_env() + obs = self._get_obs() + # Prefer head camera for rendering; fall back to first available. 
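+        # "First available" is deterministic: _get_obs() fills the pixels dict
+        # in self.camera_names order, so the fallback is always the first
+        # configured camera rather than an arbitrary one.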
+ if "head_camera" in obs["pixels"]: + return obs["pixels"]["head_camera"] + return next(iter(obs["pixels"].values())) + + def close(self) -> None: + if self._env is not None: + if hasattr(self._env, "close_env"): + import contextlib + + with contextlib.suppress(TypeError): + self._env.close_env() + self._env = None + + +# ---- Multi-task factory -------------------------------------------------------- + + +def _make_env_fns( + *, + task_name: str, + n_envs: int, + camera_names: list[str], + observation_height: int, + observation_width: int, + episode_length: int, +) -> list[Callable[[], RoboTwinEnv]]: + """Return n_envs factory callables for a single task.""" + + def _make_one(episode_index: int) -> RoboTwinEnv: + return RoboTwinEnv( + task_name=task_name, + episode_index=episode_index, + n_envs=n_envs, + camera_names=camera_names, + observation_height=observation_height, + observation_width=observation_width, + episode_length=episode_length, + ) + + return [partial(_make_one, i) for i in range(n_envs)] + + +def create_robotwin_envs( + task: str, + n_envs: int, + env_cls: Callable[[Sequence[Callable[[], Any]]], Any] | None = None, + camera_names: Sequence[str] = ROBOTWIN_CAMERA_NAMES, + observation_height: int = 480, + observation_width: int = 640, + episode_length: int = DEFAULT_EPISODE_LENGTH, +) -> dict[str, dict[int, Any]]: + """Create vectorized RoboTwin 2.0 environments. + + Returns: + ``dict[task_name][0] -> VectorEnv`` — one entry per task, each wrapping + ``n_envs`` parallel rollouts. + + Args: + task: Comma-separated list of task names (e.g. ``"beat_block_hammer"`` + or ``"beat_block_hammer,click_bell"``). + n_envs: Number of parallel rollouts per task. + env_cls: Vector env constructor (e.g. ``gym.vector.AsyncVectorEnv``). + camera_names: Cameras to include in observations. + observation_height: Pixel height for all cameras. + observation_width: Pixel width for all cameras. + episode_length: Max steps before truncation. + """ + if env_cls is None or not callable(env_cls): + raise ValueError("env_cls must be callable (e.g. gym.vector.AsyncVectorEnv).") + if not isinstance(n_envs, int) or n_envs <= 0: + raise ValueError(f"n_envs must be a positive int; got {n_envs}.") + + task_names = [t.strip() for t in str(task).split(",") if t.strip()] + if not task_names: + raise ValueError("`task` must contain at least one RoboTwin task name.") + + unknown = [t for t in task_names if t not in ROBOTWIN_TASKS] + if unknown: + raise ValueError(f"Unknown RoboTwin tasks: {unknown}. 
Available tasks: {sorted(ROBOTWIN_TASKS)}") + + print(f"Creating RoboTwin envs | tasks={task_names} | n_envs(per task)={n_envs}") + + out: dict[str, dict[int, Any]] = defaultdict(dict) + for task_name in task_names: + fns = _make_env_fns( + task_name=task_name, + n_envs=n_envs, + camera_names=list(camera_names), + observation_height=observation_height, + observation_width=observation_width, + episode_length=episode_length, + ) + out[task_name][0] = env_cls(fns) + print(f"Built vec env | task={task_name} | n_envs={n_envs}") + + return {k: dict(v) for k, v in out.items()} diff --git a/src/lerobot/processor/env_processor.py b/src/lerobot/processor/env_processor.py index 75cbb79de..c826a4c70 100644 --- a/src/lerobot/processor/env_processor.py +++ b/src/lerobot/processor/env_processor.py @@ -226,3 +226,29 @@ class IsaaclabArenaProcessorStep(ObservationProcessorStep): def observation(self, observation): return self._process_observation(observation) + + +@dataclass +@ProcessorStepRegistry.register(name="robotwin_processor") +class RoboTwinProcessorStep(ObservationProcessorStep): + """Passthrough step for RoboTwin observations, casting state to float32. + + RoboTwin observations already arrive in LeRobot format (observation.images.* + and observation.state), so this step mainly ensures state dtype is float32. + """ + + def _process_observation(self, observation): + processed_obs = dict(observation) + if OBS_STATE in processed_obs: + state = processed_obs[OBS_STATE] + if hasattr(state, "dtype") and state.dtype != torch.float32: + processed_obs[OBS_STATE] = state.float() + return processed_obs + + def transform_features( + self, features: dict[PipelineFeatureType, dict[str, PolicyFeature]] + ) -> dict[PipelineFeatureType, dict[str, PolicyFeature]]: + return features + + def observation(self, observation): + return self._process_observation(observation) diff --git a/tests/envs/test_robotwin.py b/tests/envs/test_robotwin.py new file mode 100644 index 000000000..24b6bb22d --- /dev/null +++ b/tests/envs/test_robotwin.py @@ -0,0 +1,283 @@ +#!/usr/bin/env python + +# Copyright 2025 The HuggingFace Inc. team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Unit tests for the RoboTwin 2.0 Gymnasium wrapper. + +These tests mock out the SAPIEN-based RoboTwin runtime so they run without +the full RoboTwin installation (SAPIEN, CuRobo, mplib, etc.). +""" + +from __future__ import annotations + +from unittest.mock import MagicMock, patch + +import gymnasium as gym +import numpy as np +import pytest +import torch + +from lerobot.envs.robotwin import ( + ACTION_DIM, + ROBOTWIN_CAMERA_NAMES, + ROBOTWIN_TASKS, + RoboTwinEnv, + create_robotwin_envs, +) + +# --------------------------------------------------------------------------- +# Fixtures / helpers +# --------------------------------------------------------------------------- + + +def _make_mock_task_env( + height: int = 480, + width: int = 640, + cameras: tuple[str, ...] 
= ROBOTWIN_CAMERA_NAMES,
+) -> MagicMock:
+    """Return a mock that mimics the RoboTwin task class API."""
+    obs_dict = {f"{cam}_rgb": np.zeros((height, width, 3), dtype=np.uint8) for cam in cameras}
+    obs_dict["joint_action"] = np.zeros(ACTION_DIM, dtype=np.float64)
+
+    mock = MagicMock()
+    mock.get_obs.return_value = obs_dict
+    mock.setup_demo.return_value = None
+    mock.take_action.return_value = None
+    mock.eval_success = False
+    mock.check_success.return_value = False
+    mock.close_env.return_value = None
+    return mock
+
+
+def _patch_load(mock_task_instance: MagicMock):
+    """Patch both the task-class loader and the YAML setup-kwargs loader.
+
+    Without patching ``_load_robotwin_setup_kwargs`` too, ``reset()`` would
+    import the real ``envs`` package and read ``task_config/*.yml`` from disk,
+    defeating the purpose of running these tests without a RoboTwin install.
+    """
+    task_cls = MagicMock(return_value=mock_task_instance)
+    return patch.multiple(
+        "lerobot.envs.robotwin",
+        _load_robotwin_task=MagicMock(return_value=task_cls),
+        _load_robotwin_setup_kwargs=MagicMock(return_value={}),
+    )
+
+
+# ---------------------------------------------------------------------------
+# RoboTwinEnv unit tests
+# ---------------------------------------------------------------------------
+
+
+class TestRoboTwinEnv:
+    def test_observation_space_shape(self):
+        """observation_space should have the configured h×w×3 for every camera."""
+        h, w = 240, 320
+        env = RoboTwinEnv(
+            task_name="beat_block_hammer",
+            observation_height=h,
+            observation_width=w,
+            camera_names=["head_camera", "front_camera"],
+        )
+        pixels_space = env.observation_space["pixels"]
+        assert pixels_space["head_camera"].shape == (h, w, 3)
+        assert pixels_space["front_camera"].shape == (h, w, 3)
+        assert "left_wrist" not in pixels_space
+
+    def test_action_space(self):
+        env = RoboTwinEnv(task_name="beat_block_hammer")
+        assert env.action_space.shape == (ACTION_DIM,)
+        assert env.action_space.dtype == np.float32
+
+    def test_reset_returns_correct_obs_keys(self):
+        mock_task = _make_mock_task_env()
+        env = RoboTwinEnv(task_name="beat_block_hammer")
+        with _patch_load(mock_task):
+            obs, info = env.reset()
+
+        assert "pixels" in obs
+        for cam in ROBOTWIN_CAMERA_NAMES:
+            assert cam in obs["pixels"], f"Missing camera '{cam}' in obs"
+        assert "agent_pos" in obs
+        assert obs["agent_pos"].shape == (ACTION_DIM,)
+        assert info["is_success"] is False
+
+    def test_reset_calls_setup_demo(self):
+        mock_task = _make_mock_task_env()
+        env = RoboTwinEnv(task_name="beat_block_hammer")
+        with _patch_load(mock_task):
+            env.reset(seed=42)
+        mock_task.setup_demo.assert_called_once_with(seed=42, is_test=True)
+
+    def test_step_returns_correct_types(self):
+        mock_task = _make_mock_task_env()
+        env = RoboTwinEnv(task_name="beat_block_hammer")
+        action = np.zeros(ACTION_DIM, dtype=np.float32)
+        with _patch_load(mock_task):
+            env.reset()
+            obs, reward, terminated, truncated, info = env.step(action)
+
+        assert isinstance(obs, dict)
+        assert isinstance(reward, float)
+        assert isinstance(terminated, bool)
+        assert isinstance(truncated, bool)
+        assert isinstance(info, dict)
+
+    def test_step_wrong_action_shape_raises(self):
+        mock_task = _make_mock_task_env()
+        env = RoboTwinEnv(task_name="beat_block_hammer")
+        bad_action = np.zeros(7, dtype=np.float32)  # wrong dim
+        with _patch_load(mock_task):
+            env.reset()
+            with pytest.raises(ValueError, match="Expected 1-D action"):
+                env.step(bad_action)
+
+    def test_success_terminates_episode(self):
+        mock_task = _make_mock_task_env()
+        mock_task.check_success.return_value = True  # always succeeds
+        env = RoboTwinEnv(task_name="beat_block_hammer")
+        action = np.zeros(ACTION_DIM, dtype=np.float32)
+        with _patch_load(mock_task):
+            env.reset()
+            _, _, terminated, _, info = env.step(action)
+        assert terminated is True
+        assert info["is_success"] is True
+
+    def test_truncation_after_episode_length(self):
+        mock_task = _make_mock_task_env()
+        env = RoboTwinEnv(task_name="beat_block_hammer", episode_length=2)
+        action = np.zeros(ACTION_DIM, dtype=np.float32)
+        with _patch_load(mock_task):
+            env.reset()
+            env.step(action)  # step 1
+            _, _, _, truncated, _ = env.step(action)  # step 2 → truncated
+        assert truncated is True
+
+    def test_close_calls_close_env(self):
+        mock_task = _make_mock_task_env()
+        env = RoboTwinEnv(task_name="beat_block_hammer")
+        with _patch_load(mock_task):
+            env.reset()
+            env.close()
+        # The wrapper calls close_env() with no arguments (and suppresses
+        # TypeError for forks whose signature differs), so assert only that
+        # it was invoked.
+        mock_task.close_env.assert_called_once()
+
+    def test_black_frame_for_missing_camera(self):
+        """If a camera key is absent from get_obs(), a black frame is returned."""
+        mock_task = _make_mock_task_env(cameras=("head_camera",))  # only head_camera
+        env = RoboTwinEnv(
+            task_name="beat_block_hammer",
+            camera_names=["head_camera", "front_camera"],
+            observation_height=10,
+            observation_width=10,
+        )
+        with _patch_load(mock_task):
+            obs, _ = env.reset()
+        # front_camera is missing from mock → should be a black 10×10×3 array
+        assert obs["pixels"]["front_camera"].shape == (10, 10, 3)
+        assert obs["pixels"]["front_camera"].sum() == 0
+
+    def test_task_and_task_description_attributes(self):
+        env = RoboTwinEnv(task_name="beat_block_hammer")
+        assert env.task == "beat_block_hammer"
+        assert isinstance(env.task_description, str)
+
+    def test_deferred_init_env_is_none_before_reset(self):
+        env = RoboTwinEnv(task_name="beat_block_hammer")
+        assert env._env is None  # noqa: SLF001 (testing internal state)
+
+
+# ---------------------------------------------------------------------------
+# create_robotwin_envs tests
+# ---------------------------------------------------------------------------
+
+
+class TestCreateRoboTwinEnvs:
+    def test_returns_correct_structure(self):
+        mock_task = _make_mock_task_env()
+        env_cls = gym.vector.SyncVectorEnv
+        with _patch_load(mock_task):
+            envs = create_robotwin_envs(
+                task="beat_block_hammer",
+                n_envs=1,
+                env_cls=env_cls,
+            )
+        assert "beat_block_hammer" in envs
+        assert 0 in envs["beat_block_hammer"]
+        assert isinstance(envs["beat_block_hammer"][0], gym.vector.SyncVectorEnv)
+
+    def test_multi_task(self):
+        mock_task = _make_mock_task_env()
+        env_cls = gym.vector.SyncVectorEnv
+        with _patch_load(mock_task):
+            envs = create_robotwin_envs(
+                task="beat_block_hammer,click_bell",
+                n_envs=1,
+                env_cls=env_cls,
+            )
+        assert set(envs.keys()) == {"beat_block_hammer", "click_bell"}
+
+    def test_unknown_task_raises(self):
+        with pytest.raises(ValueError, match="Unknown RoboTwin tasks"):
+            create_robotwin_envs(
+                task="not_a_real_task",
+                n_envs=1,
+                env_cls=gym.vector.SyncVectorEnv,
+            )
+
+    def test_invalid_n_envs_raises(self):
+        with pytest.raises(ValueError, match="n_envs must be a positive int"):
+            create_robotwin_envs(
+                task="beat_block_hammer",
+                n_envs=0,
+                env_cls=gym.vector.SyncVectorEnv,
+            )
+
+
+# ---------------------------------------------------------------------------
+# ROBOTWIN_TASKS list
+# ---------------------------------------------------------------------------
+
+
+def test_task_list_not_empty():
+    assert len(ROBOTWIN_TASKS) >= 60
+
+
+def test_all_tasks_are_strings():
+    assert all(isinstance(t, str) and t for t in ROBOTWIN_TASKS)
+
+
+def test_no_duplicate_tasks():
+    assert len(ROBOTWIN_TASKS) == len(set(ROBOTWIN_TASKS))
+
+
+# ---------------------------------------------------------------------------
+# RoboTwinProcessorStep
+# 
--------------------------------------------------------------------------- + + +class TestRoboTwinProcessorStep: + def test_passes_through_images_and_state(self): + from lerobot.processor.env_processor import RoboTwinProcessorStep + from lerobot.utils.constants import OBS_IMAGES, OBS_STATE + + step = RoboTwinProcessorStep() + obs = { + f"{OBS_IMAGES}.head_camera": torch.zeros(1, 3, 4, 4), + f"{OBS_IMAGES}.front_camera": torch.zeros(1, 3, 4, 4), + OBS_STATE: torch.zeros(1, 14), + } + result = step.observation(obs) + assert f"{OBS_IMAGES}.head_camera" in result + assert f"{OBS_IMAGES}.front_camera" in result + assert result[OBS_STATE].dtype == torch.float32 + + def test_state_cast_to_float32(self): + from lerobot.processor.env_processor import RoboTwinProcessorStep + from lerobot.utils.constants import OBS_STATE + + step = RoboTwinProcessorStep() + obs = {OBS_STATE: torch.zeros(1, 14, dtype=torch.float64)} + result = step.observation(obs) + assert result[OBS_STATE].dtype == torch.float32
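+
+
+# ---------------------------------------------------------------------------
+# RoboTwinEnvConfig feature wiring (illustrative sketch)
+# ---------------------------------------------------------------------------
+# A minimal sanity check for the config-side camera wiring added in
+# src/lerobot/envs/configs.py: __post_init__ should register one VISUAL
+# feature per requested camera plus a 14-dim state feature when
+# obs_type="pixels_agent_pos". The import path follows the diff above; treat
+# this as a sketch rather than part of the official test suite.
+
+
+def test_robotwin_env_config_builds_camera_features():
+    from lerobot.envs.configs import RoboTwinEnvConfig
+
+    cfg = RoboTwinEnvConfig(
+        camera_names="head_camera,left_wrist",
+        observation_height=240,
+        observation_width=320,
+    )
+    # One visual feature per requested camera, shaped (h, w, 3).
+    assert cfg.features["pixels/head_camera"].shape == (240, 320, 3)
+    assert cfg.features["pixels/left_wrist"].shape == (240, 320, 3)
+    # Cameras that were not requested get no feature entry.
+    assert "pixels/front_camera" not in cfg.features
+    # Default obs_type is "pixels_agent_pos" → 14-dim proprioceptive state.
+    assert cfg.features["agent_pos"].shape == (14,)
+    assert cfg.features_map["pixels/left_wrist"].endswith("left_wrist")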