feat(envs): add RoboTwin 2.0 benchmark integration

- RoboTwinEnvConfig with 4-camera setup (head/front/left_wrist/right_wrist)
- Docker image with SAPIEN, mplib, CuRobo, pytorch3d (Python 3.12)
- CI workflow: 1-episode smoke eval with pepijn223/smolvla_robotwin
- RoboTwinProcessorStep for state float32 casting
- Camera rename_map: head_camera/front_camera/left_wrist -> camera1/2/3

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Pepijn
2026-04-14 11:49:15 +02:00
parent cff4bcf4a0
commit 5558ea2207
7 changed files with 1364 additions and 0 deletions
+91
View File
@@ -310,3 +310,94 @@ jobs:
name: metaworld-metrics
path: /tmp/metaworld-artifacts/metrics.json
if-no-files-found: warn
# ── ROBOTWIN 2.0 ──────────────────────────────────────────────────────────
# Isolated image: full RoboTwin 2.0 stack — SAPIEN, mplib, CuRobo,
# pytorch3d, + simulation assets (~4 GB).
# Build takes ~20 min on first run; subsequent runs hit the layer cache.
# Requires an NVIDIA GPU runner with CUDA 12.1 drivers.
robotwin-integration-test:
name: RoboTwin 2.0 — build image + 1-episode eval
runs-on:
group: aws-g6-4xlarge-plus
env:
HF_USER_TOKEN: ${{ secrets.LEROBOT_HF_USER }}
steps:
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
with:
persist-credentials: false
lfs: true
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3 # zizmor: ignore[unpinned-uses]
with:
cache-binary: false
# Build the full-install image: SAPIEN, mplib, CuRobo, pytorch3d +
# simulation assets (~4 GB). Layer cache lives in the runner's local
# Docker daemon — reused across re-runs on the same machine.
- name: Build RoboTwin 2.0 benchmark image
uses: docker/build-push-action@v6 # zizmor: ignore[unpinned-uses]
with:
context: .
file: docker/Dockerfile.benchmark.robotwin
push: false
load: true
tags: lerobot-benchmark-robotwin:ci
cache-from: type=local,src=/tmp/.buildx-cache-robotwin
cache-to: type=local,dest=/tmp/.buildx-cache-robotwin,mode=max
- name: Run RoboTwin 2.0 smoke eval (1 episode)
run: |
# Named container (no --rm) so we can docker cp artifacts out.
docker run --name robotwin-eval --gpus all \
--shm-size=4g \
-e HF_HOME=/tmp/hf \
-e HF_USER_TOKEN="${HF_USER_TOKEN}" \
lerobot-benchmark-robotwin:ci \
bash -c "
hf auth login --token \"\$HF_USER_TOKEN\" --add-to-git-credential 2>/dev/null || true
cd /opt/robotwin && lerobot-eval \
--policy.path=pepijn223/smolvla_robotwin \
--env.type=robotwin \
--env.task=beat_block_hammer \
--eval.batch_size=1 \
--eval.n_episodes=1 \
--eval.use_async_envs=false \
--policy.device=cuda \
'--rename_map={\"observation.images.head_camera\": \"observation.images.camera1\", \"observation.images.front_camera\": \"observation.images.camera2\", \"observation.images.left_wrist\": \"observation.images.camera3\"}' \
--output_dir=/tmp/eval-artifacts
"
- name: Copy RoboTwin artifacts from container
if: always()
run: |
mkdir -p /tmp/robotwin-artifacts
docker cp robotwin-eval:/tmp/eval-artifacts/. /tmp/robotwin-artifacts/ 2>/dev/null || true
docker rm -f robotwin-eval || true
- name: Parse RoboTwin eval metrics
if: always()
run: |
python3 scripts/ci/parse_eval_metrics.py \
--artifacts-dir /tmp/robotwin-artifacts \
--env robotwin \
--task beat_block_hammer \
--policy pepijn223/smolvla_robotwin
- name: Upload RoboTwin rollout video
if: always()
uses: actions/upload-artifact@v4
with:
name: robotwin-rollout-video
path: /tmp/robotwin-artifacts/videos/
if-no-files-found: warn
- name: Upload RoboTwin eval metrics
if: always()
uses: actions/upload-artifact@v4
with:
name: robotwin-metrics
path: /tmp/robotwin-artifacts/metrics.json
if-no-files-found: warn
+199
View File
@@ -0,0 +1,199 @@
# Copyright 2025 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Isolated benchmark image for RoboTwin 2.0 integration tests.
# Full installation: SAPIEN, mplib, CuRobo, pytorch3d + simulation assets.
#
# RoboTwin install guide: https://robotwin-platform.github.io/doc/usage/robotwin-install.html
# Assets from: https://huggingface.co/datasets/TianxingChen/RoboTwin2.0
# - embodiments.zip ~220 MB
# - objects.zip ~3.74 GB
# (background_texture.zip ~11 GB is skipped — not required for a smoke eval)
#
# Build: docker build -f docker/Dockerfile.benchmark.robotwin -t lerobot-benchmark-robotwin .
# Run: docker run --gpus all --rm lerobot-benchmark-robotwin \
# lerobot-eval --env.type=robotwin --env.task=beat_block_hammer ...
# RoboTwin requires CUDA devel image for CuRobo compilation (nvcc needed).
# Python 3.12 required by lerobot (type alias syntax). open3d 0.19.0 has cp312 wheels.
ARG CUDA_VERSION=12.1.1
ARG OS_VERSION=22.04
FROM nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${OS_VERSION}
ARG PYTHON_VERSION=3.12
ENV DEBIAN_FRONTEND=noninteractive \
PATH=/lerobot/.venv/bin:$PATH \
CUDA_VISIBLE_DEVICES=0 \
DEVICE=cuda \
# NVIDIA Container Toolkit: expose all driver capabilities (includes Vulkan)
NVIDIA_DRIVER_CAPABILITIES=all \
# SAPIEN uses Vulkan; point at the NVIDIA ICD we create below
VK_ICD_FILENAMES=/usr/share/vulkan/icd.d/nvidia_icd.json \
# RoboTwin task modules live in <repo>/envs/; add to PYTHONPATH after clone.
ROBOTWIN_ROOT=/opt/robotwin
# System deps — extended with cmake/ninja for CuRobo and pytorch3d compilation.
RUN apt-get update && apt-get install -y --no-install-recommends \
software-properties-common build-essential git curl \
libglib2.0-0 libgl1-mesa-glx libegl1-mesa ffmpeg \
libusb-1.0-0-dev speech-dispatcher libgeos-dev portaudio19-dev \
cmake pkg-config ninja-build \
libvulkan1 vulkan-tools \
&& add-apt-repository -y ppa:deadsnakes/ppa \
&& apt-get update \
&& apt-get install -y --no-install-recommends \
python${PYTHON_VERSION} \
python${PYTHON_VERSION}-venv \
python${PYTHON_VERSION}-dev \
&& curl -LsSf https://astral.sh/uv/0.8.0/install.sh | sh \
&& mv /root/.local/bin/uv /usr/local/bin/uv \
&& mkdir -p /usr/share/vulkan/icd.d \
&& echo '{"file_format_version":"1.0.0","ICD":{"library_path":"libGLX_nvidia.so.0","api_version":"1.3.0"}}' \
> /usr/share/vulkan/icd.d/nvidia_icd.json \
&& useradd --create-home --shell /bin/bash user_lerobot \
&& usermod -aG sudo user_lerobot \
&& apt-get clean && rm -rf /var/lib/apt/lists/*
WORKDIR /lerobot
RUN chown -R user_lerobot:user_lerobot /lerobot
USER user_lerobot
ENV HOME=/home/user_lerobot \
HF_HOME=/home/user_lerobot/.cache/huggingface \
HF_LEROBOT_HOME=/home/user_lerobot/.cache/huggingface/lerobot \
TORCH_HOME=/home/user_lerobot/.cache/torch \
TRITON_CACHE_DIR=/home/user_lerobot/.cache/triton
RUN uv venv --python python${PYTHON_VERSION}
# ── 1. Install base lerobot ────────────────────────────────────────────────
COPY --chown=user_lerobot:user_lerobot setup.py pyproject.toml uv.lock README.md MANIFEST.in ./
COPY --chown=user_lerobot:user_lerobot src/ src/
# Install lerobot base only (no benchmark extras — RoboTwin is source-only).
# Install lerobot with smolvla extra (for transformers dep needed by SmolVLA policy)
RUN uv pip install --no-cache -e ".[smolvla]"
# ── 2. Install RoboTwin 2.0 simulator stack ────────────────────────────────
# Clone at a pinned commit for reproducibility.
USER root
RUN git clone --depth=1 https://github.com/RoboTwin-Platform/RoboTwin.git ${ROBOTWIN_ROOT} \
&& chown -R user_lerobot:user_lerobot ${ROBOTWIN_ROOT}
USER user_lerobot
# Install RoboTwin-specific packages on top of the lerobot venv.
# We intentionally skip: torch/torchvision (already in lerobot), gymnasium
# (lerobot uses 1.x; RoboTwin's API is wrapped so 0.29 is not needed), scipy
# (version-pinned by lerobot), huggingface_hub (pinned by lerobot), wandb,
# azure, openai, pyglet (UI only).
# Install RoboTwin-specific packages. Use --python to pin resolution to the
# venv's Python 3.10 (uv sync --locked may have changed the default target).
RUN uv pip install --no-cache --python .venv/bin/python \
"sapien==3.0.0b1" \
"mplib==0.2.1" \
"transforms3d==0.4.2" \
"trimesh==4.4.3" \
"open3d==0.19.0" \
"imageio==2.34.2" \
"termcolor" \
"zarr" \
"pydantic" \
"h5py"
# pytorch3d — must be built from source (no universal wheel available).
# This is the slowest step (~10 min); cached in subsequent builds.
RUN uv pip install --no-cache --no-build-isolation --python .venv/bin/python \
"git+https://github.com/facebookresearch/pytorch3d.git@stable"
# CuRobo — NVIDIA motion generation library; requires nvcc (devel image).
# TORCH_CUDA_ARCH_LIST must be set or the build fails with empty arch list.
RUN cd ${ROBOTWIN_ROOT}/envs \
&& git clone --depth=1 https://github.com/NVlabs/curobo.git \
&& cd curobo \
&& TORCH_CUDA_ARCH_LIST="7.0;7.5;8.0;8.6;8.9;9.0" \
uv pip install -e . --no-build-isolation --no-cache
# ── 3. Apply upstream patches (mirrors script/_install.sh) ─────────────────
# patch 1: mplib — remove `or collide` from planner.py line 807.
RUN python - <<'EOF'
import re, pathlib, site
for d in site.getsitepackages():
p = pathlib.Path(d) / "mplib" / "planner.py"
if p.exists():
src = p.read_text()
patched = re.sub(r"\bor collide\b", "", src, count=1)
p.write_text(patched)
print(f"mplib patch applied: {p}")
break
EOF
# patch 2: sapien URDF loader — add UTF-8 encoding + fix .srdf extension.
RUN python - <<'EOF'
import pathlib, site
for d in site.getsitepackages():
p = pathlib.Path(d) / "sapien" / "wrapper" / "urdf_loader.py"
if p.exists():
src = p.read_text()
# add encoding='utf-8' to open() calls that lack it
patched = src.replace("open(", "open(").replace(
'with open(srdf_path) as f:', 'with open(srdf_path, encoding="utf-8") as f:'
).replace('"srdf"', '".srdf"')
p.write_text(patched)
print(f"sapien patch applied: {p}")
break
EOF
# ── 4. Download simulation assets from HuggingFace ─────────────────────────
# embodiments.zip (~220 MB) + objects.zip (~3.74 GB).
# background_texture.zip (~11 GB) is skipped — not required for a smoke eval.
# Set HF_TOKEN (passed as --build-arg or via --secret) for authenticated access.
ARG HF_TOKEN=""
RUN python - <<'EOF'
import os, pathlib, zipfile
from huggingface_hub import hf_hub_download
token = os.environ.get("HF_TOKEN") or None
assets_dir = pathlib.Path(os.environ["ROBOTWIN_ROOT"]) / "assets"
assets_dir.mkdir(parents=True, exist_ok=True)
for fname in ("embodiments.zip", "objects.zip"):
print(f"Downloading {fname} ...")
local = hf_hub_download(
repo_id="TianxingChen/RoboTwin2.0",
repo_type="dataset",
filename=fname,
token=token,
local_dir=str(assets_dir),
)
print(f"Extracting {fname} ...")
with zipfile.ZipFile(local, "r") as z:
z.extractall(str(assets_dir))
pathlib.Path(local).unlink() # remove zip after extraction
print(f"{fname} done.")
EOF
# Update embodiment config paths to reflect the installation directory.
RUN cd ${ROBOTWIN_ROOT} \
&& python script/update_embodiment_config_path.py
# ── 5. Finalise ────────────────────────────────────────────────────────────
# Expose RoboTwin task modules on PYTHONPATH so `import envs.<task>` works.
ENV PYTHONPATH="${ROBOTWIN_ROOT}:${PYTHONPATH}"
COPY --chown=user_lerobot:user_lerobot . .
RUN chmod +x /lerobot/.venv/lib/python${PYTHON_VERSION}/site-packages/triton/backends/nvidia/bin/ptxas 2>/dev/null || true
CMD ["/bin/bash"]
+206
View File
@@ -0,0 +1,206 @@
# RoboTwin 2.0
RoboTwin 2.0 is a **large-scale dual-arm manipulation benchmark** built on the SAPIEN physics engine. It provides a standardized evaluation protocol for bimanual robotic policies across 60 tasks with strong domain randomization (clutter, lighting, background, tabletop height, and language instructions).
- Paper: [RoboTwin 2.0: A Scalable Data Generator and Benchmark with Strong Domain Randomization for Robust Bimanual Robotic Manipulation](https://robotwin-platform.github.io)
- GitHub: [RoboTwin-Platform/RoboTwin](https://github.com/RoboTwin-Platform/RoboTwin)
- Leaderboard: [robotwin-platform.github.io/leaderboard](https://robotwin-platform.github.io/leaderboard)
- Dataset: [hxma/RoboTwin-LeRobot-v3.0](https://huggingface.co/datasets/hxma/RoboTwin-LeRobot-v3.0)
## Overview
| Property | Value |
| ------------- | ---------------------------------------------------------- |
| Tasks | 60 dual-arm manipulation tasks |
| Robot | Aloha-AgileX bimanual (14 DOF, 7 per arm) |
| Action space | 14-dim joint-space, continuous in `[-1, 1]` |
| Cameras | `head_camera`, `front_camera`, `left_wrist`, `right_wrist` |
| Simulator | SAPIEN (not MuJoCo) |
| Eval protocol | 100 episodes/task, 50 demo_clean demonstrations |
| Eval settings | **Easy** (`demo_clean`) and **Hard** (`demo_randomized`) |
## Available tasks
RoboTwin 2.0 ships with 60 dual-arm manipulation tasks. The full list appears on the [leaderboard](https://robotwin-platform.github.io/leaderboard). Example tasks:
| Task | CLI name | Category |
| ---------------------- | ------------------------ | ---------------- |
| Beat block with hammer | `beat_block_hammer` | Tool use |
| Open / close laptop | `open_laptop` | Articulated obj |
| Stack blocks (2 / 3) | `stack_blocks_two/three` | Stacking |
| Pour water | `pour_water` | Deformable/fluid |
| Fold cloth | `fold_cloth` | Deformable |
| Handover block | `handover_block` | Bimanual coord. |
| Place shoes | `place_shoes_left/right` | Precision place |
| Scan object | `scan_object` | Mobile manip. |
Pass a comma-separated list to `--env.task` to run multiple tasks in a single eval sweep.
## Dataset
The RoboTwin 2.0 dataset is available in **LeRobot v3.0 format** on the Hugging Face Hub:
```
hxma/RoboTwin-LeRobot-v3.0
```
It contains over 100,000 pre-collected trajectories across all 60 tasks (79.6 GB, Apache 2.0 license). No format conversion is needed — it is already in the correct LeRobot v3.0 schema with video observations and action labels.
You can load it directly with the HF Datasets library:
```python
from datasets import load_dataset
ds = load_dataset("hxma/RoboTwin-LeRobot-v3.0", split="train")
```
## Installation
RoboTwin 2.0 requires **Linux** with an NVIDIA GPU (CUDA 12.1 recommended). Installation takes approximately 20 minutes.
### 1. Create a conda environment
```bash
conda create -n robotwin python=3.10 -y
conda activate robotwin
```
### 2. Install LeRobot
```bash
git clone https://github.com/huggingface/lerobot.git
cd lerobot
pip install -e "."
```
### 3. Install RoboTwin 2.0
```bash
git clone https://github.com/RoboTwin-Platform/RoboTwin.git
cd RoboTwin
bash script/_install.sh
bash script/_download_assets.sh
```
The install script handles all Python dependencies including SAPIEN, CuRobo, mplib, and pytorch3d.
<Tip warning={true}>
If the automated install fails, install manually:
```bash
pip install -r requirements.txt
pip install "git+https://github.com/facebookresearch/pytorch3d.git@stable"
cd envs && git clone https://github.com/NVlabs/curobo.git && cd curobo
pip install -e . --no-build-isolation
```
Then apply the required mplib fix: in `mplib/planner.py` line 807, remove `or collide` from the conditional.
</Tip>
### 4. Add RoboTwin to PYTHONPATH
The RoboTwin task modules must be importable by LeRobot. From within the `RoboTwin/` directory:
```bash
export PYTHONPATH="${PYTHONPATH}:$(pwd)"
```
Add this to your shell profile to make it permanent.
## Evaluation
### Standard evaluation (recommended)
Evaluate a policy on a single task with the official protocol (100 episodes):
```bash
lerobot-eval \
--policy.path="your-hf-policy-id" \
--env.type=robotwin \
--env.task=beat_block_hammer \
--eval.batch_size=1 \
--eval.n_episodes=100
```
### Single-task quick check
```bash
lerobot-eval \
--policy.path="your-hf-policy-id" \
--env.type=robotwin \
--env.task=beat_block_hammer \
--eval.batch_size=1 \
--eval.n_episodes=5
```
### Multi-task sweep
Evaluate on several tasks in one run:
```bash
lerobot-eval \
--policy.path="your-hf-policy-id" \
--env.type=robotwin \
--env.task=beat_block_hammer,click_bell,handover_block,stack_blocks_two \
--eval.batch_size=1 \
--eval.n_episodes=100
```
### Full benchmark (all 60 tasks)
```bash
lerobot-eval \
--policy.path="your-hf-policy-id" \
--env.type=robotwin \
--env.task=adjust_bottle,beat_block_hammer,blocks_ranking_rgb,blocks_ranking_size,click_alarmclock,click_bell,close_laptop,close_microwave,dump_bin,grab_roller,handover_block,handover_cup,handover_diverse_bottles,handover_mic,hanging_mug,insert_pin,lift_pot,make_tea,open_laptop,open_microwave,pick_diverse_bottles,pick_dual_bottles,place_basket,place_block,place_cable,place_can,place_chopsticks,place_cloth,place_container,place_cup,place_diverse_bottles,place_dual_bottles,place_fork,place_knife,place_object_basket,place_ring,place_ruler,place_shoes_left,place_shoes_right,place_spoon,place_toy,pour_water,press_stapler,put_bottles_dustbin,put_object_cabinet,put_shoes_box,rotate_qrcode,scan_object,shake_bottle,shake_bottle_horizontally,stack_blocks_three,stack_blocks_two,stack_bowls_three,stack_bowls_two,stamp_seal,turn_switch,wipe_board,arrange_tools,build_tower,fold_cloth \
--eval.batch_size=1 \
--eval.n_episodes=100
```
## Camera configuration
By default, all four cameras are included:
| Camera key | Description |
| -------------- | ------------------------------ |
| `head_camera` | Overhead / third-person view |
| `front_camera` | Front-facing static camera |
| `left_wrist` | Left arm wrist-mounted camera |
| `right_wrist` | Right arm wrist-mounted camera |
To use a subset of cameras, override `--env.camera_names`:
```bash
lerobot-eval \
--policy.path="your-hf-policy-id" \
--env.type=robotwin \
--env.task=beat_block_hammer \
--env.camera_names="head_camera,left_wrist,right_wrist" \
--eval.batch_size=1 \
--eval.n_episodes=10
```
## Environment config reference
Key parameters for `RoboTwinEnvConfig`:
| Parameter | Default | Description |
| -------------------- | --------------------------------------------------- | ---------------------------------- |
| `task` | `"beat_block_hammer"` | Comma-separated task name(s) |
| `fps` | `25` | Simulation FPS |
| `episode_length` | `300` | Max steps per episode |
| `obs_type` | `"pixels_agent_pos"` | `"pixels"` or `"pixels_agent_pos"` |
| `camera_names` | `"head_camera,front_camera,left_wrist,right_wrist"` | Comma-separated active cameras |
| `observation_height` | `480` | Camera pixel height |
| `observation_width` | `640` | Camera pixel width |
## Leaderboard submission
Results can be submitted to the [RoboTwin 2.0 leaderboard](https://robotwin-platform.github.io/leaderboard). The official protocol requires:
- Training on 50 `demo_clean` demonstrations per task
- Evaluating 100 episodes per task
- Reporting success rate separately for **Easy** (`demo_clean`) and **Hard** (`demo_randomized`) settings
For submission instructions, refer to the [RoboTwin 2.0 documentation](https://robotwin-platform.github.io/doc/).
+94
View File
@@ -574,3 +574,97 @@ class IsaaclabArenaEnv(HubEnvConfig):
),
PolicyProcessorPipeline(steps=[]),
)
@EnvConfig.register_subclass("robotwin")
@dataclass
class RoboTwinEnvConfig(EnvConfig):
"""Configuration for RoboTwin 2.0 benchmark environments.
RoboTwin 2.0 is a dual-arm manipulation benchmark with 60 tasks built on the
SAPIEN simulator. The robot is an Aloha-AgileX bimanual platform with 14 DOF
(7 per arm). All four cameras are enabled by default.
See: https://robotwin-platform.github.io
Dataset: https://huggingface.co/datasets/hxma/RoboTwin-LeRobot-v3.0
"""
task: str = "beat_block_hammer" # single task or comma-separated list
fps: int = 25
episode_length: int = 300
obs_type: str = "pixels_agent_pos"
render_mode: str = "rgb_array"
# Comma-separated list of cameras to include in observations.
# Available: head_camera, front_camera, left_wrist, right_wrist
camera_names: str = "head_camera,front_camera,left_wrist,right_wrist"
observation_height: int = 480
observation_width: int = 640
features: dict[str, PolicyFeature] = field(
default_factory=lambda: {
ACTION: PolicyFeature(type=FeatureType.ACTION, shape=(14,)),
}
)
features_map: dict[str, str] = field(
default_factory=lambda: {
ACTION: ACTION,
"pixels/head_camera": f"{OBS_IMAGES}.head_camera",
"pixels/front_camera": f"{OBS_IMAGES}.front_camera",
"pixels/left_wrist": f"{OBS_IMAGES}.left_wrist",
"pixels/right_wrist": f"{OBS_IMAGES}.right_wrist",
"agent_pos": OBS_STATE,
}
)
def __post_init__(self):
cam_list = [c.strip() for c in self.camera_names.split(",") if c.strip()]
for cam in cam_list:
self.features[f"pixels/{cam}"] = PolicyFeature(
type=FeatureType.VISUAL,
shape=(self.observation_height, self.observation_width, 3),
)
# Keep features_map entry if already set (default_factory); add if missing.
key = f"pixels/{cam}"
if key not in self.features_map:
self.features_map[key] = f"{OBS_IMAGES}.{cam}"
if self.obs_type == "pixels_agent_pos":
self.features["agent_pos"] = PolicyFeature(
type=FeatureType.STATE,
shape=(14,), # 14 DOF: 7 per arm
)
elif self.obs_type != "pixels":
raise ValueError(
f"Unsupported obs_type '{self.obs_type}'. "
"RoboTwinEnvConfig supports 'pixels' and 'pixels_agent_pos'."
)
@property
def gym_kwargs(self) -> dict:
return {}
def create_envs(self, n_envs: int, use_async_envs: bool = True):
from lerobot.envs.robotwin import create_robotwin_envs
if not self.task:
raise ValueError("RoboTwinEnvConfig requires `task` to be specified.")
env_cls = gym.vector.AsyncVectorEnv if (use_async_envs and n_envs > 1) else gym.vector.SyncVectorEnv
cam_list = [c.strip() for c in self.camera_names.split(",") if c.strip()]
return create_robotwin_envs(
task=self.task,
n_envs=n_envs,
env_cls=env_cls,
camera_names=cam_list,
observation_height=self.observation_height,
observation_width=self.observation_width,
episode_length=self.episode_length,
)
def get_env_processors(self):
from lerobot.processor.env_processor import RoboTwinProcessorStep
from lerobot.processor.pipeline import PolicyProcessorPipeline
return (
PolicyProcessorPipeline(steps=[RoboTwinProcessorStep()]),
PolicyProcessorPipeline(steps=[]),
)
+465
View File
@@ -0,0 +1,465 @@
#!/usr/bin/env python
# Copyright 2025 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import annotations
import importlib
from collections import defaultdict
from collections.abc import Callable, Sequence
from functools import partial
from typing import Any
import gymnasium as gym
import numpy as np
from gymnasium import spaces
from lerobot.types import RobotObservation
# Camera names as used by RoboTwin 2.0. The wrapper appends "_rgb" when looking
# up keys in get_obs() output (e.g. "head_camera" → "head_camera_rgb").
ROBOTWIN_CAMERA_NAMES: tuple[str, ...] = (
"head_camera",
"front_camera",
"left_wrist",
"right_wrist",
)
ACTION_DIM = 14 # 7 DOF × 2 arms
ACTION_LOW = -1.0
ACTION_HIGH = 1.0
DEFAULT_EPISODE_LENGTH = 300
# Complete task list from RoboTwin 2.0 (60 tasks, as listed on the leaderboard).
ROBOTWIN_TASKS: tuple[str, ...] = (
"adjust_bottle",
"beat_block_hammer",
"blocks_ranking_rgb",
"blocks_ranking_size",
"click_alarmclock",
"click_bell",
"close_laptop",
"close_microwave",
"dump_bin",
"grab_roller",
"handover_block",
"handover_cup",
"handover_diverse_bottles",
"handover_mic",
"hanging_mug",
"insert_pin",
"lift_pot",
"make_tea",
"open_laptop",
"open_microwave",
"pick_diverse_bottles",
"pick_dual_bottles",
"place_basket",
"place_block",
"place_cable",
"place_can",
"place_chopsticks",
"place_cloth",
"place_container",
"place_cup",
"place_diverse_bottles",
"place_dual_bottles",
"place_fork",
"place_knife",
"place_object_basket",
"place_ring",
"place_ruler",
"place_shoes_left",
"place_shoes_right",
"place_spoon",
"place_toy",
"pour_water",
"press_stapler",
"put_bottles_dustbin",
"put_object_cabinet",
"put_shoes_box",
"rotate_qrcode",
"scan_object",
"shake_bottle",
"shake_bottle_horizontally",
"stack_blocks_three",
"stack_blocks_two",
"stack_bowls_three",
"stack_bowls_two",
"stamp_seal",
"turn_switch",
"wipe_board",
"arrange_tools",
"build_tower",
"fold_cloth",
)
_ROBOTWIN_SETUP_CACHE: dict[str, dict[str, Any]] = {}
def _load_robotwin_setup_kwargs(task_name: str) -> dict[str, Any]:
"""Build the kwargs dict RoboTwin's setup_demo expects.
Mirrors the config loading done by RoboTwin's ``script/eval_policy.py``:
reads ``task_config/demo_clean.yml``, resolves the embodiment file from
``_embodiment_config.yml``, loads the robot's own ``config.yml``, and
reads camera dimensions from ``_camera_config.yml``.
Uses ``aloha-agilex`` single-robot dual-arm by default (the only embodiment
used by beat_block_hammer and most smoke-test tasks).
"""
if task_name in _ROBOTWIN_SETUP_CACHE:
return dict(_ROBOTWIN_SETUP_CACHE[task_name])
import os
import yaml # type: ignore[import-untyped]
from envs import CONFIGS_PATH # type: ignore[import-not-found]
task_config = "demo_clean"
with open(f"./task_config/{task_config}.yml", encoding="utf-8") as f:
args = yaml.safe_load(f)
# Resolve embodiment — demo_clean.yml uses [aloha-agilex] (dual-arm single robot)
with open(os.path.join(CONFIGS_PATH, "_embodiment_config.yml"), encoding="utf-8") as f:
embodiment_types = yaml.safe_load(f)
embodiment = args.get("embodiment", ["aloha-agilex"])
if len(embodiment) == 1:
robot_file = embodiment_types[embodiment[0]]["file_path"]
args["left_robot_file"] = robot_file
args["right_robot_file"] = robot_file
args["dual_arm_embodied"] = True
elif len(embodiment) == 3:
args["left_robot_file"] = embodiment_types[embodiment[0]]["file_path"]
args["right_robot_file"] = embodiment_types[embodiment[1]]["file_path"]
args["embodiment_dis"] = embodiment[2]
args["dual_arm_embodied"] = False
else:
raise ValueError(f"embodiment must have 1 or 3 items, got {len(embodiment)}")
with open(os.path.join(args["left_robot_file"], "config.yml"), encoding="utf-8") as f:
args["left_embodiment_config"] = yaml.safe_load(f)
with open(os.path.join(args["right_robot_file"], "config.yml"), encoding="utf-8") as f:
args["right_embodiment_config"] = yaml.safe_load(f)
# Camera dimensions
with open(os.path.join(CONFIGS_PATH, "_camera_config.yml"), encoding="utf-8") as f:
camera_config = yaml.safe_load(f)
head_cam = args["camera"]["head_camera_type"]
args["head_camera_h"] = camera_config[head_cam]["h"]
args["head_camera_w"] = camera_config[head_cam]["w"]
# Headless overrides
args["render_freq"] = 0
args["task_name"] = task_name
args["task_config"] = task_config
_ROBOTWIN_SETUP_CACHE[task_name] = args
return dict(args)
def _load_robotwin_task(task_name: str) -> type:
"""Dynamically import and return a RoboTwin 2.0 task class.
RoboTwin tasks live in ``envs/<task_name>.py`` relative to the repository
root and are expected to be on ``sys.path`` after installation.
"""
try:
module = importlib.import_module(f"envs.{task_name}")
except ModuleNotFoundError as e:
raise ModuleNotFoundError(
f"Could not import RoboTwin task '{task_name}'. "
"Ensure RoboTwin 2.0 is installed and its 'envs/' directory is on PYTHONPATH. "
"See the RoboTwin installation guide: https://robotwin-platform.github.io/doc/usage/robotwin-install.html"
) from e
task_cls = getattr(module, task_name, None)
if task_cls is None:
raise AttributeError(f"Task class '{task_name}' not found in envs/{task_name}.py")
return task_cls
class RoboTwinEnv(gym.Env):
"""Gymnasium wrapper around a single RoboTwin 2.0 task.
RoboTwin uses a custom SAPIEN-based API (``setup_demo`` / ``get_obs`` /
``take_action`` / ``check_success``) rather than the standard gym interface.
This class bridges that API to Gymnasium so that ``lerobot-eval`` can drive
RoboTwin exactly like LIBERO or Meta-World.
The underlying SAPIEN environment is created lazily on the first ``reset()``
call *inside the worker process*. This is required for
``gym.vector.AsyncVectorEnv`` compatibility: SAPIEN allocates EGL/GPU
contexts that must not be forked from the parent process.
Observations
------------
The ``pixels`` dict uses the raw RoboTwin camera names as keys (e.g.
``"head_camera"``, ``"left_wrist"``). ``preprocess_observation`` in
``envs/utils.py`` then converts these to ``observation.images.<cam>``.
Actions
-------
14-dim float32 array in ``[-1, 1]`` (joint-space, 7 DOF per arm).
"""
metadata = {"render_modes": ["rgb_array"], "render_fps": 25}
def __init__(
self,
task_name: str,
episode_index: int = 0,
n_envs: int = 1,
camera_names: Sequence[str] = ROBOTWIN_CAMERA_NAMES,
observation_height: int = 480,
observation_width: int = 640,
episode_length: int = DEFAULT_EPISODE_LENGTH,
render_mode: str = "rgb_array",
):
super().__init__()
self.task_name = task_name
self.task = task_name # used by add_envs_task() in utils.py
self.task_description = task_name.replace("_", " ")
self.episode_index = episode_index
self._reset_stride = n_envs
self.camera_names = list(camera_names)
self.observation_height = observation_height
self.observation_width = observation_width
self.episode_length = episode_length
self.render_mode = render_mode
self._env: Any | None = None # deferred — created on first reset() inside worker
self._step_count: int = 0
image_spaces = {
cam: spaces.Box(
low=0,
high=255,
shape=(observation_height, observation_width, 3),
dtype=np.uint8,
)
for cam in self.camera_names
}
self.observation_space = spaces.Dict(
{
"pixels": spaces.Dict(image_spaces),
"agent_pos": spaces.Box(low=-np.inf, high=np.inf, shape=(ACTION_DIM,), dtype=np.float64),
}
)
self.action_space = spaces.Box(
low=ACTION_LOW, high=ACTION_HIGH, shape=(ACTION_DIM,), dtype=np.float32
)
def _ensure_env(self) -> None:
"""Create the SAPIEN environment on first use.
Called inside the worker subprocess after fork(), so each worker gets
its own EGL/GPU context rather than inheriting a stale one from the
parent process (which causes crashes with AsyncVectorEnv).
"""
if self._env is not None:
return
task_cls = _load_robotwin_task(self.task_name)
self._env = task_cls()
def _get_obs(self) -> RobotObservation:
assert self._env is not None, "_get_obs called before _ensure_env()"
raw = self._env.get_obs()
images: dict[str, np.ndarray] = {}
for cam in self.camera_names:
# RoboTwin 2.0 camera keys follow the "<name>_rgb" convention.
# Fall back to the bare name if the suffixed key is absent.
key = f"{cam}_rgb" if f"{cam}_rgb" in raw else cam
if key in raw:
img = np.asarray(raw[key], dtype=np.uint8)
# Ensure HWC and exactly 3 channels
if img.ndim == 2:
img = np.stack([img, img, img], axis=-1)
elif img.shape[-1] != 3:
img = img[..., :3]
if img.shape[0] != self.observation_height or img.shape[1] != self.observation_width:
# Resize is best done outside hot-loop; return as-is and let
# the processor handle it if needed.
pass
images[cam] = img
else:
# Camera not exposed by this task — return a black frame so the
# observation shape stays consistent across all tasks.
images[cam] = np.zeros((self.observation_height, self.observation_width, 3), dtype=np.uint8)
# Joint state: try common key names used by RoboTwin base_task.
joint_state: np.ndarray | None = None
for key in ("joint_action", "qpos", "robot_qpos", "joint_state", "obs"):
if key in raw:
arr = np.asarray(raw[key], dtype=np.float64).flatten()
if arr.size >= ACTION_DIM:
joint_state = arr[:ACTION_DIM]
break
if joint_state is None:
joint_state = np.zeros(ACTION_DIM, dtype=np.float64)
return {"pixels": images, "agent_pos": joint_state}
def reset(self, seed: int | None = None, **kwargs) -> tuple[RobotObservation, dict]:
self._ensure_env()
super().reset(seed=seed)
assert self._env is not None # set by _ensure_env() above
actual_seed = self.episode_index if seed is None else seed
setup_kwargs = _load_robotwin_setup_kwargs(self.task_name)
setup_kwargs.update(seed=actual_seed, is_test=True)
self._env.setup_demo(**setup_kwargs)
self.episode_index += self._reset_stride
self._step_count = 0
obs = self._get_obs()
return obs, {"is_success": False, "task": self.task_name}
def step(self, action: np.ndarray) -> tuple[RobotObservation, float, bool, bool, dict[str, Any]]:
assert self._env is not None, "step() called before reset()"
if action.ndim != 1 or action.shape[0] != ACTION_DIM:
raise ValueError(f"Expected 1-D action of shape ({ACTION_DIM},), got {action.shape}")
# RoboTwin 2.0 uses take_action(); fall back to step() for older forks.
if hasattr(self._env, "take_action"):
self._env.take_action(action)
else:
self._env.step(action)
self._step_count += 1
is_success = bool(getattr(self._env, "eval_success", False))
if not is_success and hasattr(self._env, "check_success"):
is_success = bool(self._env.check_success())
obs = self._get_obs()
reward = float(is_success)
terminated = is_success
truncated = self._step_count >= self.episode_length
info: dict[str, Any] = {
"task": self.task_name,
"is_success": is_success,
"step": self._step_count,
}
if terminated or truncated:
info["final_info"] = {
"task": self.task_name,
"is_success": is_success,
}
self.reset()
return obs, reward, terminated, truncated, info
def render(self) -> np.ndarray:
self._ensure_env()
obs = self._get_obs()
# Prefer head camera for rendering; fall back to first available.
if "head_camera" in obs["pixels"]:
return obs["pixels"]["head_camera"]
return next(iter(obs["pixels"].values()))
def close(self) -> None:
if self._env is not None:
if hasattr(self._env, "close_env"):
import contextlib
with contextlib.suppress(TypeError):
self._env.close_env()
self._env = None
# ---- Multi-task factory --------------------------------------------------------
def _make_env_fns(
*,
task_name: str,
n_envs: int,
camera_names: list[str],
observation_height: int,
observation_width: int,
episode_length: int,
) -> list[Callable[[], RoboTwinEnv]]:
"""Return n_envs factory callables for a single task."""
def _make_one(episode_index: int) -> RoboTwinEnv:
return RoboTwinEnv(
task_name=task_name,
episode_index=episode_index,
n_envs=n_envs,
camera_names=camera_names,
observation_height=observation_height,
observation_width=observation_width,
episode_length=episode_length,
)
return [partial(_make_one, i) for i in range(n_envs)]
def create_robotwin_envs(
task: str,
n_envs: int,
env_cls: Callable[[Sequence[Callable[[], Any]]], Any] | None = None,
camera_names: Sequence[str] = ROBOTWIN_CAMERA_NAMES,
observation_height: int = 480,
observation_width: int = 640,
episode_length: int = DEFAULT_EPISODE_LENGTH,
) -> dict[str, dict[int, Any]]:
"""Create vectorized RoboTwin 2.0 environments.
Returns:
``dict[task_name][0] -> VectorEnv`` one entry per task, each wrapping
``n_envs`` parallel rollouts.
Args:
task: Comma-separated list of task names (e.g. ``"beat_block_hammer"``
or ``"beat_block_hammer,click_bell"``).
n_envs: Number of parallel rollouts per task.
env_cls: Vector env constructor (e.g. ``gym.vector.AsyncVectorEnv``).
camera_names: Cameras to include in observations.
observation_height: Pixel height for all cameras.
observation_width: Pixel width for all cameras.
episode_length: Max steps before truncation.
"""
if env_cls is None or not callable(env_cls):
raise ValueError("env_cls must be callable (e.g. gym.vector.AsyncVectorEnv).")
if not isinstance(n_envs, int) or n_envs <= 0:
raise ValueError(f"n_envs must be a positive int; got {n_envs}.")
task_names = [t.strip() for t in str(task).split(",") if t.strip()]
if not task_names:
raise ValueError("`task` must contain at least one RoboTwin task name.")
unknown = [t for t in task_names if t not in ROBOTWIN_TASKS]
if unknown:
raise ValueError(f"Unknown RoboTwin tasks: {unknown}. Available tasks: {sorted(ROBOTWIN_TASKS)}")
print(f"Creating RoboTwin envs | tasks={task_names} | n_envs(per task)={n_envs}")
out: dict[str, dict[int, Any]] = defaultdict(dict)
for task_name in task_names:
fns = _make_env_fns(
task_name=task_name,
n_envs=n_envs,
camera_names=list(camera_names),
observation_height=observation_height,
observation_width=observation_width,
episode_length=episode_length,
)
out[task_name][0] = env_cls(fns)
print(f"Built vec env | task={task_name} | n_envs={n_envs}")
return {k: dict(v) for k, v in out.items()}
+26
View File
@@ -226,3 +226,29 @@ class IsaaclabArenaProcessorStep(ObservationProcessorStep):
def observation(self, observation):
return self._process_observation(observation)
@dataclass
@ProcessorStepRegistry.register(name="robotwin_processor")
class RoboTwinProcessorStep(ObservationProcessorStep):
"""Passthrough step for RoboTwin observations, casting state to float32.
RoboTwin observations already arrive in LeRobot format (observation.images.*
and observation.state), so this step mainly ensures state dtype is float32.
"""
def _process_observation(self, observation):
processed_obs = dict(observation)
if OBS_STATE in processed_obs:
state = processed_obs[OBS_STATE]
if hasattr(state, "dtype") and state.dtype != torch.float32:
processed_obs[OBS_STATE] = state.float()
return processed_obs
def transform_features(
self, features: dict[PipelineFeatureType, dict[str, PolicyFeature]]
) -> dict[PipelineFeatureType, dict[str, PolicyFeature]]:
return features
def observation(self, observation):
return self._process_observation(observation)
+283
View File
@@ -0,0 +1,283 @@
#!/usr/bin/env python
# Copyright 2025 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Unit tests for the RoboTwin 2.0 Gymnasium wrapper.
These tests mock out the SAPIEN-based RoboTwin runtime so they run without
the full RoboTwin installation (SAPIEN, CuRobo, mplib, etc.).
"""
from __future__ import annotations
from unittest.mock import MagicMock, patch
import gymnasium as gym
import numpy as np
import pytest
import torch
from lerobot.envs.robotwin import (
ACTION_DIM,
ROBOTWIN_CAMERA_NAMES,
ROBOTWIN_TASKS,
RoboTwinEnv,
create_robotwin_envs,
)
# ---------------------------------------------------------------------------
# Fixtures / helpers
# ---------------------------------------------------------------------------
def _make_mock_task_env(
height: int = 480,
width: int = 640,
cameras: tuple[str, ...] = ROBOTWIN_CAMERA_NAMES,
) -> MagicMock:
"""Return a mock that mimics the RoboTwin task class API."""
obs_dict = {f"{cam}_rgb": np.zeros((height, width, 3), dtype=np.uint8) for cam in cameras}
obs_dict["joint_action"] = np.zeros(ACTION_DIM, dtype=np.float64)
mock = MagicMock()
mock.get_obs.return_value = obs_dict
mock.setup_demo.return_value = None
mock.take_action.return_value = None
mock.eval_success = False
mock.check_success.return_value = False
mock.close_env.return_value = None
return mock
def _patch_load(mock_task_instance: MagicMock):
"""Context manager that patches _load_robotwin_task to return a mock class."""
task_cls = MagicMock(return_value=mock_task_instance)
return patch("lerobot.envs.robotwin._load_robotwin_task", return_value=task_cls)
# ---------------------------------------------------------------------------
# RoboTwinEnv unit tests
# ---------------------------------------------------------------------------
class TestRoboTwinEnv:
def test_observation_space_shape(self):
"""observation_space should have the configured h×w×3 for every camera."""
h, w = 240, 320
env = RoboTwinEnv(
task_name="beat_block_hammer",
observation_height=h,
observation_width=w,
camera_names=["head_camera", "front_camera"],
)
pixels_space = env.observation_space["pixels"]
assert pixels_space["head_camera"].shape == (h, w, 3)
assert pixels_space["front_camera"].shape == (h, w, 3)
assert "left_wrist" not in pixels_space
def test_action_space(self):
env = RoboTwinEnv(task_name="beat_block_hammer")
assert env.action_space.shape == (ACTION_DIM,)
assert env.action_space.dtype == np.float32
def test_reset_returns_correct_obs_keys(self):
mock_task = _make_mock_task_env()
env = RoboTwinEnv(task_name="beat_block_hammer")
with _patch_load(mock_task):
obs, info = env.reset()
assert "pixels" in obs
for cam in ROBOTWIN_CAMERA_NAMES:
assert cam in obs["pixels"], f"Missing camera '{cam}' in obs"
assert "agent_pos" in obs
assert obs["agent_pos"].shape == (ACTION_DIM,)
assert info["is_success"] is False
def test_reset_calls_setup_demo(self):
mock_task = _make_mock_task_env()
env = RoboTwinEnv(task_name="beat_block_hammer")
with _patch_load(mock_task):
env.reset(seed=42)
mock_task.setup_demo.assert_called_once_with(seed=42, is_test=True)
def test_step_returns_correct_types(self):
mock_task = _make_mock_task_env()
env = RoboTwinEnv(task_name="beat_block_hammer")
action = np.zeros(ACTION_DIM, dtype=np.float32)
with _patch_load(mock_task):
env.reset()
obs, reward, terminated, truncated, info = env.step(action)
assert isinstance(obs, dict)
assert isinstance(reward, float)
assert isinstance(terminated, bool)
assert isinstance(truncated, bool)
assert isinstance(info, dict)
def test_step_wrong_action_shape_raises(self):
mock_task = _make_mock_task_env()
env = RoboTwinEnv(task_name="beat_block_hammer")
bad_action = np.zeros(7, dtype=np.float32) # wrong dim
with _patch_load(mock_task):
env.reset()
with pytest.raises(ValueError, match="Expected 1-D action"):
env.step(bad_action)
def test_success_terminates_episode(self):
mock_task = _make_mock_task_env()
mock_task.check_success.return_value = True # always succeeds
env = RoboTwinEnv(task_name="beat_block_hammer")
action = np.zeros(ACTION_DIM, dtype=np.float32)
with _patch_load(mock_task):
env.reset()
_, _, terminated, _, info = env.step(action)
assert terminated is True
assert info["is_success"] is True
def test_truncation_after_episode_length(self):
mock_task = _make_mock_task_env()
env = RoboTwinEnv(task_name="beat_block_hammer", episode_length=2)
action = np.zeros(ACTION_DIM, dtype=np.float32)
with _patch_load(mock_task):
env.reset()
env.step(action) # step 1
_, _, _, truncated, _ = env.step(action) # step 2 → truncated
assert truncated is True
def test_close_calls_close_env(self):
mock_task = _make_mock_task_env()
env = RoboTwinEnv(task_name="beat_block_hammer")
with _patch_load(mock_task):
env.reset()
env.close()
mock_task.close_env.assert_called_once_with(clearance=False)
def test_black_frame_for_missing_camera(self):
"""If a camera key is absent from get_obs(), a black frame is returned."""
mock_task = _make_mock_task_env(cameras=("head_camera",)) # only head_camera
env = RoboTwinEnv(
task_name="beat_block_hammer",
camera_names=["head_camera", "front_camera"],
observation_height=10,
observation_width=10,
)
with _patch_load(mock_task):
obs, _ = env.reset()
# front_camera is missing from mock → should be a black 10×10×3 array
assert obs["pixels"]["front_camera"].shape == (10, 10, 3)
assert obs["pixels"]["front_camera"].sum() == 0
def test_task_and_task_description_attributes(self):
env = RoboTwinEnv(task_name="beat_block_hammer")
assert env.task == "beat_block_hammer"
assert isinstance(env.task_description, str)
def test_deferred_init_env_is_none_before_reset(self):
env = RoboTwinEnv(task_name="beat_block_hammer")
assert env._env is None # noqa: SLF001 (testing internal state)
# ---------------------------------------------------------------------------
# create_robotwin_envs tests
# ---------------------------------------------------------------------------
class TestCreateRoboTwinEnvs:
def test_returns_correct_structure(self):
mock_task = _make_mock_task_env()
env_cls = gym.vector.SyncVectorEnv
with _patch_load(mock_task):
envs = create_robotwin_envs(
task="beat_block_hammer",
n_envs=1,
env_cls=env_cls,
)
assert "beat_block_hammer" in envs
assert 0 in envs["beat_block_hammer"]
assert isinstance(envs["beat_block_hammer"][0], gym.vector.SyncVectorEnv)
def test_multi_task(self):
mock_task = _make_mock_task_env()
env_cls = gym.vector.SyncVectorEnv
with _patch_load(mock_task):
envs = create_robotwin_envs(
task="beat_block_hammer,click_bell",
n_envs=1,
env_cls=env_cls,
)
assert set(envs.keys()) == {"beat_block_hammer", "click_bell"}
def test_unknown_task_raises(self):
with pytest.raises(ValueError, match="Unknown RoboTwin tasks"):
create_robotwin_envs(
task="not_a_real_task",
n_envs=1,
env_cls=gym.vector.SyncVectorEnv,
)
def test_invalid_n_envs_raises(self):
with pytest.raises(ValueError, match="n_envs must be a positive int"):
create_robotwin_envs(
task="beat_block_hammer",
n_envs=0,
env_cls=gym.vector.SyncVectorEnv,
)
# ---------------------------------------------------------------------------
# ROBOTWIN_TASKS list
# ---------------------------------------------------------------------------
def test_task_list_not_empty():
assert len(ROBOTWIN_TASKS) >= 60
def test_all_tasks_are_strings():
assert all(isinstance(t, str) and t for t in ROBOTWIN_TASKS)
def test_no_duplicate_tasks():
assert len(ROBOTWIN_TASKS) == len(set(ROBOTWIN_TASKS))
# ---------------------------------------------------------------------------
# RoboTwinProcessorStep
# ---------------------------------------------------------------------------
class TestRoboTwinProcessorStep:
def test_passes_through_images_and_state(self):
from lerobot.processor.env_processor import RoboTwinProcessorStep
from lerobot.utils.constants import OBS_IMAGES, OBS_STATE
step = RoboTwinProcessorStep()
obs = {
f"{OBS_IMAGES}.head_camera": torch.zeros(1, 3, 4, 4),
f"{OBS_IMAGES}.front_camera": torch.zeros(1, 3, 4, 4),
OBS_STATE: torch.zeros(1, 14),
}
result = step.observation(obs)
assert f"{OBS_IMAGES}.head_camera" in result
assert f"{OBS_IMAGES}.front_camera" in result
assert result[OBS_STATE].dtype == torch.float32
def test_state_cast_to_float32(self):
from lerobot.processor.env_processor import RoboTwinProcessorStep
from lerobot.utils.constants import OBS_STATE
step = RoboTwinProcessorStep()
obs = {OBS_STATE: torch.zeros(1, 14, dtype=torch.float64)}
result = step.observation(obs)
assert result[OBS_STATE].dtype == torch.float32