diff --git a/.github/workflows/benchmark_tests.yml b/.github/workflows/benchmark_tests.yml
index 558569c57..00806f990 100644
--- a/.github/workflows/benchmark_tests.yml
+++ b/.github/workflows/benchmark_tests.yml
@@ -118,7 +118,7 @@ jobs:
bash -c "
hf auth login --token \"\$HF_USER_TOKEN\" --add-to-git-credential 2>/dev/null || true
lerobot-eval \
- --policy.path=pepijn223/smolvla_libero \
+ --policy.path=lerobot/smolvla_libero \
--env.type=libero \
--env.task=libero_spatial \
--eval.batch_size=1 \
@@ -147,7 +147,7 @@ jobs:
--artifacts-dir /tmp/libero-artifacts \
--env libero \
--task libero_spatial \
- --policy pepijn223/smolvla_libero
+ --policy lerobot/smolvla_libero
- name: Upload Libero rollout video
if: always()
@@ -270,7 +270,7 @@ jobs:
bash -c "
hf auth login --token \"\$HF_USER_TOKEN\" --add-to-git-credential 2>/dev/null || true
lerobot-eval \
- --policy.path=pepijn223/smolvla_metaworld \
+ --policy.path=lerobot/smolvla_metaworld \
--env.type=metaworld \
--env.task=metaworld-push-v3 \
--eval.batch_size=1 \
@@ -299,7 +299,7 @@ jobs:
--artifacts-dir /tmp/metaworld-artifacts \
--env metaworld \
--task metaworld-push-v3 \
- --policy pepijn223/smolvla_metaworld
+ --policy lerobot/smolvla_metaworld
- name: Upload MetaWorld rollout video
if: always()
@@ -317,6 +317,115 @@ jobs:
path: /tmp/metaworld-artifacts/metrics.json
if-no-files-found: warn
+ # ── ROBOTWIN 2.0 ──────────────────────────────────────────────────────────
+ # Isolated image: full RoboTwin 2.0 stack — SAPIEN, mplib, CuRobo,
+ # pytorch3d, + simulation assets (~4 GB).
+ # Build takes ~20 min on first run; subsequent runs hit the layer cache.
+ # Requires an NVIDIA GPU runner with CUDA 12.1 drivers.
+ robotwin-integration-test:
+ name: RoboTwin 2.0 — build image + 1-episode eval
+ runs-on:
+ group: aws-g6-4xlarge-plus
+ env:
+ HF_USER_TOKEN: ${{ secrets.LEROBOT_HF_USER }}
+ ROBOTWIN_POLICY: lerobot/smolvla_robotwin
+ ROBOTWIN_TASKS: beat_block_hammer,click_bell,handover_block,stack_blocks_two,click_alarmclock,open_microwave,adjust_bottle,lift_pot,stamp_seal,turn_switch
+
+ steps:
+ - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+ with:
+ persist-credentials: false
+ lfs: true
+
+ - name: Set up Docker Buildx
+ uses: docker/setup-buildx-action@v3 # zizmor: ignore[unpinned-uses]
+ with:
+ cache-binary: false
+
+ - name: Login to Docker Hub
+ if: ${{ env.DOCKERHUB_USERNAME != '' }}
+ uses: docker/login-action@v3 # zizmor: ignore[unpinned-uses]
+ with:
+ username: ${{ secrets.DOCKERHUB_LEROBOT_USERNAME }}
+ password: ${{ secrets.DOCKERHUB_LEROBOT_PASSWORD }}
+ env:
+ DOCKERHUB_USERNAME: ${{ secrets.DOCKERHUB_LEROBOT_USERNAME }}
+
+ # Build the full-install image: SAPIEN, mplib, CuRobo, pytorch3d +
+ # simulation assets (~4 GB). Layer cache lives in the runner's local
+ # Docker daemon — reused across re-runs on the same machine.
+ - name: Build RoboTwin 2.0 benchmark image
+ uses: docker/build-push-action@v6 # zizmor: ignore[unpinned-uses]
+ with:
+ context: .
+ file: docker/Dockerfile.benchmark.robotwin
+ push: false
+ load: true
+ tags: lerobot-benchmark-robotwin:ci
+ cache-from: type=local,src=/tmp/.buildx-cache-robotwin
+ cache-to: type=local,dest=/tmp/.buildx-cache-robotwin,mode=max
+
+ - name: Run RoboTwin 2.0 smoke eval (10 tasks, 1 episode each)
+ if: env.HF_USER_TOKEN != ''
+ run: |
+ # Named container (no --rm) so we can docker cp artifacts out.
+ docker run --name robotwin-eval --gpus all \
+ --shm-size=4g \
+ -e HF_HOME=/tmp/hf \
+ -e HF_USER_TOKEN="${HF_USER_TOKEN}" \
+ -e ROBOTWIN_POLICY="${ROBOTWIN_POLICY}" \
+ -e ROBOTWIN_TASKS="${ROBOTWIN_TASKS}" \
+ lerobot-benchmark-robotwin:ci \
+ bash -c "
+ hf auth login --token \"\$HF_USER_TOKEN\" --add-to-git-credential 2>/dev/null || true
+ cd /opt/robotwin && lerobot-eval \
+ --policy.path=\"\$ROBOTWIN_POLICY\" \
+ --env.type=robotwin \
+ --env.task=\"\$ROBOTWIN_TASKS\" \
+ --eval.batch_size=1 \
+ --eval.n_episodes=1 \
+ --eval.use_async_envs=false \
+ --policy.device=cuda \
+ '--rename_map={\"observation.images.head_camera\": \"observation.images.camera1\", \"observation.images.left_camera\": \"observation.images.camera2\", \"observation.images.right_camera\": \"observation.images.camera3\"}' \
+ --output_dir=/tmp/eval-artifacts
+ python /lerobot/scripts/ci/extract_task_descriptions.py \
+ --env robotwin \
+ --task \"\$ROBOTWIN_TASKS\" \
+ --output /tmp/eval-artifacts/task_descriptions.json
+ "
+
+ - name: Copy RoboTwin artifacts from container
+ if: always()
+ run: |
+ mkdir -p /tmp/robotwin-artifacts
+ docker cp robotwin-eval:/tmp/eval-artifacts/. /tmp/robotwin-artifacts/ 2>/dev/null || true
+ docker rm -f robotwin-eval || true
+
+ - name: Parse RoboTwin eval metrics
+ if: always()
+ run: |
+ python3 scripts/ci/parse_eval_metrics.py \
+ --artifacts-dir /tmp/robotwin-artifacts \
+ --env robotwin \
+ --task "${ROBOTWIN_TASKS}" \
+ --policy "${ROBOTWIN_POLICY}"
+
+ - name: Upload RoboTwin rollout video
+ if: always()
+ uses: actions/upload-artifact@v4
+ with:
+ name: robotwin-rollout-video
+ path: /tmp/robotwin-artifacts/videos/
+ if-no-files-found: warn
+
+ - name: Upload RoboTwin eval metrics
+ if: always()
+ uses: actions/upload-artifact@v4
+ with:
+ name: robotwin-metrics
+ path: /tmp/robotwin-artifacts/metrics.json
+ if-no-files-found: warn
+
# ── ROBOCASA365 ──────────────────────────────────────────────────────────
# Isolated image: robocasa + robosuite installed manually as editable
# clones (no `lerobot[robocasa]` extra — robocasa's setup.py pins
diff --git a/docker/Dockerfile.benchmark.robotwin b/docker/Dockerfile.benchmark.robotwin
new file mode 100644
index 000000000..423854c31
--- /dev/null
+++ b/docker/Dockerfile.benchmark.robotwin
@@ -0,0 +1,122 @@
+# Copyright 2025 The HuggingFace Inc. team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Benchmark image for RoboTwin 2.0 integration tests.
+# Extends the nightly GPU image with the RoboTwin simulator stack:
+# sapien/mplib/pytorch3d + NVlabs CuRobo + embodiments.zip + objects.zip
+# (~3.96 GB of assets; background_texture.zip ~11 GB skipped for smoke eval).
+#
+# Build: docker build -f docker/Dockerfile.benchmark.robotwin -t lerobot-benchmark-robotwin .
+# Run: docker run --gpus all --rm lerobot-benchmark-robotwin \
+# lerobot-eval --env.type=robotwin --env.task=beat_block_hammer ...
+
+FROM huggingface/lerobot-gpu:latest
+
+ENV NVIDIA_DRIVER_CAPABILITIES=all \
+ VK_ICD_FILENAMES=/usr/share/vulkan/icd.d/nvidia_icd.json \
+ ROBOTWIN_ROOT=/opt/robotwin
+
+# The nightly base uses the CUDA `-base` image variant (no compiler, no Vulkan
+# loader). CuRobo's `pip install -e .` runs nvcc and SAPIEN renders via Vulkan,
+# so install both.
+USER root
+# Pinned upstream SHA for reproducible benchmark runs. Bump when we need
+# an upstream fix; don't rely on `main` drift.
+ARG ROBOTWIN_SHA=0aeea2d669c0f8516f4d5785f0aa33ba812c14b4
+RUN apt-get update \
+ && apt-get install -y --no-install-recommends \
+ cuda-nvcc-12-4 cuda-cudart-dev-12-4 \
+ libvulkan1 vulkan-tools \
+ && mkdir -p /usr/share/vulkan/icd.d \
+ && echo '{"file_format_version":"1.0.0","ICD":{"library_path":"libGLX_nvidia.so.0","api_version":"1.3.0"}}' \
+ > /usr/share/vulkan/icd.d/nvidia_icd.json \
+ && git clone https://github.com/RoboTwin-Platform/RoboTwin.git ${ROBOTWIN_ROOT} \
+ && git -C ${ROBOTWIN_ROOT} checkout ${ROBOTWIN_SHA} \
+ && chown -R user_lerobot:user_lerobot ${ROBOTWIN_ROOT} \
+ && apt-get clean && rm -rf /var/lib/apt/lists/*
+USER user_lerobot
+
+# RoboTwin runtime deps (av is already in the base via [av-dep]).
+RUN uv pip install --no-cache \
+ "sapien==3.0.0b1" "mplib==0.2.1" "transforms3d==0.4.2" "trimesh==4.4.3" \
+ "open3d==0.19.0" "imageio==2.34.2" termcolor zarr pydantic h5py
+
+# pytorch3d ships no prebuilt wheel for this torch/CUDA combo; build from source (~10 min, cached).
+RUN uv pip install --no-cache --no-build-isolation \
+ "git+https://github.com/facebookresearch/pytorch3d.git@stable"
+
+# CuRobo — NVlabs motion generator; TORCH_CUDA_ARCH_LIST must be set or the
+# build aborts on an empty arch list. Pinned SHA for reproducibility.
+ARG CUROBO_SHA=ca941586c33b8482ed9c0e74d60f23efd64b516a
+RUN cd ${ROBOTWIN_ROOT}/envs \
+ && git clone https://github.com/NVlabs/curobo.git \
+ && git -C curobo checkout ${CUROBO_SHA} \
+ && cd curobo \
+ && TORCH_CUDA_ARCH_LIST="7.0;7.5;8.0;8.6;8.9;9.0" \
+ uv pip install -e . --no-build-isolation --no-cache
+
+# Upstream patches (mirror RoboTwin's script/_install.sh).
+# These patches target the exact versions pinned above; re-check when upgrading.
+# mplib==0.2.1: drop a broken `or collide` clause in planner.py.
+# Safe to remove once mplib > 0.2.1 ships with the fix upstream.
+# sapien==3.0.0b1: fix URDF loader encoding + .srdf extension check.
+# Safe to remove once sapien > 3.0.0b1 ships with the fix upstream.
+RUN python - <<'EOF'
+import pathlib, re, site
+for d in site.getsitepackages():
+ p = pathlib.Path(d) / "mplib" / "planner.py"
+ if p.exists():
+ p.write_text(re.sub(r"\bor collide\b", "", p.read_text(), count=1))
+ print(f"mplib patch applied: {p}")
+ p = pathlib.Path(d) / "sapien" / "wrapper" / "urdf_loader.py"
+ if p.exists():
+ src = p.read_text().replace(
+ "with open(srdf_path) as f:", 'with open(srdf_path, encoding="utf-8") as f:'
+ ).replace('"srdf"', '".srdf"')
+ p.write_text(src)
+ print(f"sapien patch applied: {p}")
+EOF
+
+# Simulation assets from TianxingChen/RoboTwin2.0: embodiments (~220 MB) +
+# objects (~3.74 GB). background_texture (~11 GB) is intentionally skipped.
+# The dataset is public — no auth token needed.
+RUN python - <<'EOF'
+import os, pathlib, zipfile
+from huggingface_hub import hf_hub_download
+
+assets_dir = pathlib.Path(os.environ["ROBOTWIN_ROOT"]) / "assets"
+assets_dir.mkdir(parents=True, exist_ok=True)
+for fname in ("embodiments.zip", "objects.zip"):
+ local = hf_hub_download(
+ repo_id="TianxingChen/RoboTwin2.0",
+ repo_type="dataset",
+ filename=fname,
+ local_dir=str(assets_dir),
+ )
+ with zipfile.ZipFile(local, "r") as z:
+ z.extractall(str(assets_dir))
+ pathlib.Path(local).unlink()
+EOF
+
+WORKDIR ${ROBOTWIN_ROOT}
+RUN python script/update_embodiment_config_path.py
+
+ENV PYTHONPATH="${ROBOTWIN_ROOT}:${PYTHONPATH}"
+
+# Return to the lerobot source directory (set by base image) before overlaying.
+WORKDIR /lerobot
+
+# Overlay the PR's source code on top of the nightly image.
+COPY --chown=user_lerobot:user_lerobot . .
+
+CMD ["/bin/bash"]
diff --git a/docs/source/_toctree.yml b/docs/source/_toctree.yml
index b48bbf757..bb0dad1bf 100644
--- a/docs/source/_toctree.yml
+++ b/docs/source/_toctree.yml
@@ -79,6 +79,8 @@
title: LIBERO
- local: metaworld
title: Meta-World
+ - local: robotwin
+ title: RoboTwin 2.0
- local: robocasa
title: RoboCasa365
- local: envhub_isaaclab_arena
diff --git a/docs/source/robotwin.mdx b/docs/source/robotwin.mdx
new file mode 100644
index 000000000..ad1db766f
--- /dev/null
+++ b/docs/source/robotwin.mdx
@@ -0,0 +1,223 @@
+# RoboTwin 2.0
+
+RoboTwin 2.0 is a **large-scale dual-arm manipulation benchmark** built on the SAPIEN physics engine. It provides a standardized evaluation protocol for bimanual robotic policies across 50 tasks (as of upstream `main`) with strong domain randomization (clutter, lighting, background, tabletop height, and language instructions).
+
+- Paper: [RoboTwin 2.0: A Scalable Data Generator and Benchmark with Strong Domain Randomization for Robust Bimanual Robotic Manipulation](https://arxiv.org/abs/2506.18088)
+- GitHub: [RoboTwin-Platform/RoboTwin](https://github.com/RoboTwin-Platform/RoboTwin)
+- Leaderboard: [robotwin-platform.github.io/leaderboard](https://robotwin-platform.github.io/leaderboard)
+- Dataset: [lerobot/robotwin_unified](https://huggingface.co/datasets/lerobot/robotwin_unified)
+
+## Overview
+
+| Property | Value |
+| ------------- | -------------------------------------------------------- |
+| Tasks | 50 dual-arm manipulation tasks |
+| Robot | Aloha-AgileX bimanual (14 DOF, 7 per arm) |
+| Action space | 14-dim joint-space, continuous in `[-1, 1]` |
+| Cameras | `head_camera`, `left_camera`, `right_camera` |
+| Simulator | SAPIEN (not MuJoCo) |
+| Eval protocol | 100 eval episodes/task; train on 50 `demo_clean` demos |
+| Eval settings | **Easy** (`demo_clean`) and **Hard** (`demo_randomized`) |
+
+## Available tasks
+
+RoboTwin 2.0 ships 50 dual-arm manipulation tasks in its upstream `envs/` directory. The canonical list is the `ROBOTWIN_TASKS` tuple in `src/lerobot/envs/robotwin.py`, mirrored verbatim from the upstream repo. Example tasks:
+
+| Task | CLI name | Category |
+| ------------------------ | ------------------------ | ----------------- |
+| Beat block with hammer | `beat_block_hammer` | Tool use |
+| Click bell / alarm clock | `click_bell` | Precision press |
+| Stack blocks (2 / 3) | `stack_blocks_two/three` | Stacking |
+| Stack bowls (2 / 3) | `stack_bowls_two/three` | Stacking |
+| Handover block / mic | `handover_block` | Bimanual coord. |
+| Lift pot | `lift_pot` | Bimanual lift |
+| Shake bottle | `shake_bottle` | Continuous motion |
+| Turn switch | `turn_switch` | Articulated obj |
+| Stamp seal | `stamp_seal` | Precision place |
+| Scan object | `scan_object` | Mobile manip. |
+
+Pass a comma-separated list to `--env.task` to run multiple tasks in a single eval sweep.
+
+<Tip warning={true}>
+
+`open_laptop` is currently broken upstream (its `check_success()` uses
+`self.arm_tag`, which is only set inside the scripted-expert `play_once()`
+path and therefore unavailable during normal policy eval). Avoid it until the
+upstream bug is fixed, or patch the task to default `self.arm_tag = "left"` in
+`load_actors()`.
+
+</Tip>
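+
+If you want to experiment with `open_laptop` anyway, the snippet below sketches the workaround suggested in the warning above. It is untested and assumes the upstream layout (an `envs/open_laptop.py` module exposing an `open_laptop` class, as loaded by the LeRobot wrapper):
+
+```python
+# Hypothetical monkeypatch: give the task a default arm_tag after load_actors()
+# so check_success() has a value during policy eval (the scripted play_once()
+# path normally sets it). Apply before creating the env.
+from envs.open_laptop import open_laptop
+
+_original_load_actors = open_laptop.load_actors
+
+
+def _load_actors_with_default_arm(self):
+    _original_load_actors(self)
+    self.arm_tag = "left"  # assumed default; adjust if your policy drives the right arm
+
+
+open_laptop.load_actors = _load_actors_with_default_arm
+```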
+
+## Dataset
+
+The RoboTwin 2.0 dataset is available in **LeRobot v3.0 format** on the Hugging Face Hub:
+
+```
+lerobot/robotwin_unified
+```
+
+It contains over 100,000 pre-collected trajectories across all 50 tasks (79.6 GB, Apache 2.0 license). No format conversion is needed — it is already in the correct LeRobot v3.0 schema with video observations and action labels.
+
+You can load it directly with the HF Datasets library:
+
+```python
+from datasets import load_dataset
+
+ds = load_dataset("lerobot/robotwin_unified", split="train")
+```
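+
+If you want decoded video frames and per-frame tensors rather than raw tabular rows, the dataset can also be opened through LeRobot's own loader. A minimal sketch, assuming the current `lerobot.datasets.lerobot_dataset` import path:
+
+```python
+# Decodes camera videos lazily and returns per-frame dicts of tensors.
+from lerobot.datasets.lerobot_dataset import LeRobotDataset
+
+dataset = LeRobotDataset("lerobot/robotwin_unified")
+print(dataset.num_episodes, dataset.num_frames)
+print(dataset[0].keys())  # image, state, and action keys for frame 0
+```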
+
+## Installation
+
+RoboTwin 2.0 requires **Linux** with an NVIDIA GPU (CUDA 12.1 recommended). Installation takes approximately 20 minutes.
+
+### 1. Create a conda environment
+
+```bash
+conda create -n robotwin python=3.10 -y
+conda activate robotwin
+```
+
+### 2. Install LeRobot
+
+```bash
+git clone https://github.com/huggingface/lerobot.git
+cd lerobot
+pip install -e "."
+```
+
+### 3. Install RoboTwin 2.0
+
+```bash
+git clone https://github.com/RoboTwin-Platform/RoboTwin.git
+cd RoboTwin
+bash script/_install.sh
+bash script/_download_assets.sh
+```
+
+The install script handles all Python dependencies including SAPIEN, CuRobo, mplib, and pytorch3d.
+
+<Tip>
+
+If the automated install fails, install manually:
+
+```bash
+pip install -r requirements.txt
+pip install "git+https://github.com/facebookresearch/pytorch3d.git@stable"
+cd envs && git clone https://github.com/NVlabs/curobo.git && cd curobo
+pip install -e . --no-build-isolation
+```
+
+Then apply the required mplib fix: in `mplib/planner.py` line 807, remove `or collide` from the conditional.
+
+</Tip>
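+
+The same edit can be applied non-interactively; the snippet below mirrors the patch applied in `docker/Dockerfile.benchmark.robotwin`. Run it inside the `robotwin` environment after installing mplib:
+
+```python
+# Remove the broken `or collide` clause from mplib 0.2.1's planner.py,
+# wherever site-packages lives in the active environment.
+import pathlib
+import re
+import site
+
+for d in site.getsitepackages():
+    p = pathlib.Path(d) / "mplib" / "planner.py"
+    if p.exists():
+        p.write_text(re.sub(r"\bor collide\b", "", p.read_text(), count=1))
+        print(f"patched {p}")
+```
+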
+### 4. Add RoboTwin to PYTHONPATH
+
+The RoboTwin task modules must be importable by LeRobot. From within the `RoboTwin/` directory:
+
+```bash
+export PYTHONPATH="${PYTHONPATH}:$(pwd)"
+```
+
+Add this to your shell profile to make it permanent.
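+
+To verify the setup, both imports below should succeed from any directory once the RoboTwin clone is on `PYTHONPATH` and LeRobot is installed in the same environment (a quick sanity check, not part of the official install):
+
+```python
+# CONFIGS_PATH comes from RoboTwin's `envs` package; ROBOTWIN_TASKS is the
+# task tuple shipped with the LeRobot wrapper.
+from envs import CONFIGS_PATH
+from lerobot.envs.robotwin import ROBOTWIN_TASKS
+
+print(CONFIGS_PATH)
+print(f"{len(ROBOTWIN_TASKS)} RoboTwin tasks registered")
+```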
+
+## Evaluation
+
+### Standard evaluation (recommended)
+
+Evaluate a policy on a single task with the official protocol (100 episodes):
+
+```bash
+lerobot-eval \
+ --policy.path="your-hf-policy-id" \
+ --env.type=robotwin \
+ --env.task=beat_block_hammer \
+ --eval.batch_size=1 \
+ --eval.n_episodes=100
+```
+
+### Single-task quick check
+
+```bash
+lerobot-eval \
+ --policy.path="your-hf-policy-id" \
+ --env.type=robotwin \
+ --env.task=beat_block_hammer \
+ --eval.batch_size=1 \
+ --eval.n_episodes=5
+```
+
+### Multi-task sweep
+
+Evaluate on several tasks in one run:
+
+```bash
+lerobot-eval \
+ --policy.path="your-hf-policy-id" \
+ --env.type=robotwin \
+ --env.task=beat_block_hammer,click_bell,handover_block,stack_blocks_two \
+ --eval.batch_size=1 \
+ --eval.n_episodes=100
+```
+
+### Full benchmark (all 50 tasks)
+
+```bash
+lerobot-eval \
+ --policy.path="your-hf-policy-id" \
+ --env.type=robotwin \
+ --env.task=adjust_bottle,beat_block_hammer,blocks_ranking_rgb,blocks_ranking_size,click_alarmclock,click_bell,dump_bin_bigbin,grab_roller,handover_block,handover_mic,hanging_mug,lift_pot,move_can_pot,move_pillbottle_pad,move_playingcard_away,move_stapler_pad,open_microwave,pick_diverse_bottles,pick_dual_bottles,place_a2b_left,place_a2b_right,place_bread_basket,place_bread_skillet,place_burger_fries,place_can_basket,place_cans_plasticbox,place_container_plate,place_dual_shoes,place_empty_cup,place_fan,place_mouse_pad,place_object_basket,place_object_scale,place_object_stand,place_phone_stand,place_shoe,press_stapler,put_bottles_dustbin,put_object_cabinet,rotate_qrcode,scan_object,shake_bottle,shake_bottle_horizontally,stack_blocks_three,stack_blocks_two,stack_bowls_three,stack_bowls_two,stamp_seal,turn_switch \
+ --eval.batch_size=1 \
+ --eval.n_episodes=100
+```
+
+<Tip>
+
+`open_laptop` is intentionally omitted above because of the upstream
+`self.arm_tag` bug (see the **Available tasks** section). Re-add it once the
+upstream fix lands.
+
+</Tip>
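+
+Rather than maintaining that long task string by hand, you can generate it from the `ROBOTWIN_TASKS` tuple shipped with this integration (the `open_laptop` filter reflects the note above):
+
+```python
+from lerobot.envs.robotwin import ROBOTWIN_TASKS
+
+# Comma-separated value for --env.task, minus the currently broken task.
+task_arg = ",".join(t for t in ROBOTWIN_TASKS if t != "open_laptop")
+print(task_arg)
+```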
+
+## Camera configuration
+
+By default, all three cameras are included:
+
+| Camera key | Description |
+| -------------- | ------------------------------ |
+| `head_camera` | Torso-mounted overhead view |
+| `left_camera` | Left arm wrist-mounted camera |
+| `right_camera` | Right arm wrist-mounted camera |
+
+To use a subset of cameras, override `--env.camera_names`:
+
+```bash
+lerobot-eval \
+ --policy.path="your-hf-policy-id" \
+ --env.type=robotwin \
+ --env.task=beat_block_hammer \
+ --env.camera_names="head_camera,left_camera" \
+ --eval.batch_size=1 \
+ --eval.n_episodes=10
+```
+
+## Environment config reference
+
+Key parameters for `RoboTwinEnvConfig`:
+
+| Parameter | Default | Description |
+| -------------------- | ---------------------------------------- | ---------------------------------- |
+| `task` | `"beat_block_hammer"` | Comma-separated task name(s) |
+| `fps` | `25` | Simulation FPS |
+| `episode_length` | `300` | Max steps per episode |
+| `obs_type` | `"pixels_agent_pos"` | `"pixels"` or `"pixels_agent_pos"` |
+| `camera_names` | `"head_camera,left_camera,right_camera"` | Comma-separated active cameras |
+| `observation_height` | `240` | Camera pixel height |
+| `observation_width` | `320` | Camera pixel width |
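+
+The same settings can be used programmatically. A minimal sketch (resetting the envs requires a working RoboTwin installation, since the SAPIEN task modules are imported lazily on the first `reset()`):
+
+```python
+from lerobot.envs.configs import RoboTwinEnvConfig
+
+cfg = RoboTwinEnvConfig(task="beat_block_hammer", camera_names="head_camera")
+# One vectorized env per task; use_async_envs=False keeps workers in-process.
+envs = cfg.create_envs(n_envs=1, use_async_envs=False)
+vec_env = envs["beat_block_hammer"][0]
+obs, info = vec_env.reset(seed=0)
+```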
+
+## Leaderboard submission
+
+Results can be submitted to the [RoboTwin 2.0 leaderboard](https://robotwin-platform.github.io/leaderboard). The official protocol requires:
+
+- Training on 50 `demo_clean` demonstrations per task
+- Evaluating 100 episodes per task
+- Reporting success rate separately for **Easy** (`demo_clean`) and **Hard** (`demo_randomized`) settings
+
+For submission instructions, refer to the [RoboTwin 2.0 documentation](https://robotwin-platform.github.io/doc/).
diff --git a/scripts/ci/extract_task_descriptions.py b/scripts/ci/extract_task_descriptions.py
index 73ec92196..c9216e02d 100644
--- a/scripts/ci/extract_task_descriptions.py
+++ b/scripts/ci/extract_task_descriptions.py
@@ -57,6 +57,24 @@ def _metaworld_descriptions(task_name: str) -> dict[str, str]:
return {f"{task_name}_0": label}
+def _robotwin_descriptions(task_names: str) -> dict[str, str]:
+ """Return descriptions for each requested RoboTwin task. Reads
+    `description/task_instruction/<task_name>.json` from the RoboTwin clone
+ (cwd is /opt/robotwin in CI). Falls back to the task name if missing."""
+ out: dict[str, str] = {}
+ root = Path("description/task_instruction")
+ for name in (t.strip() for t in task_names.split(",") if t.strip()):
+ desc_file = root / f"{name}.json"
+ desc = name.replace("_", " ")
+ if desc_file.is_file():
+ data = json.loads(desc_file.read_text())
+ full = data.get("full_description") or desc
+            # Strip angle-bracket placeholder markers so the sentence stays readable.
+ desc = full.replace("<", "").replace(">", "")
+ out[f"{name}_0"] = desc
+ return out
+
+
def _robocasa_descriptions(task_spec: str) -> dict[str, str]:
"""For each task in the comma-separated list, emit a cleaned-name label.
@@ -87,6 +105,8 @@ def main() -> int:
descriptions = _libero_descriptions(args.task)
elif args.env == "metaworld":
descriptions = _metaworld_descriptions(args.task)
+ elif args.env == "robotwin":
+ descriptions = _robotwin_descriptions(args.task)
elif args.env == "robocasa":
descriptions = _robocasa_descriptions(args.task)
else:
diff --git a/src/lerobot/envs/configs.py b/src/lerobot/envs/configs.py
index 9b5b3ab94..4f93a26a7 100644
--- a/src/lerobot/envs/configs.py
+++ b/src/lerobot/envs/configs.py
@@ -649,3 +649,90 @@ class IsaaclabArenaEnv(HubEnvConfig):
),
PolicyProcessorPipeline(steps=[]),
)
+
+
+@EnvConfig.register_subclass("robotwin")
+@dataclass
+class RoboTwinEnvConfig(EnvConfig):
+ """Configuration for RoboTwin 2.0 benchmark environments.
+
+ RoboTwin 2.0 is a dual-arm manipulation benchmark with 50 tasks built on the
+ SAPIEN simulator. The robot is an Aloha-AgileX bimanual platform with 14 DOF
+ (7 per arm). All three cameras are enabled by default.
+
+ See: https://robotwin-platform.github.io
+ Dataset: https://huggingface.co/datasets/lerobot/robotwin_unified
+ """
+
+ task: str = "beat_block_hammer" # single task or comma-separated list
+ fps: int = 25
+ episode_length: int = 300
+ obs_type: str = "pixels_agent_pos"
+ render_mode: str = "rgb_array"
+ # Available cameras from RoboTwin's aloha-agilex embodiment: head_camera
+ # (torso-mounted) + left_camera / right_camera (wrists).
+ camera_names: str = "head_camera,left_camera,right_camera"
+ # Match the D435 dims in task_config/demo_clean.yml (_camera_config.yml).
+ # Gym's vector-env concatenate pre-allocates buffers of this shape, so it
+ # must equal what SAPIEN actually renders.
+ observation_height: int = 240
+ observation_width: int = 320
+ features: dict[str, PolicyFeature] = field(
+ default_factory=lambda: {
+ ACTION: PolicyFeature(type=FeatureType.ACTION, shape=(14,)),
+ }
+ )
+ features_map: dict[str, str] = field(
+ default_factory=lambda: {
+ ACTION: ACTION,
+ "pixels/head_camera": f"{OBS_IMAGES}.head_camera",
+ "pixels/left_camera": f"{OBS_IMAGES}.left_camera",
+ "pixels/right_camera": f"{OBS_IMAGES}.right_camera",
+ "agent_pos": OBS_STATE,
+ }
+ )
+
+ def __post_init__(self):
+ cam_list = [c.strip() for c in self.camera_names.split(",") if c.strip()]
+ for cam in cam_list:
+ self.features[f"pixels/{cam}"] = PolicyFeature(
+ type=FeatureType.VISUAL,
+ shape=(self.observation_height, self.observation_width, 3),
+ )
+ # Keep features_map entry if already set (default_factory); add if missing.
+ key = f"pixels/{cam}"
+ if key not in self.features_map:
+ self.features_map[key] = f"{OBS_IMAGES}.{cam}"
+
+ if self.obs_type == "pixels_agent_pos":
+ self.features["agent_pos"] = PolicyFeature(
+ type=FeatureType.STATE,
+ shape=(14,), # 14 DOF: 7 per arm
+ )
+ elif self.obs_type != "pixels":
+ raise ValueError(
+ f"Unsupported obs_type '{self.obs_type}'. "
+ "RoboTwinEnvConfig supports 'pixels' and 'pixels_agent_pos'."
+ )
+
+ @property
+ def gym_kwargs(self) -> dict:
+ return {}
+
+ def create_envs(self, n_envs: int, use_async_envs: bool = True):
+ from lerobot.envs.robotwin import create_robotwin_envs
+
+ if not self.task:
+ raise ValueError("RoboTwinEnvConfig requires `task` to be specified.")
+
+ env_cls = _make_vec_env_cls(use_async_envs, n_envs)
+ cam_list = [c.strip() for c in self.camera_names.split(",") if c.strip()]
+ return create_robotwin_envs(
+ task=self.task,
+ n_envs=n_envs,
+ env_cls=env_cls,
+ camera_names=cam_list,
+ observation_height=self.observation_height,
+ observation_width=self.observation_width,
+ episode_length=self.episode_length,
+ )
diff --git a/src/lerobot/envs/robotwin.py b/src/lerobot/envs/robotwin.py
new file mode 100644
index 000000000..823f14fa0
--- /dev/null
+++ b/src/lerobot/envs/robotwin.py
@@ -0,0 +1,488 @@
+#!/usr/bin/env python
+
+# Copyright 2025 The HuggingFace Inc. team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from __future__ import annotations
+
+import importlib
+import logging
+from collections import defaultdict
+from collections.abc import Callable, Sequence
+from functools import partial
+from typing import Any
+
+import gymnasium as gym
+import numpy as np
+import torch
+from gymnasium import spaces
+
+from lerobot.types import RobotObservation
+
+from .utils import _LazyAsyncVectorEnv
+
+logger = logging.getLogger(__name__)
+
+# Camera names as used by RoboTwin 2.0. For each camera the wrapper reads the
+# nested "rgb" entry from get_obs()["observation"] (e.g. ["head_camera"]["rgb"]).
+ROBOTWIN_CAMERA_NAMES: tuple[str, ...] = (
+ "head_camera",
+ "left_camera",
+ "right_camera",
+)
+
+ACTION_DIM = 14 # 7 DOF × 2 arms
+ACTION_LOW = -1.0
+ACTION_HIGH = 1.0
+DEFAULT_EPISODE_LENGTH = 300
+# D435 dims from task_config/_camera_config.yml (what demo_clean.yml selects).
+DEFAULT_CAMERA_H = 240
+DEFAULT_CAMERA_W = 320
+
+# Task list from RoboTwin 2.0's `envs/` directory — mirrors upstream exactly
+# (50 tasks as of main; earlier revisions had 60 with a different split).
+# Keep this in sync with:
+# gh api /repos/RoboTwin-Platform/RoboTwin/contents/envs --paginate \
+# | jq -r '.[].name' | grep -E '\.py$' | grep -v '^_' | sed 's/\.py$//'
+ROBOTWIN_TASKS: tuple[str, ...] = (
+ "adjust_bottle",
+ "beat_block_hammer",
+ "blocks_ranking_rgb",
+ "blocks_ranking_size",
+ "click_alarmclock",
+ "click_bell",
+ "dump_bin_bigbin",
+ "grab_roller",
+ "handover_block",
+ "handover_mic",
+ "hanging_mug",
+ "lift_pot",
+ "move_can_pot",
+ "move_pillbottle_pad",
+ "move_playingcard_away",
+ "move_stapler_pad",
+ "open_laptop",
+ "open_microwave",
+ "pick_diverse_bottles",
+ "pick_dual_bottles",
+ "place_a2b_left",
+ "place_a2b_right",
+ "place_bread_basket",
+ "place_bread_skillet",
+ "place_burger_fries",
+ "place_can_basket",
+ "place_cans_plasticbox",
+ "place_container_plate",
+ "place_dual_shoes",
+ "place_empty_cup",
+ "place_fan",
+ "place_mouse_pad",
+ "place_object_basket",
+ "place_object_scale",
+ "place_object_stand",
+ "place_phone_stand",
+ "place_shoe",
+ "press_stapler",
+ "put_bottles_dustbin",
+ "put_object_cabinet",
+ "rotate_qrcode",
+ "scan_object",
+ "shake_bottle",
+ "shake_bottle_horizontally",
+ "stack_blocks_three",
+ "stack_blocks_two",
+ "stack_bowls_three",
+ "stack_bowls_two",
+ "stamp_seal",
+ "turn_switch",
+)
+
+
+_ROBOTWIN_SETUP_CACHE: dict[str, dict[str, Any]] = {}
+
+
+def _load_robotwin_setup_kwargs(task_name: str) -> dict[str, Any]:
+ """Build the kwargs dict RoboTwin's setup_demo expects.
+
+ Mirrors the config loading done by RoboTwin's ``script/eval_policy.py``:
+ reads ``task_config/demo_clean.yml``, resolves the embodiment file from
+ ``_embodiment_config.yml``, loads the robot's own ``config.yml``, and
+ reads camera dimensions from ``_camera_config.yml``.
+
+ Uses ``aloha-agilex`` single-robot dual-arm by default (the only embodiment
+ used by beat_block_hammer and most smoke-test tasks).
+ """
+ if task_name in _ROBOTWIN_SETUP_CACHE:
+ return dict(_ROBOTWIN_SETUP_CACHE[task_name])
+
+ import os
+
+ import yaml # type: ignore[import-untyped]
+ from envs import CONFIGS_PATH # type: ignore[import-not-found]
+
+ task_config = "demo_clean"
+ with open(os.path.join(CONFIGS_PATH, f"{task_config}.yml"), encoding="utf-8") as f:
+ args = yaml.safe_load(f)
+
+ # Resolve embodiment — demo_clean.yml uses [aloha-agilex] (dual-arm single robot)
+ with open(os.path.join(CONFIGS_PATH, "_embodiment_config.yml"), encoding="utf-8") as f:
+ embodiment_types = yaml.safe_load(f)
+ embodiment = args.get("embodiment", ["aloha-agilex"])
+ if len(embodiment) == 1:
+ robot_file = embodiment_types[embodiment[0]]["file_path"]
+ args["left_robot_file"] = robot_file
+ args["right_robot_file"] = robot_file
+ args["dual_arm_embodied"] = True
+ elif len(embodiment) == 3:
+ args["left_robot_file"] = embodiment_types[embodiment[0]]["file_path"]
+ args["right_robot_file"] = embodiment_types[embodiment[1]]["file_path"]
+ args["embodiment_dis"] = embodiment[2]
+ args["dual_arm_embodied"] = False
+ else:
+ raise ValueError(f"embodiment must have 1 or 3 items, got {len(embodiment)}")
+
+ with open(os.path.join(args["left_robot_file"], "config.yml"), encoding="utf-8") as f:
+ args["left_embodiment_config"] = yaml.safe_load(f)
+ with open(os.path.join(args["right_robot_file"], "config.yml"), encoding="utf-8") as f:
+ args["right_embodiment_config"] = yaml.safe_load(f)
+
+ # Camera dimensions
+ with open(os.path.join(CONFIGS_PATH, "_camera_config.yml"), encoding="utf-8") as f:
+ camera_config = yaml.safe_load(f)
+ head_cam = args["camera"]["head_camera_type"]
+ args["head_camera_h"] = camera_config[head_cam]["h"]
+ args["head_camera_w"] = camera_config[head_cam]["w"]
+
+ # Headless overrides
+ args["render_freq"] = 0
+ args["task_name"] = task_name
+ args["task_config"] = task_config
+
+ _ROBOTWIN_SETUP_CACHE[task_name] = args
+ return dict(args)
+
+
+def _load_robotwin_task(task_name: str) -> type:
+ """Dynamically import and return a RoboTwin 2.0 task class.
+
+    RoboTwin tasks live in ``envs/<task_name>.py`` relative to the repository
+ root and are expected to be on ``sys.path`` after installation.
+ """
+ try:
+ module = importlib.import_module(f"envs.{task_name}")
+ except ModuleNotFoundError as e:
+ raise ModuleNotFoundError(
+ f"Could not import RoboTwin task '{task_name}'. "
+ "Ensure RoboTwin 2.0 is installed and its 'envs/' directory is on PYTHONPATH. "
+ "See the RoboTwin installation guide: https://robotwin-platform.github.io/doc/usage/robotwin-install.html"
+ ) from e
+ task_cls = getattr(module, task_name, None)
+ if task_cls is None:
+ raise AttributeError(f"Task class '{task_name}' not found in envs/{task_name}.py")
+ return task_cls
+
+
+class RoboTwinEnv(gym.Env):
+ """Gymnasium wrapper around a single RoboTwin 2.0 task.
+
+ RoboTwin uses a custom SAPIEN-based API (``setup_demo`` / ``get_obs`` /
+ ``take_action`` / ``check_success``) rather than the standard gym interface.
+ This class bridges that API to Gymnasium so that ``lerobot-eval`` can drive
+ RoboTwin exactly like LIBERO or Meta-World.
+
+ The underlying SAPIEN environment is created lazily on the first ``reset()``
+ call *inside the worker process*. This is required for
+ ``gym.vector.AsyncVectorEnv`` compatibility: SAPIEN allocates EGL/GPU
+ contexts that must not be forked from the parent process.
+
+ Observations
+ ------------
+ The ``pixels`` dict uses the raw RoboTwin camera names as keys (e.g.
+ ``"head_camera"``, ``"left_camera"``). ``preprocess_observation`` in
+    ``lerobot/envs/utils.py`` then converts these to ``observation.images.<camera_name>``.
+
+ Actions
+ -------
+ 14-dim float32 array in ``[-1, 1]`` (joint-space, 7 DOF per arm).
+
+ Autograd
+ --------
+ ``setup_demo`` and ``take_action`` drive CuRobo's Newton trajectory
+ optimizer, which calls ``cost.backward()`` internally. lerobot_eval wraps
+ the rollout in ``torch.no_grad()``, so both call sites re-enable grad.
+ """
+
+ metadata = {"render_modes": ["rgb_array"], "render_fps": 25}
+
+ def __init__(
+ self,
+ task_name: str,
+ episode_index: int = 0,
+ n_envs: int = 1,
+ camera_names: Sequence[str] = ROBOTWIN_CAMERA_NAMES,
+ observation_height: int | None = None,
+ observation_width: int | None = None,
+ episode_length: int = DEFAULT_EPISODE_LENGTH,
+ render_mode: str = "rgb_array",
+ ):
+ super().__init__()
+ self.task_name = task_name
+ self.task = task_name # used by add_envs_task() in utils.py
+ self.task_description = task_name.replace("_", " ")
+ self.episode_index = episode_index
+ self._reset_stride = n_envs
+ self.camera_names = list(camera_names)
+ # Default to D435 dims (the camera type baked into task_config/demo_clean.yml).
+ # The YAML-driven lookup is deferred to reset() so construction doesn't
+ # import RoboTwin's `envs` module — fast-tests run without RoboTwin installed.
+ self.observation_height = observation_height or DEFAULT_CAMERA_H
+ self.observation_width = observation_width or DEFAULT_CAMERA_W
+ self.episode_length = episode_length
+ self._max_episode_steps = episode_length # lerobot_eval.rollout reads this
+ self.render_mode = render_mode
+
+ self._env: Any | None = None # deferred — created on first reset() inside worker
+ self._step_count: int = 0
+ self._black_frame = np.zeros((self.observation_height, self.observation_width, 3), dtype=np.uint8)
+
+ image_spaces = {
+ cam: spaces.Box(
+ low=0,
+ high=255,
+ shape=(self.observation_height, self.observation_width, 3),
+ dtype=np.uint8,
+ )
+ for cam in self.camera_names
+ }
+ self.observation_space = spaces.Dict(
+ {
+ "pixels": spaces.Dict(image_spaces),
+ "agent_pos": spaces.Box(low=-np.inf, high=np.inf, shape=(ACTION_DIM,), dtype=np.float32),
+ }
+ )
+ self.action_space = spaces.Box(
+ low=ACTION_LOW, high=ACTION_HIGH, shape=(ACTION_DIM,), dtype=np.float32
+ )
+
+ def _ensure_env(self) -> None:
+ """Create the SAPIEN environment on first use.
+
+ Called inside the worker subprocess after fork(), so each worker gets
+ its own EGL/GPU context rather than inheriting a stale one from the
+ parent process (which causes crashes with AsyncVectorEnv).
+ """
+ if self._env is not None:
+ return
+ task_cls = _load_robotwin_task(self.task_name)
+ self._env = task_cls()
+
+ def _get_obs(self) -> RobotObservation:
+ assert self._env is not None, "_get_obs called before _ensure_env()"
+ raw = self._env.get_obs()
+ cameras_raw = raw.get("observation", {})
+
+ images: dict[str, np.ndarray] = {}
+ for cam in self.camera_names:
+ cam_data = cameras_raw.get(cam)
+ img = cam_data.get("rgb") if cam_data else None
+ if img is None:
+ images[cam] = self._black_frame
+ continue
+ img = np.asarray(img, dtype=np.uint8)
+ if img.ndim == 2:
+ img = np.stack([img, img, img], axis=-1)
+ elif img.shape[-1] != 3:
+ img = img[..., :3]
+ images[cam] = img
+
+ ja = raw.get("joint_action") or {}
+ vec = ja.get("vector")
+ if vec is not None:
+ arr = np.asarray(vec, dtype=np.float32).ravel()
+ joint_state = (
+ arr[:ACTION_DIM] if arr.size >= ACTION_DIM else np.zeros(ACTION_DIM, dtype=np.float32)
+ )
+ else:
+ joint_state = np.zeros(ACTION_DIM, dtype=np.float32)
+
+ return {"pixels": images, "agent_pos": joint_state}
+
+ def reset(self, seed: int | None = None, **kwargs) -> tuple[RobotObservation, dict]:
+ self._ensure_env()
+ super().reset(seed=seed)
+ assert self._env is not None # set by _ensure_env() above
+
+ actual_seed = self.episode_index if seed is None else seed
+ setup_kwargs = _load_robotwin_setup_kwargs(self.task_name)
+ setup_kwargs.update(seed=actual_seed, is_test=True)
+ with torch.enable_grad():
+ self._env.setup_demo(**setup_kwargs)
+ self.episode_index += self._reset_stride
+ self._step_count = 0
+
+ obs = self._get_obs()
+ return obs, {"is_success": False, "task": self.task_name}
+
+ def step(self, action: np.ndarray) -> tuple[RobotObservation, float, bool, bool, dict[str, Any]]:
+ assert self._env is not None, "step() called before reset()"
+ if action.ndim != 1 or action.shape[0] != ACTION_DIM:
+ raise ValueError(f"Expected 1-D action of shape ({ACTION_DIM},), got {action.shape}")
+
+ with torch.enable_grad():
+ if hasattr(self._env, "take_action"):
+ self._env.take_action(action)
+ else:
+ self._env.step(action)
+
+ self._step_count += 1
+
+ is_success = bool(getattr(self._env, "eval_success", False))
+ if not is_success and hasattr(self._env, "check_success"):
+ is_success = bool(self._env.check_success())
+
+ obs = self._get_obs()
+ reward = float(is_success)
+ terminated = is_success
+ truncated = self._step_count >= self.episode_length
+
+ info: dict[str, Any] = {
+ "task": self.task_name,
+ "is_success": is_success,
+ "step": self._step_count,
+ }
+ if terminated or truncated:
+ info["final_info"] = {
+ "task": self.task_name,
+ "is_success": is_success,
+ }
+ self.reset()
+
+ return obs, reward, terminated, truncated, info
+
+ def render(self) -> np.ndarray:
+ self._ensure_env()
+ obs = self._get_obs()
+ # Prefer head camera for rendering; fall back to first available.
+ if "head_camera" in obs["pixels"]:
+ return obs["pixels"]["head_camera"]
+ return next(iter(obs["pixels"].values()))
+
+ def close(self) -> None:
+ if self._env is not None:
+ if hasattr(self._env, "close_env"):
+ import contextlib
+
+ with contextlib.suppress(TypeError):
+ self._env.close_env()
+ self._env = None
+
+
+# ---- Multi-task factory --------------------------------------------------------
+
+
+def _make_env_fns(
+ *,
+ task_name: str,
+ n_envs: int,
+ camera_names: list[str],
+ observation_height: int,
+ observation_width: int,
+ episode_length: int,
+) -> list[Callable[[], RoboTwinEnv]]:
+ """Return n_envs factory callables for a single task."""
+
+ def _make_one(episode_index: int) -> RoboTwinEnv:
+ return RoboTwinEnv(
+ task_name=task_name,
+ episode_index=episode_index,
+ n_envs=n_envs,
+ camera_names=camera_names,
+ observation_height=observation_height,
+ observation_width=observation_width,
+ episode_length=episode_length,
+ )
+
+ return [partial(_make_one, i) for i in range(n_envs)]
+
+
+def create_robotwin_envs(
+ task: str,
+ n_envs: int,
+ env_cls: Callable[[Sequence[Callable[[], Any]]], Any] | None = None,
+ camera_names: Sequence[str] = ROBOTWIN_CAMERA_NAMES,
+ observation_height: int = DEFAULT_CAMERA_H,
+ observation_width: int = DEFAULT_CAMERA_W,
+ episode_length: int = DEFAULT_EPISODE_LENGTH,
+) -> dict[str, dict[int, Any]]:
+ """Create vectorized RoboTwin 2.0 environments.
+
+ Returns:
+ ``dict[task_name][0] -> VectorEnv`` — one entry per task, each wrapping
+ ``n_envs`` parallel rollouts.
+
+ Args:
+ task: Comma-separated list of task names (e.g. ``"beat_block_hammer"``
+ or ``"beat_block_hammer,click_bell"``).
+ n_envs: Number of parallel rollouts per task.
+ env_cls: Vector env constructor (e.g. ``gym.vector.AsyncVectorEnv``).
+ camera_names: Cameras to include in observations.
+ observation_height: Pixel height for all cameras.
+ observation_width: Pixel width for all cameras.
+ episode_length: Max steps before truncation.
+ """
+ if env_cls is None or not callable(env_cls):
+ raise ValueError("env_cls must be callable (e.g. gym.vector.AsyncVectorEnv).")
+ if not isinstance(n_envs, int) or n_envs <= 0:
+ raise ValueError(f"n_envs must be a positive int; got {n_envs}.")
+
+ task_names = [t.strip() for t in str(task).split(",") if t.strip()]
+ if not task_names:
+ raise ValueError("`task` must contain at least one RoboTwin task name.")
+
+ unknown = [t for t in task_names if t not in ROBOTWIN_TASKS]
+ if unknown:
+ raise ValueError(f"Unknown RoboTwin tasks: {unknown}. Available tasks: {sorted(ROBOTWIN_TASKS)}")
+
+ logger.info(
+ "Creating RoboTwin envs | tasks=%s | n_envs(per task)=%d",
+ task_names,
+ n_envs,
+ )
+
+ is_async = env_cls is gym.vector.AsyncVectorEnv
+ cached_obs_space: spaces.Space | None = None
+ cached_act_space: spaces.Space | None = None
+ cached_metadata: dict[str, Any] | None = None
+
+ out: dict[str, dict[int, Any]] = defaultdict(dict)
+ for task_name in task_names:
+ fns = _make_env_fns(
+ task_name=task_name,
+ n_envs=n_envs,
+ camera_names=list(camera_names),
+ observation_height=observation_height,
+ observation_width=observation_width,
+ episode_length=episode_length,
+ )
+ if is_async:
+ lazy = _LazyAsyncVectorEnv(fns, cached_obs_space, cached_act_space, cached_metadata)
+ if cached_obs_space is None:
+ cached_obs_space = lazy.observation_space
+ cached_act_space = lazy.action_space
+ cached_metadata = lazy.metadata
+ out[task_name][0] = lazy
+ else:
+ out[task_name][0] = env_cls(fns)
+ logger.info("Built vec env | task=%s | n_envs=%d", task_name, n_envs)
+
+ return {k: dict(v) for k, v in out.items()}
diff --git a/tests/envs/test_robotwin.py b/tests/envs/test_robotwin.py
new file mode 100644
index 000000000..fcd45adbf
--- /dev/null
+++ b/tests/envs/test_robotwin.py
@@ -0,0 +1,282 @@
+#!/usr/bin/env python
+
+# Copyright 2025 The HuggingFace Inc. team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Unit tests for the RoboTwin 2.0 Gymnasium wrapper.
+
+These tests mock out the SAPIEN-based RoboTwin runtime (task modules +
+YAML config loader) so they run without the full RoboTwin installation
+(SAPIEN, CuRobo, mplib, asset downloads, etc.).
+"""
+
+from __future__ import annotations
+
+from contextlib import contextmanager
+from unittest.mock import MagicMock, patch
+
+import gymnasium as gym
+import numpy as np
+import pytest
+
+from lerobot.envs.robotwin import (
+ ACTION_DIM,
+ ROBOTWIN_CAMERA_NAMES,
+ ROBOTWIN_TASKS,
+ RoboTwinEnv,
+ create_robotwin_envs,
+)
+
+# ---------------------------------------------------------------------------
+# Fixtures / helpers
+# ---------------------------------------------------------------------------
+
+
+def _make_mock_task_env(
+ height: int = 240,
+ width: int = 320,
+ cameras: tuple[str, ...] = ROBOTWIN_CAMERA_NAMES,
+) -> MagicMock:
+ """Return a mock that mimics the RoboTwin task class API.
+
+ RoboTwin's real get_obs returns
+ {"observation": {cam: {"rgb": img}}, "joint_action": {"vector": np.ndarray}, ...}
+ so the mock follows the same nested shape.
+ """
+ obs_dict = {
+ "observation": {cam: {"rgb": np.zeros((height, width, 3), dtype=np.uint8)} for cam in cameras},
+ "joint_action": {"vector": np.zeros(ACTION_DIM, dtype=np.float32)},
+ "endpose": {},
+ }
+
+ mock = MagicMock()
+ mock.get_obs.return_value = obs_dict
+ mock.setup_demo.return_value = None
+ mock.take_action.return_value = None
+ mock.eval_success = False
+ mock.check_success.return_value = False
+ mock.close_env.return_value = None
+ return mock
+
+
+@contextmanager
+def _patch_runtime(mock_task_instance: MagicMock):
+ """Patch both the task-class loader and the YAML config loader so the
+ env can construct + reset without a real RoboTwin install."""
+ task_cls = MagicMock(return_value=mock_task_instance)
+ fake_setup = {
+ "head_camera_h": 240,
+ "head_camera_w": 320,
+ "left_embodiment_config": {},
+ "right_embodiment_config": {},
+ "left_robot_file": "",
+ "right_robot_file": "",
+ "dual_arm_embodied": True,
+ "render_freq": 0,
+ "task_name": "beat_block_hammer",
+ "task_config": "demo_clean",
+ }
+ with (
+ patch("lerobot.envs.robotwin._load_robotwin_task", return_value=task_cls),
+ patch("lerobot.envs.robotwin._load_robotwin_setup_kwargs", return_value=fake_setup),
+ ):
+ yield
+
+
+# ---------------------------------------------------------------------------
+# RoboTwinEnv unit tests
+# ---------------------------------------------------------------------------
+
+
+class TestRoboTwinEnv:
+ def test_observation_space_shape(self):
+ """observation_space should have the configured h×w×3 for every camera."""
+ h, w = 240, 320
+ env = RoboTwinEnv(
+ task_name="beat_block_hammer",
+ observation_height=h,
+ observation_width=w,
+ camera_names=["head_camera", "left_camera"],
+ )
+ pixels_space = env.observation_space["pixels"]
+ assert pixels_space["head_camera"].shape == (h, w, 3)
+ assert pixels_space["left_camera"].shape == (h, w, 3)
+ assert "right_camera" not in pixels_space
+
+ def test_action_space(self):
+ env = RoboTwinEnv(task_name="beat_block_hammer")
+ assert env.action_space.shape == (ACTION_DIM,)
+ assert env.action_space.dtype == np.float32
+
+ def test_reset_returns_correct_obs_keys(self):
+ mock_task = _make_mock_task_env()
+ env = RoboTwinEnv(task_name="beat_block_hammer")
+ with _patch_runtime(mock_task):
+ obs, info = env.reset()
+
+ assert "pixels" in obs
+ for cam in ROBOTWIN_CAMERA_NAMES:
+ assert cam in obs["pixels"], f"Missing camera '{cam}' in obs"
+ assert "agent_pos" in obs
+ assert obs["agent_pos"].shape == (ACTION_DIM,)
+ assert info["is_success"] is False
+
+ def test_reset_calls_setup_demo(self):
+ mock_task = _make_mock_task_env()
+ env = RoboTwinEnv(task_name="beat_block_hammer")
+ with _patch_runtime(mock_task):
+ env.reset(seed=42)
+ # setup_demo receives the full YAML-derived kwargs plus seed + is_test;
+ # we only assert the caller-provided bits.
+ assert mock_task.setup_demo.call_count == 1
+ call_kwargs = mock_task.setup_demo.call_args.kwargs
+ assert call_kwargs["seed"] == 42
+ assert call_kwargs["is_test"] is True
+
+ def test_step_returns_correct_types(self):
+ mock_task = _make_mock_task_env()
+ env = RoboTwinEnv(task_name="beat_block_hammer")
+ action = np.zeros(ACTION_DIM, dtype=np.float32)
+ with _patch_runtime(mock_task):
+ env.reset()
+ obs, reward, terminated, truncated, info = env.step(action)
+
+ assert isinstance(obs, dict)
+ assert isinstance(reward, float)
+ assert isinstance(terminated, bool)
+ assert isinstance(truncated, bool)
+ assert isinstance(info, dict)
+
+ def test_step_wrong_action_shape_raises(self):
+ mock_task = _make_mock_task_env()
+ env = RoboTwinEnv(task_name="beat_block_hammer")
+ bad_action = np.zeros(7, dtype=np.float32) # wrong dim
+ with _patch_runtime(mock_task):
+ env.reset()
+ with pytest.raises(ValueError, match="Expected 1-D action"):
+ env.step(bad_action)
+
+ def test_success_terminates_episode(self):
+ mock_task = _make_mock_task_env()
+ mock_task.check_success.return_value = True
+ env = RoboTwinEnv(task_name="beat_block_hammer")
+ action = np.zeros(ACTION_DIM, dtype=np.float32)
+ with _patch_runtime(mock_task):
+ env.reset()
+ _, _, terminated, _, info = env.step(action)
+ assert terminated is True
+ assert info["is_success"] is True
+
+ def test_truncation_after_episode_length(self):
+ mock_task = _make_mock_task_env()
+ env = RoboTwinEnv(task_name="beat_block_hammer", episode_length=2)
+ action = np.zeros(ACTION_DIM, dtype=np.float32)
+ with _patch_runtime(mock_task):
+ env.reset()
+ env.step(action) # step 1
+ _, _, _, truncated, _ = env.step(action) # step 2 → truncated
+ assert truncated is True
+
+ def test_close_calls_close_env(self):
+ mock_task = _make_mock_task_env()
+ env = RoboTwinEnv(task_name="beat_block_hammer")
+ with _patch_runtime(mock_task):
+ env.reset()
+ env.close()
+ mock_task.close_env.assert_called_once()
+
+ def test_black_frame_for_missing_camera(self):
+ """If a camera key is absent from get_obs(), a black frame is returned."""
+ # Mock exposes only head_camera; we ask for both head_camera + left_camera.
+ mock_task = _make_mock_task_env(height=10, width=10, cameras=("head_camera",))
+ env = RoboTwinEnv(
+ task_name="beat_block_hammer",
+ camera_names=["head_camera", "left_camera"],
+ observation_height=10,
+ observation_width=10,
+ )
+ with _patch_runtime(mock_task):
+ obs, _ = env.reset()
+ assert obs["pixels"]["left_camera"].shape == (10, 10, 3)
+ assert obs["pixels"]["left_camera"].sum() == 0
+
+ def test_task_and_task_description_attributes(self):
+ env = RoboTwinEnv(task_name="beat_block_hammer")
+ assert env.task == "beat_block_hammer"
+ assert isinstance(env.task_description, str)
+
+ def test_deferred_init_env_is_none_before_reset(self):
+ env = RoboTwinEnv(task_name="beat_block_hammer")
+ assert env._env is None # noqa: SLF001 (testing internal state)
+
+
+# ---------------------------------------------------------------------------
+# create_robotwin_envs tests
+# ---------------------------------------------------------------------------
+
+
+class TestCreateRoboTwinEnvs:
+ def test_returns_correct_structure(self):
+ mock_task = _make_mock_task_env()
+ with _patch_runtime(mock_task):
+ envs = create_robotwin_envs(
+ task="beat_block_hammer",
+ n_envs=1,
+ env_cls=gym.vector.SyncVectorEnv,
+ )
+ assert "beat_block_hammer" in envs
+ assert 0 in envs["beat_block_hammer"]
+ assert isinstance(envs["beat_block_hammer"][0], gym.vector.SyncVectorEnv)
+
+ def test_multi_task(self):
+ mock_task = _make_mock_task_env()
+ with _patch_runtime(mock_task):
+ envs = create_robotwin_envs(
+ task="beat_block_hammer,click_bell",
+ n_envs=1,
+ env_cls=gym.vector.SyncVectorEnv,
+ )
+ assert set(envs.keys()) == {"beat_block_hammer", "click_bell"}
+
+ def test_unknown_task_raises(self):
+ with pytest.raises(ValueError, match="Unknown RoboTwin tasks"):
+ create_robotwin_envs(
+ task="not_a_real_task",
+ n_envs=1,
+ env_cls=gym.vector.SyncVectorEnv,
+ )
+
+ def test_invalid_n_envs_raises(self):
+ with pytest.raises(ValueError, match="n_envs must be a positive int"):
+ create_robotwin_envs(
+ task="beat_block_hammer",
+ n_envs=0,
+ env_cls=gym.vector.SyncVectorEnv,
+ )
+
+
+# ---------------------------------------------------------------------------
+# ROBOTWIN_TASKS list
+# ---------------------------------------------------------------------------
+
+
+def test_task_list_not_empty():
+ assert len(ROBOTWIN_TASKS) >= 50
+
+
+def test_all_tasks_are_strings():
+ assert all(isinstance(t, str) and t for t in ROBOTWIN_TASKS)
+
+
+def test_no_duplicate_tasks():
+ assert len(ROBOTWIN_TASKS) == len(set(ROBOTWIN_TASKS))