diff --git a/docker/Dockerfile.benchmark.robotwin b/docker/Dockerfile.benchmark.robotwin index 7d5b4eca5..936fa2379 100644 --- a/docker/Dockerfile.benchmark.robotwin +++ b/docker/Dockerfile.benchmark.robotwin @@ -12,188 +12,99 @@ # See the License for the specific language governing permissions and # limitations under the License. -# Isolated benchmark image for RoboTwin 2.0 integration tests. -# Full installation: SAPIEN, mplib, CuRobo, pytorch3d + simulation assets. -# -# RoboTwin install guide: https://robotwin-platform.github.io/doc/usage/robotwin-install.html -# Assets from: https://huggingface.co/datasets/TianxingChen/RoboTwin2.0 -# - embodiments.zip ~220 MB -# - objects.zip ~3.74 GB -# (background_texture.zip ~11 GB is skipped — not required for a smoke eval) +# Benchmark image for RoboTwin 2.0 integration tests. +# Extends the nightly GPU image with the RoboTwin simulator stack: +# sapien/mplib/pytorch3d + NVlabs CuRobo + embodiments.zip + objects.zip +# (~3.96 GB of assets; background_texture.zip ~11 GB skipped for smoke eval). # # Build: docker build -f docker/Dockerfile.benchmark.robotwin -t lerobot-benchmark-robotwin . # Run: docker run --gpus all --rm lerobot-benchmark-robotwin \ # lerobot-eval --env.type=robotwin --env.task=beat_block_hammer ... -# RoboTwin requires CUDA devel image for CuRobo compilation (nvcc needed). -# Python 3.12 required by lerobot (type alias syntax). open3d 0.19.0 has cp312 wheels. -ARG CUDA_VERSION=12.1.1 -ARG OS_VERSION=22.04 -FROM nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${OS_VERSION} +FROM huggingface/lerobot-gpu:latest -ARG PYTHON_VERSION=3.12 - -ENV DEBIAN_FRONTEND=noninteractive \ - PATH=/lerobot/.venv/bin:$PATH \ - CUDA_VISIBLE_DEVICES=0 \ - DEVICE=cuda \ - # NVIDIA Container Toolkit: expose all driver capabilities (includes Vulkan) - NVIDIA_DRIVER_CAPABILITIES=all \ - # SAPIEN uses Vulkan; point at the NVIDIA ICD we create below +ENV NVIDIA_DRIVER_CAPABILITIES=all \ VK_ICD_FILENAMES=/usr/share/vulkan/icd.d/nvidia_icd.json \ - # RoboTwin task modules live in /envs/; add to PYTHONPATH after clone. ROBOTWIN_ROOT=/opt/robotwin -# System deps — extended with cmake/ninja for CuRobo and pytorch3d compilation. -RUN apt-get update && apt-get install -y --no-install-recommends \ - software-properties-common build-essential git curl \ - libglib2.0-0 libgl1-mesa-glx libegl1-mesa ffmpeg \ - libusb-1.0-0-dev speech-dispatcher libgeos-dev portaudio19-dev \ - cmake pkg-config ninja-build \ - libvulkan1 vulkan-tools \ - && add-apt-repository -y ppa:deadsnakes/ppa \ - && apt-get update \ +# The nightly base is CUDA -base (no compiler, no Vulkan loader). CuRobo's +# `pip install -e .` runs nvcc, and SAPIEN renders via Vulkan — add both. +USER root +RUN apt-get update \ && apt-get install -y --no-install-recommends \ - python${PYTHON_VERSION} \ - python${PYTHON_VERSION}-venv \ - python${PYTHON_VERSION}-dev \ - && curl -LsSf https://astral.sh/uv/0.8.0/install.sh | sh \ - && mv /root/.local/bin/uv /usr/local/bin/uv \ + cuda-nvcc-12-4 cuda-cudart-dev-12-4 \ + libvulkan1 vulkan-tools \ && mkdir -p /usr/share/vulkan/icd.d \ && echo '{"file_format_version":"1.0.0","ICD":{"library_path":"libGLX_nvidia.so.0","api_version":"1.3.0"}}' \ > /usr/share/vulkan/icd.d/nvidia_icd.json \ - && useradd --create-home --shell /bin/bash user_lerobot \ - && usermod -aG sudo user_lerobot \ + && git clone --depth=1 https://github.com/RoboTwin-Platform/RoboTwin.git ${ROBOTWIN_ROOT} \ + && chown -R user_lerobot:user_lerobot ${ROBOTWIN_ROOT} \ && apt-get clean && rm -rf /var/lib/apt/lists/* - -WORKDIR /lerobot -RUN chown -R user_lerobot:user_lerobot /lerobot USER user_lerobot -ENV HOME=/home/user_lerobot \ - HF_HOME=/home/user_lerobot/.cache/huggingface \ - HF_LEROBOT_HOME=/home/user_lerobot/.cache/huggingface/lerobot \ - TORCH_HOME=/home/user_lerobot/.cache/torch \ - TRITON_CACHE_DIR=/home/user_lerobot/.cache/triton +# RoboTwin runtime deps (av is already in the base via [av-dep]). +RUN uv pip install --no-cache \ + "sapien==3.0.0b1" "mplib==0.2.1" "transforms3d==0.4.2" "trimesh==4.4.3" \ + "open3d==0.19.0" "imageio==2.34.2" termcolor zarr pydantic h5py -RUN uv venv --python python${PYTHON_VERSION} +# pytorch3d has no universal wheel; must be built from source (~10 min, cached). +RUN uv pip install --no-cache --no-build-isolation \ + "git+https://github.com/facebookresearch/pytorch3d.git@stable" -# ── 1. Install base lerobot ──────────────────────────────────────────────── -COPY --chown=user_lerobot:user_lerobot setup.py pyproject.toml uv.lock README.md MANIFEST.in ./ -COPY --chown=user_lerobot:user_lerobot src/ src/ - -# Install lerobot base only (no benchmark extras — RoboTwin is source-only). -# smolvla → transformers for SmolVLA policy; av-dep → PyAV for MP4 rollout videos. -RUN uv pip install --no-cache -e ".[smolvla,av-dep]" - -# ── 2. Install RoboTwin 2.0 simulator stack ──────────────────────────────── -# Clone at a pinned commit for reproducibility. -USER root -RUN git clone --depth=1 https://github.com/RoboTwin-Platform/RoboTwin.git ${ROBOTWIN_ROOT} \ - && chown -R user_lerobot:user_lerobot ${ROBOTWIN_ROOT} -USER user_lerobot - -# Install RoboTwin-specific packages on top of the lerobot venv. -# We intentionally skip: torch/torchvision (already in lerobot), gymnasium -# (lerobot uses 1.x; RoboTwin's API is wrapped so 0.29 is not needed), scipy -# (version-pinned by lerobot), huggingface_hub (pinned by lerobot), wandb, -# azure, openai, pyglet (UI only). -# Install RoboTwin-specific packages. Use --python to pin resolution to the -# venv's Python 3.10 (uv sync --locked may have changed the default target). -RUN uv pip install --no-cache --python .venv/bin/python \ - "sapien==3.0.0b1" \ - "mplib==0.2.1" \ - "transforms3d==0.4.2" \ - "trimesh==4.4.3" \ - "open3d==0.19.0" \ - "imageio==2.34.2" \ - "termcolor" \ - "zarr" \ - "pydantic" \ - "h5py" - -# pytorch3d — must be built from source (no universal wheel available). -# This is the slowest step (~10 min); cached in subsequent builds. -RUN uv pip install --no-cache --no-build-isolation --python .venv/bin/python \ - "git+https://github.com/facebookresearch/pytorch3d.git@stable" - -# CuRobo — NVIDIA motion generation library; requires nvcc (devel image). -# TORCH_CUDA_ARCH_LIST must be set or the build fails with empty arch list. +# CuRobo — NVlabs motion generator; TORCH_CUDA_ARCH_LIST must be set or the +# build aborts on an empty arch list. RUN cd ${ROBOTWIN_ROOT}/envs \ && git clone --depth=1 https://github.com/NVlabs/curobo.git \ && cd curobo \ && TORCH_CUDA_ARCH_LIST="7.0;7.5;8.0;8.6;8.9;9.0" \ uv pip install -e . --no-build-isolation --no-cache -# ── 3. Apply upstream patches (mirrors script/_install.sh) ───────────────── -# patch 1: mplib — remove `or collide` from planner.py line 807. +# Upstream patches (mirror RoboTwin's script/_install.sh). +# mplib: drop a broken `or collide` clause in planner.py. +# sapien: fix URDF loader encoding + .srdf extension check. RUN python - <<'EOF' -import re, pathlib, site +import pathlib, re, site for d in site.getsitepackages(): p = pathlib.Path(d) / "mplib" / "planner.py" if p.exists(): - src = p.read_text() - patched = re.sub(r"\bor collide\b", "", src, count=1) - p.write_text(patched) + p.write_text(re.sub(r"\bor collide\b", "", p.read_text(), count=1)) print(f"mplib patch applied: {p}") - break -EOF - -# patch 2: sapien URDF loader — add UTF-8 encoding + fix .srdf extension. -RUN python - <<'EOF' -import pathlib, site -for d in site.getsitepackages(): p = pathlib.Path(d) / "sapien" / "wrapper" / "urdf_loader.py" if p.exists(): - src = p.read_text() - # add encoding='utf-8' to open() calls that lack it - patched = src.replace("open(", "open(").replace( - 'with open(srdf_path) as f:', 'with open(srdf_path, encoding="utf-8") as f:' + src = p.read_text().replace( + "with open(srdf_path) as f:", 'with open(srdf_path, encoding="utf-8") as f:' ).replace('"srdf"', '".srdf"') - p.write_text(patched) + p.write_text(src) print(f"sapien patch applied: {p}") - break EOF -# ── 4. Download simulation assets from HuggingFace ───────────────────────── -# embodiments.zip (~220 MB) + objects.zip (~3.74 GB). -# background_texture.zip (~11 GB) is skipped — not required for a smoke eval. -# Set HF_TOKEN (passed as --build-arg or via --secret) for authenticated access. +# Simulation assets from TianxingChen/RoboTwin2.0: embodiments (~220 MB) + +# objects (~3.74 GB). background_texture (~11 GB) is intentionally skipped. ARG HF_TOKEN="" RUN python - <<'EOF' import os, pathlib, zipfile from huggingface_hub import hf_hub_download -token = os.environ.get("HF_TOKEN") or None assets_dir = pathlib.Path(os.environ["ROBOTWIN_ROOT"]) / "assets" assets_dir.mkdir(parents=True, exist_ok=True) - for fname in ("embodiments.zip", "objects.zip"): - print(f"Downloading {fname} ...") local = hf_hub_download( repo_id="TianxingChen/RoboTwin2.0", repo_type="dataset", filename=fname, - token=token, + token=os.environ.get("HF_TOKEN") or None, local_dir=str(assets_dir), ) - print(f"Extracting {fname} ...") with zipfile.ZipFile(local, "r") as z: z.extractall(str(assets_dir)) - pathlib.Path(local).unlink() # remove zip after extraction - print(f"{fname} done.") + pathlib.Path(local).unlink() EOF -# Update embodiment config paths to reflect the installation directory. -RUN cd ${ROBOTWIN_ROOT} \ - && python script/update_embodiment_config_path.py +RUN cd ${ROBOTWIN_ROOT} && python script/update_embodiment_config_path.py -# ── 5. Finalise ──────────────────────────────────────────────────────────── -# Expose RoboTwin task modules on PYTHONPATH so `import envs.` works. ENV PYTHONPATH="${ROBOTWIN_ROOT}:${PYTHONPATH}" +# Overlay the PR's source code on top of the nightly image. COPY --chown=user_lerobot:user_lerobot . . -RUN chmod +x /lerobot/.venv/lib/python${PYTHON_VERSION}/site-packages/triton/backends/nvidia/bin/ptxas 2>/dev/null || true - CMD ["/bin/bash"] diff --git a/docs/source/robotwin.mdx b/docs/source/robotwin.mdx index 4850bc233..815e157dd 100644 --- a/docs/source/robotwin.mdx +++ b/docs/source/robotwin.mdx @@ -5,7 +5,7 @@ RoboTwin 2.0 is a **large-scale dual-arm manipulation benchmark** built on the S - Paper: [RoboTwin 2.0: A Scalable Data Generator and Benchmark with Strong Domain Randomization for Robust Bimanual Robotic Manipulation](https://robotwin-platform.github.io) - GitHub: [RoboTwin-Platform/RoboTwin](https://github.com/RoboTwin-Platform/RoboTwin) - Leaderboard: [robotwin-platform.github.io/leaderboard](https://robotwin-platform.github.io/leaderboard) -- Dataset: [hxma/RoboTwin-LeRobot-v3.0](https://huggingface.co/datasets/hxma/RoboTwin-LeRobot-v3.0) +- Dataset: [lerobot/robotwin_unified](https://huggingface.co/datasets/lerobot/robotwin_unified) ## Overview @@ -41,7 +41,7 @@ Pass a comma-separated list to `--env.task` to run multiple tasks in a single ev The RoboTwin 2.0 dataset is available in **LeRobot v3.0 format** on the Hugging Face Hub: ``` -hxma/RoboTwin-LeRobot-v3.0 +lerobot/robotwin_unified ``` It contains over 100,000 pre-collected trajectories across all 60 tasks (79.6 GB, Apache 2.0 license). No format conversion is needed — it is already in the correct LeRobot v3.0 schema with video observations and action labels. @@ -51,7 +51,7 @@ You can load it directly with the HF Datasets library: ```python from datasets import load_dataset -ds = load_dataset("hxma/RoboTwin-LeRobot-v3.0", split="train") +ds = load_dataset("lerobot/robotwin_unified", split="train") ``` ## Installation diff --git a/src/lerobot/envs/configs.py b/src/lerobot/envs/configs.py index 9f27a49da..2f024d3fa 100644 --- a/src/lerobot/envs/configs.py +++ b/src/lerobot/envs/configs.py @@ -586,7 +586,7 @@ class RoboTwinEnvConfig(EnvConfig): (7 per arm). All four cameras are enabled by default. See: https://robotwin-platform.github.io - Dataset: https://huggingface.co/datasets/hxma/RoboTwin-LeRobot-v3.0 + Dataset: https://huggingface.co/datasets/lerobot/robotwin_unified """ task: str = "beat_block_hammer" # single task or comma-separated list