From 1157fb11e6ed24ed4f98130e8a4083668fe2cb33 Mon Sep 17 00:00:00 2001 From: Pepijn Date: Mon, 20 Apr 2026 15:18:41 +0200 Subject: [PATCH] fix: integrate PR #3315 review feedback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - envs(robotwin): default `observation_height/width` in `create_robotwin_envs` to `DEFAULT_CAMERA_H/W` (240/320) so they match the D435 dims baked into `task_config/demo_clean.yml`. - envs(robotwin): resolve `task_config/demo_clean.yml` via `CONFIGS_PATH` instead of a cwd-relative path; works regardless of where `lerobot-eval` is invoked. - envs(robotwin): replace `print()` calls in `create_robotwin_envs` with `logger.info(...)` (module-level `logger = logging.getLogger(__name__)`). - envs(robotwin): use `_LazyAsyncVectorEnv` for the async path so async workers start lazily (matches LIBERO / RoboCasa / VLABench). - envs(robotwin): cast `agent_pos` space + joint-state output to float32 end-to-end (was mixed float64/float32). - envs(configs): use the existing `_make_vec_env_cls(use_async, n_envs)` helper in `RoboTwinEnvConfig.create_envs`; drop the `get_env_processors` override so RoboTwin uses the identity processor inherited from `EnvConfig`. - processor: delete `RoboTwinProcessorStep` — the float32 cast now happens in the wrapper itself, so the processor is redundant. - tests: drop the `TestRoboTwinProcessorStep` suite; update the mock obs fixture to use float32 `joint_action.vector`. - ci: hoist `ROBOTWIN_POLICY` and `ROBOTWIN_TASKS` to job-level env vars so the task list and policy aren't duplicated across eval / extract / parse steps. - docker: pin RoboTwin + CuRobo upstream clones to commit SHAs (`RoboTwin@0aeea2d6`, `curobo@ca941586`) for reproducibility. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- .github/workflows/benchmark_tests.yml | 14 ++++++--- docker/Dockerfile.benchmark.robotwin | 12 ++++++-- src/lerobot/envs/configs.py | 11 +------ src/lerobot/envs/robotwin.py | 42 ++++++++++++++++++++------ src/lerobot/processor/env_processor.py | 26 ---------------- tests/envs/test_robotwin.py | 34 +-------------------- 6 files changed, 52 insertions(+), 87 deletions(-) diff --git a/.github/workflows/benchmark_tests.yml b/.github/workflows/benchmark_tests.yml index 9b11c8c19..70258c7f6 100644 --- a/.github/workflows/benchmark_tests.yml +++ b/.github/workflows/benchmark_tests.yml @@ -328,6 +328,8 @@ jobs: group: aws-g6-4xlarge-plus env: HF_USER_TOKEN: ${{ secrets.LEROBOT_HF_USER }} + ROBOTWIN_POLICY: lerobot/smolvla_robotwin + ROBOTWIN_TASKS: beat_block_hammer,click_bell,handover_block,stack_blocks_two,click_alarmclock,open_microwave,adjust_bottle,lift_pot,stamp_seal,turn_switch steps: - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 @@ -371,13 +373,15 @@ jobs: --shm-size=4g \ -e HF_HOME=/tmp/hf \ -e HF_USER_TOKEN="${HF_USER_TOKEN}" \ + -e ROBOTWIN_POLICY="${ROBOTWIN_POLICY}" \ + -e ROBOTWIN_TASKS="${ROBOTWIN_TASKS}" \ lerobot-benchmark-robotwin:ci \ bash -c " hf auth login --token \"\$HF_USER_TOKEN\" --add-to-git-credential 2>/dev/null || true cd /opt/robotwin && lerobot-eval \ - --policy.path=lerobot/smolvla_robotwin \ + --policy.path=\"\$ROBOTWIN_POLICY\" \ --env.type=robotwin \ - --env.task=beat_block_hammer,click_bell,handover_block,stack_blocks_two,click_alarmclock,open_microwave,adjust_bottle,lift_pot,stamp_seal,turn_switch \ + --env.task=\"\$ROBOTWIN_TASKS\" \ --eval.batch_size=1 \ --eval.n_episodes=1 \ --eval.use_async_envs=false \ @@ -386,7 +390,7 @@ jobs: --output_dir=/tmp/eval-artifacts python /lerobot/scripts/ci/extract_task_descriptions.py \ --env robotwin \ - --task 
beat_block_hammer,click_bell,handover_block,stack_blocks_two,click_alarmclock,open_microwave,adjust_bottle,lift_pot,stamp_seal,turn_switch \ + --task \"\$ROBOTWIN_TASKS\" \ --output /tmp/eval-artifacts/task_descriptions.json " @@ -403,8 +407,8 @@ jobs: python3 scripts/ci/parse_eval_metrics.py \ --artifacts-dir /tmp/robotwin-artifacts \ --env robotwin \ - --task beat_block_hammer,click_bell,handover_block,stack_blocks_two,click_alarmclock,open_microwave,adjust_bottle,lift_pot,stamp_seal,turn_switch \ - --policy lerobot/smolvla_robotwin + --task "${ROBOTWIN_TASKS}" \ + --policy "${ROBOTWIN_POLICY}" - name: Upload RoboTwin rollout video if: always() diff --git a/docker/Dockerfile.benchmark.robotwin b/docker/Dockerfile.benchmark.robotwin index 7a24071eb..423854c31 100644 --- a/docker/Dockerfile.benchmark.robotwin +++ b/docker/Dockerfile.benchmark.robotwin @@ -30,6 +30,9 @@ ENV NVIDIA_DRIVER_CAPABILITIES=all \ # The nightly base is CUDA -base (no compiler, no Vulkan loader). CuRobo's # `pip install -e .` runs nvcc, and SAPIEN renders via Vulkan — add both. USER root +# Pinned upstream SHA for reproducible benchmark runs. Bump when we need +# an upstream fix; don't rely on `main` drift. 
+ARG ROBOTWIN_SHA=0aeea2d669c0f8516f4d5785f0aa33ba812c14b4 RUN apt-get update \ && apt-get install -y --no-install-recommends \ cuda-nvcc-12-4 cuda-cudart-dev-12-4 \ @@ -37,7 +40,8 @@ RUN apt-get update \ && mkdir -p /usr/share/vulkan/icd.d \ && echo '{"file_format_version":"1.0.0","ICD":{"library_path":"libGLX_nvidia.so.0","api_version":"1.3.0"}}' \ > /usr/share/vulkan/icd.d/nvidia_icd.json \ - && git clone --depth=1 https://github.com/RoboTwin-Platform/RoboTwin.git ${ROBOTWIN_ROOT} \ + && git clone https://github.com/RoboTwin-Platform/RoboTwin.git ${ROBOTWIN_ROOT} \ + && git -C ${ROBOTWIN_ROOT} checkout ${ROBOTWIN_SHA} \ && chown -R user_lerobot:user_lerobot ${ROBOTWIN_ROOT} \ && apt-get clean && rm -rf /var/lib/apt/lists/* USER user_lerobot @@ -52,9 +56,11 @@ RUN uv pip install --no-cache --no-build-isolation \ "git+https://github.com/facebookresearch/pytorch3d.git@stable" # CuRobo — NVlabs motion generator; TORCH_CUDA_ARCH_LIST must be set or the -# build aborts on an empty arch list. +# build aborts on an empty arch list. Pinned SHA for reproducibility. +ARG CUROBO_SHA=ca941586c33b8482ed9c0e74d60f23efd64b516a RUN cd ${ROBOTWIN_ROOT}/envs \ - && git clone --depth=1 https://github.com/NVlabs/curobo.git \ + && git clone https://github.com/NVlabs/curobo.git \ + && git -C curobo checkout ${CUROBO_SHA} \ && cd curobo \ && TORCH_CUDA_ARCH_LIST="7.0;7.5;8.0;8.6;8.9;9.0" \ uv pip install -e . 
--no-build-isolation --no-cache diff --git a/src/lerobot/envs/configs.py b/src/lerobot/envs/configs.py index a85a9a2fe..108a88f1d 100644 --- a/src/lerobot/envs/configs.py +++ b/src/lerobot/envs/configs.py @@ -650,7 +650,7 @@ class RoboTwinEnvConfig(EnvConfig): if not self.task: raise ValueError("RoboTwinEnvConfig requires `task` to be specified.") - env_cls = gym.vector.AsyncVectorEnv if (use_async_envs and n_envs > 1) else gym.vector.SyncVectorEnv + env_cls = _make_vec_env_cls(use_async_envs, n_envs) cam_list = [c.strip() for c in self.camera_names.split(",") if c.strip()] return create_robotwin_envs( task=self.task, @@ -661,12 +661,3 @@ class RoboTwinEnvConfig(EnvConfig): observation_width=self.observation_width, episode_length=self.episode_length, ) - - def get_env_processors(self): - from lerobot.processor.env_processor import RoboTwinProcessorStep - from lerobot.processor.pipeline import PolicyProcessorPipeline - - return ( - PolicyProcessorPipeline(steps=[RoboTwinProcessorStep()]), - PolicyProcessorPipeline(steps=[]), - ) diff --git a/src/lerobot/envs/robotwin.py b/src/lerobot/envs/robotwin.py index 5959b9738..823f14fa0 100644 --- a/src/lerobot/envs/robotwin.py +++ b/src/lerobot/envs/robotwin.py @@ -16,6 +16,7 @@ from __future__ import annotations import importlib +import logging from collections import defaultdict from collections.abc import Callable, Sequence from functools import partial @@ -28,6 +29,10 @@ from gymnasium import spaces from lerobot.types import RobotObservation +from .utils import _LazyAsyncVectorEnv + +logger = logging.getLogger(__name__) + # Camera names as used by RoboTwin 2.0. The wrapper appends "_rgb" when looking # up keys in get_obs() output (e.g. "head_camera" → "head_camera_rgb"). ROBOTWIN_CAMERA_NAMES: tuple[str, ...] 
= ( @@ -126,7 +131,7 @@ def _load_robotwin_setup_kwargs(task_name: str) -> dict[str, Any]: from envs import CONFIGS_PATH # type: ignore[import-not-found] task_config = "demo_clean" - with open(f"./task_config/{task_config}.yml", encoding="utf-8") as f: + with open(os.path.join(CONFIGS_PATH, f"{task_config}.yml"), encoding="utf-8") as f: args = yaml.safe_load(f) # Resolve embodiment — demo_clean.yml uses [aloha-agilex] (dual-arm single robot) @@ -262,7 +267,7 @@ class RoboTwinEnv(gym.Env): self.observation_space = spaces.Dict( { "pixels": spaces.Dict(image_spaces), - "agent_pos": spaces.Box(low=-np.inf, high=np.inf, shape=(ACTION_DIM,), dtype=np.float64), + "agent_pos": spaces.Box(low=-np.inf, high=np.inf, shape=(ACTION_DIM,), dtype=np.float32), } ) self.action_space = spaces.Box( @@ -303,12 +308,12 @@ class RoboTwinEnv(gym.Env): ja = raw.get("joint_action") or {} vec = ja.get("vector") if vec is not None: - arr = np.asarray(vec, dtype=np.float64).ravel() + arr = np.asarray(vec, dtype=np.float32).ravel() joint_state = ( - arr[:ACTION_DIM] if arr.size >= ACTION_DIM else np.zeros(ACTION_DIM, dtype=np.float64) + arr[:ACTION_DIM] if arr.size >= ACTION_DIM else np.zeros(ACTION_DIM, dtype=np.float32) ) else: - joint_state = np.zeros(ACTION_DIM, dtype=np.float64) + joint_state = np.zeros(ACTION_DIM, dtype=np.float32) return {"pixels": images, "agent_pos": joint_state} @@ -415,8 +420,8 @@ def create_robotwin_envs( n_envs: int, env_cls: Callable[[Sequence[Callable[[], Any]]], Any] | None = None, camera_names: Sequence[str] = ROBOTWIN_CAMERA_NAMES, - observation_height: int = 480, - observation_width: int = 640, + observation_height: int = DEFAULT_CAMERA_H, + observation_width: int = DEFAULT_CAMERA_W, episode_length: int = DEFAULT_EPISODE_LENGTH, ) -> dict[str, dict[int, Any]]: """Create vectorized RoboTwin 2.0 environments. @@ -448,7 +453,16 @@ def create_robotwin_envs( if unknown: raise ValueError(f"Unknown RoboTwin tasks: {unknown}. 
Available tasks: {sorted(ROBOTWIN_TASKS)}") - print(f"Creating RoboTwin envs | tasks={task_names} | n_envs(per task)={n_envs}") + logger.info( + "Creating RoboTwin envs | tasks=%s | n_envs(per task)=%d", + task_names, + n_envs, + ) + + is_async = env_cls is gym.vector.AsyncVectorEnv + cached_obs_space: spaces.Space | None = None + cached_act_space: spaces.Space | None = None + cached_metadata: dict[str, Any] | None = None out: dict[str, dict[int, Any]] = defaultdict(dict) for task_name in task_names: @@ -460,7 +474,15 @@ def create_robotwin_envs( observation_width=observation_width, episode_length=episode_length, ) - out[task_name][0] = env_cls(fns) - print(f"Built vec env | task={task_name} | n_envs={n_envs}") + if is_async: + lazy = _LazyAsyncVectorEnv(fns, cached_obs_space, cached_act_space, cached_metadata) + if cached_obs_space is None: + cached_obs_space = lazy.observation_space + cached_act_space = lazy.action_space + cached_metadata = lazy.metadata + out[task_name][0] = lazy + else: + out[task_name][0] = env_cls(fns) + logger.info("Built vec env | task=%s | n_envs=%d", task_name, n_envs) return {k: dict(v) for k, v in out.items()} diff --git a/src/lerobot/processor/env_processor.py b/src/lerobot/processor/env_processor.py index c826a4c70..75cbb79de 100644 --- a/src/lerobot/processor/env_processor.py +++ b/src/lerobot/processor/env_processor.py @@ -226,29 +226,3 @@ class IsaaclabArenaProcessorStep(ObservationProcessorStep): def observation(self, observation): return self._process_observation(observation) - - -@dataclass -@ProcessorStepRegistry.register(name="robotwin_processor") -class RoboTwinProcessorStep(ObservationProcessorStep): - """Passthrough step for RoboTwin observations, casting state to float32. - - RoboTwin observations already arrive in LeRobot format (observation.images.* - and observation.state), so this step mainly ensures state dtype is float32. 
- """ - - def _process_observation(self, observation): - processed_obs = dict(observation) - if OBS_STATE in processed_obs: - state = processed_obs[OBS_STATE] - if hasattr(state, "dtype") and state.dtype != torch.float32: - processed_obs[OBS_STATE] = state.float() - return processed_obs - - def transform_features( - self, features: dict[PipelineFeatureType, dict[str, PolicyFeature]] - ) -> dict[PipelineFeatureType, dict[str, PolicyFeature]]: - return features - - def observation(self, observation): - return self._process_observation(observation) diff --git a/tests/envs/test_robotwin.py b/tests/envs/test_robotwin.py index b31f63ce6..fcd45adbf 100644 --- a/tests/envs/test_robotwin.py +++ b/tests/envs/test_robotwin.py @@ -28,7 +28,6 @@ from unittest.mock import MagicMock, patch import gymnasium as gym import numpy as np import pytest -import torch from lerobot.envs.robotwin import ( ACTION_DIM, @@ -56,7 +55,7 @@ def _make_mock_task_env( """ obs_dict = { "observation": {cam: {"rgb": np.zeros((height, width, 3), dtype=np.uint8)} for cam in cameras}, - "joint_action": {"vector": np.zeros(ACTION_DIM, dtype=np.float64)}, + "joint_action": {"vector": np.zeros(ACTION_DIM, dtype=np.float32)}, "endpose": {}, } @@ -281,34 +280,3 @@ def test_all_tasks_are_strings(): def test_no_duplicate_tasks(): assert len(ROBOTWIN_TASKS) == len(set(ROBOTWIN_TASKS)) - - -# --------------------------------------------------------------------------- -# RoboTwinProcessorStep -# --------------------------------------------------------------------------- - - -class TestRoboTwinProcessorStep: - def test_passes_through_images_and_state(self): - from lerobot.processor.env_processor import RoboTwinProcessorStep - from lerobot.utils.constants import OBS_IMAGES, OBS_STATE - - step = RoboTwinProcessorStep() - obs = { - f"{OBS_IMAGES}.head_camera": torch.zeros(1, 3, 4, 4), - f"{OBS_IMAGES}.left_camera": torch.zeros(1, 3, 4, 4), - OBS_STATE: torch.zeros(1, 14), - } - result = step.observation(obs) - assert 
f"{OBS_IMAGES}.head_camera" in result - assert f"{OBS_IMAGES}.left_camera" in result - assert result[OBS_STATE].dtype == torch.float32 - - def test_state_cast_to_float32(self): - from lerobot.processor.env_processor import RoboTwinProcessorStep - from lerobot.utils.constants import OBS_STATE - - step = RoboTwinProcessorStep() - obs = {OBS_STATE: torch.zeros(1, 14, dtype=torch.float64)} - result = step.observation(obs) - assert result[OBS_STATE].dtype == torch.float32