fix: integrate PR #3315 review feedback

- envs(robotwin): default `observation_height/width` in
  `create_robotwin_envs` to `DEFAULT_CAMERA_H/W` (240/320) so they
  match the D435 dims baked into `task_config/demo_clean.yml`.
- envs(robotwin): resolve `task_config/demo_clean.yml` via
  `CONFIGS_PATH` instead of a cwd-relative path; works regardless
  of where `lerobot-eval` is invoked.
- envs(robotwin): replace `print()` calls in `create_robotwin_envs`
  with `logger.info(...)` (module-level `logger = logging.getLogger`).
- envs(robotwin): use `_LazyAsyncVectorEnv` for the async path so
  async workers start lazily (matches LIBERO / RoboCasa / VLABench).
- envs(robotwin): cast `agent_pos` space + joint-state output to
  float32 end-to-end (was mixed float64/float32).
- envs(configs): use the existing `_make_vec_env_cls(use_async,
  n_envs)` helper in `RoboTwinEnvConfig.create_envs`; drop the
  `get_env_processors` override so RoboTwin uses the identity
  processor inherited from `EnvConfig`.
- processor: delete `RoboTwinProcessorStep` — the float32 cast now
  happens in the wrapper itself, so the processor is redundant.
- tests: drop the `TestRoboTwinProcessorStep` suite; update the
  mock obs fixture to use float32 `joint_action.vector`.
- ci: hoist `ROBOTWIN_POLICY` and `ROBOTWIN_TASKS` to job-level
  env vars so the task list and policy aren't duplicated across
  eval / extract / parse steps.
- docker: pin RoboTwin + CuRobo upstream clones to commit SHAs
  (`RoboTwin@0aeea2d6`, `curobo@ca941586`) for reproducibility.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Pepijn
2026-04-20 15:18:41 +02:00
parent 0fed8b45c2
commit 1157fb11e6
6 changed files with 52 additions and 87 deletions
+9 -5
View File
@@ -328,6 +328,8 @@ jobs:
group: aws-g6-4xlarge-plus group: aws-g6-4xlarge-plus
env: env:
HF_USER_TOKEN: ${{ secrets.LEROBOT_HF_USER }} HF_USER_TOKEN: ${{ secrets.LEROBOT_HF_USER }}
ROBOTWIN_POLICY: lerobot/smolvla_robotwin
ROBOTWIN_TASKS: beat_block_hammer,click_bell,handover_block,stack_blocks_two,click_alarmclock,open_microwave,adjust_bottle,lift_pot,stamp_seal,turn_switch
steps: steps:
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
@@ -371,13 +373,15 @@ jobs:
--shm-size=4g \ --shm-size=4g \
-e HF_HOME=/tmp/hf \ -e HF_HOME=/tmp/hf \
-e HF_USER_TOKEN="${HF_USER_TOKEN}" \ -e HF_USER_TOKEN="${HF_USER_TOKEN}" \
-e ROBOTWIN_POLICY="${ROBOTWIN_POLICY}" \
-e ROBOTWIN_TASKS="${ROBOTWIN_TASKS}" \
lerobot-benchmark-robotwin:ci \ lerobot-benchmark-robotwin:ci \
bash -c " bash -c "
hf auth login --token \"\$HF_USER_TOKEN\" --add-to-git-credential 2>/dev/null || true hf auth login --token \"\$HF_USER_TOKEN\" --add-to-git-credential 2>/dev/null || true
cd /opt/robotwin && lerobot-eval \ cd /opt/robotwin && lerobot-eval \
--policy.path=lerobot/smolvla_robotwin \ --policy.path=\"\$ROBOTWIN_POLICY\" \
--env.type=robotwin \ --env.type=robotwin \
--env.task=beat_block_hammer,click_bell,handover_block,stack_blocks_two,click_alarmclock,open_microwave,adjust_bottle,lift_pot,stamp_seal,turn_switch \ --env.task=\"\$ROBOTWIN_TASKS\" \
--eval.batch_size=1 \ --eval.batch_size=1 \
--eval.n_episodes=1 \ --eval.n_episodes=1 \
--eval.use_async_envs=false \ --eval.use_async_envs=false \
@@ -386,7 +390,7 @@ jobs:
--output_dir=/tmp/eval-artifacts --output_dir=/tmp/eval-artifacts
python /lerobot/scripts/ci/extract_task_descriptions.py \ python /lerobot/scripts/ci/extract_task_descriptions.py \
--env robotwin \ --env robotwin \
--task beat_block_hammer,click_bell,handover_block,stack_blocks_two,click_alarmclock,open_microwave,adjust_bottle,lift_pot,stamp_seal,turn_switch \ --task \"\$ROBOTWIN_TASKS\" \
--output /tmp/eval-artifacts/task_descriptions.json --output /tmp/eval-artifacts/task_descriptions.json
" "
@@ -403,8 +407,8 @@ jobs:
python3 scripts/ci/parse_eval_metrics.py \ python3 scripts/ci/parse_eval_metrics.py \
--artifacts-dir /tmp/robotwin-artifacts \ --artifacts-dir /tmp/robotwin-artifacts \
--env robotwin \ --env robotwin \
--task beat_block_hammer,click_bell,handover_block,stack_blocks_two,click_alarmclock,open_microwave,adjust_bottle,lift_pot,stamp_seal,turn_switch \ --task "${ROBOTWIN_TASKS}" \
--policy lerobot/smolvla_robotwin --policy "${ROBOTWIN_POLICY}"
- name: Upload RoboTwin rollout video - name: Upload RoboTwin rollout video
if: always() if: always()
+9 -3
View File
@@ -30,6 +30,9 @@ ENV NVIDIA_DRIVER_CAPABILITIES=all \
# The nightly base is CUDA -base (no compiler, no Vulkan loader). CuRobo's # The nightly base is CUDA -base (no compiler, no Vulkan loader). CuRobo's
# `pip install -e .` runs nvcc, and SAPIEN renders via Vulkan — add both. # `pip install -e .` runs nvcc, and SAPIEN renders via Vulkan — add both.
USER root USER root
# Pinned upstream SHA for reproducible benchmark runs. Bump when we need
# an upstream fix; don't rely on `main` drift.
ARG ROBOTWIN_SHA=0aeea2d669c0f8516f4d5785f0aa33ba812c14b4
RUN apt-get update \ RUN apt-get update \
&& apt-get install -y --no-install-recommends \ && apt-get install -y --no-install-recommends \
cuda-nvcc-12-4 cuda-cudart-dev-12-4 \ cuda-nvcc-12-4 cuda-cudart-dev-12-4 \
@@ -37,7 +40,8 @@ RUN apt-get update \
&& mkdir -p /usr/share/vulkan/icd.d \ && mkdir -p /usr/share/vulkan/icd.d \
&& echo '{"file_format_version":"1.0.0","ICD":{"library_path":"libGLX_nvidia.so.0","api_version":"1.3.0"}}' \ && echo '{"file_format_version":"1.0.0","ICD":{"library_path":"libGLX_nvidia.so.0","api_version":"1.3.0"}}' \
> /usr/share/vulkan/icd.d/nvidia_icd.json \ > /usr/share/vulkan/icd.d/nvidia_icd.json \
&& git clone --depth=1 https://github.com/RoboTwin-Platform/RoboTwin.git ${ROBOTWIN_ROOT} \ && git clone https://github.com/RoboTwin-Platform/RoboTwin.git ${ROBOTWIN_ROOT} \
&& git -C ${ROBOTWIN_ROOT} checkout ${ROBOTWIN_SHA} \
&& chown -R user_lerobot:user_lerobot ${ROBOTWIN_ROOT} \ && chown -R user_lerobot:user_lerobot ${ROBOTWIN_ROOT} \
&& apt-get clean && rm -rf /var/lib/apt/lists/* && apt-get clean && rm -rf /var/lib/apt/lists/*
USER user_lerobot USER user_lerobot
@@ -52,9 +56,11 @@ RUN uv pip install --no-cache --no-build-isolation \
"git+https://github.com/facebookresearch/pytorch3d.git@stable" "git+https://github.com/facebookresearch/pytorch3d.git@stable"
# CuRobo — NVlabs motion generator; TORCH_CUDA_ARCH_LIST must be set or the # CuRobo — NVlabs motion generator; TORCH_CUDA_ARCH_LIST must be set or the
# build aborts on an empty arch list. # build aborts on an empty arch list. Pinned SHA for reproducibility.
ARG CUROBO_SHA=ca941586c33b8482ed9c0e74d60f23efd64b516a
RUN cd ${ROBOTWIN_ROOT}/envs \ RUN cd ${ROBOTWIN_ROOT}/envs \
&& git clone --depth=1 https://github.com/NVlabs/curobo.git \ && git clone https://github.com/NVlabs/curobo.git \
&& git -C curobo checkout ${CUROBO_SHA} \
&& cd curobo \ && cd curobo \
&& TORCH_CUDA_ARCH_LIST="7.0;7.5;8.0;8.6;8.9;9.0" \ && TORCH_CUDA_ARCH_LIST="7.0;7.5;8.0;8.6;8.9;9.0" \
uv pip install -e . --no-build-isolation --no-cache uv pip install -e . --no-build-isolation --no-cache
+1 -10
View File
@@ -650,7 +650,7 @@ class RoboTwinEnvConfig(EnvConfig):
if not self.task: if not self.task:
raise ValueError("RoboTwinEnvConfig requires `task` to be specified.") raise ValueError("RoboTwinEnvConfig requires `task` to be specified.")
env_cls = gym.vector.AsyncVectorEnv if (use_async_envs and n_envs > 1) else gym.vector.SyncVectorEnv env_cls = _make_vec_env_cls(use_async_envs, n_envs)
cam_list = [c.strip() for c in self.camera_names.split(",") if c.strip()] cam_list = [c.strip() for c in self.camera_names.split(",") if c.strip()]
return create_robotwin_envs( return create_robotwin_envs(
task=self.task, task=self.task,
@@ -661,12 +661,3 @@ class RoboTwinEnvConfig(EnvConfig):
observation_width=self.observation_width, observation_width=self.observation_width,
episode_length=self.episode_length, episode_length=self.episode_length,
) )
def get_env_processors(self):
from lerobot.processor.env_processor import RoboTwinProcessorStep
from lerobot.processor.pipeline import PolicyProcessorPipeline
return (
PolicyProcessorPipeline(steps=[RoboTwinProcessorStep()]),
PolicyProcessorPipeline(steps=[]),
)
+32 -10
View File
@@ -16,6 +16,7 @@
from __future__ import annotations from __future__ import annotations
import importlib import importlib
import logging
from collections import defaultdict from collections import defaultdict
from collections.abc import Callable, Sequence from collections.abc import Callable, Sequence
from functools import partial from functools import partial
@@ -28,6 +29,10 @@ from gymnasium import spaces
from lerobot.types import RobotObservation from lerobot.types import RobotObservation
from .utils import _LazyAsyncVectorEnv
logger = logging.getLogger(__name__)
# Camera names as used by RoboTwin 2.0. The wrapper appends "_rgb" when looking # Camera names as used by RoboTwin 2.0. The wrapper appends "_rgb" when looking
# up keys in get_obs() output (e.g. "head_camera" → "head_camera_rgb"). # up keys in get_obs() output (e.g. "head_camera" → "head_camera_rgb").
ROBOTWIN_CAMERA_NAMES: tuple[str, ...] = ( ROBOTWIN_CAMERA_NAMES: tuple[str, ...] = (
@@ -126,7 +131,7 @@ def _load_robotwin_setup_kwargs(task_name: str) -> dict[str, Any]:
from envs import CONFIGS_PATH # type: ignore[import-not-found] from envs import CONFIGS_PATH # type: ignore[import-not-found]
task_config = "demo_clean" task_config = "demo_clean"
with open(f"./task_config/{task_config}.yml", encoding="utf-8") as f: with open(os.path.join(CONFIGS_PATH, f"{task_config}.yml"), encoding="utf-8") as f:
args = yaml.safe_load(f) args = yaml.safe_load(f)
# Resolve embodiment — demo_clean.yml uses [aloha-agilex] (dual-arm single robot) # Resolve embodiment — demo_clean.yml uses [aloha-agilex] (dual-arm single robot)
@@ -262,7 +267,7 @@ class RoboTwinEnv(gym.Env):
self.observation_space = spaces.Dict( self.observation_space = spaces.Dict(
{ {
"pixels": spaces.Dict(image_spaces), "pixels": spaces.Dict(image_spaces),
"agent_pos": spaces.Box(low=-np.inf, high=np.inf, shape=(ACTION_DIM,), dtype=np.float64), "agent_pos": spaces.Box(low=-np.inf, high=np.inf, shape=(ACTION_DIM,), dtype=np.float32),
} }
) )
self.action_space = spaces.Box( self.action_space = spaces.Box(
@@ -303,12 +308,12 @@ class RoboTwinEnv(gym.Env):
ja = raw.get("joint_action") or {} ja = raw.get("joint_action") or {}
vec = ja.get("vector") vec = ja.get("vector")
if vec is not None: if vec is not None:
arr = np.asarray(vec, dtype=np.float64).ravel() arr = np.asarray(vec, dtype=np.float32).ravel()
joint_state = ( joint_state = (
arr[:ACTION_DIM] if arr.size >= ACTION_DIM else np.zeros(ACTION_DIM, dtype=np.float64) arr[:ACTION_DIM] if arr.size >= ACTION_DIM else np.zeros(ACTION_DIM, dtype=np.float32)
) )
else: else:
joint_state = np.zeros(ACTION_DIM, dtype=np.float64) joint_state = np.zeros(ACTION_DIM, dtype=np.float32)
return {"pixels": images, "agent_pos": joint_state} return {"pixels": images, "agent_pos": joint_state}
@@ -415,8 +420,8 @@ def create_robotwin_envs(
n_envs: int, n_envs: int,
env_cls: Callable[[Sequence[Callable[[], Any]]], Any] | None = None, env_cls: Callable[[Sequence[Callable[[], Any]]], Any] | None = None,
camera_names: Sequence[str] = ROBOTWIN_CAMERA_NAMES, camera_names: Sequence[str] = ROBOTWIN_CAMERA_NAMES,
observation_height: int = 480, observation_height: int = DEFAULT_CAMERA_H,
observation_width: int = 640, observation_width: int = DEFAULT_CAMERA_W,
episode_length: int = DEFAULT_EPISODE_LENGTH, episode_length: int = DEFAULT_EPISODE_LENGTH,
) -> dict[str, dict[int, Any]]: ) -> dict[str, dict[int, Any]]:
"""Create vectorized RoboTwin 2.0 environments. """Create vectorized RoboTwin 2.0 environments.
@@ -448,7 +453,16 @@ def create_robotwin_envs(
if unknown: if unknown:
raise ValueError(f"Unknown RoboTwin tasks: {unknown}. Available tasks: {sorted(ROBOTWIN_TASKS)}") raise ValueError(f"Unknown RoboTwin tasks: {unknown}. Available tasks: {sorted(ROBOTWIN_TASKS)}")
print(f"Creating RoboTwin envs | tasks={task_names} | n_envs(per task)={n_envs}") logger.info(
"Creating RoboTwin envs | tasks=%s | n_envs(per task)=%d",
task_names,
n_envs,
)
is_async = env_cls is gym.vector.AsyncVectorEnv
cached_obs_space: spaces.Space | None = None
cached_act_space: spaces.Space | None = None
cached_metadata: dict[str, Any] | None = None
out: dict[str, dict[int, Any]] = defaultdict(dict) out: dict[str, dict[int, Any]] = defaultdict(dict)
for task_name in task_names: for task_name in task_names:
@@ -460,7 +474,15 @@ def create_robotwin_envs(
observation_width=observation_width, observation_width=observation_width,
episode_length=episode_length, episode_length=episode_length,
) )
out[task_name][0] = env_cls(fns) if is_async:
print(f"Built vec env | task={task_name} | n_envs={n_envs}") lazy = _LazyAsyncVectorEnv(fns, cached_obs_space, cached_act_space, cached_metadata)
if cached_obs_space is None:
cached_obs_space = lazy.observation_space
cached_act_space = lazy.action_space
cached_metadata = lazy.metadata
out[task_name][0] = lazy
else:
out[task_name][0] = env_cls(fns)
logger.info("Built vec env | task=%s | n_envs=%d", task_name, n_envs)
return {k: dict(v) for k, v in out.items()} return {k: dict(v) for k, v in out.items()}
-26
View File
@@ -226,29 +226,3 @@ class IsaaclabArenaProcessorStep(ObservationProcessorStep):
def observation(self, observation): def observation(self, observation):
return self._process_observation(observation) return self._process_observation(observation)
@dataclass
@ProcessorStepRegistry.register(name="robotwin_processor")
class RoboTwinProcessorStep(ObservationProcessorStep):
"""Passthrough step for RoboTwin observations, casting state to float32.
RoboTwin observations already arrive in LeRobot format (observation.images.*
and observation.state), so this step mainly ensures state dtype is float32.
"""
def _process_observation(self, observation):
processed_obs = dict(observation)
if OBS_STATE in processed_obs:
state = processed_obs[OBS_STATE]
if hasattr(state, "dtype") and state.dtype != torch.float32:
processed_obs[OBS_STATE] = state.float()
return processed_obs
def transform_features(
self, features: dict[PipelineFeatureType, dict[str, PolicyFeature]]
) -> dict[PipelineFeatureType, dict[str, PolicyFeature]]:
return features
def observation(self, observation):
return self._process_observation(observation)
+1 -33
View File
@@ -28,7 +28,6 @@ from unittest.mock import MagicMock, patch
import gymnasium as gym import gymnasium as gym
import numpy as np import numpy as np
import pytest import pytest
import torch
from lerobot.envs.robotwin import ( from lerobot.envs.robotwin import (
ACTION_DIM, ACTION_DIM,
@@ -56,7 +55,7 @@ def _make_mock_task_env(
""" """
obs_dict = { obs_dict = {
"observation": {cam: {"rgb": np.zeros((height, width, 3), dtype=np.uint8)} for cam in cameras}, "observation": {cam: {"rgb": np.zeros((height, width, 3), dtype=np.uint8)} for cam in cameras},
"joint_action": {"vector": np.zeros(ACTION_DIM, dtype=np.float64)}, "joint_action": {"vector": np.zeros(ACTION_DIM, dtype=np.float32)},
"endpose": {}, "endpose": {},
} }
@@ -281,34 +280,3 @@ def test_all_tasks_are_strings():
def test_no_duplicate_tasks(): def test_no_duplicate_tasks():
assert len(ROBOTWIN_TASKS) == len(set(ROBOTWIN_TASKS)) assert len(ROBOTWIN_TASKS) == len(set(ROBOTWIN_TASKS))
# ---------------------------------------------------------------------------
# RoboTwinProcessorStep
# ---------------------------------------------------------------------------
class TestRoboTwinProcessorStep:
def test_passes_through_images_and_state(self):
from lerobot.processor.env_processor import RoboTwinProcessorStep
from lerobot.utils.constants import OBS_IMAGES, OBS_STATE
step = RoboTwinProcessorStep()
obs = {
f"{OBS_IMAGES}.head_camera": torch.zeros(1, 3, 4, 4),
f"{OBS_IMAGES}.left_camera": torch.zeros(1, 3, 4, 4),
OBS_STATE: torch.zeros(1, 14),
}
result = step.observation(obs)
assert f"{OBS_IMAGES}.head_camera" in result
assert f"{OBS_IMAGES}.left_camera" in result
assert result[OBS_STATE].dtype == torch.float32
def test_state_cast_to_float32(self):
from lerobot.processor.env_processor import RoboTwinProcessorStep
from lerobot.utils.constants import OBS_STATE
step = RoboTwinProcessorStep()
obs = {OBS_STATE: torch.zeros(1, 14, dtype=torch.float64)}
result = step.observation(obs)
assert result[OBS_STATE].dtype == torch.float32