mirror of
https://github.com/huggingface/lerobot.git
synced 2026-05-23 12:40:08 +00:00
fix: integrate PR #3315 review feedback
- envs(robotwin): default `observation_height/width` in `create_robotwin_envs` to `DEFAULT_CAMERA_H/W` (240/320) so they match the D435 dims baked into `task_config/demo_clean.yml`. - envs(robotwin): resolve `task_config/demo_clean.yml` via `CONFIGS_PATH` instead of a cwd-relative path; works regardless of where `lerobot-eval` is invoked. - envs(robotwin): replace `print()` calls in `create_robotwin_envs` with `logger.info(...)` (module-level `logger = logging.getLogger`). - envs(robotwin): use `_LazyAsyncVectorEnv` for the async path so async workers start lazily (matches LIBERO / RoboCasa / VLABench). - envs(robotwin): cast `agent_pos` space + joint-state output to float32 end-to-end (was mixed float64/float32). - envs(configs): use the existing `_make_vec_env_cls(use_async, n_envs)` helper in `RoboTwinEnvConfig.create_envs`; drop the `get_env_processors` override so RoboTwin uses the identity processor inherited from `EnvConfig`. - processor: delete `RoboTwinProcessorStep` — the float32 cast now happens in the wrapper itself, so the processor is redundant. - tests: drop the `TestRoboTwinProcessorStep` suite; update the mock obs fixture to use float32 `joint_action.vector`. - ci: hoist `ROBOTWIN_POLICY` and `ROBOTWIN_TASKS` to job-level env vars so the task list and policy aren't duplicated across eval / extract / parse steps. - docker: pin RoboTwin + CuRobo upstream clones to commit SHAs (`RoboTwin@0aeea2d6`, `curobo@ca941586`) for reproducibility. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -328,6 +328,8 @@ jobs:
|
||||
group: aws-g6-4xlarge-plus
|
||||
env:
|
||||
HF_USER_TOKEN: ${{ secrets.LEROBOT_HF_USER }}
|
||||
ROBOTWIN_POLICY: lerobot/smolvla_robotwin
|
||||
ROBOTWIN_TASKS: beat_block_hammer,click_bell,handover_block,stack_blocks_two,click_alarmclock,open_microwave,adjust_bottle,lift_pot,stamp_seal,turn_switch
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
@@ -371,13 +373,15 @@ jobs:
|
||||
--shm-size=4g \
|
||||
-e HF_HOME=/tmp/hf \
|
||||
-e HF_USER_TOKEN="${HF_USER_TOKEN}" \
|
||||
-e ROBOTWIN_POLICY="${ROBOTWIN_POLICY}" \
|
||||
-e ROBOTWIN_TASKS="${ROBOTWIN_TASKS}" \
|
||||
lerobot-benchmark-robotwin:ci \
|
||||
bash -c "
|
||||
hf auth login --token \"\$HF_USER_TOKEN\" --add-to-git-credential 2>/dev/null || true
|
||||
cd /opt/robotwin && lerobot-eval \
|
||||
--policy.path=lerobot/smolvla_robotwin \
|
||||
--policy.path=\"\$ROBOTWIN_POLICY\" \
|
||||
--env.type=robotwin \
|
||||
--env.task=beat_block_hammer,click_bell,handover_block,stack_blocks_two,click_alarmclock,open_microwave,adjust_bottle,lift_pot,stamp_seal,turn_switch \
|
||||
--env.task=\"\$ROBOTWIN_TASKS\" \
|
||||
--eval.batch_size=1 \
|
||||
--eval.n_episodes=1 \
|
||||
--eval.use_async_envs=false \
|
||||
@@ -386,7 +390,7 @@ jobs:
|
||||
--output_dir=/tmp/eval-artifacts
|
||||
python /lerobot/scripts/ci/extract_task_descriptions.py \
|
||||
--env robotwin \
|
||||
--task beat_block_hammer,click_bell,handover_block,stack_blocks_two,click_alarmclock,open_microwave,adjust_bottle,lift_pot,stamp_seal,turn_switch \
|
||||
--task \"\$ROBOTWIN_TASKS\" \
|
||||
--output /tmp/eval-artifacts/task_descriptions.json
|
||||
"
|
||||
|
||||
@@ -403,8 +407,8 @@ jobs:
|
||||
python3 scripts/ci/parse_eval_metrics.py \
|
||||
--artifacts-dir /tmp/robotwin-artifacts \
|
||||
--env robotwin \
|
||||
--task beat_block_hammer,click_bell,handover_block,stack_blocks_two,click_alarmclock,open_microwave,adjust_bottle,lift_pot,stamp_seal,turn_switch \
|
||||
--policy lerobot/smolvla_robotwin
|
||||
--task "${ROBOTWIN_TASKS}" \
|
||||
--policy "${ROBOTWIN_POLICY}"
|
||||
|
||||
- name: Upload RoboTwin rollout video
|
||||
if: always()
|
||||
|
||||
@@ -30,6 +30,9 @@ ENV NVIDIA_DRIVER_CAPABILITIES=all \
|
||||
# The nightly base is CUDA -base (no compiler, no Vulkan loader). CuRobo's
|
||||
# `pip install -e .` runs nvcc, and SAPIEN renders via Vulkan — add both.
|
||||
USER root
|
||||
# Pinned upstream SHA for reproducible benchmark runs. Bump when we need
|
||||
# an upstream fix; don't rely on `main` drift.
|
||||
ARG ROBOTWIN_SHA=0aeea2d669c0f8516f4d5785f0aa33ba812c14b4
|
||||
RUN apt-get update \
|
||||
&& apt-get install -y --no-install-recommends \
|
||||
cuda-nvcc-12-4 cuda-cudart-dev-12-4 \
|
||||
@@ -37,7 +40,8 @@ RUN apt-get update \
|
||||
&& mkdir -p /usr/share/vulkan/icd.d \
|
||||
&& echo '{"file_format_version":"1.0.0","ICD":{"library_path":"libGLX_nvidia.so.0","api_version":"1.3.0"}}' \
|
||||
> /usr/share/vulkan/icd.d/nvidia_icd.json \
|
||||
&& git clone --depth=1 https://github.com/RoboTwin-Platform/RoboTwin.git ${ROBOTWIN_ROOT} \
|
||||
&& git clone https://github.com/RoboTwin-Platform/RoboTwin.git ${ROBOTWIN_ROOT} \
|
||||
&& git -C ${ROBOTWIN_ROOT} checkout ${ROBOTWIN_SHA} \
|
||||
&& chown -R user_lerobot:user_lerobot ${ROBOTWIN_ROOT} \
|
||||
&& apt-get clean && rm -rf /var/lib/apt/lists/*
|
||||
USER user_lerobot
|
||||
@@ -52,9 +56,11 @@ RUN uv pip install --no-cache --no-build-isolation \
|
||||
"git+https://github.com/facebookresearch/pytorch3d.git@stable"
|
||||
|
||||
# CuRobo — NVlabs motion generator; TORCH_CUDA_ARCH_LIST must be set or the
|
||||
# build aborts on an empty arch list.
|
||||
# build aborts on an empty arch list. Pinned SHA for reproducibility.
|
||||
ARG CUROBO_SHA=ca941586c33b8482ed9c0e74d60f23efd64b516a
|
||||
RUN cd ${ROBOTWIN_ROOT}/envs \
|
||||
&& git clone --depth=1 https://github.com/NVlabs/curobo.git \
|
||||
&& git clone https://github.com/NVlabs/curobo.git \
|
||||
&& git -C curobo checkout ${CUROBO_SHA} \
|
||||
&& cd curobo \
|
||||
&& TORCH_CUDA_ARCH_LIST="7.0;7.5;8.0;8.6;8.9;9.0" \
|
||||
uv pip install -e . --no-build-isolation --no-cache
|
||||
|
||||
@@ -650,7 +650,7 @@ class RoboTwinEnvConfig(EnvConfig):
|
||||
if not self.task:
|
||||
raise ValueError("RoboTwinEnvConfig requires `task` to be specified.")
|
||||
|
||||
env_cls = gym.vector.AsyncVectorEnv if (use_async_envs and n_envs > 1) else gym.vector.SyncVectorEnv
|
||||
env_cls = _make_vec_env_cls(use_async_envs, n_envs)
|
||||
cam_list = [c.strip() for c in self.camera_names.split(",") if c.strip()]
|
||||
return create_robotwin_envs(
|
||||
task=self.task,
|
||||
@@ -661,12 +661,3 @@ class RoboTwinEnvConfig(EnvConfig):
|
||||
observation_width=self.observation_width,
|
||||
episode_length=self.episode_length,
|
||||
)
|
||||
|
||||
def get_env_processors(self):
|
||||
from lerobot.processor.env_processor import RoboTwinProcessorStep
|
||||
from lerobot.processor.pipeline import PolicyProcessorPipeline
|
||||
|
||||
return (
|
||||
PolicyProcessorPipeline(steps=[RoboTwinProcessorStep()]),
|
||||
PolicyProcessorPipeline(steps=[]),
|
||||
)
|
||||
|
||||
@@ -16,6 +16,7 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import importlib
|
||||
import logging
|
||||
from collections import defaultdict
|
||||
from collections.abc import Callable, Sequence
|
||||
from functools import partial
|
||||
@@ -28,6 +29,10 @@ from gymnasium import spaces
|
||||
|
||||
from lerobot.types import RobotObservation
|
||||
|
||||
from .utils import _LazyAsyncVectorEnv
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Camera names as used by RoboTwin 2.0. The wrapper appends "_rgb" when looking
|
||||
# up keys in get_obs() output (e.g. "head_camera" → "head_camera_rgb").
|
||||
ROBOTWIN_CAMERA_NAMES: tuple[str, ...] = (
|
||||
@@ -126,7 +131,7 @@ def _load_robotwin_setup_kwargs(task_name: str) -> dict[str, Any]:
|
||||
from envs import CONFIGS_PATH # type: ignore[import-not-found]
|
||||
|
||||
task_config = "demo_clean"
|
||||
with open(f"./task_config/{task_config}.yml", encoding="utf-8") as f:
|
||||
with open(os.path.join(CONFIGS_PATH, f"{task_config}.yml"), encoding="utf-8") as f:
|
||||
args = yaml.safe_load(f)
|
||||
|
||||
# Resolve embodiment — demo_clean.yml uses [aloha-agilex] (dual-arm single robot)
|
||||
@@ -262,7 +267,7 @@ class RoboTwinEnv(gym.Env):
|
||||
self.observation_space = spaces.Dict(
|
||||
{
|
||||
"pixels": spaces.Dict(image_spaces),
|
||||
"agent_pos": spaces.Box(low=-np.inf, high=np.inf, shape=(ACTION_DIM,), dtype=np.float64),
|
||||
"agent_pos": spaces.Box(low=-np.inf, high=np.inf, shape=(ACTION_DIM,), dtype=np.float32),
|
||||
}
|
||||
)
|
||||
self.action_space = spaces.Box(
|
||||
@@ -303,12 +308,12 @@ class RoboTwinEnv(gym.Env):
|
||||
ja = raw.get("joint_action") or {}
|
||||
vec = ja.get("vector")
|
||||
if vec is not None:
|
||||
arr = np.asarray(vec, dtype=np.float64).ravel()
|
||||
arr = np.asarray(vec, dtype=np.float32).ravel()
|
||||
joint_state = (
|
||||
arr[:ACTION_DIM] if arr.size >= ACTION_DIM else np.zeros(ACTION_DIM, dtype=np.float64)
|
||||
arr[:ACTION_DIM] if arr.size >= ACTION_DIM else np.zeros(ACTION_DIM, dtype=np.float32)
|
||||
)
|
||||
else:
|
||||
joint_state = np.zeros(ACTION_DIM, dtype=np.float64)
|
||||
joint_state = np.zeros(ACTION_DIM, dtype=np.float32)
|
||||
|
||||
return {"pixels": images, "agent_pos": joint_state}
|
||||
|
||||
@@ -415,8 +420,8 @@ def create_robotwin_envs(
|
||||
n_envs: int,
|
||||
env_cls: Callable[[Sequence[Callable[[], Any]]], Any] | None = None,
|
||||
camera_names: Sequence[str] = ROBOTWIN_CAMERA_NAMES,
|
||||
observation_height: int = 480,
|
||||
observation_width: int = 640,
|
||||
observation_height: int = DEFAULT_CAMERA_H,
|
||||
observation_width: int = DEFAULT_CAMERA_W,
|
||||
episode_length: int = DEFAULT_EPISODE_LENGTH,
|
||||
) -> dict[str, dict[int, Any]]:
|
||||
"""Create vectorized RoboTwin 2.0 environments.
|
||||
@@ -448,7 +453,16 @@ def create_robotwin_envs(
|
||||
if unknown:
|
||||
raise ValueError(f"Unknown RoboTwin tasks: {unknown}. Available tasks: {sorted(ROBOTWIN_TASKS)}")
|
||||
|
||||
print(f"Creating RoboTwin envs | tasks={task_names} | n_envs(per task)={n_envs}")
|
||||
logger.info(
|
||||
"Creating RoboTwin envs | tasks=%s | n_envs(per task)=%d",
|
||||
task_names,
|
||||
n_envs,
|
||||
)
|
||||
|
||||
is_async = env_cls is gym.vector.AsyncVectorEnv
|
||||
cached_obs_space: spaces.Space | None = None
|
||||
cached_act_space: spaces.Space | None = None
|
||||
cached_metadata: dict[str, Any] | None = None
|
||||
|
||||
out: dict[str, dict[int, Any]] = defaultdict(dict)
|
||||
for task_name in task_names:
|
||||
@@ -460,7 +474,15 @@ def create_robotwin_envs(
|
||||
observation_width=observation_width,
|
||||
episode_length=episode_length,
|
||||
)
|
||||
out[task_name][0] = env_cls(fns)
|
||||
print(f"Built vec env | task={task_name} | n_envs={n_envs}")
|
||||
if is_async:
|
||||
lazy = _LazyAsyncVectorEnv(fns, cached_obs_space, cached_act_space, cached_metadata)
|
||||
if cached_obs_space is None:
|
||||
cached_obs_space = lazy.observation_space
|
||||
cached_act_space = lazy.action_space
|
||||
cached_metadata = lazy.metadata
|
||||
out[task_name][0] = lazy
|
||||
else:
|
||||
out[task_name][0] = env_cls(fns)
|
||||
logger.info("Built vec env | task=%s | n_envs=%d", task_name, n_envs)
|
||||
|
||||
return {k: dict(v) for k, v in out.items()}
|
||||
|
||||
@@ -226,29 +226,3 @@ class IsaaclabArenaProcessorStep(ObservationProcessorStep):
|
||||
|
||||
def observation(self, observation):
|
||||
return self._process_observation(observation)
|
||||
|
||||
|
||||
@dataclass
|
||||
@ProcessorStepRegistry.register(name="robotwin_processor")
|
||||
class RoboTwinProcessorStep(ObservationProcessorStep):
|
||||
"""Passthrough step for RoboTwin observations, casting state to float32.
|
||||
|
||||
RoboTwin observations already arrive in LeRobot format (observation.images.*
|
||||
and observation.state), so this step mainly ensures state dtype is float32.
|
||||
"""
|
||||
|
||||
def _process_observation(self, observation):
|
||||
processed_obs = dict(observation)
|
||||
if OBS_STATE in processed_obs:
|
||||
state = processed_obs[OBS_STATE]
|
||||
if hasattr(state, "dtype") and state.dtype != torch.float32:
|
||||
processed_obs[OBS_STATE] = state.float()
|
||||
return processed_obs
|
||||
|
||||
def transform_features(
|
||||
self, features: dict[PipelineFeatureType, dict[str, PolicyFeature]]
|
||||
) -> dict[PipelineFeatureType, dict[str, PolicyFeature]]:
|
||||
return features
|
||||
|
||||
def observation(self, observation):
|
||||
return self._process_observation(observation)
|
||||
|
||||
@@ -28,7 +28,6 @@ from unittest.mock import MagicMock, patch
|
||||
import gymnasium as gym
|
||||
import numpy as np
|
||||
import pytest
|
||||
import torch
|
||||
|
||||
from lerobot.envs.robotwin import (
|
||||
ACTION_DIM,
|
||||
@@ -56,7 +55,7 @@ def _make_mock_task_env(
|
||||
"""
|
||||
obs_dict = {
|
||||
"observation": {cam: {"rgb": np.zeros((height, width, 3), dtype=np.uint8)} for cam in cameras},
|
||||
"joint_action": {"vector": np.zeros(ACTION_DIM, dtype=np.float64)},
|
||||
"joint_action": {"vector": np.zeros(ACTION_DIM, dtype=np.float32)},
|
||||
"endpose": {},
|
||||
}
|
||||
|
||||
@@ -281,34 +280,3 @@ def test_all_tasks_are_strings():
|
||||
|
||||
def test_no_duplicate_tasks():
|
||||
assert len(ROBOTWIN_TASKS) == len(set(ROBOTWIN_TASKS))
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# RoboTwinProcessorStep
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestRoboTwinProcessorStep:
|
||||
def test_passes_through_images_and_state(self):
|
||||
from lerobot.processor.env_processor import RoboTwinProcessorStep
|
||||
from lerobot.utils.constants import OBS_IMAGES, OBS_STATE
|
||||
|
||||
step = RoboTwinProcessorStep()
|
||||
obs = {
|
||||
f"{OBS_IMAGES}.head_camera": torch.zeros(1, 3, 4, 4),
|
||||
f"{OBS_IMAGES}.left_camera": torch.zeros(1, 3, 4, 4),
|
||||
OBS_STATE: torch.zeros(1, 14),
|
||||
}
|
||||
result = step.observation(obs)
|
||||
assert f"{OBS_IMAGES}.head_camera" in result
|
||||
assert f"{OBS_IMAGES}.left_camera" in result
|
||||
assert result[OBS_STATE].dtype == torch.float32
|
||||
|
||||
def test_state_cast_to_float32(self):
|
||||
from lerobot.processor.env_processor import RoboTwinProcessorStep
|
||||
from lerobot.utils.constants import OBS_STATE
|
||||
|
||||
step = RoboTwinProcessorStep()
|
||||
obs = {OBS_STATE: torch.zeros(1, 14, dtype=torch.float64)}
|
||||
result = step.observation(obs)
|
||||
assert result[OBS_STATE].dtype == torch.float32
|
||||
|
||||
Reference in New Issue
Block a user