mirror of
https://github.com/huggingface/lerobot.git
synced 2026-05-24 21:19:53 +00:00
fix(robotwin): align observation_space dims with D435 camera output
lerobot_eval crashed in gym.vector's SyncVectorEnv.reset with:
ValueError: Output array is the wrong shape
because RoboTwinEnvConfig declared observation_space = (480, 640, 3) but
task_config/demo_clean.yml specifies head_camera_type=D435, which renders
(240, 320, 3). gym.vector.concatenate pre-allocates a buffer from the
declared space, so the first np.stack raises on shape mismatch.
Changes:
- Config defaults now 240×320 (the D435 dims in _camera_config.yml), with
a comment pointing at the source of truth.
- RoboTwinEnv.__init__ accepts observation_height/width as Optional and
falls back to setup_kwargs["head_camera_h/w"] so the env is self-consistent
even if the config is not in sync.
- Config camera_names / features_map use the actual aloha-agilex camera
names (head_camera, left_camera, right_camera). Drops the stale
"front_camera" and "left_wrist"/"right_wrist" entries that never matched
anything RoboTwin exposes.
- CI workflow's rename_map updated to match the new camera names.
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -366,7 +366,7 @@ jobs:
|
|||||||
--eval.n_episodes=1 \
|
--eval.n_episodes=1 \
|
||||||
--eval.use_async_envs=false \
|
--eval.use_async_envs=false \
|
||||||
--policy.device=cuda \
|
--policy.device=cuda \
|
||||||
'--rename_map={\"observation.images.head_camera\": \"observation.images.camera1\", \"observation.images.front_camera\": \"observation.images.camera2\", \"observation.images.left_wrist\": \"observation.images.camera3\"}' \
|
'--rename_map={\"observation.images.head_camera\": \"observation.images.camera1\", \"observation.images.left_camera\": \"observation.images.camera2\", \"observation.images.right_camera\": \"observation.images.camera3\"}' \
|
||||||
--output_dir=/tmp/eval-artifacts
|
--output_dir=/tmp/eval-artifacts
|
||||||
"
|
"
|
||||||
|
|
||||||
|
|||||||
@@ -594,11 +594,14 @@ class RoboTwinEnvConfig(EnvConfig):
|
|||||||
episode_length: int = 300
|
episode_length: int = 300
|
||||||
obs_type: str = "pixels_agent_pos"
|
obs_type: str = "pixels_agent_pos"
|
||||||
render_mode: str = "rgb_array"
|
render_mode: str = "rgb_array"
|
||||||
# Comma-separated list of cameras to include in observations.
|
# Available cameras from RoboTwin's aloha-agilex embodiment: head_camera
|
||||||
# Available: head_camera, front_camera, left_wrist, right_wrist
|
# (torso-mounted) + left_camera / right_camera (wrists).
|
||||||
camera_names: str = "head_camera,front_camera,left_wrist,right_wrist"
|
camera_names: str = "head_camera,left_camera,right_camera"
|
||||||
observation_height: int = 480
|
# Match the D435 dims in task_config/demo_clean.yml (_camera_config.yml).
|
||||||
observation_width: int = 640
|
# Gym's vector-env concatenate pre-allocates buffers of this shape, so it
|
||||||
|
# must equal what SAPIEN actually renders.
|
||||||
|
observation_height: int = 240
|
||||||
|
observation_width: int = 320
|
||||||
features: dict[str, PolicyFeature] = field(
|
features: dict[str, PolicyFeature] = field(
|
||||||
default_factory=lambda: {
|
default_factory=lambda: {
|
||||||
ACTION: PolicyFeature(type=FeatureType.ACTION, shape=(14,)),
|
ACTION: PolicyFeature(type=FeatureType.ACTION, shape=(14,)),
|
||||||
@@ -608,9 +611,8 @@ class RoboTwinEnvConfig(EnvConfig):
|
|||||||
default_factory=lambda: {
|
default_factory=lambda: {
|
||||||
ACTION: ACTION,
|
ACTION: ACTION,
|
||||||
"pixels/head_camera": f"{OBS_IMAGES}.head_camera",
|
"pixels/head_camera": f"{OBS_IMAGES}.head_camera",
|
||||||
"pixels/front_camera": f"{OBS_IMAGES}.front_camera",
|
"pixels/left_camera": f"{OBS_IMAGES}.left_camera",
|
||||||
"pixels/left_wrist": f"{OBS_IMAGES}.left_wrist",
|
"pixels/right_camera": f"{OBS_IMAGES}.right_camera",
|
||||||
"pixels/right_wrist": f"{OBS_IMAGES}.right_wrist",
|
|
||||||
"agent_pos": OBS_STATE,
|
"agent_pos": OBS_STATE,
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -228,8 +228,8 @@ class RoboTwinEnv(gym.Env):
|
|||||||
episode_index: int = 0,
|
episode_index: int = 0,
|
||||||
n_envs: int = 1,
|
n_envs: int = 1,
|
||||||
camera_names: Sequence[str] = ROBOTWIN_CAMERA_NAMES,
|
camera_names: Sequence[str] = ROBOTWIN_CAMERA_NAMES,
|
||||||
observation_height: int = 480,
|
observation_height: int | None = None,
|
||||||
observation_width: int = 640,
|
observation_width: int | None = None,
|
||||||
episode_length: int = DEFAULT_EPISODE_LENGTH,
|
episode_length: int = DEFAULT_EPISODE_LENGTH,
|
||||||
render_mode: str = "rgb_array",
|
render_mode: str = "rgb_array",
|
||||||
):
|
):
|
||||||
@@ -240,20 +240,24 @@ class RoboTwinEnv(gym.Env):
|
|||||||
self.episode_index = episode_index
|
self.episode_index = episode_index
|
||||||
self._reset_stride = n_envs
|
self._reset_stride = n_envs
|
||||||
self.camera_names = list(camera_names)
|
self.camera_names = list(camera_names)
|
||||||
self.observation_height = observation_height
|
# Match the declared observation_space to the camera resolution in
|
||||||
self.observation_width = observation_width
|
# RoboTwin's YAML — otherwise gym.vector's concatenate pre-allocates
|
||||||
|
# a buffer of the wrong shape and np.stack raises ValueError.
|
||||||
|
setup_kwargs = _load_robotwin_setup_kwargs(task_name)
|
||||||
|
self.observation_height = observation_height or setup_kwargs["head_camera_h"]
|
||||||
|
self.observation_width = observation_width or setup_kwargs["head_camera_w"]
|
||||||
self.episode_length = episode_length
|
self.episode_length = episode_length
|
||||||
self.render_mode = render_mode
|
self.render_mode = render_mode
|
||||||
|
|
||||||
self._env: Any | None = None # deferred — created on first reset() inside worker
|
self._env: Any | None = None # deferred — created on first reset() inside worker
|
||||||
self._step_count: int = 0
|
self._step_count: int = 0
|
||||||
self._black_frame = np.zeros((observation_height, observation_width, 3), dtype=np.uint8)
|
self._black_frame = np.zeros((self.observation_height, self.observation_width, 3), dtype=np.uint8)
|
||||||
|
|
||||||
image_spaces = {
|
image_spaces = {
|
||||||
cam: spaces.Box(
|
cam: spaces.Box(
|
||||||
low=0,
|
low=0,
|
||||||
high=255,
|
high=255,
|
||||||
shape=(observation_height, observation_width, 3),
|
shape=(self.observation_height, self.observation_width, 3),
|
||||||
dtype=np.uint8,
|
dtype=np.uint8,
|
||||||
)
|
)
|
||||||
for cam in self.camera_names
|
for cam in self.camera_names
|
||||||
|
|||||||
Reference in New Issue
Block a user