diff --git a/.github/workflows/benchmark_tests.yml b/.github/workflows/benchmark_tests.yml
index 74234ed5b..8ac040f67 100644
--- a/.github/workflows/benchmark_tests.yml
+++ b/.github/workflows/benchmark_tests.yml
@@ -366,7 +366,7 @@ jobs:
                 --eval.n_episodes=1 \
                 --eval.use_async_envs=false \
                 --policy.device=cuda \
-                '--rename_map={\"observation.images.head_camera\": \"observation.images.camera1\", \"observation.images.front_camera\": \"observation.images.camera2\", \"observation.images.left_wrist\": \"observation.images.camera3\"}' \
+                '--rename_map={\"observation.images.head_camera\": \"observation.images.camera1\", \"observation.images.left_camera\": \"observation.images.camera2\", \"observation.images.right_camera\": \"observation.images.camera3\"}' \
                 --output_dir=/tmp/eval-artifacts
             "
 
diff --git a/src/lerobot/envs/configs.py b/src/lerobot/envs/configs.py
index 17edeb903..9f27a49da 100644
--- a/src/lerobot/envs/configs.py
+++ b/src/lerobot/envs/configs.py
@@ -594,11 +594,14 @@ class RoboTwinEnvConfig(EnvConfig):
     episode_length: int = 300
     obs_type: str = "pixels_agent_pos"
     render_mode: str = "rgb_array"
-    # Comma-separated list of cameras to include in observations.
-    # Available: head_camera, front_camera, left_wrist, right_wrist
-    camera_names: str = "head_camera,front_camera,left_wrist,right_wrist"
-    observation_height: int = 480
-    observation_width: int = 640
+    # Available cameras from RoboTwin's aloha-agilex embodiment: head_camera
+    # (torso-mounted) + left_camera / right_camera (wrists).
+    camera_names: str = "head_camera,left_camera,right_camera"
+    # Match the D435 dims in task_config/demo_clean.yml (_camera_config.yml).
+    # Gym's vector-env concatenate pre-allocates buffers of this shape, so it
+    # must equal what SAPIEN actually renders.
+    observation_height: int = 240
+    observation_width: int = 320
     features: dict[str, PolicyFeature] = field(
         default_factory=lambda: {
             ACTION: PolicyFeature(type=FeatureType.ACTION, shape=(14,)),
@@ -608,9 +611,8 @@ class RoboTwinEnvConfig(EnvConfig):
         default_factory=lambda: {
             ACTION: ACTION,
             "pixels/head_camera": f"{OBS_IMAGES}.head_camera",
-            "pixels/front_camera": f"{OBS_IMAGES}.front_camera",
-            "pixels/left_wrist": f"{OBS_IMAGES}.left_wrist",
-            "pixels/right_wrist": f"{OBS_IMAGES}.right_wrist",
+            "pixels/left_camera": f"{OBS_IMAGES}.left_camera",
+            "pixels/right_camera": f"{OBS_IMAGES}.right_camera",
             "agent_pos": OBS_STATE,
         }
     )
diff --git a/src/lerobot/envs/robotwin.py b/src/lerobot/envs/robotwin.py
index e18aea7dd..6f370d1d4 100644
--- a/src/lerobot/envs/robotwin.py
+++ b/src/lerobot/envs/robotwin.py
@@ -228,8 +228,8 @@ class RoboTwinEnv(gym.Env):
         episode_index: int = 0,
         n_envs: int = 1,
         camera_names: Sequence[str] = ROBOTWIN_CAMERA_NAMES,
-        observation_height: int = 480,
-        observation_width: int = 640,
+        observation_height: int | None = None,
+        observation_width: int | None = None,
         episode_length: int = DEFAULT_EPISODE_LENGTH,
         render_mode: str = "rgb_array",
     ):
@@ -240,20 +240,24 @@ class RoboTwinEnv(gym.Env):
         self.episode_index = episode_index
         self._reset_stride = n_envs
         self.camera_names = list(camera_names)
-        self.observation_height = observation_height
-        self.observation_width = observation_width
+        # Match the declared observation_space to the camera resolution in
+        # RoboTwin's YAML — otherwise gym.vector's concatenate pre-allocates
+        # a buffer of the wrong shape and np.stack raises ValueError.
+        setup_kwargs = _load_robotwin_setup_kwargs(task_name)
+        self.observation_height = observation_height or setup_kwargs["head_camera_h"]
+        self.observation_width = observation_width or setup_kwargs["head_camera_w"]
         self.episode_length = episode_length
         self.render_mode = render_mode
 
         self._env: Any | None = None  # deferred — created on first reset() inside worker
         self._step_count: int = 0
-        self._black_frame = np.zeros((observation_height, observation_width, 3), dtype=np.uint8)
+        self._black_frame = np.zeros((self.observation_height, self.observation_width, 3), dtype=np.uint8)
 
         image_spaces = {
             cam: spaces.Box(
                 low=0,
                 high=255,
-                shape=(observation_height, observation_width, 3),
+                shape=(self.observation_height, self.observation_width, 3),
                 dtype=np.uint8,
             )
             for cam in self.camera_names