[HIL-SERL] Update CI to allow installation of prerelease versions for lerobot (#1018)

Co-authored-by: imstevenpmwork <steven.palma@huggingface.co>
2026-07-23 09:46:00 +00:00 · 2025-04-24 10:18:03 +02:00
parent a0018240d5
commit 299effe0f1
10 changed files with 78 additions and 388 deletions
@@ -318,7 +318,7 @@ class LeRobotDatasetMetadata:
        obj.root.mkdir(parents=True, exist_ok=False)

        if robot is not None:
-            features = {**(features or {}), **get_features_from_robot(robot)}
+            features = get_features_from_robot(robot, use_videos)
            robot_type = robot.robot_type
            if not all(cam.fps == fps for cam in robot.cameras.values()):
                logging.warning(
@@ -821,9 +821,7 @@ class LeRobotDataset(torch.utils.data.Dataset):

            if self.features[key]["dtype"] in ["image", "video"]:
                img_path = self._get_image_file_path(
-                    episode_index=self.episode_buffer["episode_index"],
-                    image_key=key,
-                    frame_index=frame_index,
+                    episode_index=self.episode_buffer["episode_index"], image_key=key, frame_index=frame_index
                )
                if frame_index == 0:
                    img_path.parent.mkdir(parents=True, exist_ok=True)
@@ -869,10 +867,7 @@ class LeRobotDataset(torch.utils.data.Dataset):
        for key, ft in self.features.items():
            # index, episode_index, task_index are already processed above, and image and video
            # are processed separately by storing image path and frame info as meta data
-            if key in ["index", "episode_index", "task_index"] or ft["dtype"] in [
-                "image",
-                "video",
-            ]:
+            if key in ["index", "episode_index", "task_index"] or ft["dtype"] in ["image", "video"]:
                continue
            episode_buffer[key] = np.stack(episode_buffer[key])

@@ -37,35 +37,29 @@ def preprocess_observation(observations: dict[str, np.ndarray]) -> dict[str, Ten
    """
    # map to expected inputs for the policy
    return_observations = {}
-    # TODO: You have to merge all tensors from agent key and extra key
-    # You don't keep sensor param key in the observation
-    # And you keep sensor data rgb
-    for key, img in observations.items():
-        if "images" not in key:
-            continue
+    if "pixels" in observations:
+        if isinstance(observations["pixels"], dict):
+            imgs = {f"observation.images.{key}": img for key, img in observations["pixels"].items()}
+        else:
+            imgs = {"observation.image": observations["pixels"]}

-        # TODO(aliberts, rcadene): use transforms.ToTensor()?
-        if not torch.is_tensor(img):
+        for imgkey, img in imgs.items():
+            # TODO(aliberts, rcadene): use transforms.ToTensor()?
            img = torch.from_numpy(img)

-        if img.ndim == 3:
-            img = img.unsqueeze(0)
+            # sanity check that images are channel last
+            _, h, w, c = img.shape
+            assert c < h and c < w, f"expect channel last images, but instead got {img.shape=}"

-        # sanity check that images are channel last
-        _, h, w, c = img.shape
-        assert c < h and c < w, f"expect channel last images, but instead got {img.shape=}"
+            # sanity check that images are uint8
+            assert img.dtype == torch.uint8, f"expect torch.uint8, but instead {img.dtype=}"

-        # sanity check that images are uint8
-        assert img.dtype == torch.uint8, f"expect torch.uint8, but instead {img.dtype=}"
+            # convert to channel first of type float32 in range [0,1]
+            img = einops.rearrange(img, "b h w c -> b c h w").contiguous()
+            img = img.type(torch.float32)
+            img /= 255

-        # convert to channel first of type float32 in range [0,1]
-        img = einops.rearrange(img, "b h w c -> b c h w").contiguous()
-        img = img.type(torch.float32)
-        img /= 255
-
-        return_observations[key] = img
-        # obs state agent qpos and qvel
-        # image
+            return_observations[imgkey] = img

    if "environment_state" in observations:
        return_observations["observation.environment_state"] = torch.from_numpy(
@@ -74,8 +68,7 @@ def preprocess_observation(observations: dict[str, np.ndarray]) -> dict[str, Ten

    # TODO(rcadene): enable pixels only baseline with `obs_type="pixels"` in environment by removing
    # requirement for "agent_pos"
-    # return_observations["observation.state"] = torch.from_numpy(observations["agent_pos"]).float()
-    return_observations["observation.state"] = observations["observation.state"].float()
+    return_observations["observation.state"] = torch.from_numpy(observations["agent_pos"]).float()
    return return_observations


@@ -93,7 +86,7 @@ def env_to_policy_features(env_cfg: EnvConfig) -> dict[str, PolicyFeature]:
        else:
            feature = ft

-        policy_key = env_cfg.features_map.get(key, key)
+        policy_key = env_cfg.features_map[key]
        policy_features[policy_key] = feature

    return policy_features
@@ -88,7 +88,7 @@ class RecordControlConfig(ControlConfig):
    # Resume recording on an existing dataset.
    resume: bool = False
    # Reset follower arms to an initial configuration.
-    reset_follower_arms: bool = True
+    reset_follower_arms: bool = False

    def __post_init__(self):
        # HACK: We parse again the cli args here to get the pretrained path if there was one.
@@ -129,22 +129,16 @@ def predict_action(observation, policy, device, use_amp):
    return action


-def init_keyboard_listener(assign_rewards=False):
+def init_keyboard_listener():
    """
    Initializes a keyboard listener to enable early termination of an episode
    or environment reset by pressing the right arrow key ('->'). This may require
    sudo permissions to allow the terminal to monitor keyboard events.
-
-    Args:
-        assign_rewards (bool): If True, allows annotating the collected trajectory
-        with a binary reward at the end of the episode to indicate success.
    """
    events = {}
    events["exit_early"] = False
    events["rerecord_episode"] = False
    events["stop_recording"] = False
-    if assign_rewards:
-        events["next.reward"] = 0

    if is_headless():
        logging.warning(
@@ -169,12 +163,6 @@ def init_keyboard_listener(assign_rewards=False):
                print("Escape key pressed. Stopping data recording...")
                events["stop_recording"] = True
                events["exit_early"] = True
-            elif assign_rewards and key == keyboard.Key.space:
-                events["next.reward"] = 1 if events["next.reward"] == 0 else 0
-                print(
-                    "Space key pressed. Assigning new reward to the subsequent frames. New reward:",
-                    events["next.reward"],
-                )

        except Exception as e:
            print(f"Error handling key press: {e}")