From 407d1882a25727c05d3f9a42b6a35138e05574d7 Mon Sep 17 00:00:00 2001 From: CarolinePascal Date: Mon, 27 Apr 2026 17:36:09 +0200 Subject: [PATCH] feat(robots/so_follower): emit + populate depth keys when use_depth When an SO follower has a camera configured with use_depth=True (e.g. a RealSense), the robot now exposes a paired depth feature so the dataset records both modalities: - _cameras_ft adds a 2D "<cam>_depth" entry alongside the 3-channel color shape; hw_to_dataset_features turns this into observation.depth.<cam> with the depth-map marker. - get_observation reads cam.read_latest_depth() (float32 metric meters from the RealSense async depth API) into <cam>_depth so build_dataset_frame can route it. Detection is duck-typed via getattr(..., "use_depth", False) so other cameras without that attribute keep their RGB-only behaviour unchanged. Made-with: Cursor --- src/lerobot/robots/so_follower/so_follower.py | 21 ++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/src/lerobot/robots/so_follower/so_follower.py b/src/lerobot/robots/so_follower/so_follower.py index 0651f566c..759bc5618 100644 --- a/src/lerobot/robots/so_follower/so_follower.py +++ b/src/lerobot/robots/so_follower/so_follower.py @@ -68,9 +68,16 @@ class SOFollower(Robot): @property def _cameras_ft(self) -> dict[str, tuple]: - return { - cam: (self.config.cameras[cam].height, self.config.cameras[cam].width, 3) for cam in self.cameras - } + features: dict[str, tuple] = {} + for cam in self.cameras: + cam_cfg = self.config.cameras[cam] + features[cam] = (cam_cfg.height, cam_cfg.width, 3) + # Cameras with a depth stream (e.g. RealSense with use_depth=True) also + # emit a 2D depth feature; hw_to_dataset_features routes 2D shapes to + # ``observation.depth.<cam>`` with the depth-map marker. 
+ if getattr(cam_cfg, "use_depth", False): + features[f"{cam}_depth"] = (cam_cfg.height, cam_cfg.width) + return features @cached_property def observation_features(self) -> dict[str, type | tuple]: @@ -190,6 +197,14 @@ class SOFollower(Robot): dt_ms = (time.perf_counter() - start) * 1e3 logger.debug(f"{self} read {cam_key}: {dt_ms:.1f}ms") + # Cameras with a depth stream populate a sibling ``<cam_key>_depth`` key + # (consumed by hw_to_dataset_features / build_dataset_frame). + if getattr(self.config.cameras[cam_key], "use_depth", False): + start = time.perf_counter() + obs_dict[f"{cam_key}_depth"] = cam.read_latest_depth() + dt_ms = (time.perf_counter() - start) * 1e3 + logger.debug(f"{self} read {cam_key} depth: {dt_ms:.1f}ms") + return obs_dict @check_if_not_connected