From 8f939767ca9fc77d3eeedeeef4882e8d4bc9ec4a Mon Sep 17 00:00:00 2001 From: CarolinePascal Date: Mon, 22 Jun 2026 23:29:22 +0200 Subject: [PATCH] feat(mm as default): adding a global DEFAULT_DEPTH_UNIT variable setting mm as default depth unit --- src/lerobot/annotations/steerable_pipeline/frames.py | 2 +- src/lerobot/configs/__init__.py | 2 ++ src/lerobot/configs/default.py | 6 ++++-- src/lerobot/configs/video.py | 1 + src/lerobot/datasets/dataset_reader.py | 4 ++-- src/lerobot/datasets/lerobot_dataset.py | 8 ++++---- 6 files changed, 14 insertions(+), 9 deletions(-) diff --git a/src/lerobot/annotations/steerable_pipeline/frames.py b/src/lerobot/annotations/steerable_pipeline/frames.py index 30daa7244..64ee14caf 100644 --- a/src/lerobot/annotations/steerable_pipeline/frames.py +++ b/src/lerobot/annotations/steerable_pipeline/frames.py @@ -36,7 +36,7 @@ from typing import Any, Protocol import PIL.Image import torch -from lerobot.configs.video import VideoEncoderConfig +from lerobot.configs import VideoEncoderConfig from lerobot.datasets.video_utils import decode_video_frames, reencode_video from .reader import EpisodeRecord, snap_to_frame diff --git a/src/lerobot/configs/__init__.py b/src/lerobot/configs/__init__.py index 93c1f5db6..9f31dd63e 100644 --- a/src/lerobot/configs/__init__.py +++ b/src/lerobot/configs/__init__.py @@ -33,6 +33,7 @@ from .types import ( RTCAttentionSchedule, ) from .video import ( + DEFAULT_DEPTH_UNIT, VALID_VIDEO_CODECS, VIDEO_ENCODER_INFO_KEYS, DepthEncoderConfig, @@ -67,6 +68,7 @@ __all__ = [ # Factories "encoder_config_from_video_info", # Constants + "DEFAULT_DEPTH_UNIT", "VALID_VIDEO_CODECS", "VIDEO_ENCODER_INFO_KEYS", ] diff --git a/src/lerobot/configs/default.py b/src/lerobot/configs/default.py index 2f23b213a..d40ec0b31 100644 --- a/src/lerobot/configs/default.py +++ b/src/lerobot/configs/default.py @@ -19,6 +19,8 @@ from dataclasses import dataclass, field from lerobot.transforms import ImageTransformsConfig from 
lerobot.utils.import_utils import get_safe_default_video_backend +from .video import DEFAULT_DEPTH_UNIT + @dataclass class DatasetConfig: @@ -38,9 +40,9 @@ class DatasetConfig: # When True, RGB video frames are returned as uint8 tensors (0-255) instead of float32 (0.0-1.0). # This reduces memory and speeds up DataLoader IPC. The training pipeline handles the conversion. return_uint8: bool = False - # Physical unit depth maps are dequantized to at load time: "mm" (millimetres) or "m" (metres). + # Physical unit depth maps are dequantized to at load time: "mm" (millimeters) or "m" (meters). # Has no effect on datasets without depth cameras. - depth_output_unit: str = "mm" + depth_output_unit: str = DEFAULT_DEPTH_UNIT streaming: bool = False def __post_init__(self) -> None: diff --git a/src/lerobot/configs/video.py b/src/lerobot/configs/video.py index c265b00ed..640c0409e 100644 --- a/src/lerobot/configs/video.py +++ b/src/lerobot/configs/video.py @@ -62,6 +62,7 @@ DEFAULT_DEPTH_MAX: float = 10.0 DEFAULT_DEPTH_SHIFT: float = 3.5 DEFAULT_DEPTH_USE_LOG: bool = True DEFAULT_DEPTH_PIX_FMT: str = "gray12le" +DEFAULT_DEPTH_UNIT = "mm" # Depth-specific tuning fields persisted under ``features[*]["info"]`` as ``video.``. 
DEPTH_ENCODER_INFO_FIELD_NAMES: frozenset[str] = frozenset({"depth_min", "depth_max", "shift", "use_log"}) diff --git a/src/lerobot/datasets/dataset_reader.py b/src/lerobot/datasets/dataset_reader.py index faf271bdb..1d8de670f 100644 --- a/src/lerobot/datasets/dataset_reader.py +++ b/src/lerobot/datasets/dataset_reader.py @@ -22,7 +22,7 @@ from pathlib import Path import datasets import torch -from lerobot.configs.video import DepthEncoderConfig +from lerobot.configs import DEFAULT_DEPTH_UNIT, DepthEncoderConfig from .dataset_metadata import LeRobotDatasetMetadata from .depth_utils import dequantize_depth @@ -54,7 +54,7 @@ class DatasetReader: delta_timestamps: dict[str, list[float]] | None, image_transforms: Callable | None, return_uint8: bool = False, - depth_output_unit: str = "mm", + depth_output_unit: str = DEFAULT_DEPTH_UNIT, ): """Initialize the reader with metadata, filtering, and transform config. diff --git a/src/lerobot/datasets/lerobot_dataset.py b/src/lerobot/datasets/lerobot_dataset.py index 36e0561e6..a694ff776 100644 --- a/src/lerobot/datasets/lerobot_dataset.py +++ b/src/lerobot/datasets/lerobot_dataset.py @@ -24,7 +24,7 @@ import torch.utils from huggingface_hub import HfApi, snapshot_download from huggingface_hub.errors import RevisionNotFoundError -from lerobot.configs import DepthEncoderConfig, VideoEncoderConfig +from lerobot.configs import DEFAULT_DEPTH_UNIT, DepthEncoderConfig, VideoEncoderConfig from lerobot.utils.constants import HF_LEROBOT_HUB_CACHE from .dataset_metadata import CODEBASE_VERSION, LeRobotDatasetMetadata @@ -58,7 +58,7 @@ class LeRobotDataset(torch.utils.data.Dataset): download_videos: bool = True, video_backend: str | None = None, return_uint8: bool = False, - depth_output_unit: str = "mm", + depth_output_unit: str = DEFAULT_DEPTH_UNIT, batch_encoding_size: int = 1, camera_encoder: VideoEncoderConfig | None = None, depth_encoder: DepthEncoderConfig | None = None, @@ -725,7 +725,7 @@ class 
LeRobotDataset(torch.utils.data.Dataset): obj.episodes = None obj._video_backend = video_backend if video_backend is not None else get_safe_default_video_backend() obj._return_uint8 = False - obj._depth_output_unit = "mm" + obj._depth_output_unit = DEFAULT_DEPTH_UNIT obj._batch_encoding_size = batch_encoding_size obj._encoder_threads = encoder_threads @@ -824,7 +824,7 @@ class LeRobotDataset(torch.utils.data.Dataset): obj.episodes = None obj._video_backend = video_backend if video_backend else get_safe_default_video_backend() obj._return_uint8 = False - obj._depth_output_unit = "mm" + obj._depth_output_unit = DEFAULT_DEPTH_UNIT obj._batch_encoding_size = batch_encoding_size if obj._requested_root is not None: