feat(mm as default): adding a global DEFAULT_DEPTH_UNIT variable setting mm as default depth unit

This commit is contained in:
CarolinePascal
2026-06-22 23:29:22 +02:00
parent a08c3ec4a6
commit 8f939767ca
6 changed files with 14 additions and 9 deletions
@@ -36,7 +36,7 @@ from typing import Any, Protocol
import PIL.Image
import torch
from lerobot.configs.video import VideoEncoderConfig
from lerobot.configs import VideoEncoderConfig
from lerobot.datasets.video_utils import decode_video_frames, reencode_video
from .reader import EpisodeRecord, snap_to_frame
+2
View File
@@ -33,6 +33,7 @@ from .types import (
RTCAttentionSchedule,
)
from .video import (
DEFAULT_DEPTH_UNIT,
VALID_VIDEO_CODECS,
VIDEO_ENCODER_INFO_KEYS,
DepthEncoderConfig,
@@ -67,6 +68,7 @@ __all__ = [
# Factories
"encoder_config_from_video_info",
# Constants
"DEFAULT_DEPTH_UNIT",
"VALID_VIDEO_CODECS",
"VIDEO_ENCODER_INFO_KEYS",
]
+4 -2
View File
@@ -19,6 +19,8 @@ from dataclasses import dataclass, field
from lerobot.transforms import ImageTransformsConfig
from lerobot.utils.import_utils import get_safe_default_video_backend
from .video import DEFAULT_DEPTH_UNIT
@dataclass
class DatasetConfig:
@@ -38,9 +40,9 @@ class DatasetConfig:
# When True, RGB video frames are returned as uint8 tensors (0-255) instead of float32 (0.0-1.0).
# This reduces memory and speeds up DataLoader IPC. The training pipeline handles the conversion.
return_uint8: bool = False
# Physical unit depth maps are dequantized to at load time: "mm" (millimetres) or "m" (metres).
# Physical unit that depth maps are dequantized to at load time: "mm" (millimeters) or "m" (meters).
# Has no effect on datasets without depth cameras.
depth_output_unit: str = "mm"
depth_output_unit: str = DEFAULT_DEPTH_UNIT
streaming: bool = False
def __post_init__(self) -> None:
+1
View File
@@ -62,6 +62,7 @@ DEFAULT_DEPTH_MAX: float = 10.0
DEFAULT_DEPTH_SHIFT: float = 3.5
DEFAULT_DEPTH_USE_LOG: bool = True
DEFAULT_DEPTH_PIX_FMT: str = "gray12le"
# Default physical unit for dequantized depth maps; used as the default ``depth_output_unit`` ("mm" or "m").
DEFAULT_DEPTH_UNIT: str = "mm"
# Depth-specific tuning fields persisted under ``features[*]["info"]`` as ``video.<name>``.
DEPTH_ENCODER_INFO_FIELD_NAMES: frozenset[str] = frozenset({"depth_min", "depth_max", "shift", "use_log"})
+2 -2
View File
@@ -22,7 +22,7 @@ from pathlib import Path
import datasets
import torch
from lerobot.configs.video import DepthEncoderConfig
from lerobot.configs import DEFAULT_DEPTH_UNIT, DepthEncoderConfig
from .dataset_metadata import LeRobotDatasetMetadata
from .depth_utils import dequantize_depth
@@ -54,7 +54,7 @@ class DatasetReader:
delta_timestamps: dict[str, list[float]] | None,
image_transforms: Callable | None,
return_uint8: bool = False,
depth_output_unit: str = "mm",
depth_output_unit: str = DEFAULT_DEPTH_UNIT,
):
"""Initialize the reader with metadata, filtering, and transform config.
+4 -4
View File
@@ -24,7 +24,7 @@ import torch.utils
from huggingface_hub import HfApi, snapshot_download
from huggingface_hub.errors import RevisionNotFoundError
from lerobot.configs import DepthEncoderConfig, VideoEncoderConfig
from lerobot.configs import DEFAULT_DEPTH_UNIT, DepthEncoderConfig, VideoEncoderConfig
from lerobot.utils.constants import HF_LEROBOT_HUB_CACHE
from .dataset_metadata import CODEBASE_VERSION, LeRobotDatasetMetadata
@@ -58,7 +58,7 @@ class LeRobotDataset(torch.utils.data.Dataset):
download_videos: bool = True,
video_backend: str | None = None,
return_uint8: bool = False,
depth_output_unit: str = "mm",
depth_output_unit: str = DEFAULT_DEPTH_UNIT,
batch_encoding_size: int = 1,
camera_encoder: VideoEncoderConfig | None = None,
depth_encoder: DepthEncoderConfig | None = None,
@@ -725,7 +725,7 @@ class LeRobotDataset(torch.utils.data.Dataset):
obj.episodes = None
obj._video_backend = video_backend if video_backend is not None else get_safe_default_video_backend()
obj._return_uint8 = False
obj._depth_output_unit = "mm"
obj._depth_output_unit = DEFAULT_DEPTH_UNIT
obj._batch_encoding_size = batch_encoding_size
obj._encoder_threads = encoder_threads
@@ -824,7 +824,7 @@ class LeRobotDataset(torch.utils.data.Dataset):
obj.episodes = None
obj._video_backend = video_backend if video_backend else get_safe_default_video_backend()
obj._return_uint8 = False
obj._depth_output_unit = "mm"
obj._depth_output_unit = DEFAULT_DEPTH_UNIT
obj._batch_encoding_size = batch_encoding_size
if obj._requested_root is not None: