mirror of
https://github.com/huggingface/lerobot.git
synced 2026-06-25 20:27:05 +00:00
feat(mm as default): adding a global DEFAULT_DEPTH_UNIT variable setting mm as default depth unit
This commit is contained in:
@@ -36,7 +36,7 @@ from typing import Any, Protocol
|
||||
import PIL.Image
|
||||
import torch
|
||||
|
||||
from lerobot.configs.video import VideoEncoderConfig
|
||||
from lerobot.configs import VideoEncoderConfig
|
||||
from lerobot.datasets.video_utils import decode_video_frames, reencode_video
|
||||
|
||||
from .reader import EpisodeRecord, snap_to_frame
|
||||
|
||||
@@ -33,6 +33,7 @@ from .types import (
|
||||
RTCAttentionSchedule,
|
||||
)
|
||||
from .video import (
|
||||
DEFAULT_DEPTH_UNIT,
|
||||
VALID_VIDEO_CODECS,
|
||||
VIDEO_ENCODER_INFO_KEYS,
|
||||
DepthEncoderConfig,
|
||||
@@ -67,6 +68,7 @@ __all__ = [
|
||||
# Factories
|
||||
"encoder_config_from_video_info",
|
||||
# Constants
|
||||
"DEFAULT_DEPTH_UNIT",
|
||||
"VALID_VIDEO_CODECS",
|
||||
"VIDEO_ENCODER_INFO_KEYS",
|
||||
]
|
||||
|
||||
@@ -19,6 +19,8 @@ from dataclasses import dataclass, field
|
||||
from lerobot.transforms import ImageTransformsConfig
|
||||
from lerobot.utils.import_utils import get_safe_default_video_backend
|
||||
|
||||
from .video import DEFAULT_DEPTH_UNIT
|
||||
|
||||
|
||||
@dataclass
|
||||
class DatasetConfig:
|
||||
@@ -38,9 +40,9 @@ class DatasetConfig:
|
||||
# When True, RGB video frames are returned as uint8 tensors (0-255) instead of float32 (0.0-1.0).
|
||||
# This reduces memory and speeds up DataLoader IPC. The training pipeline handles the conversion.
|
||||
return_uint8: bool = False
|
||||
# Physical unit depth maps are dequantized to at load time: "mm" (millimetres) or "m" (metres).
|
||||
# Physical unit that depth maps are dequantized to at load time: "mm" (millimeters) or "m" (meters).
|
||||
# Has no effect on datasets without depth cameras.
|
||||
depth_output_unit: str = "mm"
|
||||
depth_output_unit: str = DEFAULT_DEPTH_UNIT
|
||||
streaming: bool = False
|
||||
|
||||
def __post_init__(self) -> None:
|
||||
|
||||
@@ -62,6 +62,7 @@ DEFAULT_DEPTH_MAX: float = 10.0
|
||||
DEFAULT_DEPTH_SHIFT: float = 3.5
|
||||
DEFAULT_DEPTH_USE_LOG: bool = True
|
||||
DEFAULT_DEPTH_PIX_FMT: str = "gray12le"
|
||||
# Default physical unit for depth values: "mm" (millimeters).
DEFAULT_DEPTH_UNIT: str = "mm"
|
||||
|
||||
# Depth-specific tuning fields persisted under ``features[*]["info"]`` as ``video.<name>``.
|
||||
DEPTH_ENCODER_INFO_FIELD_NAMES: frozenset[str] = frozenset({"depth_min", "depth_max", "shift", "use_log"})
|
||||
|
||||
@@ -22,7 +22,7 @@ from pathlib import Path
|
||||
import datasets
|
||||
import torch
|
||||
|
||||
from lerobot.configs.video import DepthEncoderConfig
|
||||
from lerobot.configs import DEFAULT_DEPTH_UNIT, DepthEncoderConfig
|
||||
|
||||
from .dataset_metadata import LeRobotDatasetMetadata
|
||||
from .depth_utils import dequantize_depth
|
||||
@@ -54,7 +54,7 @@ class DatasetReader:
|
||||
delta_timestamps: dict[str, list[float]] | None,
|
||||
image_transforms: Callable | None,
|
||||
return_uint8: bool = False,
|
||||
depth_output_unit: str = "mm",
|
||||
depth_output_unit: str = DEFAULT_DEPTH_UNIT,
|
||||
):
|
||||
"""Initialize the reader with metadata, filtering, and transform config.
|
||||
|
||||
|
||||
@@ -24,7 +24,7 @@ import torch.utils
|
||||
from huggingface_hub import HfApi, snapshot_download
|
||||
from huggingface_hub.errors import RevisionNotFoundError
|
||||
|
||||
from lerobot.configs import DepthEncoderConfig, VideoEncoderConfig
|
||||
from lerobot.configs import DEFAULT_DEPTH_UNIT, DepthEncoderConfig, VideoEncoderConfig
|
||||
from lerobot.utils.constants import HF_LEROBOT_HUB_CACHE
|
||||
|
||||
from .dataset_metadata import CODEBASE_VERSION, LeRobotDatasetMetadata
|
||||
@@ -58,7 +58,7 @@ class LeRobotDataset(torch.utils.data.Dataset):
|
||||
download_videos: bool = True,
|
||||
video_backend: str | None = None,
|
||||
return_uint8: bool = False,
|
||||
depth_output_unit: str = "mm",
|
||||
depth_output_unit: str = DEFAULT_DEPTH_UNIT,
|
||||
batch_encoding_size: int = 1,
|
||||
camera_encoder: VideoEncoderConfig | None = None,
|
||||
depth_encoder: DepthEncoderConfig | None = None,
|
||||
@@ -725,7 +725,7 @@ class LeRobotDataset(torch.utils.data.Dataset):
|
||||
obj.episodes = None
|
||||
obj._video_backend = video_backend if video_backend is not None else get_safe_default_video_backend()
|
||||
obj._return_uint8 = False
|
||||
obj._depth_output_unit = "mm"
|
||||
obj._depth_output_unit = DEFAULT_DEPTH_UNIT
|
||||
obj._batch_encoding_size = batch_encoding_size
|
||||
obj._encoder_threads = encoder_threads
|
||||
|
||||
@@ -824,7 +824,7 @@ class LeRobotDataset(torch.utils.data.Dataset):
|
||||
obj.episodes = None
|
||||
obj._video_backend = video_backend if video_backend else get_safe_default_video_backend()
|
||||
obj._return_uint8 = False
|
||||
obj._depth_output_unit = "mm"
|
||||
obj._depth_output_unit = DEFAULT_DEPTH_UNIT
|
||||
obj._batch_encoding_size = batch_encoding_size
|
||||
|
||||
if obj._requested_root is not None:
|
||||
|
||||
Reference in New Issue
Block a user