From 23f5459ba13d21d9b26de7365b80ea4ae88fb14e Mon Sep 17 00:00:00 2001 From: CarolinePascal Date: Mon, 15 Jun 2026 14:41:41 +0200 Subject: [PATCH] docs(docstring): updating docstrings --- src/lerobot/datasets/compute_stats.py | 6 ++-- src/lerobot/datasets/dataset_tools.py | 30 +++++++++++------- src/lerobot/datasets/dataset_writer.py | 9 +++--- src/lerobot/datasets/lerobot_dataset.py | 6 ++-- src/lerobot/datasets/video_utils.py | 42 ++++++++++++++++++++++--- src/lerobot/utils/feature_utils.py | 6 ++-- 6 files changed, 71 insertions(+), 28 deletions(-) diff --git a/src/lerobot/datasets/compute_stats.py b/src/lerobot/datasets/compute_stats.py index 312f50dbc..6316dbee3 100644 --- a/src/lerobot/datasets/compute_stats.py +++ b/src/lerobot/datasets/compute_stats.py @@ -506,8 +506,10 @@ def compute_episode_stats( Each statistics dictionary contains min, max, mean, std, count, and quantiles. Note: - Image statistics are normalized to [0,1] range and have shape (3,1,1) for - per-channel values when dtype is 'image' or 'video'. + For 'image'/'video' features, stats are computed per channel and kept with a + leading channel axis (e.g. shape (3, 1, 1) for RGB). RGB stats are divided by + 255 to land in [0, 1]; depth maps (features flagged with ``is_depth_map``) skip + this rescaling and remain in their stored units. """ if quantile_list is None: quantile_list = DEFAULT_QUANTILES diff --git a/src/lerobot/datasets/dataset_tools.py b/src/lerobot/datasets/dataset_tools.py index 707a0ed2b..ec6468433 100644 --- a/src/lerobot/datasets/dataset_tools.py +++ b/src/lerobot/datasets/dataset_tools.py @@ -1679,21 +1679,27 @@ def convert_image_to_video_dataset( LeRobot dataset structure with videos stored in chunked MP4 files. Args: - dataset: The source LeRobot dataset with images - output_dir: Root directory where the edited dataset will be stored. If not specified, defaults to $HF_LEROBOT_HOME/repo_id. Equivalent to new_root in EditDatasetConfig. - repo_id: Edited dataset identifier. 
Equivalent to new_repo_id in EditDatasetConfig. - camera_encoder: Video encoder settings applied to RGB cameras - (``None`` uses :func:`~lerobot.configs.camera_encoder_defaults`). + dataset: The source LeRobot dataset with images. + output_dir: Root directory where the converted dataset will be stored. When + ``None``, defaults to ``$HF_LEROBOT_HOME/repo_id``. Equivalent to + ``new_root`` in ``EditDatasetConfig``. + repo_id: Converted dataset identifier. Equivalent to ``new_repo_id`` in + ``EditDatasetConfig``. + camera_encoder: Video encoder settings applied to RGB cameras. When ``None``, + :func:`~lerobot.configs.video.camera_encoder_defaults` is used. depth_encoder: Video encoder settings applied to depth-map cameras, including - the quantization parameters persisted to the dataset metadata - (``None`` uses :func:`~lerobot.configs.depth_encoder_defaults`). - episode_indices: List of episode indices to convert (None = all episodes) - num_workers: Number of threads for parallel processing (default: 4) - max_episodes_per_batch: Maximum episodes per video batch to avoid memory issues (None = no limit) - max_frames_per_batch: Maximum frames per video batch to avoid memory issues (None = no limit) + the quantization parameters persisted to the dataset metadata. When + ``None``, :func:`~lerobot.configs.video.depth_encoder_defaults` is used. + episode_indices: Episode indices to convert. When ``None``, all episodes are + converted. + num_workers: Number of threads for parallel processing. + max_episodes_per_batch: Maximum episodes per video batch, to bound memory use. + ``None`` means no limit. + max_frames_per_batch: Maximum frames per video batch, to bound memory use. + ``None`` means no limit. Returns: - New LeRobotDataset with images encoded as videos + A new :class:`LeRobotDataset` with images encoded as videos. 
""" if camera_encoder is None: camera_encoder = camera_encoder_defaults() diff --git a/src/lerobot/datasets/dataset_writer.py b/src/lerobot/datasets/dataset_writer.py index d13e37e6b..8467c31f1 100644 --- a/src/lerobot/datasets/dataset_writer.py +++ b/src/lerobot/datasets/dataset_writer.py @@ -120,10 +120,11 @@ class DatasetWriter: meta: Dataset metadata instance (used for feature schema, chunk settings, and episode persistence). root: Local dataset root directory. - camera_encoder: Video encoder settings applied to all cameras. - ``None`` uses :func:`~lerobot.configs.camera_encoder_defaults`. - depth_encoder: Video encoder settings applied to all **depth** cameras. - ``None`` uses :func:`~lerobot.configs.depth_encoder_defaults`. + camera_encoder: Video encoder settings applied to RGB cameras. When + ``None``, :func:`~lerobot.configs.video.camera_encoder_defaults` is used. + depth_encoder: Video encoder settings applied to depth cameras, including + the quantization parameters. When ``None``, + :func:`~lerobot.configs.video.depth_encoder_defaults` is used. encoder_threads: Number of encoder threads (global). ``None`` lets the codec decide. batch_encoding_size: Number of episodes to accumulate before diff --git a/src/lerobot/datasets/lerobot_dataset.py b/src/lerobot/datasets/lerobot_dataset.py index 1f7cf0503..e79159336 100644 --- a/src/lerobot/datasets/lerobot_dataset.py +++ b/src/lerobot/datasets/lerobot_dataset.py @@ -189,7 +189,7 @@ class LeRobotDataset(torch.utils.data.Dataset): (codec, quality, etc.). When ``None``, :func:`~lerobot.configs.video.camera_encoder_defaults` is used by the writer. depth_encoder (DepthEncoderConfig | None, optional): Video encoder settings for depth cameras - (codec, quality, etc.). When ``None``, :func:`~lerobot.configs.depth.depth_encoder_defaults` + (codec, quality, etc.). When ``None``, :func:`~lerobot.configs.video.depth_encoder_defaults` is used by the writer. 
encoder_threads (int | None, optional): Number of encoder threads (global). ``None`` lets the codec decide. @@ -692,7 +692,7 @@ class LeRobotDataset(torch.utils.data.Dataset): camera_encoder: Video encoder settings for cameras (codec, quality, etc.). When ``None``, :func:`~lerobot.configs.video.camera_encoder_defaults` is used. depth_encoder: Video encoder settings for depth cameras (codec, quality, etc.). - When ``None``, :func:`~lerobot.configs.depth.depth_encoder_defaults` is used. + When ``None``, :func:`~lerobot.configs.video.depth_encoder_defaults` is used. encoder_threads: Number of encoder threads (global). ``None`` lets the codec decide. metadata_buffer_size: Number of episode metadata records to buffer @@ -798,7 +798,7 @@ class LeRobotDataset(torch.utils.data.Dataset): camera_encoder: Video encoder settings for cameras (codec, quality, etc.). When ``None``, :func:`~lerobot.configs.video.camera_encoder_defaults` is used. depth_encoder: Video encoder settings for depth cameras (codec, quality, etc.). - When ``None``, :func:`~lerobot.configs.depth.depth_encoder_defaults` is used. + When ``None``, :func:`~lerobot.configs.video.depth_encoder_defaults` is used. encoder_threads: Number of encoder threads (global). ``None`` lets the codec decide. image_writer_processes: Subprocesses for async image writing. diff --git a/src/lerobot/datasets/video_utils.py b/src/lerobot/datasets/video_utils.py index 8f77ca610..de2ddb7bb 100644 --- a/src/lerobot/datasets/video_utils.py +++ b/src/lerobot/datasets/video_utils.py @@ -436,7 +436,28 @@ def encode_video_frames( log_level: int | None = av.logging.WARNING, overwrite: bool = False, ) -> None: - """More info on ffmpeg arguments tuning on `benchmark/video/README.md`""" + """Encode a directory of image frames into an MP4 video. 
+ + When ``video_encoder`` is a :class:`~lerobot.configs.video.DepthEncoderConfig`, + frames are read from ``.tiff`` files and quantized to 12-bit depth codes using the + encoder's ``depth_min`` / ``depth_max`` / ``shift`` / ``use_log``; otherwise ``.png`` + RGB frames are encoded directly. + + Args: + imgs_dir: Directory containing the frames to encode, named ``frame-000000`` + onwards (``.png`` for RGB, ``.tiff`` for depth). + video_path: Output path for the encoded ``.mp4`` file. + fps: Frame rate of the output video. + video_encoder: Encoder settings (codec, pixel format, quality, ...). When + ``None``, :func:`~lerobot.configs.video.camera_encoder_defaults` is used. Pass a + :class:`~lerobot.configs.video.DepthEncoderConfig` to encode depth frames. + encoder_threads: Per-encoder thread count forwarded to the codec. ``None`` + lets the codec decide. + log_level: libav log level to set while encoding, or ``None`` to leave the + current logging configuration unchanged. + overwrite: When ``False`` and ``video_path`` already exists, skip encoding and + log a warning. When ``True``, re-encode and replace the existing file. + """ if video_encoder is None: video_encoder = camera_encoder_defaults() vcodec = video_encoder.vcodec @@ -873,12 +894,15 @@ class StreamingVideoEncoder: """ Args: fps: Frames per second for the output videos. - camera_encoder: Video encoder settings applied to all cameras. + camera_encoder: Video encoder settings applied to all RGB cameras. When ``None``, :func:`camera_encoder_defaults` is used. - encoder_threads: Number of encoder threads (global setting). - ``None`` lets the codec decide. + depth_encoder: Video encoder settings applied to all depth cameras, + including the depth quantization parameters. When ``None``, + :func:`depth_encoder_defaults` is used. queue_maxsize: Max frames to buffer per camera before back-pressure drops frames. + encoder_threads: Number of encoder threads (global setting). + ``None`` lets the codec decide. 
""" self.fps = fps self._camera_encoder = camera_encoder or camera_encoder_defaults() @@ -1145,7 +1169,15 @@ def get_video_info( video_path: Path to the encoded video file to probe. video_encoder: If provided, record the exact encoder settings used to encode this video. Stream-derived values take precedence — encoder fields are only written for keys - not already populated from the video file itself. + not already populated from the video file itself. When a + :class:`~lerobot.configs.video.DepthEncoderConfig` is passed, the depth + quantization parameters (``depth_min`` / ``depth_max`` / ``shift`` / + ``use_log``) are recorded so frames can be dequantized on read. + + Returns: + The ``video.*`` / ``audio.*`` info dict, including ``is_depth_map`` which is + ``True`` only when ``video_encoder`` is a + :class:`~lerobot.configs.video.DepthEncoderConfig`. """ logging.getLogger("libav").setLevel(av.logging.WARNING) diff --git a/src/lerobot/utils/feature_utils.py b/src/lerobot/utils/feature_utils.py index 44f3877e4..38516d6ab 100644 --- a/src/lerobot/utils/feature_utils.py +++ b/src/lerobot/utils/feature_utils.py @@ -51,7 +51,9 @@ def hw_to_dataset_features( This function takes a dictionary describing hardware outputs (like joint states or camera image shapes) and formats it into the standard LeRobot feature - specification. + specification. Single-channel cameras (shape ``(H, W, 1)``) are flagged as depth + maps via ``info["is_depth_map"] = True``; three-channel cameras ``(H, W, 3)`` are + treated as RGB. Args: hw_features (dict): Dictionary mapping feature names to their type (float for @@ -61,7 +63,7 @@ def hw_to_dataset_features( use_video (bool): If True, image features are marked as "video", otherwise "image". Returns: - dict: A LeRobot features dictionary. + dict: A LeRobot features dictionary. Depth cameras carry ``info["is_depth_map"] = True``. """ features = {} joint_fts = {