diff --git a/src/lerobot/datasets/dataset_tools.py b/src/lerobot/datasets/dataset_tools.py index e8c0d26af..9aa810b3f 100644 --- a/src/lerobot/datasets/dataset_tools.py +++ b/src/lerobot/datasets/dataset_tools.py @@ -62,7 +62,12 @@ from .utils import ( DEFAULT_EPISODES_PATH, update_chunk_file_indices, ) -from .video_utils import VideoEncoderConfig, encode_video_frames, get_video_info +from .video_utils import ( + VideoEncoderConfig, + camera_encoder_defaults, + encode_video_frames, + get_video_info, +) def _load_episode_with_stats(src_dataset: LeRobotDataset, episode_idx: int) -> dict: @@ -101,7 +106,8 @@ def delete_episodes( episode_indices: List of episode indices to delete. output_dir: Root directory where the edited dataset will be stored. If not specified, defaults to $HF_LEROBOT_HOME/repo_id. Equivalent to new_root in EditDatasetConfig. repo_id: Edited dataset identifier. Equivalent to new_repo_id in EditDatasetConfig. - camera_encoder_config: Video encoder settings used when re-encoding video segments (default: :class:`VideoEncoderConfig()`). + camera_encoder_config: Video encoder settings used when re-encoding video segments + (``None`` uses :func:`~lerobot.datasets.video_utils.camera_encoder_defaults`). """ if not episode_indices: raise ValueError("No episodes to delete") @@ -165,7 +171,8 @@ def split_dataset( splits: Either a dict mapping split names to episode indices, or a dict mapping split names to fractions (must sum to <= 1.0). output_dir: Root directory where the split datasets will be stored. If not specified, defaults to $HF_LEROBOT_HOME/repo_id. - camera_encoder_config: Video encoder settings used when re-encoding video segments (default: :class:`VideoEncoderConfig()`). + camera_encoder_config: Video encoder settings used when re-encoding video segments + (``None`` uses :func:`~lerobot.datasets.video_utils.camera_encoder_defaults`). Examples: Split by specific episodes @@ -598,10 +605,11 @@ def _keep_episodes_from_video_with_av( Ranges are half-open intervals: [start_frame, end_frame), where start_frame is inclusive and end_frame is exclusive. fps: Frame rate of the video. - camera_encoder_config: Video encoder settings (default: :class:`VideoEncoderConfig()`). + camera_encoder_config: Video encoder settings + (``None`` uses :func:`~lerobot.datasets.video_utils.camera_encoder_defaults`). """ if camera_encoder_config is None: - camera_encoder_config = VideoEncoderConfig() + camera_encoder_config = camera_encoder_defaults() from fractions import Fraction import av @@ -705,13 +713,14 @@ def _copy_and_reindex_videos( src_dataset: Source dataset to copy from dst_meta: Destination metadata object episode_mapping: Mapping from old episode indices to new indices - camera_encoder_config: Video encoder settings used when re-encoding segments (default: :class:`VideoEncoderConfig()`). + camera_encoder_config: Video encoder settings used when re-encoding segments + (``None`` uses :func:`~lerobot.datasets.video_utils.camera_encoder_defaults`). Returns: dict mapping episode index to its video metadata (chunk_index, file_index, timestamps) """ if camera_encoder_config is None: - camera_encoder_config = VideoEncoderConfig() + camera_encoder_config = camera_encoder_defaults() if src_dataset.meta.episodes is None: src_dataset.meta.episodes = load_episodes(src_dataset.meta.root) @@ -1654,7 +1663,8 @@ def convert_image_to_video_dataset( dataset: The source LeRobot dataset with images output_dir: Root directory where the edited dataset will be stored. If not specified, defaults to $HF_LEROBOT_HOME/repo_id. Equivalent to new_root in EditDatasetConfig. repo_id: Edited dataset identifier. Equivalent to new_repo_id in EditDatasetConfig. - camera_encoder_config: Video encoder settings (default: :class:`VideoEncoderConfig()`). + camera_encoder_config: Video encoder settings + (``None`` uses :func:`~lerobot.datasets.video_utils.camera_encoder_defaults`). episode_indices: List of episode indices to convert (None = all episodes) num_workers: Number of threads for parallel processing (default: 4) max_episodes_per_batch: Maximum episodes per video batch to avoid memory issues (None = no limit) @@ -1664,7 +1674,7 @@ def convert_image_to_video_dataset( New LeRobotDataset with images encoded as videos """ if camera_encoder_config is None: - camera_encoder_config = VideoEncoderConfig() + camera_encoder_config = camera_encoder_defaults() # Check that it's an image dataset if len(dataset.meta.video_keys) > 0: diff --git a/src/lerobot/datasets/dataset_writer.py b/src/lerobot/datasets/dataset_writer.py index 0a4fb4308..da646d785 100644 --- a/src/lerobot/datasets/dataset_writer.py +++ b/src/lerobot/datasets/dataset_writer.py @@ -53,6 +53,7 @@ from .utils import ( from .video_utils import ( StreamingVideoEncoder, VideoEncoderConfig, + camera_encoder_defaults, concatenate_video_files, encode_video_frames, get_video_duration_in_s, @@ -95,7 +96,7 @@ class DatasetWriter: self, meta: LeRobotDatasetMetadata, root: Path, - camera_encoder_config: VideoEncoderConfig, + camera_encoder_config: VideoEncoderConfig | None, encoder_threads: int | None, batch_encoding_size: int, streaming_encoder: StreamingVideoEncoder | None = None, @@ -108,6 +109,7 @@ class DatasetWriter: settings, and episode persistence). root: Local dataset root directory. camera_encoder_config: Video encoder settings applied to all cameras. + ``None`` uses :func:`~lerobot.datasets.video_utils.camera_encoder_defaults`. encoder_threads: Number of encoder threads (global). ``None`` lets the codec decide. batch_encoding_size: Number of episodes to accumulate before @@ -118,7 +120,7 @@ class DatasetWriter: """ self._meta = meta self._root = root - self._camera_encoder_config = camera_encoder_config or VideoEncoderConfig() + self._camera_encoder_config = camera_encoder_config or camera_encoder_defaults() self._encoder_threads = encoder_threads self._batch_encoding_size = batch_encoding_size self._streaming_encoder = streaming_encoder diff --git a/src/lerobot/datasets/lerobot_dataset.py b/src/lerobot/datasets/lerobot_dataset.py index 759f7dac3..110aac662 100644 --- a/src/lerobot/datasets/lerobot_dataset.py +++ b/src/lerobot/datasets/lerobot_dataset.py @@ -178,7 +178,8 @@ class LeRobotDataset(torch.utils.data.Dataset): batch_encoding_size (int, optional): Number of episodes to accumulate before batch encoding videos. Set to 1 for immediate encoding (default), or higher for batched encoding. Defaults to 1. camera_encoder_config (VideoEncoderConfig | None, optional): Video encoder settings for cameras - (codec, quality, etc.). + (codec, quality, etc.). When ``None``, :func:`~lerobot.datasets.video_utils.camera_encoder_defaults` + is used by the writer. encoder_threads (int | None, optional): Number of encoder threads (global). ``None`` lets the codec decide. streaming_encoding (bool, optional): If True, encode video frames in real-time during capture @@ -298,7 +299,7 @@ class LeRobotDataset(torch.utils.data.Dataset): @staticmethod def _build_streaming_encoder( fps: int, - camera_encoder_config: VideoEncoderConfig, + camera_encoder_config: VideoEncoderConfig | None, encoder_threads: int | None, encoder_queue_maxsize: int, ) -> StreamingVideoEncoder: @@ -653,6 +654,7 @@ class LeRobotDataset(torch.utils.data.Dataset): batch_encoding_size: Number of episodes to accumulate before batch-encoding videos. ``1`` means encode immediately. camera_encoder_config: Video encoder settings for cameras (codec, quality, etc.). + When ``None``, :func:`~lerobot.datasets.video_utils.camera_encoder_defaults` is used. encoder_threads: Number of encoder threads (global). ``None`` lets the codec decide. metadata_buffer_size: Number of episode metadata records to buffer @@ -753,6 +755,7 @@ class LeRobotDataset(torch.utils.data.Dataset): batch_encoding_size: Number of episodes to accumulate before batch-encoding videos. camera_encoder_config: Video encoder settings for cameras (codec, quality, etc.). + When ``None``, :func:`~lerobot.datasets.video_utils.camera_encoder_defaults` is used. encoder_threads: Number of encoder threads (global). ``None`` lets the codec decide. image_writer_processes: Subprocesses for async image writing. diff --git a/src/lerobot/datasets/video_utils.py b/src/lerobot/datasets/video_utils.py index 5d8d653c6..c2931aa84 100644 --- a/src/lerobot/datasets/video_utils.py +++ b/src/lerobot/datasets/video_utils.py @@ -499,7 +499,7 @@ def encode_video_frames( ) -> None: """More info on ffmpeg arguments tuning on `benchmark/video/README.md`""" if camera_encoder_config is None: - camera_encoder_config = VideoEncoderConfig() + camera_encoder_config = camera_encoder_defaults() vcodec = camera_encoder_config.vcodec pix_fmt = camera_encoder_config.pix_fmt @@ -803,14 +803,14 @@ class StreamingVideoEncoder: Args: fps: Frames per second for the output videos. camera_encoder_config: Video encoder settings applied to all cameras. - When ``None``, :class:`VideoEncoderConfig` defaults are used. + When ``None``, :func:`camera_encoder_defaults` is used. encoder_threads: Number of encoder threads (global setting). ``None`` lets the codec decide. queue_maxsize: Max frames to buffer per camera before back-pressure drops frames. """ self.fps = fps - self._camera_encoder_config = camera_encoder_config or VideoEncoderConfig() + self._camera_encoder_config = camera_encoder_config or camera_encoder_defaults() self._encoder_threads = encoder_threads self.queue_maxsize = queue_maxsize