Compare commits

...

3 Commits

4 changed files with 41 additions and 39 deletions
+19 -9
View File
@@ -62,7 +62,12 @@ from .utils import (
DEFAULT_EPISODES_PATH, DEFAULT_EPISODES_PATH,
update_chunk_file_indices, update_chunk_file_indices,
) )
from .video_utils import VideoEncoderConfig, encode_video_frames, get_video_info from .video_utils import (
VideoEncoderConfig,
camera_encoder_defaults,
encode_video_frames,
get_video_info,
)
def _load_episode_with_stats(src_dataset: LeRobotDataset, episode_idx: int) -> dict: def _load_episode_with_stats(src_dataset: LeRobotDataset, episode_idx: int) -> dict:
@@ -101,7 +106,8 @@ def delete_episodes(
episode_indices: List of episode indices to delete. episode_indices: List of episode indices to delete.
output_dir: Root directory where the edited dataset will be stored. If not specified, defaults to $HF_LEROBOT_HOME/repo_id. Equivalent to new_root in EditDatasetConfig. output_dir: Root directory where the edited dataset will be stored. If not specified, defaults to $HF_LEROBOT_HOME/repo_id. Equivalent to new_root in EditDatasetConfig.
repo_id: Edited dataset identifier. Equivalent to new_repo_id in EditDatasetConfig. repo_id: Edited dataset identifier. Equivalent to new_repo_id in EditDatasetConfig.
camera_encoder_config: Video encoder settings used when re-encoding video segments (default: :class:`VideoEncoderConfig()`). camera_encoder_config: Video encoder settings used when re-encoding video segments
(``None`` uses :func:`~lerobot.datasets.video_utils.camera_encoder_defaults`).
""" """
if not episode_indices: if not episode_indices:
raise ValueError("No episodes to delete") raise ValueError("No episodes to delete")
@@ -165,7 +171,8 @@ def split_dataset(
splits: Either a dict mapping split names to episode indices, or a dict mapping splits: Either a dict mapping split names to episode indices, or a dict mapping
split names to fractions (must sum to <= 1.0). split names to fractions (must sum to <= 1.0).
output_dir: Root directory where the split datasets will be stored. If not specified, defaults to $HF_LEROBOT_HOME/repo_id. output_dir: Root directory where the split datasets will be stored. If not specified, defaults to $HF_LEROBOT_HOME/repo_id.
camera_encoder_config: Video encoder settings used when re-encoding video segments (default: :class:`VideoEncoderConfig()`). camera_encoder_config: Video encoder settings used when re-encoding video segments
(``None`` uses :func:`~lerobot.datasets.video_utils.camera_encoder_defaults`).
Examples: Examples:
Split by specific episodes Split by specific episodes
@@ -598,10 +605,11 @@ def _keep_episodes_from_video_with_av(
Ranges are half-open intervals: [start_frame, end_frame), where start_frame Ranges are half-open intervals: [start_frame, end_frame), where start_frame
is inclusive and end_frame is exclusive. is inclusive and end_frame is exclusive.
fps: Frame rate of the video. fps: Frame rate of the video.
camera_encoder_config: Video encoder settings (default: :class:`VideoEncoderConfig()`). camera_encoder_config: Video encoder settings
(``None`` uses :func:`~lerobot.datasets.video_utils.camera_encoder_defaults`).
""" """
if camera_encoder_config is None: if camera_encoder_config is None:
camera_encoder_config = VideoEncoderConfig() camera_encoder_config = camera_encoder_defaults()
from fractions import Fraction from fractions import Fraction
import av import av
@@ -705,13 +713,14 @@ def _copy_and_reindex_videos(
src_dataset: Source dataset to copy from src_dataset: Source dataset to copy from
dst_meta: Destination metadata object dst_meta: Destination metadata object
episode_mapping: Mapping from old episode indices to new indices episode_mapping: Mapping from old episode indices to new indices
camera_encoder_config: Video encoder settings used when re-encoding segments (default: :class:`VideoEncoderConfig()`). camera_encoder_config: Video encoder settings used when re-encoding segments
(``None`` uses :func:`~lerobot.datasets.video_utils.camera_encoder_defaults`).
Returns: Returns:
dict mapping episode index to its video metadata (chunk_index, file_index, timestamps) dict mapping episode index to its video metadata (chunk_index, file_index, timestamps)
""" """
if camera_encoder_config is None: if camera_encoder_config is None:
camera_encoder_config = VideoEncoderConfig() camera_encoder_config = camera_encoder_defaults()
if src_dataset.meta.episodes is None: if src_dataset.meta.episodes is None:
src_dataset.meta.episodes = load_episodes(src_dataset.meta.root) src_dataset.meta.episodes = load_episodes(src_dataset.meta.root)
@@ -1654,7 +1663,8 @@ def convert_image_to_video_dataset(
dataset: The source LeRobot dataset with images dataset: The source LeRobot dataset with images
output_dir: Root directory where the edited dataset will be stored. If not specified, defaults to $HF_LEROBOT_HOME/repo_id. Equivalent to new_root in EditDatasetConfig. output_dir: Root directory where the edited dataset will be stored. If not specified, defaults to $HF_LEROBOT_HOME/repo_id. Equivalent to new_root in EditDatasetConfig.
repo_id: Edited dataset identifier. Equivalent to new_repo_id in EditDatasetConfig. repo_id: Edited dataset identifier. Equivalent to new_repo_id in EditDatasetConfig.
camera_encoder_config: Video encoder settings (default: :class:`VideoEncoderConfig()`). camera_encoder_config: Video encoder settings
(``None`` uses :func:`~lerobot.datasets.video_utils.camera_encoder_defaults`).
episode_indices: List of episode indices to convert (None = all episodes) episode_indices: List of episode indices to convert (None = all episodes)
num_workers: Number of threads for parallel processing (default: 4) num_workers: Number of threads for parallel processing (default: 4)
max_episodes_per_batch: Maximum episodes per video batch to avoid memory issues (None = no limit) max_episodes_per_batch: Maximum episodes per video batch to avoid memory issues (None = no limit)
@@ -1664,7 +1674,7 @@ def convert_image_to_video_dataset(
New LeRobotDataset with images encoded as videos New LeRobotDataset with images encoded as videos
""" """
if camera_encoder_config is None: if camera_encoder_config is None:
camera_encoder_config = VideoEncoderConfig() camera_encoder_config = camera_encoder_defaults()
# Check that it's an image dataset # Check that it's an image dataset
if len(dataset.meta.video_keys) > 0: if len(dataset.meta.video_keys) > 0:
+4 -2
View File
@@ -53,6 +53,7 @@ from .utils import (
from .video_utils import ( from .video_utils import (
StreamingVideoEncoder, StreamingVideoEncoder,
VideoEncoderConfig, VideoEncoderConfig,
camera_encoder_defaults,
concatenate_video_files, concatenate_video_files,
encode_video_frames, encode_video_frames,
get_video_duration_in_s, get_video_duration_in_s,
@@ -95,7 +96,7 @@ class DatasetWriter:
self, self,
meta: LeRobotDatasetMetadata, meta: LeRobotDatasetMetadata,
root: Path, root: Path,
camera_encoder_config: VideoEncoderConfig, camera_encoder_config: VideoEncoderConfig | None,
encoder_threads: int | None, encoder_threads: int | None,
batch_encoding_size: int, batch_encoding_size: int,
streaming_encoder: StreamingVideoEncoder | None = None, streaming_encoder: StreamingVideoEncoder | None = None,
@@ -108,6 +109,7 @@ class DatasetWriter:
settings, and episode persistence). settings, and episode persistence).
root: Local dataset root directory. root: Local dataset root directory.
camera_encoder_config: Video encoder settings applied to all cameras. camera_encoder_config: Video encoder settings applied to all cameras.
``None`` uses :func:`~lerobot.datasets.video_utils.camera_encoder_defaults`.
encoder_threads: Number of encoder threads (global). ``None`` encoder_threads: Number of encoder threads (global). ``None``
lets the codec decide. lets the codec decide.
batch_encoding_size: Number of episodes to accumulate before batch_encoding_size: Number of episodes to accumulate before
@@ -118,7 +120,7 @@ class DatasetWriter:
""" """
self._meta = meta self._meta = meta
self._root = root self._root = root
self._camera_encoder_config = camera_encoder_config self._camera_encoder_config = camera_encoder_config or camera_encoder_defaults()
self._encoder_threads = encoder_threads self._encoder_threads = encoder_threads
self._batch_encoding_size = batch_encoding_size self._batch_encoding_size = batch_encoding_size
self._streaming_encoder = streaming_encoder self._streaming_encoder = streaming_encoder
+9 -20
View File
@@ -178,8 +178,8 @@ class LeRobotDataset(torch.utils.data.Dataset):
batch_encoding_size (int, optional): Number of episodes to accumulate before batch encoding videos. batch_encoding_size (int, optional): Number of episodes to accumulate before batch encoding videos.
Set to 1 for immediate encoding (default), or higher for batched encoding. Defaults to 1. Set to 1 for immediate encoding (default), or higher for batched encoding. Defaults to 1.
camera_encoder_config (VideoEncoderConfig | None, optional): Video encoder settings for cameras camera_encoder_config (VideoEncoderConfig | None, optional): Video encoder settings for cameras
(codec, quality, etc.). Defaults to (codec, quality, etc.). When ``None``, :func:`~lerobot.datasets.video_utils.camera_encoder_defaults`
:class:`~lerobot.datasets.video_utils.VideoEncoderConfig` defaults when ``None``. is used by the writer.
encoder_threads (int | None, optional): Number of encoder threads (global). ``None`` lets the encoder_threads (int | None, optional): Number of encoder threads (global). ``None`` lets the
codec decide. codec decide.
streaming_encoding (bool, optional): If True, encode video frames in real-time during capture streaming_encoding (bool, optional): If True, encode video frames in real-time during capture
@@ -204,9 +204,6 @@ class LeRobotDataset(torch.utils.data.Dataset):
self._video_backend = video_backend if video_backend else get_safe_default_video_backend() self._video_backend = video_backend if video_backend else get_safe_default_video_backend()
self._return_uint8 = return_uint8 self._return_uint8 = return_uint8
self._batch_encoding_size = batch_encoding_size self._batch_encoding_size = batch_encoding_size
if camera_encoder_config is None:
camera_encoder_config = VideoEncoderConfig()
self._camera_encoder_config = camera_encoder_config
self._encoder_threads = encoder_threads self._encoder_threads = encoder_threads
if self._requested_root is not None: if self._requested_root is not None:
@@ -253,14 +250,14 @@ class LeRobotDataset(torch.utils.data.Dataset):
if streaming_encoding and len(self.meta.video_keys) > 0: if streaming_encoding and len(self.meta.video_keys) > 0:
streaming_enc = self._build_streaming_encoder( streaming_enc = self._build_streaming_encoder(
self.meta.fps, self.meta.fps,
self._camera_encoder_config, camera_encoder_config,
self._encoder_threads, self._encoder_threads,
encoder_queue_maxsize, encoder_queue_maxsize,
) )
self.writer = DatasetWriter( self.writer = DatasetWriter(
meta=self.meta, meta=self.meta,
root=self.root, root=self.root,
camera_encoder_config=self._camera_encoder_config, camera_encoder_config=camera_encoder_config,
encoder_threads=self._encoder_threads, encoder_threads=self._encoder_threads,
batch_encoding_size=batch_encoding_size, batch_encoding_size=batch_encoding_size,
streaming_encoder=streaming_enc, streaming_encoder=streaming_enc,
@@ -302,7 +299,7 @@ class LeRobotDataset(torch.utils.data.Dataset):
@staticmethod @staticmethod
def _build_streaming_encoder( def _build_streaming_encoder(
fps: int, fps: int,
camera_encoder_config: VideoEncoderConfig, camera_encoder_config: VideoEncoderConfig | None,
encoder_threads: int | None, encoder_threads: int | None,
encoder_queue_maxsize: int, encoder_queue_maxsize: int,
) -> StreamingVideoEncoder: ) -> StreamingVideoEncoder:
@@ -656,9 +653,8 @@ class LeRobotDataset(torch.utils.data.Dataset):
video_backend: Video decoding backend (used when reading back). video_backend: Video decoding backend (used when reading back).
batch_encoding_size: Number of episodes to accumulate before batch_encoding_size: Number of episodes to accumulate before
batch-encoding videos. ``1`` means encode immediately. batch-encoding videos. ``1`` means encode immediately.
camera_encoder_config: Video encoder settings for cameras; defaults camera_encoder_config: Video encoder settings for cameras (codec, quality, etc.).
match :class:`~lerobot.datasets.video_utils.VideoEncoderConfig` When ``None``, :func:`~lerobot.datasets.video_utils.camera_encoder_defaults` is used.
when ``None``.
encoder_threads: Number of encoder threads (global). ``None`` encoder_threads: Number of encoder threads (global). ``None``
lets the codec decide. lets the codec decide.
metadata_buffer_size: Number of episode metadata records to buffer metadata_buffer_size: Number of episode metadata records to buffer
@@ -671,8 +667,6 @@ class LeRobotDataset(torch.utils.data.Dataset):
Returns: Returns:
A new :class:`LeRobotDataset` in write mode. A new :class:`LeRobotDataset` in write mode.
""" """
if camera_encoder_config is None:
camera_encoder_config = VideoEncoderConfig()
obj = cls.__new__(cls) obj = cls.__new__(cls)
obj.meta = LeRobotDatasetMetadata.create( obj.meta = LeRobotDatasetMetadata.create(
repo_id=repo_id, repo_id=repo_id,
@@ -696,7 +690,6 @@ class LeRobotDataset(torch.utils.data.Dataset):
obj._video_backend = video_backend if video_backend is not None else get_safe_default_video_backend() obj._video_backend = video_backend if video_backend is not None else get_safe_default_video_backend()
obj._return_uint8 = False obj._return_uint8 = False
obj._batch_encoding_size = batch_encoding_size obj._batch_encoding_size = batch_encoding_size
obj._camera_encoder_config = camera_encoder_config
obj._encoder_threads = encoder_threads obj._encoder_threads = encoder_threads
# Reader is lazily created on first access (write-only mode) # Reader is lazily created on first access (write-only mode)
@@ -761,9 +754,8 @@ class LeRobotDataset(torch.utils.data.Dataset):
video_backend: Video decoding backend for reading back data. video_backend: Video decoding backend for reading back data.
batch_encoding_size: Number of episodes to accumulate before batch_encoding_size: Number of episodes to accumulate before
batch-encoding videos. batch-encoding videos.
camera_encoder_config: Video encoder settings for cameras; defaults camera_encoder_config: Video encoder settings for cameras (codec, quality, etc.).
match :class:`~lerobot.datasets.video_utils.VideoEncoderConfig` When ``None``, :func:`~lerobot.datasets.video_utils.camera_encoder_defaults` is used.
when ``None``.
encoder_threads: Number of encoder threads (global). ``None`` encoder_threads: Number of encoder threads (global). ``None``
lets the codec decide. lets the codec decide.
image_writer_processes: Subprocesses for async image writing. image_writer_processes: Subprocesses for async image writing.
@@ -801,9 +793,6 @@ class LeRobotDataset(torch.utils.data.Dataset):
obj.repo_id, obj._requested_root, obj.revision, force_cache_sync=force_cache_sync obj.repo_id, obj._requested_root, obj.revision, force_cache_sync=force_cache_sync
) )
if camera_encoder_config is None:
camera_encoder_config = VideoEncoderConfig()
obj._camera_encoder_config = camera_encoder_config
obj._encoder_threads = encoder_threads obj._encoder_threads = encoder_threads
obj.root = obj.meta.root obj.root = obj.meta.root
+9 -8
View File
@@ -116,11 +116,12 @@ class VideoEncoderConfig:
check_video_encoder_config_pyav(self) check_video_encoder_config_pyav(self)
def resolve_vcodec(self) -> None: def resolve_vcodec(self) -> None:
"""Validate vcodec and resolve 'auto' to best available HW encoder, fallback to libsvtav1. """Check ``vcodec`` and, when it is ``"auto"``, pick a concrete encoder.
Any explicitly-requested codec that isn't in the local FFmpeg build is For ``"auto"``, the first hardware encoder in the preference list that FFmpeg
also silently rewritten to ``libsvtav1`` so encoding never hard-fails on exposes is chosen; if none are available, ``libsvtav1`` is used. If the
a host missing the requested encoder. resolved codec (explicit or after auto-selection) is not present in the
local FFmpeg build, raises ``ValueError``.
""" """
if self.vcodec not in VALID_VIDEO_CODECS: if self.vcodec not in VALID_VIDEO_CODECS:
raise ValueError(f"Invalid vcodec '{self.vcodec}'. Must be one of: {sorted(VALID_VIDEO_CODECS)}") raise ValueError(f"Invalid vcodec '{self.vcodec}'. Must be one of: {sorted(VALID_VIDEO_CODECS)}")
@@ -131,7 +132,7 @@ class VideoEncoderConfig:
logger.info(f"Auto-selected video codec: {encoder}") logger.info(f"Auto-selected video codec: {encoder}")
self.vcodec = encoder self.vcodec = encoder
return return
logger.info("No hardware encoder available, falling back to software encoder 'libsvtav1'") logger.warning("No hardware encoder available, falling back to software encoder 'libsvtav1'")
self.vcodec = "libsvtav1" self.vcodec = "libsvtav1"
if self.detect_available_encoders(self.vcodec): if self.detect_available_encoders(self.vcodec):
@@ -498,7 +499,7 @@ def encode_video_frames(
) -> None: ) -> None:
"""More info on ffmpeg arguments tuning on `benchmark/video/README.md`""" """More info on ffmpeg arguments tuning on `benchmark/video/README.md`"""
if camera_encoder_config is None: if camera_encoder_config is None:
camera_encoder_config = VideoEncoderConfig() camera_encoder_config = camera_encoder_defaults()
vcodec = camera_encoder_config.vcodec vcodec = camera_encoder_config.vcodec
pix_fmt = camera_encoder_config.pix_fmt pix_fmt = camera_encoder_config.pix_fmt
@@ -802,14 +803,14 @@ class StreamingVideoEncoder:
Args: Args:
fps: Frames per second for the output videos. fps: Frames per second for the output videos.
camera_encoder_config: Video encoder settings applied to all cameras. camera_encoder_config: Video encoder settings applied to all cameras.
When ``None``, :class:`VideoEncoderConfig` defaults are used. When ``None``, :func:`camera_encoder_defaults` is used.
encoder_threads: Number of encoder threads (global setting). encoder_threads: Number of encoder threads (global setting).
``None`` lets the codec decide. ``None`` lets the codec decide.
queue_maxsize: Max frames to buffer per camera before queue_maxsize: Max frames to buffer per camera before
back-pressure drops frames. back-pressure drops frames.
""" """
self.fps = fps self.fps = fps
self._camera_encoder_config = camera_encoder_config or VideoEncoderConfig() self._camera_encoder_config = camera_encoder_config or camera_encoder_defaults()
self._encoder_threads = encoder_threads self._encoder_threads = encoder_threads
self.queue_maxsize = queue_maxsize self.queue_maxsize = queue_maxsize