Compare commits

...

5 Commits

8 changed files with 56 additions and 58 deletions
+6 -5
View File
@@ -14,10 +14,12 @@
"""Shared dataset recording configuration used by both ``lerobot-record`` and ``lerobot-rollout``."""
from dataclasses import dataclass
from dataclasses import dataclass, field
from datetime import datetime
from pathlib import Path
from lerobot.datasets.video_utils import VideoEncoderConfig, camera_encoder_defaults
@dataclass
class DatasetRecordConfig:
@@ -55,10 +57,9 @@ class DatasetRecordConfig:
# Number of episodes to record before batch encoding videos
# Set to 1 for immediate encoding (default behavior), or higher for batched encoding
video_encoding_batch_size: int = 1
# Video codec for encoding videos. Options: 'h264', 'hevc', 'libsvtav1', 'auto',
# or hardware-specific: 'h264_videotoolbox', 'h264_nvenc', 'h264_vaapi', 'h264_qsv'.
# Use 'auto' to auto-detect the best available hardware encoder.
vcodec: str = "libsvtav1"
# Video encoder settings for camera MP4s (codec, quality, GOP, etc.). Tuned via CLI nested keys,
# e.g. ``--dataset.camera_encoder_config.vcodec=h264`` (see ``VideoEncoderConfig``).
camera_encoder_config: VideoEncoderConfig = field(default_factory=camera_encoder_defaults)
# Enable streaming video encoding: encode frames in real-time during capture instead
# of writing PNG images first. Makes save_episode() near-instant. More info in the documentation: https://huggingface.co/docs/lerobot/streaming_video_encoding
streaming_encoding: bool = False
+19 -9
View File
@@ -62,7 +62,12 @@ from .utils import (
DEFAULT_EPISODES_PATH,
update_chunk_file_indices,
)
from .video_utils import VideoEncoderConfig, encode_video_frames, get_video_info
from .video_utils import (
VideoEncoderConfig,
camera_encoder_defaults,
encode_video_frames,
get_video_info,
)
def _load_episode_with_stats(src_dataset: LeRobotDataset, episode_idx: int) -> dict:
@@ -101,7 +106,8 @@ def delete_episodes(
episode_indices: List of episode indices to delete.
output_dir: Root directory where the edited dataset will be stored. If not specified, defaults to $HF_LEROBOT_HOME/repo_id. Equivalent to new_root in EditDatasetConfig.
repo_id: Edited dataset identifier. Equivalent to new_repo_id in EditDatasetConfig.
camera_encoder_config: Video encoder settings used when re-encoding video segments (default: :class:`VideoEncoderConfig()`).
camera_encoder_config: Video encoder settings used when re-encoding video segments
(``None`` uses :func:`~lerobot.datasets.video_utils.camera_encoder_defaults`).
"""
if not episode_indices:
raise ValueError("No episodes to delete")
@@ -165,7 +171,8 @@ def split_dataset(
splits: Either a dict mapping split names to episode indices, or a dict mapping
split names to fractions (must sum to <= 1.0).
output_dir: Root directory where the split datasets will be stored. If not specified, defaults to $HF_LEROBOT_HOME/repo_id.
camera_encoder_config: Video encoder settings used when re-encoding video segments (default: :class:`VideoEncoderConfig()`).
camera_encoder_config: Video encoder settings used when re-encoding video segments
(``None`` uses :func:`~lerobot.datasets.video_utils.camera_encoder_defaults`).
Examples:
Split by specific episodes
@@ -598,10 +605,11 @@ def _keep_episodes_from_video_with_av(
Ranges are half-open intervals: [start_frame, end_frame), where start_frame
is inclusive and end_frame is exclusive.
fps: Frame rate of the video.
camera_encoder_config: Video encoder settings (default: :class:`VideoEncoderConfig()`).
camera_encoder_config: Video encoder settings
(``None`` uses :func:`~lerobot.datasets.video_utils.camera_encoder_defaults`).
"""
if camera_encoder_config is None:
camera_encoder_config = VideoEncoderConfig()
camera_encoder_config = camera_encoder_defaults()
from fractions import Fraction
import av
@@ -705,13 +713,14 @@ def _copy_and_reindex_videos(
src_dataset: Source dataset to copy from
dst_meta: Destination metadata object
episode_mapping: Mapping from old episode indices to new indices
camera_encoder_config: Video encoder settings used when re-encoding segments (default: :class:`VideoEncoderConfig()`).
camera_encoder_config: Video encoder settings used when re-encoding segments
(``None`` uses :func:`~lerobot.datasets.video_utils.camera_encoder_defaults`).
Returns:
dict mapping episode index to its video metadata (chunk_index, file_index, timestamps)
"""
if camera_encoder_config is None:
camera_encoder_config = VideoEncoderConfig()
camera_encoder_config = camera_encoder_defaults()
if src_dataset.meta.episodes is None:
src_dataset.meta.episodes = load_episodes(src_dataset.meta.root)
@@ -1654,7 +1663,8 @@ def convert_image_to_video_dataset(
dataset: The source LeRobot dataset with images
output_dir: Root directory where the edited dataset will be stored. If not specified, defaults to $HF_LEROBOT_HOME/repo_id. Equivalent to new_root in EditDatasetConfig.
repo_id: Edited dataset identifier. Equivalent to new_repo_id in EditDatasetConfig.
camera_encoder_config: Video encoder settings (default: :class:`VideoEncoderConfig()`).
camera_encoder_config: Video encoder settings
(``None`` uses :func:`~lerobot.datasets.video_utils.camera_encoder_defaults`).
episode_indices: List of episode indices to convert (None = all episodes)
num_workers: Number of threads for parallel processing (default: 4)
max_episodes_per_batch: Maximum episodes per video batch to avoid memory issues (None = no limit)
@@ -1664,7 +1674,7 @@ def convert_image_to_video_dataset(
New LeRobotDataset with images encoded as videos
"""
if camera_encoder_config is None:
camera_encoder_config = VideoEncoderConfig()
camera_encoder_config = camera_encoder_defaults()
# Check that it's an image dataset
if len(dataset.meta.video_keys) > 0:
+4 -2
View File
@@ -53,6 +53,7 @@ from .utils import (
from .video_utils import (
StreamingVideoEncoder,
VideoEncoderConfig,
camera_encoder_defaults,
concatenate_video_files,
encode_video_frames,
get_video_duration_in_s,
@@ -95,7 +96,7 @@ class DatasetWriter:
self,
meta: LeRobotDatasetMetadata,
root: Path,
camera_encoder_config: VideoEncoderConfig,
camera_encoder_config: VideoEncoderConfig | None,
encoder_threads: int | None,
batch_encoding_size: int,
streaming_encoder: StreamingVideoEncoder | None = None,
@@ -108,6 +109,7 @@ class DatasetWriter:
settings, and episode persistence).
root: Local dataset root directory.
camera_encoder_config: Video encoder settings applied to all cameras.
``None`` uses :func:`~lerobot.datasets.video_utils.camera_encoder_defaults`.
encoder_threads: Number of encoder threads (global). ``None``
lets the codec decide.
batch_encoding_size: Number of episodes to accumulate before
@@ -118,7 +120,7 @@ class DatasetWriter:
"""
self._meta = meta
self._root = root
self._camera_encoder_config = camera_encoder_config
self._camera_encoder_config = camera_encoder_config or camera_encoder_defaults()
self._encoder_threads = encoder_threads
self._batch_encoding_size = batch_encoding_size
self._streaming_encoder = streaming_encoder
+9 -20
View File
@@ -178,8 +178,8 @@ class LeRobotDataset(torch.utils.data.Dataset):
batch_encoding_size (int, optional): Number of episodes to accumulate before batch encoding videos.
Set to 1 for immediate encoding (default), or higher for batched encoding. Defaults to 1.
camera_encoder_config (VideoEncoderConfig | None, optional): Video encoder settings for cameras
(codec, quality, etc.). Defaults to
:class:`~lerobot.datasets.video_utils.VideoEncoderConfig` defaults when ``None``.
(codec, quality, etc.). When ``None``, :func:`~lerobot.datasets.video_utils.camera_encoder_defaults`
is used by the writer.
encoder_threads (int | None, optional): Number of encoder threads (global). ``None`` lets the
codec decide.
streaming_encoding (bool, optional): If True, encode video frames in real-time during capture
@@ -204,9 +204,6 @@ class LeRobotDataset(torch.utils.data.Dataset):
self._video_backend = video_backend if video_backend else get_safe_default_video_backend()
self._return_uint8 = return_uint8
self._batch_encoding_size = batch_encoding_size
if camera_encoder_config is None:
camera_encoder_config = VideoEncoderConfig()
self._camera_encoder_config = camera_encoder_config
self._encoder_threads = encoder_threads
if self._requested_root is not None:
@@ -253,14 +250,14 @@ class LeRobotDataset(torch.utils.data.Dataset):
if streaming_encoding and len(self.meta.video_keys) > 0:
streaming_enc = self._build_streaming_encoder(
self.meta.fps,
self._camera_encoder_config,
camera_encoder_config,
self._encoder_threads,
encoder_queue_maxsize,
)
self.writer = DatasetWriter(
meta=self.meta,
root=self.root,
camera_encoder_config=self._camera_encoder_config,
camera_encoder_config=camera_encoder_config,
encoder_threads=self._encoder_threads,
batch_encoding_size=batch_encoding_size,
streaming_encoder=streaming_enc,
@@ -302,7 +299,7 @@ class LeRobotDataset(torch.utils.data.Dataset):
@staticmethod
def _build_streaming_encoder(
fps: int,
camera_encoder_config: VideoEncoderConfig,
camera_encoder_config: VideoEncoderConfig | None,
encoder_threads: int | None,
encoder_queue_maxsize: int,
) -> StreamingVideoEncoder:
@@ -656,9 +653,8 @@ class LeRobotDataset(torch.utils.data.Dataset):
video_backend: Video decoding backend (used when reading back).
batch_encoding_size: Number of episodes to accumulate before
batch-encoding videos. ``1`` means encode immediately.
camera_encoder_config: Video encoder settings for cameras; defaults
match :class:`~lerobot.datasets.video_utils.VideoEncoderConfig`
when ``None``.
camera_encoder_config: Video encoder settings for cameras (codec, quality, etc.).
When ``None``, :func:`~lerobot.datasets.video_utils.camera_encoder_defaults` is used.
encoder_threads: Number of encoder threads (global). ``None``
lets the codec decide.
metadata_buffer_size: Number of episode metadata records to buffer
@@ -671,8 +667,6 @@ class LeRobotDataset(torch.utils.data.Dataset):
Returns:
A new :class:`LeRobotDataset` in write mode.
"""
if camera_encoder_config is None:
camera_encoder_config = VideoEncoderConfig()
obj = cls.__new__(cls)
obj.meta = LeRobotDatasetMetadata.create(
repo_id=repo_id,
@@ -696,7 +690,6 @@ class LeRobotDataset(torch.utils.data.Dataset):
obj._video_backend = video_backend if video_backend is not None else get_safe_default_video_backend()
obj._return_uint8 = False
obj._batch_encoding_size = batch_encoding_size
obj._camera_encoder_config = camera_encoder_config
obj._encoder_threads = encoder_threads
# Reader is lazily created on first access (write-only mode)
@@ -761,9 +754,8 @@ class LeRobotDataset(torch.utils.data.Dataset):
video_backend: Video decoding backend for reading back data.
batch_encoding_size: Number of episodes to accumulate before
batch-encoding videos.
camera_encoder_config: Video encoder settings for cameras; defaults
match :class:`~lerobot.datasets.video_utils.VideoEncoderConfig`
when ``None``.
camera_encoder_config: Video encoder settings for cameras (codec, quality, etc.).
When ``None``, :func:`~lerobot.datasets.video_utils.camera_encoder_defaults` is used.
encoder_threads: Number of encoder threads (global). ``None``
lets the codec decide.
image_writer_processes: Subprocesses for async image writing.
@@ -801,9 +793,6 @@ class LeRobotDataset(torch.utils.data.Dataset):
obj.repo_id, obj._requested_root, obj.revision, force_cache_sync=force_cache_sync
)
if camera_encoder_config is None:
camera_encoder_config = VideoEncoderConfig()
obj._camera_encoder_config = camera_encoder_config
obj._encoder_threads = encoder_threads
obj.root = obj.meta.root
+1 -5
View File
@@ -177,10 +177,6 @@ def check_video_encoder_config_pyav(config: VideoEncoderConfig) -> None:
vcodec = config.vcodec
options = _get_codec_options_by_name(vcodec)
if not options:
logger.warning(
"Codec %r is not available in the bundled FFmpeg build; ",
vcodec,
)
return
raise ValueError(f"Codec {vcodec!r} is not available in the bundled FFmpeg build")
_check_pixel_format(config.vcodec, config.pix_fmt)
_check_codec_options(config.vcodec, config.get_codec_options(), config)
+14 -14
View File
@@ -47,7 +47,7 @@ logger = logging.getLogger(__name__)
# List of hardware encoders to probe for auto-selection. Availability depends on the platform and FFmpeg build.
# Determines the order of preference for auto-selection when vcodec="auto" is used.
HW_ENCODERS = [
HW_VIDEO_CODECS = [
"h264_videotoolbox", # macOS
"hevc_videotoolbox", # macOS
"h264_nvenc", # NVIDIA GPU
@@ -56,7 +56,7 @@ HW_ENCODERS = [
"h264_qsv", # Intel Quick Sync
]
VALID_VIDEO_CODECS = {"h264", "hevc", "libsvtav1", "auto"} | set(HW_ENCODERS)
VALID_VIDEO_CODECS = {"h264", "hevc", "libsvtav1", "auto"} | set(HW_VIDEO_CODECS)
LIBSVTAV1_DEFAULT_PRESET: int = 12
@@ -116,33 +116,33 @@ class VideoEncoderConfig:
check_video_encoder_config_pyav(self)
def resolve_vcodec(self) -> None:
"""Validate vcodec and resolve 'auto' to best available HW encoder, fallback to libsvtav1.
"""Check ``vcodec`` and, when it is ``"auto"``, pick a concrete encoder.
Any explicitly-requested codec that isn't in the local FFmpeg build is
also silently rewritten to ``libsvtav1`` so encoding never hard-fails on
a host missing the requested encoder.
For ``"auto"``, the first hardware encoder in the preference list that FFmpeg
exposes is chosen; if none are available, ``libsvtav1`` is used. If the
resolved codec (explicit or after auto-selection) is not present in the
local FFmpeg build, raises ``ValueError``.
"""
if self.vcodec not in VALID_VIDEO_CODECS:
raise ValueError(f"Invalid vcodec '{self.vcodec}'. Must be one of: {sorted(VALID_VIDEO_CODECS)}")
if self.vcodec == "auto":
available = self.detect_available_encoders(HW_ENCODERS)
for encoder in HW_ENCODERS:
available = self.detect_available_encoders(HW_VIDEO_CODECS)
for encoder in HW_VIDEO_CODECS:
if encoder in available:
logger.info(f"Auto-selected video codec: {encoder}")
self.vcodec = encoder
return
logger.info("No hardware encoder available, falling back to software encoder 'libsvtav1'")
logger.warning("No hardware encoder available, falling back to software encoder 'libsvtav1'")
self.vcodec = "libsvtav1"
if self.detect_available_encoders(self.vcodec):
logger.info(f"Using video codec: {self.vcodec}")
self.vcodec = self.vcodec
return
raise ValueError(f"Unsupported video codec: {self.vcodec} with video backend {self.video_backend}")
def get_codec_options(
self, encoder_threads: int | None = None, as_strings: bool = False
) -> dict[str, str]:
) -> dict[str, Any]:
"""Translate the tuning fields to codec-specific FFmpeg options.
``VideoEncoderConfig.extra_options`` are merged last but never override a structured field.
@@ -498,7 +498,7 @@ def encode_video_frames(
) -> None:
"""More info on ffmpeg arguments tuning on `benchmark/video/README.md`"""
if camera_encoder_config is None:
camera_encoder_config = VideoEncoderConfig()
camera_encoder_config = camera_encoder_defaults()
vcodec = camera_encoder_config.vcodec
pix_fmt = camera_encoder_config.pix_fmt
@@ -802,14 +802,14 @@ class StreamingVideoEncoder:
Args:
fps: Frames per second for the output videos.
camera_encoder_config: Video encoder settings applied to all cameras.
When ``None``, :class:`VideoEncoderConfig` defaults are used.
When ``None``, :func:`camera_encoder_defaults` is used.
encoder_threads: Number of encoder threads (global setting).
``None`` lets the codec decide.
queue_maxsize: Max frames to buffer per camera before
back-pressure drops frames.
"""
self.fps = fps
self._camera_encoder_config = camera_encoder_config or VideoEncoderConfig()
self._camera_encoder_config = camera_encoder_config or camera_encoder_defaults()
self._encoder_threads = encoder_threads
self.queue_maxsize = queue_maxsize
+2 -2
View File
@@ -332,7 +332,7 @@ def build_rollout_context(
cfg.dataset.repo_id,
root=cfg.dataset.root,
batch_encoding_size=cfg.dataset.video_encoding_batch_size,
vcodec=cfg.dataset.vcodec,
camera_encoder_config=cfg.dataset.camera_encoder_config,
streaming_encoding=cfg.dataset.streaming_encoding,
encoder_queue_maxsize=cfg.dataset.encoder_queue_maxsize,
encoder_threads=cfg.dataset.encoder_threads,
@@ -367,7 +367,7 @@ def build_rollout_context(
image_writer_threads=cfg.dataset.num_image_writer_threads_per_camera
* len(robot.cameras if hasattr(robot, "cameras") else []),
batch_encoding_size=cfg.dataset.video_encoding_batch_size,
vcodec=cfg.dataset.vcodec,
camera_encoder_config=cfg.dataset.camera_encoder_config,
streaming_encoding=cfg.dataset.streaming_encoding,
encoder_queue_maxsize=cfg.dataset.encoder_queue_maxsize,
encoder_threads=cfg.dataset.encoder_threads,
+1 -1
View File
@@ -298,7 +298,7 @@ class TestEncoderDetection:
@require_videotoolbox
def test_auto_picks_videotoolbox_when_available(self):
"""``h264_videotoolbox`` sits at the top of ``HW_ENCODERS`` so it wins when present."""
"""``h264_videotoolbox`` sits at the top of ``HW_VIDEO_CODECS`` so it wins when present."""
cfg = VideoEncoderConfig(vcodec="auto")
assert cfg.vcodec == "h264_videotoolbox"