mirror of
https://github.com/huggingface/lerobot.git
synced 2026-05-24 13:09:43 +00:00
Compare commits
5 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 9b3c752b64 | |||
| 3dc73551dd | |||
| 237bae51e8 | |||
| df8b33fc68 | |||
| 50e2d7b5f4 |
@@ -14,10 +14,12 @@
|
|||||||
|
|
||||||
"""Shared dataset recording configuration used by both ``lerobot-record`` and ``lerobot-rollout``."""
|
"""Shared dataset recording configuration used by both ``lerobot-record`` and ``lerobot-rollout``."""
|
||||||
|
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass, field
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
|
from lerobot.datasets.video_utils import VideoEncoderConfig, camera_encoder_defaults
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class DatasetRecordConfig:
|
class DatasetRecordConfig:
|
||||||
@@ -55,10 +57,9 @@ class DatasetRecordConfig:
|
|||||||
# Number of episodes to record before batch encoding videos
|
# Number of episodes to record before batch encoding videos
|
||||||
# Set to 1 for immediate encoding (default behavior), or higher for batched encoding
|
# Set to 1 for immediate encoding (default behavior), or higher for batched encoding
|
||||||
video_encoding_batch_size: int = 1
|
video_encoding_batch_size: int = 1
|
||||||
# Video codec for encoding videos. Options: 'h264', 'hevc', 'libsvtav1', 'auto',
|
# Video encoder settings for camera MP4s (codec, quality, GOP, etc.). Tuned via CLI nested keys,
|
||||||
# or hardware-specific: 'h264_videotoolbox', 'h264_nvenc', 'h264_vaapi', 'h264_qsv'.
|
# e.g. ``--dataset.camera_encoder_config.vcodec=h264`` (see ``VideoEncoderConfig``).
|
||||||
# Use 'auto' to auto-detect the best available hardware encoder.
|
camera_encoder_config: VideoEncoderConfig = field(default_factory=camera_encoder_defaults)
|
||||||
vcodec: str = "libsvtav1"
|
|
||||||
# Enable streaming video encoding: encode frames in real-time during capture instead
|
# Enable streaming video encoding: encode frames in real-time during capture instead
|
||||||
# of writing PNG images first. Makes save_episode() near-instant. More info in the documentation: https://huggingface.co/docs/lerobot/streaming_video_encoding
|
# of writing PNG images first. Makes save_episode() near-instant. More info in the documentation: https://huggingface.co/docs/lerobot/streaming_video_encoding
|
||||||
streaming_encoding: bool = False
|
streaming_encoding: bool = False
|
||||||
|
|||||||
@@ -62,7 +62,12 @@ from .utils import (
|
|||||||
DEFAULT_EPISODES_PATH,
|
DEFAULT_EPISODES_PATH,
|
||||||
update_chunk_file_indices,
|
update_chunk_file_indices,
|
||||||
)
|
)
|
||||||
from .video_utils import VideoEncoderConfig, encode_video_frames, get_video_info
|
from .video_utils import (
|
||||||
|
VideoEncoderConfig,
|
||||||
|
camera_encoder_defaults,
|
||||||
|
encode_video_frames,
|
||||||
|
get_video_info,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def _load_episode_with_stats(src_dataset: LeRobotDataset, episode_idx: int) -> dict:
|
def _load_episode_with_stats(src_dataset: LeRobotDataset, episode_idx: int) -> dict:
|
||||||
@@ -101,7 +106,8 @@ def delete_episodes(
|
|||||||
episode_indices: List of episode indices to delete.
|
episode_indices: List of episode indices to delete.
|
||||||
output_dir: Root directory where the edited dataset will be stored. If not specified, defaults to $HF_LEROBOT_HOME/repo_id. Equivalent to new_root in EditDatasetConfig.
|
output_dir: Root directory where the edited dataset will be stored. If not specified, defaults to $HF_LEROBOT_HOME/repo_id. Equivalent to new_root in EditDatasetConfig.
|
||||||
repo_id: Edited dataset identifier. Equivalent to new_repo_id in EditDatasetConfig.
|
repo_id: Edited dataset identifier. Equivalent to new_repo_id in EditDatasetConfig.
|
||||||
camera_encoder_config: Video encoder settings used when re-encoding video segments (default: :class:`VideoEncoderConfig()`).
|
camera_encoder_config: Video encoder settings used when re-encoding video segments
|
||||||
|
(``None`` uses :func:`~lerobot.datasets.video_utils.camera_encoder_defaults`).
|
||||||
"""
|
"""
|
||||||
if not episode_indices:
|
if not episode_indices:
|
||||||
raise ValueError("No episodes to delete")
|
raise ValueError("No episodes to delete")
|
||||||
@@ -165,7 +171,8 @@ def split_dataset(
|
|||||||
splits: Either a dict mapping split names to episode indices, or a dict mapping
|
splits: Either a dict mapping split names to episode indices, or a dict mapping
|
||||||
split names to fractions (must sum to <= 1.0).
|
split names to fractions (must sum to <= 1.0).
|
||||||
output_dir: Root directory where the split datasets will be stored. If not specified, defaults to $HF_LEROBOT_HOME/repo_id.
|
output_dir: Root directory where the split datasets will be stored. If not specified, defaults to $HF_LEROBOT_HOME/repo_id.
|
||||||
camera_encoder_config: Video encoder settings used when re-encoding video segments (default: :class:`VideoEncoderConfig()`).
|
camera_encoder_config: Video encoder settings used when re-encoding video segments
|
||||||
|
(``None`` uses :func:`~lerobot.datasets.video_utils.camera_encoder_defaults`).
|
||||||
|
|
||||||
Examples:
|
Examples:
|
||||||
Split by specific episodes
|
Split by specific episodes
|
||||||
@@ -598,10 +605,11 @@ def _keep_episodes_from_video_with_av(
|
|||||||
Ranges are half-open intervals: [start_frame, end_frame), where start_frame
|
Ranges are half-open intervals: [start_frame, end_frame), where start_frame
|
||||||
is inclusive and end_frame is exclusive.
|
is inclusive and end_frame is exclusive.
|
||||||
fps: Frame rate of the video.
|
fps: Frame rate of the video.
|
||||||
camera_encoder_config: Video encoder settings (default: :class:`VideoEncoderConfig()`).
|
camera_encoder_config: Video encoder settings
|
||||||
|
(``None`` uses :func:`~lerobot.datasets.video_utils.camera_encoder_defaults`).
|
||||||
"""
|
"""
|
||||||
if camera_encoder_config is None:
|
if camera_encoder_config is None:
|
||||||
camera_encoder_config = VideoEncoderConfig()
|
camera_encoder_config = camera_encoder_defaults()
|
||||||
from fractions import Fraction
|
from fractions import Fraction
|
||||||
|
|
||||||
import av
|
import av
|
||||||
@@ -705,13 +713,14 @@ def _copy_and_reindex_videos(
|
|||||||
src_dataset: Source dataset to copy from
|
src_dataset: Source dataset to copy from
|
||||||
dst_meta: Destination metadata object
|
dst_meta: Destination metadata object
|
||||||
episode_mapping: Mapping from old episode indices to new indices
|
episode_mapping: Mapping from old episode indices to new indices
|
||||||
camera_encoder_config: Video encoder settings used when re-encoding segments (default: :class:`VideoEncoderConfig()`).
|
camera_encoder_config: Video encoder settings used when re-encoding segments
|
||||||
|
(``None`` uses :func:`~lerobot.datasets.video_utils.camera_encoder_defaults`).
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
dict mapping episode index to its video metadata (chunk_index, file_index, timestamps)
|
dict mapping episode index to its video metadata (chunk_index, file_index, timestamps)
|
||||||
"""
|
"""
|
||||||
if camera_encoder_config is None:
|
if camera_encoder_config is None:
|
||||||
camera_encoder_config = VideoEncoderConfig()
|
camera_encoder_config = camera_encoder_defaults()
|
||||||
if src_dataset.meta.episodes is None:
|
if src_dataset.meta.episodes is None:
|
||||||
src_dataset.meta.episodes = load_episodes(src_dataset.meta.root)
|
src_dataset.meta.episodes = load_episodes(src_dataset.meta.root)
|
||||||
|
|
||||||
@@ -1654,7 +1663,8 @@ def convert_image_to_video_dataset(
|
|||||||
dataset: The source LeRobot dataset with images
|
dataset: The source LeRobot dataset with images
|
||||||
output_dir: Root directory where the edited dataset will be stored. If not specified, defaults to $HF_LEROBOT_HOME/repo_id. Equivalent to new_root in EditDatasetConfig.
|
output_dir: Root directory where the edited dataset will be stored. If not specified, defaults to $HF_LEROBOT_HOME/repo_id. Equivalent to new_root in EditDatasetConfig.
|
||||||
repo_id: Edited dataset identifier. Equivalent to new_repo_id in EditDatasetConfig.
|
repo_id: Edited dataset identifier. Equivalent to new_repo_id in EditDatasetConfig.
|
||||||
camera_encoder_config: Video encoder settings (default: :class:`VideoEncoderConfig()`).
|
camera_encoder_config: Video encoder settings
|
||||||
|
(``None`` uses :func:`~lerobot.datasets.video_utils.camera_encoder_defaults`).
|
||||||
episode_indices: List of episode indices to convert (None = all episodes)
|
episode_indices: List of episode indices to convert (None = all episodes)
|
||||||
num_workers: Number of threads for parallel processing (default: 4)
|
num_workers: Number of threads for parallel processing (default: 4)
|
||||||
max_episodes_per_batch: Maximum episodes per video batch to avoid memory issues (None = no limit)
|
max_episodes_per_batch: Maximum episodes per video batch to avoid memory issues (None = no limit)
|
||||||
@@ -1664,7 +1674,7 @@ def convert_image_to_video_dataset(
|
|||||||
New LeRobotDataset with images encoded as videos
|
New LeRobotDataset with images encoded as videos
|
||||||
"""
|
"""
|
||||||
if camera_encoder_config is None:
|
if camera_encoder_config is None:
|
||||||
camera_encoder_config = VideoEncoderConfig()
|
camera_encoder_config = camera_encoder_defaults()
|
||||||
|
|
||||||
# Check that it's an image dataset
|
# Check that it's an image dataset
|
||||||
if len(dataset.meta.video_keys) > 0:
|
if len(dataset.meta.video_keys) > 0:
|
||||||
|
|||||||
@@ -53,6 +53,7 @@ from .utils import (
|
|||||||
from .video_utils import (
|
from .video_utils import (
|
||||||
StreamingVideoEncoder,
|
StreamingVideoEncoder,
|
||||||
VideoEncoderConfig,
|
VideoEncoderConfig,
|
||||||
|
camera_encoder_defaults,
|
||||||
concatenate_video_files,
|
concatenate_video_files,
|
||||||
encode_video_frames,
|
encode_video_frames,
|
||||||
get_video_duration_in_s,
|
get_video_duration_in_s,
|
||||||
@@ -95,7 +96,7 @@ class DatasetWriter:
|
|||||||
self,
|
self,
|
||||||
meta: LeRobotDatasetMetadata,
|
meta: LeRobotDatasetMetadata,
|
||||||
root: Path,
|
root: Path,
|
||||||
camera_encoder_config: VideoEncoderConfig,
|
camera_encoder_config: VideoEncoderConfig | None,
|
||||||
encoder_threads: int | None,
|
encoder_threads: int | None,
|
||||||
batch_encoding_size: int,
|
batch_encoding_size: int,
|
||||||
streaming_encoder: StreamingVideoEncoder | None = None,
|
streaming_encoder: StreamingVideoEncoder | None = None,
|
||||||
@@ -108,6 +109,7 @@ class DatasetWriter:
|
|||||||
settings, and episode persistence).
|
settings, and episode persistence).
|
||||||
root: Local dataset root directory.
|
root: Local dataset root directory.
|
||||||
camera_encoder_config: Video encoder settings applied to all cameras.
|
camera_encoder_config: Video encoder settings applied to all cameras.
|
||||||
|
``None`` uses :func:`~lerobot.datasets.video_utils.camera_encoder_defaults`.
|
||||||
encoder_threads: Number of encoder threads (global). ``None``
|
encoder_threads: Number of encoder threads (global). ``None``
|
||||||
lets the codec decide.
|
lets the codec decide.
|
||||||
batch_encoding_size: Number of episodes to accumulate before
|
batch_encoding_size: Number of episodes to accumulate before
|
||||||
@@ -118,7 +120,7 @@ class DatasetWriter:
|
|||||||
"""
|
"""
|
||||||
self._meta = meta
|
self._meta = meta
|
||||||
self._root = root
|
self._root = root
|
||||||
self._camera_encoder_config = camera_encoder_config
|
self._camera_encoder_config = camera_encoder_config or camera_encoder_defaults()
|
||||||
self._encoder_threads = encoder_threads
|
self._encoder_threads = encoder_threads
|
||||||
self._batch_encoding_size = batch_encoding_size
|
self._batch_encoding_size = batch_encoding_size
|
||||||
self._streaming_encoder = streaming_encoder
|
self._streaming_encoder = streaming_encoder
|
||||||
|
|||||||
@@ -178,8 +178,8 @@ class LeRobotDataset(torch.utils.data.Dataset):
|
|||||||
batch_encoding_size (int, optional): Number of episodes to accumulate before batch encoding videos.
|
batch_encoding_size (int, optional): Number of episodes to accumulate before batch encoding videos.
|
||||||
Set to 1 for immediate encoding (default), or higher for batched encoding. Defaults to 1.
|
Set to 1 for immediate encoding (default), or higher for batched encoding. Defaults to 1.
|
||||||
camera_encoder_config (VideoEncoderConfig | None, optional): Video encoder settings for cameras
|
camera_encoder_config (VideoEncoderConfig | None, optional): Video encoder settings for cameras
|
||||||
(codec, quality, etc.). Defaults to
|
(codec, quality, etc.). When ``None``, :func:`~lerobot.datasets.video_utils.camera_encoder_defaults`
|
||||||
:class:`~lerobot.datasets.video_utils.VideoEncoderConfig` defaults when ``None``.
|
is used by the writer.
|
||||||
encoder_threads (int | None, optional): Number of encoder threads (global). ``None`` lets the
|
encoder_threads (int | None, optional): Number of encoder threads (global). ``None`` lets the
|
||||||
codec decide.
|
codec decide.
|
||||||
streaming_encoding (bool, optional): If True, encode video frames in real-time during capture
|
streaming_encoding (bool, optional): If True, encode video frames in real-time during capture
|
||||||
@@ -204,9 +204,6 @@ class LeRobotDataset(torch.utils.data.Dataset):
|
|||||||
self._video_backend = video_backend if video_backend else get_safe_default_video_backend()
|
self._video_backend = video_backend if video_backend else get_safe_default_video_backend()
|
||||||
self._return_uint8 = return_uint8
|
self._return_uint8 = return_uint8
|
||||||
self._batch_encoding_size = batch_encoding_size
|
self._batch_encoding_size = batch_encoding_size
|
||||||
if camera_encoder_config is None:
|
|
||||||
camera_encoder_config = VideoEncoderConfig()
|
|
||||||
self._camera_encoder_config = camera_encoder_config
|
|
||||||
self._encoder_threads = encoder_threads
|
self._encoder_threads = encoder_threads
|
||||||
|
|
||||||
if self._requested_root is not None:
|
if self._requested_root is not None:
|
||||||
@@ -253,14 +250,14 @@ class LeRobotDataset(torch.utils.data.Dataset):
|
|||||||
if streaming_encoding and len(self.meta.video_keys) > 0:
|
if streaming_encoding and len(self.meta.video_keys) > 0:
|
||||||
streaming_enc = self._build_streaming_encoder(
|
streaming_enc = self._build_streaming_encoder(
|
||||||
self.meta.fps,
|
self.meta.fps,
|
||||||
self._camera_encoder_config,
|
camera_encoder_config,
|
||||||
self._encoder_threads,
|
self._encoder_threads,
|
||||||
encoder_queue_maxsize,
|
encoder_queue_maxsize,
|
||||||
)
|
)
|
||||||
self.writer = DatasetWriter(
|
self.writer = DatasetWriter(
|
||||||
meta=self.meta,
|
meta=self.meta,
|
||||||
root=self.root,
|
root=self.root,
|
||||||
camera_encoder_config=self._camera_encoder_config,
|
camera_encoder_config=camera_encoder_config,
|
||||||
encoder_threads=self._encoder_threads,
|
encoder_threads=self._encoder_threads,
|
||||||
batch_encoding_size=batch_encoding_size,
|
batch_encoding_size=batch_encoding_size,
|
||||||
streaming_encoder=streaming_enc,
|
streaming_encoder=streaming_enc,
|
||||||
@@ -302,7 +299,7 @@ class LeRobotDataset(torch.utils.data.Dataset):
|
|||||||
@staticmethod
|
@staticmethod
|
||||||
def _build_streaming_encoder(
|
def _build_streaming_encoder(
|
||||||
fps: int,
|
fps: int,
|
||||||
camera_encoder_config: VideoEncoderConfig,
|
camera_encoder_config: VideoEncoderConfig | None,
|
||||||
encoder_threads: int | None,
|
encoder_threads: int | None,
|
||||||
encoder_queue_maxsize: int,
|
encoder_queue_maxsize: int,
|
||||||
) -> StreamingVideoEncoder:
|
) -> StreamingVideoEncoder:
|
||||||
@@ -656,9 +653,8 @@ class LeRobotDataset(torch.utils.data.Dataset):
|
|||||||
video_backend: Video decoding backend (used when reading back).
|
video_backend: Video decoding backend (used when reading back).
|
||||||
batch_encoding_size: Number of episodes to accumulate before
|
batch_encoding_size: Number of episodes to accumulate before
|
||||||
batch-encoding videos. ``1`` means encode immediately.
|
batch-encoding videos. ``1`` means encode immediately.
|
||||||
camera_encoder_config: Video encoder settings for cameras; defaults
|
camera_encoder_config: Video encoder settings for cameras (codec, quality, etc.).
|
||||||
match :class:`~lerobot.datasets.video_utils.VideoEncoderConfig`
|
When ``None``, :func:`~lerobot.datasets.video_utils.camera_encoder_defaults` is used.
|
||||||
when ``None``.
|
|
||||||
encoder_threads: Number of encoder threads (global). ``None``
|
encoder_threads: Number of encoder threads (global). ``None``
|
||||||
lets the codec decide.
|
lets the codec decide.
|
||||||
metadata_buffer_size: Number of episode metadata records to buffer
|
metadata_buffer_size: Number of episode metadata records to buffer
|
||||||
@@ -671,8 +667,6 @@ class LeRobotDataset(torch.utils.data.Dataset):
|
|||||||
Returns:
|
Returns:
|
||||||
A new :class:`LeRobotDataset` in write mode.
|
A new :class:`LeRobotDataset` in write mode.
|
||||||
"""
|
"""
|
||||||
if camera_encoder_config is None:
|
|
||||||
camera_encoder_config = VideoEncoderConfig()
|
|
||||||
obj = cls.__new__(cls)
|
obj = cls.__new__(cls)
|
||||||
obj.meta = LeRobotDatasetMetadata.create(
|
obj.meta = LeRobotDatasetMetadata.create(
|
||||||
repo_id=repo_id,
|
repo_id=repo_id,
|
||||||
@@ -696,7 +690,6 @@ class LeRobotDataset(torch.utils.data.Dataset):
|
|||||||
obj._video_backend = video_backend if video_backend is not None else get_safe_default_video_backend()
|
obj._video_backend = video_backend if video_backend is not None else get_safe_default_video_backend()
|
||||||
obj._return_uint8 = False
|
obj._return_uint8 = False
|
||||||
obj._batch_encoding_size = batch_encoding_size
|
obj._batch_encoding_size = batch_encoding_size
|
||||||
obj._camera_encoder_config = camera_encoder_config
|
|
||||||
obj._encoder_threads = encoder_threads
|
obj._encoder_threads = encoder_threads
|
||||||
|
|
||||||
# Reader is lazily created on first access (write-only mode)
|
# Reader is lazily created on first access (write-only mode)
|
||||||
@@ -761,9 +754,8 @@ class LeRobotDataset(torch.utils.data.Dataset):
|
|||||||
video_backend: Video decoding backend for reading back data.
|
video_backend: Video decoding backend for reading back data.
|
||||||
batch_encoding_size: Number of episodes to accumulate before
|
batch_encoding_size: Number of episodes to accumulate before
|
||||||
batch-encoding videos.
|
batch-encoding videos.
|
||||||
camera_encoder_config: Video encoder settings for cameras; defaults
|
camera_encoder_config: Video encoder settings for cameras (codec, quality, etc.).
|
||||||
match :class:`~lerobot.datasets.video_utils.VideoEncoderConfig`
|
When ``None``, :func:`~lerobot.datasets.video_utils.camera_encoder_defaults` is used.
|
||||||
when ``None``.
|
|
||||||
encoder_threads: Number of encoder threads (global). ``None``
|
encoder_threads: Number of encoder threads (global). ``None``
|
||||||
lets the codec decide.
|
lets the codec decide.
|
||||||
image_writer_processes: Subprocesses for async image writing.
|
image_writer_processes: Subprocesses for async image writing.
|
||||||
@@ -801,9 +793,6 @@ class LeRobotDataset(torch.utils.data.Dataset):
|
|||||||
obj.repo_id, obj._requested_root, obj.revision, force_cache_sync=force_cache_sync
|
obj.repo_id, obj._requested_root, obj.revision, force_cache_sync=force_cache_sync
|
||||||
)
|
)
|
||||||
|
|
||||||
if camera_encoder_config is None:
|
|
||||||
camera_encoder_config = VideoEncoderConfig()
|
|
||||||
obj._camera_encoder_config = camera_encoder_config
|
|
||||||
obj._encoder_threads = encoder_threads
|
obj._encoder_threads = encoder_threads
|
||||||
obj.root = obj.meta.root
|
obj.root = obj.meta.root
|
||||||
|
|
||||||
|
|||||||
@@ -177,10 +177,6 @@ def check_video_encoder_config_pyav(config: VideoEncoderConfig) -> None:
|
|||||||
vcodec = config.vcodec
|
vcodec = config.vcodec
|
||||||
options = _get_codec_options_by_name(vcodec)
|
options = _get_codec_options_by_name(vcodec)
|
||||||
if not options:
|
if not options:
|
||||||
logger.warning(
|
raise ValueError(f"Codec {vcodec!r} is not available in the bundled FFmpeg build")
|
||||||
"Codec %r is not available in the bundled FFmpeg build; ",
|
|
||||||
vcodec,
|
|
||||||
)
|
|
||||||
return
|
|
||||||
_check_pixel_format(config.vcodec, config.pix_fmt)
|
_check_pixel_format(config.vcodec, config.pix_fmt)
|
||||||
_check_codec_options(config.vcodec, config.get_codec_options(), config)
|
_check_codec_options(config.vcodec, config.get_codec_options(), config)
|
||||||
|
|||||||
@@ -47,7 +47,7 @@ logger = logging.getLogger(__name__)
|
|||||||
|
|
||||||
# List of hardware encoders to probe for auto-selection. Availability depends on the platform and FFmpeg build.
|
# List of hardware encoders to probe for auto-selection. Availability depends on the platform and FFmpeg build.
|
||||||
# Determines the order of preference for auto-selection when vcodec="auto" is used.
|
# Determines the order of preference for auto-selection when vcodec="auto" is used.
|
||||||
HW_ENCODERS = [
|
HW_VIDEO_CODECS = [
|
||||||
"h264_videotoolbox", # macOS
|
"h264_videotoolbox", # macOS
|
||||||
"hevc_videotoolbox", # macOS
|
"hevc_videotoolbox", # macOS
|
||||||
"h264_nvenc", # NVIDIA GPU
|
"h264_nvenc", # NVIDIA GPU
|
||||||
@@ -56,7 +56,7 @@ HW_ENCODERS = [
|
|||||||
"h264_qsv", # Intel Quick Sync
|
"h264_qsv", # Intel Quick Sync
|
||||||
]
|
]
|
||||||
|
|
||||||
VALID_VIDEO_CODECS = {"h264", "hevc", "libsvtav1", "auto"} | set(HW_ENCODERS)
|
VALID_VIDEO_CODECS = {"h264", "hevc", "libsvtav1", "auto"} | set(HW_VIDEO_CODECS)
|
||||||
|
|
||||||
LIBSVTAV1_DEFAULT_PRESET: int = 12
|
LIBSVTAV1_DEFAULT_PRESET: int = 12
|
||||||
|
|
||||||
@@ -116,33 +116,33 @@ class VideoEncoderConfig:
|
|||||||
check_video_encoder_config_pyav(self)
|
check_video_encoder_config_pyav(self)
|
||||||
|
|
||||||
def resolve_vcodec(self) -> None:
|
def resolve_vcodec(self) -> None:
|
||||||
"""Validate vcodec and resolve 'auto' to best available HW encoder, fallback to libsvtav1.
|
"""Check ``vcodec`` and, when it is ``"auto"``, pick a concrete encoder.
|
||||||
|
|
||||||
Any explicitly-requested codec that isn't in the local FFmpeg build is
|
For ``"auto"``, the first hardware encoder in the preference list that FFmpeg
|
||||||
also silently rewritten to ``libsvtav1`` so encoding never hard-fails on
|
exposes is chosen; if none are available, ``libsvtav1`` is used. If the
|
||||||
a host missing the requested encoder.
|
resolved codec (explicit or after auto-selection) is not present in the
|
||||||
|
local FFmpeg build, raises ``ValueError``.
|
||||||
"""
|
"""
|
||||||
if self.vcodec not in VALID_VIDEO_CODECS:
|
if self.vcodec not in VALID_VIDEO_CODECS:
|
||||||
raise ValueError(f"Invalid vcodec '{self.vcodec}'. Must be one of: {sorted(VALID_VIDEO_CODECS)}")
|
raise ValueError(f"Invalid vcodec '{self.vcodec}'. Must be one of: {sorted(VALID_VIDEO_CODECS)}")
|
||||||
if self.vcodec == "auto":
|
if self.vcodec == "auto":
|
||||||
available = self.detect_available_encoders(HW_ENCODERS)
|
available = self.detect_available_encoders(HW_VIDEO_CODECS)
|
||||||
for encoder in HW_ENCODERS:
|
for encoder in HW_VIDEO_CODECS:
|
||||||
if encoder in available:
|
if encoder in available:
|
||||||
logger.info(f"Auto-selected video codec: {encoder}")
|
logger.info(f"Auto-selected video codec: {encoder}")
|
||||||
self.vcodec = encoder
|
self.vcodec = encoder
|
||||||
return
|
return
|
||||||
logger.info("No hardware encoder available, falling back to software encoder 'libsvtav1'")
|
logger.warning("No hardware encoder available, falling back to software encoder 'libsvtav1'")
|
||||||
self.vcodec = "libsvtav1"
|
self.vcodec = "libsvtav1"
|
||||||
|
|
||||||
if self.detect_available_encoders(self.vcodec):
|
if self.detect_available_encoders(self.vcodec):
|
||||||
logger.info(f"Using video codec: {self.vcodec}")
|
logger.info(f"Using video codec: {self.vcodec}")
|
||||||
self.vcodec = self.vcodec
|
|
||||||
return
|
return
|
||||||
raise ValueError(f"Unsupported video codec: {self.vcodec} with video backend {self.video_backend}")
|
raise ValueError(f"Unsupported video codec: {self.vcodec} with video backend {self.video_backend}")
|
||||||
|
|
||||||
def get_codec_options(
|
def get_codec_options(
|
||||||
self, encoder_threads: int | None = None, as_strings: bool = False
|
self, encoder_threads: int | None = None, as_strings: bool = False
|
||||||
) -> dict[str, str]:
|
) -> dict[str, Any]:
|
||||||
"""Translate the tuning fields to codec-specific FFmpeg options.
|
"""Translate the tuning fields to codec-specific FFmpeg options.
|
||||||
|
|
||||||
``VideoEncoderConfig.extra_options`` are merged last but never override a structured field.
|
``VideoEncoderConfig.extra_options`` are merged last but never override a structured field.
|
||||||
@@ -498,7 +498,7 @@ def encode_video_frames(
|
|||||||
) -> None:
|
) -> None:
|
||||||
"""More info on ffmpeg arguments tuning on `benchmark/video/README.md`"""
|
"""More info on ffmpeg arguments tuning on `benchmark/video/README.md`"""
|
||||||
if camera_encoder_config is None:
|
if camera_encoder_config is None:
|
||||||
camera_encoder_config = VideoEncoderConfig()
|
camera_encoder_config = camera_encoder_defaults()
|
||||||
vcodec = camera_encoder_config.vcodec
|
vcodec = camera_encoder_config.vcodec
|
||||||
pix_fmt = camera_encoder_config.pix_fmt
|
pix_fmt = camera_encoder_config.pix_fmt
|
||||||
|
|
||||||
@@ -802,14 +802,14 @@ class StreamingVideoEncoder:
|
|||||||
Args:
|
Args:
|
||||||
fps: Frames per second for the output videos.
|
fps: Frames per second for the output videos.
|
||||||
camera_encoder_config: Video encoder settings applied to all cameras.
|
camera_encoder_config: Video encoder settings applied to all cameras.
|
||||||
When ``None``, :class:`VideoEncoderConfig` defaults are used.
|
When ``None``, :func:`camera_encoder_defaults` is used.
|
||||||
encoder_threads: Number of encoder threads (global setting).
|
encoder_threads: Number of encoder threads (global setting).
|
||||||
``None`` lets the codec decide.
|
``None`` lets the codec decide.
|
||||||
queue_maxsize: Max frames to buffer per camera before
|
queue_maxsize: Max frames to buffer per camera before
|
||||||
back-pressure drops frames.
|
back-pressure drops frames.
|
||||||
"""
|
"""
|
||||||
self.fps = fps
|
self.fps = fps
|
||||||
self._camera_encoder_config = camera_encoder_config or VideoEncoderConfig()
|
self._camera_encoder_config = camera_encoder_config or camera_encoder_defaults()
|
||||||
self._encoder_threads = encoder_threads
|
self._encoder_threads = encoder_threads
|
||||||
self.queue_maxsize = queue_maxsize
|
self.queue_maxsize = queue_maxsize
|
||||||
|
|
||||||
|
|||||||
@@ -332,7 +332,7 @@ def build_rollout_context(
|
|||||||
cfg.dataset.repo_id,
|
cfg.dataset.repo_id,
|
||||||
root=cfg.dataset.root,
|
root=cfg.dataset.root,
|
||||||
batch_encoding_size=cfg.dataset.video_encoding_batch_size,
|
batch_encoding_size=cfg.dataset.video_encoding_batch_size,
|
||||||
vcodec=cfg.dataset.vcodec,
|
camera_encoder_config=cfg.dataset.camera_encoder_config,
|
||||||
streaming_encoding=cfg.dataset.streaming_encoding,
|
streaming_encoding=cfg.dataset.streaming_encoding,
|
||||||
encoder_queue_maxsize=cfg.dataset.encoder_queue_maxsize,
|
encoder_queue_maxsize=cfg.dataset.encoder_queue_maxsize,
|
||||||
encoder_threads=cfg.dataset.encoder_threads,
|
encoder_threads=cfg.dataset.encoder_threads,
|
||||||
@@ -367,7 +367,7 @@ def build_rollout_context(
|
|||||||
image_writer_threads=cfg.dataset.num_image_writer_threads_per_camera
|
image_writer_threads=cfg.dataset.num_image_writer_threads_per_camera
|
||||||
* len(robot.cameras if hasattr(robot, "cameras") else []),
|
* len(robot.cameras if hasattr(robot, "cameras") else []),
|
||||||
batch_encoding_size=cfg.dataset.video_encoding_batch_size,
|
batch_encoding_size=cfg.dataset.video_encoding_batch_size,
|
||||||
vcodec=cfg.dataset.vcodec,
|
camera_encoder_config=cfg.dataset.camera_encoder_config,
|
||||||
streaming_encoding=cfg.dataset.streaming_encoding,
|
streaming_encoding=cfg.dataset.streaming_encoding,
|
||||||
encoder_queue_maxsize=cfg.dataset.encoder_queue_maxsize,
|
encoder_queue_maxsize=cfg.dataset.encoder_queue_maxsize,
|
||||||
encoder_threads=cfg.dataset.encoder_threads,
|
encoder_threads=cfg.dataset.encoder_threads,
|
||||||
|
|||||||
@@ -298,7 +298,7 @@ class TestEncoderDetection:
|
|||||||
|
|
||||||
@require_videotoolbox
|
@require_videotoolbox
|
||||||
def test_auto_picks_videotoolbox_when_available(self):
|
def test_auto_picks_videotoolbox_when_available(self):
|
||||||
"""``h264_videotoolbox`` sits at the top of ``HW_ENCODERS`` so it wins when present."""
|
"""``h264_videotoolbox`` sits at the top of ``HW_VIDEO_CODECS`` so it wins when present."""
|
||||||
cfg = VideoEncoderConfig(vcodec="auto")
|
cfg = VideoEncoderConfig(vcodec="auto")
|
||||||
assert cfg.vcodec == "h264_videotoolbox"
|
assert cfg.vcodec == "h264_videotoolbox"
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user