mirror of
https://github.com/huggingface/lerobot.git
synced 2026-05-27 06:29:47 +00:00
feat(VideoEncoderConfig): propagating the VideoEncoderConfig in the codebase
This commit is contained in:
@@ -39,6 +39,7 @@ from tqdm import tqdm
|
|||||||
|
|
||||||
from lerobot.datasets.lerobot_dataset import LeRobotDataset
|
from lerobot.datasets.lerobot_dataset import LeRobotDataset
|
||||||
from lerobot.datasets.video_utils import (
|
from lerobot.datasets.video_utils import (
|
||||||
|
VideoEncoderConfig,
|
||||||
decode_video_frames,
|
decode_video_frames,
|
||||||
encode_video_frames,
|
encode_video_frames,
|
||||||
)
|
)
|
||||||
@@ -251,10 +252,13 @@ def benchmark_encoding_decoding(
|
|||||||
imgs_dir=imgs_dir,
|
imgs_dir=imgs_dir,
|
||||||
video_path=video_path,
|
video_path=video_path,
|
||||||
fps=fps,
|
fps=fps,
|
||||||
vcodec=encoding_cfg["vcodec"],
|
camera_encoder_config=VideoEncoderConfig(
|
||||||
pix_fmt=encoding_cfg["pix_fmt"],
|
vcodec=encoding_cfg["vcodec"],
|
||||||
g=encoding_cfg.get("g"),
|
pix_fmt=encoding_cfg["pix_fmt"],
|
||||||
crf=encoding_cfg.get("crf"),
|
g=encoding_cfg.get("g"),
|
||||||
|
crf=encoding_cfg.get("crf"),
|
||||||
|
preset=encoding_cfg.get("preset"),
|
||||||
|
),
|
||||||
# fast_decode=encoding_cfg.get("fastdecode"),
|
# fast_decode=encoding_cfg.get("fastdecode"),
|
||||||
overwrite=True,
|
overwrite=True,
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -62,7 +62,7 @@ from .utils import (
|
|||||||
DEFAULT_EPISODES_PATH,
|
DEFAULT_EPISODES_PATH,
|
||||||
update_chunk_file_indices,
|
update_chunk_file_indices,
|
||||||
)
|
)
|
||||||
from .video_utils import encode_video_frames, get_video_info
|
from .video_utils import VideoEncoderConfig, encode_video_frames, get_video_info
|
||||||
|
|
||||||
|
|
||||||
def _load_episode_with_stats(src_dataset: LeRobotDataset, episode_idx: int) -> dict:
|
def _load_episode_with_stats(src_dataset: LeRobotDataset, episode_idx: int) -> dict:
|
||||||
@@ -92,6 +92,7 @@ def delete_episodes(
|
|||||||
episode_indices: list[int],
|
episode_indices: list[int],
|
||||||
output_dir: str | Path | None = None,
|
output_dir: str | Path | None = None,
|
||||||
repo_id: str | None = None,
|
repo_id: str | None = None,
|
||||||
|
camera_encoder_config: VideoEncoderConfig | None = None,
|
||||||
) -> LeRobotDataset:
|
) -> LeRobotDataset:
|
||||||
"""Delete episodes from a LeRobotDataset and create a new dataset.
|
"""Delete episodes from a LeRobotDataset and create a new dataset.
|
||||||
|
|
||||||
@@ -100,6 +101,7 @@ def delete_episodes(
|
|||||||
episode_indices: List of episode indices to delete.
|
episode_indices: List of episode indices to delete.
|
||||||
output_dir: Root directory where the edited dataset will be stored. If not specified, defaults to $HF_LEROBOT_HOME/repo_id. Equivalent to new_root in EditDatasetConfig.
|
output_dir: Root directory where the edited dataset will be stored. If not specified, defaults to $HF_LEROBOT_HOME/repo_id. Equivalent to new_root in EditDatasetConfig.
|
||||||
repo_id: Edited dataset identifier. Equivalent to new_repo_id in EditDatasetConfig.
|
repo_id: Edited dataset identifier. Equivalent to new_repo_id in EditDatasetConfig.
|
||||||
|
camera_encoder_config: Video encoder settings used when re-encoding video segments (default: :class:`VideoEncoderConfig()`).
|
||||||
"""
|
"""
|
||||||
if not episode_indices:
|
if not episode_indices:
|
||||||
raise ValueError("No episodes to delete")
|
raise ValueError("No episodes to delete")
|
||||||
@@ -132,7 +134,7 @@ def delete_episodes(
|
|||||||
|
|
||||||
video_metadata = None
|
video_metadata = None
|
||||||
if dataset.meta.video_keys:
|
if dataset.meta.video_keys:
|
||||||
video_metadata = _copy_and_reindex_videos(dataset, new_meta, episode_mapping)
|
video_metadata = _copy_and_reindex_videos(dataset, new_meta, episode_mapping, camera_encoder_config)
|
||||||
|
|
||||||
data_metadata = _copy_and_reindex_data(dataset, new_meta, episode_mapping)
|
data_metadata = _copy_and_reindex_data(dataset, new_meta, episode_mapping)
|
||||||
|
|
||||||
@@ -154,6 +156,7 @@ def split_dataset(
|
|||||||
dataset: LeRobotDataset,
|
dataset: LeRobotDataset,
|
||||||
splits: dict[str, float | list[int]],
|
splits: dict[str, float | list[int]],
|
||||||
output_dir: str | Path | None = None,
|
output_dir: str | Path | None = None,
|
||||||
|
camera_encoder_config: VideoEncoderConfig | None = None,
|
||||||
) -> dict[str, LeRobotDataset]:
|
) -> dict[str, LeRobotDataset]:
|
||||||
"""Split a LeRobotDataset into multiple smaller datasets.
|
"""Split a LeRobotDataset into multiple smaller datasets.
|
||||||
|
|
||||||
@@ -162,6 +165,7 @@ def split_dataset(
|
|||||||
splits: Either a dict mapping split names to episode indices, or a dict mapping
|
splits: Either a dict mapping split names to episode indices, or a dict mapping
|
||||||
split names to fractions (must sum to <= 1.0).
|
split names to fractions (must sum to <= 1.0).
|
||||||
output_dir: Root directory where the split datasets will be stored. If not specified, defaults to $HF_LEROBOT_HOME/repo_id.
|
output_dir: Root directory where the split datasets will be stored. If not specified, defaults to $HF_LEROBOT_HOME/repo_id.
|
||||||
|
camera_encoder_config: Video encoder settings used when re-encoding video segments (default: :class:`VideoEncoderConfig()`).
|
||||||
|
|
||||||
Examples:
|
Examples:
|
||||||
Split by specific episodes
|
Split by specific episodes
|
||||||
@@ -222,7 +226,9 @@ def split_dataset(
|
|||||||
|
|
||||||
video_metadata = None
|
video_metadata = None
|
||||||
if dataset.meta.video_keys:
|
if dataset.meta.video_keys:
|
||||||
video_metadata = _copy_and_reindex_videos(dataset, new_meta, episode_mapping)
|
video_metadata = _copy_and_reindex_videos(
|
||||||
|
dataset, new_meta, episode_mapping, camera_encoder_config
|
||||||
|
)
|
||||||
|
|
||||||
data_metadata = _copy_and_reindex_data(dataset, new_meta, episode_mapping)
|
data_metadata = _copy_and_reindex_data(dataset, new_meta, episode_mapping)
|
||||||
|
|
||||||
@@ -578,8 +584,7 @@ def _keep_episodes_from_video_with_av(
|
|||||||
output_path: Path,
|
output_path: Path,
|
||||||
episodes_to_keep: list[tuple[int, int]],
|
episodes_to_keep: list[tuple[int, int]],
|
||||||
fps: float,
|
fps: float,
|
||||||
vcodec: str = "libsvtav1",
|
camera_encoder_config: VideoEncoderConfig | None = None,
|
||||||
pix_fmt: str = "yuv420p",
|
|
||||||
) -> None:
|
) -> None:
|
||||||
"""Keep only specified episodes from a video file using PyAV.
|
"""Keep only specified episodes from a video file using PyAV.
|
||||||
|
|
||||||
@@ -593,9 +598,10 @@ def _keep_episodes_from_video_with_av(
|
|||||||
Ranges are half-open intervals: [start_frame, end_frame), where start_frame
|
Ranges are half-open intervals: [start_frame, end_frame), where start_frame
|
||||||
is inclusive and end_frame is exclusive.
|
is inclusive and end_frame is exclusive.
|
||||||
fps: Frame rate of the video.
|
fps: Frame rate of the video.
|
||||||
vcodec: Video codec to use for encoding.
|
camera_encoder_config: Video encoder settings (default: :class:`VideoEncoderConfig()`).
|
||||||
pix_fmt: Pixel format for output video.
|
|
||||||
"""
|
"""
|
||||||
|
if camera_encoder_config is None:
|
||||||
|
camera_encoder_config = VideoEncoderConfig()
|
||||||
from fractions import Fraction
|
from fractions import Fraction
|
||||||
|
|
||||||
import av
|
import av
|
||||||
@@ -619,12 +625,12 @@ def _keep_episodes_from_video_with_av(
|
|||||||
|
|
||||||
# Convert fps to Fraction for PyAV compatibility.
|
# Convert fps to Fraction for PyAV compatibility.
|
||||||
fps_fraction = Fraction(fps).limit_denominator(1000)
|
fps_fraction = Fraction(fps).limit_denominator(1000)
|
||||||
v_out = out.add_stream(vcodec, rate=fps_fraction)
|
v_out = out.add_stream(camera_encoder_config.vcodec, rate=fps_fraction)
|
||||||
|
|
||||||
# PyAV type stubs don't distinguish video streams from audio/subtitle streams.
|
# PyAV type stubs don't distinguish video streams from audio/subtitle streams.
|
||||||
v_out.width = v_in.codec_context.width
|
v_out.width = v_in.codec_context.width
|
||||||
v_out.height = v_in.codec_context.height
|
v_out.height = v_in.codec_context.height
|
||||||
v_out.pix_fmt = pix_fmt
|
v_out.pix_fmt = camera_encoder_config.pix_fmt
|
||||||
|
|
||||||
# Set time_base to match the frame rate for proper timestamp handling.
|
# Set time_base to match the frame rate for proper timestamp handling.
|
||||||
v_out.time_base = Fraction(1, int(fps))
|
v_out.time_base = Fraction(1, int(fps))
|
||||||
@@ -687,8 +693,7 @@ def _copy_and_reindex_videos(
|
|||||||
src_dataset: LeRobotDataset,
|
src_dataset: LeRobotDataset,
|
||||||
dst_meta: LeRobotDatasetMetadata,
|
dst_meta: LeRobotDatasetMetadata,
|
||||||
episode_mapping: dict[int, int],
|
episode_mapping: dict[int, int],
|
||||||
vcodec: str = "libsvtav1",
|
camera_encoder_config: VideoEncoderConfig | None = None,
|
||||||
pix_fmt: str = "yuv420p",
|
|
||||||
) -> dict[int, dict]:
|
) -> dict[int, dict]:
|
||||||
"""Copy and filter video files, only re-encoding files with deleted episodes.
|
"""Copy and filter video files, only re-encoding files with deleted episodes.
|
||||||
|
|
||||||
@@ -700,10 +705,13 @@ def _copy_and_reindex_videos(
|
|||||||
src_dataset: Source dataset to copy from
|
src_dataset: Source dataset to copy from
|
||||||
dst_meta: Destination metadata object
|
dst_meta: Destination metadata object
|
||||||
episode_mapping: Mapping from old episode indices to new indices
|
episode_mapping: Mapping from old episode indices to new indices
|
||||||
|
camera_encoder_config: Video encoder settings used when re-encoding segments (default: :class:`VideoEncoderConfig()`).
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
dict mapping episode index to its video metadata (chunk_index, file_index, timestamps)
|
dict mapping episode index to its video metadata (chunk_index, file_index, timestamps)
|
||||||
"""
|
"""
|
||||||
|
if camera_encoder_config is None:
|
||||||
|
camera_encoder_config = VideoEncoderConfig()
|
||||||
if src_dataset.meta.episodes is None:
|
if src_dataset.meta.episodes is None:
|
||||||
src_dataset.meta.episodes = load_episodes(src_dataset.meta.root)
|
src_dataset.meta.episodes = load_episodes(src_dataset.meta.root)
|
||||||
|
|
||||||
@@ -792,8 +800,7 @@ def _copy_and_reindex_videos(
|
|||||||
dst_video_path,
|
dst_video_path,
|
||||||
episodes_to_keep_ranges,
|
episodes_to_keep_ranges,
|
||||||
src_dataset.meta.fps,
|
src_dataset.meta.fps,
|
||||||
vcodec,
|
camera_encoder_config,
|
||||||
pix_fmt,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
cumulative_ts = 0.0
|
cumulative_ts = 0.0
|
||||||
@@ -1264,11 +1271,7 @@ def _estimate_frame_size_via_calibration(
|
|||||||
episode_indices: list[int],
|
episode_indices: list[int],
|
||||||
temp_dir: Path,
|
temp_dir: Path,
|
||||||
fps: int,
|
fps: int,
|
||||||
vcodec: str,
|
camera_encoder_config: VideoEncoderConfig,
|
||||||
pix_fmt: str,
|
|
||||||
g: int,
|
|
||||||
crf: int,
|
|
||||||
fast_decode: int,
|
|
||||||
num_calibration_frames: int = 30,
|
num_calibration_frames: int = 30,
|
||||||
) -> float:
|
) -> float:
|
||||||
"""Estimate MB per frame by encoding a small calibration sample.
|
"""Estimate MB per frame by encoding a small calibration sample.
|
||||||
@@ -1282,11 +1285,7 @@ def _estimate_frame_size_via_calibration(
|
|||||||
episode_indices: List of episode indices being processed.
|
episode_indices: List of episode indices being processed.
|
||||||
temp_dir: Temporary directory for calibration files.
|
temp_dir: Temporary directory for calibration files.
|
||||||
fps: Frames per second for video encoding.
|
fps: Frames per second for video encoding.
|
||||||
vcodec: Video codec (libsvtav1, h264, hevc).
|
camera_encoder_config: Video encoder settings used for calibration encoding.
|
||||||
pix_fmt: Pixel format (yuv420p, etc.).
|
|
||||||
g: GOP size (group of pictures).
|
|
||||||
crf: Constant Rate Factor (quality).
|
|
||||||
fast_decode: Fast decode tuning parameter.
|
|
||||||
num_calibration_frames: Number of frames to use for calibration (default: 30).
|
num_calibration_frames: Number of frames to use for calibration (default: 30).
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
@@ -1322,11 +1321,7 @@ def _estimate_frame_size_via_calibration(
|
|||||||
imgs_dir=calibration_dir,
|
imgs_dir=calibration_dir,
|
||||||
video_path=calibration_video_path,
|
video_path=calibration_video_path,
|
||||||
fps=fps,
|
fps=fps,
|
||||||
vcodec=vcodec,
|
camera_encoder_config=camera_encoder_config,
|
||||||
pix_fmt=pix_fmt,
|
|
||||||
g=g,
|
|
||||||
crf=crf,
|
|
||||||
fast_decode=fast_decode,
|
|
||||||
overwrite=True,
|
overwrite=True,
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -1644,11 +1639,7 @@ def convert_image_to_video_dataset(
|
|||||||
dataset: LeRobotDataset,
|
dataset: LeRobotDataset,
|
||||||
output_dir: Path | None = None,
|
output_dir: Path | None = None,
|
||||||
repo_id: str | None = None,
|
repo_id: str | None = None,
|
||||||
vcodec: str = "libsvtav1",
|
camera_encoder_config: VideoEncoderConfig | None = None,
|
||||||
pix_fmt: str = "yuv420p",
|
|
||||||
g: int = 2,
|
|
||||||
crf: int = 30,
|
|
||||||
fast_decode: int = 0,
|
|
||||||
episode_indices: list[int] | None = None,
|
episode_indices: list[int] | None = None,
|
||||||
num_workers: int = 4,
|
num_workers: int = 4,
|
||||||
max_episodes_per_batch: int | None = None,
|
max_episodes_per_batch: int | None = None,
|
||||||
@@ -1663,11 +1654,7 @@ def convert_image_to_video_dataset(
|
|||||||
dataset: The source LeRobot dataset with images
|
dataset: The source LeRobot dataset with images
|
||||||
output_dir: Root directory where the edited dataset will be stored. If not specified, defaults to $HF_LEROBOT_HOME/repo_id. Equivalent to new_root in EditDatasetConfig.
|
output_dir: Root directory where the edited dataset will be stored. If not specified, defaults to $HF_LEROBOT_HOME/repo_id. Equivalent to new_root in EditDatasetConfig.
|
||||||
repo_id: Edited dataset identifier. Equivalent to new_repo_id in EditDatasetConfig.
|
repo_id: Edited dataset identifier. Equivalent to new_repo_id in EditDatasetConfig.
|
||||||
vcodec: Video codec (default: libsvtav1)
|
camera_encoder_config: Video encoder settings (default: :class:`VideoEncoderConfig()`).
|
||||||
pix_fmt: Pixel format (default: yuv420p)
|
|
||||||
g: Group of pictures size (default: 2)
|
|
||||||
crf: Constant rate factor (default: 30)
|
|
||||||
fast_decode: Fast decode tuning (default: 0)
|
|
||||||
episode_indices: List of episode indices to convert (None = all episodes)
|
episode_indices: List of episode indices to convert (None = all episodes)
|
||||||
num_workers: Number of threads for parallel processing (default: 4)
|
num_workers: Number of threads for parallel processing (default: 4)
|
||||||
max_episodes_per_batch: Maximum episodes per video batch to avoid memory issues (None = no limit)
|
max_episodes_per_batch: Maximum episodes per video batch to avoid memory issues (None = no limit)
|
||||||
@@ -1676,6 +1663,9 @@ def convert_image_to_video_dataset(
|
|||||||
Returns:
|
Returns:
|
||||||
New LeRobotDataset with images encoded as videos
|
New LeRobotDataset with images encoded as videos
|
||||||
"""
|
"""
|
||||||
|
if camera_encoder_config is None:
|
||||||
|
camera_encoder_config = VideoEncoderConfig()
|
||||||
|
|
||||||
# Check that it's an image dataset
|
# Check that it's an image dataset
|
||||||
if len(dataset.meta.video_keys) > 0:
|
if len(dataset.meta.video_keys) > 0:
|
||||||
raise ValueError(
|
raise ValueError(
|
||||||
@@ -1699,7 +1689,10 @@ def convert_image_to_video_dataset(
|
|||||||
logging.info(
|
logging.info(
|
||||||
f"Converting {len(episode_indices)} episodes with {len(img_keys)} cameras from {dataset.repo_id}"
|
f"Converting {len(episode_indices)} episodes with {len(img_keys)} cameras from {dataset.repo_id}"
|
||||||
)
|
)
|
||||||
logging.info(f"Video codec: {vcodec}, pixel format: {pix_fmt}, GOP: {g}, CRF: {crf}")
|
logging.info(
|
||||||
|
f"Video codec: {camera_encoder_config.vcodec}, pixel format: {camera_encoder_config.pix_fmt}, "
|
||||||
|
f"GOP: {camera_encoder_config.g}, CRF: {camera_encoder_config.crf}"
|
||||||
|
)
|
||||||
|
|
||||||
# Create new features dict, converting image features to video features
|
# Create new features dict, converting image features to video features
|
||||||
new_features = {}
|
new_features = {}
|
||||||
@@ -1769,11 +1762,7 @@ def convert_image_to_video_dataset(
|
|||||||
episode_indices=episode_indices,
|
episode_indices=episode_indices,
|
||||||
temp_dir=temp_dir,
|
temp_dir=temp_dir,
|
||||||
fps=fps,
|
fps=fps,
|
||||||
vcodec=vcodec,
|
camera_encoder_config=camera_encoder_config,
|
||||||
pix_fmt=pix_fmt,
|
|
||||||
g=g,
|
|
||||||
crf=crf,
|
|
||||||
fast_decode=fast_decode,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
logging.info(f"Processing camera: {img_key}")
|
logging.info(f"Processing camera: {img_key}")
|
||||||
@@ -1815,11 +1804,7 @@ def convert_image_to_video_dataset(
|
|||||||
imgs_dir=imgs_dir,
|
imgs_dir=imgs_dir,
|
||||||
video_path=video_path,
|
video_path=video_path,
|
||||||
fps=fps,
|
fps=fps,
|
||||||
vcodec=vcodec,
|
camera_encoder_config=camera_encoder_config,
|
||||||
pix_fmt=pix_fmt,
|
|
||||||
g=g,
|
|
||||||
crf=crf,
|
|
||||||
fast_decode=fast_decode,
|
|
||||||
overwrite=True,
|
overwrite=True,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|||||||
@@ -52,6 +52,7 @@ from .utils import (
|
|||||||
)
|
)
|
||||||
from .video_utils import (
|
from .video_utils import (
|
||||||
StreamingVideoEncoder,
|
StreamingVideoEncoder,
|
||||||
|
VideoEncoderConfig,
|
||||||
concatenate_video_files,
|
concatenate_video_files,
|
||||||
encode_video_frames,
|
encode_video_frames,
|
||||||
get_video_duration_in_s,
|
get_video_duration_in_s,
|
||||||
@@ -65,14 +66,19 @@ def _encode_video_worker(
|
|||||||
episode_index: int,
|
episode_index: int,
|
||||||
root: Path,
|
root: Path,
|
||||||
fps: int,
|
fps: int,
|
||||||
vcodec: str = "libsvtav1",
|
camera_encoder_config: VideoEncoderConfig | None = None,
|
||||||
encoder_threads: int | None = None,
|
encoder_threads: int | None = None,
|
||||||
) -> Path:
|
) -> Path:
|
||||||
temp_path = Path(tempfile.mkdtemp(dir=root)) / f"{video_key}_{episode_index:03d}.mp4"
|
temp_path = Path(tempfile.mkdtemp(dir=root)) / f"{video_key}_{episode_index:03d}.mp4"
|
||||||
fpath = DEFAULT_IMAGE_PATH.format(image_key=video_key, episode_index=episode_index, frame_index=0)
|
fpath = DEFAULT_IMAGE_PATH.format(image_key=video_key, episode_index=episode_index, frame_index=0)
|
||||||
img_dir = (root / fpath).parent
|
img_dir = (root / fpath).parent
|
||||||
encode_video_frames(
|
encode_video_frames(
|
||||||
img_dir, temp_path, fps, vcodec=vcodec, overwrite=True, encoder_threads=encoder_threads
|
img_dir,
|
||||||
|
temp_path,
|
||||||
|
fps,
|
||||||
|
camera_encoder_config=camera_encoder_config,
|
||||||
|
encoder_threads=encoder_threads,
|
||||||
|
overwrite=True,
|
||||||
)
|
)
|
||||||
shutil.rmtree(img_dir)
|
shutil.rmtree(img_dir)
|
||||||
return temp_path
|
return temp_path
|
||||||
@@ -89,20 +95,21 @@ class DatasetWriter:
|
|||||||
self,
|
self,
|
||||||
meta: LeRobotDatasetMetadata,
|
meta: LeRobotDatasetMetadata,
|
||||||
root: Path,
|
root: Path,
|
||||||
vcodec: str,
|
camera_encoder_config: VideoEncoderConfig,
|
||||||
encoder_threads: int | None,
|
encoder_threads: int | None,
|
||||||
batch_encoding_size: int,
|
batch_encoding_size: int,
|
||||||
streaming_encoder: StreamingVideoEncoder | None = None,
|
streaming_encoder: StreamingVideoEncoder | None = None,
|
||||||
initial_frames: int = 0,
|
initial_frames: int = 0,
|
||||||
):
|
):
|
||||||
"""Initialize the writer with metadata, codec, and encoding config.
|
"""Initialize the writer with metadata, codec, and encoder config.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
meta: Dataset metadata instance (used for feature schema, chunk
|
meta: Dataset metadata instance (used for feature schema, chunk
|
||||||
settings, and episode persistence).
|
settings, and episode persistence).
|
||||||
root: Local dataset root directory.
|
root: Local dataset root directory.
|
||||||
vcodec: Video codec for encoding (e.g. ``'libsvtav1'``, ``'h264'``).
|
camera_encoder_config: Video encoder settings applied to all cameras.
|
||||||
encoder_threads: Threads per encoder instance. ``None`` for auto.
|
encoder_threads: Number of encoder threads (global). ``None``
|
||||||
|
lets the codec decide.
|
||||||
batch_encoding_size: Number of episodes to accumulate before
|
batch_encoding_size: Number of episodes to accumulate before
|
||||||
batch-encoding videos.
|
batch-encoding videos.
|
||||||
streaming_encoder: Optional pre-built :class:`StreamingVideoEncoder`
|
streaming_encoder: Optional pre-built :class:`StreamingVideoEncoder`
|
||||||
@@ -111,7 +118,7 @@ class DatasetWriter:
|
|||||||
"""
|
"""
|
||||||
self._meta = meta
|
self._meta = meta
|
||||||
self._root = root
|
self._root = root
|
||||||
self._vcodec = vcodec
|
self._camera_encoder_config = camera_encoder_config
|
||||||
self._encoder_threads = encoder_threads
|
self._encoder_threads = encoder_threads
|
||||||
self._batch_encoding_size = batch_encoding_size
|
self._batch_encoding_size = batch_encoding_size
|
||||||
self._streaming_encoder = streaming_encoder
|
self._streaming_encoder = streaming_encoder
|
||||||
@@ -284,7 +291,7 @@ class DatasetWriter:
|
|||||||
episode_index,
|
episode_index,
|
||||||
self._root,
|
self._root,
|
||||||
self._meta.fps,
|
self._meta.fps,
|
||||||
self._vcodec,
|
self._camera_encoder_config,
|
||||||
self._encoder_threads,
|
self._encoder_threads,
|
||||||
): video_key
|
): video_key
|
||||||
for video_key in self._meta.video_keys
|
for video_key in self._meta.video_keys
|
||||||
@@ -564,7 +571,12 @@ class DatasetWriter:
|
|||||||
def _encode_temporary_episode_video(self, video_key: str, episode_index: int) -> Path:
|
def _encode_temporary_episode_video(self, video_key: str, episode_index: int) -> Path:
|
||||||
"""Use ffmpeg to convert frames stored as png into mp4 videos."""
|
"""Use ffmpeg to convert frames stored as png into mp4 videos."""
|
||||||
return _encode_video_worker(
|
return _encode_video_worker(
|
||||||
video_key, episode_index, self._root, self._meta.fps, self._vcodec, self._encoder_threads
|
video_key,
|
||||||
|
episode_index,
|
||||||
|
self._root,
|
||||||
|
self._meta.fps,
|
||||||
|
self._camera_encoder_config,
|
||||||
|
self._encoder_threads,
|
||||||
)
|
)
|
||||||
|
|
||||||
def close_writer(self) -> None:
|
def close_writer(self) -> None:
|
||||||
|
|||||||
@@ -37,7 +37,7 @@ from .utils import (
|
|||||||
from .video_utils import (
|
from .video_utils import (
|
||||||
StreamingVideoEncoder,
|
StreamingVideoEncoder,
|
||||||
get_safe_default_video_backend,
|
get_safe_default_video_backend,
|
||||||
resolve_vcodec,
|
VideoEncoderConfig,
|
||||||
)
|
)
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
@@ -58,10 +58,10 @@ class LeRobotDataset(torch.utils.data.Dataset):
|
|||||||
video_backend: str | None = None,
|
video_backend: str | None = None,
|
||||||
return_uint8: bool = False,
|
return_uint8: bool = False,
|
||||||
batch_encoding_size: int = 1,
|
batch_encoding_size: int = 1,
|
||||||
vcodec: str = "libsvtav1",
|
camera_encoder_config: VideoEncoderConfig | None = None,
|
||||||
|
encoder_threads: int | None = None,
|
||||||
streaming_encoding: bool = False,
|
streaming_encoding: bool = False,
|
||||||
encoder_queue_maxsize: int = 30,
|
encoder_queue_maxsize: int = 30,
|
||||||
encoder_threads: int | None = None,
|
|
||||||
):
|
):
|
||||||
"""
|
"""
|
||||||
2 modes are available for instantiating this class, depending on 2 different use cases:
|
2 modes are available for instantiating this class, depending on 2 different use cases:
|
||||||
@@ -177,16 +177,15 @@ class LeRobotDataset(torch.utils.data.Dataset):
|
|||||||
You can also use the 'pyav' decoder used by Torchvision, which used to be the default option, or 'video_reader' which is another decoder of Torchvision.
|
You can also use the 'pyav' decoder used by Torchvision, which used to be the default option, or 'video_reader' which is another decoder of Torchvision.
|
||||||
batch_encoding_size (int, optional): Number of episodes to accumulate before batch encoding videos.
|
batch_encoding_size (int, optional): Number of episodes to accumulate before batch encoding videos.
|
||||||
Set to 1 for immediate encoding (default), or higher for batched encoding. Defaults to 1.
|
Set to 1 for immediate encoding (default), or higher for batched encoding. Defaults to 1.
|
||||||
vcodec (str, optional): Video codec for encoding videos during recording. Options: 'h264', 'hevc',
|
camera_encoder_config (VideoEncoderConfig | None, optional): Video encoder settings for cameras
|
||||||
'libsvtav1', 'auto', or hardware-specific codecs like 'h264_videotoolbox', 'h264_nvenc'.
|
(codec, quality, etc.). Defaults to
|
||||||
Defaults to 'libsvtav1'. Use 'auto' to auto-detect the best available hardware encoder.
|
:class:`~lerobot.datasets.video_utils.VideoEncoderConfig` defaults when ``None``.
|
||||||
|
encoder_threads (int | None, optional): Number of encoder threads (global). ``None`` lets the
|
||||||
|
codec decide.
|
||||||
streaming_encoding (bool, optional): If True, encode video frames in real-time during capture
|
streaming_encoding (bool, optional): If True, encode video frames in real-time during capture
|
||||||
instead of writing PNG images first. This makes save_episode() near-instant. Defaults to False.
|
instead of writing PNG images first. This makes save_episode() near-instant. Defaults to False.
|
||||||
encoder_queue_maxsize (int, optional): Maximum number of frames to buffer per camera when using
|
encoder_queue_maxsize (int, optional): Maximum number of frames to buffer per camera when using
|
||||||
streaming encoding. Defaults to 30 (~1s at 30fps).
|
streaming encoding. Defaults to 30 (~1s at 30fps).
|
||||||
encoder_threads (int | None, optional): Number of threads per encoder instance. None lets the
|
|
||||||
codec auto-detect (default). Lower values reduce CPU usage per encoder. Maps to 'lp' (via svtav1-params) for
|
|
||||||
libsvtav1 and 'threads' for h264/hevc.
|
|
||||||
|
|
||||||
Note:
|
Note:
|
||||||
Write-mode parameters (``streaming_encoding``, ``batch_encoding_size``) passed to
|
Write-mode parameters (``streaming_encoding``, ``batch_encoding_size``) passed to
|
||||||
@@ -205,7 +204,9 @@ class LeRobotDataset(torch.utils.data.Dataset):
|
|||||||
self._video_backend = video_backend if video_backend else get_safe_default_video_backend()
|
self._video_backend = video_backend if video_backend else get_safe_default_video_backend()
|
||||||
self._return_uint8 = return_uint8
|
self._return_uint8 = return_uint8
|
||||||
self._batch_encoding_size = batch_encoding_size
|
self._batch_encoding_size = batch_encoding_size
|
||||||
self._vcodec = resolve_vcodec(vcodec)
|
if camera_encoder_config is None:
|
||||||
|
camera_encoder_config = VideoEncoderConfig()
|
||||||
|
self._camera_encoder_config = camera_encoder_config
|
||||||
self._encoder_threads = encoder_threads
|
self._encoder_threads = encoder_threads
|
||||||
|
|
||||||
if self._requested_root is not None:
|
if self._requested_root is not None:
|
||||||
@@ -251,13 +252,16 @@ class LeRobotDataset(torch.utils.data.Dataset):
|
|||||||
streaming_enc = None
|
streaming_enc = None
|
||||||
if streaming_encoding and len(self.meta.video_keys) > 0:
|
if streaming_encoding and len(self.meta.video_keys) > 0:
|
||||||
streaming_enc = self._build_streaming_encoder(
|
streaming_enc = self._build_streaming_encoder(
|
||||||
self.meta.fps, self._vcodec, encoder_queue_maxsize, encoder_threads
|
self.meta.fps,
|
||||||
|
self._camera_encoder_config,
|
||||||
|
self._encoder_threads,
|
||||||
|
encoder_queue_maxsize,
|
||||||
)
|
)
|
||||||
self.writer = DatasetWriter(
|
self.writer = DatasetWriter(
|
||||||
meta=self.meta,
|
meta=self.meta,
|
||||||
root=self.root,
|
root=self.root,
|
||||||
vcodec=self._vcodec,
|
camera_encoder_config=self._camera_encoder_config,
|
||||||
encoder_threads=encoder_threads,
|
encoder_threads=self._encoder_threads,
|
||||||
batch_encoding_size=batch_encoding_size,
|
batch_encoding_size=batch_encoding_size,
|
||||||
streaming_encoder=streaming_enc,
|
streaming_encoder=streaming_enc,
|
||||||
initial_frames=self.meta.total_frames,
|
initial_frames=self.meta.total_frames,
|
||||||
@@ -298,19 +302,15 @@ class LeRobotDataset(torch.utils.data.Dataset):
|
|||||||
@staticmethod
|
@staticmethod
|
||||||
def _build_streaming_encoder(
|
def _build_streaming_encoder(
|
||||||
fps: int,
|
fps: int,
|
||||||
vcodec: str,
|
camera_encoder_config: VideoEncoderConfig,
|
||||||
encoder_queue_maxsize: int,
|
|
||||||
encoder_threads: int | None,
|
encoder_threads: int | None,
|
||||||
|
encoder_queue_maxsize: int,
|
||||||
) -> StreamingVideoEncoder:
|
) -> StreamingVideoEncoder:
|
||||||
return StreamingVideoEncoder(
|
return StreamingVideoEncoder(
|
||||||
fps=fps,
|
fps=fps,
|
||||||
vcodec=vcodec,
|
camera_encoder_config=camera_encoder_config,
|
||||||
pix_fmt="yuv420p",
|
|
||||||
g=2,
|
|
||||||
crf=30,
|
|
||||||
preset=None,
|
|
||||||
queue_maxsize=encoder_queue_maxsize,
|
|
||||||
encoder_threads=encoder_threads,
|
encoder_threads=encoder_threads,
|
||||||
|
queue_maxsize=encoder_queue_maxsize,
|
||||||
)
|
)
|
||||||
|
|
||||||
# ── Metadata properties ───────────────────────────────────────────
|
# ── Metadata properties ───────────────────────────────────────────
|
||||||
@@ -624,8 +624,8 @@ class LeRobotDataset(torch.utils.data.Dataset):
|
|||||||
image_writer_processes: int = 0,
|
image_writer_processes: int = 0,
|
||||||
image_writer_threads: int = 0,
|
image_writer_threads: int = 0,
|
||||||
video_backend: str | None = None,
|
video_backend: str | None = None,
|
||||||
|
camera_encoder_config: VideoEncoderConfig | None = None,
|
||||||
batch_encoding_size: int = 1,
|
batch_encoding_size: int = 1,
|
||||||
vcodec: str = "libsvtav1",
|
|
||||||
metadata_buffer_size: int = 10,
|
metadata_buffer_size: int = 10,
|
||||||
streaming_encoding: bool = False,
|
streaming_encoding: bool = False,
|
||||||
encoder_queue_maxsize: int = 30,
|
encoder_queue_maxsize: int = 30,
|
||||||
@@ -656,20 +656,23 @@ class LeRobotDataset(torch.utils.data.Dataset):
|
|||||||
video_backend: Video decoding backend (used when reading back).
|
video_backend: Video decoding backend (used when reading back).
|
||||||
batch_encoding_size: Number of episodes to accumulate before
|
batch_encoding_size: Number of episodes to accumulate before
|
||||||
batch-encoding videos. ``1`` means encode immediately.
|
batch-encoding videos. ``1`` means encode immediately.
|
||||||
vcodec: Video codec for encoding. Options include ``'libsvtav1'``,
|
camera_encoder_config: Video encoder settings for cameras; defaults
|
||||||
``'h264'``, ``'hevc'``, ``'auto'``.
|
match :class:`~lerobot.datasets.video_utils.VideoEncoderConfig`
|
||||||
|
when ``None``.
|
||||||
|
encoder_threads: Number of encoder threads (global). ``None``
|
||||||
|
lets the codec decide.
|
||||||
metadata_buffer_size: Number of episode metadata records to buffer
|
metadata_buffer_size: Number of episode metadata records to buffer
|
||||||
before flushing to parquet.
|
before flushing to parquet.
|
||||||
streaming_encoding: If ``True``, encode video frames in real-time
|
streaming_encoding: If ``True``, encode video frames in real-time
|
||||||
during capture instead of writing images first.
|
during capture instead of writing images first.
|
||||||
encoder_queue_maxsize: Max buffered frames per camera when using
|
encoder_queue_maxsize: Max buffered frames per camera when using
|
||||||
streaming encoding.
|
streaming encoding.
|
||||||
encoder_threads: Threads per encoder instance. ``None`` for auto.
|
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
A new :class:`LeRobotDataset` in write mode.
|
A new :class:`LeRobotDataset` in write mode.
|
||||||
"""
|
"""
|
||||||
vcodec = resolve_vcodec(vcodec)
|
if camera_encoder_config is None:
|
||||||
|
camera_encoder_config = VideoEncoderConfig()
|
||||||
obj = cls.__new__(cls)
|
obj = cls.__new__(cls)
|
||||||
obj.meta = LeRobotDatasetMetadata.create(
|
obj.meta = LeRobotDatasetMetadata.create(
|
||||||
repo_id=repo_id,
|
repo_id=repo_id,
|
||||||
@@ -693,20 +696,21 @@ class LeRobotDataset(torch.utils.data.Dataset):
|
|||||||
obj._video_backend = video_backend if video_backend is not None else get_safe_default_video_backend()
|
obj._video_backend = video_backend if video_backend is not None else get_safe_default_video_backend()
|
||||||
obj._return_uint8 = False
|
obj._return_uint8 = False
|
||||||
obj._batch_encoding_size = batch_encoding_size
|
obj._batch_encoding_size = batch_encoding_size
|
||||||
obj._vcodec = vcodec
|
obj._camera_encoder_config = camera_encoder_config
|
||||||
obj._encoder_threads = encoder_threads
|
obj._encoder_threads = encoder_threads
|
||||||
|
|
||||||
# Reader is lazily created on first access (write-only mode)
|
# Reader is lazily created on first access (write-only mode)
|
||||||
obj.reader = None
|
obj.reader = None
|
||||||
|
|
||||||
# Create writer
|
|
||||||
streaming_enc = None
|
streaming_enc = None
|
||||||
if streaming_encoding and len(obj.meta.video_keys) > 0:
|
if streaming_encoding and len(obj.meta.video_keys) > 0:
|
||||||
streaming_enc = cls._build_streaming_encoder(fps, vcodec, encoder_queue_maxsize, encoder_threads)
|
streaming_enc = cls._build_streaming_encoder(
|
||||||
|
fps, camera_encoder_config, encoder_threads, encoder_queue_maxsize
|
||||||
|
)
|
||||||
obj.writer = DatasetWriter(
|
obj.writer = DatasetWriter(
|
||||||
meta=obj.meta,
|
meta=obj.meta,
|
||||||
root=obj.root,
|
root=obj.root,
|
||||||
vcodec=vcodec,
|
camera_encoder_config=camera_encoder_config,
|
||||||
encoder_threads=encoder_threads,
|
encoder_threads=encoder_threads,
|
||||||
batch_encoding_size=batch_encoding_size,
|
batch_encoding_size=batch_encoding_size,
|
||||||
streaming_encoder=streaming_enc,
|
streaming_encoder=streaming_enc,
|
||||||
@@ -729,12 +733,12 @@ class LeRobotDataset(torch.utils.data.Dataset):
|
|||||||
force_cache_sync: bool = False,
|
force_cache_sync: bool = False,
|
||||||
video_backend: str | None = None,
|
video_backend: str | None = None,
|
||||||
batch_encoding_size: int = 1,
|
batch_encoding_size: int = 1,
|
||||||
vcodec: str = "libsvtav1",
|
camera_encoder_config: VideoEncoderConfig | None = None,
|
||||||
|
encoder_threads: int | None = None,
|
||||||
image_writer_processes: int = 0,
|
image_writer_processes: int = 0,
|
||||||
image_writer_threads: int = 0,
|
image_writer_threads: int = 0,
|
||||||
streaming_encoding: bool = False,
|
streaming_encoding: bool = False,
|
||||||
encoder_queue_maxsize: int = 30,
|
encoder_queue_maxsize: int = 30,
|
||||||
encoder_threads: int | None = None,
|
|
||||||
) -> "LeRobotDataset":
|
) -> "LeRobotDataset":
|
||||||
"""Resume recording on an existing dataset.
|
"""Resume recording on an existing dataset.
|
||||||
|
|
||||||
@@ -757,13 +761,16 @@ class LeRobotDataset(torch.utils.data.Dataset):
|
|||||||
video_backend: Video decoding backend for reading back data.
|
video_backend: Video decoding backend for reading back data.
|
||||||
batch_encoding_size: Number of episodes to accumulate before
|
batch_encoding_size: Number of episodes to accumulate before
|
||||||
batch-encoding videos.
|
batch-encoding videos.
|
||||||
vcodec: Video codec for encoding.
|
camera_encoder_config: Video encoder settings for cameras. When ``None``,
|
||||||
|
a default :class:`~lerobot.datasets.video_utils.VideoEncoderConfig`
|
||||||
|
is used.
|
||||||
|
encoder_threads: Number of encoder threads (global). ``None``
|
||||||
|
lets the codec decide.
|
||||||
image_writer_processes: Subprocesses for async image writing.
|
image_writer_processes: Subprocesses for async image writing.
|
||||||
image_writer_threads: Threads for async image writing.
|
image_writer_threads: Threads for async image writing.
|
||||||
streaming_encoding: If ``True``, encode video in real-time during
|
streaming_encoding: If ``True``, encode video in real-time during
|
||||||
capture.
|
capture.
|
||||||
encoder_queue_maxsize: Max buffered frames per camera for streaming.
|
encoder_queue_maxsize: Max buffered frames per camera for streaming.
|
||||||
encoder_threads: Threads per encoder instance. ``None`` for auto.
|
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
A :class:`LeRobotDataset` in write mode, ready to append episodes.
|
A :class:`LeRobotDataset` in write mode, ready to append episodes.
|
||||||
@@ -774,7 +781,6 @@ class LeRobotDataset(torch.utils.data.Dataset):
|
|||||||
"Writing into the revision-safe Hub snapshot cache (used when root=None) would corrupt "
|
"Writing into the revision-safe Hub snapshot cache (used when root=None) would corrupt "
|
||||||
"the shared cache. Please provide a local directory path."
|
"the shared cache. Please provide a local directory path."
|
||||||
)
|
)
|
||||||
vcodec = resolve_vcodec(vcodec)
|
|
||||||
obj = cls.__new__(cls)
|
obj = cls.__new__(cls)
|
||||||
obj.repo_id = repo_id
|
obj.repo_id = repo_id
|
||||||
obj._requested_root = Path(root)
|
obj._requested_root = Path(root)
|
||||||
@@ -786,8 +792,6 @@ class LeRobotDataset(torch.utils.data.Dataset):
|
|||||||
obj._video_backend = video_backend if video_backend else get_safe_default_video_backend()
|
obj._video_backend = video_backend if video_backend else get_safe_default_video_backend()
|
||||||
obj._return_uint8 = False
|
obj._return_uint8 = False
|
||||||
obj._batch_encoding_size = batch_encoding_size
|
obj._batch_encoding_size = batch_encoding_size
|
||||||
obj._vcodec = vcodec
|
|
||||||
obj._encoder_threads = encoder_threads
|
|
||||||
|
|
||||||
if obj._requested_root is not None:
|
if obj._requested_root is not None:
|
||||||
obj._requested_root.mkdir(exist_ok=True, parents=True)
|
obj._requested_root.mkdir(exist_ok=True, parents=True)
|
||||||
@@ -796,21 +800,25 @@ class LeRobotDataset(torch.utils.data.Dataset):
|
|||||||
obj.meta = LeRobotDatasetMetadata(
|
obj.meta = LeRobotDatasetMetadata(
|
||||||
obj.repo_id, obj._requested_root, obj.revision, force_cache_sync=force_cache_sync
|
obj.repo_id, obj._requested_root, obj.revision, force_cache_sync=force_cache_sync
|
||||||
)
|
)
|
||||||
|
|
||||||
|
if camera_encoder_config is None:
|
||||||
|
camera_encoder_config = VideoEncoderConfig()
|
||||||
|
obj._camera_encoder_config = camera_encoder_config
|
||||||
|
obj._encoder_threads = encoder_threads
|
||||||
obj.root = obj.meta.root
|
obj.root = obj.meta.root
|
||||||
|
|
||||||
# Reader is lazily created on first access (write-only mode)
|
# Reader is lazily created on first access (write-only mode)
|
||||||
obj.reader = None
|
obj.reader = None
|
||||||
|
|
||||||
# Create writer for appending
|
|
||||||
streaming_enc = None
|
streaming_enc = None
|
||||||
if streaming_encoding and len(obj.meta.video_keys) > 0:
|
if streaming_encoding and len(obj.meta.video_keys) > 0:
|
||||||
streaming_enc = cls._build_streaming_encoder(
|
streaming_enc = cls._build_streaming_encoder(
|
||||||
obj.meta.fps, vcodec, encoder_queue_maxsize, encoder_threads
|
obj.meta.fps, camera_encoder_config, encoder_threads, encoder_queue_maxsize
|
||||||
)
|
)
|
||||||
obj.writer = DatasetWriter(
|
obj.writer = DatasetWriter(
|
||||||
meta=obj.meta,
|
meta=obj.meta,
|
||||||
root=obj.root,
|
root=obj.root,
|
||||||
vcodec=vcodec,
|
camera_encoder_config=camera_encoder_config,
|
||||||
encoder_threads=encoder_threads,
|
encoder_threads=encoder_threads,
|
||||||
batch_encoding_size=batch_encoding_size,
|
batch_encoding_size=batch_encoding_size,
|
||||||
streaming_encoder=streaming_enc,
|
streaming_encoder=streaming_enc,
|
||||||
|
|||||||
@@ -49,6 +49,14 @@ Delete episodes and save to a new dataset at a specific path and with a new repo
|
|||||||
--operation.type delete_episodes \
|
--operation.type delete_episodes \
|
||||||
--operation.episode_indices "[0, 2, 5]"
|
--operation.episode_indices "[0, 2, 5]"
|
||||||
|
|
||||||
|
Delete episodes and re-encode video segments with h264:
|
||||||
|
lerobot-edit-dataset \
|
||||||
|
--repo_id lerobot/pusht \
|
||||||
|
--operation.type delete_episodes \
|
||||||
|
--operation.episode_indices "[0, 2, 5]" \
|
||||||
|
--operation.camera_encoder_config.vcodec h264 \
|
||||||
|
--operation.camera_encoder_config.crf 23
|
||||||
|
|
||||||
Split dataset by fractions (pusht_train, pusht_val):
|
Split dataset by fractions (pusht_train, pusht_val):
|
||||||
lerobot-edit-dataset \
|
lerobot-edit-dataset \
|
||||||
--repo_id lerobot/pusht \
|
--repo_id lerobot/pusht \
|
||||||
@@ -74,6 +82,14 @@ Split into more than two splits:
|
|||||||
--operation.type split \
|
--operation.type split \
|
||||||
--operation.splits '{"train": 0.6, "val": 0.2, "test": 0.2}'
|
--operation.splits '{"train": 0.6, "val": 0.2, "test": 0.2}'
|
||||||
|
|
||||||
|
Split dataset and re-encode video segments with h264:
|
||||||
|
lerobot-edit-dataset \
|
||||||
|
--repo_id lerobot/pusht \
|
||||||
|
--operation.type split \
|
||||||
|
--operation.splits '{"train": 0.8, "val": 0.2}' \
|
||||||
|
--operation.camera_encoder_config.vcodec h264 \
|
||||||
|
--operation.camera_encoder_config.crf 23
|
||||||
|
|
||||||
Merge multiple datasets:
|
Merge multiple datasets:
|
||||||
lerobot-edit-dataset \
|
lerobot-edit-dataset \
|
||||||
--new_repo_id lerobot/pusht_merged \
|
--new_repo_id lerobot/pusht_merged \
|
||||||
@@ -187,7 +203,7 @@ import abc
|
|||||||
import logging
|
import logging
|
||||||
import shutil
|
import shutil
|
||||||
import sys
|
import sys
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass, field
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
import draccus
|
import draccus
|
||||||
@@ -195,6 +211,8 @@ import draccus
|
|||||||
from lerobot.configs import parser
|
from lerobot.configs import parser
|
||||||
from lerobot.datasets import (
|
from lerobot.datasets import (
|
||||||
LeRobotDataset,
|
LeRobotDataset,
|
||||||
|
VideoEncoderConfig,
|
||||||
|
camera_encoder_defaults,
|
||||||
convert_image_to_video_dataset,
|
convert_image_to_video_dataset,
|
||||||
delete_episodes,
|
delete_episodes,
|
||||||
merge_datasets,
|
merge_datasets,
|
||||||
@@ -218,12 +236,14 @@ class OperationConfig(draccus.ChoiceRegistry, abc.ABC):
|
|||||||
@dataclass
|
@dataclass
|
||||||
class DeleteEpisodesConfig(OperationConfig):
|
class DeleteEpisodesConfig(OperationConfig):
|
||||||
episode_indices: list[int] | None = None
|
episode_indices: list[int] | None = None
|
||||||
|
camera_encoder_config: VideoEncoderConfig = field(default_factory=camera_encoder_defaults)
|
||||||
|
|
||||||
|
|
||||||
@OperationConfig.register_subclass("split")
|
@OperationConfig.register_subclass("split")
|
||||||
@dataclass
|
@dataclass
|
||||||
class SplitConfig(OperationConfig):
|
class SplitConfig(OperationConfig):
|
||||||
splits: dict[str, float | list[int]] | None = None
|
splits: dict[str, float | list[int]] | None = None
|
||||||
|
camera_encoder_config: VideoEncoderConfig = field(default_factory=camera_encoder_defaults)
|
||||||
|
|
||||||
|
|
||||||
@OperationConfig.register_subclass("merge")
|
@OperationConfig.register_subclass("merge")
|
||||||
@@ -250,11 +270,7 @@ class ModifyTasksConfig(OperationConfig):
|
|||||||
@dataclass
|
@dataclass
|
||||||
class ConvertImageToVideoConfig(OperationConfig):
|
class ConvertImageToVideoConfig(OperationConfig):
|
||||||
output_dir: str | None = None
|
output_dir: str | None = None
|
||||||
vcodec: str = "libsvtav1"
|
camera_encoder_config: VideoEncoderConfig = field(default_factory=camera_encoder_defaults)
|
||||||
pix_fmt: str = "yuv420p"
|
|
||||||
g: int = 2
|
|
||||||
crf: int = 30
|
|
||||||
fast_decode: int = 0
|
|
||||||
episode_indices: list[int] | None = None
|
episode_indices: list[int] | None = None
|
||||||
num_workers: int = 4
|
num_workers: int = 4
|
||||||
max_episodes_per_batch: int | None = None
|
max_episodes_per_batch: int | None = None
|
||||||
@@ -356,6 +372,7 @@ def handle_delete_episodes(cfg: EditDatasetConfig) -> None:
|
|||||||
episode_indices=cfg.operation.episode_indices,
|
episode_indices=cfg.operation.episode_indices,
|
||||||
output_dir=output_dir,
|
output_dir=output_dir,
|
||||||
repo_id=output_repo_id,
|
repo_id=output_repo_id,
|
||||||
|
camera_encoder_config=cfg.operation.camera_encoder_config,
|
||||||
)
|
)
|
||||||
|
|
||||||
logging.info(f"Dataset saved to {output_dir}")
|
logging.info(f"Dataset saved to {output_dir}")
|
||||||
@@ -387,6 +404,7 @@ def handle_split(cfg: EditDatasetConfig) -> None:
|
|||||||
dataset,
|
dataset,
|
||||||
splits=cfg.operation.splits,
|
splits=cfg.operation.splits,
|
||||||
output_dir=cfg.new_root,
|
output_dir=cfg.new_root,
|
||||||
|
camera_encoder_config=cfg.operation.camera_encoder_config,
|
||||||
)
|
)
|
||||||
|
|
||||||
for split_name, split_ds in split_datasets.items():
|
for split_name, split_ds in split_datasets.items():
|
||||||
@@ -557,11 +575,8 @@ def handle_convert_image_to_video(cfg: EditDatasetConfig) -> None:
|
|||||||
dataset=dataset,
|
dataset=dataset,
|
||||||
output_dir=output_dir,
|
output_dir=output_dir,
|
||||||
repo_id=output_repo_id,
|
repo_id=output_repo_id,
|
||||||
vcodec=getattr(cfg.operation, "vcodec", "libsvtav1"),
|
camera_encoder_config=getattr(cfg.operation, "camera_encoder_config", None)
|
||||||
pix_fmt=getattr(cfg.operation, "pix_fmt", "yuv420p"),
|
or camera_encoder_defaults(),
|
||||||
g=getattr(cfg.operation, "g", 2),
|
|
||||||
crf=getattr(cfg.operation, "crf", 30),
|
|
||||||
fast_decode=getattr(cfg.operation, "fast_decode", 0),
|
|
||||||
episode_indices=getattr(cfg.operation, "episode_indices", None),
|
episode_indices=getattr(cfg.operation, "episode_indices", None),
|
||||||
num_workers=getattr(cfg.operation, "num_workers", 4),
|
num_workers=getattr(cfg.operation, "num_workers", 4),
|
||||||
max_episodes_per_batch=getattr(cfg.operation, "max_episodes_per_batch", None),
|
max_episodes_per_batch=getattr(cfg.operation, "max_episodes_per_batch", None),
|
||||||
|
|||||||
@@ -63,6 +63,27 @@ lerobot-record \\
|
|||||||
--dataset.streaming_encoding=true \\
|
--dataset.streaming_encoding=true \\
|
||||||
--dataset.encoder_threads=2
|
--dataset.encoder_threads=2
|
||||||
```
|
```
|
||||||
|
|
||||||
|
Example recording with custom video encoding parameters:
|
||||||
|
```shell
|
||||||
|
lerobot-record \\
|
||||||
|
--robot.type=so100_follower \\
|
||||||
|
--robot.port=/dev/tty.usbmodem58760431541 \\
|
||||||
|
--robot.cameras="{laptop: {type: opencv, index_or_path: 0, width: 640, height: 480, fps: 30}}" \\
|
||||||
|
--robot.id=black \\
|
||||||
|
--teleop.type=so100_leader \\
|
||||||
|
--teleop.port=/dev/tty.usbmodem58760431551 \\
|
||||||
|
--teleop.id=blue \\
|
||||||
|
--dataset.repo_id=<my_username>/<my_dataset_name> \\
|
||||||
|
--dataset.num_episodes=2 \\
|
||||||
|
--dataset.single_task="Grab the cube" \\
|
||||||
|
--dataset.streaming_encoding=true \\
|
||||||
|
--dataset.encoder_threads=2 \\
|
||||||
|
--dataset.camera_encoder_config.vcodec=h264 \\
|
||||||
|
--dataset.camera_encoder_config.preset=fast \\
|
||||||
|
--dataset.camera_encoder_config.extra_options='{"tune": "film", "profile:v": "high", "bf": 2}' \\
|
||||||
|
--display_data=true
|
||||||
|
```
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
@@ -84,8 +105,10 @@ from lerobot.configs import parser
|
|||||||
from lerobot.configs.dataset import DatasetRecordConfig
|
from lerobot.configs.dataset import DatasetRecordConfig
|
||||||
from lerobot.datasets import (
|
from lerobot.datasets import (
|
||||||
LeRobotDataset,
|
LeRobotDataset,
|
||||||
|
VideoEncoderConfig,
|
||||||
VideoEncodingManager,
|
VideoEncodingManager,
|
||||||
aggregate_pipeline_dataset_features,
|
aggregate_pipeline_dataset_features,
|
||||||
|
camera_encoder_defaults,
|
||||||
create_initial_features,
|
create_initial_features,
|
||||||
safe_stop_image_writer,
|
safe_stop_image_writer,
|
||||||
)
|
)
|
||||||
@@ -377,10 +400,10 @@ def record(
|
|||||||
cfg.dataset.repo_id,
|
cfg.dataset.repo_id,
|
||||||
root=cfg.dataset.root,
|
root=cfg.dataset.root,
|
||||||
batch_encoding_size=cfg.dataset.video_encoding_batch_size,
|
batch_encoding_size=cfg.dataset.video_encoding_batch_size,
|
||||||
vcodec=cfg.dataset.vcodec,
|
camera_encoder_config=cfg.dataset.camera_encoder_config,
|
||||||
|
encoder_threads=cfg.dataset.encoder_threads,
|
||||||
streaming_encoding=cfg.dataset.streaming_encoding,
|
streaming_encoding=cfg.dataset.streaming_encoding,
|
||||||
encoder_queue_maxsize=cfg.dataset.encoder_queue_maxsize,
|
encoder_queue_maxsize=cfg.dataset.encoder_queue_maxsize,
|
||||||
encoder_threads=cfg.dataset.encoder_threads,
|
|
||||||
image_writer_processes=cfg.dataset.num_image_writer_processes if num_cameras > 0 else 0,
|
image_writer_processes=cfg.dataset.num_image_writer_processes if num_cameras > 0 else 0,
|
||||||
image_writer_threads=cfg.dataset.num_image_writer_threads_per_camera * num_cameras
|
image_writer_threads=cfg.dataset.num_image_writer_threads_per_camera * num_cameras
|
||||||
if num_cameras > 0
|
if num_cameras > 0
|
||||||
@@ -406,10 +429,10 @@ def record(
|
|||||||
image_writer_processes=cfg.dataset.num_image_writer_processes,
|
image_writer_processes=cfg.dataset.num_image_writer_processes,
|
||||||
image_writer_threads=cfg.dataset.num_image_writer_threads_per_camera * len(robot.cameras),
|
image_writer_threads=cfg.dataset.num_image_writer_threads_per_camera * len(robot.cameras),
|
||||||
batch_encoding_size=cfg.dataset.video_encoding_batch_size,
|
batch_encoding_size=cfg.dataset.video_encoding_batch_size,
|
||||||
vcodec=cfg.dataset.vcodec,
|
camera_encoder_config=cfg.dataset.camera_encoder_config,
|
||||||
|
encoder_threads=cfg.dataset.encoder_threads,
|
||||||
streaming_encoding=cfg.dataset.streaming_encoding,
|
streaming_encoding=cfg.dataset.streaming_encoding,
|
||||||
encoder_queue_maxsize=cfg.dataset.encoder_queue_maxsize,
|
encoder_queue_maxsize=cfg.dataset.encoder_queue_maxsize,
|
||||||
encoder_threads=cfg.dataset.encoder_threads,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
robot.connect()
|
robot.connect()
|
||||||
@@ -420,7 +443,7 @@ def record(
|
|||||||
|
|
||||||
if not cfg.dataset.streaming_encoding:
|
if not cfg.dataset.streaming_encoding:
|
||||||
logging.info(
|
logging.info(
|
||||||
"Streaming encoding is disabled. If you have capable hardware, consider enabling it for way faster episode saving. --dataset.streaming_encoding=true --dataset.encoder_threads=2 # --dataset.vcodec=auto. More info in the documentation: https://huggingface.co/docs/lerobot/streaming_video_encoding"
|
"Streaming encoding is disabled. If you have capable hardware, consider enabling it for way faster episode saving. --dataset.streaming_encoding=true --dataset.encoder_threads=2 # --dataset.camera_encoder_config.vcodec=auto. More info in the documentation: https://huggingface.co/docs/lerobot/streaming_video_encoding"
|
||||||
)
|
)
|
||||||
|
|
||||||
with VideoEncodingManager(dataset):
|
with VideoEncodingManager(dataset):
|
||||||
|
|||||||
Reference in New Issue
Block a user