fix(re-encoding): removing inconsistent re-encoding option in lerobot_edit_dataset

This commit is contained in:
CarolinePascal
2026-05-13 16:22:09 +02:00
parent f0830972f0
commit cf6f92ac74
4 changed files with 76 additions and 39 deletions
+33
View File
@@ -16,6 +16,8 @@
"""Video encoder configurations."""
from __future__ import annotations
import logging
from dataclasses import dataclass, field
from typing import Any
@@ -35,6 +37,9 @@ HW_VIDEO_CODECS = [
"h264_qsv", # Intel Quick Sync
]
VALID_VIDEO_CODECS: frozenset[str] = frozenset({"h264", "hevc", "libsvtav1", "auto", *HW_VIDEO_CODECS})
# Aliases mapping legacy or stream-derived codec names (e.g. ``"av1"``) to their encoder names.
VIDEO_CODECS_ALIASES: dict[str, str] = {"av1": "libsvtav1"}
LIBSVTAV1_DEFAULT_PRESET: int = 12
@@ -88,6 +93,30 @@ class VideoEncoderConfig:
self.preset = LIBSVTAV1_DEFAULT_PRESET
self.validate()
@classmethod
def from_video_info(cls, video_info: dict | None) -> VideoEncoderConfig:
    """Build a :class:`VideoEncoderConfig` out of a video feature's ``info`` block.

    Only keys that are present with a non-``None`` value are forwarded to the
    constructor; every other field keeps its class default.

    Args:
        video_info: Mapping holding the ``video.*`` keys of a video feature,
            or ``None`` (handled like an empty mapping).

    Returns:
        A config instantiated from the values found in ``video_info``.
    """
    info = video_info if video_info else {}

    # Keys whose name in the info block differs from the config field they feed.
    renamed = (("video.codec", "vcodec"), ("video.pix_fmt", "pix_fmt"))
    overrides: dict[str, Any] = {
        target: found for source, target in renamed if (found := info.get(source)) is not None
    }

    # Remaining fields are stored under ``video.<field name>``.
    for name in VIDEO_ENCODER_INFO_FIELD_NAMES:
        stored = info.get(f"video.{name}")
        # Skip absent values, and an empty ``extra_options`` (persisted as ``{}``
        # after merges with disagreeing sources) so the default applies instead.
        if stored is None or (name == "extra_options" and not stored):
            continue
        overrides[name] = stored

    return cls(**overrides)
def detect_available_encoders(self, encoders: list[str] | str) -> list[str]:
"""Return the subset of available encoders based on the specified video backend.
@@ -116,7 +145,11 @@ class VideoEncoderConfig:
For ``"auto"``, the first hardware encoder in the preference list that is available is chosen; if none are available, ``libsvtav1`` is used. If the
resolved codec (explicit or after auto-selection) is not available, raises ``ValueError``.
Stream-derived canonical codec names listed in :data:`VIDEO_CODECS_ALIASES` are
rewritten to their corresponding encoder name (e.g. ``"av1"`` → ``"libsvtav1"``).
"""
self.vcodec = VIDEO_CODECS_ALIASES.get(self.vcodec, self.vcodec)
if self.vcodec not in VALID_VIDEO_CODECS:
raise ValueError(f"Invalid vcodec '{self.vcodec}'. Must be one of: {sorted(VALID_VIDEO_CODECS)}")
if self.vcodec == "auto":
+20 -19
View File
@@ -96,17 +96,19 @@ def delete_episodes(
episode_indices: list[int],
output_dir: str | Path | None = None,
repo_id: str | None = None,
camera_encoder: VideoEncoderConfig | None = None,
) -> LeRobotDataset:
"""Delete episodes from a LeRobotDataset and create a new dataset.
Video segments that need re-encoding (because the source file mixes kept and
deleted episodes) are re-encoded with the source dataset's existing encoder
settings — read back from ``meta/info.json`` — so the output dataset stays
consistent with its own metadata.
Args:
dataset: The source LeRobotDataset.
episode_indices: List of episode indices to delete.
output_dir: Root directory where the edited dataset will be stored. If not specified, defaults to $HF_LEROBOT_HOME/repo_id. Equivalent to new_root in EditDatasetConfig.
repo_id: Edited dataset identifier. Equivalent to new_repo_id in EditDatasetConfig.
camera_encoder: Video encoder settings used when re-encoding video segments
(``None`` uses :func:`~lerobot.configs.camera_encoder_defaults`).
"""
if not episode_indices:
raise ValueError("No episodes to delete")
@@ -139,7 +141,7 @@ def delete_episodes(
video_metadata = None
if dataset.meta.video_keys:
video_metadata = _copy_and_reindex_videos(dataset, new_meta, episode_mapping, camera_encoder)
video_metadata = _copy_and_reindex_videos(dataset, new_meta, episode_mapping)
data_metadata = _copy_and_reindex_data(dataset, new_meta, episode_mapping)
@@ -161,17 +163,19 @@ def split_dataset(
dataset: LeRobotDataset,
splits: dict[str, float | list[int]],
output_dir: str | Path | None = None,
camera_encoder: VideoEncoderConfig | None = None,
) -> dict[str, LeRobotDataset]:
"""Split a LeRobotDataset into multiple smaller datasets.
Video segments that need re-encoding (because the source file mixes episodes
that fall into different splits) are re-encoded with the source dataset's
existing encoder settings — read back from ``meta/info.json`` — so each
output split stays consistent with its own metadata.
Args:
dataset: The source LeRobotDataset to split.
splits: Either a dict mapping split names to episode indices, or a dict mapping
split names to fractions (must sum to <= 1.0).
output_dir: Root directory where the split datasets will be stored. If not specified, defaults to $HF_LEROBOT_HOME/repo_id.
camera_encoder: Video encoder settings used when re-encoding video segments
(``None`` uses :func:`~lerobot.configs.camera_encoder_defaults`).
Examples:
Split by specific episodes
@@ -232,7 +236,7 @@ def split_dataset(
video_metadata = None
if dataset.meta.video_keys:
video_metadata = _copy_and_reindex_videos(dataset, new_meta, episode_mapping, camera_encoder)
video_metadata = _copy_and_reindex_videos(dataset, new_meta, episode_mapping)
data_metadata = _copy_and_reindex_data(dataset, new_meta, episode_mapping)
@@ -588,7 +592,7 @@ def _keep_episodes_from_video_with_av(
output_path: Path,
episodes_to_keep: list[tuple[int, int]],
fps: float,
camera_encoder: VideoEncoderConfig | None = None,
camera_encoder: VideoEncoderConfig,
) -> None:
"""Keep only specified episodes from a video file using PyAV.
@@ -602,11 +606,8 @@ def _keep_episodes_from_video_with_av(
Ranges are half-open intervals: [start_frame, end_frame), where start_frame
is inclusive and end_frame is exclusive.
fps: Frame rate of the video.
camera_encoder: Video encoder settings
(``None`` uses :func:`~lerobot.configs.camera_encoder_defaults`).
camera_encoder: Video encoder settings used to re-encode the kept frames.
"""
if camera_encoder is None:
camera_encoder = camera_encoder_defaults()
from fractions import Fraction
import av
@@ -699,26 +700,23 @@ def _copy_and_reindex_videos(
src_dataset: LeRobotDataset,
dst_meta: LeRobotDatasetMetadata,
episode_mapping: dict[int, int],
camera_encoder: VideoEncoderConfig | None = None,
) -> dict[int, dict]:
"""Copy and filter video files, only re-encoding files with deleted episodes.
For video files that only contain kept episodes, we copy them directly.
For files with mixed kept/deleted episodes, we use PyAV filters to efficiently
re-encode only the desired segments.
re-encode only the desired segments. The encoder used for re-encoding is
derived per video key from the source dataset's ``meta/info.json`` so the
destination metadata keeps describing the videos accurately.
Args:
src_dataset: Source dataset to copy from
dst_meta: Destination metadata object
episode_mapping: Mapping from old episode indices to new indices
camera_encoder: Video encoder settings used when re-encoding segments
(``None`` uses :func:`~lerobot.configs.camera_encoder_defaults`).
Returns:
dict mapping episode index to its video metadata (chunk_index, file_index, timestamps)
"""
if camera_encoder is None:
camera_encoder = camera_encoder_defaults()
if src_dataset.meta.episodes is None:
src_dataset.meta.episodes = load_episodes(src_dataset.meta.root)
@@ -726,6 +724,9 @@ def _copy_and_reindex_videos(
for video_key in src_dataset.meta.video_keys:
logging.info(f"Processing videos for {video_key}")
camera_encoder = VideoEncoderConfig.from_video_info(
src_dataset.meta.info.features.get(video_key, {}).get("info")
)
if dst_meta.video_path is None:
raise ValueError("Destination metadata has no video_path defined")
@@ -49,14 +49,6 @@ Delete episodes and save to a new dataset at a specific path and with a new repo
--operation.type delete_episodes \
--operation.episode_indices "[0, 2, 5]"
Delete episodes and re-encode video segments with h264:
lerobot-edit-dataset \
--repo_id lerobot/pusht \
--operation.type delete_episodes \
--operation.episode_indices "[0, 2, 5]" \
--operation.camera_encoder.vcodec h264 \
--operation.camera_encoder.crf 23
Split dataset by fractions (pusht_train, pusht_val):
lerobot-edit-dataset \
--repo_id lerobot/pusht \
@@ -82,14 +74,6 @@ Split into more than two splits:
--operation.type split \
--operation.splits '{"train": 0.6, "val": 0.2, "test": 0.2}'
Split dataset and re-encode video segments with h264:
lerobot-edit-dataset \
--repo_id lerobot/pusht \
--operation.type split \
--operation.splits '{"train": 0.8, "val": 0.2}' \
--operation.camera_encoder.vcodec h264 \
--operation.camera_encoder.crf 23
Merge multiple datasets:
lerobot-edit-dataset \
--new_repo_id lerobot/pusht_merged \
@@ -234,14 +218,12 @@ class OperationConfig(draccus.ChoiceRegistry, abc.ABC):
@dataclass
class DeleteEpisodesConfig(OperationConfig):
episode_indices: list[int] | None = None
camera_encoder: VideoEncoderConfig = field(default_factory=camera_encoder_defaults)
@OperationConfig.register_subclass("split")
@dataclass
class SplitConfig(OperationConfig):
splits: dict[str, float | list[int]] | None = None
camera_encoder: VideoEncoderConfig = field(default_factory=camera_encoder_defaults)
@OperationConfig.register_subclass("merge")
@@ -370,7 +352,6 @@ def handle_delete_episodes(cfg: EditDatasetConfig) -> None:
episode_indices=cfg.operation.episode_indices,
output_dir=output_dir,
repo_id=output_repo_id,
camera_encoder=cfg.operation.camera_encoder,
)
logging.info(f"Dataset saved to {output_dir}")
@@ -402,7 +383,6 @@ def handle_split(cfg: EditDatasetConfig) -> None:
dataset,
splits=cfg.operation.splits,
output_dir=cfg.new_root,
camera_encoder=cfg.operation.camera_encoder,
)
for split_name, split_ds in split_datasets.items():
+23
View File
@@ -36,6 +36,7 @@ from lerobot.datasets.video_utils import (
encode_video_frames,
get_video_info,
)
from tests.fixtures.constants import DUMMY_VIDEO_INFO
# Per-codec skip markers — validation tests only fire when the codec is available
@@ -570,3 +571,25 @@ class TestEncoderConfigPersistence:
dataset.finalize()
assert _read_feature_info(dataset) == first_info
class TestFromVideoInfo:
    """Checks that ``VideoEncoderConfig.from_video_info`` rebuilds an encoder
    config out of the ``video.*`` entries stored in a dataset's ``info.json``.
    """

    @require_libsvtav1
    def test_reconstructs_from_dummy_video_info(self):
        rebuilt = VideoEncoderConfig.from_video_info(DUMMY_VIDEO_INFO)

        # The stored stream codec ``"av1"`` must come back as the encoder name.
        assert rebuilt.vcodec == "libsvtav1"

        # Fields stored verbatim under ``video.<name>`` must round-trip unchanged.
        for name in ("pix_fmt", "g", "crf", "preset", "fast_decode", "video_backend"):
            assert getattr(rebuilt, name) == DUMMY_VIDEO_INFO[f"video.{name}"]

        # A ``{}`` placeholder (typical after a merge with disagreeing sources)
        # must be replaced by the default rather than leak into the config.
        defaults = VideoEncoderConfig()
        assert rebuilt.extra_options == defaults.extra_options