mirror of
https://github.com/huggingface/lerobot.git
synced 2026-05-22 12:09:42 +00:00
fix(re-encoding): removing inconsistent re-encoding option in lerobot_edit_dataset
This commit is contained in:
@@ -16,6 +16,8 @@
|
||||
|
||||
"""Video encoder configurations."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any
|
||||
@@ -35,6 +37,9 @@ HW_VIDEO_CODECS = [
|
||||
"h264_qsv", # Intel Quick Sync
|
||||
]
|
||||
VALID_VIDEO_CODECS: frozenset[str] = frozenset({"h264", "hevc", "libsvtav1", "auto", *HW_VIDEO_CODECS})
|
||||
# Aliases for legacy video codec names.
|
||||
VIDEO_CODECS_ALIASES: dict[str, str] = {"av1": "libsvtav1"}
|
||||
|
||||
|
||||
LIBSVTAV1_DEFAULT_PRESET: int = 12
|
||||
|
||||
@@ -88,6 +93,30 @@ class VideoEncoderConfig:
|
||||
self.preset = LIBSVTAV1_DEFAULT_PRESET
|
||||
self.validate()
|
||||
|
||||
@classmethod
def from_video_info(cls, video_info: dict | None) -> VideoEncoderConfig:
    """Build a :class:`VideoEncoderConfig` out of a video feature's ``info`` block.

    Only keys that are present with a non-``None`` value are forwarded to the
    constructor; everything else is left to the class defaults.

    Args:
        video_info: The ``info`` mapping of a video feature, keyed by
            ``video.*`` names. ``None`` or an empty mapping yields a config
            built purely from defaults.

    Returns:
        A new config instance created from the collected keyword arguments.
    """
    info = video_info if video_info else {}
    overrides: dict[str, Any] = {}

    # These two are stored under keys that differ from the constructor argument names.
    renamed_keys = {"video.codec": "vcodec", "video.pix_fmt": "pix_fmt"}
    for info_key, argument in renamed_keys.items():
        stored = info.get(info_key)
        if stored is not None:
            overrides[argument] = stored

    # The remaining encoder fields are stored as ``video.<field name>``.
    for name in VIDEO_ENCODER_INFO_FIELD_NAMES:
        stored = info.get(f"video.{name}")
        # Persisted as ``{}`` after merges with disagreeing sources — an empty
        # ``extra_options`` is treated like a missing value.
        if stored is None or (name == "extra_options" and not stored):
            continue
        overrides[name] = stored

    return cls(**overrides)
|
||||
|
||||
def detect_available_encoders(self, encoders: list[str] | str) -> list[str]:
|
||||
"""Return the subset of available encoders based on the specified video backend.
|
||||
|
||||
@@ -116,7 +145,11 @@ class VideoEncoderConfig:
|
||||
|
||||
For ``"auto"``, the first hardware encoder in the preference list that is available is chosen; if none are available, ``libsvtav1`` is used. If the
|
||||
resolved codec (explicit or after auto-selection) is not available, raises ``ValueError``.
|
||||
|
||||
Stream-derived canonical codec names listed in :data:`VIDEO_CODECS_ALIASES` are
|
||||
rewritten to their corresponding encoder name (e.g. ``"av1"`` → ``"libsvtav1"``).
|
||||
"""
|
||||
self.vcodec = VIDEO_CODECS_ALIASES.get(self.vcodec, self.vcodec)
|
||||
if self.vcodec not in VALID_VIDEO_CODECS:
|
||||
raise ValueError(f"Invalid vcodec '{self.vcodec}'. Must be one of: {sorted(VALID_VIDEO_CODECS)}")
|
||||
if self.vcodec == "auto":
|
||||
|
||||
@@ -96,17 +96,19 @@ def delete_episodes(
|
||||
episode_indices: list[int],
|
||||
output_dir: str | Path | None = None,
|
||||
repo_id: str | None = None,
|
||||
camera_encoder: VideoEncoderConfig | None = None,
|
||||
) -> LeRobotDataset:
|
||||
"""Delete episodes from a LeRobotDataset and create a new dataset.
|
||||
|
||||
Video segments that need re-encoding (because the source file mixes kept and
|
||||
deleted episodes) are re-encoded with the source dataset's existing encoder
|
||||
settings — read back from ``meta/info.json`` — so the output dataset stays
|
||||
consistent with its own metadata.
|
||||
|
||||
Args:
|
||||
dataset: The source LeRobotDataset.
|
||||
episode_indices: List of episode indices to delete.
|
||||
output_dir: Root directory where the edited dataset will be stored. If not specified, defaults to $HF_LEROBOT_HOME/repo_id. Equivalent to new_root in EditDatasetConfig.
|
||||
repo_id: Edited dataset identifier. Equivalent to new_repo_id in EditDatasetConfig.
|
||||
camera_encoder: Video encoder settings used when re-encoding video segments
|
||||
(``None`` uses :func:`~lerobot.configs.camera_encoder_defaults`).
|
||||
"""
|
||||
if not episode_indices:
|
||||
raise ValueError("No episodes to delete")
|
||||
@@ -139,7 +141,7 @@ def delete_episodes(
|
||||
|
||||
video_metadata = None
|
||||
if dataset.meta.video_keys:
|
||||
video_metadata = _copy_and_reindex_videos(dataset, new_meta, episode_mapping, camera_encoder)
|
||||
video_metadata = _copy_and_reindex_videos(dataset, new_meta, episode_mapping)
|
||||
|
||||
data_metadata = _copy_and_reindex_data(dataset, new_meta, episode_mapping)
|
||||
|
||||
@@ -161,17 +163,19 @@ def split_dataset(
|
||||
dataset: LeRobotDataset,
|
||||
splits: dict[str, float | list[int]],
|
||||
output_dir: str | Path | None = None,
|
||||
camera_encoder: VideoEncoderConfig | None = None,
|
||||
) -> dict[str, LeRobotDataset]:
|
||||
"""Split a LeRobotDataset into multiple smaller datasets.
|
||||
|
||||
Video segments that need re-encoding (because the source file mixes episodes
|
||||
that fall into different splits) are re-encoded with the source dataset's
|
||||
existing encoder settings — read back from ``meta/info.json`` — so each
|
||||
output split stays consistent with its own metadata.
|
||||
|
||||
Args:
|
||||
dataset: The source LeRobotDataset to split.
|
||||
splits: Either a dict mapping split names to episode indices, or a dict mapping
|
||||
split names to fractions (must sum to <= 1.0).
|
||||
output_dir: Root directory where the split datasets will be stored. If not specified, defaults to $HF_LEROBOT_HOME/repo_id.
|
||||
camera_encoder: Video encoder settings used when re-encoding video segments
|
||||
(``None`` uses :func:`~lerobot.configs.camera_encoder_defaults`).
|
||||
|
||||
Examples:
|
||||
Split by specific episodes
|
||||
@@ -232,7 +236,7 @@ def split_dataset(
|
||||
|
||||
video_metadata = None
|
||||
if dataset.meta.video_keys:
|
||||
video_metadata = _copy_and_reindex_videos(dataset, new_meta, episode_mapping, camera_encoder)
|
||||
video_metadata = _copy_and_reindex_videos(dataset, new_meta, episode_mapping)
|
||||
|
||||
data_metadata = _copy_and_reindex_data(dataset, new_meta, episode_mapping)
|
||||
|
||||
@@ -588,7 +592,7 @@ def _keep_episodes_from_video_with_av(
|
||||
output_path: Path,
|
||||
episodes_to_keep: list[tuple[int, int]],
|
||||
fps: float,
|
||||
camera_encoder: VideoEncoderConfig | None = None,
|
||||
camera_encoder: VideoEncoderConfig,
|
||||
) -> None:
|
||||
"""Keep only specified episodes from a video file using PyAV.
|
||||
|
||||
@@ -602,11 +606,8 @@ def _keep_episodes_from_video_with_av(
|
||||
Ranges are half-open intervals: [start_frame, end_frame), where start_frame
|
||||
is inclusive and end_frame is exclusive.
|
||||
fps: Frame rate of the video.
|
||||
camera_encoder: Video encoder settings
|
||||
(``None`` uses :func:`~lerobot.configs.camera_encoder_defaults`).
|
||||
camera_encoder: Video encoder settings used to re-encode the kept frames.
|
||||
"""
|
||||
if camera_encoder is None:
|
||||
camera_encoder = camera_encoder_defaults()
|
||||
from fractions import Fraction
|
||||
|
||||
import av
|
||||
@@ -699,26 +700,23 @@ def _copy_and_reindex_videos(
|
||||
src_dataset: LeRobotDataset,
|
||||
dst_meta: LeRobotDatasetMetadata,
|
||||
episode_mapping: dict[int, int],
|
||||
camera_encoder: VideoEncoderConfig | None = None,
|
||||
) -> dict[int, dict]:
|
||||
"""Copy and filter video files, only re-encoding files with deleted episodes.
|
||||
|
||||
For video files that only contain kept episodes, we copy them directly.
|
||||
For files with mixed kept/deleted episodes, we use PyAV filters to efficiently
|
||||
re-encode only the desired segments.
|
||||
re-encode only the desired segments. The encoder used for re-encoding is
|
||||
derived per video key from the source dataset's ``meta/info.json`` so the
|
||||
destination metadata keeps describing the videos accurately.
|
||||
|
||||
Args:
|
||||
src_dataset: Source dataset to copy from
|
||||
dst_meta: Destination metadata object
|
||||
episode_mapping: Mapping from old episode indices to new indices
|
||||
camera_encoder: Video encoder settings used when re-encoding segments
|
||||
(``None`` uses :func:`~lerobot.configs.camera_encoder_defaults`).
|
||||
|
||||
Returns:
|
||||
dict mapping episode index to its video metadata (chunk_index, file_index, timestamps)
|
||||
"""
|
||||
if camera_encoder is None:
|
||||
camera_encoder = camera_encoder_defaults()
|
||||
if src_dataset.meta.episodes is None:
|
||||
src_dataset.meta.episodes = load_episodes(src_dataset.meta.root)
|
||||
|
||||
@@ -726,6 +724,9 @@ def _copy_and_reindex_videos(
|
||||
|
||||
for video_key in src_dataset.meta.video_keys:
|
||||
logging.info(f"Processing videos for {video_key}")
|
||||
camera_encoder = VideoEncoderConfig.from_video_info(
|
||||
src_dataset.meta.info.features.get(video_key, {}).get("info")
|
||||
)
|
||||
|
||||
if dst_meta.video_path is None:
|
||||
raise ValueError("Destination metadata has no video_path defined")
|
||||
|
||||
@@ -49,14 +49,6 @@ Delete episodes and save to a new dataset at a specific path and with a new repo
|
||||
--operation.type delete_episodes \
|
||||
--operation.episode_indices "[0, 2, 5]"
|
||||
|
||||
Delete episodes and re-encode video segments with h264:
|
||||
lerobot-edit-dataset \
|
||||
--repo_id lerobot/pusht \
|
||||
--operation.type delete_episodes \
|
||||
--operation.episode_indices "[0, 2, 5]" \
|
||||
--operation.camera_encoder.vcodec h264 \
|
||||
--operation.camera_encoder.crf 23
|
||||
|
||||
Split dataset by fractions (pusht_train, pusht_val):
|
||||
lerobot-edit-dataset \
|
||||
--repo_id lerobot/pusht \
|
||||
@@ -82,14 +74,6 @@ Split into more than two splits:
|
||||
--operation.type split \
|
||||
--operation.splits '{"train": 0.6, "val": 0.2, "test": 0.2}'
|
||||
|
||||
Split dataset and re-encode video segments with h264:
|
||||
lerobot-edit-dataset \
|
||||
--repo_id lerobot/pusht \
|
||||
--operation.type split \
|
||||
--operation.splits '{"train": 0.8, "val": 0.2}' \
|
||||
--operation.camera_encoder.vcodec h264 \
|
||||
--operation.camera_encoder.crf 23
|
||||
|
||||
Merge multiple datasets:
|
||||
lerobot-edit-dataset \
|
||||
--new_repo_id lerobot/pusht_merged \
|
||||
@@ -234,14 +218,12 @@ class OperationConfig(draccus.ChoiceRegistry, abc.ABC):
|
||||
@dataclass
|
||||
class DeleteEpisodesConfig(OperationConfig):
|
||||
episode_indices: list[int] | None = None
|
||||
camera_encoder: VideoEncoderConfig = field(default_factory=camera_encoder_defaults)
|
||||
|
||||
|
||||
@OperationConfig.register_subclass("split")
|
||||
@dataclass
|
||||
class SplitConfig(OperationConfig):
|
||||
splits: dict[str, float | list[int]] | None = None
|
||||
camera_encoder: VideoEncoderConfig = field(default_factory=camera_encoder_defaults)
|
||||
|
||||
|
||||
@OperationConfig.register_subclass("merge")
|
||||
@@ -370,7 +352,6 @@ def handle_delete_episodes(cfg: EditDatasetConfig) -> None:
|
||||
episode_indices=cfg.operation.episode_indices,
|
||||
output_dir=output_dir,
|
||||
repo_id=output_repo_id,
|
||||
camera_encoder=cfg.operation.camera_encoder,
|
||||
)
|
||||
|
||||
logging.info(f"Dataset saved to {output_dir}")
|
||||
@@ -402,7 +383,6 @@ def handle_split(cfg: EditDatasetConfig) -> None:
|
||||
dataset,
|
||||
splits=cfg.operation.splits,
|
||||
output_dir=cfg.new_root,
|
||||
camera_encoder=cfg.operation.camera_encoder,
|
||||
)
|
||||
|
||||
for split_name, split_ds in split_datasets.items():
|
||||
|
||||
@@ -36,6 +36,7 @@ from lerobot.datasets.video_utils import (
|
||||
encode_video_frames,
|
||||
get_video_info,
|
||||
)
|
||||
from tests.fixtures.constants import DUMMY_VIDEO_INFO
|
||||
|
||||
|
||||
# Per-codec skip markers — validation tests only fire when the codec is available
|
||||
@@ -570,3 +571,25 @@ class TestEncoderConfigPersistence:
|
||||
dataset.finalize()
|
||||
|
||||
assert _read_feature_info(dataset) == first_info
|
||||
|
||||
|
||||
class TestFromVideoInfo:
    """Checks that ``VideoEncoderConfig.from_video_info`` rebuilds an encoder
    config out of the ``video.*`` keys persisted in a dataset's ``info.json``.
    """

    @require_libsvtav1
    def test_reconstructs_from_dummy_video_info(self):
        rebuilt = VideoEncoderConfig.from_video_info(DUMMY_VIDEO_INFO)

        # Canonical stream codec ``"av1"`` is aliased to the encoder name.
        assert rebuilt.vcodec == "libsvtav1"

        # Each of these is compared against the fixture's ``video.<name>`` entry.
        for attr in ("pix_fmt", "g", "crf", "preset", "fast_decode", "video_backend"):
            assert getattr(rebuilt, attr) == DUMMY_VIDEO_INFO[f"video.{attr}"]

        # ``{}`` placeholder (typical after a merge with disagreeing sources)
        # must not leak into the reconstructed config.
        defaults = VideoEncoderConfig()
        assert rebuilt.extra_options == defaults.extra_options
|
||||
|
||||
Reference in New Issue
Block a user