mirror of
https://github.com/huggingface/lerobot.git
synced 2026-05-23 04:30:10 +00:00
fix(re-encoding): removing inconsistent re-encoding option in lerobot_edit_dataset
This commit is contained in:
@@ -16,6 +16,8 @@
|
|||||||
|
|
||||||
"""Video encoder configurations."""
|
"""Video encoder configurations."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
from dataclasses import dataclass, field
|
from dataclasses import dataclass, field
|
||||||
from typing import Any
|
from typing import Any
|
||||||
@@ -35,6 +37,9 @@ HW_VIDEO_CODECS = [
|
|||||||
"h264_qsv", # Intel Quick Sync
|
"h264_qsv", # Intel Quick Sync
|
||||||
]
|
]
|
||||||
VALID_VIDEO_CODECS: frozenset[str] = frozenset({"h264", "hevc", "libsvtav1", "auto", *HW_VIDEO_CODECS})
|
VALID_VIDEO_CODECS: frozenset[str] = frozenset({"h264", "hevc", "libsvtav1", "auto", *HW_VIDEO_CODECS})
|
||||||
|
# Aliases for legacy video codec names.
|
||||||
|
VIDEO_CODECS_ALIASES: dict[str, str] = {"av1": "libsvtav1"}
|
||||||
|
|
||||||
|
|
||||||
LIBSVTAV1_DEFAULT_PRESET: int = 12
|
LIBSVTAV1_DEFAULT_PRESET: int = 12
|
||||||
|
|
||||||
@@ -88,6 +93,30 @@ class VideoEncoderConfig:
|
|||||||
self.preset = LIBSVTAV1_DEFAULT_PRESET
|
self.preset = LIBSVTAV1_DEFAULT_PRESET
|
||||||
self.validate()
|
self.validate()
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def from_video_info(cls, video_info: dict | None) -> VideoEncoderConfig:
|
||||||
|
"""Reconstruct a :class:`VideoEncoderConfig` from a video feature's ``info`` block.
|
||||||
|
Missing or ``None`` values fall back to the class defaults.
|
||||||
|
"""
|
||||||
|
video_info = video_info or {}
|
||||||
|
kwargs: dict[str, Any] = {}
|
||||||
|
|
||||||
|
for src_key, dst_field in (("video.codec", "vcodec"), ("video.pix_fmt", "pix_fmt")):
|
||||||
|
value = video_info.get(src_key)
|
||||||
|
if value is not None:
|
||||||
|
kwargs[dst_field] = value
|
||||||
|
|
||||||
|
for field_name in VIDEO_ENCODER_INFO_FIELD_NAMES:
|
||||||
|
value = video_info.get(f"video.{field_name}")
|
||||||
|
if value is None:
|
||||||
|
continue
|
||||||
|
# Persisted as ``{}`` after merges with disagreeing sources — treat as default.
|
||||||
|
if field_name == "extra_options" and not value:
|
||||||
|
continue
|
||||||
|
kwargs[field_name] = value
|
||||||
|
|
||||||
|
return cls(**kwargs)
|
||||||
|
|
||||||
def detect_available_encoders(self, encoders: list[str] | str) -> list[str]:
|
def detect_available_encoders(self, encoders: list[str] | str) -> list[str]:
|
||||||
"""Return the subset of available encoders based on the specified video backend.
|
"""Return the subset of available encoders based on the specified video backend.
|
||||||
|
|
||||||
@@ -116,7 +145,11 @@ class VideoEncoderConfig:
|
|||||||
|
|
||||||
For ``"auto"``, the first hardware encoder in the preference list that is available is chosen; if none are available, ``libsvtav1`` is used. If the
|
For ``"auto"``, the first hardware encoder in the preference list that is available is chosen; if none are available, ``libsvtav1`` is used. If the
|
||||||
resolved codec (explicit or after auto-selection) is not available, raises ``ValueError``.
|
resolved codec (explicit or after auto-selection) is not available, raises ``ValueError``.
|
||||||
|
|
||||||
|
Stream-derived canonical codec names listed in :data:`VIDEO_CODECS_ALIASES` are
|
||||||
|
rewritten to their corresponding encoder name (e.g. ``"av1"`` → ``"libsvtav1"``).
|
||||||
"""
|
"""
|
||||||
|
self.vcodec = VIDEO_CODECS_ALIASES.get(self.vcodec, self.vcodec)
|
||||||
if self.vcodec not in VALID_VIDEO_CODECS:
|
if self.vcodec not in VALID_VIDEO_CODECS:
|
||||||
raise ValueError(f"Invalid vcodec '{self.vcodec}'. Must be one of: {sorted(VALID_VIDEO_CODECS)}")
|
raise ValueError(f"Invalid vcodec '{self.vcodec}'. Must be one of: {sorted(VALID_VIDEO_CODECS)}")
|
||||||
if self.vcodec == "auto":
|
if self.vcodec == "auto":
|
||||||
|
|||||||
@@ -96,17 +96,19 @@ def delete_episodes(
|
|||||||
episode_indices: list[int],
|
episode_indices: list[int],
|
||||||
output_dir: str | Path | None = None,
|
output_dir: str | Path | None = None,
|
||||||
repo_id: str | None = None,
|
repo_id: str | None = None,
|
||||||
camera_encoder: VideoEncoderConfig | None = None,
|
|
||||||
) -> LeRobotDataset:
|
) -> LeRobotDataset:
|
||||||
"""Delete episodes from a LeRobotDataset and create a new dataset.
|
"""Delete episodes from a LeRobotDataset and create a new dataset.
|
||||||
|
|
||||||
|
Video segments that need re-encoding (because the source file mixes kept and
|
||||||
|
deleted episodes) are re-encoded with the source dataset's existing encoder
|
||||||
|
settings — read back from ``meta/info.json`` — so the output dataset stays
|
||||||
|
consistent with its own metadata.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
dataset: The source LeRobotDataset.
|
dataset: The source LeRobotDataset.
|
||||||
episode_indices: List of episode indices to delete.
|
episode_indices: List of episode indices to delete.
|
||||||
output_dir: Root directory where the edited dataset will be stored. If not specified, defaults to $HF_LEROBOT_HOME/repo_id. Equivalent to new_root in EditDatasetConfig.
|
output_dir: Root directory where the edited dataset will be stored. If not specified, defaults to $HF_LEROBOT_HOME/repo_id. Equivalent to new_root in EditDatasetConfig.
|
||||||
repo_id: Edited dataset identifier. Equivalent to new_repo_id in EditDatasetConfig.
|
repo_id: Edited dataset identifier. Equivalent to new_repo_id in EditDatasetConfig.
|
||||||
camera_encoder: Video encoder settings used when re-encoding video segments
|
|
||||||
(``None`` uses :func:`~lerobot.configs.camera_encoder_defaults`).
|
|
||||||
"""
|
"""
|
||||||
if not episode_indices:
|
if not episode_indices:
|
||||||
raise ValueError("No episodes to delete")
|
raise ValueError("No episodes to delete")
|
||||||
@@ -139,7 +141,7 @@ def delete_episodes(
|
|||||||
|
|
||||||
video_metadata = None
|
video_metadata = None
|
||||||
if dataset.meta.video_keys:
|
if dataset.meta.video_keys:
|
||||||
video_metadata = _copy_and_reindex_videos(dataset, new_meta, episode_mapping, camera_encoder)
|
video_metadata = _copy_and_reindex_videos(dataset, new_meta, episode_mapping)
|
||||||
|
|
||||||
data_metadata = _copy_and_reindex_data(dataset, new_meta, episode_mapping)
|
data_metadata = _copy_and_reindex_data(dataset, new_meta, episode_mapping)
|
||||||
|
|
||||||
@@ -161,17 +163,19 @@ def split_dataset(
|
|||||||
dataset: LeRobotDataset,
|
dataset: LeRobotDataset,
|
||||||
splits: dict[str, float | list[int]],
|
splits: dict[str, float | list[int]],
|
||||||
output_dir: str | Path | None = None,
|
output_dir: str | Path | None = None,
|
||||||
camera_encoder: VideoEncoderConfig | None = None,
|
|
||||||
) -> dict[str, LeRobotDataset]:
|
) -> dict[str, LeRobotDataset]:
|
||||||
"""Split a LeRobotDataset into multiple smaller datasets.
|
"""Split a LeRobotDataset into multiple smaller datasets.
|
||||||
|
|
||||||
|
Video segments that need re-encoding (because the source file mixes episodes
|
||||||
|
that fall into different splits) are re-encoded with the source dataset's
|
||||||
|
existing encoder settings — read back from ``meta/info.json`` — so each
|
||||||
|
output split stays consistent with its own metadata.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
dataset: The source LeRobotDataset to split.
|
dataset: The source LeRobotDataset to split.
|
||||||
splits: Either a dict mapping split names to episode indices, or a dict mapping
|
splits: Either a dict mapping split names to episode indices, or a dict mapping
|
||||||
split names to fractions (must sum to <= 1.0).
|
split names to fractions (must sum to <= 1.0).
|
||||||
output_dir: Root directory where the split datasets will be stored. If not specified, defaults to $HF_LEROBOT_HOME/repo_id.
|
output_dir: Root directory where the split datasets will be stored. If not specified, defaults to $HF_LEROBOT_HOME/repo_id.
|
||||||
camera_encoder: Video encoder settings used when re-encoding video segments
|
|
||||||
(``None`` uses :func:`~lerobot.configs.camera_encoder_defaults`).
|
|
||||||
|
|
||||||
Examples:
|
Examples:
|
||||||
Split by specific episodes
|
Split by specific episodes
|
||||||
@@ -232,7 +236,7 @@ def split_dataset(
|
|||||||
|
|
||||||
video_metadata = None
|
video_metadata = None
|
||||||
if dataset.meta.video_keys:
|
if dataset.meta.video_keys:
|
||||||
video_metadata = _copy_and_reindex_videos(dataset, new_meta, episode_mapping, camera_encoder)
|
video_metadata = _copy_and_reindex_videos(dataset, new_meta, episode_mapping)
|
||||||
|
|
||||||
data_metadata = _copy_and_reindex_data(dataset, new_meta, episode_mapping)
|
data_metadata = _copy_and_reindex_data(dataset, new_meta, episode_mapping)
|
||||||
|
|
||||||
@@ -588,7 +592,7 @@ def _keep_episodes_from_video_with_av(
|
|||||||
output_path: Path,
|
output_path: Path,
|
||||||
episodes_to_keep: list[tuple[int, int]],
|
episodes_to_keep: list[tuple[int, int]],
|
||||||
fps: float,
|
fps: float,
|
||||||
camera_encoder: VideoEncoderConfig | None = None,
|
camera_encoder: VideoEncoderConfig,
|
||||||
) -> None:
|
) -> None:
|
||||||
"""Keep only specified episodes from a video file using PyAV.
|
"""Keep only specified episodes from a video file using PyAV.
|
||||||
|
|
||||||
@@ -602,11 +606,8 @@ def _keep_episodes_from_video_with_av(
|
|||||||
Ranges are half-open intervals: [start_frame, end_frame), where start_frame
|
Ranges are half-open intervals: [start_frame, end_frame), where start_frame
|
||||||
is inclusive and end_frame is exclusive.
|
is inclusive and end_frame is exclusive.
|
||||||
fps: Frame rate of the video.
|
fps: Frame rate of the video.
|
||||||
camera_encoder: Video encoder settings
|
camera_encoder: Video encoder settings used to re-encode the kept frames.
|
||||||
(``None`` uses :func:`~lerobot.configs.camera_encoder_defaults`).
|
|
||||||
"""
|
"""
|
||||||
if camera_encoder is None:
|
|
||||||
camera_encoder = camera_encoder_defaults()
|
|
||||||
from fractions import Fraction
|
from fractions import Fraction
|
||||||
|
|
||||||
import av
|
import av
|
||||||
@@ -699,26 +700,23 @@ def _copy_and_reindex_videos(
|
|||||||
src_dataset: LeRobotDataset,
|
src_dataset: LeRobotDataset,
|
||||||
dst_meta: LeRobotDatasetMetadata,
|
dst_meta: LeRobotDatasetMetadata,
|
||||||
episode_mapping: dict[int, int],
|
episode_mapping: dict[int, int],
|
||||||
camera_encoder: VideoEncoderConfig | None = None,
|
|
||||||
) -> dict[int, dict]:
|
) -> dict[int, dict]:
|
||||||
"""Copy and filter video files, only re-encoding files with deleted episodes.
|
"""Copy and filter video files, only re-encoding files with deleted episodes.
|
||||||
|
|
||||||
For video files that only contain kept episodes, we copy them directly.
|
For video files that only contain kept episodes, we copy them directly.
|
||||||
For files with mixed kept/deleted episodes, we use PyAV filters to efficiently
|
For files with mixed kept/deleted episodes, we use PyAV filters to efficiently
|
||||||
re-encode only the desired segments.
|
re-encode only the desired segments. The encoder used for re-encoding is
|
||||||
|
derived per video key from the source dataset's ``meta/info.json`` so the
|
||||||
|
destination metadata keeps describing the videos accurately.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
src_dataset: Source dataset to copy from
|
src_dataset: Source dataset to copy from
|
||||||
dst_meta: Destination metadata object
|
dst_meta: Destination metadata object
|
||||||
episode_mapping: Mapping from old episode indices to new indices
|
episode_mapping: Mapping from old episode indices to new indices
|
||||||
camera_encoder: Video encoder settings used when re-encoding segments
|
|
||||||
(``None`` uses :func:`~lerobot.configs.camera_encoder_defaults`).
|
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
dict mapping episode index to its video metadata (chunk_index, file_index, timestamps)
|
dict mapping episode index to its video metadata (chunk_index, file_index, timestamps)
|
||||||
"""
|
"""
|
||||||
if camera_encoder is None:
|
|
||||||
camera_encoder = camera_encoder_defaults()
|
|
||||||
if src_dataset.meta.episodes is None:
|
if src_dataset.meta.episodes is None:
|
||||||
src_dataset.meta.episodes = load_episodes(src_dataset.meta.root)
|
src_dataset.meta.episodes = load_episodes(src_dataset.meta.root)
|
||||||
|
|
||||||
@@ -726,6 +724,9 @@ def _copy_and_reindex_videos(
|
|||||||
|
|
||||||
for video_key in src_dataset.meta.video_keys:
|
for video_key in src_dataset.meta.video_keys:
|
||||||
logging.info(f"Processing videos for {video_key}")
|
logging.info(f"Processing videos for {video_key}")
|
||||||
|
camera_encoder = VideoEncoderConfig.from_video_info(
|
||||||
|
src_dataset.meta.info.features.get(video_key, {}).get("info")
|
||||||
|
)
|
||||||
|
|
||||||
if dst_meta.video_path is None:
|
if dst_meta.video_path is None:
|
||||||
raise ValueError("Destination metadata has no video_path defined")
|
raise ValueError("Destination metadata has no video_path defined")
|
||||||
|
|||||||
@@ -49,14 +49,6 @@ Delete episodes and save to a new dataset at a specific path and with a new repo
|
|||||||
--operation.type delete_episodes \
|
--operation.type delete_episodes \
|
||||||
--operation.episode_indices "[0, 2, 5]"
|
--operation.episode_indices "[0, 2, 5]"
|
||||||
|
|
||||||
Delete episodes and re-encode video segments with h264:
|
|
||||||
lerobot-edit-dataset \
|
|
||||||
--repo_id lerobot/pusht \
|
|
||||||
--operation.type delete_episodes \
|
|
||||||
--operation.episode_indices "[0, 2, 5]" \
|
|
||||||
--operation.camera_encoder.vcodec h264 \
|
|
||||||
--operation.camera_encoder.crf 23
|
|
||||||
|
|
||||||
Split dataset by fractions (pusht_train, pusht_val):
|
Split dataset by fractions (pusht_train, pusht_val):
|
||||||
lerobot-edit-dataset \
|
lerobot-edit-dataset \
|
||||||
--repo_id lerobot/pusht \
|
--repo_id lerobot/pusht \
|
||||||
@@ -82,14 +74,6 @@ Split into more than two splits:
|
|||||||
--operation.type split \
|
--operation.type split \
|
||||||
--operation.splits '{"train": 0.6, "val": 0.2, "test": 0.2}'
|
--operation.splits '{"train": 0.6, "val": 0.2, "test": 0.2}'
|
||||||
|
|
||||||
Split dataset and re-encode video segments with h264:
|
|
||||||
lerobot-edit-dataset \
|
|
||||||
--repo_id lerobot/pusht \
|
|
||||||
--operation.type split \
|
|
||||||
--operation.splits '{"train": 0.8, "val": 0.2}' \
|
|
||||||
--operation.camera_encoder.vcodec h264 \
|
|
||||||
--operation.camera_encoder.crf 23
|
|
||||||
|
|
||||||
Merge multiple datasets:
|
Merge multiple datasets:
|
||||||
lerobot-edit-dataset \
|
lerobot-edit-dataset \
|
||||||
--new_repo_id lerobot/pusht_merged \
|
--new_repo_id lerobot/pusht_merged \
|
||||||
@@ -234,14 +218,12 @@ class OperationConfig(draccus.ChoiceRegistry, abc.ABC):
|
|||||||
@dataclass
|
@dataclass
|
||||||
class DeleteEpisodesConfig(OperationConfig):
|
class DeleteEpisodesConfig(OperationConfig):
|
||||||
episode_indices: list[int] | None = None
|
episode_indices: list[int] | None = None
|
||||||
camera_encoder: VideoEncoderConfig = field(default_factory=camera_encoder_defaults)
|
|
||||||
|
|
||||||
|
|
||||||
@OperationConfig.register_subclass("split")
|
@OperationConfig.register_subclass("split")
|
||||||
@dataclass
|
@dataclass
|
||||||
class SplitConfig(OperationConfig):
|
class SplitConfig(OperationConfig):
|
||||||
splits: dict[str, float | list[int]] | None = None
|
splits: dict[str, float | list[int]] | None = None
|
||||||
camera_encoder: VideoEncoderConfig = field(default_factory=camera_encoder_defaults)
|
|
||||||
|
|
||||||
|
|
||||||
@OperationConfig.register_subclass("merge")
|
@OperationConfig.register_subclass("merge")
|
||||||
@@ -370,7 +352,6 @@ def handle_delete_episodes(cfg: EditDatasetConfig) -> None:
|
|||||||
episode_indices=cfg.operation.episode_indices,
|
episode_indices=cfg.operation.episode_indices,
|
||||||
output_dir=output_dir,
|
output_dir=output_dir,
|
||||||
repo_id=output_repo_id,
|
repo_id=output_repo_id,
|
||||||
camera_encoder=cfg.operation.camera_encoder,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
logging.info(f"Dataset saved to {output_dir}")
|
logging.info(f"Dataset saved to {output_dir}")
|
||||||
@@ -402,7 +383,6 @@ def handle_split(cfg: EditDatasetConfig) -> None:
|
|||||||
dataset,
|
dataset,
|
||||||
splits=cfg.operation.splits,
|
splits=cfg.operation.splits,
|
||||||
output_dir=cfg.new_root,
|
output_dir=cfg.new_root,
|
||||||
camera_encoder=cfg.operation.camera_encoder,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
for split_name, split_ds in split_datasets.items():
|
for split_name, split_ds in split_datasets.items():
|
||||||
|
|||||||
@@ -36,6 +36,7 @@ from lerobot.datasets.video_utils import (
|
|||||||
encode_video_frames,
|
encode_video_frames,
|
||||||
get_video_info,
|
get_video_info,
|
||||||
)
|
)
|
||||||
|
from tests.fixtures.constants import DUMMY_VIDEO_INFO
|
||||||
|
|
||||||
|
|
||||||
# Per-codec skip markers — validation tests only fire when the codec is available
|
# Per-codec skip markers — validation tests only fire when the codec is available
|
||||||
@@ -570,3 +571,25 @@ class TestEncoderConfigPersistence:
|
|||||||
dataset.finalize()
|
dataset.finalize()
|
||||||
|
|
||||||
assert _read_feature_info(dataset) == first_info
|
assert _read_feature_info(dataset) == first_info
|
||||||
|
|
||||||
|
|
||||||
|
class TestFromVideoInfo:
|
||||||
|
"""``VideoEncoderConfig.from_video_info`` reconstructs an encoder config
|
||||||
|
from the ``video.*`` keys persisted in a dataset's ``info.json``.
|
||||||
|
"""
|
||||||
|
|
||||||
|
@require_libsvtav1
|
||||||
|
def test_reconstructs_from_dummy_video_info(self):
|
||||||
|
cfg = VideoEncoderConfig.from_video_info(DUMMY_VIDEO_INFO)
|
||||||
|
|
||||||
|
# Canonical stream codec ``"av1"`` is aliased to the encoder name.
|
||||||
|
assert cfg.vcodec == "libsvtav1"
|
||||||
|
assert cfg.pix_fmt == DUMMY_VIDEO_INFO["video.pix_fmt"]
|
||||||
|
assert cfg.g == DUMMY_VIDEO_INFO["video.g"]
|
||||||
|
assert cfg.crf == DUMMY_VIDEO_INFO["video.crf"]
|
||||||
|
assert cfg.preset == DUMMY_VIDEO_INFO["video.preset"]
|
||||||
|
assert cfg.fast_decode == DUMMY_VIDEO_INFO["video.fast_decode"]
|
||||||
|
assert cfg.video_backend == DUMMY_VIDEO_INFO["video.video_backend"]
|
||||||
|
# ``{}`` placeholder (typical after a merge with disagreeing sources)
|
||||||
|
# must not leak into the reconstructed config.
|
||||||
|
assert cfg.extra_options == VideoEncoderConfig().extra_options
|
||||||
|
|||||||
Reference in New Issue
Block a user