Moved concat_video_files function to video_utils, cleaned up some code

2026-07-24 10:16:09 +00:00 · 2025-07-21 14:47:16 +02:00
parent dcb02a951d
commit 066b81aec2
7 changed files with 62 additions and 54 deletions
@@ -14,12 +14,12 @@ from lerobot.datasets.utils import (
    DEFAULT_VIDEO_FILE_SIZE_IN_MB,
    DEFAULT_VIDEO_PATH,
    EPISODES_DIR,
    concat_video_files,
    get_video_duration_in_s,
    get_video_size_in_mb,
    update_chunk_file_indices,
    write_info,
 )
 from lerobot.datasets.video_utils import concat_video_files
 from lerobot.utils.utils import get_elapsed_time_in_days_hours_minutes_seconds
 AGIBOT_FPS = 30
@@ -31,7 +31,6 @@ from lerobot.datasets.utils import (
    DEFAULT_EPISODES_PATH,
    DEFAULT_VIDEO_FILE_SIZE_IN_MB,
    DEFAULT_VIDEO_PATH,
    concat_video_files,
    get_parquet_file_size_in_mb,
    get_video_size_in_mb,
    to_parquet_with_hf_images,
@@ -40,6 +39,7 @@ from lerobot.datasets.utils import (
    write_stats,
    write_tasks,
 )
 from lerobot.datasets.video_utils import concat_video_files
 def validate_all_metadata(all_metadata: list[LeRobotDatasetMetadata]):
@@ -43,7 +43,6 @@ from lerobot.datasets.utils import (
    _validate_feature_names,
    check_delta_timestamps,
    check_version_compatibility,
    concat_video_files,
    create_empty_dataset_info,
    create_lerobot_dataset_card,
    embed_images,
@@ -74,6 +73,7 @@ from lerobot.datasets.utils import (
 )
 from lerobot.datasets.video_utils import (
    VideoFrame,
    concat_video_files,
    decode_video_frames,
    encode_video_frames,
    get_safe_default_codec,
@@ -17,9 +17,7 @@ import contextlib
 import importlib.resources
 import json
 import logging
 import shutil
 import subprocess
 import tempfile
 from collections.abc import Iterator
 from pathlib import Path
 from pprint import pformat
@@ -107,12 +105,6 @@ def get_hf_dataset_size_in_mb(hf_ds: Dataset) -> int:
    return hf_ds.data.nbytes / (1024**2)
 def get_pd_dataframe_size_in_mb(df: pandas.DataFrame) -> float:
    """Return the in-memory size of a pandas DataFrame in mebibytes.

    Args:
        df: DataFrame to measure.

    Returns:
        The summed `df.memory_usage(deep=True)` in bytes divided by 1024**2.
        The result of that true division is a float, not an int.
    """
    # TODO(rcadene): unused?
    # deep=True asks pandas to inspect object-dtype values rather than only
    # counting the references stored in the column.
    memory_usage_bytes = df.memory_usage(deep=True).sum()
    return memory_usage_bytes / (1024**2)
 def update_chunk_file_indices(chunk_idx: int, file_idx: int, chunks_size: int):
    if file_idx == chunks_size - 1:
        file_idx = 0
@@ -151,40 +143,6 @@ def get_video_size_in_mb(mp4_path: Path):
    return file_size_mb
 def concat_video_files(paths_to_cat: list[Path], root: Path, video_key: str, chunk_idx: int, file_idx: int):
    """Concatenate video files into a single file using ffmpeg's concat demuxer.

    The streams are copied (`-c copy`), so nothing is re-encoded. The result is
    first written inside a temporary directory created under `root`, then moved
    to `root / DEFAULT_VIDEO_PATH` formatted with the given indices.

    Args:
        paths_to_cat: Video files to concatenate, in order.
        root: Directory in which the temporary working directory is created and
            against which the output path is resolved.
        video_key: Value substituted for `video_key` in DEFAULT_VIDEO_PATH.
        chunk_idx: Value substituted for `chunk_index` in DEFAULT_VIDEO_PATH.
        file_idx: Value substituted for `file_index` in DEFAULT_VIDEO_PATH.

    Raises:
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
    """
    # TODO(rcadene): move to video_utils.py
    tmp_dir = Path(tempfile.mkdtemp(dir=root))
    try:
        # Create a text file with the list of files to concatenate
        path_concat_video_files = tmp_dir / "concat_video_files.txt"
        with open(path_concat_video_files, "w") as f:
            for ep_path in paths_to_cat:
                # ffmpeg quoting: a literal ' inside a quoted string must be
                # written as '\'' or the list entry is parsed incorrectly.
                escaped_path = str(ep_path).replace("'", "'\\''")
                f.write(f"file '{escaped_path}'\n")

        path_tmp_output = tmp_dir / "tmp_output.mp4"
        command = [
            "ffmpeg",
            "-y",
            "-f",
            "concat",
            "-safe",
            "0",
            "-i",
            str(path_concat_video_files),
            "-c",
            "copy",
            str(path_tmp_output),
        ]
        subprocess.run(command, check=True)

        output_path = root / DEFAULT_VIDEO_PATH.format(
            video_key=video_key, chunk_index=chunk_idx, file_index=file_idx
        )
        output_path.parent.mkdir(parents=True, exist_ok=True)
        shutil.move(str(path_tmp_output), str(output_path))
    finally:
        # Always remove the working directory, including when ffmpeg fails,
        # so failed runs do not leave stray temp directories under `root`.
        shutil.rmtree(str(tmp_dir))
 def get_video_duration_in_s(mp4_file: Path):
    # TODO(rcadene): move to video_utils.py
    command = [
@@ -21,11 +21,7 @@ from tqdm import tqdm
 from lerobot.datasets.compute_stats import aggregate_stats, get_feature_stats, sample_indices
 from lerobot.datasets.lerobot_dataset import LeRobotDataset
-from lerobot.datasets.utils import serialize_dict
+from lerobot.datasets.utils import LEGACY_EPISODES_STATS_PATH, serialize_dict
 ### LEGACY FUNCTIONS REMOVED FROM UTILS ###
 LEGACY_EPISODES_STATS_PATH = "episodes_stats.jsonl"
 def append_jsonlines(data: dict, fpath: Path) -> None:
@@ -41,9 +37,6 @@ def legacy_write_episode_stats(episode_index: int, episode_stats: dict, local_di
    append_jsonlines(episode_stats, local_dir / LEGACY_EPISODES_STATS_PATH)
 ######## END OF LEGACY FUNCTIONS ########
 def sample_episode_video_frames(dataset: LeRobotDataset, episode_index: int, ft_key: str) -> np.ndarray:
    ep_len = dataset.meta.episodes[episode_index]["length"]
    sampled_indices = sample_indices(ep_len)
@@ -59,7 +59,6 @@ from lerobot.datasets.utils import (
    LEGACY_EPISODES_STATS_PATH,
    LEGACY_TASKS_PATH,
    cast_stats_to_numpy,
    concat_video_files,
    flatten_dict,
    get_parquet_file_size_in_mb,
    get_parquet_num_frames,
@@ -72,6 +71,7 @@ from lerobot.datasets.utils import (
    write_stats,
    write_tasks,
 )
 from lerobot.datasets.video_utils import concat_video_files
 V21 = "v2.1"
@@ -16,7 +16,9 @@
 import importlib
 import json
 import logging
 import shutil
 import subprocess
 import tempfile
 import warnings
 from collections import OrderedDict
 from dataclasses import dataclass, field
@@ -29,6 +31,8 @@ import torchvision
 from datasets.features.features import register_feature
 from PIL import Image
 from lerobot.datasets.utils import DEFAULT_VIDEO_PATH
 def get_safe_default_codec():
    if importlib.util.find_spec("torchcodec"):
@@ -300,6 +304,59 @@ def encode_video_frames(
        )
 def concat_video_files(paths_to_cat: list[Path], root: Path, video_key: str, chunk_idx: int, file_idx: int):
    """
    Concatenate multiple video files into a single video file using ffmpeg.

    This function takes a list of video file paths and concatenates them into a single
    output video file. It uses ffmpeg's concat demuxer with stream copy mode for fast
    concatenation without re-encoding.

    Args:
        paths_to_cat: List of video file paths to concatenate, in order.
        root: Root directory where temporary files and output will be created.
        video_key: Video key identifier (e.g., camera name) used in output path.
        chunk_idx: Chunk index for organizing output files.
        file_idx: File index within the chunk.

    Raises:
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.

    Note:
        - Creates a temporary directory under `root` for intermediate files. It is
          removed when the function exits, whether it succeeds or raises.
        - Uses ffmpeg's concat demuxer which requires all input videos to have the same
          codec, resolution, and frame rate for proper concatenation.
        - Output path follows the DEFAULT_VIDEO_PATH pattern with video_key, chunk_idx,
          and file_idx parameters.
    """
    tmp_dir = Path(tempfile.mkdtemp(dir=root))
    try:
        # List file consumed by the concat demuxer: one `file '<path>'` entry per input.
        path_concat_video_files = tmp_dir / "concat_video_files.txt"
        with open(path_concat_video_files, "w") as f:
            for ep_path in paths_to_cat:
                # ffmpeg quoting: a literal ' inside a quoted string must be
                # written as '\'' or the list entry is parsed incorrectly.
                escaped_path = str(ep_path).replace("'", "'\\''")
                f.write(f"file '{escaped_path}'\n")

        path_tmp_output = tmp_dir / "tmp_output.mp4"
        command = [
            "ffmpeg",
            "-y",
            "-f",
            "concat",
            "-safe",
            "0",
            "-i",
            str(path_concat_video_files),
            "-c",
            "copy",
            str(path_tmp_output),
        ]
        subprocess.run(command, check=True)

        output_path = root / DEFAULT_VIDEO_PATH.format(
            video_key=video_key, chunk_index=chunk_idx, file_index=file_idx
        )
        output_path.parent.mkdir(parents=True, exist_ok=True)
        shutil.move(str(path_tmp_output), str(output_path))
    finally:
        # Always remove the working directory, including when ffmpeg fails,
        # so failed runs do not leave stray temp directories under `root`.
        shutil.rmtree(str(tmp_dir))
@dataclass
 class VideoFrame:
    # TODO(rcadene, lhoestq): move to Hugging Face `datasets` repo