Moved the concat_video_files function to video_utils and cleaned up some code

2026-07-25 18:56:09 +00:00 · 2025-07-21 14:47:16 +02:00
parent dcb02a951d
commit 066b81aec2
7 changed files with 62 additions and 54 deletions
@@ -14,12 +14,12 @@ from lerobot.datasets.utils import (
    DEFAULT_VIDEO_FILE_SIZE_IN_MB,
    DEFAULT_VIDEO_PATH,
    EPISODES_DIR,
-    concat_video_files,
    get_video_duration_in_s,
    get_video_size_in_mb,
    update_chunk_file_indices,
    write_info,
 )
+from lerobot.datasets.video_utils import concat_video_files
 from lerobot.utils.utils import get_elapsed_time_in_days_hours_minutes_seconds

 AGIBOT_FPS = 30
@@ -31,7 +31,6 @@ from lerobot.datasets.utils import (
    DEFAULT_EPISODES_PATH,
    DEFAULT_VIDEO_FILE_SIZE_IN_MB,
    DEFAULT_VIDEO_PATH,
-    concat_video_files,
    get_parquet_file_size_in_mb,
    get_video_size_in_mb,
    to_parquet_with_hf_images,
@@ -40,6 +39,7 @@ from lerobot.datasets.utils import (
    write_stats,
    write_tasks,
 )
+from lerobot.datasets.video_utils import concat_video_files


 def validate_all_metadata(all_metadata: list[LeRobotDatasetMetadata]):
@@ -43,7 +43,6 @@ from lerobot.datasets.utils import (
    _validate_feature_names,
    check_delta_timestamps,
    check_version_compatibility,
-    concat_video_files,
    create_empty_dataset_info,
    create_lerobot_dataset_card,
    embed_images,
@@ -74,6 +73,7 @@ from lerobot.datasets.utils import (
 )
 from lerobot.datasets.video_utils import (
    VideoFrame,
+    concat_video_files,
    decode_video_frames,
    encode_video_frames,
    get_safe_default_codec,
@@ -17,9 +17,7 @@ import contextlib
 import importlib.resources
 import json
 import logging
-import shutil
 import subprocess
-import tempfile
 from collections.abc import Iterator
 from pathlib import Path
 from pprint import pformat
@@ -107,12 +105,6 @@ def get_hf_dataset_size_in_mb(hf_ds: Dataset) -> int:
    return hf_ds.data.nbytes / (1024**2)


-def get_pd_dataframe_size_in_mb(df: pandas.DataFrame) -> int:
-    # TODO(rcadene): unused?
-    memory_usage_bytes = df.memory_usage(deep=True).sum()
-    return memory_usage_bytes / (1024**2)
-
-
 def update_chunk_file_indices(chunk_idx: int, file_idx: int, chunks_size: int):
    if file_idx == chunks_size - 1:
        file_idx = 0
@@ -151,40 +143,6 @@ def get_video_size_in_mb(mp4_path: Path):
    return file_size_mb


-def concat_video_files(paths_to_cat: list[Path], root: Path, video_key: str, chunk_idx: int, file_idx: int):
-    # TODO(rcadene): move to video_utils.py
-    # TODO(rcadene): add docstring
-    tmp_dir = Path(tempfile.mkdtemp(dir=root))
-    # Create a text file with the list of files to concatenate
-    path_concat_video_files = tmp_dir / "concat_video_files.txt"
-    with open(path_concat_video_files, "w") as f:
-        for ep_path in paths_to_cat:
-            f.write(f"file '{str(ep_path)}'\n")
-
-    path_tmp_output = tmp_dir / "tmp_output.mp4"
-    command = [
-        "ffmpeg",
-        "-y",
-        "-f",
-        "concat",
-        "-safe",
-        "0",
-        "-i",
-        str(path_concat_video_files),
-        "-c",
-        "copy",
-        str(path_tmp_output),
-    ]
-    subprocess.run(command, check=True)
-
-    output_path = root / DEFAULT_VIDEO_PATH.format(
-        video_key=video_key, chunk_index=chunk_idx, file_index=file_idx
-    )
-    output_path.parent.mkdir(parents=True, exist_ok=True)
-    shutil.move(str(path_tmp_output), str(output_path))
-    shutil.rmtree(str(tmp_dir))
-
-
 def get_video_duration_in_s(mp4_file: Path):
    # TODO(rcadene): move to video_utils.py
    command = [
@@ -21,11 +21,7 @@ from tqdm import tqdm

 from lerobot.datasets.compute_stats import aggregate_stats, get_feature_stats, sample_indices
 from lerobot.datasets.lerobot_dataset import LeRobotDataset
-from lerobot.datasets.utils import serialize_dict
-
-### LEGACY FUNCTIONS REMOVED FROM UTILS ###
-
-LEGACY_EPISODES_STATS_PATH = "episodes_stats.jsonl"
+from lerobot.datasets.utils import LEGACY_EPISODES_STATS_PATH, serialize_dict


 def append_jsonlines(data: dict, fpath: Path) -> None:
@@ -41,9 +37,6 @@ def legacy_write_episode_stats(episode_index: int, episode_stats: dict, local_di
    append_jsonlines(episode_stats, local_dir / LEGACY_EPISODES_STATS_PATH)


-######## END OF LEGACY FUNCTIONS ########
-
-
 def sample_episode_video_frames(dataset: LeRobotDataset, episode_index: int, ft_key: str) -> np.ndarray:
    ep_len = dataset.meta.episodes[episode_index]["length"]
    sampled_indices = sample_indices(ep_len)
@@ -59,7 +59,6 @@ from lerobot.datasets.utils import (
    LEGACY_EPISODES_STATS_PATH,
    LEGACY_TASKS_PATH,
    cast_stats_to_numpy,
-    concat_video_files,
    flatten_dict,
    get_parquet_file_size_in_mb,
    get_parquet_num_frames,
@@ -72,6 +71,7 @@ from lerobot.datasets.utils import (
    write_stats,
    write_tasks,
 )
+from lerobot.datasets.video_utils import concat_video_files

 V21 = "v2.1"

@@ -16,7 +16,9 @@
 import importlib
 import json
 import logging
+import shutil
 import subprocess
+import tempfile
 import warnings
 from collections import OrderedDict
 from dataclasses import dataclass, field
@@ -29,6 +31,8 @@ import torchvision
 from datasets.features.features import register_feature
 from PIL import Image

+from lerobot.datasets.utils import DEFAULT_VIDEO_PATH
+

 def get_safe_default_codec():
    if importlib.util.find_spec("torchcodec"):
@@ -300,6 +304,59 @@ def encode_video_frames(
        )


+def concat_video_files(paths_to_cat: list[Path], root: Path, video_key: str, chunk_idx: int, file_idx: int):
+    """
+    Concatenate multiple video files into a single video file using ffmpeg.
+
+    This function takes a list of video file paths and concatenates them into a single
+    output video file. It uses ffmpeg's concat demuxer with stream copy mode for fast
+    concatenation without re-encoding.
+
+    Args:
+        paths_to_cat: List of video file paths to concatenate, in order.
+        root: Root directory where temporary files and output will be created.
+        video_key: Video key identifier (e.g., camera name) used in output path.
+        chunk_idx: Chunk index for organizing output files.
+        file_idx: File index within the chunk.
+
+    Note:
+        - Creates a temporary directory for intermediate files that is cleaned up after use.
+        - Uses ffmpeg's concat demuxer which requires all input videos to have the same
+          codec, resolution, and frame rate for proper concatenation.
+        - Output path follows the DEFAULT_VIDEO_PATH pattern with video_key, chunk_idx,
+          and file_idx parameters.
+    """
+
+    tmp_dir = Path(tempfile.mkdtemp(dir=root))
+    path_concat_video_files = tmp_dir / "concat_video_files.txt"
+    with open(path_concat_video_files, "w") as f:
+        for ep_path in paths_to_cat:
+            f.write(f"file '{str(ep_path)}'\n")
+
+    path_tmp_output = tmp_dir / "tmp_output.mp4"
+    command = [
+        "ffmpeg",
+        "-y",
+        "-f",
+        "concat",
+        "-safe",
+        "0",
+        "-i",
+        str(path_concat_video_files),
+        "-c",
+        "copy",
+        str(path_tmp_output),
+    ]
+    subprocess.run(command, check=True)
+
+    output_path = root / DEFAULT_VIDEO_PATH.format(
+        video_key=video_key, chunk_index=chunk_idx, file_index=file_idx
+    )
+    output_path.parent.mkdir(parents=True, exist_ok=True)
+    shutil.move(str(path_tmp_output), str(output_path))
+    shutil.rmtree(str(tmp_dir))
+
+
@dataclass
 class VideoFrame:
    # TODO(rcadene, lhoestq): move to Hugging Face `datasets` repo