Moved get_video_duration_in_s to video_utils and replaced the subprocess call to ffprobe with PyAV

This commit is contained in:
Michel Aractingi
2025-08-29 01:31:53 +02:00
parent 47aee1fdbe
commit 84ffc28854
4 changed files with 27 additions and 27 deletions
+1 -1
View File
@@ -53,7 +53,6 @@ from lerobot.datasets.utils import (
get_parquet_file_size_in_mb,
get_parquet_num_frames,
get_safe_version,
get_video_duration_in_s,
get_video_size_in_mb,
hf_transform_to_torch,
is_valid_version,
@@ -77,6 +76,7 @@ from lerobot.datasets.video_utils import (
decode_video_frames,
encode_video_frames,
get_safe_default_codec,
get_video_duration_in_s,
get_video_info,
)
-21
View File
@@ -17,7 +17,6 @@ import contextlib
import importlib.resources
import json
import logging
import subprocess
from collections.abc import Iterator
from pathlib import Path
from pprint import pformat
@@ -142,26 +141,6 @@ def get_video_size_in_mb(mp4_path: Path) -> float:
return file_size_mb
def get_video_duration_in_s(mp4_file: Path) -> float:
    """Return the duration of a video file in seconds, as reported by ffprobe.

    Args:
        mp4_file: Path to the video file to inspect.

    Returns:
        The container-level duration (ffprobe's ``format=duration`` entry) in seconds.

    Raises:
        ValueError: If ffprobe exits with a non-zero status or prints something
            that cannot be parsed as a float.
    """
    # TODO(rcadene): move to video_utils.py
    command = [
        "ffprobe",
        "-v",
        "error",
        "-show_entries",
        "format=duration",
        "-of",
        "default=noprint_wrappers=1:nokey=1",
        str(mp4_file),
    ]
    # Keep stderr separate from stdout: merging them lets any ffprobe diagnostic
    # leak into the text handed to float() and corrupt an otherwise valid duration.
    result = subprocess.run(
        command,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        text=True,
    )
    if result.returncode != 0:
        raise ValueError(
            f"ffprobe failed for '{mp4_file}' (exit code {result.returncode}): {result.stderr.strip()}"
        )
    return float(result.stdout)
def flatten_dict(d: dict, parent_key: str = "", sep: str = "/") -> dict:
"""Flatten a nested dictionary structure by collapsing nested keys into one key with a separator.
@@ -62,7 +62,6 @@ from lerobot.datasets.utils import (
flatten_dict,
get_parquet_file_size_in_mb,
get_parquet_num_frames,
get_video_duration_in_s,
get_video_size_in_mb,
load_info,
update_chunk_file_indices,
@@ -71,7 +70,7 @@ from lerobot.datasets.utils import (
write_stats,
write_tasks,
)
from lerobot.datasets.video_utils import concat_video_files
from lerobot.datasets.video_utils import concat_video_files, get_video_duration_in_s
V21 = "v2.1"
+25 -3
View File
@@ -267,7 +267,7 @@ def encode_video_frames(
video_path = Path(video_path)
imgs_dir = Path(imgs_dir)
video_path.parent.mkdir(parents=True, exist_ok=overwrite)
video_path.parent.mkdir(parents=True, exist_ok=True)
# Encoders/pixel formats incompatibility check
if (vcodec == "libsvtav1" or vcodec == "hevc") and pix_fmt == "yuv444p":
@@ -277,9 +277,9 @@ def encode_video_frames(
pix_fmt = "yuv420p"
# Get input frames
template = "frame_" + ("[0-9]" * 6) + ".png"
template = "frame-" + ("[0-9]" * 6) + ".png"
input_list = sorted(
glob.glob(str(imgs_dir / template)), key=lambda x: int(x.split("_")[-1].split(".")[0])
glob.glob(str(imgs_dir / template)), key=lambda x: int(x.split("-")[-1].split(".")[0])
)
# Define video output frame size (assuming all input frames are the same size)
@@ -512,3 +512,25 @@ def get_image_pixel_channels(image: Image):
return 4 # RGBA
else:
raise ValueError("Unknown format")
def get_video_duration_in_s(video_path: Path | str) -> float:
    """Get the duration of a video file in seconds using PyAV.

    The duration of the first video stream is preferred. When the stream does not
    report a usable duration, the container-level duration is used instead.

    Args:
        video_path: Path to the video file.

    Returns:
        Duration of the video in seconds.

    Raises:
        IndexError: If the file contains no video stream.
        ValueError: If neither the video stream nor the container reports a duration.
    """
    with av.open(str(video_path)) as container:
        # Get the first video stream
        video_stream = container.streams.video[0]
        # stream.duration is expressed in units of stream.time_base, so their
        # product is the duration in seconds.
        if video_stream.duration is not None and video_stream.time_base is not None:
            return float(video_stream.duration * video_stream.time_base)
        # Fallback to the container duration, which is expressed in units of
        # 1 / av.time_base. It can be missing too, so fail with a clear message
        # instead of an accidental TypeError from dividing None.
        if container.duration is None:
            raise ValueError(f"Could not determine the duration of video: {video_path}")
        return float(container.duration / av.time_base)