Moved get_video_duration_in_s to video_utils and replaced the subprocess call to ffprobe (FFmpeg) with PyAV

This commit is contained in:
Michel Aractingi
2025-08-29 01:31:53 +02:00
parent 47aee1fdbe
commit 84ffc28854
4 changed files with 27 additions and 27 deletions
+1 -1
View File
@@ -53,7 +53,6 @@ from lerobot.datasets.utils import (
get_parquet_file_size_in_mb, get_parquet_file_size_in_mb,
get_parquet_num_frames, get_parquet_num_frames,
get_safe_version, get_safe_version,
get_video_duration_in_s,
get_video_size_in_mb, get_video_size_in_mb,
hf_transform_to_torch, hf_transform_to_torch,
is_valid_version, is_valid_version,
@@ -77,6 +76,7 @@ from lerobot.datasets.video_utils import (
decode_video_frames, decode_video_frames,
encode_video_frames, encode_video_frames,
get_safe_default_codec, get_safe_default_codec,
get_video_duration_in_s,
get_video_info, get_video_info,
) )
-21
View File
@@ -17,7 +17,6 @@ import contextlib
import importlib.resources import importlib.resources
import json import json
import logging import logging
import subprocess
from collections.abc import Iterator from collections.abc import Iterator
from pathlib import Path from pathlib import Path
from pprint import pformat from pprint import pformat
@@ -142,26 +141,6 @@ def get_video_size_in_mb(mp4_path: Path) -> float:
return file_size_mb return file_size_mb
def get_video_duration_in_s(mp4_file: Path) -> float:
    """Return the duration of a video file in seconds, as reported by ffprobe.

    Runs ``ffprobe`` on ``mp4_file`` and parses the ``format=duration`` entry
    it prints on stdout.

    Args:
        mp4_file: Path to the video file to probe.

    Returns:
        The duration printed by ffprobe, converted to a float.

    Raises:
        FileNotFoundError: If the ``ffprobe`` executable cannot be launched.
        ValueError: If ffprobe exits with an error, prints nothing, or prints
            something that is not a number.
    """
    # TODO(rcadene): move to video_utils.py
    command = [
        "ffprobe",
        "-v",
        "error",
        "-show_entries",
        "format=duration",
        "-of",
        "default=noprint_wrappers=1:nokey=1",
        str(mp4_file),
    ]
    # Keep stderr separate from stdout so that diagnostics emitted by ffprobe
    # are never mixed into the value parsed below.
    result = subprocess.run(
        command,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        text=True,
        errors="replace",
    )
    output = result.stdout.strip()
    if result.returncode != 0 or not output:
        details = result.stderr.strip() or "no output"
        raise ValueError(f"ffprobe could not read the duration of '{mp4_file}': {details}")
    return float(output)
def flatten_dict(d: dict, parent_key: str = "", sep: str = "/") -> dict: def flatten_dict(d: dict, parent_key: str = "", sep: str = "/") -> dict:
"""Flatten a nested dictionary structure by collapsing nested keys into one key with a separator. """Flatten a nested dictionary structure by collapsing nested keys into one key with a separator.
@@ -62,7 +62,6 @@ from lerobot.datasets.utils import (
flatten_dict, flatten_dict,
get_parquet_file_size_in_mb, get_parquet_file_size_in_mb,
get_parquet_num_frames, get_parquet_num_frames,
get_video_duration_in_s,
get_video_size_in_mb, get_video_size_in_mb,
load_info, load_info,
update_chunk_file_indices, update_chunk_file_indices,
@@ -71,7 +70,7 @@ from lerobot.datasets.utils import (
write_stats, write_stats,
write_tasks, write_tasks,
) )
from lerobot.datasets.video_utils import concat_video_files from lerobot.datasets.video_utils import concat_video_files, get_video_duration_in_s
V21 = "v2.1" V21 = "v2.1"
+25 -3
View File
@@ -267,7 +267,7 @@ def encode_video_frames(
video_path = Path(video_path) video_path = Path(video_path)
imgs_dir = Path(imgs_dir) imgs_dir = Path(imgs_dir)
video_path.parent.mkdir(parents=True, exist_ok=overwrite) video_path.parent.mkdir(parents=True, exist_ok=True)
# Encoders/pixel formats incompatibility check # Encoders/pixel formats incompatibility check
if (vcodec == "libsvtav1" or vcodec == "hevc") and pix_fmt == "yuv444p": if (vcodec == "libsvtav1" or vcodec == "hevc") and pix_fmt == "yuv444p":
@@ -277,9 +277,9 @@ def encode_video_frames(
pix_fmt = "yuv420p" pix_fmt = "yuv420p"
# Get input frames # Get input frames
template = "frame_" + ("[0-9]" * 6) + ".png" template = "frame-" + ("[0-9]" * 6) + ".png"
input_list = sorted( input_list = sorted(
glob.glob(str(imgs_dir / template)), key=lambda x: int(x.split("_")[-1].split(".")[0]) glob.glob(str(imgs_dir / template)), key=lambda x: int(x.split("-")[-1].split(".")[0])
) )
# Define video output frame size (assuming all input frames are the same size) # Define video output frame size (assuming all input frames are the same size)
@@ -512,3 +512,25 @@ def get_image_pixel_channels(image: Image):
return 4 # RGBA return 4 # RGBA
else: else:
raise ValueError("Unknown format") raise ValueError("Unknown format")
def get_video_duration_in_s(video_path: Path | str) -> float:
    """
    Get the duration of a video file in seconds using PyAV.

    The duration of the first video stream is preferred. When the file has no
    video stream, or that stream does not report a duration, the container
    duration is used instead.

    Args:
        video_path: Path to the video file.

    Returns:
        Duration of the video in seconds.

    Raises:
        ValueError: If neither the first video stream nor the container
            reports a duration.
    """
    with av.open(str(video_path)) as container:
        # First video stream, or None when the file has no video stream at all
        # (avoids a bare IndexError from `streams.video[0]`).
        video_stream = next(iter(container.streams.video), None)

        # stream.duration is expressed in stream.time_base units, so their
        # product is the duration in seconds.
        if (
            video_stream is not None
            and video_stream.duration is not None
            and video_stream.time_base is not None
        ):
            return float(video_stream.duration * video_stream.time_base)

        # Fallback to the container duration, expressed in av.time_base units.
        # It can be unknown (None) as well, so guard before dividing.
        if container.duration is not None:
            return float(container.duration / av.time_base)

    raise ValueError(f"Could not determine the duration of video '{video_path}'.")