From 84ffc288542b34d5ab9513a4c360ef0b270612c1 Mon Sep 17 00:00:00 2001 From: Michel Aractingi Date: Fri, 29 Aug 2025 01:31:53 +0200 Subject: [PATCH] moved `get_video_duration_in_s` to video_utils and replaced subprocess and ffmpeg with pyAV --- src/lerobot/datasets/lerobot_dataset.py | 2 +- src/lerobot/datasets/utils.py | 21 -------------- .../v30/convert_dataset_v21_to_v30.py | 3 +- src/lerobot/datasets/video_utils.py | 28 +++++++++++++++++-- 4 files changed, 27 insertions(+), 27 deletions(-) diff --git a/src/lerobot/datasets/lerobot_dataset.py b/src/lerobot/datasets/lerobot_dataset.py index 979453623..c4686f968 100644 --- a/src/lerobot/datasets/lerobot_dataset.py +++ b/src/lerobot/datasets/lerobot_dataset.py @@ -53,7 +53,6 @@ from lerobot.datasets.utils import ( get_parquet_file_size_in_mb, get_parquet_num_frames, get_safe_version, - get_video_duration_in_s, get_video_size_in_mb, hf_transform_to_torch, is_valid_version, @@ -77,6 +76,7 @@ from lerobot.datasets.video_utils import ( decode_video_frames, encode_video_frames, get_safe_default_codec, + get_video_duration_in_s, get_video_info, ) diff --git a/src/lerobot/datasets/utils.py b/src/lerobot/datasets/utils.py index c807aeb7b..2ed0fc46b 100644 --- a/src/lerobot/datasets/utils.py +++ b/src/lerobot/datasets/utils.py @@ -17,7 +17,6 @@ import contextlib import importlib.resources import json import logging -import subprocess from collections.abc import Iterator from pathlib import Path from pprint import pformat @@ -142,26 +141,6 @@ def get_video_size_in_mb(mp4_path: Path) -> float: return file_size_mb -def get_video_duration_in_s(mp4_file: Path) -> float: - # TODO(rcadene): move to video_utils.py - command = [ - "ffprobe", - "-v", - "error", - "-show_entries", - "format=duration", - "-of", - "default=noprint_wrappers=1:nokey=1", - str(mp4_file), - ] - result = subprocess.run( - command, - stdout=subprocess.PIPE, - stderr=subprocess.STDOUT, - ) - return float(result.stdout) - - def flatten_dict(d: dict, parent_key: str = "", sep: str = "/") -> dict: """Flatten a nested dictionary structure by collapsing nested keys into one key with a separator. diff --git a/src/lerobot/datasets/v30/convert_dataset_v21_to_v30.py b/src/lerobot/datasets/v30/convert_dataset_v21_to_v30.py index f21e59ad8..d8a9aa94d 100644 --- a/src/lerobot/datasets/v30/convert_dataset_v21_to_v30.py +++ b/src/lerobot/datasets/v30/convert_dataset_v21_to_v30.py @@ -62,7 +62,6 @@ from lerobot.datasets.utils import ( flatten_dict, get_parquet_file_size_in_mb, get_parquet_num_frames, - get_video_duration_in_s, get_video_size_in_mb, load_info, update_chunk_file_indices, @@ -71,7 +70,7 @@ from lerobot.datasets.utils import ( write_stats, write_tasks, ) -from lerobot.datasets.video_utils import concat_video_files +from lerobot.datasets.video_utils import concat_video_files, get_video_duration_in_s V21 = "v2.1" diff --git a/src/lerobot/datasets/video_utils.py b/src/lerobot/datasets/video_utils.py index 7552ff808..4fe6dfeb3 100644 --- a/src/lerobot/datasets/video_utils.py +++ b/src/lerobot/datasets/video_utils.py @@ -267,7 +267,7 @@ def encode_video_frames( video_path = Path(video_path) imgs_dir = Path(imgs_dir) - video_path.parent.mkdir(parents=True, exist_ok=overwrite) + video_path.parent.mkdir(parents=True, exist_ok=True) # Encoders/pixel formats incompatibility check if (vcodec == "libsvtav1" or vcodec == "hevc") and pix_fmt == "yuv444p": @@ -277,9 +277,9 @@ def encode_video_frames( pix_fmt = "yuv420p" # Get input frames - template = "frame_" + ("[0-9]" * 6) + ".png" + template = "frame-" + ("[0-9]" * 6) + ".png" input_list = sorted( - glob.glob(str(imgs_dir / template)), key=lambda x: int(x.split("_")[-1].split(".")[0]) + glob.glob(str(imgs_dir / template)), key=lambda x: int(x.split("-")[-1].split(".")[0]) ) # Define video output frame size (assuming all input frames are the same size) @@ -512,3 +512,25 @@ def get_image_pixel_channels(image: Image): return 4 # RGBA else: raise ValueError("Unknown format") + + +def get_video_duration_in_s(video_path: Path | str) -> float: + """ + Get the duration of a video file in seconds using PyAV. + + Args: + video_path: Path to the video file. + + Returns: + Duration of the video in seconds. + """ + with av.open(str(video_path)) as container: + # Get the first video stream + video_stream = container.streams.video[0] + # Calculate duration: stream.duration * stream.time_base gives duration in seconds + if video_stream.duration is not None: + duration = float(video_stream.duration * video_stream.time_base) + else: + # Fallback to container duration if stream duration is not available + duration = float(container.duration / av.time_base) + return duration