mirror of
https://github.com/huggingface/lerobot.git
synced 2026-05-25 13:40:00 +00:00
fix(annotate): transcode subclips to H.264 instead of stream-copy
Modern LeRobot datasets store videos in AV1, which vllm's libav build cannot decode (the video processor returns 0 frames and downstream code then fails with a ZeroDivisionError). Re-encode each per-episode subclip with libx264 (preset ultrafast, crf 23, pix_fmt yuv420p) so the resulting mp4 is universally decodable. Strip audio with -an for a smaller payload, and raise the ffmpeg subprocess timeout from 120 s to 300 s.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -225,8 +225,11 @@ def episode_clip_path(
|
|||||||
"""Extract the episode's subclip to ``cache_dir/ep_{idx:06d}.mp4``.
|
"""Extract the episode's subclip to ``cache_dir/ep_{idx:06d}.mp4``.
|
||||||
|
|
||||||
Returns ``None`` if the dataset has no video tracks. Skips re-extract
|
Returns ``None`` if the dataset has no video tracks. Skips re-extract
|
||||||
when the cached clip already exists. Uses ``ffmpeg`` via subprocess
|
when the cached clip already exists. Re-encodes to H.264
|
||||||
with stream-copy where possible (no re-encode) for speed.
|
(libx264) so the resulting mp4 is decodable by every downstream
|
||||||
|
video processor — stream-copy would inherit the source codec
|
||||||
|
(often AV1 in modern LeRobot datasets), which vllm's libav build
|
||||||
|
cannot decode.
|
||||||
"""
|
"""
|
||||||
import subprocess # noqa: PLC0415
|
import subprocess # noqa: PLC0415
|
||||||
|
|
||||||
@@ -253,12 +256,19 @@ def episode_clip_path(
|
|||||||
f"{to_timestamp:.3f}",
|
f"{to_timestamp:.3f}",
|
||||||
"-i",
|
"-i",
|
||||||
str(src),
|
str(src),
|
||||||
"-c",
|
"-c:v",
|
||||||
"copy",
|
"libx264",
|
||||||
|
"-preset",
|
||||||
|
"ultrafast",
|
||||||
|
"-crf",
|
||||||
|
"23",
|
||||||
|
"-pix_fmt",
|
||||||
|
"yuv420p",
|
||||||
|
"-an",
|
||||||
str(out_path),
|
str(out_path),
|
||||||
]
|
]
|
||||||
try:
|
try:
|
||||||
subprocess.run(cmd, check=True, timeout=120)
|
subprocess.run(cmd, check=True, timeout=300)
|
||||||
except (subprocess.CalledProcessError, subprocess.TimeoutExpired, FileNotFoundError):
|
except (subprocess.CalledProcessError, subprocess.TimeoutExpired, FileNotFoundError):
|
||||||
return None
|
return None
|
||||||
return out_path if out_path.exists() and out_path.stat().st_size > 0 else None
|
return out_path if out_path.exists() and out_path.stat().st_size > 0 else None
|
||||||
|
|||||||
Reference in New Issue
Block a user