fix(annotate): decode keyframes via ffmpeg CLI fallback

PyAV segfaulted (exit 139) decoding the AV1 streams modern LeRobot
datasets use — a SIGSEGV that the per-episode try/except cannot catch,
killing the whole job when the interjections phase started.

Replace the PyAV fallback with _decode_frames_ffmpeg, which shells out
to the ffmpeg CLI: a full ffmpeg build decodes AV1, and a child-process
crash is a catchable non-zero exit rather than a segfault. Decoder chain
is now torchcodec -> ffmpeg. _decode_frames_av stays available behind
video_backend="pyav" for callers that want it.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Pepijn
2026-05-18 16:08:31 +02:00
parent 7128bb1769
commit 1bd53cc7da
3 changed files with 86 additions and 19 deletions
+24
View File
@@ -41,6 +41,7 @@ pytest.importorskip("datasets", reason="datasets is required (install lerobot[da
from lerobot.annotations.steerable_pipeline.frames import ( # noqa: E402
VideoFrameProvider,
_decode_frames_av,
_decode_frames_ffmpeg,
)
@@ -120,3 +121,26 @@ def test_decode_frames_av_raises_on_missing_file(tmp_path: Path) -> None:
"""A missing video surfaces as an exception the caller can fall back on."""
with pytest.raises(Exception): # noqa: B017, PT011
_decode_frames_av(tmp_path / "does_not_exist.mp4", [0.0])
def test_decode_frames_ffmpeg_returns_one_uint8_frame_per_timestamp(sample_video: Path) -> None:
    """``_decode_frames_ffmpeg`` returns one uint8 frame tensor per timestamp.

    The helper shells out to the ffmpeg CLI — the fallback that decodes AV1
    and isolates crashes to a child process. Because it needs the ``ffmpeg``
    executable, the test is skipped (rather than failed) when none is found
    on ``PATH``.
    """
    # Guard on the external binary so a machine without ffmpeg reports a skip
    # instead of a spurious decode failure.
    if shutil.which("ffmpeg") is None:
        pytest.skip("ffmpeg not available")

    timestamps = [0.0, 1.0, 2.5]
    frames = _decode_frames_ffmpeg(sample_video, timestamps)

    # Exactly one decoded frame must come back for each requested timestamp.
    assert len(frames) == len(timestamps)
    for frame in frames:
        assert isinstance(frame, torch.Tensor)
        assert frame.dtype == torch.uint8
        # Presumably (channels, height, width) of the ``sample_video`` fixture;
        # the fixture itself is defined outside this view.
        assert frame.shape == (3, 120, 160)
def test_decode_frames_ffmpeg_raises_on_missing_file(tmp_path: Path) -> None:
    """Decoding a nonexistent video must raise an exception the caller can catch.

    Skipped when the ``ffmpeg`` executable cannot be found on ``PATH``.
    """
    ffmpeg_missing = shutil.which("ffmpeg") is None
    if ffmpeg_missing:
        pytest.skip("ffmpeg not available")

    # A path inside the fresh tmp dir that was never created.
    absent_video = tmp_path / "does_not_exist.mp4"
    with pytest.raises(Exception):  # noqa: B017, PT011
        _decode_frames_ffmpeg(absent_video, [0.0])