From 01fc975eb5e550124db257f84a1a56b12cf9e07e Mon Sep 17 00:00:00 2001 From: Pepijn Date: Tue, 28 Apr 2026 14:10:57 +0200 Subject: [PATCH] fix(annotate): default video decode backend to pyav torchcodec's __init__ bad-allocs on the cu128/torch-2.8 stack in some environments (Lustre/conda combos). The annotation pipeline calls decode_video_frames many times per episode, so this is a hard blocker. Default to pyav (always available via the av package) and let users opt back into torchcodec via LEROBOT_VIDEO_BACKEND=torchcodec. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/lerobot/annotations/steerable_pipeline/frames.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/lerobot/annotations/steerable_pipeline/frames.py b/src/lerobot/annotations/steerable_pipeline/frames.py index 68c7cd025..79ef5f3ed 100644 --- a/src/lerobot/annotations/steerable_pipeline/frames.py +++ b/src/lerobot/annotations/steerable_pipeline/frames.py @@ -119,6 +119,8 @@ class VideoFrameProvider: return [img for img in out if img is not None] def _decode(self, episode_index: int, timestamps: list[float]) -> list[Any]: + import os as _os # noqa: PLC0415 + from PIL import Image # noqa: PLC0415 from lerobot.datasets.video_utils import decode_video_frames # noqa: PLC0415 @@ -127,11 +129,17 @@ class VideoFrameProvider: from_timestamp = ep[f"videos/{self.camera_key}/from_timestamp"] shifted = [from_timestamp + ts for ts in timestamps] video_path = self.root / self._meta.get_video_file_path(episode_index, self.camera_key) + # ``torchcodec`` import currently bad-allocs on cu128/torch-2.8 in + # some environments; default to ``pyav`` (always available via + # the ``av`` package) and let users override with + # LEROBOT_VIDEO_BACKEND=torchcodec when their stack supports it. + backend = _os.environ.get("LEROBOT_VIDEO_BACKEND", "pyav") try: frames = decode_video_frames( video_path, shifted, self.tolerance_s, + backend=backend, return_uint8=True, ) except Exception: