From 01fc975eb5e550124db257f84a1a56b12cf9e07e Mon Sep 17 00:00:00 2001
From: Pepijn <pepijn@huggingface.co>
Date: Tue, 28 Apr 2026 14:10:57 +0200
Subject: [PATCH] fix(annotate): default video decode backend to pyav

Importing torchcodec (its package __init__) fails with a bad-alloc error
on the cu128/torch-2.8 stack in some environments (Lustre/conda combos).
The annotation pipeline calls decode_video_frames many times per episode,
so this is a hard blocker. Default the decode backend to pyav (always
available via the av package) and let users opt back into torchcodec by
setting LEROBOT_VIDEO_BACKEND=torchcodec.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 src/lerobot/annotations/steerable_pipeline/frames.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/src/lerobot/annotations/steerable_pipeline/frames.py b/src/lerobot/annotations/steerable_pipeline/frames.py
index 68c7cd025..79ef5f3ed 100644
--- a/src/lerobot/annotations/steerable_pipeline/frames.py
+++ b/src/lerobot/annotations/steerable_pipeline/frames.py
@@ -119,6 +119,8 @@ class VideoFrameProvider:
         return [img for img in out if img is not None]
 
     def _decode(self, episode_index: int, timestamps: list[float]) -> list[Any]:
+        import os as _os  # noqa: PLC0415
+
         from PIL import Image  # noqa: PLC0415
 
         from lerobot.datasets.video_utils import decode_video_frames  # noqa: PLC0415
@@ -127,11 +129,17 @@ class VideoFrameProvider:
         from_timestamp = ep[f"videos/{self.camera_key}/from_timestamp"]
         shifted = [from_timestamp + ts for ts in timestamps]
         video_path = self.root / self._meta.get_video_file_path(episode_index, self.camera_key)
+        # Importing ``torchcodec`` currently fails with a bad-alloc error on
+        # cu128/torch-2.8 in some environments, so default to ``pyav``
+        # (always available via the ``av`` package). Users can override by
+        # setting LEROBOT_VIDEO_BACKEND=torchcodec when their stack supports it.
+        backend = _os.environ.get("LEROBOT_VIDEO_BACKEND", "pyav")
         try:
             frames = decode_video_frames(
                 video_path,
                 shifted,
                 self.tolerance_s,
+                backend=backend,
                 return_uint8=True,
             )
         except Exception: