From 53c76418857ccccd59c4ade0b5151ba71bf4c5d6 Mon Sep 17 00:00:00 2001
From: Pepijn <pepijn@huggingface.co>
Date: Fri, 8 May 2026 11:53:43 +0200
Subject: [PATCH] review: fix dead-code bug, add thread safety, atomic writes,
 smaller cleanups
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

**Critical: video_for_episode was unreachable dead code.**
``video_for_episode`` was indented inside ``_decode_pyav_direct``, after
its ``return`` statement — Python parsed it as a nested function that
never executed. Module 1's ``_episode_video_block`` calls
``self.frame_provider.video_for_episode(record, target_count)`` on the
``use_video_url=False`` path, which would have AttributeError'd on any
real dataset. Tests passed only because they used ``_StubFrameProvider``
/ ``_NullProvider`` which have the method. Moved it to be a proper
method of ``VideoFrameProvider`` (right after ``frames_at``).

**Thread safety on VideoFrameProvider.**
The executor runs Module 1/2/3 phases under a ``ThreadPoolExecutor``, so
the per-instance ``_cache`` dict and the one-shot ``_warned_decode_fail``
flag were exposed to concurrent reads/writes. Added a ``threading.Lock``
field, wrapped cache reads/writes and the warn-flag check-and-set in
``with self._lock:``. Stub fixtures unaffected.

**episode_clip_path is now a method of VideoFrameProvider.**
Used to be a free function reaching into ``provider._meta.episodes`` and
``provider._meta.get_video_file_path`` from outside the class. As a
method it just uses ``self._meta``. The only caller (Module 1) updated;
no external callers.

**Atomic write in LanguageColumnsWriter.**
``pq.write_table(new_table, path)`` was overwriting the parquet shard
in place — a crash mid-write would corrupt the file. Now writes to a
sibling ``.tmp`` and ``Path.replace`` atomically.

**Smaller items:**
* ``executor.py`` docstring opened with "four phases" but listed six.
  Now says "six phases" to match.
* ``[annotations]`` extra in ``pyproject.toml`` now includes
  ``openai>=1.40,<2.0``. Default ``VlmConfig.backend`` is ``"openai"``,
  so without it ``_make_openai_client`` would ImportError on a fresh
  ``uv sync --extra annotations``.
* ``_snap_to_frame`` was duplicated identically in
  ``plan_subtasks_memory.py`` and ``interjections_and_speech.py``.
  Promoted to ``snap_to_frame`` in ``reader.py`` (next to
  ``EpisodeRecord``); both modules now import it. Backwards-compat alias
  not needed — no external callers.
* ``EpisodeRecord.frames_df()`` was re-reading the full parquet on every
  call. Now memoizes via a private dataclass field so repeat calls from
  different modules pay the cost once. Method signature unchanged.
* ``_extract_first_json_object`` had a redundant ``and not escape`` guard
  that was dead because the prior block already handled and reset
  ``escape``. Replaced with a comment explaining the invariant.

**Pre-existing lint cleanups surfaced once these files entered
pre-commit's scope:**
* dead local ``client = clients[0]`` in ``_make_openai_client`` (the
  real round-robin uses ``clients[rr_counter[...]]``).
* ``cmd = ... if "{port}" in cmd else f"...{port}"`` ternary collapse in
  ``_spawn_parallel_inference_servers``.
* ``seek_pts = 0 if stream.time_base is None else int(...)`` ternary
  collapse in ``_decode_pyav_direct``.
* ``# nosec B310`` on the localhost ``urllib.request.urlopen`` probe in
  ``_server_is_up`` — the URL is the user-configured local-server endpoint
  the CLI itself spawned, not arbitrary user input.

**Test added.**
``tests/annotations/test_frames.py`` pins the regression on
``VideoFrameProvider``: asserts ``video_for_episode`` and
``episode_clip_path`` are callable methods (not nested dead code or
free functions), and that the ``_lock`` field is a real
``threading.Lock``.

Sweep: 64 passed, 2 failed (same pre-existing module-impl bugs as
before this commit). Pre-commit clean.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 pyproject.toml                                |  10 +-
 .../steerable_pipeline/executor.py            |   4 +-
 .../annotations/steerable_pipeline/frames.py  | 220 +++++++++---------
 .../modules/interjections_and_speech.py       |  16 +-
 .../modules/plan_subtasks_memory.py           |  19 +-
 .../annotations/steerable_pipeline/reader.py  |  38 ++-
 .../steerable_pipeline/vlm_client.py          |  58 ++---
 .../annotations/steerable_pipeline/writer.py  |   8 +-
 tests/annotations/test_frames.py              |  67 ++++++
 uv.lock                                       |  48 +++-
 10 files changed, 284 insertions(+), 204 deletions(-)
 create mode 100644 tests/annotations/test_frames.py

diff --git a/pyproject.toml b/pyproject.toml
index 4fdf6f320..7cedf6e11 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -201,13 +201,15 @@ hilserl = ["lerobot[transformers-dep]", "gym-hil>=0.1.13,<0.2.0", "lerobot[grpci
 async = ["lerobot[grpcio-dep]", "lerobot[matplotlib-dep]"]
 peft = ["lerobot[transformers-dep]", "lerobot[peft-dep]"]
 
-# Annotation pipeline (lerobot-annotate). vllm is the preferred backend on
-# Linux, with a transformers fallback elsewhere. Distributed execution is
-# delegated to Hugging Face Jobs (see examples/annotation/run_hf_job.py),
-# so this pipeline pulls no cluster-scheduler dependency.
+# Annotation pipeline (lerobot-annotate). vllm is the preferred backend
+# on Linux, with a transformers fallback elsewhere; openai is the default
+# backend and talks to any OpenAI-compatible server (``vllm serve`` /
+# ``transformers serve`` / hosted endpoints). Distributed execution is
+# delegated to Hugging Face Jobs (see examples/annotation/run_hf_job.py).
 annotations = [
     "lerobot[dataset]",
     "lerobot[transformers-dep]",
+    "openai>=1.40,<2.0",
     "vllm>=0.6.0,<1.0.0; sys_platform == 'linux'",
 ]
 
diff --git a/src/lerobot/annotations/steerable_pipeline/executor.py b/src/lerobot/annotations/steerable_pipeline/executor.py
index 09cd4f18e..0f984d375 100644
--- a/src/lerobot/annotations/steerable_pipeline/executor.py
+++ b/src/lerobot/annotations/steerable_pipeline/executor.py
@@ -13,9 +13,9 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-"""In-process executor that runs the four annotation phases.
+"""In-process executor that runs the annotation phases.
 
-The executor plans **four phases** with the dependency order from the plan:
+The executor plans **six phases** in the dependency order from the plan:
 
     phase 1: Module 1 (plan + subtasks + memory)
     phase 2: Module 2 (interjections + speech)
diff --git a/src/lerobot/annotations/steerable_pipeline/frames.py b/src/lerobot/annotations/steerable_pipeline/frames.py
index 20a6b6081..c87a171a3 100644
--- a/src/lerobot/annotations/steerable_pipeline/frames.py
+++ b/src/lerobot/annotations/steerable_pipeline/frames.py
@@ -24,6 +24,7 @@ querying the same timestamp pay decode cost once.
 
 from __future__ import annotations
 
+import threading
 from dataclasses import dataclass, field
 from pathlib import Path
 from typing import Any, Protocol
@@ -121,6 +122,10 @@ class VideoFrameProvider:
     _meta: Any = field(default=None, init=False, repr=False)
     _cache: dict = field(default_factory=dict, init=False, repr=False)
     _camera_keys: list[str] = field(default_factory=list, init=False, repr=False)
+    # Pipeline runs Module 1/2/3 phases under a ThreadPoolExecutor (see
+    # ``ExecutorConfig.episode_parallelism``); guard the dict cache and the
+    # one-shot warn flag against concurrent updates from worker threads.
+    _lock: threading.Lock = field(default_factory=threading.Lock, init=False, repr=False)
 
     def __post_init__(self) -> None:
         from lerobot.datasets.dataset_metadata import LeRobotDatasetMetadata  # noqa: PLC0415
@@ -158,33 +163,110 @@ class VideoFrameProvider:
         out: list[Any] = []
         misses: list[float] = []
         miss_indices: list[int] = []
-        for i, ts in enumerate(timestamps):
-            key = (record.episode_index, target, round(float(ts), 6))
-            cached = self._cache.get(key)
-            if cached is not None:
-                out.append(cached)
-            else:
-                out.append(None)
-                misses.append(float(ts))
-                miss_indices.append(i)
+        with self._lock:
+            for i, ts in enumerate(timestamps):
+                key = (record.episode_index, target, round(float(ts), 6))
+                cached = self._cache.get(key)
+                if cached is not None:
+                    out.append(cached)
+                else:
+                    out.append(None)
+                    misses.append(float(ts))
+                    miss_indices.append(i)
 
         if misses:
             decoded = self._decode(record.episode_index, misses, target)
             # decoder may return fewer frames than requested when some
             # timestamps fall outside the video; pair what we have and
             # leave the rest as None to be filtered below.
-            for i, img in zip(miss_indices, decoded):
-                out[i] = img
-                key = (record.episode_index, target, round(float(timestamps[i]), 6))
-                if len(self._cache) >= self.cache_size:
-                    self._cache.pop(next(iter(self._cache)))
-                self._cache[key] = img
+            with self._lock:
+                for i, img in zip(miss_indices, decoded, strict=False):
+                    out[i] = img
+                    key = (record.episode_index, target, round(float(timestamps[i]), 6))
+                    if len(self._cache) >= self.cache_size:
+                        self._cache.pop(next(iter(self._cache)))
+                    self._cache[key] = img
         # filter out any None left over from decode failures
         return [img for img in out if img is not None]
 
-    def _decode(
-        self, episode_index: int, timestamps: list[float], camera_key: str
+    def video_for_episode(
+        self,
+        record: EpisodeRecord,
+        max_frames: int,
+        camera_key: str | None = None,
     ) -> list[Any]:
+        """Return up to ``max_frames`` images uniformly sampled across the episode.
+
+        The whole episode duration is covered; the model picks subtask
+        boundaries from the temporal pooling it does internally.
+        """
+        target = camera_key if camera_key is not None else self.camera_key
+        if max_frames <= 0 or target is None or not record.frame_timestamps:
+            return []
+        n_frames = min(max_frames, len(record.frame_timestamps))
+        if n_frames == len(record.frame_timestamps):
+            timestamps = list(record.frame_timestamps)
+        else:
+            t0 = record.frame_timestamps[0]
+            t_last = record.frame_timestamps[-1]
+            if t_last <= t0:
+                timestamps = [float(t0)] * n_frames
+            else:
+                step = (t_last - t0) / (n_frames - 1) if n_frames > 1 else 0.0
+                timestamps = [float(t0 + i * step) for i in range(n_frames)]
+        return self.frames_at(record, timestamps, camera_key=target)
+
+    def episode_clip_path(self, record: EpisodeRecord, cache_dir: Path) -> Path | None:
+        """Extract the episode's subclip to ``cache_dir/ep_{idx:06d}.mp4``.
+
+        Returns ``None`` if the dataset has no video tracks. Skips
+        re-extract when the cached clip already exists. Re-encodes to
+        H.264 (libx264) so the resulting mp4 is decodable by every
+        downstream video processor — stream-copy would inherit the
+        source codec (often AV1 in modern LeRobot datasets), which
+        vllm's libav build cannot decode.
+        """
+        import subprocess  # noqa: PLC0415
+
+        if self.camera_key is None:
+            return None
+        cache_dir.mkdir(parents=True, exist_ok=True)
+        out_path = cache_dir / f"ep_{record.episode_index:06d}.mp4"
+        if out_path.exists() and out_path.stat().st_size > 0:
+            return out_path
+        ep = self._meta.episodes[record.episode_index]
+        from_timestamp = float(ep[f"videos/{self.camera_key}/from_timestamp"])
+        to_timestamp = float(ep[f"videos/{self.camera_key}/to_timestamp"])
+        src = self.root / self._meta.get_video_file_path(record.episode_index, self.camera_key)
+        cmd = [
+            "ffmpeg",
+            "-y",
+            "-loglevel",
+            "error",
+            "-ss",
+            f"{from_timestamp:.3f}",
+            "-to",
+            f"{to_timestamp:.3f}",
+            "-i",
+            str(src),
+            "-c:v",
+            "libx264",
+            "-preset",
+            "ultrafast",
+            "-crf",
+            "23",
+            "-pix_fmt",
+            "yuv420p",
+            "-an",
+            str(out_path),
+        ]
+        try:
+            subprocess.run(cmd, check=True, timeout=300)
+        except (subprocess.CalledProcessError, subprocess.TimeoutExpired, FileNotFoundError):
+            return None
+        return out_path if out_path.exists() and out_path.stat().st_size > 0 else None
+
+    def _decode(self, episode_index: int, timestamps: list[float], camera_key: str) -> list[Any]:
         ep = self._meta.episodes[episode_index]
         from_timestamp = ep[f"videos/{camera_key}/from_timestamp"]
         shifted = [from_timestamp + ts for ts in timestamps]
@@ -197,25 +279,25 @@ class VideoFrameProvider:
             # Module-3-no-op (every prompt skipped because frames_at returned
             # []) is debuggable from the job log instead of post-hoc parquet
             # inspection. Subsequent failures stay quiet.
-            if not getattr(self, "_warned_decode_fail", False):
+            with self._lock:
+                already_warned = getattr(self, "_warned_decode_fail", False)
+                if not already_warned:
+                    self._warned_decode_fail = True
+            if not already_warned:
                 import logging  # noqa: PLC0415
 
                 logging.getLogger(__name__).warning(
-                    "VideoFrameProvider._decode failed for episode=%s camera=%s "
-                    "video_path=%s: %s",
+                    "VideoFrameProvider._decode failed for episode=%s camera=%s video_path=%s: %s",
                     episode_index,
                     camera_key,
                     video_path,
                     exc,
                     exc_info=True,
                 )
-                self._warned_decode_fail = True
             return []
 
 
-def _decode_pyav_direct(
-    video_path: Any, timestamps: list[float], tolerance_s: float
-) -> list[Any]:
+def _decode_pyav_direct(video_path: Any, timestamps: list[float], tolerance_s: float) -> list[Any]:
     """Decode the requested timestamps from ``video_path`` using PyAV directly.
 
     Bypasses ``lerobot.datasets.video_utils.decode_video_frames`` entirely
@@ -231,7 +313,6 @@ def _decode_pyav_direct(
     the previous behaviour); callers filter ``None``/missing entries.
     """
     import av  # noqa: PLC0415
-    from PIL import Image  # noqa: PLC0415
 
     if not timestamps:
         return []
@@ -243,10 +324,7 @@ def _decode_pyav_direct(
     try:
         stream = container.streams.video[0]
         # PyAV needs the seek target in stream timebase ticks.
-        if stream.time_base is None:
-            seek_pts = 0
-        else:
-            seek_pts = int(seek_to / float(stream.time_base))
+        seek_pts = 0 if stream.time_base is None else int(seek_to / float(stream.time_base))
         try:
             container.seek(seek_pts, any_frame=False, backward=True, stream=stream)
         except av.AVError:
@@ -276,33 +354,6 @@ def _decode_pyav_direct(
 
     return [results[ts] for ts in timestamps if ts in results]
 
-    def video_for_episode(
-        self,
-        record: EpisodeRecord,
-        max_frames: int,
-        camera_key: str | None = None,
-    ) -> list[Any]:
-        """Return up to ``max_frames`` images uniformly sampled across the episode.
-
-        The whole episode duration is covered; the model picks subtask
-        boundaries from the temporal pooling it does internally.
-        """
-        target = camera_key if camera_key is not None else self.camera_key
-        if max_frames <= 0 or target is None or not record.frame_timestamps:
-            return []
-        n_frames = min(max_frames, len(record.frame_timestamps))
-        if n_frames == len(record.frame_timestamps):
-            timestamps = list(record.frame_timestamps)
-        else:
-            t0 = record.frame_timestamps[0]
-            t_last = record.frame_timestamps[-1]
-            if t_last <= t0:
-                timestamps = [float(t0)] * n_frames
-            else:
-                step = (t_last - t0) / (n_frames - 1) if n_frames > 1 else 0.0
-                timestamps = [float(t0 + i * step) for i in range(n_frames)]
-        return self.frames_at(record, timestamps, camera_key=target)
-
 
 def make_frame_provider(root: Path, camera_key: str | None = None) -> FrameProvider:
     """Build a :class:`VideoFrameProvider` if videos are present, else null."""
@@ -341,60 +392,3 @@ def to_video_url_block(url: str | None, fps: float = 2.0) -> list[dict[str, Any]
     if not url:
         return []
     return [{"type": "video_url", "video_url": {"url": url}, "fps": fps}]
-
-
-def episode_clip_path(
-    record: EpisodeRecord,
-    provider: "VideoFrameProvider",
-    cache_dir: Path,
-) -> Path | None:
-    """Extract the episode's subclip to ``cache_dir/ep_{idx:06d}.mp4``.
-
-    Returns ``None`` if the dataset has no video tracks. Skips re-extract
-    when the cached clip already exists. Re-encodes to H.264
-    (libx264) so the resulting mp4 is decodable by every downstream
-    video processor — stream-copy would inherit the source codec
-    (often AV1 in modern LeRobot datasets), which vllm's libav build
-    cannot decode.
-    """
-    import subprocess  # noqa: PLC0415
-
-    if provider.camera_key is None:
-        return None
-    cache_dir.mkdir(parents=True, exist_ok=True)
-    out_path = cache_dir / f"ep_{record.episode_index:06d}.mp4"
-    if out_path.exists() and out_path.stat().st_size > 0:
-        return out_path
-    ep = provider._meta.episodes[record.episode_index]
-    from_timestamp = float(ep[f"videos/{provider.camera_key}/from_timestamp"])
-    to_timestamp = float(ep[f"videos/{provider.camera_key}/to_timestamp"])
-    src = provider.root / provider._meta.get_video_file_path(
-        record.episode_index, provider.camera_key
-    )
-    cmd = [
-        "ffmpeg",
-        "-y",
-        "-loglevel",
-        "error",
-        "-ss",
-        f"{from_timestamp:.3f}",
-        "-to",
-        f"{to_timestamp:.3f}",
-        "-i",
-        str(src),
-        "-c:v",
-        "libx264",
-        "-preset",
-        "ultrafast",
-        "-crf",
-        "23",
-        "-pix_fmt",
-        "yuv420p",
-        "-an",
-        str(out_path),
-    ]
-    try:
-        subprocess.run(cmd, check=True, timeout=300)
-    except (subprocess.CalledProcessError, subprocess.TimeoutExpired, FileNotFoundError):
-        return None
-    return out_path if out_path.exists() and out_path.stat().st_size > 0 else None
diff --git a/src/lerobot/annotations/steerable_pipeline/modules/interjections_and_speech.py b/src/lerobot/annotations/steerable_pipeline/modules/interjections_and_speech.py
index f434d9b9e..b2ce298ab 100644
--- a/src/lerobot/annotations/steerable_pipeline/modules/interjections_and_speech.py
+++ b/src/lerobot/annotations/steerable_pipeline/modules/interjections_and_speech.py
@@ -40,18 +40,12 @@ from typing import Any
 from ..config import Module2Config
 from ..frames import FrameProvider, null_provider, to_image_blocks
 from ..prompts import load as load_prompt
-from ..reader import EpisodeRecord
+from ..reader import EpisodeRecord, snap_to_frame
 from ..staging import EpisodeStaging
 from ..vlm_client import VlmClient
 from ..writer import speech_atom
 
 
-def _snap_to_frame(t: float, frame_timestamps: Sequence[float]) -> float:
-    if not frame_timestamps:
-        return float(t)
-    return float(min(frame_timestamps, key=lambda f: abs(f - t)))
-
-
 @dataclass
 class InterjectionsAndSpeechModule:
     """Generate task-start speech and mid-episode interjection/speech pairs."""
@@ -161,7 +155,7 @@ class InterjectionsAndSpeechModule:
 
         out: list[dict[str, Any]] = []
         for t, prev_subtask, next_subtask in chosen:
-            t_snap = _snap_to_frame(t, record.frame_timestamps)
+            t_snap = snap_to_frame(t, record.frame_timestamps)
             # Window straddles the boundary so the VLM sees the end of the
             # previous subtask and the start of the next one — same
             # conditioning the policy will see at training time.
@@ -197,9 +191,7 @@ class InterjectionsAndSpeechModule:
             out.append(speech_atom(t_snap, speech_text.strip()))
         return out
 
-    def _window_timestamps(
-        self, t_anchor: float, frame_timestamps: Sequence[float]
-    ) -> list[float]:
+    def _window_timestamps(self, t_anchor: float, frame_timestamps: Sequence[float]) -> list[float]:
         """Return a small set of frame timestamps centered on ``t_anchor``.
 
         The window straddles the subtask boundary the interjection sits
@@ -224,7 +216,7 @@ class InterjectionsAndSpeechModule:
         seen: set[float] = set()
         for tgt in targets:
             clamped = min(last_ts, max(0.0, tgt))
-            t = _snap_to_frame(clamped, frame_timestamps)
+            t = snap_to_frame(clamped, frame_timestamps)
             if t not in seen:
                 seen.add(t)
                 snapped.append(t)
diff --git a/src/lerobot/annotations/steerable_pipeline/modules/plan_subtasks_memory.py b/src/lerobot/annotations/steerable_pipeline/modules/plan_subtasks_memory.py
index cb9290e5a..dc683b952 100644
--- a/src/lerobot/annotations/steerable_pipeline/modules/plan_subtasks_memory.py
+++ b/src/lerobot/annotations/steerable_pipeline/modules/plan_subtasks_memory.py
@@ -26,25 +26,16 @@ from ..config import Module1Config
 from ..frames import (
     FrameProvider,
     VideoFrameProvider,
-    episode_clip_path,
     null_provider,
     to_video_block,
     to_video_url_block,
 )
 from ..prompts import load as load_prompt
-from ..reader import EpisodeRecord
+from ..reader import EpisodeRecord, snap_to_frame
 from ..staging import EpisodeStaging
 from ..vlm_client import VlmClient
 
 
-def _snap_to_frame(t: float, frame_timestamps: Sequence[float]) -> float:
-    """Snap an arbitrary float to the nearest exact source frame timestamp."""
-    if not frame_timestamps:
-        return float(t)
-    nearest = min(frame_timestamps, key=lambda f: abs(f - t))
-    return float(nearest)
-
-
 @dataclass
 class PlanSubtasksMemoryModule:
     """Generate subtask spans, plan, and memory rows.
@@ -109,7 +100,7 @@ class PlanSubtasksMemoryModule:
                     "role": "assistant",
                     "content": span["text"],
                     "style": "subtask",
-                    "timestamp": _snap_to_frame(span["start"], record.frame_timestamps),
+                    "timestamp": snap_to_frame(span["start"], record.frame_timestamps),
                     "tool_calls": None,
                 }
             )
@@ -132,7 +123,7 @@ class PlanSubtasksMemoryModule:
             remaining = [s["text"] for s in subtask_spans[i:]]
             mem_text = self._generate_memory(record, prior_memory, completed, remaining, task=effective_task)
             if mem_text:
-                ts = _snap_to_frame(span["start"], record.frame_timestamps)
+                ts = snap_to_frame(span["start"], record.frame_timestamps)
                 rows.append(
                     {
                         "role": "assistant",
@@ -239,7 +230,7 @@ class PlanSubtasksMemoryModule:
             return []
         if self.config.use_video_url and isinstance(self.frame_provider, VideoFrameProvider):
             cache_dir = Path(self.frame_provider.root) / ".annotate_staging" / ".video_clips"
-            clip = episode_clip_path(record, self.frame_provider, cache_dir)
+            clip = self.frame_provider.episode_clip_path(record, cache_dir)
             return (
                 to_video_url_block(f"file://{clip}", fps=self.config.use_video_url_fps)
                 if clip is not None
@@ -278,7 +269,7 @@ class PlanSubtasksMemoryModule:
             else [str(t) if t else None for t in interjection_texts]
         )
         for raw_t, inter_text in zip(interjection_times, texts, strict=True):
-            t = _snap_to_frame(raw_t, record.frame_timestamps)
+            t = snap_to_frame(raw_t, record.frame_timestamps)
             if t in already_planned:
                 continue
             already_planned.add(t)
diff --git a/src/lerobot/annotations/steerable_pipeline/reader.py b/src/lerobot/annotations/steerable_pipeline/reader.py
index 87cfcb591..3c9a7d3ba 100644
--- a/src/lerobot/annotations/steerable_pipeline/reader.py
+++ b/src/lerobot/annotations/steerable_pipeline/reader.py
@@ -31,8 +31,8 @@ rows into memory at once.
 
 from __future__ import annotations
 
-from collections.abc import Iterator
-from dataclasses import dataclass
+from collections.abc import Iterator, Sequence
+from dataclasses import dataclass, field
 from pathlib import Path
 from typing import Any
 
@@ -53,14 +53,34 @@ class EpisodeRecord:
     row_offset: int  # row offset within the parquet file where this episode starts
     row_count: int  # number of rows for this episode
 
-    def frames_df(self):  # type: ignore[no-untyped-def]
-        """Lazy-load the pandas slice for this episode."""
-        import pandas as pd  # noqa: PLC0415  - deferred for optional dataset extra
+    # Memoized parquet slice — populated on first ``frames_df()`` call so
+    # repeat queries from different modules don't re-read the whole shard.
+    _frames_df_cache: Any = field(default=None, init=False, repr=False, compare=False)
 
-        table = pq.read_table(self.data_path)
-        df: pd.DataFrame = table.to_pandas()
-        slice_ = df.iloc[self.row_offset : self.row_offset + self.row_count].reset_index(drop=True)
-        return slice_
+    def frames_df(self):  # type: ignore[no-untyped-def]
+        """Lazy-load the pandas slice for this episode (memoized)."""
+        if self._frames_df_cache is None:
+            import pandas as pd  # noqa: PLC0415  - deferred for optional dataset extra
+
+            table = pq.read_table(self.data_path)
+            df: pd.DataFrame = table.to_pandas()
+            self._frames_df_cache = df.iloc[self.row_offset : self.row_offset + self.row_count].reset_index(
+                drop=True
+            )
+        return self._frames_df_cache
+
+
+def snap_to_frame(t: float, frame_timestamps: Sequence[float]) -> float:
+    """Snap an arbitrary float to the nearest exact source frame timestamp.
+
+    Modules use this when emitting event-style rows so the row's
+    timestamp matches a real parquet frame (event rows must land on an
+    exact frame, see PR 1's "exact event matching" rule).
+    """
+    if not frame_timestamps:
+        return float(t)
+    nearest = min(frame_timestamps, key=lambda f: abs(f - t))
+    return float(nearest)
 
 
 def _load_tasks_lookup(root: Path) -> dict[int, str]:
diff --git a/src/lerobot/annotations/steerable_pipeline/vlm_client.py b/src/lerobot/annotations/steerable_pipeline/vlm_client.py
index fd18110d4..85030db07 100644
--- a/src/lerobot/annotations/steerable_pipeline/vlm_client.py
+++ b/src/lerobot/annotations/steerable_pipeline/vlm_client.py
@@ -116,7 +116,9 @@ def _extract_first_json_object(text: str) -> str | None:
         if ch == "\\":
             escape = True
             continue
-        if ch == '"' and not escape:
+        # Note: ``escape`` is always False here — the ``if escape`` branch
+        # above already handled and reset it.
+        if ch == '"':
             in_string = not in_string
             continue
         if in_string:
@@ -247,9 +249,8 @@ def _make_transformers_client(config: VlmConfig) -> VlmClient:
         from transformers import AutoProcessor  # type: ignore[import-not-found]
     except ImportError as exc:
         raise ImportError("transformers + torch are required for backend='transformers'.") from exc
-    auto_cls = (
-        getattr(transformers, "AutoModelForImageTextToText", None)
-        or getattr(transformers, "AutoModelForVision2Seq", None)
+    auto_cls = getattr(transformers, "AutoModelForImageTextToText", None) or getattr(
+        transformers, "AutoModelForVision2Seq", None
     )
     if auto_cls is None:
         raise ImportError(
@@ -257,9 +258,7 @@ def _make_transformers_client(config: VlmConfig) -> VlmClient:
             "transformers version. Install transformers>=4.45 (which has AutoModelForImageTextToText) "
             "for VL models."
         )
-    processor = AutoProcessor.from_pretrained(
-        config.model_id, trust_remote_code=config.trust_remote_code
-    )
+    processor = AutoProcessor.from_pretrained(config.model_id, trust_remote_code=config.trust_remote_code)
     import os as _os  # noqa: PLC0415
 
     use_accelerate = _os.environ.get("LEROBOT_TRANSFORMERS_DEVICE_MAP", "manual") != "manual"
@@ -327,8 +326,7 @@ def _make_openai_client(config: VlmConfig) -> VlmClient:
         from openai import OpenAI  # type: ignore[import-not-found]
     except ImportError as exc:
         raise ImportError(
-            "openai package is required for backend='openai'. "
-            "Install with `pip install openai`."
+            "openai package is required for backend='openai'. Install with `pip install openai`."
         ) from exc
 
     api_base = config.api_base
@@ -357,22 +355,17 @@ def _make_openai_client(config: VlmConfig) -> VlmClient:
             print(f"[lerobot-annotate] server ready at {api_base}", flush=True)
 
     clients = [OpenAI(base_url=base, api_key=api_key) for base in api_bases]
-    client = clients[0]
     # round-robin counter for parallel mode
     rr_counter = {"i": 0}
 
     # ``mm_processor_kwargs`` is a vllm-specific extra; transformers serve
     # rejects it with HTTP 422. Send it only when explicitly opted in via
     # an env var (e.g. ``LEROBOT_OPENAI_SEND_MM_KWARGS=1`` for vllm).
-    send_mm_kwargs = os.environ.get(
-        "LEROBOT_OPENAI_SEND_MM_KWARGS", ""
-    ).lower() in {"1", "true", "yes"}
+    send_mm_kwargs = os.environ.get("LEROBOT_OPENAI_SEND_MM_KWARGS", "").lower() in {"1", "true", "yes"}
 
     rr_lock = threading.Lock()
 
-    def _one_call(
-        messages: Sequence[dict[str, Any]], max_tok: int, temp: float
-    ) -> str:
+    def _one_call(messages: Sequence[dict[str, Any]], max_tok: int, temp: float) -> str:
         api_messages, mm_kwargs = _to_openai_messages(messages)
         kwargs: dict[str, Any] = {
             "model": config.model_id,
@@ -393,9 +386,7 @@ def _make_openai_client(config: VlmConfig) -> VlmClient:
         response = chosen.chat.completions.create(**kwargs)
         return response.choices[0].message.content or ""
 
-    def _gen(
-        batch: Sequence[Sequence[dict[str, Any]]], max_tok: int, temp: float
-    ) -> list[str]:
+    def _gen(batch: Sequence[Sequence[dict[str, Any]]], max_tok: int, temp: float) -> list[str]:
         if len(batch) <= 1 or config.client_concurrency <= 1:
             return [_one_call(messages, max_tok, temp) for messages in batch]
         # Parallel fan-out — vllm batches these on the server side.
@@ -403,9 +394,7 @@ def _make_openai_client(config: VlmConfig) -> VlmClient:
 
         max_workers = min(config.client_concurrency, len(batch))
         with ThreadPoolExecutor(max_workers=max_workers) as pool:
-            futures = [
-                pool.submit(_one_call, messages, max_tok, temp) for messages in batch
-            ]
+            futures = [pool.submit(_one_call, messages, max_tok, temp) for messages in batch]
             return [f.result() for f in futures]
 
     return _GenericTextClient(_gen, config)
@@ -462,11 +451,7 @@ def _spawn_parallel_inference_servers(config: VlmConfig) -> list[str]:
         gpu = i % num_gpus
         env = _os.environ.copy()
         env["CUDA_VISIBLE_DEVICES"] = str(gpu)
-        cmd = base_cmd
-        if "{port}" in cmd:
-            cmd = cmd.replace("{port}", str(port))
-        else:
-            cmd = f"{cmd} --port {port}"
+        cmd = base_cmd.replace("{port}", str(port)) if "{port}" in base_cmd else f"{base_cmd} --port {port}"
         api_base = f"http://localhost:{port}/v1"
         api_bases.append(api_base)
         print(f"[server-{i}] launching on GPU {gpu} port {port}: {cmd}", flush=True)
@@ -530,9 +515,7 @@ def _spawn_parallel_inference_servers(config: VlmConfig) -> list[str]:
                 )
         time.sleep(2)
     if any(not ev.is_set() for ev in ready_events):
-        raise RuntimeError(
-            f"[server] not all replicas became ready within {config.serve_ready_timeout_s}s"
-        )
+        raise RuntimeError(f"[server] not all replicas became ready within {config.serve_ready_timeout_s}s")
     print(f"[lerobot-annotate] all {n} servers ready: {api_bases}", flush=True)
     return api_bases
 
@@ -542,8 +525,12 @@ def _server_is_up(api_base: str) -> bool:
     import urllib.request  # noqa: PLC0415
 
     url = api_base.rstrip("/") + "/models"
+    # ``api_base`` is the user-configured local-server URL we just spawned
+    # or the user passed in via ``--vlm.api_base``; the bandit B310 warning
+    # is for arbitrary user-controlled URLs with file:/ schemes which
+    # cannot reach this code path.
     try:
-        with urllib.request.urlopen(url, timeout=2) as resp:
+        with urllib.request.urlopen(url, timeout=2) as resp:  # noqa: S310  # nosec B310
             return resp.status == 200
     except Exception:  # noqa: BLE001
         return False
@@ -566,7 +553,6 @@ def _spawn_inference_server(config: VlmConfig) -> str:
     import sys  # noqa: PLC0415
     import threading  # noqa: PLC0415
     import time  # noqa: PLC0415
-    import urllib.request  # noqa: PLC0415
 
     cmd = config.serve_command
     if not cmd:
@@ -657,9 +643,7 @@ def _spawn_inference_server(config: VlmConfig) -> str:
         if ready_event.wait(timeout=2):
             return api_base
     proc.terminate()
-    raise RuntimeError(
-        f"[server] did not become ready within {config.serve_ready_timeout_s}s"
-    )
+    raise RuntimeError(f"[server] did not become ready within {config.serve_ready_timeout_s}s")
 
 
 def _to_openai_messages(
@@ -693,9 +677,7 @@ def _to_openai_messages(
             elif block_type == "video":
                 frames = block.get("video", [])
                 for img in frames:
-                    out_blocks.append(
-                        {"type": "image_url", "image_url": {"url": _pil_to_data_url(img)}}
-                    )
+                    out_blocks.append({"type": "image_url", "image_url": {"url": _pil_to_data_url(img)}})
             elif block_type == "video_url":
                 video_url = dict(block["video_url"])
                 url = video_url.get("url", "")
diff --git a/src/lerobot/annotations/steerable_pipeline/writer.py b/src/lerobot/annotations/steerable_pipeline/writer.py
index 85c5aff3f..248a08639 100644
--- a/src/lerobot/annotations/steerable_pipeline/writer.py
+++ b/src/lerobot/annotations/steerable_pipeline/writer.py
@@ -264,7 +264,13 @@ class LanguageColumnsWriter:
         new_table = self._materialize_table(
             table, per_row_persistent, per_row_events, drop_old=self.drop_existing_subtask_index
         )
-        pq.write_table(new_table, path)
+        # Atomic replace: write to a sibling tmp path and rename so a crash
+        # mid-write can't leave a half-written shard that ``pq.read_table``
+        # would then fail to open. ``Path.replace`` is atomic on POSIX +
+        # Windows when source and target sit on the same filesystem.
+        tmp_path = path.with_suffix(path.suffix + ".tmp")
+        pq.write_table(new_table, tmp_path)
+        tmp_path.replace(path)
 
     def _materialize_table(
         self,
diff --git a/tests/annotations/test_frames.py b/tests/annotations/test_frames.py
new file mode 100644
index 000000000..af2833c23
--- /dev/null
+++ b/tests/annotations/test_frames.py
@@ -0,0 +1,67 @@
+#!/usr/bin/env python
+
+# Copyright 2026 The HuggingFace Inc. team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Unit tests for :class:`VideoFrameProvider` method bindings.
+
+These were prompted by a real regression: ``video_for_episode`` was
+indented one level too deep so it ended up nested *inside* the
+``_decode_pyav_direct`` helper (after that function's ``return``
+statement) — silently dead code that meant production runs with
+``use_video_url=False`` would ``AttributeError`` on
+``self.frame_provider.video_for_episode(...)``. The existing module
+tests didn't catch it because they exercise stub providers.
+
+The tests below assert on the class itself (not on an instance), so a
+future reindent regression flips them to red without needing a real
+LeRobot dataset on disk.
+"""
+
+from __future__ import annotations
+
+import pytest
+
+pytest.importorskip("datasets", reason="datasets is required (install lerobot[dataset])")
+
+from lerobot.annotations.steerable_pipeline.frames import (  # noqa: E402
+    VideoFrameProvider,
+)
+
+
+def test_video_for_episode_is_a_method_of_videoframeprovider():
+    """``video_for_episode`` must be a bound method, not nested dead code."""
+    assert callable(getattr(VideoFrameProvider, "video_for_episode", None))
+
+
+def test_episode_clip_path_is_a_method_of_videoframeprovider():
+    """``episode_clip_path`` is now a method (was a free function reaching
+    into ``provider._meta`` from outside the class)."""
+    assert callable(getattr(VideoFrameProvider, "episode_clip_path", None))
+
+
+def test_videoframeprovider_has_a_lock_for_concurrent_use():
+    """A ``ThreadPoolExecutor`` runs Module 1/2/3 phases concurrently;
+    the cache + warn-flag accesses must be guarded.
+    """
+    import threading
+
+    # Fresh-instance check via a minimal fake to avoid touching the hub.
+    # The lock is declared with ``init=False`` and has a default factory,
+    # so a constructed instance must own a real ``threading.Lock``.
+    lock_field = next(
+        (f for f in VideoFrameProvider.__dataclass_fields__.values() if f.name == "_lock"),
+        None,
+    )
+    assert lock_field is not None
+    assert lock_field.default_factory is threading.Lock
diff --git a/uv.lock b/uv.lock
index b15fd4bf0..a7668241e 100644
--- a/uv.lock
+++ b/uv.lock
@@ -2512,6 +2512,8 @@ version = "0.14.0"
 source = { registry = "https://pypi.org/simple" }
 sdist = { url = "https://files.pythonhosted.org/packages/6e/c1/0cddc6eb17d4c53a99840953f95dd3accdc5cfc7a337b0e9b26476276be9/jiter-0.14.0.tar.gz", hash = "sha256:e8a39e66dac7153cf3f964a12aad515afa8d74938ec5cc0018adcdae5367c79e", size = 165725, upload-time = "2026-04-10T14:28:42.01Z" }
 wheels = [
+    { url = "https://files.pythonhosted.org/packages/5a/68/7390a418f10897da93b158f2d5a8bd0bcd73a0f9ec3bb36917085bb759ef/jiter-0.14.0-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:2fb2ce3a7bc331256dfb14cefc34832366bb28a9aca81deaf43bbf2a5659e607", size = 316295, upload-time = "2026-04-10T14:26:24.887Z" },
+    { url = "https://files.pythonhosted.org/packages/60/a0/5854ac00ff63551c52c6c89534ec6aba4b93474e7924d64e860b1c94165b/jiter-0.14.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:5252a7ca23785cef5d02d4ece6077a1b556a410c591b379f82091c3001e14844", size = 315898, upload-time = "2026-04-10T14:26:26.601Z" },
     { url = "https://files.pythonhosted.org/packages/41/a1/4f44832650a16b18e8391f1bf1d6ca4909bc738351826bcc198bba4357f4/jiter-0.14.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c409578cbd77c338975670ada777add4efd53379667edf0aceea730cabede6fb", size = 343730, upload-time = "2026-04-10T14:26:28.326Z" },
     { url = "https://files.pythonhosted.org/packages/48/64/a329e9d469f86307203594b1707e11ae51c3348d03bfd514a5f997870012/jiter-0.14.0-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:7ede4331a1899d604463369c730dbb961ffdc5312bc7f16c41c2896415b1304a", size = 370102, upload-time = "2026-04-10T14:26:30.089Z" },
     { url = "https://files.pythonhosted.org/packages/94/c1/5e3dfc59635aa4d4c7bd20a820ac1d09b8ed851568356802cf1c08edb3cf/jiter-0.14.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:92cd8b6025981a041f5310430310b55b25ca593972c16407af8837d3d7d2ca01", size = 461335, upload-time = "2026-04-10T14:26:31.911Z" },
@@ -2521,6 +2523,11 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/2e/48/76750835b87029342727c1a268bea8878ab988caf81ee4e7b880900eeb5a/jiter-0.14.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:7d9d51eb96c82a9652933bd769fe6de66877d6eb2b2440e281f2938c51b5643e", size = 393172, upload-time = "2026-04-10T14:26:38.097Z" },
     { url = "https://files.pythonhosted.org/packages/a6/60/456c4e81d5c8045279aefe60e9e483be08793828800a4e64add8fdde7f2a/jiter-0.14.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:d824ca4148b705970bf4e120924a212fdfca9859a73e42bd7889a63a4ea6bb98", size = 520300, upload-time = "2026-04-10T14:26:39.532Z" },
     { url = "https://files.pythonhosted.org/packages/a8/9f/2020e0984c235f678dced38fe4eec3058cf528e6af36ebf969b410305941/jiter-0.14.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:ff3a6465b3a0f54b1a430f45c3c0ba7d61ceb45cbc3e33f9e1a7f638d690baf3", size = 553059, upload-time = "2026-04-10T14:26:40.991Z" },
+    { url = "https://files.pythonhosted.org/packages/ef/32/e2d298e1a22a4bbe6062136d1c7192db7dba003a6975e51d9a9eecabc4c2/jiter-0.14.0-cp312-cp312-win32.whl", hash = "sha256:5dec7c0a3e98d2a3f8a2e67382d0d7c3ac60c69103a4b271da889b4e8bb1e129", size = 206030, upload-time = "2026-04-10T14:26:42.517Z" },
+    { url = "https://files.pythonhosted.org/packages/36/ac/96369141b3d8a4a8e4590e983085efe1c436f35c0cda940dd76d942e3e40/jiter-0.14.0-cp312-cp312-win_amd64.whl", hash = "sha256:fc7e37b4b8bc7e80a63ad6cfa5fc11fab27dbfea4cc4ae644b1ab3f273dc348f", size = 201603, upload-time = "2026-04-10T14:26:44.328Z" },
+    { url = "https://files.pythonhosted.org/packages/01/c3/75d847f264647017d7e3052bbcc8b1e24b95fa139c320c5f5066fa7a0bdd/jiter-0.14.0-cp312-cp312-win_arm64.whl", hash = "sha256:ee4a72f12847ef29b072aee9ad5474041ab2924106bdca9fcf5d7d965853e057", size = 191525, upload-time = "2026-04-10T14:26:46Z" },
+    { url = "https://files.pythonhosted.org/packages/97/2a/09f70020898507a89279659a1afe3364d57fc1b2c89949081975d135f6f5/jiter-0.14.0-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:af72f204cf4d44258e5b4c1745130ac45ddab0e71a06333b01de660ab4187a94", size = 315502, upload-time = "2026-04-10T14:26:47.697Z" },
+    { url = "https://files.pythonhosted.org/packages/d6/be/080c96a45cd74f9fce5db4fd68510b88087fb37ffe2541ff73c12db92535/jiter-0.14.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:4b77da71f6e819be5fbcec11a453fde5b1d0267ef6ed487e2a392fd8e14e4e3a", size = 314870, upload-time = "2026-04-10T14:26:49.149Z" },
     { url = "https://files.pythonhosted.org/packages/7d/5e/2d0fee155826a968a832cc32438de5e2a193292c8721ca70d0b53e58245b/jiter-0.14.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:77f4ea612fe8b84b8b04e51d0e78029ecf3466348e25973f953de6e6a59aa4c1", size = 343406, upload-time = "2026-04-10T14:26:50.762Z" },
     { url = "https://files.pythonhosted.org/packages/70/af/bf9ee0d3a4f8dc0d679fc1337f874fe60cdbf841ebbb304b374e1c9aaceb/jiter-0.14.0-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:62fe2451f8fcc0240261e6a4df18ecbcd58327857e61e625b2393ea3b468aac9", size = 369415, upload-time = "2026-04-10T14:26:52.188Z" },
     { url = "https://files.pythonhosted.org/packages/0f/83/8e8561eadba31f4d3948a5b712fb0447ec71c3560b57a855449e7b8ddc98/jiter-0.14.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:6112f26f5afc75bcb475787d29da3aa92f9d09c7858f632f4be6ffe607be82e9", size = 461456, upload-time = "2026-04-10T14:26:53.611Z" },
@@ -2530,8 +2537,16 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/b0/3b/cf71506d270e5f84d97326bf220e47aed9b95e9a4a060758fb07772170ab/jiter-0.14.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:ab18d11074485438695f8d34a1b6da61db9754248f96d51341956607a8f39985", size = 392564, upload-time = "2026-04-10T14:27:00.018Z" },
     { url = "https://files.pythonhosted.org/packages/b0/cc/8c6c74a3efb5bd671bfd14f51e8a73375464ca914b1551bc3b40e26ac2c9/jiter-0.14.0-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:801028dcfc26ac0895e4964cbc0fd62c73be9fd4a7d7b1aaf6e5790033a719b7", size = 520322, upload-time = "2026-04-10T14:27:01.664Z" },
     { url = "https://files.pythonhosted.org/packages/41/24/68d7b883ec959884ddf00d019b2e0e82ba81b167e1253684fa90519ce33c/jiter-0.14.0-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:ad425b087aafb4a1c7e1e98a279200743b9aaf30c3e0ba723aec93f061bd9bc8", size = 552619, upload-time = "2026-04-10T14:27:03.316Z" },
+    { url = "https://files.pythonhosted.org/packages/b6/89/b1a0985223bbf3150ff9e8f46f98fc9360c1de94f48abe271bbe1b465682/jiter-0.14.0-cp313-cp313-win32.whl", hash = "sha256:882bcb9b334318e233950b8be366fe5f92c86b66a7e449e76975dfd6d776a01f", size = 205699, upload-time = "2026-04-10T14:27:04.662Z" },
+    { url = "https://files.pythonhosted.org/packages/4c/19/3f339a5a7f14a11730e67f6be34f9d5105751d547b615ef593fa122a5ded/jiter-0.14.0-cp313-cp313-win_amd64.whl", hash = "sha256:9b8c571a5dba09b98bd3462b5a53f27209a5cbbe85670391692ede71974e979f", size = 201323, upload-time = "2026-04-10T14:27:06.139Z" },
+    { url = "https://files.pythonhosted.org/packages/50/56/752dd89c84be0e022a8ea3720bcfa0a8431db79a962578544812ce061739/jiter-0.14.0-cp313-cp313-win_arm64.whl", hash = "sha256:34f19dcc35cb1abe7c369b3756babf8c7f04595c0807a848df8f26ef8298ef92", size = 191099, upload-time = "2026-04-10T14:27:07.564Z" },
+    { url = "https://files.pythonhosted.org/packages/91/28/292916f354f25a1fe8cf2c918d1415c699a4a659ae00be0430e1c5d9ffea/jiter-0.14.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:e89bcd7d426a75bb4952c696b267075790d854a07aad4c9894551a82c5b574ab", size = 320880, upload-time = "2026-04-10T14:27:09.326Z" },
     { url = "https://files.pythonhosted.org/packages/ad/c7/b002a7d8b8957ac3d469bd59c18ef4b1595a5216ae0de639a287b9816023/jiter-0.14.0-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7b25beaa0d4447ea8c7ae0c18c688905d34840d7d0b937f2f7bdd52162c98a40", size = 346563, upload-time = "2026-04-10T14:27:11.287Z" },
     { url = "https://files.pythonhosted.org/packages/f9/3b/f8d07580d8706021d255a6356b8fab13ee4c869412995550ce6ed4ddf97d/jiter-0.14.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:651a8758dd413c51e3b7f6557cdc6921faf70b14106f45f969f091f5cda990ea", size = 357928, upload-time = "2026-04-10T14:27:12.729Z" },
+    { url = "https://files.pythonhosted.org/packages/47/5b/ac1a974da29e35507230383110ffec59998b290a8732585d04e19a9eb5ba/jiter-0.14.0-cp313-cp313t-win_amd64.whl", hash = "sha256:e1a7eead856a5038a8d291f1447176ab0b525c77a279a058121b5fccee257f6f", size = 203519, upload-time = "2026-04-10T14:27:14.125Z" },
+    { url = "https://files.pythonhosted.org/packages/96/6d/9fc8433d667d2454271378a79747d8c76c10b51b482b454e6190e511f244/jiter-0.14.0-cp313-cp313t-win_arm64.whl", hash = "sha256:2e692633a12cda97e352fdcd1c4acc971b1c28707e1e33aeef782b0cbf051975", size = 190113, upload-time = "2026-04-10T14:27:16.638Z" },
+    { url = "https://files.pythonhosted.org/packages/4f/1e/354ed92461b165bd581f9ef5150971a572c873ec3b68a916d5aa91da3cc2/jiter-0.14.0-cp314-cp314-macosx_10_12_x86_64.whl", hash = "sha256:6f396837fc7577871ca8c12edaf239ed9ccef3bbe39904ae9b8b63ce0a48b140", size = 315277, upload-time = "2026-04-10T14:27:18.109Z" },
+    { url = "https://files.pythonhosted.org/packages/a6/95/8c7c7028aa8636ac21b7a55faef3e34215e6ed0cbf5ae58258427f621aa3/jiter-0.14.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:a4d50ea3d8ba4176f79754333bd35f1bbcd28e91adc13eb9b7ca91bc52a6cef9", size = 315923, upload-time = "2026-04-10T14:27:19.603Z" },
     { url = "https://files.pythonhosted.org/packages/47/40/e2a852a44c4a089f2681a16611b7ce113224a80fd8504c46d78491b47220/jiter-0.14.0-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ce17f8a050447d1b4153bda4fb7d26e6a9e74eb4f4a41913f30934c5075bf615", size = 344943, upload-time = "2026-04-10T14:27:21.262Z" },
     { url = "https://files.pythonhosted.org/packages/fc/1f/670f92adee1e9895eac41e8a4d623b6da68c4d46249d8b556b60b63f949e/jiter-0.14.0-cp314-cp314-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:f4f1c4b125e1652aefbc2e2c1617b60a160ab789d180e3d423c41439e5f32850", size = 369725, upload-time = "2026-04-10T14:27:22.766Z" },
     { url = "https://files.pythonhosted.org/packages/01/2f/541c9ba567d05de1c4874a0f8f8c5e3fd78e2b874266623da9a775cf46e0/jiter-0.14.0-cp314-cp314-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:be808176a6a3a14321d18c603f2d40741858a7c4fc982f83232842689fe86dd9", size = 461210, upload-time = "2026-04-10T14:27:24.315Z" },
@@ -2541,6 +2556,10 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/ad/d2/8b1461def6b96ba44530df20d07ef7a1c7da22f3f9bf1727e2d611077bf1/jiter-0.14.0-cp314-cp314-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:cff5708f7ed0fa098f2b53446c6fa74c48469118e5cd7497b4f1cd569ab06928", size = 394512, upload-time = "2026-04-10T14:27:31.344Z" },
     { url = "https://files.pythonhosted.org/packages/e3/88/837566dd6ed6e452e8d3205355afd484ce44b2533edfa4ed73a298ea893e/jiter-0.14.0-cp314-cp314-musllinux_1_1_aarch64.whl", hash = "sha256:2492e5f06c36a976d25c7cc347a60e26d5470178d44cde1b9b75e60b4e519f28", size = 521120, upload-time = "2026-04-10T14:27:33.299Z" },
     { url = "https://files.pythonhosted.org/packages/89/6b/b00b45c4d1b4c031777fe161d620b755b5b02cdade1e316dcb46e4471d63/jiter-0.14.0-cp314-cp314-musllinux_1_1_x86_64.whl", hash = "sha256:7609cfbe3a03d37bfdbf5052012d5a879e72b83168a363deae7b3a26564d57de", size = 553668, upload-time = "2026-04-10T14:27:34.868Z" },
+    { url = "https://files.pythonhosted.org/packages/ad/d8/6fe5b42011d19397433d345716eac16728ac241862a2aac9c91923c7509a/jiter-0.14.0-cp314-cp314-win32.whl", hash = "sha256:7282342d32e357543565286b6450378c3cd402eea333fc1ebe146f1fabb306fc", size = 207001, upload-time = "2026-04-10T14:27:36.455Z" },
+    { url = "https://files.pythonhosted.org/packages/e5/43/5c2e08da1efad5e410f0eaaabeadd954812612c33fbbd8fd5328b489139d/jiter-0.14.0-cp314-cp314-win_amd64.whl", hash = "sha256:bd77945f38866a448e73b0b7637366afa814d4617790ecd88a18ca74377e6c02", size = 202187, upload-time = "2026-04-10T14:27:38Z" },
+    { url = "https://files.pythonhosted.org/packages/aa/1f/6e39ac0b4cdfa23e606af5b245df5f9adaa76f35e0c5096790da430ca506/jiter-0.14.0-cp314-cp314-win_arm64.whl", hash = "sha256:f2d4c61da0821ee42e0cdf5489da60a6d074306313a377c2b35af464955a3611", size = 192257, upload-time = "2026-04-10T14:27:39.504Z" },
+    { url = "https://files.pythonhosted.org/packages/05/57/7dbc0ffbbb5176a27e3518716608aa464aee2e2887dc938f0b900a120449/jiter-0.14.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:1bf7ff85517dd2f20a5750081d2b75083c1b269cf75afc7511bdf1f9548beb3b", size = 323441, upload-time = "2026-04-10T14:27:41.039Z" },
     { url = "https://files.pythonhosted.org/packages/83/6e/7b3314398d8983f06b557aa21b670511ec72d3b79a68ee5e4d9bff972286/jiter-0.14.0-cp314-cp314t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c8ef8791c3e78d6c6b157c6d360fbb5c715bebb8113bc6a9303c5caff012754a", size = 348109, upload-time = "2026-04-10T14:27:42.552Z" },
     { url = "https://files.pythonhosted.org/packages/ae/4f/8dc674bcd7db6dba566de73c08c763c337058baff1dbeb34567045b27cdc/jiter-0.14.0-cp314-cp314t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:e74663b8b10da1fe0f4e4703fd7980d24ad17174b6bb35d8498d6e3ebce2ae6a", size = 368328, upload-time = "2026-04-10T14:27:44.574Z" },
     { url = "https://files.pythonhosted.org/packages/3b/5f/188e09a1f20906f98bbdec44ed820e19f4e8eb8aff88b9d1a5a497587ff3/jiter-0.14.0-cp314-cp314t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1aca29ba52913f78362ec9c2da62f22cdc4c3083313403f90c15460979b84d9b", size = 463301, upload-time = "2026-04-10T14:27:46.717Z" },
@@ -2550,6 +2569,11 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/c3/0f/7bea65ea2a6d91f2bf989ff11a18136644392bf2b0497a1fa50934c30a9c/jiter-0.14.0-cp314-cp314t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:260bf7ca20704d58d41f669e5e9fe7fe2fa72901a6b324e79056f5d52e9c9be2", size = 393926, upload-time = "2026-04-10T14:27:53.368Z" },
     { url = "https://files.pythonhosted.org/packages/3c/a1/b1ff7d70deef61ac0b7c6c2f12d2ace950cdeecb4fdc94500a0926802857/jiter-0.14.0-cp314-cp314t-musllinux_1_1_aarch64.whl", hash = "sha256:37826e3df29e60f30a382f9294348d0238ef127f4b5d7f5f8da78b5b9e050560", size = 521052, upload-time = "2026-04-10T14:27:55.058Z" },
     { url = "https://files.pythonhosted.org/packages/0b/7b/3b0649983cbaf15eda26a414b5b1982e910c67bd6f7b1b490f3cfc76896a/jiter-0.14.0-cp314-cp314t-musllinux_1_1_x86_64.whl", hash = "sha256:645be49c46f2900937ba0eaf871ad5183c96858c0af74b6becc7f4e367e36e06", size = 553716, upload-time = "2026-04-10T14:27:57.269Z" },
+    { url = "https://files.pythonhosted.org/packages/97/f8/33d78c83bd93ae0c0af05293a6660f88a1977caef39a6d72a84afab94ce0/jiter-0.14.0-cp314-cp314t-win32.whl", hash = "sha256:2f7877ed45118de283786178eceaf877110abacd04fde31efff3940ae9672674", size = 207957, upload-time = "2026-04-10T14:27:59.285Z" },
+    { url = "https://files.pythonhosted.org/packages/d6/ac/2b760516c03e2227826d1f7025d89bf6bf6357a28fe75c2a2800873c50bf/jiter-0.14.0-cp314-cp314t-win_amd64.whl", hash = "sha256:14c0cb10337c49f5eafe8e7364daca5e29a020ea03580b8f8e6c597fed4e1588", size = 204690, upload-time = "2026-04-10T14:28:00.962Z" },
+    { url = "https://files.pythonhosted.org/packages/dc/2e/a44c20c58aeed0355f2d326969a181696aeb551a25195f47563908a815be/jiter-0.14.0-cp314-cp314t-win_arm64.whl", hash = "sha256:5419d4aa2024961da9fe12a9cfe7484996735dca99e8e090b5c88595ef1951ff", size = 191338, upload-time = "2026-04-10T14:28:02.853Z" },
+    { url = "https://files.pythonhosted.org/packages/21/42/9042c3f3019de4adcb8c16591c325ec7255beea9fcd33a42a43f3b0b1000/jiter-0.14.0-graalpy312-graalpy250_312_native-macosx_10_12_x86_64.whl", hash = "sha256:fbd9e482663ca9d005d051330e4d2d8150bb208a209409c10f7e7dfdf7c49da9", size = 308810, upload-time = "2026-04-10T14:28:34.673Z" },
+    { url = "https://files.pythonhosted.org/packages/60/cf/a7e19b308bd86bb04776803b1f01a5f9a287a4c55205f4708827ee487fbf/jiter-0.14.0-graalpy312-graalpy250_312_native-macosx_11_0_arm64.whl", hash = "sha256:33a20d838b91ef376b3a56896d5b04e725c7df5bc4864cc6569cf046a8d73b6d", size = 308443, upload-time = "2026-04-10T14:28:36.658Z" },
     { url = "https://files.pythonhosted.org/packages/ca/44/e26ede3f0caeff93f222559cb0cc4ca68579f07d009d7b6010c5b586f9b1/jiter-0.14.0-graalpy312-graalpy250_312_native-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:432c4db5255d86a259efde91e55cb4c8d18c0521d844c9e2e7efcce3899fb016", size = 343039, upload-time = "2026-04-10T14:28:38.356Z" },
     { url = "https://files.pythonhosted.org/packages/da/e9/1f9ada30cef7b05e74bb06f52127e7a724976c225f46adb65c37b1dadfb6/jiter-0.14.0-graalpy312-graalpy250_312_native-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:67f00d94b281174144d6532a04b66a12cb866cbdc47c3af3bfe2973677f9861a", size = 349613, upload-time = "2026-04-10T14:28:40.066Z" },
 ]
@@ -3114,6 +3138,7 @@ annotations = [
     { name = "av" },
     { name = "datasets" },
     { name = "jsonlines" },
+    { name = "openai" },
     { name = "pandas" },
     { name = "pyarrow" },
     { name = "torchcodec", marker = "(platform_machine != 'aarch64' and platform_machine != 'arm64' and platform_machine != 'armv7l' and sys_platform == 'linux') or (platform_machine != 'x86_64' and sys_platform == 'darwin') or (sys_platform != 'darwin' and sys_platform != 'linux' and sys_platform != 'win32')" },
@@ -3544,6 +3569,7 @@ requires-dist = [
     { name = "numpy", specifier = ">=2.0.0,<2.3.0" },
     { name = "onnx", marker = "extra == 'unitree-g1'", specifier = ">=1.16.0,<2.0.0" },
     { name = "onnxruntime", marker = "extra == 'unitree-g1'", specifier = ">=1.16.0,<2.0.0" },
+    { name = "openai", marker = "extra == 'annotations'", specifier = ">=1.40,<2.0" },
     { name = "opencv-python-headless", specifier = ">=4.9.0,<4.14.0" },
     { name = "packaging", specifier = ">=24.2,<26.0" },
     { name = "pandas", marker = "extra == 'dataset'", specifier = ">=2.0.0,<3.0.0" },
@@ -4723,21 +4749,21 @@ wheels = [
 
 [[package]]
 name = "openai"
-version = "2.36.0"
+version = "1.109.1"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "anyio", marker = "sys_platform == 'linux'" },
-    { name = "distro", marker = "sys_platform == 'linux'" },
-    { name = "httpx", marker = "sys_platform == 'linux'" },
-    { name = "jiter", marker = "sys_platform == 'linux'" },
-    { name = "pydantic", marker = "sys_platform == 'linux'" },
-    { name = "sniffio", marker = "sys_platform == 'linux'" },
-    { name = "tqdm", marker = "sys_platform == 'linux'" },
-    { name = "typing-extensions", marker = "sys_platform == 'linux'" },
+    { name = "anyio" },
+    { name = "distro" },
+    { name = "httpx" },
+    { name = "jiter" },
+    { name = "pydantic" },
+    { name = "sniffio" },
+    { name = "tqdm" },
+    { name = "typing-extensions" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/f4/a1/4d5e84cf51720fc1526cc49e10ac1961abcccb55b0efb3d970db1e9a2728/openai-2.36.0.tar.gz", hash = "sha256:139dea0edd2f1b30c33d46ae1a6929e03906254140318e4608e98fe8c566f2e7", size = 753003, upload-time = "2026-05-07T17:33:17.075Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/c6/a1/a303104dc55fc546a3f6914c842d3da471c64eec92043aef8f652eb6c524/openai-1.109.1.tar.gz", hash = "sha256:d173ed8dbca665892a6db099b4a2dfac624f94d20a93f46eb0b56aae940ed869", size = 564133, upload-time = "2025-09-24T13:00:53.075Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/9d/1c/5d43735b2553baae2a5e899dcbcd0670a86930d993184d72ca909bf11c9b/openai-2.36.0-py3-none-any.whl", hash = "sha256:143f6194b548dbc2c921af1f1b03b9f14c85fed8a75b5b516f5bcc11a2a50c63", size = 1302361, upload-time = "2026-05-07T17:33:15.063Z" },
+    { url = "https://files.pythonhosted.org/packages/1d/2a/7dd3d207ec669cacc1f186fd856a0f61dbc255d24f6fdc1a6715d6051b0f/openai-1.109.1-py3-none-any.whl", hash = "sha256:6bcaf57086cf59159b8e27447e4e7dd019db5d29a438072fbd49c290c7e65315", size = 948627, upload-time = "2025-09-24T13:00:50.754Z" },
 ]
 
 [[package]]