From a81e23b0e9db4d6040a8112cc657fe871b50b637 Mon Sep 17 00:00:00 2001 From: Pepijn Date: Tue, 28 Apr 2026 09:44:15 +0200 Subject: [PATCH] fix(annotate): pass trust_remote_code=True to HF auto-classes Required for many newer VL checkpoints (Qwen3.x FP8 in particular) that ship custom loader code in their repo. Without it, the FP8 weight_scale_inv parameters never bind to FP8Linear modules and the post-load dispatch path bad-allocs. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/lerobot/annotations/steerable_pipeline/config.py | 3 +++ src/lerobot/annotations/steerable_pipeline/vlm_client.py | 7 ++++++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/src/lerobot/annotations/steerable_pipeline/config.py b/src/lerobot/annotations/steerable_pipeline/config.py index 1d8c6c941..0ea8240a5 100644 --- a/src/lerobot/annotations/steerable_pipeline/config.py +++ b/src/lerobot/annotations/steerable_pipeline/config.py @@ -68,6 +68,9 @@ class VlmConfig: json_mode: bool = True batch_size: int = 4 tensor_parallel_size: int = 1 + trust_remote_code: bool = True + """Pass ``trust_remote_code`` to HF auto-classes. Required for many + newer VL checkpoints (Qwen3.x FP8, etc.) that ship custom loader code.""" camera_key: str | None = None """Override the camera stream used for keyframe attachment. ``None`` picks the first ``observation.images.*`` key the dataset declares.""" diff --git a/src/lerobot/annotations/steerable_pipeline/vlm_client.py b/src/lerobot/annotations/steerable_pipeline/vlm_client.py index cb7264446..67017976c 100644 --- a/src/lerobot/annotations/steerable_pipeline/vlm_client.py +++ b/src/lerobot/annotations/steerable_pipeline/vlm_client.py @@ -180,17 +180,22 @@ def _make_transformers_client(config: VlmConfig) -> VlmClient: "transformers version. Install transformers>=4.45 (which has AutoModelForImageTextToText) " "for VL models." 
) - processor = AutoProcessor.from_pretrained(config.model_id) + processor = AutoProcessor.from_pretrained( + config.model_id, trust_remote_code=config.trust_remote_code + ) # ``low_cpu_mem_usage=True`` avoids a transformers-internal staging # buffer that has caused std::bad_alloc on Qwen3-line architectures # even on hosts with TBs of RAM (the failing alloc is in the # post-load tensor-placement path, not a real OOM). # ``device_map='auto'`` then streams shards directly to the GPU. + # ``trust_remote_code`` is required for many newer VL releases + # (Qwen3.x FP8, etc.) that ship a custom loader in the repo. model = auto_cls.from_pretrained( config.model_id, torch_dtype="auto", device_map="auto", low_cpu_mem_usage=True, + trust_remote_code=config.trust_remote_code, ) model.eval()