From 7a7b8ac111fde0b2e09cfc7a3de9a95245368f28 Mon Sep 17 00:00:00 2001
From: Pepijn <pepijn@huggingface.co>
Date: Tue, 28 Apr 2026 19:11:58 +0200
Subject: [PATCH] fix(annotate): omit mm_processor_kwargs by default;
 transformers serve rejects it
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

transformers serve returns HTTP 422 'Unexpected fields' when
mm_processor_kwargs is in extra_body — that field is vllm-specific.
Drop it by default; opt in by setting LEROBOT_OPENAI_SEND_MM_KWARGS to
1, true, or yes (case-insensitive) when talking to vllm serve.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 .../steerable_pipeline/vlm_client.py          | 30 +++++++++++--------
 1 file changed, 18 insertions(+), 12 deletions(-)

diff --git a/src/lerobot/annotations/steerable_pipeline/vlm_client.py b/src/lerobot/annotations/steerable_pipeline/vlm_client.py
index 61bb9e027..1c58363b2 100644
--- a/src/lerobot/annotations/steerable_pipeline/vlm_client.py
+++ b/src/lerobot/annotations/steerable_pipeline/vlm_client.py
@@ -33,6 +33,7 @@ The client speaks one method, :meth:`VlmClient.generate_json`, which:
 from __future__ import annotations
 
 import json
+import os
 from collections.abc import Callable, Sequence
 from dataclasses import dataclass
 from typing import Any, Protocol
@@ -291,25 +292,30 @@ def _make_openai_client(config: VlmConfig) -> VlmClient:
 
     client = OpenAI(base_url=api_base, api_key=config.api_key)
 
+    # ``mm_processor_kwargs`` is a vllm-specific extra; transformers serve
+    # rejects it with HTTP 422. Send it only when explicitly opted in via
+    # an env var (e.g. ``LEROBOT_OPENAI_SEND_MM_KWARGS=1`` for vllm).
+    send_mm_kwargs = os.environ.get(
+        "LEROBOT_OPENAI_SEND_MM_KWARGS", ""
+    ).lower() in {"1", "true", "yes"}
+
     def _gen(
         batch: Sequence[Sequence[dict[str, Any]]], max_tok: int, temp: float
     ) -> list[str]:
         outs: list[str] = []
         for messages in batch:
             api_messages, mm_kwargs = _to_openai_messages(messages)
-            extra_body: dict[str, Any] = {}
-            if mm_kwargs:
-                extra_body["mm_processor_kwargs"] = {
-                    **mm_kwargs,
-                    "do_sample_frames": True,
+            kwargs: dict[str, Any] = {
+                "model": config.model_id,
+                "messages": api_messages,
+                "max_tokens": max_tok,
+                "temperature": temp,
+            }
+            if send_mm_kwargs and mm_kwargs:
+                kwargs["extra_body"] = {
+                    "mm_processor_kwargs": {**mm_kwargs, "do_sample_frames": True}
                 }
-            response = client.chat.completions.create(
-                model=config.model_id,
-                messages=api_messages,
-                max_tokens=max_tok,
-                temperature=temp,
-                extra_body=extra_body or None,
-            )
+            response = client.chat.completions.create(**kwargs)
             outs.append(response.choices[0].message.content or "")
         return outs