From 7a7b8ac111fde0b2e09cfc7a3de9a95245368f28 Mon Sep 17 00:00:00 2001 From: Pepijn Date: Tue, 28 Apr 2026 19:11:58 +0200 Subject: [PATCH] fix(annotate): omit mm_processor_kwargs by default; transformers serve rejects it MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit transformers serve returns HTTP 422 'Unexpected fields' when mm_processor_kwargs is in extra_body — that field is vllm-specific. Drop it by default; opt in via LEROBOT_OPENAI_SEND_MM_KWARGS=1 when talking to vllm serve. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../steerable_pipeline/vlm_client.py | 30 +++++++++++-------- 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/src/lerobot/annotations/steerable_pipeline/vlm_client.py b/src/lerobot/annotations/steerable_pipeline/vlm_client.py index 61bb9e027..1c58363b2 100644 --- a/src/lerobot/annotations/steerable_pipeline/vlm_client.py +++ b/src/lerobot/annotations/steerable_pipeline/vlm_client.py @@ -33,6 +33,7 @@ The client speaks one method, :meth:`VlmClient.generate_json`, which: from __future__ import annotations import json +import os from collections.abc import Callable, Sequence from dataclasses import dataclass from typing import Any, Protocol @@ -291,25 +292,30 @@ def _make_openai_client(config: VlmConfig) -> VlmClient: client = OpenAI(base_url=api_base, api_key=config.api_key) + # ``mm_processor_kwargs`` is a vllm-specific extra; transformers serve + # rejects it with HTTP 422. Send it only when explicitly opted in via + # an env var (e.g. ``LEROBOT_OPENAI_SEND_MM_KWARGS=1`` for vllm). + send_mm_kwargs = os.environ.get( + "LEROBOT_OPENAI_SEND_MM_KWARGS", "" + ).lower() in {"1", "true", "yes"} + def _gen( batch: Sequence[Sequence[dict[str, Any]]], max_tok: int, temp: float ) -> list[str]: outs: list[str] = [] for messages in batch: api_messages, mm_kwargs = _to_openai_messages(messages) - extra_body: dict[str, Any] = {} - if mm_kwargs: - extra_body["mm_processor_kwargs"] = { - **mm_kwargs, - "do_sample_frames": True, + kwargs: dict[str, Any] = { + "model": config.model_id, + "messages": api_messages, + "max_tokens": max_tok, + "temperature": temp, + } + if send_mm_kwargs and mm_kwargs: + kwargs["extra_body"] = { + "mm_processor_kwargs": {**mm_kwargs, "do_sample_frames": True} } - response = client.chat.completions.create( - model=config.model_id, - messages=api_messages, - max_tokens=max_tok, - temperature=temp, - extra_body=extra_body or None, - ) + response = client.chat.completions.create(**kwargs) outs.append(response.choices[0].message.content or "") return outs