fix(annotate): omit mm_processor_kwargs by default; transformers serve rejects it

transformers serve returns HTTP 422 'Unexpected fields' when mm_processor_kwargs is in extra_body — that field is vllm-specific. Drop it by default; opt in via LEROBOT_OPENAI_SEND_MM_KWARGS=1 when talking to vllm serve.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-20 11:09:59 +00:00 · 2026-04-28 19:11:58 +02:00
parent 8807e0b41e
commit 3916f17c4a
1 changed files with 18 additions and 12 deletions
@@ -33,6 +33,7 @@ The client speaks one method, :meth:`VlmClient.generate_json`, which:
 from __future__ import annotations
 import json
+import os
 from collections.abc import Callable, Sequence
 from dataclasses import dataclass
 from typing import Any, Protocol
@@ -291,25 +292,30 @@ def _make_openai_client(config: VlmConfig) -> VlmClient:
    client = OpenAI(base_url=api_base, api_key=config.api_key)
+    # ``mm_processor_kwargs`` is a vllm-specific extra; transformers serve
+    # rejects it with HTTP 422. Send it only when explicitly opted in via
+    # an env var (e.g. ``LEROBOT_OPENAI_SEND_MM_KWARGS=1`` for vllm).
+    send_mm_kwargs = os.environ.get(
+        "LEROBOT_OPENAI_SEND_MM_KWARGS", ""
+    ).lower() in {"1", "true", "yes"}
    def _gen(
        batch: Sequence[Sequence[dict[str, Any]]], max_tok: int, temp: float
    ) -> list[str]:
        outs: list[str] = []
        for messages in batch:
            api_messages, mm_kwargs = _to_openai_messages(messages)
-            extra_body: dict[str, Any] = {}
-            if mm_kwargs:
-                extra_body["mm_processor_kwargs"] = {
-                    **mm_kwargs,
-                    "do_sample_frames": True,
+            kwargs: dict[str, Any] = {
+                "model": config.model_id,
+                "messages": api_messages,
+                "max_tokens": max_tok,
+                "temperature": temp,
            }
-            response = client.chat.completions.create(
-                model=config.model_id,
-                messages=api_messages,
-                max_tokens=max_tok,
-                temperature=temp,
-                extra_body=extra_body or None,
-            )
+            if send_mm_kwargs and mm_kwargs:
+                kwargs["extra_body"] = {
+                    "mm_processor_kwargs": {**mm_kwargs, "do_sample_frames": True}
+                }
+            response = client.chat.completions.create(**kwargs)
            outs.append(response.choices[0].message.content or "")
        return outs