fix(annotate): omit mm_processor_kwargs by default; transformers serve rejects it

transformers serve returns HTTP 422 'Unexpected fields' when
mm_processor_kwargs is in extra_body — that field is vllm-specific.
Drop it by default; opt in via LEROBOT_OPENAI_SEND_MM_KWARGS=1 when
talking to vllm serve.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Pepijn
2026-04-28 19:11:58 +02:00
parent 8807e0b41e
commit 3916f17c4a
@@ -33,6 +33,7 @@ The client speaks one method, :meth:`VlmClient.generate_json`, which:
 from __future__ import annotations
 import json
+import os
 from collections.abc import Callable, Sequence
 from dataclasses import dataclass
 from typing import Any, Protocol
@@ -291,25 +292,30 @@ def _make_openai_client(config: VlmConfig) -> VlmClient:
     client = OpenAI(base_url=api_base, api_key=config.api_key)
+    # ``mm_processor_kwargs`` is a vllm-specific extra; transformers serve
+    # rejects it with HTTP 422. Send it only when explicitly opted in via
+    # an env var (e.g. ``LEROBOT_OPENAI_SEND_MM_KWARGS=1`` for vllm).
+    send_mm_kwargs = os.environ.get(
+        "LEROBOT_OPENAI_SEND_MM_KWARGS", ""
+    ).lower() in {"1", "true", "yes"}
     def _gen(
         batch: Sequence[Sequence[dict[str, Any]]], max_tok: int, temp: float
     ) -> list[str]:
         outs: list[str] = []
         for messages in batch:
             api_messages, mm_kwargs = _to_openai_messages(messages)
-            extra_body: dict[str, Any] = {}
-            if mm_kwargs:
-                extra_body["mm_processor_kwargs"] = {
-                    **mm_kwargs,
-                    "do_sample_frames": True,
-                }
-            response = client.chat.completions.create(
-                model=config.model_id,
-                messages=api_messages,
-                max_tokens=max_tok,
-                temperature=temp,
-                extra_body=extra_body or None,
-            )
+            kwargs: dict[str, Any] = {
+                "model": config.model_id,
+                "messages": api_messages,
+                "max_tokens": max_tok,
+                "temperature": temp,
+            }
+            if send_mm_kwargs and mm_kwargs:
+                kwargs["extra_body"] = {
+                    "mm_processor_kwargs": {**mm_kwargs, "do_sample_frames": True}
+                }
+            response = client.chat.completions.create(**kwargs)
             outs.append(response.choices[0].message.content or "")
         return outs