feat(annotate): forward chat_template_kwargs to OpenAI extra_body

Lets callers pass per-request template flags such as
{"enable_thinking": false} for Qwen3.5/Qwen3.6 models, where the
default thinking preamble otherwise consumes the entire max_new_tokens
budget before any JSON is emitted.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Pepijn
2026-04-29 15:00:23 +02:00
parent 824aac9ad8
commit bda829af2f
2 changed files with 13 additions and 3 deletions
@@ -18,6 +18,7 @@ from __future__ import annotations
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any
@dataclass
@@ -140,6 +141,12 @@ class VlmConfig:
camera_key: str | None = None
"""Override the camera stream used for keyframe attachment. ``None`` picks
the first ``observation.images.*`` key the dataset declares."""
chat_template_kwargs: dict[str, Any] | None = None
"""Forwarded as ``extra_body.chat_template_kwargs`` on every chat call.
Use this to pass model-specific template flags such as
``{"enable_thinking": false}`` for Qwen3.5/Qwen3.6 to suppress the
reasoning preamble that otherwise eats the entire ``max_new_tokens``
budget before any JSON is emitted."""
@dataclass
@@ -380,10 +380,13 @@ def _make_openai_client(config: VlmConfig) -> VlmClient:
"max_tokens": max_tok,
"temperature": temp,
}
extra_body: dict[str, Any] = {}
if send_mm_kwargs and mm_kwargs:
kwargs["extra_body"] = {
"mm_processor_kwargs": {**mm_kwargs, "do_sample_frames": True}
}
extra_body["mm_processor_kwargs"] = {**mm_kwargs, "do_sample_frames": True}
if config.chat_template_kwargs:
extra_body["chat_template_kwargs"] = config.chat_template_kwargs
if extra_body:
kwargs["extra_body"] = extra_body
with rr_lock:
chosen = clients[rr_counter["i"] % len(clients)]
rr_counter["i"] += 1