feat(annotate): forward chat_template_kwargs to OpenAI extra_body

Lets callers pass per-request template flags such as {"enable_thinking": false} for Qwen3.5/Qwen3.6 models, where the default thinking preamble otherwise consumes the entire max_new_tokens budget before any JSON is emitted.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-07-24 10:16:09 +00:00 · 2026-04-29 15:00:23 +02:00
parent 7f8bf108e8
commit 27f7829b09
2 changed files with 13 additions and 3 deletions
@@ -18,6 +18,7 @@ from __future__ import annotations
 from dataclasses import dataclass, field
 from pathlib import Path
 from typing import Any
@dataclass
@@ -140,6 +141,12 @@ class VlmConfig:
    camera_key: str | None = None
    """Override the camera stream used for keyframe attachment. ``None`` picks
    the first ``observation.images.*`` key the dataset declares."""
    chat_template_kwargs: dict[str, Any] | None = None
    """Forwarded as ``extra_body.chat_template_kwargs`` on every chat call.
    Use this to pass model-specific template flags such as
    ``{"enable_thinking": false}`` for Qwen3.5/Qwen3.6 to suppress the
    reasoning preamble that otherwise eats the entire ``max_new_tokens``
    budget before any JSON is emitted."""
@dataclass
@@ -380,10 +380,13 @@ def _make_openai_client(config: VlmConfig) -> VlmClient:
            "max_tokens": max_tok,
            "temperature": temp,
        }
        extra_body: dict[str, Any] = {}
        if send_mm_kwargs and mm_kwargs:
-            kwargs["extra_body"] = {
+            extra_body["mm_processor_kwargs"] = {**mm_kwargs, "do_sample_frames": True}
-                "mm_processor_kwargs": {**mm_kwargs, "do_sample_frames": True}
+        if config.chat_template_kwargs:
-            }
+            extra_body["chat_template_kwargs"] = config.chat_template_kwargs
        if extra_body:
            kwargs["extra_body"] = extra_body
        with rr_lock:
            chosen = clients[rr_counter["i"] % len(clients)]
            rr_counter["i"] += 1