From 27f7829b09c7549e3b1ec661e442729db90abf28 Mon Sep 17 00:00:00 2001
From: Pepijn
Date: Wed, 29 Apr 2026 15:00:23 +0200
Subject: [PATCH] feat(annotate): forward chat_template_kwargs to OpenAI
 extra_body

Lets callers pass per-request template flags such as
{"enable_thinking": false} for Qwen3.5/Qwen3.6 models, where the default
thinking preamble otherwise consumes the entire max_new_tokens budget
before any JSON is emitted.

Co-Authored-By: Claude Opus 4.7 (1M context)
---
 src/lerobot/annotations/steerable_pipeline/config.py     | 8 ++++++++
 src/lerobot/annotations/steerable_pipeline/vlm_client.py | 9 ++++++---
 2 files changed, 14 insertions(+), 3 deletions(-)

diff --git a/src/lerobot/annotations/steerable_pipeline/config.py b/src/lerobot/annotations/steerable_pipeline/config.py
index 81e1a6a13..297839d06 100644
--- a/src/lerobot/annotations/steerable_pipeline/config.py
+++ b/src/lerobot/annotations/steerable_pipeline/config.py
@@ -18,6 +18,7 @@ from __future__ import annotations
 
 from dataclasses import dataclass, field
 from pathlib import Path
+from typing import Any
 
 
 @dataclass
@@ -140,6 +141,13 @@ class VlmConfig:
     camera_key: str | None = None
     """Override the camera stream used for keyframe attachment. ``None``
     picks the first ``observation.images.*`` key the dataset declares."""
+    chat_template_kwargs: dict[str, Any] | None = None
+    """Forwarded as ``extra_body.chat_template_kwargs`` on every chat call.
+
+    Use this to pass model-specific template flags such as
+    ``{"enable_thinking": false}`` for Qwen3.5/Qwen3.6 to suppress the
+    reasoning preamble that otherwise eats the entire ``max_new_tokens``
+    budget before any JSON is emitted."""
 
 
 @dataclass
diff --git a/src/lerobot/annotations/steerable_pipeline/vlm_client.py b/src/lerobot/annotations/steerable_pipeline/vlm_client.py
index 1f1f83037..d2659321b 100644
--- a/src/lerobot/annotations/steerable_pipeline/vlm_client.py
+++ b/src/lerobot/annotations/steerable_pipeline/vlm_client.py
@@ -380,10 +380,13 @@ def _make_openai_client(config: VlmConfig) -> VlmClient:
         "max_tokens": max_tok,
         "temperature": temp,
     }
+    extra_body: dict[str, Any] = {}
     if send_mm_kwargs and mm_kwargs:
-        kwargs["extra_body"] = {
-            "mm_processor_kwargs": {**mm_kwargs, "do_sample_frames": True}
-        }
+        extra_body["mm_processor_kwargs"] = {**mm_kwargs, "do_sample_frames": True}
+    if config.chat_template_kwargs:
+        extra_body["chat_template_kwargs"] = config.chat_template_kwargs
+    if extra_body:
+        kwargs["extra_body"] = extra_body
     with rr_lock:
         chosen = clients[rr_counter["i"] % len(clients)]
         rr_counter["i"] += 1