diff --git a/src/lerobot/annotations/steerable_pipeline/config.py b/src/lerobot/annotations/steerable_pipeline/config.py
index 81e1a6a13..297839d06 100644
--- a/src/lerobot/annotations/steerable_pipeline/config.py
+++ b/src/lerobot/annotations/steerable_pipeline/config.py
@@ -18,6 +18,7 @@ from __future__ import annotations
 from dataclasses import dataclass, field
 from pathlib import Path
+from typing import Any
 
 
 @dataclass
@@ -140,6 +141,12 @@ class VlmConfig:
     camera_key: str | None = None
     """Override the camera stream used for keyframe attachment. ``None``
     picks the first ``observation.images.*`` key the dataset declares."""
+    chat_template_kwargs: dict[str, Any] | None = None
+    """Forwarded as ``extra_body.chat_template_kwargs`` on every chat call.
+    Use this to pass model-specific template flags such as
+    ``{"enable_thinking": false}`` for Qwen3.5/Qwen3.6 to suppress the
+    reasoning preamble that otherwise eats the entire ``max_new_tokens``
+    budget before any JSON is emitted."""
 
 
 @dataclass
diff --git a/src/lerobot/annotations/steerable_pipeline/vlm_client.py b/src/lerobot/annotations/steerable_pipeline/vlm_client.py
index 1f1f83037..d2659321b 100644
--- a/src/lerobot/annotations/steerable_pipeline/vlm_client.py
+++ b/src/lerobot/annotations/steerable_pipeline/vlm_client.py
@@ -380,10 +380,13 @@ def _make_openai_client(config: VlmConfig) -> VlmClient:
             "max_tokens": max_tok,
             "temperature": temp,
         }
+        extra_body: dict[str, Any] = {}
         if send_mm_kwargs and mm_kwargs:
-            kwargs["extra_body"] = {
-                "mm_processor_kwargs": {**mm_kwargs, "do_sample_frames": True}
-            }
+            extra_body["mm_processor_kwargs"] = {**mm_kwargs, "do_sample_frames": True}
+        if config.chat_template_kwargs:
+            extra_body["chat_template_kwargs"] = config.chat_template_kwargs
+        if extra_body:
+            kwargs["extra_body"] = extra_body
         with rr_lock:
             chosen = clients[rr_counter["i"] % len(clients)]
             rr_counter["i"] += 1