From 89f15e173198b1e9f599be417742c05b38144de8 Mon Sep 17 00:00:00 2001
From: Pepijn
Date: Tue, 28 Apr 2026 16:14:31 +0200
Subject: [PATCH] fix(annotate): drop guided_decoding=dict (api differs across
 vllm)

vllm 0.10.2 expects guided_decoding to be a GuidedDecodingParams object,
not a dict, and the accepted type differs across vllm releases. The
parser already has a one-retry JSON-recovery path, so drop guided
decoding entirely for portability.

Co-Authored-By: Claude Opus 4.7 (1M context)
---
 src/lerobot/annotations/steerable_pipeline/vlm_client.py | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/src/lerobot/annotations/steerable_pipeline/vlm_client.py b/src/lerobot/annotations/steerable_pipeline/vlm_client.py
index 4f9df78a5..a7828c65d 100644
--- a/src/lerobot/annotations/steerable_pipeline/vlm_client.py
+++ b/src/lerobot/annotations/steerable_pipeline/vlm_client.py
@@ -169,11 +169,10 @@ def _make_vllm_client(config: VlmConfig) -> VlmClient:
     llm = LLM(**llm_kwargs)
 
     def _gen(batch: Sequence[Sequence[dict[str, Any]]], max_tok: int, temp: float) -> list[str]:
-        params = SamplingParams(
-            max_tokens=max_tok,
-            temperature=temp,
-            guided_decoding={"json": {}} if config.json_mode else None,
-        )
+        # ``guided_decoding`` would guarantee parseable JSON, but its API differs
+        # across vllm releases (dict vs GuidedDecodingParams). The _GenericTextClient
+        # wrapper already has a one-retry JSON-recovery path, so we skip it.
+        params = SamplingParams(max_tokens=max_tok, temperature=temp)
         prompts = [_messages_to_prompt(m) for m in batch]
         outputs = llm.generate(prompts, params)
         return [o.outputs[0].text for o in outputs]
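Note (not part of the patch): if guided decoding ever needs to come back, a version-portable construction could look like the sketch below. This is a minimal sketch under assumptions: the `make_sampling_params` helper is hypothetical, and it assumes newer vllm releases expose `GuidedDecodingParams` (with a `json_object` flag) while older releases accepted the plain dict the patch removes.

```python
# Hypothetical helper, not part of this patch: a version-portable way to
# request JSON-constrained decoding, falling back to unconstrained sampling
# plus the existing one-retry JSON-recovery path if neither form is wanted.
from vllm import SamplingParams


def make_sampling_params(max_tok: int, temp: float, json_mode: bool) -> SamplingParams:
    if not json_mode:
        return SamplingParams(max_tokens=max_tok, temperature=temp)
    try:
        # Newer vllm releases ship a dedicated params object for guided decoding.
        from vllm.sampling_params import GuidedDecodingParams

        guided = GuidedDecodingParams(json_object=True)
    except ImportError:
        # Assumed fallback: the dict form accepted by older releases,
        # as in the code this patch removes.
        guided = {"json": {}}
    return SamplingParams(max_tokens=max_tok, temperature=temp, guided_decoding=guided)
```

The patch instead drops guided decoding outright, trading the occasional malformed-JSON retry for not having to track this API across vllm upgrades.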