From 89f15e173198b1e9f599be417742c05b38144de8 Mon Sep 17 00:00:00 2001
From: Pepijn
Date: Tue, 28 Apr 2026 16:14:31 +0200
Subject: [PATCH] fix(annotate): drop guided_decoding=dict (api differs across
 vllm)

vllm 0.10.2 expects guided_decoding to be a GuidedDecodingParams object,
not a dict, and the accepted type differs across vllm releases. The
parser already has a one-retry JSON-recovery path, so drop guided
decoding entirely for portability.

Co-Authored-By: Claude Opus 4.7 (1M context)
---
 src/lerobot/annotations/steerable_pipeline/vlm_client.py | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/src/lerobot/annotations/steerable_pipeline/vlm_client.py b/src/lerobot/annotations/steerable_pipeline/vlm_client.py
index 4f9df78a5..a7828c65d 100644
--- a/src/lerobot/annotations/steerable_pipeline/vlm_client.py
+++ b/src/lerobot/annotations/steerable_pipeline/vlm_client.py
@@ -169,11 +169,10 @@ def _make_vllm_client(config: VlmConfig) -> VlmClient:
     llm = LLM(**llm_kwargs)
 
     def _gen(batch: Sequence[Sequence[dict[str, Any]]], max_tok: int, temp: float) -> list[str]:
-        params = SamplingParams(
-            max_tokens=max_tok,
-            temperature=temp,
-            guided_decoding={"json": {}} if config.json_mode else None,
-        )
+        # ``guided_decoding`` would guarantee parseable JSON, but its API differs
+        # across vllm releases (dict vs GuidedDecodingParams). The _GenericTextClient
+        # wrapper already has a one-retry JSON-recovery path, so we skip it.
+        params = SamplingParams(max_tokens=max_tok, temperature=temp)
         prompts = [_messages_to_prompt(m) for m in batch]
         outputs = llm.generate(prompts, params)
         return [o.outputs[0].text for o in outputs]
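Note (not part of the patch): if guided decoding ever needs to come back, a version-portable construction could look like the sketch below. This is a minimal sketch under assumptions: the `make_sampling_params` helper is hypothetical, and it assumes newer vllm releases expose `GuidedDecodingParams` (with a `json_object` flag) while older releases accepted the plain dict the patch removes.

```python
# Hypothetical helper, not part of this patch: a version-portable way to
# request JSON-constrained decoding, falling back to unconstrained sampling
# plus the existing one-retry JSON-recovery path if neither form is wanted.
from vllm import SamplingParams


def make_sampling_params(max_tok: int, temp: float, json_mode: bool) -> SamplingParams:
    if not json_mode:
        return SamplingParams(max_tokens=max_tok, temperature=temp)
    try:
        # Newer vllm releases ship a dedicated params object for guided decoding.
        from vllm.sampling_params import GuidedDecodingParams

        guided = GuidedDecodingParams(json_object=True)
    except ImportError:
        # Assumed fallback: the dict form accepted by older releases,
        # as in the code this patch removes.
        guided = {"json": {}}
    return SamplingParams(max_tokens=max_tok, temperature=temp, guided_decoding=guided)
```

The patch instead drops guided decoding outright, trading the occasional malformed-JSON retry for not having to track this API across vllm upgrades.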