From 7c10c4fcdde2259201e1ede227f00ebad3c3fd59 Mon Sep 17 00:00:00 2001 From: Pepijn Date: Tue, 28 Apr 2026 22:15:25 +0200 Subject: [PATCH] fix(annotate): robust JSON extraction (think tags + first balanced object) Models often wrap JSON in prose or ... blocks. Strip the think tags first, then try direct json.loads, then fall back to scanning for the first balanced {...} substring (ignoring braces inside strings). Co-Authored-By: Claude Opus 4.7 (1M context) --- .../steerable_pipeline/vlm_client.py | 48 ++++++++++++++++++- 1 file changed, 46 insertions(+), 2 deletions(-) diff --git a/src/lerobot/annotations/steerable_pipeline/vlm_client.py b/src/lerobot/annotations/steerable_pipeline/vlm_client.py index 6b8056c05..14955d9f6 100644 --- a/src/lerobot/annotations/steerable_pipeline/vlm_client.py +++ b/src/lerobot/annotations/steerable_pipeline/vlm_client.py @@ -76,13 +76,57 @@ class StubVlmClient: def _strip_to_json(text: str) -> Any: text = text.strip() + # Strip ... blocks (Qwen3 Thinking style) + while "" in text and "" in text: + start = text.find("") + end = text.find("", start) + len("") + text = (text[:start] + text[end:]).strip() + # Strip ```json ... ``` fences from chat-tuned backbones if text.startswith("```"): - # tolerate ```json ... ``` fences from chat-tuned backbones first = text.find("\n") last = text.rfind("```") if first != -1 and last != -1 and last > first: text = text[first + 1 : last].strip() - return json.loads(text) + try: + return json.loads(text) + except (ValueError, json.JSONDecodeError): + pass + # Fall back to extracting the first balanced {...} block. + obj_text = _extract_first_json_object(text) + if obj_text is None: + raise json.JSONDecodeError("No JSON object found", text, 0) + return json.loads(obj_text) + + +def _extract_first_json_object(text: str) -> str | None: + """Return the first balanced ``{...}`` substring, ignoring braces in + string literals. Returns ``None`` if no balanced block is found.""" + start = text.find("{") + if start < 0: + return None + depth = 0 + in_string = False + escape = False + for i in range(start, len(text)): + ch = text[i] + if escape: + escape = False + continue + if ch == "\\": + escape = True + continue + if ch == '"' and not escape: + in_string = not in_string + continue + if in_string: + continue + if ch == "{": + depth += 1 + elif ch == "}": + depth -= 1 + if depth == 0: + return text[start : i + 1] + return None @dataclass