From db9118f16fc92d8ab8d7dc673df7a897b2c2ca0e Mon Sep 17 00:00:00 2001
From: Pepijn <pepijn@huggingface.co>
Date: Tue, 5 May 2026 14:07:25 +0200
Subject: [PATCH] fix(smolvla2): reject gibberish high-level generations
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Memorised models can collapse to dominant-mode outputs (the
JSON-token salad ``":":":":...`` from VQA training) when the prompt
drifts even slightly from training distribution. Without a guard,
that gibberish lands in ``current_subtask`` / ``current_plan`` /
``current_memory``, which feeds the next tick's prompt and cascades
into worse outputs. The user observed exactly this: a clean run
followed by a tick that wrote ``" " "`` into plan and memory, then
slow recovery several ticks later.

Add ``_looks_like_gibberish`` heuristic (alpha density, repeating
chars, JSON-prefix sniff) and apply it before mutating state in
``HighLevelSubtaskFwd`` / ``MemoryUpdateFwd`` / ``UserInterjectionFwd``.
Bad generations are logged inline (``[info] subtask gen rejected
(gibberish): "":":":..."``) so the user can see what was dropped, but
the state stays at its last-known-good value (typically the dataset
bootstrap) instead of being polluted.

VQA path is intentionally exempt — its training targets *are*
JSON-shaped, so the heuristic would false-positive on them.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 .../policies/smolvla2/inference/steps.py      | 46 +++++++++++++++++++
 1 file changed, 46 insertions(+)
diff --git a/src/lerobot/policies/smolvla2/inference/steps.py b/src/lerobot/policies/smolvla2/inference/steps.py
index d30bc9dc9..3db87bdb7 100644
--- a/src/lerobot/policies/smolvla2/inference/steps.py
+++ b/src/lerobot/policies/smolvla2/inference/steps.py
@@ -271,6 +271,9 @@ class HighLevelSubtaskFwd(InferenceStep):
         msg = _generate_with_policy(
             self.policy, ctx, observation=observation, state=state, label="subtask gen"
         )
+        if msg and _looks_like_gibberish(msg):
+            push_log(state, f"  [info] subtask gen rejected (gibberish): {msg[:60]!r}")
+            return None
         if msg:
             changed = set_if_changed(state, "current_subtask", msg, label="subtask")
             if changed:
@@ -307,6 +310,9 @@ class MemoryUpdateFwd(InferenceStep):
         new_memory = _generate_with_policy(
             self.policy, ctx, observation=observation, state=state, label="memory gen"
         )
+        if new_memory and _looks_like_gibberish(new_memory):
+            push_log(state, f"  [info] memory gen rejected (gibberish): {new_memory[:60]!r}")
+            return None
         if new_memory:
             set_if_changed(state, "current_memory", new_memory, label="memory")
         return None
@@ -340,11 +346,16 @@ class UserInterjectionFwd(InferenceStep):
         if not out:
             push_log(state, "  [info] plan/say gen produced no text this tick")
             return None
+        if _looks_like_gibberish(out):
+            push_log(state, f"  [info] plan/say gen rejected (gibberish): {out[:60]!r}")
+            return None
         # Heuristic split: model is trained to emit one assistant turn
         # carrying both plan text AND a `say` tool call. Look for a
         # "<say>...</say>" or "say(...)" marker; fall back to whole
         # text → plan, no speech.
         plan_text, speech_text = _split_plan_and_say(out)
+        if plan_text and _looks_like_gibberish(plan_text):
+            plan_text = ""
         if plan_text:
             set_if_changed(state, "current_plan", plan_text, label="plan")
         if speech_text:
@@ -390,6 +401,9 @@ class AskVQAFwd(InferenceStep):
         answer = _generate_with_policy(
             self.policy, ctx, observation=observation, state=state, label="vqa gen"
         )
+        # VQA answers are intentionally JSON-like during training, so
+        # ``_looks_like_gibberish`` would false-positive on them. Keep
+        # the answer as-is — the VQA panel line lets the user judge.
         if answer:
             push_log(state, f"  vqa: {answer}")
         state["recent_vqa_query"] = None
@@ -432,6 +446,38 @@ class DispatchToolCalls(InferenceStep):
 # ---------------------------------------------------------------------------
 
 
+def _looks_like_gibberish(text: str) -> bool:
+    """Heuristically detect generation that's clearly off the rails.
+
+    Memorised models can collapse to dominant-mode outputs (often the
+    JSON-token salad ``":":":":...`` from VQA training) when the prompt
+    drifts even slightly from training distribution. If we accept those
+    as new state, they pollute the next tick's prompt and cascade into
+    worse outputs. Reject anything that looks pathological:
+
+    * empty / whitespace-only
+    * mostly punctuation (``"``, ``:``, ``,``)
+    * a single character repeated past the threshold
+    * starts with ``":"`` and contains no letters
+
+    The thresholds are intentionally lenient — a real subtask like
+    ``"close the gripper"`` has ~70%+ alpha characters, while gibberish
+    like ``":":":"`` has ~0%.
+    """
+    if not text or not text.strip():
+        return True
+    stripped = text.strip()
+    alpha = sum(1 for c in stripped if c.isalpha())
+    if alpha < max(3, len(stripped) // 8):
+        return True
+    if stripped.startswith('":') and stripped.count('"') > stripped.count(" "):
+        return True
+    # Single repeating char: e.g. ``""""""``
+    if len(set(stripped)) <= 2 and len(stripped) > 4:
+        return True
+    return False
+
+
 def _control_context_messages(
     state: dict[str, Any],
     *,