From f1a0a663cc2960acf01b89b6d2d167608d4d2d04 Mon Sep 17 00:00:00 2001 From: Pepijn Date: Fri, 15 May 2026 13:52:26 +0200 Subject: [PATCH] fix(inference): gibberish detector catches long repetition collapse MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The ``_looks_like_gibberish`` low-unique-token check was gated on ``len(stripped) < 80``, so an LM head that loops an n-gram for the whole 256-token budget — "the arm the arm … the the the the" — sailed straight through (``gibberish:0`` in the panel) and the garbage subtask got accepted and fed to the action expert. Added a length-independent check: ``>= 8 tokens`` but unique-token count ``<= max(3, tokens // 10)`` ⇒ repetition collapse. Now the runtime rejects the looped output and keeps the previous (real) subtask instead of propagating nonsense. This is a guard, not a cure — the underlying issue is the LM head on the current checkpoint being undertrained / collapsed; re-annotate with the short prompts and train longer. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/lerobot/policies/smolvla2/inference/steps.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/src/lerobot/policies/smolvla2/inference/steps.py b/src/lerobot/policies/smolvla2/inference/steps.py index c9b84b167..a36ae26b5 100644 --- a/src/lerobot/policies/smolvla2/inference/steps.py +++ b/src/lerobot/policies/smolvla2/inference/steps.py @@ -683,12 +683,19 @@ def _looks_like_gibberish(text: str) -> bool: for marker in ("Assistant", "User", "Ass "): if marker in cleaned and len(cleaned.split()) < 4: return True - # Too few unique alphabetic tokens — model stuck on ``the`` or - # similar memorised single-token continuations. tokens = [t for t in cleaned.split() if any(c.isalpha() for c in t)] unique_alpha = {t.lower() for t in tokens} + # Short degenerate output — model stuck on ``the`` or a couple of + # memorised single-token continuations. 
if len(unique_alpha) < 3 and len(stripped) < 80: return True + # Long repetition collapse — the LM head loops an n-gram for the + # whole generation budget ("the arm the arm … the the the the"). + # Length-independent: many tokens but a tiny unique ratio. The + # earlier ``< 80`` check missed these because the looped string + # blows well past 80 chars. + if len(tokens) >= 8 and len(unique_alpha) <= max(3, len(tokens) // 10): + return True return False