From 7b4d281ef588e382126989a4add50da689d25d28 Mon Sep 17 00:00:00 2001
From: Pepijn <pepijn@huggingface.co>
Date: Tue, 5 May 2026 11:27:12 +0200
Subject: [PATCH] fix(smolvla2): build preprocessor fresh, don't round-trip the
 recipe
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

``PolicyProcessorPipeline.from_pretrained`` reconstructs each saved
step by passing the persisted JSON config back to ``__init__``, but
``RenderMessagesStep.recipe`` (a ``TrainingRecipe``) doesn't survive
the JSON round-trip — the saved entry is ``{}`` and the reconstructor
crashes with ``missing 1 required argument: 'recipe'``.

Bypass the round-trip in the runtime CLI by passing
``pretrained_path=None`` to ``make_pre_post_processors``. That re-runs
``make_smolvla2_pre_post_processors``, which reloads the recipe YAML
referenced by ``cfg.recipe_path`` and wires it back into the step
correctly. ``NormalizerProcessorStep`` still gets stats from
``ds_meta.stats``, so normalization matches training as long as the
runtime dataset is the one the policy was trained on.

The proper fix is to make ``RenderMessagesStep`` serializable (e.g. by
persisting the recipe path / contents); this commit keeps the
workaround scoped to the runtime path so dry-run testing isn't blocked.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 src/lerobot/scripts/lerobot_smolvla2_runtime.py | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/src/lerobot/scripts/lerobot_smolvla2_runtime.py b/src/lerobot/scripts/lerobot_smolvla2_runtime.py
index 941da8a49..f721c20d1 100644
--- a/src/lerobot/scripts/lerobot_smolvla2_runtime.py
+++ b/src/lerobot/scripts/lerobot_smolvla2_runtime.py
@@ -197,9 +197,22 @@ def _load_policy_and_preprocessor(
 
         ds_meta = LeRobotDatasetMetadata(dataset_repo_id)
         policy = make_policy(cfg, ds_meta=ds_meta)
+        # NOTE: we deliberately pass ``pretrained_path=None`` here even
+        # though the checkpoint ships a ``policy_preprocessor.json``.
+        # ``RenderMessagesStep`` carries a ``TrainingRecipe`` field that
+        # isn't faithfully serialized into that JSON, so the saved
+        # pipeline can't currently be round-tripped via
+        # ``PolicyProcessorPipeline.from_pretrained`` — it crashes with
+        # ``RenderMessagesStep.__init__() missing 1 required argument:
+        # 'recipe'``. Building fresh from ``cfg`` re-runs
+        # ``make_smolvla2_pre_post_processors``, which loads the recipe
+        # YAML referenced by ``cfg.recipe_path`` and wires it back into
+        # ``RenderMessagesStep`` correctly. Normalization stats come
+        # from ``ds_meta.stats`` (the dataset fed to the runtime), so they
+        # match training only if that is also the training dataset.
         preprocessor, _ = make_pre_post_processors(
             cfg,
-            pretrained_path=policy_path,
+            pretrained_path=None,
             dataset_stats=ds_meta.stats,
         )
     else: