From c3503b774fc5d031ea6e4c1c0359dc3e1907ec0b Mon Sep 17 00:00:00 2001
From: Pepijn <pepijn@huggingface.co>
Date: Wed, 13 May 2026 16:43:51 +0200
Subject: [PATCH] fix(debug): dumper now shows real stream + target flags

The dumper was printing ``stream=None target=None`` for every
message because it read those fields off the message dicts, but
the recipe renderer keeps them in parallel arrays
(``message_streams`` / ``target_message_indices`` in
COMPLEMENTARY_DATA) so the chat template doesn't see unknown
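keys. Merge them back into shallow per-message copies at dump
time (the original message dicts stay untouched) so the printed
metadata is accurate; messages whose index is not listed as a
target keep ``target=None``.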

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 .../policies/pi052/text_processor_pi052.py    | 13 +++++++-
 .../smolvla2/chat_processor_smolvla2.py       | 31 +++++++++++++++++--
 2 files changed, 40 insertions(+), 4 deletions(-)

diff --git a/src/lerobot/policies/pi052/text_processor_pi052.py b/src/lerobot/policies/pi052/text_processor_pi052.py
index fddcba9df..c957cf590 100644
--- a/src/lerobot/policies/pi052/text_processor_pi052.py
+++ b/src/lerobot/policies/pi052/text_processor_pi052.py
@@ -297,8 +297,19 @@ class PI052TextTokenizerStep(ProcessorStep):
         )
 
         if _DUMP_BUDGET > 0:
+            # Stream / target metadata live in parallel arrays; zip them
+            # back into the dicts so the dump shows them per message.
+            target_set = {int(i) for i in target_indices}
+            annotated_msgs = [
+                {
+                    **m,
+                    "stream": message_streams[i] if i < len(message_streams) else None,
+                    "target": True if i in target_set else None,
+                }
+                for i, m in enumerate(messages)
+            ]
             _dump_recipe_sample(
-                messages=messages,
+                messages=annotated_msgs,
                 prompt_text=prompt,
                 token_ids=input_ids.tolist(),
                 labels=labels.tolist(),
diff --git a/src/lerobot/policies/smolvla2/chat_processor_smolvla2.py b/src/lerobot/policies/smolvla2/chat_processor_smolvla2.py
index 23a5e5730..36fc02dca 100644
--- a/src/lerobot/policies/smolvla2/chat_processor_smolvla2.py
+++ b/src/lerobot/policies/smolvla2/chat_processor_smolvla2.py
@@ -239,10 +239,35 @@ class SmolVLA2ChatTokenizerStep(ProcessorStep):
         # model actually sees. No-op unless ``LEROBOT_DUMP_RECIPE_SAMPLES``
         # is set; stops globally after the budget is exhausted.
         if _DUMP_BUDGET > 0:
-            msgs_iter = messages if _is_batched_messages(messages) else [messages]
-            for msg, (ids, labels, predict_action) in zip(msgs_iter, encoded, strict=False):
+            # Stream / target metadata live in parallel arrays in
+            # COMPLEMENTARY_DATA, not on the message dicts themselves
+            # (the recipe renderer keeps them separate so the chat
+            # template doesn't choke on unknown keys). Zip them back
+            # together for the dumper so each printed message shows
+            # its actual stream + target flag.
+            if _is_batched_messages(messages):
+                msgs_iter = messages
+                streams_iter = comp.get("message_streams") or [[] for _ in messages]
+                targets_iter = comp.get("target_message_indices") or [[] for _ in messages]
+            else:
+                msgs_iter = [messages]
+                streams_iter = [list(comp.get("message_streams") or [])]
+                targets_iter = [list(comp.get("target_message_indices") or [])]
+            for msg, streams, targets, (ids, labels, predict_action) in zip(
+                msgs_iter, streams_iter, targets_iter, encoded, strict=False
+            ):
+                target_set = {int(i) for i in targets}
+                annotated_msgs = []
+                for i, m in enumerate(msg):
+                    annotated_msgs.append(
+                        {
+                            **m,
+                            "stream": streams[i] if i < len(streams) else None,
+                            "target": True if i in target_set else None,
+                        }
+                    )
                 _dump_recipe_sample(
-                    messages=msg,
+                    messages=annotated_msgs,
                     token_ids=ids,
                     labels=labels,
                     predict_actions=predict_action,