diff --git a/src/lerobot/policies/pi052/text_processor_pi052.py b/src/lerobot/policies/pi052/text_processor_pi052.py index fddcba9df..c957cf590 100644 --- a/src/lerobot/policies/pi052/text_processor_pi052.py +++ b/src/lerobot/policies/pi052/text_processor_pi052.py @@ -297,8 +297,19 @@ class PI052TextTokenizerStep(ProcessorStep): ) if _DUMP_BUDGET > 0: + # Stream / target metadata live in parallel arrays; zip them + # back into the dicts so the dump shows them per message. + target_set = {int(i) for i in target_indices} + annotated_msgs = [ + { + **m, + "stream": message_streams[i] if i < len(message_streams) else None, + "target": True if i in target_set else None, + } + for i, m in enumerate(messages) + ] _dump_recipe_sample( - messages=messages, + messages=annotated_msgs, prompt_text=prompt, token_ids=input_ids.tolist(), labels=labels.tolist(), diff --git a/src/lerobot/policies/smolvla2/chat_processor_smolvla2.py b/src/lerobot/policies/smolvla2/chat_processor_smolvla2.py index 23a5e5730..36fc02dca 100644 --- a/src/lerobot/policies/smolvla2/chat_processor_smolvla2.py +++ b/src/lerobot/policies/smolvla2/chat_processor_smolvla2.py @@ -239,10 +239,35 @@ class SmolVLA2ChatTokenizerStep(ProcessorStep): # model actually sees. No-op unless ``LEROBOT_DUMP_RECIPE_SAMPLES`` # is set; stops globally after the budget is exhausted. if _DUMP_BUDGET > 0: - msgs_iter = messages if _is_batched_messages(messages) else [messages] - for msg, (ids, labels, predict_action) in zip(msgs_iter, encoded, strict=False): + # Stream / target metadata live in parallel arrays in + # COMPLEMENTARY_DATA, not on the message dicts themselves + # (the recipe renderer keeps them separate so the chat + # template doesn't choke on unknown keys). Zip them back + # together for the dumper so each printed message shows + # its actual stream + target flag. + if _is_batched_messages(messages): + msgs_iter = messages + streams_iter = comp.get("message_streams") or [[] for _ in messages] + targets_iter = comp.get("target_message_indices") or [[] for _ in messages] + else: + msgs_iter = [messages] + streams_iter = [list(comp.get("message_streams") or [])] + targets_iter = [list(comp.get("target_message_indices") or [])] + for msg, streams, targets, (ids, labels, predict_action) in zip( + msgs_iter, streams_iter, targets_iter, encoded, strict=False + ): + target_set = {int(i) for i in targets} + annotated_msgs = [] + for i, m in enumerate(msg): + annotated_msgs.append( + { + **m, + "stream": streams[i] if i < len(streams) else None, + "target": True if i in target_set else None, + } + ) _dump_recipe_sample( - messages=msg, + messages=annotated_msgs, token_ids=ids, labels=labels, predict_actions=predict_action,