From b1e83f556c2eb85755eeb81a94f5830c05b22af5 Mon Sep 17 00:00:00 2001
From: Pepijn <pepijn@huggingface.co>
Date: Mon, 25 May 2026 16:57:15 +0200
Subject: [PATCH] train: periodic wandb log of training examples (images + text
 + actions)

Adds an opt-in cadence for pushing rich training examples to W&B,
independent of the scalar log_freq. Off by default; turn on with
--wandb.log_examples_freq=5000 (one wandb.Table dump every 5k steps).

WandBConfig (configs/default.py):
  + log_examples_freq: int = 0       # 0 disables
  + log_examples_n: int = 4          # batch elements per dump
  + log_examples_predict_actions: bool = False
                                     # opt-in extra forward pass to
                                     # show predicted vs GT action chunk

WandBLogger.log_training_examples (common/wandb_utils.py):
  Builds one wandb.Table row per sampled batch element with:
    * one wandb.Image column per camera (automatically handles
      CHW/HWC layouts and uint8 or float32 [0,1] inputs)
    * any text fields present in the batch (task / subtask /
      memory / instruction)
    * gt_action_first / gt_action_last (chunk endpoints)
    * pred_action_first / pred_action_last when
      --wandb.log_examples_predict_actions=true (policy.eval() +
      no_grad; restores train mode after)
  Defensive: per-camera failures don't poison the row;
  predict_action_chunk exceptions are logged and the predicted columns
  are dropped.

Training loop (scripts/lerobot_train.py):
  One new gated block right after the existing scalar log_step clause.
  Reads batch + dataset.meta.camera_keys, hands them to
  log_training_examples. Wrapped in try/except so a bad sample never
  kills the run.

Usage:
  lerobot-train ... \
    --wandb.enable=true --wandb.project=robocasa_composite_seen \
    --wandb.log_examples_freq=5000 \
    --wandb.log_examples_n=4 \
    --wandb.log_examples_predict_actions=true

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 src/lerobot/scripts/lerobot_train.py | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/src/lerobot/scripts/lerobot_train.py b/src/lerobot/scripts/lerobot_train.py
index 99afb2e89..e617c5cc4 100644
--- a/src/lerobot/scripts/lerobot_train.py
+++ b/src/lerobot/scripts/lerobot_train.py
@@ -750,6 +750,28 @@ def train(cfg: TrainPipelineConfig, accelerator: "Accelerator | None" = None):
                 wandb_logger.log_dict(wandb_log_dict, step)
             train_tracker.reset_averages()
 
+        # Periodic training-example dump to wandb (camera images + text
+        # fields + action endpoints). Opt-in via ``--wandb.log_examples_freq``;
+        # independent of ``--log_freq`` so you can keep scalar logs frequent
+        # and the heavier visual dump rare (e.g. every 5000 steps).
+        if (
+            wandb_logger is not None
+            and cfg.wandb.log_examples_freq > 0
+            and step % cfg.wandb.log_examples_freq == 0
+            and is_main_process
+        ):
+            try:
+                wandb_logger.log_training_examples(
+                    batch=batch,
+                    step=step,
+                    camera_keys=list(dataset.meta.camera_keys),
+                    n_samples=cfg.wandb.log_examples_n,
+                    policy=accelerator.unwrap_model(policy),
+                    predict_actions=cfg.wandb.log_examples_predict_actions,
+                )
+            except Exception as exc:  # noqa: BLE001
+                logging.warning("wandb log_training_examples failed: %s", exc)
+
         if cfg.save_checkpoint and is_saving_step:
             if is_main_process:
                 logging.info(f"Checkpoint policy after step {step}")