From 12cce8f2cc134fcb7e7e8e6d85770dc74a2c6a8b Mon Sep 17 00:00:00 2001 From: Pepijn Date: Wed, 13 May 2026 11:02:17 +0200 Subject: [PATCH] =?UTF-8?q?fix(smolvla2):=20align=20flow=5Floss=5Fweight?= =?UTF-8?q?=20default=20with=20Pi=200.5=20paper's=20=CE=B1=3D10?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pi 0.5 paper §IV.D Eq. (1) sets the loss balance to α=10 between text CE and flow MSE: actions are the primary output and the flow head should dominate the gradient signal. SmolVLA2 was defaulting both weights to 1.0, which inverts that — text CE (~0.5-2.0 nats) ends up larger than flow MSE (~0.1-1.0), so the action expert gets less gradient than the LM head even though actions are the primary output. Match the paper's split: text_loss_weight=1.0, flow_loss_weight=10.0. Same as ``pi052`` (the new full reproduction policy). Also pin the values explicitly in the SLURM launcher so the choice is visible and overridable per-run rather than buried in the config default. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- examples/training/smolvla2_hirobot.slurm | 2 ++ .../policies/smolvla2/configuration_smolvla2.py | 15 +++++++++++++-- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/examples/training/smolvla2_hirobot.slurm b/examples/training/smolvla2_hirobot.slurm index c1f950e8b..2a3eac1f8 100644 --- a/examples/training/smolvla2_hirobot.slurm +++ b/examples/training/smolvla2_hirobot.slurm @@ -63,6 +63,8 @@ accelerate launch --multi_gpu --num_processes="$NUM_PROCESSES" \ --policy.compile_model=false \ --policy.device=cuda \ --policy.tokenizer_max_length=512 \ + --policy.text_loss_weight=1.0 \ + --policy.flow_loss_weight=10.0 \ --steps="$STEPS" \ --policy.scheduler_decay_steps="$STEPS" \ --batch_size="$BATCH_SIZE" \ diff --git a/src/lerobot/policies/smolvla2/configuration_smolvla2.py b/src/lerobot/policies/smolvla2/configuration_smolvla2.py index 99ce917e3..bc24139fd 100644 --- a/src/lerobot/policies/smolvla2/configuration_smolvla2.py +++ b/src/lerobot/policies/smolvla2/configuration_smolvla2.py @@ -69,12 +69,23 @@ class SmolVLA2Config(SmolVLAConfig): matches its training distribution.""" # Loss weights -------------------------------------------------------- + # Pi 0.5 paper §IV.D (Eq. 1) sets α = 10 between the text-CE term + # and the flow-MSE term: L = H(text) + α * ‖ω - a - f_θ‖². The + # rationale is that actions are the primary output and the flow + # head should dominate the gradient signal; text is supervised as + # an auxiliary task and its CE scale (~0.5-2.0 in nats) tends to + # be larger than the flow MSE scale (~0.1-1.0), so without + # up-weighting the action head gets starved. We mirror the paper's + # split here: text_loss_weight=1, flow_loss_weight=10. text_loss_weight: float = 1.0 """Weight on the LM-head cross-entropy term. 
Set to ``0`` to disable text training entirely (reverts to flow-only / SmolVLA behaviour).""" - flow_loss_weight: float = 1.0 - """Weight on the action-expert flow-matching term.""" + flow_loss_weight: float = 10.0 + """Weight on the action-expert flow-matching term. Default 10.0 + matches Pi 0.5 paper's α (§IV.D). Set lower if the text head is + underfitting relative to the action expert; set higher if the + action expert is degrading because text loss dominates.""" # Backbone training --------------------------------------------------- unfreeze_lm_head: bool = True