From 5e3b9ba82cccb62f915ad78f4b86ec86283b297c Mon Sep 17 00:00:00 2001 From: Pepijn Date: Mon, 18 May 2026 10:41:13 +0200 Subject: [PATCH] tune(smolvla2): override optimizer_lr to 2.5e-5 for pretrained-LM fine-tuning SmolVLA's 1e-4 is safe only because it freezes the language head. SmolVLA2 unfreezes lm_head + the last text layer and fine-tunes the pretrained SmolVLM2 language weights; 1e-4 is too aggressive there and destabilises generation into degenerate repetition. Match pi05's 2.5e-5 peak LR. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../policies/smolvla2/configuration_smolvla2.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/src/lerobot/policies/smolvla2/configuration_smolvla2.py b/src/lerobot/policies/smolvla2/configuration_smolvla2.py index 19923ac47..86231e6ff 100644 --- a/src/lerobot/policies/smolvla2/configuration_smolvla2.py +++ b/src/lerobot/policies/smolvla2/configuration_smolvla2.py @@ -91,6 +91,20 @@ class SmolVLA2Config(SmolVLAConfig): relative to the action expert; set higher if the action expert is degrading because text loss dominates.""" + # Optimizer ----------------------------------------------------------- + optimizer_lr: float = 2.5e-5 + """Peak learning rate. Overrides ``SmolVLAConfig``'s ``1e-4``. + + SmolVLA can afford ``1e-4`` because it *freezes* the language head — + only the from-scratch action expert sees that LR. SmolVLA2 unfreezes + ``lm_head`` + the last text layer and fine-tunes the **pretrained** + SmolVLM2 language weights, and ``1e-4`` is too aggressive for a + pretrained LM: it destabilises the language representations and + collapses generation into degenerate repetition. ``2.5e-5`` matches + pi05's peak LR (openpi ``CosineDecaySchedule``), the comparable + text-co-trained policy. The action expert trains slightly slower at + this LR, so budget more steps.""" + # Backbone training --------------------------------------------------- unfreeze_lm_head: bool = True """Whether to unfreeze the SmolVLM ``lm_head`` (and the immediately