diff --git a/src/lerobot/policies/pi052/configuration_pi052.py b/src/lerobot/policies/pi052/configuration_pi052.py index 5b4c25924..84a570c67 100644 --- a/src/lerobot/policies/pi052/configuration_pi052.py +++ b/src/lerobot/policies/pi052/configuration_pi052.py @@ -163,6 +163,23 @@ class PI052Config(PI05Config): """If True, route every transformer layer through the KI attention path that blocks action→VLM gradient flow on K/V.""" + # Learning-rate defaults -------------------------------------------- + # pi052 inherits π0.5's openpi-validated optimizer config (peak LR + # 2.5e-5, cosine→2.5e-6, 1k warmup, AdamW (0.9, 0.95), wd=0.01, + # grad_clip=1.0). The only place pi052 needs to diverge from pi05 + # is the LM-head LR multiplier: pi05 has no text supervision so the + # head doesn't get gradients; pi052 always has text supervision + # (subtask / memory / VQA) via the recipe, and under KI the LM head + # only sees gradients on ~30–45% of the batch (the text-CE mask + # share of the recipe). Under aggressive cosine decay this is too + # weak to keep the head pinned, so it drifts back toward PaliGemma's + # pretrained first-token bias. 5x is the documented fix + # (see ``PI05Config.lm_head_lr_scale`` docstring); the wiring is + # already in ``PI05Policy.get_optim_params`` — it splits the LM head + # + tied ``embed_tokens`` into their own param group while sharing + # the same cosine lambda, so the 5x ratio is preserved across decay. + lm_head_lr_scale: float = 5.0 + def __post_init__(self) -> None: super().__post_init__() # Backbone needs gradients flowing through the text head when