Refactor: Move PEFT config from training script to policy level (#2806)

* move peft config from `lerobot_train` to policy level * Update src/lerobot/scripts/lerobot_train.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> Signed-off-by: Michel Aractingi <michel.aractingi@huggingface.co> * copilot response * Change the polciy function to return targets rather than peft config.`_get_default_peft_targets()` override in PI0, PI0.5, SmolVLA * remove none check when building config dict --------- Signed-off-by: Michel Aractingi <michel.aractingi@huggingface.co>
2026-05-20 11:09:59 +00:00 · 2026-01-16 17:14:28 +01:00
parent 112b2d173a
commit 19dce78457
5 changed files with 211 additions and 87 deletions
@@ -148,92 +148,6 @@ def update_policy(
    return train_metrics, output_dict


-def get_default_peft_configuration(policy_type):
-    """Build a basic PEFT configuration for the given policy type assuming that we train a policy from a checkpoint."""
-
-    common_projections = "state_proj|action_in_proj|action_out_proj|action_time_mlp_in|action_time_mlp_out"
-
-    if policy_type == "smolvla":
-        return {
-            "target_modules": rf"(model\.vlm_with_expert\.lm_expert\..*\.(q|v)_proj|model\.({common_projections}))",
-            "modules_to_save": [],
-        }
-    elif policy_type in ("pi0", "pi05"):
-        return {
-            "target_modules": rf"(.*\.gemma_expert\..*\.self_attn.(q|v)_proj|model\.({common_projections}))",
-            "modules_to_save": [],
-        }
-
-    return {"modules_to_save": None}
-
-
-def wrap_policy_in_peft_model(cfg, policy):
-    from peft import PEFT_TYPE_TO_CONFIG_MAPPING, PeftType, get_peft_model
-
-    # Disable all gradients because we'll only train the parameters selected by the PEFT method.
-    # Layers that should receive gradients anyway need to be listed in `modules_to_save`.
-    for p in policy.parameters():
-        p.requires_grad_(False)
-
-    if not cfg.policy.pretrained_path:
-        raise ValueError(
-            "Training from scratch using PEFT. This is unlikely to yield good results. "
-            "Supply a `policy.path` to fine-tune an existing model."
-        )
-
-    if cfg.policy.type == "smolvla" and not cfg.policy.load_vlm_weights:
-        logging.warning(
-            "Training SmolVLA from scratch using PEFT. This is unlikely to yield good results. Set "
-            "`load_vlm_weights=True` to fine-tune the existing policy."
-        )
-
-    peft_config_policy = get_default_peft_configuration(cfg.policy.type)
-    peft_config_cli = dataclasses.asdict(cfg.peft) if cfg.peft else {}
-    peft_config_cli["modules_to_save"] = peft_config_cli["full_training_modules"]  # compatibility with PEFT
-    peft_method_type = PeftType[peft_config_cli["method_type"].upper()]
-    peft_config_cls = PEFT_TYPE_TO_CONFIG_MAPPING[peft_method_type]
-
-    # Handle specific CLI overrides
-    for key in ["target_modules", "modules_to_save", "r"]:
-        if peft_config_cli[key] is not None:
-            peft_config_policy[key] = peft_config_cli[key]
-
-    if "target_modules" not in peft_config_policy:
-        raise ValueError(
-            f"There is no default `target_modules` value for policy {cfg.policy.type}. Please pass it manually."
-        )
-
-    # Init method depends on the used PEFT method, your specific PEFT method
-    # might not be considered here, in that case an error is raised.
-    if peft_config_cli["init_type"] is not None:
-        if peft_method_type == "LORA":
-            peft_config_policy["init_lora_weights"] = peft_config_cli["init_type"]
-        elif peft_method_type == "MISS":
-            peft_config_policy["init_weights"] = peft_config_cli["init_type"]
-        else:
-            raise ValueError(
-                f"Init type {peft_config_cli['init_type']} unknown for PEFT method {peft_method_type}."
-            )
-
-    # PEFT uses this attribute to set adapter_config.base_name_or_path which we use for loading the
-    # correct base model in `make_policy` since in a PEFT loading setting we only get the path to the
-    # adapter, not the base model.
-    if policy.config.pretrained_path:
-        policy.name_or_path = str(policy.config.pretrained_path)
-
-    # Finally wrap the policy in a PEFT model
-    policy = get_peft_model(
-        policy,
-        peft_config_cls(**peft_config_policy),
-    )
-
-    # Make sure that the config is tagged as using PEFT so that the loading code can take the
-    # appropriate steps to use the adapter weights and the PEFT config instead of the full model weights.
-    policy.config.use_peft = True
-
-    return policy
-
-
@parser.wrap()
 def train(cfg: TrainPipelineConfig, accelerator: Accelerator | None = None):
    """
@@ -326,7 +240,9 @@ def train(cfg: TrainPipelineConfig, accelerator: Accelerator | None = None):

    if cfg.peft is not None:
        logging.info("Using PEFT! Wrapping model.")
-        policy = wrap_policy_in_peft_model(cfg, policy)
+        # Convert CLI peft config to dict for overrides
+        peft_cli_overrides = dataclasses.asdict(cfg.peft)
+        policy = policy.wrap_with_peft(peft_cli_overrides=peft_cli_overrides)

    # Wait for all processes to finish policy creation before continuing
    accelerator.wait_for_everyone()