fix(peft): fix LoRA resume from Hub (PosixPath + double wrap) (#3485)

This commit is contained in:
Jash Shah
2026-05-04 01:52:37 -07:00
committed by GitHub
parent d656da8ccc
commit fdbfc015a2
2 changed files with 12 additions and 5 deletions
+4 -2
View File
@@ -514,7 +514,7 @@ def make_policy(
logging.info("Loading policy's PEFT adapter.")
-peft_pretrained_path = cfg.pretrained_path
+peft_pretrained_path = str(cfg.pretrained_path)
peft_config = PeftConfig.from_pretrained(peft_pretrained_path)
kwargs["pretrained_name_or_path"] = peft_config.base_model_name_or_path
@@ -527,7 +527,9 @@ def make_policy(
)
policy = policy_cls.from_pretrained(**kwargs)
-policy = PeftModel.from_pretrained(policy, peft_pretrained_path, config=peft_config)
+policy = PeftModel.from_pretrained(
+policy, peft_pretrained_path, config=peft_config, is_trainable=True
+)
else:
# Make a fresh policy.
+5
View File
@@ -277,6 +277,11 @@ def train(cfg: TrainPipelineConfig, accelerator: "Accelerator | None" = None):
if cfg.peft is not None:
if cfg.is_reward_model_training:
raise ValueError("PEFT is only supported for policy training. ")
from peft import PeftModel
if isinstance(policy, PeftModel):
logging.info("PEFT adapter already loaded from checkpoint, skipping wrap_with_peft.")
else:
logging.info("Using PEFT! Wrapping model.")
peft_cli_overrides = dataclasses.asdict(cfg.peft)
policy = policy.wrap_with_peft(peft_cli_overrides=peft_cli_overrides)