diff --git a/src/lerobot/configs/policies.py b/src/lerobot/configs/policies.py index 91701af6d..b0f003519 100644 --- a/src/lerobot/configs/policies.py +++ b/src/lerobot/configs/policies.py @@ -79,6 +79,8 @@ class PreTrainedConfig(draccus.ChoiceRegistry, HubMixin, abc.ABC): # type: igno # Either the repo ID of a model hosted on the Hub or a path to a directory containing weights # saved using `Policy.save_pretrained`. If not provided, the policy is initialized from scratch. pretrained_path: Path | None = None + # Optional Hub revision (commit hash, branch, or tag) to pin the pretrained model version. + pretrained_revision: str | None = None def __post_init__(self) -> None: if not self.device or not is_torch_device_available(self.device): diff --git a/src/lerobot/configs/rewards.py b/src/lerobot/configs/rewards.py index 7e99e7f71..92490bc9f 100644 --- a/src/lerobot/configs/rewards.py +++ b/src/lerobot/configs/rewards.py @@ -56,6 +56,8 @@ class RewardModelConfig(draccus.ChoiceRegistry, HubMixin, abc.ABC): device: str | None = None pretrained_path: str | None = None + # Optional Hub revision (commit hash, branch, or tag) to pin the pretrained reward model version. + pretrained_revision: str | None = None push_to_hub: bool = False repo_id: str | None = None diff --git a/src/lerobot/policies/factory.py b/src/lerobot/policies/factory.py index a42b38ba4..b82eaeb72 100644 --- a/src/lerobot/policies/factory.py +++ b/src/lerobot/policies/factory.py @@ -252,6 +252,7 @@ class ProcessorConfigKwargs(TypedDict, total=False): def make_pre_post_processors( policy_cfg: PreTrainedConfig, pretrained_path: str | None = None, + pretrained_revision: str | None = None, **kwargs: Unpack[ProcessorConfigKwargs], ) -> tuple[ PolicyProcessorPipeline[dict[str, Any], dict[str, Any]], @@ -309,6 +310,7 @@ def make_pre_post_processors( overrides=kwargs.get("preprocessor_overrides", {}), to_transition=batch_to_transition, to_output=transition_to_batch, + revision=pretrained_revision, ) postprocessor = PolicyProcessorPipeline.from_pretrained( pretrained_model_name_or_path=pretrained_path, @@ -318,6 +320,7 @@ def make_pre_post_processors( overrides=kwargs.get("postprocessor_overrides", {}), to_transition=policy_action_to_transition, to_output=transition_to_policy_action, + revision=pretrained_revision, ) _reconnect_relative_absolute_steps(preprocessor, postprocessor) return preprocessor, postprocessor @@ -557,6 +560,7 @@ def make_policy( # Load a pretrained policy and override the config if needed (for example, if there are inference-time # hyperparameters that we want to vary). kwargs["pretrained_name_or_path"] = cfg.pretrained_path + kwargs["revision"] = cfg.pretrained_revision policy = policy_cls.from_pretrained(**kwargs) elif cfg.pretrained_path and cfg.use_peft: # Load a pretrained PEFT model on top of the policy. The pretrained path points to the folder/repo diff --git a/src/lerobot/rewards/factory.py b/src/lerobot/rewards/factory.py index 2d73ae575..fee90c211 100644 --- a/src/lerobot/rewards/factory.py +++ b/src/lerobot/rewards/factory.py @@ -124,6 +124,7 @@ def make_reward_model(cfg: RewardModelConfig, **kwargs) -> PreTrainedRewardModel if cfg.pretrained_path: kwargs["pretrained_name_or_path"] = cfg.pretrained_path + kwargs["revision"] = cfg.pretrained_revision reward_model = reward_cls.from_pretrained(**kwargs) else: reward_model = reward_cls(**kwargs) diff --git a/src/lerobot/scripts/lerobot_train.py b/src/lerobot/scripts/lerobot_train.py index 9e4a9a5b5..bf91d83a1 100644 --- a/src/lerobot/scripts/lerobot_train.py +++ b/src/lerobot/scripts/lerobot_train.py @@ -346,6 +346,7 @@ def train(cfg: TrainPipelineConfig, accelerator: "Accelerator | None" = None): preprocessor, postprocessor = make_pre_post_processors( policy_cfg=cfg.policy, pretrained_path=processor_pretrained_path, + pretrained_revision=getattr(cfg.policy, "pretrained_revision", None), **processor_kwargs, )