diff --git a/src/lerobot/configs/rewards.py b/src/lerobot/configs/rewards.py index a53d5a417..7e99e7f71 100644 --- a/src/lerobot/configs/rewards.py +++ b/src/lerobot/configs/rewards.py @@ -90,9 +90,9 @@ class RewardModelConfig(draccus.ChoiceRegistry, HubMixin, abc.ABC): def reward_delta_indices(self) -> list | None: # type: ignore[type-arg] return None - @abc.abstractmethod - def get_optimizer_preset(self) -> OptimizerConfig: - raise NotImplementedError + def get_optimizer_preset(self) -> OptimizerConfig | None: + """Default optimizer for this reward model, or ``None`` for zero-shot models.""" + return None def get_scheduler_preset(self) -> LRSchedulerConfig | None: return None diff --git a/src/lerobot/templates/lerobot_modelcard_template.md b/src/lerobot/templates/lerobot_modelcard_template.md index f0dd0da07..b93e83b6e 100644 --- a/src/lerobot/templates/lerobot_modelcard_template.md +++ b/src/lerobot/templates/lerobot_modelcard_template.md @@ -41,8 +41,6 @@ For more details, see the [Physical Intelligence π₀ blog post](https://www.ph For more details, see the [Physical Intelligence π₀.₅ blog post](https://www.physicalintelligence.company/blog/pi05). {% elif model_name == "gaussian_actor" %} This is a Gaussian Actor policy (Gaussian policy with a tanh squash) — the policy-side component used by [Soft Actor-Critic (SAC)](https://huggingface.co/papers/1801.01290) and related maximum-entropy continuous-control algorithms. -{% elif model_name == "reward_classifier" %} -A reward classifier is a lightweight neural network that scores observations or trajectories for task success, providing a learned reward signal or offline evaluation when explicit rewards are unavailable. {% else %} _Model type not recognized — please update this template._ {% endif %}