Update reward config and model card template (#3625)

2026-07-23 17:56:07 +00:00 · 2026-05-18 13:12:15 +02:00
parent 01dcb4c292
commit 6e035fb169
2 changed files with 3 additions and 5 deletions
@@ -90,9 +90,9 @@ class RewardModelConfig(draccus.ChoiceRegistry, HubMixin, abc.ABC):
    def reward_delta_indices(self) -> list | None:  # type: ignore[type-arg]
        return None
-    @abc.abstractmethod
+    def get_optimizer_preset(self) -> OptimizerConfig | None:
-    def get_optimizer_preset(self) -> OptimizerConfig:
+        """Default optimizer for this reward model, or ``None`` for zero-shot models."""
-        raise NotImplementedError
+        return None
    def get_scheduler_preset(self) -> LRSchedulerConfig | None:
        return None
@@ -41,8 +41,6 @@ For more details, see the [Physical Intelligence π₀ blog post](https://www.ph
 For more details, see the [Physical Intelligence π₀.₅ blog post](https://www.physicalintelligence.company/blog/pi05).
 {% elif model_name == "gaussian_actor" %}
 This is a Gaussian Actor policy (Gaussian policy with a tanh squash) — the policy-side component used by [Soft Actor-Critic (SAC)](https://huggingface.co/papers/1801.01290) and related maximum-entropy continuous-control algorithms.
 {% elif model_name == "reward_classifier" %}
 A reward classifier is a lightweight neural network that scores observations or trajectories for task success, providing a learned reward signal or offline evaluation when explicit rewards are unavailable.
 {% else %}
 _Model type not recognized — please update this template._
 {% endif %}