Update reward config and model card template (#3625)

This commit is contained in:
Khalil Meftah
2026-05-18 13:12:15 +02:00
committed by GitHub
parent 01dcb4c292
commit 6e035fb169
2 changed files with 3 additions and 5 deletions
+3 -3
View File
@@ -90,9 +90,9 @@ class RewardModelConfig(draccus.ChoiceRegistry, HubMixin, abc.ABC):
def reward_delta_indices(self) -> list | None: # type: ignore[type-arg]
return None
@abc.abstractmethod
def get_optimizer_preset(self) -> OptimizerConfig:
raise NotImplementedError
def get_optimizer_preset(self) -> OptimizerConfig | None:
"""Default optimizer for this reward model, or ``None`` for zero-shot models."""
return None
def get_scheduler_preset(self) -> LRSchedulerConfig | None:
return None
@@ -41,8 +41,6 @@ For more details, see the [Physical Intelligence π₀ blog post](https://www.ph
For more details, see the [Physical Intelligence π₀.₅ blog post](https://www.physicalintelligence.company/blog/pi05).
{% elif model_name == "gaussian_actor" %}
This is a Gaussian Actor policy (Gaussian policy with a tanh squash) — the policy-side component used by [Soft Actor-Critic (SAC)](https://huggingface.co/papers/1801.01290) and related maximum-entropy continuous-control algorithms.
{% elif model_name == "reward_classifier" %}
A reward classifier is a lightweight neural network that scores observations or trajectories for task success, providing a learned reward signal or offline evaluation when explicit rewards are unavailable.
{% else %}
_Model type not recognized — please update this template._
{% endif %}