mirror of
https://github.com/huggingface/lerobot.git
synced 2026-05-21 03:30:10 +00:00
Update reward config and model card template (#3625)
This commit is contained in:
@@ -90,9 +90,9 @@ class RewardModelConfig(draccus.ChoiceRegistry, HubMixin, abc.ABC):
|
|||||||
def reward_delta_indices(self) -> list | None: # type: ignore[type-arg]
|
def reward_delta_indices(self) -> list | None: # type: ignore[type-arg]
|
||||||
return None
|
return None
|
||||||
|
|
||||||
@abc.abstractmethod
|
def get_optimizer_preset(self) -> OptimizerConfig | None:
|
||||||
def get_optimizer_preset(self) -> OptimizerConfig:
|
"""Default optimizer for this reward model, or ``None`` for zero-shot models."""
|
||||||
raise NotImplementedError
|
return None
|
||||||
|
|
||||||
def get_scheduler_preset(self) -> LRSchedulerConfig | None:
|
def get_scheduler_preset(self) -> LRSchedulerConfig | None:
|
||||||
return None
|
return None
|
||||||
|
|||||||
@@ -41,8 +41,6 @@ For more details, see the [Physical Intelligence π₀ blog post](https://www.ph
|
|||||||
For more details, see the [Physical Intelligence π₀.₅ blog post](https://www.physicalintelligence.company/blog/pi05).
|
For more details, see the [Physical Intelligence π₀.₅ blog post](https://www.physicalintelligence.company/blog/pi05).
|
||||||
{% elif model_name == "gaussian_actor" %}
|
{% elif model_name == "gaussian_actor" %}
|
||||||
This is a Gaussian Actor policy (Gaussian policy with a tanh squash) — the policy-side component used by [Soft Actor-Critic (SAC)](https://huggingface.co/papers/1801.01290) and related maximum-entropy continuous-control algorithms.
|
This is a Gaussian Actor policy (Gaussian policy with a tanh squash) — the policy-side component used by [Soft Actor-Critic (SAC)](https://huggingface.co/papers/1801.01290) and related maximum-entropy continuous-control algorithms.
|
||||||
{% elif model_name == "reward_classifier" %}
|
|
||||||
A reward classifier is a lightweight neural network that scores observations or trajectories for task success, providing a learned reward signal or offline evaluation when explicit rewards are unavailable.
|
|
||||||
{% else %}
|
{% else %}
|
||||||
_Model type not recognized — please update this template._
|
_Model type not recognized — please update this template._
|
||||||
{% endif %}
|
{% endif %}
|
||||||
|
|||||||
Reference in New Issue
Block a user