Compare commits

..

2 Commits

Author SHA1 Message Date
github-actions[bot] 058378e82f chore(dependencies): update uv.lock 2026-06-10 10:43:18 +00:00
Steven Palma d947983a78 chore(dependencies): update mujoco transitives 2026-06-10 12:11:10 +02:00
2 changed files with 886 additions and 853 deletions
+2 -17
View File
@@ -40,7 +40,6 @@ T = TypeVar("T", bound="PreTrainedPolicy")
class ActionSelectKwargs(TypedDict, total=False):
noise: Tensor | None
return_intermediate_predictions: bool
class PreTrainedPolicy(nn.Module, HubMixin, abc.ABC):
@@ -188,34 +187,20 @@ class PreTrainedPolicy(nn.Module, HubMixin, abc.ABC):
raise NotImplementedError
@abc.abstractmethod
def predict_action_chunk(
self, batch: dict[str, Tensor], **kwargs: Unpack[ActionSelectKwargs]
) -> Tensor | tuple[Tensor, dict[str, Tensor]]:
def predict_action_chunk(self, batch: dict[str, Tensor], **kwargs: Unpack[ActionSelectKwargs]) -> Tensor:
"""Returns the action chunk (for action chunking policies) for a given observation, potentially in batch mode.
Child classes using action chunking should use this method within `select_action` to form the action chunk
cached for selection.
By default returns just the action `Tensor`. If `return_intermediate_predictions=True`,
returns `(action, predictions)` where `predictions` is a (possibly empty) `dict[str, Tensor]`
of additional model predictions a policy may expose (e.g. world-model predicted frames).
Policies that produce nothing extra may ignore the kwarg.
"""
raise NotImplementedError
@abc.abstractmethod
def select_action(
self, batch: dict[str, Tensor], **kwargs: Unpack[ActionSelectKwargs]
) -> Tensor | tuple[Tensor, dict[str, Tensor]]:
def select_action(self, batch: dict[str, Tensor], **kwargs: Unpack[ActionSelectKwargs]) -> Tensor:
"""Return one action to run in the environment (potentially in batch mode).
When the model uses a history of observations, or outputs a sequence of actions, this method deals
with caching.
By default returns just the action `Tensor`. If `return_intermediate_predictions=True`,
returns `(action, predictions)` where `predictions` is a (possibly empty) `dict[str, Tensor]`
of additional model predictions a policy may expose (e.g. world-model predicted frames).
Policies that produce nothing extra may ignore the kwarg.
"""
raise NotImplementedError
Generated
+884 -836
View File
File diff suppressed because it is too large Load Diff