renaming to return_intermediate_predictions

feat(policy): adding return_extra to policy contracts
2026-06-18 08:47:05 +00:00 · 2026-06-10 13:50:59 +02:00 · 2026-06-10 11:23:30 +00:00
1 changed file with 17 additions and 2 deletions
@@ -40,6 +40,7 @@ T = TypeVar("T", bound="PreTrainedPolicy")
 class ActionSelectKwargs(TypedDict, total=False):
    noise: Tensor | None
    return_intermediate_predictions: bool
 class PreTrainedPolicy(nn.Module, HubMixin, abc.ABC):
@@ -187,20 +188,34 @@ class PreTrainedPolicy(nn.Module, HubMixin, abc.ABC):
        raise NotImplementedError
    @abc.abstractmethod
-    def predict_action_chunk(self, batch: dict[str, Tensor], **kwargs: Unpack[ActionSelectKwargs]) -> Tensor:
+    def predict_action_chunk(
        self, batch: dict[str, Tensor], **kwargs: Unpack[ActionSelectKwargs]
    ) -> Tensor | tuple[Tensor, dict[str, Tensor]]:
        """Returns the action chunk (for action chunking policies) for a given observation, potentially in batch mode.
        Child classes using action chunking should use this method within `select_action` to form the action chunk
        cached for selection.
        By default returns just the action `Tensor`. If `return_intermediate_predictions=True`,
        returns `(action, predictions)` where `predictions` is a (possibly empty) `dict[str, Tensor]`
        of additional model predictions a policy may expose (e.g. world-model predicted frames).
        Policies that produce nothing extra may ignore the kwarg.
        """
        raise NotImplementedError
    @abc.abstractmethod
-    def select_action(self, batch: dict[str, Tensor], **kwargs: Unpack[ActionSelectKwargs]) -> Tensor:
+    def select_action(
        self, batch: dict[str, Tensor], **kwargs: Unpack[ActionSelectKwargs]
    ) -> Tensor | tuple[Tensor, dict[str, Tensor]]:
        """Return one action to run in the environment (potentially in batch mode).
        When the model uses a history of observations, or outputs a sequence of actions, this method deals
        with caching.
        By default returns just the action `Tensor`. If `return_intermediate_predictions=True`,
        returns `(action, predictions)` where `predictions` is a (possibly empty) `dict[str, Tensor]`
        of additional model predictions a policy may expose (e.g. world-model predicted frames).
        Policies that produce nothing extra may ignore the kwarg.
        """
        raise NotImplementedError
Author	SHA1	Message	Date
Maximellerbach	811727d462	renaming to return_intermediate_predictions	2026-06-10 13:50:59 +02:00
Maxime Ellerbach	d1a8910f60	feat(policy): adding return_extra to policy contracts	2026-06-10 11:23:30 +00:00