Rename `return_extra` kwarg to `return_intermediate_predictions`

2026-08-03 15:09:51 +00:00 · 2026-06-10 13:50:59 +02:00
parent d1a8910f60
commit 811727d462
1 changed file with 9 additions and 7 deletions
@@ -40,7 +40,7 @@ T = TypeVar("T", bound="PreTrainedPolicy")

 class ActionSelectKwargs(TypedDict, total=False):
    noise: Tensor | None
-    return_extra: bool
+    return_intermediate_predictions: bool


 class PreTrainedPolicy(nn.Module, HubMixin, abc.ABC):
@@ -196,9 +196,10 @@ class PreTrainedPolicy(nn.Module, HubMixin, abc.ABC):
        Child classes using action chunking should use this method within `select_action` to form the action chunk
        cached for selection.

-        By default returns just the action `Tensor`. If `return_extra=True`, returns `(action, extra)`
-        where `extra` is a (possibly empty) `dict[str, Tensor]` of auxiliary outputs a policy may
-        expose (e.g. world-model predictions). Policies that produce nothing extra may ignore the kwarg.
+        By default returns just the action `Tensor`. If `return_intermediate_predictions=True`,
+        returns `(action, predictions)` where `predictions` is a (possibly empty) `dict[str, Tensor]`
+        of additional model predictions a policy may expose (e.g. world-model predicted frames).
+        Policies that produce nothing extra may ignore the kwarg.
        """
        raise NotImplementedError

@@ -211,9 +212,10 @@ class PreTrainedPolicy(nn.Module, HubMixin, abc.ABC):
        When the model uses a history of observations, or outputs a sequence of actions, this method deals
        with caching.

-        By default returns just the action `Tensor`. If `return_extra=True`, returns `(action, extra)`
-        where `extra` is a (possibly empty) `dict[str, Tensor]` of auxiliary outputs a policy may
-        expose (e.g. world-model predictions). Policies that produce nothing extra may ignore the kwarg.
+        By default returns just the action `Tensor`. If `return_intermediate_predictions=True`,
+        returns `(action, predictions)` where `predictions` is a (possibly empty) `dict[str, Tensor]`
+        of additional model predictions a policy may expose (e.g. world-model predicted frames).
+        Policies that produce nothing extra may ignore the kwarg.
        """
        raise NotImplementedError