renaming to return_intermediate_predictions

feat(policy): adding return_extra to policy contracts
2026-06-12 05:59:53 +00:00 · 2026-06-10 13:50:59 +02:00 · 2026-06-10 11:23:30 +00:00
5 changed files with 24 additions and 69 deletions
@@ -30,7 +30,6 @@ class EpisodeAwareSampler:
        drop_n_first_frames: int = 0,
        drop_n_last_frames: int = 0,
        shuffle: bool = False,
-        generator: torch.Generator | None = None,
    ):
        """Sampler that optionally incorporates episode boundary information.

@@ -42,10 +41,6 @@ class EpisodeAwareSampler:
            drop_n_first_frames: Number of frames to drop from the start of each episode.
            drop_n_last_frames: Number of frames to drop from the end of each episode.
            shuffle: Whether to shuffle the indices.
-            generator: Generator used for shuffling. Exposing this attribute (even when None) lets
-                       `accelerate` register it as the synchronized RNG in distributed training, so
-                       every rank draws the same permutation and batch shards stay disjoint. When
-                       None, shuffling falls back to the global torch RNG.
        """
        if drop_n_first_frames < 0:
            raise ValueError(f"drop_n_first_frames must be >= 0, got {drop_n_first_frames}")
@@ -78,11 +73,10 @@ class EpisodeAwareSampler:

        self.indices = indices
        self.shuffle = shuffle
-        self.generator = generator

    def __iter__(self) -> Iterator[int]:
        if self.shuffle:
-            for i in torch.randperm(len(self.indices), generator=self.generator):
+            for i in torch.randperm(len(self.indices)):
                yield self.indices[i]
        else:
            for i in self.indices:
@@ -481,10 +481,8 @@ def reencode_video(
    encoder_threads: int | None = None,
    log_level: int | None = av.logging.WARNING,
    overwrite: bool = False,
-    start_time_s: float | None = None,
-    end_time_s: float | None = None,
 ) -> None:
-    """Re-encode a video file, optionally trimming it to ``[start_time_s, end_time_s)``.
+    """Re-encode a video file using the given encoder configuration.

    Args:
        input_video_path: Existing video file to read.
@@ -493,17 +491,10 @@ def reencode_video(
        encoder_threads: Optional thread count forwarded to :meth:`VideoEncoderConfig.get_codec_options`.
        log_level: libav log level while encoding, or ``None`` to leave logging unchanged. Defaults to WARNING.
        overwrite: When ``False`` and ``output_video_path`` already exists, skip and log a warning.
-        start_time_s: When set, trim the output to start at this timestamp (seconds).
-        end_time_s: When set, trim the output to end at this timestamp (seconds, exclusive).
    """

    camera_encoder = camera_encoder or camera_encoder_defaults()

-    if (start_time_s is not None and start_time_s < 0) or (end_time_s is not None and end_time_s < 0):
-        raise ValueError(f"Trim times must be non-negative, got start={start_time_s}, end={end_time_s}.")
-    if start_time_s is not None and end_time_s is not None and end_time_s <= start_time_s:
-        raise ValueError(f"end_time_s ({end_time_s}) must be greater than start_time_s ({start_time_s}).")
-
    output_video_path = Path(output_video_path)

    if output_video_path.exists() and not overwrite:
@@ -535,10 +526,6 @@ def reencode_video(
            width = int(in_stream.width)
            height = int(in_stream.height)

-            # Seek to the keyframe at or before start_time_s to avoid reading from the start.
-            if start_time_s is not None:
-                src.seek(int(start_time_s * av.time_base), backward=True)
-
            with av.open(
                tmp_output_video_path,
                mode="w",
@@ -552,14 +539,7 @@ def reencode_video(
                out_stream.height = height

                for frame in src.decode(in_stream):
-                    frame_time_s = frame.time
-                    if start_time_s is not None and frame_time_s < start_time_s:
-                        continue
-                    if end_time_s is not None and frame_time_s >= end_time_s:
-                        break
                    frame = frame.reformat(width=width, height=height, format=pix_fmt)
-                    if start_time_s is not None:
-                        frame.pts = None  # reset timestamps so the trimmed output starts at t=0
                    packet = out_stream.encode(frame)
                    if packet:
                        dst.mux(packet)
@@ -40,6 +40,7 @@ T = TypeVar("T", bound="PreTrainedPolicy")

 class ActionSelectKwargs(TypedDict, total=False):
    noise: Tensor | None
+    return_intermediate_predictions: bool


 class PreTrainedPolicy(nn.Module, HubMixin, abc.ABC):
@@ -187,20 +188,34 @@ class PreTrainedPolicy(nn.Module, HubMixin, abc.ABC):
        raise NotImplementedError

    @abc.abstractmethod
-    def predict_action_chunk(self, batch: dict[str, Tensor], **kwargs: Unpack[ActionSelectKwargs]) -> Tensor:
+    def predict_action_chunk(
+        self, batch: dict[str, Tensor], **kwargs: Unpack[ActionSelectKwargs]
+    ) -> Tensor | tuple[Tensor, dict[str, Tensor]]:
        """Returns the action chunk (for action chunking policies) for a given observation, potentially in batch mode.

        Child classes using action chunking should use this method within `select_action` to form the action chunk
        cached for selection.
+
+        By default returns just the action `Tensor`. If `return_intermediate_predictions=True`,
+        returns `(action, predictions)` where `predictions` is a (possibly empty) `dict[str, Tensor]`
+        of additional model predictions a policy may expose (e.g. world-model predicted frames).
+        Policies that produce nothing extra may ignore the kwarg.
        """
        raise NotImplementedError

    @abc.abstractmethod
-    def select_action(self, batch: dict[str, Tensor], **kwargs: Unpack[ActionSelectKwargs]) -> Tensor:
+    def select_action(
+        self, batch: dict[str, Tensor], **kwargs: Unpack[ActionSelectKwargs]
+    ) -> Tensor | tuple[Tensor, dict[str, Tensor]]:
        """Return one action to run in the environment (potentially in batch mode).

        When the model uses a history of observations, or outputs a sequence of actions, this method deals
        with caching.
+
+        By default returns just the action `Tensor`. If `return_intermediate_predictions=True`,
+        returns `(action, predictions)` where `predictions` is a (possibly empty) `dict[str, Tensor]`
+        of additional model predictions a policy may expose (e.g. world-model predicted frames).
+        Policies that produce nothing extra may ignore the kwarg.
        """
        raise NotImplementedError

@@ -232,18 +232,15 @@ def train(cfg: TrainPipelineConfig, accelerator: "Accelerator | None" = None):
        torch.backends.cudnn.benchmark = True
    torch.backends.cuda.matmul.allow_tf32 = True

-    # Dataset loading synchronization: each node's local main process downloads first to avoid
-    # race conditions (the global main process only exists on node 0, so gating on it would let
-    # all ranks of the other nodes download and build the Arrow cache concurrently).
-    if accelerator.is_local_main_process:
-        if is_main_process:
-            logging.info("Creating dataset")
+    # Dataset loading synchronization: main process downloads first to avoid race conditions
+    if is_main_process:
+        logging.info("Creating dataset")
        dataset = make_dataset(cfg)

    accelerator.wait_for_everyone()

-    # Now all other processes can safely load the dataset from the local cache
-    if not accelerator.is_local_main_process:
+    # Now all other processes can safely load the dataset
+    if not is_main_process:
        dataset = make_dataset(cfg)

    # Create environment used for evaluating checkpoints during training on simulation data.
@@ -389,19 +386,12 @@ def train(cfg: TrainPipelineConfig, accelerator: "Accelerator | None" = None):
    # create dataloader for offline training
    if hasattr(active_cfg, "drop_n_last_frames"):
        shuffle = False
-        # A dedicated generator (rather than the global torch RNG) lets accelerator.prepare
-        # synchronize the shuffle permutation across ranks, keeping batch shards disjoint even
-        # when ranks consume the global RNG asymmetrically (e.g. eval on the main process only).
-        sampler_generator = torch.Generator()
-        if cfg.seed is not None:
-            sampler_generator.manual_seed(cfg.seed)
        sampler = EpisodeAwareSampler(
            dataset.meta.episodes["dataset_from_index"],
            dataset.meta.episodes["dataset_to_index"],
            episode_indices_to_use=dataset.episodes,
            drop_n_last_frames=active_cfg.drop_n_last_frames,
            shuffle=True,
-            generator=sampler_generator,
        )
    else:
        shuffle = True
@@ -114,30 +114,6 @@ def test_shuffle():
    assert set(sampler) == {0, 1, 2, 3, 4, 5}


-def test_shuffle_with_generator_is_deterministic():
-    # Two samplers shuffling with same-seed generators must yield identical permutations.
-    # This is what keeps batch shards disjoint across ranks in distributed training, where
-    # accelerate synchronizes the sampler's generator state instead of the global torch RNG.
-    sampler_a = EpisodeAwareSampler([0], [6], shuffle=True, generator=torch.Generator().manual_seed(42))
-    sampler_b = EpisodeAwareSampler([0], [6], shuffle=True, generator=torch.Generator().manual_seed(42))
-    assert list(sampler_a) == list(sampler_b)
-
-    # Desyncing the global RNG must not affect the permutation.
-    sampler_c = EpisodeAwareSampler([0], [6], shuffle=True, generator=torch.Generator().manual_seed(42))
-    order_before = list(sampler_c)
-    sampler_c.generator.manual_seed(42)
-    torch.randperm(1000)  # consume global RNG, as rank-asymmetric code (e.g. eval) would
-    assert list(sampler_c) == order_before
-
-
-def test_generator_attribute_defaults_to_none():
-    # accelerate detects synchronizable samplers via `hasattr(sampler, "generator")`,
-    # so the attribute must exist even when no generator is passed.
-    sampler = EpisodeAwareSampler([0], [6], shuffle=True)
-    assert sampler.generator is None
-    assert set(sampler) == {0, 1, 2, 3, 4, 5}
-
-
 def test_negative_drop_first_frames_raises():
    with pytest.raises(ValueError, match="drop_n_first_frames must be >= 0"):
        EpisodeAwareSampler([0], [10], drop_n_first_frames=-1)
Author	SHA1	Message	Date
Maximellerbach	811727d462	renaming to return_intermediate_predictions	2026-06-10 13:50:59 +02:00
Maxime Ellerbach	d1a8910f60	feat(policy): adding return_extra to policy contracts	2026-06-10 11:23:30 +00:00