From ecf342d4814a180c3a5eb9faccb85a52f3746f20 Mon Sep 17 00:00:00 2001
From: Maxime Ellerbach
Date: Tue, 16 Jun 2026 11:27:51 +0000
Subject: [PATCH] small fix for the preprocessor and padded images

---
 src/lerobot/policies/fastwam/processor_fastwam.py | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/src/lerobot/policies/fastwam/processor_fastwam.py b/src/lerobot/policies/fastwam/processor_fastwam.py
index 080fdb9a4..9c31543f9 100644
--- a/src/lerobot/policies/fastwam/processor_fastwam.py
+++ b/src/lerobot/policies/fastwam/processor_fastwam.py
@@ -55,19 +55,23 @@ class FastWAMImageCropResizeProcessorStep(ImageCropResizeProcessorStep):
     """
 
     def observation(self, observation: dict) -> dict:
+        # Delta-timestamp video loading adds `_is_pad` boolean masks ([B, T]) that share
+        # the `observation.images.` prefix but are padding flags, not frames. The base crop/resize
+        # matches on the `"image"` substring, so set these aside and restore them untouched rather
+        # than letting it try to resize a mask.
+        pad_keys = {key: value for key, value in observation.items() if "_is_pad" in key}
         leads: dict[str, tuple] = {}
-        flat_input = dict(observation)
-        for key, img in observation.items():
+        flat_input = {key: value for key, value in observation.items() if key not in pad_keys}
+        for key, img in list(flat_input.items()):
             if "image" in key and torch.is_tensor(img) and img.ndim > 4:
                 leads[key] = tuple(img.shape[:-3])
                 flat_input[key] = img.reshape(-1, *img.shape[-3:])
 
         processed = super().observation(flat_input)
-        if not leads:
-            return processed
 
         out = dict(processed)
         for key, lead in leads.items():
             im = processed[key]
             out[key] = im.reshape(*lead, *im.shape[-3:])
+        out.update(pad_keys)
         return out