From 2bf6359d241a9389451216e49d59ef2256cd7dc9 Mon Sep 17 00:00:00 2001
From: Jade Choghari <chogharijade@gmail.com>
Date: Tue, 27 Jan 2026 11:14:22 +0000
Subject: [PATCH] Print video decode failure context; pi05_full: tokenizer_max_length=48, clone layernorm input

---
 src/lerobot/datasets/lerobot_dataset.py              | 12 +++++++++++-
 src/lerobot/policies/pi05_full/configuration_pi05.py |  4 +---
 src/lerobot/policies/pi05_full/modeling_pi05.py      |  5 +++--
 3 files changed, 15 insertions(+), 6 deletions(-)

diff --git a/src/lerobot/datasets/lerobot_dataset.py b/src/lerobot/datasets/lerobot_dataset.py
index 5c8df37e3..f59db7a43 100644
--- a/src/lerobot/datasets/lerobot_dataset.py
+++ b/src/lerobot/datasets/lerobot_dataset.py
@@ -1070,7 +1070,17 @@ class LeRobotDataset(torch.utils.data.Dataset):
         if len(self.meta.video_keys) > 0:
             current_ts = item["timestamp"].item()
             query_timestamps = self._get_query_timestamps(current_ts, query_indices)
-            video_frames = self._query_videos(query_timestamps, ep_idx)
+            try:
+                video_frames = self._query_videos(query_timestamps, ep_idx)
+            except Exception as e:
+                print("\n" + "=" * 120)
+                print("[VIDEO DECODE FAILURE]")
+                print(f"item={item}")
+                print(f"query_indices={query_indices}")
+                print(f"query_timestamps={query_timestamps}")
+                print(f"ep_idx={ep_idx}")
+                print("=" * 120 + "\n")
+                raise
             item = {**video_frames, **item}
 
         if self.image_transforms is not None:
diff --git a/src/lerobot/policies/pi05_full/configuration_pi05.py b/src/lerobot/policies/pi05_full/configuration_pi05.py
index 3b3356f1e..fa91e3edb 100644
--- a/src/lerobot/policies/pi05_full/configuration_pi05.py
+++ b/src/lerobot/policies/pi05_full/configuration_pi05.py
@@ -61,8 +61,6 @@ class PI05FullConfig(PreTrainedConfig):
     # Add empty images. Used to add empty cameras when no image features are present.
     empty_cameras: int = 0
 
-    tokenizer_max_length: int = 200  # see openpi `__post_init__`
-
     normalization_mapping: dict[str, NormalizationMode] = field(
         default_factory=lambda: {
             "VISUAL": NormalizationMode.IDENTITY,
@@ -104,7 +102,7 @@ class PI05FullConfig(PreTrainedConfig):
     scheduler_decay_steps: int = 30_000
     scheduler_decay_lr: float = 2.5e-6
 
-    tokenizer_max_length: int = 200  # see openpi `__post_init__`
+    tokenizer_max_length: int = 48  # see openpi `__post_init__`
 
     def __post_init__(self):
         super().__post_init__()
diff --git a/src/lerobot/policies/pi05_full/modeling_pi05.py b/src/lerobot/policies/pi05_full/modeling_pi05.py
index 8a00215f0..641c11da9 100644
--- a/src/lerobot/policies/pi05_full/modeling_pi05.py
+++ b/src/lerobot/policies/pi05_full/modeling_pi05.py
@@ -375,8 +375,9 @@ def compute_layer_complete(
         out_emb = layer.self_attn.o_proj(att_output[:, start_pos:end_pos])
         # first residual
         out_emb = modeling_gemma._gated_residual(hidden_states, out_emb, gates[i])  # noqa: SLF001
-        after_first_residual = out_emb.clone()
-        out_emb, gate = layer.post_attention_layernorm(out_emb, cond=adarms_cond[i])
+        # Store reference instead of clone - we need original for second residual
+        after_first_residual = out_emb
+        out_emb, gate = layer.post_attention_layernorm(out_emb.clone(), cond=adarms_cond[i])
         # convert to bfloat16 if the next layer (mlp) uses bfloat16
         if layer.mlp.up_proj.weight.dtype == torch.bfloat16:
             out_emb = out_emb.to(dtype=torch.bfloat16)