From 3b185f7f9d8faf16bbdf3a833e670feac08f881e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=9B=9B=E4=B8=83?= <41624527+SevenFo@users.noreply.github.com> Date: Sat, 28 Mar 2026 18:37:57 +0800 Subject: [PATCH] fix(datasets): remove unreachable timestamp branch in add_frame (#3163) * fix(datasets): remove unreachable timestamp branch in add_frame and document caller contract - Remove dead code: frame.pop("timestamp") branch in add_frame() could never execute because validate_frame() raises ValueError for any DEFAULT_FEATURES key (including timestamp) before we reach that line. - Expand add_frame() docstring: explicitly document that timestamp and frame_index must NOT be passed by the caller. - Add explanatory comment in validate_frame(): clarifies why DEFAULT_FEATURES are excluded from expected_features, preventing future re-introduction of the dead branch. The dead branch originated in #1200, which fixed a shape-(1,) mismatch for a code path that was subsequently made unreachable by a refactor of validate_frame. * chore(datasets): narrow PR scope * fix(datasets): move add_frame timestamp cleanup to dataset_writer --- src/lerobot/datasets/dataset_writer.py | 13 +++++++++++-- src/lerobot/datasets/feature_utils.py | 4 ++++ 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/src/lerobot/datasets/dataset_writer.py b/src/lerobot/datasets/dataset_writer.py index b74b18e0c..787ecd337 100644 --- a/src/lerobot/datasets/dataset_writer.py +++ b/src/lerobot/datasets/dataset_writer.py @@ -155,7 +155,16 @@ class DatasetWriter: self.image_writer.save_image(image=image, fpath=fpath, compress_level=compress_level) def add_frame(self, frame: dict) -> None: - """Add a frame to the episode_buffer. Images are written to a temporary directory.""" + """ + Add a single frame to the current episode buffer. + + Apart from images written to a temporary directory, nothing is written to disk + until ``save_episode()`` is called. + + The caller must provide all user-defined features plus ``"task"``, and must + not provide ``"timestamp"`` or ``"frame_index"``; those are computed + automatically. + """ # Convert torch to numpy if needed for name in frame: if isinstance(frame[name], torch.Tensor): @@ -168,7 +177,7 @@ class DatasetWriter: # Automatically add frame_index and timestamp to episode buffer frame_index = self.episode_buffer["size"] - timestamp = frame.pop("timestamp") if "timestamp" in frame else frame_index / self._meta.fps + timestamp = frame_index / self._meta.fps self.episode_buffer["frame_index"].append(frame_index) self.episode_buffer["timestamp"].append(timestamp) self.episode_buffer["task"].append(frame.pop("task")) diff --git a/src/lerobot/datasets/feature_utils.py b/src/lerobot/datasets/feature_utils.py index d9a3c6301..46154d92a 100644 --- a/src/lerobot/datasets/feature_utils.py +++ b/src/lerobot/datasets/feature_utils.py @@ -365,6 +365,10 @@ def get_delta_indices(delta_timestamps: dict[str, list[float]], fps: int) -> dic def validate_frame(frame: dict, features: dict) -> None: + # DEFAULT_FEATURES (timestamp, frame_index, episode_index, index, task_index) are + # auto-populated by the recording pipeline (add_frame / save_episode) and must not + # be supplied by the caller. Excluding them here means any frame dict that contains + # these keys will be rejected as extra features. expected_features = set(features) - set(DEFAULT_FEATURES) actual_features = set(frame)