mirror of
https://github.com/huggingface/lerobot.git
synced 2026-05-21 19:49:49 +00:00
fix(datasets): remove unreachable timestamp branch in add_frame (#3163)
* fix(datasets): remove unreachable timestamp branch in add_frame and document caller contract
- Remove dead code: frame.pop("timestamp") branch in add_frame() could never
execute because validate_frame() raises ValueError for any DEFAULT_FEATURES
key (including timestamp) before we reach that line.
- Expand add_frame() docstring: explicitly document that timestamp and
frame_index must NOT be passed by the caller.
- Add explanatory comment in validate_frame(): clarifies why DEFAULT_FEATURES
are excluded from expected_features, preventing future re-introduction of
the dead branch.
The dead branch originated in #1200, which fixed a shape-(1,) mismatch for a
code path that was subsequently made unreachable by a refactor of validate_frame.
* chore(datasets): narrow PR scope
* fix(datasets): move add_frame timestamp cleanup to dataset_writer
This commit is contained in:
@@ -155,7 +155,16 @@ class DatasetWriter:
|
|||||||
self.image_writer.save_image(image=image, fpath=fpath, compress_level=compress_level)
|
self.image_writer.save_image(image=image, fpath=fpath, compress_level=compress_level)
|
||||||
|
|
||||||
def add_frame(self, frame: dict) -> None:
|
def add_frame(self, frame: dict) -> None:
|
||||||
"""Add a frame to the episode_buffer. Images are written to a temporary directory."""
|
"""
|
||||||
|
Add a single frame to the current episode buffer.
|
||||||
|
|
||||||
|
Apart from images written to a temporary directory, nothing is written to disk
|
||||||
|
until ``save_episode()`` is called.
|
||||||
|
|
||||||
|
The caller must provide all user-defined features plus ``"task"``, and must
|
||||||
|
not provide ``"timestamp"`` or ``"frame_index"``; those are computed
|
||||||
|
automatically.
|
||||||
|
"""
|
||||||
# Convert torch to numpy if needed
|
# Convert torch to numpy if needed
|
||||||
for name in frame:
|
for name in frame:
|
||||||
if isinstance(frame[name], torch.Tensor):
|
if isinstance(frame[name], torch.Tensor):
|
||||||
@@ -168,7 +177,7 @@ class DatasetWriter:
|
|||||||
|
|
||||||
# Automatically add frame_index and timestamp to episode buffer
|
# Automatically add frame_index and timestamp to episode buffer
|
||||||
frame_index = self.episode_buffer["size"]
|
frame_index = self.episode_buffer["size"]
|
||||||
timestamp = frame.pop("timestamp") if "timestamp" in frame else frame_index / self._meta.fps
|
timestamp = frame_index / self._meta.fps
|
||||||
self.episode_buffer["frame_index"].append(frame_index)
|
self.episode_buffer["frame_index"].append(frame_index)
|
||||||
self.episode_buffer["timestamp"].append(timestamp)
|
self.episode_buffer["timestamp"].append(timestamp)
|
||||||
self.episode_buffer["task"].append(frame.pop("task"))
|
self.episode_buffer["task"].append(frame.pop("task"))
|
||||||
|
|||||||
@@ -365,6 +365,10 @@ def get_delta_indices(delta_timestamps: dict[str, list[float]], fps: int) -> dic
|
|||||||
|
|
||||||
|
|
||||||
def validate_frame(frame: dict, features: dict) -> None:
|
def validate_frame(frame: dict, features: dict) -> None:
|
||||||
|
# DEFAULT_FEATURES (timestamp, frame_index, episode_index, index, task_index) are
|
||||||
|
# auto-populated by the recording pipeline (add_frame / save_episode) and must not
|
||||||
|
# be supplied by the caller. Excluding them here means any frame dict that contains
|
||||||
|
# these keys will be rejected as extra features.
|
||||||
expected_features = set(features) - set(DEFAULT_FEATURES)
|
expected_features = set(features) - set(DEFAULT_FEATURES)
|
||||||
actual_features = set(frame)
|
actual_features = set(frame)
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user