mirror of
https://github.com/huggingface/lerobot.git
synced 2026-05-18 10:10:08 +00:00
fix(dataset): adding metadata loading when reading from a dataset after writing (#3305)
* fix(one shot load): adding metadata loading when reading from a dataset after writing * refactor(one shot load): move metadata reload to ensure_readable() on LeRobotDatasetMetadata Move the metadata reload from DatasetReader.load_and_activate() to a new public ensure_readable() method on LeRobotDatasetMetadata, called from LeRobotDataset._ensure_reader(). This places lifecycle management in the right layer: metadata owns its readiness check, the dataset orchestrates the write-to-read transition, and the reader stays clean. Also adds a regression test using delta_timestamps to exercise the meta.episodes access path in the create -> write -> finalize -> read flow. Co-authored-by: Steven Palma <imstevenpmwork@users.noreply.github.com> --------- Co-authored-by: claude[bot] <41898282+claude[bot]@users.noreply.github.com> Co-authored-by: Steven Palma <imstevenpmwork@users.noreply.github.com>
This commit is contained in:
@@ -535,6 +535,31 @@ def test_getitem_works_after_finalize(tmp_path):
|
||||
assert "task" in item
|
||||
|
||||
|
||||
def test_getitem_after_finalize_with_delta_timestamps(tmp_path):
    """Indexing a finalized dataset succeeds when delta_timestamps need episode metadata.

    Regression test for https://github.com/huggingface/lerobot/pull/3305.

    In the create -> write -> finalize -> read flow, the write path flushes
    episodes to disk without refreshing them in memory, which left
    meta.episodes as None. Features that read meta.episodes (video decoding,
    delta_timestamps) would then crash with a TypeError.
    """
    ds_root = tmp_path / "ds"
    ds = LeRobotDataset.create(
        repo_id=DUMMY_REPO_ID,
        fps=DEFAULT_FPS,
        features=SIMPLE_FEATURES,
        root=ds_root,
    )

    # Write phase: add the frames, save the episode, then finalize the dataset.
    n_frames = 5
    for _ in range(n_frames):
        ds.add_frame(_make_frame())
    ds.save_episode()
    ds.finalize()

    # With delta_timestamps set, get_item() reaches meta.episodes through
    # _get_query_indices, which is the access path this test guards.
    ds.delta_timestamps = {"state": [0.0]}

    sample = ds[0]
    for expected_key in ("state", "state_is_pad"):
        assert expected_key in sample
|
||||
|
||||
|
||||
# ── Property delegation ──────────────────────────────────────────────
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user