diff --git a/src/lerobot/datasets/dataset_metadata.py b/src/lerobot/datasets/dataset_metadata.py
index 65dbc9c4a..d79f4bfba 100644
--- a/src/lerobot/datasets/dataset_metadata.py
+++ b/src/lerobot/datasets/dataset_metadata.py
@@ -180,6 +180,16 @@ class LeRobotDatasetMetadata:
         self.episodes = load_episodes(self.root)
         self.stats = load_stats(self.root)
 
+    def ensure_readable(self) -> None:
+        """Guarantee metadata is fully loaded for read operations.
+
+        Idempotent — when metadata is already in memory this is a single
+        ``is None`` check. Call this before transitioning from write to
+        read mode on the same instance.
+        """
+        if self.episodes is None:
+            self._load_metadata()
+
     def _pull_from_repo(
         self,
         allow_patterns: list[str] | str | None = None,
diff --git a/src/lerobot/datasets/lerobot_dataset.py b/src/lerobot/datasets/lerobot_dataset.py
index 1725046f2..2f0154cda 100644
--- a/src/lerobot/datasets/lerobot_dataset.py
+++ b/src/lerobot/datasets/lerobot_dataset.py
@@ -278,6 +278,7 @@ class LeRobotDataset(torch.utils.data.Dataset):
     def _ensure_reader(self) -> DatasetReader:
         """Lazily create the reader on first access."""
         if self.reader is None:
+            self.meta.ensure_readable()
             self.reader = DatasetReader(
                 meta=self.meta,
                 root=self.root,
diff --git a/tests/datasets/test_lerobot_dataset.py b/tests/datasets/test_lerobot_dataset.py
index a8aa47ed2..5c3c24f99 100644
--- a/tests/datasets/test_lerobot_dataset.py
+++ b/tests/datasets/test_lerobot_dataset.py
@@ -535,6 +535,31 @@ def test_getitem_works_after_finalize(tmp_path):
     assert "task" in item
 
 
+def test_getitem_after_finalize_with_delta_timestamps(tmp_path):
+    """After finalize(), dataset[0] works when delta_timestamps require episode metadata.
+
+    Regression test for https://github.com/huggingface/lerobot/pull/3305.
+    The create -> write -> finalize -> read path left meta.episodes as None
+    because the write path flushes episodes to disk without updating them
+    in memory. Features that access meta.episodes (video decoding,
+    delta_timestamps) would crash with a TypeError.
+    """
+    dataset = LeRobotDataset.create(
+        repo_id=DUMMY_REPO_ID, fps=DEFAULT_FPS, features=SIMPLE_FEATURES, root=tmp_path / "ds"
+    )
+    for _ in range(5):
+        dataset.add_frame(_make_frame())
+    dataset.save_episode()
+    dataset.finalize()
+
+    # Set delta_timestamps so get_item() accesses meta.episodes via _get_query_indices
+    dataset.delta_timestamps = {"state": [0.0]}
+
+    item = dataset[0]
+    assert "state" in item
+    assert "state_is_pad" in item
+
+
 # ── Property delegation ──────────────────────────────────────────────