fix(dataset): adding metadata loading when reading from a dataset after writing (#3305)

* fix(one shot load): adding metadata loading when reading from a dataset after writing

* refactor(one shot load): move metadata reload to ensure_readable() on LeRobotDatasetMetadata

Move the metadata reload from DatasetReader.load_and_activate() to a new
public ensure_readable() method on LeRobotDatasetMetadata, called from
LeRobotDataset._ensure_reader(). This places lifecycle management in the
right layer: metadata owns its readiness check, the dataset orchestrates
the write-to-read transition, and the reader stays clean.

Also adds a regression test using delta_timestamps to exercise the
meta.episodes access path in the create -> write -> finalize -> read flow.

Co-authored-by: Steven Palma <imstevenpmwork@users.noreply.github.com>

---------

Co-authored-by: claude[bot] <41898282+claude[bot]@users.noreply.github.com>
Co-authored-by: Steven Palma <imstevenpmwork@users.noreply.github.com>
This commit is contained in:
Caroline Pascal
2026-04-10 11:29:40 +02:00
committed by GitHub
parent 6799da35eb
commit d762f4bfe8
3 changed files with 36 additions and 0 deletions
+10
View File
@@ -180,6 +180,16 @@ class LeRobotDatasetMetadata:
self.episodes = load_episodes(self.root) self.episodes = load_episodes(self.root)
self.stats = load_stats(self.root) self.stats = load_stats(self.root)
def ensure_readable(self) -> None:
    """Make sure episode metadata is available before reading.

    If ``self.episodes`` is already set, this returns immediately without
    doing any work, so it is safe to call repeatedly. Otherwise it
    delegates to ``self._load_metadata()``. Intended to be invoked when
    the same instance switches from writing to reading.
    """
    if self.episodes is not None:
        # Already populated: nothing to do.
        return
    self._load_metadata()
def _pull_from_repo( def _pull_from_repo(
self, self,
allow_patterns: list[str] | str | None = None, allow_patterns: list[str] | str | None = None,
+1
View File
@@ -278,6 +278,7 @@ class LeRobotDataset(torch.utils.data.Dataset):
def _ensure_reader(self) -> DatasetReader: def _ensure_reader(self) -> DatasetReader:
"""Lazily create the reader on first access.""" """Lazily create the reader on first access."""
if self.reader is None: if self.reader is None:
self.meta.ensure_readable()
self.reader = DatasetReader( self.reader = DatasetReader(
meta=self.meta, meta=self.meta,
root=self.root, root=self.root,
+25
View File
@@ -535,6 +535,31 @@ def test_getitem_works_after_finalize(tmp_path):
assert "task" in item assert "task" in item
def test_getitem_after_finalize_with_delta_timestamps(tmp_path):
    """Indexing works after finalize() when delta_timestamps needs episode metadata.

    Regression test for https://github.com/huggingface/lerobot/pull/3305.

    In the create -> write -> finalize -> read flow, meta.episodes used to
    stay None: the write path flushed episodes to disk but did not refresh
    the in-memory copy. Code paths that read meta.episodes (video decoding,
    delta_timestamps) would then fail with a TypeError.
    """
    num_frames = 5
    ds = LeRobotDataset.create(
        repo_id=DUMMY_REPO_ID,
        fps=DEFAULT_FPS,
        features=SIMPLE_FEATURES,
        root=tmp_path / "ds",
    )
    for _ in range(num_frames):
        ds.add_frame(_make_frame())
    ds.save_episode()
    ds.finalize()

    # With delta_timestamps set, item retrieval has to consult meta.episodes
    # (through _get_query_indices), which is the path under test.
    ds.delta_timestamps = {"state": [0.0]}

    sample = ds[0]
    for expected_key in ("state", "state_is_pad"):
        assert expected_key in sample
# ── Property delegation ────────────────────────────────────────────── # ── Property delegation ──────────────────────────────────────────────