refactor(dataset): split LeRobotDataset into DatasetReader & DatasetWriter (+ API cleanup) (#3180)

* refactor(dataset): split reader and writer

* chore(dataset): remove proxies

* refactor(dataset): better reader & writer encapsulation

* refactor(datasets): clean API + reduce leaky implementations

* refactor(dataset): API cleaning for writer, reader and meta

* refactor(dataset): expose writer & reader + other minor improvements

* refactor(dataset): improve teardown routine

* refactor(dataset): add hf_dataset property at the facade level

* chore(dataset): add init for dataset module

* docs(dataset): add docstrings for public API of the dataset classes

* tests(dataset): add tests for new classes

* fix(dataset): remove circular dependency
This commit is contained in:
Steven Palma
2026-03-26 19:09:25 +01:00
committed by GitHub
parent 017ff73fbf
commit 123495250b
28 changed files with 2742 additions and 1158 deletions
+3 -3
View File
@@ -563,7 +563,7 @@ class ReplayBuffer:
)
# Start writing images if needed
lerobot_dataset.start_image_writer(num_processes=0, num_threads=3)
lerobot_dataset.writer.start_image_writer(num_processes=0, num_threads=3)
# Convert transitions into episodes and frames
@@ -603,10 +603,10 @@ class ReplayBuffer:
lerobot_dataset.save_episode()
# Save any remaining frames in the buffer
if lerobot_dataset.episode_buffer["size"] > 0:
if lerobot_dataset.has_pending_frames():
lerobot_dataset.save_episode()
lerobot_dataset.stop_image_writer()
lerobot_dataset.writer.stop_image_writer()
lerobot_dataset.finalize()
return lerobot_dataset
+1 -2
View File
@@ -752,8 +752,7 @@ def replay_trajectory(
episodes=[cfg.dataset.replay_episode],
download_videos=False,
)
episode_frames = dataset.hf_dataset.filter(lambda x: x["episode_index"] == cfg.dataset.replay_episode)
actions = episode_frames.select_columns(ACTION)
actions = dataset.select_columns(ACTION)
_, info = env.reset()