Add variable-size test datasets (#1610)

* fix: allow dummy datasets to be written across multiple files in multiple folders, based on an arbitrary data file size

* fix: write atomic episodes to multiple files (maybe)

* fix: move the unused write-dataset function to test code
This commit is contained in:
Francesco Capuano
2025-07-30 11:26:28 +02:00
committed by GitHub
parent 890b1e473d
commit 527ae8e557
4 changed files with 114 additions and 14 deletions
+6
View File
@@ -488,6 +488,8 @@ def lerobot_dataset_factory(
tasks: pd.DataFrame | None = None,
episodes_metadata: datasets.Dataset | None = None,
hf_dataset: datasets.Dataset | None = None,
data_files_size_in_mb: float = DEFAULT_DATA_FILE_SIZE_IN_MB,
chunks_size: int = DEFAULT_CHUNK_SIZE,
**kwargs,
) -> LeRobotDataset:
# Instantiate objects
@@ -497,6 +499,8 @@ def lerobot_dataset_factory(
total_frames=total_frames,
total_tasks=total_tasks,
use_videos=use_videos,
data_files_size_in_mb=data_files_size_in_mb,
chunks_size=chunks_size,
)
if stats is None:
stats = stats_factory(features=info["features"])
@@ -525,6 +529,8 @@ def lerobot_dataset_factory(
tasks=tasks,
episodes=episodes_metadata,
hf_dataset=hf_dataset,
data_files_size_in_mb=data_files_size_in_mb,
chunks_size=chunks_size,
)
mock_metadata = lerobot_dataset_metadata_factory(
root=root,