mirror of
https://github.com/huggingface/lerobot.git
synced 2026-05-16 09:09:48 +00:00
add update_chunk_settings method for LeRobotDatasetMetadata. Introduce tests for chunk settings updates and validation of parameters.
This commit is contained in:
@@ -35,6 +35,9 @@ from lerobot.datasets.lerobot_dataset import (
|
||||
MultiLeRobotDataset,
|
||||
)
|
||||
from lerobot.datasets.utils import (
|
||||
DEFAULT_CHUNK_SIZE,
|
||||
DEFAULT_DATA_FILE_SIZE_IN_MB,
|
||||
DEFAULT_VIDEO_FILE_SIZE_IN_MB,
|
||||
create_branch,
|
||||
get_hf_features_from_features,
|
||||
hf_transform_to_torch,
|
||||
@@ -654,3 +657,135 @@ def test_check_cached_episodes_sufficient(tmp_path, lerobot_dataset_factory):
|
||||
# Test requesting a mix of available and unavailable episodes
|
||||
sparse_dataset.episodes = [0, 1, 2]
|
||||
assert sparse_dataset._check_cached_episodes_sufficient() is False
|
||||
|
||||
|
||||
def test_update_chunk_settings(tmp_path, empty_lerobot_dataset_factory):
|
||||
"""Test the update_chunk_settings functionality for both LeRobotDataset and LeRobotDatasetMetadata."""
|
||||
features = {
|
||||
"observation.state": {
|
||||
"dtype": "float32",
|
||||
"shape": (6,),
|
||||
"names": ["shoulder_pan", "shoulder_lift", "elbow", "wrist_1", "wrist_2", "wrist_3"],
|
||||
},
|
||||
"action": {
|
||||
"dtype": "float32",
|
||||
"shape": (6,),
|
||||
"names": ["shoulder_pan", "shoulder_lift", "elbow", "wrist_1", "wrist_2", "wrist_3"],
|
||||
},
|
||||
}
|
||||
|
||||
# Create dataset with default chunk settings
|
||||
dataset = empty_lerobot_dataset_factory(root=tmp_path / "test", features=features)
|
||||
|
||||
# Test initial default values
|
||||
initial_settings = dataset.meta.get_chunk_settings()
|
||||
assert initial_settings["chunks_size"] == DEFAULT_CHUNK_SIZE
|
||||
assert initial_settings["data_files_size_in_mb"] == DEFAULT_DATA_FILE_SIZE_IN_MB
|
||||
assert initial_settings["video_files_size_in_mb"] == DEFAULT_VIDEO_FILE_SIZE_IN_MB
|
||||
|
||||
# Test updating all settings at once
|
||||
new_chunks_size = 2000
|
||||
new_data_size = 200
|
||||
new_video_size = 1000
|
||||
|
||||
dataset.meta.update_chunk_settings(
|
||||
chunks_size=new_chunks_size,
|
||||
data_files_size_in_mb=new_data_size,
|
||||
video_files_size_in_mb=new_video_size,
|
||||
)
|
||||
|
||||
# Verify settings were updated
|
||||
updated_settings = dataset.meta.get_chunk_settings()
|
||||
assert updated_settings["chunks_size"] == new_chunks_size
|
||||
assert updated_settings["data_files_size_in_mb"] == new_data_size
|
||||
assert updated_settings["video_files_size_in_mb"] == new_video_size
|
||||
|
||||
# Test updating individual settings
|
||||
dataset.meta.update_chunk_settings(chunks_size=1500)
|
||||
settings_after_partial = dataset.meta.get_chunk_settings()
|
||||
assert settings_after_partial["chunks_size"] == 1500
|
||||
assert settings_after_partial["data_files_size_in_mb"] == new_data_size
|
||||
assert settings_after_partial["video_files_size_in_mb"] == new_video_size
|
||||
|
||||
# Test updating only data file size
|
||||
dataset.meta.update_chunk_settings(data_files_size_in_mb=150)
|
||||
settings_after_data = dataset.meta.get_chunk_settings()
|
||||
assert settings_after_data["chunks_size"] == 1500
|
||||
assert settings_after_data["data_files_size_in_mb"] == 150
|
||||
assert settings_after_data["video_files_size_in_mb"] == new_video_size
|
||||
|
||||
# Test updating only video file size
|
||||
dataset.meta.update_chunk_settings(video_files_size_in_mb=800)
|
||||
settings_after_video = dataset.meta.get_chunk_settings()
|
||||
assert settings_after_video["chunks_size"] == 1500
|
||||
assert settings_after_video["data_files_size_in_mb"] == 150
|
||||
assert settings_after_video["video_files_size_in_mb"] == 800
|
||||
|
||||
# Test that settings persist in the info file
|
||||
info_path = dataset.root / "meta" / "info.json"
|
||||
assert info_path.exists()
|
||||
|
||||
# Verify the underlying metadata properties
|
||||
assert dataset.meta.chunks_size == 1500
|
||||
assert dataset.meta.data_files_size_in_mb == 150
|
||||
assert dataset.meta.video_files_size_in_mb == 800
|
||||
|
||||
# Test error handling for invalid values
|
||||
with pytest.raises(ValueError, match="chunks_size must be positive"):
|
||||
dataset.meta.update_chunk_settings(chunks_size=0)
|
||||
|
||||
with pytest.raises(ValueError, match="chunks_size must be positive"):
|
||||
dataset.meta.update_chunk_settings(chunks_size=-100)
|
||||
|
||||
with pytest.raises(ValueError, match="data_files_size_in_mb must be positive"):
|
||||
dataset.meta.update_chunk_settings(data_files_size_in_mb=0)
|
||||
|
||||
with pytest.raises(ValueError, match="data_files_size_in_mb must be positive"):
|
||||
dataset.meta.update_chunk_settings(data_files_size_in_mb=-50)
|
||||
|
||||
with pytest.raises(ValueError, match="video_files_size_in_mb must be positive"):
|
||||
dataset.meta.update_chunk_settings(video_files_size_in_mb=0)
|
||||
|
||||
with pytest.raises(ValueError, match="video_files_size_in_mb must be positive"):
|
||||
dataset.meta.update_chunk_settings(video_files_size_in_mb=-200)
|
||||
|
||||
# Test calling with None values (should not change anything)
|
||||
settings_before_none = dataset.meta.get_chunk_settings()
|
||||
dataset.meta.update_chunk_settings(
|
||||
chunks_size=None, data_files_size_in_mb=None, video_files_size_in_mb=None
|
||||
)
|
||||
settings_after_none = dataset.meta.get_chunk_settings()
|
||||
assert settings_before_none == settings_after_none
|
||||
|
||||
# Test metadata direct access
|
||||
meta_settings = dataset.meta.get_chunk_settings()
|
||||
assert meta_settings == dataset.meta.get_chunk_settings()
|
||||
|
||||
# Test updating via metadata directly
|
||||
dataset.meta.update_chunk_settings(chunks_size=3000)
|
||||
assert dataset.meta.get_chunk_settings()["chunks_size"] == 3000
|
||||
|
||||
|
||||
def test_update_chunk_settings_video_dataset(tmp_path):
|
||||
"""Test update_chunk_settings with a video dataset to ensure video-specific logic works."""
|
||||
features = {
|
||||
"observation.images.cam": {
|
||||
"dtype": "video",
|
||||
"shape": (480, 640, 3),
|
||||
"names": ["height", "width", "channels"],
|
||||
},
|
||||
"action": {"dtype": "float32", "shape": (6,), "names": ["j1", "j2", "j3", "j4", "j5", "j6"]},
|
||||
}
|
||||
|
||||
# Create video dataset
|
||||
dataset = LeRobotDataset.create(
|
||||
repo_id=DUMMY_REPO_ID, fps=30, features=features, root=tmp_path / "video_test", use_videos=True
|
||||
)
|
||||
|
||||
# Test that video-specific settings work
|
||||
original_video_size = dataset.meta.get_chunk_settings()["video_files_size_in_mb"]
|
||||
new_video_size = original_video_size * 2
|
||||
|
||||
dataset.meta.update_chunk_settings(video_files_size_in_mb=new_video_size)
|
||||
assert dataset.meta.get_chunk_settings()["video_files_size_in_mb"] == new_video_size
|
||||
assert dataset.meta.video_files_size_in_mb == new_video_size
|
||||
|
||||
Reference in New Issue
Block a user