diff --git a/src/lerobot/datasets/v30/convert_dataset_v21_to_v30.py b/src/lerobot/datasets/v30/convert_dataset_v21_to_v30.py
index 2c9e08a6c..a58a62717 100644
--- a/src/lerobot/datasets/v30/convert_dataset_v21_to_v30.py
+++ b/src/lerobot/datasets/v30/convert_dataset_v21_to_v30.py
@@ -34,8 +34,9 @@ from lerobot.common.datasets.compute_stats import aggregate_stats
 from lerobot.common.datasets.lerobot_dataset import CODEBASE_VERSION, LeRobotDataset
 from lerobot.common.datasets.utils import (
     DEFAULT_CHUNK_SIZE,
+    DEFAULT_DATA_FILE_SIZE_IN_MB,
     DEFAULT_DATA_PATH,
-    DEFAULT_FILE_SIZE_IN_MB,
+    DEFAULT_VIDEO_FILE_SIZE_IN_MB,
     DEFAULT_VIDEO_PATH,
     cast_stats_to_numpy,
     concat_video_files,
@@ -174,7 +175,7 @@ def convert_data(root, new_root):
         episodes_metadata.append(ep_metadata)
         ep_idx += 1
 
-        if size_in_mb < DEFAULT_FILE_SIZE_IN_MB:
+        if size_in_mb < DEFAULT_DATA_FILE_SIZE_IN_MB:
             paths_to_cat.append(ep_path)
             continue
 
@@ -263,7 +264,7 @@ def convert_videos_of_camera(root: Path, new_root: Path, video_key):
         episodes_metadata.append(ep_metadata)
         ep_idx += 1
 
-        if size_in_mb < DEFAULT_FILE_SIZE_IN_MB:
+        if size_in_mb < DEFAULT_VIDEO_FILE_SIZE_IN_MB:
             paths_to_cat.append(ep_path)
             continue
 
@@ -337,8 +338,8 @@ def convert_info(root, new_root):
     info["codebase_version"] = "v3.0"
     del info["total_chunks"]
     del info["total_videos"]
-    info["files_size_in_mb"] = DEFAULT_FILE_SIZE_IN_MB
-    # TODO(rcadene): chunk- or chunk_ or file- or file_
+    info["data_files_size_in_mb"] = DEFAULT_DATA_FILE_SIZE_IN_MB
+    info["video_files_size_in_mb"] = DEFAULT_VIDEO_FILE_SIZE_IN_MB
     info["data_path"] = DEFAULT_DATA_PATH
     info["video_path"] = DEFAULT_VIDEO_PATH
     info["fps"] = float(info["fps"])
diff --git a/tests/fixtures/dataset_factories.py b/tests/fixtures/dataset_factories.py
index f030ea878..17fa692e7 100644
--- a/tests/fixtures/dataset_factories.py
+++ b/tests/fixtures/dataset_factories.py
@@ -28,12 +28,14 @@ from datasets import Dataset
 from lerobot.datasets.lerobot_dataset import CODEBASE_VERSION, LeRobotDataset, LeRobotDatasetMetadata
 from lerobot.datasets.utils import (
     DEFAULT_CHUNK_SIZE,
+    DEFAULT_DATA_FILE_SIZE_IN_MB,
     DEFAULT_DATA_PATH,
     DEFAULT_FEATURES,
-    DEFAULT_FILE_SIZE_IN_MB,
+    DEFAULT_VIDEO_FILE_SIZE_IN_MB,
     DEFAULT_VIDEO_PATH,
     flatten_dict,
     get_hf_features_from_features,
+    hf_transform_to_torch,
 )
 from tests.fixtures.constants import (
     DEFAULT_FPS,
@@ -121,7 +123,8 @@ def info_factory(features_factory):
         total_tasks: int = 0,
         total_videos: int = 0,
         chunks_size: int = DEFAULT_CHUNK_SIZE,
-        files_size_in_mb: float = DEFAULT_FILE_SIZE_IN_MB,
+        data_files_size_in_mb: float = DEFAULT_DATA_FILE_SIZE_IN_MB,
+        video_files_size_in_mb: float = DEFAULT_VIDEO_FILE_SIZE_IN_MB,
         data_path: str = DEFAULT_DATA_PATH,
         video_path: str = DEFAULT_VIDEO_PATH,
         motor_features: dict = DUMMY_MOTOR_FEATURES,
@@ -137,7 +140,8 @@ def info_factory(features_factory):
             "total_tasks": total_tasks,
             "total_videos": total_videos,
             "chunks_size": chunks_size,
-            "files_size_in_mb": files_size_in_mb,
+            "data_files_size_in_mb": data_files_size_in_mb,
+            "video_files_size_in_mb": video_files_size_in_mb,
             "fps": fps,
             "splits": {},
             "data_path": data_path,
@@ -352,7 +356,7 @@ def hf_dataset_factory(features_factory, tasks_factory, episodes_factory, img_ar
             },
             features=hf_features,
         )
-        dataset.set_format("torch")
+        dataset.set_transform(hf_transform_to_torch)
         return dataset
 
     return _create_hf_dataset