Faster self.meta.episodes[...]

Switch back to set_transform instead of set_format

Add video_files_size_in_mb (files_size_in_mb is split into data_files_size_in_mb and video_files_size_in_mb)

pre-commit run --all-files
This commit is contained in:
Remi Cadene
2025-04-22 14:46:33 +00:00
committed by Michel Aractingi
parent eaec52a7b7
commit 8c43b3d05e
2 changed files with 14 additions and 9 deletions
@@ -34,8 +34,9 @@ from lerobot.common.datasets.compute_stats import aggregate_stats
from lerobot.common.datasets.lerobot_dataset import CODEBASE_VERSION, LeRobotDataset from lerobot.common.datasets.lerobot_dataset import CODEBASE_VERSION, LeRobotDataset
from lerobot.common.datasets.utils import ( from lerobot.common.datasets.utils import (
DEFAULT_CHUNK_SIZE, DEFAULT_CHUNK_SIZE,
DEFAULT_DATA_FILE_SIZE_IN_MB,
DEFAULT_DATA_PATH, DEFAULT_DATA_PATH,
DEFAULT_FILE_SIZE_IN_MB, DEFAULT_VIDEO_FILE_SIZE_IN_MB,
DEFAULT_VIDEO_PATH, DEFAULT_VIDEO_PATH,
cast_stats_to_numpy, cast_stats_to_numpy,
concat_video_files, concat_video_files,
@@ -174,7 +175,7 @@ def convert_data(root, new_root):
episodes_metadata.append(ep_metadata) episodes_metadata.append(ep_metadata)
ep_idx += 1 ep_idx += 1
if size_in_mb < DEFAULT_FILE_SIZE_IN_MB: if size_in_mb < DEFAULT_DATA_FILE_SIZE_IN_MB:
paths_to_cat.append(ep_path) paths_to_cat.append(ep_path)
continue continue
@@ -263,7 +264,7 @@ def convert_videos_of_camera(root: Path, new_root: Path, video_key):
episodes_metadata.append(ep_metadata) episodes_metadata.append(ep_metadata)
ep_idx += 1 ep_idx += 1
if size_in_mb < DEFAULT_FILE_SIZE_IN_MB: if size_in_mb < DEFAULT_VIDEO_FILE_SIZE_IN_MB:
paths_to_cat.append(ep_path) paths_to_cat.append(ep_path)
continue continue
@@ -337,8 +338,8 @@ def convert_info(root, new_root):
info["codebase_version"] = "v3.0" info["codebase_version"] = "v3.0"
del info["total_chunks"] del info["total_chunks"]
del info["total_videos"] del info["total_videos"]
info["files_size_in_mb"] = DEFAULT_FILE_SIZE_IN_MB info["data_files_size_in_mb"] = DEFAULT_DATA_FILE_SIZE_IN_MB
# TODO(rcadene): chunk- or chunk_ or file- or file_ info["video_files_size_in_mb"] = DEFAULT_VIDEO_FILE_SIZE_IN_MB
info["data_path"] = DEFAULT_DATA_PATH info["data_path"] = DEFAULT_DATA_PATH
info["video_path"] = DEFAULT_VIDEO_PATH info["video_path"] = DEFAULT_VIDEO_PATH
info["fps"] = float(info["fps"]) info["fps"] = float(info["fps"])
+8 -4
View File
@@ -28,12 +28,14 @@ from datasets import Dataset
from lerobot.datasets.lerobot_dataset import CODEBASE_VERSION, LeRobotDataset, LeRobotDatasetMetadata from lerobot.datasets.lerobot_dataset import CODEBASE_VERSION, LeRobotDataset, LeRobotDatasetMetadata
from lerobot.datasets.utils import ( from lerobot.datasets.utils import (
DEFAULT_CHUNK_SIZE, DEFAULT_CHUNK_SIZE,
DEFAULT_DATA_FILE_SIZE_IN_MB,
DEFAULT_DATA_PATH, DEFAULT_DATA_PATH,
DEFAULT_FEATURES, DEFAULT_FEATURES,
DEFAULT_FILE_SIZE_IN_MB, DEFAULT_VIDEO_FILE_SIZE_IN_MB,
DEFAULT_VIDEO_PATH, DEFAULT_VIDEO_PATH,
flatten_dict, flatten_dict,
get_hf_features_from_features, get_hf_features_from_features,
hf_transform_to_torch,
) )
from tests.fixtures.constants import ( from tests.fixtures.constants import (
DEFAULT_FPS, DEFAULT_FPS,
@@ -121,7 +123,8 @@ def info_factory(features_factory):
total_tasks: int = 0, total_tasks: int = 0,
total_videos: int = 0, total_videos: int = 0,
chunks_size: int = DEFAULT_CHUNK_SIZE, chunks_size: int = DEFAULT_CHUNK_SIZE,
files_size_in_mb: float = DEFAULT_FILE_SIZE_IN_MB, data_files_size_in_mb: float = DEFAULT_DATA_FILE_SIZE_IN_MB,
video_files_size_in_mb: float = DEFAULT_VIDEO_FILE_SIZE_IN_MB,
data_path: str = DEFAULT_DATA_PATH, data_path: str = DEFAULT_DATA_PATH,
video_path: str = DEFAULT_VIDEO_PATH, video_path: str = DEFAULT_VIDEO_PATH,
motor_features: dict = DUMMY_MOTOR_FEATURES, motor_features: dict = DUMMY_MOTOR_FEATURES,
@@ -137,7 +140,8 @@ def info_factory(features_factory):
"total_tasks": total_tasks, "total_tasks": total_tasks,
"total_videos": total_videos, "total_videos": total_videos,
"chunks_size": chunks_size, "chunks_size": chunks_size,
"files_size_in_mb": files_size_in_mb, "data_files_size_in_mb": data_files_size_in_mb,
"video_files_size_in_mb": video_files_size_in_mb,
"fps": fps, "fps": fps,
"splits": {}, "splits": {},
"data_path": data_path, "data_path": data_path,
@@ -352,7 +356,7 @@ def hf_dataset_factory(features_factory, tasks_factory, episodes_factory, img_ar
}, },
features=hf_features, features=hf_features,
) )
dataset.set_format("torch") dataset.set_transform(hf_transform_to_torch)
return dataset return dataset
return _create_hf_dataset return _create_hf_dataset