diff --git a/examples/dataset/load_lerobot_dataset.py b/examples/dataset/load_lerobot_dataset.py index 4fda25884..ea3516710 100644 --- a/examples/dataset/load_lerobot_dataset.py +++ b/examples/dataset/load_lerobot_dataset.py @@ -32,7 +32,8 @@ import torch from huggingface_hub import HfApi import lerobot -from lerobot.datasets.lerobot_dataset import LeRobotDataset, LeRobotDatasetMetadata +from lerobot.datasets.dataset_metadata import LeRobotDatasetMetadata +from lerobot.datasets.lerobot_dataset import LeRobotDataset def main(): diff --git a/examples/lekiwi/evaluate.py b/examples/lekiwi/evaluate.py index a3144a442..ef98640aa 100644 --- a/examples/lekiwi/evaluate.py +++ b/examples/lekiwi/evaluate.py @@ -14,8 +14,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +from lerobot.datasets.feature_utils import hw_to_dataset_features from lerobot.datasets.lerobot_dataset import LeRobotDataset -from lerobot.datasets.utils import hw_to_dataset_features from lerobot.policies.act.modeling_act import ACTPolicy from lerobot.policies.factory import make_pre_post_processors from lerobot.processor import make_default_processors diff --git a/examples/lekiwi/record.py b/examples/lekiwi/record.py index 9292157f7..ace2e35b8 100644 --- a/examples/lekiwi/record.py +++ b/examples/lekiwi/record.py @@ -14,8 +14,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +from lerobot.datasets.feature_utils import hw_to_dataset_features from lerobot.datasets.lerobot_dataset import LeRobotDataset -from lerobot.datasets.utils import hw_to_dataset_features from lerobot.processor import make_default_processors from lerobot.robots.lekiwi.config_lekiwi import LeKiwiClientConfig from lerobot.robots.lekiwi.lekiwi_client import LeKiwiClient diff --git a/examples/phone_to_so100/evaluate.py b/examples/phone_to_so100/evaluate.py index c1291d101..9cd7a98c2 100644 --- a/examples/phone_to_so100/evaluate.py +++ b/examples/phone_to_so100/evaluate.py @@ -16,9 +16,9 @@ from lerobot.cameras.opencv.configuration_opencv import OpenCVCameraConfig from lerobot.configs.types import FeatureType, PolicyFeature +from lerobot.datasets.feature_utils import combine_feature_dicts from lerobot.datasets.lerobot_dataset import LeRobotDataset from lerobot.datasets.pipeline_features import aggregate_pipeline_dataset_features, create_initial_features -from lerobot.datasets.utils import combine_feature_dicts from lerobot.model.kinematics import RobotKinematics from lerobot.policies.act.modeling_act import ACTPolicy from lerobot.policies.factory import make_pre_post_processors diff --git a/examples/phone_to_so100/record.py b/examples/phone_to_so100/record.py index 756c6f42d..f2a17cd33 100644 --- a/examples/phone_to_so100/record.py +++ b/examples/phone_to_so100/record.py @@ -15,9 +15,9 @@ # limitations under the License. from lerobot.cameras.opencv.configuration_opencv import OpenCVCameraConfig +from lerobot.datasets.feature_utils import combine_feature_dicts from lerobot.datasets.lerobot_dataset import LeRobotDataset from lerobot.datasets.pipeline_features import aggregate_pipeline_dataset_features, create_initial_features -from lerobot.datasets.utils import combine_feature_dicts from lerobot.model.kinematics import RobotKinematics from lerobot.processor import RobotProcessorPipeline from lerobot.processor.converters import ( diff --git a/examples/port_datasets/port_droid.py b/examples/port_datasets/port_droid.py index a1fb50914..f58bacbe0 100644 --- a/examples/port_datasets/port_droid.py +++ b/examples/port_datasets/port_droid.py @@ -22,7 +22,8 @@ from pathlib import Path import numpy as np import tensorflow_datasets as tfds -from lerobot.datasets.lerobot_dataset import LeRobotDataset, LeRobotDatasetMetadata +from lerobot.datasets.dataset_metadata import LeRobotDatasetMetadata +from lerobot.datasets.lerobot_dataset import LeRobotDataset from lerobot.utils.utils import get_elapsed_time_in_days_hours_minutes_seconds DROID_SHARDS = 2048 diff --git a/examples/port_datasets/slurm_upload.py b/examples/port_datasets/slurm_upload.py index 55002c0be..7fb01c11b 100644 --- a/examples/port_datasets/slurm_upload.py +++ b/examples/port_datasets/slurm_upload.py @@ -26,7 +26,7 @@ from huggingface_hub import HfApi from huggingface_hub.constants import REPOCARD_NAME from port_droid import DROID_SHARDS -from lerobot.datasets.lerobot_dataset import CODEBASE_VERSION, LeRobotDatasetMetadata +from lerobot.datasets.dataset_metadata import CODEBASE_VERSION, LeRobotDatasetMetadata from lerobot.datasets.utils import create_lerobot_dataset_card from lerobot.utils.utils import init_logging @@ -155,7 +155,7 @@ class UploadDataset(PipelineStep): from datasets.utils.tqdm import disable_progress_bars from huggingface_hub import CommitOperationAdd, preupload_lfs_files - from lerobot.datasets.lerobot_dataset import LeRobotDatasetMetadata + from lerobot.datasets.dataset_metadata import LeRobotDatasetMetadata from lerobot.utils.utils import init_logging init_logging() diff --git a/examples/rtc/eval_dataset.py b/examples/rtc/eval_dataset.py index 613fd67d7..a94d4da48 100644 --- a/examples/rtc/eval_dataset.py +++ b/examples/rtc/eval_dataset.py @@ -113,8 +113,9 @@ from lerobot.configs import parser from lerobot.configs.default import DatasetConfig from lerobot.configs.policies import PreTrainedConfig from lerobot.configs.types import RTCAttentionSchedule +from lerobot.datasets.dataset_metadata import LeRobotDatasetMetadata from lerobot.datasets.factory import resolve_delta_timestamps -from lerobot.datasets.lerobot_dataset import LeRobotDataset, LeRobotDatasetMetadata +from lerobot.datasets.lerobot_dataset import LeRobotDataset from lerobot.policies.factory import get_policy_class, make_pre_post_processors from lerobot.policies.rtc.configuration_rtc import RTCConfig from lerobot.policies.rtc.debug_visualizer import RTCDebugVisualizer diff --git a/examples/rtc/eval_with_real_robot.py b/examples/rtc/eval_with_real_robot.py index 9d9e1364a..36da88e1b 100644 --- a/examples/rtc/eval_with_real_robot.py +++ b/examples/rtc/eval_with_real_robot.py @@ -82,7 +82,7 @@ from lerobot.cameras.zmq.configuration_zmq import ZMQCameraConfig # noqa: F401 from lerobot.configs import parser from lerobot.configs.policies import PreTrainedConfig from lerobot.configs.types import RTCAttentionSchedule -from lerobot.datasets.utils import build_dataset_frame, hw_to_dataset_features +from lerobot.datasets.feature_utils import build_dataset_frame, hw_to_dataset_features from lerobot.policies.factory import get_policy_class, make_pre_post_processors from lerobot.policies.rtc.action_queue import ActionQueue from lerobot.policies.rtc.configuration_rtc import RTCConfig diff --git a/examples/so100_to_so100_EE/evaluate.py b/examples/so100_to_so100_EE/evaluate.py index 45a87ebad..638591021 100644 --- a/examples/so100_to_so100_EE/evaluate.py +++ b/examples/so100_to_so100_EE/evaluate.py @@ -16,9 +16,9 @@ from lerobot.cameras.opencv.configuration_opencv import OpenCVCameraConfig from lerobot.configs.types import FeatureType, PolicyFeature +from lerobot.datasets.feature_utils import combine_feature_dicts from lerobot.datasets.lerobot_dataset import LeRobotDataset from lerobot.datasets.pipeline_features import aggregate_pipeline_dataset_features, create_initial_features -from lerobot.datasets.utils import combine_feature_dicts from lerobot.model.kinematics import RobotKinematics from lerobot.policies.act.modeling_act import ACTPolicy from lerobot.policies.factory import make_pre_post_processors diff --git a/examples/so100_to_so100_EE/record.py b/examples/so100_to_so100_EE/record.py index 8fa862d6e..634bd891a 100644 --- a/examples/so100_to_so100_EE/record.py +++ b/examples/so100_to_so100_EE/record.py @@ -16,9 +16,9 @@ from lerobot.cameras.opencv.configuration_opencv import OpenCVCameraConfig +from lerobot.datasets.feature_utils import combine_feature_dicts from lerobot.datasets.lerobot_dataset import LeRobotDataset from lerobot.datasets.pipeline_features import aggregate_pipeline_dataset_features, create_initial_features -from lerobot.datasets.utils import combine_feature_dicts from lerobot.model.kinematics import RobotKinematics from lerobot.processor import RobotProcessorPipeline from lerobot.processor.converters import ( diff --git a/examples/training/train_policy.py b/examples/training/train_policy.py index 16f2a4d87..07ec10c92 100644 --- a/examples/training/train_policy.py +++ b/examples/training/train_policy.py @@ -19,8 +19,9 @@ from pathlib import Path import torch from lerobot.configs.types import FeatureType -from lerobot.datasets.lerobot_dataset import LeRobotDataset, LeRobotDatasetMetadata -from lerobot.datasets.utils import dataset_to_policy_features +from lerobot.datasets.dataset_metadata import LeRobotDatasetMetadata +from lerobot.datasets.feature_utils import dataset_to_policy_features +from lerobot.datasets.lerobot_dataset import LeRobotDataset from lerobot.policies.diffusion.configuration_diffusion import DiffusionConfig from lerobot.policies.diffusion.modeling_diffusion import DiffusionPolicy from lerobot.policies.factory import make_pre_post_processors diff --git a/examples/training/train_with_streaming.py b/examples/training/train_with_streaming.py index 185be5b13..973698e74 100644 --- a/examples/training/train_with_streaming.py +++ b/examples/training/train_with_streaming.py @@ -20,9 +20,9 @@ from pathlib import Path import torch from lerobot.configs.types import FeatureType -from lerobot.datasets.lerobot_dataset import LeRobotDatasetMetadata +from lerobot.datasets.dataset_metadata import LeRobotDatasetMetadata +from lerobot.datasets.feature_utils import dataset_to_policy_features from lerobot.datasets.streaming_dataset import StreamingLeRobotDataset -from lerobot.datasets.utils import dataset_to_policy_features from lerobot.policies.act.configuration_act import ACTConfig from lerobot.policies.act.modeling_act import ACTPolicy from lerobot.policies.factory import make_pre_post_processors diff --git a/examples/tutorial/act/act_training_example.py b/examples/tutorial/act/act_training_example.py index fe70f3023..b62c49cac 100644 --- a/examples/tutorial/act/act_training_example.py +++ b/examples/tutorial/act/act_training_example.py @@ -5,8 +5,9 @@ from pathlib import Path import torch from lerobot.configs.types import FeatureType -from lerobot.datasets.lerobot_dataset import LeRobotDataset, LeRobotDatasetMetadata -from lerobot.datasets.utils import dataset_to_policy_features +from lerobot.datasets.dataset_metadata import LeRobotDatasetMetadata +from lerobot.datasets.feature_utils import dataset_to_policy_features +from lerobot.datasets.lerobot_dataset import LeRobotDataset from lerobot.policies.act.configuration_act import ACTConfig from lerobot.policies.act.modeling_act import ACTPolicy from lerobot.policies.factory import make_pre_post_processors diff --git a/examples/tutorial/act/act_using_example.py b/examples/tutorial/act/act_using_example.py index 60bc802d8..15254d8eb 100644 --- a/examples/tutorial/act/act_using_example.py +++ b/examples/tutorial/act/act_using_example.py @@ -1,7 +1,7 @@ import torch from lerobot.cameras.opencv.configuration_opencv import OpenCVCameraConfig -from lerobot.datasets.lerobot_dataset import LeRobotDatasetMetadata +from lerobot.datasets.dataset_metadata import LeRobotDatasetMetadata from lerobot.policies.act.modeling_act import ACTPolicy from lerobot.policies.factory import make_pre_post_processors from lerobot.policies.utils import build_inference_frame, make_robot_action diff --git a/examples/tutorial/diffusion/diffusion_training_example.py b/examples/tutorial/diffusion/diffusion_training_example.py index 6db081450..dc6ca68a3 100644 --- a/examples/tutorial/diffusion/diffusion_training_example.py +++ b/examples/tutorial/diffusion/diffusion_training_example.py @@ -5,8 +5,9 @@ from pathlib import Path import torch from lerobot.configs.types import FeatureType -from lerobot.datasets.lerobot_dataset import LeRobotDataset, LeRobotDatasetMetadata -from lerobot.datasets.utils import dataset_to_policy_features +from lerobot.datasets.dataset_metadata import LeRobotDatasetMetadata +from lerobot.datasets.feature_utils import dataset_to_policy_features +from lerobot.datasets.lerobot_dataset import LeRobotDataset from lerobot.policies.diffusion.configuration_diffusion import DiffusionConfig from lerobot.policies.diffusion.modeling_diffusion import DiffusionPolicy from lerobot.policies.factory import make_pre_post_processors diff --git a/examples/tutorial/diffusion/diffusion_using_example.py b/examples/tutorial/diffusion/diffusion_using_example.py index d8ac75cfe..9b31cf359 100644 --- a/examples/tutorial/diffusion/diffusion_using_example.py +++ b/examples/tutorial/diffusion/diffusion_using_example.py @@ -1,7 +1,7 @@ import torch from lerobot.cameras.opencv.configuration_opencv import OpenCVCameraConfig -from lerobot.datasets.lerobot_dataset import LeRobotDatasetMetadata +from lerobot.datasets.dataset_metadata import LeRobotDatasetMetadata from lerobot.policies.diffusion.modeling_diffusion import DiffusionPolicy from lerobot.policies.factory import make_pre_post_processors from lerobot.policies.utils import build_inference_frame, make_robot_action diff --git a/examples/tutorial/pi0/using_pi0_example.py b/examples/tutorial/pi0/using_pi0_example.py index 056c3d81a..d8cf9dbff 100644 --- a/examples/tutorial/pi0/using_pi0_example.py +++ b/examples/tutorial/pi0/using_pi0_example.py @@ -1,7 +1,7 @@ import torch from lerobot.cameras.opencv.configuration_opencv import OpenCVCameraConfig -from lerobot.datasets.utils import hw_to_dataset_features +from lerobot.datasets.feature_utils import hw_to_dataset_features from lerobot.policies.factory import make_pre_post_processors from lerobot.policies.pi0.modeling_pi0 import PI0Policy from lerobot.policies.utils import build_inference_frame, make_robot_action diff --git a/examples/tutorial/rl/hilserl_example.py b/examples/tutorial/rl/hilserl_example.py index 980ac7985..d367a01ce 100644 --- a/examples/tutorial/rl/hilserl_example.py +++ b/examples/tutorial/rl/hilserl_example.py @@ -6,8 +6,8 @@ from queue import Empty, Full import torch import torch.optim as optim +from lerobot.datasets.feature_utils import hw_to_dataset_features from lerobot.datasets.lerobot_dataset import LeRobotDataset -from lerobot.datasets.utils import hw_to_dataset_features from lerobot.envs.configs import HILSerlProcessorConfig, HILSerlRobotEnvConfig from lerobot.policies.sac.configuration_sac import SACConfig from lerobot.policies.sac.modeling_sac import SACPolicy diff --git a/examples/tutorial/smolvla/using_smolvla_example.py b/examples/tutorial/smolvla/using_smolvla_example.py index ce3aa7bca..b99126efa 100644 --- a/examples/tutorial/smolvla/using_smolvla_example.py +++ b/examples/tutorial/smolvla/using_smolvla_example.py @@ -1,7 +1,7 @@ import torch from lerobot.cameras.opencv.configuration_opencv import OpenCVCameraConfig -from lerobot.datasets.utils import hw_to_dataset_features +from lerobot.datasets.feature_utils import hw_to_dataset_features from lerobot.policies.factory import make_pre_post_processors from lerobot.policies.smolvla.modeling_smolvla import SmolVLAPolicy from lerobot.policies.utils import build_inference_frame, make_robot_action diff --git a/src/lerobot/async_inference/helpers.py b/src/lerobot/async_inference/helpers.py index 8b12920d9..9dd44eb44 100644 --- a/src/lerobot/async_inference/helpers.py +++ b/src/lerobot/async_inference/helpers.py @@ -23,7 +23,7 @@ from typing import Any import torch from lerobot.configs.types import PolicyFeature -from lerobot.datasets.utils import build_dataset_frame, hw_to_dataset_features +from lerobot.datasets.feature_utils import build_dataset_frame, hw_to_dataset_features # NOTE: Configs need to be loaded for the client to be able to instantiate the policy config from lerobot.policies import ( # noqa: F401 diff --git a/src/lerobot/data_processing/sarm_annotations/subtask_annotation.py b/src/lerobot/data_processing/sarm_annotations/subtask_annotation.py index 67e37bab8..8f3a65e39 100644 --- a/src/lerobot/data_processing/sarm_annotations/subtask_annotation.py +++ b/src/lerobot/data_processing/sarm_annotations/subtask_annotation.py @@ -746,7 +746,8 @@ def save_annotations_to_dataset( dataset_path: Path, annotations: dict[int, SubtaskAnnotation], fps: int, prefix: str = "sparse" ): """Save annotations to LeRobot dataset parquet format.""" - from lerobot.datasets.utils import DEFAULT_EPISODES_PATH, load_episodes + from lerobot.datasets.io_utils import load_episodes + from lerobot.datasets.utils import DEFAULT_EPISODES_PATH episodes_dataset = load_episodes(dataset_path) if not episodes_dataset or len(episodes_dataset) == 0: @@ -840,7 +841,7 @@ def generate_auto_sparse_annotations( def load_annotations_from_dataset(dataset_path: Path, prefix: str = "sparse") -> dict[int, SubtaskAnnotation]: """Load annotations from LeRobot dataset parquet files.""" - from lerobot.datasets.utils import load_episodes + from lerobot.datasets.io_utils import load_episodes episodes_dataset = load_episodes(dataset_path) if not episodes_dataset or len(episodes_dataset) == 0: diff --git a/src/lerobot/datasets/aggregate.py b/src/lerobot/datasets/aggregate.py index b32116233..66f055f04 100644 --- a/src/lerobot/datasets/aggregate.py +++ b/src/lerobot/datasets/aggregate.py @@ -24,7 +24,16 @@ import pandas as pd import tqdm from lerobot.datasets.compute_stats import aggregate_stats -from lerobot.datasets.lerobot_dataset import LeRobotDatasetMetadata +from lerobot.datasets.dataset_metadata import LeRobotDatasetMetadata +from lerobot.datasets.feature_utils import get_hf_features_from_features +from lerobot.datasets.io_utils import ( + get_file_size_in_mb, + get_parquet_file_size_in_mb, + to_parquet_with_hf_images, + write_info, + write_stats, + write_tasks, +) from lerobot.datasets.utils import ( DEFAULT_CHUNK_SIZE, DEFAULT_DATA_FILE_SIZE_IN_MB, @@ -32,14 +41,7 @@ from lerobot.datasets.utils import ( DEFAULT_EPISODES_PATH, DEFAULT_VIDEO_FILE_SIZE_IN_MB, DEFAULT_VIDEO_PATH, - get_file_size_in_mb, - get_hf_features_from_features, - get_parquet_file_size_in_mb, - to_parquet_with_hf_images, update_chunk_file_indices, - write_info, - write_stats, - write_tasks, ) from lerobot.datasets.video_utils import concatenate_video_files, get_video_duration_in_s diff --git a/src/lerobot/datasets/backtracking.py b/src/lerobot/datasets/backtracking.py deleted file mode 100644 index 16363be86..000000000 --- a/src/lerobot/datasets/backtracking.py +++ /dev/null @@ -1,175 +0,0 @@ -#!/usr/bin/env python - -# Copyright 2024 The HuggingFace Inc. team. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -from collections import deque -from collections.abc import Iterable, Iterator - - -class LookBackError(Exception): - """ - Exception raised when trying to look back in the history of a Backtrackable object. - """ - - pass - - -class LookAheadError(Exception): - """ - Exception raised when trying to look ahead in the future of a Backtrackable object. - """ - - pass - - -class Backtrackable[T]: - """ - Wrap any iterator/iterable so you can step back up to `history` items - and look ahead up to `lookahead` items. - - This is useful for streaming datasets where you need to access previous and future items - but can't load the entire dataset into memory. - - Example: - ------- - ```python - ds = load_dataset("c4", "en", streaming=True, split="train") - rev = Backtrackable(ds, history=3, lookahead=2) - - x0 = next(rev) # forward - x1 = next(rev) - x2 = next(rev) - - # Look ahead - x3_peek = rev.peek_ahead(1) # next item without moving cursor - x4_peek = rev.peek_ahead(2) # two items ahead - - # Look back - x1_again = rev.peek_back(1) # previous item without moving cursor - x0_again = rev.peek_back(2) # two items back - - # Move backward - x1_back = rev.prev() # back one step - next(rev) # returns x2, continues forward from where we were - ``` - """ - - __slots__ = ("_source", "_back_buf", "_ahead_buf", "_cursor", "_history", "_lookahead") - - def __init__(self, iterable: Iterable[T], *, history: int = 1, lookahead: int = 0): - if history < 1: - raise ValueError("history must be >= 1") - if lookahead <= 0: - raise ValueError("lookahead must be > 0") - - self._source: Iterator[T] = iter(iterable) - self._back_buf: deque[T] = deque(maxlen=history) - self._ahead_buf: deque[T] = deque(maxlen=lookahead) if lookahead > 0 else deque() - self._cursor: int = 0 - self._history = history - self._lookahead = lookahead - - def __iter__(self) -> "Backtrackable[T]": - return self - - def __next__(self) -> T: - # If we've stepped back, consume from back buffer first - if self._cursor < 0: # -1 means "last item", etc. - self._cursor += 1 - return self._back_buf[self._cursor] - - # If we have items in the ahead buffer, use them first - item = self._ahead_buf.popleft() if self._ahead_buf else next(self._source) - - # Add current item to back buffer and reset cursor - self._back_buf.append(item) - self._cursor = 0 - return item - - def prev(self) -> T: - """ - Step one item back in history and return it. - Raises IndexError if already at the oldest buffered item. - """ - if len(self._back_buf) + self._cursor <= 1: - raise LookBackError("At start of history") - - self._cursor -= 1 - return self._back_buf[self._cursor] - - def peek_back(self, n: int = 1) -> T: - """ - Look `n` items back (n=1 == previous item) without moving the cursor. - """ - if n < 0 or n + 1 > len(self._back_buf) + self._cursor: - raise LookBackError("peek_back distance out of range") - - return self._back_buf[self._cursor - (n + 1)] - - def peek_ahead(self, n: int = 1) -> T: - """ - Look `n` items ahead (n=1 == next item) without moving the cursor. - Fills the ahead buffer if necessary. - """ - if n < 1: - raise LookAheadError("peek_ahead distance must be 1 or more") - elif n > self._lookahead: - raise LookAheadError("peek_ahead distance exceeds lookahead limit") - - # Fill ahead buffer if we don't have enough items - while len(self._ahead_buf) < n: - try: - item = next(self._source) - self._ahead_buf.append(item) - - except StopIteration as err: - raise LookAheadError("peek_ahead: not enough items in source") from err - - return self._ahead_buf[n - 1] - - def history(self) -> list[T]: - """ - Return a copy of the buffered history (most recent last). - The list length ≤ `history` argument passed at construction. - """ - if self._cursor == 0: - return list(self._back_buf) - - # When cursor<0, slice so the order remains chronological - return list(self._back_buf)[: self._cursor or None] - - def can_peek_back(self, steps: int = 1) -> bool: - """ - Check if we can go back `steps` items without raising an IndexError. - """ - return steps <= len(self._back_buf) + self._cursor - - def can_peek_ahead(self, steps: int = 1) -> bool: - """ - Check if we can peek ahead `steps` items. - This may involve trying to fill the ahead buffer. - """ - if self._lookahead > 0 and steps > self._lookahead: - return False - - # Try to fill ahead buffer to check if we can peek that far - try: - while len(self._ahead_buf) < steps: - if self._lookahead > 0 and len(self._ahead_buf) >= self._lookahead: - return False - item = next(self._source) - self._ahead_buf.append(item) - return True - except StopIteration: - return False diff --git a/src/lerobot/datasets/backward_compatibility.py b/src/lerobot/datasets/backward_compatibility.py deleted file mode 100644 index aefbfd55b..000000000 --- a/src/lerobot/datasets/backward_compatibility.py +++ /dev/null @@ -1,56 +0,0 @@ -# Copyright 2024 The HuggingFace Inc. team. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import packaging.version - -V30_MESSAGE = """ -The dataset you requested ({repo_id}) is in {version} format. - -We introduced a new format since v3.0 which is not backward compatible with v2.1. -Please, update your dataset to the new format using this command: -``` -python -m lerobot.scripts.convert_dataset_v21_to_v30 --repo-id={repo_id} -``` - -If you already have a converted version uploaded to the hub, then this error might be because of -an older version in your local cache. Consider deleting the cached version and retrying. - -If you encounter a problem, contact LeRobot maintainers on [Discord](https://discord.com/invite/s3KuuzsPFb) -or open an [issue on GitHub](https://github.com/huggingface/lerobot/issues/new/choose). -""" - -FUTURE_MESSAGE = """ -The dataset you requested ({repo_id}) is only available in {version} format. -As we cannot ensure forward compatibility with it, please update your current version of lerobot. -""" - - -class CompatibilityError(Exception): ... - - -class BackwardCompatibilityError(CompatibilityError): - def __init__(self, repo_id: str, version: packaging.version.Version): - if version.major == 2 and version.minor == 1: - message = V30_MESSAGE.format(repo_id=repo_id, version=version) - else: - raise NotImplementedError( - "Contact the maintainer on [Discord](https://discord.com/invite/s3KuuzsPFb)." - ) - super().__init__(message) - - -class ForwardCompatibilityError(CompatibilityError): - def __init__(self, repo_id: str, version: packaging.version.Version): - message = FUTURE_MESSAGE.format(repo_id=repo_id, version=version) - super().__init__(message) diff --git a/src/lerobot/datasets/compute_stats.py b/src/lerobot/datasets/compute_stats.py index 61e174d5c..5bd95810b 100644 --- a/src/lerobot/datasets/compute_stats.py +++ b/src/lerobot/datasets/compute_stats.py @@ -15,7 +15,7 @@ # limitations under the License. import numpy as np -from lerobot.datasets.utils import load_image_as_numpy +from lerobot.datasets.io_utils import load_image_as_numpy DEFAULT_QUANTILES = [0.01, 0.10, 0.50, 0.90, 0.99] diff --git a/src/lerobot/datasets/dataset_metadata.py b/src/lerobot/datasets/dataset_metadata.py index 61585a8a3..560a90a6e 100644 --- a/src/lerobot/datasets/dataset_metadata.py +++ b/src/lerobot/datasets/dataset_metadata.py @@ -23,28 +23,29 @@ import pyarrow.parquet as pq from huggingface_hub import snapshot_download from lerobot.datasets.compute_stats import aggregate_stats -from lerobot.datasets.utils import ( - DEFAULT_EPISODES_PATH, - DEFAULT_FEATURES, - INFO_PATH, - _validate_feature_names, - check_version_compatibility, - create_empty_dataset_info, - flatten_dict, +from lerobot.datasets.feature_utils import _validate_feature_names, create_empty_dataset_info +from lerobot.datasets.io_utils import ( get_file_size_in_mb, - get_safe_version, - is_valid_version, load_episodes, load_info, load_stats, load_subtasks, load_tasks, - update_chunk_file_indices, write_info, write_json, write_stats, write_tasks, ) +from lerobot.datasets.utils import ( + DEFAULT_EPISODES_PATH, + DEFAULT_FEATURES, + INFO_PATH, + check_version_compatibility, + flatten_dict, + get_safe_version, + is_valid_version, + update_chunk_file_indices, +) from lerobot.datasets.video_utils import get_video_info from lerobot.utils.constants import HF_LEROBOT_HOME diff --git a/src/lerobot/datasets/dataset_tools.py b/src/lerobot/datasets/dataset_tools.py index 546b3d67f..87cdc18e5 100644 --- a/src/lerobot/datasets/dataset_tools.py +++ b/src/lerobot/datasets/dataset_tools.py @@ -38,19 +38,22 @@ from tqdm import tqdm from lerobot.datasets.aggregate import aggregate_datasets from lerobot.datasets.compute_stats import aggregate_stats -from lerobot.datasets.lerobot_dataset import LeRobotDataset, LeRobotDatasetMetadata +from lerobot.datasets.dataset_metadata import LeRobotDatasetMetadata +from lerobot.datasets.io_utils import ( + get_parquet_file_size_in_mb, + load_episodes, + write_info, + write_stats, + write_tasks, +) +from lerobot.datasets.lerobot_dataset import LeRobotDataset from lerobot.datasets.utils import ( DATA_DIR, DEFAULT_CHUNK_SIZE, DEFAULT_DATA_FILE_SIZE_IN_MB, DEFAULT_DATA_PATH, DEFAULT_EPISODES_PATH, - get_parquet_file_size_in_mb, - load_episodes, update_chunk_file_indices, - write_info, - write_stats, - write_tasks, ) from lerobot.datasets.video_utils import encode_video_frames, get_video_info from lerobot.utils.constants import HF_LEROBOT_HOME, OBS_IMAGE @@ -915,7 +918,8 @@ def _write_parquet(df: pd.DataFrame, path: Path, meta: LeRobotDatasetMetadata) - This ensures images are properly embedded and the file can be loaded correctly by HF datasets. """ - from lerobot.datasets.utils import embed_images, get_hf_features_from_features + from lerobot.datasets.feature_utils import get_hf_features_from_features + from lerobot.datasets.io_utils import embed_images hf_features = get_hf_features_from_features(meta.features) ep_dataset = datasets.Dataset.from_dict(df.to_dict(orient="list"), features=hf_features, split="train") diff --git a/src/lerobot/datasets/factory.py b/src/lerobot/datasets/factory.py index 31e939809..76ece8961 100644 --- a/src/lerobot/datasets/factory.py +++ b/src/lerobot/datasets/factory.py @@ -20,11 +20,9 @@ import torch from lerobot.configs.policies import PreTrainedConfig from lerobot.configs.train import TrainPipelineConfig -from lerobot.datasets.lerobot_dataset import ( - LeRobotDataset, - LeRobotDatasetMetadata, - MultiLeRobotDataset, -) +from lerobot.datasets.dataset_metadata import LeRobotDatasetMetadata +from lerobot.datasets.lerobot_dataset import LeRobotDataset +from lerobot.datasets.multi_dataset import MultiLeRobotDataset from lerobot.datasets.streaming_dataset import StreamingLeRobotDataset from lerobot.datasets.transforms import ImageTransforms from lerobot.utils.constants import ACTION, OBS_PREFIX, REWARD diff --git a/src/lerobot/datasets/lerobot_dataset.py b/src/lerobot/datasets/lerobot_dataset.py index 6aecc016e..8f0600ba8 100644 --- a/src/lerobot/datasets/lerobot_dataset.py +++ b/src/lerobot/datasets/lerobot_dataset.py @@ -33,25 +33,29 @@ from huggingface_hub.errors import RevisionNotFoundError from lerobot.datasets.compute_stats import compute_episode_stats from lerobot.datasets.dataset_metadata import CODEBASE_VERSION, LeRobotDatasetMetadata +from lerobot.datasets.feature_utils import ( + check_delta_timestamps, + get_delta_indices, + get_hf_features_from_features, + validate_episode_buffer, + validate_frame, +) from lerobot.datasets.image_writer import AsyncImageWriter, write_image +from lerobot.datasets.io_utils import ( + embed_images, + get_file_size_in_mb, + hf_transform_to_torch, + load_episodes, + load_nested_dataset, + write_info, +) from lerobot.datasets.utils import ( DEFAULT_EPISODES_PATH, DEFAULT_IMAGE_PATH, - check_delta_timestamps, create_lerobot_dataset_card, - embed_images, - get_delta_indices, - get_file_size_in_mb, - get_hf_features_from_features, get_safe_version, - hf_transform_to_torch, is_valid_version, - load_episodes, - load_nested_dataset, update_chunk_file_indices, - validate_episode_buffer, - validate_frame, - write_info, ) from lerobot.datasets.video_utils import ( StreamingVideoEncoder, @@ -1238,9 +1242,3 @@ class LeRobotDataset(torch.utils.data.Dataset): obj._streaming_encoder = None return obj - - -# --------------------------------------------------------------------------- -# Backward-compatible re-export -# --------------------------------------------------------------------------- -from lerobot.datasets.multi_dataset import MultiLeRobotDataset # noqa: E402, F401 diff --git a/src/lerobot/datasets/pipeline_features.py b/src/lerobot/datasets/pipeline_features.py index fe8cabbeb..96779fdc6 100644 --- a/src/lerobot/datasets/pipeline_features.py +++ b/src/lerobot/datasets/pipeline_features.py @@ -17,7 +17,7 @@ from collections.abc import Sequence from typing import Any from lerobot.configs.types import PipelineFeatureType -from lerobot.datasets.utils import hw_to_dataset_features +from lerobot.datasets.feature_utils import hw_to_dataset_features from lerobot.processor import DataProcessorPipeline from lerobot.types import RobotAction, RobotObservation from lerobot.utils.constants import ACTION, OBS_IMAGES, OBS_STATE, OBS_STR diff --git a/src/lerobot/datasets/streaming_dataset.py b/src/lerobot/datasets/streaming_dataset.py index 454389d46..62e00558a 100644 --- a/src/lerobot/datasets/streaming_dataset.py +++ b/src/lerobot/datasets/streaming_dataset.py @@ -13,7 +13,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from collections.abc import Callable, Generator, Iterator +from collections import deque +from collections.abc import Callable, Generator, Iterable, Iterator from pathlib import Path import datasets @@ -21,16 +22,13 @@ import numpy as np import torch from datasets import load_dataset -from lerobot.datasets.lerobot_dataset import CODEBASE_VERSION, LeRobotDatasetMetadata +from lerobot.datasets.dataset_metadata import CODEBASE_VERSION, LeRobotDatasetMetadata +from lerobot.datasets.feature_utils import get_delta_indices +from lerobot.datasets.io_utils import item_to_torch from lerobot.datasets.utils import ( - Backtrackable, - LookAheadError, - LookBackError, check_version_compatibility, find_float_index, - get_delta_indices, is_float_in_list, - item_to_torch, safe_shard, ) from lerobot.datasets.video_utils import ( @@ -40,6 +38,164 @@ from lerobot.datasets.video_utils import ( from lerobot.utils.constants import HF_LEROBOT_HOME, LOOKAHEAD_BACKTRACKTABLE, LOOKBACK_BACKTRACKTABLE +class LookBackError(Exception): + """ + Exception raised when trying to look back in the history of a Backtrackable object. + """ + + pass + + +class LookAheadError(Exception): + """ + Exception raised when trying to look ahead in the future of a Backtrackable object. + """ + + pass + + +class Backtrackable[T]: + """ + Wrap any iterator/iterable so you can step back up to `history` items + and look ahead up to `lookahead` items. + + This is useful for streaming datasets where you need to access previous and future items + but can't load the entire dataset into memory. + + Example: + ------- + ```python + ds = load_dataset("c4", "en", streaming=True, split="train") + rev = Backtrackable(ds, history=3, lookahead=2) + + x0 = next(rev) # forward + x1 = next(rev) + x2 = next(rev) + + # Look ahead + x3_peek = rev.peek_ahead(1) # next item without moving cursor + x4_peek = rev.peek_ahead(2) # two items ahead + + # Look back + x1_again = rev.peek_back(1) # previous item without moving cursor + x0_again = rev.peek_back(2) # two items back + + # Move backward + x1_back = rev.prev() # back one step + next(rev) # returns x2, continues forward from where we were + ``` + """ + + __slots__ = ("_source", "_back_buf", "_ahead_buf", "_cursor", "_history", "_lookahead") + + def __init__(self, iterable: Iterable[T], *, history: int = 1, lookahead: int = 0): + if history < 1: + raise ValueError("history must be >= 1") + if lookahead <= 0: + raise ValueError("lookahead must be > 0") + + self._source: Iterator[T] = iter(iterable) + self._back_buf: deque[T] = deque(maxlen=history) + self._ahead_buf: deque[T] = deque(maxlen=lookahead) if lookahead > 0 else deque() + self._cursor: int = 0 + self._history = history + self._lookahead = lookahead + + def __iter__(self) -> "Backtrackable[T]": + return self + + def __next__(self) -> T: + # If we've stepped back, consume from back buffer first + if self._cursor < 0: # -1 means "last item", etc. + self._cursor += 1 + return self._back_buf[self._cursor] + + # If we have items in the ahead buffer, use them first + item = self._ahead_buf.popleft() if self._ahead_buf else next(self._source) + + # Add current item to back buffer and reset cursor + self._back_buf.append(item) + self._cursor = 0 + return item + + def prev(self) -> T: + """ + Step one item back in history and return it. + Raises IndexError if already at the oldest buffered item. + """ + if len(self._back_buf) + self._cursor <= 1: + raise LookBackError("At start of history") + + self._cursor -= 1 + return self._back_buf[self._cursor] + + def peek_back(self, n: int = 1) -> T: + """ + Look `n` items back (n=1 == previous item) without moving the cursor. + """ + if n < 0 or n + 1 > len(self._back_buf) + self._cursor: + raise LookBackError("peek_back distance out of range") + + return self._back_buf[self._cursor - (n + 1)] + + def peek_ahead(self, n: int = 1) -> T: + """ + Look `n` items ahead (n=1 == next item) without moving the cursor. + Fills the ahead buffer if necessary. + """ + if n < 1: + raise LookAheadError("peek_ahead distance must be 1 or more") + elif n > self._lookahead: + raise LookAheadError("peek_ahead distance exceeds lookahead limit") + + # Fill ahead buffer if we don't have enough items + while len(self._ahead_buf) < n: + try: + item = next(self._source) + self._ahead_buf.append(item) + + except StopIteration as err: + raise LookAheadError("peek_ahead: not enough items in source") from err + + return self._ahead_buf[n - 1] + + def history(self) -> list[T]: + """ + Return a copy of the buffered history (most recent last). + The list length ≤ `history` argument passed at construction. + """ + if self._cursor == 0: + return list(self._back_buf) + + # When cursor<0, slice so the order remains chronological + return list(self._back_buf)[: self._cursor or None] + + def can_peek_back(self, steps: int = 1) -> bool: + """ + Check if we can go back `steps` items without raising an IndexError. + """ + return steps <= len(self._back_buf) + self._cursor + + def can_peek_ahead(self, steps: int = 1) -> bool: + """ + Check if we can peek ahead `steps` items. + This may involve trying to fill the ahead buffer. + """ + if self._lookahead > 0 and steps > self._lookahead: + return False + + # Try to fill ahead buffer to check if we can peek that far + try: + while len(self._ahead_buf) < steps: + if self._lookahead > 0 and len(self._ahead_buf) >= self._lookahead: + return False + item = next(self._source) + self._ahead_buf.append(item) + return True + except StopIteration: + return False + + class StreamingLeRobotDataset(torch.utils.data.IterableDataset): """LeRobotDataset with streaming capabilities. diff --git a/src/lerobot/datasets/utils.py b/src/lerobot/datasets/utils.py index d1bddbf44..2e1d360f9 100644 --- a/src/lerobot/datasets/utils.py +++ b/src/lerobot/datasets/utils.py @@ -27,11 +27,47 @@ import torch from huggingface_hub import DatasetCard, DatasetCardData, HfApi from huggingface_hub.errors import RevisionNotFoundError -from lerobot.datasets.backward_compatibility import ( - FUTURE_MESSAGE, - BackwardCompatibilityError, - ForwardCompatibilityError, -) +V30_MESSAGE = """ +The dataset you requested ({repo_id}) is in {version} format. + +We introduced a new format since v3.0 which is not backward compatible with v2.1. +Please, update your dataset to the new format using this command: +``` +python -m lerobot.scripts.convert_dataset_v21_to_v30 --repo-id={repo_id} +``` + +If you already have a converted version uploaded to the hub, then this error might be because of +an older version in your local cache. Consider deleting the cached version and retrying. + +If you encounter a problem, contact LeRobot maintainers on [Discord](https://discord.com/invite/s3KuuzsPFb) +or open an [issue on GitHub](https://github.com/huggingface/lerobot/issues/new/choose). +""" + +FUTURE_MESSAGE = """ +The dataset you requested ({repo_id}) is only available in {version} format. +As we cannot ensure forward compatibility with it, please update your current version of lerobot. +""" + + +class CompatibilityError(Exception): ... + + +class BackwardCompatibilityError(CompatibilityError): + def __init__(self, repo_id: str, version: packaging.version.Version): + if version.major == 2 and version.minor == 1: + message = V30_MESSAGE.format(repo_id=repo_id, version=version) + else: + raise NotImplementedError( + "Contact the maintainer on [Discord](https://discord.com/invite/s3KuuzsPFb)." + ) + super().__init__(message) + + +class ForwardCompatibilityError(CompatibilityError): + def __init__(self, repo_id: str, version: packaging.version.Version): + message = FUTURE_MESSAGE.format(repo_id=repo_id, version=version) + super().__init__(message) + DEFAULT_CHUNK_SIZE = 1000 # Max number of files per chunk DEFAULT_DATA_FILE_SIZE_IN_MB = 100 # Max size per file @@ -392,52 +428,3 @@ def safe_shard(dataset: datasets.IterableDataset, index: int, num_shards: int) - shard_idx = min(dataset.num_shards, index + 1) - 1 return dataset.shard(num_shards, index=shard_idx) - - -# --------------------------------------------------------------------------- -# Backward-compatible re-exports: symbols moved to focused submodules. -# Existing ``from lerobot.datasets.utils import `` will keep working. -# --------------------------------------------------------------------------- -from lerobot.datasets.backtracking import Backtrackable, LookAheadError, LookBackError # noqa: E402, F401 -from lerobot.datasets.feature_utils import ( # noqa: E402, F401 - _validate_feature_names, - build_dataset_frame, - check_delta_timestamps, - combine_feature_dicts, - create_empty_dataset_info, - dataset_to_policy_features, - get_delta_indices, - get_hf_features_from_features, - hw_to_dataset_features, - validate_episode_buffer, - validate_feature_dtype_and_shape, - validate_feature_image_or_video, - validate_feature_numpy_array, - validate_feature_string, - validate_features_presence, - validate_frame, -) -from lerobot.datasets.io_utils import ( # noqa: E402, F401 - cast_stats_to_numpy, - embed_images, - get_file_size_in_mb, - get_hf_dataset_size_in_mb, - get_parquet_file_size_in_mb, - get_parquet_num_frames, - hf_transform_to_torch, - item_to_torch, - load_episodes, - load_image_as_numpy, - load_info, - load_json, - load_nested_dataset, - load_stats, - load_subtasks, - load_tasks, - to_parquet_with_hf_images, - write_episodes, - write_info, - write_json, - write_stats, - write_tasks, -) diff --git a/src/lerobot/optim/optimizers.py b/src/lerobot/optim/optimizers.py index 2b75353d9..e2e3d8937 100644 --- a/src/lerobot/optim/optimizers.py +++ b/src/lerobot/optim/optimizers.py @@ -23,7 +23,8 @@ import draccus import torch from safetensors.torch import load_file, save_file -from lerobot.datasets.utils import flatten_dict, unflatten_dict, write_json +from lerobot.datasets.io_utils import write_json +from lerobot.datasets.utils import flatten_dict, unflatten_dict from lerobot.utils.constants import ( OPTIMIZER_PARAM_GROUPS, OPTIMIZER_STATE, diff --git a/src/lerobot/optim/schedulers.py b/src/lerobot/optim/schedulers.py index 4af7f0802..19c3fd7bd 100644 --- a/src/lerobot/optim/schedulers.py +++ b/src/lerobot/optim/schedulers.py @@ -23,7 +23,7 @@ import draccus from torch.optim import Optimizer from torch.optim.lr_scheduler import LambdaLR, LRScheduler -from lerobot.datasets.utils import write_json +from lerobot.datasets.io_utils import write_json from lerobot.utils.constants import SCHEDULER_STATE from lerobot.utils.io_utils import deserialize_json_into_object diff --git a/src/lerobot/policies/factory.py b/src/lerobot/policies/factory.py index 9515d5b82..2320cd624 100644 --- a/src/lerobot/policies/factory.py +++ b/src/lerobot/policies/factory.py @@ -24,8 +24,8 @@ import torch from lerobot.configs.policies import PreTrainedConfig from lerobot.configs.types import FeatureType -from lerobot.datasets.lerobot_dataset import LeRobotDatasetMetadata -from lerobot.datasets.utils import dataset_to_policy_features +from lerobot.datasets.dataset_metadata import LeRobotDatasetMetadata +from lerobot.datasets.feature_utils import dataset_to_policy_features from lerobot.envs.configs import EnvConfig from lerobot.envs.utils import env_to_policy_features from lerobot.policies.act.configuration_act import ACTConfig diff --git a/src/lerobot/policies/utils.py b/src/lerobot/policies/utils.py index 9ad5dac4a..82ab51005 100644 --- a/src/lerobot/policies/utils.py +++ b/src/lerobot/policies/utils.py @@ -23,7 +23,7 @@ from torch import nn from lerobot.configs.policies import PreTrainedConfig from lerobot.configs.types import FeatureType, PolicyFeature -from lerobot.datasets.utils import build_dataset_frame +from lerobot.datasets.feature_utils import build_dataset_frame from lerobot.types import PolicyAction, RobotAction, RobotObservation from lerobot.utils.constants import ACTION, OBS_STR diff --git a/src/lerobot/scripts/augment_dataset_quantile_stats.py b/src/lerobot/scripts/augment_dataset_quantile_stats.py index e6ab6867e..4d80c9332 100644 --- a/src/lerobot/scripts/augment_dataset_quantile_stats.py +++ b/src/lerobot/scripts/augment_dataset_quantile_stats.py @@ -45,8 +45,9 @@ from requests import HTTPError from tqdm import tqdm from lerobot.datasets.compute_stats import DEFAULT_QUANTILES, aggregate_stats, get_feature_stats -from lerobot.datasets.lerobot_dataset import CODEBASE_VERSION, LeRobotDataset -from lerobot.datasets.utils import write_stats +from lerobot.datasets.dataset_metadata import CODEBASE_VERSION +from lerobot.datasets.io_utils import write_stats +from lerobot.datasets.lerobot_dataset import LeRobotDataset from lerobot.utils.utils import init_logging diff --git a/src/lerobot/scripts/convert_dataset_v21_to_v30.py b/src/lerobot/scripts/convert_dataset_v21_to_v30.py index dc81cc51c..2b6dcf732 100644 --- a/src/lerobot/scripts/convert_dataset_v21_to_v30.py +++ b/src/lerobot/scripts/convert_dataset_v21_to_v30.py @@ -60,7 +60,19 @@ from huggingface_hub import HfApi, snapshot_download from requests import HTTPError from lerobot.datasets.compute_stats import aggregate_stats -from lerobot.datasets.lerobot_dataset import CODEBASE_VERSION, LeRobotDataset +from lerobot.datasets.dataset_metadata import CODEBASE_VERSION +from lerobot.datasets.io_utils import ( + cast_stats_to_numpy, + get_file_size_in_mb, + get_parquet_file_size_in_mb, + get_parquet_num_frames, + load_info, + write_episodes, + write_info, + write_stats, + write_tasks, +) +from lerobot.datasets.lerobot_dataset import LeRobotDataset from lerobot.datasets.utils import ( DEFAULT_CHUNK_SIZE, DEFAULT_DATA_FILE_SIZE_IN_MB, @@ -70,17 +82,8 @@ from lerobot.datasets.utils import ( LEGACY_EPISODES_PATH, LEGACY_EPISODES_STATS_PATH, LEGACY_TASKS_PATH, - cast_stats_to_numpy, flatten_dict, - get_file_size_in_mb, - get_parquet_file_size_in_mb, - get_parquet_num_frames, - load_info, update_chunk_file_indices, - write_episodes, - write_info, - write_stats, - write_tasks, ) from lerobot.datasets.video_utils import concatenate_video_files, get_video_duration_in_s from lerobot.utils.constants import HF_LEROBOT_HOME diff --git a/src/lerobot/scripts/lerobot_record.py b/src/lerobot/scripts/lerobot_record.py index 345d18f23..819634ba2 100644 --- a/src/lerobot/scripts/lerobot_record.py +++ b/src/lerobot/scripts/lerobot_record.py @@ -83,10 +83,10 @@ from lerobot.cameras.realsense.configuration_realsense import RealSenseCameraCon from lerobot.cameras.zmq.configuration_zmq import ZMQCameraConfig # noqa: F401 from lerobot.configs import parser from lerobot.configs.policies import PreTrainedConfig +from lerobot.datasets.feature_utils import build_dataset_frame, combine_feature_dicts from lerobot.datasets.image_writer import safe_stop_image_writer from lerobot.datasets.lerobot_dataset import LeRobotDataset from lerobot.datasets.pipeline_features import aggregate_pipeline_dataset_features, create_initial_features -from lerobot.datasets.utils import build_dataset_frame, combine_feature_dicts from lerobot.datasets.video_utils import VideoEncodingManager from lerobot.policies.factory import make_policy, make_pre_post_processors from lerobot.policies.pretrained import PreTrainedPolicy diff --git a/src/lerobot/utils/train_utils.py b/src/lerobot/utils/train_utils.py index d8481f4b9..02f6aebb3 100644 --- a/src/lerobot/utils/train_utils.py +++ b/src/lerobot/utils/train_utils.py @@ -19,7 +19,7 @@ from torch.optim import Optimizer from torch.optim.lr_scheduler import LRScheduler from lerobot.configs.train import TrainPipelineConfig -from lerobot.datasets.utils import load_json, write_json +from lerobot.datasets.io_utils import load_json, write_json from lerobot.optim.optimizers import load_optimizer_state, save_optimizer_state from lerobot.optim.schedulers import load_scheduler_state, save_scheduler_state from lerobot.policies.pretrained import PreTrainedPolicy diff --git a/tests/datasets/test_dataset_tools.py b/tests/datasets/test_dataset_tools.py index 24daed91e..5ed7aa1a3 100644 --- a/tests/datasets/test_dataset_tools.py +++ b/tests/datasets/test_dataset_tools.py @@ -1020,7 +1020,7 @@ def test_modify_features_preserves_file_structure(sample_dataset, tmp_path): # Get original chunk/file indices from first episode if train_dataset.meta.episodes is None: - from lerobot.datasets.utils import load_episodes + from lerobot.datasets.io_utils import load_episodes train_dataset.meta.episodes = load_episodes(train_dataset.meta.root) original_chunk_indices = [ep["data/chunk_index"] for ep in train_dataset.meta.episodes] @@ -1040,7 +1040,7 @@ def test_modify_features_preserves_file_structure(sample_dataset, tmp_path): # Check that chunk/file indices are preserved if modified_dataset.meta.episodes is None: - from lerobot.datasets.utils import load_episodes + from lerobot.datasets.io_utils import load_episodes modified_dataset.meta.episodes = load_episodes(modified_dataset.meta.root) new_chunk_indices = [ep["data/chunk_index"] for ep in modified_dataset.meta.episodes] @@ -1194,7 +1194,7 @@ def test_modify_tasks_in_place(sample_dataset): def test_modify_tasks_keeps_original_when_not_overridden(sample_dataset): """Test that original tasks are kept when using episode_tasks without new_task.""" - from lerobot.datasets.utils import load_episodes + from lerobot.datasets.io_utils import load_episodes # Ensure episodes metadata is loaded if sample_dataset.meta.episodes is None: diff --git a/tests/datasets/test_dataset_utils.py b/tests/datasets/test_dataset_utils.py index d40ee238f..874099e2b 100644 --- a/tests/datasets/test_dataset_utils.py +++ b/tests/datasets/test_dataset_utils.py @@ -19,7 +19,9 @@ import torch from datasets import Dataset from huggingface_hub import DatasetCard -from lerobot.datasets.utils import combine_feature_dicts, create_lerobot_dataset_card, hf_transform_to_torch +from lerobot.datasets.feature_utils import combine_feature_dicts +from lerobot.datasets.io_utils import hf_transform_to_torch +from lerobot.datasets.utils import create_lerobot_dataset_card from lerobot.utils.constants import ACTION, OBS_IMAGES diff --git a/tests/datasets/test_datasets.py b/tests/datasets/test_datasets.py index 6f99eb301..67878d8f6 100644 --- a/tests/datasets/test_datasets.py +++ b/tests/datasets/test_datasets.py @@ -29,20 +29,19 @@ import lerobot from lerobot.configs.default import DatasetConfig from lerobot.configs.train import TrainPipelineConfig from lerobot.datasets.factory import make_dataset +from lerobot.datasets.feature_utils import get_hf_features_from_features, hw_to_dataset_features from lerobot.datasets.image_writer import image_array_to_pil_image +from lerobot.datasets.io_utils import hf_transform_to_torch from lerobot.datasets.lerobot_dataset import ( LeRobotDataset, - MultiLeRobotDataset, _encode_video_worker, ) +from lerobot.datasets.multi_dataset import MultiLeRobotDataset from lerobot.datasets.utils import ( DEFAULT_CHUNK_SIZE, DEFAULT_DATA_FILE_SIZE_IN_MB, DEFAULT_VIDEO_FILE_SIZE_IN_MB, create_branch, - get_hf_features_from_features, - hf_transform_to_torch, - hw_to_dataset_features, ) from lerobot.datasets.video_utils import VALID_VIDEO_CODECS from lerobot.envs.factory import make_env_config @@ -1329,7 +1328,7 @@ def test_frames_in_current_file_calculation(tmp_path, empty_lerobot_dataset_fact dataset.finalize() - from lerobot.datasets.utils import load_episodes + from lerobot.datasets.io_utils import load_episodes dataset.meta.episodes = load_episodes(dataset.root) assert dataset.meta.episodes is not None diff --git a/tests/datasets/test_delta_timestamps.py b/tests/datasets/test_delta_timestamps.py index 72f69bc72..8d9529f68 100644 --- a/tests/datasets/test_delta_timestamps.py +++ b/tests/datasets/test_delta_timestamps.py @@ -13,7 +13,7 @@ # limitations under the License. import pytest -from lerobot.datasets.utils import ( +from lerobot.datasets.feature_utils import ( check_delta_timestamps, get_delta_indices, ) diff --git a/tests/datasets/test_sampler.py b/tests/datasets/test_sampler.py index a5d463349..18fb1c8ac 100644 --- a/tests/datasets/test_sampler.py +++ b/tests/datasets/test_sampler.py @@ -19,10 +19,10 @@ import pytest import torch from datasets import Dataset -from lerobot.datasets.sampler import EpisodeAwareSampler -from lerobot.datasets.utils import ( +from lerobot.datasets.io_utils import ( hf_transform_to_torch, ) +from lerobot.datasets.sampler import EpisodeAwareSampler def calculate_episode_data_index(hf_dataset: Dataset) -> dict[str, torch.Tensor]: diff --git a/tests/fixtures/dataset_factories.py b/tests/fixtures/dataset_factories.py index f53a16924..5ecb52145 100644 --- a/tests/fixtures/dataset_factories.py +++ b/tests/fixtures/dataset_factories.py @@ -26,7 +26,10 @@ import pytest import torch from datasets import Dataset -from lerobot.datasets.lerobot_dataset import CODEBASE_VERSION, LeRobotDataset, LeRobotDatasetMetadata +from lerobot.datasets.dataset_metadata import CODEBASE_VERSION, LeRobotDatasetMetadata +from lerobot.datasets.feature_utils import get_hf_features_from_features +from lerobot.datasets.io_utils import hf_transform_to_torch +from lerobot.datasets.lerobot_dataset import LeRobotDataset from lerobot.datasets.utils import ( DEFAULT_CHUNK_SIZE, DEFAULT_DATA_FILE_SIZE_IN_MB, @@ -35,8 +38,6 @@ from lerobot.datasets.utils import ( DEFAULT_VIDEO_FILE_SIZE_IN_MB, DEFAULT_VIDEO_PATH, flatten_dict, - get_hf_features_from_features, - hf_transform_to_torch, ) from lerobot.datasets.video_utils import encode_video_frames from tests.fixtures.constants import ( diff --git a/tests/fixtures/files.py b/tests/fixtures/files.py index 11f3fa94a..92d9ca1e2 100644 --- a/tests/fixtures/files.py +++ b/tests/fixtures/files.py @@ -20,17 +20,19 @@ import pandas as pd import pytest from datasets import Dataset -from lerobot.datasets.utils import ( - DEFAULT_CHUNK_SIZE, - DEFAULT_DATA_FILE_SIZE_IN_MB, - DEFAULT_DATA_PATH, +from lerobot.datasets.io_utils import ( get_hf_dataset_size_in_mb, - update_chunk_file_indices, write_episodes, write_info, write_stats, write_tasks, ) +from lerobot.datasets.utils import ( + DEFAULT_CHUNK_SIZE, + DEFAULT_DATA_FILE_SIZE_IN_MB, + DEFAULT_DATA_PATH, + update_chunk_file_indices, +) def write_hf_dataset( diff --git a/tests/policies/test_policies.py b/tests/policies/test_policies.py index 1ba82ffd0..1aae3fcc8 100644 --- a/tests/policies/test_policies.py +++ b/tests/policies/test_policies.py @@ -28,7 +28,8 @@ from lerobot.configs.default import DatasetConfig from lerobot.configs.train import TrainPipelineConfig from lerobot.configs.types import FeatureType, PolicyFeature from lerobot.datasets.factory import make_dataset -from lerobot.datasets.utils import cycle, dataset_to_policy_features +from lerobot.datasets.feature_utils import dataset_to_policy_features +from lerobot.datasets.utils import cycle from lerobot.envs.factory import make_env, make_env_config from lerobot.envs.utils import preprocess_observation from lerobot.optim.factory import make_optimizer_and_scheduler