mirror of
https://github.com/huggingface/lerobot.git
synced 2026-05-18 10:10:08 +00:00
- Added missing license in the new scripts
- Added back legacy functions in the v2.0-to-v2.1 conversion script
- Updated README description for dataset_v3
This commit is contained in:
@@ -200,20 +200,30 @@ dataset attributes:
|
|||||||
│ ├ timestamp (float32): timestamp in the episode
|
│ ├ timestamp (float32): timestamp in the episode
|
||||||
│ ├ next.done (bool): indicates the end of an episode ; True for the last frame in each episode
|
│ ├ next.done (bool): indicates the end of an episode ; True for the last frame in each episode
|
||||||
│ └ index (int64): general index in the whole dataset
|
│ └ index (int64): general index in the whole dataset
|
||||||
├ episode_data_index: contains 2 tensors with the start and end indices of each episode
|
├ meta: a LeRobotDatasetMetadata object containing:
|
||||||
│ ├ from (1D int64 tensor): first frame index for each episode — shape (num episodes,) starts with 0
|
│ ├ info: a dictionary of metadata on the dataset
|
||||||
│ └ to: (1D int64 tensor): last frame index for each episode — shape (num episodes,)
|
│ │ ├ codebase_version (str): this is to keep track of the codebase version the dataset was created with
|
||||||
├ stats: a dictionary of statistics (max, mean, min, std) for each feature in the dataset, for instance
|
│ │ ├ fps (int): frames per second the dataset is recorded/synchronized to
|
||||||
│ ├ observation.images.cam_high: {'max': tensor with same number of dimensions (e.g. `(c, 1, 1)` for images, `(c,)` for states), etc.}
|
│ │ ├ features (dict): all features contained in the dataset with their shapes and types
|
||||||
│ ...
|
│ │ ├ total_episodes (int): total number of episodes in the dataset
|
||||||
├ info: a dictionary of metadata on the dataset
|
│ │ ├ total_frames (int): total number of frames in the dataset
|
||||||
│ ├ codebase_version (str): this is to keep track of the codebase version the dataset was created with
|
│ │ ├ robot_type (str): robot type used for recording
|
||||||
│ ├ fps (float): frame per second the dataset is recorded/synchronized to
|
│ │ ├ data_path (str): formattable string for the parquet files
|
||||||
│ ├ video (bool): indicates if frames are encoded in mp4 video files to save space or stored as png files
|
│ │ └ video_path (str): formattable string for the video files (if using videos)
|
||||||
│ └ encoding (dict): if video, this documents the main options that were used with ffmpeg to encode the videos
|
│ ├ episodes: a DataFrame containing episode metadata with columns:
|
||||||
├ videos_dir (Path): where the mp4 videos or png images are stored/accessed
|
│ │ ├ episode_index (int): index of the episode
|
||||||
└ camera_keys (list of string): the keys to access camera features in the item returned by the dataset (e.g. `["observation.images.cam_high", ...]`)
|
│ │ ├ tasks (list): list of tasks for this episode
|
||||||
```
|
│ │ ├ length (int): number of frames in this episode
|
||||||
|
│ │ ├ dataset_from_index (int): start index of this episode in the dataset
|
||||||
|
│ │ └ dataset_to_index (int): end index of this episode in the dataset
|
||||||
|
│ ├ stats: a dictionary of statistics (max, mean, min, std) for each feature in the dataset, for instance
|
||||||
|
│ │ ├ observation.images.front_cam: {'max': tensor with same number of dimensions (e.g. `(c, 1, 1)` for images, `(c,)` for states), etc.}
|
||||||
|
│ │ └ ...
|
||||||
|
│ └ tasks: a DataFrame containing task information with task names as index and task_index as values
|
||||||
|
├ root (Path): local directory where the dataset is stored
|
||||||
|
├ image_transforms (Callable): optional image transformations to apply to visual modalities
|
||||||
|
└ delta_timestamps (dict): optional delta timestamps for temporal queries
|
||||||
|
decoding videos (e.g., 'pyav', 'torchcodec')
|
||||||
|
|
||||||
A `LeRobotDataset` is serialised using several widespread file formats for each of its parts, namely:
|
A `LeRobotDataset` is serialised using several widespread file formats for each of its parts, namely:
|
||||||
- hf_dataset stored using Hugging Face datasets library serialization to parquet
|
- hf_dataset stored using Hugging Face datasets library serialization to parquet
|
||||||
|
|||||||
@@ -1,3 +1,19 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
# Copyright 2025 The HuggingFace Inc. team. All rights reserved.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
import argparse
|
import argparse
|
||||||
import logging
|
import logging
|
||||||
import tarfile
|
import tarfile
|
||||||
|
|||||||
@@ -1,3 +1,19 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
# Copyright 2025 The HuggingFace Inc. team. All rights reserved.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
import argparse
|
import argparse
|
||||||
import json
|
import json
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
|
|
||||||
# Copyright 2024 The HuggingFace Inc. team. All rights reserved.
|
# Copyright 2025 The HuggingFace Inc. team. All rights reserved.
|
||||||
#
|
#
|
||||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
# you may not use this file except in compliance with the License.
|
# you may not use this file except in compliance with the License.
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
|
|
||||||
# Copyright 2024 The HuggingFace Inc. team. All rights reserved.
|
# Copyright 2025 The HuggingFace Inc. team. All rights reserved.
|
||||||
#
|
#
|
||||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
# you may not use this file except in compliance with the License.
|
# you may not use this file except in compliance with the License.
|
||||||
|
|||||||
@@ -1,3 +1,19 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
# Copyright 2025 The HuggingFace Inc. team. All rights reserved.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
import argparse
|
import argparse
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
|
|||||||
@@ -1,3 +1,19 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
# Copyright 2025 The HuggingFace Inc. team. All rights reserved.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
import argparse
|
import argparse
|
||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
|
|||||||
@@ -33,16 +33,38 @@ python -m lerobot.datasets.v21.convert_dataset_v20_to_v21 \
|
|||||||
|
|
||||||
import argparse
|
import argparse
|
||||||
import logging
|
import logging
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import jsonlines
|
||||||
from huggingface_hub import HfApi
|
from huggingface_hub import HfApi
|
||||||
|
|
||||||
from lerobot.datasets.lerobot_dataset import CODEBASE_VERSION, LeRobotDataset
|
from lerobot.datasets.lerobot_dataset import CODEBASE_VERSION, LeRobotDataset
|
||||||
from lerobot.datasets.utils import LEGACY_EPISODES_STATS_PATH, STATS_PATH, load_stats, write_info
|
from lerobot.datasets.utils import STATS_PATH, load_stats, serialize_dict, write_info
|
||||||
from lerobot.datasets.v21.convert_stats import check_aggregate_stats, convert_stats
|
from lerobot.datasets.v21.convert_stats import check_aggregate_stats, convert_stats
|
||||||
|
|
||||||
V20 = "v2.0"
|
V20 = "v2.0"
|
||||||
V21 = "v2.1"
|
V21 = "v2.1"
|
||||||
|
|
||||||
|
### LEGACY FUNCTIONS REMOVED FROM UTILS ###
|
||||||
|
|
||||||
|
LEGACY_EPISODES_STATS_PATH = "episodes_stats.jsonl"
|
||||||
|
|
||||||
|
|
||||||
|
def append_jsonlines(data: dict, fpath: Path) -> None:
|
||||||
|
fpath.parent.mkdir(exist_ok=True, parents=True)
|
||||||
|
with jsonlines.open(fpath, "a") as writer:
|
||||||
|
writer.write(data)
|
||||||
|
|
||||||
|
|
||||||
|
def legacy_write_episode_stats(episode_index: int, episode_stats: dict, local_dir: Path):
|
||||||
|
# We wrap episode_stats in a dictionary since `episode_stats["episode_index"]`
|
||||||
|
# is a dictionary of stats and not an integer.
|
||||||
|
episode_stats = {"episode_index": episode_index, "stats": serialize_dict(episode_stats)}
|
||||||
|
append_jsonlines(episode_stats, local_dir / LEGACY_EPISODES_STATS_PATH)
|
||||||
|
|
||||||
|
|
||||||
|
######## END OF LEGACY FUNCTIONS ########
|
||||||
|
|
||||||
|
|
||||||
class SuppressWarnings:
|
class SuppressWarnings:
|
||||||
def __enter__(self):
|
def __enter__(self):
|
||||||
@@ -61,9 +83,6 @@ def convert_dataset(
|
|||||||
with SuppressWarnings():
|
with SuppressWarnings():
|
||||||
dataset = LeRobotDataset(repo_id, revision=V20, force_cache_sync=True)
|
dataset = LeRobotDataset(repo_id, revision=V20, force_cache_sync=True)
|
||||||
|
|
||||||
if (dataset.root / LEGACY_EPISODES_STATS_PATH).is_file():
|
|
||||||
(dataset.root / LEGACY_EPISODES_STATS_PATH).unlink()
|
|
||||||
|
|
||||||
convert_stats(dataset, num_workers=num_workers)
|
convert_stats(dataset, num_workers=num_workers)
|
||||||
ref_stats = load_stats(dataset.root)
|
ref_stats = load_stats(dataset.root)
|
||||||
check_aggregate_stats(dataset, ref_stats)
|
check_aggregate_stats(dataset, ref_stats)
|
||||||
|
|||||||
@@ -1,3 +1,19 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
# Copyright 2025 The HuggingFace Inc. team. All rights reserved.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
"""
|
"""
|
||||||
This script will help you convert any LeRobot dataset already pushed to the hub from codebase version 2.1 to
|
This script will help you convert any LeRobot dataset already pushed to the hub from codebase version 2.1 to
|
||||||
3.0. It will:
|
3.0. It will:
|
||||||
@@ -11,7 +27,7 @@ This script will help you convert any LeRobot dataset already pushed to the hub
|
|||||||
Usage:
|
Usage:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
python lerobot/datasets/v30/convert_dataset_v21_to_v30.py \
|
python src/lerobot/datasets/v30/convert_dataset_v21_to_v30.py \
|
||||||
--repo-id=lerobot/pusht
|
--repo-id=lerobot/pusht
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|||||||
@@ -1,3 +1,19 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
# Copyright 2025 The HuggingFace Inc. team. All rights reserved.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
import torch
|
import torch
|
||||||
|
|
||||||
from lerobot.datasets.aggregate import aggregate_datasets
|
from lerobot.datasets.aggregate import aggregate_datasets
|
||||||
|
|||||||
Reference in New Issue
Block a user