From f09ad642300626287270f20418752b08815b41bf Mon Sep 17 00:00:00 2001 From: Tavish Date: Sun, 9 Mar 2025 10:29:24 +0800 Subject: [PATCH] make script compatible with LeRobotDataset v2.1 --- README.md | 31 +++++++++++++------------------ convert.sh | 6 ++---- openx_rlds.py | 33 +++++++-------------------------- 3 files changed, 22 insertions(+), 48 deletions(-) diff --git a/README.md b/README.md index b2e78de..3f6dadd 100644 --- a/README.md +++ b/README.md @@ -4,9 +4,8 @@ > [!NOTE] > This repository supports converting datasets from OpenX format to LeRobot V2.0 dataset format. - -> [!WARNING] -> `2025.02.25`: LeRobot has updated the dataset from v2.0 to v2.1, scripts need to be updated accordingly. +> +> The current script is now compatible with LeRobot V2.1. ## 🚀 What's New in This Script @@ -21,7 +20,7 @@ Dataset Structure of `meta/info.json`: ```json { - "codebase_version": "v2.0", // lastest lerobot format + "codebase_version": "v2.1", // latest lerobot format "robot_type": "franka", // specific robot type, unknown if not provided "fps": 3, // control frequency, 10 if not provided // will add an additional key "control_frequency" @@ -86,21 +85,19 @@ pip install -e . ## Get started > [!IMPORTANT] -> 1.Before running the following code, modify `consolidate()` function in lerobot. +> 1. Before running the following code, modify the `save_episode()` function in lerobot. > ```python -> def consolidate(self, run_compute_stats: bool = True, keep_image_files: bool = False, stat_kwargs: dict = {}) -> None: +> def save_episode(self, episode_data: dict | None = None, keep_images: bool | None = False) -> None: > ... -> if run_compute_stats: -> self.stop_image_writer() -> # TODO(aliberts): refactor stats in save_episodes -> self.meta.stats = compute_stats(self, **stat_kwargs) +> # delete images +> if not keep_images: +> img_dir = self.root / "images" +> if img_dir.is_dir(): +> shutil.rmtree(self.root / "images") > ... 
> ``` -> 2.for `bc_z` dataset, two source codes need to be modified. +> 2. For the `bc_z` dataset, modify `encode_video_frames()` in `lerobot/common/datasets/video_utils.py`. > -> path: `lerobot/common/datasets/video_utils.py` -> -> method: `encode_video_frames` > ```python > # add the following content to line 141: > vf: str = "pad=ceil(iw/2)*2:ceil(ih/2)*2", @@ -128,10 +125,8 @@ python openx_rlds.py \ --raw-dir /path/to/droid/1.0.0 \ --local-dir /path/to/LEROBOT_DATASET \ --repo-id your_hf_id \ - --push-to-hub \ - --batch-size 16 \ - --num-workers 8 \ - --use-videos + --use-videos \ + --push-to-hub ``` Execute the script: diff --git a/convert.sh b/convert.sh index 6579be8..91d2591 100644 --- a/convert.sh +++ b/convert.sh @@ -2,7 +2,5 @@ python openx_rlds.py \ --raw-dir /path/to/droid/1.0.0 \ --local-dir /path/to/LEROBOT_DATASET \ --repo-id your_hf_id \ - --push-to-hub \ - --batch-size 16 \ - --num-workers 8 \ - --use-videos + --use-videos \ + --push-to-hub diff --git a/openx_rlds.py b/openx_rlds.py index 060a714..b2d6e9f 100644 --- a/openx_rlds.py +++ b/openx_rlds.py @@ -37,7 +37,9 @@ from pathlib import Path import numpy as np import tensorflow as tf import tensorflow_datasets as tfds -from lerobot.common.datasets.lerobot_dataset import LEROBOT_HOME, LeRobotDataset +from huggingface_hub import HfApi +from lerobot.common.constants import HF_LEROBOT_HOME +from lerobot.common.datasets.lerobot_dataset import LeRobotDataset from oxe_utils.configs import OXE_DATASET_CONFIGS, ActionEncoding, StateEncoding from oxe_utils.transforms import OXE_STANDARDIZATION_TRANSFORMS @@ -147,15 +149,10 @@ def save_as_lerobot_dataset(lerobot_dataset: LeRobotDataset, raw_dataset: tf.dat **image_dict, "observation.state": traj["proprio"][i], "action": traj["action"][i], + "task": traj["task"][0].decode(), } ) - lerobot_dataset.save_episode(task=traj["task"][0].decode()) - - lerobot_dataset.consolidate( - run_compute_stats=True, - keep_image_files=kwargs["keep_images"], 
stat_kwargs={"batch_size": kwargs["batch_size"], "num_workers": kwargs["num_workers"]}, - ) + lerobot_dataset.save_episode(keep_images=kwargs.get("keep_images", False)) def create_lerobot_dataset( @@ -166,8 +163,6 @@ def create_lerobot_dataset( fps: int = None, robot_type: str = None, use_videos: bool = True, - batch_size: int = 32, - num_workers: int = 8, image_writer_process: int = 5, image_writer_threads: int = 10, keep_images: bool = True, @@ -183,7 +178,7 @@ def create_lerobot_dataset( data_dir = raw_dir.parent if local_dir is None: - local_dir = Path(LEROBOT_HOME) + local_dir = Path(HF_LEROBOT_HOME) local_dir /= f"{dataset_name}_{version}_lerobot" if local_dir.exists(): shutil.rmtree(local_dir) @@ -221,9 +216,7 @@ def create_lerobot_dataset( image_writer_processes=image_writer_process, ) - save_as_lerobot_dataset( - lerobot_dataset, raw_dataset, keep_images=keep_images, batch_size=batch_size, num_workers=num_workers - ) + save_as_lerobot_dataset(lerobot_dataset, raw_dataset, keep_images=keep_images) if push_to_hub: assert repo_id is not None @@ -282,18 +275,6 @@ def main(): action="store_true", help="Convert each episode of the raw dataset to an mp4 video. This option allows 60 times lower disk space consumption and 25 faster loading time during training.", ) - parser.add_argument( - "--batch-size", - type=int, - default=32, - help="Batch size loaded by DataLoader for computing the dataset statistics.", - ) - parser.add_argument( - "--num-workers", - type=int, - default=8, - help="Number of processes of Dataloader for computing the dataset statistics.", - ) parser.add_argument( "--image-writer-process", type=int,