mirror of
https://github.com/Tavish9/any4lerobot.git
synced 2026-05-30 04:39:40 +00:00
make script compatible with LeRobotDataset v2.1
This commit is contained in:
@@ -4,9 +4,8 @@
|
|||||||
|
|
||||||
> [!NOTE]
|
> [!NOTE]
|
||||||
> This repository supports converting datasets from OpenX format to LeRobot V2.0 dataset format.
|
> This repository supports converting datasets from OpenX format to LeRobot V2.0 dataset format.
|
||||||
|
>
|
||||||
> [!WARNING]
|
> Current script is now compatible with LeRobot V2.1.
|
||||||
> `2025.02.25`: LeRobot has updated the dataset from v2.0 to v2.1, scripts need to be updated accordingly.
|
|
||||||
|
|
||||||
## 🚀 What's New in This Script
|
## 🚀 What's New in This Script
|
||||||
|
|
||||||
@@ -21,7 +20,7 @@ Dataset Structure of `meta/info.json`:
|
|||||||
|
|
||||||
```json
|
```json
|
||||||
{
|
{
|
||||||
"codebase_version": "v2.0", // latest lerobot format
|
"codebase_version": "v2.1", // latest lerobot format
|
||||||
"robot_type": "franka", // specific robot type, unknown if not provided
|
"robot_type": "franka", // specific robot type, unknown if not provided
|
||||||
"fps": 3, // control frequency, 10 if not provided
|
"fps": 3, // control frequency, 10 if not provided
|
||||||
// will add an additional key "control_frequency"
|
// will add an additional key "control_frequency"
|
||||||
@@ -86,21 +85,19 @@ pip install -e .
|
|||||||
## Get started
|
## Get started
|
||||||
|
|
||||||
> [!IMPORTANT]
|
> [!IMPORTANT]
|
||||||
> 1. Before running the following code, modify the `consolidate()` function in lerobot.
|
> 1. Before running the following code, modify the `save_episode()` function in lerobot.
|
||||||
> ```python
|
> ```python
|
||||||
> def consolidate(self, run_compute_stats: bool = True, keep_image_files: bool = False, stat_kwargs: dict = {}) -> None:
|
> def save_episode(self, episode_data: dict | None = None, keep_images: bool | None = False) -> None:
|
||||||
> ...
|
> ...
|
||||||
> if run_compute_stats:
|
> # delete images
|
||||||
> self.stop_image_writer()
|
> if not keep_images:
|
||||||
> # TODO(aliberts): refactor stats in save_episodes
|
> img_dir = self.root / "images"
|
||||||
> self.meta.stats = compute_stats(self, **stat_kwargs)
|
> if img_dir.is_dir():
|
||||||
|
> shutil.rmtree(self.root / "images")
|
||||||
> ...
|
> ...
|
||||||
> ```
|
> ```
|
||||||
> 2. For the `bc_z` dataset, two pieces of source code need to be modified.
|
> 2. For the `bc_z` dataset, modify `encode_video_frames()` in `lerobot/common/datasets/video_utils.py`.
|
||||||
>
|
>
|
||||||
> path: `lerobot/common/datasets/video_utils.py`
|
|
||||||
>
|
|
||||||
> method: `encode_video_frames`
|
|
||||||
> ```python
|
> ```python
|
||||||
> # add the following content to line 141:
|
> # add the following content to line 141:
|
||||||
> vf: str = "pad=ceil(iw/2)*2:ceil(ih/2)*2",
|
> vf: str = "pad=ceil(iw/2)*2:ceil(ih/2)*2",
|
||||||
@@ -128,10 +125,8 @@ python openx_rlds.py \
|
|||||||
--raw-dir /path/to/droid/1.0.0 \
|
--raw-dir /path/to/droid/1.0.0 \
|
||||||
--local-dir /path/to/LEROBOT_DATASET \
|
--local-dir /path/to/LEROBOT_DATASET \
|
||||||
--repo-id your_hf_id \
|
--repo-id your_hf_id \
|
||||||
--push-to-hub \
|
--use-videos \
|
||||||
--batch-size 16 \
|
--push-to-hub
|
||||||
--num-workers 8 \
|
|
||||||
--use-videos
|
|
||||||
```
|
```
|
||||||
|
|
||||||
Execute the script:
|
Execute the script:
|
||||||
|
|||||||
+2
-4
@@ -2,7 +2,5 @@ python openx_rlds.py \
|
|||||||
--raw-dir /path/to/droid/1.0.0 \
|
--raw-dir /path/to/droid/1.0.0 \
|
||||||
--local-dir /path/to/LEROBOT_DATASET \
|
--local-dir /path/to/LEROBOT_DATASET \
|
||||||
--repo-id your_hf_id \
|
--repo-id your_hf_id \
|
||||||
--push-to-hub \
|
--use-videos \
|
||||||
--batch-size 16 \
|
--push-to-hub
|
||||||
--num-workers 8 \
|
|
||||||
--use-videos
|
|
||||||
|
|||||||
+7
-26
@@ -37,7 +37,9 @@ from pathlib import Path
|
|||||||
import numpy as np
|
import numpy as np
|
||||||
import tensorflow as tf
|
import tensorflow as tf
|
||||||
import tensorflow_datasets as tfds
|
import tensorflow_datasets as tfds
|
||||||
from lerobot.common.datasets.lerobot_dataset import LEROBOT_HOME, LeRobotDataset
|
from huggingface_hub import HfApi
|
||||||
|
from lerobot.common.constants import HF_LEROBOT_HOME
|
||||||
|
from lerobot.common.datasets.lerobot_dataset import LeRobotDataset
|
||||||
|
|
||||||
from oxe_utils.configs import OXE_DATASET_CONFIGS, ActionEncoding, StateEncoding
|
from oxe_utils.configs import OXE_DATASET_CONFIGS, ActionEncoding, StateEncoding
|
||||||
from oxe_utils.transforms import OXE_STANDARDIZATION_TRANSFORMS
|
from oxe_utils.transforms import OXE_STANDARDIZATION_TRANSFORMS
|
||||||
@@ -147,15 +149,10 @@ def save_as_lerobot_dataset(lerobot_dataset: LeRobotDataset, raw_dataset: tf.dat
|
|||||||
**image_dict,
|
**image_dict,
|
||||||
"observation.state": traj["proprio"][i],
|
"observation.state": traj["proprio"][i],
|
||||||
"action": traj["action"][i],
|
"action": traj["action"][i],
|
||||||
|
"task": traj["task"][0].decode(),
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
lerobot_dataset.save_episode(task=traj["task"][0].decode())
|
lerobot_dataset.save_episode(keep_images=kwargs.get("keep_images", False))
|
||||||
|
|
||||||
lerobot_dataset.consolidate(
|
|
||||||
run_compute_stats=True,
|
|
||||||
keep_image_files=kwargs["keep_images"],
|
|
||||||
stat_kwargs={"batch_size": kwargs["batch_size"], "num_workers": kwargs["num_workers"]},
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def create_lerobot_dataset(
|
def create_lerobot_dataset(
|
||||||
@@ -166,8 +163,6 @@ def create_lerobot_dataset(
|
|||||||
fps: int = None,
|
fps: int = None,
|
||||||
robot_type: str = None,
|
robot_type: str = None,
|
||||||
use_videos: bool = True,
|
use_videos: bool = True,
|
||||||
batch_size: int = 32,
|
|
||||||
num_workers: int = 8,
|
|
||||||
image_writer_process: int = 5,
|
image_writer_process: int = 5,
|
||||||
image_writer_threads: int = 10,
|
image_writer_threads: int = 10,
|
||||||
keep_images: bool = True,
|
keep_images: bool = True,
|
||||||
@@ -183,7 +178,7 @@ def create_lerobot_dataset(
|
|||||||
data_dir = raw_dir.parent
|
data_dir = raw_dir.parent
|
||||||
|
|
||||||
if local_dir is None:
|
if local_dir is None:
|
||||||
local_dir = Path(LEROBOT_HOME)
|
local_dir = Path(HF_LEROBOT_HOME)
|
||||||
local_dir /= f"{dataset_name}_{version}_lerobot"
|
local_dir /= f"{dataset_name}_{version}_lerobot"
|
||||||
if local_dir.exists():
|
if local_dir.exists():
|
||||||
shutil.rmtree(local_dir)
|
shutil.rmtree(local_dir)
|
||||||
@@ -221,9 +216,7 @@ def create_lerobot_dataset(
|
|||||||
image_writer_processes=image_writer_process,
|
image_writer_processes=image_writer_process,
|
||||||
)
|
)
|
||||||
|
|
||||||
save_as_lerobot_dataset(
|
save_as_lerobot_dataset(lerobot_dataset, raw_dataset, keep_images=keep_images)
|
||||||
lerobot_dataset, raw_dataset, keep_images=keep_images, batch_size=batch_size, num_workers=num_workers
|
|
||||||
)
|
|
||||||
|
|
||||||
if push_to_hub:
|
if push_to_hub:
|
||||||
assert repo_id is not None
|
assert repo_id is not None
|
||||||
@@ -282,18 +275,6 @@ def main():
|
|||||||
action="store_true",
|
action="store_true",
|
||||||
help="Convert each episode of the raw dataset to an mp4 video. This option allows 60 times lower disk space consumption and 25 faster loading time during training.",
|
help="Convert each episode of the raw dataset to an mp4 video. This option allows 60 times lower disk space consumption and 25 faster loading time during training.",
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
|
||||||
"--batch-size",
|
|
||||||
type=int,
|
|
||||||
default=32,
|
|
||||||
help="Batch size loaded by DataLoader for computing the dataset statistics.",
|
|
||||||
)
|
|
||||||
parser.add_argument(
|
|
||||||
"--num-workers",
|
|
||||||
type=int,
|
|
||||||
default=8,
|
|
||||||
help="Number of processes of Dataloader for computing the dataset statistics.",
|
|
||||||
)
|
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--image-writer-process",
|
"--image-writer-process",
|
||||||
type=int,
|
type=int,
|
||||||
|
|||||||
Reference in New Issue
Block a user