From 5f27a840639fba1be29bc3cbf7be7f631a5b31e8 Mon Sep 17 00:00:00 2001
From: Tavish <tavish9.chen@gmail.com>
Date: Fri, 11 Apr 2025 19:59:29 +0800
Subject: [PATCH] change from openx2lerobot to any4lerobot

---
 .gitignore                                    | 173 ++++++++++++++++++
 README.md                                     | 164 ++++-------------
 convert.sh                                    |   6 -
 openx2lerobot/README.md                       | 124 +++++++++++++
 openx2lerobot/convert.sh                      |   9 +
 openx_rlds.py => openx2lerobot/openx_rlds.py  |   0
 .../oxe_utils}/configs.py                     |   1 +
 .../oxe_utils}/transform_utils.py             |   2 +-
 .../oxe_utils}/transforms.py                  |  12 +-
 9 files changed, 353 insertions(+), 138 deletions(-)
 create mode 100644 .gitignore
 delete mode 100644 convert.sh
 create mode 100644 openx2lerobot/README.md
 create mode 100644 openx2lerobot/convert.sh
 rename openx_rlds.py => openx2lerobot/openx_rlds.py (100%)
 rename {oxe_utils => openx2lerobot/oxe_utils}/configs.py (99%)
 rename {oxe_utils => openx2lerobot/oxe_utils}/transform_utils.py (100%)
 rename {oxe_utils => openx2lerobot/oxe_utils}/transforms.py (99%)

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..276c1af
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,173 @@
+# Copyright 2024 The HuggingFace Inc. team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Logging
+logs
+tmp
+wandb
+
+# Data
+data
+outputs
+
+# Apple
+.DS_Store
+
+# VS Code
+.vscode
+
+# HPC
+nautilus/*.yaml
+*.key
+
+# Slurm
+sbatch*.sh
+
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+pip-wheel-metadata/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# uv/poetry lock files
+poetry.lock
+uv.lock
+
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+!tests/artifacts
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+
+# Ignore .cache except calibration
+.cache/*
+!.cache/calibration/
+!.cache/calibration/**
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+.pybuilder/
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+.python-version
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# pytype static type analyzer
+.pytype/
+
+# Cython debug symbols
+cython_debug/
\ No newline at end of file
diff --git a/README.md b/README.md
index be7438f..b8e843e 100644
--- a/README.md
+++ b/README.md
@@ -1,146 +1,60 @@
 <h1 align="center">
-    <p>LeRobot: State-of-the-art AI for real-world robotics</p>
+    <p>Any4LeRobot: A tool collection for LeRobot</p>
 </h1>
 
-> [!NOTE]
-> This repository supports converting datasets from OpenX format to LeRobot V2.0 dataset format.
-> 
-> Current script is now compatible with LeRobot V2.1.
+<div align="center">
 
-## 🚀 What's New in This Script
+[![Python versions](https://img.shields.io/pypi/pyversions/lerobot)](https://www.python.org/downloads/)
+[![LeRobot Dataset](https://img.shields.io/badge/LeRobot%20Dataset-v2.1-ff69b4.svg)](https://github.com/huggingface/lerobot/pull/711)
+[![License](https://img.shields.io/badge/License-MIT-blue.svg)](https://opensource.org/licenses/MIT)
 
-In this dataset, we have made several key improvements:
+</div>
 
-- **OXE Standard Transformations** 🔄: We have integrated OXE's standard transformations to ensure uniformity across data.
-- **Alignment of State and Action Information** 🤖: State and action information are now perfectly aligned, enhancing the clarity and coherence of the dataset.
-- **Robot Type and Control Frequency** 📊: Annotations have been added for robot type and control frequency to improve dataset comprehensibility.
-- **Joint Information** 🦾: Joint-specific details have been included to assist with fine-grained understanding.
+> [!IMPORTANT]
+>
+> **Star and Contribute**, let's make the robotics community better and better! 🔥
 
-Dataset Structure of `meta/info.json`:
+A curated collection of utilities for [LeRobot Projects](https://github.com/huggingface/lerobot), including data conversion scripts, preprocessing tools, training workflow helpers, and more.
 
-```json
-{
-  "codebase_version": "v2.1", // lastest lerobot format
-  "robot_type": "franka", // specific robot type, unknown if not provided
-  "fps": 3, // control frequency, 10 if not provided
-  // will add an additional key "control_frequency"
-  "features": {
-    "observation.images.image_key": {
-      "dtype": "video",
-      "shape": [128, 128, 3],
-      "names": ["height", "width", "rgb"], // bgr to rgb if needed
-      "info": {
-        "video.fps": 3.0,
-        "video.height": 128,
-        "video.width": 128,
-        "video.channels": 3,
-        "video.codec": "av1",
-        "video.pix_fmt": "yuv420p",
-        "video.is_depth_map": false,
-        "has_audio": false
-      }
-    },
-    "observation.state": {
-      "dtype": "float32",
-      "shape": [8],
-      "names": {
-        "motors": ["x", "y", "z", "roll", "pitch", "yaw", "pad", "gripper"] 
-        // unified 8-dim vector: [xyz, state type, gripper], motor_x if not provided
-      }
-    },
-    "action": {
-      "dtype": "float32",
-      "shape": [7],
-      "names": {
-        "motors": ["x", "y", "z", "roll", "pitch", "yaw", "gripper"] 
-        // unified 7-dim vector: [xyz, action type, gripper], motor_x if not provided
-      }
-    }
-  }
-}
-```
 
-## Installation
+## 🚀 What's New <a><img width="35" height="20" src="https://user-images.githubusercontent.com/12782558/212848161-5e783dd6-11e8-4fe0-bbba-39ffb77730be.png"></a>
 
-Download lerobot code:
+- **\[2025.04.11\]** We changed the repo from `openx2lerobot` to `any4lerobot`, making it a universal toolbox for LeRobot! 🔥🔥🔥
+- **\[2025.02.19\]** We have supported Data Conversion from Open X-Embodiment to LeRobot! 🔥🔥🔥
 
-```bash
-git clone https://github.com/huggingface/lerobot.git
-cd lerobot
-```
 
-Create a virtual environment with Python 3.10 and activate it, e.g. with [`miniconda`](https://docs.anaconda.com/free/miniconda/index.html):
+## ✨ Features
+- **Data Conversion**:
+    - [x] [Open X-Embodiment to LeRobot](./openx2lerobot/README.md)
+    - [ ] AgiBot-World to LeRobot
+    - [ ] LeRobot to Open X-Embodiment
 
-```bash
-conda create -y -n lerobot python=3.10
-conda activate lerobot
-```
+- **Version Conversion**:
+    - [ ] LeRobotv2.0 to LeRobotv2.1
+    - [ ] LeRobotv2.1 to LeRobotv2.0
 
-Install 🤗 LeRobot:
+## 📂 Public Resources
+- Available datasets in LeRobot format:
+    - [Open X-Embodiment Dataset](https://huggingface.co/IPEC-COMMUNITY)
+    - [Open X-Embodiment Visualizer](https://huggingface.co/spaces/IPEC-COMMUNITY/openx_dataset_lerobot_v2.0)
 
-```bash
-pip install -e .
-```
 
-## Get started
+## 👷‍♂️ Contributing
+We appreciate all contributions to improving Any4LeRobot. Please refer to the contributing guidelines for best practices.
 
-> [!IMPORTANT]  
-> 1.Before running the following code, modify `save_episode()` function in lerobot.
-> ```python
-> def save_episode(self, episode_data: dict | None = None, keep_images: bool | None = False) -> None:
->     ...
->     # delete images
->     if not keep_images:
->         img_dir = self.root / "images"
->         if img_dir.is_dir():
->             shutil.rmtree(self.root / "images")
->     ...
-> ```
-> 2.for `bc_z` dataset, modify `encode_video_frames()` in `lerobot/common/datasets/video_utils.py`.
-> 
-> ```python
-> # add the following content to line 141:
-> vf: str = "pad=ceil(iw/2)*2:ceil(ih/2)*2",
-> # Add the following content to line 171:
-> ffmpeg_args["-vf"] = vf
-> ```
+<a href="https://github.com/Tavish9/any4lerobot/graphs/contributors" target="_blank">
+  <table>
+    <tr>
+      <th colspan="2">
+        <br><img src="https://contrib.rocks/image?repo=tavish9/any4lerobot"><br><br>
+      </th>
+    </tr>
+  </table>
+</a>
 
-> [!TIP]
-> We recommend using `libsvtav1` as the vcodec for ffmpeg when encoding videos during dataset conversion.
 
-Compile FFmpeg with libsvtav1 encoder (Optional):
+## 🤝 Acknowledgements
 
-`libsvtav1` is only supported in higher version of ffmpeg, so many users need to compile ffmpeg to enable it. You can follow this [link](https://trac.ffmpeg.org/wiki/CompilationGuide) for detailed compilation instructions..
+Special thanks to the [LeRobot teams](https://github.com/huggingface/lerobot) for making this great framework.
 
-Download source code:
-
-```bash
-git clone https://github.com/Tavish9/openx2lerobot.git
-```
-
-Modify path in `convert.sh`:
-
-```bash
-python openx_rlds.py \
-    --raw-dir /path/to/droid/1.0.0 \
-    --local-dir /path/to/LEROBOT_DATASET \
-    --repo-id your_hf_id \
-    --use-videos \
-    --push-to-hub
-```
-
-Execute the script:
-
-```bash
-bash convert.sh
-```
-
-## Available OpenX_LeRobot Dataset
-
-We have upload most of the OpenX datasets in [huggingface](https://huggingface.co/IPEC-COMMUNITY)🤗.
-
-You can visualize the dataset in this [space](https://huggingface.co/spaces/IPEC-COMMUNITY/openx_dataset_lerobot_v2.0).
-
-## Acknowledgment
-
-Special thanks to the [Lerobot teams](https://github.com/huggingface/lerobot) for making this great framework.
+<p align="right"><a href="#top">🔝Back to top</a></p>
\ No newline at end of file
diff --git a/convert.sh b/convert.sh
deleted file mode 100644
index 91d2591..0000000
--- a/convert.sh
+++ /dev/null
@@ -1,6 +0,0 @@
-python openx_rlds.py \
-    --raw-dir /path/to/droid/1.0.0 \
-    --local-dir /path/to/LEROBOT_DATASET \
-    --repo-id your_hf_id \
-    --use-videos \
-    --push-to-hub
diff --git a/openx2lerobot/README.md b/openx2lerobot/README.md
new file mode 100644
index 0000000..e6ca4f6
--- /dev/null
+++ b/openx2lerobot/README.md
@@ -0,0 +1,124 @@
+# OpenX to LeRobot 
+
+## 🚀 What's New in This Script
+
+In this dataset, we have made several key improvements:
+
+- **OXE Standard Transformations** 🔄: We have integrated OXE's standard transformations to ensure uniformity across data.
+- **Alignment of State and Action Information** 🤖: State and action information are now perfectly aligned, enhancing the clarity and coherence of the dataset.
+- **Robot Type and Control Frequency** 📊: Annotations have been added for robot type and control frequency to improve dataset comprehensibility.
+- **Joint Information** 🦾: Joint-specific details have been included to assist with fine-grained understanding.
+
+Dataset Structure of `meta/info.json`:
+
+```json
+{
+  "codebase_version": "v2.1", // latest lerobot format
+  "robot_type": "franka", // specific robot type, unknown if not provided
+  "fps": 3, // control frequency, 10 if not provided
+  // will add an additional key "control_frequency"
+  "features": {
+    "observation.images.image_key": {
+      "dtype": "video",
+      "shape": [128, 128, 3],
+      "names": ["height", "width", "rgb"], // bgr to rgb if needed
+      "info": {
+        "video.fps": 3.0,
+        "video.height": 128,
+        "video.width": 128,
+        "video.channels": 3,
+        "video.codec": "av1",
+        "video.pix_fmt": "yuv420p",
+        "video.is_depth_map": false,
+        "has_audio": false
+      }
+    },
+    "observation.state": {
+      "dtype": "float32",
+      "shape": [8],
+      "names": {
+        "motors": ["x", "y", "z", "roll", "pitch", "yaw", "pad", "gripper"] 
+        // unified 8-dim vector: [xyz, state type, gripper], motor_x if not provided
+      }
+    },
+    "action": {
+      "dtype": "float32",
+      "shape": [7],
+      "names": {
+        "motors": ["x", "y", "z", "roll", "pitch", "yaw", "gripper"] 
+        // unified 7-dim vector: [xyz, action type, gripper], motor_x if not provided
+      }
+    }
+  }
+}
+```
+
+## Installation
+
+1. Install LeRobot:
+  Follow instructions in [official repo](https://github.com/huggingface/lerobot?tab=readme-ov-file#installation).
+
+2. Install others:
+  For reading tfds/rlds, we need to install `tensorflow-datasets`:
+    ```bash
+    pip install tensorflow
+    pip install tensorflow-datasets
+    ```
+
+
+## Get started
+
+> [!IMPORTANT]  
+> 1.Before running the following code, modify `save_episode()` function in lerobot.
+> ```python
+> def save_episode(self, episode_data: dict | None = None, keep_images: bool | None = False) -> None:
+>     ...
+>     # delete images
+>     if not keep_images:
+>         img_dir = self.root / "images"
+>         if img_dir.is_dir():
+>             shutil.rmtree(self.root / "images")
+>     ...
+> ```
+> 2.for `bc_z` dataset, modify `encode_video_frames()` in `lerobot/common/datasets/video_utils.py`.
+> 
+> ```python
+> # add the following content to line 141:
+> vf: str = "pad=ceil(iw/2)*2:ceil(ih/2)*2",
+> # Add the following content to line 171:
+> ffmpeg_args["-vf"] = vf
+> ```
+
+> [!TIP]
+> We recommend using `libsvtav1` as the vcodec for ffmpeg when encoding videos during dataset conversion. If `libsvtav1` is not available after installing lerobot, you need to compile FFmpeg with `libsvtav1` support yourself. Follow this [link](https://trac.ffmpeg.org/wiki/CompilationGuide) for detailed compilation instructions.
+
+
+1. Download source code:
+
+    ```bash
+    git clone https://github.com/Tavish9/any4lerobot.git
+    ```
+
+2. Modify path in `convert.sh`:
+
+    ```bash
+    python openx_rlds.py \
+        --raw-dir /path/to/droid/1.0.0 \
+        --local-dir /path/to/LEROBOT_DATASET \
+        --repo-id your_hf_id \
+        --use-videos \
+        --push-to-hub
+    ```
+
+3. Execute the script:
+
+    ```bash
+    bash convert.sh
+    ```
+
+## Available OpenX_LeRobot Dataset
+
+We have uploaded most of the OpenX datasets to [Hugging Face](https://huggingface.co/IPEC-COMMUNITY)🤗.
+
+You can visualize the dataset in this [space](https://huggingface.co/spaces/IPEC-COMMUNITY/openx_dataset_lerobot_v2.0).
+
diff --git a/openx2lerobot/convert.sh b/openx2lerobot/convert.sh
new file mode 100644
index 0000000..0a9eec7
--- /dev/null
+++ b/openx2lerobot/convert.sh
@@ -0,0 +1,9 @@
+export LD_LIBRARY_PATH="/cpfs01/shared/optimal/vla_ptm/miniconda3/envs/vla_next/lib/python3.10/site-packages/nvidia/nvjitlink/lib:${LD_LIBRARY_PATH}"
+export LD_LIBRARY_PATH="/cpfs01/shared/optimal/vla_ptm/miniconda3/envs/vla_next/lib/python3.10/site-packages/nvidia/cusparse/lib:${LD_LIBRARY_PATH}"
+
+python openx_rlds.py \
+    --raw-dir /oss/vla_ptm_hwfile/DATA/fine_tune/kitchen_banana/0.1.0 \
+    --local-dir /cpfs01/shared/optimal/vla_next/LEROBOT_DATASET/Franka \
+    --repo-id your_hf_id \
+    --use-videos
+    # --push-to-hub
diff --git a/openx_rlds.py b/openx2lerobot/openx_rlds.py
similarity index 100%
rename from openx_rlds.py
rename to openx2lerobot/openx_rlds.py
diff --git a/oxe_utils/configs.py b/openx2lerobot/oxe_utils/configs.py
similarity index 99%
rename from oxe_utils/configs.py
rename to openx2lerobot/oxe_utils/configs.py
index 02522f8..49d55c7 100644
--- a/oxe_utils/configs.py
+++ b/openx2lerobot/oxe_utils/configs.py
@@ -76,6 +76,7 @@ class StateEncoding(IntEnum):
 class ActionEncoding(IntEnum):
     # fmt: off
     EEF_POS = 1             # EEF Delta XYZ (3) + Roll-Pitch-Yaw (3) + Gripper Open/Close (1)
+    EEF_POS_QUAT = 5        # EEF Delta XYZ (3) + Quaternion (4) + Gripper Open/Close (1)
     JOINT_POS = 2           # Joint Delta Position (7) + Gripper Open/Close (1)
     JOINT_POS_BIMANUAL = 3  # Joint Delta Position (2 x [ Joint Delta Position (6) + Gripper Open/Close (1) ])
     EEF_R6 = 4              # EEF Delta XYZ (3) + R6 (6) + Gripper Open/Close (1)
diff --git a/oxe_utils/transform_utils.py b/openx2lerobot/oxe_utils/transform_utils.py
similarity index 100%
rename from oxe_utils/transform_utils.py
rename to openx2lerobot/oxe_utils/transform_utils.py
index ca250ca..3d7e884 100644
--- a/oxe_utils/transform_utils.py
+++ b/openx2lerobot/oxe_utils/transform_utils.py
@@ -2,7 +2,6 @@
 Copied from https://github.com/openvla/openvla/blob/main/prismatic/vla/datasets/rlds/utils/data_utils.py
 """
 
-
 from typing import Any, Dict
 
 import tensorflow as tf
@@ -66,6 +65,7 @@ def rel2abs_gripper_actions(actions: tf.Tensor) -> tf.Tensor:
 
     return new_actions
 
+
 # === Bridge-V2 =>> Dataset-Specific Transform ===
 def relabel_bridge_actions(traj: Dict[str, Any]) -> Dict[str, Any]:
     """Relabels actions to use reached proprioceptive state; discards last timestep (no-action)."""
diff --git a/oxe_utils/transforms.py b/openx2lerobot/oxe_utils/transforms.py
similarity index 99%
rename from oxe_utils/transforms.py
rename to openx2lerobot/oxe_utils/transforms.py
index 01c9598..b196310 100644
--- a/oxe_utils/transforms.py
+++ b/openx2lerobot/oxe_utils/transforms.py
@@ -19,7 +19,6 @@ Transforms adopt the following structure:
 from typing import Any, Dict
 
 import tensorflow as tf
-
 from oxe_utils.transform_utils import (
     binarize_gripper_actions,
     invert_gripper_actions,
@@ -32,6 +31,7 @@ def droid_baseact_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]:
     """
     DROID dataset transformation for actions expressed in *base* frame of the robot.
     """
+
     def rand_swap_exterior_images(img1, img2):
         """
         Randomly swaps the two exterior images (for training with single exterior input).
@@ -56,11 +56,11 @@ def droid_baseact_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]:
         )
     )
     # trajectory["observation"]["proprio"] = tf.concat(
-        # (
-            # trajectory["observation"]["cartesian_position"],
-            # trajectory["observation"]["gripper_position"],
-        # ),
-        # axis=-1,
+    # (
+    # trajectory["observation"]["cartesian_position"],
+    # trajectory["observation"]["gripper_position"],
+    # ),
+    # axis=-1,
     # )
     trajectory["observation"]["EEF_state"] = trajectory["observation"]["cartesian_position"]
     trajectory["observation"]["gripper_state"] = trajectory["observation"]["gripper_position"]