add support for agibot2lerobot (#15)

Co-authored-by: ModiShi <modishi@buaa.edu.cn>
Co-authored-by: aopolin-lv <aopolin.ii@gmail.com>
Co-authored-by: HaomingSong <haomingsong24@gmail.com>
Qizhi Chen
2025-04-14 20:01:09 +08:00
committed by GitHub
parent 9ca6ce773b
commit fe558f7adb
8 changed files with 1038 additions and 1 deletion
+1 -1
@@ -27,7 +27,7 @@ A curated collection of utilities for [LeRobot Projects](https://github.com/hugg
## ✨ Features
- **Data Conversion**:
- [x] [Open X-Embodiment to LeRobot](./openx2lerobot/README.md)
- [ ] AgiBot-World to LeRobot
- [x] [AgiBot-World to LeRobot](./agibot2lerobot/README.md)
- [ ] RoboMIND to LeRobot
- [ ] LeRobot to RLDS
+242
@@ -0,0 +1,242 @@
# AgiBot-World to LeRobot
## 🚀 What's New in This Script
In this conversion, we have made several key improvements:
- **Preservation of AgiBot's Original Information** 🧠: We have preserved as much of AgiBot's original information as possible, with field names strictly adhering to the original dataset's naming conventions to ensure compatibility and consistency.
- **State and Action as Dictionaries** 🧾: The traditional one-dimensional state and action vectors have been split into dictionaries of named keys, allowing greater flexibility in designing custom states and actions and enabling modular, scalable handling (see the access example after the `meta/info.json` structure below).
Dataset Structure of `meta/info.json`:
```json
{
"codebase_version": "v2.1", // lastest lerobot format
"robot_type": "a2d", // specific robot type
"fps": 30, // control frequency
"features": {
"observation.images.image_key": {
"dtype": "video",
"shape": [480, 640, 3],
"names": ["height", "width", "rgb"],
"info": {
"video.fps": 3.0,
"video.height": 128,
"video.width": 128,
"video.channels": 3,
"video.codec": "av1",
"video.pix_fmt": "yuv420p",
"video.is_depth_map": false,
"has_audio": false
}
},
// for more state keys, see config.py
"observation.states.joint.position": {
"dtype": "float32",
"shape": [14],
"names": {
"motors": [
"left_arm_0",
"left_arm_1",
"left_arm_2",
"left_arm_3",
"left_arm_4",
"left_arm_5",
"left_arm_6",
"right_arm_0",
"right_arm_1",
"right_arm_2",
"right_arm_3",
"right_arm_4",
"right_arm_5",
"right_arm_6"
]
}
},
"observation.states.head.position": {
"dtype": "float32",
"shape": [
2
],
"names": {
"motors": [
"yaw",
"patch"
]
}
},
...
// for more action keys, see config.py
"actions.head.position": {
"dtype": "float32",
"shape": [2],
"names": {
"motors": ["yaw", "patch"]
}
},
"actions.waist.position": {
"dtype": "float32",
"shape": [2],
"names": {
"motors": ["pitch", "lift"]
}
},
...
}
}
```
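Once a task has been converted, each state/action group can be read back as its own named feature. Below is a minimal sketch; the repo id and local path are placeholders for whatever you passed during conversion, and depending on your LeRobot version you may also need `local_files_only=True`:

```python
from lerobot.common.datasets.lerobot_dataset import LeRobotDataset

# Placeholder path: point this at one of the converted task directories.
dataset = LeRobotDataset("task_327", root="/path/to/local/agibotworld/task_327")

frame = dataset[0]
# Each state/action group is a separate named feature instead of one flat vector.
print(frame["observation.states.joint.position"].shape)  # e.g. torch.Size([14])
print(frame["actions.head.position"].shape)              # e.g. torch.Size([2])
```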
## Installation
1. Install LeRobot:
Follow the instructions in the [official repo](https://github.com/huggingface/lerobot?tab=readme-ov-file#installation).
2. Install other dependencies:
We use Ray for parallel conversion, which significantly speeds up data processing by distributing the workload across multiple cores or nodes (if any); a sketch of the pattern follows the install commands below.
```bash
pip install h5py
pip install -U "ray[default]"
```
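The converter schedules one AgiBot-World task per Ray worker via `ray.remote`. A simplified sketch of that pattern (not the actual converter code; the function body is a placeholder):

```python
import ray

ray.init()  # starts a local Ray instance, or connects to an existing one depending on your setup

@ray.remote(num_cpus=3)  # mirrors --cpus-per-task in the converter
def convert_one_task(task_json: str) -> str:
    # Placeholder for the real per-task conversion logic.
    return f"converted {task_json}"

futures = [convert_one_task.remote(name) for name in ["task_327.json", "task_351.json"]]
print(ray.get(futures))
```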
## Get started
> [!IMPORTANT]
> 1. If you want to save depth when converting the dataset, modify the `_assert_type_and_shape()` function in [lerobot.common.datasets.compute_stats.py](https://github.com/huggingface/lerobot/blob/main/lerobot/common/datasets/compute_stats.py) as follows:
>
> ```python
> def _assert_type_and_shape(stats_list: list[dict[str, dict]]):
> for i in range(len(stats_list)):
> for fkey in stats_list[i]:
> for k, v in stats_list[i][fkey].items():
> if not isinstance(v, np.ndarray):
> raise ValueError(
> f"Stats must be composed of numpy array, but key '{k}' of feature '{fkey}' is of type '{type(v)}' instead."
> )
> if v.ndim == 0:
> raise ValueError("Number of dimensions must be at least 1, and is 0 instead.")
> if k == "count" and v.shape != (1,):
> raise ValueError(f"Shape of 'count' must be (1), but is {v.shape} instead.")
> # bypass depth check
> if "image" in fkey and k != "count":
> if "depth" not in fkey and v.shape != (3, 1, 1):
> raise ValueError(f"Shape of '{k}' must be (3,1,1), but is {v.shape} instead.")
> if "depth" in fkey and v.shape != (1, 1, 1):
> raise ValueError(f"Shape of '{k}' must be (1,1,1), but is {v.shape} instead.")
> ```
> [!NOTE]
> The conversion speed of this script is limited by the hardware of the machine running it, in particular **CPU cores and memory**. We recommend **3 CPU cores per task** for optimal throughput. However, each task requires approximately 20 GiB of memory, so to avoid running out of memory you may need to increase the number of CPU cores per task (which reduces the number of concurrent tasks) depending on your system's available memory.
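> For example (hypothetical numbers): on a node with 64 CPU cores and 256 GiB of RAM, memory allows roughly 256 / 20 ≈ 12 concurrent tasks, so `--cpus-per-task` should be raised to about 64 / 12 ≈ 6 instead of the default 3; otherwise Ray would schedule ~21 tasks and exceed the available memory.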
### Download source code:
```bash
git clone https://github.com/Tavish9/any4lerobot.git
```
### Modify paths in `convert.sh`:
There are three end-effector types, `gripper`, `dexhand`, and `tactile`; specify the type before converting:
```bash
python convert.py \
--src-path /path/to/AgiBotWorld-Beta \
--output-path /path/to/local \
--eef-type gripper \
--cpus-per-task 3
```
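The other flags accepted by the converter (see the argparse section of the conversion script) can be appended to the same command. For example, a hypothetical run restricted to two tasks that also saves depth images (which requires the `compute_stats` patch from the note above) might look like:

```bash
python convert.py \
    --src-path /path/to/AgiBotWorld-Beta \
    --output-path /path/to/local \
    --eef-type gripper \
    --cpus-per-task 3 \
    --num-threads-per-task 2 \
    --task-ids task_327 task_351 \
    --save-depth
```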
### Execute the script:
#### For single node
```bash
cd agibot2lerobot && bash convert.sh
```
#### For multi nodes
**Direct Access to Nodes (2 nodes in example)**
On Node 1:
```bash
ray start --head --port=6379
```
On Node 2:
```bash
ray start --address='node_1_ip:6379'
```
On either node, check the Ray cluster status and start the script:
```bash
ray status
cd agibot2lerobot && bash convert.sh
```
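When the conversion finishes, you can optionally tear the cluster down on each node:

```bash
ray stop
```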
**Slurm-managed System**
```bash
#!/bin/bash
#SBATCH --job-name=ray-cluster
#SBATCH --ntasks=2
#SBATCH --nodes=2
#SBATCH --partition=partition
# Getting the node names
nodes=$(scontrol show hostnames "$SLURM_JOB_NODELIST")
nodes_array=($nodes)
head_node=${nodes_array[0]}
head_node_ip=$(srun --nodes=1 --ntasks=1 -w "$head_node" hostname --ip-address)
# if we detect a space character in the head node IP, we'll
# convert it to an ipv4 address. This step is optional.
if [[ "$head_node_ip" == *" "* ]]; then
IFS=' ' read -ra ADDR <<<"$head_node_ip"
if [[ ${#ADDR[0]} -gt 16 ]]; then
head_node_ip=${ADDR[1]}
else
head_node_ip=${ADDR[0]}
fi
echo "IPV6 address detected. We split the IPV4 address as $head_node_ip"
fi
port=6379
ip_head=$head_node_ip:$port
export ip_head
echo "IP Head: $ip_head"
echo "Starting HEAD at $head_node"
srun --nodes=1 --ntasks=1 -w "$head_node" \
ray start --head \
--node-ip-address="$head_node_ip" \
--port=$port \
--block &
sleep 10
# number of nodes other than the head node
worker_num=$((SLURM_JOB_NUM_NODES - 1))
for ((i = 1; i <= worker_num; i++)); do
node_i=${nodes_array[$i]}
echo "Starting WORKER $i at $node_i"
srun --nodes=1 --ntasks=1 -w "$node_i" \
ray start \
--address "$ip_head" \
--block &
sleep 5
done
sleep 10
cd agibot2lerobot && bash convert.sh
```
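Save the batch script and submit it with `sbatch` (the filename here is just an example):

```bash
sbatch ray_convert.slurm
```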
**Other Community Supported Cluster Managers**
See the [doc](https://docs.ray.io/en/latest/cluster/vms/user-guides/community/index.html) for more details.
+309
@@ -0,0 +1,309 @@
import argparse
import gc
import shutil
from concurrent.futures import (
ThreadPoolExecutor,
as_completed,
)
from pathlib import Path
from typing import Callable
import numpy as np
import ray
import torch
from agibot_utils.agibot_utils import get_task_instruction, load_local_dataset
from agibot_utils.config import AgiBotWorld_TASK_TYPE
from agibot_utils.lerobot_utils import compute_episode_stats, generate_features_from_config
from lerobot.common.datasets.lerobot_dataset import LeRobotDataset
from lerobot.common.datasets.utils import (
check_timestamps_sync,
get_episode_data_index,
validate_episode_buffer,
validate_frame,
)
from ray.runtime_env import RuntimeEnv
class AgiBotDataset(LeRobotDataset):
def __init__(
self,
repo_id: str,
root: str | Path | None = None,
episodes: list[int] | None = None,
image_transforms: Callable | None = None,
delta_timestamps: dict[list[float]] | None = None,
tolerance_s: float = 1e-4,
download_videos: bool = True,
local_files_only: bool = False,
video_backend: str | None = None,
):
super().__init__(
repo_id=repo_id,
root=root,
episodes=episodes,
image_transforms=image_transforms,
delta_timestamps=delta_timestamps,
tolerance_s=tolerance_s,
download_videos=download_videos,
local_files_only=local_files_only,
video_backend=video_backend,
)
def add_frame(self, frame: dict) -> None:
"""
This function only adds the frame to the episode_buffer. Apart from images — which are written in a
temporary directory — nothing is written to disk. To save those frames, the 'save_episode()' method
then needs to be called.
"""
# Convert torch to numpy if needed
for name in frame:
if isinstance(frame[name], torch.Tensor):
frame[name] = frame[name].numpy()
features = {key: value for key, value in self.features.items() if key in self.hf_features} # remove video keys
validate_frame(frame, features)
if self.episode_buffer is None:
self.episode_buffer = self.create_episode_buffer()
# Automatically add frame_index and timestamp to episode buffer
frame_index = self.episode_buffer["size"]
timestamp = frame.pop("timestamp") if "timestamp" in frame else frame_index / self.fps
self.episode_buffer["frame_index"].append(frame_index)
self.episode_buffer["timestamp"].append(timestamp)
# Add frame features to episode_buffer
for key, value in frame.items():
if key == "task":
# Note: we associate the task in natural language to its task index during `save_episode`
self.episode_buffer["task"].append(frame["task"])
continue
if key not in self.features:
raise ValueError(
f"An element of the frame is not in the features. '{key}' not in '{self.features.keys()}'."
)
self.episode_buffer[key].append(value)
self.episode_buffer["size"] += 1
def save_episode(self, episode_data: dict | None = None, videos: dict | None = None) -> None:
"""
This will save to disk the current episode in self.episode_buffer.
Args:
episode_data (dict | None, optional): Dict containing the episode data to save. If None, this will
save the current episode in self.episode_buffer, which is filled with 'add_frame'. Defaults to
None.
"""
if not episode_data:
episode_buffer = self.episode_buffer
validate_episode_buffer(episode_buffer, self.meta.total_episodes, self.features)
# size and task are special cases that won't be added to hf_dataset
episode_length = episode_buffer.pop("size")
tasks = episode_buffer.pop("task")
episode_tasks = list(set(tasks))
episode_index = episode_buffer["episode_index"]
episode_buffer["index"] = np.arange(self.meta.total_frames, self.meta.total_frames + episode_length)
episode_buffer["episode_index"] = np.full((episode_length,), episode_index)
# Add new tasks to the tasks dictionary
for task in episode_tasks:
task_index = self.meta.get_task_index(task)
if task_index is None:
self.meta.add_task(task)
# Given tasks in natural language, find their corresponding task indices
episode_buffer["task_index"] = np.array([self.meta.get_task_index(task) for task in tasks])
for key, ft in self.features.items():
# index, episode_index, task_index are already processed above, and image and video
# are processed separately by storing image path and frame info as meta data
if key in ["index", "episode_index", "task_index"] or ft["dtype"] in ["video"]:
continue
episode_buffer[key] = np.stack(episode_buffer[key]).squeeze()
for key in self.meta.video_keys:
video_path = self.root / self.meta.get_video_file_path(episode_index, key)
episode_buffer[key] = str(video_path) # PosixPath -> str
video_path.parent.mkdir(parents=True, exist_ok=True)
shutil.copyfile(videos[key], video_path)
ep_stats = compute_episode_stats(episode_buffer, self.features)
self._save_episode_table(episode_buffer, episode_index)
# `meta.save_episode` must be executed after encoding the videos
self.meta.save_episode(episode_index, episode_length, episode_tasks, ep_stats)
ep_data_index = get_episode_data_index(self.meta.episodes, [episode_index])
ep_data_index_np = {k: t.numpy() for k, t in ep_data_index.items()}
check_timestamps_sync(
episode_buffer["timestamp"],
episode_buffer["episode_index"],
ep_data_index_np,
self.fps,
self.tolerance_s,
)
if not episode_data: # Reset the buffer
self.episode_buffer = self.create_episode_buffer()
def get_all_tasks(src_path: Path, output_path: Path):
json_files = src_path.glob("task_info/*.json")
for json_file in json_files:
local_dir = output_path / "agibotworld" / json_file.stem
yield (json_file, local_dir.resolve())
def save_as_lerobot_dataset(agibot_world_config, task: tuple[Path, Path], num_threads, save_depth, debug):
json_file, local_dir = task
print(f"processing {json_file.stem}, saving to {local_dir}")
src_path = json_file.parent.parent
task_name = get_task_instruction(json_file)
task_id = json_file.stem.split("_")[-1]
features = generate_features_from_config(agibot_world_config)
if local_dir.exists():
shutil.rmtree(local_dir)
if not save_depth:
features.pop("observation.images.head_depth")
dataset = AgiBotDataset.create(
repo_id=json_file.stem,
root=local_dir,
fps=30,
robot_type="a2d",
features=features,
)
all_subdir = [f.as_posix() for f in src_path.glob(f"observations/{task_id}/*") if f.is_dir()]
all_subdir_eids = [int(Path(path).name) for path in all_subdir]
if debug or not save_depth:
for eid in all_subdir_eids:
try:
raw_dataset = load_local_dataset(
eid,
src_path=src_path,
task_id=task_id,
task_name=task_name,
save_depth=save_depth,
AgiBotWorld_CONFIG=agibot_world_config,
)
frames, videos = raw_dataset
if not all([video_path.exists() for video_path in videos.values()]):
print(f"{json_file.stem}, episode_{eid}: some of the videos does not exist, skipping")
continue
for frame_data in frames:
dataset.add_frame(frame_data)
dataset.save_episode(videos=videos)
except Exception as e:
raise Exception(f"{json_file.stem}, {eid}") from e
gc.collect()
print(f"process done for {json_file.stem}, episode_id {eid}, len {len(frames)}")
else:
with ThreadPoolExecutor(max_workers=num_threads) as executor:
futures = []
for episode_id in all_subdir_eids:
futures.append(
executor.submit(
load_local_dataset,
episode_id,
src_path=src_path,
task_id=task_id,
task_name=task_name,
save_depth=save_depth,
AgiBotWorld_CONFIG=agibot_world_config,
)
)
for raw_dataset in as_completed(futures):
frames, videos = raw_dataset.result()
for frame_data in frames:
dataset.add_frame(frame_data)
dataset.save_episode(videos=videos)
gc.collect()
def main(
src_path: str,
output_path: str,
eef_type: str,
task_ids: list,
cpus_per_task: int,
num_threads_per_task: int,
save_depth: bool,
debug: bool = False,
):
tasks = get_all_tasks(src_path, output_path)
agibot_world_config, type_task_ids = (
AgiBotWorld_TASK_TYPE[eef_type]["task_config"],
AgiBotWorld_TASK_TYPE[eef_type]["task_ids"],
)
if eef_type == "gripper":
remaining_ids = AgiBotWorld_TASK_TYPE["dexhand"]["task_ids"] + AgiBotWorld_TASK_TYPE["tactile"]["task_ids"]
tasks = filter(lambda task: task[0].stem not in remaining_ids, tasks)
else:
tasks = filter(lambda task: task[0].stem in type_task_ids, tasks)
if task_ids:
tasks = filter(lambda task: task[0].stem in task_ids, tasks)
if debug:
save_as_lerobot_dataset(agibot_world_config, next(tasks), num_threads_per_task, save_depth, debug)
else:
runtime_env = RuntimeEnv(
env_vars={
"HDF5_USE_FILE_LOCKING": "FALSE",
"HF_DATASETS_DISABLE_PROGRESS_BARS": "TRUE",
"LD_PRELOAD": str(Path(__file__).resolve().parent / "libtcmalloc.so.4.5.3"),
}
)
ray.init(runtime_env=runtime_env)
resources = ray.available_resources()
cpus = int(resources["CPU"])
print(f"Available CPUs: {cpus}, num_cpus_per_task: {cpus_per_task}")
remote_task = ray.remote(save_as_lerobot_dataset).options(num_cpus=cpus_per_task)
futures = []
for task in tasks:
futures.append(
(task[0].stem, remote_task.remote(agibot_world_config, task, num_threads_per_task, save_depth, debug))
)
for task, future in futures:
try:
ray.get(future)
except Exception as e:
print(f"Exception occurred for {task}")
with open("output.txt", "a") as f:
f.write(f"{task}, exception details: {str(e)}\n")
ray.shutdown()
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument("--src-path", type=Path, required=True)
parser.add_argument("--output-path", type=Path, required=True)
parser.add_argument("--eef-type", type=str, choices=["gripper", "dexhand", "tactile"], default="gripper")
parser.add_argument("--task-ids", type=str, nargs="+", help="task_327 task_351 ...", default=[])
parser.add_argument("--cpus-per-task", type=int, default=3)
parser.add_argument("--num-threads-per-task", type=int, default=2)
parser.add_argument("--save-depth", action="store_true")
parser.add_argument("--debug", action="store_true")
args = parser.parse_args()
main(**vars(args))
@@ -0,0 +1,94 @@
import json
from pathlib import Path
import h5py
import numpy as np
from PIL import Image
def get_task_instruction(task_json_path: str) -> dict:
"""Get task language instruction"""
with open(task_json_path, "r") as f:
task_info = json.load(f)
task_name = task_info[0]["task_name"]
task_init_scene = task_info[0]["init_scene_text"]
task_instruction = f"{task_name}.{task_init_scene}"
return task_instruction
def load_depths(root_dir: str, camera_name: str):
cam_path = Path(root_dir)
all_imgs = sorted(list(cam_path.glob(f"{camera_name}*")))
return [np.array(Image.open(f)).astype(np.float32)[:, :, None] / 1000 for f in all_imgs]
def load_local_dataset(
episode_id: int, src_path: str, task_id: int, task_name: str, save_depth: bool, AgiBotWorld_CONFIG: dict
) -> tuple[list, dict]:
"""Load local dataset and return a dict with observations and actions"""
ob_dir = Path(src_path) / f"observations/{task_id}/{episode_id}"
proprio_dir = Path(src_path) / f"proprio_stats/{task_id}/{episode_id}"
state = {}
action = {}
with h5py.File(proprio_dir / "proprio_stats.h5", "r") as f:
for key in AgiBotWorld_CONFIG["states"]:
state[f"observation.states.{key}"] = np.array(f["state/" + key.replace(".", "/")], dtype=np.float32)
for key in AgiBotWorld_CONFIG["actions"]:
action[f"actions.{key}"] = np.array(f["action/" + key.replace(".", "/")], dtype=np.float32)
# HACK: agibot team forgot to pad some of the values
num_frames = len(next(iter(state.values())))
for action_key, action_value in action.items():
if action_value.size and len(action_value) != num_frames:
state_key = action_key.replace("actions", "state").replace(".", "/")
new_action_value = np.array(f[state_key], dtype=np.float32).copy()
action_index_key = "/".join(list(action_key.replace("actions", "action").split(".")[:-1]) + ["index"])
action_index = np.array(f[action_index_key])
# agibot is missing the end index; fall back to the joint index
if not action_index.size:
action_index_key = action_index_key.replace("end", "joint")
action_index = np.array(f[action_index_key])
new_action_value[action_index] = action_value
action[action_key] = new_action_value
if save_depth:
depth_imgs = load_depths(ob_dir / "depth", "head_depth")
assert num_frames == len(depth_imgs), "Number of depth images does not match number of states"
state_key_prefix_len = len("observation.states.")
action_key_prefix_len = len("actions.")
frames = [
{
**({"observation.images.head_depth": depth_imgs[i]} if save_depth else {}),
**{
key: value[i]
if value.size
else np.zeros(
AgiBotWorld_CONFIG["states"][key[state_key_prefix_len:]]["shape"],
dtype=AgiBotWorld_CONFIG["states"][key[state_key_prefix_len:]]["dtype"],
)
for key, value in state.items()
},
**{
key: value[i]
if value.size
else np.zeros(
AgiBotWorld_CONFIG["actions"][key[action_key_prefix_len:]]["shape"],
dtype=AgiBotWorld_CONFIG["actions"][key[action_key_prefix_len:]]["dtype"],
)
for key, value in action.items()
},
"task": task_name,
}
for i in range(num_frames)
]
videos = {
f"observation.images.{key}": ob_dir / "videos" / f"{key}_color.mp4"
if "sensor" not in key
else ob_dir / "tactile" / f"{key}.mp4" # HACK: handle tactile videos
for key in AgiBotWorld_CONFIG["images"]
if "depth" not in key
}
return frames, videos
+310
@@ -0,0 +1,310 @@
AgiBotWorld_BETA_GRIPPER_CONFIG = {
"images": {
"head": {
"dtype": "video",
"shape": (480, 640, 3),
"names": ["height", "width", "rgb"],
},
"head_center_fisheye": {
"dtype": "video",
"shape": (748, 960, 3),
"names": ["height", "width", "rgb"],
},
"head_depth": {
"dtype": "image",
"shape": (480, 640, 1),
"names": ["height", "width", "channel"],
},
"head_left_fisheye": {
"dtype": "video",
"shape": (748, 960, 3),
"names": ["height", "width", "rgb"],
},
"head_right_fisheye": {
"dtype": "video",
"shape": (748, 960, 3),
"names": ["height", "width", "rgb"],
},
"hand_left": {
"dtype": "video",
"shape": (480, 640, 3),
"names": ["height", "width", "rgb"],
},
"hand_right": {
"dtype": "video",
"shape": (480, 640, 3),
"names": ["height", "width", "rgb"],
},
"back_left_fisheye": {
"dtype": "video",
"shape": (748, 960, 3),
"names": ["height", "width", "rgb"],
},
"back_right_fisheye": {
"dtype": "video",
"shape": (748, 960, 3),
"names": ["height", "width", "rgb"],
},
},
"states": {
"effector.position": {
"dtype": "float32",
"shape": (2,),
"names": {"motors": ["left_gripper", "right_gripper"]},
},
"end.orientation": {"dtype": "float32", "shape": (2, 4), "names": {"motors": ["left_xyzw", "right_xyzw"]}},
"end.position": {"dtype": "float32", "shape": (2, 3), "names": {"motors": ["left_xyz", "right_xyz"]}},
"head.position": {"dtype": "float32", "shape": (2,), "names": {"motors": ["yaw", "patch"]}},
"joint.current_value": {
"dtype": "float32",
"shape": (14,),
"names": {
"motors": [
"left_arm_0",
"left_arm_1",
"left_arm_2",
"left_arm_3",
"left_arm_4",
"left_arm_5",
"left_arm_6",
"right_arm_0",
"right_arm_1",
"right_arm_2",
"right_arm_3",
"right_arm_4",
"right_arm_5",
"right_arm_6",
]
},
},
"joint.position": {
"dtype": "float32",
"shape": (14,),
"names": {
"motors": [
"left_arm_0",
"left_arm_1",
"left_arm_2",
"left_arm_3",
"left_arm_4",
"left_arm_5",
"left_arm_6",
"right_arm_0",
"right_arm_1",
"right_arm_2",
"right_arm_3",
"right_arm_4",
"right_arm_5",
"right_arm_6",
]
},
},
"robot.orientation": {"dtype": "float32", "shape": (4,), "names": {"motors": ["x", "y", "z", "w"]}},
"robot.position": {"dtype": "float32", "shape": (3,), "names": {"motors": ["x", "y", "z"]}},
"waist.position": {"dtype": "float32", "shape": (2,), "names": {"motors": ["pitch", "lift"]}},
},
"actions": {
"effector.position": {
"dtype": "float32",
"shape": (2,),
"names": {"motors": ["left_gripper", "right_gripper"]},
},
"end.orientation": {"dtype": "float32", "shape": (2, 4), "names": {"motors": ["left_xyzw", "right_xyzw"]}},
"end.position": {"dtype": "float32", "shape": (2, 3), "names": {"motors": ["left_xyz", "right_xyz"]}},
"head.position": {"dtype": "float32", "shape": (2,), "names": {"motors": ["yaw", "patch"]}},
"joint.position": {
"dtype": "float32",
"shape": (14,),
"names": {
"motors": [
"left_arm_0",
"left_arm_1",
"left_arm_2",
"left_arm_3",
"left_arm_4",
"left_arm_5",
"left_arm_6",
"right_arm_0",
"right_arm_1",
"right_arm_2",
"right_arm_3",
"right_arm_4",
"right_arm_5",
"right_arm_6",
]
},
},
"robot.velocity": {"dtype": "float32", "shape": (2,), "names": {"motors": ["x_vel", "yaw_vel"]}},
"waist.position": {"dtype": "float32", "shape": (2,), "names": {"motors": ["pitch", "lift"]}},
},
}
AgiBotWorld_BETA_DEXHAND_CONFIG = {
"images": {
"head": {
"dtype": "video",
"shape": (480, 640, 3),
"names": ["height", "width", "rgb"],
},
"head_center_fisheye": {
"dtype": "video",
"shape": (748, 960, 3),
"names": ["height", "width", "rgb"],
},
"head_depth": {
"dtype": "image",
"shape": (480, 640, 1),
"names": ["height", "width", "channel"],
},
"head_left_fisheye": {
"dtype": "video",
"shape": (748, 960, 3),
"names": ["height", "width", "rgb"],
},
"head_right_fisheye": {
"dtype": "video",
"shape": (748, 960, 3),
"names": ["height", "width", "rgb"],
},
"hand_left_fisheye": {
"dtype": "video",
"shape": (748, 960, 3),
"names": ["height", "width", "rgb"],
},
"hand_right_fisheye": {
"dtype": "video",
"shape": (748, 960, 3),
"names": ["height", "width", "rgb"],
},
"back_left_fisheye": {
"dtype": "video",
"shape": (748, 960, 3),
"names": ["height", "width", "rgb"],
},
"back_right_fisheye": {
"dtype": "video",
"shape": (748, 960, 3),
"names": ["height", "width", "rgb"],
},
},
"states": {
**AgiBotWorld_BETA_GRIPPER_CONFIG["states"],
"effector.position": {
"dtype": "float32",
"shape": (12,),
"names": {
"motors": [
"left_joint_0",
"left_joint_1",
"left_joint_2",
"left_joint_3",
"left_joint_4",
"left_joint_5",
"right_joint_0",
"right_joint_1",
"right_joint_2",
"right_joint_3",
"right_joint_4",
"right_joint_5",
]
},
},
},
"actions": {
**AgiBotWorld_BETA_GRIPPER_CONFIG["actions"],
"effector.position": {
"dtype": "float32",
"shape": (12,),
"names": {
"motors": [
"left_joint_0",
"left_joint_1",
"left_joint_2",
"left_joint_3",
"left_joint_4",
"left_joint_5",
"right_joint_0",
"right_joint_1",
"right_joint_2",
"right_joint_3",
"right_joint_4",
"right_joint_5",
]
},
},
},
}
AgiBotWorld_BETA_TACTILE_CONFIG = {
**AgiBotWorld_BETA_GRIPPER_CONFIG,
"images": {
**AgiBotWorld_BETA_GRIPPER_CONFIG["images"],
"left_sensor_1": {
"dtype": "video",
"shape": (700, 400, 3),
"names": ["height", "width", "rgb"],
},
"left_sensor_2": {
"dtype": "video",
"shape": (700, 400, 3),
"names": ["height", "width", "rgb"],
},
"right_sensor_1": {
"dtype": "video",
"shape": (700, 400, 3),
"names": ["height", "width", "rgb"],
},
"right_sensor_2": {
"dtype": "video",
"shape": (700, 400, 3),
"names": ["height", "width", "rgb"],
},
},
}
# Task statistics coming from https://docs.google.com/spreadsheets/d/1GWMFHYo3UJADS7kkScoJ5ObbQfAFasPuaeC7TJUr1Cc/edit?gid=0#gid=0
AgiBotWorld_TASK_TYPE = {
"gripper": {
"task_config": AgiBotWorld_BETA_GRIPPER_CONFIG,
"task_ids": [], # The remaining are all gripper
},
"dexhand": {
"task_config": AgiBotWorld_BETA_DEXHAND_CONFIG,
"task_ids": [
"task_475",
"task_536",
"task_547",
"task_548",
"task_549",
"task_554",
"task_577",
"task_578",
"task_591",
"task_595",
"task_608",
"task_620",
"task_622",
"task_660",
"task_679",
"task_705",
"task_710",
"task_727",
"task_730",
"task_731",
"task_749",
"task_753",
],
},
"tactile": {
"task_config": AgiBotWorld_BETA_TACTILE_CONFIG,
"task_ids": [
"task_666",
"task_675",
"task_676",
"task_677",
"task_694",
"task_737",
"task_774",
],
},
}
@@ -0,0 +1,75 @@
import numpy as np
import torch
import torchvision
from lerobot.common.datasets.compute_stats import auto_downsample_height_width, get_feature_stats, sample_indices
torchvision.set_video_backend("pyav")
def generate_features_from_config(AgiBotWorld_CONFIG):
features = {}
for key, value in AgiBotWorld_CONFIG["images"].items():
features[f"observation.images.{key}"] = value
for key, value in AgiBotWorld_CONFIG["states"].items():
features[f"observation.states.{key}"] = value
for key, value in AgiBotWorld_CONFIG["actions"].items():
features[f"actions.{key}"] = value
return features
def sample_images(input):
if type(input) is str:
video_path = input
reader = torchvision.io.VideoReader(video_path, stream="video")
frames = [frame["data"] for frame in reader]
frames_array = torch.stack(frames).numpy() # Shape: [T, C, H, W]
sampled_indices = sample_indices(len(frames_array))
images = None
for i, idx in enumerate(sampled_indices):
img = frames_array[idx]
img = auto_downsample_height_width(img)
if images is None:
images = np.empty((len(sampled_indices), *img.shape), dtype=np.uint8)
images[i] = img
elif type(input) is np.ndarray:
frames_array = input[:, None, :, :] # Shape: [T, C, H, W]
sampled_indices = sample_indices(len(frames_array))
images = None
for i, idx in enumerate(sampled_indices):
img = frames_array[idx]
img = auto_downsample_height_width(img)
if images is None:
images = np.empty((len(sampled_indices), *img.shape), dtype=np.uint8)
images[i] = img
return images
def compute_episode_stats(episode_data: dict[str, list[str] | np.ndarray], features: dict) -> dict:
ep_stats = {}
for key, data in episode_data.items():
if features[key]["dtype"] == "string":
continue # HACK: we should receive np.arrays of strings
elif features[key]["dtype"] in ["image", "video"]:
ep_ft_array = sample_images(data)
axes_to_reduce = (0, 2, 3) # keep channel dim
keepdims = True
else:
ep_ft_array = data # data is already a np.ndarray
axes_to_reduce = 0 # compute stats over the first axis
keepdims = data.ndim == 1 # keep as np.array
ep_stats[key] = get_feature_stats(ep_ft_array, axis=axes_to_reduce, keepdims=keepdims)
if features[key]["dtype"] in ["image", "video"]:
value_norm = 1.0 if "depth" in key else 255.0
ep_stats[key] = {
k: v if k == "count" else np.squeeze(v / value_norm, axis=0) for k, v in ep_stats[key].items()
}
return ep_stats
+7
@@ -0,0 +1,7 @@
export HDF5_USE_FILE_LOCKING=FALSE
export RAY_DEDUP_LOGS=0
python agibot_h5.py \
--src-path /path/to/AgiBotWorld-Beta/ \
--output-path /path/to/local \
--eef-type gripper \
--cpus-per-task 3
Binary file not shown.