mirror of
https://github.com/huggingface/lerobot.git
synced 2026-05-19 10:40:04 +00:00
sync recent changes
This commit is contained in:
@@ -0,0 +1,280 @@
|
||||
import dataclasses
|
||||
from typing import List, Dict
|
||||
|
||||
|
||||
@dataclasses.dataclass(frozen=True)
|
||||
class RobotConfig:
|
||||
motors: List[str]
|
||||
cameras: List[str]
|
||||
camera_to_image_key: Dict[str, str]
|
||||
json_state_data_name: List[str]
|
||||
json_action_data_name: List[str]
|
||||
|
||||
|
||||
Z1_CONFIG = RobotConfig(
|
||||
motors=[
|
||||
"kLeftWaist",
|
||||
"kLeftShoulder",
|
||||
"kLeftElbow",
|
||||
"kLeftForearmRoll",
|
||||
"kLeftWristAngle",
|
||||
"kLeftWristRotate",
|
||||
"kLeftGripper",
|
||||
"kRightWaist",
|
||||
"kRightShoulder",
|
||||
"kRightElbow",
|
||||
"kRightForearmRoll",
|
||||
"kRightWristAngle",
|
||||
"kRightWristRotate",
|
||||
"kRightGripper",
|
||||
],
|
||||
cameras=[
|
||||
"cam_high",
|
||||
"cam_left_wrist",
|
||||
"cam_right_wrist",
|
||||
],
|
||||
camera_to_image_key={"color_0": "cam_high", "color_1": "cam_left_wrist", "color_2": "cam_right_wrist"},
|
||||
json_state_data_name=["left_arm", "right_arm"],
|
||||
json_action_data_name=["left_arm", "right_arm"],
|
||||
)
|
||||
|
||||
|
||||
Z1_SINGLE_CONFIG = RobotConfig(
|
||||
motors=[
|
||||
"kWaist",
|
||||
"kShoulder",
|
||||
"kElbow",
|
||||
"kForearmRoll",
|
||||
"kWristAngle",
|
||||
"kWristRotate",
|
||||
"kGripper",
|
||||
],
|
||||
cameras=[
|
||||
"cam_high",
|
||||
"cam_wrist",
|
||||
],
|
||||
camera_to_image_key={"color_0": "cam_high", "color_1": "cam_wrist"},
|
||||
json_state_data_name=["left_arm", "right_arm"],
|
||||
json_action_data_name=["left_arm", "right_arm"],
|
||||
)
|
||||
|
||||
|
||||
G1_DEX1_CONFIG = RobotConfig(
|
||||
motors=[
|
||||
"kLeftShoulderPitch",
|
||||
"kLeftShoulderRoll",
|
||||
"kLeftShoulderYaw",
|
||||
"kLeftElbow",
|
||||
"kLeftWristRoll",
|
||||
"kLeftWristPitch",
|
||||
"kLeftWristYaw",
|
||||
"kRightShoulderPitch",
|
||||
"kRightShoulderRoll",
|
||||
"kRightShoulderYaw",
|
||||
"kRightElbow",
|
||||
"kRightWristRoll",
|
||||
"kRightWristPitch",
|
||||
"kRightWristYaw",
|
||||
"kLeftGripper",
|
||||
"kRightGripper",
|
||||
],
|
||||
cameras=[
|
||||
"cam_left_high",
|
||||
"cam_right_high",
|
||||
"cam_left_wrist",
|
||||
"cam_right_wrist",
|
||||
],
|
||||
camera_to_image_key={
|
||||
"color_0": "cam_left_high",
|
||||
"color_1": "cam_right_high",
|
||||
"color_2": "cam_left_wrist",
|
||||
"color_3": "cam_right_wrist",
|
||||
},
|
||||
json_state_data_name=["left_arm", "right_arm", "left_ee", "right_ee"],
|
||||
json_action_data_name=["left_arm", "right_arm", "left_ee", "right_ee"],
|
||||
)
|
||||
|
||||
|
||||
G1_DEX1_CONFIG_SIM = RobotConfig(
|
||||
motors=[
|
||||
"kLeftShoulderPitch",
|
||||
"kLeftShoulderRoll",
|
||||
"kLeftShoulderYaw",
|
||||
"kLeftElbow",
|
||||
"kLeftWristRoll",
|
||||
"kLeftWristPitch",
|
||||
"kLeftWristYaw",
|
||||
"kRightShoulderPitch",
|
||||
"kRightShoulderRoll",
|
||||
"kRightShoulderYaw",
|
||||
"kRightElbow",
|
||||
"kRightWristRoll",
|
||||
"kRightWristPitch",
|
||||
"kRightWristYaw",
|
||||
"kLeftGripper",
|
||||
"kRightGripper",
|
||||
],
|
||||
cameras=[
|
||||
"cam_left_high",
|
||||
"cam_left_wrist",
|
||||
"cam_right_wrist",
|
||||
],
|
||||
camera_to_image_key={
|
||||
"color_0": "cam_left_high",
|
||||
"color_1": "cam_left_wrist",
|
||||
"color_2": "cam_right_wrist",
|
||||
},
|
||||
json_state_data_name=["left_arm", "right_arm", "left_ee", "right_ee"],
|
||||
json_action_data_name=["left_arm", "right_arm", "left_ee", "right_ee"],
|
||||
)
|
||||
|
||||
|
||||
G1_DEX3_CONFIG = RobotConfig(
|
||||
motors=[
|
||||
"kLeftShoulderPitch",
|
||||
"kLeftShoulderRoll",
|
||||
"kLeftShoulderYaw",
|
||||
"kLeftElbow",
|
||||
"kLeftWristRoll",
|
||||
"kLeftWristPitch",
|
||||
"kLeftWristYaw",
|
||||
"kRightShoulderPitch",
|
||||
"kRightShoulderRoll",
|
||||
"kRightShoulderYaw",
|
||||
"kRightElbow",
|
||||
"kRightWristRoll",
|
||||
"kRightWristPitch",
|
||||
"kRightWristYaw",
|
||||
"kLeftHandThumb0",
|
||||
"kLeftHandThumb1",
|
||||
"kLeftHandThumb2",
|
||||
"kLeftHandMiddle0",
|
||||
"kLeftHandMiddle1",
|
||||
"kLeftHandIndex0",
|
||||
"kLeftHandIndex1",
|
||||
"kRightHandThumb0",
|
||||
"kRightHandThumb1",
|
||||
"kRightHandThumb2",
|
||||
"kRightHandIndex0",
|
||||
"kRightHandIndex1",
|
||||
"kRightHandMiddle0",
|
||||
"kRightHandMiddle1",
|
||||
],
|
||||
cameras=[
|
||||
"cam_left_high",
|
||||
"cam_right_high",
|
||||
"cam_left_wrist",
|
||||
"cam_right_wrist",
|
||||
],
|
||||
camera_to_image_key={
|
||||
"color_0": "cam_left_high",
|
||||
"color_1": "cam_right_high",
|
||||
"color_2": "cam_left_wrist",
|
||||
"color_3": "cam_right_wrist",
|
||||
},
|
||||
json_state_data_name=["left_arm", "right_arm", "left_ee", "right_ee"],
|
||||
json_action_data_name=["left_arm", "right_arm", "left_ee", "right_ee"],
|
||||
)
|
||||
|
||||
|
||||
G1_BRAINCO_CONFIG = RobotConfig(
|
||||
motors=[
|
||||
"kLeftShoulderPitch",
|
||||
"kLeftShoulderRoll",
|
||||
"kLeftShoulderYaw",
|
||||
"kLeftElbow",
|
||||
"kLeftWristRoll",
|
||||
"kLeftWristPitch",
|
||||
"kLeftWristYaw",
|
||||
"kRightShoulderPitch",
|
||||
"kRightShoulderRoll",
|
||||
"kRightShoulderYaw",
|
||||
"kRightElbow",
|
||||
"kRightWristRoll",
|
||||
"kRightWristPitch",
|
||||
"kRightWristYaw",
|
||||
"kLeftHandThumb",
|
||||
"kLeftHandThumbAux",
|
||||
"kLeftHandIndex",
|
||||
"kLeftHandMiddle",
|
||||
"kLeftHandRing",
|
||||
"kLeftHandPinky",
|
||||
"kRightHandThumb",
|
||||
"kRightHandThumbAux",
|
||||
"kRightHandIndex",
|
||||
"kRightHandMiddle",
|
||||
"kRightHandRing",
|
||||
"kRightHandPinky",
|
||||
],
|
||||
cameras=[
|
||||
"cam_left_high",
|
||||
"cam_right_high",
|
||||
"cam_left_wrist",
|
||||
"cam_right_wrist",
|
||||
],
|
||||
camera_to_image_key={
|
||||
"color_0": "cam_left_high",
|
||||
"color_1": "cam_right_high",
|
||||
"color_2": "cam_left_wrist",
|
||||
"color_3": "cam_right_wrist",
|
||||
},
|
||||
json_state_data_name=["left_arm", "right_arm", "left_ee", "right_ee"],
|
||||
json_action_data_name=["left_arm", "right_arm", "left_ee", "right_ee"],
|
||||
)
|
||||
|
||||
|
||||
G1_INSPIRE_CONFIG = RobotConfig(
|
||||
motors=[
|
||||
"kLeftShoulderPitch",
|
||||
"kLeftShoulderRoll",
|
||||
"kLeftShoulderYaw",
|
||||
"kLeftElbow",
|
||||
"kLeftWristRoll",
|
||||
"kLeftWristPitch",
|
||||
"kLeftWristYaw",
|
||||
"kRightShoulderPitch",
|
||||
"kRightShoulderRoll",
|
||||
"kRightShoulderYaw",
|
||||
"kRightElbow",
|
||||
"kRightWristRoll",
|
||||
"kRightWristPitch",
|
||||
"kRightWristYaw",
|
||||
"kLeftHandPinky",
|
||||
"kLeftHandRing",
|
||||
"kLeftHandMiddle",
|
||||
"kLeftHandIndex",
|
||||
"kLeftHandThumbBend",
|
||||
"kLeftHandThumbRotation",
|
||||
"kRightHandPinky",
|
||||
"kRightHandRing",
|
||||
"kRightHandMiddle",
|
||||
"kRightHandIndex",
|
||||
"kRightHandThumbBend",
|
||||
"kRightHandThumbRotation",
|
||||
],
|
||||
cameras=[
|
||||
"cam_left_high",
|
||||
"cam_right_high",
|
||||
"cam_left_wrist",
|
||||
"cam_right_wrist",
|
||||
],
|
||||
camera_to_image_key={
|
||||
"color_0": "cam_left_high",
|
||||
"color_1": "cam_right_high",
|
||||
"color_2": "cam_left_wrist",
|
||||
"color_3": "cam_right_wrist",
|
||||
},
|
||||
json_state_data_name=["left_arm", "right_arm", "left_ee", "right_ee"],
|
||||
json_action_data_name=["left_arm", "right_arm", "left_ee", "right_ee"],
|
||||
)
|
||||
|
||||
|
||||
ROBOT_CONFIGS = {
|
||||
"Unitree_Z1_Single": Z1_SINGLE_CONFIG,
|
||||
"Unitree_Z1_Dual": Z1_CONFIG,
|
||||
"Unitree_G1_Dex1": G1_DEX1_CONFIG,
|
||||
"Unitree_G1_Dex1_Sim": G1_DEX1_CONFIG_SIM,
|
||||
"Unitree_G1_Dex3": G1_DEX3_CONFIG,
|
||||
"Unitree_G1_Brainco": G1_BRAINCO_CONFIG,
|
||||
"Unitree_G1_Inspire": G1_INSPIRE_CONFIG,
|
||||
}
|
||||
@@ -0,0 +1,161 @@
|
||||
"""
|
||||
Script lerobot to h5.
|
||||
# --repo-id Your unique repo ID on Hugging Face Hub
|
||||
# --output_dir Save path to h5 file
|
||||
|
||||
python unitree_lerobot/utils/convert_lerobot_to_h5.py.py \
|
||||
--repo-id your_name/g1_grabcube_double_hand \
|
||||
--output_dir "$HOME/datasets/g1_grabcube_double_hand"
|
||||
"""
|
||||
|
||||
import os
|
||||
import cv2
|
||||
import h5py
|
||||
import tyro
|
||||
import numpy as np
|
||||
from tqdm import tqdm
|
||||
from pathlib import Path
|
||||
from collections import defaultdict
|
||||
from lerobot.datasets.lerobot_dataset import LeRobotDataset
|
||||
|
||||
|
||||
class LeRobotDataProcessor:
|
||||
def __init__(self, repo_id: str, root: str = None, image_dtype: str = "to_unit8") -> None:
|
||||
self.image_dtype = image_dtype
|
||||
self.dataset = LeRobotDataset(repo_id=repo_id, root=root)
|
||||
|
||||
def process_episode(self, episode_index: int) -> dict:
|
||||
"""Process a single episode to extract camera images, state, and action."""
|
||||
from_idx = self.dataset.episode_data_index["from"][episode_index].item()
|
||||
to_idx = self.dataset.episode_data_index["to"][episode_index].item()
|
||||
|
||||
episode = defaultdict(list)
|
||||
cameras = defaultdict(list)
|
||||
|
||||
for step_idx in tqdm(
|
||||
range(from_idx, to_idx), desc=f"Episode {episode_index}", position=1, leave=False, dynamic_ncols=True
|
||||
):
|
||||
step = self.dataset[step_idx]
|
||||
|
||||
image_dict = {
|
||||
key.split(".")[2]: cv2.cvtColor(
|
||||
np.transpose((value.numpy() * 255).astype(np.uint8), (1, 2, 0)), cv2.COLOR_BGR2RGB
|
||||
)
|
||||
for key, value in step.items()
|
||||
if key.startswith("observation.image") and len(key.split(".")) >= 3
|
||||
}
|
||||
|
||||
for key, value in image_dict.items():
|
||||
if self.image_dtype == "to_unit8":
|
||||
cameras[key].append(value)
|
||||
elif self.image_dtype == "to_bytes":
|
||||
success, encoded_img = cv2.imencode(".jpg", value, [cv2.IMWRITE_JPEG_QUALITY, 100])
|
||||
if not success:
|
||||
raise ValueError(f"Image encoding failed for key: {key}")
|
||||
cameras[key].append(np.void(encoded_img.tobytes()))
|
||||
|
||||
cam_height, cam_width = next(iter(image_dict.values())).shape[:2]
|
||||
episode["state"].append(step["observation.state"])
|
||||
episode["action"].append(step["action"])
|
||||
|
||||
episode["cameras"] = cameras
|
||||
episode["task"] = step["task"]
|
||||
episode["episode_length"] = to_idx - from_idx
|
||||
|
||||
# Data configuration for later use
|
||||
episode["data_cfg"] = {
|
||||
"camera_names": list(image_dict.keys()),
|
||||
"cam_height": cam_height,
|
||||
"cam_width": cam_width,
|
||||
"state_dim": np.squeeze(step["observation.state"].numpy().shape),
|
||||
"action_dim": np.squeeze(step["action"].numpy().shape),
|
||||
}
|
||||
episode["episode_index"] = episode_index
|
||||
|
||||
return episode
|
||||
|
||||
|
||||
class H5Writer:
|
||||
def __init__(self, output_dir: Path) -> None:
|
||||
self.output_dir = output_dir
|
||||
os.makedirs(output_dir, exist_ok=True)
|
||||
|
||||
def write_to_h5(self, episode: dict) -> None:
|
||||
"""Write episode data to HDF5 file."""
|
||||
|
||||
episode_length = episode["episode_length"]
|
||||
episode_index = episode["episode_index"]
|
||||
state = episode["state"]
|
||||
action = episode["action"]
|
||||
qvel = np.zeros_like(episode["state"])
|
||||
cameras = episode["cameras"]
|
||||
task = episode["task"]
|
||||
data_cfg = episode["data_cfg"]
|
||||
|
||||
# Prepare data dictionary
|
||||
data_dict = {
|
||||
"/observations/qpos": [state],
|
||||
"/observations/qvel": [qvel],
|
||||
"/action": [action],
|
||||
**{f"/observations/images/{k}": [v] for k, v in cameras.items()},
|
||||
}
|
||||
|
||||
h5_path = os.path.join(self.output_dir, f"episode_{episode_index}.hdf5")
|
||||
|
||||
with h5py.File(h5_path, "w", rdcc_nbytes=1024**2 * 2, libver="latest") as root:
|
||||
# Set attributes
|
||||
root.attrs["sim"] = False
|
||||
|
||||
# Create datasets
|
||||
obs = root.create_group("observations")
|
||||
image = obs.create_group("images")
|
||||
|
||||
# Write camera images
|
||||
for cam_name, images in cameras.items():
|
||||
data_dtype = images[0].dtype
|
||||
shape = (
|
||||
(episode_length, data_cfg["cam_height"], data_cfg["cam_width"], 3)
|
||||
if data_dtype == "uint8"
|
||||
else (episode_length,)
|
||||
)
|
||||
chunks = (1, data_cfg["cam_height"], data_cfg["cam_width"], 3) if data_dtype == "uint8" else (1,)
|
||||
image.create_dataset(cam_name, shape=shape, dtype=data_dtype, chunks=chunks, compression="gzip")
|
||||
# root[f'/observations/images/{cam_name}'][...] = images
|
||||
|
||||
# Write state and action data
|
||||
obs.create_dataset("qpos", (episode_length, data_cfg["state_dim"]), dtype="float32", compression="gzip")
|
||||
obs.create_dataset("qvel", (episode_length, data_cfg["state_dim"]), dtype="float32", compression="gzip")
|
||||
root.create_dataset("action", (episode_length, data_cfg["action_dim"]), dtype="float32", compression="gzip")
|
||||
|
||||
# Write metadata
|
||||
root.create_dataset("is_edited", (1,), dtype="uint8")
|
||||
substep_reasonings = root.create_dataset(
|
||||
"substep_reasonings", (episode_length,), dtype=h5py.string_dtype(encoding="utf-8"), compression="gzip"
|
||||
)
|
||||
root.create_dataset("language_raw", data=task)
|
||||
substep_reasonings[:] = [task] * episode_length
|
||||
|
||||
# Write additional data
|
||||
for name, array in data_dict.items():
|
||||
root[name][...] = array
|
||||
|
||||
|
||||
def lerobot_to_h5(repo_id: str, output_dir: Path, root: str = None) -> None:
|
||||
"""Main function to process and write LeRobot data to HDF5 format."""
|
||||
|
||||
# Initialize data processor and H5 writer
|
||||
data_processor = LeRobotDataProcessor(
|
||||
repo_id, root, image_dtype="to_unit8"
|
||||
) # image_dtype Options: "to_unit8", "to_bytes"
|
||||
h5_writer = H5Writer(output_dir)
|
||||
|
||||
# Process each episode
|
||||
for episode_index in tqdm(
|
||||
range(data_processor.dataset.num_episodes), desc="Episodes", position=0, dynamic_ncols=True
|
||||
):
|
||||
episode = data_processor.process_episode(episode_index)
|
||||
h5_writer.write_to_h5(episode)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
tyro.cli(lerobot_to_h5)
|
||||
@@ -0,0 +1,250 @@
|
||||
"""
|
||||
Script Json to h5.
|
||||
|
||||
# --data_dirs Corresponds to the directory of your JSON dataset
|
||||
# --output_dir Save path to h5 file
|
||||
# --robot_type The type of the robot used in the dataset (e.g., Unitree_Z1_Single, Unitree_Z1_Dual, Unitree_G1_Dex1, Unitree_G1_Dex3, Unitree_G1_Brainco, Unitree_G1_Inspire)
|
||||
|
||||
python unitree_lerobot/utils/convert_unitree_json_to_h5.py \
|
||||
--data_dirs $HOME/datasets/json \
|
||||
--output_dir $HOME/datasets/h5 \
|
||||
--robot_type Unitree_G1_Dex3
|
||||
"""
|
||||
|
||||
import os
|
||||
import tyro
|
||||
import json
|
||||
import h5py
|
||||
import cv2
|
||||
import tqdm
|
||||
import glob
|
||||
import numpy as np
|
||||
from pathlib import Path
|
||||
from typing import List, Dict, Optional
|
||||
from collections import defaultdict
|
||||
from unitree_lerobot.utils.constants import ROBOT_CONFIGS
|
||||
|
||||
|
||||
class JsonDataset:
|
||||
def __init__(self, data_dirs: Path, robot_type: str) -> None:
|
||||
"""
|
||||
Initialize the dataset for loading and processing HDF5 files containing robot manipulation data.
|
||||
|
||||
Args:
|
||||
data_dirs: Path to directory containing training data
|
||||
"""
|
||||
assert data_dirs is not None, "Data directory cannot be None"
|
||||
assert robot_type is not None, "Robot type cannot be None"
|
||||
self.data_dirs = data_dirs
|
||||
self.json_file = "data.json"
|
||||
|
||||
# Initialize paths and cache
|
||||
self._init_paths()
|
||||
self._init_cache()
|
||||
self.json_state_data_name = ROBOT_CONFIGS[robot_type].json_state_data_name
|
||||
self.json_action_data_name = ROBOT_CONFIGS[robot_type].json_action_data_name
|
||||
self.camera_to_image_key = ROBOT_CONFIGS[robot_type].camera_to_image_key
|
||||
|
||||
def _init_paths(self) -> None:
|
||||
"""Initialize episode and task paths."""
|
||||
|
||||
self.episode_paths = []
|
||||
self.task_paths = []
|
||||
|
||||
for task_path in glob.glob(os.path.join(self.data_dirs, "*")):
|
||||
if os.path.isdir(task_path):
|
||||
episode_paths = glob.glob(os.path.join(task_path, "*"))
|
||||
if episode_paths:
|
||||
self.task_paths.append(task_path)
|
||||
self.episode_paths.extend(episode_paths)
|
||||
|
||||
self.episode_paths = sorted(self.episode_paths)
|
||||
self.episode_ids = list(range(len(self.episode_paths)))
|
||||
|
||||
def __len__(self) -> int:
|
||||
"""Return the number of episodes in the dataset."""
|
||||
return len(self.episode_paths)
|
||||
|
||||
def _init_cache(self) -> List:
|
||||
"""Initialize data cache if enabled."""
|
||||
|
||||
self.episodes_data_cached = []
|
||||
for episode_path in tqdm.tqdm(self.episode_paths, desc="Loading Cache Json"):
|
||||
json_path = os.path.join(episode_path, self.json_file)
|
||||
with open(json_path, "r", encoding="utf-8") as jsonf:
|
||||
self.episodes_data_cached.append(json.load(jsonf))
|
||||
|
||||
print(f"==> Cached {len(self.episodes_data_cached)} episodes")
|
||||
|
||||
return self.episodes_data_cached
|
||||
|
||||
def _extract_data(self, episode_data: Dict, key: str, parts: List[str]) -> np.ndarray:
|
||||
"""
|
||||
Extract data from episode dictionary for specified parts.
|
||||
|
||||
Args:
|
||||
episode_data: Dictionary containing episode data
|
||||
key: Data key to extract ('states' or 'actions')
|
||||
parts: List of parts to include ('left_arm', 'right_arm')
|
||||
|
||||
Returns:
|
||||
Concatenated numpy array of the requested data
|
||||
"""
|
||||
result = []
|
||||
for sample_data in episode_data["data"]:
|
||||
data_array = np.array([], dtype=np.float32)
|
||||
for part in parts:
|
||||
if part in sample_data[key] and sample_data[key][part] is not None:
|
||||
qpos = np.array(sample_data[key][part]["qpos"], dtype=np.float32)
|
||||
data_array = np.concatenate([data_array, qpos])
|
||||
result.append(data_array)
|
||||
return np.array(result)
|
||||
|
||||
def _parse_images(self, episode_path: str, episode_data) -> dict[str, list[np.ndarray]]:
|
||||
"""Load and stack images for a given camera key."""
|
||||
|
||||
images = defaultdict(list)
|
||||
|
||||
keys = episode_data["data"][0]["colors"].keys()
|
||||
cameras = [key for key in keys if "depth" not in key]
|
||||
|
||||
for camera in cameras:
|
||||
image_key = self.camera_to_image_key.get(camera)
|
||||
|
||||
for sample_data in episode_data["data"]:
|
||||
image_path = os.path.join(episode_path, sample_data["colors"].get(camera))
|
||||
if not os.path.exists(image_path):
|
||||
raise FileNotFoundError(f"Image path does not exist: {image_path}")
|
||||
|
||||
image = cv2.imread(image_path)
|
||||
if image is None:
|
||||
raise RuntimeError(f"Failed to read image: {image_path}")
|
||||
|
||||
image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
|
||||
images[image_key].append(image_rgb)
|
||||
|
||||
return images
|
||||
|
||||
def get_item(
|
||||
self,
|
||||
index: Optional[int] = None,
|
||||
) -> Dict:
|
||||
"""Get a training sample from the dataset."""
|
||||
|
||||
file_path = np.random.choice(self.episode_paths) if index is None else self.episode_paths[index]
|
||||
episode_data = self.episodes_data_cached[index]
|
||||
|
||||
# Load state and action data
|
||||
action = self._extract_data(episode_data, "actions", self.json_action_data_name)
|
||||
state = self._extract_data(episode_data, "states", self.json_state_data_name)
|
||||
episode_length = len(state)
|
||||
state_dim = state.shape[1] if len(state.shape) == 2 else state.shape[0]
|
||||
action_dim = action.shape[1] if len(action.shape) == 2 else state.shape[0]
|
||||
|
||||
# Load task description
|
||||
task = episode_data.get("text", {}).get("goal", "")
|
||||
|
||||
# Load camera images
|
||||
cameras = self._parse_images(file_path, episode_data)
|
||||
|
||||
# Extract camera configuration
|
||||
cam_height, cam_width = next(img for imgs in cameras.values() if imgs for img in imgs).shape[:2]
|
||||
data_cfg = {
|
||||
"camera_names": list(cameras.keys()),
|
||||
"cam_height": cam_height,
|
||||
"cam_width": cam_width,
|
||||
"state_dim": state_dim,
|
||||
"action_dim": action_dim,
|
||||
}
|
||||
|
||||
return {
|
||||
"episode_index": index,
|
||||
"episode_length": episode_length,
|
||||
"state": state,
|
||||
"action": action,
|
||||
"cameras": cameras,
|
||||
"task": task,
|
||||
"data_cfg": data_cfg,
|
||||
}
|
||||
|
||||
|
||||
class H5Writer:
|
||||
def __init__(self, output_dir: Path) -> None:
|
||||
self.output_dir = output_dir
|
||||
os.makedirs(output_dir, exist_ok=True)
|
||||
|
||||
def write_to_h5(self, episode: dict) -> None:
|
||||
"""Write episode data to HDF5 file."""
|
||||
|
||||
episode_length = episode["episode_length"]
|
||||
episode_index = episode["episode_index"]
|
||||
state = episode["state"]
|
||||
action = episode["action"]
|
||||
qvel = np.zeros_like(episode["state"])
|
||||
cameras = episode["cameras"]
|
||||
task = episode["task"]
|
||||
data_cfg = episode["data_cfg"]
|
||||
|
||||
# Prepare data dictionary
|
||||
data_dict = {
|
||||
"/observations/qpos": [state],
|
||||
"/observations/qvel": [qvel],
|
||||
"/action": [action],
|
||||
**{f"/observations/images/{k}": [v] for k, v in cameras.items()},
|
||||
}
|
||||
|
||||
h5_path = os.path.join(self.output_dir, f"episode_{episode_index}.hdf5")
|
||||
|
||||
with h5py.File(h5_path, "w", rdcc_nbytes=1024**2 * 2, libver="latest") as root:
|
||||
# Set attributes
|
||||
root.attrs["sim"] = False
|
||||
|
||||
# Create datasets
|
||||
obs = root.create_group("observations")
|
||||
image = obs.create_group("images")
|
||||
|
||||
# Write camera images
|
||||
for cam_name, images in cameras.items():
|
||||
image.create_dataset(
|
||||
cam_name,
|
||||
shape=(episode_length, data_cfg["cam_height"], data_cfg["cam_width"], 3),
|
||||
dtype="uint8",
|
||||
chunks=(1, data_cfg["cam_height"], data_cfg["cam_width"], 3),
|
||||
compression="gzip",
|
||||
)
|
||||
# root[f'/observations/images/{cam_name}'][...] = images
|
||||
|
||||
# Write state and action data
|
||||
obs.create_dataset("qpos", (episode_length, data_cfg["state_dim"]), dtype="float32", compression="gzip")
|
||||
obs.create_dataset("qvel", (episode_length, data_cfg["state_dim"]), dtype="float32", compression="gzip")
|
||||
root.create_dataset("action", (episode_length, data_cfg["action_dim"]), dtype="float32", compression="gzip")
|
||||
|
||||
# Write metadata
|
||||
root.create_dataset("is_edited", (1,), dtype="uint8")
|
||||
substep_reasonings = root.create_dataset(
|
||||
"substep_reasonings", (episode_length,), dtype=h5py.string_dtype(encoding="utf-8"), compression="gzip"
|
||||
)
|
||||
root.create_dataset("language_raw", data=task)
|
||||
substep_reasonings[:] = [task] * episode_length
|
||||
|
||||
# Write additional data
|
||||
for name, array in data_dict.items():
|
||||
root[name][...] = array
|
||||
|
||||
|
||||
def json_to_h5(
|
||||
data_dirs: Path,
|
||||
output_dir: Path,
|
||||
robot_type: str,
|
||||
) -> None:
|
||||
"""Convert JSON episode data to HDF5 format."""
|
||||
dataset = JsonDataset(data_dirs, robot_type)
|
||||
h5_writer = H5Writer(output_dir)
|
||||
|
||||
for i in tqdm.tqdm(range(len(dataset))):
|
||||
episode = dataset.get_item(i)
|
||||
h5_writer.write_to_h5(episode)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
tyro.cli(json_to_h5)
|
||||
@@ -0,0 +1,341 @@
|
||||
"""
|
||||
Script Json to Lerobot.
|
||||
|
||||
# --raw-dir Corresponds to the directory of your JSON dataset
|
||||
# --repo-id Your unique repo ID on Hugging Face Hub
|
||||
# --robot_type The type of the robot used in the dataset (e.g., Unitree_Z1_Single, Unitree_Z1_Dual, Unitree_G1_Dex1, Unitree_G1_Dex3, Unitree_G1_Brainco, Unitree_G1_Inspire)
|
||||
# --push_to_hub Whether or not to upload the dataset to Hugging Face Hub (true or false)
|
||||
|
||||
python unitree_lerobot/utils/convert_unitree_json_to_lerobot.py \
|
||||
--raw-dir $HOME/datasets/g1_grabcube_double_hand \
|
||||
--repo-id your_name/g1_grabcube_double_hand \
|
||||
--robot_type Unitree_G1_Dex3 \
|
||||
--push_to_hub
|
||||
"""
|
||||
|
||||
import os
|
||||
import cv2
|
||||
import tqdm
|
||||
import tyro
|
||||
import json
|
||||
import glob
|
||||
import dataclasses
|
||||
import shutil
|
||||
import numpy as np
|
||||
from pathlib import Path
|
||||
from collections import defaultdict
|
||||
from typing import Literal, List, Dict, Optional
|
||||
|
||||
from lerobot.constants import HF_LEROBOT_HOME
|
||||
from lerobot.datasets.lerobot_dataset import LeRobotDataset
|
||||
|
||||
from unitree_lerobot.utils.constants import ROBOT_CONFIGS
|
||||
|
||||
|
||||
@dataclasses.dataclass(frozen=True)
|
||||
class DatasetConfig:
|
||||
use_videos: bool = True
|
||||
tolerance_s: float = 0.0001
|
||||
image_writer_processes: int = 10
|
||||
image_writer_threads: int = 5
|
||||
video_backend: str | None = None
|
||||
|
||||
|
||||
DEFAULT_DATASET_CONFIG = DatasetConfig()
|
||||
|
||||
|
||||
class JsonDataset:
|
||||
def __init__(self, data_dirs: Path, robot_type: str) -> None:
|
||||
"""
|
||||
Initialize the dataset for loading and processing HDF5 files containing robot manipulation data.
|
||||
|
||||
Args:
|
||||
data_dirs: Path to directory containing training data
|
||||
"""
|
||||
assert data_dirs is not None, "Data directory cannot be None"
|
||||
assert robot_type is not None, "Robot type cannot be None"
|
||||
self.data_dirs = data_dirs
|
||||
self.json_file = "data.json"
|
||||
|
||||
# Initialize paths and cache
|
||||
self._init_paths()
|
||||
self._init_cache()
|
||||
self.json_state_data_name = ROBOT_CONFIGS[robot_type].json_state_data_name
|
||||
self.json_action_data_name = ROBOT_CONFIGS[robot_type].json_action_data_name
|
||||
self.camera_to_image_key = ROBOT_CONFIGS[robot_type].camera_to_image_key
|
||||
|
||||
def _init_paths(self) -> None:
|
||||
"""Initialize episode and task paths."""
|
||||
|
||||
self.episode_paths = []
|
||||
self.task_paths = []
|
||||
|
||||
for task_path in glob.glob(os.path.join(self.data_dirs, "*")):
|
||||
if os.path.isdir(task_path):
|
||||
episode_paths = glob.glob(os.path.join(task_path, "*"))
|
||||
if episode_paths:
|
||||
self.task_paths.append(task_path)
|
||||
self.episode_paths.extend(episode_paths)
|
||||
|
||||
self.episode_paths = sorted(self.episode_paths)
|
||||
self.episode_ids = list(range(len(self.episode_paths)))
|
||||
|
||||
def __len__(self) -> int:
|
||||
"""Return the number of episodes in the dataset."""
|
||||
return len(self.episode_paths)
|
||||
|
||||
def _init_cache(self) -> List:
|
||||
"""Initialize data cache if enabled."""
|
||||
|
||||
self.episodes_data_cached = []
|
||||
for episode_path in tqdm.tqdm(self.episode_paths, desc="Loading Cache Json"):
|
||||
json_path = os.path.join(episode_path, self.json_file)
|
||||
with open(json_path, "r", encoding="utf-8") as jsonf:
|
||||
self.episodes_data_cached.append(json.load(jsonf))
|
||||
|
||||
print(f"==> Cached {len(self.episodes_data_cached)} episodes")
|
||||
|
||||
return self.episodes_data_cached
|
||||
|
||||
def _extract_data(self, episode_data: Dict, key: str, parts: List[str]) -> np.ndarray:
|
||||
"""
|
||||
Extract data from episode dictionary for specified parts.
|
||||
|
||||
Args:
|
||||
episode_data: Dictionary containing episode data
|
||||
key: Data key to extract ('states' or 'actions')
|
||||
parts: List of parts to include ('left_arm', 'right_arm')
|
||||
|
||||
Returns:
|
||||
Concatenated numpy array of the requested data
|
||||
"""
|
||||
result = []
|
||||
for sample_data in episode_data["data"]:
|
||||
data_array = np.array([], dtype=np.float32)
|
||||
for part in parts:
|
||||
if part in sample_data[key] and sample_data[key][part] is not None:
|
||||
qpos = np.array(sample_data[key][part]["qpos"], dtype=np.float32)
|
||||
data_array = np.concatenate([data_array, qpos])
|
||||
result.append(data_array)
|
||||
return np.array(result)
|
||||
|
||||
def _parse_images(self, episode_path: str, episode_data) -> dict[str, list[np.ndarray]]:
|
||||
"""Load and stack images for a given camera key."""
|
||||
|
||||
images = defaultdict(list)
|
||||
|
||||
keys = episode_data["data"][0]["colors"].keys()
|
||||
cameras = [key for key in keys if "depth" not in key]
|
||||
|
||||
for camera in cameras:
|
||||
image_key = self.camera_to_image_key.get(camera)
|
||||
if image_key is None:
|
||||
continue
|
||||
|
||||
for sample_data in episode_data["data"]:
|
||||
relative_path = sample_data["colors"].get(camera)
|
||||
if not relative_path:
|
||||
continue
|
||||
|
||||
image_path = os.path.join(episode_path, relative_path)
|
||||
if not os.path.exists(image_path):
|
||||
raise FileNotFoundError(f"Image path does not exist: {image_path}")
|
||||
|
||||
image = cv2.imread(image_path)
|
||||
if image is None:
|
||||
raise RuntimeError(f"Failed to read image: {image_path}")
|
||||
|
||||
image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
|
||||
images[image_key].append(image_rgb)
|
||||
|
||||
return images
|
||||
|
||||
def get_item(
|
||||
self,
|
||||
index: Optional[int] = None,
|
||||
) -> Dict:
|
||||
"""Get a training sample from the dataset."""
|
||||
|
||||
file_path = np.random.choice(self.episode_paths) if index is None else self.episode_paths[index]
|
||||
episode_data = self.episodes_data_cached[index]
|
||||
|
||||
# Load state and action data
|
||||
action = self._extract_data(episode_data, "actions", self.json_action_data_name)
|
||||
state = self._extract_data(episode_data, "states", self.json_state_data_name)
|
||||
episode_length = len(state)
|
||||
state_dim = state.shape[1] if len(state.shape) == 2 else state.shape[0]
|
||||
action_dim = action.shape[1] if len(action.shape) == 2 else state.shape[0]
|
||||
|
||||
# Load task description
|
||||
task = episode_data.get("text", {}).get("goal", "")
|
||||
|
||||
# Load camera images
|
||||
cameras = self._parse_images(file_path, episode_data)
|
||||
|
||||
# Extract camera configuration
|
||||
cam_height, cam_width = next(img for imgs in cameras.values() if imgs for img in imgs).shape[:2]
|
||||
data_cfg = {
|
||||
"camera_names": list(cameras.keys()),
|
||||
"cam_height": cam_height,
|
||||
"cam_width": cam_width,
|
||||
"state_dim": state_dim,
|
||||
"action_dim": action_dim,
|
||||
}
|
||||
|
||||
return {
|
||||
"episode_index": index,
|
||||
"episode_length": episode_length,
|
||||
"state": state,
|
||||
"action": action,
|
||||
"cameras": cameras,
|
||||
"task": task,
|
||||
"data_cfg": data_cfg,
|
||||
}
|
||||
|
||||
|
||||
def create_empty_dataset(
|
||||
repo_id: str,
|
||||
robot_type: str,
|
||||
mode: Literal["video", "image"] = "video",
|
||||
*,
|
||||
has_velocity: bool = False,
|
||||
has_effort: bool = False,
|
||||
dataset_config: DatasetConfig = DEFAULT_DATASET_CONFIG,
|
||||
) -> LeRobotDataset:
|
||||
motors = ROBOT_CONFIGS[robot_type].motors
|
||||
cameras = ROBOT_CONFIGS[robot_type].cameras
|
||||
|
||||
features = {
|
||||
"observation.state": {
|
||||
"dtype": "float32",
|
||||
"shape": (len(motors),),
|
||||
"names": [
|
||||
motors,
|
||||
],
|
||||
},
|
||||
"action": {
|
||||
"dtype": "float32",
|
||||
"shape": (len(motors),),
|
||||
"names": [
|
||||
motors,
|
||||
],
|
||||
},
|
||||
}
|
||||
|
||||
if has_velocity:
|
||||
features["observation.velocity"] = {
|
||||
"dtype": "float32",
|
||||
"shape": (len(motors),),
|
||||
"names": [
|
||||
motors,
|
||||
],
|
||||
}
|
||||
|
||||
if has_effort:
|
||||
features["observation.effort"] = {
|
||||
"dtype": "float32",
|
||||
"shape": (len(motors),),
|
||||
"names": [
|
||||
motors,
|
||||
],
|
||||
}
|
||||
|
||||
for cam in cameras:
|
||||
features[f"observation.images.{cam}"] = {
|
||||
"dtype": mode,
|
||||
"shape": (480, 640, 3),
|
||||
"names": [
|
||||
"height",
|
||||
"width",
|
||||
"channel",
|
||||
],
|
||||
}
|
||||
|
||||
if Path(HF_LEROBOT_HOME / repo_id).exists():
|
||||
shutil.rmtree(HF_LEROBOT_HOME / repo_id)
|
||||
|
||||
return LeRobotDataset.create(
|
||||
repo_id=repo_id,
|
||||
fps=30,
|
||||
robot_type=robot_type,
|
||||
features=features,
|
||||
use_videos=dataset_config.use_videos,
|
||||
tolerance_s=dataset_config.tolerance_s,
|
||||
image_writer_processes=dataset_config.image_writer_processes,
|
||||
image_writer_threads=dataset_config.image_writer_threads,
|
||||
video_backend=dataset_config.video_backend,
|
||||
)
|
||||
|
||||
|
||||
def populate_dataset(
|
||||
dataset: LeRobotDataset,
|
||||
raw_dir: Path,
|
||||
robot_type: str,
|
||||
) -> LeRobotDataset:
|
||||
json_dataset = JsonDataset(raw_dir, robot_type)
|
||||
for i in tqdm.tqdm(range(len(json_dataset))):
|
||||
episode = json_dataset.get_item(i)
|
||||
|
||||
state = episode["state"]
|
||||
action = episode["action"]
|
||||
cameras = episode["cameras"]
|
||||
task = episode["task"]
|
||||
episode_length = episode["episode_length"]
|
||||
|
||||
num_frames = episode_length
|
||||
for i in range(num_frames):
|
||||
frame = {
|
||||
"observation.state": state[i],
|
||||
"action": action[i],
|
||||
}
|
||||
|
||||
for camera, img_array in cameras.items():
|
||||
frame[f"observation.images.{camera}"] = img_array[i]
|
||||
|
||||
dataset.add_frame(frame, task=task)
|
||||
|
||||
dataset.save_episode()
|
||||
|
||||
return dataset
|
||||
|
||||
|
||||
def json_to_lerobot(
|
||||
raw_dir: Path,
|
||||
repo_id: str,
|
||||
robot_type: str, # e.g., Unitree_Z1_Single, Unitree_Z1_Dual, Unitree_G1_Dex1, Unitree_G1_Dex3, Unitree_G1_Brainco, Unitree_G1_Inspire
|
||||
*,
|
||||
push_to_hub: bool = False,
|
||||
mode: Literal["video", "image"] = "video",
|
||||
dataset_config: DatasetConfig = DEFAULT_DATASET_CONFIG,
|
||||
):
|
||||
if (HF_LEROBOT_HOME / repo_id).exists():
|
||||
shutil.rmtree(HF_LEROBOT_HOME / repo_id)
|
||||
|
||||
dataset = create_empty_dataset(
|
||||
repo_id,
|
||||
robot_type=robot_type,
|
||||
mode=mode,
|
||||
has_effort=False,
|
||||
has_velocity=False,
|
||||
dataset_config=dataset_config,
|
||||
)
|
||||
dataset = populate_dataset(
|
||||
dataset,
|
||||
raw_dir,
|
||||
robot_type=robot_type,
|
||||
)
|
||||
|
||||
if push_to_hub:
|
||||
dataset.push_to_hub(upload_large_folder=True)
|
||||
|
||||
|
||||
def local_push_to_hub(
|
||||
repo_id: str,
|
||||
root_path: Path,
|
||||
):
|
||||
dataset = LeRobotDataset(repo_id=repo_id, root=root_path)
|
||||
dataset.push_to_hub(upload_large_folder=True)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
tyro.cli(json_to_lerobot)
|
||||
@@ -0,0 +1,40 @@
|
||||
"""
|
||||
Script to convert Unitree json data to the LeRobot dataset v2.0 format.
|
||||
|
||||
python unitree_lerobot/utils/sort_and_rename_folders.py --data_dir $HOME/datasets/g1_grabcube_double_hand
|
||||
"""
|
||||
|
||||
import os
|
||||
import tyro
|
||||
import uuid
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
def sort_and_rename_folders(data_dir: Path) -> None:
|
||||
# Get the list of folders sorted by name
|
||||
folders = sorted([f for f in os.listdir(data_dir) if os.path.isdir(os.path.join(data_dir, f))])
|
||||
|
||||
temp_mapping = {}
|
||||
|
||||
# First, rename all folders to unique temporary names
|
||||
for folder in folders:
|
||||
temp_name = str(uuid.uuid4())
|
||||
original_path = os.path.join(data_dir, folder)
|
||||
temp_path = os.path.join(data_dir, temp_name)
|
||||
os.rename(original_path, temp_path)
|
||||
temp_mapping[temp_name] = folder
|
||||
|
||||
# Then, rename them to the final target names
|
||||
start_number = 0
|
||||
for temp_name, original_folder in temp_mapping.items():
|
||||
new_folder_name = f"episode_{start_number:04d}"
|
||||
temp_path = os.path.join(data_dir, temp_name)
|
||||
new_path = os.path.join(data_dir, new_folder_name)
|
||||
os.rename(temp_path, new_path)
|
||||
start_number += 1
|
||||
|
||||
print("The folders have been successfully renamed.")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
tyro.cli(sort_and_rename_folders)
|
||||
Reference in New Issue
Block a user