mirror of
https://github.com/huggingface/lerobot.git
synced 2026-05-21 03:30:10 +00:00
sync recent changes
This commit is contained in:
@@ -0,0 +1,341 @@
|
||||
"""
|
||||
Convert a Unitree JSON dataset into the LeRobot dataset format.
|
||||
|
||||
# --raw-dir Corresponds to the directory of your JSON dataset
|
||||
# --repo-id Your unique repo ID on Hugging Face Hub
|
||||
# --robot_type The type of the robot used in the dataset (e.g., Unitree_Z1_Single, Unitree_Z1_Dual, Unitree_G1_Dex1, Unitree_G1_Dex3, Unitree_G1_Brainco, Unitree_G1_Inspire)
|
||||
# --push_to_hub Whether or not to upload the dataset to Hugging Face Hub (true or false)
|
||||
|
||||
python unitree_lerobot/utils/convert_unitree_json_to_lerobot.py \
|
||||
--raw-dir $HOME/datasets/g1_grabcube_double_hand \
|
||||
--repo-id your_name/g1_grabcube_double_hand \
|
||||
--robot_type Unitree_G1_Dex3 \
|
||||
--push_to_hub
|
||||
"""
|
||||
|
||||
import os
|
||||
import cv2
|
||||
import tqdm
|
||||
import tyro
|
||||
import json
|
||||
import glob
|
||||
import dataclasses
|
||||
import shutil
|
||||
import numpy as np
|
||||
from pathlib import Path
|
||||
from collections import defaultdict
|
||||
from typing import Literal, List, Dict, Optional
|
||||
|
||||
from lerobot.constants import HF_LEROBOT_HOME
|
||||
from lerobot.datasets.lerobot_dataset import LeRobotDataset
|
||||
|
||||
from unitree_lerobot.utils.constants import ROBOT_CONFIGS
|
||||
|
||||
|
||||
@dataclasses.dataclass(frozen=True)
class DatasetConfig:
    """Immutable bundle of options forwarded to ``LeRobotDataset.create``.

    Each field is passed through under the keyword argument of the same name
    by ``create_empty_dataset``.
    """

    # Forwarded as ``use_videos``.
    use_videos: bool = True
    # Forwarded as ``tolerance_s``.
    tolerance_s: float = 0.0001
    # Forwarded as ``image_writer_processes``.
    image_writer_processes: int = 10
    # Forwarded as ``image_writer_threads``.
    image_writer_threads: int = 5
    # Forwarded as ``video_backend``; ``None`` is passed through unchanged.
    video_backend: Optional[str] = None


# Shared default instance; safe to reuse as a default argument because the
# dataclass is frozen.
DEFAULT_DATASET_CONFIG = DatasetConfig()
|
||||
|
||||
|
||||
class JsonDataset:
    """Episode-level reader for a directory of JSON robot recordings.

    The expected layout is ``<data_dirs>/<task>/<episode>/data.json``. Every
    ``data.json`` is parsed at construction time and kept in memory; images
    are only read from disk when an episode is requested via ``get_item``.
    """

    def __init__(self, data_dirs: Path, robot_type: str) -> None:
        """
        Index the episodes under ``data_dirs`` and load their JSON files.

        Args:
            data_dirs: Root directory whose sub-directories are tasks, each
                holding one entry per episode.
            robot_type: Key into ``ROBOT_CONFIGS`` selecting which state/action
                parts and which cameras are read from each sample.

        Raises:
            AssertionError: If ``data_dirs`` or ``robot_type`` is ``None``.
        """
        assert data_dirs is not None, "Data directory cannot be None"
        assert robot_type is not None, "Robot type cannot be None"
        self.data_dirs = data_dirs
        self.json_file = "data.json"

        # Resolve the robot configuration first so that an unknown robot type
        # fails immediately instead of after every episode has been loaded.
        robot_config = ROBOT_CONFIGS[robot_type]
        self.json_state_data_name = robot_config.json_state_data_name
        self.json_action_data_name = robot_config.json_action_data_name
        self.camera_to_image_key = robot_config.camera_to_image_key

        # Initialize paths and cache
        self._init_paths()
        self._init_cache()

    def _init_paths(self) -> None:
        """Collect task directories and the (sorted) episode paths inside them."""

        self.episode_paths = []
        self.task_paths = []

        for task_path in glob.glob(os.path.join(self.data_dirs, "*")):
            if os.path.isdir(task_path):
                episode_paths = glob.glob(os.path.join(task_path, "*"))
                # Task directories without any entry are ignored entirely.
                if episode_paths:
                    self.task_paths.append(task_path)
                    self.episode_paths.extend(episode_paths)

        # Sorting gives a stable episode order regardless of glob's ordering.
        self.episode_paths = sorted(self.episode_paths)
        self.episode_ids = list(range(len(self.episode_paths)))

    def __len__(self) -> int:
        """Return the number of episodes in the dataset."""
        return len(self.episode_paths)

    def _init_cache(self) -> List:
        """Parse every episode's JSON file into memory and return the list.

        The cache is index-aligned with ``self.episode_paths``.
        """

        self.episodes_data_cached = []
        for episode_path in tqdm.tqdm(self.episode_paths, desc="Loading Cache Json"):
            json_path = os.path.join(episode_path, self.json_file)
            with open(json_path, "r", encoding="utf-8") as jsonf:
                self.episodes_data_cached.append(json.load(jsonf))

        print(f"==> Cached {len(self.episodes_data_cached)} episodes")

        return self.episodes_data_cached

    def _extract_data(self, episode_data: Dict, key: str, parts: List[str]) -> np.ndarray:
        """
        Build one float32 row per sample by concatenating the ``qpos`` of each part.

        Args:
            episode_data: Parsed episode dictionary; its ``"data"`` entry is the
                list of per-frame samples.
            key: Section of each sample to read (e.g. ``"states"`` or ``"actions"``).
            parts: Part names to concatenate, in order. Parts that are absent
                from a sample, or whose value is ``None``, are skipped.

        Returns:
            ``np.array`` of the per-sample rows.
        """
        result = []
        for sample_data in episode_data["data"]:
            # Start from an empty float32 array so a sample with no matching
            # parts still contributes a (zero-length) float32 row.
            data_array = np.array([], dtype=np.float32)
            for part in parts:
                if part in sample_data[key] and sample_data[key][part] is not None:
                    qpos = np.array(sample_data[key][part]["qpos"], dtype=np.float32)
                    data_array = np.concatenate([data_array, qpos])
            result.append(data_array)
        return np.array(result)

    @staticmethod
    def _feature_dim(array: np.ndarray) -> int:
        """Return the per-frame width of ``array``: axis 1 if 2-D, else axis 0."""
        return array.shape[1] if len(array.shape) == 2 else array.shape[0]

    def _parse_images(self, episode_path: str, episode_data) -> dict[str, list[np.ndarray]]:
        """
        Read the color images of an episode, grouped by image key.

        Camera names are taken from the first sample's ``"colors"`` mapping;
        names containing ``"depth"`` and names missing from
        ``self.camera_to_image_key`` are ignored. Images are converted from
        OpenCV's BGR order to RGB.

        Note:
            Samples whose path for a camera is missing or empty are skipped, so
            a camera's list can be shorter than the episode.

        Raises:
            FileNotFoundError: If a referenced image file does not exist.
            RuntimeError: If OpenCV fails to decode an existing file.
        """

        images = defaultdict(list)

        keys = episode_data["data"][0]["colors"].keys()
        cameras = [key for key in keys if "depth" not in key]

        for camera in cameras:
            image_key = self.camera_to_image_key.get(camera)
            if image_key is None:
                continue

            for sample_data in episode_data["data"]:
                relative_path = sample_data["colors"].get(camera)
                if not relative_path:
                    continue

                image_path = os.path.join(episode_path, relative_path)
                if not os.path.exists(image_path):
                    raise FileNotFoundError(f"Image path does not exist: {image_path}")

                # cv2.imread signals failure by returning None, not by raising.
                image = cv2.imread(image_path)
                if image is None:
                    raise RuntimeError(f"Failed to read image: {image_path}")

                image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
                images[image_key].append(image_rgb)

        return images

    def get_item(
        self,
        index: Optional[int] = None,
    ) -> Dict:
        """
        Load one complete episode (states, actions, images and task text).

        Args:
            index: Episode index. When ``None`` a random episode is chosen.

        Returns:
            Dictionary with ``episode_index``, ``episode_length``, ``state``,
            ``action``, ``cameras``, ``task`` and ``data_cfg``.

        Raises:
            ValueError: If the episode yields no camera images, so the image
                size cannot be determined.
        """

        # Pick the index first so the path and the cached JSON always refer to
        # the same episode (indexing the cache with None would fail).
        if index is None:
            index = int(np.random.randint(len(self.episode_paths)))

        file_path = self.episode_paths[index]
        episode_data = self.episodes_data_cached[index]

        # Load state and action data
        action = self._extract_data(episode_data, "actions", self.json_action_data_name)
        state = self._extract_data(episode_data, "states", self.json_state_data_name)
        episode_length = len(state)
        state_dim = self._feature_dim(state)
        # Derived from the action array itself, not from the state array.
        action_dim = self._feature_dim(action)

        # Load task description
        task = episode_data.get("text", {}).get("goal", "")

        # Load camera images
        cameras = self._parse_images(file_path, episode_data)

        # Extract camera configuration from the first available image.
        first_image = next((img for imgs in cameras.values() for img in imgs), None)
        if first_image is None:
            raise ValueError(f"No camera images found for episode: {file_path}")
        cam_height, cam_width = first_image.shape[:2]

        data_cfg = {
            "camera_names": list(cameras.keys()),
            "cam_height": cam_height,
            "cam_width": cam_width,
            "state_dim": state_dim,
            "action_dim": action_dim,
        }

        return {
            "episode_index": index,
            "episode_length": episode_length,
            "state": state,
            "action": action,
            "cameras": cameras,
            "task": task,
            "data_cfg": data_cfg,
        }
|
||||
|
||||
|
||||
def create_empty_dataset(
    repo_id: str,
    robot_type: str,
    mode: Literal["video", "image"] = "video",
    *,
    has_velocity: bool = False,
    has_effort: bool = False,
    dataset_config: DatasetConfig = DEFAULT_DATASET_CONFIG,
    fps: int = 30,
    image_shape: tuple[int, int, int] = (480, 640, 3),
) -> LeRobotDataset:
    """
    Create a fresh, empty ``LeRobotDataset`` whose features match ``robot_type``.

    Any existing directory at ``HF_LEROBOT_HOME / repo_id`` is deleted first.

    Args:
        repo_id: Dataset repository id; also the sub-path under ``HF_LEROBOT_HOME``.
        robot_type: Key into ``ROBOT_CONFIGS`` providing ``motors`` and ``cameras``.
        mode: Stored as the ``dtype`` of every camera feature.
        has_velocity: Also declare an ``observation.velocity`` feature.
        has_effort: Also declare an ``observation.effort`` feature.
        dataset_config: Writer/video options forwarded to ``LeRobotDataset.create``.
        fps: Frame rate recorded in the dataset (defaults to the previous
            hard-coded value of 30).
        image_shape: ``(height, width, channel)`` declared for every camera
            feature (defaults to the previous hard-coded ``(480, 640, 3)``).

    Returns:
        The newly created dataset.
    """
    robot_config = ROBOT_CONFIGS[robot_type]
    motors = robot_config.motors
    cameras = robot_config.cameras

    def motor_feature() -> dict:
        # One float32 value per motor; "names" deliberately keeps the original
        # nesting (a one-element list wrapping the motor list).
        return {
            "dtype": "float32",
            "shape": (len(motors),),
            "names": [
                motors,
            ],
        }

    features = {
        "observation.state": motor_feature(),
        "action": motor_feature(),
    }

    if has_velocity:
        features["observation.velocity"] = motor_feature()

    if has_effort:
        features["observation.effort"] = motor_feature()

    for cam in cameras:
        features[f"observation.images.{cam}"] = {
            "dtype": mode,
            "shape": tuple(image_shape),
            "names": [
                "height",
                "width",
                "channel",
            ],
        }

    # Start from a clean slate: remove any previous output for this repo id.
    dataset_root = Path(HF_LEROBOT_HOME / repo_id)
    if dataset_root.exists():
        shutil.rmtree(dataset_root)

    return LeRobotDataset.create(
        repo_id=repo_id,
        fps=fps,
        robot_type=robot_type,
        features=features,
        use_videos=dataset_config.use_videos,
        tolerance_s=dataset_config.tolerance_s,
        image_writer_processes=dataset_config.image_writer_processes,
        image_writer_threads=dataset_config.image_writer_threads,
        video_backend=dataset_config.video_backend,
    )
|
||||
|
||||
|
||||
def populate_dataset(
    dataset: LeRobotDataset,
    raw_dir: Path,
    robot_type: str,
) -> LeRobotDataset:
    """
    Copy every episode found under ``raw_dir`` into ``dataset``.

    Each episode is read through ``JsonDataset.get_item``, added frame by
    frame, and closed with ``dataset.save_episode()``.

    Args:
        dataset: Target dataset that receives the frames.
        raw_dir: Root directory of the JSON recordings.
        robot_type: Robot type forwarded to ``JsonDataset``.

    Returns:
        The same ``dataset`` object, now populated.
    """
    source = JsonDataset(raw_dir, robot_type)
    episode_count = len(source)

    for episode_idx in tqdm.tqdm(range(episode_count)):
        episode = source.get_item(episode_idx)

        states = episode["state"]
        actions = episode["action"]
        camera_frames = episode["cameras"]
        task_text = episode["task"]
        frame_count = episode["episode_length"]

        for frame_idx in range(frame_count):
            frame = {
                "observation.state": states[frame_idx],
                "action": actions[frame_idx],
            }
            # One image per camera for this time step.
            frame.update(
                {
                    f"observation.images.{camera}": img_array[frame_idx]
                    for camera, img_array in camera_frames.items()
                }
            )

            dataset.add_frame(frame, task=task_text)

        dataset.save_episode()

    return dataset
|
||||
|
||||
|
||||
def json_to_lerobot(
    raw_dir: Path,
    repo_id: str,
    robot_type: str,  # e.g., Unitree_Z1_Single, Unitree_Z1_Dual, Unitree_G1_Dex1, Unitree_G1_Dex3, Unitree_G1_Brainco, Unitree_G1_Inspire
    *,
    push_to_hub: bool = False,
    mode: Literal["video", "image"] = "video",
    dataset_config: DatasetConfig = DEFAULT_DATASET_CONFIG,
):
    """
    Convert the JSON recordings under ``raw_dir`` into a LeRobot dataset.

    Any existing output at ``HF_LEROBOT_HOME / repo_id`` is removed, an empty
    dataset is created and filled from ``raw_dir``, and the result is
    optionally uploaded.

    Args:
        raw_dir: Root directory of the JSON recordings.
        repo_id: Target dataset repository id.
        robot_type: Key of the robot configuration to use.
        push_to_hub: Upload the finished dataset when true.
        mode: Storage dtype used for camera features.
        dataset_config: Writer/video options for dataset creation.
    """
    output_root = HF_LEROBOT_HOME / repo_id
    if output_root.exists():
        shutil.rmtree(output_root)

    empty_dataset = create_empty_dataset(
        repo_id,
        robot_type=robot_type,
        mode=mode,
        has_effort=False,
        has_velocity=False,
        dataset_config=dataset_config,
    )
    filled_dataset = populate_dataset(
        empty_dataset,
        raw_dir,
        robot_type=robot_type,
    )

    # Nothing more to do unless an upload was requested.
    if not push_to_hub:
        return

    filled_dataset.push_to_hub(upload_large_folder=True)
|
||||
|
||||
|
||||
def local_push_to_hub(
    repo_id: str,
    root_path: Path,
):
    """
    Open an existing local dataset and upload it.

    Args:
        repo_id: Dataset repository id.
        root_path: Local directory passed to ``LeRobotDataset`` as ``root``.
    """
    LeRobotDataset(repo_id=repo_id, root=root_path).push_to_hub(upload_large_folder=True)
|
||||
|
||||
|
||||
# Script entry point: only runs when the file is executed directly.
if __name__ == "__main__":
    # Hand json_to_lerobot to tyro, which exposes it as the command-line
    # interface shown in the module docstring.
    tyro.cli(json_to_lerobot)
|
||||
Reference in New Issue
Block a user