sync recent changes

2026-07-23 09:46:00 +00:00 · 2025-11-21 14:13:05 +01:00
parent e5cae6be64
commit 9a052566a3
326 changed files with 20122 additions and 15 deletions
@@ -0,0 +1,341 @@
+"""
+Convert a Unitree JSON dataset into the LeRobot dataset format.
+
+# --raw-dir     Corresponds to the directory of your JSON dataset
+# --repo-id     Your unique repo ID on Hugging Face Hub
+# --robot_type  The type of the robot used in the dataset (e.g., Unitree_Z1_Single, Unitree_Z1_Dual, Unitree_G1_Dex1, Unitree_G1_Dex3, Unitree_G1_Brainco, Unitree_G1_Inspire)
+# --push_to_hub Whether or not to upload the dataset to Hugging Face Hub (true or false)
+
+python unitree_lerobot/utils/convert_unitree_json_to_lerobot.py \
+    --raw-dir $HOME/datasets/g1_grabcube_double_hand \
+    --repo-id your_name/g1_grabcube_double_hand \
+    --robot_type Unitree_G1_Dex3 \
+    --push_to_hub
+"""
+
+import os
+import cv2
+import tqdm
+import tyro
+import json
+import glob
+import dataclasses
+import shutil
+import numpy as np
+from pathlib import Path
+from collections import defaultdict
+from typing import Literal, List, Dict, Optional
+
+from lerobot.constants import HF_LEROBOT_HOME
+from lerobot.datasets.lerobot_dataset import LeRobotDataset
+
+from unitree_lerobot.utils.constants import ROBOT_CONFIGS
+
+
+@dataclasses.dataclass(frozen=True)
+class DatasetConfig:
+    """Immutable bundle of options that create_empty_dataset forwards to LeRobotDataset.create."""
+
+    # Forwarded as `use_videos`.
+    use_videos: bool = True
+    # Forwarded as `tolerance_s`; presumably a timestamp tolerance in seconds -- confirm against lerobot.
+    tolerance_s: float = 0.0001
+    # Forwarded as `image_writer_processes`.
+    image_writer_processes: int = 10
+    # Forwarded as `image_writer_threads`.
+    image_writer_threads: int = 5
+    # Forwarded as `video_backend`; None leaves the choice to LeRobotDataset.create.
+    video_backend: str | None = None
+
+
+# Shared default instance used as the `dataset_config` default argument in this module.
+DEFAULT_DATASET_CONFIG = DatasetConfig()
+
+
+class JsonDataset:
+    """
+    Episode loader for a JSON dataset laid out as ``<root>/<task>/<episode>/data.json``.
+
+    On construction every episode directory is indexed and its ``data.json`` is loaded
+    into memory. ``get_item`` then turns one cached episode into state/action arrays
+    plus the RGB frames of every camera known to the robot configuration.
+    """
+
+    def __init__(self, data_dirs: Path, robot_type: str) -> None:
+        """
+        Index every episode under ``data_dirs`` and cache its JSON payload.
+
+        Args:
+            data_dirs: Root directory whose sub-directories are tasks, each holding episode directories.
+            robot_type: Key into ``ROBOT_CONFIGS`` selecting the state/action part names
+                and the camera-name to image-key mapping.
+        """
+        assert data_dirs is not None, "Data directory cannot be None"
+        assert robot_type is not None, "Robot type cannot be None"
+        self.data_dirs = data_dirs
+        self.json_file = "data.json"
+
+        # Resolve the robot configuration first so that an unknown robot type fails
+        # immediately instead of after the (potentially slow) JSON cache is built.
+        robot_config = ROBOT_CONFIGS[robot_type]
+        self.json_state_data_name = robot_config.json_state_data_name
+        self.json_action_data_name = robot_config.json_action_data_name
+        self.camera_to_image_key = robot_config.camera_to_image_key
+
+        # Initialize paths and cache
+        self._init_paths()
+        self._init_cache()
+
+    def _init_paths(self) -> None:
+        """Collect task directories and their episode directories (episodes sorted by path)."""
+
+        self.episode_paths = []
+        self.task_paths = []
+
+        for task_path in glob.glob(os.path.join(self.data_dirs, "*")):
+            if not os.path.isdir(task_path):
+                continue
+
+            # Only directories can be episodes; stray files next to them (notes, logs, ...)
+            # would otherwise be treated as episodes and fail when data.json is opened.
+            episode_paths = [path for path in glob.glob(os.path.join(task_path, "*")) if os.path.isdir(path)]
+            if episode_paths:
+                self.task_paths.append(task_path)
+                self.episode_paths.extend(episode_paths)
+
+        self.episode_paths = sorted(self.episode_paths)
+        self.episode_ids = list(range(len(self.episode_paths)))
+
+    def __len__(self) -> int:
+        """Return the number of episodes in the dataset."""
+        return len(self.episode_paths)
+
+    def _init_cache(self) -> List:
+        """
+        Load ``data.json`` of every episode into memory, in ``episode_paths`` order.
+
+        Returns:
+            The list of parsed JSON documents (also stored on ``self.episodes_data_cached``).
+        """
+
+        self.episodes_data_cached = []
+        for episode_path in tqdm.tqdm(self.episode_paths, desc="Loading Cache Json"):
+            json_path = os.path.join(episode_path, self.json_file)
+            with open(json_path, "r", encoding="utf-8") as jsonf:
+                self.episodes_data_cached.append(json.load(jsonf))
+
+        print(f"==> Cached {len(self.episodes_data_cached)} episodes")
+
+        return self.episodes_data_cached
+
+    def _extract_data(self, episode_data: Dict, key: str, parts: List[str]) -> np.ndarray:
+        """
+        Extract data from episode dictionary for specified parts.
+
+        Args:
+            episode_data: Dictionary containing episode data
+            key: Data key to extract ('states' or 'actions')
+            parts: List of parts to include ('left_arm', 'right_arm')
+
+        Returns:
+            Array with one row per sample; each row is the float32 concatenation of the
+            ``qpos`` values of the requested parts, in the order given by ``parts``.
+            Parts that are absent or ``None`` in a sample are skipped.
+        """
+        rows = []
+        for sample_data in episode_data["data"]:
+            sample_parts = sample_data[key]
+            # Start from an empty float32 array so a sample without any part still yields a row.
+            chunks = [np.array([], dtype=np.float32)]
+            for part in parts:
+                if part in sample_parts and sample_parts[part] is not None:
+                    chunks.append(np.array(sample_parts[part]["qpos"], dtype=np.float32))
+            rows.append(np.concatenate(chunks))
+        return np.array(rows)
+
+    def _parse_images(self, episode_path: str, episode_data) -> dict[str, list[np.ndarray]]:
+        """
+        Load the RGB frames of every mapped, non-depth camera of one episode.
+
+        Camera names are taken from the ``colors`` entry of the first sample. Cameras
+        without an entry in ``camera_to_image_key`` are ignored, as are cameras that
+        have no image path in any sample.
+
+        Args:
+            episode_path: Directory of the episode; image paths in the JSON are relative to it.
+            episode_data: Parsed ``data.json`` of the episode.
+
+        Returns:
+            Mapping from image key to the list of RGB frames, one per sample.
+
+        Raises:
+            FileNotFoundError: If a referenced image file does not exist.
+            RuntimeError: If OpenCV cannot decode an image file.
+            ValueError: If a camera has images for only some of the samples, which would
+                shift its frames relative to the state/action rows.
+        """
+
+        images = defaultdict(list)
+        samples = episode_data["data"]
+
+        keys = samples[0]["colors"].keys()
+        cameras = [key for key in keys if "depth" not in key]
+
+        for camera in cameras:
+            image_key = self.camera_to_image_key.get(camera)
+            if image_key is None:
+                continue
+
+            frames = []
+            for sample_data in samples:
+                relative_path = sample_data["colors"].get(camera)
+                if not relative_path:
+                    continue
+
+                image_path = os.path.join(episode_path, relative_path)
+                if not os.path.exists(image_path):
+                    raise FileNotFoundError(f"Image path does not exist: {image_path}")
+
+                image = cv2.imread(image_path)
+                if image is None:
+                    raise RuntimeError(f"Failed to read image: {image_path}")
+
+                # OpenCV decodes to BGR; convert to RGB before storing.
+                frames.append(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
+
+            if not frames:
+                # Camera listed but never recorded: leave it out entirely.
+                continue
+
+            if len(frames) != len(samples):
+                # A partially recorded camera cannot be indexed by frame number any more.
+                raise ValueError(
+                    f"Camera '{camera}' has {len(frames)} images for {len(samples)} samples in episode: {episode_path}"
+                )
+
+            images[image_key].extend(frames)
+
+        return images
+
+    def get_item(
+        self,
+        index: Optional[int] = None,
+    ) -> Dict:
+        """
+        Build the arrays and camera frames of one episode.
+
+        Args:
+            index: Position of the episode in ``episode_paths``. When ``None`` a random
+                episode is chosen.
+
+        Returns:
+            Dictionary with ``episode_index``, ``episode_length``, ``state``, ``action``,
+            ``cameras`` (image key -> list of RGB frames), ``task`` (goal text, empty if
+            missing) and ``data_cfg`` (camera names, image size, state/action dimensions).
+
+        Raises:
+            ValueError: If the episode yields no camera image at all.
+        """
+
+        if index is None:
+            # Draw an index (not a path) so the path and the cached JSON always belong together.
+            index = int(np.random.randint(len(self.episode_paths)))
+        file_path = self.episode_paths[index]
+        episode_data = self.episodes_data_cached[index]
+
+        # Load state and action data
+        action = self._extract_data(episode_data, "actions", self.json_action_data_name)
+        state = self._extract_data(episode_data, "states", self.json_state_data_name)
+        episode_length = len(state)
+        state_dim = state.shape[1] if len(state.shape) == 2 else state.shape[0]
+        action_dim = action.shape[1] if len(action.shape) == 2 else action.shape[0]
+
+        # Load task description
+        task = episode_data.get("text", {}).get("goal", "")
+
+        # Load camera images
+        cameras = self._parse_images(file_path, episode_data)
+
+        # Extract camera configuration from the first available frame
+        first_image = next((img for imgs in cameras.values() for img in imgs), None)
+        if first_image is None:
+            raise ValueError(f"No camera images found for episode: {file_path}")
+        cam_height, cam_width = first_image.shape[:2]
+        data_cfg = {
+            "camera_names": list(cameras.keys()),
+            "cam_height": cam_height,
+            "cam_width": cam_width,
+            "state_dim": state_dim,
+            "action_dim": action_dim,
+        }
+
+        return {
+            "episode_index": index,
+            "episode_length": episode_length,
+            "state": state,
+            "action": action,
+            "cameras": cameras,
+            "task": task,
+            "data_cfg": data_cfg,
+        }
+
+
+def create_empty_dataset(
+    repo_id: str,
+    robot_type: str,
+    mode: Literal["video", "image"] = "video",
+    *,
+    has_velocity: bool = False,
+    has_effort: bool = False,
+    dataset_config: DatasetConfig = DEFAULT_DATASET_CONFIG,
+) -> LeRobotDataset:
+    """
+    Create a fresh, empty LeRobotDataset whose features match the given robot type.
+
+    Any dataset directory already present at ``HF_LEROBOT_HOME / repo_id`` is deleted first.
+
+    Args:
+        repo_id: Dataset identifier; also the directory name below ``HF_LEROBOT_HOME``.
+        robot_type: Key into ``ROBOT_CONFIGS`` providing the motor and camera names.
+        mode: Feature dtype used for every camera stream ("video" or "image").
+        has_velocity: Also declare an ``observation.velocity`` feature.
+        has_effort: Also declare an ``observation.effort`` feature.
+        dataset_config: Writer/video options forwarded to ``LeRobotDataset.create``.
+
+    Returns:
+        The newly created dataset (30 fps, camera frames declared as 480x640x3).
+    """
+    robot_config = ROBOT_CONFIGS[robot_type]
+    joint_names = robot_config.motors
+    camera_names = robot_config.cameras
+
+    def joint_feature() -> dict:
+        # One float32 value per motor; every joint-space feature shares this layout.
+        return {
+            "dtype": "float32",
+            "shape": (len(joint_names),),
+            "names": [joint_names],
+        }
+
+    features = {
+        "observation.state": joint_feature(),
+        "action": joint_feature(),
+    }
+    if has_velocity:
+        features["observation.velocity"] = joint_feature()
+    if has_effort:
+        features["observation.effort"] = joint_feature()
+
+    features.update(
+        {
+            f"observation.images.{cam}": {
+                "dtype": mode,
+                "shape": (480, 640, 3),
+                "names": ["height", "width", "channel"],
+            }
+            for cam in camera_names
+        }
+    )
+
+    # Start from a clean slate: drop whatever an earlier conversion left behind.
+    dataset_root = Path(HF_LEROBOT_HOME / repo_id)
+    if dataset_root.exists():
+        shutil.rmtree(HF_LEROBOT_HOME / repo_id)
+
+    return LeRobotDataset.create(
+        repo_id=repo_id,
+        fps=30,
+        robot_type=robot_type,
+        features=features,
+        use_videos=dataset_config.use_videos,
+        tolerance_s=dataset_config.tolerance_s,
+        image_writer_processes=dataset_config.image_writer_processes,
+        image_writer_threads=dataset_config.image_writer_threads,
+        video_backend=dataset_config.video_backend,
+    )
+
+
+def populate_dataset(
+    dataset: LeRobotDataset,
+    raw_dir: Path,
+    robot_type: str,
+) -> LeRobotDataset:
+    """
+    Copy every episode found under ``raw_dir`` into ``dataset``, frame by frame.
+
+    Args:
+        dataset: Target dataset; receives one ``add_frame`` call per step and one
+            ``save_episode`` call per episode.
+        raw_dir: Root directory of the JSON dataset.
+        robot_type: Key into ``ROBOT_CONFIGS`` used to interpret the JSON data.
+
+    Returns:
+        The same ``dataset`` object, now populated.
+    """
+    source = JsonDataset(raw_dir, robot_type)
+
+    for episode_idx in tqdm.tqdm(range(len(source))):
+        episode = source.get_item(episode_idx)
+
+        states, actions = episode["state"], episode["action"]
+        camera_frames = episode["cameras"]
+        instruction = episode["task"]
+
+        for step in range(episode["episode_length"]):
+            frame = {
+                "observation.state": states[step],
+                "action": actions[step],
+            }
+            # One image per camera for this step, keyed like the dataset features.
+            frame.update(
+                {f"observation.images.{name}": frames[step] for name, frames in camera_frames.items()}
+            )
+
+            dataset.add_frame(frame, task=instruction)
+
+        dataset.save_episode()
+
+    return dataset
+
+
+def json_to_lerobot(
+    raw_dir: Path,
+    repo_id: str,
+    robot_type: str,
+    *,
+    push_to_hub: bool = False,
+    mode: Literal["video", "image"] = "video",
+    dataset_config: DatasetConfig = DEFAULT_DATASET_CONFIG,
+):
+    """
+    Convert a JSON dataset into a LeRobot dataset and optionally upload it.
+
+    Args:
+        raw_dir: Root directory of the JSON dataset.
+        repo_id: Dataset identifier; an existing directory ``HF_LEROBOT_HOME / repo_id`` is removed.
+        robot_type: Robot configuration name, e.g. Unitree_Z1_Single, Unitree_Z1_Dual,
+            Unitree_G1_Dex1, Unitree_G1_Dex3, Unitree_G1_Brainco, Unitree_G1_Inspire.
+        push_to_hub: Upload the finished dataset with ``push_to_hub`` when true.
+        mode: Feature dtype used for the camera streams ("video" or "image").
+        dataset_config: Writer/video options forwarded to dataset creation.
+    """
+    # Remove the output of any previous run before writing a new dataset.
+    target_dir = HF_LEROBOT_HOME / repo_id
+    if target_dir.exists():
+        shutil.rmtree(target_dir)
+
+    empty_dataset = create_empty_dataset(
+        repo_id,
+        robot_type=robot_type,
+        mode=mode,
+        has_effort=False,
+        has_velocity=False,
+        dataset_config=dataset_config,
+    )
+    dataset = populate_dataset(empty_dataset, raw_dir, robot_type=robot_type)
+
+    if not push_to_hub:
+        return
+    dataset.push_to_hub(upload_large_folder=True)
+
+
+def local_push_to_hub(
+    repo_id: str,
+    root_path: Path,
+):
+    """
+    Open an already converted dataset from disk and upload it.
+
+    Args:
+        repo_id: Dataset identifier passed to ``LeRobotDataset``.
+        root_path: Local directory passed to ``LeRobotDataset`` as ``root``.
+    """
+    LeRobotDataset(repo_id=repo_id, root=root_path).push_to_hub(upload_large_folder=True)
+
+
+# Script entry point: hand json_to_lerobot to tyro.cli when this file is run directly.
+if __name__ == "__main__":
+    tyro.cli(json_to_lerobot)