sync recent changes

2026-05-19 10:40:04 +00:00 · 2025-11-21 14:13:05 +01:00
parent e5cae6be64
commit 9a052566a3
326 changed files with 20122 additions and 15 deletions
@@ -0,0 +1,280 @@
+import dataclasses
+from typing import List, Dict
+
+
+@dataclasses.dataclass(frozen=True)
+class RobotConfig:
+    motors: List[str]
+    cameras: List[str]
+    camera_to_image_key: Dict[str, str]
+    json_state_data_name: List[str]
+    json_action_data_name: List[str]
+
+
+Z1_CONFIG = RobotConfig(
+    motors=[
+        "kLeftWaist",
+        "kLeftShoulder",
+        "kLeftElbow",
+        "kLeftForearmRoll",
+        "kLeftWristAngle",
+        "kLeftWristRotate",
+        "kLeftGripper",
+        "kRightWaist",
+        "kRightShoulder",
+        "kRightElbow",
+        "kRightForearmRoll",
+        "kRightWristAngle",
+        "kRightWristRotate",
+        "kRightGripper",
+    ],
+    cameras=[
+        "cam_high",
+        "cam_left_wrist",
+        "cam_right_wrist",
+    ],
+    camera_to_image_key={"color_0": "cam_high", "color_1": "cam_left_wrist", "color_2": "cam_right_wrist"},
+    json_state_data_name=["left_arm", "right_arm"],
+    json_action_data_name=["left_arm", "right_arm"],
+)
+
+
+Z1_SINGLE_CONFIG = RobotConfig(
+    motors=[
+        "kWaist",
+        "kShoulder",
+        "kElbow",
+        "kForearmRoll",
+        "kWristAngle",
+        "kWristRotate",
+        "kGripper",
+    ],
+    cameras=[
+        "cam_high",
+        "cam_wrist",
+    ],
+    camera_to_image_key={"color_0": "cam_high", "color_1": "cam_wrist"},
+    json_state_data_name=["left_arm", "right_arm"],
+    json_action_data_name=["left_arm", "right_arm"],
+)
+
+
+G1_DEX1_CONFIG = RobotConfig(
+    motors=[
+        "kLeftShoulderPitch",
+        "kLeftShoulderRoll",
+        "kLeftShoulderYaw",
+        "kLeftElbow",
+        "kLeftWristRoll",
+        "kLeftWristPitch",
+        "kLeftWristYaw",
+        "kRightShoulderPitch",
+        "kRightShoulderRoll",
+        "kRightShoulderYaw",
+        "kRightElbow",
+        "kRightWristRoll",
+        "kRightWristPitch",
+        "kRightWristYaw",
+        "kLeftGripper",
+        "kRightGripper",
+    ],
+    cameras=[
+        "cam_left_high",
+        "cam_right_high",
+        "cam_left_wrist",
+        "cam_right_wrist",
+    ],
+    camera_to_image_key={
+        "color_0": "cam_left_high",
+        "color_1": "cam_right_high",
+        "color_2": "cam_left_wrist",
+        "color_3": "cam_right_wrist",
+    },
+    json_state_data_name=["left_arm", "right_arm", "left_ee", "right_ee"],
+    json_action_data_name=["left_arm", "right_arm", "left_ee", "right_ee"],
+)
+
+
+G1_DEX1_CONFIG_SIM = RobotConfig(
+    motors=[
+        "kLeftShoulderPitch",
+        "kLeftShoulderRoll",
+        "kLeftShoulderYaw",
+        "kLeftElbow",
+        "kLeftWristRoll",
+        "kLeftWristPitch",
+        "kLeftWristYaw",
+        "kRightShoulderPitch",
+        "kRightShoulderRoll",
+        "kRightShoulderYaw",
+        "kRightElbow",
+        "kRightWristRoll",
+        "kRightWristPitch",
+        "kRightWristYaw",
+        "kLeftGripper",
+        "kRightGripper",
+    ],
+    cameras=[
+        "cam_left_high",
+        "cam_left_wrist",
+        "cam_right_wrist",
+    ],
+    camera_to_image_key={
+        "color_0": "cam_left_high",
+        "color_1": "cam_left_wrist",
+        "color_2": "cam_right_wrist",
+    },
+    json_state_data_name=["left_arm", "right_arm", "left_ee", "right_ee"],
+    json_action_data_name=["left_arm", "right_arm", "left_ee", "right_ee"],
+)
+
+
+G1_DEX3_CONFIG = RobotConfig(
+    motors=[
+        "kLeftShoulderPitch",
+        "kLeftShoulderRoll",
+        "kLeftShoulderYaw",
+        "kLeftElbow",
+        "kLeftWristRoll",
+        "kLeftWristPitch",
+        "kLeftWristYaw",
+        "kRightShoulderPitch",
+        "kRightShoulderRoll",
+        "kRightShoulderYaw",
+        "kRightElbow",
+        "kRightWristRoll",
+        "kRightWristPitch",
+        "kRightWristYaw",
+        "kLeftHandThumb0",
+        "kLeftHandThumb1",
+        "kLeftHandThumb2",
+        "kLeftHandMiddle0",
+        "kLeftHandMiddle1",
+        "kLeftHandIndex0",
+        "kLeftHandIndex1",
+        "kRightHandThumb0",
+        "kRightHandThumb1",
+        "kRightHandThumb2",
+        "kRightHandIndex0",
+        "kRightHandIndex1",
+        "kRightHandMiddle0",
+        "kRightHandMiddle1",
+    ],
+    cameras=[
+        "cam_left_high",
+        "cam_right_high",
+        "cam_left_wrist",
+        "cam_right_wrist",
+    ],
+    camera_to_image_key={
+        "color_0": "cam_left_high",
+        "color_1": "cam_right_high",
+        "color_2": "cam_left_wrist",
+        "color_3": "cam_right_wrist",
+    },
+    json_state_data_name=["left_arm", "right_arm", "left_ee", "right_ee"],
+    json_action_data_name=["left_arm", "right_arm", "left_ee", "right_ee"],
+)
+
+
+G1_BRAINCO_CONFIG = RobotConfig(
+    motors=[
+        "kLeftShoulderPitch",
+        "kLeftShoulderRoll",
+        "kLeftShoulderYaw",
+        "kLeftElbow",
+        "kLeftWristRoll",
+        "kLeftWristPitch",
+        "kLeftWristYaw",
+        "kRightShoulderPitch",
+        "kRightShoulderRoll",
+        "kRightShoulderYaw",
+        "kRightElbow",
+        "kRightWristRoll",
+        "kRightWristPitch",
+        "kRightWristYaw",
+        "kLeftHandThumb",
+        "kLeftHandThumbAux",
+        "kLeftHandIndex",
+        "kLeftHandMiddle",
+        "kLeftHandRing",
+        "kLeftHandPinky",
+        "kRightHandThumb",
+        "kRightHandThumbAux",
+        "kRightHandIndex",
+        "kRightHandMiddle",
+        "kRightHandRing",
+        "kRightHandPinky",
+    ],
+    cameras=[
+        "cam_left_high",
+        "cam_right_high",
+        "cam_left_wrist",
+        "cam_right_wrist",
+    ],
+    camera_to_image_key={
+        "color_0": "cam_left_high",
+        "color_1": "cam_right_high",
+        "color_2": "cam_left_wrist",
+        "color_3": "cam_right_wrist",
+    },
+    json_state_data_name=["left_arm", "right_arm", "left_ee", "right_ee"],
+    json_action_data_name=["left_arm", "right_arm", "left_ee", "right_ee"],
+)
+
+
+G1_INSPIRE_CONFIG = RobotConfig(
+    motors=[
+        "kLeftShoulderPitch",
+        "kLeftShoulderRoll",
+        "kLeftShoulderYaw",
+        "kLeftElbow",
+        "kLeftWristRoll",
+        "kLeftWristPitch",
+        "kLeftWristYaw",
+        "kRightShoulderPitch",
+        "kRightShoulderRoll",
+        "kRightShoulderYaw",
+        "kRightElbow",
+        "kRightWristRoll",
+        "kRightWristPitch",
+        "kRightWristYaw",
+        "kLeftHandPinky",
+        "kLeftHandRing",
+        "kLeftHandMiddle",
+        "kLeftHandIndex",
+        "kLeftHandThumbBend",
+        "kLeftHandThumbRotation",
+        "kRightHandPinky",
+        "kRightHandRing",
+        "kRightHandMiddle",
+        "kRightHandIndex",
+        "kRightHandThumbBend",
+        "kRightHandThumbRotation",
+    ],
+    cameras=[
+        "cam_left_high",
+        "cam_right_high",
+        "cam_left_wrist",
+        "cam_right_wrist",
+    ],
+    camera_to_image_key={
+        "color_0": "cam_left_high",
+        "color_1": "cam_right_high",
+        "color_2": "cam_left_wrist",
+        "color_3": "cam_right_wrist",
+    },
+    json_state_data_name=["left_arm", "right_arm", "left_ee", "right_ee"],
+    json_action_data_name=["left_arm", "right_arm", "left_ee", "right_ee"],
+)
+
+
+ROBOT_CONFIGS = {
+    "Unitree_Z1_Single": Z1_SINGLE_CONFIG,
+    "Unitree_Z1_Dual": Z1_CONFIG,
+    "Unitree_G1_Dex1": G1_DEX1_CONFIG,
+    "Unitree_G1_Dex1_Sim": G1_DEX1_CONFIG_SIM,
+    "Unitree_G1_Dex3": G1_DEX3_CONFIG,
+    "Unitree_G1_Brainco": G1_BRAINCO_CONFIG,
+    "Unitree_G1_Inspire": G1_INSPIRE_CONFIG,
+}
@@ -0,0 +1,161 @@
+"""
+Script lerobot to h5.
+# --repo-id     Your unique repo ID on Hugging Face Hub
+# --output_dir  Save path to h5 file
+
+python unitree_lerobot/utils/convert_lerobot_to_h5.py.py \
+    --repo-id your_name/g1_grabcube_double_hand \
+    --output_dir "$HOME/datasets/g1_grabcube_double_hand" 
+"""
+
+import os
+import cv2
+import h5py
+import tyro
+import numpy as np
+from tqdm import tqdm
+from pathlib import Path
+from collections import defaultdict
+from lerobot.datasets.lerobot_dataset import LeRobotDataset
+
+
+class LeRobotDataProcessor:
+    def __init__(self, repo_id: str, root: str = None, image_dtype: str = "to_unit8") -> None:
+        self.image_dtype = image_dtype
+        self.dataset = LeRobotDataset(repo_id=repo_id, root=root)
+
+    def process_episode(self, episode_index: int) -> dict:
+        """Process a single episode to extract camera images, state, and action."""
+        from_idx = self.dataset.episode_data_index["from"][episode_index].item()
+        to_idx = self.dataset.episode_data_index["to"][episode_index].item()
+
+        episode = defaultdict(list)
+        cameras = defaultdict(list)
+
+        for step_idx in tqdm(
+            range(from_idx, to_idx), desc=f"Episode {episode_index}", position=1, leave=False, dynamic_ncols=True
+        ):
+            step = self.dataset[step_idx]
+
+            image_dict = {
+                key.split(".")[2]: cv2.cvtColor(
+                    np.transpose((value.numpy() * 255).astype(np.uint8), (1, 2, 0)), cv2.COLOR_BGR2RGB
+                )
+                for key, value in step.items()
+                if key.startswith("observation.image") and len(key.split(".")) >= 3
+            }
+
+            for key, value in image_dict.items():
+                if self.image_dtype == "to_unit8":
+                    cameras[key].append(value)
+                elif self.image_dtype == "to_bytes":
+                    success, encoded_img = cv2.imencode(".jpg", value, [cv2.IMWRITE_JPEG_QUALITY, 100])
+                    if not success:
+                        raise ValueError(f"Image encoding failed for key: {key}")
+                    cameras[key].append(np.void(encoded_img.tobytes()))
+
+            cam_height, cam_width = next(iter(image_dict.values())).shape[:2]
+            episode["state"].append(step["observation.state"])
+            episode["action"].append(step["action"])
+
+        episode["cameras"] = cameras
+        episode["task"] = step["task"]
+        episode["episode_length"] = to_idx - from_idx
+
+        # Data configuration for later use
+        episode["data_cfg"] = {
+            "camera_names": list(image_dict.keys()),
+            "cam_height": cam_height,
+            "cam_width": cam_width,
+            "state_dim": np.squeeze(step["observation.state"].numpy().shape),
+            "action_dim": np.squeeze(step["action"].numpy().shape),
+        }
+        episode["episode_index"] = episode_index
+
+        return episode
+
+
+class H5Writer:
+    def __init__(self, output_dir: Path) -> None:
+        self.output_dir = output_dir
+        os.makedirs(output_dir, exist_ok=True)
+
+    def write_to_h5(self, episode: dict) -> None:
+        """Write episode data to HDF5 file."""
+
+        episode_length = episode["episode_length"]
+        episode_index = episode["episode_index"]
+        state = episode["state"]
+        action = episode["action"]
+        qvel = np.zeros_like(episode["state"])
+        cameras = episode["cameras"]
+        task = episode["task"]
+        data_cfg = episode["data_cfg"]
+
+        # Prepare data dictionary
+        data_dict = {
+            "/observations/qpos": [state],
+            "/observations/qvel": [qvel],
+            "/action": [action],
+            **{f"/observations/images/{k}": [v] for k, v in cameras.items()},
+        }
+
+        h5_path = os.path.join(self.output_dir, f"episode_{episode_index}.hdf5")
+
+        with h5py.File(h5_path, "w", rdcc_nbytes=1024**2 * 2, libver="latest") as root:
+            # Set attributes
+            root.attrs["sim"] = False
+
+            # Create datasets
+            obs = root.create_group("observations")
+            image = obs.create_group("images")
+
+            # Write camera images
+            for cam_name, images in cameras.items():
+                data_dtype = images[0].dtype
+                shape = (
+                    (episode_length, data_cfg["cam_height"], data_cfg["cam_width"], 3)
+                    if data_dtype == "uint8"
+                    else (episode_length,)
+                )
+                chunks = (1, data_cfg["cam_height"], data_cfg["cam_width"], 3) if data_dtype == "uint8" else (1,)
+                image.create_dataset(cam_name, shape=shape, dtype=data_dtype, chunks=chunks, compression="gzip")
+                # root[f'/observations/images/{cam_name}'][...] = images
+
+            # Write state and action data
+            obs.create_dataset("qpos", (episode_length, data_cfg["state_dim"]), dtype="float32", compression="gzip")
+            obs.create_dataset("qvel", (episode_length, data_cfg["state_dim"]), dtype="float32", compression="gzip")
+            root.create_dataset("action", (episode_length, data_cfg["action_dim"]), dtype="float32", compression="gzip")
+
+            # Write metadata
+            root.create_dataset("is_edited", (1,), dtype="uint8")
+            substep_reasonings = root.create_dataset(
+                "substep_reasonings", (episode_length,), dtype=h5py.string_dtype(encoding="utf-8"), compression="gzip"
+            )
+            root.create_dataset("language_raw", data=task)
+            substep_reasonings[:] = [task] * episode_length
+
+            # Write additional data
+            for name, array in data_dict.items():
+                root[name][...] = array
+
+
+def lerobot_to_h5(repo_id: str, output_dir: Path, root: str = None) -> None:
+    """Main function to process and write LeRobot data to HDF5 format."""
+
+    # Initialize data processor and H5 writer
+    data_processor = LeRobotDataProcessor(
+        repo_id, root, image_dtype="to_unit8"
+    )  # image_dtype Options: "to_unit8", "to_bytes"
+    h5_writer = H5Writer(output_dir)
+
+    # Process each episode
+    for episode_index in tqdm(
+        range(data_processor.dataset.num_episodes), desc="Episodes", position=0, dynamic_ncols=True
+    ):
+        episode = data_processor.process_episode(episode_index)
+        h5_writer.write_to_h5(episode)
+
+
+if __name__ == "__main__":
+    tyro.cli(lerobot_to_h5)
@@ -0,0 +1,250 @@
+"""
+Script Json to h5.
+
+# --data_dirs   Corresponds to the directory of your JSON dataset
+# --output_dir  Save path to h5 file
+# --robot_type  The type of the robot used in the dataset (e.g., Unitree_Z1_Single, Unitree_Z1_Dual, Unitree_G1_Dex1, Unitree_G1_Dex3, Unitree_G1_Brainco, Unitree_G1_Inspire)
+
+python unitree_lerobot/utils/convert_unitree_json_to_h5.py \
+    --data_dirs $HOME/datasets/json \
+    --output_dir $HOME/datasets/h5 \
+    --robot_type Unitree_G1_Dex3
+"""
+
+import os
+import tyro
+import json
+import h5py
+import cv2
+import tqdm
+import glob
+import numpy as np
+from pathlib import Path
+from typing import List, Dict, Optional
+from collections import defaultdict
+from unitree_lerobot.utils.constants import ROBOT_CONFIGS
+
+
+class JsonDataset:
+    def __init__(self, data_dirs: Path, robot_type: str) -> None:
+        """
+        Initialize the dataset for loading and processing HDF5 files containing robot manipulation data.
+
+        Args:
+            data_dirs: Path to directory containing training data
+        """
+        assert data_dirs is not None, "Data directory cannot be None"
+        assert robot_type is not None, "Robot type cannot be None"
+        self.data_dirs = data_dirs
+        self.json_file = "data.json"
+
+        # Initialize paths and cache
+        self._init_paths()
+        self._init_cache()
+        self.json_state_data_name = ROBOT_CONFIGS[robot_type].json_state_data_name
+        self.json_action_data_name = ROBOT_CONFIGS[robot_type].json_action_data_name
+        self.camera_to_image_key = ROBOT_CONFIGS[robot_type].camera_to_image_key
+
+    def _init_paths(self) -> None:
+        """Initialize episode and task paths."""
+
+        self.episode_paths = []
+        self.task_paths = []
+
+        for task_path in glob.glob(os.path.join(self.data_dirs, "*")):
+            if os.path.isdir(task_path):
+                episode_paths = glob.glob(os.path.join(task_path, "*"))
+                if episode_paths:
+                    self.task_paths.append(task_path)
+                    self.episode_paths.extend(episode_paths)
+
+        self.episode_paths = sorted(self.episode_paths)
+        self.episode_ids = list(range(len(self.episode_paths)))
+
+    def __len__(self) -> int:
+        """Return the number of episodes in the dataset."""
+        return len(self.episode_paths)
+
+    def _init_cache(self) -> List:
+        """Initialize data cache if enabled."""
+
+        self.episodes_data_cached = []
+        for episode_path in tqdm.tqdm(self.episode_paths, desc="Loading Cache Json"):
+            json_path = os.path.join(episode_path, self.json_file)
+            with open(json_path, "r", encoding="utf-8") as jsonf:
+                self.episodes_data_cached.append(json.load(jsonf))
+
+        print(f"==> Cached {len(self.episodes_data_cached)} episodes")
+
+        return self.episodes_data_cached
+
+    def _extract_data(self, episode_data: Dict, key: str, parts: List[str]) -> np.ndarray:
+        """
+        Extract data from episode dictionary for specified parts.
+
+        Args:
+            episode_data: Dictionary containing episode data
+            key: Data key to extract ('states' or 'actions')
+            parts: List of parts to include ('left_arm', 'right_arm')
+
+        Returns:
+            Concatenated numpy array of the requested data
+        """
+        result = []
+        for sample_data in episode_data["data"]:
+            data_array = np.array([], dtype=np.float32)
+            for part in parts:
+                if part in sample_data[key] and sample_data[key][part] is not None:
+                    qpos = np.array(sample_data[key][part]["qpos"], dtype=np.float32)
+                    data_array = np.concatenate([data_array, qpos])
+            result.append(data_array)
+        return np.array(result)
+
+    def _parse_images(self, episode_path: str, episode_data) -> dict[str, list[np.ndarray]]:
+        """Load and stack images for a given camera key."""
+
+        images = defaultdict(list)
+
+        keys = episode_data["data"][0]["colors"].keys()
+        cameras = [key for key in keys if "depth" not in key]
+
+        for camera in cameras:
+            image_key = self.camera_to_image_key.get(camera)
+
+            for sample_data in episode_data["data"]:
+                image_path = os.path.join(episode_path, sample_data["colors"].get(camera))
+                if not os.path.exists(image_path):
+                    raise FileNotFoundError(f"Image path does not exist: {image_path}")
+
+                image = cv2.imread(image_path)
+                if image is None:
+                    raise RuntimeError(f"Failed to read image: {image_path}")
+
+                image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
+                images[image_key].append(image_rgb)
+
+        return images
+
+    def get_item(
+        self,
+        index: Optional[int] = None,
+    ) -> Dict:
+        """Get a training sample from the dataset."""
+
+        file_path = np.random.choice(self.episode_paths) if index is None else self.episode_paths[index]
+        episode_data = self.episodes_data_cached[index]
+
+        # Load state and action data
+        action = self._extract_data(episode_data, "actions", self.json_action_data_name)
+        state = self._extract_data(episode_data, "states", self.json_state_data_name)
+        episode_length = len(state)
+        state_dim = state.shape[1] if len(state.shape) == 2 else state.shape[0]
+        action_dim = action.shape[1] if len(action.shape) == 2 else state.shape[0]
+
+        # Load task description
+        task = episode_data.get("text", {}).get("goal", "")
+
+        # Load camera images
+        cameras = self._parse_images(file_path, episode_data)
+
+        # Extract camera configuration
+        cam_height, cam_width = next(img for imgs in cameras.values() if imgs for img in imgs).shape[:2]
+        data_cfg = {
+            "camera_names": list(cameras.keys()),
+            "cam_height": cam_height,
+            "cam_width": cam_width,
+            "state_dim": state_dim,
+            "action_dim": action_dim,
+        }
+
+        return {
+            "episode_index": index,
+            "episode_length": episode_length,
+            "state": state,
+            "action": action,
+            "cameras": cameras,
+            "task": task,
+            "data_cfg": data_cfg,
+        }
+
+
+class H5Writer:
+    def __init__(self, output_dir: Path) -> None:
+        self.output_dir = output_dir
+        os.makedirs(output_dir, exist_ok=True)
+
+    def write_to_h5(self, episode: dict) -> None:
+        """Write episode data to HDF5 file."""
+
+        episode_length = episode["episode_length"]
+        episode_index = episode["episode_index"]
+        state = episode["state"]
+        action = episode["action"]
+        qvel = np.zeros_like(episode["state"])
+        cameras = episode["cameras"]
+        task = episode["task"]
+        data_cfg = episode["data_cfg"]
+
+        # Prepare data dictionary
+        data_dict = {
+            "/observations/qpos": [state],
+            "/observations/qvel": [qvel],
+            "/action": [action],
+            **{f"/observations/images/{k}": [v] for k, v in cameras.items()},
+        }
+
+        h5_path = os.path.join(self.output_dir, f"episode_{episode_index}.hdf5")
+
+        with h5py.File(h5_path, "w", rdcc_nbytes=1024**2 * 2, libver="latest") as root:
+            # Set attributes
+            root.attrs["sim"] = False
+
+            # Create datasets
+            obs = root.create_group("observations")
+            image = obs.create_group("images")
+
+            # Write camera images
+            for cam_name, images in cameras.items():
+                image.create_dataset(
+                    cam_name,
+                    shape=(episode_length, data_cfg["cam_height"], data_cfg["cam_width"], 3),
+                    dtype="uint8",
+                    chunks=(1, data_cfg["cam_height"], data_cfg["cam_width"], 3),
+                    compression="gzip",
+                )
+                # root[f'/observations/images/{cam_name}'][...] = images
+
+            # Write state and action data
+            obs.create_dataset("qpos", (episode_length, data_cfg["state_dim"]), dtype="float32", compression="gzip")
+            obs.create_dataset("qvel", (episode_length, data_cfg["state_dim"]), dtype="float32", compression="gzip")
+            root.create_dataset("action", (episode_length, data_cfg["action_dim"]), dtype="float32", compression="gzip")
+
+            # Write metadata
+            root.create_dataset("is_edited", (1,), dtype="uint8")
+            substep_reasonings = root.create_dataset(
+                "substep_reasonings", (episode_length,), dtype=h5py.string_dtype(encoding="utf-8"), compression="gzip"
+            )
+            root.create_dataset("language_raw", data=task)
+            substep_reasonings[:] = [task] * episode_length
+
+            # Write additional data
+            for name, array in data_dict.items():
+                root[name][...] = array
+
+
+def json_to_h5(
+    data_dirs: Path,
+    output_dir: Path,
+    robot_type: str,
+) -> None:
+    """Convert JSON episode data to HDF5 format."""
+    dataset = JsonDataset(data_dirs, robot_type)
+    h5_writer = H5Writer(output_dir)
+
+    for i in tqdm.tqdm(range(len(dataset))):
+        episode = dataset.get_item(i)
+        h5_writer.write_to_h5(episode)
+
+
+if __name__ == "__main__":
+    tyro.cli(json_to_h5)
@@ -0,0 +1,341 @@
+"""
+Script Json to Lerobot.
+
+# --raw-dir     Corresponds to the directory of your JSON dataset
+# --repo-id     Your unique repo ID on Hugging Face Hub
+# --robot_type  The type of the robot used in the dataset (e.g., Unitree_Z1_Single, Unitree_Z1_Dual, Unitree_G1_Dex1, Unitree_G1_Dex3, Unitree_G1_Brainco, Unitree_G1_Inspire)
+# --push_to_hub Whether or not to upload the dataset to Hugging Face Hub (true or false)
+
+python unitree_lerobot/utils/convert_unitree_json_to_lerobot.py \
+    --raw-dir $HOME/datasets/g1_grabcube_double_hand \
+    --repo-id your_name/g1_grabcube_double_hand \
+    --robot_type Unitree_G1_Dex3 \ 
+    --push_to_hub
+"""
+
+import os
+import cv2
+import tqdm
+import tyro
+import json
+import glob
+import dataclasses
+import shutil
+import numpy as np
+from pathlib import Path
+from collections import defaultdict
+from typing import Literal, List, Dict, Optional
+
+from lerobot.constants import HF_LEROBOT_HOME
+from lerobot.datasets.lerobot_dataset import LeRobotDataset
+
+from unitree_lerobot.utils.constants import ROBOT_CONFIGS
+
+
+@dataclasses.dataclass(frozen=True)
+class DatasetConfig:
+    use_videos: bool = True
+    tolerance_s: float = 0.0001
+    image_writer_processes: int = 10
+    image_writer_threads: int = 5
+    video_backend: str | None = None
+
+
+DEFAULT_DATASET_CONFIG = DatasetConfig()
+
+
+class JsonDataset:
+    def __init__(self, data_dirs: Path, robot_type: str) -> None:
+        """
+        Initialize the dataset for loading and processing HDF5 files containing robot manipulation data.
+
+        Args:
+            data_dirs: Path to directory containing training data
+        """
+        assert data_dirs is not None, "Data directory cannot be None"
+        assert robot_type is not None, "Robot type cannot be None"
+        self.data_dirs = data_dirs
+        self.json_file = "data.json"
+
+        # Initialize paths and cache
+        self._init_paths()
+        self._init_cache()
+        self.json_state_data_name = ROBOT_CONFIGS[robot_type].json_state_data_name
+        self.json_action_data_name = ROBOT_CONFIGS[robot_type].json_action_data_name
+        self.camera_to_image_key = ROBOT_CONFIGS[robot_type].camera_to_image_key
+
+    def _init_paths(self) -> None:
+        """Initialize episode and task paths."""
+
+        self.episode_paths = []
+        self.task_paths = []
+
+        for task_path in glob.glob(os.path.join(self.data_dirs, "*")):
+            if os.path.isdir(task_path):
+                episode_paths = glob.glob(os.path.join(task_path, "*"))
+                if episode_paths:
+                    self.task_paths.append(task_path)
+                    self.episode_paths.extend(episode_paths)
+
+        self.episode_paths = sorted(self.episode_paths)
+        self.episode_ids = list(range(len(self.episode_paths)))
+
+    def __len__(self) -> int:
+        """Return the number of episodes in the dataset."""
+        return len(self.episode_paths)
+
+    def _init_cache(self) -> List:
+        """Initialize data cache if enabled."""
+
+        self.episodes_data_cached = []
+        for episode_path in tqdm.tqdm(self.episode_paths, desc="Loading Cache Json"):
+            json_path = os.path.join(episode_path, self.json_file)
+            with open(json_path, "r", encoding="utf-8") as jsonf:
+                self.episodes_data_cached.append(json.load(jsonf))
+
+        print(f"==> Cached {len(self.episodes_data_cached)} episodes")
+
+        return self.episodes_data_cached
+
+    def _extract_data(self, episode_data: Dict, key: str, parts: List[str]) -> np.ndarray:
+        """
+        Extract data from episode dictionary for specified parts.
+
+        Args:
+            episode_data: Dictionary containing episode data
+            key: Data key to extract ('states' or 'actions')
+            parts: List of parts to include ('left_arm', 'right_arm')
+
+        Returns:
+            Concatenated numpy array of the requested data
+        """
+        result = []
+        for sample_data in episode_data["data"]:
+            data_array = np.array([], dtype=np.float32)
+            for part in parts:
+                if part in sample_data[key] and sample_data[key][part] is not None:
+                    qpos = np.array(sample_data[key][part]["qpos"], dtype=np.float32)
+                    data_array = np.concatenate([data_array, qpos])
+            result.append(data_array)
+        return np.array(result)
+
+    def _parse_images(self, episode_path: str, episode_data) -> dict[str, list[np.ndarray]]:
+        """Load and stack images for a given camera key."""
+
+        images = defaultdict(list)
+
+        keys = episode_data["data"][0]["colors"].keys()
+        cameras = [key for key in keys if "depth" not in key]
+
+        for camera in cameras:
+            image_key = self.camera_to_image_key.get(camera)
+            if image_key is None:
+                continue
+
+            for sample_data in episode_data["data"]:
+                relative_path = sample_data["colors"].get(camera)
+                if not relative_path:
+                    continue
+
+                image_path = os.path.join(episode_path, relative_path)
+                if not os.path.exists(image_path):
+                    raise FileNotFoundError(f"Image path does not exist: {image_path}")
+
+                image = cv2.imread(image_path)
+                if image is None:
+                    raise RuntimeError(f"Failed to read image: {image_path}")
+
+                image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
+                images[image_key].append(image_rgb)
+
+        return images
+
+    def get_item(
+        self,
+        index: Optional[int] = None,
+    ) -> Dict:
+        """Get a training sample from the dataset."""
+
+        file_path = np.random.choice(self.episode_paths) if index is None else self.episode_paths[index]
+        episode_data = self.episodes_data_cached[index]
+
+        # Load state and action data
+        action = self._extract_data(episode_data, "actions", self.json_action_data_name)
+        state = self._extract_data(episode_data, "states", self.json_state_data_name)
+        episode_length = len(state)
+        state_dim = state.shape[1] if len(state.shape) == 2 else state.shape[0]
+        action_dim = action.shape[1] if len(action.shape) == 2 else state.shape[0]
+
+        # Load task description
+        task = episode_data.get("text", {}).get("goal", "")
+
+        # Load camera images
+        cameras = self._parse_images(file_path, episode_data)
+
+        # Extract camera configuration
+        cam_height, cam_width = next(img for imgs in cameras.values() if imgs for img in imgs).shape[:2]
+        data_cfg = {
+            "camera_names": list(cameras.keys()),
+            "cam_height": cam_height,
+            "cam_width": cam_width,
+            "state_dim": state_dim,
+            "action_dim": action_dim,
+        }
+
+        return {
+            "episode_index": index,
+            "episode_length": episode_length,
+            "state": state,
+            "action": action,
+            "cameras": cameras,
+            "task": task,
+            "data_cfg": data_cfg,
+        }
+
+
+def create_empty_dataset(
+    repo_id: str,
+    robot_type: str,
+    mode: Literal["video", "image"] = "video",
+    *,
+    has_velocity: bool = False,
+    has_effort: bool = False,
+    dataset_config: DatasetConfig = DEFAULT_DATASET_CONFIG,
+) -> LeRobotDataset:
+    motors = ROBOT_CONFIGS[robot_type].motors
+    cameras = ROBOT_CONFIGS[robot_type].cameras
+
+    features = {
+        "observation.state": {
+            "dtype": "float32",
+            "shape": (len(motors),),
+            "names": [
+                motors,
+            ],
+        },
+        "action": {
+            "dtype": "float32",
+            "shape": (len(motors),),
+            "names": [
+                motors,
+            ],
+        },
+    }
+
+    if has_velocity:
+        features["observation.velocity"] = {
+            "dtype": "float32",
+            "shape": (len(motors),),
+            "names": [
+                motors,
+            ],
+        }
+
+    if has_effort:
+        features["observation.effort"] = {
+            "dtype": "float32",
+            "shape": (len(motors),),
+            "names": [
+                motors,
+            ],
+        }
+
+    for cam in cameras:
+        features[f"observation.images.{cam}"] = {
+            "dtype": mode,
+            "shape": (480, 640, 3),
+            "names": [
+                "height",
+                "width",
+                "channel",
+            ],
+        }
+
+    if Path(HF_LEROBOT_HOME / repo_id).exists():
+        shutil.rmtree(HF_LEROBOT_HOME / repo_id)
+
+    return LeRobotDataset.create(
+        repo_id=repo_id,
+        fps=30,
+        robot_type=robot_type,
+        features=features,
+        use_videos=dataset_config.use_videos,
+        tolerance_s=dataset_config.tolerance_s,
+        image_writer_processes=dataset_config.image_writer_processes,
+        image_writer_threads=dataset_config.image_writer_threads,
+        video_backend=dataset_config.video_backend,
+    )
+
+
+def populate_dataset(
+    dataset: LeRobotDataset,
+    raw_dir: Path,
+    robot_type: str,
+) -> LeRobotDataset:
+    json_dataset = JsonDataset(raw_dir, robot_type)
+    for i in tqdm.tqdm(range(len(json_dataset))):
+        episode = json_dataset.get_item(i)
+
+        state = episode["state"]
+        action = episode["action"]
+        cameras = episode["cameras"]
+        task = episode["task"]
+        episode_length = episode["episode_length"]
+
+        num_frames = episode_length
+        for i in range(num_frames):
+            frame = {
+                "observation.state": state[i],
+                "action": action[i],
+            }
+
+            for camera, img_array in cameras.items():
+                frame[f"observation.images.{camera}"] = img_array[i]
+
+            dataset.add_frame(frame, task=task)
+
+        dataset.save_episode()
+
+    return dataset
+
+
+def json_to_lerobot(
+    raw_dir: Path,
+    repo_id: str,
+    robot_type: str,  # e.g., Unitree_Z1_Single, Unitree_Z1_Dual, Unitree_G1_Dex1, Unitree_G1_Dex3, Unitree_G1_Brainco, Unitree_G1_Inspire
+    *,
+    push_to_hub: bool = False,
+    mode: Literal["video", "image"] = "video",
+    dataset_config: DatasetConfig = DEFAULT_DATASET_CONFIG,
+):
+    if (HF_LEROBOT_HOME / repo_id).exists():
+        shutil.rmtree(HF_LEROBOT_HOME / repo_id)
+
+    dataset = create_empty_dataset(
+        repo_id,
+        robot_type=robot_type,
+        mode=mode,
+        has_effort=False,
+        has_velocity=False,
+        dataset_config=dataset_config,
+    )
+    dataset = populate_dataset(
+        dataset,
+        raw_dir,
+        robot_type=robot_type,
+    )
+
+    if push_to_hub:
+        dataset.push_to_hub(upload_large_folder=True)
+
+
+def local_push_to_hub(
+    repo_id: str,
+    root_path: Path,
+):
+    dataset = LeRobotDataset(repo_id=repo_id, root=root_path)
+    dataset.push_to_hub(upload_large_folder=True)
+
+
+if __name__ == "__main__":
+    tyro.cli(json_to_lerobot)
@@ -0,0 +1,40 @@
+"""
+Script to convert Unitree json data to the LeRobot dataset v2.0 format.
+
+python unitree_lerobot/utils/sort_and_rename_folders.py --data_dir $HOME/datasets/g1_grabcube_double_hand
+"""
+
+import os
+import tyro
+import uuid
+from pathlib import Path
+
+
+def sort_and_rename_folders(data_dir: Path) -> None:
+    # Get the list of folders sorted by name
+    folders = sorted([f for f in os.listdir(data_dir) if os.path.isdir(os.path.join(data_dir, f))])
+
+    temp_mapping = {}
+
+    # First, rename all folders to unique temporary names
+    for folder in folders:
+        temp_name = str(uuid.uuid4())
+        original_path = os.path.join(data_dir, folder)
+        temp_path = os.path.join(data_dir, temp_name)
+        os.rename(original_path, temp_path)
+        temp_mapping[temp_name] = folder
+
+    # Then, rename them to the final target names
+    start_number = 0
+    for temp_name, original_folder in temp_mapping.items():
+        new_folder_name = f"episode_{start_number:04d}"
+        temp_path = os.path.join(data_dir, temp_name)
+        new_path = os.path.join(data_dir, new_folder_name)
+        os.rename(temp_path, new_path)
+        start_number += 1
+
+    print("The folders have been successfully renamed.")
+
+
+if __name__ == "__main__":
+    tyro.cli(sort_and_rename_folders)