diff --git a/examples/behavior_1k/behavior_lerobot_dataset_v3.py b/examples/behavior_1k/behavior_lerobot_dataset_v3.py
new file mode 100644
index 000000000..74f2b0fa2
--- /dev/null
+++ b/examples/behavior_1k/behavior_lerobot_dataset_v3.py
@@ -0,0 +1,223 @@
+#!/usr/bin/env python
+
+import json
+import logging
+from pathlib import Path
+from typing import Any, Dict
+
+import numpy as np
+import torch as th
+
+from lerobot.datasets.lerobot_dataset import LeRobotDataset
+from lerobot.utils.utils import init_logging
+
+from .behaviour_1k_constants import (
+    BEHAVIOR_DATASET_FEATURES,
+    PROPRIOCEPTION_INDICES,
+    ROBOT_CAMERA_NAMES,
+    TASK_INDICES_TO_NAMES,
+)
+
+init_logging()
+
+
+class BehaviorLeRobotDatasetV3(LeRobotDataset):
+    """
+    Extends LeRobotDataset v3.0 for BEHAVIOR-1K-specific requirements.
+    Handles task-based episode organization and BEHAVIOR-1K metadata.
+    """
+
+    @classmethod
+    def create(
+        cls,
+        repo_id: str,
+        fps: int = 30,
+        root: str | Path | None = None,
+        robot_type: str = "R1Pro",
+        use_videos: bool = True,
+        video_backend: str = "pyav",
+        batch_encoding_size: int = 1,
+        image_writer_processes: int = 0,
+        image_writer_threads: int = 4,
+    ) -> "BehaviorLeRobotDatasetV3":
+        """
+        Create a new BEHAVIOR-1K dataset in v3.0 format.
+
+        Args:
+            repo_id: HuggingFace repository ID
+            fps: Frames per second (default: 30)
+            root: Local directory for the dataset
+            robot_type: Robot type (default: "R1Pro")
+            use_videos: Whether to encode videos (default: True)
+            video_backend: Video backend to use (default: "pyav")
+            batch_encoding_size: Number of episodes to batch before encoding videos
+            image_writer_processes: Number of processes for async image writing
+            image_writer_threads: Number of threads per process for image writing
+
+        Returns:
+            BehaviorLeRobotDatasetV3 instance
+        """
+        # Create the dataset using the parent class method with BEHAVIOR-1K features
+        obj = super().create(
+            repo_id=repo_id,
+            fps=fps,
+            features=BEHAVIOR_DATASET_FEATURES,
+            root=root,
+            robot_type=robot_type,
+            use_videos=use_videos,
+            tolerance_s=1e-4,
+            image_writer_processes=image_writer_processes,
+            image_writer_threads=image_writer_threads,
+            video_backend=video_backend,
+            batch_encoding_size=batch_encoding_size,
+        )
+
+        # Convert to a BehaviorLeRobotDatasetV3 instance
+        obj.__class__ = cls
+
+        # Initialize BEHAVIOR-1K-specific attributes
+        obj.task_episode_mapping = {}  # Maps task_id to list of episode indices
+        obj.episode_task_mapping = {}  # Maps episode_index to task info
+
+        # Additional metadata for BEHAVIOR-1K
+        obj.behavior_metadata = {
+            "robot_type": robot_type,
+            "task_names": TASK_INDICES_TO_NAMES,
+            "proprioception_indices": PROPRIOCEPTION_INDICES[robot_type],
+            "camera_names": ROBOT_CAMERA_NAMES[robot_type],
+        }
+
+        logging.info(f"Created BehaviorLeRobotDatasetV3 with repo_id: {repo_id}")
+        return obj
+
+    def __init__(self, *args, **kwargs):
+        """
+        Initialize from an existing dataset.
+        Use the create() classmethod to create a new dataset.
+        """
+        super().__init__(*args, **kwargs)
+
+        # Initialize BEHAVIOR-1K-specific attributes when loading existing datasets
+        self.task_episode_mapping = {}
+        self.episode_task_mapping = {}
+        self.behavior_metadata = {}
+
+        # Try to load BEHAVIOR-1K metadata if it exists
+        metadata_path = self.root / "meta" / "behavior_metadata.json"
+        if metadata_path.exists():
+            with open(metadata_path, "r") as f:
+                stored_metadata = json.load(f)
+            self.behavior_metadata = stored_metadata
+            self.task_episode_mapping = stored_metadata.get("task_episode_mapping", {})
+            self.episode_task_mapping = stored_metadata.get("episode_task_mapping", {})
+
+    def add_episode_from_hdf5(
+        self,
+        hdf5_data: Dict[str, Any],
+        task_id: int,
+        episode_id: int,
+        include_videos: bool = True,
+    ) -> None:
+        """
+        Add an episode from HDF5 data to the dataset.
+
+        Args:
+            hdf5_data: Dictionary containing the HDF5 episode data
+            task_id: Task ID for this episode
+            episode_id: Episode ID (should be task_id * 10000 + local_episode_id)
+            include_videos: Whether to include video data
+        """
+        task_name = TASK_INDICES_TO_NAMES[task_id]
+        num_frames = len(hdf5_data["action"])
+
+        logging.info(f"Adding episode {episode_id} (task: {task_name}) with {num_frames} frames")
+
+        # Process each frame
+        for frame_idx in range(num_frames):
+            frame_data = {
+                "action": hdf5_data["action"][frame_idx],
+                "observation.state": hdf5_data["obs"]["robot_r1::proprio"][frame_idx],
+                "observation.cam_rel_poses": hdf5_data["obs"]["robot_r1::cam_rel_poses"][frame_idx],
+                "observation.task_info": hdf5_data["obs"]["task::low_dim"][frame_idx],
+                "task": task_name,
+                "timestamp": frame_idx / self.fps,
+            }
+
+            # Add video frames if requested
+            if include_videos:
+                for modality in ["rgb", "depth_linear", "seg_instance_id"]:
+                    # Map depth_linear to depth for consistency
+                    output_modality = "depth" if modality == "depth_linear" else modality
+
+                    for camera_name, robot_camera_name in ROBOT_CAMERA_NAMES[self.robot_type].items():
+                        key = f"observation.images.{output_modality}.{camera_name}"
+                        hdf5_key = f"{robot_camera_name}::{modality}"
+
+                        if hdf5_key in hdf5_data["obs"]:
+                            # Get the frame data
+                            frame = hdf5_data["obs"][hdf5_key][frame_idx]
+
+                            # Handle different data types
+                            if isinstance(frame, th.Tensor):
+                                frame = frame.numpy()
+
+                            # Ensure correct shape: 2D depth and segmentation frames
+                            # need an explicit channel dimension
+                            if modality in ("depth_linear", "seg_instance_id") and frame.ndim == 2:
+                                frame = np.expand_dims(frame, axis=-1)
+
+                            frame_data[key] = frame
+
+            # Add frame to dataset
+            self.add_frame(frame_data)
+
+        # Collect episode metadata
+        episode_metadata = {
+            "task_id": task_id,
+            "task_name": task_name,
+            "original_episode_id": episode_id,
+        }
+
+        # Add any additional HDF5 attributes as metadata
+        if "attrs" in hdf5_data:
+            for attr_name, attr_value in hdf5_data["attrs"].items():
+                if isinstance(attr_value, (list, np.ndarray)):
+                    episode_metadata[attr_name] = list(attr_value)
+                else:
+                    episode_metadata[attr_name] = attr_value
+
+        # Save the episode
+        self.save_episode()
+
+        # Track the task-episode mapping (persisted to disk in finalize())
+        if task_id not in self.task_episode_mapping:
+            self.task_episode_mapping[task_id] = []
+        self.task_episode_mapping[task_id].append(self.num_episodes - 1)
+        self.episode_task_mapping[self.num_episodes - 1] = episode_metadata
+
+    def finalize(self) -> None:
+        """Finalize the dataset and save additional BEHAVIOR-1K metadata."""
+        # Save BEHAVIOR-1K-specific metadata
+        metadata_path = self.root / "meta" / "behavior_metadata.json"
+        metadata_path.parent.mkdir(parents=True, exist_ok=True)
+
+        self.behavior_metadata.update(
+            {
+                "task_episode_mapping": self.task_episode_mapping,
+                "episode_task_mapping": self.episode_task_mapping,
+                "total_tasks": len(self.task_episode_mapping),
+                "total_episodes": self.num_episodes,
+                "total_frames": self.num_frames,
+            }
+        )
+
+        with open(metadata_path, "w") as f:
+            json.dump(self.behavior_metadata, f, indent=2)
+
+        # Finalize the parent dataset
+        super().finalize()
+
+        logging.info(
+            f"Finalized dataset with {self.num_episodes} episodes "
+            f"and {self.num_frames} frames across {len(self.task_episode_mapping)} tasks"
+        )
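
For context, here is a minimal usage sketch of the class above. The repo ID is a placeholder, and the `examples.behavior_1k` import path assumes the `examples` tree is importable as a package (consistent with the relative imports in the diff):

```python
from examples.behavior_1k.behavior_lerobot_dataset_v3 import BehaviorLeRobotDatasetV3

# Create a fresh dataset; create() wires in BEHAVIOR_DATASET_FEATURES for you.
dataset = BehaviorLeRobotDatasetV3.create(
    repo_id="your-username/behavior-1k-demo",  # placeholder repo ID
    fps=30,
    robot_type="R1Pro",
)

# Re-open an existing dataset; __init__ restores the task/episode mappings
# from meta/behavior_metadata.json if finalize() was called earlier.
existing = BehaviorLeRobotDatasetV3("your-username/behavior-1k-demo")
print(existing.behavior_metadata.get("task_names", {}))
```
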
"observation.images.depth.right_wrist": { + "dtype": "video", + "shape": [480, 480, 1], + "names": ["height", "width", "channels"], + }, + # Segmentation instance ID images + "observation.images.seg_instance_id.head": { + "dtype": "video", + "shape": [720, 720, 1], + "names": ["height", "width", "channels"], + }, + "observation.images.seg_instance_id.left_wrist": { + "dtype": "video", + "shape": [480, 480, 1], + "names": ["height", "width", "channels"], + }, + "observation.images.seg_instance_id.right_wrist": { + "dtype": "video", + "shape": [480, 480, 1], + "names": ["height", "width", "channels"], + }, +} + + +# Action indices +ACTION_QPOS_INDICES = { + "A1": OrderedDict( + { + "arm": np.s_[0:6], + "gripper": np.s_[6:7], + } + ), + "R1Pro": OrderedDict( + { + "base": np.s_[0:3], + "torso": np.s_[3:7], + "left_arm": np.s_[7:14], + "left_gripper": np.s_[14:15], + "right_arm": np.s_[15:22], + "right_gripper": np.s_[22:23], + } + ), +} + + +# Proprioception configuration +PROPRIOCEPTION_INDICES = { + "A1": OrderedDict( + { + "joint_qpos": np.s_[0:8], + "joint_qpos_sin": np.s_[8:16], + "joint_qpos_cos": np.s_[16:24], + "joint_qvel": np.s_[24:32], + "joint_qeffort": np.s_[32:40], + "eef_0_pos": np.s_[40:43], + "eef_0_quat": np.s_[43:47], + "grasp_0": np.s_[47:48], + "gripper_0_qpos": np.s_[48:50], + "gripper_0_qvel": np.s_[50:52], + } + ), + "R1Pro": OrderedDict( + { + "joint_qpos": np.s_[ + 0:28 + ], # Full robot joint positions, the first 6 are base joints, which is NOT allowed in standard track + "joint_qpos_sin": np.s_[ + 28:56 + ], # Full robot joint positions, the first 6 are base joints, which is NOT allowed in standard track + "joint_qpos_cos": np.s_[ + 56:84 + ], # Full robot joint positions, the first 6 are base joints, which is NOT allowed in standard track + "joint_qvel": np.s_[84:112], + "joint_qeffort": np.s_[112:140], + "robot_pos": np.s_[140:143], # Global pos, this is NOT allowed in standard track + "robot_ori_cos": np.s_[143:146], # Global ori, this is NOT allowed in standard track + "robot_ori_sin": np.s_[146:149], # Global ori, this is NOT allowed in standard track + "robot_2d_ori": np.s_[149:150], # 2D global ori, this is NOT allowed in standard track + "robot_2d_ori_cos": np.s_[150:151], # 2D global ori, this is NOT allowed in standard track + "robot_2d_ori_sin": np.s_[151:152], # 2D global ori, this is NOT allowed in standard track + "robot_lin_vel": np.s_[152:155], + "robot_ang_vel": np.s_[155:158], + "arm_left_qpos": np.s_[158:165], + "arm_left_qpos_sin": np.s_[165:172], + "arm_left_qpos_cos": np.s_[172:179], + "arm_left_qvel": np.s_[179:186], + "eef_left_pos": np.s_[186:189], + "eef_left_quat": np.s_[189:193], + "gripper_left_qpos": np.s_[193:195], + "gripper_left_qvel": np.s_[195:197], + "arm_right_qpos": np.s_[197:204], + "arm_right_qpos_sin": np.s_[204:211], + "arm_right_qpos_cos": np.s_[211:218], + "arm_right_qvel": np.s_[218:225], + "eef_right_pos": np.s_[225:228], + "eef_right_quat": np.s_[228:232], + "gripper_right_qpos": np.s_[232:234], + "gripper_right_qvel": np.s_[234:236], + "trunk_qpos": np.s_[236:240], + "trunk_qvel": np.s_[240:244], + "base_qpos": np.s_[244:247], # Base joint position, this is NOT allowed in standard track + "base_qpos_sin": np.s_[247:250], # Base joint position, this is NOT allowed in standard track + "base_qpos_cos": np.s_[250:253], # Base joint position, this is NOT allowed in standard track + "base_qvel": np.s_[253:256], + } + ), +} + +# Proprioception indices +PROPRIO_QPOS_INDICES = { + "A1": OrderedDict( + { + "arm": np.s_[0:6], + 
"gripper": np.s_[6:8], + } + ), + "R1Pro": OrderedDict( + { + "torso": np.s_[6:10], + "left_arm": np.s_[10:24:2], + "right_arm": np.s_[11:24:2], + "left_gripper": np.s_[24:26], + "right_gripper": np.s_[26:28], + } + ), +} + + +# Joint limits (lower, upper) +JOINT_RANGE = { + "A1": { + "arm": ( + th.tensor([-2.8798, 0.0, -3.3161, -2.8798, -1.6581, -2.8798], dtype=th.float32), + th.tensor([2.8798, 3.1415, 0.0, 2.8798, 1.6581, 2.8798], dtype=th.float32), + ), + "gripper": (th.tensor([0.00], dtype=th.float32), th.tensor([0.03], dtype=th.float32)), + }, + "R1Pro": { + "base": (th.tensor([-0.75, -0.75, -1.0], dtype=th.float32), th.tensor([0.75, 0.75, 1.0], dtype=th.float32)), + "torso": ( + th.tensor([-1.1345, -2.7925, -1.8326, -3.0543], dtype=th.float32), + th.tensor([1.8326, 2.5307, 1.5708, 3.0543], dtype=th.float32), + ), + "left_arm": ( + th.tensor([-4.4506, -0.1745, -2.3562, -2.0944, -2.3562, -1.0472, -1.5708], dtype=th.float32), + th.tensor([1.3090, 3.1416, 2.3562, 0.3491, 2.3562, 1.0472, 1.5708], dtype=th.float32), + ), + "left_gripper": (th.tensor([0.00], dtype=th.float32), th.tensor([0.05], dtype=th.float32)), + "right_arm": ( + th.tensor([-4.4506, -3.1416, -2.3562, -2.0944, -2.3562, -1.0472, -1.5708], dtype=th.float32), + th.tensor([1.3090, 0.1745, 2.3562, 0.3491, 2.3562, 1.0472, 1.5708], dtype=th.float32), + ), + "right_gripper": (th.tensor([0.00], dtype=th.float32), th.tensor([0.05], dtype=th.float32)), + }, +} + + +EEF_POSITION_RANGE = { + "A1": { + "0": (th.tensor([0.0, -0.7, 0.0], dtype=th.float32), th.tensor([0.7, 0.7, 0.7], dtype=th.float32)), + }, + "R1Pro": { + "left": (th.tensor([0.0, -0.65, 0.0], dtype=th.float32), th.tensor([0.65, 0.65, 2.5], dtype=th.float32)), + "right": (th.tensor([0.0, -0.65, 0.0], dtype=th.float32), th.tensor([0.65, 0.65, 2.5], dtype=th.float32)), + }, +} + + +TASK_NAMES_TO_INDICES = { + # B10 + "turning_on_radio": 0, + "picking_up_trash": 1, + "putting_away_Halloween_decorations": 2, + "cleaning_up_plates_and_food": 3, + "can_meat": 4, + "setting_mousetraps": 5, + "hiding_Easter_eggs": 6, + "picking_up_toys": 7, + "rearranging_kitchen_furniture": 8, + "putting_up_Christmas_decorations_inside": 9, + # B20 + "set_up_a_coffee_station_in_your_kitchen": 10, + "putting_dishes_away_after_cleaning": 11, + "preparing_lunch_box": 12, + "loading_the_car": 13, + "carrying_in_groceries": 14, + "bringing_in_wood": 15, + "moving_boxes_to_storage": 16, + "bringing_water": 17, + "tidying_bedroom": 18, + "outfit_a_basic_toolbox": 19, + # B30 + "sorting_vegetables": 20, + "collecting_childrens_toys": 21, + "putting_shoes_on_rack": 22, + "boxing_books_up_for_storage": 23, + "storing_food": 24, + "clearing_food_from_table_into_fridge": 25, + "assembling_gift_baskets": 26, + "sorting_household_items": 27, + "getting_organized_for_work": 28, + "clean_up_your_desk": 29, + # B40 + "setting_the_fire": 30, + "clean_boxing_gloves": 31, + "wash_a_baseball_cap": 32, + "wash_dog_toys": 33, + "hanging_pictures": 34, + "attach_a_camera_to_a_tripod": 35, + "clean_a_patio": 36, + "clean_a_trumpet": 37, + "spraying_for_bugs": 38, + "spraying_fruit_trees": 39, + # B50 + "make_microwave_popcorn": 40, + "cook_cabbage": 41, + "chop_an_onion": 42, + "slicing_vegetables": 43, + "chopping_wood": 44, + "cook_hot_dogs": 45, + "cook_bacon": 46, + "freeze_pies": 47, + "canning_food": 48, + "make_pizza": 49, +} +TASK_INDICES_TO_NAMES = {v: k for k, v in TASK_NAMES_TO_INDICES.items()} + diff --git a/examples/behavior_1k/convert_to_lerobot_v3.py b/examples/behavior_1k/convert_to_lerobot_v3.py new 
diff --git a/examples/behavior_1k/convert_to_lerobot_v3.py b/examples/behavior_1k/convert_to_lerobot_v3.py
new file mode 100755
index 000000000..4c9c4865e
--- /dev/null
+++ b/examples/behavior_1k/convert_to_lerobot_v3.py
@@ -0,0 +1,283 @@
+#!/usr/bin/env python
+
+import argparse
+import logging
+import os
+from pathlib import Path
+
+import h5py
+import numpy as np
+from tqdm import tqdm
+
+from lerobot.utils.utils import init_logging
+
+from .behavior_lerobot_dataset_v3 import BehaviorLeRobotDatasetV3
+from .behaviour_1k_constants import TASK_INDICES_TO_NAMES, TASK_NAMES_TO_INDICES
+
+init_logging()
+
+
+def load_hdf5_episode(hdf5_path: str, episode_id: int = 0) -> dict:
+    """
+    Load episode data from an HDF5 file.
+
+    Args:
+        hdf5_path: Path to the HDF5 file
+        episode_id: Episode ID to load; pass -1 to select the demo with the
+            most samples (default: 0)
+
+    Returns:
+        Dictionary containing episode data
+    """
+    episode_data = {}
+
+    with h5py.File(hdf5_path, "r") as f:
+        # If episode_id is -1, pick the demo with the most samples
+        if episode_id == -1:
+            demo_keys = list(f["data"].keys())
+            num_samples = [f["data"][key].attrs["num_samples"] for key in demo_keys]
+            demo_key = demo_keys[int(np.argmax(num_samples))]
+        else:
+            demo_key = f"demo_{episode_id}"
+
+        if demo_key not in f["data"]:
+            raise ValueError(f"Episode {episode_id} not found in {hdf5_path}")
+
+        demo_data = f["data"][demo_key]
+
+        # Load actions
+        episode_data["action"] = np.array(demo_data["action"][:])
+
+        # Load observations
+        episode_data["obs"] = {}
+        for key in demo_data["obs"].keys():
+            episode_data["obs"][key] = np.array(demo_data["obs"][key][:])
+
+        # Load attributes
+        episode_data["attrs"] = {}
+        for attr_name in demo_data.attrs:
+            episode_data["attrs"][attr_name] = demo_data.attrs[attr_name]
+
+        # Add global attributes
+        for attr_name in f["data"].attrs:
+            episode_data["attrs"][f"global_{attr_name}"] = f["data"].attrs[attr_name]
+
+    return episode_data
+
+
+def convert_episode(
+    data_folder: str,
+    task_id: int,
+    demo_id: int,
+    dataset: BehaviorLeRobotDatasetV3,
+    include_videos: bool = True,
+    include_segmentation: bool = True,
+) -> None:
+    """
+    Convert a single episode from HDF5 to LeRobotDataset v3.0 format.
+
+    Args:
+        data_folder: Base data folder containing HDF5 files
+        task_id: Task ID
+        demo_id: Demo ID (episode ID)
+        dataset: BehaviorLeRobotDatasetV3 instance to add data to
+        include_videos: Whether to include video data
+        include_segmentation: Whether to include segmentation data
+    """
+    # Construct paths
+    task_name = TASK_INDICES_TO_NAMES[task_id]
+    hdf5_path = f"{data_folder}/2025-challenge-rawdata/task-{task_id:04d}/episode_{demo_id:08d}.hdf5"
+
+    if not os.path.exists(hdf5_path):
+        logging.error(f"HDF5 file not found: {hdf5_path}")
+        return
+
+    logging.info(f"Converting episode {demo_id} from task {task_name}")
+
+    # Load episode data
+    try:
+        episode_data = load_hdf5_episode(hdf5_path, episode_id=0)
+    except Exception as e:
+        logging.error(f"Failed to load episode data: {e}")
+        return
+
+    # Filter out segmentation if not requested
+    if not include_segmentation:
+        keys_to_remove = [k for k in episode_data["obs"].keys() if "seg_instance_id" in k]
+        for key in keys_to_remove:
+            del episode_data["obs"][key]
+
+    # Add episode to dataset
+    dataset.add_episode_from_hdf5(
+        hdf5_data=episode_data,
+        task_id=task_id,
+        episode_id=demo_id,
+        include_videos=include_videos,
+    )
+
+
+def convert_dataset(
+    data_folder: str,
+    output_repo_id: str,
+    task_names: list | None = None,
+    episode_ids: list | None = None,
+    max_episodes_per_task: int | None = None,
+    include_videos: bool = True,
+    include_segmentation: bool = True,
+    fps: int = 30,
+    batch_encoding_size: int = 1,
+    image_writer_processes: int = 0,
+    image_writer_threads: int = 4,
+    push_to_hub: bool = False,
+) -> None:
+    """
+    Convert a BEHAVIOR-1K dataset from HDF5 to LeRobotDataset v3.0 format.
+
+    Args:
+        data_folder: Base folder containing HDF5 data
+        output_repo_id: Output repository ID (e.g., "username/dataset-name")
+        task_names: List of task names to convert (None = all tasks)
+        episode_ids: Specific episode IDs to convert (None = all episodes)
+        max_episodes_per_task: Maximum episodes per task to convert
+        include_videos: Whether to include video data
+        include_segmentation: Whether to include segmentation data
+        fps: Frames per second
+        batch_encoding_size: Number of episodes to batch before encoding
+        image_writer_processes: Number of processes for image writing
+        image_writer_threads: Number of threads for image writing
+        push_to_hub: Whether to push to the HuggingFace Hub
+    """
+    # Create output directory
+    output_dir = Path.home() / ".cache/huggingface/lerobot" / output_repo_id
+    output_dir.mkdir(parents=True, exist_ok=True)
+
+    logging.info(f"Converting dataset to: {output_dir}")
+
+    # Initialize dataset
+    dataset = BehaviorLeRobotDatasetV3.create(
+        repo_id=output_repo_id,
+        root=output_dir,
+        fps=fps,
+        robot_type="R1Pro",
+        use_videos=include_videos,
+        video_backend="pyav",
+        batch_encoding_size=batch_encoding_size,
+        image_writer_processes=image_writer_processes,
+        image_writer_threads=image_writer_threads,
+    )
+
+    # Determine which tasks to process
+    if task_names is None:
+        task_names = list(TASK_NAMES_TO_INDICES.keys())
+
+    task_ids = [TASK_NAMES_TO_INDICES[name] for name in task_names]
+
+    # Process each task
+    total_episodes = 0
+    for task_id in tqdm(task_ids, desc="Processing tasks"):
+        task_name = TASK_INDICES_TO_NAMES[task_id]
+        task_folder = f"{data_folder}/2025-challenge-rawdata/task-{task_id:04d}"
+
+        if not os.path.exists(task_folder):
+            logging.warning(f"Task folder not found: {task_folder}")
+            continue
+
+        # Find all episodes for this task
+        if episode_ids is not None:
+            # Use the specified episode IDs (episode_id = task_id * 10000 + local id)
+            task_episode_ids = [eid for eid in episode_ids if eid // 10000 == task_id]
+        else:
+            # Find all episodes in the task folder
+            task_episode_ids = []
+            for filename in os.listdir(task_folder):
+                if filename.startswith("episode_") and filename.endswith(".hdf5"):
+                    eid = int(filename.split("_")[1].split(".")[0])
+                    task_episode_ids.append(eid)
+            task_episode_ids.sort()
+
+        # Limit episodes if requested
+        if max_episodes_per_task is not None:
+            task_episode_ids = task_episode_ids[:max_episodes_per_task]
+
+        logging.info(f"Processing {len(task_episode_ids)} episodes for task {task_name}")
+
+        # Convert each episode
+        for demo_id in tqdm(task_episode_ids, desc=f"Task {task_name}", leave=False):
+            try:
+                convert_episode(
+                    data_folder=data_folder,
+                    task_id=task_id,
+                    demo_id=demo_id,
+                    dataset=dataset,
+                    include_videos=include_videos,
+                    include_segmentation=include_segmentation,
+                )
+                total_episodes += 1
+            except Exception as e:
+                logging.error(f"Failed to convert episode {demo_id}: {e}")
+                continue
+
+    logging.info(f"Converted {total_episodes} episodes total")
+
+    # Finalize dataset
+    logging.info("Finalizing dataset...")
+    dataset.finalize()
+
+    # Push to hub if requested
+    if push_to_hub:
+        logging.info("Pushing dataset to the HuggingFace Hub...")
+        dataset.push_to_hub(
+            private=True,
+            license="apache-2.0",
+        )
+
+    logging.info("Conversion complete!")
+
+
+def main():
+    parser = argparse.ArgumentParser(description="Convert BEHAVIOR-1K data to LeRobotDataset v3.0")
+    parser.add_argument("--data_folder", type=str, required=True, help="Path to the data folder")
+    parser.add_argument(
+        "--output_repo_id",
+        type=str,
+        required=True,
+        help="Output repository ID (e.g., 'username/behavior-dataset-v3')",
+    )
+    parser.add_argument("--task_names", type=str, nargs="+", default=None, help="Task names to convert (default: all)")
+    parser.add_argument("--episode_ids", type=int, nargs="+", default=None, help="Specific episode IDs to convert")
+    parser.add_argument(
+        "--max_episodes_per_task", type=int, default=None, help="Maximum episodes per task to convert"
+    )
+    parser.add_argument("--no_videos", action="store_true", help="Exclude video data")
+    parser.add_argument("--no_segmentation", action="store_true", help="Exclude segmentation data")
+    parser.add_argument("--fps", type=int, default=30, help="Frames per second (default: 30)")
+    parser.add_argument(
+        "--batch_encoding_size", type=int, default=1, help="Number of episodes to batch before encoding videos"
+    )
+    parser.add_argument(
+        "--image_writer_processes", type=int, default=0, help="Number of processes for async image writing"
+    )
+    parser.add_argument("--image_writer_threads", type=int, default=4, help="Number of threads for image writing")
+    parser.add_argument("--push_to_hub", action="store_true", help="Push dataset to the HuggingFace Hub")
+
+    args = parser.parse_args()
+
+    # Convert dataset
+    convert_dataset(
+        data_folder=args.data_folder,
+        output_repo_id=args.output_repo_id,
+        task_names=args.task_names,
+        episode_ids=args.episode_ids,
+        max_episodes_per_task=args.max_episodes_per_task,
+        include_videos=not args.no_videos,
+        include_segmentation=not args.no_segmentation,
+        fps=args.fps,
+        batch_encoding_size=args.batch_encoding_size,
+        image_writer_processes=args.image_writer_processes,
+        image_writer_threads=args.image_writer_threads,
+        push_to_hub=args.push_to_hub,
+    )
+
+
+if __name__ == "__main__":
+    main()
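
The script is driven by argparse, but `convert_dataset()` can also be called directly. A hedged sketch (the paths and repo IDs are placeholders; `/data/behavior-1k` is assumed to contain the `2025-challenge-rawdata/` layout the script expects):

```python
from examples.behavior_1k.convert_to_lerobot_v3 import convert_dataset

convert_dataset(
    data_folder="/data/behavior-1k",  # placeholder; must contain 2025-challenge-rawdata/
    output_repo_id="your-username/behavior-1k-v3",
    task_names=["picking_up_trash", "bringing_water"],
    max_episodes_per_task=5,
    include_segmentation=False,  # equivalent to passing --no_segmentation
    push_to_hub=False,
)
```
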
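Once converted, the dataset can be sanity-checked frame by frame. A sketch assuming the conversion above succeeded; exact tensor layouts depend on the installed LeRobot version, so treat the commented shapes as expectations rather than guarantees:

```python
from examples.behavior_1k.behavior_lerobot_dataset_v3 import BehaviorLeRobotDatasetV3

ds = BehaviorLeRobotDatasetV3("your-username/behavior-1k-v3")
frame = ds[0]

print(frame["action"].shape)             # expected: (23,)
print(frame["observation.state"].shape)  # expected: (256,)
print(frame["task"])                     # task name string, e.g. "picking_up_trash"
```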