mirror of
https://github.com/huggingface/lerobot.git
synced 2026-05-15 16:49:55 +00:00
add scripts for convert behavior-1k to datasetv3
This commit is contained in:
committed by
fracapuano
parent
0f551df8f4
commit
950a6fb83d
@@ -0,0 +1,223 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
import json
|
||||
import numpy as np
|
||||
import torch as th
|
||||
from pathlib import Path
|
||||
from typing import Dict, Any
|
||||
|
||||
from lerobot.datasets.lerobot_dataset import LeRobotDataset
|
||||
|
||||
from .behaviour_1k_constants import (
|
||||
TASK_INDICES_TO_NAMES,
|
||||
ROBOT_CAMERA_NAMES,
|
||||
PROPRIOCEPTION_INDICES,
|
||||
BEHAVIOR_DATASET_FEATURES,
|
||||
)
|
||||
|
||||
import logging
|
||||
from lerobot.utils.utils import init_logging
|
||||
|
||||
init_logging()
|
||||
|
||||
class BehaviorLeRobotDatasetV3(LeRobotDataset):
    """
    Extends LeRobotDataset v3.0 for BEHAVIOR-1K specific requirements.
    Handles task-based episode organization and BEHAVIOR-1K metadata.

    On top of the parent dataset this class maintains:
      - task_episode_mapping: task_id -> list of dataset episode indices
      - episode_task_mapping: dataset episode index -> task info dict
      - behavior_metadata: robot/task/camera constants plus both mappings,
        persisted to meta/behavior_metadata.json by finalize().
    """

    @classmethod
    def create(
        cls,
        repo_id: str,
        fps: int = 30,
        root: str | Path | None = None,
        robot_type: str = "R1Pro",
        use_videos: bool = True,
        video_backend: str = "pyav",
        batch_encoding_size: int = 1,
        image_writer_processes: int = 0,
        image_writer_threads: int = 4,
    ) -> "BehaviorLeRobotDatasetV3":
        """
        Create a new BEHAVIOR-1K dataset in v3.0 format.

        Args:
            repo_id: HuggingFace repository ID
            fps: Frames per second (default: 30)
            root: Local directory for the dataset
            robot_type: Robot type (default: "R1Pro"); must be a key of both
                PROPRIOCEPTION_INDICES and ROBOT_CAMERA_NAMES
            use_videos: Whether to encode videos (default: True)
            video_backend: Video backend to use (default: "pyav")
            batch_encoding_size: Number of episodes to batch before encoding videos
            image_writer_processes: Number of processes for async image writing
            image_writer_threads: Number of threads per process for image writing

        Returns:
            BehaviorLeRobotDatasetV3 instance
        """
        # Create the dataset using parent class method with BEHAVIOR-1K features.
        # tolerance_s (timestamp tolerance, seconds) is hard-coded to 1e-4 here.
        obj = super().create(
            repo_id=repo_id,
            fps=fps,
            features=BEHAVIOR_DATASET_FEATURES,
            root=root,
            robot_type=robot_type,
            use_videos=use_videos,
            tolerance_s=1e-4,
            image_writer_processes=image_writer_processes,
            image_writer_threads=image_writer_threads,
            video_backend=video_backend,
            batch_encoding_size=batch_encoding_size,
        )

        # Convert to BehaviorLeRobotDatasetV3 instance. The parent classmethod
        # returns a plain LeRobotDataset, so the class is swapped in place
        # instead of re-constructing the object.
        obj.__class__ = cls

        # Initialize BEHAVIOR-1K specific attributes
        obj.task_episode_mapping = {}  # Maps task_id to list of episode indices
        obj.episode_task_mapping = {}  # Maps episode_index to task info

        # Additional metadata for BEHAVIOR-1K
        obj.behavior_metadata = {
            "robot_type": robot_type,
            "task_names": TASK_INDICES_TO_NAMES,
            "proprioception_indices": PROPRIOCEPTION_INDICES[robot_type],
            "camera_names": ROBOT_CAMERA_NAMES[robot_type],
        }

        logging.info(f"Created BehaviorLeRobotDatasetV3 with repo_id: {repo_id}")
        return obj

    def __init__(self, *args, **kwargs):
        """
        Initialize from existing dataset.
        Use the create() classmethod to create a new dataset.
        """
        super().__init__(*args, **kwargs)

        # Initialize BEHAVIOR-1K specific attributes for loading existing datasets
        self.task_episode_mapping = {}
        self.episode_task_mapping = {}
        self.behavior_metadata = {}

        # Try to load BEHAVIOR-1K metadata if it exists.
        # NOTE(review): json.dump stringifies int dict keys, so the mappings
        # loaded here carry str keys while freshly-built ones (see
        # add_episode_from_hdf5) use int keys — confirm callers normalize.
        metadata_path = self.root / "meta" / "behavior_metadata.json"
        if metadata_path.exists():
            with open(metadata_path, "r") as f:
                stored_metadata = json.load(f)
                self.behavior_metadata = stored_metadata
                self.task_episode_mapping = stored_metadata.get("task_episode_mapping", {})
                self.episode_task_mapping = stored_metadata.get("episode_task_mapping", {})

    def add_episode_from_hdf5(
        self,
        hdf5_data: Dict[str, Any],
        task_id: int,
        episode_id: int,
        include_videos: bool = True,
    ) -> None:
        """
        Add an episode from HDF5 data to the dataset.

        Args:
            hdf5_data: Dictionary containing the HDF5 episode data; expects
                "action" plus an "obs" dict with "robot_r1::proprio",
                "robot_r1::cam_rel_poses", "task::low_dim" and per-camera
                image streams, and optionally "attrs"
            task_id: Task ID for this episode (key of TASK_INDICES_TO_NAMES)
            episode_id: Episode ID (should be task_id * 10000 + local_episode_id)
            include_videos: Whether to include video data
        """
        task_name = TASK_INDICES_TO_NAMES[task_id]
        num_frames = len(hdf5_data["action"])

        logging.info(f"Adding episode {episode_id} (task: {task_name}) with {num_frames} frames")

        # Process each frame
        for frame_idx in range(num_frames):
            frame_data = {
                "action": hdf5_data["action"][frame_idx],
                "observation.state": hdf5_data["obs"]["robot_r1::proprio"][frame_idx],
                "observation.cam_rel_poses": hdf5_data["obs"]["robot_r1::cam_rel_poses"][frame_idx],
                "observation.task_info": hdf5_data["obs"]["task::low_dim"][frame_idx],
                "task": task_name,
                # Timestamps are synthesized from the frame index at the
                # dataset fps; assumes uniformly sampled frames.
                "timestamp": frame_idx / self.fps,
            }

            # Add video frames if requested
            if include_videos:
                for modality in ["rgb", "depth_linear", "seg_instance_id"]:
                    # Map depth_linear to depth for consistency with the keys
                    # declared in BEHAVIOR_DATASET_FEATURES
                    output_modality = "depth" if modality == "depth_linear" else modality

                    for camera_name, robot_camera_name in ROBOT_CAMERA_NAMES[self.robot_type].items():
                        key = f"observation.images.{output_modality}.{camera_name}"
                        hdf5_key = f"{robot_camera_name}::{modality}"

                        # Missing streams are skipped silently (e.g. when
                        # segmentation was filtered out by the caller)
                        if hdf5_key in hdf5_data["obs"]:
                            # Get the frame data
                            frame = hdf5_data["obs"][hdf5_key][frame_idx]

                            # Handle different data types
                            if isinstance(frame, th.Tensor):
                                frame = frame.numpy()

                            # Ensure correct shape: single-channel modalities
                            # may arrive as (H, W); append a channel dim to
                            # match the declared (H, W, 1) feature shape
                            if modality == "seg_instance_id" and len(frame.shape) == 2:
                                # Add channel dimension for grayscale
                                frame = np.expand_dims(frame, axis=-1)
                            elif modality == "depth_linear" and len(frame.shape) == 2:
                                frame = np.expand_dims(frame, axis=-1)

                            frame_data[key] = frame

            # Add frame to dataset
            self.add_frame(frame_data)

        # Save episode with metadata
        episode_metadata = {
            "task_id": task_id,
            "task_name": task_name,
            "original_episode_id": episode_id,
        }

        # Add any additional HDF5 attributes as metadata
        if "attrs" in hdf5_data:
            for attr_name, attr_value in hdf5_data["attrs"].items():
                if isinstance(attr_value, (list, np.ndarray)):
                    episode_metadata[attr_name] = list(attr_value)
                else:
                    episode_metadata[attr_name] = attr_value

        # Save the episode
        # NOTE(review): episode_metadata is built above but never passed to
        # save_episode (called with episode_data=None) nor stored anywhere —
        # it is currently discarded; confirm whether save_episode should
        # receive it.
        self.save_episode(episode_data=None)

        # Track task-episode mapping (the new episode's index is the current
        # last episode of the dataset)
        if task_id not in self.task_episode_mapping:
            self.task_episode_mapping[task_id] = []
        self.task_episode_mapping[task_id].append(self.num_episodes - 1)
        self.episode_task_mapping[self.num_episodes - 1] = {
            "task_id": task_id,
            "task_name": task_name,
            "original_episode_id": episode_id,
        }

    def finalize(self) -> None:
        """Finalize the dataset and save additional BEHAVIOR-1K metadata."""
        # Save BEHAVIOR-1K specific metadata next to the standard meta files
        metadata_path = self.root / "meta" / "behavior_metadata.json"
        metadata_path.parent.mkdir(parents=True, exist_ok=True)

        self.behavior_metadata.update({
            "task_episode_mapping": self.task_episode_mapping,
            "episode_task_mapping": self.episode_task_mapping,
            "total_tasks": len(self.task_episode_mapping),
            "total_episodes": self.num_episodes,
            "total_frames": self.num_frames,
        })

        # NOTE(review): for datasets built via create(), behavior_metadata
        # contains "proprioception_indices" whose values are slice objects
        # (see PROPRIOCEPTION_INDICES) — json.dump cannot serialize slices
        # and would raise TypeError here; confirm and convert if needed.
        with open(metadata_path, "w") as f:
            json.dump(self.behavior_metadata, f, indent=2)

        # Finalize the parent dataset
        super().finalize()

        logging.info(f"Finalized dataset with {self.num_episodes} episodes "
                     f"and {self.num_frames} frames across {len(self.task_episode_mapping)} tasks")
|
||||
@@ -0,0 +1,320 @@
|
||||
import numpy as np
|
||||
import torch as th
|
||||
from collections import OrderedDict
|
||||
|
||||
|
||||
# Mapping from short camera aliases (used in dataset feature keys) to the
# fully-qualified sensor names that prefix the HDF5 observation keys.
ROBOT_CAMERA_NAMES = {
    "A1": {
        "external": "external::external_camera",
        "wrist": "external::wrist_camera",
    },
    "R1Pro": {
        "left_wrist": "robot_r1::robot_r1:left_realsense_link:Camera:0",
        "right_wrist": "robot_r1::robot_r1:right_realsense_link:Camera:0",
        "head": "robot_r1::robot_r1:zed_link:Camera:0",
    },
}

# Camera resolutions and corresponding intrinsics
HEAD_RESOLUTION = (720, 720)  # head camera (height, width)
WRIST_RESOLUTION = (480, 480)  # wrist cameras (height, width)
# TODO: Fix A1
# 3x3 matrices laid out as [[fx, 0, cx], [0, fy, cy], [0, 0, 1]].
CAMERA_INTRINSICS = {
    "A1": {
        # NOTE(review): original comment claimed 240x240, but the principal
        # point (360, 360) suggests a 720x720 image — see TODO above; confirm.
        "external": np.array([[306.0, 0.0, 360.0], [0.0, 306.0, 360.0], [0.0, 0.0, 1.0]], dtype=np.float32),
        # NOTE(review): original comment claimed 240x240, but the principal
        # point (240, 240) suggests 480x480 — confirm.
        "wrist": np.array(
            [[388.6639, 0.0, 240.0], [0.0, 388.6639, 240.0], [0.0, 0.0, 1.0]], dtype=np.float32
        ),
    },
    "R1Pro": {
        "head": np.array([[306.0, 0.0, 360.0], [0.0, 306.0, 360.0], [0.0, 0.0, 1.0]], dtype=np.float32),  # 720x720
        "left_wrist": np.array(
            [[388.6639, 0.0, 240.0], [0.0, 388.6639, 240.0], [0.0, 0.0, 1.0]], dtype=np.float32
        ),  # 480x480
        "right_wrist": np.array(
            [[388.6639, 0.0, 240.0], [0.0, 388.6639, 240.0], [0.0, 0.0, 1.0]], dtype=np.float32
        ),  # 480x480
    },
}
|
||||
|
||||
|
||||
# Dataset features for BEHAVIOR-1K LeRobotDataset v3.0.
# Low-dimensional features are declared literally; the nine video features
# (one per modality/camera pair) are generated below so shapes stay in sync.
BEHAVIOR_DATASET_FEATURES = {
    # Actions
    "action": {
        "dtype": "float32",
        "shape": (23,),  # 23-dimensional action space for R1Pro
        "names": None,
    },
    # Proprioception
    "observation.state": {
        "dtype": "float32",
        "shape": (256,),  # Full proprioception state
        "names": None,
    },
    # Camera relative poses
    "observation.cam_rel_poses": {
        "dtype": "float32",
        "shape": (21,),  # 3 cameras * 7 (pos + quat)
        "names": None,
    },
    # Task information
    "observation.task_info": {
        "dtype": "float32",
        "shape": (None,),  # Variable size
        "names": None,
    },
}

# (height, width) per camera alias and channel count per image modality.
_CAMERA_RESOLUTIONS = {
    "head": (720, 720),
    "left_wrist": (480, 480),
    "right_wrist": (480, 480),
}
_MODALITY_CHANNELS = (("rgb", 3), ("depth", 1), ("seg_instance_id", 1))

# Insertion order matters and mirrors the original literal: all rgb entries,
# then depth, then seg_instance_id, each in head/left_wrist/right_wrist order.
for _modality, _channels in _MODALITY_CHANNELS:
    for _camera, (_height, _width) in _CAMERA_RESOLUTIONS.items():
        BEHAVIOR_DATASET_FEATURES[f"observation.images.{_modality}.{_camera}"] = {
            "dtype": "video",
            "shape": [_height, _width, _channels],
            "names": ["height", "width", "channels"],
        }
|
||||
|
||||
|
||||
# Action indices: per-robot slices into the flat action vector, keyed by
# the kinematic component they address. Plain slice objects are equivalent
# to the np.s_[...] shorthand.
ACTION_QPOS_INDICES = {
    "A1": OrderedDict(
        [
            ("arm", slice(0, 6)),
            ("gripper", slice(6, 7)),
        ]
    ),
    "R1Pro": OrderedDict(
        [
            ("base", slice(0, 3)),
            ("torso", slice(3, 7)),
            ("left_arm", slice(7, 14)),
            ("left_gripper", slice(14, 15)),
            ("right_arm", slice(15, 22)),
            ("right_gripper", slice(22, 23)),
        ]
    ),
}
|
||||
|
||||
|
||||
# Proprioception configuration.
# Every proprioception layout below is a single contiguous run of segments,
# so the slices are derived from (name, size) pairs with a running offset.


def _contiguous_slices(segments):
    """Return an OrderedDict of name -> slice for back-to-back segments of the given sizes."""
    layout = OrderedDict()
    offset = 0
    for name, size in segments:
        layout[name] = slice(offset, offset + size)
        offset += size
    return layout


PROPRIOCEPTION_INDICES = {
    "A1": _contiguous_slices(
        [
            ("joint_qpos", 8),       # 0:8
            ("joint_qpos_sin", 8),   # 8:16
            ("joint_qpos_cos", 8),   # 16:24
            ("joint_qvel", 8),       # 24:32
            ("joint_qeffort", 8),    # 32:40
            ("eef_0_pos", 3),        # 40:43
            ("eef_0_quat", 4),       # 43:47
            ("grasp_0", 1),          # 47:48
            ("gripper_0_qpos", 2),   # 48:50
            ("gripper_0_qvel", 2),   # 50:52
        ]
    ),
    "R1Pro": _contiguous_slices(
        [
            # Full robot joint positions (0:28); the first 6 are base joints,
            # which is NOT allowed in standard track
            ("joint_qpos", 28),
            # sin/cos encodings of the full joint positions — same base-joint
            # caveat as above
            ("joint_qpos_sin", 28),  # 28:56
            ("joint_qpos_cos", 28),  # 56:84
            ("joint_qvel", 28),      # 84:112
            ("joint_qeffort", 28),   # 112:140
            ("robot_pos", 3),        # 140:143 global pos, NOT allowed in standard track
            ("robot_ori_cos", 3),    # 143:146 global ori, NOT allowed in standard track
            ("robot_ori_sin", 3),    # 146:149 global ori, NOT allowed in standard track
            ("robot_2d_ori", 1),     # 149:150 2D global ori, NOT allowed in standard track
            ("robot_2d_ori_cos", 1), # 150:151 2D global ori, NOT allowed in standard track
            ("robot_2d_ori_sin", 1), # 151:152 2D global ori, NOT allowed in standard track
            ("robot_lin_vel", 3),    # 152:155
            ("robot_ang_vel", 3),    # 155:158
            ("arm_left_qpos", 7),    # 158:165
            ("arm_left_qpos_sin", 7),   # 165:172
            ("arm_left_qpos_cos", 7),   # 172:179
            ("arm_left_qvel", 7),    # 179:186
            ("eef_left_pos", 3),     # 186:189
            ("eef_left_quat", 4),    # 189:193
            ("gripper_left_qpos", 2),   # 193:195
            ("gripper_left_qvel", 2),   # 195:197
            ("arm_right_qpos", 7),   # 197:204
            ("arm_right_qpos_sin", 7),  # 204:211
            ("arm_right_qpos_cos", 7),  # 211:218
            ("arm_right_qvel", 7),   # 218:225
            ("eef_right_pos", 3),    # 225:228
            ("eef_right_quat", 4),   # 228:232
            ("gripper_right_qpos", 2),  # 232:234
            ("gripper_right_qvel", 2),  # 234:236
            ("trunk_qpos", 4),       # 236:240
            ("trunk_qvel", 4),       # 240:244
            ("base_qpos", 3),        # 244:247 base joint position, NOT allowed in standard track
            ("base_qpos_sin", 3),    # 247:250 base joint position, NOT allowed in standard track
            ("base_qpos_cos", 3),    # 250:253 base joint position, NOT allowed in standard track
            ("base_qvel", 3),        # 253:256
        ]
    ),
}
|
||||
|
||||
# Proprioception indices: slices into the raw qpos vector per robot.
# The R1Pro arm slices are strided — left and right arm joints alternate
# in positions 10..23, so left takes even offsets and right takes odd ones.
PROPRIO_QPOS_INDICES = {
    "A1": OrderedDict(
        [
            ("arm", slice(0, 6)),
            ("gripper", slice(6, 8)),
        ]
    ),
    "R1Pro": OrderedDict(
        [
            ("torso", slice(6, 10)),
            ("left_arm", slice(10, 24, 2)),
            ("right_arm", slice(11, 24, 2)),
            ("left_gripper", slice(24, 26)),
            ("right_gripper", slice(26, 28)),
        ]
    ),
}
|
||||
|
||||
|
||||
# Joint limits (lower, upper) per robot, keyed by action-component name.
# Presumably radians for revolute joints and meters for the prismatic
# gripper joints — TODO confirm against the robot descriptions.
JOINT_RANGE = {
    "A1": {
        "arm": (
            th.tensor([-2.8798, 0.0, -3.3161, -2.8798, -1.6581, -2.8798], dtype=th.float32),
            th.tensor([2.8798, 3.1415, 0.0, 2.8798, 1.6581, 2.8798], dtype=th.float32),
        ),
        "gripper": (th.tensor([0.00], dtype=th.float32), th.tensor([0.03], dtype=th.float32)),
    },
    "R1Pro": {
        # NOTE(review): base bounds look like (x, y, yaw) limits, but the
        # exact semantics are not visible here — confirm.
        "base": (th.tensor([-0.75, -0.75, -1.0], dtype=th.float32), th.tensor([0.75, 0.75, 1.0], dtype=th.float32)),
        "torso": (
            th.tensor([-1.1345, -2.7925, -1.8326, -3.0543], dtype=th.float32),
            th.tensor([1.8326, 2.5307, 1.5708, 3.0543], dtype=th.float32),
        ),
        "left_arm": (
            th.tensor([-4.4506, -0.1745, -2.3562, -2.0944, -2.3562, -1.0472, -1.5708], dtype=th.float32),
            th.tensor([1.3090, 3.1416, 2.3562, 0.3491, 2.3562, 1.0472, 1.5708], dtype=th.float32),
        ),
        "left_gripper": (th.tensor([0.00], dtype=th.float32), th.tensor([0.05], dtype=th.float32)),
        "right_arm": (
            th.tensor([-4.4506, -3.1416, -2.3562, -2.0944, -2.3562, -1.0472, -1.5708], dtype=th.float32),
            th.tensor([1.3090, 0.1745, 2.3562, 0.3491, 2.3562, 1.0472, 1.5708], dtype=th.float32),
        ),
        "right_gripper": (th.tensor([0.00], dtype=th.float32), th.tensor([0.05], dtype=th.float32)),
    },
}


# Axis-aligned bounds (lower, upper) on end-effector positions, per arm.
# Presumably meters in the robot's local frame — TODO confirm.
EEF_POSITION_RANGE = {
    "A1": {
        "0": (th.tensor([0.0, -0.7, 0.0], dtype=th.float32), th.tensor([0.7, 0.7, 0.7], dtype=th.float32)),
    },
    "R1Pro": {
        "left": (th.tensor([0.0, -0.65, 0.0], dtype=th.float32), th.tensor([0.65, 0.65, 2.5], dtype=th.float32)),
        "right": (th.tensor([0.0, -0.65, 0.0], dtype=th.float32), th.tensor([0.65, 0.65, 2.5], dtype=th.float32)),
    },
}
|
||||
|
||||
|
||||
# Benchmark task names in canonical order; a task's index is its position
# in this tuple. Grouped by benchmark tier (B10, B20, ...).
_ORDERED_TASK_NAMES = (
    # B10
    "turning_on_radio",
    "picking_up_trash",
    "putting_away_Halloween_decorations",
    "cleaning_up_plates_and_food",
    "can_meat",
    "setting_mousetraps",
    "hiding_Easter_eggs",
    "picking_up_toys",
    "rearranging_kitchen_furniture",
    "putting_up_Christmas_decorations_inside",
    # B20
    "set_up_a_coffee_station_in_your_kitchen",
    "putting_dishes_away_after_cleaning",
    "preparing_lunch_box",
    "loading_the_car",
    "carrying_in_groceries",
    "bringing_in_wood",
    "moving_boxes_to_storage",
    "bringing_water",
    "tidying_bedroom",
    "outfit_a_basic_toolbox",
    # B30
    "sorting_vegetables",
    "collecting_childrens_toys",
    "putting_shoes_on_rack",
    "boxing_books_up_for_storage",
    "storing_food",
    "clearing_food_from_table_into_fridge",
    "assembling_gift_baskets",
    "sorting_household_items",
    "getting_organized_for_work",
    "clean_up_your_desk",
    # B40
    "setting_the_fire",
    "clean_boxing_gloves",
    "wash_a_baseball_cap",
    "wash_dog_toys",
    "hanging_pictures",
    "attach_a_camera_to_a_tripod",
    "clean_a_patio",
    "clean_a_trumpet",
    "spraying_for_bugs",
    "spraying_fruit_trees",
    # B50
    "make_microwave_popcorn",
    "cook_cabbage",
    "chop_an_onion",
    "slicing_vegetables",
    "chopping_wood",
    "cook_hot_dogs",
    "cook_bacon",
    "freeze_pies",
    "canning_food",
    "make_pizza",
)

# Forward and inverse lookups derived from the single canonical ordering.
TASK_NAMES_TO_INDICES = {name: index for index, name in enumerate(_ORDERED_TASK_NAMES)}
TASK_INDICES_TO_NAMES = dict(enumerate(_ORDERED_TASK_NAMES))
|
||||
|
||||
+283
@@ -0,0 +1,283 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
import argparse
|
||||
import h5py
|
||||
import numpy as np
|
||||
import os
|
||||
import torch as th
|
||||
from pathlib import Path
|
||||
from tqdm import tqdm
|
||||
import logging
|
||||
|
||||
from .behavior_lerobot_dataset_v3 import BehaviorLeRobotDatasetV3
|
||||
from .behaviour_1k_constants import TASK_NAMES_TO_INDICES, TASK_INDICES_TO_NAMES, BEHAVIOR_DATASET_FEATURES
|
||||
|
||||
from lerobot.utils.utils import init_logging
|
||||
|
||||
init_logging()
|
||||
|
||||
def load_hdf5_episode(hdf5_path: str, episode_id: int = 0) -> dict:
    """
    Load one episode ("demo") from an HDF5 file.

    Args:
        hdf5_path: Path to the HDF5 file
        episode_id: Episode ID to load (default: 0). Pass -1 to load the
            demo with the largest ``num_samples`` attribute.

    Returns:
        Dictionary with keys:
            "action": numpy array of per-frame actions
            "obs": dict mapping observation name -> numpy array
            "attrs": per-demo attributes, plus file-level attributes copied
                under "global_<name>" keys

    Raises:
        ValueError: If the requested episode is not present in the file.
    """
    episode_data = {}

    with h5py.File(hdf5_path, "r") as f:
        data_group = f["data"]

        if episode_id == -1:
            # Pick the demo with the most samples by selecting its key
            # directly. BUGFIX: the previous code took the positional index
            # of the maximum within keys() order and used it as the demo
            # number, but HDF5 group keys iterate in alphanumeric order
            # ("demo_10" sorts before "demo_2"), so that index does not
            # correspond to the demo number in general.
            demo_key = max(data_group.keys(), key=lambda k: data_group[k].attrs["num_samples"])
        else:
            demo_key = f"demo_{episode_id}"

        if demo_key not in data_group:
            raise ValueError(f"Episode {episode_id} not found in {hdf5_path}")

        demo_data = data_group[demo_key]

        # Load actions (copied out of the file so it stays valid after close)
        episode_data["action"] = np.array(demo_data["action"][:])

        # Load observations
        episode_data["obs"] = {}
        for key in demo_data["obs"].keys():
            episode_data["obs"][key] = np.array(demo_data["obs"][key][:])

        # Load per-demo attributes
        episode_data["attrs"] = {}
        for attr_name in demo_data.attrs:
            episode_data["attrs"][attr_name] = demo_data.attrs[attr_name]

        # Add global (file-level) attributes, namespaced to avoid clashes
        for attr_name in f["data"].attrs:
            episode_data["attrs"][f"global_{attr_name}"] = f["data"].attrs[attr_name]

    return episode_data
|
||||
|
||||
|
||||
def convert_episode(
    data_folder: str,
    output_repo_id: str,
    task_id: int,
    demo_id: int,
    dataset: BehaviorLeRobotDatasetV3,
    include_videos: bool = True,
    include_segmentation: bool = True,
) -> None:
    """
    Convert a single episode from HDF5 to LeRobotDataset v3.0 format.

    Missing files and load failures are logged and skipped rather than raised.

    Args:
        data_folder: Base data folder containing HDF5 files
        output_repo_id: Output repository ID for the dataset
            (NOTE(review): currently unused inside this function)
        task_id: Task ID
        demo_id: Demo ID (episode ID); also selects the source file name
        dataset: BehaviorLeRobotDatasetV3 instance to add data to
        include_videos: Whether to include video data
        include_segmentation: Whether to include segmentation data
    """
    task_name = TASK_INDICES_TO_NAMES[task_id]
    hdf5_path = f"{data_folder}/2025-challenge-rawdata/task-{task_id:04d}/episode_{demo_id:08d}.hdf5"

    # Guard clause: nothing to do when the source file is absent.
    if not os.path.exists(hdf5_path):
        logging.error(f"HDF5 file not found: {hdf5_path}")
        return

    logging.info(f"Converting episode {demo_id} from task {task_name}")

    # Each per-episode file is expected to hold its data as demo_0.
    try:
        episode_data = load_hdf5_episode(hdf5_path, episode_id=0)
    except Exception as err:
        logging.error(f"Failed to load episode data: {err}")
        return

    # Drop segmentation streams when they were not requested; remaining
    # observation keys keep their original relative order.
    if not include_segmentation:
        episode_data["obs"] = {
            obs_key: obs_value
            for obs_key, obs_value in episode_data["obs"].items()
            if "seg_instance_id" not in obs_key
        }

    # Hand the episode over to the dataset writer.
    dataset.add_episode_from_hdf5(
        hdf5_data=episode_data,
        task_id=task_id,
        episode_id=demo_id,
        include_videos=include_videos,
    )
|
||||
|
||||
|
||||
def convert_dataset(
    data_folder: str,
    output_repo_id: str,
    task_names: list[str] | None = None,
    episode_ids: list[int] | None = None,
    max_episodes_per_task: int | None = None,
    include_videos: bool = True,
    include_segmentation: bool = True,
    fps: int = 30,
    batch_encoding_size: int = 1,
    image_writer_processes: int = 0,
    image_writer_threads: int = 4,
    push_to_hub: bool = False,
) -> None:
    """
    Convert BEHAVIOR-1K dataset from HDF5 to LeRobotDataset v3.0 format.

    Args:
        data_folder: Base folder containing HDF5 data
            (expects a "2025-challenge-rawdata/task-XXXX" layout)
        output_repo_id: Output repository ID (e.g., "username/dataset-name")
        task_names: List of task names to convert (None = all tasks)
        episode_ids: Specific episode IDs to convert (None = all episodes)
        max_episodes_per_task: Maximum episodes per task to convert
        include_videos: Whether to include video data
        include_segmentation: Whether to include segmentation data
        fps: Frames per second
        batch_encoding_size: Number of episodes to batch before encoding
        image_writer_processes: Number of processes for image writing
        image_writer_threads: Number of threads for image writing
        push_to_hub: Whether to push to HuggingFace Hub (private repo,
            apache-2.0 license)
    """
    # Create output directory — always the default HF LeRobot cache location.
    output_dir = Path.home() / ".cache/huggingface/lerobot" / output_repo_id
    output_dir.mkdir(parents=True, exist_ok=True)

    logging.info(f"Converting dataset to: {output_dir}")

    # Initialize dataset.
    # NOTE(review): robot_type and video_backend are hard-coded here even
    # though BehaviorLeRobotDatasetV3.create exposes them — confirm intended.
    dataset = BehaviorLeRobotDatasetV3.create(
        repo_id=output_repo_id,
        root=output_dir,
        fps=fps,
        robot_type="R1Pro",
        use_videos=include_videos,
        video_backend="pyav",
        batch_encoding_size=batch_encoding_size,
        image_writer_processes=image_writer_processes,
        image_writer_threads=image_writer_threads,
    )

    # Determine which tasks to process
    if task_names is None:
        task_names = list(TASK_NAMES_TO_INDICES.keys())

    task_ids = [TASK_NAMES_TO_INDICES[name] for name in task_names]

    # Process each task
    total_episodes = 0
    for task_id in tqdm(task_ids, desc="Processing tasks"):
        task_name = TASK_INDICES_TO_NAMES[task_id]
        task_folder = f"{data_folder}/2025-challenge-rawdata/task-{task_id:04d}"

        if not os.path.exists(task_folder):
            logging.warning(f"Task folder not found: {task_folder}")
            continue

        # Find all episodes for this task
        if episode_ids is not None:
            # Use specified episode IDs; assumes IDs encode the task as
            # eid // 10000 (i.e. eid = task_id * 10000 + local episode id)
            task_episode_ids = [eid for eid in episode_ids if eid // 10000 == task_id]
        else:
            # Find all episodes in the task folder, matching
            # "episode_<number>.hdf5" file names
            task_episode_ids = []
            for filename in os.listdir(task_folder):
                if filename.startswith("episode_") and filename.endswith(".hdf5"):
                    eid = int(filename.split("_")[1].split(".")[0])
                    task_episode_ids.append(eid)
            task_episode_ids.sort()

        # Limit episodes if requested
        if max_episodes_per_task is not None:
            task_episode_ids = task_episode_ids[:max_episodes_per_task]

        logging.info(f"Processing {len(task_episode_ids)} episodes for task {task_name}")

        # Convert each episode; failures are logged and skipped so a single
        # bad episode does not abort the whole conversion.
        # NOTE(review): convert_episode also returns early (without raising)
        # for missing/corrupt files, so total_episodes counts attempts, not
        # episodes actually written — confirm whether that is intended.
        for demo_id in tqdm(task_episode_ids, desc=f"Task {task_name}", leave=False):
            try:
                convert_episode(
                    data_folder=data_folder,
                    output_repo_id=output_repo_id,
                    task_id=task_id,
                    demo_id=demo_id,
                    dataset=dataset,
                    include_videos=include_videos,
                    include_segmentation=include_segmentation,
                )
                total_episodes += 1
            except Exception as e:
                logging.error(f"Failed to convert episode {demo_id}: {e}")
                continue

    logging.info(f"Converted {total_episodes} episodes total")

    # Finalize dataset (writes BEHAVIOR-1K metadata, then parent finalize)
    logging.info("Finalizing dataset...")
    dataset.finalize()

    # Push to hub if requested
    if push_to_hub:
        logging.info("Pushing dataset to HuggingFace Hub...")
        dataset.push_to_hub(
            private=True,
            license="apache-2.0",
        )

    logging.info("Conversion complete!")
|
||||
|
||||
|
||||
def main():
    """Command-line entry point: parse arguments and run the conversion."""
    parser = argparse.ArgumentParser(description="Convert BEHAVIOR-1K data to LeRobotDataset v3.0")

    # Input/output selection
    parser.add_argument("--data_folder", type=str, required=True, help="Path to the data folder")
    parser.add_argument(
        "--output_repo_id",
        type=str,
        required=True,
        help="Output repository ID (e.g., 'username/behavior-dataset-v3')",
    )
    parser.add_argument("--task_names", type=str, nargs="+", default=None, help="Task names to convert (default: all)")
    parser.add_argument("--episode_ids", type=int, nargs="+", default=None, help="Specific episode IDs to convert")
    parser.add_argument("--max_episodes_per_task", type=int, default=None, help="Maximum episodes per task to convert")

    # Content toggles
    parser.add_argument("--no_videos", action="store_true", help="Exclude video data")
    parser.add_argument("--no_segmentation", action="store_true", help="Exclude segmentation data")

    # Encoding / performance knobs
    parser.add_argument("--fps", type=int, default=30, help="Frames per second (default: 30)")
    parser.add_argument(
        "--batch_encoding_size", type=int, default=1, help="Number of episodes to batch before encoding videos"
    )
    parser.add_argument(
        "--image_writer_processes", type=int, default=0, help="Number of processes for async image writing"
    )
    parser.add_argument("--image_writer_threads", type=int, default=4, help="Number of threads for image writing")

    # Publishing
    parser.add_argument("--push_to_hub", action="store_true", help="Push dataset to HuggingFace Hub")

    cli = parser.parse_args()

    # Convert dataset: the negative CLI flags are inverted into the positive
    # include_* parameters expected by convert_dataset.
    convert_dataset(
        data_folder=cli.data_folder,
        output_repo_id=cli.output_repo_id,
        task_names=cli.task_names,
        episode_ids=cli.episode_ids,
        max_episodes_per_task=cli.max_episodes_per_task,
        include_videos=not cli.no_videos,
        include_segmentation=not cli.no_segmentation,
        fps=cli.fps,
        batch_encoding_size=cli.batch_encoding_size,
        image_writer_processes=cli.image_writer_processes,
        image_writer_threads=cli.image_writer_threads,
        push_to_hub=cli.push_to_hub,
    )


if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user