mirror of
https://github.com/huggingface/lerobot.git
synced 2026-05-11 14:49:43 +00:00
Compare commits
11 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 03cce79c88 | |||
| 3918ab7882 | |||
| 65b0e73ae4 | |||
| ca7c5fcdfe | |||
| 28f8098df4 | |||
| db7d501281 | |||
| 88380fe34e | |||
| 154abfd233 | |||
| dc14266762 | |||
| fd623e0cc5 | |||
| a52e88d349 |
@@ -0,0 +1,464 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
# Copyright 2025 The HuggingFace Inc. team. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
"""
|
||||
BehaviorLeRobotDatasetV3: A wrapper around LeRobotDataset v3.0 for loading BEHAVIOR-1K data.
|
||||
|
||||
This wrapper extends LeRobotDataset to support BEHAVIOR-1K specific features:
|
||||
- Modality and camera selection (rgb, depth, seg_instance_id)
|
||||
- Efficient chunk streaming mode with keyframe access
|
||||
- Additional BEHAVIOR-1K metadata (cam_rel_poses, task_info, etc.)
|
||||
"""
|
||||
|
||||
import logging
|
||||
from collections.abc import Callable
|
||||
from pathlib import Path
|
||||
|
||||
import datasets
|
||||
import numpy as np
|
||||
from behaviour_1k_constants import ROBOT_CAMERA_NAMES, ROBOT_TYPE
|
||||
from torch.utils.data import Dataset, get_worker_info
|
||||
|
||||
from lerobot.datasets.lerobot_dataset import CODEBASE_VERSION, LeRobotDataset, LeRobotDatasetMetadata
|
||||
from lerobot.datasets.utils import (
|
||||
check_delta_timestamps,
|
||||
get_delta_indices,
|
||||
get_safe_version,
|
||||
hf_transform_to_torch,
|
||||
)
|
||||
from lerobot.datasets.video_utils import decode_video_frames, get_safe_default_codec
|
||||
from lerobot.utils.constants import HF_LEROBOT_HOME
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class BehaviorLeRobotDatasetMetadata(LeRobotDatasetMetadata):
    """
    Extended metadata class for BEHAVIOR-1K datasets.

    Adds support for:
    - Modality and camera filtering (rgb, depth, seg_instance_id x head/wrist cameras)
    - Custom metainfo and annotation paths
    """

    def __init__(
        self,
        repo_id: str,
        root: str | Path | None = None,
        revision: str | None = None,
        force_cache_sync: bool = False,
        metadata_buffer_size: int = 10,
        modalities: set[str] | None = None,
        cameras: set[str] | None = None,
    ):
        # Default to every modality/camera when the caller does not restrict them.
        self.modalities = set(modalities) if modalities else {"rgb", "depth", "seg_instance_id"}
        self.camera_names = set(cameras) if cameras else {"head", "left_wrist", "right_wrist"}

        assert self.modalities.issubset({"rgb", "depth", "seg_instance_id"}), (
            f"Modalities must be subset of ['rgb', 'depth', 'seg_instance_id'], got {self.modalities}"
        )

        assert self.camera_names.issubset(set(ROBOT_CAMERA_NAMES[ROBOT_TYPE])), (
            f"Camera names must be subset of {list(ROBOT_CAMERA_NAMES[ROBOT_TYPE])}, got {self.camera_names}"
        )

        super().__init__(repo_id, root, revision, force_cache_sync, metadata_buffer_size)

    @property
    def filtered_features(self) -> dict[str, dict]:
        """Return only features matching selected modalities and cameras.

        Non-image features are always kept; image features are kept only when
        both their modality and camera are selected.
        """
        features = {}
        for name, feature_info in self.features.items():
            if not name.startswith("observation.images."):
                features[name] = feature_info
                continue

            # Image keys look like "observation.images.<modality>.<camera>".
            parts = name.split(".")
            if len(parts) >= 4:
                modality = parts[2]
                camera = parts[3]
                if modality in self.modalities and camera in self.camera_names:
                    features[name] = feature_info

        return features

    @property
    def video_keys(self) -> list[str]:
        """Return only video keys for selected modalities and cameras."""
        all_video_keys = super().video_keys

        filtered_keys = []
        for key in all_video_keys:
            parts = key.split(".")
            if len(parts) >= 4:
                modality = parts[2]
                camera = parts[3]
                if modality in self.modalities and camera in self.camera_names:
                    filtered_keys.append(key)

        return filtered_keys

    # FIX: both path getters were annotated "-> Path" but return None when the
    # corresponding key is absent from the info dict; annotate as Path | None.
    def get_metainfo_path(self, ep_index: int) -> Path | None:
        """Get path to episode metainfo file, or None if the dataset has none."""
        if "metainfo_path" in self.info:
            fpath = self.info["metainfo_path"].format(episode_index=ep_index)
            return Path(fpath)
        return None

    def get_annotation_path(self, ep_index: int) -> Path | None:
        """Get path to episode annotation file, or None if the dataset has none."""
        if "annotation_path" in self.info:
            fpath = self.info["annotation_path"].format(episode_index=ep_index)
            return Path(fpath)
        return None
|
||||
|
||||
|
||||
class BehaviorLeRobotDatasetV3(LeRobotDataset):
    """
    BEHAVIOR-1K wrapper for LeRobotDataset v3.0.

    Each BEHAVIOR-1K dataset contains a single task (e.g., behavior1k-task0000).
    See https://huggingface.co/collections/lerobot/behavior-1k for all available tasks.

    Key features:
    - Modality and camera selection
    - Efficient chunk streaming with keyframe access (recommended for B1K with GOP=250)
    - Support for BEHAVIOR-1K specific observations (cam_rel_poses, task_info, task_index)

    Note: in chunk-streaming mode, ``__getitem__`` ignores the requested index and
    walks frames sequentially within keyframe-aligned chunks (see _get_item_streaming).
    """
|
||||
|
||||
def __init__(
    self,
    repo_id: str,
    root: str | Path | None = None,
    episodes: list[int] | None = None,
    image_transforms: Callable | None = None,
    # FIX: annotation was the malformed "dict[list[float]]"; a delta-timestamps
    # mapping is keyed by feature name -> list of time offsets in seconds.
    delta_timestamps: dict[str, list[float]] | None = None,
    tolerance_s: float = 1e-4,
    revision: str | None = None,
    force_cache_sync: bool = False,
    download_videos: bool = True,
    video_backend: str | None = None,
    batch_encoding_size: int = 1,
    # BEHAVIOR-1K specific arguments
    modalities: list[str] | None = None,
    cameras: list[str] | None = None,
    check_timestamp_sync: bool = True,
    chunk_streaming_using_keyframe: bool = True,
    shuffle: bool = True,
    seed: int = 42,
):
    """
    Initialize BEHAVIOR-1K dataset.

    Args:
        repo_id: HuggingFace repository ID (e.g., "lerobot/behavior1k-task0000")
        root: Local directory for dataset storage
        episodes: List of episode indices to load (for train/val split)
        image_transforms: Torchvision v2 transforms for images
        delta_timestamps: Temporal offsets for history/future frames, keyed by feature name
        tolerance_s: Tolerance for timestamp synchronization
        revision: Git revision/branch to load
        force_cache_sync: Force re-download from hub
        download_videos: Whether to download video files
        video_backend: Video decoder ('pyav' or 'torchcodec')
        batch_encoding_size: Batch size for video encoding
        modalities: List of modalities to load (None = all: rgb, depth, seg_instance_id)
        cameras: List of cameras to load (None = all: head, left_wrist, right_wrist)
        check_timestamp_sync: Verify timestamp synchronization (can be slow).
            NOTE(review): accepted for API compatibility but not used anywhere in
            this initializer — confirm whether a sync check was meant to run here.
        chunk_streaming_using_keyframe: Use keyframe-based streaming (STRONGLY RECOMMENDED for B1K)
        shuffle: Shuffle chunks in streaming mode
        seed: Random seed for shuffling
    """
    # Bypass LeRobotDataset.__init__ on purpose: this wrapper re-implements the
    # setup sequence with modality/camera filtering, so only torch Dataset init runs.
    Dataset.__init__(self)

    self.repo_id = repo_id
    if root:
        self.root = Path(root)
    else:
        # Derive the local cache directory from the repo id (org prefix stripped).
        dataset_name = repo_id.split("/")[-1] if "/" in repo_id else repo_id
        self.root = HF_LEROBOT_HOME / dataset_name

    self.image_transforms = image_transforms
    self.delta_timestamps = delta_timestamps
    self.tolerance_s = tolerance_s
    self.revision = revision if revision else CODEBASE_VERSION
    self.video_backend = video_backend if video_backend else get_safe_default_codec()
    self.delta_indices = None
    self.batch_encoding_size = batch_encoding_size
    self.episodes_since_last_encoding = 0
    self.seed = seed

    # Writer-side attributes expected by the LeRobotDataset interface; this
    # wrapper is read-only, so they stay None.
    self.image_writer = None
    self.episode_buffer = None
    self.writer = None
    self.latest_episode = None
    self._current_file_start_frame = None

    self.root.mkdir(exist_ok=True, parents=True)

    if modalities is None:
        modalities = ["rgb", "depth", "seg_instance_id"]
    if "seg_instance_id" in modalities:
        # Segmentation streams are encoded so that only keyframe-aligned reads are fast.
        assert chunk_streaming_using_keyframe, (
            "For performance, seg_instance_id requires chunk_streaming_using_keyframe=True"
        )
    if "depth" in modalities:
        assert self.video_backend == "pyav", "Depth videos require video_backend='pyav'"
    if cameras is None:
        cameras = ["head", "left_wrist", "right_wrist"]

    self.meta = BehaviorLeRobotDatasetMetadata(
        repo_id=self.repo_id,
        root=self.root,
        revision=self.revision,
        force_cache_sync=force_cache_sync,
        modalities=modalities,
        cameras=cameras,
    )

    if episodes is not None:
        # Silently drop requested indices that are out of range.
        self.episodes = sorted([i for i in episodes if i < len(self.meta.episodes)])
    else:
        self.episodes = list(range(len(self.meta.episodes)))

    logger.info(f"Total episodes: {len(self.episodes)}")

    self._chunk_streaming_using_keyframe = chunk_streaming_using_keyframe
    if self._chunk_streaming_using_keyframe:
        if not shuffle:
            logger.warning("Chunk streaming enabled but shuffle=False. This may reduce randomness.")
        self.chunks = self._get_keyframe_chunk_indices()
        # None means "lazily initialize per worker on first access" (see _get_item_streaming).
        self.current_streaming_chunk_idx = None if shuffle else 0
        self.current_streaming_frame_idx = None if shuffle else self.chunks[0][0] if self.chunks else 0
        self.obs_loaders = {}
        self._should_obs_loaders_reload = True

    self._lazy_loading = False
    self._recorded_frames = self.meta.total_frames
    self._writer_closed_for_reading = False

    try:
        if force_cache_sync:
            # Jump straight to the download path below.
            raise FileNotFoundError
        self.hf_dataset = self.load_hf_dataset()
    except (AssertionError, FileNotFoundError, NotADirectoryError):
        self.revision = get_safe_version(self.repo_id, self.revision)
        self.download_episodes(download_videos)
        self.hf_dataset = self.load_hf_dataset()

    if self.delta_timestamps is not None:
        check_delta_timestamps(self.delta_timestamps, self.meta.fps, self.tolerance_s)
        self.delta_indices = get_delta_indices(self.delta_timestamps, self.meta.fps)
|
||||
|
||||
@property
def fps(self) -> int:
    """Capture rate of the dataset, in frames per second."""
    return self.meta.fps

@property
def features(self) -> dict:
    """Feature schema restricted to the selected modalities and cameras."""
    return self.meta.filtered_features

@property
def num_episodes(self) -> int:
    """How many episodes were selected at construction time."""
    return len(self.episodes)

@property
def num_frames(self) -> int:
    """How many frames the loaded HF dataset exposes in total."""
    return len(self.hf_dataset)
|
||||
|
||||
def get_episodes_file_paths(self) -> list[str]:
    """
    Get download patterns for requested episodes.

    Returns glob patterns for download rather than specific file paths.

    Note: Unlike the base LeRobotDataset, this method cannot filter downloads to only
    requested episodes because:
    1. BEHAVIOR-1K episode indices are encoded (e.g., 10010 for task 1, episode 10)
    2. Episodes are chunked across multiple parquet/video files
    3. The parquet files are organized by chunk, not by episode

    Therefore, we download full data/meta/video directories and rely on
    `self.load_hf_dataset()` to filter to requested episodes from the loaded data.
    """
    patterns = ["data/**", "meta/**"]

    if len(self.meta.video_keys) > 0:
        # All 3 modalities and all 3 cameras selected -> no per-stream filtering needed.
        wants_everything = len(self.meta.modalities) == 3 and len(self.meta.camera_names) == 3
        if wants_everything:
            patterns.append("videos/**")
        else:
            # Restrict to the requested modality/camera video directories.
            patterns.extend(
                f"**/observation.images.{modality}.{camera}/**"
                for modality in self.meta.modalities
                for camera in self.meta.camera_names
            )

    return patterns
|
||||
|
||||
def download_episodes(self, download_videos: bool = True) -> None:
    """
    Download episodes with modality/camera filtering.

    Mirrors base LeRobotDataset.download(), but the allow-patterns come from
    get_episodes_file_paths(), which applies modality/camera filtering.
    """
    skip_videos = None if download_videos else "videos/"
    self.pull_from_repo(
        allow_patterns=self.get_episodes_file_paths(),
        ignore_patterns=skip_videos,
    )
|
||||
|
||||
def pull_from_repo(
    self,
    allow_patterns: list[str] | str | None = None,
    ignore_patterns: list[str] | str | None = None,
) -> None:
    """Pull dataset from HuggingFace Hub into self.root."""
    # Imported lazily so module import does not require hub connectivity setup.
    from huggingface_hub import snapshot_download

    logger.info(f"Pulling dataset {self.repo_id} from HuggingFace Hub...")
    snapshot_download(
        self.repo_id,
        repo_type="dataset",
        revision=self.revision,
        local_dir=self.root,
        allow_patterns=allow_patterns,
        ignore_patterns=ignore_patterns,
    )
|
||||
|
||||
def load_hf_dataset(self) -> datasets.Dataset:
    """Load the frame table from the local parquet files under root/data."""
    from datasets import load_dataset

    parquet_dir = self.root / "data"
    hf_dataset = load_dataset("parquet", data_dir=str(parquet_dir), split="train")
    # Convert rows to torch tensors on access.
    hf_dataset.set_transform(hf_transform_to_torch)
    return hf_dataset
|
||||
|
||||
def _get_keyframe_chunk_indices(self, chunk_size: int = 250) -> list[tuple[int, int, int]]:
    """
    Divide episodes into chunks based on GOP size (keyframe interval).

    For BEHAVIOR-1K, GOP size is 250 frames for efficient storage.

    Returns:
        List of (start_index, end_index, local_start_index) tuples, where the
        first two are global frame indices and the last is the offset within
        the episode.
    """
    chunks: list[tuple[int, int, int]] = []
    global_offset = 0

    # self.episodes holds array indices into self.meta.episodes.
    for ep_array_idx in self.episodes:
        ep_length = self.meta.episodes[ep_array_idx]["length"]

        # Walk the episode in keyframe-aligned windows of chunk_size frames.
        local_start = 0
        while local_start < ep_length:
            local_end = min(local_start + chunk_size, ep_length)
            chunks.append((global_offset + local_start, global_offset + local_end, local_start))
            local_start = local_end

        global_offset += ep_length

    return chunks
|
||||
|
||||
def __getitem__(self, idx: int) -> dict:
    """Get item by index; in streaming mode the index is delegated to the chunk walker."""
    if self._chunk_streaming_using_keyframe:
        return self._get_item_streaming(idx)

    item = self.hf_dataset[idx]

    # Decode one frame per selected video stream at this row's timestamp.
    for key in self.meta.video_keys:
        if key not in self.features:
            continue
        ep_idx = item["episode_index"].item()
        ts = item["timestamp"].item()
        video_path = self.root / self.meta.get_video_file_path(ep_idx, key)
        decoded = decode_video_frames(video_path, [ts], self.tolerance_s, self.video_backend)
        item[key] = decoded.squeeze(0)

    if self.image_transforms is not None:
        for key in self.features:
            if key.startswith("observation.images."):
                item[key] = self.image_transforms(item[key])

    # Resolve the task label; fall back to a synthetic name if lookup fails.
    if "task_index" in item:
        task_idx = item["task_index"].item()
        try:
            item["task"] = self.meta.tasks.iloc[task_idx].name
        except (IndexError, AttributeError):
            item["task"] = f"task_{task_idx}"

    return item
|
||||
|
||||
def _get_item_streaming(self, idx: int) -> dict:
    """Get the next frame in chunk streaming mode (the requested idx is ignored;
    frames are served sequentially within keyframe-aligned chunks)."""
    if self.current_streaming_chunk_idx is None:
        # First access in this worker: seed a per-worker RNG, shuffle the chunk
        # order, and start from a random chunk.
        worker_info = get_worker_info()
        worker_id = 0 if worker_info is None else worker_info.id
        rng = np.random.default_rng(self.seed + worker_id)
        rng.shuffle(self.chunks)
        self.current_streaming_chunk_idx = rng.integers(0, len(self.chunks)).item()
        self.current_streaming_frame_idx = self.chunks[self.current_streaming_chunk_idx][0]

    # Exhausted the current chunk: advance (wrapping around) and flag loader reload.
    chunk_end = self.chunks[self.current_streaming_chunk_idx][1]
    if self.current_streaming_frame_idx >= chunk_end:
        self.current_streaming_chunk_idx = (self.current_streaming_chunk_idx + 1) % len(self.chunks)
        self.current_streaming_frame_idx = self.chunks[self.current_streaming_chunk_idx][0]
        self._should_obs_loaders_reload = True

    item = self.hf_dataset[self.current_streaming_frame_idx]
    ep_idx = item["episode_index"].item()

    if self._should_obs_loaders_reload:
        # Close any open per-episode observation loaders before switching chunks.
        for loader in self.obs_loaders.values():
            if hasattr(loader, "close"):
                loader.close()
        self.obs_loaders = {}
        self.current_streaming_episode_idx = ep_idx
        self._should_obs_loaders_reload = False

    for key in self.meta.video_keys:
        if key in self.features:
            ts = item["timestamp"].item()
            video_path = self.root / self.meta.get_video_file_path(ep_idx, key)
            item[key] = decode_video_frames(
                video_path, [ts], self.tolerance_s, self.video_backend
            ).squeeze(0)

    if self.image_transforms is not None:
        for key in self.features:
            if key.startswith("observation.images."):
                item[key] = self.image_transforms(item[key])

    # Resolve the task label; fall back to a synthetic name if lookup fails.
    if "task_index" in item:
        task_idx = item["task_index"].item()
        try:
            item["task"] = self.meta.tasks.iloc[task_idx].name
        except (IndexError, AttributeError):
            item["task"] = f"task_{task_idx}"

    self.current_streaming_frame_idx += 1
    return item
|
||||
|
||||
def __len__(self) -> int:
    """Number of frames addressable through this dataset."""
    return len(self.hf_dataset)
|
||||
@@ -0,0 +1,350 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
# Copyright 2025 The HuggingFace Inc. team. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
from collections import OrderedDict
|
||||
|
||||
import numpy as np
|
||||
import torch as th
|
||||
|
||||
# Robot platform these constants describe.
ROBOT_TYPE = "R1Pro"
# Capture rate of all recorded streams, in frames per second.
FPS = 30

# Maps a short camera alias to the fully-qualified sensor name on each robot.
ROBOT_CAMERA_NAMES = {
    "A1": {
        "external": "external::external_camera",
        "wrist": "external::wrist_camera",
    },
    "R1Pro": {
        "left_wrist": "robot_r1::robot_r1:left_realsense_link:Camera:0",
        "right_wrist": "robot_r1::robot_r1:right_realsense_link:Camera:0",
        "head": "robot_r1::robot_r1:zed_link:Camera:0",
    },
}

# Camera resolutions and corresponding intrinsics
HEAD_RESOLUTION = (720, 720)
WRIST_RESOLUTION = (480, 480)
# TODO: Fix A1
# 3x3 pinhole intrinsic matrices [[fx, 0, cx], [0, fy, cy], [0, 0, 1]].
CAMERA_INTRINSICS = {
    "A1": {
        "external": np.array(
            [[306.0, 0.0, 360.0], [0.0, 306.0, 360.0], [0.0, 0.0, 1.0]], dtype=np.float32
        ),  # 240x240  NOTE(review): cx/cy=360 suggests 720x720, not 240x240 — see TODO above
        "wrist": np.array(
            [[388.6639, 0.0, 240.0], [0.0, 388.6639, 240.0], [0.0, 0.0, 1.0]], dtype=np.float32
        ),  # 240x240  NOTE(review): cx/cy=240 suggests 480x480 — confirm
    },
    "R1Pro": {
        "head": np.array(
            [[306.0, 0.0, 360.0], [0.0, 306.0, 360.0], [0.0, 0.0, 1.0]], dtype=np.float32
        ),  # 720x720
        "left_wrist": np.array(
            [[388.6639, 0.0, 240.0], [0.0, 388.6639, 240.0], [0.0, 0.0, 1.0]], dtype=np.float32
        ),  # 480x480
        "right_wrist": np.array(
            [[388.6639, 0.0, 240.0], [0.0, 388.6639, 240.0], [0.0, 0.0, 1.0]], dtype=np.float32
        ),  # 480x480
    },
}
|
||||
|
||||
|
||||
# Dataset features for BEHAVIOR-1K LeRobotDataset v3.0
# Each entry maps a feature key to its dtype, per-frame shape, and axis names.
BEHAVIOR_DATASET_FEATURES = {
    # Actions
    "action": {
        "dtype": "float32",
        "shape": (23,),  # 23-dimensional action space for R1Pro
        "names": None,
    },
    # Proprioception
    "observation.state": {
        "dtype": "float32",
        "shape": (256,),  # Full proprioception state (see PROPRIOCEPTION_INDICES layout)
        "names": None,
    },
    # Camera relative poses
    "observation.cam_rel_poses": {
        "dtype": "float32",
        "shape": (21,),  # 3 cameras * 7 (pos + quat)
        "names": None,
    },
    # Task information
    "observation.task_info": {
        "dtype": "float32",
        "shape": (None,),  # Variable size
        "names": None,
    },
    # RGB images (3-channel video streams)
    "observation.images.rgb.head": {
        "dtype": "video",
        "shape": [720, 720, 3],
        "names": ["height", "width", "channels"],
    },
    "observation.images.rgb.left_wrist": {
        "dtype": "video",
        "shape": [480, 480, 3],
        "names": ["height", "width", "channels"],
    },
    "observation.images.rgb.right_wrist": {
        "dtype": "video",
        "shape": [480, 480, 3],
        "names": ["height", "width", "channels"],
    },
    # Depth images (single-channel video streams)
    "observation.images.depth.head": {
        "dtype": "video",
        "shape": [720, 720, 1],
        "names": ["height", "width", "channels"],
    },
    "observation.images.depth.left_wrist": {
        "dtype": "video",
        "shape": [480, 480, 1],
        "names": ["height", "width", "channels"],
    },
    "observation.images.depth.right_wrist": {
        "dtype": "video",
        "shape": [480, 480, 1],
        "names": ["height", "width", "channels"],
    },
    # Segmentation instance ID images (single-channel video streams)
    "observation.images.seg_instance_id.head": {
        "dtype": "video",
        "shape": [720, 720, 1],
        "names": ["height", "width", "channels"],
    },
    "observation.images.seg_instance_id.left_wrist": {
        "dtype": "video",
        "shape": [480, 480, 1],
        "names": ["height", "width", "channels"],
    },
    "observation.images.seg_instance_id.right_wrist": {
        "dtype": "video",
        "shape": [480, 480, 1],
        "names": ["height", "width", "channels"],
    },
}
|
||||
|
||||
|
||||
# Action indices
# Slices into the flat action vector, keyed by robot part; OrderedDict preserves
# the layout order of the action space.
ACTION_QPOS_INDICES = {
    "A1": OrderedDict(
        {
            "arm": np.s_[0:6],
            "gripper": np.s_[6:7],
        }
    ),
    "R1Pro": OrderedDict(
        {
            "base": np.s_[0:3],
            "torso": np.s_[3:7],
            "left_arm": np.s_[7:14],
            "left_gripper": np.s_[14:15],
            "right_arm": np.s_[15:22],
            "right_gripper": np.s_[22:23],
        }
    ),
}
|
||||
|
||||
|
||||
# Proprioception configuration
# Slices into the flat proprioception vector ("observation.state") for each robot.
PROPRIOCEPTION_INDICES = {
    "A1": OrderedDict(
        {
            "joint_qpos": np.s_[0:8],
            "joint_qpos_sin": np.s_[8:16],
            "joint_qpos_cos": np.s_[16:24],
            "joint_qvel": np.s_[24:32],
            "joint_qeffort": np.s_[32:40],
            "eef_0_pos": np.s_[40:43],
            "eef_0_quat": np.s_[43:47],
            "grasp_0": np.s_[47:48],
            "gripper_0_qpos": np.s_[48:50],
            "gripper_0_qvel": np.s_[50:52],
        }
    ),
    "R1Pro": OrderedDict(
        {
            # Full robot joint positions, the first 6 are base joints, which is NOT allowed in standard track
            "joint_qpos": np.s_[0:28],
            # Full robot joint positions, the first 6 are base joints, which is NOT allowed in standard track
            "joint_qpos_sin": np.s_[28:56],
            # Full robot joint positions, the first 6 are base joints, which is NOT allowed in standard track
            "joint_qpos_cos": np.s_[56:84],
            "joint_qvel": np.s_[84:112],
            "joint_qeffort": np.s_[112:140],
            "robot_pos": np.s_[140:143],  # Global pos, this is NOT allowed in standard track
            "robot_ori_cos": np.s_[143:146],  # Global ori, this is NOT allowed in standard track
            "robot_ori_sin": np.s_[146:149],  # Global ori, this is NOT allowed in standard track
            "robot_2d_ori": np.s_[149:150],  # 2D global ori, this is NOT allowed in standard track
            "robot_2d_ori_cos": np.s_[150:151],  # 2D global ori, this is NOT allowed in standard track
            "robot_2d_ori_sin": np.s_[151:152],  # 2D global ori, this is NOT allowed in standard track
            "robot_lin_vel": np.s_[152:155],
            "robot_ang_vel": np.s_[155:158],
            "arm_left_qpos": np.s_[158:165],
            "arm_left_qpos_sin": np.s_[165:172],
            "arm_left_qpos_cos": np.s_[172:179],
            "arm_left_qvel": np.s_[179:186],
            "eef_left_pos": np.s_[186:189],
            "eef_left_quat": np.s_[189:193],
            "gripper_left_qpos": np.s_[193:195],
            "gripper_left_qvel": np.s_[195:197],
            "arm_right_qpos": np.s_[197:204],
            "arm_right_qpos_sin": np.s_[204:211],
            "arm_right_qpos_cos": np.s_[211:218],
            "arm_right_qvel": np.s_[218:225],
            "eef_right_pos": np.s_[225:228],
            "eef_right_quat": np.s_[228:232],
            "gripper_right_qpos": np.s_[232:234],
            "gripper_right_qvel": np.s_[234:236],
            "trunk_qpos": np.s_[236:240],
            "trunk_qvel": np.s_[240:244],
            "base_qpos": np.s_[244:247],  # Base joint position, this is NOT allowed in standard track
            "base_qpos_sin": np.s_[247:250],  # Base joint position, this is NOT allowed in standard track
            "base_qpos_cos": np.s_[250:253],  # Base joint position, this is NOT allowed in standard track
            "base_qvel": np.s_[253:256],
        }
    ),
}
|
||||
|
||||
# Proprioception indices
# Slices into the robot's raw joint-position vector, keyed by robot part.
PROPRIO_QPOS_INDICES = {
    "A1": OrderedDict(
        {
            "arm": np.s_[0:6],
            "gripper": np.s_[6:8],
        }
    ),
    "R1Pro": OrderedDict(
        {
            "torso": np.s_[6:10],
            # Strided slices: left/right arm joints are interleaved in qpos.
            "left_arm": np.s_[10:24:2],
            "right_arm": np.s_[11:24:2],
            "left_gripper": np.s_[24:26],
            "right_gripper": np.s_[26:28],
        }
    ),
}
|
||||
|
||||
|
||||
# Joint limits (lower, upper)
# Each entry is a (lower_bound, upper_bound) pair of float32 tensors, one value per joint.
JOINT_RANGE = {
    "A1": {
        "arm": (
            th.tensor([-2.8798, 0.0, -3.3161, -2.8798, -1.6581, -2.8798], dtype=th.float32),
            th.tensor([2.8798, 3.1415, 0.0, 2.8798, 1.6581, 2.8798], dtype=th.float32),
        ),
        "gripper": (th.tensor([0.00], dtype=th.float32), th.tensor([0.03], dtype=th.float32)),
    },
    "R1Pro": {
        "base": (
            th.tensor([-0.75, -0.75, -1.0], dtype=th.float32),
            th.tensor([0.75, 0.75, 1.0], dtype=th.float32),
        ),
        "torso": (
            th.tensor([-1.1345, -2.7925, -1.8326, -3.0543], dtype=th.float32),
            th.tensor([1.8326, 2.5307, 1.5708, 3.0543], dtype=th.float32),
        ),
        "left_arm": (
            th.tensor([-4.4506, -0.1745, -2.3562, -2.0944, -2.3562, -1.0472, -1.5708], dtype=th.float32),
            th.tensor([1.3090, 3.1416, 2.3562, 0.3491, 2.3562, 1.0472, 1.5708], dtype=th.float32),
        ),
        "left_gripper": (th.tensor([0.00], dtype=th.float32), th.tensor([0.05], dtype=th.float32)),
        "right_arm": (
            th.tensor([-4.4506, -3.1416, -2.3562, -2.0944, -2.3562, -1.0472, -1.5708], dtype=th.float32),
            th.tensor([1.3090, 0.1745, 2.3562, 0.3491, 2.3562, 1.0472, 1.5708], dtype=th.float32),
        ),
        "right_gripper": (th.tensor([0.00], dtype=th.float32), th.tensor([0.05], dtype=th.float32)),
    },
}
|
||||
|
||||
|
||||
# End-effector position bounds as (lower, upper) float32 tensor pairs, per end effector.
EEF_POSITION_RANGE = {
    "A1": {
        "0": (th.tensor([0.0, -0.7, 0.0], dtype=th.float32), th.tensor([0.7, 0.7, 0.7], dtype=th.float32)),
    },
    "R1Pro": {
        "left": (
            th.tensor([0.0, -0.65, 0.0], dtype=th.float32),
            th.tensor([0.65, 0.65, 2.5], dtype=th.float32),
        ),
        "right": (
            th.tensor([0.0, -0.65, 0.0], dtype=th.float32),
            th.tensor([0.65, 0.65, 2.5], dtype=th.float32),
        ),
    },
}
|
||||
|
||||
|
||||
# BEHAVIOR-1K task name -> task index, grouped by benchmark split (B10/B20/...).
TASK_NAMES_TO_INDICES = {
    # B10
    "turning_on_radio": 0,
    "picking_up_trash": 1,
    "putting_away_Halloween_decorations": 2,
    "cleaning_up_plates_and_food": 3,
    "can_meat": 4,
    "setting_mousetraps": 5,
    "hiding_Easter_eggs": 6,
    "picking_up_toys": 7,
    "rearranging_kitchen_furniture": 8,
    "putting_up_Christmas_decorations_inside": 9,
    # B20
    "set_up_a_coffee_station_in_your_kitchen": 10,
    "putting_dishes_away_after_cleaning": 11,
    "preparing_lunch_box": 12,
    "loading_the_car": 13,
    "carrying_in_groceries": 14,
    "bringing_in_wood": 15,
    "moving_boxes_to_storage": 16,
    "bringing_water": 17,
    "tidying_bedroom": 18,
    "outfit_a_basic_toolbox": 19,
    # B30
    "sorting_vegetables": 20,
    "collecting_childrens_toys": 21,
    "putting_shoes_on_rack": 22,
    "boxing_books_up_for_storage": 23,
    "storing_food": 24,
    "clearing_food_from_table_into_fridge": 25,
    "assembling_gift_baskets": 26,
    "sorting_household_items": 27,
    "getting_organized_for_work": 28,
    "clean_up_your_desk": 29,
    # B40
    "setting_the_fire": 30,
    "clean_boxing_gloves": 31,
    "wash_a_baseball_cap": 32,
    "wash_dog_toys": 33,
    "hanging_pictures": 34,
    "attach_a_camera_to_a_tripod": 35,
    "clean_a_patio": 36,
    "clean_a_trumpet": 37,
    "spraying_for_bugs": 38,
    "spraying_fruit_trees": 39,
    # B50
    "make_microwave_popcorn": 40,
    "cook_cabbage": 41,
    "chop_an_onion": 42,
    "slicing_vegetables": 43,
    "chopping_wood": 44,
    "cook_hot_dogs": 45,
    "cook_bacon": 46,
    "freeze_pies": 47,
    "canning_food": 48,
    "make_pizza": 49,
}
# Reverse lookup: task index -> task name.
TASK_INDICES_TO_NAMES = {v: k for k, v in TASK_NAMES_TO_INDICES.items()}
|
||||
+605
@@ -0,0 +1,605 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
# Copyright 2025 The HuggingFace Inc. team. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
"""Convert Behavior Dataset to LeRobotDataset v3.0 format"""
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import logging
|
||||
import shutil
|
||||
from pathlib import Path
|
||||
|
||||
import jsonlines
|
||||
import pandas as pd
|
||||
import pyarrow as pa
|
||||
import tqdm
|
||||
from datasets import Dataset, Features, Image
|
||||
|
||||
from lerobot.datasets.compute_stats import aggregate_stats
|
||||
from lerobot.datasets.utils import (
|
||||
DEFAULT_CHUNK_SIZE,
|
||||
DEFAULT_DATA_FILE_SIZE_IN_MB,
|
||||
DEFAULT_DATA_PATH,
|
||||
DEFAULT_VIDEO_FILE_SIZE_IN_MB,
|
||||
DEFAULT_VIDEO_PATH,
|
||||
LEGACY_EPISODES_PATH,
|
||||
LEGACY_EPISODES_STATS_PATH,
|
||||
LEGACY_TASKS_PATH,
|
||||
cast_stats_to_numpy,
|
||||
flatten_dict,
|
||||
get_file_size_in_mb,
|
||||
get_parquet_file_size_in_mb,
|
||||
get_parquet_num_frames,
|
||||
load_info,
|
||||
update_chunk_file_indices,
|
||||
write_episodes,
|
||||
write_info,
|
||||
write_stats,
|
||||
write_tasks,
|
||||
)
|
||||
from lerobot.datasets.video_utils import concatenate_video_files, get_video_duration_in_s
|
||||
from lerobot.utils.utils import init_logging
|
||||
|
||||
# script to convert one single task to v3.1
|
||||
# TASK = 1
|
||||
NEW_ROOT = Path("/fsx/jade_choghari/tmp/bb")
|
||||
|
||||
|
||||
def get_total_episodes_task(local_dir: Path, task_id: int, task_ranges: dict, step: int) -> int:
    """
    Calculates the total number of episodes for a single, specified task.

    Args:
        local_dir: Root path containing the legacy meta/episodes.jsonl.
        task_id: Key into ``task_ranges`` identifying the task.
        task_ranges: Mapping produced by ``infer_task_episode_ranges``.
        step: Episode index step between consecutive episodes of a task.
    """
    # Simply load the episodes for the task and count them.
    episodes = legacy_load_episodes_task(
        local_dir=local_dir, task_id=task_id, task_ranges=task_ranges, step=step
    )
    return len(episodes)
|
||||
|
||||
|
||||
NUM_CAMERAS = 9
|
||||
|
||||
|
||||
def get_total_frames_task(local_dir, meta_path, task_id: int, task_ranges: dict, step: int) -> int:
    """Return the total number of frames across all episodes of one task.

    NOTE(review): ``meta_path`` is currently unused — the counts come from the
    per-episode ``length`` field of the legacy episodes metadata.
    """
    task_episodes = legacy_load_episodes_task(
        local_dir=local_dir, task_id=task_id, task_ranges=task_ranges, step=step
    )
    # "length" holds the episode's frame count (duration-like value).
    return sum(int(ep["length"]) for ep in task_episodes.values())
|
||||
|
||||
|
||||
def convert_info(
    root, new_root, data_file_size_in_mb, video_file_size_in_mb, meta_path, task_id: int, task_ranges, step
):
    """Write an updated info.json for the converted single-task v3.0 dataset.

    Loads the legacy info from ``root``, rewrites version/path/size fields,
    recomputes the totals for the selected task only, and writes the result
    under ``new_root``.
    NOTE(review): ``meta_path`` is not used here — the totals are derived from
    the legacy episodes metadata loaded via ``root``.
    """
    info = load_info(root)
    info["codebase_version"] = "v3.0"
    # Removed then recomputed below from the per-task episode count.
    del info["total_videos"]
    info["data_files_size_in_mb"] = data_file_size_in_mb
    info["video_files_size_in_mb"] = video_file_size_in_mb
    info["data_path"] = DEFAULT_DATA_PATH
    info["video_path"] = DEFAULT_VIDEO_PATH if info["video_path"] is not None else None
    info["fps"] = int(info["fps"])
    # Propagate fps to every non-video feature.
    for key in info["features"]:
        if info["features"][key]["dtype"] == "video":
            # already has fps in video_info
            continue
        info["features"][key]["fps"] = info["fps"]

    # Restrict the totals to the single task being converted.
    info["total_episodes"] = get_total_episodes_task(root, task_id, task_ranges, step)
    info["total_videos"] = info["total_episodes"] * NUM_CAMERAS
    info["total_frames"] = get_total_frames_task(root, meta_path, task_id, task_ranges, step)
    info["total_tasks"] = 1
    write_info(info, new_root)
|
||||
|
||||
|
||||
def load_jsonlines(fpath: Path) -> list[dict]:
    """Read a JSON Lines file and return its records as a list of dicts.

    BUGFIX: the return annotation was ``list[any]`` — ``any`` is the builtin
    function, not a type. Each line of the jsonl metadata files is a JSON
    object, so ``list[dict]`` is the accurate annotation.
    """
    with jsonlines.open(fpath, "r") as reader:
        return list(reader)
|
||||
|
||||
|
||||
def legacy_load_tasks(local_dir: Path) -> tuple[dict, dict]:
    """Load the legacy tasks.jsonl and return (index -> task, task -> index) maps."""
    raw_tasks = load_jsonlines(local_dir / LEGACY_TASKS_PATH)
    raw_tasks.sort(key=lambda item: item["task_index"])
    # Build the forward map first, then invert it.
    tasks = {}
    for item in raw_tasks:
        tasks[item["task_index"]] = item["task"]
    task_to_task_index = {name: idx for idx, name in tasks.items()}
    return tasks, task_to_task_index
|
||||
|
||||
|
||||
def convert_tasks(root, new_root, task_id: int):
    """Write a tasks table for ``new_root`` containing only the selected task."""
    all_tasks, _ = legacy_load_tasks(root)
    if task_id not in all_tasks:
        raise ValueError(f"Task ID {task_id} not found in tasks (available: {list(all_tasks.keys())})")
    # Keep a single entry: the task being converted.
    selected = {task_id: all_tasks[task_id]}
    df_tasks = pd.DataFrame({"task_index": list(selected.keys())}, index=list(selected.values()))
    write_tasks(df_tasks, new_root)
|
||||
|
||||
|
||||
def concat_data_files(paths_to_cat, new_root, chunk_idx, file_idx, image_keys):
    """Merge several per-episode parquet files into one aggregated data file.

    Args:
        paths_to_cat: Parquet files to merge, in episode order.
        new_root: Root of the converted dataset.
        chunk_idx: Target chunk index, used to format the output path.
        file_idx: Target file index, used to format the output path.
        image_keys: Feature keys that must be stored with the HF ``Image`` type.
    """
    # TODO(rcadene): to save RAM use Dataset.from_parquet(file) and concatenate_datasets
    dataframes = [pd.read_parquet(file) for file in paths_to_cat]
    # Concatenate all DataFrames along rows
    concatenated_df = pd.concat(dataframes, ignore_index=True)

    path = new_root / DEFAULT_DATA_PATH.format(chunk_index=chunk_idx, file_index=file_idx)
    path.parent.mkdir(parents=True, exist_ok=True)
    if len(image_keys) > 0:
        # Re-type the image columns in the arrow schema so they round-trip
        # as HF Image features rather than raw bytes/structs.
        schema = pa.Schema.from_pandas(concatenated_df)
        features = Features.from_arrow_schema(schema)
        for key in image_keys:
            features[key] = Image()
        schema = features.arrow_schema
    else:
        schema = None

    concatenated_df.to_parquet(path, index=False, schema=schema)
|
||||
|
||||
|
||||
def get_image_keys(root):
    """Return the feature keys whose dtype is "image" in the legacy info."""
    features = load_info(root)["features"]
    return [key for key, ft in features.items() if ft["dtype"] == "image"]
|
||||
|
||||
|
||||
def convert_data(root: Path, new_root: Path, data_file_size_in_mb: int, task_index: int):
    """Aggregate one task's per-episode parquet files into size-bounded data files.

    Episodes are accumulated until ``data_file_size_in_mb`` is reached, then the
    accumulation is flushed and a new file is started with the current episode.

    Args:
        root: Legacy dataset root (expects data/task-00X/*.parquet).
        new_root: Output root for the converted dataset.
        data_file_size_in_mb: Maximum size per aggregated data file.
        task_index: Task to convert; selects the data/task-00X directory.

    Returns:
        list[dict]: Per-episode metadata (chunk/file indices, global frame range).
    """
    # NOTE(review): this zero-padding only works for task_index < 10 — confirm
    # the on-disk naming convention for larger task indices.
    task_dir_name = f"task-00{task_index}"
    data_dir = root / "data" / task_dir_name
    ep_paths = sorted(data_dir.glob("*.parquet"))
    image_keys = get_image_keys(root)

    ep_idx = 0
    chunk_idx = 0
    file_idx = 0
    size_in_mb = 0
    num_frames = 0
    paths_to_cat = []
    episodes_metadata = []

    logging.info(f"Converting data files from {len(ep_paths)} episodes")

    for ep_path in tqdm.tqdm(ep_paths, desc="convert data files"):
        ep_size_in_mb = get_parquet_file_size_in_mb(ep_path)
        ep_num_frames = get_parquet_num_frames(ep_path)
        ep_metadata = {
            "episode_index": ep_idx,
            "data/chunk_index": chunk_idx,
            "data/file_index": file_idx,
            # Global (dataset-wide) frame range of this episode.
            "dataset_from_index": num_frames,
            "dataset_to_index": num_frames + ep_num_frames,
        }
        size_in_mb += ep_size_in_mb
        num_frames += ep_num_frames
        episodes_metadata.append(ep_metadata)
        ep_idx += 1

        if size_in_mb < data_file_size_in_mb:
            paths_to_cat.append(ep_path)
            continue

        # Size limit reached: flush the previously accumulated episodes
        # (WITHOUT the current one) into the current chunk/file.
        if paths_to_cat:
            concat_data_files(paths_to_cat, new_root, chunk_idx, file_idx, image_keys)

        # Reset for the next file; the current episode starts the new file.
        size_in_mb = ep_size_in_mb
        paths_to_cat = [ep_path]

        chunk_idx, file_idx = update_chunk_file_indices(chunk_idx, file_idx, DEFAULT_CHUNK_SIZE)

        # BUGFIX: the current episode is written to the *new* chunk/file, but its
        # metadata above was recorded with the old indices — update it to point
        # at the file it will actually land in (mirrors what
        # convert_videos_of_camera does when it patches metadata after a flush).
        ep_metadata["data/chunk_index"] = chunk_idx
        ep_metadata["data/file_index"] = file_idx

    # Write remaining data if any
    if paths_to_cat:
        concat_data_files(paths_to_cat, new_root, chunk_idx, file_idx, image_keys)

    return episodes_metadata
|
||||
|
||||
|
||||
def convert_videos_of_camera(
    root: Path, new_root: Path, video_key: str, video_file_size_in_mb: int, task_index: int
):
    """Concatenate one camera's per-episode mp4s into size-bounded video files.

    Episodes accumulate into the current output file until adding the next one
    would exceed ``video_file_size_in_mb``; the accumulation is then flushed and
    the chunk/file indices advance. Each episode's metadata records the
    chunk/file it landed in and its [from, to] timestamps within that file.

    Returns:
        list[dict]: One metadata dict per episode for this camera.
    """
    # Access old paths to mp4
    # videos_dir = root / "videos"
    # ep_paths = sorted(videos_dir.glob(f"*/{video_key}/*.mp4"))
    # NOTE(review): this zero-padding only works for task_index < 10.
    task_dir_name = f"task-00{task_index}"
    videos_dir = root / "videos" / task_dir_name / video_key
    ep_paths = sorted(videos_dir.glob("*.mp4"))
    print("ep_paths", ep_paths)
    ep_idx = 0
    chunk_idx = 0
    file_idx = 0
    size_in_mb = 0
    duration_in_s = 0.0
    paths_to_cat = []
    episodes_metadata = []

    for ep_path in tqdm.tqdm(ep_paths, desc=f"convert videos of {video_key}"):
        ep_size_in_mb = get_file_size_in_mb(ep_path)
        ep_duration_in_s = get_video_duration_in_s(ep_path)

        # Check if adding this episode would exceed the limit
        if size_in_mb + ep_size_in_mb >= video_file_size_in_mb and len(paths_to_cat) > 0:
            # Size limit would be exceeded, save current accumulation WITHOUT this episode
            concatenate_video_files(
                paths_to_cat,
                new_root
                / DEFAULT_VIDEO_PATH.format(video_key=video_key, chunk_index=chunk_idx, file_index=file_idx),
            )

            # Update episodes metadata for the file we just saved
            for i, _ in enumerate(paths_to_cat):
                past_ep_idx = ep_idx - len(paths_to_cat) + i
                episodes_metadata[past_ep_idx][f"videos/{video_key}/chunk_index"] = chunk_idx
                episodes_metadata[past_ep_idx][f"videos/{video_key}/file_index"] = file_idx

            # Move to next file and start fresh with current episode
            chunk_idx, file_idx = update_chunk_file_indices(chunk_idx, file_idx, DEFAULT_CHUNK_SIZE)
            size_in_mb = 0
            duration_in_s = 0.0
            paths_to_cat = []

        # Add current episode metadata
        ep_metadata = {
            "episode_index": ep_idx,
            f"videos/{video_key}/chunk_index": chunk_idx,  # Will be updated when file is saved
            f"videos/{video_key}/file_index": file_idx,  # Will be updated when file is saved
            f"videos/{video_key}/from_timestamp": duration_in_s,
            f"videos/{video_key}/to_timestamp": duration_in_s + ep_duration_in_s,
        }
        episodes_metadata.append(ep_metadata)

        # Add current episode to accumulation
        paths_to_cat.append(ep_path)
        size_in_mb += ep_size_in_mb
        duration_in_s += ep_duration_in_s
        ep_idx += 1

    # Write remaining videos if any
    if paths_to_cat:
        concatenate_video_files(
            paths_to_cat,
            new_root
            / DEFAULT_VIDEO_PATH.format(video_key=video_key, chunk_index=chunk_idx, file_index=file_idx),
        )

        # Update episodes metadata for the final file
        for i, _ in enumerate(paths_to_cat):
            past_ep_idx = ep_idx - len(paths_to_cat) + i
            episodes_metadata[past_ep_idx][f"videos/{video_key}/chunk_index"] = chunk_idx
            episodes_metadata[past_ep_idx][f"videos/{video_key}/file_index"] = file_idx

    return episodes_metadata
|
||||
|
||||
|
||||
def get_video_keys(root):
    """Return the feature keys whose dtype is "video" in the legacy info."""
    features = load_info(root)["features"]
    return [key for key, ft in features.items() if ft["dtype"] == "video"]
|
||||
|
||||
|
||||
def convert_videos(root: Path, new_root: Path, video_file_size_in_mb: int, task_id: int):
    """Convert every camera's videos and merge the per-camera episode metadata.

    Returns None when the dataset has no video features; otherwise a list with
    one merged metadata dict per episode.
    """
    logging.info(f"Converting videos from {root} to {new_root}")

    video_keys = get_video_keys(root)
    if len(video_keys) == 0:
        return None

    video_keys = sorted(video_keys)

    # Convert each camera independently, collecting its per-episode metadata.
    eps_metadata_per_cam = [
        convert_videos_of_camera(root, new_root, camera, video_file_size_in_mb, task_id)
        for camera in video_keys
    ]

    num_eps_per_cam = [len(eps_cam_map) for eps_cam_map in eps_metadata_per_cam]
    if len(set(num_eps_per_cam)) != 1:
        raise ValueError(f"All cams dont have same number of episodes ({num_eps_per_cam}).")

    merged_metadata = []
    num_cameras = len(video_keys)
    num_episodes = num_eps_per_cam[0]
    for ep_idx in tqdm.tqdm(range(num_episodes), desc="convert videos"):
        per_cam = [eps_metadata_per_cam[cam_idx][ep_idx] for cam_idx in range(num_cameras)]

        # Sanity check: every camera must agree on the episode index.
        ep_ids = [cam_meta["episode_index"] for cam_meta in per_cam]
        ep_ids += [ep_idx]
        if len(set(ep_ids)) != 1:
            raise ValueError(f"All episode indices need to match ({ep_ids}).")

        ep_dict = {}
        for cam_meta in per_cam:
            ep_dict.update(cam_meta)
        merged_metadata.append(ep_dict)

    return merged_metadata
|
||||
|
||||
|
||||
def infer_task_episode_ranges(episodes_jsonl_path: Path) -> dict:
    """
    Parse the Behavior-1K episodes.jsonl metadata and infer contiguous episode ranges per unique task.

    Returns a dict:
        { task_id: { "task_string": ..., "ep_start": ..., "ep_end": ... } }
    """
    task_ranges = {}
    # Open group as a (task_string, ep_start, ep_end) triple, or None.
    current = None

    with open(episodes_jsonl_path) as f:
        for line in f:
            stripped = line.strip()
            if not stripped:
                continue
            ep = json.loads(stripped)
            ep_idx = ep["episode_index"]
            task_str = ep["tasks"][0] if ep["tasks"] else "UNKNOWN"

            if current is not None and task_str == current[0]:
                # Same task: extend the open range.
                current = (current[0], current[1], ep_idx)
            else:
                if current is not None:
                    # Task changed: close the open group under the next id.
                    task_ranges[len(task_ranges)] = {
                        "task_string": current[0],
                        "ep_start": current[1],
                        "ep_end": current[2],
                    }
                # Start a new group at this episode.
                current = (task_str, ep_idx, ep_idx)

    # Close the last open group, if any.
    if current is not None:
        task_ranges[len(task_ranges)] = {
            "task_string": current[0],
            "ep_start": current[1],
            "ep_end": current[2],
        }

    return task_ranges
|
||||
|
||||
|
||||
def legacy_load_episodes_task(local_dir: Path, task_id: int, task_ranges: dict, step: int = 10) -> dict:
    """
    Load only the episodes belonging to a specific task, inferred automatically from episode ranges.

    Args:
        local_dir (Path): Root path containing legacy meta/episodes.jsonl
        task_id (int): Which task to load (key from the inferred task_ranges dict)
        task_ranges (dict): Mapping from infer_task_episode_ranges()
        step (int): Episode index step (Behavior-1K = 10)
    """
    all_episodes = legacy_load_episodes(local_dir)

    # get the range for this task
    if task_id not in task_ranges:
        raise ValueError(f"Task id {task_id} not found in task_ranges")

    bounds = task_ranges[task_id]
    wanted = range(bounds["ep_start"], bounds["ep_end"] + step, step)
    return {idx: all_episodes[idx] for idx in wanted if idx in all_episodes}
|
||||
|
||||
|
||||
def legacy_load_episodes(local_dir: Path) -> dict:
    """Load legacy episodes.jsonl as {episode_index: record}, sorted by index."""
    records = load_jsonlines(local_dir / LEGACY_EPISODES_PATH)
    records.sort(key=lambda item: item["episode_index"])
    return {item["episode_index"]: item for item in records}
|
||||
|
||||
|
||||
def legacy_load_episodes_stats(local_dir: Path) -> dict:
    """Load legacy per-episode stats as {episode_index: numpy stats}, sorted by index."""
    records = load_jsonlines(local_dir / LEGACY_EPISODES_STATS_PATH)
    records.sort(key=lambda item: item["episode_index"])
    return {item["episode_index"]: cast_stats_to_numpy(item["stats"]) for item in records}
|
||||
|
||||
|
||||
def legacy_load_episodes_stats_task(local_dir: Path, task_id: int, task_ranges: dict, step: int = 10) -> dict:
    """Load the per-episode stats restricted to the episodes of one task."""
    all_stats = legacy_load_episodes_stats(local_dir)

    if task_id not in task_ranges:
        raise ValueError(f"Task id {task_id} not found in task_ranges")

    bounds = task_ranges[task_id]
    wanted = range(bounds["ep_start"], bounds["ep_end"] + step, step)
    return {idx: all_stats[idx] for idx in wanted if idx in all_stats}
|
||||
|
||||
|
||||
def generate_episode_metadata_dict(
    episodes_legacy_metadata, episodes_metadata, episodes_stats, episodes_videos=None
):
    """Yield one merged metadata dict per episode for the v3 episodes table.

    Merges, per episode: the data-file metadata, the optional video metadata,
    the legacy metadata, and the flattened stats. Later sources win on key
    collisions (dict-unpacking order in ``ep_dict`` below).

    Args:
        episodes_legacy_metadata: dict of legacy episode records (task-filtered).
        episodes_metadata: list of per-episode data metadata from convert_data().
        episodes_stats: dict of per-episode stats (task-filtered).
        episodes_videos: optional list of per-episode video metadata.
    """
    num_episodes = len(episodes_metadata)
    episodes_legacy_metadata_vals = list(episodes_legacy_metadata.values())
    episodes_stats_vals = list(episodes_stats.values())
    episodes_stats_keys = list(episodes_stats.keys())

    for i in range(num_episodes):
        ep_legacy_metadata = episodes_legacy_metadata_vals[i]
        ep_metadata = episodes_metadata[i]
        ep_stats = episodes_stats_vals[i]

        # Collected only for the (disabled) consistency check below.
        ep_ids_set = {
            ep_legacy_metadata["episode_index"],
            ep_metadata["episode_index"],
            episodes_stats_keys[i],
        }

        if episodes_videos is None:
            ep_video = {}
        else:
            ep_video = episodes_videos[i]
            ep_ids_set.add(ep_video["episode_index"])
        # we skip this check because ep_ids have a step of 10, whereas we convert with a step of 1
        # if len(ep_ids_set) != 1:
        #     raise ValueError(f"Number of episodes is not the same ({ep_ids_set}).")

        ep_dict = {**ep_metadata, **ep_video, **ep_legacy_metadata, **flatten_dict({"stats": ep_stats})}
        # All episode metadata lives in the first meta/episodes chunk/file.
        ep_dict["meta/episodes/chunk_index"] = 0
        ep_dict["meta/episodes/file_index"] = 0
        yield ep_dict
|
||||
|
||||
|
||||
def convert_episodes_metadata(
    root, new_root, episodes_metadata, task_id: int, task_ranges, episodes_video_metadata=None
):
    """Build and write the v3 episodes table plus aggregated stats for one task."""
    logging.info(f"Converting episodes metadata from {root} to {new_root}")

    # Restrict the legacy metadata and stats to the episodes of this task.
    episodes_legacy_metadata = legacy_load_episodes_task(root, task_id=task_id, task_ranges=task_ranges)
    episodes_stats = legacy_load_episodes_stats_task(root, task_id=task_id, task_ranges=task_ranges)

    # Every metadata source must describe the same number of episodes.
    num_eps_set = {len(episodes_legacy_metadata), len(episodes_metadata)}
    if episodes_video_metadata is not None:
        num_eps_set.add(len(episodes_video_metadata))
    if len(num_eps_set) != 1:
        raise ValueError(f"Number of episodes is not the same ({num_eps_set}).")

    def _episode_rows():
        return generate_episode_metadata_dict(
            episodes_legacy_metadata, episodes_metadata, episodes_stats, episodes_video_metadata
        )

    write_episodes(Dataset.from_generator(_episode_rows), new_root)

    # Aggregate the per-episode stats into dataset-level stats.
    write_stats(aggregate_stats(list(episodes_stats.values())), new_root)
|
||||
|
||||
|
||||
def convert_dataset_local(
    data_path: Path,
    new_repo: Path,
    task_id: int,
    data_file_size_in_mb: int = DEFAULT_DATA_FILE_SIZE_IN_MB,
    video_file_size_in_mb: int = DEFAULT_VIDEO_FILE_SIZE_IN_MB,
    force_conversion: bool = False,
):
    """
    Convert a local dataset to v3.x format, task-by-task, without using the Hugging Face Hub.

    Args:
        data_path (Path): path to local dataset root (e.g. /fsx/.../2025-challenge-demos)
        new_repo (Path): path where converted dataset will be written (e.g. /fsx/.../behavior1k_v3)
        task_id (int): which task to convert (index)
        data_file_size_in_mb (int): max size per data chunk
        video_file_size_in_mb (int): max size per video chunk
        force_conversion (bool): overwrite existing conversion if True
    """

    root = Path(data_path)
    new_root = Path(new_repo)

    # Clean up if needed
    if new_root.exists() and force_conversion:
        shutil.rmtree(new_root)
    new_root.mkdir(parents=True, exist_ok=True)

    print(f"🔹 Starting conversion for task {task_id}")
    print(f"Input root: {root}")
    print(f"Output root: {new_root}")
    # Infer task episode ranges
    episodes_meta_path = root / "meta" / "episodes.jsonl"
    task_ranges = infer_task_episode_ranges(episodes_meta_path)
    # Order matters: info/tasks first, then data and videos (which produce the
    # per-episode chunk/file metadata), and finally the episodes table that
    # merges that metadata.
    convert_info(
        root,
        new_root,
        data_file_size_in_mb,
        video_file_size_in_mb,
        episodes_meta_path,
        task_id,
        task_ranges,
        step=10,
    )
    convert_tasks(root, new_root, task_id)
    episodes_metadata = convert_data(root, new_root, data_file_size_in_mb, task_index=task_id)
    episodes_videos_metadata = convert_videos(root, new_root, video_file_size_in_mb, task_id=task_id)
    convert_episodes_metadata(
        root,
        new_root,
        episodes_metadata,
        task_id=task_id,
        task_ranges=task_ranges,
        episodes_video_metadata=episodes_videos_metadata,
    )

    print(f"✅ Conversion complete for task {task_id}")
    print(f"Converted dataset written to: {new_root}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # CLI entry point for the single-task conversion.
    # NOTE: the duplicate in-block `import argparse` / `from pathlib import Path`
    # were removed — both are already imported at module level.
    init_logging()

    parser = argparse.ArgumentParser(
        description="Convert Behavior-1K tasks to LeRobot v3 format (local only)"
    )
    parser.add_argument(
        "--data-path",
        type=str,
        required=True,
        help="Path to the local Behavior-1K dataset (e.g. /fsx/francesco_capuano/.cache/behavior-1k/2025-challenge-demos)",
    )
    parser.add_argument(
        "--new-repo",
        type=str,
        required=True,
        help="Path to the output directory for the converted dataset",
    )
    parser.add_argument(
        "--task-id",
        type=int,
        required=True,
        help="Task index to convert (e.g. 0, 1, 2, ...)",
    )
    parser.add_argument(
        "--data-file-size-in-mb",
        type=int,
        default=DEFAULT_DATA_FILE_SIZE_IN_MB,
        help=f"Maximum size per data chunk (default: {DEFAULT_DATA_FILE_SIZE_IN_MB})",
    )
    parser.add_argument(
        "--video-file-size-in-mb",
        type=int,
        default=DEFAULT_VIDEO_FILE_SIZE_IN_MB,
        help=f"Maximum size per video chunk (default: {DEFAULT_VIDEO_FILE_SIZE_IN_MB})",
    )
    parser.add_argument(
        "--force-conversion",
        action="store_true",
        help="Force overwrite of existing conversion output if present.",
    )

    args = parser.parse_args()

    convert_dataset_local(
        data_path=Path(args.data_path),
        new_repo=Path(args.new_repo),
        task_id=args.task_id,
        data_file_size_in_mb=args.data_file_size_in_mb,
        video_file_size_in_mb=args.video_file_size_in_mb,
        force_conversion=args.force_conversion,
    )
|
||||
@@ -0,0 +1,130 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
# Copyright 2025 The HuggingFace Inc. team. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
"""
|
||||
Test script to verify BEHAVIOR-1K dataset loading with v3.0 wrapper.
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import logging
|
||||
|
||||
from behavior_lerobot_dataset_v3 import BehaviorLeRobotDatasetV3
|
||||
|
||||
from lerobot.utils.utils import init_logging
|
||||
|
||||
init_logging()
|
||||
|
||||
|
||||
def load_behavior1k_dataset(repo_id, root):
    """Test basic dataset loading.

    Loads the dataset with a single modality (rgb) and camera (head) in
    non-streaming mode, logs basic statistics, and returns the dataset.
    """
    logging.info("=" * 80)
    logging.info("Testing BEHAVIOR-1K dataset loading")
    logging.info("=" * 80)

    logging.info(f"\n1. Loading dataset with repo_id: {repo_id}")
    dataset = BehaviorLeRobotDatasetV3(
        repo_id=repo_id,
        root=root,
        modalities=["rgb"],
        cameras=["head"],
        chunk_streaming_using_keyframe=False,
        check_timestamp_sync=False,
    )

    logging.info("\n2. Dataset loaded successfully!")
    logging.info(f" - Number of episodes: {dataset.num_episodes}")
    logging.info(f" - Number of frames: {dataset.num_frames}")
    logging.info(f" - FPS: {dataset.fps}")
    logging.info(f" - Features: {list(dataset.features)}")

    return dataset
|
||||
|
||||
|
||||
def load_behavior1k_dataset_with_multiple_modalities(repo_id, root):
    """Test loading multiple modalities and cameras.

    Loads rgb + depth for three cameras and logs the resulting feature keys.
    """
    logging.info("\n" + "=" * 80)
    # BUGFIX: this message was missing its f-prefix, so the literal text
    # "{repo_id}" was logged instead of the actual value.
    logging.info(f"Testing multi-modality loading with repo_id: {repo_id}")
    logging.info("=" * 80)

    logging.info(f"\n1. Loading dataset with RGB + Depth with repo_id: {repo_id}")
    dataset = BehaviorLeRobotDatasetV3(
        repo_id=repo_id,
        root=root,
        modalities=["rgb", "depth"],
        cameras=["head", "left_wrist", "right_wrist"],
        chunk_streaming_using_keyframe=False,
        check_timestamp_sync=False,
        video_backend="pyav",
    )

    logging.info(f"\n2. Dataset loaded with modalities: {list(dataset.features)}")
    logging.info(f" - Total features: {len(dataset.features)}")

    rgb_keys = [k for k in dataset.features if "rgb" in k]
    depth_keys = [k for k in dataset.features if "depth" in k]
    logging.info(f" - RGB features: {rgb_keys}")
    logging.info(f" - Depth features: {depth_keys}")

    logging.info("\n3. SUCCESS! Multi-modality loading works.")

    return dataset
|
||||
|
||||
|
||||
def stream_behavior1k_dataset(repo_id, root):
    """Test chunk streaming mode.

    Loads the dataset with keyframe-based chunk streaming and deterministic
    shuffling, then touches the first few frames to exercise access.
    """
    logging.info("\n" + "=" * 80)
    logging.info("Testing chunk streaming mode")
    logging.info("=" * 80)

    logging.info("\n1. Loading dataset with chunk streaming...")
    dataset = BehaviorLeRobotDatasetV3(
        repo_id=repo_id,
        root=root,
        modalities=["rgb"],
        cameras=["head"],
        chunk_streaming_using_keyframe=True,
        shuffle=True,
        seed=42,
        check_timestamp_sync=False,
    )

    logging.info("\n2. Dataset loaded in streaming mode")
    logging.info(f" - Number of chunks: {len(dataset.chunks)}")
    logging.info(f" - First chunk range: {dataset.chunks[0]}")

    logging.info("\n3. Testing frame access in streaming mode...")
    # Access a handful of frames to verify indexing works in streaming mode.
    for i in range(min(3, len(dataset))):
        frame = dataset[i]
        logging.info(
            f" - Frame {i}: episode_index={frame['episode_index'].item()}, "
            f"task_index={frame['task_index'].item()}"
        )

    logging.info("\n4. SUCCESS! Chunk streaming works.")

    return dataset
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Smoke-run all three loading scenarios against the same dataset location.
    parser = argparse.ArgumentParser()
    for flag in ("--repo-id", "--root"):
        parser.add_argument(flag, type=str, default=None)

    args = parser.parse_args()

    for scenario in (
        load_behavior1k_dataset,
        load_behavior1k_dataset_with_multiple_modalities,
        stream_behavior1k_dataset,
    ):
        scenario(args.repo_id, args.root)
|
||||
Reference in New Issue
Block a user