Modify the convert_to_lerobot_v3 script for the behaviours dataset to take a single task id and create a dataset out of it

2026-05-21 11:39:50 +00:00 · 2025-10-24 17:06:21 +02:00
parent 950a6fb83d
commit 9f00d2c3a2
3 changed files with 203 additions and 238 deletions
@@ -1,46 +1,49 @@
 #!/usr/bin/env python

 import json
+import logging
+from pathlib import Path
+from typing import Any
+
 import numpy as np
 import torch as th
-from pathlib import Path
-from typing import Dict, Any

 from lerobot.datasets.lerobot_dataset import LeRobotDataset
-
-from .behaviour_1k_constants import (
-    TASK_INDICES_TO_NAMES,
-    ROBOT_CAMERA_NAMES,
-    PROPRIOCEPTION_INDICES,
-    BEHAVIOR_DATASET_FEATURES,
-)
-
-import logging
 from lerobot.utils.utils import init_logging

+from .behaviour_1k_constants import (
+    PROPRIOCEPTION_INDICES,
+    ROBOT_CAMERA_NAMES,
+    TASK_INDICES_TO_NAMES,
+)
+
 init_logging()

+
 class BehaviorLeRobotDatasetV3(LeRobotDataset):
    """
    Extends LeRobotDataset v3.0 for BEHAVIOR-1K specific requirements.
    Handles task-based episode organization and BEHAVIOR-1K metadata.
    """
+
    @classmethod
    def create(
        cls,
        repo_id: str,
-        fps: int = 30,
+        fps: int,
+        features: dict,
        root: str | Path | None = None,
-        robot_type: str = "R1Pro",
+        robot_type: str | None = None,
        use_videos: bool = True,
-        video_backend: str = "pyav",
-        batch_encoding_size: int = 1,
+        tolerance_s: float = 1e-4,
        image_writer_processes: int = 0,
-        image_writer_threads: int = 4,
+        image_writer_threads: int = 0,
+        video_backend: str | None = None,
+        batch_encoding_size: int = 1,
    ) -> "BehaviorLeRobotDatasetV3":
        """
        Create a new BEHAVIOR-1K dataset in v3.0 format.
-        
+
        Args:
            repo_id: HuggingFace repository ID
            fps: Frames per second (default: 30)
@@ -51,7 +54,7 @@ class BehaviorLeRobotDatasetV3(LeRobotDataset):
            batch_encoding_size: Number of episodes to batch before encoding videos
            image_writer_processes: Number of processes for async image writing
            image_writer_threads: Number of threads per process for image writing
-            
+
        Returns:
            BehaviorLeRobotDatasetV3 instance
        """
@@ -59,7 +62,7 @@ class BehaviorLeRobotDatasetV3(LeRobotDataset):
        obj = super().create(
            repo_id=repo_id,
            fps=fps,
-            features=BEHAVIOR_DATASET_FEATURES,
+            features=features,
            root=root,
            robot_type=robot_type,
            use_videos=use_videos,
@@ -69,14 +72,14 @@ class BehaviorLeRobotDatasetV3(LeRobotDataset):
            video_backend=video_backend,
            batch_encoding_size=batch_encoding_size,
        )
-        
+
        # Convert to BehaviorLeRobotDatasetV3 instance
        obj.__class__ = cls
-        
+
        # Initialize BEHAVIOR-1K specific attributes
        obj.task_episode_mapping = {}  # Maps task_id to list of episode indices
        obj.episode_task_mapping = {}  # Maps episode_index to task info
-        
+
        # Additional metadata for BEHAVIOR-1K
        obj.behavior_metadata = {
            "robot_type": robot_type,
@@ -84,41 +87,41 @@ class BehaviorLeRobotDatasetV3(LeRobotDataset):
            "proprioception_indices": PROPRIOCEPTION_INDICES[robot_type],
            "camera_names": ROBOT_CAMERA_NAMES[robot_type],
        }
-        
+
        logging.info(f"Created BehaviorLeRobotDatasetV3 with repo_id: {repo_id}")
        return obj
-    
+
    def __init__(self, *args, **kwargs):
        """
        Initialize from existing dataset.
        Use the create() classmethod to create a new dataset.
        """
        super().__init__(*args, **kwargs)
-        
+
        # Initialize BEHAVIOR-1K specific attributes for loading existing datasets
        self.task_episode_mapping = {}
        self.episode_task_mapping = {}
        self.behavior_metadata = {}
-        
+
        # Try to load BEHAVIOR-1K metadata if it exists
        metadata_path = self.root / "meta" / "behavior_metadata.json"
        if metadata_path.exists():
-            with open(metadata_path, "r") as f:
+            with open(metadata_path) as f:
                stored_metadata = json.load(f)
                self.behavior_metadata = stored_metadata
                self.task_episode_mapping = stored_metadata.get("task_episode_mapping", {})
                self.episode_task_mapping = stored_metadata.get("episode_task_mapping", {})
-    
+
    def add_episode_from_hdf5(
        self,
-        hdf5_data: Dict[str, Any],
+        hdf5_data: dict[str, Any],
        task_id: int,
        episode_id: int,
        include_videos: bool = True,
    ) -> None:
        """
        Add an episode from HDF5 data to the dataset.
-        
+
        Args:
            hdf5_data: Dictionary containing the HDF5 episode data
            task_id: Task ID for this episode
@@ -127,9 +130,9 @@ class BehaviorLeRobotDatasetV3(LeRobotDataset):
        """
        task_name = TASK_INDICES_TO_NAMES[task_id]
        num_frames = len(hdf5_data["action"])
-        
+
        logging.info(f"Adding episode {episode_id} (task: {task_name}) with {num_frames} frames")
-        
+
        # Process each frame
        for frame_idx in range(num_frames):
            frame_data = {
@@ -140,44 +143,44 @@ class BehaviorLeRobotDatasetV3(LeRobotDataset):
                "task": task_name,
                "timestamp": frame_idx / self.fps,
            }
-            
+
            # Add video frames if requested
            if include_videos:
                for modality in ["rgb", "depth_linear", "seg_instance_id"]:
                    # Map depth_linear to depth for consistency
                    output_modality = "depth" if modality == "depth_linear" else modality
-                    
+
                    for camera_name, robot_camera_name in ROBOT_CAMERA_NAMES[self.robot_type].items():
                        key = f"observation.images.{output_modality}.{camera_name}"
                        hdf5_key = f"{robot_camera_name}::{modality}"
-                        
+
                        if hdf5_key in hdf5_data["obs"]:
                            # Get the frame data
                            frame = hdf5_data["obs"][hdf5_key][frame_idx]
-                            
+
                            # Handle different data types
                            if isinstance(frame, th.Tensor):
                                frame = frame.numpy()
-                            
+
                            # Ensure correct shape
                            if modality == "seg_instance_id" and len(frame.shape) == 2:
                                # Add channel dimension for grayscale
                                frame = np.expand_dims(frame, axis=-1)
                            elif modality == "depth_linear" and len(frame.shape) == 2:
                                frame = np.expand_dims(frame, axis=-1)
-                            
+
                            frame_data[key] = frame
-            
+
            # Add frame to dataset
            self.add_frame(frame_data)
-        
+
        # Save episode with metadata
        episode_metadata = {
            "task_id": task_id,
            "task_name": task_name,
            "original_episode_id": episode_id,
        }
-        
+
        # Add any additional HDF5 attributes as metadata
        if "attrs" in hdf5_data:
            for attr_name, attr_value in hdf5_data["attrs"].items():
@@ -185,10 +188,10 @@ class BehaviorLeRobotDatasetV3(LeRobotDataset):
                    episode_metadata[attr_name] = list(attr_value)
                else:
                    episode_metadata[attr_name] = attr_value
-        
+
        # Save the episode
        self.save_episode(episode_data=None)
-        
+
        # Track task-episode mapping
        if task_id not in self.task_episode_mapping:
            self.task_episode_mapping[task_id] = []
@@ -198,26 +201,30 @@ class BehaviorLeRobotDatasetV3(LeRobotDataset):
            "task_name": task_name,
            "original_episode_id": episode_id,
        }
-    
+
    def finalize(self) -> None:
        """Finalize the dataset and save additional BEHAVIOR-1K metadata."""
        # Save BEHAVIOR-1K specific metadata
        metadata_path = self.root / "meta" / "behavior_metadata.json"
        metadata_path.parent.mkdir(parents=True, exist_ok=True)
-        
-        self.behavior_metadata.update({
-            "task_episode_mapping": self.task_episode_mapping,
-            "episode_task_mapping": self.episode_task_mapping,
-            "total_tasks": len(self.task_episode_mapping),
-            "total_episodes": self.num_episodes,
-            "total_frames": self.num_frames,
-        })
-        
+
+        self.behavior_metadata.update(
+            {
+                "task_episode_mapping": self.task_episode_mapping,
+                "episode_task_mapping": self.episode_task_mapping,
+                "total_tasks": len(self.task_episode_mapping),
+                "total_episodes": self.num_episodes,
+                "total_frames": self.num_frames,
+            }
+        )
+
        with open(metadata_path, "w") as f:
            json.dump(self.behavior_metadata, f, indent=2)
-        
+
        # Finalize the parent dataset
        super().finalize()
-        
-        logging.info(f"Finalized dataset with {self.num_episodes} episodes "
-                   f"and {self.num_frames} frames across {len(self.task_episode_mapping)} tasks")
+
+        logging.info(
+            f"Finalized dataset with {self.num_episodes} episodes "
+            f"and {self.num_frames} frames across {len(self.task_episode_mapping)} tasks"
+        )