mirror of
https://github.com/huggingface/lerobot.git
synced 2026-05-21 19:49:49 +00:00
Modify the convert_to_lerobot_v3 script for the behaviours dataset to take a single task ID and create a dataset out of it
This commit is contained in:
@@ -1,46 +1,49 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
|
|
||||||
import json
|
import json
|
||||||
|
import logging
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import torch as th
|
import torch as th
|
||||||
from pathlib import Path
|
|
||||||
from typing import Dict, Any
|
|
||||||
|
|
||||||
from lerobot.datasets.lerobot_dataset import LeRobotDataset
|
from lerobot.datasets.lerobot_dataset import LeRobotDataset
|
||||||
|
|
||||||
from .behaviour_1k_constants import (
|
|
||||||
TASK_INDICES_TO_NAMES,
|
|
||||||
ROBOT_CAMERA_NAMES,
|
|
||||||
PROPRIOCEPTION_INDICES,
|
|
||||||
BEHAVIOR_DATASET_FEATURES,
|
|
||||||
)
|
|
||||||
|
|
||||||
import logging
|
|
||||||
from lerobot.utils.utils import init_logging
|
from lerobot.utils.utils import init_logging
|
||||||
|
|
||||||
|
from .behaviour_1k_constants import (
|
||||||
|
PROPRIOCEPTION_INDICES,
|
||||||
|
ROBOT_CAMERA_NAMES,
|
||||||
|
TASK_INDICES_TO_NAMES,
|
||||||
|
)
|
||||||
|
|
||||||
init_logging()
|
init_logging()
|
||||||
|
|
||||||
|
|
||||||
class BehaviorLeRobotDatasetV3(LeRobotDataset):
|
class BehaviorLeRobotDatasetV3(LeRobotDataset):
|
||||||
"""
|
"""
|
||||||
Extends LeRobotDataset v3.0 for BEHAVIOR-1K specific requirements.
|
Extends LeRobotDataset v3.0 for BEHAVIOR-1K specific requirements.
|
||||||
Handles task-based episode organization and BEHAVIOR-1K metadata.
|
Handles task-based episode organization and BEHAVIOR-1K metadata.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def create(
|
def create(
|
||||||
cls,
|
cls,
|
||||||
repo_id: str,
|
repo_id: str,
|
||||||
fps: int = 30,
|
fps: int,
|
||||||
|
features: dict,
|
||||||
root: str | Path | None = None,
|
root: str | Path | None = None,
|
||||||
robot_type: str = "R1Pro",
|
robot_type: str | None = None,
|
||||||
use_videos: bool = True,
|
use_videos: bool = True,
|
||||||
video_backend: str = "pyav",
|
tolerance_s: float = 1e-4,
|
||||||
batch_encoding_size: int = 1,
|
|
||||||
image_writer_processes: int = 0,
|
image_writer_processes: int = 0,
|
||||||
image_writer_threads: int = 4,
|
image_writer_threads: int = 0,
|
||||||
|
video_backend: str | None = None,
|
||||||
|
batch_encoding_size: int = 1,
|
||||||
) -> "BehaviorLeRobotDatasetV3":
|
) -> "BehaviorLeRobotDatasetV3":
|
||||||
"""
|
"""
|
||||||
Create a new BEHAVIOR-1K dataset in v3.0 format.
|
Create a new BEHAVIOR-1K dataset in v3.0 format.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
repo_id: HuggingFace repository ID
|
repo_id: HuggingFace repository ID
|
||||||
fps: Frames per second (default: 30)
|
fps: Frames per second (default: 30)
|
||||||
@@ -51,7 +54,7 @@ class BehaviorLeRobotDatasetV3(LeRobotDataset):
|
|||||||
batch_encoding_size: Number of episodes to batch before encoding videos
|
batch_encoding_size: Number of episodes to batch before encoding videos
|
||||||
image_writer_processes: Number of processes for async image writing
|
image_writer_processes: Number of processes for async image writing
|
||||||
image_writer_threads: Number of threads per process for image writing
|
image_writer_threads: Number of threads per process for image writing
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
BehaviorLeRobotDatasetV3 instance
|
BehaviorLeRobotDatasetV3 instance
|
||||||
"""
|
"""
|
||||||
@@ -59,7 +62,7 @@ class BehaviorLeRobotDatasetV3(LeRobotDataset):
|
|||||||
obj = super().create(
|
obj = super().create(
|
||||||
repo_id=repo_id,
|
repo_id=repo_id,
|
||||||
fps=fps,
|
fps=fps,
|
||||||
features=BEHAVIOR_DATASET_FEATURES,
|
features=features,
|
||||||
root=root,
|
root=root,
|
||||||
robot_type=robot_type,
|
robot_type=robot_type,
|
||||||
use_videos=use_videos,
|
use_videos=use_videos,
|
||||||
@@ -69,14 +72,14 @@ class BehaviorLeRobotDatasetV3(LeRobotDataset):
|
|||||||
video_backend=video_backend,
|
video_backend=video_backend,
|
||||||
batch_encoding_size=batch_encoding_size,
|
batch_encoding_size=batch_encoding_size,
|
||||||
)
|
)
|
||||||
|
|
||||||
# Convert to BehaviorLeRobotDatasetV3 instance
|
# Convert to BehaviorLeRobotDatasetV3 instance
|
||||||
obj.__class__ = cls
|
obj.__class__ = cls
|
||||||
|
|
||||||
# Initialize BEHAVIOR-1K specific attributes
|
# Initialize BEHAVIOR-1K specific attributes
|
||||||
obj.task_episode_mapping = {} # Maps task_id to list of episode indices
|
obj.task_episode_mapping = {} # Maps task_id to list of episode indices
|
||||||
obj.episode_task_mapping = {} # Maps episode_index to task info
|
obj.episode_task_mapping = {} # Maps episode_index to task info
|
||||||
|
|
||||||
# Additional metadata for BEHAVIOR-1K
|
# Additional metadata for BEHAVIOR-1K
|
||||||
obj.behavior_metadata = {
|
obj.behavior_metadata = {
|
||||||
"robot_type": robot_type,
|
"robot_type": robot_type,
|
||||||
@@ -84,41 +87,41 @@ class BehaviorLeRobotDatasetV3(LeRobotDataset):
|
|||||||
"proprioception_indices": PROPRIOCEPTION_INDICES[robot_type],
|
"proprioception_indices": PROPRIOCEPTION_INDICES[robot_type],
|
||||||
"camera_names": ROBOT_CAMERA_NAMES[robot_type],
|
"camera_names": ROBOT_CAMERA_NAMES[robot_type],
|
||||||
}
|
}
|
||||||
|
|
||||||
logging.info(f"Created BehaviorLeRobotDatasetV3 with repo_id: {repo_id}")
|
logging.info(f"Created BehaviorLeRobotDatasetV3 with repo_id: {repo_id}")
|
||||||
return obj
|
return obj
|
||||||
|
|
||||||
def __init__(self, *args, **kwargs):
|
def __init__(self, *args, **kwargs):
|
||||||
"""
|
"""
|
||||||
Initialize from existing dataset.
|
Initialize from existing dataset.
|
||||||
Use the create() classmethod to create a new dataset.
|
Use the create() classmethod to create a new dataset.
|
||||||
"""
|
"""
|
||||||
super().__init__(*args, **kwargs)
|
super().__init__(*args, **kwargs)
|
||||||
|
|
||||||
# Initialize BEHAVIOR-1K specific attributes for loading existing datasets
|
# Initialize BEHAVIOR-1K specific attributes for loading existing datasets
|
||||||
self.task_episode_mapping = {}
|
self.task_episode_mapping = {}
|
||||||
self.episode_task_mapping = {}
|
self.episode_task_mapping = {}
|
||||||
self.behavior_metadata = {}
|
self.behavior_metadata = {}
|
||||||
|
|
||||||
# Try to load BEHAVIOR-1K metadata if it exists
|
# Try to load BEHAVIOR-1K metadata if it exists
|
||||||
metadata_path = self.root / "meta" / "behavior_metadata.json"
|
metadata_path = self.root / "meta" / "behavior_metadata.json"
|
||||||
if metadata_path.exists():
|
if metadata_path.exists():
|
||||||
with open(metadata_path, "r") as f:
|
with open(metadata_path) as f:
|
||||||
stored_metadata = json.load(f)
|
stored_metadata = json.load(f)
|
||||||
self.behavior_metadata = stored_metadata
|
self.behavior_metadata = stored_metadata
|
||||||
self.task_episode_mapping = stored_metadata.get("task_episode_mapping", {})
|
self.task_episode_mapping = stored_metadata.get("task_episode_mapping", {})
|
||||||
self.episode_task_mapping = stored_metadata.get("episode_task_mapping", {})
|
self.episode_task_mapping = stored_metadata.get("episode_task_mapping", {})
|
||||||
|
|
||||||
def add_episode_from_hdf5(
|
def add_episode_from_hdf5(
|
||||||
self,
|
self,
|
||||||
hdf5_data: Dict[str, Any],
|
hdf5_data: dict[str, Any],
|
||||||
task_id: int,
|
task_id: int,
|
||||||
episode_id: int,
|
episode_id: int,
|
||||||
include_videos: bool = True,
|
include_videos: bool = True,
|
||||||
) -> None:
|
) -> None:
|
||||||
"""
|
"""
|
||||||
Add an episode from HDF5 data to the dataset.
|
Add an episode from HDF5 data to the dataset.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
hdf5_data: Dictionary containing the HDF5 episode data
|
hdf5_data: Dictionary containing the HDF5 episode data
|
||||||
task_id: Task ID for this episode
|
task_id: Task ID for this episode
|
||||||
@@ -127,9 +130,9 @@ class BehaviorLeRobotDatasetV3(LeRobotDataset):
|
|||||||
"""
|
"""
|
||||||
task_name = TASK_INDICES_TO_NAMES[task_id]
|
task_name = TASK_INDICES_TO_NAMES[task_id]
|
||||||
num_frames = len(hdf5_data["action"])
|
num_frames = len(hdf5_data["action"])
|
||||||
|
|
||||||
logging.info(f"Adding episode {episode_id} (task: {task_name}) with {num_frames} frames")
|
logging.info(f"Adding episode {episode_id} (task: {task_name}) with {num_frames} frames")
|
||||||
|
|
||||||
# Process each frame
|
# Process each frame
|
||||||
for frame_idx in range(num_frames):
|
for frame_idx in range(num_frames):
|
||||||
frame_data = {
|
frame_data = {
|
||||||
@@ -140,44 +143,44 @@ class BehaviorLeRobotDatasetV3(LeRobotDataset):
|
|||||||
"task": task_name,
|
"task": task_name,
|
||||||
"timestamp": frame_idx / self.fps,
|
"timestamp": frame_idx / self.fps,
|
||||||
}
|
}
|
||||||
|
|
||||||
# Add video frames if requested
|
# Add video frames if requested
|
||||||
if include_videos:
|
if include_videos:
|
||||||
for modality in ["rgb", "depth_linear", "seg_instance_id"]:
|
for modality in ["rgb", "depth_linear", "seg_instance_id"]:
|
||||||
# Map depth_linear to depth for consistency
|
# Map depth_linear to depth for consistency
|
||||||
output_modality = "depth" if modality == "depth_linear" else modality
|
output_modality = "depth" if modality == "depth_linear" else modality
|
||||||
|
|
||||||
for camera_name, robot_camera_name in ROBOT_CAMERA_NAMES[self.robot_type].items():
|
for camera_name, robot_camera_name in ROBOT_CAMERA_NAMES[self.robot_type].items():
|
||||||
key = f"observation.images.{output_modality}.{camera_name}"
|
key = f"observation.images.{output_modality}.{camera_name}"
|
||||||
hdf5_key = f"{robot_camera_name}::{modality}"
|
hdf5_key = f"{robot_camera_name}::{modality}"
|
||||||
|
|
||||||
if hdf5_key in hdf5_data["obs"]:
|
if hdf5_key in hdf5_data["obs"]:
|
||||||
# Get the frame data
|
# Get the frame data
|
||||||
frame = hdf5_data["obs"][hdf5_key][frame_idx]
|
frame = hdf5_data["obs"][hdf5_key][frame_idx]
|
||||||
|
|
||||||
# Handle different data types
|
# Handle different data types
|
||||||
if isinstance(frame, th.Tensor):
|
if isinstance(frame, th.Tensor):
|
||||||
frame = frame.numpy()
|
frame = frame.numpy()
|
||||||
|
|
||||||
# Ensure correct shape
|
# Ensure correct shape
|
||||||
if modality == "seg_instance_id" and len(frame.shape) == 2:
|
if modality == "seg_instance_id" and len(frame.shape) == 2:
|
||||||
# Add channel dimension for grayscale
|
# Add channel dimension for grayscale
|
||||||
frame = np.expand_dims(frame, axis=-1)
|
frame = np.expand_dims(frame, axis=-1)
|
||||||
elif modality == "depth_linear" and len(frame.shape) == 2:
|
elif modality == "depth_linear" and len(frame.shape) == 2:
|
||||||
frame = np.expand_dims(frame, axis=-1)
|
frame = np.expand_dims(frame, axis=-1)
|
||||||
|
|
||||||
frame_data[key] = frame
|
frame_data[key] = frame
|
||||||
|
|
||||||
# Add frame to dataset
|
# Add frame to dataset
|
||||||
self.add_frame(frame_data)
|
self.add_frame(frame_data)
|
||||||
|
|
||||||
# Save episode with metadata
|
# Save episode with metadata
|
||||||
episode_metadata = {
|
episode_metadata = {
|
||||||
"task_id": task_id,
|
"task_id": task_id,
|
||||||
"task_name": task_name,
|
"task_name": task_name,
|
||||||
"original_episode_id": episode_id,
|
"original_episode_id": episode_id,
|
||||||
}
|
}
|
||||||
|
|
||||||
# Add any additional HDF5 attributes as metadata
|
# Add any additional HDF5 attributes as metadata
|
||||||
if "attrs" in hdf5_data:
|
if "attrs" in hdf5_data:
|
||||||
for attr_name, attr_value in hdf5_data["attrs"].items():
|
for attr_name, attr_value in hdf5_data["attrs"].items():
|
||||||
@@ -185,10 +188,10 @@ class BehaviorLeRobotDatasetV3(LeRobotDataset):
|
|||||||
episode_metadata[attr_name] = list(attr_value)
|
episode_metadata[attr_name] = list(attr_value)
|
||||||
else:
|
else:
|
||||||
episode_metadata[attr_name] = attr_value
|
episode_metadata[attr_name] = attr_value
|
||||||
|
|
||||||
# Save the episode
|
# Save the episode
|
||||||
self.save_episode(episode_data=None)
|
self.save_episode(episode_data=None)
|
||||||
|
|
||||||
# Track task-episode mapping
|
# Track task-episode mapping
|
||||||
if task_id not in self.task_episode_mapping:
|
if task_id not in self.task_episode_mapping:
|
||||||
self.task_episode_mapping[task_id] = []
|
self.task_episode_mapping[task_id] = []
|
||||||
@@ -198,26 +201,30 @@ class BehaviorLeRobotDatasetV3(LeRobotDataset):
|
|||||||
"task_name": task_name,
|
"task_name": task_name,
|
||||||
"original_episode_id": episode_id,
|
"original_episode_id": episode_id,
|
||||||
}
|
}
|
||||||
|
|
||||||
def finalize(self) -> None:
|
def finalize(self) -> None:
|
||||||
"""Finalize the dataset and save additional BEHAVIOR-1K metadata."""
|
"""Finalize the dataset and save additional BEHAVIOR-1K metadata."""
|
||||||
# Save BEHAVIOR-1K specific metadata
|
# Save BEHAVIOR-1K specific metadata
|
||||||
metadata_path = self.root / "meta" / "behavior_metadata.json"
|
metadata_path = self.root / "meta" / "behavior_metadata.json"
|
||||||
metadata_path.parent.mkdir(parents=True, exist_ok=True)
|
metadata_path.parent.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
self.behavior_metadata.update({
|
self.behavior_metadata.update(
|
||||||
"task_episode_mapping": self.task_episode_mapping,
|
{
|
||||||
"episode_task_mapping": self.episode_task_mapping,
|
"task_episode_mapping": self.task_episode_mapping,
|
||||||
"total_tasks": len(self.task_episode_mapping),
|
"episode_task_mapping": self.episode_task_mapping,
|
||||||
"total_episodes": self.num_episodes,
|
"total_tasks": len(self.task_episode_mapping),
|
||||||
"total_frames": self.num_frames,
|
"total_episodes": self.num_episodes,
|
||||||
})
|
"total_frames": self.num_frames,
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
with open(metadata_path, "w") as f:
|
with open(metadata_path, "w") as f:
|
||||||
json.dump(self.behavior_metadata, f, indent=2)
|
json.dump(self.behavior_metadata, f, indent=2)
|
||||||
|
|
||||||
# Finalize the parent dataset
|
# Finalize the parent dataset
|
||||||
super().finalize()
|
super().finalize()
|
||||||
|
|
||||||
logging.info(f"Finalized dataset with {self.num_episodes} episodes "
|
logging.info(
|
||||||
f"and {self.num_frames} frames across {len(self.task_episode_mapping)} tasks")
|
f"Finalized dataset with {self.num_episodes} episodes "
|
||||||
|
f"and {self.num_frames} frames across {len(self.task_episode_mapping)} tasks"
|
||||||
|
)
|
||||||
|
|||||||
@@ -1,7 +1,10 @@
|
|||||||
import numpy as np
|
|
||||||
import torch as th
|
|
||||||
from collections import OrderedDict
|
from collections import OrderedDict
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
import torch as th
|
||||||
|
|
||||||
|
ROBOT_TYPE = "R1Pro"
|
||||||
|
FPS = 30
|
||||||
|
|
||||||
ROBOT_CAMERA_NAMES = {
|
ROBOT_CAMERA_NAMES = {
|
||||||
"A1": {
|
"A1": {
|
||||||
@@ -21,13 +24,17 @@ WRIST_RESOLUTION = (480, 480)
|
|||||||
# TODO: Fix A1
|
# TODO: Fix A1
|
||||||
CAMERA_INTRINSICS = {
|
CAMERA_INTRINSICS = {
|
||||||
"A1": {
|
"A1": {
|
||||||
"external": np.array([[306.0, 0.0, 360.0], [0.0, 306.0, 360.0], [0.0, 0.0, 1.0]], dtype=np.float32), # 240x240
|
"external": np.array(
|
||||||
|
[[306.0, 0.0, 360.0], [0.0, 306.0, 360.0], [0.0, 0.0, 1.0]], dtype=np.float32
|
||||||
|
), # 240x240
|
||||||
"wrist": np.array(
|
"wrist": np.array(
|
||||||
[[388.6639, 0.0, 240.0], [0.0, 388.6639, 240.0], [0.0, 0.0, 1.0]], dtype=np.float32
|
[[388.6639, 0.0, 240.0], [0.0, 388.6639, 240.0], [0.0, 0.0, 1.0]], dtype=np.float32
|
||||||
), # 240x240
|
), # 240x240
|
||||||
},
|
},
|
||||||
"R1Pro": {
|
"R1Pro": {
|
||||||
"head": np.array([[306.0, 0.0, 360.0], [0.0, 306.0, 360.0], [0.0, 0.0, 1.0]], dtype=np.float32), # 720x720
|
"head": np.array(
|
||||||
|
[[306.0, 0.0, 360.0], [0.0, 306.0, 360.0], [0.0, 0.0, 1.0]], dtype=np.float32
|
||||||
|
), # 720x720
|
||||||
"left_wrist": np.array(
|
"left_wrist": np.array(
|
||||||
[[388.6639, 0.0, 240.0], [0.0, 388.6639, 240.0], [0.0, 0.0, 1.0]], dtype=np.float32
|
[[388.6639, 0.0, 240.0], [0.0, 388.6639, 240.0], [0.0, 0.0, 1.0]], dtype=np.float32
|
||||||
), # 480x480
|
), # 480x480
|
||||||
@@ -48,7 +55,7 @@ BEHAVIOR_DATASET_FEATURES = {
|
|||||||
},
|
},
|
||||||
# Proprioception
|
# Proprioception
|
||||||
"observation.state": {
|
"observation.state": {
|
||||||
"dtype": "float32",
|
"dtype": "float32",
|
||||||
"shape": (256,), # Full proprioception state
|
"shape": (256,), # Full proprioception state
|
||||||
"names": None,
|
"names": None,
|
||||||
},
|
},
|
||||||
@@ -229,7 +236,10 @@ JOINT_RANGE = {
|
|||||||
"gripper": (th.tensor([0.00], dtype=th.float32), th.tensor([0.03], dtype=th.float32)),
|
"gripper": (th.tensor([0.00], dtype=th.float32), th.tensor([0.03], dtype=th.float32)),
|
||||||
},
|
},
|
||||||
"R1Pro": {
|
"R1Pro": {
|
||||||
"base": (th.tensor([-0.75, -0.75, -1.0], dtype=th.float32), th.tensor([0.75, 0.75, 1.0], dtype=th.float32)),
|
"base": (
|
||||||
|
th.tensor([-0.75, -0.75, -1.0], dtype=th.float32),
|
||||||
|
th.tensor([0.75, 0.75, 1.0], dtype=th.float32),
|
||||||
|
),
|
||||||
"torso": (
|
"torso": (
|
||||||
th.tensor([-1.1345, -2.7925, -1.8326, -3.0543], dtype=th.float32),
|
th.tensor([-1.1345, -2.7925, -1.8326, -3.0543], dtype=th.float32),
|
||||||
th.tensor([1.8326, 2.5307, 1.5708, 3.0543], dtype=th.float32),
|
th.tensor([1.8326, 2.5307, 1.5708, 3.0543], dtype=th.float32),
|
||||||
@@ -253,8 +263,14 @@ EEF_POSITION_RANGE = {
|
|||||||
"0": (th.tensor([0.0, -0.7, 0.0], dtype=th.float32), th.tensor([0.7, 0.7, 0.7], dtype=th.float32)),
|
"0": (th.tensor([0.0, -0.7, 0.0], dtype=th.float32), th.tensor([0.7, 0.7, 0.7], dtype=th.float32)),
|
||||||
},
|
},
|
||||||
"R1Pro": {
|
"R1Pro": {
|
||||||
"left": (th.tensor([0.0, -0.65, 0.0], dtype=th.float32), th.tensor([0.65, 0.65, 2.5], dtype=th.float32)),
|
"left": (
|
||||||
"right": (th.tensor([0.0, -0.65, 0.0], dtype=th.float32), th.tensor([0.65, 0.65, 2.5], dtype=th.float32)),
|
th.tensor([0.0, -0.65, 0.0], dtype=th.float32),
|
||||||
|
th.tensor([0.65, 0.65, 2.5], dtype=th.float32),
|
||||||
|
),
|
||||||
|
"right": (
|
||||||
|
th.tensor([0.0, -0.65, 0.0], dtype=th.float32),
|
||||||
|
th.tensor([0.65, 0.65, 2.5], dtype=th.float32),
|
||||||
|
),
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -317,4 +333,3 @@ TASK_NAMES_TO_INDICES = {
|
|||||||
"make_pizza": 49,
|
"make_pizza": 49,
|
||||||
}
|
}
|
||||||
TASK_INDICES_TO_NAMES = {v: k for k, v in TASK_NAMES_TO_INDICES.items()}
|
TASK_INDICES_TO_NAMES = {v: k for k, v in TASK_NAMES_TO_INDICES.items()}
|
||||||
|
|
||||||
|
|||||||
@@ -1,69 +1,82 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
"""
|
||||||
|
Convert a single BEHAVIOR-1K task from HDF5 to LeRobotDataset v3.0 format.
|
||||||
|
|
||||||
|
Usage examples:
|
||||||
|
# Convert a single task
|
||||||
|
python convert_to_lerobot_v3.py \
|
||||||
|
--data-folder /path/to/data \
|
||||||
|
--repo-id "username/behavior-1k-assembling-gift-baskets" \
|
||||||
|
--task-id 0 \
|
||||||
|
--push-to-hub
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
import argparse
|
import argparse
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
import h5py
|
import h5py
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import os
|
|
||||||
import torch as th
|
|
||||||
from pathlib import Path
|
|
||||||
from tqdm import tqdm
|
from tqdm import tqdm
|
||||||
import logging
|
|
||||||
|
|
||||||
from .behavior_lerobot_dataset_v3 import BehaviorLeRobotDatasetV3
|
|
||||||
from .behaviour_1k_constants import TASK_NAMES_TO_INDICES, TASK_INDICES_TO_NAMES, BEHAVIOR_DATASET_FEATURES
|
|
||||||
|
|
||||||
from lerobot.utils.utils import init_logging
|
from lerobot.utils.utils import init_logging
|
||||||
|
|
||||||
|
from .behavior_lerobot_dataset_v3 import BehaviorLeRobotDatasetV3
|
||||||
|
from .behaviour_1k_constants import BEHAVIOR_DATASET_FEATURES, FPS, ROBOT_TYPE, TASK_INDICES_TO_NAMES
|
||||||
|
|
||||||
init_logging()
|
init_logging()
|
||||||
|
|
||||||
|
|
||||||
def load_hdf5_episode(hdf5_path: str, episode_id: int = 0) -> dict:
|
def load_hdf5_episode(hdf5_path: str, episode_id: int = 0) -> dict:
|
||||||
"""
|
"""
|
||||||
Load episode data from HDF5 file.
|
Load episode data from HDF5 file.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
hdf5_path: Path to the HDF5 file
|
hdf5_path: Path to the HDF5 file
|
||||||
episode_id: Episode ID to load (default: 0)
|
episode_id: Episode ID to load (default: 0)
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Dictionary containing episode data
|
Dictionary containing episode data
|
||||||
"""
|
"""
|
||||||
episode_data = {}
|
episode_data = {}
|
||||||
|
|
||||||
with h5py.File(hdf5_path, "r") as f:
|
with h5py.File(hdf5_path, "r") as f:
|
||||||
# Find the episode with most samples if episode_id not specified
|
# Find the episode with most samples if episode_id not specified
|
||||||
if episode_id == -1:
|
if episode_id == -1:
|
||||||
num_samples = [f["data"][key].attrs["num_samples"] for key in f["data"].keys()]
|
num_samples = [f["data"][key].attrs["num_samples"] for key in f["data"]]
|
||||||
episode_id = num_samples.index(max(num_samples))
|
episode_id = num_samples.index(max(num_samples))
|
||||||
|
|
||||||
demo_key = f"demo_{episode_id}"
|
demo_key = f"demo_{episode_id}"
|
||||||
if demo_key not in f["data"]:
|
if demo_key not in f["data"]:
|
||||||
raise ValueError(f"Episode {episode_id} not found in {hdf5_path}")
|
raise ValueError(f"Episode {episode_id} not found in {hdf5_path}")
|
||||||
|
|
||||||
demo_data = f["data"][demo_key]
|
demo_data = f["data"][demo_key]
|
||||||
|
|
||||||
# Load actions
|
# Load actions
|
||||||
episode_data["action"] = np.array(demo_data["action"][:])
|
episode_data["action"] = np.array(demo_data["action"][:])
|
||||||
|
|
||||||
# Load observations
|
# Load observations
|
||||||
episode_data["obs"] = {}
|
episode_data["obs"] = {}
|
||||||
for key in demo_data["obs"].keys():
|
for key in demo_data["obs"]:
|
||||||
episode_data["obs"][key] = np.array(demo_data["obs"][key][:])
|
episode_data["obs"][key] = np.array(demo_data["obs"][key][:])
|
||||||
|
|
||||||
# Load attributes
|
# Load attributes
|
||||||
episode_data["attrs"] = {}
|
episode_data["attrs"] = {}
|
||||||
for attr_name in demo_data.attrs:
|
for attr_name in demo_data.attrs:
|
||||||
episode_data["attrs"][attr_name] = demo_data.attrs[attr_name]
|
episode_data["attrs"][attr_name] = demo_data.attrs[attr_name]
|
||||||
|
|
||||||
# Add global attributes
|
# Add global attributes
|
||||||
for attr_name in f["data"].attrs:
|
for attr_name in f["data"].attrs:
|
||||||
episode_data["attrs"][f"global_{attr_name}"] = f["data"].attrs[attr_name]
|
episode_data["attrs"][f"global_{attr_name}"] = f["data"].attrs[attr_name]
|
||||||
|
|
||||||
return episode_data
|
return episode_data
|
||||||
|
|
||||||
|
|
||||||
def convert_episode(
|
def convert_episode(
|
||||||
data_folder: str,
|
data_folder: str,
|
||||||
output_repo_id: str,
|
|
||||||
task_id: int,
|
task_id: int,
|
||||||
demo_id: int,
|
demo_id: int,
|
||||||
dataset: BehaviorLeRobotDatasetV3,
|
dataset: BehaviorLeRobotDatasetV3,
|
||||||
@@ -72,10 +85,10 @@ def convert_episode(
|
|||||||
) -> None:
|
) -> None:
|
||||||
"""
|
"""
|
||||||
Convert a single episode from HDF5 to LeRobotDataset v3.0 format.
|
Convert a single episode from HDF5 to LeRobotDataset v3.0 format.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
data_folder: Base data folder containing HDF5 files
|
data_folder: Base data folder containing HDF5 files
|
||||||
output_repo_id: Output repository ID for the dataset
|
repo_id: Repository ID for the dataset
|
||||||
task_id: Task ID
|
task_id: Task ID
|
||||||
demo_id: Demo ID (episode ID)
|
demo_id: Demo ID (episode ID)
|
||||||
dataset: BehaviorLeRobotDatasetV3 instance to add data to
|
dataset: BehaviorLeRobotDatasetV3 instance to add data to
|
||||||
@@ -85,26 +98,22 @@ def convert_episode(
|
|||||||
# Construct paths
|
# Construct paths
|
||||||
task_name = TASK_INDICES_TO_NAMES[task_id]
|
task_name = TASK_INDICES_TO_NAMES[task_id]
|
||||||
hdf5_path = f"{data_folder}/2025-challenge-rawdata/task-{task_id:04d}/episode_{demo_id:08d}.hdf5"
|
hdf5_path = f"{data_folder}/2025-challenge-rawdata/task-{task_id:04d}/episode_{demo_id:08d}.hdf5"
|
||||||
|
|
||||||
if not os.path.exists(hdf5_path):
|
if not os.path.exists(hdf5_path):
|
||||||
logging.error(f"HDF5 file not found: {hdf5_path}")
|
logging.error(f"HDF5 file not found: {hdf5_path}")
|
||||||
return
|
return
|
||||||
|
|
||||||
logging.info(f"Converting episode {demo_id} from task {task_name}")
|
logging.info(f"Converting episode {demo_id} from task {task_name}")
|
||||||
|
|
||||||
# Load episode data
|
# Load episode data
|
||||||
try:
|
episode_data = load_hdf5_episode(hdf5_path, episode_id=0)
|
||||||
episode_data = load_hdf5_episode(hdf5_path, episode_id=0)
|
|
||||||
except Exception as e:
|
|
||||||
logging.error(f"Failed to load episode data: {e}")
|
|
||||||
return
|
|
||||||
|
|
||||||
# Filter out segmentation if not requested
|
# Filter out segmentation if not requested
|
||||||
if not include_segmentation:
|
if not include_segmentation:
|
||||||
keys_to_remove = [k for k in episode_data["obs"].keys() if "seg_instance_id" in k]
|
keys_to_remove = [k for k in episode_data["obs"] if "seg_instance_id" in k]
|
||||||
for key in keys_to_remove:
|
for key in keys_to_remove:
|
||||||
del episode_data["obs"][key]
|
del episode_data["obs"][key]
|
||||||
|
|
||||||
# Add episode to dataset
|
# Add episode to dataset
|
||||||
dataset.add_episode_from_hdf5(
|
dataset.add_episode_from_hdf5(
|
||||||
hdf5_data=episode_data,
|
hdf5_data=episode_data,
|
||||||
@@ -114,169 +123,103 @@ def convert_episode(
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def convert_dataset(
|
def convert_task_to_dataset(
|
||||||
data_folder: str,
|
data_folder: str,
|
||||||
output_repo_id: str,
|
repo_id: str,
|
||||||
task_names: list = None,
|
task_id: int,
|
||||||
episode_ids: list = None,
|
|
||||||
max_episodes_per_task: int = None,
|
|
||||||
include_videos: bool = True,
|
|
||||||
include_segmentation: bool = True,
|
|
||||||
fps: int = 30,
|
|
||||||
batch_encoding_size: int = 1,
|
|
||||||
image_writer_processes: int = 0,
|
|
||||||
image_writer_threads: int = 4,
|
|
||||||
push_to_hub: bool = False,
|
push_to_hub: bool = False,
|
||||||
) -> None:
|
) -> None:
|
||||||
"""
|
"""
|
||||||
Convert BEHAVIOR-1K dataset from HDF5 to LeRobotDataset v3.0 format.
|
Convert a single BEHAVIOR-1K task from HDF5 to LeRobotDataset v3.0 format.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
data_folder: Base folder containing HDF5 data
|
data_folder: Base folder containing HDF5 data
|
||||||
output_repo_id: Output repository ID (e.g., "username/dataset-name")
|
repo_id: Repository ID (e.g., "username/behavior-1k-task-name")
|
||||||
task_names: List of task names to convert (None = all tasks)
|
task_id: Task ID to convert
|
||||||
episode_ids: Specific episode IDs to convert (None = all episodes)
|
|
||||||
max_episodes_per_task: Maximum episodes per task to convert
|
|
||||||
include_videos: Whether to include video data
|
|
||||||
include_segmentation: Whether to include segmentation data
|
|
||||||
fps: Frames per second
|
|
||||||
batch_encoding_size: Number of episodes to batch before encoding
|
|
||||||
image_writer_processes: Number of processes for image writing
|
|
||||||
image_writer_threads: Number of threads for image writing
|
|
||||||
push_to_hub: Whether to push to HuggingFace Hub
|
push_to_hub: Whether to push to HuggingFace Hub
|
||||||
"""
|
"""
|
||||||
|
task_name = TASK_INDICES_TO_NAMES[task_id]
|
||||||
|
task_folder = f"{data_folder}/2025-challenge-rawdata/task-{task_id:04d}"
|
||||||
|
|
||||||
|
if not os.path.exists(task_folder):
|
||||||
|
raise ValueError(f"Task folder not found: {task_folder}")
|
||||||
|
|
||||||
# Create output directory
|
# Create output directory
|
||||||
output_dir = Path.home() / ".cache/huggingface/lerobot" / output_repo_id
|
output_dir = Path.home() / ".cache/huggingface/lerobot" / repo_id
|
||||||
output_dir.mkdir(parents=True, exist_ok=True)
|
output_dir.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
logging.info(f"Converting dataset to: {output_dir}")
|
logging.info(f"Converting task '{task_name}' (ID: {task_id}) to: {output_dir}")
|
||||||
|
|
||||||
# Initialize dataset
|
# Initialize dataset for this task
|
||||||
dataset = BehaviorLeRobotDatasetV3.create(
|
dataset = BehaviorLeRobotDatasetV3.create(
|
||||||
repo_id=output_repo_id,
|
repo_id=repo_id,
|
||||||
root=output_dir,
|
fps=FPS,
|
||||||
fps=fps,
|
features=BEHAVIOR_DATASET_FEATURES,
|
||||||
robot_type="R1Pro",
|
robot_type=ROBOT_TYPE,
|
||||||
use_videos=include_videos,
|
|
||||||
video_backend="pyav",
|
|
||||||
batch_encoding_size=batch_encoding_size,
|
|
||||||
image_writer_processes=image_writer_processes,
|
|
||||||
image_writer_threads=image_writer_threads,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
# Determine which tasks to process
|
# Find all episodes in the task folder
|
||||||
if task_names is None:
|
task_episode_ids = []
|
||||||
task_names = list(TASK_NAMES_TO_INDICES.keys())
|
for filename in os.listdir(task_folder):
|
||||||
|
if filename.startswith("episode_") and filename.endswith(".hdf5"):
|
||||||
task_ids = [TASK_NAMES_TO_INDICES[name] for name in task_names]
|
eid = int(filename.split("_")[1].split(".")[0])
|
||||||
|
task_episode_ids.append(eid)
|
||||||
# Process each task
|
task_episode_ids.sort()
|
||||||
total_episodes = 0
|
|
||||||
for task_id in tqdm(task_ids, desc="Processing tasks"):
|
logging.info(f"Processing {len(task_episode_ids)} episodes for task {task_name}")
|
||||||
task_name = TASK_INDICES_TO_NAMES[task_id]
|
|
||||||
task_folder = f"{data_folder}/2025-challenge-rawdata/task-{task_id:04d}"
|
# Convert each episode
|
||||||
|
episodes_converted = 0
|
||||||
if not os.path.exists(task_folder):
|
for demo_id in tqdm(task_episode_ids, desc="Converting episodes"):
|
||||||
logging.warning(f"Task folder not found: {task_folder}")
|
convert_episode(
|
||||||
continue
|
data_folder=data_folder,
|
||||||
|
task_id=task_id,
|
||||||
# Find all episodes for this task
|
demo_id=demo_id,
|
||||||
if episode_ids is not None:
|
dataset=dataset,
|
||||||
# Use specified episode IDs
|
include_videos=True,
|
||||||
task_episode_ids = [eid for eid in episode_ids if eid // 10000 == task_id]
|
include_segmentation=True,
|
||||||
else:
|
)
|
||||||
# Find all episodes in the task folder
|
episodes_converted += 1
|
||||||
task_episode_ids = []
|
|
||||||
for filename in os.listdir(task_folder):
|
logging.info(f"Converted {episodes_converted} episodes for task {task_name}")
|
||||||
if filename.startswith("episode_") and filename.endswith(".hdf5"):
|
|
||||||
eid = int(filename.split("_")[1].split(".")[0])
|
|
||||||
task_episode_ids.append(eid)
|
|
||||||
task_episode_ids.sort()
|
|
||||||
|
|
||||||
# Limit episodes if requested
|
|
||||||
if max_episodes_per_task is not None:
|
|
||||||
task_episode_ids = task_episode_ids[:max_episodes_per_task]
|
|
||||||
|
|
||||||
logging.info(f"Processing {len(task_episode_ids)} episodes for task {task_name}")
|
|
||||||
|
|
||||||
# Convert each episode
|
|
||||||
for demo_id in tqdm(task_episode_ids, desc=f"Task {task_name}", leave=False):
|
|
||||||
try:
|
|
||||||
convert_episode(
|
|
||||||
data_folder=data_folder,
|
|
||||||
output_repo_id=output_repo_id,
|
|
||||||
task_id=task_id,
|
|
||||||
demo_id=demo_id,
|
|
||||||
dataset=dataset,
|
|
||||||
include_videos=include_videos,
|
|
||||||
include_segmentation=include_segmentation,
|
|
||||||
)
|
|
||||||
total_episodes += 1
|
|
||||||
except Exception as e:
|
|
||||||
logging.error(f"Failed to convert episode {demo_id}: {e}")
|
|
||||||
continue
|
|
||||||
|
|
||||||
logging.info(f"Converted {total_episodes} episodes total")
|
|
||||||
|
|
||||||
# Finalize dataset
|
# Finalize dataset
|
||||||
logging.info("Finalizing dataset...")
|
logging.info(f"Finalizing dataset for task {task_name}...")
|
||||||
dataset.finalize()
|
dataset.finalize()
|
||||||
|
|
||||||
# Push to hub if requested
|
# Push to hub if requested
|
||||||
if push_to_hub:
|
if push_to_hub:
|
||||||
logging.info("Pushing dataset to HuggingFace Hub...")
|
logging.info(f"Pushing task {task_name} dataset to HuggingFace Hub...")
|
||||||
dataset.push_to_hub(
|
dataset.push_to_hub()
|
||||||
private=True,
|
|
||||||
license="apache-2.0",
|
|
||||||
)
|
|
||||||
|
|
||||||
logging.info("Conversion complete!")
|
logging.info("Conversion complete!")
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
parser = argparse.ArgumentParser(description="Convert BEHAVIOR-1K data to LeRobotDataset v3.0")
|
parser = argparse.ArgumentParser(description="Convert a single BEHAVIOR-1K task to LeRobotDataset v3.0")
|
||||||
parser.add_argument("--data_folder", type=str, required=True, help="Path to the data folder")
|
parser.add_argument("--data-folder", type=str, required=True, help="Path to the data folder")
|
||||||
parser.add_argument("--output_repo_id", type=str, required=True,
|
parser.add_argument(
|
||||||
help="Output repository ID (e.g., 'username/behavior-dataset-v3')")
|
"--repo-id",
|
||||||
parser.add_argument("--task_names", type=str, nargs="+", default=None,
|
type=str,
|
||||||
help="Task names to convert (default: all)")
|
required=True,
|
||||||
parser.add_argument("--episode_ids", type=int, nargs="+", default=None,
|
help="Output repository ID (e.g., 'username/behavior-1k-assembling-gift-baskets')",
|
||||||
help="Specific episode IDs to convert")
|
)
|
||||||
parser.add_argument("--max_episodes_per_task", type=int, default=None,
|
parser.add_argument(
|
||||||
help="Maximum episodes per task to convert")
|
"--task-id", type=int, required=True, help="Task ID to convert (e.g., 0 for assembling_gift_baskets)"
|
||||||
parser.add_argument("--no_videos", action="store_true",
|
)
|
||||||
help="Exclude video data")
|
parser.add_argument(
|
||||||
parser.add_argument("--no_segmentation", action="store_true",
|
"--push-to-hub", action="store_true", help="Push dataset to HuggingFace Hub after conversion"
|
||||||
help="Exclude segmentation data")
|
)
|
||||||
parser.add_argument("--fps", type=int, default=30,
|
|
||||||
help="Frames per second (default: 30)")
|
|
||||||
parser.add_argument("--batch_encoding_size", type=int, default=1,
|
|
||||||
help="Number of episodes to batch before encoding videos")
|
|
||||||
parser.add_argument("--image_writer_processes", type=int, default=0,
|
|
||||||
help="Number of processes for async image writing")
|
|
||||||
parser.add_argument("--image_writer_threads", type=int, default=4,
|
|
||||||
help="Number of threads for image writing")
|
|
||||||
parser.add_argument("--push_to_hub", action="store_true",
|
|
||||||
help="Push dataset to HuggingFace Hub")
|
|
||||||
|
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
||||||
# Convert dataset
|
# Convert single task to dataset
|
||||||
convert_dataset(
|
convert_task_to_dataset(
|
||||||
data_folder=args.data_folder,
|
data_folder=args.data_folder,
|
||||||
output_repo_id=args.output_repo_id,
|
repo_id=args.repo_id,
|
||||||
task_names=args.task_names,
|
task_id=args.task_id,
|
||||||
episode_ids=args.episode_ids,
|
|
||||||
max_episodes_per_task=args.max_episodes_per_task,
|
|
||||||
include_videos=not args.no_videos,
|
|
||||||
include_segmentation=not args.no_segmentation,
|
|
||||||
fps=args.fps,
|
|
||||||
batch_encoding_size=args.batch_encoding_size,
|
|
||||||
image_writer_processes=args.image_writer_processes,
|
|
||||||
image_writer_threads=args.image_writer_threads,
|
|
||||||
push_to_hub=args.push_to_hub,
|
push_to_hub=args.push_to_hub,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|||||||
Reference in New Issue
Block a user