⬆️ sync with lerobot v0.5.1 (#96)

* update agibot2lerobot

* update libero2lerobot

* update robomind2lerobot

* fix robomind2lerobot
This commit is contained in:
Qizhi Chen
2026-04-06 18:25:36 +08:00
committed by GitHub
parent ef184e44be
commit ad1381915c
5 changed files with 340 additions and 232 deletions
+145 -126
View File
@@ -12,19 +12,21 @@ import torch
from agibot_utils.agibot_utils import get_task_info, load_local_dataset from agibot_utils.agibot_utils import get_task_info, load_local_dataset
from agibot_utils.config import AgiBotWorld_TASK_TYPE from agibot_utils.config import AgiBotWorld_TASK_TYPE
from agibot_utils.lerobot_utils import compute_episode_stats, generate_features_from_config from agibot_utils.lerobot_utils import compute_episode_stats, generate_features_from_config
from lerobot.datasets.lerobot_dataset import LeRobotDataset, LeRobotDatasetMetadata from lerobot.datasets import LeRobotDataset, LeRobotDatasetMetadata
from lerobot.datasets.utils import DEFAULT_EPISODES_PATH, validate_episode_buffer, validate_frame from lerobot.datasets.dataset_writer import DatasetWriter
from lerobot.datasets.feature_utils import get_hf_features_from_features, validate_episode_buffer, validate_frame
from lerobot.datasets.utils import DEFAULT_EPISODES_PATH
from ray.runtime_env import RuntimeEnv from ray.runtime_env import RuntimeEnv
class AgiBotDatasetMetadata(LeRobotDatasetMetadata): class AgiBotDatasetMetadata(LeRobotDatasetMetadata):
def _flush_metadata_buffer(self) -> None: def _flush_metadata_buffer(self) -> None:
"""Write all buffered episode metadata to parquet file.""" """Write all buffered episode metadata to parquet file."""
if not hasattr(self, "metadata_buffer") or len(self.metadata_buffer) == 0: if not hasattr(self, "_metadata_buffer") or len(self._metadata_buffer) == 0:
return return
combined_dict = {} combined_dict = {}
for episode_dict in self.metadata_buffer: for episode_dict in self._metadata_buffer:
for key, value in episode_dict.items(): for key, value in episode_dict.items():
if key not in combined_dict: if key not in combined_dict:
combined_dict[key] = [] combined_dict[key] = []
@@ -33,22 +35,138 @@ class AgiBotDatasetMetadata(LeRobotDatasetMetadata):
val = value[0] if isinstance(value, list) else value val = value[0] if isinstance(value, list) else value
combined_dict[key].append(val.tolist() if isinstance(val, np.ndarray) else val) combined_dict[key].append(val.tolist() if isinstance(val, np.ndarray) else val)
first_ep = self.metadata_buffer[0] first_ep = self._metadata_buffer[0]
chunk_idx = first_ep["meta/episodes/chunk_index"][0] chunk_idx = first_ep["meta/episodes/chunk_index"][0]
file_idx = first_ep["meta/episodes/file_index"][0] file_idx = first_ep["meta/episodes/file_index"][0]
schema = None if not self.writer else self.writer.schema schema = None if not self._pq_writer else self._pq_writer.schema
table = pa.Table.from_pydict(combined_dict, schema=schema) table = pa.Table.from_pydict(combined_dict, schema=schema)
if not self.writer: if not self._pq_writer:
path = Path(self.root / DEFAULT_EPISODES_PATH.format(chunk_index=chunk_idx, file_index=file_idx)) path = Path(self.root / DEFAULT_EPISODES_PATH.format(chunk_index=chunk_idx, file_index=file_idx))
path.parent.mkdir(parents=True, exist_ok=True) path.parent.mkdir(parents=True, exist_ok=True)
self.writer = pq.ParquetWriter(path, schema=table.schema, compression="snappy", use_dictionary=True) self._pq_writer = pq.ParquetWriter(path, schema=table.schema, compression="snappy", use_dictionary=True)
self.writer.write_table(table) self._pq_writer.write_table(table)
self.latest_episode = self.metadata_buffer[-1] self.latest_episode = self._metadata_buffer[-1]
self.metadata_buffer.clear() self._metadata_buffer.clear()
class AgiBotDatasetWriter(DatasetWriter):
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self.hf_features = get_hf_features_from_features(self._meta.features)
def add_frame(self, frame: dict) -> None:
"""
Add a single frame to the current episode buffer.
Apart from images written to a temporary directory, nothing is written to disk
until ``save_episode()`` is called.
The caller must provide all user-defined features plus ``"task"``, and must
not provide ``"timestamp"`` or ``"frame_index"``; those are computed
automatically.
"""
# Convert torch to numpy if needed
for name in frame:
if isinstance(frame[name], torch.Tensor):
frame[name] = frame[name].numpy()
features = {
key: value for key, value in self._meta.features.items() if key in self.hf_features
} # remove video keys
validate_frame(frame, features)
if self.episode_buffer is None:
self.episode_buffer = self._create_episode_buffer()
# Automatically add frame_index and timestamp to episode buffer
frame_index = self.episode_buffer["size"]
timestamp = frame_index / self._meta.fps
self.episode_buffer["frame_index"].append(frame_index)
self.episode_buffer["timestamp"].append(timestamp)
self.episode_buffer["task"].append(frame.pop("task"))
# Add frame features to episode_buffer
for key, value in frame.items():
if key not in self._meta.features:
raise ValueError(
f"An element of the frame is not in the features. '{key}' not in '{self._meta.features.keys()}'."
)
self.episode_buffer[key].append(value)
self.episode_buffer["size"] += 1
def save_episode(
self, videos: dict, action_config: list, episode_data: dict | None = None, parallel_encoding: bool = True
) -> None:
"""Save the current episode in self.episode_buffer to disk."""
episode_buffer = episode_data if episode_data is not None else self.episode_buffer
validate_episode_buffer(episode_buffer, self._meta.total_episodes, self._meta.features)
# size and task are special cases that won't be added to hf_dataset
episode_length = episode_buffer.pop("size")
tasks = episode_buffer.pop("task")
episode_tasks = list(set(tasks))
episode_index = episode_buffer["episode_index"]
episode_buffer["index"] = np.arange(self._meta.total_frames, self._meta.total_frames + episode_length)
episode_buffer["episode_index"] = np.full((episode_length,), episode_index)
# Update tasks and task indices with new tasks if any
self._meta.save_episode_tasks(episode_tasks)
# Given tasks in natural language, find their corresponding task indices
episode_buffer["task_index"] = np.array([self._meta.get_task_index(task) for task in tasks])
for key, ft in self._meta.features.items():
# index, episode_index, task_index are already processed above, and image and video
# are processed separately by storing image path and frame info as meta data
if key in ["index", "episode_index", "task_index"] or ft["dtype"] in ["video"]:
continue
episode_buffer[key] = np.stack(episode_buffer[key]).squeeze()
for key in self._meta.video_keys:
episode_buffer[key] = str(videos[key])
ep_stats = compute_episode_stats(episode_buffer, self._meta.features)
ep_metadata = self._save_episode_data(episode_buffer)
has_video_keys = len(self._meta.video_keys) > 0
use_batched_encoding = self._batch_encoding_size > 1
self.current_videos = videos
if has_video_keys and not use_batched_encoding:
for video_key in self._meta.video_keys:
ep_metadata.update(self._save_episode_video(video_key, episode_index))
ep_metadata.update({"action_config": action_config})
self._meta.save_episode(episode_index, episode_length, episode_tasks, ep_stats, ep_metadata)
if has_video_keys and use_batched_encoding:
self._episodes_since_last_encoding += 1
if self._episodes_since_last_encoding == self._batch_encoding_size:
start_ep = self._meta.total_episodes - self._batch_encoding_size
end_ep = self._meta.total_episodes
self._batch_save_episode_video(start_ep, end_ep)
self._episodes_since_last_encoding = 0
if not episode_data:
self.clear_episode_buffer(delete_images=len(self._meta.image_keys) > 0)
def _encode_temporary_episode_video(self, video_key: str, episode_index: int) -> Path:
"""
Use ffmpeg to convert frames stored as png into mp4 videos.
Note: `encode_video_frames` is a blocking call. Making it asynchronous shouldn't speedup encoding,
since video encoding with ffmpeg is already using multithreading.
"""
temp_path = Path(tempfile.mkdtemp(dir=self._root)) / f"{video_key}_{episode_index:03d}.mp4"
shutil.copy(self.current_videos[video_key], temp_path)
return temp_path
class AgiBotDataset(LeRobotDataset): class AgiBotDataset(LeRobotDataset):
@@ -65,125 +183,26 @@ class AgiBotDataset(LeRobotDataset):
obj.meta: AgiBotDatasetMetadata = AgiBotDatasetMetadata.create( obj.meta: AgiBotDatasetMetadata = AgiBotDatasetMetadata.create(
repo_id=params["repo_id"], repo_id=params["repo_id"],
fps=params["fps"], fps=params["fps"],
robot_type=params.get("robot_type"), robot_type=params["robot_type"],
features=params["features"], features=params["features"],
root=params.get("root"), root=params["root"],
use_videos=params.get("use_videos", True), use_videos=params["use_videos"],
metadata_buffer_size=params.get("metadata_buffer_size", 10), metadata_buffer_size=params["metadata_buffer_size"],
)
obj.writer: AgiBotDatasetWriter = AgiBotDatasetWriter(
meta=obj.meta,
root=obj.root,
vcodec=obj._vcodec,
encoder_threads=obj._encoder_threads,
batch_encoding_size=obj._batch_encoding_size,
) )
return obj return obj
def add_frame(self, frame: dict) -> None: def save_episode(
""" self, videos: dict, action_config: list, episode_data: dict | None = None, parallel_encoding: bool = True
This function only adds the frame to the episode_buffer. Apart from images — which are written in a ) -> None:
temporary directory — nothing is written to disk. To save those frames, the 'save_episode()' method self._require_writer("save_episode")
then needs to be called. self.writer.save_episode(videos, action_config, episode_data, parallel_encoding)
"""
# Convert torch to numpy if needed
for name in frame:
if isinstance(frame[name], torch.Tensor):
frame[name] = frame[name].numpy()
features = {key: value for key, value in self.features.items() if key in self.hf_features} # remove video keys
validate_frame(frame, features)
if self.episode_buffer is None:
self.episode_buffer = self.create_episode_buffer()
# Automatically add frame_index and timestamp to episode buffer
frame_index = self.episode_buffer["size"]
timestamp = frame.pop("timestamp") if "timestamp" in frame else frame_index / self.fps
self.episode_buffer["frame_index"].append(frame_index)
self.episode_buffer["timestamp"].append(timestamp)
self.episode_buffer["task"].append(frame.pop("task")) # Remove task from frame after processing
# Add frame features to episode_buffer
for key, value in frame.items():
if key not in self.features:
raise ValueError(
f"An element of the frame is not in the features. '{key}' not in '{self.features.keys()}'."
)
self.episode_buffer[key].append(value)
self.episode_buffer["size"] += 1
def save_episode(self, videos: dict, action_config: list, episode_data: dict | None = None) -> None:
"""
This will save to disk the current episode in self.episode_buffer.
Args:
episode_data (dict | None, optional): Dict containing the episode data to save. If None, this will
save the current episode in self.episode_buffer, which is filled with 'add_frame'. Defaults to
None.
"""
episode_buffer = episode_data if episode_data is not None else self.episode_buffer
validate_episode_buffer(episode_buffer, self.meta.total_episodes, self.features)
# size and task are special cases that won't be added to hf_dataset
episode_length = episode_buffer.pop("size")
tasks = episode_buffer.pop("task")
episode_tasks = list(set(tasks))
episode_index = episode_buffer["episode_index"]
episode_buffer["index"] = np.arange(self.meta.total_frames, self.meta.total_frames + episode_length)
episode_buffer["episode_index"] = np.full((episode_length,), episode_index)
# Update tasks and task indices with new tasks if any
self.meta.save_episode_tasks(episode_tasks)
# Given tasks in natural language, find their corresponding task indices
episode_buffer["task_index"] = np.array([self.meta.get_task_index(task) for task in tasks])
for key, ft in self.features.items():
# index, episode_index, task_index are already processed above, and image and video
# are processed separately by storing image path and frame info as meta data
if key in ["index", "episode_index", "task_index"] or ft["dtype"] in ["video"]:
continue
episode_buffer[key] = np.stack(episode_buffer[key]).squeeze()
for key in self.meta.video_keys:
episode_buffer[key] = str(videos[key]) # PosixPath -> str
ep_stats = compute_episode_stats(episode_buffer, self.features)
ep_metadata = self._save_episode_data(episode_buffer)
has_video_keys = len(self.meta.video_keys) > 0
use_batched_encoding = self.batch_encoding_size > 1
self.current_videos = videos
if has_video_keys and not use_batched_encoding:
for video_key in self.meta.video_keys:
ep_metadata.update(self._save_episode_video(video_key, episode_index))
# `meta.save_episode` be executed after encoding the videos
# add action_config to current episode
ep_metadata.update({"action_config": action_config})
self.meta.save_episode(episode_index, episode_length, episode_tasks, ep_stats, ep_metadata)
if has_video_keys and use_batched_encoding:
# Check if we should trigger batch encoding
self.episodes_since_last_encoding += 1
if self.episodes_since_last_encoding == self.batch_encoding_size:
start_ep = self.num_episodes - self.batch_encoding_size
end_ep = self.num_episodes
self._batch_save_episode_video(start_ep, end_ep)
self.episodes_since_last_encoding = 0
if not episode_data:
# Reset episode buffer and clean up temporary images (if not already deleted during video encoding)
self.clear_episode_buffer(delete_images=len(self.meta.image_keys) > 0)
def _encode_temporary_episode_video(self, video_key: str, episode_index: int) -> Path:
"""
Use ffmpeg to convert frames stored as png into mp4 videos.
Note: `encode_video_frames` is a blocking call. Making it asynchronous shouldn't speedup encoding,
since video encoding with ffmpeg is already using multithreading.
"""
temp_path = Path(tempfile.mkdtemp(dir=self.root)) / f"{video_key}_{episode_index:03d}.mp4"
shutil.copy(self.current_videos[video_key], temp_path)
return temp_path
def get_all_tasks(src_path: Path, output_path: Path): def get_all_tasks(src_path: Path, output_path: Path):
+26 -15
View File
@@ -1,9 +1,11 @@
import numpy as np import numpy as np
import torch from lerobot.datasets.compute_stats import (
import torchvision DEFAULT_QUANTILES,
from lerobot.datasets.compute_stats import auto_downsample_height_width, get_feature_stats, sample_indices auto_downsample_height_width,
get_feature_stats,
torchvision.set_video_backend("pyav") sample_indices,
)
from torchcodec.decoders import VideoDecoder
def generate_features_from_config(AgiBotWorld_CONFIG): def generate_features_from_config(AgiBotWorld_CONFIG):
@@ -20,9 +22,8 @@ def generate_features_from_config(AgiBotWorld_CONFIG):
def sample_images(input): def sample_images(input):
if type(input) is str: if type(input) is str:
video_path = input video_path = input
reader = torchvision.io.VideoReader(video_path, stream="video") decoder = VideoDecoder(video_path)
frames = [frame["data"] for frame in reader] frames_array = decoder[0:-1].numpy() # Shape: [T, C, H, W]
frames_array = torch.stack(frames).numpy() # Shape: [T, C, H, W]
sampled_indices = sample_indices(len(frames_array)) sampled_indices = sample_indices(len(frames_array))
images = None images = None
@@ -50,21 +51,31 @@ def sample_images(input):
return images return images
def compute_episode_stats(episode_data: dict[str, list[str] | np.ndarray], features: dict) -> dict: def compute_episode_stats(
episode_data: dict[str, list[str] | np.ndarray],
features: dict,
quantile_list: list[float] | None = None,
) -> dict:
if quantile_list is None:
quantile_list = DEFAULT_QUANTILES
ep_stats = {} ep_stats = {}
for key, data in episode_data.items(): for key, data in episode_data.items():
if features[key]["dtype"] == "string": if features[key]["dtype"] == "string":
continue # HACK: we should receive np.arrays of strings continue
elif features[key]["dtype"] in ["image", "video"]: elif features[key]["dtype"] in ["image", "video"]:
ep_ft_array = sample_images(data) ep_ft_array = sample_images(data)
axes_to_reduce = (0, 2, 3) # keep channel dim axes_to_reduce = (0, 2, 3)
keepdims = True keepdims = True
else: else:
ep_ft_array = data # data is already a np.ndarray ep_ft_array = data
axes_to_reduce = 0 # compute stats over the first axis axes_to_reduce = 0
keepdims = data.ndim == 1 # keep as np.array keepdims = data.ndim == 1
ep_stats[key] = get_feature_stats(ep_ft_array, axis=axes_to_reduce, keepdims=keepdims) ep_stats[key] = get_feature_stats(
ep_ft_array, axis=axes_to_reduce, keepdims=keepdims, quantile_list=quantile_list
)
if features[key]["dtype"] in ["image", "video"]: if features[key]["dtype"] in ["image", "video"]:
value_norm = 1.0 if "depth" in key else 255.0 value_norm = 1.0 if "depth" in key else 255.0
+8 -6
View File
@@ -8,6 +8,7 @@ import pandas as pd
import ray import ray
from datatrove.executor import LocalPipelineExecutor, RayPipelineExecutor from datatrove.executor import LocalPipelineExecutor, RayPipelineExecutor
from datatrove.pipeline.base import PipelineStep from datatrove.pipeline.base import PipelineStep
from lerobot.datasets import LeRobotDataset, LeRobotDatasetMetadata
from lerobot.datasets.aggregate import ( from lerobot.datasets.aggregate import (
aggregate_data, aggregate_data,
aggregate_metadata, aggregate_metadata,
@@ -15,14 +16,11 @@ from lerobot.datasets.aggregate import (
aggregate_videos, aggregate_videos,
validate_all_metadata, validate_all_metadata,
) )
from lerobot.datasets.lerobot_dataset import LeRobotDataset, LeRobotDatasetMetadata from lerobot.datasets.io_utils import write_info, write_stats, write_tasks
from lerobot.datasets.utils import ( from lerobot.datasets.utils import (
DEFAULT_CHUNK_SIZE, DEFAULT_CHUNK_SIZE,
DEFAULT_DATA_FILE_SIZE_IN_MB, DEFAULT_DATA_FILE_SIZE_IN_MB,
DEFAULT_VIDEO_FILE_SIZE_IN_MB, DEFAULT_VIDEO_FILE_SIZE_IN_MB,
write_info,
write_stats,
write_tasks,
) )
from libero_utils.config import LIBERO_FEATURES from libero_utils.config import LIBERO_FEATURES
from libero_utils.libero_utils import load_local_episodes from libero_utils.libero_utils import load_local_episodes
@@ -171,7 +169,9 @@ def main(
) )
) )
if len(src_paths) > 1: if len(src_paths) > 1:
aggregate_output_path = output_path / ("_".join([src_path.name for src_path in src_paths]) + "_aggregated_lerobot") aggregate_output_path = output_path / (
"_".join([src_path.name for src_path in src_paths]) + "_aggregated_lerobot"
)
else: else:
aggregate_output_path = output_path / f"{src_paths[0].name}_lerobot" aggregate_output_path = output_path / f"{src_paths[0].name}_lerobot"
aggregate_output_path = aggregate_output_path.resolve() aggregate_output_path = aggregate_output_path.resolve()
@@ -234,7 +234,9 @@ if __name__ == "__main__":
parser.add_argument("--output-path", type=Path, required=True) parser.add_argument("--output-path", type=Path, required=True)
parser.add_argument("--executor", type=str, choices=["local", "ray"], default="local") parser.add_argument("--executor", type=str, choices=["local", "ray"], default="local")
parser.add_argument("--cpus-per-task", type=int, default=1) parser.add_argument("--cpus-per-task", type=int, default=1)
parser.add_argument("--tasks-per-job", type=int, default=1, help="number of concurrent tasks per job, only used for ray") parser.add_argument(
"--tasks-per-job", type=int, default=1, help="number of concurrent tasks per job, only used for ray"
)
parser.add_argument("--workers", type=int, default=-1, help="number of concurrent jobs to run") parser.add_argument("--workers", type=int, default=-1, help="number of concurrent jobs to run")
parser.add_argument("--resume-dir", type=Path, help="logs directory to resume") parser.add_argument("--resume-dir", type=Path, help="logs directory to resume")
parser.add_argument("--debug", action="store_true") parser.add_argument("--debug", action="store_true")
+137 -73
View File
@@ -1,4 +1,5 @@
import argparse import argparse
import concurrent.futures
import inspect import inspect
import json import json
import logging import logging
@@ -10,9 +11,12 @@ import pandas as pd
import pyarrow as pa import pyarrow as pa
import pyarrow.parquet as pq import pyarrow.parquet as pq
import ray import ray
from lerobot.datasets import LeRobotDataset, LeRobotDatasetMetadata
from lerobot.datasets.compute_stats import aggregate_stats from lerobot.datasets.compute_stats import aggregate_stats
from lerobot.datasets.lerobot_dataset import LeRobotDataset, LeRobotDatasetMetadata from lerobot.datasets.dataset_writer import DatasetWriter, _encode_video_worker
from lerobot.datasets.utils import DEFAULT_EPISODES_PATH, flatten_dict, validate_episode_buffer, write_info, write_stats from lerobot.datasets.feature_utils import validate_episode_buffer
from lerobot.datasets.io_utils import write_info, write_stats
from lerobot.datasets.utils import DEFAULT_EPISODES_PATH, flatten_dict
from ray.runtime_env import RuntimeEnv from ray.runtime_env import RuntimeEnv
from robomind_uitls.configs import ROBOMIND_CONFIG from robomind_uitls.configs import ROBOMIND_CONFIG
from robomind_uitls.lerobot_uitls import compute_episode_stats, generate_features_from_config from robomind_uitls.lerobot_uitls import compute_episode_stats, generate_features_from_config
@@ -24,11 +28,11 @@ logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(
class RoboMINDDatasetMetadata(LeRobotDatasetMetadata): class RoboMINDDatasetMetadata(LeRobotDatasetMetadata):
def _flush_metadata_buffer(self) -> None: def _flush_metadata_buffer(self) -> None:
"""Write all buffered episode metadata to parquet file.""" """Write all buffered episode metadata to parquet file."""
if not hasattr(self, "metadata_buffer") or len(self.metadata_buffer) == 0: if not hasattr(self, "_metadata_buffer") or len(self._metadata_buffer) == 0:
return return
combined_dict = {} combined_dict = {}
for episode_dict in self.metadata_buffer: for episode_dict in self._metadata_buffer:
for key, value in episode_dict.items(): for key, value in episode_dict.items():
if key not in combined_dict: if key not in combined_dict:
combined_dict[key] = [] combined_dict[key] = []
@@ -37,22 +41,22 @@ class RoboMINDDatasetMetadata(LeRobotDatasetMetadata):
val = value[0] if isinstance(value, list) else value val = value[0] if isinstance(value, list) else value
combined_dict[key].append(val.tolist() if isinstance(val, np.ndarray) else val) combined_dict[key].append(val.tolist() if isinstance(val, np.ndarray) else val)
first_ep = self.metadata_buffer[0] first_ep = self._metadata_buffer[0]
chunk_idx = first_ep["meta/episodes/chunk_index"][0] chunk_idx = first_ep["meta/episodes/chunk_index"][0]
file_idx = first_ep["meta/episodes/file_index"][0] file_idx = first_ep["meta/episodes/file_index"][0]
schema = None if not self.writer else self.writer.schema schema = None if not self._pq_writer else self._pq_writer.schema
table = pa.Table.from_pydict(combined_dict, schema=schema) table = pa.Table.from_pydict(combined_dict, schema=schema)
if not self.writer: if not self._pq_writer:
path = Path(self.root / DEFAULT_EPISODES_PATH.format(chunk_index=chunk_idx, file_index=file_idx)) path = Path(self.root / DEFAULT_EPISODES_PATH.format(chunk_index=chunk_idx, file_index=file_idx))
path.parent.mkdir(parents=True, exist_ok=True) path.parent.mkdir(parents=True, exist_ok=True)
self.writer = pq.ParquetWriter(path, schema=table.schema, compression="snappy", use_dictionary=True) self._pq_writer = pq.ParquetWriter(path, schema=table.schema, compression="snappy", use_dictionary=True)
self.writer.write_table(table) self._pq_writer.write_table(table)
self.latest_episode = self.metadata_buffer[-1] self.latest_episode = self._metadata_buffer[-1]
self.metadata_buffer.clear() self._metadata_buffer.clear()
def save_episode( def save_episode(
self, self,
@@ -88,6 +92,116 @@ class RoboMINDDatasetMetadata(LeRobotDatasetMetadata):
write_stats(self.stats, self.root) write_stats(self.stats, self.root)
class RoboMINDDatasetWriter(DatasetWriter):
def save_episode(
self,
split,
action_config: dict,
episode_data: dict | None = None,
parallel_encoding: bool = True,
) -> None:
"""Save the current episode in self.episode_buffer to disk."""
episode_buffer = episode_data if episode_data is not None else self.episode_buffer
validate_episode_buffer(episode_buffer, self._meta.total_episodes, self._meta.features)
# size and task are special cases that won't be added to hf_dataset
episode_length = episode_buffer.pop("size")
tasks = episode_buffer.pop("task")
episode_tasks = list(set(tasks))
episode_index = episode_buffer["episode_index"]
episode_buffer["index"] = np.arange(self._meta.total_frames, self._meta.total_frames + episode_length)
episode_buffer["episode_index"] = np.full((episode_length,), episode_index)
# Update tasks and task indices with new tasks if any
self._meta.save_episode_tasks(episode_tasks)
# Given tasks in natural language, find their corresponding task indices
episode_buffer["task_index"] = np.array([self._meta.get_task_index(task) for task in tasks])
for key, ft in self._meta.features.items():
if key in ["index", "episode_index", "task_index"] or ft["dtype"] in ["video"]:
continue
episode_buffer[key] = np.stack(episode_buffer[key]).squeeze()
# Wait for image writer to end, so that episode stats over images can be computed
self._wait_image_writer()
has_video_keys = len(self._meta.video_keys) > 0
use_streaming = self._streaming_encoder is not None and has_video_keys
use_batched_encoding = self._batch_encoding_size > 1
if use_streaming:
non_video_buffer = {
k: v for k, v in episode_buffer.items() if self._meta.features.get(k, {}).get("dtype") not in ("video",)
}
non_video_features = {k: v for k, v in self._meta.features.items() if v["dtype"] != "video"}
ep_stats = compute_episode_stats(non_video_buffer, non_video_features)
else:
ep_stats = compute_episode_stats(episode_buffer, self._meta.features)
ep_metadata = self._save_episode_data(episode_buffer)
if use_streaming:
streaming_results = self._streaming_encoder.finish_episode()
for video_key in self._meta.video_keys:
temp_path, video_stats = streaming_results[video_key]
if video_stats is not None:
ep_stats[video_key] = {
k: v if k == "count" else np.squeeze(v.reshape(1, -1, 1, 1) / 255.0, axis=0)
for k, v in video_stats.items()
}
ep_metadata.update(self._save_episode_video(video_key, episode_index, temp_path=temp_path))
elif has_video_keys and not use_batched_encoding:
num_cameras = len(self._meta.video_keys)
if parallel_encoding and num_cameras > 1:
with concurrent.futures.ProcessPoolExecutor(max_workers=num_cameras) as executor:
future_to_key = {
executor.submit(
_encode_video_worker,
video_key,
episode_index,
self._root,
self._meta.fps,
self._vcodec,
self._encoder_threads,
): video_key
for video_key in self._meta.video_keys
}
results = {}
for future in concurrent.futures.as_completed(future_to_key):
video_key = future_to_key[future]
try:
temp_path = future.result()
results[video_key] = temp_path
except Exception as exc:
logging.error(f"Video encoding failed for {video_key}: {exc}")
raise exc
for video_key in self._meta.video_keys:
temp_path = results[video_key]
ep_metadata.update(self._save_episode_video(video_key, episode_index, temp_path=temp_path))
else:
for video_key in self._meta.video_keys:
ep_metadata.update(self._save_episode_video(video_key, episode_index))
# `meta.save_episode` be executed after encoding the videos
ep_metadata.update({"action_config": action_config})
self._meta.save_episode(split, episode_index, episode_length, episode_tasks, ep_stats, ep_metadata)
if has_video_keys and use_batched_encoding:
self._episodes_since_last_encoding += 1
if self._episodes_since_last_encoding == self._batch_encoding_size:
start_ep = self._meta.total_episodes - self._batch_encoding_size
end_ep = self._meta.total_episodes
self._batch_save_episode_video(start_ep, end_ep)
self._episodes_since_last_encoding = 0
if not episode_data:
self.clear_episode_buffer(delete_images=len(self._meta.image_keys) > 0)
class RoboMINDDataset(LeRobotDataset): class RoboMINDDataset(LeRobotDataset):
@classmethod @classmethod
def create(cls, *args, **kwargs) -> "RoboMINDDataset": def create(cls, *args, **kwargs) -> "RoboMINDDataset":
@@ -108,70 +222,20 @@ class RoboMINDDataset(LeRobotDataset):
use_videos=params["use_videos"], use_videos=params["use_videos"],
metadata_buffer_size=params["metadata_buffer_size"], metadata_buffer_size=params["metadata_buffer_size"],
) )
obj.writer: RoboMINDDatasetWriter = RoboMINDDatasetWriter(
meta=obj.meta,
root=obj.root,
vcodec=obj._vcodec,
encoder_threads=obj._encoder_threads,
batch_encoding_size=obj._batch_encoding_size,
)
return obj return obj
def save_episode(self, split, action_config: dict, episode_data: dict | None = None) -> None: def save_episode(
""" self, split, action_config: dict, episode_data: dict | None = None, parallel_encoding: bool = True
This will save to disk the current episode in self.episode_buffer. ) -> None:
self._require_writer("save_episode")
Args: self.writer.save_episode(split, action_config, episode_data, parallel_encoding)
episode_data (dict | None, optional): Dict containing the episode data to save. If None, this will
save the current episode in self.episode_buffer, which is filled with 'add_frame'. Defaults to
None.
"""
episode_buffer = episode_data if episode_data is not None else self.episode_buffer
validate_episode_buffer(episode_buffer, self.meta.total_episodes, self.features)
# size and task are special cases that won't be added to hf_dataset
episode_length = episode_buffer.pop("size")
tasks = episode_buffer.pop("task")
episode_tasks = list(set(tasks))
episode_index = episode_buffer["episode_index"]
episode_buffer["index"] = np.arange(self.meta.total_frames, self.meta.total_frames + episode_length)
episode_buffer["episode_index"] = np.full((episode_length,), episode_index)
# Update tasks and task indices with new tasks if any
self.meta.save_episode_tasks(episode_tasks)
# Given tasks in natural language, find their corresponding task indices
episode_buffer["task_index"] = np.array([self.meta.get_task_index(task) for task in tasks])
for key, ft in self.features.items():
# index, episode_index, task_index are already processed above, and image and video
# are processed separately by storing image path and frame info as meta data
if key in ["index", "episode_index", "task_index"] or ft["dtype"] in ["video"]:
continue
episode_buffer[key] = np.stack(episode_buffer[key]).squeeze()
self._wait_image_writer()
ep_stats = compute_episode_stats(episode_buffer, self.features)
ep_metadata = self._save_episode_data(episode_buffer)
has_video_keys = len(self.meta.video_keys) > 0
use_batched_encoding = self.batch_encoding_size > 1
if has_video_keys and not use_batched_encoding:
for video_key in self.meta.video_keys:
ep_metadata.update(self._save_episode_video(video_key, episode_index))
# `meta.save_episode` be executed after encoding the videos
ep_metadata.update({"action_config": action_config})
self.meta.save_episode(split, episode_index, episode_length, episode_tasks, ep_stats, ep_metadata)
if has_video_keys and use_batched_encoding:
# Check if we should trigger batch encoding
self.episodes_since_last_encoding += 1
if self.episodes_since_last_encoding == self.batch_encoding_size:
start_ep = self.num_episodes - self.batch_encoding_size
end_ep = self.num_episodes
self._batch_save_episode_video(start_ep, end_ep)
self.episodes_since_last_encoding = 0
if not episode_data:
# Reset episode buffer and clean up temporary images (if not already deleted during video encoding)
self.clear_episode_buffer(delete_images=len(self.meta.image_keys) > 0)
def get_all_tasks(src_path: Path, output_path: Path, embodiment: str): def get_all_tasks(src_path: Path, output_path: Path, embodiment: str):
@@ -1,9 +1,11 @@
import numpy as np import numpy as np
import torchvision from lerobot.datasets.compute_stats import (
from lerobot.datasets.compute_stats import auto_downsample_height_width, get_feature_stats, sample_indices DEFAULT_QUANTILES,
from lerobot.datasets.utils import load_image_as_numpy auto_downsample_height_width,
get_feature_stats,
torchvision.set_video_backend("pyav") sample_indices,
)
from lerobot.datasets.io_utils import load_image_as_numpy
def generate_features_from_config(AgiBotWorld_CONFIG): def generate_features_from_config(AgiBotWorld_CONFIG):
@@ -49,21 +51,31 @@ def sample_images(input):
return images return images
def compute_episode_stats(episode_data: dict[str, list[str] | np.ndarray], features: dict) -> dict: def compute_episode_stats(
episode_data: dict[str, list[str] | np.ndarray],
features: dict,
quantile_list: list[float] | None = None,
) -> dict:
if quantile_list is None:
quantile_list = DEFAULT_QUANTILES
ep_stats = {} ep_stats = {}
for key, data in episode_data.items(): for key, data in episode_data.items():
if features[key]["dtype"] == "string": if features[key]["dtype"] == "string":
continue # HACK: we should receive np.arrays of strings continue
elif features[key]["dtype"] in ["image", "video"]: elif features[key]["dtype"] in ["image", "video"]:
ep_ft_array = sample_images(data) ep_ft_array = sample_images(data)
axes_to_reduce = (0, 2, 3) # keep channel dim axes_to_reduce = (0, 2, 3)
keepdims = True keepdims = True
else: else:
ep_ft_array = data # data is already a np.ndarray ep_ft_array = data
axes_to_reduce = 0 # compute stats over the first axis axes_to_reduce = 0
keepdims = data.ndim == 1 # keep as np.array keepdims = data.ndim == 1
ep_stats[key] = get_feature_stats(ep_ft_array, axis=axes_to_reduce, keepdims=keepdims) ep_stats[key] = get_feature_stats(
ep_ft_array, axis=axes_to_reduce, keepdims=keepdims, quantile_list=quantile_list
)
if features[key]["dtype"] in ["image", "video"]: if features[key]["dtype"] in ["image", "video"]:
value_norm = 1.0 if "depth" in key else 255.0 value_norm = 1.0 if "depth" in key else 255.0