fix robomind channel & add dirty tasks

This commit is contained in:
Tavish
2025-06-12 09:51:26 +08:00
parent b2f9d9b8ba
commit 462a2ace0b
3 changed files with 45 additions and 7 deletions
+32 -2
View File
@@ -2,6 +2,34 @@
RoboMIND (Multi-embodiment Intelligence Normative Data for Robot Manipulation), a dataset containing 107k demonstration trajectories across 479 diverse tasks involving 96 object classes. RoboMIND is collected through human teleoperation and encompasses comprehensive robotic-related information, including multi-view observations, proprioceptive robot state information, and linguistic task descriptions. (Copied from [docs](https://x-humanoid-robomind.github.io/))
## ⚠️ Dirty Tasks
| Task ID | Reason |
| :-------------------------------: | :----------------------------------------------: |
| 3_eggplantOven | take - turn on, wrong order |
| 3_eggplantoven_2 | take - turn on, wrong order |
| 5_eggoven_2 | no instruction |
| 10_packplate | no plate marker, no plate 2 |
| 10_packplate_2 | no plate marker, no plate 2 |
| 11_brushcup | two cups |
| 12_packcup | no cup marker |
| 13_packbowl | no bowl marker, blue - green flipped |
| 35_putcarrot | no instruction |
| 36_putpepper | no instruction |
| 37_putegg | no instruction |
| 39_puttomato | no instruction |
| 40_putavocado | no instruction |
| 41_putplum | no instruction |
| 42_putkiwifruite | wrong word: wifruite |
| 43_packplate | last object should be "them", not "it" |
| 44_putbluebowlongreenplate | last object should be "them", not "it" |
| 45_putgreenbowlonblueplate | only one instruction, but two sub-tasks |
| 46_putredbowlonwhiteplate | only one instruction, but two sub-tasks |
| 48_putpotatogreenplatefromsteam_2 | last action is about "left arm", not "right arm" |
| 52_holdercup | wrong order: should be right - left - right |
| 53_stackcup | wrong order: should be right - left - right |
| to be continued ... | |
## 🚀 What's New in This Script
In this dataset, we have made several key improvements:
@@ -96,7 +124,8 @@ Dataset Structure of `meta/info.json`:
## Get started
> [!IMPORTANT]
>
> 1. If you want to save depth when converting the dataset, modify `_assert_type_and_shape()` function in [lerobot.common.datasets.compute_stats.py](https://github.com/huggingface/lerobot/blob/main/lerobot/common/datasets/compute_stats.py).
>
> ```python
@@ -119,8 +148,9 @@ Dataset Structure of `meta/info.json`:
> if "depth" in fkey and v.shape != (1, 1, 1):
> raise ValueError(f"Shape of '{k}' must be (1,1,1), but is {v.shape} instead.")
> ```
>
>
> 2. The dataset needs to be organized into the following format before running the script due to differences in storage formats across platforms:
>
> ```bash
> /path/to/robomind/
> ├── benchmark1_0_release
+8 -2
View File
@@ -272,9 +272,15 @@ def save_as_lerobot_dataset(task: tuple[dict, Path, str], src_path, benchmark, e
task_type, splits, local_dir, task_instruction = task
config = ROBOMIND_CONFIG[embodiment]
# HACK: not consistent image shape...
# HACK:
# 1. not consistent image shape...
# 2. franka and ur image is bgr...
bgr2rgb = False
if "1_0" in benchmark:
match embodiment:
case "franka_1rgb" | "franka_3rgb" | "franka_fr3_dual" | "ur_1rgb":
bgr2rgb = True
case "tienkung_gello_1rgb":
if task_type in (
"clean_table_2_241211",
@@ -322,7 +328,7 @@ def save_as_lerobot_dataset(task: tuple[dict, Path, str], src_path, benchmark, e
else:
action_config = {}
for episode_path in path.glob("**/trajectory.hdf5"):
status, raw_dataset, err = load_local_dataset(episode_path, config, save_depth)
status, raw_dataset, err = load_local_dataset(episode_path, config, save_depth, bgr2rgb)
if status and len(raw_dataset) >= 50:
for frame_data in raw_dataset:
frame_data.update({"task": task_instruction})
@@ -5,7 +5,7 @@ import h5py
import numpy as np
def decode_images(camera_key, input_images):
def decode_images(camera_key, input_images, bgr2rgb: bool = False):
if "depth" not in camera_key:
rgb_images = []
camera_rgb_images = input_images
@@ -18,6 +18,8 @@ def decode_images(camera_key, input_images):
rgb = rgb.reshape(720, 1280, 3)
elif rgb.size == 921600:
rgb = rgb.reshape(480, 640, 3)
if bgr2rgb:
rgb = rgb[..., ::-1]
rgb_images.append(rgb)
rgb_images = np.asarray(rgb_images)
return rgb_images
@@ -40,7 +42,7 @@ def decode_images(camera_key, input_images):
return depth_images
def load_local_dataset(episode_path: Path, config: dict, save_depth: bool):
def load_local_dataset(episode_path: Path, config: dict, save_depth: bool, bgr2rgb: bool = False):
try:
images = {}
states = {}
@@ -53,7 +55,7 @@ def load_local_dataset(episode_path: Path, config: dict, save_depth: bool):
image_key = f"observations/rgb_images/{key}"
else:
continue
images[f"observation.images.{key}"] = decode_images(image_key, file[image_key])
images[f"observation.images.{key}"] = decode_images(image_key, file[image_key], bgr2rgb)
for key in config["states"]:
states[f"observation.states.{key}"] = np.array(file[f"puppet/{key}"], dtype=np.float32)
for key in config["actions"]: