From 462a2ace0bae1c119bfb88abafdd0836eb367e27 Mon Sep 17 00:00:00 2001
From: Tavish <tavish9.chen@gmail.com>
Date: Thu, 12 Jun 2025 09:51:26 +0800
Subject: [PATCH] fix robomind channel & add dirty tasks

---
 robomind2lerobot/README.md                    | 34 +++++++++++++++++--
 robomind2lerobot/robomind_h5.py               | 10 ++++--
 .../robomind_uitls/robomind_uitls.py          |  8 +++--
 3 files changed, 45 insertions(+), 7 deletions(-)

diff --git a/robomind2lerobot/README.md b/robomind2lerobot/README.md
index 481f64c..36117aa 100644
--- a/robomind2lerobot/README.md
+++ b/robomind2lerobot/README.md
@@ -2,6 +2,34 @@
 
 RoboMIND (Multi-embodiment Intelligence Normative Data for Robot Manipulation), a dataset containing 107k demonstration trajectories across 479 diverse tasks involving 96 object classes. RoboMIND is collected through human teleoperation and encompasses comprehensive robotic-related information, including multi-view observations, proprioceptive robot state information, and linguistic task descriptions.. (Copied from [docs](https://x-humanoid-robomind.github.io/))
 
+## ⚠️ Dirty Tasks
+
+|              Task ID              |                      Reason                      |
+| :-------------------------------: | :----------------------------------------------: |
+|          3_eggplantOven           |           take - turn on, wrong order            |
+|         3_eggplantoven_2          |           take - turn on, wrong order            |
+|            5_eggoven_2            |                  no instruction                  |
+|           10_packplate            |           no plate marker, no plate 2            |
+|          10_packplate_2           |           no plate marker, no plate 2            |
+|            11_brushcup            |                     two cups                     |
+|            12_packcup             |                  no cup marker                   |
+|            13_packbowl            |       no bowl marker, blue - green flipped       |
+|           35_putcarrot            |                  no instruction                  |
+|           36_putpepper            |                  no instruction                  |
+|             37_putegg             |                  no instruction                  |
+|           39_puttomato            |                  no instruction                  |
+|           40_putavocado           |                  no instruction                  |
+|            41_putplum             |                  no instruction                  |
+|         42_putkiwifruite          |               wrong word: wifruite               |
+|           43_packplate            |      last object should be "them", not "it"      |
+|    44_putbluebowlongreenplate     |      last object should be "them", not "it"      |
+|    45_putgreenbowlonblueplate     |     only one instruction, but two sub-tasks      |
+|     46_putredbowlonwhiteplate     |     only one instruction, but two sub-tasks      |
+| 48_putpotatogreenplatefromsteam_2 | last action is about "left arm", not "right arm" |
+|           52_holdercup            |   wrong order: should be right - left - right    |
+|            53_stackcup            |   wrong order: should be right - left - right    |
+|        to be continued ...        |                                                  |
+
 ## 🚀 What's New in This Script
 
 In this dataset, we have made several key improvements:
@@ -96,7 +124,8 @@ Dataset Structure of `meta/info.json`:
 
 ## Get started
 
-> [!IMPORTANT]  
+> [!IMPORTANT]
+>
 > 1. If you want to save depth when converting the dataset, modify `_assert_type_and_shape()` function in [lerobot.common.datasets.compute_stats.py](https://github.com/huggingface/lerobot/blob/main/lerobot/common/datasets/compute_stats.py).
 >
 > ```python
@@ -119,8 +148,9 @@ Dataset Structure of `meta/info.json`:
 >                     if "depth" in fkey and v.shape != (1, 1, 1):
 >                         raise ValueError(f"Shape of '{k}' must be (1,1,1), but is {v.shape} instead.")
 > ```
-> 
+>
 > 2. The dataset needs to be organized into the following format before running the script due to differences in storage formats across platforms:
+>
 > ```bash
 > /path/to/robomind/
 > ├── benchmark1_0_release
diff --git a/robomind2lerobot/robomind_h5.py b/robomind2lerobot/robomind_h5.py
index 21fc757..cbf12f0 100644
--- a/robomind2lerobot/robomind_h5.py
+++ b/robomind2lerobot/robomind_h5.py
@@ -272,9 +272,15 @@ def save_as_lerobot_dataset(task: tuple[dict, Path, str], src_path, benchmark, e
     task_type, splits, local_dir, task_instruction = task
 
     config = ROBOMIND_CONFIG[embodiment]
-    # HACK: not consistent image shape...
+    # HACK:
+    # 1. image shapes are not consistent...
+    # 2. franka and ur images are BGR...
+    bgr2rgb = False
     if "1_0" in benchmark:
         match embodiment:
+            case "franka_1rgb" | "franka_3rgb" | "franka_fr3_dual" | "ur_1rgb":
+                bgr2rgb = True
+
             case "tienkung_gello_1rgb":
                 if task_type in (
                     "clean_table_2_241211",
@@ -322,7 +328,7 @@ def save_as_lerobot_dataset(task: tuple[dict, Path, str], src_path, benchmark, e
         else:
             action_config = {}
         for episode_path in path.glob("**/trajectory.hdf5"):
-            status, raw_dataset, err = load_local_dataset(episode_path, config, save_depth)
+            status, raw_dataset, err = load_local_dataset(episode_path, config, save_depth, bgr2rgb)
             if status and len(raw_dataset) >= 50:
                 for frame_data in raw_dataset:
                     frame_data.update({"task": task_instruction})
diff --git a/robomind2lerobot/robomind_uitls/robomind_uitls.py b/robomind2lerobot/robomind_uitls/robomind_uitls.py
index 878e7b0..6163cbd 100644
--- a/robomind2lerobot/robomind_uitls/robomind_uitls.py
+++ b/robomind2lerobot/robomind_uitls/robomind_uitls.py
@@ -5,7 +5,7 @@ import h5py
 import numpy as np
 
 
-def decode_images(camera_key, input_images):
+def decode_images(camera_key, input_images, bgr2rgb: bool = False):
     if "depth" not in camera_key:
         rgb_images = []
         camera_rgb_images = input_images
@@ -18,6 +18,8 @@ def decode_images(camera_key, input_images):
                     rgb = rgb.reshape(720, 1280, 3)
                 elif rgb.size == 921600:
                     rgb = rgb.reshape(480, 640, 3)
+            if bgr2rgb:
+                rgb = rgb[..., ::-1]
             rgb_images.append(rgb)
         rgb_images = np.asarray(rgb_images)
         return rgb_images
@@ -40,7 +42,7 @@ def decode_images(camera_key, input_images):
         return depth_images
 
 
-def load_local_dataset(episode_path: Path, config: dict, save_depth: bool):
+def load_local_dataset(episode_path: Path, config: dict, save_depth: bool, bgr2rgb: bool = False):
     try:
         images = {}
         states = {}
@@ -53,7 +55,7 @@ def load_local_dataset(episode_path: Path, config: dict, save_depth: bool):
                     image_key = f"observations/rgb_images/{key}"
                 else:
                     continue
-                images[f"observation.images.{key}"] = decode_images(image_key, file[image_key])
+                images[f"observation.images.{key}"] = decode_images(image_key, file[image_key], bgr2rgb)
             for key in config["states"]:
                 states[f"observation.states.{key}"] = np.array(file[f"puppet/{key}"], dtype=np.float32)
             for key in config["actions"]: