From 3e4e37317e88eeb76246d7f02a1b8b2d9ebf4d55 Mon Sep 17 00:00:00 2001 From: Tavish Date: Tue, 22 Apr 2025 22:43:26 +0800 Subject: [PATCH] fix agibot2lerobot and update dirty tasks --- agibot2lerobot/README.md | 27 ++++++++++- agibot2lerobot/agibot_h5.py | 51 +++++++++++++-------- agibot2lerobot/agibot_utils/agibot_utils.py | 9 +++- 3 files changed, 64 insertions(+), 23 deletions(-) diff --git a/agibot2lerobot/README.md b/agibot2lerobot/README.md index 1bbbf87..b6fed09 100644 --- a/agibot2lerobot/README.md +++ b/agibot2lerobot/README.md @@ -1,5 +1,29 @@ # AgiBot-World to LeRobot +AgiBot World, the first large-scale robotic learning dataset designed to advance multi-purpose robotic policies. It is accompanied by foundation models, benchmarks, and an ecosystem to democratize access to high-quality robot data for the academic community and the industry, paving the path towards the "ImageNet Moment" for Embodied AI. (Copied from [docs](https://agibot-world.com/)) + +## ⚠️ Dirty Tasks + +| (Gripper) Task ID | (Some episodes) Reason | Fixed By | +| :---------------: | :--------------------: | -------- | +| task_352 | action_len > state_len | skipping | +| task_354 | action_len > state_len | skipping | +| task_359 | action_len > state_len | skipping | +| task_361 | action_len > state_len | skipping | +| task_368 | action_len > state_len | skipping | +| task_376 | action_len > state_len | skipping | +| task_377 | action_len > state_len | skipping | +| task_380 | corrupted mp4 | skipping | +| task_384 | corrupted mp4 | skipping | +| task_410 | action_len > state_len | skipping | +| task_414 | action_len > state_len | skipping | +| task_421 | action_len > state_len | skipping | +| task_428 | corrupted mp4 | skipping | +| task_460 | corrupted mp4 | skipping | +| task_505 | corrupted mp4 | skipping | +| task_510 | corrupted mp4 | skipping | +| task_711 | corrupted mp4 | skipping | + ## 🚀 What's New in This Script In this dataset, we have made several key improvements: @@ -129,7 +153,6 @@ Dataset Structure of `meta/info.json`: ### Download source code: - ```bash git clone https://github.com/Tavish9/any4lerobot.git ``` @@ -239,4 +262,4 @@ cd agibot2lerobot && bash convert.sh **Other Community Supported Cluster Managers** -See the [doc](https://docs.ray.io/en/latest/cluster/vms/user-guides/community/index.html) for more details. \ No newline at end of file +See the [doc](https://docs.ray.io/en/latest/cluster/vms/user-guides/community/index.html) for more details. diff --git a/agibot2lerobot/agibot_h5.py b/agibot2lerobot/agibot_h5.py index 1ae58a2..cc8fae0 100644 --- a/agibot2lerobot/agibot_h5.py +++ b/agibot2lerobot/agibot_h5.py @@ -234,7 +234,7 @@ def save_as_lerobot_dataset(agibot_world_config, task: tuple[Path, Path], num_th task_info = get_task_info(json_file) task_name = task_info[0]["task_name"] task_init_scene = task_info[0]["init_scene_text"] - task_instruction = f"{task_name}.{task_init_scene}" + task_instruction = f"{task_name} | {task_init_scene}" task_id = json_file.stem.split("_")[-1] task_info = {episode["episode_id"]: episode for episode in task_info} @@ -263,26 +263,28 @@ def save_as_lerobot_dataset(agibot_world_config, task: tuple[Path, Path], num_th if eid not in task_info: print(f"{json_file.stem}, episode_{eid} not in task_info.json, skipping...") continue - try: - action_config = task_info[eid]["label_info"]["action_config"] - raw_dataset = load_local_dataset( - eid, - src_path=src_path, - task_id=task_id, - task_instruction=task_instruction, - save_depth=save_depth, - AgiBotWorld_CONFIG=agibot_world_config, - ) - _, frames, videos = raw_dataset - if not all([video_path.exists() for video_path in videos.values()]): - print(f"{json_file.stem}, episode_{eid}: some of the videos does not exist, skipping...") - continue + action_config = task_info[eid]["label_info"]["action_config"] + raw_dataset = load_local_dataset( + eid, + src_path=src_path, + task_id=task_id, + task_instruction=task_instruction, + save_depth=save_depth, + AgiBotWorld_CONFIG=agibot_world_config, + ) + _, frames, videos = raw_dataset + if not all([video_path.exists() for video_path in videos.values()]): + print(f"{json_file.stem}, episode_{eid}: some of the videos does not exist, skipping...") + continue - for frame_data in frames: - dataset.add_frame(frame_data) + for frame_data in frames: + dataset.add_frame(frame_data) + try: dataset.save_episode(videos=videos, action_config=action_config) except Exception as e: - raise Exception(f"{json_file.stem}, {eid}") from e + print(f"{json_file.stem}, episode_{eid}: there are some corrupted mp4s\nException details: {str(e)}") + dataset.episode_buffer = None + continue gc.collect() print(f"process done for {json_file.stem}, episode_id {eid}, len {len(frames)}") else: @@ -306,11 +308,22 @@ def save_as_lerobot_dataset(agibot_world_config, task: tuple[Path, Path], num_th for raw_dataset in as_completed(futures): eid, frames, videos = raw_dataset.result() + if not all([video_path.exists() for video_path in videos.values()]): + print(f"{json_file.stem}, episode_{eid}: some of the videos does not exist, skipping...") + continue action_config = task_info[eid]["label_info"]["action_config"] for frame_data in frames: dataset.add_frame(frame_data) - dataset.save_episode(videos=videos, action_config=action_config) + try: + dataset.save_episode(videos=videos, action_config=action_config) + except Exception as e: + print( + f"{json_file.stem}, episode_{eid}: there are some corrupted mp4s\nException details: {str(e)}" + ) + dataset.episode_buffer = None + continue gc.collect() + print(f"process done for {json_file.stem}, episode_id {eid}, len {len(frames)}") def main( diff --git a/agibot2lerobot/agibot_utils/agibot_utils.py b/agibot2lerobot/agibot_utils/agibot_utils.py index f425175..15a7351 100644 --- a/agibot2lerobot/agibot_utils/agibot_utils.py +++ b/agibot2lerobot/agibot_utils/agibot_utils.py @@ -34,10 +34,12 @@ def load_local_dataset( for key in AgiBotWorld_CONFIG["actions"]: action[f"actions.{key}"] = np.array(f["action/" + key.replace(".", "/")], dtype=np.float32) - # HACK: agibot team forgot to pad some of the values + # HACK: agibot team forgot to pad or filter some of the values num_frames = len(next(iter(state.values()))) for action_key, action_value in action.items(): - if action_value.size and len(action_value) != num_frames: + if 0 == len(action_value): + print("0 action occurs, padding all with zeros later") + elif len(action_value) < num_frames: state_key = action_key.replace("actions", "state").replace(".", "/") new_action_value = np.array(f[state_key], dtype=np.float32).copy() action_index_key = "/".join(list(action_key.replace("actions", "action").split(".")[:-1]) + ["index"]) @@ -48,6 +50,9 @@ def load_local_dataset( action_index = np.array(f[action_index_key]) new_action_value[action_index] = action_value action[action_key] = new_action_value + elif len(action_value) > num_frames: + print("corrupt data, skipping") + return episode_id, [], {"dummy_video": Path("/path/to/no_exist")} if save_depth: depth_imgs = load_depths(ob_dir / "depth", "head_depth")