fix agibot2lerobot and update dirty tasks

This commit is contained in:
Tavish
2025-04-22 22:43:26 +08:00
parent 405220e07f
commit 3e4e37317e
3 changed files with 64 additions and 23 deletions
+24 -1
View File
@@ -1,5 +1,29 @@
# AgiBot-World to LeRobot # AgiBot-World to LeRobot
AgiBot World is the first large-scale robotic learning dataset designed to advance multi-purpose robotic policies. It is accompanied by foundation models, benchmarks, and an ecosystem to democratize access to high-quality robot data for the academic community and the industry, paving the path towards the "ImageNet Moment" for Embodied AI. (Copied from [docs](https://agibot-world.com/))
## ⚠️ Dirty Tasks
| (Gripper) Task ID | (Some episodes) Reason | Handling |
| :---------------: | :--------------------: | -------- |
| task_352 | action_len > state_len | skipping |
| task_354 | action_len > state_len | skipping |
| task_359 | action_len > state_len | skipping |
| task_361 | action_len > state_len | skipping |
| task_368 | action_len > state_len | skipping |
| task_376 | action_len > state_len | skipping |
| task_377 | action_len > state_len | skipping |
| task_380 | corrupted mp4 | skipping |
| task_384 | corrupted mp4 | skipping |
| task_410 | action_len > state_len | skipping |
| task_414 | action_len > state_len | skipping |
| task_421 | action_len > state_len | skipping |
| task_428 | corrupted mp4 | skipping |
| task_460 | corrupted mp4 | skipping |
| task_505 | corrupted mp4 | skipping |
| task_510 | corrupted mp4 | skipping |
| task_711 | corrupted mp4 | skipping |
## 🚀 What's New in This Script ## 🚀 What's New in This Script
In this dataset, we have made several key improvements: In this dataset, we have made several key improvements:
@@ -129,7 +153,6 @@ Dataset Structure of `meta/info.json`:
### Download source code: ### Download source code:
```bash ```bash
git clone https://github.com/Tavish9/any4lerobot.git git clone https://github.com/Tavish9/any4lerobot.git
``` ```
+32 -19
View File
@@ -234,7 +234,7 @@ def save_as_lerobot_dataset(agibot_world_config, task: tuple[Path, Path], num_th
task_info = get_task_info(json_file) task_info = get_task_info(json_file)
task_name = task_info[0]["task_name"] task_name = task_info[0]["task_name"]
task_init_scene = task_info[0]["init_scene_text"] task_init_scene = task_info[0]["init_scene_text"]
task_instruction = f"{task_name}.{task_init_scene}" task_instruction = f"{task_name} | {task_init_scene}"
task_id = json_file.stem.split("_")[-1] task_id = json_file.stem.split("_")[-1]
task_info = {episode["episode_id"]: episode for episode in task_info} task_info = {episode["episode_id"]: episode for episode in task_info}
@@ -263,26 +263,28 @@ def save_as_lerobot_dataset(agibot_world_config, task: tuple[Path, Path], num_th
if eid not in task_info: if eid not in task_info:
print(f"{json_file.stem}, episode_{eid} not in task_info.json, skipping...") print(f"{json_file.stem}, episode_{eid} not in task_info.json, skipping...")
continue continue
try: action_config = task_info[eid]["label_info"]["action_config"]
action_config = task_info[eid]["label_info"]["action_config"] raw_dataset = load_local_dataset(
raw_dataset = load_local_dataset( eid,
eid, src_path=src_path,
src_path=src_path, task_id=task_id,
task_id=task_id, task_instruction=task_instruction,
task_instruction=task_instruction, save_depth=save_depth,
save_depth=save_depth, AgiBotWorld_CONFIG=agibot_world_config,
AgiBotWorld_CONFIG=agibot_world_config, )
) _, frames, videos = raw_dataset
_, frames, videos = raw_dataset if not all([video_path.exists() for video_path in videos.values()]):
if not all([video_path.exists() for video_path in videos.values()]): print(f"{json_file.stem}, episode_{eid}: some of the videos does not exist, skipping...")
print(f"{json_file.stem}, episode_{eid}: some of the videos does not exist, skipping...") continue
continue
for frame_data in frames: for frame_data in frames:
dataset.add_frame(frame_data) dataset.add_frame(frame_data)
try:
dataset.save_episode(videos=videos, action_config=action_config) dataset.save_episode(videos=videos, action_config=action_config)
except Exception as e: except Exception as e:
raise Exception(f"{json_file.stem}, {eid}") from e print(f"{json_file.stem}, episode_{eid}: there are some corrupted mp4s\nException details: {str(e)}")
dataset.episode_buffer = None
continue
gc.collect() gc.collect()
print(f"process done for {json_file.stem}, episode_id {eid}, len {len(frames)}") print(f"process done for {json_file.stem}, episode_id {eid}, len {len(frames)}")
else: else:
@@ -306,11 +308,22 @@ def save_as_lerobot_dataset(agibot_world_config, task: tuple[Path, Path], num_th
for raw_dataset in as_completed(futures): for raw_dataset in as_completed(futures):
eid, frames, videos = raw_dataset.result() eid, frames, videos = raw_dataset.result()
if not all([video_path.exists() for video_path in videos.values()]):
print(f"{json_file.stem}, episode_{eid}: some of the videos does not exist, skipping...")
continue
action_config = task_info[eid]["label_info"]["action_config"] action_config = task_info[eid]["label_info"]["action_config"]
for frame_data in frames: for frame_data in frames:
dataset.add_frame(frame_data) dataset.add_frame(frame_data)
dataset.save_episode(videos=videos, action_config=action_config) try:
dataset.save_episode(videos=videos, action_config=action_config)
except Exception as e:
print(
f"{json_file.stem}, episode_{eid}: there are some corrupted mp4s\nException details: {str(e)}"
)
dataset.episode_buffer = None
continue
gc.collect() gc.collect()
print(f"process done for {json_file.stem}, episode_id {eid}, len {len(frames)}")
def main( def main(
+7 -2
View File
@@ -34,10 +34,12 @@ def load_local_dataset(
for key in AgiBotWorld_CONFIG["actions"]: for key in AgiBotWorld_CONFIG["actions"]:
action[f"actions.{key}"] = np.array(f["action/" + key.replace(".", "/")], dtype=np.float32) action[f"actions.{key}"] = np.array(f["action/" + key.replace(".", "/")], dtype=np.float32)
# HACK: agibot team forgot to pad some of the values # HACK: agibot team forgot to pad or filter some of the values
num_frames = len(next(iter(state.values()))) num_frames = len(next(iter(state.values())))
for action_key, action_value in action.items(): for action_key, action_value in action.items():
if action_value.size and len(action_value) != num_frames: if 0 == len(action_value):
print("0 action occurs, padding all with zeros later")
elif len(action_value) < num_frames:
state_key = action_key.replace("actions", "state").replace(".", "/") state_key = action_key.replace("actions", "state").replace(".", "/")
new_action_value = np.array(f[state_key], dtype=np.float32).copy() new_action_value = np.array(f[state_key], dtype=np.float32).copy()
action_index_key = "/".join(list(action_key.replace("actions", "action").split(".")[:-1]) + ["index"]) action_index_key = "/".join(list(action_key.replace("actions", "action").split(".")[:-1]) + ["index"])
@@ -48,6 +50,9 @@ def load_local_dataset(
action_index = np.array(f[action_index_key]) action_index = np.array(f[action_index_key])
new_action_value[action_index] = action_value new_action_value[action_index] = action_value
action[action_key] = new_action_value action[action_key] = new_action_value
elif len(action_value) > num_frames:
print("corrupt data, skipping")
return episode_id, [], {"dummy_video": Path("/path/to/no_exist")}
if save_depth: if save_depth:
depth_imgs = load_depths(ob_dir / "depth", "head_depth") depth_imgs = load_depths(ob_dir / "depth", "head_depth")