fix agibot2lerobot and update dirty tasks

This commit is contained in:
Tavish
2025-04-22 22:43:26 +08:00
parent 405220e07f
commit 3e4e37317e
3 changed files with 64 additions and 23 deletions
+25 -2
View File
@@ -1,5 +1,29 @@
# AgiBot-World to LeRobot
AgiBot World is the first large-scale robotic learning dataset designed to advance multi-purpose robotic policies. It is accompanied by foundation models, benchmarks, and an ecosystem to democratize access to high-quality robot data for the academic community and the industry, paving the path towards the "ImageNet Moment" for Embodied AI. (Copied from [docs](https://agibot-world.com/))
## ⚠️ Dirty Tasks
| (Gripper) Task ID | (Some episodes) Reason | Fixed By |
| :---------------: | :--------------------: | -------- |
| task_352 | action_len > state_len | skipping |
| task_354 | action_len > state_len | skipping |
| task_359 | action_len > state_len | skipping |
| task_361 | action_len > state_len | skipping |
| task_368 | action_len > state_len | skipping |
| task_376 | action_len > state_len | skipping |
| task_377 | action_len > state_len | skipping |
| task_380 | corrupted mp4 | skipping |
| task_384 | corrupted mp4 | skipping |
| task_410 | action_len > state_len | skipping |
| task_414 | action_len > state_len | skipping |
| task_421 | action_len > state_len | skipping |
| task_428 | corrupted mp4 | skipping |
| task_460 | corrupted mp4 | skipping |
| task_505 | corrupted mp4 | skipping |
| task_510 | corrupted mp4 | skipping |
| task_711 | corrupted mp4 | skipping |
## 🚀 What's New in This Script
In this dataset, we have made several key improvements:
@@ -129,7 +153,6 @@ Dataset Structure of `meta/info.json`:
### Download source code:
```bash
git clone https://github.com/Tavish9/any4lerobot.git
```
@@ -239,4 +262,4 @@ cd agibot2lerobot && bash convert.sh
**Other Community Supported Cluster Managers**
See the [doc](https://docs.ray.io/en/latest/cluster/vms/user-guides/community/index.html) for more details.
See the [doc](https://docs.ray.io/en/latest/cluster/vms/user-guides/community/index.html) for more details.
+32 -19
View File
@@ -234,7 +234,7 @@ def save_as_lerobot_dataset(agibot_world_config, task: tuple[Path, Path], num_th
task_info = get_task_info(json_file)
task_name = task_info[0]["task_name"]
task_init_scene = task_info[0]["init_scene_text"]
task_instruction = f"{task_name}.{task_init_scene}"
task_instruction = f"{task_name} | {task_init_scene}"
task_id = json_file.stem.split("_")[-1]
task_info = {episode["episode_id"]: episode for episode in task_info}
@@ -263,26 +263,28 @@ def save_as_lerobot_dataset(agibot_world_config, task: tuple[Path, Path], num_th
if eid not in task_info:
print(f"{json_file.stem}, episode_{eid} not in task_info.json, skipping...")
continue
try:
action_config = task_info[eid]["label_info"]["action_config"]
raw_dataset = load_local_dataset(
eid,
src_path=src_path,
task_id=task_id,
task_instruction=task_instruction,
save_depth=save_depth,
AgiBotWorld_CONFIG=agibot_world_config,
)
_, frames, videos = raw_dataset
if not all([video_path.exists() for video_path in videos.values()]):
print(f"{json_file.stem}, episode_{eid}: some of the videos does not exist, skipping...")
continue
action_config = task_info[eid]["label_info"]["action_config"]
raw_dataset = load_local_dataset(
eid,
src_path=src_path,
task_id=task_id,
task_instruction=task_instruction,
save_depth=save_depth,
AgiBotWorld_CONFIG=agibot_world_config,
)
_, frames, videos = raw_dataset
if not all([video_path.exists() for video_path in videos.values()]):
print(f"{json_file.stem}, episode_{eid}: some of the videos does not exist, skipping...")
continue
for frame_data in frames:
dataset.add_frame(frame_data)
for frame_data in frames:
dataset.add_frame(frame_data)
try:
dataset.save_episode(videos=videos, action_config=action_config)
except Exception as e:
raise Exception(f"{json_file.stem}, {eid}") from e
print(f"{json_file.stem}, episode_{eid}: there are some corrupted mp4s\nException details: {str(e)}")
dataset.episode_buffer = None
continue
gc.collect()
print(f"process done for {json_file.stem}, episode_id {eid}, len {len(frames)}")
else:
@@ -306,11 +308,22 @@ def save_as_lerobot_dataset(agibot_world_config, task: tuple[Path, Path], num_th
for raw_dataset in as_completed(futures):
eid, frames, videos = raw_dataset.result()
if not all([video_path.exists() for video_path in videos.values()]):
print(f"{json_file.stem}, episode_{eid}: some of the videos does not exist, skipping...")
continue
action_config = task_info[eid]["label_info"]["action_config"]
for frame_data in frames:
dataset.add_frame(frame_data)
dataset.save_episode(videos=videos, action_config=action_config)
try:
dataset.save_episode(videos=videos, action_config=action_config)
except Exception as e:
print(
f"{json_file.stem}, episode_{eid}: there are some corrupted mp4s\nException details: {str(e)}"
)
dataset.episode_buffer = None
continue
gc.collect()
print(f"process done for {json_file.stem}, episode_id {eid}, len {len(frames)}")
def main(
+7 -2
View File
@@ -34,10 +34,12 @@ def load_local_dataset(
for key in AgiBotWorld_CONFIG["actions"]:
action[f"actions.{key}"] = np.array(f["action/" + key.replace(".", "/")], dtype=np.float32)
# HACK: agibot team forgot to pad some of the values
# HACK: agibot team forgot to pad or filter some of the values
num_frames = len(next(iter(state.values())))
for action_key, action_value in action.items():
if action_value.size and len(action_value) != num_frames:
if 0 == len(action_value):
print("0 action occurs, padding all with zeros later")
elif len(action_value) < num_frames:
state_key = action_key.replace("actions", "state").replace(".", "/")
new_action_value = np.array(f[state_key], dtype=np.float32).copy()
action_index_key = "/".join(list(action_key.replace("actions", "action").split(".")[:-1]) + ["index"])
@@ -48,6 +50,9 @@ def load_local_dataset(
action_index = np.array(f[action_index_key])
new_action_value[action_index] = action_value
action[action_key] = new_action_value
elif len(action_value) > num_frames:
print("corrupt data, skipping")
return episode_id, [], {"dummy_video": Path("/path/to/no_exist")}
if save_depth:
depth_imgs = load_depths(ob_dir / "depth", "head_depth")