mirror of
https://github.com/Tavish9/any4lerobot.git
synced 2026-05-22 09:29:44 +00:00
fix agibot2lerobot and update dirty tasks
This commit is contained in:
@@ -1,5 +1,29 @@
|
||||
# AgiBot-World to LeRobot
|
||||
|
||||
AgiBot World is the first large-scale robotic learning dataset designed to advance multi-purpose robotic policies. It is accompanied by foundation models, benchmarks, and an ecosystem to democratize access to high-quality robot data for the academic community and the industry, paving the path towards the "ImageNet Moment" for Embodied AI. (Copied from [docs](https://agibot-world.com/))
|
||||
|
||||
## ⚠️ Dirty Tasks
|
||||
|
||||
| (Gripper) Task ID | (Some episodes) Reason | Fixed By |
|
||||
| :---------------: | :--------------------: | -------- |
|
||||
| task_352 | action_len > state_len | skipping |
|
||||
| task_354 | action_len > state_len | skipping |
|
||||
| task_359 | action_len > state_len | skipping |
|
||||
| task_361 | action_len > state_len | skipping |
|
||||
| task_368 | action_len > state_len | skipping |
|
||||
| task_376 | action_len > state_len | skipping |
|
||||
| task_377 | action_len > state_len | skipping |
|
||||
| task_380 | corrupted mp4 | skipping |
|
||||
| task_384 | corrupted mp4 | skipping |
|
||||
| task_410 | action_len > state_len | skipping |
|
||||
| task_414 | action_len > state_len | skipping |
|
||||
| task_421 | action_len > state_len | skipping |
|
||||
| task_428 | corrupted mp4 | skipping |
|
||||
| task_460 | corrupted mp4 | skipping |
|
||||
| task_505 | corrupted mp4 | skipping |
|
||||
| task_510 | corrupted mp4 | skipping |
|
||||
| task_711 | corrupted mp4 | skipping |
|
||||
|
||||
## 🚀 What's New in This Script
|
||||
|
||||
In this dataset, we have made several key improvements:
|
||||
@@ -129,7 +153,6 @@ Dataset Structure of `meta/info.json`:
|
||||
|
||||
### Download source code:
|
||||
|
||||
|
||||
```bash
|
||||
git clone https://github.com/Tavish9/any4lerobot.git
|
||||
```
|
||||
@@ -239,4 +262,4 @@ cd agibot2lerobot && bash convert.sh
|
||||
|
||||
**Other Community Supported Cluster Managers**
|
||||
|
||||
See the [doc](https://docs.ray.io/en/latest/cluster/vms/user-guides/community/index.html) for more details.
|
||||
See the [doc](https://docs.ray.io/en/latest/cluster/vms/user-guides/community/index.html) for more details.
|
||||
|
||||
+32
-19
@@ -234,7 +234,7 @@ def save_as_lerobot_dataset(agibot_world_config, task: tuple[Path, Path], num_th
|
||||
task_info = get_task_info(json_file)
|
||||
task_name = task_info[0]["task_name"]
|
||||
task_init_scene = task_info[0]["init_scene_text"]
|
||||
task_instruction = f"{task_name}.{task_init_scene}"
|
||||
task_instruction = f"{task_name} | {task_init_scene}"
|
||||
task_id = json_file.stem.split("_")[-1]
|
||||
task_info = {episode["episode_id"]: episode for episode in task_info}
|
||||
|
||||
@@ -263,26 +263,28 @@ def save_as_lerobot_dataset(agibot_world_config, task: tuple[Path, Path], num_th
|
||||
if eid not in task_info:
|
||||
print(f"{json_file.stem}, episode_{eid} not in task_info.json, skipping...")
|
||||
continue
|
||||
try:
|
||||
action_config = task_info[eid]["label_info"]["action_config"]
|
||||
raw_dataset = load_local_dataset(
|
||||
eid,
|
||||
src_path=src_path,
|
||||
task_id=task_id,
|
||||
task_instruction=task_instruction,
|
||||
save_depth=save_depth,
|
||||
AgiBotWorld_CONFIG=agibot_world_config,
|
||||
)
|
||||
_, frames, videos = raw_dataset
|
||||
if not all([video_path.exists() for video_path in videos.values()]):
|
||||
print(f"{json_file.stem}, episode_{eid}: some of the videos does not exist, skipping...")
|
||||
continue
|
||||
action_config = task_info[eid]["label_info"]["action_config"]
|
||||
raw_dataset = load_local_dataset(
|
||||
eid,
|
||||
src_path=src_path,
|
||||
task_id=task_id,
|
||||
task_instruction=task_instruction,
|
||||
save_depth=save_depth,
|
||||
AgiBotWorld_CONFIG=agibot_world_config,
|
||||
)
|
||||
_, frames, videos = raw_dataset
|
||||
if not all([video_path.exists() for video_path in videos.values()]):
|
||||
print(f"{json_file.stem}, episode_{eid}: some of the videos does not exist, skipping...")
|
||||
continue
|
||||
|
||||
for frame_data in frames:
|
||||
dataset.add_frame(frame_data)
|
||||
for frame_data in frames:
|
||||
dataset.add_frame(frame_data)
|
||||
try:
|
||||
dataset.save_episode(videos=videos, action_config=action_config)
|
||||
except Exception as e:
|
||||
raise Exception(f"{json_file.stem}, {eid}") from e
|
||||
print(f"{json_file.stem}, episode_{eid}: there are some corrupted mp4s\nException details: {str(e)}")
|
||||
dataset.episode_buffer = None
|
||||
continue
|
||||
gc.collect()
|
||||
print(f"process done for {json_file.stem}, episode_id {eid}, len {len(frames)}")
|
||||
else:
|
||||
@@ -306,11 +308,22 @@ def save_as_lerobot_dataset(agibot_world_config, task: tuple[Path, Path], num_th
|
||||
|
||||
for raw_dataset in as_completed(futures):
|
||||
eid, frames, videos = raw_dataset.result()
|
||||
if not all([video_path.exists() for video_path in videos.values()]):
|
||||
print(f"{json_file.stem}, episode_{eid}: some of the videos does not exist, skipping...")
|
||||
continue
|
||||
action_config = task_info[eid]["label_info"]["action_config"]
|
||||
for frame_data in frames:
|
||||
dataset.add_frame(frame_data)
|
||||
dataset.save_episode(videos=videos, action_config=action_config)
|
||||
try:
|
||||
dataset.save_episode(videos=videos, action_config=action_config)
|
||||
except Exception as e:
|
||||
print(
|
||||
f"{json_file.stem}, episode_{eid}: there are some corrupted mp4s\nException details: {str(e)}"
|
||||
)
|
||||
dataset.episode_buffer = None
|
||||
continue
|
||||
gc.collect()
|
||||
print(f"process done for {json_file.stem}, episode_id {eid}, len {len(frames)}")
|
||||
|
||||
|
||||
def main(
|
||||
|
||||
@@ -34,10 +34,12 @@ def load_local_dataset(
|
||||
for key in AgiBotWorld_CONFIG["actions"]:
|
||||
action[f"actions.{key}"] = np.array(f["action/" + key.replace(".", "/")], dtype=np.float32)
|
||||
|
||||
# HACK: agibot team forgot to pad some of the values
|
||||
# HACK: agibot team forgot to pad or filter some of the values
|
||||
num_frames = len(next(iter(state.values())))
|
||||
for action_key, action_value in action.items():
|
||||
if action_value.size and len(action_value) != num_frames:
|
||||
if 0 == len(action_value):
|
||||
print("0 action occurs, padding all with zeros later")
|
||||
elif len(action_value) < num_frames:
|
||||
state_key = action_key.replace("actions", "state").replace(".", "/")
|
||||
new_action_value = np.array(f[state_key], dtype=np.float32).copy()
|
||||
action_index_key = "/".join(list(action_key.replace("actions", "action").split(".")[:-1]) + ["index"])
|
||||
@@ -48,6 +50,9 @@ def load_local_dataset(
|
||||
action_index = np.array(f[action_index_key])
|
||||
new_action_value[action_index] = action_value
|
||||
action[action_key] = new_action_value
|
||||
elif len(action_value) > num_frames:
|
||||
print("corrupt data, skipping")
|
||||
return episode_id, [], {"dummy_video": Path("/path/to/no_exist")}
|
||||
|
||||
if save_depth:
|
||||
depth_imgs = load_depths(ob_dir / "depth", "head_depth")
|
||||
|
||||
Reference in New Issue
Block a user