add port rlds script

This commit is contained in:
Pepijn
2025-09-08 13:40:47 +02:00
parent af79dda8d9
commit 3d31f2ad53
7 changed files with 2450 additions and 9 deletions
@@ -0,0 +1,15 @@
# Copyright 2025 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Open X-Embodiment utilities for dataset conversion."""
+854
View File
@@ -0,0 +1,854 @@
"""
Adapted from https://github.com/openvla/openvla/blob/main/prismatic/vla/datasets/rlds/oxe/configs.py
configs.py
Defines per-dataset configuration (kwargs) for each dataset in Open-X Embodiment.
Configuration adopts the following structure:
    image_obs_keys:
        primary: primary external RGB
        secondary: secondary external RGB
        wrist: wrist RGB
    depth_obs_keys:
        primary: primary external depth
        secondary: secondary external depth
        wrist: wrist depth
    # Always 8-dim =>> changes based on `StateEncoding`
    state_obs_keys:
        StateEncoding.POS_EULER: EEF XYZ (3) + Roll-Pitch-Yaw (3) + <PAD> (1) + Gripper Open/Close (1)
        StateEncoding.POS_QUAT: EEF XYZ (3) + Quaternion (4) + Gripper Open/Close (1)
        StateEncoding.JOINT: Joint Angles (7, <PAD> if fewer) + Gripper Open/Close (1)
    state_encoding: Type of `StateEncoding`
    action_encoding: Type of action encoding (e.g., EEF Position vs. Joint Position)
"""
from enum import IntEnum
import tensorflow as tf
def zero_action_filter(traj: dict) -> tf.Tensor:
    """
    Filters transitions whose actions are all-0 (only relative actions, no gripper action).

    Only the first six action dimensions are inspected. This filter is applied *after* action
    normalization, so a raw zero action is not stored as 0 but as "normalized 0", i.e. the image of 0
    under `2 * (x - q01) / (q99 - q01 + 1e-8) - 1`; that value is the reference for the comparison.

    Args:
        traj: Mapping whose "action" entry is sliced as `[:, :6]` (rows presumably are timesteps --
            confirm against the caller).

    Returns:
        Scalar boolean tensor: True when at least one inspected value deviates from the normalized
        zero action by more than 1e-5 (keep), False when every row is a zero action (drop).
    """
    # Per-dimension DROID action statistics used by the normalization (presumably the 1st / 99th
    # percentiles, judging by the names -- confirm against the dataset statistics).
    DROID_Q01 = tf.convert_to_tensor(  # NOQA: N806
        [
            -0.7776297926902771,
            -0.5803514122962952,
            -0.5795090794563293,
            -0.6464047729969025,
            -0.7041108310222626,
            -0.8895104378461838,
        ]
    )
    DROID_Q99 = tf.convert_to_tensor(  # NOQA: N806
        [
            0.7597932070493698,
            0.5726242214441299,
            0.7351000607013702,
            0.6705610305070877,
            0.6464948207139969,
            0.8897542208433151,
        ]
    )

    # The constants above are float32; cast the slice once so actions stored in another dtype do not
    # trip a dtype mismatch in the arithmetic below (a no-op for float32 input).
    actions = tf.cast(traj["action"][:, :6], tf.float32)

    # Where a raw all-zero action lands after normalization.
    DROID_NORM_0_ACT = (  # NOQA: N806
        2 * (tf.zeros_like(actions) - DROID_Q01) / (DROID_Q99 - DROID_Q01 + 1e-8) - 1
    )

    return tf.reduce_any(tf.math.abs(actions - DROID_NORM_0_ACT) > 1e-5)
# Defines Proprioceptive State Encoding Schemes
class StateEncoding(IntEnum):
    """Proprioceptive state layouts that a dataset config can name in its `state_encoding` entry."""

    # fmt: off
    NONE = -1               # No Proprioceptive State
    POS_EULER = 1           # EEF XYZ (3) + Roll-Pitch-Yaw (3) + <PAD> (1) + Gripper Open/Close (1)
    POS_QUAT = 2            # EEF XYZ (3) + Quaternion (4) + Gripper Open/Close (1)
    JOINT = 3               # Joint Angles (7, <PAD> if fewer) + Gripper Open/Close (1)
    JOINT_BIMANUAL = 4      # Joint Angles (2 x [ Joint Angles (6) + Gripper Open/Close (1) ])
    # fmt: on
# Defines Action Encoding Schemes
class ActionEncoding(IntEnum):
    """Action layouts that a dataset config can name in its `action_encoding` entry."""

    # NOTE: members are intentionally left in their original definition order (values 1, 5, 2, 3, 4);
    # iteration over the enum follows definition order, not numeric order.
    # fmt: off
    EEF_POS = 1             # EEF Delta XYZ (3) + Roll-Pitch-Yaw (3) + Gripper Open/Close (1)
    EEF_POS_QUAT = 5        # EEF Delta XYZ (3) + Quaternion (4) + Gripper Open/Close (1)
    JOINT_POS = 2           # Joint Delta Position (7) + Gripper Open/Close (1)
    JOINT_POS_BIMANUAL = 3  # Joint Delta Position (2 x [ Joint Delta Position (6) + Gripper Open/Close (1) ])
    EEF_R6 = 4              # EEF Delta XYZ (3) + R6 (6) + Gripper Open/Close (1)
    # fmt: on
# === Individual Dataset Configs ===
# Maps each dataset name to its per-dataset settings. Every entry provides:
#   image_obs_keys / depth_obs_keys: observation key for each camera slot ("primary", "secondary",
#       "wrist"); None means the slot is unused.
#   state_obs_keys: observation keys that make up the proprioceptive state; None entries are
#       placeholders (presumably padding, per the <PAD> notes on `StateEncoding` -- confirm against
#       the code that consumes this table).
#   state_encoding / action_encoding: members of `StateEncoding` / `ActionEncoding`.
#   control_frequency: int or float (None for "stanford_mask_vit_converted_externally_to_rlds");
#       the T-DROID comments below give it in Hz.
#   robot_type: robot name string.
# Only "droid" carries the extra "aux_kwargs" entry.
OXE_DATASET_CONFIGS = {
    "fractal20220817_data": {
        "image_obs_keys": {"primary": "image", "secondary": None, "wrist": None},
        "depth_obs_keys": {"primary": None, "secondary": None, "wrist": None},
        "state_obs_keys": ["base_pose_tool_reached", "gripper_closed"],
        "state_encoding": StateEncoding.POS_QUAT,
        "action_encoding": ActionEncoding.EEF_POS,
        "control_frequency": 3,
        "robot_type": "Google Robot",
    },
    "kuka": {
        "image_obs_keys": {"primary": "image", "secondary": None, "wrist": None},
        "depth_obs_keys": {"primary": None, "secondary": None, "wrist": None},
        "state_obs_keys": [
            "clip_function_input/base_pose_tool_reached",
            "gripper_closed",
        ],
        "state_encoding": StateEncoding.POS_QUAT,
        "action_encoding": ActionEncoding.EEF_POS,
        "control_frequency": 10,
        "robot_type": "Kuka iiwa",
    },
    "bridge_oxe": {  # Version of Bridge V2 in Open X-Embodiment mixture
        "image_obs_keys": {"primary": "image", "secondary": "image_1", "wrist": None},
        "depth_obs_keys": {"primary": None, "secondary": None, "wrist": None},
        "state_obs_keys": ["EEF_state", None, "gripper_state"],
        "state_encoding": StateEncoding.POS_EULER,
        "action_encoding": ActionEncoding.EEF_POS,
        "control_frequency": 5,
        "robot_type": "WidowX",
    },
    "bridge_orig": {  # Original version of Bridge V2 from project website
        "image_obs_keys": {"primary": "image_0", "secondary": "image_1", "wrist": None},
        "depth_obs_keys": {"primary": None, "secondary": None, "wrist": None},
        "state_obs_keys": ["EEF_state", None, "gripper_state"],
        "state_encoding": StateEncoding.POS_EULER,
        "action_encoding": ActionEncoding.EEF_POS,
        "control_frequency": 5,
        "robot_type": "WidowX",
    },
    "bridge_dataset": {  # Original version of Bridge V2 from project website (same settings as "bridge_orig")
        "image_obs_keys": {"primary": "image_0", "secondary": "image_1", "wrist": None},
        "depth_obs_keys": {"primary": None, "secondary": None, "wrist": None},
        "state_obs_keys": ["EEF_state", None, "gripper_state"],
        "state_encoding": StateEncoding.POS_EULER,
        "action_encoding": ActionEncoding.EEF_POS,
        "control_frequency": 5,
        "robot_type": "WidowX",
    },
    "taco_play": {
        "image_obs_keys": {
            "primary": "rgb_static",
            "secondary": None,
            "wrist": "rgb_gripper",
        },
        "depth_obs_keys": {
            "primary": "depth_static",
            "secondary": None,
            "wrist": "depth_gripper",
        },
        "state_obs_keys": ["state_eef", None, "state_gripper"],
        "state_encoding": StateEncoding.POS_EULER,
        "action_encoding": ActionEncoding.EEF_POS,
        "control_frequency": 15,
        "robot_type": "Franka",
    },
    "jaco_play": {
        "image_obs_keys": {
            "primary": "image",
            "secondary": None,
            "wrist": "image_wrist",
        },
        "depth_obs_keys": {"primary": None, "secondary": None, "wrist": None},
        "state_obs_keys": ["state_eef", None, "state_gripper"],
        "state_encoding": StateEncoding.POS_EULER,
        "action_encoding": ActionEncoding.EEF_POS,
        "control_frequency": 10,
        "robot_type": "Jaco 2",
    },
    "berkeley_cable_routing": {
        "image_obs_keys": {
            "primary": "image",
            "secondary": "top_image",
            "wrist": "wrist45_image",
        },
        "depth_obs_keys": {"primary": None, "secondary": None, "wrist": None},
        "state_obs_keys": ["robot_state", None],
        "state_encoding": StateEncoding.JOINT,
        "action_encoding": ActionEncoding.EEF_POS,
        "control_frequency": 10,
        "robot_type": "Franka",
    },
    "roboturk": {
        "image_obs_keys": {"primary": "front_rgb", "secondary": None, "wrist": None},
        "depth_obs_keys": {"primary": None, "secondary": None, "wrist": None},
        "state_obs_keys": [None, None, None, None, None, None, None, None],
        "state_encoding": StateEncoding.NONE,
        "action_encoding": ActionEncoding.EEF_POS,
        "control_frequency": 10,
        "robot_type": "Sawyer",
    },
    "nyu_door_opening_surprising_effectiveness": {
        "image_obs_keys": {"primary": None, "secondary": None, "wrist": "image"},
        "depth_obs_keys": {"primary": None, "secondary": None, "wrist": None},
        "state_obs_keys": [None, None, None, None, None, None, None, None],
        "state_encoding": StateEncoding.NONE,
        "action_encoding": ActionEncoding.EEF_POS,
        "control_frequency": 3,
        "robot_type": "Hello Stretch",
    },
    "viola": {
        "image_obs_keys": {
            "primary": "agentview_rgb",
            "secondary": None,
            "wrist": "eye_in_hand_rgb",
        },
        "depth_obs_keys": {"primary": None, "secondary": None, "wrist": None},
        "state_obs_keys": ["joint_states", "gripper_states"],
        "state_encoding": StateEncoding.JOINT,
        "action_encoding": ActionEncoding.EEF_POS,
        "control_frequency": 20,
        "robot_type": "Franka",
    },
    "berkeley_autolab_ur5": {
        "image_obs_keys": {
            "primary": "image",
            "secondary": None,
            "wrist": "hand_image",
        },
        "depth_obs_keys": {"primary": "depth", "secondary": None, "wrist": None},
        "state_obs_keys": ["state"],
        "state_encoding": StateEncoding.POS_QUAT,
        "action_encoding": ActionEncoding.EEF_POS,
        "control_frequency": 5,
        "robot_type": "UR5",
    },
    "toto": {
        "image_obs_keys": {"primary": "image", "secondary": None, "wrist": None},
        "depth_obs_keys": {"primary": None, "secondary": None, "wrist": None},
        "state_obs_keys": ["state", None],
        "state_encoding": StateEncoding.JOINT,
        "action_encoding": ActionEncoding.EEF_POS,
        "control_frequency": 30,
        "robot_type": "Franka",
    },
    "language_table": {
        "image_obs_keys": {"primary": "rgb", "secondary": None, "wrist": None},
        "depth_obs_keys": {"primary": None, "secondary": None, "wrist": None},
        "state_obs_keys": ["effector_translation", None, None, None, None, None, None],
        "state_encoding": StateEncoding.POS_EULER,
        "action_encoding": ActionEncoding.EEF_POS,
        "control_frequency": 10,
        "robot_type": "xArm",
    },
    "columbia_cairlab_pusht_real": {
        "image_obs_keys": {
            "primary": "image",
            "secondary": None,
            "wrist": "wrist_image",
        },
        "depth_obs_keys": {"primary": None, "secondary": None, "wrist": None},
        "state_obs_keys": ["robot_state", None, None, None, None, None, None],
        "state_encoding": StateEncoding.POS_EULER,
        "action_encoding": ActionEncoding.EEF_POS,
        "control_frequency": 10,
        "robot_type": "UR5",
    },
    "stanford_kuka_multimodal_dataset_converted_externally_to_rlds": {
        "image_obs_keys": {"primary": "image", "secondary": None, "wrist": None},
        "depth_obs_keys": {"primary": "depth_image", "secondary": None, "wrist": None},
        "state_obs_keys": ["ee_position", "ee_orientation", None],
        "state_encoding": StateEncoding.POS_QUAT,
        "action_encoding": ActionEncoding.EEF_POS,
        "control_frequency": 20,
        "robot_type": "Kuka iiwa",
    },
    "nyu_rot_dataset_converted_externally_to_rlds": {
        "image_obs_keys": {"primary": "image", "secondary": None, "wrist": None},
        "depth_obs_keys": {"primary": None, "secondary": None, "wrist": None},
        "state_obs_keys": ["eef_state", None, "gripper_state"],
        "state_encoding": StateEncoding.POS_EULER,
        "action_encoding": ActionEncoding.EEF_POS,
        "control_frequency": 3,
        "robot_type": "xArm",
    },
    "stanford_hydra_dataset_converted_externally_to_rlds": {
        "image_obs_keys": {
            "primary": "image",
            "secondary": None,
            "wrist": "wrist_image",
        },
        "depth_obs_keys": {"primary": None, "secondary": None, "wrist": None},
        "state_obs_keys": ["eef_state", None, "gripper_state"],
        "state_encoding": StateEncoding.POS_EULER,
        "action_encoding": ActionEncoding.EEF_POS,
        "control_frequency": 10,
        "robot_type": "Franka",
    },
    "austin_buds_dataset_converted_externally_to_rlds": {
        "image_obs_keys": {
            "primary": "image",
            "secondary": None,
            "wrist": "wrist_image",
        },
        "depth_obs_keys": {"primary": None, "secondary": None, "wrist": None},
        "state_obs_keys": ["state"],
        "state_encoding": StateEncoding.JOINT,
        "action_encoding": ActionEncoding.EEF_POS,
        "control_frequency": 20,
        "robot_type": "Franka",
    },
    "nyu_franka_play_dataset_converted_externally_to_rlds": {
        "image_obs_keys": {
            "primary": "image",
            "secondary": "image_additional_view",
            "wrist": None,
        },
        "depth_obs_keys": {
            "primary": "depth",
            "secondary": "depth_additional_view",
            "wrist": None,
        },
        "state_obs_keys": ["eef_state", None, None],
        "state_encoding": StateEncoding.POS_EULER,
        "action_encoding": ActionEncoding.EEF_POS,
        "control_frequency": 3,
        "robot_type": "Franka",
    },
    "maniskill_dataset_converted_externally_to_rlds": {
        "image_obs_keys": {
            "primary": "image",
            "secondary": None,
            "wrist": "wrist_image",
        },
        "depth_obs_keys": {
            "primary": "depth",
            "secondary": None,
            "wrist": "wrist_depth",
        },
        "state_obs_keys": ["tcp_pose", "gripper_state"],
        "state_encoding": StateEncoding.POS_QUAT,
        "action_encoding": ActionEncoding.EEF_POS,
        "control_frequency": 20,
        "robot_type": "Franka",
    },
    "furniture_bench_dataset_converted_externally_to_rlds": {
        "image_obs_keys": {
            "primary": "image",
            "secondary": None,
            "wrist": "wrist_image",
        },
        "depth_obs_keys": {"primary": None, "secondary": None, "wrist": None},
        "state_obs_keys": ["state"],
        "state_encoding": StateEncoding.POS_QUAT,
        "action_encoding": ActionEncoding.EEF_POS,
        "control_frequency": 10,
        "robot_type": "Franka",
    },
    "cmu_franka_exploration_dataset_converted_externally_to_rlds": {
        "image_obs_keys": {
            "primary": "highres_image",
            "secondary": None,
            "wrist": None,
        },
        "depth_obs_keys": {"primary": None, "secondary": None, "wrist": None},
        "state_obs_keys": [None, None, None, None, None, None, None, None],
        "state_encoding": StateEncoding.NONE,
        "action_encoding": ActionEncoding.EEF_POS,
        "control_frequency": 10,
        "robot_type": "Franka",
    },
    "ucsd_kitchen_dataset_converted_externally_to_rlds": {
        "image_obs_keys": {"primary": "image", "secondary": None, "wrist": None},
        "depth_obs_keys": {"primary": None, "secondary": None, "wrist": None},
        "state_obs_keys": ["joint_state", None],
        "state_encoding": StateEncoding.JOINT,
        "action_encoding": ActionEncoding.EEF_POS,
        "control_frequency": 2,
        "robot_type": "xArm",
    },
    "ucsd_pick_and_place_dataset_converted_externally_to_rlds": {
        "image_obs_keys": {"primary": "image", "secondary": None, "wrist": None},
        "depth_obs_keys": {"primary": None, "secondary": None, "wrist": None},
        "state_obs_keys": ["eef_state", None, "gripper_state"],
        "state_encoding": StateEncoding.POS_EULER,
        "action_encoding": ActionEncoding.EEF_POS,
        "control_frequency": 3,
        "robot_type": "xArm",
    },
    "austin_sailor_dataset_converted_externally_to_rlds": {
        "image_obs_keys": {
            "primary": "image",
            "secondary": None,
            "wrist": "wrist_image",
        },
        "depth_obs_keys": {"primary": None, "secondary": None, "wrist": None},
        "state_obs_keys": ["state"],
        "state_encoding": StateEncoding.POS_QUAT,
        "action_encoding": ActionEncoding.EEF_POS,
        "control_frequency": 20,
        "robot_type": "Franka",
    },
    "austin_sirius_dataset_converted_externally_to_rlds": {
        "image_obs_keys": {
            "primary": "image",
            "secondary": None,
            "wrist": "wrist_image",
        },
        "depth_obs_keys": {"primary": None, "secondary": None, "wrist": None},
        "state_obs_keys": ["state"],
        "state_encoding": StateEncoding.POS_QUAT,
        "action_encoding": ActionEncoding.EEF_POS,
        "control_frequency": 20,
        "robot_type": "Franka",
    },
    "bc_z": {
        "image_obs_keys": {"primary": "image", "secondary": None, "wrist": None},
        "depth_obs_keys": {"primary": None, "secondary": None, "wrist": None},
        "state_obs_keys": [
            "present/xyz",
            "present/axis_angle",
            None,
            "present/sensed_close",
        ],
        "state_encoding": StateEncoding.POS_EULER,
        "action_encoding": ActionEncoding.EEF_POS,
        "control_frequency": 10,
        "robot_type": "Google Robot",
    },
    "utokyo_pr2_opening_fridge_converted_externally_to_rlds": {
        "image_obs_keys": {"primary": "image", "secondary": None, "wrist": None},
        "depth_obs_keys": {"primary": None, "secondary": None, "wrist": None},
        "state_obs_keys": ["eef_state", None, "gripper_state"],
        "state_encoding": StateEncoding.POS_EULER,
        "action_encoding": ActionEncoding.EEF_POS,
        "control_frequency": 10,
        "robot_type": "PR2",
    },
    "utokyo_pr2_tabletop_manipulation_converted_externally_to_rlds": {
        "image_obs_keys": {"primary": "image", "secondary": None, "wrist": None},
        "depth_obs_keys": {"primary": None, "secondary": None, "wrist": None},
        "state_obs_keys": ["eef_state", None, "gripper_state"],
        "state_encoding": StateEncoding.POS_EULER,
        "action_encoding": ActionEncoding.EEF_POS,
        "control_frequency": 10,
        "robot_type": "PR2",
    },
    "utokyo_xarm_pick_and_place_converted_externally_to_rlds": {
        "image_obs_keys": {
            "primary": "image",
            "secondary": "image2",
            "wrist": "hand_image",
        },
        "depth_obs_keys": {"primary": None, "secondary": None, "wrist": None},
        "state_obs_keys": ["end_effector_pose", None, None],
        "state_encoding": StateEncoding.POS_EULER,
        "action_encoding": ActionEncoding.EEF_POS,
        "control_frequency": 10,
        "robot_type": "xArm",
    },
    "utokyo_xarm_bimanual_converted_externally_to_rlds": {
        "image_obs_keys": {"primary": "image", "secondary": None, "wrist": None},
        "depth_obs_keys": {"primary": None, "secondary": None, "wrist": None},
        "state_obs_keys": ["pose_r", None, None],
        "state_encoding": StateEncoding.POS_EULER,
        "action_encoding": ActionEncoding.EEF_POS,
        "control_frequency": 10,
        "robot_type": "xArm Bimanual",
    },
    "robo_net": {
        "image_obs_keys": {"primary": "image", "secondary": "image1", "wrist": None},
        "depth_obs_keys": {"primary": None, "secondary": None, "wrist": None},
        "state_obs_keys": ["eef_state", None, "gripper_state"],
        "state_encoding": StateEncoding.POS_EULER,
        "action_encoding": ActionEncoding.EEF_POS,
        "control_frequency": 1,
        "robot_type": "Multi-Robot",
    },
    "berkeley_mvp_converted_externally_to_rlds": {
        "image_obs_keys": {"primary": None, "secondary": None, "wrist": "hand_image"},
        "depth_obs_keys": {"primary": None, "secondary": None, "wrist": None},
        "state_obs_keys": ["pose", "gripper"],
        "state_encoding": StateEncoding.POS_QUAT,
        "action_encoding": ActionEncoding.JOINT_POS,
        "control_frequency": 5,
        "robot_type": "xArm",
    },
    "berkeley_rpt_converted_externally_to_rlds": {
        "image_obs_keys": {"primary": None, "secondary": None, "wrist": "hand_image"},
        "depth_obs_keys": {"primary": None, "secondary": None, "wrist": None},
        "state_obs_keys": ["joint_pos", "gripper"],
        "state_encoding": StateEncoding.JOINT,
        "action_encoding": ActionEncoding.JOINT_POS,
        "control_frequency": 30,
        "robot_type": "Franka",
    },
    "kaist_nonprehensile_converted_externally_to_rlds": {
        "image_obs_keys": {"primary": "image", "secondary": None, "wrist": None},
        "depth_obs_keys": {"primary": None, "secondary": None, "wrist": None},
        "state_obs_keys": ["state", None],
        "state_encoding": StateEncoding.POS_QUAT,
        "action_encoding": ActionEncoding.EEF_POS,
        "control_frequency": 10,
        "robot_type": "Franka",
    },
    "stanford_mask_vit_converted_externally_to_rlds": {
        "image_obs_keys": {"primary": "image", "secondary": None, "wrist": None},
        "depth_obs_keys": {"primary": None, "secondary": None, "wrist": None},
        "state_obs_keys": ["eef_state", None, "gripper_state"],
        "state_encoding": StateEncoding.POS_EULER,
        "action_encoding": ActionEncoding.EEF_POS,
        "control_frequency": None,
        "robot_type": "Sawyer",
    },
    "tokyo_u_lsmo_converted_externally_to_rlds": {
        "image_obs_keys": {"primary": "image", "secondary": None, "wrist": None},
        "depth_obs_keys": {"primary": None, "secondary": None, "wrist": None},
        "state_obs_keys": ["eef_state", None, "gripper_state"],
        "state_encoding": StateEncoding.POS_EULER,
        "action_encoding": ActionEncoding.EEF_POS,
        "control_frequency": 10,
        "robot_type": "Cobotta",
    },
    "dlr_sara_pour_converted_externally_to_rlds": {
        "image_obs_keys": {"primary": "image", "secondary": None, "wrist": None},
        "depth_obs_keys": {"primary": None, "secondary": None, "wrist": None},
        "state_obs_keys": ["state", None, None],
        "state_encoding": StateEncoding.POS_EULER,
        "action_encoding": ActionEncoding.EEF_POS,
        "control_frequency": 10,
        "robot_type": "DLR SARA",
    },
    "dlr_sara_grid_clamp_converted_externally_to_rlds": {
        "image_obs_keys": {"primary": "image", "secondary": None, "wrist": None},
        "depth_obs_keys": {"primary": None, "secondary": None, "wrist": None},
        "state_obs_keys": ["state", None, None],
        "state_encoding": StateEncoding.POS_EULER,
        "action_encoding": ActionEncoding.EEF_POS,
        "control_frequency": 10,
        "robot_type": "DLR SARA",
    },
    "dlr_edan_shared_control_converted_externally_to_rlds": {
        "image_obs_keys": {"primary": "image", "secondary": None, "wrist": None},
        "depth_obs_keys": {"primary": None, "secondary": None, "wrist": None},
        "state_obs_keys": ["state", None],
        "state_encoding": StateEncoding.POS_EULER,
        "action_encoding": ActionEncoding.EEF_POS,
        "control_frequency": 5,
        "robot_type": "DLR EDAN",
    },
    "asu_table_top_converted_externally_to_rlds": {
        "image_obs_keys": {"primary": "image", "secondary": None, "wrist": None},
        "depth_obs_keys": {"primary": None, "secondary": None, "wrist": None},
        "state_obs_keys": ["eef_state", None, "gripper_state"],
        "state_encoding": StateEncoding.POS_EULER,
        "action_encoding": ActionEncoding.EEF_POS,
        "control_frequency": 12.5,
        "robot_type": "UR5",
    },
    "stanford_robocook_converted_externally_to_rlds": {
        "image_obs_keys": {"primary": "image_1", "secondary": "image_2", "wrist": None},
        "depth_obs_keys": {"primary": "depth_1", "secondary": "depth_2", "wrist": None},
        "state_obs_keys": ["eef_state", None, "gripper_state"],
        "state_encoding": StateEncoding.POS_EULER,
        "action_encoding": ActionEncoding.EEF_POS,
        "control_frequency": 5,
        "robot_type": "Franka",
    },
    "imperialcollege_sawyer_wrist_cam": {
        "image_obs_keys": {
            "primary": "image",
            "secondary": None,
            "wrist": "wrist_image",
        },
        "depth_obs_keys": {"primary": None, "secondary": None, "wrist": None},
        "state_obs_keys": [None, None, None, None, None, None, None, "state"],
        "state_encoding": StateEncoding.NONE,
        "action_encoding": ActionEncoding.EEF_POS,
        "control_frequency": 10,
        "robot_type": "Sawyer",
    },
    "iamlab_cmu_pickup_insert_converted_externally_to_rlds": {
        "image_obs_keys": {
            "primary": "image",
            "secondary": None,
            "wrist": "wrist_image",
        },
        "depth_obs_keys": {"primary": None, "secondary": None, "wrist": None},
        "state_obs_keys": ["joint_state", "gripper_state"],
        "state_encoding": StateEncoding.JOINT,
        "action_encoding": ActionEncoding.EEF_POS,
        "control_frequency": 20,
        "robot_type": "Franka",
    },
    "uiuc_d3field": {
        "image_obs_keys": {"primary": "image_1", "secondary": "image_2", "wrist": None},
        "depth_obs_keys": {"primary": "depth_1", "secondary": "depth_2", "wrist": None},
        "state_obs_keys": [None, None, None, None, None, None, None, None],
        "state_encoding": StateEncoding.NONE,
        "action_encoding": ActionEncoding.EEF_POS,
        "control_frequency": 1,
        "robot_type": "Kinova Gen3",
    },
    "utaustin_mutex": {
        "image_obs_keys": {
            "primary": "image",
            "secondary": None,
            "wrist": "wrist_image",
        },
        "depth_obs_keys": {"primary": None, "secondary": None, "wrist": None},
        "state_obs_keys": ["state"],
        "state_encoding": StateEncoding.JOINT,
        "action_encoding": ActionEncoding.EEF_POS,
        "control_frequency": 20,
        "robot_type": "Franka",
    },
    "berkeley_fanuc_manipulation": {
        "image_obs_keys": {
            "primary": "image",
            "secondary": None,
            "wrist": "wrist_image",
        },
        "depth_obs_keys": {"primary": None, "secondary": None, "wrist": None},
        "state_obs_keys": ["joint_state", None, "gripper_state"],
        "state_encoding": StateEncoding.JOINT,
        "action_encoding": ActionEncoding.EEF_POS,
        "control_frequency": 10,
        "robot_type": "Fanuc Mate",
    },
    "cmu_playing_with_food": {
        "image_obs_keys": {
            "primary": "image",
            "secondary": None,
            "wrist": "finger_vision_1",
        },
        "depth_obs_keys": {"primary": None, "secondary": None, "wrist": None},
        "state_obs_keys": ["state", None, None],
        "state_encoding": StateEncoding.POS_EULER,
        "action_encoding": ActionEncoding.EEF_POS,
        "control_frequency": 10,
        "robot_type": "Franka",
    },
    "cmu_play_fusion": {
        "image_obs_keys": {"primary": "image", "secondary": None, "wrist": None},
        "depth_obs_keys": {"primary": None, "secondary": None, "wrist": None},
        "state_obs_keys": ["state"],
        "state_encoding": StateEncoding.JOINT,
        "action_encoding": ActionEncoding.EEF_POS,
        "control_frequency": 5,
        "robot_type": "Franka",
    },
    "cmu_stretch": {
        "image_obs_keys": {"primary": "image", "secondary": None, "wrist": None},
        "depth_obs_keys": {"primary": None, "secondary": None, "wrist": None},
        "state_obs_keys": ["eef_state", None, "gripper_state"],
        "state_encoding": StateEncoding.POS_EULER,
        "action_encoding": ActionEncoding.EEF_POS,
        "control_frequency": 10,
        "robot_type": "Hello Stretch",
    },
    "berkeley_gnm_recon": {
        "image_obs_keys": {"primary": None, "secondary": None, "wrist": "image"},
        "depth_obs_keys": {"primary": None, "secondary": None, "wrist": None},
        "state_obs_keys": ["state", None, None],
        "state_encoding": StateEncoding.POS_EULER,
        "action_encoding": ActionEncoding.EEF_POS,
        "control_frequency": 3,
        "robot_type": "Jackal",
    },
    "berkeley_gnm_cory_hall": {
        "image_obs_keys": {"primary": None, "secondary": None, "wrist": "image"},
        "depth_obs_keys": {"primary": None, "secondary": None, "wrist": None},
        "state_obs_keys": ["state", None, None],
        "state_encoding": StateEncoding.POS_EULER,
        "action_encoding": ActionEncoding.EEF_POS,
        "control_frequency": 5,
        "robot_type": "RC Car",
    },
    "berkeley_gnm_sac_son": {
        "image_obs_keys": {"primary": None, "secondary": None, "wrist": "image"},
        "depth_obs_keys": {"primary": None, "secondary": None, "wrist": None},
        "state_obs_keys": ["state", None, None],
        "state_encoding": StateEncoding.POS_EULER,
        "action_encoding": ActionEncoding.EEF_POS,
        "control_frequency": 10,
        "robot_type": "TurtleBot 2",
    },
    # NOTE: modified (presumably relative to the upstream OpenVLA config this file is adapted from)
    "droid": {
        "image_obs_keys": {
            "primary": "exterior_image_1_left",
            "secondary": "exterior_image_2_left",
            "wrist": "wrist_image_left",
        },
        "depth_obs_keys": {"primary": None, "secondary": None, "wrist": None},
        "state_obs_keys": ["EEF_state", None, "gripper_state"],
        "state_encoding": StateEncoding.POS_EULER,
        "action_encoding": ActionEncoding.EEF_POS,
        "control_frequency": 15,
        "robot_type": "Franka",
        # Only entry with extra kwargs: registers `zero_action_filter` as the chunk filter.
        "aux_kwargs": {
            "dataset_frame_transform_kwargs": {
                "chunk_filter_fn": zero_action_filter,
            },
        },
    },
    "fmb_dataset": {
        "image_obs_keys": {
            "primary": "image_side_1",
            "secondary": "image_side_2",
            "wrist": "image_wrist_1",
        },
        "depth_obs_keys": {
            "primary": "image_side_1_depth",
            "secondary": "image_side_2_depth",
            "wrist": "image_wrist_1_depth",
        },
        "state_obs_keys": ["proprio"],
        "state_encoding": StateEncoding.POS_EULER,
        "action_encoding": ActionEncoding.EEF_POS,
        "control_frequency": 10,
        "robot_type": "Franka",
    },
    # NOTE: modified (presumably relative to the upstream OpenVLA config this file is adapted from)
    "dobbe": {
        "image_obs_keys": {"primary": "wrist_image", "secondary": None, "wrist": None},
        "depth_obs_keys": {"primary": None, "secondary": None, "wrist": None},
        "state_obs_keys": ["EEF_state", None, "gripper_state"],
        "state_encoding": StateEncoding.POS_EULER,
        "action_encoding": ActionEncoding.EEF_POS,
        "control_frequency": 3.75,
        "robot_type": "Hello Stretch",
    },
    "roboset": {
        "image_obs_keys": {
            "primary": "image_left",
            "secondary": "image_right",
            "wrist": "image_wrist",
        },
        "depth_obs_keys": {"primary": None, "secondary": None, "wrist": None},
        "state_obs_keys": ["proprio"],
        "state_encoding": StateEncoding.JOINT,
        "action_encoding": ActionEncoding.JOINT_POS,
        "control_frequency": 5,
        "robot_type": "Franka",
    },
    "rh20t": {
        "image_obs_keys": {
            "primary": "image_front",
            "secondary": "image_side_right",
            "wrist": "image_wrist",
        },
        "depth_obs_keys": {"primary": None, "secondary": None, "wrist": None},
        "state_obs_keys": ["proprio"],
        "state_encoding": StateEncoding.POS_EULER,
        "action_encoding": ActionEncoding.EEF_POS,
        "control_frequency": 10,
        "robot_type": "Flexiv",
    },
    ### T-DROID datasets
    "tdroid_carrot_in_bowl": {  # "put carrot in bowl" task, 50 demos @ 5 Hz control
        "image_obs_keys": {"primary": "static_image", "secondary": None, "wrist": None},
        "depth_obs_keys": {"primary": "static_depth_image", "secondary": None, "wrist": None},
        "state_obs_keys": ["EEF_state", None, "gripper_state"],
        "state_encoding": StateEncoding.POS_EULER,
        "action_encoding": ActionEncoding.EEF_POS,
        "control_frequency": 5,
        "robot_type": "Franka",
    },
    "tdroid_pour_corn_in_pot": {  # "pour corn from red bowl into steel pot" task, 50 demos @ 5 Hz control
        "image_obs_keys": {"primary": "static_image", "secondary": None, "wrist": None},
        "depth_obs_keys": {"primary": "static_depth_image", "secondary": None, "wrist": None},
        "state_obs_keys": ["EEF_state", None, "gripper_state"],
        "state_encoding": StateEncoding.POS_EULER,
        "action_encoding": ActionEncoding.EEF_POS,
        "control_frequency": 5,
        "robot_type": "Franka",
    },
    "tdroid_flip_pot_upright": {  # "flip pot upright" task, 10 demos @ 5 Hz control
        "image_obs_keys": {"primary": "static_image", "secondary": None, "wrist": None},
        "depth_obs_keys": {"primary": "static_depth_image", "secondary": None, "wrist": None},
        "state_obs_keys": ["EEF_state", None, "gripper_state"],
        "state_encoding": StateEncoding.POS_EULER,
        "action_encoding": ActionEncoding.EEF_POS,
        "control_frequency": 5,
        "robot_type": "Franka",
    },
    "tdroid_move_object_onto_plate": {  # "move <object> onto plate" task, 150 demos @ 5 Hz control
        "image_obs_keys": {"primary": "static_image", "secondary": None, "wrist": None},
        "depth_obs_keys": {"primary": "static_depth_image", "secondary": None, "wrist": None},
        "state_obs_keys": ["EEF_state", None, "gripper_state"],
        "state_encoding": StateEncoding.POS_EULER,
        "action_encoding": ActionEncoding.EEF_POS,
        "control_frequency": 5,
        "robot_type": "Franka",
    },
    "tdroid_knock_object_over": {  # "knock <object> over" task, 70 demos @ 5 Hz control
        "image_obs_keys": {"primary": "static_image", "secondary": None, "wrist": None},
        "depth_obs_keys": {"primary": "static_depth_image", "secondary": None, "wrist": None},
        "state_obs_keys": ["EEF_state", None, "gripper_state"],
        "state_encoding": StateEncoding.POS_EULER,
        "action_encoding": ActionEncoding.EEF_POS,
        "control_frequency": 5,
        "robot_type": "Franka",
    },
    "tdroid_cover_object_with_towel": {  # "cover <object> with towel" task, 45 demos @ 5 Hz control
        "image_obs_keys": {"primary": "static_image", "secondary": None, "wrist": None},
        "depth_obs_keys": {"primary": "static_depth_image", "secondary": None, "wrist": None},
        "state_obs_keys": ["EEF_state", None, "gripper_state"],
        "state_encoding": StateEncoding.POS_EULER,
        "action_encoding": ActionEncoding.EEF_POS,
        "control_frequency": 5,
        "robot_type": "Franka",
    },
    ### DROID Finetuning datasets
    "droid_wipe": {
        "image_obs_keys": {
            "primary": "exterior_image_2_left",
            "secondary": None,
            "wrist": "wrist_image_left",
        },
        "depth_obs_keys": {"primary": None, "secondary": None, "wrist": None},
        "state_obs_keys": ["proprio"],
        "state_encoding": StateEncoding.POS_EULER,
        "action_encoding": ActionEncoding.EEF_POS,
        "control_frequency": 15,
        "robot_type": "Franka",
    },
    # NOTE: modified (presumably relative to the upstream OpenVLA config this file is adapted from)
    ### LIBERO datasets (modified versions)
    "libero_spatial_no_noops": {
        "image_obs_keys": {"primary": "image", "secondary": None, "wrist": "wrist_image"},
        "depth_obs_keys": {"primary": None, "secondary": None, "wrist": None},
        "state_obs_keys": ["EEF_state", "gripper_state"],
        "state_encoding": StateEncoding.POS_EULER,
        "action_encoding": ActionEncoding.EEF_POS,
        "control_frequency": 20,
        "robot_type": "Franka",
    },
    "libero_object_no_noops": {
        "image_obs_keys": {"primary": "image", "secondary": None, "wrist": "wrist_image"},
        "depth_obs_keys": {"primary": None, "secondary": None, "wrist": None},
        "state_obs_keys": ["EEF_state", "gripper_state"],
        "state_encoding": StateEncoding.POS_EULER,
        "action_encoding": ActionEncoding.EEF_POS,
        "control_frequency": 20,
        "robot_type": "Franka",
    },
    "libero_goal_no_noops": {
        "image_obs_keys": {"primary": "image", "secondary": None, "wrist": "wrist_image"},
        "depth_obs_keys": {"primary": None, "secondary": None, "wrist": None},
        "state_obs_keys": ["EEF_state", "gripper_state"],
        "state_encoding": StateEncoding.POS_EULER,
        "action_encoding": ActionEncoding.EEF_POS,
        "control_frequency": 20,
        "robot_type": "Franka",
    },
    "libero_10_no_noops": {
        "image_obs_keys": {"primary": "image", "secondary": None, "wrist": "wrist_image"},
        "depth_obs_keys": {"primary": None, "secondary": None, "wrist": None},
        "state_obs_keys": ["EEF_state", "gripper_state"],
        "state_encoding": StateEncoding.POS_EULER,
        "action_encoding": ActionEncoding.EEF_POS,
        "control_frequency": 20,
        "robot_type": "Franka",
    },
}
@@ -0,0 +1,76 @@
"""
Copied from https://github.com/openvla/openvla/blob/main/prismatic/vla/datasets/rlds/utils/data_utils.py
"""
from typing import Any
import tensorflow as tf
def binarize_gripper_actions(actions: tf.Tensor) -> tf.Tensor:
    """
    Converts gripper actions from continuous to binary values (0 and 1).

    We exploit the fact that most of the time, the gripper is fully open (near 1.0) or fully closed (near 0.0). As it
    transitions between the two, it sometimes passes through a few intermediate values. We relabel those intermediate
    values based on the state that is reached _after_ those intermediate values.

    In the edge case that the trajectory ends with an intermediate value, we give up on binarizing and relabel that
    chunk of intermediate values as the last action in the trajectory.

    The `scan_fn` implements the following logic:
        new_actions = np.empty_like(actions)
        carry = actions[-1]
        for i in reversed(range(actions.shape[0])):
            if in_between_mask[i]:
                carry = carry
            else:
                carry = float(open_mask[i])
            new_actions[i] = carry

    Args:
        actions: Gripper actions, walked along the first axis (presumably 1-D, one value per timestep --
            confirm against callers). Values above 0.95 count as open, values below 0.05 as closed.

    Returns:
        float32 tensor with one relabeled gripper action per input step.
    """
    open_mask, closed_mask = actions > 0.95, actions < 0.05
    in_between_mask = tf.logical_not(tf.logical_or(open_mask, closed_mask))
    is_open_float = tf.cast(open_mask, tf.float32)

    def scan_fn(carry, i):
        # Intermediate value: inherit the state reached afterwards (the scan runs back-to-front).
        return tf.cond(in_between_mask[i], lambda: tf.cast(carry, tf.float32), lambda: is_open_float[i])

    # `scan_fn` always yields float32, so the initial carry has to be float32 as well; the cast is a no-op
    # for float32 input and keeps the scan well-typed for any other input dtype.
    initial_carry = tf.cast(actions[-1], tf.float32)
    return tf.scan(scan_fn, tf.range(tf.shape(actions)[0]), initial_carry, reverse=True)
def invert_gripper_actions(actions: tf.Tensor) -> tf.Tensor:
    """Flips gripper actions by returning `1 - actions` (so 0 becomes 1 and 1 becomes 0)."""
    return 1 - actions
def rel2abs_gripper_actions(actions: tf.Tensor) -> tf.Tensor:
    """
    Converts relative gripper actions (+1 for closing, -1 for opening) to absolute actions (0 = closed; 1 = open).

    Values within [-0.1, 0.1] count as "no change" and inherit the most recent open/close command. The state
    before the first command is taken to be the opposite of that command, i.e. this assumes that the first
    relative gripper action is not redundant (e.g. close when already closed)! If the trajectory contains no
    command at all, the gripper is treated as open for the whole trajectory.

    Args:
        actions: Relative gripper actions, walked along the first axis (presumably 1-D, one value per
            timestep -- confirm against callers).

    Returns:
        float32 tensor of absolute gripper actions (0.0 = closed, 1.0 = open).
    """
    # The sign convention flips here. After thresholding: -1 for closing, 1 for opening, 0 for no change.
    opening_mask, closing_mask = actions < -0.1, actions > 0.1
    thresholded_actions = tf.where(opening_mask, 1, tf.where(closing_mask, -1, 0))

    def scan_fn(carry, i):
        # Hold the previous state unless this step carries an explicit open/close command.
        return tf.cond(thresholded_actions[i] == 0, lambda: carry, lambda: thresholded_actions[i])

    # Initial state = opposite of a non-zero command located via argmax over the boolean mask
    # (presumably the first one -- confirm argmax tie-breaking). Without any command the lookup yields 0.
    start = -1 * thresholded_actions[tf.argmax(thresholded_actions != 0, axis=0)]
    # If no relative grasp, assumes open for whole trajectory
    start = tf.cond(start == 0, lambda: 1, lambda: start)

    # Note =>> -1 for closed, 1 for open
    new_actions = tf.scan(scan_fn, tf.range(tf.shape(actions)[0]), start)

    # Rescale {-1, 1} to {0.0, 1.0}.
    new_actions = tf.cast(new_actions, tf.float32) / 2 + 0.5
    return new_actions
# === Bridge-V2 =>> Dataset-Specific Transform ===
def relabel_bridge_actions(traj: dict[str, Any]) -> dict[str, Any]:
    """
    Relabels actions to use reached proprioceptive state; discards last timestep (no-action).

    The first six action dimensions are replaced by the difference between consecutive
    `traj["observation"]["state"][:, :6]` rows; the last column of the original action is kept as-is
    (presumably the gripper command -- confirm against the dataset schema).

    Args:
        traj: Nested trajectory structure containing at least `traj["observation"]["state"]` and
            `traj["action"]`, both indexed along the first axis.

    Returns:
        Copy of `traj` in which every leaf has its final step removed and "action" holds the relabeled actions.
    """
    # state[t + 1] - state[t]: the movement actually achieved at each step (one row fewer than the input).
    movement_actions = traj["observation"]["state"][1:, :6] - traj["observation"]["state"][:-1, :6]

    # The final step has no successor state to derive a movement from, so drop it from every leaf.
    traj_truncated = tf.nest.map_structure(lambda x: x[:-1], traj)
    traj_truncated["action"] = tf.concat([movement_actions, traj["action"][:-1, -1:]], axis=1)
    return traj_truncated
File diff suppressed because it is too large Load Diff