# Per-feature specification keyed by feature name. Each entry gives the
# feature's "dtype", its "shape", and "names" labelling the axes/components.
# Image features use dtype "video" with (height, width, rgb) axes; all other
# features are float32 vectors whose components are listed under "motors".
LIBERO_FEATURES = {
    "observation.images.image": {
        "dtype": "video",
        "shape": (256, 256, 3),
        "names": ["height", "width", "rgb"],
    },
    "observation.images.wrist_image": {
        "dtype": "video",
        "shape": (256, 256, 3),
        "names": ["height", "width", "rgb"],
    },
    # 8 components: the 6 listed under ee_state followed by the 2 listed
    # under gripper_state (same names, same order).
    # NOTE(review): the last two names are both "gripper" -- presumably two
    # separate gripper values; confirm and disambiguate if consumers need
    # unique names.
    "observation.state": {
        "dtype": "float32",
        "shape": (8,),
        "names": {
            "motors": [
                "x",
                "y",
                "z",
                "axis_angle1",
                "axis_angle2",
                "axis_angle3",
                "gripper",
                "gripper",
            ]
        },
    },
    "observation.states.ee_state": {
        "dtype": "float32",
        "shape": (6,),
        "names": {
            "motors": ["x", "y", "z", "axis_angle1", "axis_angle2", "axis_angle3"]
        },
    },
    "observation.states.joint_state": {
        "dtype": "float32",
        "shape": (7,),
        "names": {
            "motors": [
                "joint_0",
                "joint_1",
                "joint_2",
                "joint_3",
                "joint_4",
                "joint_5",
                "joint_6",
            ]
        },
    },
    "observation.states.gripper_state": {
        "dtype": "float32",
        "shape": (2,),
        "names": {"motors": ["gripper", "gripper"]},
    },
    "action": {
        "dtype": "float32",
        "shape": (7,),
        "names": {
            "motors": [
                "x",
                "y",
                "z",
                "axis_angle1",
                "axis_angle2",
                "axis_angle3",
                "gripper",
            ]
        },
    },
}

# Dataset layout notes
# --------------------
# NOTE(review): these notes describe bimanual (nx14) data and may refer to a
# different dataset than LIBERO_FEATURES above -- confirm.
#
# Everything is float32 except for language_instruction and the images (the
# images are stored encoded and should be decoded using cv2, see below).
# "n" is the number of samples. 🌟 presumably marks the fields that are
# actually used; the remaining fields are labelled "not used in our paper".
#
# ├── action                   # nx14 absolute bimanual joints, not used in our paper
# ├── base_action              # nx2 chassis actions, not used in our paper
# ├── language_instruction     # 🌟 "fold the cloth"
# ├── observations
# │   ├── eef                  # nx14 absolute eef pos using euler angles to represent the rotation, not used in our paper
# │   ├── eef_quaternion       # nx16 absolute eef pos using quaternion to represent the rotation, not used in our paper
# │   ├── eef_6d               # 🌟 nx20 absolute eef pos using rotate6d to represent the rotation
# │   ├── eef_left_time        # 🌟 nx1 the timestamp for left arm eef pos, can be used for resampling or interpolation
# │   ├── eef_right_time       # 🌟 nx1 the timestamp for right arm eef pos, can be used for resampling or interpolation
# │   ├── qpos                 # nx14 absolute bimanual joints, not used in our paper
# │   ├── qpos_left_time       # nx1 the timestamp for left arm joint pos, can be used for resampling or interpolation, not used in our paper
# │   ├── qpos_right_time      # nx1 the timestamp for right arm joint pos, can be used for resampling or interpolation, not used in our paper
# │   ├── qvel                 # nx14 bimanual joint velocity, not used in our paper
# │   ├── effort               # nx14 bimanual joint effort, not used in our paper
# │   ├── images
# │   │   ├── cam_high         # 🌟 the encoded head cam view, should be decoded using cv2
# │   │   ├── cam_left_wrist   # 🌟 the encoded left wrist view, should be decoded using cv2
# │   │   ├── cam_right_wrist  # 🌟 the encoded right wrist view, should be decoded using cv2
# ├── time_stamp               # the timestamp for each sample, not used in our paper