From 839ac5f2aa4625a1644300fea379c4d9b2e7fcef Mon Sep 17 00:00:00 2001 From: Pepijn <138571049+pkooij@users.noreply.github.com> Date: Sat, 13 Sep 2025 21:25:19 +0200 Subject: [PATCH] fix(processor): phone examples (#1921) * fix(processor): phone examples * chore(processor): simplify gripper in phone example kinematic chain --------- Co-authored-by: Steven Palma --- examples/phone_to_so100/record.py | 14 +- src/lerobot/datasets/utils.py | 8 +- src/lerobot/processor/pipeline.py | 2 +- src/lerobot/record.py | 13 +- .../robot_kinematic_processor.py | 171 +++++++++--------- .../teleoperators/phone/phone_processor.py | 11 +- 6 files changed, 107 insertions(+), 112 deletions(-) diff --git a/examples/phone_to_so100/record.py b/examples/phone_to_so100/record.py index 7afac684c..3509ca51a 100644 --- a/examples/phone_to_so100/record.py +++ b/examples/phone_to_so100/record.py @@ -89,6 +89,7 @@ phone_to_robot_ee_pose_processor = RobotProcessorPipeline[RobotAction, EnvTransi max_ee_step_m=0.20, max_ee_twist_step_rad=0.50, ), + GripperVelocityToJoint(), ], to_transition=robot_action_to_transition, to_output=identity_transition, @@ -102,10 +103,6 @@ robot_ee_to_joints_processor = RobotProcessorPipeline[EnvTransition, RobotAction motor_names=list(robot.bus.motors.keys()), initial_guess_current_joints=True, ), - GripperVelocityToJoint( - motor_names=list(robot.bus.motors.keys()), - speed_factor=20.0, - ), ], to_transition=identity_transition, to_output=transition_to_robot_action, @@ -127,13 +124,6 @@ action_ee = aggregate_pipeline_dataset_features( use_videos=True, ) -# Get gripper pos action features -gripper = aggregate_pipeline_dataset_features( - pipeline=robot_ee_to_joints_processor, - initial_features=create_initial_features(action=robot.action_features, observation={}), - use_videos=True, -) - # Build dataset ee observation features observation_ee = aggregate_pipeline_dataset_features( pipeline=robot_joints_to_ee_pose, @@ -141,7 +131,7 @@ observation_ee = aggregate_pipeline_dataset_features( use_videos=True, ) -dataset_features = combine_feature_dicts(action_ee, gripper, observation_ee) +dataset_features = combine_feature_dicts(action_ee, observation_ee) print("All dataset features: ", dataset_features) diff --git a/src/lerobot/datasets/utils.py b/src/lerobot/datasets/utils.py index 4e9f852f9..44e177445 100644 --- a/src/lerobot/datasets/utils.py +++ b/src/lerobot/datasets/utils.py @@ -684,7 +684,11 @@ def hw_to_dataset_features( dict: A LeRobot features dictionary. """ features = {} - joint_fts = {key: ftype for key, ftype in hw_features.items() if ftype is float} + joint_fts = { + key: ftype + for key, ftype in hw_features.items() + if ftype is float or (isinstance(ftype, PolicyFeature) and ftype.type != FeatureType.VISUAL) + } cam_fts = {key: shape for key, shape in hw_features.items() if isinstance(shape, tuple)} if joint_fts and prefix == "action": @@ -736,7 +740,7 @@ def build_dataset_frame( elif ft["dtype"] == "float32" and len(ft["shape"]) == 1: frame[key] = np.array([values[name] for name in ft["names"]], dtype=np.float32) elif ft["dtype"] in ["image", "video"]: - frame[key] = values[key.removeprefix(f"{prefix}.images.")] + frame[key] = values[key] return frame diff --git a/src/lerobot/processor/pipeline.py b/src/lerobot/processor/pipeline.py index 5f940a77d..4a7db443d 100644 --- a/src/lerobot/processor/pipeline.py +++ b/src/lerobot/processor/pipeline.py @@ -827,7 +827,7 @@ class ObservationProcessorStep(ProcessorStep, ABC): """An abstract `ProcessorStep` that specifically targets the observation in a transition.""" @abstractmethod - def observation(self, observation) -> dict[str, Any]: + def observation(self, observation: dict[str, Any]) -> dict[str, Any]: """Processes an observation dictionary. Subclasses must implement this method. Args: diff --git a/src/lerobot/record.py b/src/lerobot/record.py index b5a93e34c..fac861cde 100644 --- a/src/lerobot/record.py +++ b/src/lerobot/record.py @@ -307,7 +307,6 @@ def record_loop( obs_transition = robot_observation_processor(obs) if policy is not None or dataset is not None: - # TODO(Steven): We might be able to get rid of this observation_frame = build_dataset_frame( dataset.features, obs_transition[TransitionKey.OBSERVATION], prefix="observation" ) @@ -357,26 +356,26 @@ def record_loop( # Applies a pipeline to the action, default is IdentityProcessor # IMPORTANT: action_pipeline.to_output must return a dict suitable for robot.send_action() if policy is not None and policy_transition is not None: + action_values = policy_transition[TransitionKey.ACTION] robot_action_to_send = robot_action_processor(policy_transition) else: + action_values = teleop_transition[TransitionKey.ACTION] robot_action_to_send = robot_action_processor(teleop_transition) # Send action to robot # Action can eventually be clipped using `max_relative_target`, # so action actually sent is saved in the dataset. action = postprocessor.process(action) - # TODO(pepijn, adil): we should use a pipeline step to clip the action, so the sent action is the action that we input to the robot. - sent_action = robot.send_action(robot_action_to_send) + # TODO(steven, pepijn, adil): we should use a pipeline step to clip the action, so the sent action is the action that we input to the robot. + _sent_action = robot.send_action(robot_action_to_send) # Write to dataset if dataset is not None: - action_frame = build_dataset_frame(dataset.features, sent_action, prefix="action") + action_frame = build_dataset_frame(dataset.features, action_values, prefix="action") frame = {**observation_frame, **action_frame} dataset.add_frame(frame, task=single_task) if display_data: - log_rerun_data( - observation=obs_transition.get(TransitionKey.OBSERVATION), action=robot_action_to_send - ) + log_rerun_data(observation=obs_transition[TransitionKey.OBSERVATION], action=action_values) dt_s = time.perf_counter() - start_loop_t busy_wait(1 / fps - dt_s) diff --git a/src/lerobot/robots/so100_follower/robot_kinematic_processor.py b/src/lerobot/robots/so100_follower/robot_kinematic_processor.py index 3874d711c..be42d5654 100644 --- a/src/lerobot/robots/so100_follower/robot_kinematic_processor.py +++ b/src/lerobot/robots/so100_follower/robot_kinematic_processor.py @@ -15,11 +15,11 @@ # limitations under the License. from dataclasses import dataclass, field +from typing import Any import numpy as np from lerobot.configs.types import FeatureType, PipelineFeatureType, PolicyFeature -from lerobot.constants import OBS_STATE from lerobot.model.kinematics import RobotKinematics from lerobot.processor import ( ComplementaryDataProcessorStep, @@ -92,13 +92,14 @@ class EEReferenceAndDelta(RobotActionProcessorStep): # Current pose from FK on measured joints t_curr = self.kinematics.forward_kinematics(q) - enabled = bool(new_action.pop("enabled", 0)) - tx = float(new_action.pop("target_x", 0.0)) - ty = float(new_action.pop("target_y", 0.0)) - tz = float(new_action.pop("target_z", 0.0)) - wx = float(new_action.pop("target_wx", 0.0)) - wy = float(new_action.pop("target_wy", 0.0)) - wz = float(new_action.pop("target_wz", 0.0)) + enabled = bool(new_action.pop("enabled")) + tx = float(new_action.pop("target_x")) + ty = float(new_action.pop("target_y")) + tz = float(new_action.pop("target_z")) + wx = float(new_action.pop("target_wx")) + wy = float(new_action.pop("target_wy")) + wz = float(new_action.pop("target_wz")) + gripper_vel = float(new_action.pop("gripper_vel")) desired = None @@ -140,6 +141,7 @@ class EEReferenceAndDelta(RobotActionProcessorStep): new_action["ee.wx"] = float(tw[0]) new_action["ee.wy"] = float(tw[1]) new_action["ee.wz"] = float(tw[2]) + new_action["ee.gripper_vel"] = gripper_vel self._prev_enabled = enabled return new_action @@ -160,6 +162,7 @@ class EEReferenceAndDelta(RobotActionProcessorStep): features[PipelineFeatureType.ACTION].pop("target_wx", None) features[PipelineFeatureType.ACTION].pop("target_wy", None) features[PipelineFeatureType.ACTION].pop("target_wz", None) + features[PipelineFeatureType.ACTION].pop("gripper_vel", None) features[PipelineFeatureType.ACTION]["ee.x"] = PolicyFeature(type=FeatureType.ACTION, shape=(1,)) features[PipelineFeatureType.ACTION]["ee.y"] = PolicyFeature(type=FeatureType.ACTION, shape=(1,)) @@ -167,6 +170,9 @@ class EEReferenceAndDelta(RobotActionProcessorStep): features[PipelineFeatureType.ACTION]["ee.wx"] = PolicyFeature(type=FeatureType.ACTION, shape=(1,)) features[PipelineFeatureType.ACTION]["ee.wy"] = PolicyFeature(type=FeatureType.ACTION, shape=(1,)) features[PipelineFeatureType.ACTION]["ee.wz"] = PolicyFeature(type=FeatureType.ACTION, shape=(1,)) + features[PipelineFeatureType.ACTION]["ee.gripper_vel"] = PolicyFeature( + type=FeatureType.ACTION, shape=(1,) + ) return features @@ -194,12 +200,13 @@ class EEBoundsAndSafety(RobotActionProcessorStep): _last_twist: np.ndarray | None = field(default=None, init=False, repr=False) def action(self, action: RobotAction) -> RobotAction: - x = action.get("ee.x", None) - y = action.get("ee.y", None) - z = action.get("ee.z", None) - wx = action.get("ee.wx", None) - wy = action.get("ee.wy", None) - wz = action.get("ee.wz", None) + x = action.get("ee.x") + y = action.get("ee.y") + z = action.get("ee.z") + wx = action.get("ee.wx") + wy = action.get("ee.wy") + wz = action.get("ee.wz") + # TODO(Steven): ee.gripper_vel does not need to be bounded if None in (x, y, z, wx, wy, wz): raise ValueError( @@ -273,15 +280,18 @@ class InverseKinematicsEEToJoints(ProcessorStep): comp = new_transition.get(TransitionKey.COMPLEMENTARY_DATA) or {} - x = act.get("ee.x", None) - y = act.get("ee.y", None) - z = act.get("ee.z", None) - wx = act.get("ee.wx", None) - wy = act.get("ee.wy", None) - wz = act.get("ee.wz", None) + x = act.pop("ee.x") + y = act.pop("ee.y") + z = act.pop("ee.z") + wx = act.pop("ee.wx") + wy = act.pop("ee.wy") + wz = act.pop("ee.wz") + gripper_pos = act.pop("ee.gripper_pos") - if None in (x, y, z, wx, wy, wz): - return new_transition + if None in (x, y, z, wx, wy, wz, gripper_pos): + raise ValueError( + "Missing required end-effector pose components: ee.x, ee.y, ee.z, ee.wx, ee.wy, ee.wz, ee.gripper_pos must all be present in action" + ) # Get joint positions from complimentary data raw = comp.get("raw_joint_positions", None) @@ -306,13 +316,12 @@ class InverseKinematicsEEToJoints(ProcessorStep): self.q_curr = q_target new_act = dict(act) + # TODO: This is sentitive to order of motor_names = q_target mapping for i, name in enumerate(self.motor_names): - if name == "gripper": - # TODO(pepijn): Investigate if this is correct - # Do we want an observation key in the action field? - new_act["gripper.pos"] = float(raw["gripper"]) - else: + if name != "gripper": new_act[f"{name}.pos"] = float(q_target[i]) + else: + new_act["gripper.pos"] = float(gripper_pos) new_transition[TransitionKey.ACTION] = new_act if not self.initial_guess_current_joints: new_transition[TransitionKey.COMPLEMENTARY_DATA]["reference_joint_positions"] = q_target @@ -321,9 +330,13 @@ class InverseKinematicsEEToJoints(ProcessorStep): def transform_features( self, features: dict[PipelineFeatureType, dict[str, PolicyFeature]] ) -> dict[PipelineFeatureType, dict[str, PolicyFeature]]: - features[PipelineFeatureType.ACTION]["gripper.pos"] = PolicyFeature( - type=FeatureType.ACTION, shape=(1,) - ) + features[PipelineFeatureType.ACTION].pop("ee.x", None) + features[PipelineFeatureType.ACTION].pop("ee.y", None) + features[PipelineFeatureType.ACTION].pop("ee.z", None) + features[PipelineFeatureType.ACTION].pop("ee.wx", None) + features[PipelineFeatureType.ACTION].pop("ee.wy", None) + features[PipelineFeatureType.ACTION].pop("ee.wz", None) + features[PipelineFeatureType.ACTION].pop("ee.gripper_pos", None) for name in self.motor_names: features[PipelineFeatureType.ACTION][f"{name}.pos"] = PolicyFeature( type=FeatureType.ACTION, shape=(1,) @@ -338,7 +351,7 @@ class InverseKinematicsEEToJoints(ProcessorStep): @ProcessorStepRegistry.register("gripper_velocity_to_joint") @dataclass -class GripperVelocityToJoint(ProcessorStep): +class GripperVelocityToJoint(RobotActionProcessorStep): """ Converts a gripper velocity command into a target gripper joint position. @@ -354,66 +367,46 @@ class GripperVelocityToJoint(ProcessorStep): discrete_gripper: If True, treat the input action as discrete (0: open, 1: close, 2: stay). """ - motor_names: list[str] speed_factor: float = 20.0 clip_min: float = 0.0 clip_max: float = 100.0 discrete_gripper: bool = False - def __call__(self, transition: EnvTransition) -> EnvTransition: - new_transition = transition.copy() - obs = new_transition.get(TransitionKey.OBSERVATION) or {} - act = new_transition.get(TransitionKey.ACTION) or {} - comp = new_transition.get(TransitionKey.COMPLEMENTARY_DATA) or {} + def action(self, action: RobotAction) -> RobotAction: + complementary_data = self.transition.get(TransitionKey.COMPLEMENTARY_DATA) or {} - if not isinstance(act, dict): - raise ValueError(f"Action should be a RobotAction type got {type(act)}") + gripper_vel = action.pop("ee.gripper_vel") - if "gripper" not in act: - raise ValueError("Required action key 'gripper' not found in transition") - - if "gripper" not in self.motor_names: + if "raw_joint_positions" not in complementary_data: raise ValueError( - f"Required motor name 'gripper' not found in self.motor_names={self.motor_names}" + "raw_joint_positions is not in complementary data and is required for GripperVelocityToJoint" ) - if self.discrete_gripper: - # Discrete gripper actions are in [0, 1, 2] - # 0: open, 1: close, 2: stay - # We need to shift them to [-1, 0, 1] and then scale them to clip_max - gripper_action = act.get("gripper", 1.0) - gripper_action = gripper_action - 1.0 - gripper_action *= self.clip_max - act["gripper"] = gripper_action + curr_gripper_pos = complementary_data.get("raw_joint_positions").get("gripper") - # Get current gripper position from complementary data - raw = comp.get("raw_joint_positions") or {} - curr_pos = float(raw.get("gripper")) + # TODO(Michel,Adil): Fix this logic + # if self.discrete_gripper: + # # Discrete gripper actions are in [0, 1, 2] + # # 0: open, 1: close, 2: stay + # # We need to shift them to [-1, 0, 1] and then scale them to clip_max + # gripper_action = gripper_vel + # gripper_action *= self.clip_max + # action["ee.gripper_pos"] = gripper_action - # Compute desired gripper velocity - u = float(act.get("gripper", 0.0)) - delta = u * float(self.speed_factor) - gripper_pos = float(np.clip(curr_pos + delta, self.clip_min, self.clip_max)) + # Compute desired gripper position + delta = gripper_vel * float(self.speed_factor) + gripper_pos = float(np.clip(curr_gripper_pos + delta, self.clip_min, self.clip_max)) + action["ee.gripper_pos"] = gripper_pos - new_act = dict(act) - new_act["gripper.pos"] = gripper_pos - new_act.pop("gripper", None) - new_transition[TransitionKey.ACTION] = new_act - - obs[f"{OBS_STATE}.gripper.pos"] = curr_pos - new_transition[TransitionKey.OBSERVATION] = obs - return new_transition + return action def transform_features( self, features: dict[PipelineFeatureType, dict[str, PolicyFeature]] ) -> dict[PipelineFeatureType, dict[str, PolicyFeature]]: - features[PipelineFeatureType.ACTION].pop("gripper", None) - features[PipelineFeatureType.ACTION]["gripper.pos"] = PolicyFeature( + features[PipelineFeatureType.ACTION].pop("ee.gripper_vel") + features[PipelineFeatureType.ACTION]["ee.gripper_pos"] = PolicyFeature( type=FeatureType.ACTION, shape=(1,) ) - features[PipelineFeatureType.OBSERVATION][f"{OBS_STATE}.gripper.pos"] = PolicyFeature( - type=FeatureType.STATE, shape=(1,) - ) return features @@ -429,36 +422,42 @@ class ForwardKinematicsJointsToEE(ObservationProcessorStep): Attributes: kinematics: The robot's kinematic model. - motor_names: A list of motor names whose joint positions are used for FK. """ kinematics: RobotKinematics motor_names: list[str] - def observation(self, observation: dict) -> dict: - print("observation in step", observation) - if not all(f"{n}.pos" in observation for n in self.motor_names): - raise ValueError(f"Missing required joint positions for motors: {self.motor_names}") + def observation(self, observation: dict[str, Any]) -> dict[str, Any]: + motor_joint_values = [observation.get(f"{n}.pos") for n in self.motor_names] - q = np.array([observation[f"{n}.pos"] for n in self.motor_names], dtype=float) + q = np.array(motor_joint_values, dtype=float) t = self.kinematics.forward_kinematics(q) pos = t[:3, 3] tw = Rotation.from_matrix(t[:3, :3]).as_rotvec() - observation[f"{OBS_STATE}.ee.x"] = float(pos[0]) - observation[f"{OBS_STATE}.ee.y"] = float(pos[1]) - observation[f"{OBS_STATE}.ee.z"] = float(pos[2]) - observation[f"{OBS_STATE}.ee.wx"] = float(tw[0]) - observation[f"{OBS_STATE}.ee.wy"] = float(tw[1]) - observation[f"{OBS_STATE}.ee.wz"] = float(tw[2]) + gripper_pos = observation.get("gripper.pos") + + for n in self.motor_names: + observation.pop(f"{n}.pos") + + observation["ee.x"] = float(pos[0]) + observation["ee.y"] = float(pos[1]) + observation["ee.z"] = float(pos[2]) + observation["ee.wx"] = float(tw[0]) + observation["ee.wy"] = float(tw[1]) + observation["ee.wz"] = float(tw[2]) + observation["ee.gripper_pos"] = float(gripper_pos) return observation def transform_features( self, features: dict[PipelineFeatureType, dict[str, PolicyFeature]] ) -> dict[PipelineFeatureType, dict[str, PolicyFeature]]: + # We only use the ee pose in the dataset, so we don't need the joint positions + for n in self.motor_names: + features[PipelineFeatureType.OBSERVATION].pop(f"{n}.pos", None) # We specify the dataset features of this step that we want to be stored in the dataset - for k in ["x", "y", "z", "wx", "wy", "wz"]: - features[PipelineFeatureType.OBSERVATION][f"{OBS_STATE}.ee.{k}"] = PolicyFeature( + for k in ["x", "y", "z", "wx", "wy", "wz", "gripper_pos"]: + features[PipelineFeatureType.OBSERVATION][f"ee.{k}"] = PolicyFeature( type=FeatureType.STATE, shape=(1,) ) return features diff --git a/src/lerobot/teleoperators/phone/phone_processor.py b/src/lerobot/teleoperators/phone/phone_processor.py index 2935cbc3a..2a86e3c95 100644 --- a/src/lerobot/teleoperators/phone/phone_processor.py +++ b/src/lerobot/teleoperators/phone/phone_processor.py @@ -38,6 +38,7 @@ class MapPhoneActionToRobotAction(RobotActionProcessorStep): to determine the correct button mappings for the gripper. """ + # TODO(Steven): Gripper vel could be output of phone_teleop directly platform: PhoneOS _enabled_prev: bool = field(default=False, init=False, repr=False) @@ -67,11 +68,11 @@ class MapPhoneActionToRobotAction(RobotActionProcessorStep): # Map certain inputs to certain actions if self.platform == PhoneOS.IOS: - gripper = float(inputs.get("a3", 0.0)) + gripper_vel = float(inputs.get("a3", 0.0)) else: a = float(inputs.get("reservedButtonA", 0.0)) b = float(inputs.get("reservedButtonB", 0.0)) - gripper = ( + gripper_vel = ( a - b ) # Positive if a is pressed, negative if b is pressed, 0 if both or neither are pressed @@ -83,7 +84,7 @@ class MapPhoneActionToRobotAction(RobotActionProcessorStep): action["target_wx"] = rotvec[1] if enabled else 0.0 action["target_wy"] = rotvec[0] if enabled else 0.0 action["target_wz"] = -rotvec[2] if enabled else 0.0 - action["gripper"] = gripper # Still send gripper action when disabled + action["gripper_vel"] = gripper_vel # Still send gripper action when disabled return action def transform_features( @@ -101,5 +102,7 @@ class MapPhoneActionToRobotAction(RobotActionProcessorStep): features[PipelineFeatureType.ACTION]["target_wx"] = PolicyFeature(type=FeatureType.ACTION, shape=(1,)) features[PipelineFeatureType.ACTION]["target_wy"] = PolicyFeature(type=FeatureType.ACTION, shape=(1,)) features[PipelineFeatureType.ACTION]["target_wz"] = PolicyFeature(type=FeatureType.ACTION, shape=(1,)) - features[PipelineFeatureType.ACTION]["gripper"] = PolicyFeature(type=FeatureType.ACTION, shape=(1,)) + features[PipelineFeatureType.ACTION]["gripper_vel"] = PolicyFeature( + type=FeatureType.ACTION, shape=(1,) + ) return features