diff --git a/src/lerobot/envs/configs.py b/src/lerobot/envs/configs.py index efa0d728a..6b59167ea 100644 --- a/src/lerobot/envs/configs.py +++ b/src/lerobot/envs/configs.py @@ -299,10 +299,13 @@ class HILSerlProcessorConfig: inverse_kinematics: InverseKinematicsConfig | None = None reward_classifier: RewardClassifierConfig | None = None max_gripper_pos: float | None = 100.0 - # Only used when ``control_mode == "leader"``. ``False`` (default) emits a - # 4-D position+gripper action matching the gamepad path; ``True`` emits the - # PR #2596 7-D action with rotation deltas (requires ``wx/wy/wz`` step - # sizes in ``inverse_kinematics.end_effector_step_sizes``). + # Only used when ``control_mode == "leader"``. ``LeaderFollowerProcessor`` + # always builds the PR #2596 **7-D** vector ``[dx,dy,dz,wx,wy,wz,gripper]``. + # When ``False`` (default), rotation is **disabled** (components 3–5 are + # zeroed); when ``True``, full rotation deltas are used (requires + # ``wx/wy/wz`` in ``inverse_kinematics.end_effector_step_sizes``). The + # intervention step then turns that into a 4-D policy tensor when rotation + # is disabled, matching the gamepad pipeline. use_rotation: bool = False diff --git a/src/lerobot/processor/hil_processor.py b/src/lerobot/processor/hil_processor.py index 7a8b2c867..df4f7c833 100644 --- a/src/lerobot/processor/hil_processor.py +++ b/src/lerobot/processor/hil_processor.py @@ -443,6 +443,39 @@ class GripperPenaltyProcessorStep(ProcessorStep): return features +def _ndarray_intervention_to_action_list( + flat: np.ndarray, use_rotation: bool, use_gripper: bool +) -> list[float]: + """Flatten ``LeaderFollowerProcessor`` / policy outputs into a policy action list. + + PR #2596 leader mode always produces 7 elements ``[dx,dy,dz,wx,wy,wz,g]``. When + ``use_rotation`` is False, rotation is disabled (zeroed in the 7-D vector) and + we still emit a 4-D tensor ``[dx,dy,dz,g]`` for the rest of the pipeline. + """ + n = int(flat.size) + if not use_rotation and use_gripper and n == 7: + return [float(flat[0]), float(flat[1]), float(flat[2]), float(flat[6])] + if not use_rotation and not use_gripper and n == 6: + return [float(flat[0]), float(flat[1]), float(flat[2])] + return flat.tolist() + + +def _tensor_intervention_to_action_list( + flat: torch.Tensor, use_rotation: bool, use_gripper: bool +) -> list[float]: + n = int(flat.numel()) + if not use_rotation and use_gripper and n == 7: + return [ + float(flat[0].item()), + float(flat[1].item()), + float(flat[2].item()), + float(flat[6].item()), + ] + if not use_rotation and not use_gripper and n == 6: + return [float(flat[0].item()), float(flat[1].item()), float(flat[2].item())] + return [float(x.item()) for x in flat] + + @dataclass @ProcessorStepRegistry.register("intervention_action_processor") class InterventionActionProcessorStep(ProcessorStep): @@ -455,6 +488,9 @@ class InterventionActionProcessorStep(ProcessorStep): Attributes: use_gripper: Whether to include the gripper in the teleoperated action. + use_rotation: For dict-based teleop actions, whether to include delta_wx/y/z. + For 7-D ndarray/tensors from ``LeaderFollowerProcessor``, when + ``False`` the policy action is sliced to ``[dx,dy,dz,gripper]``. terminate_on_success: If True, automatically sets the `done` flag when a `success` event is received. """ @@ -509,8 +545,12 @@ class InterventionActionProcessorStep(ProcessorStep): ) if self.use_gripper: action_list.append(teleop_action.get(GRIPPER_KEY, self.gripper_neutral_action)) + elif isinstance(teleop_action, torch.Tensor): + flat = teleop_action.detach().flatten() + action_list = _tensor_intervention_to_action_list(flat, self.use_rotation, self.use_gripper) elif isinstance(teleop_action, np.ndarray): - action_list = teleop_action.tolist() + flat = np.asarray(teleop_action).reshape(-1) + action_list = _ndarray_intervention_to_action_list(flat, self.use_rotation, self.use_gripper) else: action_list = teleop_action diff --git a/src/lerobot/processor/leader_follower_processor.py b/src/lerobot/processor/leader_follower_processor.py index c9afb828a..9334a3941 100644 --- a/src/lerobot/processor/leader_follower_processor.py +++ b/src/lerobot/processor/leader_follower_processor.py @@ -48,10 +48,12 @@ class LeaderFollowerProcessor(ProcessorStep): kinematics: RobotKinematics end_effector_step_sizes: np.ndarray | None = None use_gripper: bool = True - # When True (PR #2596 default), emit a 7-D action with rotation deltas; - # when False, emit a 4-D action ``[dx, dy, dz, gripper]`` matching the - # gamepad/keyboard EE convention. ``end_effector_step_sizes`` only needs - # ``wx/wy/wz`` keys when this is True. + # PR #2596 always produces a **7-D** intervention vector ``[dx, dy, dz, wx, + # wy, wz, gripper]`` (normalised to ~[-1, 1] per axis). When ``use_rotation`` + # is False, the middle three components are **zeroed** (rotation disabled, + # not removed): the same tensor layout and code path as PR #2596, but the + # downstream policy / IK only sees position + gripper. ``wx/wy/wz`` step + # sizes are only read when ``use_rotation`` is True. use_rotation: bool = True # prev_leader_gripper: float | None = None max_gripper_pos: float = 100.0 @@ -106,38 +108,29 @@ class LeaderFollowerProcessor(ProcessorStep): delta_pos = leader_ee_pos - follower_ee_pos delta_gripper = leader_gripper_pos - follower_gripper_pos - # Normalize the position deltas to [-1, 1] - delta_pos = delta_pos / np.array( + # Normalise position to ~[-1, 1] per axis (PR #2596). + step_xyz = np.array( [ self.end_effector_step_sizes["x"], self.end_effector_step_sizes["y"], self.end_effector_step_sizes["z"], ] ) + delta_pos = delta_pos / step_xyz max_normalized_pos = max( abs(delta_pos[0]), abs(delta_pos[1]), abs(delta_pos[2]), ) - if self.use_rotation: - # For rotation: compute relative rotation from follower to leader - # R_leader = R_follower * R_delta => R_delta = R_follower^T * R_leader - r_delta = follower_ee[:3, :3].T @ leader_ee[:3, :3] - delta_rvec = Rotation.from_matrix(r_delta).as_rotvec() + # Relative rotation follower -> leader (same construction as PR #2596). + r_delta = follower_ee[:3, :3].T @ leader_ee[:3, :3] + delta_rvec = Rotation.from_matrix(r_delta).as_rotvec() + if self.use_rotation: desired = np.eye(4, dtype=float) desired[:3, :3] = follower_ee[:3, :3] @ r_delta - desired[:3, 3] = follower_ee[:3, 3] + ( - delta_pos - * np.array( - [ - self.end_effector_step_sizes["x"], - self.end_effector_step_sizes["y"], - self.end_effector_step_sizes["z"], - ] - ) - ) + desired[:3, 3] = follower_ee[:3, 3] + delta_pos * step_xyz pos = desired[:3, 3] tw = Rotation.from_matrix(desired[:3, :3]).as_rotvec() @@ -160,35 +153,28 @@ class LeaderFollowerProcessor(ProcessorStep): if max_normalized > 1.0: delta_pos = delta_pos / max_normalized delta_rvec = delta_rvec / max_normalized - - intervention_action = np.array( - [ - delta_pos[0], - delta_pos[1], - delta_pos[2], - delta_rvec[0], - delta_rvec[1], - delta_rvec[2], - np.clip(delta_gripper, -self.max_gripper_pos, self.max_gripper_pos) - / self.max_gripper_pos, - ], - dtype=float, - ) else: - # Position-only 4-D path: ``[dx, dy, dz, gripper]`` + # Rotation **disabled**: keep PR #2596 joint scaling on position only. if max_normalized_pos > 1.0: delta_pos = delta_pos / max_normalized_pos + delta_rvec = np.zeros(3, dtype=float) - intervention_action = np.array( - [ - delta_pos[0], - delta_pos[1], - delta_pos[2], - np.clip(delta_gripper, -self.max_gripper_pos, self.max_gripper_pos) - / self.max_gripper_pos, - ], - dtype=float, - ) + grip_norm = ( + np.clip(delta_gripper, -self.max_gripper_pos, self.max_gripper_pos) / self.max_gripper_pos + ) + + intervention_action = np.array( + [ + delta_pos[0], + delta_pos[1], + delta_pos[2], + delta_rvec[0], + delta_rvec[1], + delta_rvec[2], + grip_norm, + ], + dtype=float, + ) # # Extract leader positions from teleop action dict # # leader_pos = np.array([teleop_action.get(f"{motor}.pos", 0) for motor in self.motor_names]) diff --git a/src/lerobot/rl/gym_manipulator.py b/src/lerobot/rl/gym_manipulator.py index 1c697fac0..979871ccb 100644 --- a/src/lerobot/rl/gym_manipulator.py +++ b/src/lerobot/rl/gym_manipulator.py @@ -492,11 +492,11 @@ def make_processors( ] # Leader-follower control mode: leader haptically tracks follower until the - # human toggles intervention with SPACE, at which point leader joints are - # converted into EE deltas by ``LeaderFollowerProcessor`` and consumed by - # ``InterventionActionProcessorStep``. With ``use_rotation=False`` the - # action is the 4-D ``[dx, dy, dz, gripper]`` matching the gamepad path; - # with ``True`` it is the PR #2596 7-D ``[dx, dy, dz, dwx, dwy, dwz, gripper]``. + # human toggles intervention with SPACE, at which point ``LeaderFollowerProcessor`` + # builds the PR #2596 **7-D** EE delta tensor. Rotation can be toggled via + # ``processor.use_rotation``: when False the ``wx/wy/wz`` channels are zeroed. + # ``InterventionActionProcessorStep`` then maps that to either a full 7-D + # or 4-D policy action tensor (see helpers in ``hil_processor``). leader_use_rotation = bool(getattr(cfg.processor, "use_rotation", False)) if control_mode == "leader": if not isinstance(teleop_device, SO101LeaderFollower):