disable rotation

This commit is contained in:
Khalil Meftah
2026-04-28 15:04:05 +02:00
parent f40a36fe82
commit 05764da0f1
4 changed files with 85 additions and 56 deletions
+7 -4
View File
@@ -299,10 +299,13 @@ class HILSerlProcessorConfig:
inverse_kinematics: InverseKinematicsConfig | None = None
reward_classifier: RewardClassifierConfig | None = None
max_gripper_pos: float | None = 100.0
# Only used when ``control_mode == "leader"``. ``False`` (default) emits a
# 4-D position+gripper action matching the gamepad path; ``True`` emits the
# PR #2596 7-D action with rotation deltas (requires ``wx/wy/wz`` step
# sizes in ``inverse_kinematics.end_effector_step_sizes``).
# Only used when ``control_mode == "leader"``. ``LeaderFollowerProcessor``
# always builds the PR #2596 **7-D** vector ``[dx,dy,dz,wx,wy,wz,gripper]``.
# When ``False`` (default), rotation is **disabled** (components 3-5 are
# zeroed); when ``True``, full rotation deltas are used (requires
# ``wx/wy/wz`` in ``inverse_kinematics.end_effector_step_sizes``). The
# intervention step then turns that into a 4-D policy tensor when rotation
# is disabled, matching the gamepad pipeline.
use_rotation: bool = False
+41 -1
View File
@@ -443,6 +443,39 @@ class GripperPenaltyProcessorStep(ProcessorStep):
return features
def _ndarray_intervention_to_action_list(
flat: np.ndarray, use_rotation: bool, use_gripper: bool
) -> list[float]:
"""Flatten ``LeaderFollowerProcessor`` / policy outputs into a policy action list.
PR #2596 leader mode always produces 7 elements ``[dx,dy,dz,wx,wy,wz,g]``. When
``use_rotation`` is False, rotation is disabled (zeroed in the 7-D vector) and
we still emit a 4-D tensor ``[dx,dy,dz,g]`` for the rest of the pipeline.
"""
n = int(flat.size)
if not use_rotation and use_gripper and n == 7:
return [float(flat[0]), float(flat[1]), float(flat[2]), float(flat[6])]
if not use_rotation and not use_gripper and n == 6:
return [float(flat[0]), float(flat[1]), float(flat[2])]
return flat.tolist()
def _tensor_intervention_to_action_list(
flat: torch.Tensor, use_rotation: bool, use_gripper: bool
) -> list[float]:
n = int(flat.numel())
if not use_rotation and use_gripper and n == 7:
return [
float(flat[0].item()),
float(flat[1].item()),
float(flat[2].item()),
float(flat[6].item()),
]
if not use_rotation and not use_gripper and n == 6:
return [float(flat[0].item()), float(flat[1].item()), float(flat[2].item())]
return [float(x.item()) for x in flat]
@dataclass
@ProcessorStepRegistry.register("intervention_action_processor")
class InterventionActionProcessorStep(ProcessorStep):
@@ -455,6 +488,9 @@ class InterventionActionProcessorStep(ProcessorStep):
Attributes:
use_gripper: Whether to include the gripper in the teleoperated action.
use_rotation: For dict-based teleop actions, whether to include delta_wx/y/z.
For 7-D ndarray/tensors from ``LeaderFollowerProcessor``, when
``False`` the policy action is sliced to ``[dx,dy,dz,gripper]``.
terminate_on_success: If True, automatically sets the `done` flag when a
`success` event is received.
"""
@@ -509,8 +545,12 @@ class InterventionActionProcessorStep(ProcessorStep):
)
if self.use_gripper:
action_list.append(teleop_action.get(GRIPPER_KEY, self.gripper_neutral_action))
elif isinstance(teleop_action, torch.Tensor):
flat = teleop_action.detach().flatten()
action_list = _tensor_intervention_to_action_list(flat, self.use_rotation, self.use_gripper)
elif isinstance(teleop_action, np.ndarray):
action_list = teleop_action.tolist()
flat = np.asarray(teleop_action).reshape(-1)
action_list = _ndarray_intervention_to_action_list(flat, self.use_rotation, self.use_gripper)
else:
action_list = teleop_action
@@ -48,10 +48,12 @@ class LeaderFollowerProcessor(ProcessorStep):
kinematics: RobotKinematics
end_effector_step_sizes: np.ndarray | None = None
use_gripper: bool = True
# When True (PR #2596 default), emit a 7-D action with rotation deltas;
# when False, emit a 4-D action ``[dx, dy, dz, gripper]`` matching the
# gamepad/keyboard EE convention. ``end_effector_step_sizes`` only needs
# ``wx/wy/wz`` keys when this is True.
# PR #2596 always produces a **7-D** intervention vector ``[dx, dy, dz, wx,
# wy, wz, gripper]`` (normalised to ~[-1, 1] per axis). When ``use_rotation``
# is False, the middle three components are **zeroed** (rotation disabled,
# not removed): the same tensor layout and code path as PR #2596, but the
# downstream policy / IK only sees position + gripper. ``wx/wy/wz`` step
# sizes are only read when ``use_rotation`` is True.
use_rotation: bool = True
# prev_leader_gripper: float | None = None
max_gripper_pos: float = 100.0
@@ -106,38 +108,29 @@ class LeaderFollowerProcessor(ProcessorStep):
delta_pos = leader_ee_pos - follower_ee_pos
delta_gripper = leader_gripper_pos - follower_gripper_pos
# Normalize the position deltas to [-1, 1]
delta_pos = delta_pos / np.array(
# Normalise position to ~[-1, 1] per axis (PR #2596).
step_xyz = np.array(
[
self.end_effector_step_sizes["x"],
self.end_effector_step_sizes["y"],
self.end_effector_step_sizes["z"],
]
)
delta_pos = delta_pos / step_xyz
max_normalized_pos = max(
abs(delta_pos[0]),
abs(delta_pos[1]),
abs(delta_pos[2]),
)
if self.use_rotation:
# For rotation: compute relative rotation from follower to leader
# R_leader = R_follower * R_delta => R_delta = R_follower^T * R_leader
r_delta = follower_ee[:3, :3].T @ leader_ee[:3, :3]
delta_rvec = Rotation.from_matrix(r_delta).as_rotvec()
# Relative rotation follower -> leader (same construction as PR #2596).
r_delta = follower_ee[:3, :3].T @ leader_ee[:3, :3]
delta_rvec = Rotation.from_matrix(r_delta).as_rotvec()
if self.use_rotation:
desired = np.eye(4, dtype=float)
desired[:3, :3] = follower_ee[:3, :3] @ r_delta
desired[:3, 3] = follower_ee[:3, 3] + (
delta_pos
* np.array(
[
self.end_effector_step_sizes["x"],
self.end_effector_step_sizes["y"],
self.end_effector_step_sizes["z"],
]
)
)
desired[:3, 3] = follower_ee[:3, 3] + delta_pos * step_xyz
pos = desired[:3, 3]
tw = Rotation.from_matrix(desired[:3, :3]).as_rotvec()
@@ -160,35 +153,28 @@ class LeaderFollowerProcessor(ProcessorStep):
if max_normalized > 1.0:
delta_pos = delta_pos / max_normalized
delta_rvec = delta_rvec / max_normalized
intervention_action = np.array(
[
delta_pos[0],
delta_pos[1],
delta_pos[2],
delta_rvec[0],
delta_rvec[1],
delta_rvec[2],
np.clip(delta_gripper, -self.max_gripper_pos, self.max_gripper_pos)
/ self.max_gripper_pos,
],
dtype=float,
)
else:
# Position-only 4-D path: ``[dx, dy, dz, gripper]``
# Rotation **disabled**: keep PR #2596 joint scaling on position only.
if max_normalized_pos > 1.0:
delta_pos = delta_pos / max_normalized_pos
delta_rvec = np.zeros(3, dtype=float)
intervention_action = np.array(
[
delta_pos[0],
delta_pos[1],
delta_pos[2],
np.clip(delta_gripper, -self.max_gripper_pos, self.max_gripper_pos)
/ self.max_gripper_pos,
],
dtype=float,
)
grip_norm = (
np.clip(delta_gripper, -self.max_gripper_pos, self.max_gripper_pos) / self.max_gripper_pos
)
intervention_action = np.array(
[
delta_pos[0],
delta_pos[1],
delta_pos[2],
delta_rvec[0],
delta_rvec[1],
delta_rvec[2],
grip_norm,
],
dtype=float,
)
# # Extract leader positions from teleop action dict
# # leader_pos = np.array([teleop_action.get(f"{motor}.pos", 0) for motor in self.motor_names])
+5 -5
View File
@@ -492,11 +492,11 @@ def make_processors(
]
# Leader-follower control mode: leader haptically tracks follower until the
# human toggles intervention with SPACE, at which point leader joints are
# converted into EE deltas by ``LeaderFollowerProcessor`` and consumed by
# ``InterventionActionProcessorStep``. With ``use_rotation=False`` the
# action is the 4-D ``[dx, dy, dz, gripper]`` matching the gamepad path;
# with ``True`` it is the PR #2596 7-D ``[dx, dy, dz, dwx, dwy, dwz, gripper]``.
# human toggles intervention with SPACE, at which point ``LeaderFollowerProcessor``
# builds the PR #2596 **7-D** EE delta tensor. Rotation can be toggled via
# ``processor.use_rotation``: when False the ``wx/wy/wz`` channels are zeroed.
# ``InterventionActionProcessorStep`` then maps that to either a full 7-D
# or 4-D policy action tensor (see helpers in ``hil_processor``).
leader_use_rotation = bool(getattr(cfg.processor, "use_rotation", False))
if control_mode == "leader":
if not isinstance(teleop_device, SO101LeaderFollower):