disable rotation

This commit is contained in:
Khalil Meftah
2026-04-28 15:04:05 +02:00
parent f40a36fe82
commit 05764da0f1
4 changed files with 85 additions and 56 deletions
+7 -4
View File
@@ -299,10 +299,13 @@ class HILSerlProcessorConfig:
inverse_kinematics: InverseKinematicsConfig | None = None
reward_classifier: RewardClassifierConfig | None = None
max_gripper_pos: float | None = 100.0
# Only used when ``control_mode == "leader"``. ``False`` (default) emits a
# 4-D position+gripper action matching the gamepad path; ``True`` emits the
# PR #2596 7-D action with rotation deltas (requires ``wx/wy/wz`` step
# sizes in ``inverse_kinematics.end_effector_step_sizes``).
# Only used when ``control_mode == "leader"``. ``LeaderFollowerProcessor``
# always builds the PR #2596 **7-D** vector ``[dx,dy,dz,wx,wy,wz,gripper]``.
# When ``False`` (default), rotation is **disabled** (components 3-5 are
# zeroed); when ``True``, full rotation deltas are used (requires
# ``wx/wy/wz`` in ``inverse_kinematics.end_effector_step_sizes``). The
# intervention step then turns that into a 4-D policy tensor when rotation
# is disabled, matching the gamepad pipeline.
use_rotation: bool = False
+41 -1
View File
@@ -443,6 +443,39 @@ class GripperPenaltyProcessorStep(ProcessorStep):
return features
def _ndarray_intervention_to_action_list(
flat: np.ndarray, use_rotation: bool, use_gripper: bool
) -> list[float]:
"""Flatten ``LeaderFollowerProcessor`` / policy outputs into a policy action list.
PR #2596 leader mode always produces 7 elements ``[dx,dy,dz,wx,wy,wz,g]``. When
``use_rotation`` is False, rotation is disabled (zeroed in the 7-D vector) and
we still emit a 4-D tensor ``[dx,dy,dz,g]`` for the rest of the pipeline.
"""
n = int(flat.size)
if not use_rotation and use_gripper and n == 7:
return [float(flat[0]), float(flat[1]), float(flat[2]), float(flat[6])]
if not use_rotation and not use_gripper and n == 6:
return [float(flat[0]), float(flat[1]), float(flat[2])]
return flat.tolist()
def _tensor_intervention_to_action_list(
flat: torch.Tensor, use_rotation: bool, use_gripper: bool
) -> list[float]:
n = int(flat.numel())
if not use_rotation and use_gripper and n == 7:
return [
float(flat[0].item()),
float(flat[1].item()),
float(flat[2].item()),
float(flat[6].item()),
]
if not use_rotation and not use_gripper and n == 6:
return [float(flat[0].item()), float(flat[1].item()), float(flat[2].item())]
return [float(x.item()) for x in flat]
@dataclass
@ProcessorStepRegistry.register("intervention_action_processor")
class InterventionActionProcessorStep(ProcessorStep):
@@ -455,6 +488,9 @@ class InterventionActionProcessorStep(ProcessorStep):
Attributes:
use_gripper: Whether to include the gripper in the teleoperated action.
use_rotation: For dict-based teleop actions, whether to include delta_wx/y/z.
For 7-D ndarray/tensors from ``LeaderFollowerProcessor``, when
``False`` the policy action is sliced to ``[dx,dy,dz,gripper]``.
terminate_on_success: If True, automatically sets the `done` flag when a
`success` event is received.
"""
@@ -509,8 +545,12 @@ class InterventionActionProcessorStep(ProcessorStep):
)
if self.use_gripper:
action_list.append(teleop_action.get(GRIPPER_KEY, self.gripper_neutral_action))
elif isinstance(teleop_action, torch.Tensor):
flat = teleop_action.detach().flatten()
action_list = _tensor_intervention_to_action_list(flat, self.use_rotation, self.use_gripper)
elif isinstance(teleop_action, np.ndarray):
action_list = teleop_action.tolist()
flat = np.asarray(teleop_action).reshape(-1)
action_list = _ndarray_intervention_to_action_list(flat, self.use_rotation, self.use_gripper)
else:
action_list = teleop_action
@@ -48,10 +48,12 @@ class LeaderFollowerProcessor(ProcessorStep):
kinematics: RobotKinematics
end_effector_step_sizes: np.ndarray | None = None
use_gripper: bool = True
# When True (PR #2596 default), emit a 7-D action with rotation deltas;
# when False, emit a 4-D action ``[dx, dy, dz, gripper]`` matching the
# gamepad/keyboard EE convention. ``end_effector_step_sizes`` only needs
# ``wx/wy/wz`` keys when this is True.
# PR #2596 always produces a **7-D** intervention vector ``[dx, dy, dz, wx,
# wy, wz, gripper]`` (normalised to ~[-1, 1] per axis). When ``use_rotation``
# is False, the middle three components are **zeroed** (rotation disabled,
# not removed): the same tensor layout and code path as PR #2596, but the
# downstream policy / IK only sees position + gripper. ``wx/wy/wz`` step
# sizes are only read when ``use_rotation`` is True.
use_rotation: bool = True
# prev_leader_gripper: float | None = None
max_gripper_pos: float = 100.0
@@ -106,38 +108,29 @@ class LeaderFollowerProcessor(ProcessorStep):
delta_pos = leader_ee_pos - follower_ee_pos
delta_gripper = leader_gripper_pos - follower_gripper_pos
# Normalize the position deltas to [-1, 1]
delta_pos = delta_pos / np.array(
# Normalise position to ~[-1, 1] per axis (PR #2596).
step_xyz = np.array(
[
self.end_effector_step_sizes["x"],
self.end_effector_step_sizes["y"],
self.end_effector_step_sizes["z"],
]
)
delta_pos = delta_pos / step_xyz
max_normalized_pos = max(
abs(delta_pos[0]),
abs(delta_pos[1]),
abs(delta_pos[2]),
)
if self.use_rotation:
# For rotation: compute relative rotation from follower to leader
# R_leader = R_follower * R_delta => R_delta = R_follower^T * R_leader
r_delta = follower_ee[:3, :3].T @ leader_ee[:3, :3]
delta_rvec = Rotation.from_matrix(r_delta).as_rotvec()
# Relative rotation follower -> leader (same construction as PR #2596).
r_delta = follower_ee[:3, :3].T @ leader_ee[:3, :3]
delta_rvec = Rotation.from_matrix(r_delta).as_rotvec()
if self.use_rotation:
desired = np.eye(4, dtype=float)
desired[:3, :3] = follower_ee[:3, :3] @ r_delta
desired[:3, 3] = follower_ee[:3, 3] + (
delta_pos
* np.array(
[
self.end_effector_step_sizes["x"],
self.end_effector_step_sizes["y"],
self.end_effector_step_sizes["z"],
]
)
)
desired[:3, 3] = follower_ee[:3, 3] + delta_pos * step_xyz
pos = desired[:3, 3]
tw = Rotation.from_matrix(desired[:3, :3]).as_rotvec()
@@ -160,35 +153,28 @@ class LeaderFollowerProcessor(ProcessorStep):
if max_normalized > 1.0:
delta_pos = delta_pos / max_normalized
delta_rvec = delta_rvec / max_normalized
intervention_action = np.array(
[
delta_pos[0],
delta_pos[1],
delta_pos[2],
delta_rvec[0],
delta_rvec[1],
delta_rvec[2],
np.clip(delta_gripper, -self.max_gripper_pos, self.max_gripper_pos)
/ self.max_gripper_pos,
],
dtype=float,
)
else:
# Position-only 4-D path: ``[dx, dy, dz, gripper]``
# Rotation **disabled**: keep PR #2596 joint scaling on position only.
if max_normalized_pos > 1.0:
delta_pos = delta_pos / max_normalized_pos
delta_rvec = np.zeros(3, dtype=float)
intervention_action = np.array(
[
delta_pos[0],
delta_pos[1],
delta_pos[2],
np.clip(delta_gripper, -self.max_gripper_pos, self.max_gripper_pos)
/ self.max_gripper_pos,
],
dtype=float,
)
grip_norm = (
np.clip(delta_gripper, -self.max_gripper_pos, self.max_gripper_pos) / self.max_gripper_pos
)
intervention_action = np.array(
[
delta_pos[0],
delta_pos[1],
delta_pos[2],
delta_rvec[0],
delta_rvec[1],
delta_rvec[2],
grip_norm,
],
dtype=float,
)
# # Extract leader positions from teleop action dict
# # leader_pos = np.array([teleop_action.get(f"{motor}.pos", 0) for motor in self.motor_names])
+5 -5
View File
@@ -492,11 +492,11 @@ def make_processors(
]
# Leader-follower control mode: leader haptically tracks follower until the
# human toggles intervention with SPACE, at which point leader joints are
# converted into EE deltas by ``LeaderFollowerProcessor`` and consumed by
# ``InterventionActionProcessorStep``. With ``use_rotation=False`` the
# action is the 4-D ``[dx, dy, dz, gripper]`` matching the gamepad path;
# with ``True`` it is the PR #2596 7-D ``[dx, dy, dz, dwx, dwy, dwz, gripper]``.
# human toggles intervention with SPACE, at which point ``LeaderFollowerProcessor``
# builds the PR #2596 **7-D** EE delta tensor. Rotation can be toggled via
# ``processor.use_rotation``: when False the ``wx/wy/wz`` channels are zeroed.
# ``InterventionActionProcessorStep`` then maps that to either a full 7-D
# or 4-D policy action tensor (see helpers in ``hil_processor``).
leader_use_rotation = bool(getattr(cfg.processor, "use_rotation", False))
if control_mode == "leader":
if not isinstance(teleop_device, SO101LeaderFollower):