Migrate gym_manipulator to use the pipeline

Added get_teleop_events function to capture relevant events from teleop devices unrelated to actions
2026-05-15 16:49:55 +00:00 · 2025-08-01 20:20:13 +02:00
parent 2c4e888c7f
commit 1fdbecad3c
5 changed files with 676 additions and 1951 deletions
@@ -170,7 +170,7 @@ class VideoRecordConfig:


@dataclass
-class EnvTransformConfig:
+class HILSerlProcessorConfig:
    """Configuration for environment wrappers."""

    # ee_action_space_params: EEActionSpaceConfig = field(default_factory=EEActionSpaceConfig)
@@ -189,6 +189,12 @@ class EnvTransformConfig:
    gripper_penalty: float = 0.0
    gripper_penalty_in_reward: bool = False

+    urdf_path: str | None = None
+    target_frame_name: str | None = None
+    end_effector_bounds: dict[str, list[float]] | None = None
+    end_effector_step_sizes: dict[str, float] | None = None
+    max_gripper_pos: float | None = None
+

@EnvConfig.register_subclass(name="gym_manipulator")
@dataclass
@@ -197,7 +203,7 @@ class HILSerlRobotEnvConfig(EnvConfig):

    robot: RobotConfig | None = None
    teleop: TeleoperatorConfig | None = None
-    wrapper: EnvTransformConfig | None = None
+    processor: HILSerlProcessorConfig | None = None
    fps: int = 10
    name: str = "real_robot"
    mode: str | None = None  # Either "record", "replay", None
@@ -216,58 +222,3 @@ class HILSerlRobotEnvConfig(EnvConfig):
    @property
    def gym_kwargs(self) -> dict:
        return {}
-
-
-@EnvConfig.register_subclass("hil")
-@dataclass
-class HILEnvConfig(EnvConfig):
-    """Configuration for the HIL environment."""
-
-    name: str = "PandaPickCube"
-    task: str | None = "PandaPickCubeKeyboard-v0"
-    use_viewer: bool = True
-    gripper_penalty: float = 0.0
-    use_gamepad: bool = True
-    state_dim: int = 18
-    action_dim: int = 4
-    fps: int = 100
-    episode_length: int = 100
-    video_record: VideoRecordConfig = field(default_factory=VideoRecordConfig)
-    features: dict[str, PolicyFeature] = field(
-        default_factory=lambda: {
-            "action": PolicyFeature(type=FeatureType.ACTION, shape=(4,)),
-            "observation.image": PolicyFeature(type=FeatureType.VISUAL, shape=(3, 128, 128)),
-            "observation.state": PolicyFeature(type=FeatureType.STATE, shape=(18,)),
-        }
-    )
-    features_map: dict[str, str] = field(
-        default_factory=lambda: {
-            "action": ACTION,
-            "observation.image": OBS_IMAGE,
-            "observation.state": OBS_STATE,
-        }
-    )
-    ################# args from hilserlrobotenv
-    reward_classifier_pretrained_path: str | None = None
-    robot_config: RobotConfig | None = None
-    teleop_config: TeleoperatorConfig | None = None
-    wrapper: EnvTransformConfig | None = None
-    mode: str | None = None  # Either "record", "replay", None
-    repo_id: str | None = None
-    dataset_root: str | None = None
-    num_episodes: int = 10  # only for record mode
-    episode: int = 0
-    device: str = "cuda"
-    push_to_hub: bool = True
-    pretrained_policy_name_or_path: str | None = None
-    # For the reward classifier, to record more positive examples after a success
-    number_of_steps_after_success: int = 0
-    ############################
-
-    @property
-    def gym_kwargs(self) -> dict:
-        return {
-            "use_viewer": self.use_viewer,
-            "use_gamepad": self.use_gamepad,
-            "gripper_penalty": self.gripper_penalty,
-        }
@@ -107,6 +107,45 @@ class GamepadTeleop(Teleoperator):

        return action_dict

+    def get_teleop_events(self) -> dict[str, Any]:
+        """
+        Get extra control events from the gamepad such as intervention status,
+        episode termination, success indicators, etc.
+
+        Returns:
+            Dictionary containing:
+                - is_intervention: bool - Whether human is currently intervening
+                - terminate_episode: bool - Whether to terminate the current episode
+                - success: bool - Whether the episode was successful
+                - rerecord_episode: bool - Whether to rerecord the episode
+        """
+        if self.gamepad is None:
+            return {
+                "is_intervention": False,
+                "terminate_episode": False,
+                "success": False,
+                "rerecord_episode": False,
+            }
+
+        # Update gamepad state to get fresh inputs
+        self.gamepad.update()
+
+        # Check if intervention is active
+        is_intervention = self.gamepad.should_intervene()
+
+        # Get episode end status
+        episode_end_status = self.gamepad.get_episode_end_status()
+        terminate_episode = episode_end_status is not None
+        success = episode_end_status == "success"
+        rerecord_episode = episode_end_status == "rerecord_episode"
+
+        return {
+            "is_intervention": is_intervention,
+            "terminate_episode": terminate_episode,
+            "success": success,
+            "rerecord_episode": rerecord_episode,
+        }
+
    def disconnect(self) -> None:
        """Disconnect from the gamepad."""
        if self.gamepad is not None:
@@ -235,3 +235,67 @@ class KeyboardEndEffectorTeleop(KeyboardTeleop):
            action_dict["gripper"] = gripper_action

        return action_dict
+
+    def get_teleop_events(self) -> dict[str, Any]:
+        """
+        Get extra control events from the keyboard such as intervention status,
+        episode termination, success indicators, etc.
+
+        Keyboard mappings:
+        - Any movement keys pressed = intervention active
+        - 's' key = success (terminate episode successfully)
+        - 'r' key = rerecord episode (terminate and rerecord)
+        - 'q' key = quit episode (terminate without success)
+
+        Returns:
+            Dictionary containing:
+                - is_intervention: bool - Whether human is currently intervening
+                - terminate_episode: bool - Whether to terminate the current episode
+                - success: bool - Whether the episode was successful
+                - rerecord_episode: bool - Whether to rerecord the episode
+        """
+        if not self.is_connected:
+            return {
+                "is_intervention": False,
+                "terminate_episode": False,
+                "success": False,
+                "rerecord_episode": False,
+            }
+
+        # Check if any movement keys are currently pressed (indicates intervention)
+        movement_keys = [
+            keyboard.Key.up,
+            keyboard.Key.down,
+            keyboard.Key.left,
+            keyboard.Key.right,
+            keyboard.Key.shift,
+            keyboard.Key.shift_r,
+            keyboard.Key.ctrl_r,
+            keyboard.Key.ctrl_l,
+        ]
+        is_intervention = any(self.current_pressed.get(key, False) for key in movement_keys)
+
+        # Check for episode control commands from misc_keys_queue
+        terminate_episode = False
+        success = False
+        rerecord_episode = False
+
+        # Process any pending misc keys
+        while not self.misc_keys_queue.empty():
+            key = self.misc_keys_queue.get_nowait()
+            if key == "s":
+                terminate_episode = True
+                success = True
+            elif key == "r":
+                terminate_episode = True
+                rerecord_episode = True
+            elif key == "q":
+                terminate_episode = True
+                success = False
+
+        return {
+            "is_intervention": is_intervention,
+            "terminate_episode": terminate_episode,
+            "success": success,
+            "rerecord_episode": rerecord_episode,
+        }
@@ -160,6 +160,18 @@ class Teleoperator(abc.ABC):
        """
        pass

+    @abc.abstractmethod
+    def get_teleop_events(self) -> dict[str, Any]:
+        """
+        Get extra control events from the teleoperator such as intervention status,
+        episode termination, success indicators, etc.
+        Check the implementation of the gamepad for an example.
+
+        Returns:
+            dict[str, Any]: A dictionary containing control events with keys and values that are specific to the setup.
+        """
+        pass
+
    @abc.abstractmethod
    def send_feedback(self, feedback: dict[str, Any]) -> None:
        """