Migrate gym_manipulator to use the pipeline

Added get_teleop_events function to capture relevant events from teleop devices unrelated to actions
2026-05-18 10:10:08 +00:00 · 2025-08-01 20:20:13 +02:00
parent 2c4e888c7f
commit 1fdbecad3c
5 changed files with 676 additions and 1951 deletions
@@ -170,7 +170,7 @@ class VideoRecordConfig:
@dataclass
-class EnvTransformConfig:
+class HILSerlProcessorConfig:
    """Configuration for environment wrappers."""
    # ee_action_space_params: EEActionSpaceConfig = field(default_factory=EEActionSpaceConfig)
@@ -189,6 +189,12 @@ class EnvTransformConfig:
    gripper_penalty: float = 0.0
    gripper_penalty_in_reward: bool = False
    urdf_path: str | None = None
    target_frame_name: str | None = None
    end_effector_bounds: dict[str, list[float]] | None = None
    end_effector_step_sizes: dict[str, float] | None = None
    max_gripper_pos: float | None = None
@EnvConfig.register_subclass(name="gym_manipulator")
@dataclass
@@ -197,7 +203,7 @@ class HILSerlRobotEnvConfig(EnvConfig):
    robot: RobotConfig | None = None
    teleop: TeleoperatorConfig | None = None
-    wrapper: EnvTransformConfig | None = None
+    processor: HILSerlProcessorConfig | None = None
    fps: int = 10
    name: str = "real_robot"
    mode: str | None = None  # Either "record", "replay", None
@@ -216,58 +222,3 @@ class HILSerlRobotEnvConfig(EnvConfig):
    @property
    def gym_kwargs(self) -> dict:
        return {}
@EnvConfig.register_subclass("hil")
@dataclass
 class HILEnvConfig(EnvConfig):
    """Configuration for the HIL environment."""
    name: str = "PandaPickCube"
    task: str | None = "PandaPickCubeKeyboard-v0"
    use_viewer: bool = True
    gripper_penalty: float = 0.0
    use_gamepad: bool = True
    state_dim: int = 18
    action_dim: int = 4
    fps: int = 100
    episode_length: int = 100
    video_record: VideoRecordConfig = field(default_factory=VideoRecordConfig)
    features: dict[str, PolicyFeature] = field(
        default_factory=lambda: {
            "action": PolicyFeature(type=FeatureType.ACTION, shape=(4,)),
            "observation.image": PolicyFeature(type=FeatureType.VISUAL, shape=(3, 128, 128)),
            "observation.state": PolicyFeature(type=FeatureType.STATE, shape=(18,)),
        }
    )
    features_map: dict[str, str] = field(
        default_factory=lambda: {
            "action": ACTION,
            "observation.image": OBS_IMAGE,
            "observation.state": OBS_STATE,
        }
    )
    ################# args from hilserlrobotenv
    reward_classifier_pretrained_path: str | None = None
    robot_config: RobotConfig | None = None
    teleop_config: TeleoperatorConfig | None = None
    wrapper: EnvTransformConfig | None = None
    mode: str | None = None  # Either "record", "replay", None
    repo_id: str | None = None
    dataset_root: str | None = None
    num_episodes: int = 10  # only for record mode
    episode: int = 0
    device: str = "cuda"
    push_to_hub: bool = True
    pretrained_policy_name_or_path: str | None = None
    # For the reward classifier, to record more positive examples after a success
    number_of_steps_after_success: int = 0
    ############################
    @property
    def gym_kwargs(self) -> dict:
        return {
            "use_viewer": self.use_viewer,
            "use_gamepad": self.use_gamepad,
            "gripper_penalty": self.gripper_penalty,
        }
@@ -107,6 +107,45 @@ class GamepadTeleop(Teleoperator):
        return action_dict
    def get_teleop_events(self) -> dict[str, Any]:
        """
        Get extra control events from the gamepad such as intervention status,
        episode termination, success indicators, etc.
        Returns:
            Dictionary containing:
                - is_intervention: bool - Whether human is currently intervening
                - terminate_episode: bool - Whether to terminate the current episode
                - success: bool - Whether the episode was successful
                - rerecord_episode: bool - Whether to rerecord the episode
        """
        if self.gamepad is None:
            return {
                "is_intervention": False,
                "terminate_episode": False,
                "success": False,
                "rerecord_episode": False,
            }
        # Update gamepad state to get fresh inputs
        self.gamepad.update()
        # Check if intervention is active
        is_intervention = self.gamepad.should_intervene()
        # Get episode end status
        episode_end_status = self.gamepad.get_episode_end_status()
        terminate_episode = episode_end_status is not None
        success = episode_end_status == "success"
        rerecord_episode = episode_end_status == "rerecord_episode"
        return {
            "is_intervention": is_intervention,
            "terminate_episode": terminate_episode,
            "success": success,
            "rerecord_episode": rerecord_episode,
        }
    def disconnect(self) -> None:
        """Disconnect from the gamepad."""
        if self.gamepad is not None:
@@ -235,3 +235,67 @@ class KeyboardEndEffectorTeleop(KeyboardTeleop):
            action_dict["gripper"] = gripper_action
        return action_dict
    def get_teleop_events(self) -> dict[str, Any]:
        """
        Get extra control events from the keyboard such as intervention status,
        episode termination, success indicators, etc.
        Keyboard mappings:
        - Any movement keys pressed = intervention active
        - 's' key = success (terminate episode successfully)
        - 'r' key = rerecord episode (terminate and rerecord)
        - 'q' key = quit episode (terminate without success)
        Returns:
            Dictionary containing:
                - is_intervention: bool - Whether human is currently intervening
                - terminate_episode: bool - Whether to terminate the current episode
                - success: bool - Whether the episode was successful
                - rerecord_episode: bool - Whether to rerecord the episode
        """
        if not self.is_connected:
            return {
                "is_intervention": False,
                "terminate_episode": False,
                "success": False,
                "rerecord_episode": False,
            }
        # Check if any movement keys are currently pressed (indicates intervention)
        movement_keys = [
            keyboard.Key.up,
            keyboard.Key.down,
            keyboard.Key.left,
            keyboard.Key.right,
            keyboard.Key.shift,
            keyboard.Key.shift_r,
            keyboard.Key.ctrl_r,
            keyboard.Key.ctrl_l,
        ]
        is_intervention = any(self.current_pressed.get(key, False) for key in movement_keys)
        # Check for episode control commands from misc_keys_queue
        terminate_episode = False
        success = False
        rerecord_episode = False
        # Process any pending misc keys
        while not self.misc_keys_queue.empty():
            key = self.misc_keys_queue.get_nowait()
            if key == "s":
                terminate_episode = True
                success = True
            elif key == "r":
                terminate_episode = True
                rerecord_episode = True
            elif key == "q":
                terminate_episode = True
                success = False
        return {
            "is_intervention": is_intervention,
            "terminate_episode": terminate_episode,
            "success": success,
            "rerecord_episode": rerecord_episode,
        }
@@ -160,6 +160,18 @@ class Teleoperator(abc.ABC):
        """
        pass
    @abc.abstractmethod
    def get_teleop_events(self) -> dict[str, Any]:
        """
        Get extra control events from the teleoperator such as intervention status,
        episode termination, success indicators, etc.
        Check the implementation of the gamepad for an example.
        Returns:
            dict[str, Any]: A dictionary containing control events with keys and values that are specific to the setup.
        """
        pass
    @abc.abstractmethod
    def send_feedback(self, feedback: dict[str, Any]) -> None:
        """