General fixes in code, removed delta action, fixed grasp penalty, added logic to put gripper reward in info

2026-07-23 09:46:00 +00:00 · 2025-04-09 17:04:43 +02:00
parent 02e1ed0bfb
commit 9fd4c21d4d
7 changed files with 75 additions and 65 deletions
@@ -171,7 +171,6 @@ class VideoRecordConfig:
 class WrapperConfig:
    """Configuration for environment wrappers."""

-    delta_action: float | None = None
    joint_masking_action_space: list[bool] | None = None


@@ -191,7 +190,6 @@ class EnvWrapperConfig:
    """Configuration for environment wrappers."""

    display_cameras: bool = False
-    delta_action: float = 0.1
    use_relative_joint_positions: bool = True
    add_joint_velocity_to_observation: bool = False
    add_ee_pose_to_observation: bool = False
@@ -203,11 +201,13 @@ class EnvWrapperConfig:
    joint_masking_action_space: Optional[Any] = None
    ee_action_space_params: Optional[EEActionSpaceConfig] = None
    use_gripper: bool = False
-    gripper_quantization_threshold: float = 0.8
-    gripper_penalty: float = 0.0
+    gripper_quantization_threshold: float | None = 0.8
+    gripper_penalty: float = 0.0 
+    gripper_penalty_in_reward: bool = False
    open_gripper_on_reset: bool = False


+
@EnvConfig.register_subclass(name="gym_manipulator")
@dataclass
 class HILSerlRobotEnvConfig(EnvConfig):
@@ -428,6 +428,7 @@ class SACPolicy(
        actions_discrete = torch.round(actions_discrete)
        actions_discrete = actions_discrete.long()

+        gripper_penalties: Tensor | None = None
        if complementary_info is not None:
            gripper_penalties: Tensor | None = complementary_info.get("gripper_penalty")

@@ -221,7 +221,6 @@ def record_episode(
        events=events,
        policy=policy,
        fps=fps,
-        # record_delta_actions=record_delta_actions,
        teleoperate=policy is None,
        single_task=single_task,
    )
@@ -267,8 +266,6 @@ def control_loop(

        if teleoperate:
            observation, action = robot.teleop_step(record_data=True)
-            # if record_delta_actions:
-            #     action["action"] = action["action"] - current_joint_positions
        else:
            observation = robot.capture_observation()