Refactored hilserl config

2026-07-24 02:06:15 +00:00 · 2025-08-05 01:24:46 +02:00
parent b292dbbc55
commit 9effc5214f
4 changed files with 289 additions and 104 deletions
@@ -56,27 +56,38 @@ pip install -e ".[hilserl]"
 ### Understanding Configuration
-The training process begins with proper configuration for the HILSerl environment. The configuration class of interest is `HILSerlRobotEnvConfig` in `lerobot/envs/configs.py`. Which is defined as:
+The training process begins with proper configuration for the HILSerl environment. The configuration class of interest is `HILSerlRobotEnvConfig` in `lerobot/envs/configs.py`. The configuration is now organized into focused, nested sub-configs:
 <!-- prettier-ignore-start -->
 ```python
 class HILSerlRobotEnvConfig(EnvConfig):
    robot: RobotConfig | None = None    # Main robot agent (defined in `lerobot/robots`)
-    teleop: TeleoperatorConfig | None = None    # Teleoperator agent, e.g., gamepad or leader arm, (defined in `lerobot/teleoperators`)
+    teleop: TeleoperatorConfig | None = None    # Teleoperator agent, e.g., gamepad or leader arm
-    wrapper: EnvTransformConfig | None = None    # Environment wrapper settings; check `lerobot/scripts/server/gym_manipulator.py`
+    processor: HILSerlProcessorConfig    # Processing pipeline configuration (nested)
-    fps: int = 10    # Control frequency
+    dataset: DatasetConfig    # Dataset recording/replay configuration (nested)
    name: str = "real_robot"    # Environment name
    mode: str = None    # "record", "replay", or None (for training)
    device: str = "cuda"    # Compute device
 # Nested processor configuration
 class HILSerlProcessorConfig:
    control_mode: str = "gamepad"    # Control mode
    observation: ObservationConfig    # Observation processing settings
    image_preprocessing: ImagePreprocessingConfig    # Image crop/resize settings
    gripper: GripperConfig    # Gripper control and penalty settings
    reset: ResetConfig    # Environment reset and timing settings
    inverse_kinematics: InverseKinematicsConfig    # IK processing settings
    reward_classifier: RewardClassifierConfig    # Reward classifier settings
 # Dataset configuration
 class DatasetConfig:
    repo_id: str | None = None    # LeRobot dataset repository ID
    dataset_root: str | None = None    # Local dataset root (optional)
    task: str = ""    # Task identifier
    num_episodes: int = 10    # Number of episodes for recording
-    episode: int = 0    # episode index for replay
+    episode: int = 0    # Episode index for replay
-    device: str = "cuda"    # Compute device
+    push_to_hub: bool = True    # Whether to push datasets to Hub
-    push_to_hub: bool = True    # Whether to push the recorded datasets to Hub
+    fps: int = 10    # Control frequency
    pretrained_policy_name_or_path: str | None = None    # For policy loading
    reward_classifier_pretrained_path: str | None = None    # For reward model
    number_of_steps_after_success: int = 0    # For reward classifier, collect more positive examples after a success to train a classifier
 ```
 <!-- prettier-ignore-end -->
@@ -133,19 +144,22 @@ Create a configuration file for recording demonstrations (or edit an existing on
 1. Set `mode` to `"record"`
 2. Specify a unique `repo_id` for your dataset (e.g., "username/task_name")
 3. Set `num_episodes` to the number of demonstrations you want to collect
-4. Set `crop_params_dict` to `null` initially (we'll determine crops later)
+4. Set `processor.image_preprocessing.crop_params_dict` to `{}` initially (we'll determine crops later)
 5. Configure `robot`, `cameras`, and other hardware settings
 Example configuration section:
 ```json
 "mode": "record",
-"repo_id": "username/pick_lift_cube",
+"dataset": {
-"dataset_root": null,
+    "repo_id": "username/pick_lift_cube",
-"task": "pick_and_lift",
+    "dataset_root": null,
-"num_episodes": 15,
+    "task": "pick_and_lift",
-"episode": 0,
+    "num_episodes": 15,
-"push_to_hub": true
+    "episode": 0,
    "push_to_hub": true,
    "fps": 10
 }
 ```
 ### Using a Teleoperation Device
@@ -191,10 +205,13 @@ The gamepad provides a very convenient way to control the robot and the episode
 To setup the gamepad, you need to set the `control_mode` to `"gamepad"` and define the `teleop` section in the configuration file.
 ```json
-    "teleop": {
+"teleop": {
-        "type": "gamepad",
+    "type": "gamepad",
-        "use_gripper": true
+    "use_gripper": true
-    },
+},
 "processor": {
    "control_mode": "gamepad"
 }
 ```
 <p align="center">
@@ -216,11 +233,14 @@ The SO101 leader arm has reduced gears that allows it to move and track the foll
 To setup the SO101 leader, you need to set the `control_mode` to `"leader"` and define the `teleop` section in the configuration file.
 ```json
-    "teleop": {
+"teleop": {
-        "type": "so101_leader",
+    "type": "so101_leader",
-        "port": "/dev/tty.usbmodem585A0077921", # check your port number
+    "port": "/dev/tty.usbmodem585A0077921",
-        "use_degrees": true
+    "use_degrees": true
-    },
+},
 "processor": {
    "control_mode": "leader"
 }
 ```
 In order to annotate the success/failure of the episode, **you will need** to use a keyboard to press `s` for success, `esc` for failure.
@@ -251,7 +271,7 @@ python -m lerobot.scripts.rl.gym_manipulator --config_path src/lerobot/configs/e
 During recording:
-1. The robot will reset to the initial position defined in the configuration file `fixed_reset_joint_positions`
+1. The robot will reset to the initial position defined in the configuration file `processor.reset.fixed_reset_joint_positions`
 2. Complete the task successfully
 3. The episode ends with a reward of 1 when you press the "success" button
 4. If the time limit is reached, or the fail button is pressed, the episode ends with a reward of 0
@@ -310,11 +330,15 @@ observation.images.front: [180, 250, 120, 150]
 Add these crop parameters to your training configuration:
 ```json
-"crop_params_dict": {
+"processor": {
-    "observation.images.side": [180, 207, 180, 200],
+    "image_preprocessing": {
-    "observation.images.front": [180, 250, 120, 150]
+        "crop_params_dict": {
-},
+            "observation.images.side": [180, 207, 180, 200],
-"resize_size": [128, 128]
+            "observation.images.front": [180, 250, 120, 150]
        },
        "resize_size": [128, 128]
    }
 }
 ```
 **Recommended image resolution**
@@ -346,23 +370,29 @@ python -m lerobot.scripts.rl.gym_manipulator --config_path src/lerobot/configs/r
 - **mode**: set it to `"record"` to collect a dataset
 - **repo_id**: `"hf_username/dataset_name"`, name of the dataset and repo on the hub
 - **num_episodes**: Number of episodes to record
- **number_of_steps_after_success**: Number of additional frames to record after a success (reward=1) is detected
+- **processor.reset.number_of_steps_after_success**: Number of additional frames to record after a success (reward=1) is detected
 - **fps**: Number of frames per second to record
 - **push_to_hub**: Whether to push the dataset to the hub
-The `number_of_steps_after_success` parameter is crucial as it allows you to collect more positive examples. When a success is detected, the system will continue recording for the specified number of steps while maintaining the reward=1 label. Otherwise, there won't be enough states in the dataset labeled to 1 to train a good classifier.
+The `processor.reset.number_of_steps_after_success` parameter is crucial as it allows you to collect more positive examples. When a success is detected, the system will continue recording for the specified number of steps while maintaining the reward=1 label. Otherwise, there won't be enough states in the dataset labeled to 1 to train a good classifier.
 Example configuration section for data collection:
 ```json
 {
  "mode": "record",
-  "repo_id": "hf_username/dataset_name",
+  "dataset": {
-  "dataset_root": "data/your_dataset",
+    "repo_id": "hf_username/dataset_name",
-  "num_episodes": 20,
+    "dataset_root": "data/your_dataset",
-  "push_to_hub": true,
+    "num_episodes": 20,
-  "fps": 10,
+    "push_to_hub": true,
-  "number_of_steps_after_success": 15
+    "fps": 10
  },
  "processor": {
    "reset": {
      "number_of_steps_after_success": 15
    }
  }
 }
 ```
@@ -422,7 +452,11 @@ To use your trained reward classifier, configure the `HILSerlRobotEnvConfig` to
 <!-- prettier-ignore-start -->
 ```python
 env_config = HILSerlRobotEnvConfig(
-    reward_classifier_pretrained_path="path_to_your_pretrained_trained_model",
+    processor=HILSerlProcessorConfig(
        reward_classifier=RewardClassifierConfig(
            pretrained_path="path_to_your_pretrained_trained_model"
        )
    ),
    # Other environment parameters
 )
 ```
@@ -432,7 +466,13 @@ or set the argument in the json config file.
 ```json
 {
-  "reward_classifier_pretrained_path": "path_to_your_pretrained_model"
+  "processor": {
    "reward_classifier": {
      "pretrained_path": "path_to_your_pretrained_model",
      "success_threshold": 0.7,
      "success_reward": 1.0
    }
  }
 }
 ```
@@ -161,33 +161,36 @@ class XarmEnv(EnvConfig):
@dataclass
-class VideoRecordConfig:
+class ImagePreprocessingConfig:
-    """Configuration for video recording in ManiSkill environments."""
+    crop_params_dict: dict[str, tuple[int, int, int, int]] | None = None
-
+    resize_size: tuple[int, int] | None = None
    enabled: bool = False
    record_dir: str = "videos"
    trajectory_name: str = "trajectory"
@dataclass
-class HILSerlProcessorConfig:
+class DatasetConfig:
-    """Configuration for environment wrappers."""
+    """Configuration for dataset recording and replay."""
-    # ee_action_space_params: EEActionSpaceConfig = field(default_factory=EEActionSpaceConfig)
+    repo_id: str | None = None
-    control_mode: str = "gamepad"
+    dataset_root: str | None = None
-    display_cameras: bool = False
+    task: str | None = ""
-    add_joint_velocity_to_observation: bool = False
+    num_episodes: int = 10
-    add_current_to_observation: bool = False
+    episode: int = 0  # for replay mode
-    add_ee_pose_to_observation: bool = False
+    push_to_hub: bool = True
-    crop_params_dict: dict[str, tuple[int, int, int, int]] | None = None
+    fps: int = 10
-    resize_size: tuple[int, int] | None = None
+
-    control_time_s: float = 20.0
+
-    fixed_reset_joint_positions: Any | None = None
+@dataclass
-    reset_time_s: float = 5.0
+class RewardClassifierConfig:
-    use_gripper: bool = True
+    """Configuration for reward classification."""
-    gripper_quantization_threshold: float | None = 0.8
+
-    gripper_penalty: float = 0.0
+    pretrained_path: str | None = None
-    gripper_penalty_in_reward: bool = False
+    success_threshold: float = 0.5
    success_reward: float = 1.0
@dataclass
 class InverseKinematicsConfig:
    """Configuration for inverse kinematics processing."""
    urdf_path: str | None = None
    target_frame_name: str | None = None
@@ -196,6 +199,50 @@ class HILSerlProcessorConfig:
    max_gripper_pos: float | None = None
@dataclass
 class ObservationConfig:
    """Configuration for observation processing."""
    add_joint_velocity_to_observation: bool = False
    add_current_to_observation: bool = False
    add_ee_pose_to_observation: bool = False
    display_cameras: bool = False
@dataclass
 class GripperConfig:
    """Configuration for gripper control and penalties."""
    use_gripper: bool = True
    gripper_quantization_threshold: float | None = 0.8
    gripper_penalty: float = 0.0
    gripper_penalty_in_reward: bool = False
@dataclass
 class ResetConfig:
    """Configuration for environment reset behavior."""
    fixed_reset_joint_positions: Any | None = None
    reset_time_s: float = 5.0
    control_time_s: float = 20.0
    terminate_on_success: bool = True
    number_of_steps_after_success: int = 0
@dataclass
 class HILSerlProcessorConfig:
    """Configuration for environment processing pipeline."""
    control_mode: str = "gamepad"
    observation: ObservationConfig = field(default_factory=ObservationConfig)
    image_preprocessing: ImagePreprocessingConfig = field(default_factory=ImagePreprocessingConfig)
    gripper: GripperConfig = field(default_factory=GripperConfig)
    reset: ResetConfig = field(default_factory=ResetConfig)
    inverse_kinematics: InverseKinematicsConfig = field(default_factory=InverseKinematicsConfig)
    reward_classifier: RewardClassifierConfig = field(default_factory=RewardClassifierConfig)
@EnvConfig.register_subclass(name="gym_manipulator")
@dataclass
 class HILSerlRobotEnvConfig(EnvConfig):
@@ -203,21 +250,12 @@ class HILSerlRobotEnvConfig(EnvConfig):
    robot: RobotConfig | None = None
    teleop: TeleoperatorConfig | None = None
-    processor: HILSerlProcessorConfig | None = None
+    processor: HILSerlProcessorConfig = field(default_factory=HILSerlProcessorConfig)
-    fps: int = 10
+    dataset: DatasetConfig = field(default_factory=DatasetConfig)
    name: str = "real_robot"
    mode: str | None = None  # Either "record", "replay", None
    repo_id: str | None = None
    dataset_root: str | None = None
    task: str | None = ""
    num_episodes: int = 10  # only for record mode
    episode: int = 0
    device: str = "cuda"
    push_to_hub: bool = True
    pretrained_policy_name_or_path: str | None = None
    reward_classifier_pretrained_path: str | None = None
    # For the reward classifier, to record more positive examples after a success
    number_of_steps_after_success: int = 0
    @property
    def gym_kwargs(self) -> dict:
@@ -1,3 +1,4 @@
 import time
 from dataclasses import dataclass
 from typing import Any
@@ -241,3 +242,90 @@ class InterventionActionProcessor:
    def feature_contract(self, features: dict[str, PolicyFeature]) -> dict[str, PolicyFeature]:
        return features
@dataclass
@ProcessorStepRegistry.register("reward_classifier_processor")
 class RewardClassifierProcessor:
    """Apply reward classification to image observations.
    This processor runs a trained reward classifier on image observations
    to predict rewards and success states, potentially terminating episodes
    when success is achieved.
    """
    pretrained_path: str = None
    device: str = "cpu"
    success_threshold: float = 0.5
    success_reward: float = 1.0
    terminate_on_success: bool = True
    reward_classifier: Any = None
    def __post_init__(self):
        """Initialize the reward classifier after dataclass initialization."""
        if self.pretrained_path is not None:
            from lerobot.policies.sac.reward_model.modeling_classifier import Classifier
            self.reward_classifier = Classifier.from_pretrained(self.pretrained_path)
            self.reward_classifier.to(self.device)
            self.reward_classifier.eval()
    def __call__(self, transition: EnvTransition) -> EnvTransition:
        observation = transition.get(TransitionKey.OBSERVATION)
        if observation is None or self.reward_classifier is None:
            return transition
        # Extract images from observation
        images = {key: value for key, value in observation.items() if "image" in key}
        if not images:
            return transition
        # Run reward classifier
        start_time = time.perf_counter()
        with torch.inference_mode():
            success = self.reward_classifier.predict_reward(images, threshold=self.success_threshold)
        classifier_frequency = 1 / (time.perf_counter() - start_time)
        # Calculate reward and termination
        reward = transition.get(TransitionKey.REWARD, 0.0)
        terminated = transition.get(TransitionKey.DONE, False)
        if success == 1.0:
            reward = self.success_reward
            if self.terminate_on_success:
                terminated = True
        # Update transition
        new_transition = transition.copy()
        new_transition[TransitionKey.REWARD] = reward
        new_transition[TransitionKey.DONE] = terminated
        # Update info with classifier frequency
        info = new_transition.get(TransitionKey.INFO, {})
        info["reward_classifier_frequency"] = classifier_frequency
        new_transition[TransitionKey.INFO] = info
        return new_transition
    def get_config(self) -> dict[str, Any]:
        return {
            "device": self.device,
            "success_threshold": self.success_threshold,
            "success_reward": self.success_reward,
            "terminate_on_success": self.terminate_on_success,
        }
    def state_dict(self) -> dict[str, torch.Tensor]:
        return {}
    def load_state_dict(self, state: dict[str, torch.Tensor]) -> None:
        pass
    def reset(self) -> None:
        pass
    def feature_contract(self, features: dict[str, PolicyFeature]) -> dict[str, PolicyFeature]:
        return features
@@ -36,6 +36,7 @@ from lerobot.processor.hil_processor import (
    GripperPenaltyProcessor,
    ImageCropResizeProcessor,
    InterventionActionProcessor,
    RewardClassifierProcessor,
    TimeLimitProcessor,
 )
 from lerobot.processor.pipeline import TransitionKey
@@ -308,9 +309,9 @@ def make_robot_env(cfg: EnvConfig) -> tuple[gym.Env, Any]:
    # Create base environment
    env = RobotEnv(
        robot=robot,
-        use_gripper=cfg.processor.use_gripper,
+        use_gripper=cfg.processor.gripper.use_gripper,
-        display_cameras=cfg.processor.display_cameras,
+        display_cameras=cfg.processor.observation.display_cameras,
-        reset_pose=cfg.processor.fixed_reset_joint_positions,
+        reset_pose=cfg.processor.reset.fixed_reset_joint_positions,
    )
    return env, teleop_device
@@ -330,33 +331,48 @@ def make_processors(env, cfg):
    env_pipeline_steps = [
        ImageProcessor(),
        StateProcessor(),
-        JointVelocityProcessor(dt=1.0 / cfg.fps),
+        JointVelocityProcessor(dt=1.0 / cfg.dataset.fps),
        MotorCurrentProcessor(env=env),
        ImageCropResizeProcessor(
-            crop_params_dict=cfg.processor.crop_params_dict, resize_size=cfg.processor.resize_size
+            crop_params_dict=cfg.processor.image_preprocessing.crop_params_dict,
            resize_size=cfg.processor.image_preprocessing.resize_size,
        ),
-        TimeLimitProcessor(max_episode_steps=int(cfg.processor.control_time_s * cfg.fps)),
+        TimeLimitProcessor(max_episode_steps=int(cfg.processor.reset.control_time_s * cfg.dataset.fps)),
    ]
-    if cfg.processor.use_gripper:
+    if cfg.processor.gripper.use_gripper:
        env_pipeline_steps.append(
            GripperPenaltyProcessor(
-                penalty=cfg.processor.gripper_penalty, max_gripper_pos=cfg.processor.max_gripper_pos
+                penalty=cfg.processor.gripper.gripper_penalty,
                max_gripper_pos=cfg.processor.inverse_kinematics.max_gripper_pos,
            )
        )
    # Add reward classifier processor if configured
    if cfg.processor.reward_classifier.pretrained_path is not None:
        env_pipeline_steps.append(
            RewardClassifierProcessor(
                pretrained_path=cfg.processor.reward_classifier.pretrained_path,
                device=cfg.device,
                success_threshold=cfg.processor.reward_classifier.success_threshold,
                success_reward=cfg.processor.reward_classifier.success_reward,
                terminate_on_success=cfg.processor.reward_classifier.terminate_on_success,
            )
        )
    env_pipeline_steps.append(DeviceProcessor(device=cfg.device))
    env_processor = RobotProcessor(steps=env_pipeline_steps)
    action_pipeline_steps = [
        InterventionActionProcessor(
-            use_gripper=cfg.processor.use_gripper,
+            use_gripper=cfg.processor.gripper.use_gripper,
        ),
        InverseKinematicsProcessor(
-            urdf_path=cfg.processor.urdf_path,
+            urdf_path=cfg.processor.inverse_kinematics.urdf_path,
-            target_frame_name=cfg.processor.target_frame_name,
+            target_frame_name=cfg.processor.inverse_kinematics.target_frame_name,
-            end_effector_step_sizes=cfg.processor.end_effector_step_sizes,
+            end_effector_step_sizes=cfg.processor.inverse_kinematics.end_effector_step_sizes,
-            end_effector_bounds=cfg.processor.end_effector_bounds,
+            end_effector_bounds=cfg.processor.inverse_kinematics.end_effector_bounds,
-            max_gripper_pos=cfg.processor.max_gripper_pos,
+            max_gripper_pos=cfg.processor.inverse_kinematics.max_gripper_pos,
        ),
    ]
    action_processor = RobotProcessor(steps=action_pipeline_steps)
@@ -435,9 +451,9 @@ def step_env_and_process_transition(
 def control_loop(env, env_processor, action_processor, teleop_device, cfg: EnvConfig):
-    dt = 1.0 / cfg.fps
+    dt = 1.0 / cfg.dataset.fps
-    print(f"Starting control loop at {cfg.fps} FPS")
+    print(f"Starting control loop at {cfg.dataset.fps} FPS")
    print("Controls:")
    print("- Use gamepad/teleop device for intervention")
    print("- When not intervening, robot will stay still")
@@ -460,7 +476,7 @@ def control_loop(env, env_processor, action_processor, teleop_device, cfg: EnvCo
            "next.reward": {"dtype": "float32", "shape": (1,), "names": None},
            "next.done": {"dtype": "bool", "shape": (1,), "names": None},
        }
-        if cfg.processor.use_gripper:
+        if cfg.processor.gripper.use_gripper:
            features["complementary_info.discrete_penalty"] = {
                "dtype": "float32",
                "shape": (1,),
@@ -483,9 +499,9 @@ def control_loop(env, env_processor, action_processor, teleop_device, cfg: EnvCo
        # Create dataset
        dataset = LeRobotDataset.create(
-            cfg.repo_id,
+            cfg.dataset.repo_id,
-            cfg.fps,
+            cfg.dataset.fps,
-            root=cfg.dataset_root,
+            root=cfg.dataset.dataset_root,
            use_videos=True,
            image_writer_threads=4,
            image_writer_processes=0,
@@ -496,12 +512,12 @@ def control_loop(env, env_processor, action_processor, teleop_device, cfg: EnvCo
    episode_step = 0
    episode_start_time = time.perf_counter()
-    while episode_idx < cfg.num_episodes:
+    while episode_idx < cfg.dataset.num_episodes:
        step_start_time = time.perf_counter()
        # Create a neutral action (no movement)
        neutral_action = torch.tensor([0.0, 0.0, 0.0], dtype=torch.float32)
-        if hasattr(env, "use_gripper") and env.use_gripper:
+        if cfg.processor.gripper.use_gripper:
            neutral_action = torch.cat([neutral_action, torch.tensor([1.0])])  # Gripper stay
        # Use the new step function
@@ -524,11 +540,11 @@ def control_loop(env, env_processor, action_processor, teleop_device, cfg: EnvCo
                "next.reward": np.array([transition[TransitionKey.REWARD]], dtype=np.float32),
                "next.done": np.array([terminated or truncated], dtype=bool),
            }
-            if cfg.processor.use_gripper:
+            if cfg.processor.gripper.use_gripper:
                frame["complementary_info.discrete_penalty"] = np.array(
                    [transition[TransitionKey.COMPLEMENTARY_DATA]["discrete_penalty"]], dtype=np.float32
                )
-            dataset.add_frame(frame, task=cfg.task)
+            dataset.add_frame(frame, task=cfg.dataset.task)
        episode_step += 1
@@ -562,14 +578,17 @@ def control_loop(env, env_processor, action_processor, teleop_device, cfg: EnvCo
        # Maintain fps timing
        busy_wait(dt - (time.perf_counter() - step_start_time))
-    if cfg.mode == "record" and cfg.push_to_hub:
+    if cfg.mode == "record" and cfg.dataset.push_to_hub:
        logging.info("Pushing dataset to hub")
        dataset.push_to_hub()
 def replay_trajectory(env, action_processor, cfg):
    dataset = LeRobotDataset(
-        cfg.repo_id, root=cfg.dataset_root, episodes=[cfg.episode], download_videos=False
+        cfg.dataset.repo_id,
        root=cfg.dataset.dataset_root,
        episodes=[cfg.dataset.episode],
        download_videos=False,
    )
    dataset_actions = dataset.hf_dataset.select_columns(["action"])
    _, info = env.reset()
@@ -581,7 +600,7 @@ def replay_trajectory(env, action_processor, cfg):
        )
        transition = action_processor(transition)
        env.step(transition[TransitionKey.ACTION])
-        busy_wait(1 / cfg.fps - (time.perf_counter() - start_time))
+        busy_wait(1 / cfg.dataset.fps - (time.perf_counter() - start_time))
@parser.wrap()