diff --git a/docs/source/hilserl.mdx b/docs/source/hilserl.mdx index b90e613db..b4cd061ba 100644 --- a/docs/source/hilserl.mdx +++ b/docs/source/hilserl.mdx @@ -56,18 +56,22 @@ pip install -e ".[hilserl]" ### Understanding Configuration -The training process begins with proper configuration for the HILSerl environment. The configuration class of interest is `HILSerlRobotEnvConfig` in `lerobot/envs/configs.py`. The configuration is now organized into focused, nested sub-configs: +The training process begins with proper configuration for the HILSerl environment. The main configuration class is `GymManipulatorConfig` in `lerobot/scripts/rl/gym_manipulator.py`, which contains nested `HILSerlRobotEnvConfig` and `DatasetConfig`. The configuration is organized into focused, nested sub-configs: ```python +class GymManipulatorConfig: + env: HILSerlRobotEnvConfig # Environment configuration (nested) + dataset: DatasetConfig # Dataset recording/replay configuration (nested) + mode: str | None = None # "record", "replay", or None (for training) + class HILSerlRobotEnvConfig(EnvConfig): robot: RobotConfig | None = None # Main robot agent (defined in `lerobot/robots`) teleop: TeleoperatorConfig | None = None # Teleoperator agent, e.g., gamepad or leader arm processor: HILSerlProcessorConfig # Processing pipeline configuration (nested) - dataset: DatasetConfig # Dataset recording/replay configuration (nested) name: str = "real_robot" # Environment name - mode: str = None # "record", "replay", or None (for training) device: str = "cuda" # Compute device + fps: int = 30 # Control frequency # Nested processor configuration class HILSerlProcessorConfig: @@ -81,13 +85,12 @@ class HILSerlProcessorConfig: # Dataset configuration class DatasetConfig: - repo_id: str | None = None # LeRobot dataset repository ID + repo_id: str # LeRobot dataset repository ID dataset_root: str | None = None # Local dataset root (optional) - task: str = "" # Task identifier - num_episodes: int = 10 # Number of episodes for recording - episode: int = 0 # Episode index for replay - push_to_hub: bool = True # Whether to push datasets to Hub - fps: int = 10 # Control frequency + task: str # Task identifier + num_episodes: int # Number of episodes for recording + episode: int # Episode index for replay + push_to_hub: bool # Whether to push datasets to Hub ``` @@ -141,24 +144,30 @@ With the bounds defined, you can safely collect demonstrations for training. Tra Create a configuration file for recording demonstrations (or edit an existing one like [env_config_so100.json](https://huggingface.co/datasets/aractingi/lerobot-example-config-files/blob/main/env_config_so100.json)): -1. Set `mode` to `"record"` -2. Specify a unique `repo_id` for your dataset (e.g., "username/task_name") -3. Set `num_episodes` to the number of demonstrations you want to collect -4. Set `processor.image_preprocessing.crop_params_dict` to `{}` initially (we'll determine crops later) -5. Configure `robot`, `cameras`, and other hardware settings +1. Set `mode` to `"record"` at the root level +2. Specify a unique `repo_id` for your dataset in the `dataset` section (e.g., "username/task_name") +3. Set `num_episodes` in the `dataset` section to the number of demonstrations you want to collect +4. Set `env.processor.image_preprocessing.crop_params_dict` to `{}` initially (we'll determine crops later) +5. Configure `env.robot`, `env.teleop`, and other hardware settings in the `env` section Example configuration section: ```json -"mode": "record", -"dataset": { - "repo_id": "username/pick_lift_cube", - "dataset_root": null, - "task": "pick_and_lift", - "num_episodes": 15, - "episode": 0, - "push_to_hub": true, - "fps": 10 +{ + "env": { + "type": "gym_manipulator", + "fps": 10, + // ... robot, teleop, processor configs ... + }, + "dataset": { + "repo_id": "username/pick_lift_cube", + "dataset_root": null, + "task": "pick_and_lift", + "num_episodes": 15, + "episode": 0, + "push_to_hub": true + }, + "mode": "record" } ``` @@ -205,12 +214,16 @@ The gamepad provides a very convenient way to control the robot and the episode To setup the gamepad, you need to set the `control_mode` to `"gamepad"` and define the `teleop` section in the configuration file. ```json -"teleop": { - "type": "gamepad", - "use_gripper": true -}, -"processor": { - "control_mode": "gamepad" +{ + "env": { + "teleop": { + "type": "gamepad", + "use_gripper": true + }, + "processor": { + "control_mode": "gamepad" + } + } } ``` @@ -233,13 +246,17 @@ The SO101 leader arm has reduced gears that allows it to move and track the foll To setup the SO101 leader, you need to set the `control_mode` to `"leader"` and define the `teleop` section in the configuration file. ```json -"teleop": { - "type": "so101_leader", - "port": "/dev/tty.usbmodem585A0077921", - "use_degrees": true -}, -"processor": { - "control_mode": "leader" +{ + "env": { + "teleop": { + "type": "so101_leader", + "port": "/dev/tty.usbmodem585A0077921", + "use_degrees": true + }, + "processor": { + "control_mode": "leader" + } + } } ``` @@ -271,7 +288,7 @@ python -m lerobot.scripts.rl.gym_manipulator --config_path src/lerobot/configs/e During recording: -1. The robot will reset to the initial position defined in the configuration file `processor.reset.fixed_reset_joint_positions` +1. The robot will reset to the initial position defined in the configuration file `env.processor.reset.fixed_reset_joint_positions` 2. Complete the task successfully 3. The episode ends with a reward of 1 when you press the "success" button 4. If the time limit is reached, or the fail button is pressed, the episode ends with a reward of 0 @@ -330,13 +347,17 @@ observation.images.front: [180, 250, 120, 150] Add these crop parameters to your training configuration: ```json -"processor": { - "image_preprocessing": { - "crop_params_dict": { - "observation.images.side": [180, 207, 180, 200], - "observation.images.front": [180, 250, 120, 150] - }, - "resize_size": [128, 128] +{ + "env": { + "processor": { + "image_preprocessing": { + "crop_params_dict": { + "observation.images.side": [180, 207, 180, 200], + "observation.images.front": [180, 250, 120, 150] + }, + "resize_size": [128, 128] + } + } } } ``` @@ -367,32 +388,35 @@ python -m lerobot.scripts.rl.gym_manipulator --config_path src/lerobot/configs/r **Key Parameters for Data Collection** -- **mode**: set it to `"record"` to collect a dataset -- **repo_id**: `"hf_username/dataset_name"`, name of the dataset and repo on the hub -- **num_episodes**: Number of episodes to record -- **processor.reset.number_of_steps_after_success**: Number of additional frames to record after a success (reward=1) is detected -- **fps**: Number of frames per second to record -- **push_to_hub**: Whether to push the dataset to the hub +- **mode**: set it to `"record"` to collect a dataset (at root level) +- **dataset.repo_id**: `"hf_username/dataset_name"`, name of the dataset and repo on the hub +- **dataset.num_episodes**: Number of episodes to record +- **env.processor.reset.number_of_steps_after_success**: Number of additional frames to record after a success (reward=1) is detected +- **env.fps**: Number of frames per second to record +- **dataset.push_to_hub**: Whether to push the dataset to the hub -The `processor.reset.number_of_steps_after_success` parameter is crucial as it allows you to collect more positive examples. When a success is detected, the system will continue recording for the specified number of steps while maintaining the reward=1 label. Otherwise, there won't be enough states in the dataset labeled to 1 to train a good classifier. +The `env.processor.reset.number_of_steps_after_success` parameter is crucial as it allows you to collect more positive examples. When a success is detected, the system will continue recording for the specified number of steps while maintaining the reward=1 label. Otherwise, there won't be enough states in the dataset labeled to 1 to train a good classifier. Example configuration section for data collection: ```json { - "mode": "record", + "env": { + "type": "gym_manipulator", + "fps": 10, + "processor": { + "reset": { + "number_of_steps_after_success": 15 + } + } + }, "dataset": { "repo_id": "hf_username/dataset_name", "dataset_root": "data/your_dataset", "num_episodes": 20, - "push_to_hub": true, - "fps": 10 + "push_to_hub": true }, - "processor": { - "reset": { - "number_of_steps_after_success": 15 - } - } + "mode": "record" } ``` @@ -451,13 +475,17 @@ To use your trained reward classifier, configure the `HILSerlRobotEnvConfig` to ```python -env_config = HILSerlRobotEnvConfig( - processor=HILSerlProcessorConfig( - reward_classifier=RewardClassifierConfig( - pretrained_path="path_to_your_pretrained_trained_model" - ) +config = GymManipulatorConfig( + env=HILSerlRobotEnvConfig( + processor=HILSerlProcessorConfig( + reward_classifier=RewardClassifierConfig( + pretrained_path="path_to_your_pretrained_trained_model" + ) + ), + # Other environment parameters ), - # Other environment parameters + dataset=DatasetConfig(...), + mode=None # For training ) ``` @@ -466,11 +494,13 @@ or set the argument in the json config file. ```json { - "processor": { - "reward_classifier": { - "pretrained_path": "path_to_your_pretrained_model", - "success_threshold": 0.7, - "success_reward": 1.0 + "env": { + "processor": { + "reward_classifier": { + "pretrained_path": "path_to_your_pretrained_model", + "success_threshold": 0.7, + "success_reward": 1.0 + } } } } diff --git a/src/lerobot/envs/configs.py b/src/lerobot/envs/configs.py index 78a52616d..cef339e0c 100644 --- a/src/lerobot/envs/configs.py +++ b/src/lerobot/envs/configs.py @@ -166,19 +166,6 @@ class ImagePreprocessingConfig: resize_size: tuple[int, int] | None = None -@dataclass -class DatasetConfig: - """Configuration for dataset recording and replay.""" - - repo_id: str | None = None - dataset_root: str | None = None - task: str | None = "" - num_episodes: int = 10 - episode: int = 0 # for replay mode - push_to_hub: bool = True - fps: int = 10 - - @dataclass class RewardClassifierConfig: """Configuration for reward classification.""" @@ -214,7 +201,6 @@ class GripperConfig: """Configuration for gripper control and penalties.""" use_gripper: bool = True - gripper_quantization_threshold: float | None = 0.8 gripper_penalty: float = 0.0 gripper_penalty_in_reward: bool = False @@ -251,10 +237,8 @@ class HILSerlRobotEnvConfig(EnvConfig): robot: RobotConfig | None = None teleop: TeleoperatorConfig | None = None processor: HILSerlProcessorConfig = field(default_factory=HILSerlProcessorConfig) - dataset: DatasetConfig = field(default_factory=DatasetConfig) name: str = "real_robot" - mode: str | None = None # Either "record", "replay", None device: str = "cuda" @property diff --git a/src/lerobot/scripts/rl/gym_manipulator.py b/src/lerobot/scripts/rl/gym_manipulator.py index b83718e94..cb3cad262 100644 --- a/src/lerobot/scripts/rl/gym_manipulator.py +++ b/src/lerobot/scripts/rl/gym_manipulator.py @@ -16,6 +16,7 @@ import logging import time +from dataclasses import dataclass from typing import Any import gymnasium as gym @@ -25,7 +26,7 @@ import torch from lerobot.cameras import opencv # noqa: F401 from lerobot.configs import parser from lerobot.datasets.lerobot_dataset import LeRobotDataset -from lerobot.envs.configs import EnvConfig +from lerobot.envs.configs import HILSerlRobotEnvConfig from lerobot.processor import ( DeviceProcessor, ImageProcessor, @@ -63,6 +64,23 @@ from lerobot.utils.utils import log_say logging.basicConfig(level=logging.INFO) +@dataclass +class DatasetConfig: + repo_id: str + dataset_root: str + task: str + num_episodes: int + episode: int + push_to_hub: bool + + +@dataclass +class GymManipulatorConfig: + env: HILSerlRobotEnvConfig + dataset: DatasetConfig + mode: str | None = None # Either "record", "replay", None + + def create_transition( observation=None, action=None, reward=0.0, done=False, truncated=False, info=None, complementary_data=None ): @@ -287,7 +305,7 @@ class RobotEnv(gym.Env): self.robot.disconnect() -def make_robot_env(cfg: EnvConfig) -> tuple[gym.Env, Any]: +def make_robot_env(cfg: HILSerlRobotEnvConfig) -> tuple[gym.Env, Any]: """ Factory function to create a robot environment. @@ -317,7 +335,7 @@ def make_robot_env(cfg: EnvConfig) -> tuple[gym.Env, Any]: return env, teleop_device -def make_processors(env, cfg): +def make_processors(env: RobotEnv, cfg: HILSerlRobotEnvConfig): """ Factory function to create environment and action processors. @@ -331,13 +349,13 @@ def make_processors(env, cfg): env_pipeline_steps = [ ImageProcessor(), StateProcessor(), - JointVelocityProcessor(dt=1.0 / cfg.dataset.fps), + JointVelocityProcessor(dt=1.0 / cfg.fps), MotorCurrentProcessor(env=env), ImageCropResizeProcessor( crop_params_dict=cfg.processor.image_preprocessing.crop_params_dict, resize_size=cfg.processor.image_preprocessing.resize_size, ), - TimeLimitProcessor(max_episode_steps=int(cfg.processor.reset.control_time_s * cfg.dataset.fps)), + TimeLimitProcessor(max_episode_steps=int(cfg.processor.reset.control_time_s * cfg.fps)), ] if cfg.processor.gripper.use_gripper: env_pipeline_steps.append( @@ -355,7 +373,6 @@ def make_processors(env, cfg): device=cfg.device, success_threshold=cfg.processor.reward_classifier.success_threshold, success_reward=cfg.processor.reward_classifier.success_reward, - terminate_on_success=cfg.processor.reward_classifier.terminate_on_success, ) ) @@ -450,10 +467,10 @@ def step_env_and_process_transition( return new_transition, terminate_episode -def control_loop(env, env_processor, action_processor, teleop_device, cfg: EnvConfig): - dt = 1.0 / cfg.dataset.fps +def control_loop(env, env_processor, action_processor, teleop_device, cfg: GymManipulatorConfig): + dt = 1.0 / cfg.env.fps - print(f"Starting control loop at {cfg.dataset.fps} FPS") + print(f"Starting control loop at {cfg.env.fps} FPS") print("Controls:") print("- Use gamepad/teleop device for intervention") print("- When not intervening, robot will stay still") @@ -476,7 +493,7 @@ def control_loop(env, env_processor, action_processor, teleop_device, cfg: EnvCo "next.reward": {"dtype": "float32", "shape": (1,), "names": None}, "next.done": {"dtype": "bool", "shape": (1,), "names": None}, } - if cfg.processor.gripper.use_gripper: + if cfg.env.processor.gripper.use_gripper: features["complementary_info.discrete_penalty"] = { "dtype": "float32", "shape": (1,), @@ -500,7 +517,7 @@ def control_loop(env, env_processor, action_processor, teleop_device, cfg: EnvCo # Create dataset dataset = LeRobotDataset.create( cfg.dataset.repo_id, - cfg.dataset.fps, + cfg.env.fps, root=cfg.dataset.dataset_root, use_videos=True, image_writer_threads=4, @@ -517,7 +534,7 @@ def control_loop(env, env_processor, action_processor, teleop_device, cfg: EnvCo # Create a neutral action (no movement) neutral_action = torch.tensor([0.0, 0.0, 0.0], dtype=torch.float32) - if cfg.processor.gripper.use_gripper: + if cfg.env.processor.gripper.use_gripper: neutral_action = torch.cat([neutral_action, torch.tensor([1.0])]) # Gripper stay # Use the new step function @@ -540,7 +557,7 @@ def control_loop(env, env_processor, action_processor, teleop_device, cfg: EnvCo "next.reward": np.array([transition[TransitionKey.REWARD]], dtype=np.float32), "next.done": np.array([terminated or truncated], dtype=bool), } - if cfg.processor.gripper.use_gripper: + if cfg.env.processor.gripper.use_gripper: frame["complementary_info.discrete_penalty"] = np.array( [transition[TransitionKey.COMPLEMENTARY_DATA]["discrete_penalty"]], dtype=np.float32 ) @@ -583,7 +600,7 @@ def control_loop(env, env_processor, action_processor, teleop_device, cfg: EnvCo dataset.push_to_hub() -def replay_trajectory(env, action_processor, cfg): +def replay_trajectory(env, action_processor, cfg: GymManipulatorConfig): dataset = LeRobotDataset( cfg.dataset.repo_id, root=cfg.dataset.dataset_root, @@ -600,13 +617,13 @@ def replay_trajectory(env, action_processor, cfg): ) transition = action_processor(transition) env.step(transition[TransitionKey.ACTION]) - busy_wait(1 / cfg.dataset.fps - (time.perf_counter() - start_time)) + busy_wait(1 / cfg.env.fps - (time.perf_counter() - start_time)) @parser.wrap() -def main(cfg: EnvConfig): - env, teleop_device = make_robot_env(cfg) - env_processor, action_processor = make_processors(env, cfg) +def main(cfg: GymManipulatorConfig): + env, teleop_device = make_robot_env(cfg.env) + env_processor, action_processor = make_processors(env, cfg.env) print("Environment observation space:", env.observation_space) print("Environment action space:", env.action_space)