mirror of
https://github.com/huggingface/lerobot.git
synced 2026-05-18 02:00:03 +00:00
0053defa2e
* Migrate gym_manipulator to use the pipeline Added get_teleop_events function to capture relevant events from teleop devices unrelated to actions * Added the capability to record a dataset * Added the replay functionality with the pipeline * Refactored `actor.py` to use the pipeline * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * RL works at this commit - fixed actor.py and bugs in gym_manipulator * change folder structure to reduce the size of gym_manip * Refactored hilserl config * Remove dataset and mode from HilSerlEnvConfig to a GymManipulatorConfig to reduce verbose of configs during training * format docs * removed get_teleop_events from abc * Refactor environment configuration and processing pipeline for GymHIL support. Removed device attribute from HILSerlRobotEnvConfig, added DummyTeleopDevice for simulation, and updated processor creation to accommodate GymHIL environments. * Improved typing for HILRobotEnv config and GymManipulator config * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Migrated `gym_manipulator` to use a more modular structure similar to phone teleop * Refactor gripper handling and transition processing in HIL and robot kinematic processors - Updated gripper position handling to use a consistent key format across processors - Improved the EEReferenceAndDelta class to handle reference joint positions. - Added support for discrete gripper actions in the GripperVelocityToJoint processor. - Refactored the gym manipulator to improve modularity and clarity in processing steps. * Added delta_action_processor mapping wrapper * Added missing file delta_action_processor and improved imports in `gym_manipulator` * nit * Added missing file joint_observation_processor * Enhance processing architecture with new teleoperation processors - Introduced `AddTeleopActionAsComplimentaryData` and `AddTeleopEventsAsInfo` for integrating teleoperator actions and events into transitions. - Added `Torch2NumpyActionProcessor` and `Numpy2TorchActionProcessor` for seamless conversion between PyTorch tensors and NumPy arrays. - Updated `__init__.py` to include new processors in module exports, improving modularity and clarity in the processing pipeline. - GymHIL is now fully supported with HIL using the pipeline * Refactor configuration structure for gym_hil integration - Renamed sections for better readability, such as changing "Gym Wrappers Configuration" to "Processor Configuration." - Enhanced documentation with clear examples for dataset collection and policy evaluation configurations. * Enhance reset configuration and teleoperation event handling - Added `terminate_on_success` parameter to `ResetConfig` and `InterventionActionProcessor` for controlling episode termination behavior upon success detection. - Updated documentation to clarify the impact of `terminate_on_success` on data collection for reward classifier training. - Refactored teleoperation event handling to use `TeleopEvents` constants for improved readability and maintainability across various modules. * fix(keyboard teleop), delta action keys * Added transform features and feature contract * Added transform features for image crop * Enum for TeleopEvents * Update tranform_features delta action proc --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
245 lines
7.5 KiB
Python
245 lines
7.5 KiB
Python
# Copyright 2024 The HuggingFace Inc. team. All rights reserved.
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
|
|
import abc
|
|
from dataclasses import dataclass, field
|
|
from typing import Any
|
|
|
|
import draccus
|
|
|
|
from lerobot.configs.types import FeatureType, PolicyFeature
|
|
from lerobot.constants import ACTION, OBS_ENV_STATE, OBS_IMAGE, OBS_IMAGES, OBS_STATE
|
|
from lerobot.robots import RobotConfig
|
|
from lerobot.teleoperators.config import TeleoperatorConfig
|
|
|
|
|
|
@dataclass
|
|
class EnvConfig(draccus.ChoiceRegistry, abc.ABC):
|
|
task: str | None = None
|
|
fps: int = 30
|
|
features: dict[str, PolicyFeature] = field(default_factory=dict)
|
|
features_map: dict[str, str] = field(default_factory=dict)
|
|
|
|
@property
|
|
def type(self) -> str:
|
|
return self.get_choice_name(self.__class__)
|
|
|
|
@property
|
|
@abc.abstractmethod
|
|
def gym_kwargs(self) -> dict:
|
|
raise NotImplementedError()
|
|
|
|
|
|
@EnvConfig.register_subclass("aloha")
|
|
@dataclass
|
|
class AlohaEnv(EnvConfig):
|
|
task: str | None = "AlohaInsertion-v0"
|
|
fps: int = 50
|
|
episode_length: int = 400
|
|
obs_type: str = "pixels_agent_pos"
|
|
render_mode: str = "rgb_array"
|
|
features: dict[str, PolicyFeature] = field(
|
|
default_factory=lambda: {
|
|
"action": PolicyFeature(type=FeatureType.ACTION, shape=(14,)),
|
|
}
|
|
)
|
|
features_map: dict[str, str] = field(
|
|
default_factory=lambda: {
|
|
"action": ACTION,
|
|
"agent_pos": OBS_STATE,
|
|
"top": f"{OBS_IMAGE}.top",
|
|
"pixels/top": f"{OBS_IMAGES}.top",
|
|
}
|
|
)
|
|
|
|
def __post_init__(self):
|
|
if self.obs_type == "pixels":
|
|
self.features["top"] = PolicyFeature(type=FeatureType.VISUAL, shape=(480, 640, 3))
|
|
elif self.obs_type == "pixels_agent_pos":
|
|
self.features["agent_pos"] = PolicyFeature(type=FeatureType.STATE, shape=(14,))
|
|
self.features["pixels/top"] = PolicyFeature(type=FeatureType.VISUAL, shape=(480, 640, 3))
|
|
|
|
@property
|
|
def gym_kwargs(self) -> dict:
|
|
return {
|
|
"obs_type": self.obs_type,
|
|
"render_mode": self.render_mode,
|
|
"max_episode_steps": self.episode_length,
|
|
}
|
|
|
|
|
|
@EnvConfig.register_subclass("pusht")
|
|
@dataclass
|
|
class PushtEnv(EnvConfig):
|
|
task: str | None = "PushT-v0"
|
|
fps: int = 10
|
|
episode_length: int = 300
|
|
obs_type: str = "pixels_agent_pos"
|
|
render_mode: str = "rgb_array"
|
|
visualization_width: int = 384
|
|
visualization_height: int = 384
|
|
features: dict[str, PolicyFeature] = field(
|
|
default_factory=lambda: {
|
|
"action": PolicyFeature(type=FeatureType.ACTION, shape=(2,)),
|
|
"agent_pos": PolicyFeature(type=FeatureType.STATE, shape=(2,)),
|
|
}
|
|
)
|
|
features_map: dict[str, str] = field(
|
|
default_factory=lambda: {
|
|
"action": ACTION,
|
|
"agent_pos": OBS_STATE,
|
|
"environment_state": OBS_ENV_STATE,
|
|
"pixels": OBS_IMAGE,
|
|
}
|
|
)
|
|
|
|
def __post_init__(self):
|
|
if self.obs_type == "pixels_agent_pos":
|
|
self.features["pixels"] = PolicyFeature(type=FeatureType.VISUAL, shape=(384, 384, 3))
|
|
elif self.obs_type == "environment_state_agent_pos":
|
|
self.features["environment_state"] = PolicyFeature(type=FeatureType.ENV, shape=(16,))
|
|
|
|
@property
|
|
def gym_kwargs(self) -> dict:
|
|
return {
|
|
"obs_type": self.obs_type,
|
|
"render_mode": self.render_mode,
|
|
"visualization_width": self.visualization_width,
|
|
"visualization_height": self.visualization_height,
|
|
"max_episode_steps": self.episode_length,
|
|
}
|
|
|
|
|
|
@EnvConfig.register_subclass("xarm")
|
|
@dataclass
|
|
class XarmEnv(EnvConfig):
|
|
task: str | None = "XarmLift-v0"
|
|
fps: int = 15
|
|
episode_length: int = 200
|
|
obs_type: str = "pixels_agent_pos"
|
|
render_mode: str = "rgb_array"
|
|
visualization_width: int = 384
|
|
visualization_height: int = 384
|
|
features: dict[str, PolicyFeature] = field(
|
|
default_factory=lambda: {
|
|
"action": PolicyFeature(type=FeatureType.ACTION, shape=(4,)),
|
|
"pixels": PolicyFeature(type=FeatureType.VISUAL, shape=(84, 84, 3)),
|
|
}
|
|
)
|
|
features_map: dict[str, str] = field(
|
|
default_factory=lambda: {
|
|
"action": ACTION,
|
|
"agent_pos": OBS_STATE,
|
|
"pixels": OBS_IMAGE,
|
|
}
|
|
)
|
|
|
|
def __post_init__(self):
|
|
if self.obs_type == "pixels_agent_pos":
|
|
self.features["agent_pos"] = PolicyFeature(type=FeatureType.STATE, shape=(4,))
|
|
|
|
@property
|
|
def gym_kwargs(self) -> dict:
|
|
return {
|
|
"obs_type": self.obs_type,
|
|
"render_mode": self.render_mode,
|
|
"visualization_width": self.visualization_width,
|
|
"visualization_height": self.visualization_height,
|
|
"max_episode_steps": self.episode_length,
|
|
}
|
|
|
|
|
|
@dataclass
|
|
class ImagePreprocessingConfig:
|
|
crop_params_dict: dict[str, tuple[int, int, int, int]] | None = None
|
|
resize_size: tuple[int, int] | None = None
|
|
|
|
|
|
@dataclass
|
|
class RewardClassifierConfig:
|
|
"""Configuration for reward classification."""
|
|
|
|
pretrained_path: str | None = None
|
|
success_threshold: float = 0.5
|
|
success_reward: float = 1.0
|
|
|
|
|
|
@dataclass
|
|
class InverseKinematicsConfig:
|
|
"""Configuration for inverse kinematics processing."""
|
|
|
|
urdf_path: str | None = None
|
|
target_frame_name: str | None = None
|
|
end_effector_bounds: dict[str, list[float]] | None = None
|
|
end_effector_step_sizes: dict[str, float] | None = None
|
|
|
|
|
|
@dataclass
|
|
class ObservationConfig:
|
|
"""Configuration for observation processing."""
|
|
|
|
add_joint_velocity_to_observation: bool = False
|
|
add_current_to_observation: bool = False
|
|
add_ee_pose_to_observation: bool = False
|
|
display_cameras: bool = False
|
|
|
|
|
|
@dataclass
|
|
class GripperConfig:
|
|
"""Configuration for gripper control and penalties."""
|
|
|
|
use_gripper: bool = True
|
|
gripper_penalty: float = 0.0
|
|
gripper_penalty_in_reward: bool = False
|
|
|
|
|
|
@dataclass
|
|
class ResetConfig:
|
|
"""Configuration for environment reset behavior."""
|
|
|
|
fixed_reset_joint_positions: Any | None = None
|
|
reset_time_s: float = 5.0
|
|
control_time_s: float = 20.0
|
|
terminate_on_success: bool = True
|
|
|
|
|
|
@dataclass
|
|
class HILSerlProcessorConfig:
|
|
"""Configuration for environment processing pipeline."""
|
|
|
|
control_mode: str = "gamepad"
|
|
observation: ObservationConfig | None = None
|
|
image_preprocessing: ImagePreprocessingConfig | None = None
|
|
gripper: GripperConfig | None = None
|
|
reset: ResetConfig | None = None
|
|
inverse_kinematics: InverseKinematicsConfig | None = None
|
|
reward_classifier: RewardClassifierConfig | None = None
|
|
max_gripper_pos: float | None = 100.0
|
|
|
|
|
|
@EnvConfig.register_subclass(name="gym_manipulator")
|
|
@dataclass
|
|
class HILSerlRobotEnvConfig(EnvConfig):
|
|
"""Configuration for the HILSerlRobotEnv environment."""
|
|
|
|
robot: RobotConfig | None = None
|
|
teleop: TeleoperatorConfig | None = None
|
|
processor: HILSerlProcessorConfig = field(default_factory=HILSerlProcessorConfig)
|
|
|
|
name: str = "real_robot"
|
|
|
|
@property
|
|
def gym_kwargs(self) -> dict:
|
|
return {}
|