mirror of
https://github.com/huggingface/lerobot.git
synced 2026-05-22 03:59:42 +00:00
Fix gym-hil integration with the new LeRobot pipeline. (#2482)
* Add GymHILAdapterProcessorStep for gym-hil environment integration
* Fix action features in control loop for None teleop device with gym-hil
* Finalize dataset before pushing to hub for visualization on the hub
* Fix neutral action for gripper
* fix pre-commit
This commit is contained in:
@@ -44,6 +44,7 @@ from .hil_processor import (
|
|||||||
AddTeleopActionAsComplimentaryDataStep,
|
AddTeleopActionAsComplimentaryDataStep,
|
||||||
AddTeleopEventsAsInfoStep,
|
AddTeleopEventsAsInfoStep,
|
||||||
GripperPenaltyProcessorStep,
|
GripperPenaltyProcessorStep,
|
||||||
|
GymHILAdapterProcessorStep,
|
||||||
ImageCropResizeProcessorStep,
|
ImageCropResizeProcessorStep,
|
||||||
InterventionActionProcessorStep,
|
InterventionActionProcessorStep,
|
||||||
RewardClassifierProcessorStep,
|
RewardClassifierProcessorStep,
|
||||||
@@ -87,6 +88,7 @@ __all__ = [
|
|||||||
"DoneProcessorStep",
|
"DoneProcessorStep",
|
||||||
"EnvAction",
|
"EnvAction",
|
||||||
"EnvTransition",
|
"EnvTransition",
|
||||||
|
"GymHILAdapterProcessorStep",
|
||||||
"GripperPenaltyProcessorStep",
|
"GripperPenaltyProcessorStep",
|
||||||
"hotswap_stats",
|
"hotswap_stats",
|
||||||
"IdentityProcessorStep",
|
"IdentityProcessorStep",
|
||||||
|
|||||||
@@ -20,6 +20,7 @@ from lerobot.configs.types import PipelineFeatureType, PolicyFeature
|
|||||||
|
|
||||||
from .converters import to_tensor
|
from .converters import to_tensor
|
||||||
from .core import EnvAction, EnvTransition, PolicyAction
|
from .core import EnvAction, EnvTransition, PolicyAction
|
||||||
|
from .hil_processor import TELEOP_ACTION_KEY
|
||||||
from .pipeline import ActionProcessorStep, ProcessorStep, ProcessorStepRegistry
|
from .pipeline import ActionProcessorStep, ProcessorStep, ProcessorStepRegistry
|
||||||
|
|
||||||
|
|
||||||
@@ -89,6 +90,13 @@ class Numpy2TorchActionProcessorStep(ProcessorStep):
|
|||||||
torch_action = to_tensor(action, dtype=None) # Preserve original dtype
|
torch_action = to_tensor(action, dtype=None) # Preserve original dtype
|
||||||
new_transition[TransitionKey.ACTION] = torch_action
|
new_transition[TransitionKey.ACTION] = torch_action
|
||||||
|
|
||||||
|
complementary_data = new_transition.get(TransitionKey.COMPLEMENTARY_DATA, {})
|
||||||
|
if TELEOP_ACTION_KEY in complementary_data:
|
||||||
|
teleop_action = complementary_data[TELEOP_ACTION_KEY]
|
||||||
|
if isinstance(teleop_action, EnvAction):
|
||||||
|
complementary_data[TELEOP_ACTION_KEY] = to_tensor(teleop_action)
|
||||||
|
new_transition[TransitionKey.COMPLEMENTARY_DATA] = complementary_data
|
||||||
|
|
||||||
return new_transition
|
return new_transition
|
||||||
|
|
||||||
def transform_features(
|
def transform_features(
|
||||||
|
|||||||
@@ -312,6 +312,37 @@ class TimeLimitProcessorStep(TruncatedProcessorStep):
|
|||||||
return features
|
return features
|
||||||
|
|
||||||
|
|
||||||
|
@ProcessorStepRegistry.register("gym_hil_adapter_processor")
|
||||||
|
class GymHILAdapterProcessorStep(ProcessorStep):
|
||||||
|
"""
|
||||||
|
Adapts the output of the `gym-hil` environment to the format expected by `lerobot` processors.
|
||||||
|
|
||||||
|
This step normalizes the `transition` object by:
|
||||||
|
1. Copying `teleop_action` from `info` to `complementary_data`.
|
||||||
|
2. Copying `is_intervention` from `info` (using the string key) to `info` (using the enum key).
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __call__(self, transition: EnvTransition) -> EnvTransition:
|
||||||
|
info = transition.get(TransitionKey.INFO, {})
|
||||||
|
complementary_data = transition.get(TransitionKey.COMPLEMENTARY_DATA, {})
|
||||||
|
|
||||||
|
if TELEOP_ACTION_KEY in info:
|
||||||
|
complementary_data[TELEOP_ACTION_KEY] = info[TELEOP_ACTION_KEY]
|
||||||
|
|
||||||
|
if "is_intervention" in info:
|
||||||
|
info[TeleopEvents.IS_INTERVENTION] = info["is_intervention"]
|
||||||
|
|
||||||
|
transition[TransitionKey.INFO] = info
|
||||||
|
transition[TransitionKey.COMPLEMENTARY_DATA] = complementary_data
|
||||||
|
|
||||||
|
return transition
|
||||||
|
|
||||||
|
def transform_features(
|
||||||
|
self, features: dict[PipelineFeatureType, dict[str, PolicyFeature]]
|
||||||
|
) -> dict[PipelineFeatureType, dict[str, PolicyFeature]]:
|
||||||
|
return features
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
@ProcessorStepRegistry.register("gripper_penalty_processor")
|
@ProcessorStepRegistry.register("gripper_penalty_processor")
|
||||||
class GripperPenaltyProcessorStep(ProcessorStep):
|
class GripperPenaltyProcessorStep(ProcessorStep):
|
||||||
|
|||||||
@@ -36,6 +36,7 @@ from lerobot.processor import (
|
|||||||
DeviceProcessorStep,
|
DeviceProcessorStep,
|
||||||
EnvTransition,
|
EnvTransition,
|
||||||
GripperPenaltyProcessorStep,
|
GripperPenaltyProcessorStep,
|
||||||
|
GymHILAdapterProcessorStep,
|
||||||
ImageCropResizeProcessorStep,
|
ImageCropResizeProcessorStep,
|
||||||
InterventionActionProcessorStep,
|
InterventionActionProcessorStep,
|
||||||
MapDeltaActionToRobotActionStep,
|
MapDeltaActionToRobotActionStep,
|
||||||
@@ -379,6 +380,7 @@ def make_processors(
|
|||||||
]
|
]
|
||||||
|
|
||||||
env_pipeline_steps = [
|
env_pipeline_steps = [
|
||||||
|
GymHILAdapterProcessorStep(),
|
||||||
Numpy2TorchActionProcessorStep(),
|
Numpy2TorchActionProcessorStep(),
|
||||||
VanillaObservationProcessorStep(),
|
VanillaObservationProcessorStep(),
|
||||||
AddBatchDimensionProcessorStep(),
|
AddBatchDimensionProcessorStep(),
|
||||||
@@ -608,7 +610,14 @@ def control_loop(
|
|||||||
|
|
||||||
dataset = None
|
dataset = None
|
||||||
if cfg.mode == "record":
|
if cfg.mode == "record":
|
||||||
action_features = teleop_device.action_features
|
if teleop_device:
|
||||||
|
action_features = teleop_device.action_features
|
||||||
|
else:
|
||||||
|
action_features = {
|
||||||
|
"dtype": "float32",
|
||||||
|
"shape": (4,),
|
||||||
|
"names": ["delta_x", "delta_y", "delta_z", "gripper"],
|
||||||
|
}
|
||||||
features = {
|
features = {
|
||||||
ACTION: action_features,
|
ACTION: action_features,
|
||||||
REWARD: {"dtype": "float32", "shape": (1,), "names": None},
|
REWARD: {"dtype": "float32", "shape": (1,), "names": None},
|
||||||
@@ -656,7 +665,7 @@ def control_loop(
|
|||||||
# Create a neutral action (no movement)
|
# Create a neutral action (no movement)
|
||||||
neutral_action = torch.tensor([0.0, 0.0, 0.0], dtype=torch.float32)
|
neutral_action = torch.tensor([0.0, 0.0, 0.0], dtype=torch.float32)
|
||||||
if use_gripper:
|
if use_gripper:
|
||||||
neutral_action = torch.cat([neutral_action, torch.tensor([1.0])]) # Gripper stay
|
neutral_action = torch.cat([neutral_action, torch.tensor([0.0])]) # Gripper stay
|
||||||
|
|
||||||
# Use the new step function
|
# Use the new step function
|
||||||
transition = step_env_and_process_transition(
|
transition = step_env_and_process_transition(
|
||||||
@@ -725,6 +734,8 @@ def control_loop(
|
|||||||
precise_sleep(max(dt - (time.perf_counter() - step_start_time), 0.0))
|
precise_sleep(max(dt - (time.perf_counter() - step_start_time), 0.0))
|
||||||
|
|
||||||
if dataset is not None and cfg.dataset.push_to_hub:
|
if dataset is not None and cfg.dataset.push_to_hub:
|
||||||
|
logging.info("Finalizing dataset before pushing to hub")
|
||||||
|
dataset.finalize()
|
||||||
logging.info("Pushing dataset to hub")
|
logging.info("Pushing dataset to hub")
|
||||||
dataset.push_to_hub()
|
dataset.push_to_hub()
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user