From ff38a51df901e782e892d81fda845ad35cefa780 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sat, 2 Aug 2025 17:07:19 +0000 Subject: [PATCH] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/lerobot/scripts/rl/actor.py | 21 ++++++++-------- src/lerobot/scripts/rl/gym_manipulator.py | 30 +++++++++++------------ 2 files changed, 24 insertions(+), 27 deletions(-) diff --git a/src/lerobot/scripts/rl/actor.py b/src/lerobot/scripts/rl/actor.py index fae3be753..d2331166e 100644 --- a/src/lerobot/scripts/rl/actor.py +++ b/src/lerobot/scripts/rl/actor.py @@ -62,8 +62,14 @@ from lerobot.configs import parser from lerobot.configs.train import TrainRLServerPipelineConfig from lerobot.policies.factory import make_policy from lerobot.policies.sac.modeling_sac import SACPolicy +from lerobot.processor.pipeline import TransitionKey from lerobot.robots import so100_follower # noqa: F401 -from lerobot.scripts.rl.gym_manipulator import make_robot_env +from lerobot.scripts.rl.gym_manipulator import ( + create_transition, + make_processors, + make_robot_env, + step_env_and_process_transition, +) from lerobot.teleoperators import gamepad, so101_leader # noqa: F401 from lerobot.transport import services_pb2, services_pb2_grpc from lerobot.transport.utils import ( @@ -83,13 +89,6 @@ from lerobot.utils.transition import ( move_state_dict_to_device, move_transition_to_device, ) - -from lerobot.processor.pipeline import EnvTransition, TransitionKey -from lerobot.scripts.rl.gym_manipulator import ( - create_transition, - make_processors, - step_env_and_process_transition, -) from lerobot.utils.utils import ( TimerManager, get_safe_torch_device, @@ -311,13 +310,13 @@ def act_with_policy( env_processor=env_processor, action_processor=action_processor, ) - + # Extract values from processed transition reward = new_transition[TransitionKey.REWARD] done = new_transition.get(TransitionKey.DONE, False) truncated = new_transition.get(TransitionKey.TRUNCATED, False) processed_action = new_transition[TransitionKey.ACTION] - + sum_reward_episode += float(reward) episode_total_steps += 1 @@ -381,7 +380,7 @@ def act_with_policy( episode_intervention = False episode_intervention_steps = 0 episode_total_steps = 0 - + # Reset environment and processors obs, info = online_env.reset() complementary_data = {"raw_joint_positions": info.pop("raw_joint_positions")} diff --git a/src/lerobot/scripts/rl/gym_manipulator.py b/src/lerobot/scripts/rl/gym_manipulator.py index a102e3ff2..29401a9ed 100644 --- a/src/lerobot/scripts/rl/gym_manipulator.py +++ b/src/lerobot/scripts/rl/gym_manipulator.py @@ -783,11 +783,11 @@ def make_robot_env(cfg: EnvConfig) -> tuple[gym.Env, Any]: def make_processors(env, cfg): """ Factory function to create environment and action processors. - + Args: env: The robot environment cfg: Configuration object containing processor parameters - + Returns: tuple: (env_processor, action_processor) """ @@ -797,13 +797,11 @@ def make_processors(env, cfg): JointVelocityProcessor(dt=1.0 / cfg.fps), MotorCurrentProcessor(env=env), ImageCropResizeProcessor( - crop_params_dict=cfg.processor.crop_params_dict, - resize_size=cfg.processor.resize_size + crop_params_dict=cfg.processor.crop_params_dict, resize_size=cfg.processor.resize_size ), TimeLimitProcessor(max_episode_steps=int(cfg.processor.control_time_s * cfg.fps)), GripperPenaltyProcessor( - penalty=cfg.processor.gripper_penalty, - max_gripper_pos=cfg.processor.max_gripper_pos + penalty=cfg.processor.gripper_penalty, max_gripper_pos=cfg.processor.max_gripper_pos ), DeviceProcessor(device=cfg.device), ] @@ -823,21 +821,21 @@ def make_processors(env, cfg): ), ] action_processor = RobotProcessor(steps=action_pipeline_steps) - + return env_processor, action_processor def step_env_and_process_transition( - env, - transition, - action, - teleop_device, - env_processor, - action_processor, + env, + transition, + action, + teleop_device, + env_processor, + action_processor, ): """ Execute one step with processors handling intervention and observation processing. - + Args: env: The robot environment transition: Current transition state @@ -845,7 +843,7 @@ def step_env_and_process_transition( teleop_device: Teleoperator device for getting intervention signals env_processor: Environment processor for observations action_processor: Action processor for handling interventions - + Returns: tuple: (new_transition, terminate_episode) """ @@ -893,7 +891,7 @@ def step_env_and_process_transition( complementary_data=complementary_data, ) new_transition = env_processor(new_transition) - + return new_transition, terminate_episode