mirror of
https://github.com/huggingface/lerobot.git
synced 2026-05-15 08:39:49 +00:00
[pre-commit.ci] auto fixes from pre-commit.com hooks
for more information, see https://pre-commit.ci
This commit is contained in:
@@ -62,8 +62,14 @@ from lerobot.configs import parser
|
||||
from lerobot.configs.train import TrainRLServerPipelineConfig
|
||||
from lerobot.policies.factory import make_policy
|
||||
from lerobot.policies.sac.modeling_sac import SACPolicy
|
||||
from lerobot.processor.pipeline import TransitionKey
|
||||
from lerobot.robots import so100_follower # noqa: F401
|
||||
from lerobot.scripts.rl.gym_manipulator import make_robot_env
|
||||
from lerobot.scripts.rl.gym_manipulator import (
|
||||
create_transition,
|
||||
make_processors,
|
||||
make_robot_env,
|
||||
step_env_and_process_transition,
|
||||
)
|
||||
from lerobot.teleoperators import gamepad, so101_leader # noqa: F401
|
||||
from lerobot.transport import services_pb2, services_pb2_grpc
|
||||
from lerobot.transport.utils import (
|
||||
@@ -83,13 +89,6 @@ from lerobot.utils.transition import (
|
||||
move_state_dict_to_device,
|
||||
move_transition_to_device,
|
||||
)
|
||||
|
||||
from lerobot.processor.pipeline import EnvTransition, TransitionKey
|
||||
from lerobot.scripts.rl.gym_manipulator import (
|
||||
create_transition,
|
||||
make_processors,
|
||||
step_env_and_process_transition,
|
||||
)
|
||||
from lerobot.utils.utils import (
|
||||
TimerManager,
|
||||
get_safe_torch_device,
|
||||
@@ -311,13 +310,13 @@ def act_with_policy(
|
||||
env_processor=env_processor,
|
||||
action_processor=action_processor,
|
||||
)
|
||||
|
||||
|
||||
# Extract values from processed transition
|
||||
reward = new_transition[TransitionKey.REWARD]
|
||||
done = new_transition.get(TransitionKey.DONE, False)
|
||||
truncated = new_transition.get(TransitionKey.TRUNCATED, False)
|
||||
processed_action = new_transition[TransitionKey.ACTION]
|
||||
|
||||
|
||||
sum_reward_episode += float(reward)
|
||||
episode_total_steps += 1
|
||||
|
||||
@@ -381,7 +380,7 @@ def act_with_policy(
|
||||
episode_intervention = False
|
||||
episode_intervention_steps = 0
|
||||
episode_total_steps = 0
|
||||
|
||||
|
||||
# Reset environment and processors
|
||||
obs, info = online_env.reset()
|
||||
complementary_data = {"raw_joint_positions": info.pop("raw_joint_positions")}
|
||||
|
||||
@@ -783,11 +783,11 @@ def make_robot_env(cfg: EnvConfig) -> tuple[gym.Env, Any]:
|
||||
def make_processors(env, cfg):
|
||||
"""
|
||||
Factory function to create environment and action processors.
|
||||
|
||||
|
||||
Args:
|
||||
env: The robot environment
|
||||
cfg: Configuration object containing processor parameters
|
||||
|
||||
|
||||
Returns:
|
||||
tuple: (env_processor, action_processor)
|
||||
"""
|
||||
@@ -797,13 +797,11 @@ def make_processors(env, cfg):
|
||||
JointVelocityProcessor(dt=1.0 / cfg.fps),
|
||||
MotorCurrentProcessor(env=env),
|
||||
ImageCropResizeProcessor(
|
||||
crop_params_dict=cfg.processor.crop_params_dict,
|
||||
resize_size=cfg.processor.resize_size
|
||||
crop_params_dict=cfg.processor.crop_params_dict, resize_size=cfg.processor.resize_size
|
||||
),
|
||||
TimeLimitProcessor(max_episode_steps=int(cfg.processor.control_time_s * cfg.fps)),
|
||||
GripperPenaltyProcessor(
|
||||
penalty=cfg.processor.gripper_penalty,
|
||||
max_gripper_pos=cfg.processor.max_gripper_pos
|
||||
penalty=cfg.processor.gripper_penalty, max_gripper_pos=cfg.processor.max_gripper_pos
|
||||
),
|
||||
DeviceProcessor(device=cfg.device),
|
||||
]
|
||||
@@ -823,21 +821,21 @@ def make_processors(env, cfg):
|
||||
),
|
||||
]
|
||||
action_processor = RobotProcessor(steps=action_pipeline_steps)
|
||||
|
||||
|
||||
return env_processor, action_processor
|
||||
|
||||
|
||||
def step_env_and_process_transition(
|
||||
env,
|
||||
transition,
|
||||
action,
|
||||
teleop_device,
|
||||
env_processor,
|
||||
action_processor,
|
||||
env,
|
||||
transition,
|
||||
action,
|
||||
teleop_device,
|
||||
env_processor,
|
||||
action_processor,
|
||||
):
|
||||
"""
|
||||
Execute one step with processors handling intervention and observation processing.
|
||||
|
||||
|
||||
Args:
|
||||
env: The robot environment
|
||||
transition: Current transition state
|
||||
@@ -845,7 +843,7 @@ def step_env_and_process_transition(
|
||||
teleop_device: Teleoperator device for getting intervention signals
|
||||
env_processor: Environment processor for observations
|
||||
action_processor: Action processor for handling interventions
|
||||
|
||||
|
||||
Returns:
|
||||
tuple: (new_transition, terminate_episode)
|
||||
"""
|
||||
@@ -893,7 +891,7 @@ def step_env_and_process_transition(
|
||||
complementary_data=complementary_data,
|
||||
)
|
||||
new_transition = env_processor(new_transition)
|
||||
|
||||
|
||||
return new_transition, terminate_episode
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user