mirror of
https://github.com/huggingface/lerobot.git
synced 2026-05-20 19:19:56 +00:00
[pre-commit.ci] auto fixes from pre-commit.com hooks
for more information, see https://pre-commit.ci
This commit is contained in:
@@ -62,8 +62,14 @@ from lerobot.configs import parser
|
|||||||
from lerobot.configs.train import TrainRLServerPipelineConfig
|
from lerobot.configs.train import TrainRLServerPipelineConfig
|
||||||
from lerobot.policies.factory import make_policy
|
from lerobot.policies.factory import make_policy
|
||||||
from lerobot.policies.sac.modeling_sac import SACPolicy
|
from lerobot.policies.sac.modeling_sac import SACPolicy
|
||||||
|
from lerobot.processor.pipeline import TransitionKey
|
||||||
from lerobot.robots import so100_follower # noqa: F401
|
from lerobot.robots import so100_follower # noqa: F401
|
||||||
from lerobot.scripts.rl.gym_manipulator import make_robot_env
|
from lerobot.scripts.rl.gym_manipulator import (
|
||||||
|
create_transition,
|
||||||
|
make_processors,
|
||||||
|
make_robot_env,
|
||||||
|
step_env_and_process_transition,
|
||||||
|
)
|
||||||
from lerobot.teleoperators import gamepad, so101_leader # noqa: F401
|
from lerobot.teleoperators import gamepad, so101_leader # noqa: F401
|
||||||
from lerobot.transport import services_pb2, services_pb2_grpc
|
from lerobot.transport import services_pb2, services_pb2_grpc
|
||||||
from lerobot.transport.utils import (
|
from lerobot.transport.utils import (
|
||||||
@@ -83,13 +89,6 @@ from lerobot.utils.transition import (
|
|||||||
move_state_dict_to_device,
|
move_state_dict_to_device,
|
||||||
move_transition_to_device,
|
move_transition_to_device,
|
||||||
)
|
)
|
||||||
|
|
||||||
from lerobot.processor.pipeline import EnvTransition, TransitionKey
|
|
||||||
from lerobot.scripts.rl.gym_manipulator import (
|
|
||||||
create_transition,
|
|
||||||
make_processors,
|
|
||||||
step_env_and_process_transition,
|
|
||||||
)
|
|
||||||
from lerobot.utils.utils import (
|
from lerobot.utils.utils import (
|
||||||
TimerManager,
|
TimerManager,
|
||||||
get_safe_torch_device,
|
get_safe_torch_device,
|
||||||
@@ -311,13 +310,13 @@ def act_with_policy(
|
|||||||
env_processor=env_processor,
|
env_processor=env_processor,
|
||||||
action_processor=action_processor,
|
action_processor=action_processor,
|
||||||
)
|
)
|
||||||
|
|
||||||
# Extract values from processed transition
|
# Extract values from processed transition
|
||||||
reward = new_transition[TransitionKey.REWARD]
|
reward = new_transition[TransitionKey.REWARD]
|
||||||
done = new_transition.get(TransitionKey.DONE, False)
|
done = new_transition.get(TransitionKey.DONE, False)
|
||||||
truncated = new_transition.get(TransitionKey.TRUNCATED, False)
|
truncated = new_transition.get(TransitionKey.TRUNCATED, False)
|
||||||
processed_action = new_transition[TransitionKey.ACTION]
|
processed_action = new_transition[TransitionKey.ACTION]
|
||||||
|
|
||||||
sum_reward_episode += float(reward)
|
sum_reward_episode += float(reward)
|
||||||
episode_total_steps += 1
|
episode_total_steps += 1
|
||||||
|
|
||||||
@@ -381,7 +380,7 @@ def act_with_policy(
|
|||||||
episode_intervention = False
|
episode_intervention = False
|
||||||
episode_intervention_steps = 0
|
episode_intervention_steps = 0
|
||||||
episode_total_steps = 0
|
episode_total_steps = 0
|
||||||
|
|
||||||
# Reset environment and processors
|
# Reset environment and processors
|
||||||
obs, info = online_env.reset()
|
obs, info = online_env.reset()
|
||||||
complementary_data = {"raw_joint_positions": info.pop("raw_joint_positions")}
|
complementary_data = {"raw_joint_positions": info.pop("raw_joint_positions")}
|
||||||
|
|||||||
@@ -783,11 +783,11 @@ def make_robot_env(cfg: EnvConfig) -> tuple[gym.Env, Any]:
|
|||||||
def make_processors(env, cfg):
|
def make_processors(env, cfg):
|
||||||
"""
|
"""
|
||||||
Factory function to create environment and action processors.
|
Factory function to create environment and action processors.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
env: The robot environment
|
env: The robot environment
|
||||||
cfg: Configuration object containing processor parameters
|
cfg: Configuration object containing processor parameters
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
tuple: (env_processor, action_processor)
|
tuple: (env_processor, action_processor)
|
||||||
"""
|
"""
|
||||||
@@ -797,13 +797,11 @@ def make_processors(env, cfg):
|
|||||||
JointVelocityProcessor(dt=1.0 / cfg.fps),
|
JointVelocityProcessor(dt=1.0 / cfg.fps),
|
||||||
MotorCurrentProcessor(env=env),
|
MotorCurrentProcessor(env=env),
|
||||||
ImageCropResizeProcessor(
|
ImageCropResizeProcessor(
|
||||||
crop_params_dict=cfg.processor.crop_params_dict,
|
crop_params_dict=cfg.processor.crop_params_dict, resize_size=cfg.processor.resize_size
|
||||||
resize_size=cfg.processor.resize_size
|
|
||||||
),
|
),
|
||||||
TimeLimitProcessor(max_episode_steps=int(cfg.processor.control_time_s * cfg.fps)),
|
TimeLimitProcessor(max_episode_steps=int(cfg.processor.control_time_s * cfg.fps)),
|
||||||
GripperPenaltyProcessor(
|
GripperPenaltyProcessor(
|
||||||
penalty=cfg.processor.gripper_penalty,
|
penalty=cfg.processor.gripper_penalty, max_gripper_pos=cfg.processor.max_gripper_pos
|
||||||
max_gripper_pos=cfg.processor.max_gripper_pos
|
|
||||||
),
|
),
|
||||||
DeviceProcessor(device=cfg.device),
|
DeviceProcessor(device=cfg.device),
|
||||||
]
|
]
|
||||||
@@ -823,21 +821,21 @@ def make_processors(env, cfg):
|
|||||||
),
|
),
|
||||||
]
|
]
|
||||||
action_processor = RobotProcessor(steps=action_pipeline_steps)
|
action_processor = RobotProcessor(steps=action_pipeline_steps)
|
||||||
|
|
||||||
return env_processor, action_processor
|
return env_processor, action_processor
|
||||||
|
|
||||||
|
|
||||||
def step_env_and_process_transition(
|
def step_env_and_process_transition(
|
||||||
env,
|
env,
|
||||||
transition,
|
transition,
|
||||||
action,
|
action,
|
||||||
teleop_device,
|
teleop_device,
|
||||||
env_processor,
|
env_processor,
|
||||||
action_processor,
|
action_processor,
|
||||||
):
|
):
|
||||||
"""
|
"""
|
||||||
Execute one step with processors handling intervention and observation processing.
|
Execute one step with processors handling intervention and observation processing.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
env: The robot environment
|
env: The robot environment
|
||||||
transition: Current transition state
|
transition: Current transition state
|
||||||
@@ -845,7 +843,7 @@ def step_env_and_process_transition(
|
|||||||
teleop_device: Teleoperator device for getting intervention signals
|
teleop_device: Teleoperator device for getting intervention signals
|
||||||
env_processor: Environment processor for observations
|
env_processor: Environment processor for observations
|
||||||
action_processor: Action processor for handling interventions
|
action_processor: Action processor for handling interventions
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
tuple: (new_transition, terminate_episode)
|
tuple: (new_transition, terminate_episode)
|
||||||
"""
|
"""
|
||||||
@@ -893,7 +891,7 @@ def step_env_and_process_transition(
|
|||||||
complementary_data=complementary_data,
|
complementary_data=complementary_data,
|
||||||
)
|
)
|
||||||
new_transition = env_processor(new_transition)
|
new_transition = env_processor(new_transition)
|
||||||
|
|
||||||
return new_transition, terminate_episode
|
return new_transition, terminate_episode
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user