mirror of
https://github.com/huggingface/lerobot.git
synced 2026-05-23 20:50:02 +00:00
Compare commits
6 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 4c666599b4 | |||
| 8d9f137f70 | |||
| 375f23559e | |||
| 7a62ea21d8 | |||
| fdbfc015a2 | |||
| d656da8ccc |
@@ -514,7 +514,7 @@ def make_policy(
|
||||
|
||||
logging.info("Loading policy's PEFT adapter.")
|
||||
|
||||
peft_pretrained_path = cfg.pretrained_path
|
||||
peft_pretrained_path = str(cfg.pretrained_path)
|
||||
peft_config = PeftConfig.from_pretrained(peft_pretrained_path)
|
||||
|
||||
kwargs["pretrained_name_or_path"] = peft_config.base_model_name_or_path
|
||||
@@ -527,7 +527,9 @@ def make_policy(
|
||||
)
|
||||
|
||||
policy = policy_cls.from_pretrained(**kwargs)
|
||||
policy = PeftModel.from_pretrained(policy, peft_pretrained_path, config=peft_config)
|
||||
policy = PeftModel.from_pretrained(
|
||||
policy, peft_pretrained_path, config=peft_config, is_trainable=True
|
||||
)
|
||||
|
||||
else:
|
||||
# Make a fresh policy.
|
||||
|
||||
@@ -748,16 +748,8 @@ class PI0Pytorch(nn.Module): # see openpi `PI0Pytorch`
|
||||
|
||||
return embs, pad_masks, att_masks, adarms_cond
|
||||
|
||||
def forward(
|
||||
self, images, img_masks, lang_tokens, lang_masks, state, actions, noise=None, time=None
|
||||
) -> Tensor:
|
||||
def forward(self, images, img_masks, lang_tokens, lang_masks, state, actions, noise, time) -> Tensor:
|
||||
"""Do a full training forward pass and compute the loss."""
|
||||
if noise is None:
|
||||
noise = self.sample_noise(actions.shape, actions.device)
|
||||
|
||||
if time is None:
|
||||
time = self.sample_time(actions.shape[0], actions.device)
|
||||
|
||||
time_expanded = time[:, None, None]
|
||||
x_t = time_expanded * noise + (1 - time_expanded) * actions
|
||||
u_t = noise - actions
|
||||
@@ -1292,8 +1284,11 @@ class PI0Policy(PreTrainedPolicy):
|
||||
state = self.prepare_state(batch)
|
||||
actions = self.prepare_action(batch)
|
||||
|
||||
noise = self.model.sample_noise(actions.shape, actions.device)
|
||||
time = self.model.sample_time(actions.shape[0], actions.device)
|
||||
|
||||
# Compute loss
|
||||
losses = self.model.forward(images, img_masks, lang_tokens, lang_masks, state, actions)
|
||||
losses = self.model.forward(images, img_masks, lang_tokens, lang_masks, state, actions, noise, time)
|
||||
|
||||
# Truncate losses to actual action dimensions
|
||||
original_action_dim = self.config.output_features[ACTION].shape[0]
|
||||
|
||||
@@ -728,14 +728,8 @@ class PI05Pytorch(nn.Module): # see openpi `PI0Pytorch`
|
||||
|
||||
return embs, pad_masks, att_masks, adarms_cond
|
||||
|
||||
def forward(self, images, img_masks, tokens, masks, actions, noise=None, time=None) -> Tensor:
|
||||
def forward(self, images, img_masks, tokens, masks, actions, noise, time) -> Tensor:
|
||||
"""Do a full training forward pass and compute the loss."""
|
||||
if noise is None:
|
||||
noise = self.sample_noise(actions.shape, actions.device)
|
||||
|
||||
if time is None:
|
||||
time = self.sample_time(actions.shape[0], actions.device)
|
||||
|
||||
time_expanded = time[:, None, None]
|
||||
x_t = time_expanded * noise + (1 - time_expanded) * actions
|
||||
u_t = noise - actions
|
||||
@@ -1262,8 +1256,11 @@ class PI05Policy(PreTrainedPolicy):
|
||||
|
||||
actions = self.prepare_action(batch)
|
||||
|
||||
noise = self.model.sample_noise(actions.shape, actions.device)
|
||||
time = self.model.sample_time(actions.shape[0], actions.device)
|
||||
|
||||
# Compute loss (no separate state needed for PI05)
|
||||
losses = self.model.forward(images, img_masks, tokens, masks, actions)
|
||||
losses = self.model.forward(images, img_masks, tokens, masks, actions, noise, time)
|
||||
|
||||
# Truncate losses to actual action dimensions
|
||||
original_action_dim = self.config.output_features[ACTION].shape[0]
|
||||
|
||||
@@ -33,12 +33,13 @@ Recording modes:
|
||||
``record_autonomous=False``: Only correction windows are recorded.
|
||||
Each correction (start to stop) becomes one episode.
|
||||
|
||||
Teleoperator expectations:
|
||||
The user is responsible for keeping the leader arm aligned with the
|
||||
follower arm at the moment a correction begins. Programmatic motor
|
||||
handover (``enable_torque`` / ``disable_torque`` / ``write_goal_positions``)
|
||||
is intentionally not invoked here — see the TODO in
|
||||
:func:`DAggerStrategy._apply_transition` for the open design decision.
|
||||
Teleoperator handover:
|
||||
On AUTONOMOUS → PAUSED, actuated teleops (those with non-empty
|
||||
``feedback_features``, e.g. SO-101, OpenArmMini) are smoothly driven to
|
||||
the follower's last position via ``send_feedback`` so the operator takes
|
||||
over without a jerk. Non-actuated teleops cannot be driven,
|
||||
so on PAUSED → CORRECTING the follower is instead slid to the teleop's
|
||||
current pose before the correction begins.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
@@ -175,17 +176,27 @@ class DAggerEvents:
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
# TODO(Steven): re-enable programmatic teleop alignment once we decide whether
|
||||
# to enforce motor-control methods on every Teleoperator. Until then the user
|
||||
# is responsible for moving the leader arm to the follower's pose at the moment
|
||||
# a correction begins.
|
||||
def _teleop_smooth_move_to(
|
||||
teleop: Teleoperator, target_pos: dict, duration_s: float = 2.0, fps: int = 50
|
||||
) -> None:
|
||||
"""Smoothly move teleop to target position via linear interpolation.
|
||||
def _teleop_supports_feedback(teleop: Teleoperator) -> bool:
|
||||
"""Return True when the teleop can receive position feedback (is actuated).
|
||||
TODO(Maxime): See if it is possible to unify this interface across teleops instead of duck-typing.
|
||||
"""
|
||||
return (
|
||||
bool(teleop.feedback_features)
|
||||
and hasattr(teleop, "disable_torque")
|
||||
and hasattr(teleop, "enable_torque")
|
||||
)
|
||||
|
||||
Requires the teleoperator to support motor control methods
|
||||
(``enable_torque``, ``write_goal_positions``, ``get_action``).
|
||||
|
||||
def _teleop_smooth_move_to(
|
||||
teleop: Teleoperator, target_pos: dict, duration_s: float = 2.0, fps: int = 30
|
||||
) -> None:
|
||||
"""Smoothly move an actuated teleop to ``target_pos`` via linear interpolation.
|
||||
|
||||
Requires the teleoperator to support feedback
|
||||
(i.e. have non-empty ``feedback_features`` and implement ``disable_torque`` / ``enable_torque``).
|
||||
|
||||
TODO(Maxime): This blocks up to ``duration_s`` seconds, during this time
|
||||
the follower robot doesn't receive new actions, this could be an issue on LeKiwi.
|
||||
"""
|
||||
teleop.enable_torque()
|
||||
current = teleop.get_action()
|
||||
@@ -193,13 +204,28 @@ def _teleop_smooth_move_to(
|
||||
|
||||
for step in range(steps + 1):
|
||||
t = step / steps
|
||||
interp = {}
|
||||
for k in current:
|
||||
if k in target_pos:
|
||||
interp[k] = current[k] * (1 - t) + target_pos[k] * t
|
||||
else:
|
||||
interp[k] = current[k]
|
||||
teleop.write_goal_positions(interp)
|
||||
interp = {
|
||||
k: current[k] * (1 - t) + target_pos[k] * t if k in target_pos else current[k] for k in current
|
||||
}
|
||||
teleop.send_feedback(interp)
|
||||
time.sleep(1 / fps)
|
||||
|
||||
|
||||
def _follower_smooth_move_to(
|
||||
robot: ThreadSafeRobot, current: dict, target: dict, duration_s: float = 1.0, fps: int = 30
|
||||
) -> None:
|
||||
"""Smoothly move the follower robot from ``current`` to ``target`` action.
|
||||
|
||||
Used when the teleop is non-actuated: instead of driving the leader arm
|
||||
to the follower, we bring the follower to the teleop's current pose.
|
||||
Both ``current`` and ``target`` must be in robot-action key space.
|
||||
"""
|
||||
steps = max(int(duration_s * fps), 1)
|
||||
|
||||
for step in range(steps + 1):
|
||||
t = step / steps
|
||||
interp = {k: current[k] * (1 - t) + target[k] * t if k in target else current[k] for k in current}
|
||||
robot.send_action(interp)
|
||||
time.sleep(1 / fps)
|
||||
|
||||
|
||||
@@ -415,9 +441,6 @@ class DAggerStrategy(RolloutStrategy):
|
||||
engine.reset()
|
||||
interpolator.reset()
|
||||
events.reset()
|
||||
# TODO(Steven): re-enable once Teleoperator motor-control methods are
|
||||
# standardised; until then the user pre-aligns the leader by hand.
|
||||
# teleop.disable_torque()
|
||||
engine.resume()
|
||||
|
||||
last_action: dict[str, Any] | None = None
|
||||
@@ -441,8 +464,16 @@ class DAggerStrategy(RolloutStrategy):
|
||||
transition = events.consume_transition()
|
||||
if transition is not None:
|
||||
old_phase, new_phase = transition
|
||||
self._apply_transition(old_phase, new_phase, engine, interpolator, robot, teleop)
|
||||
last_action = None
|
||||
self._apply_transition(
|
||||
old_phase,
|
||||
new_phase,
|
||||
engine,
|
||||
interpolator,
|
||||
ctx,
|
||||
last_action,
|
||||
)
|
||||
if new_phase == DAggerPhase.AUTONOMOUS:
|
||||
last_action = None
|
||||
|
||||
phase = events.phase
|
||||
obs = robot.get_observation()
|
||||
@@ -532,9 +563,6 @@ class DAggerStrategy(RolloutStrategy):
|
||||
finally:
|
||||
logger.info("DAgger continuous control loop ended — pausing engine")
|
||||
engine.pause()
|
||||
# TODO(Steven): re-enable once Teleoperator motor-control methods
|
||||
# are standardised across all teleop implementations.
|
||||
# teleop.disable_torque()
|
||||
with contextlib.suppress(Exception):
|
||||
with self._episode_lock:
|
||||
dataset.save_episode()
|
||||
@@ -570,9 +598,6 @@ class DAggerStrategy(RolloutStrategy):
|
||||
engine.reset()
|
||||
interpolator.reset()
|
||||
events.reset()
|
||||
# TODO(Steven): re-enable once Teleoperator motor-control methods are
|
||||
# standardised; until then the user pre-aligns the leader by hand.
|
||||
# teleop.disable_torque()
|
||||
engine.resume()
|
||||
|
||||
last_action: dict[str, Any] | None = None
|
||||
@@ -600,8 +625,16 @@ class DAggerStrategy(RolloutStrategy):
|
||||
transition = events.consume_transition()
|
||||
if transition is not None:
|
||||
old_phase, new_phase = transition
|
||||
self._apply_transition(old_phase, new_phase, engine, interpolator, robot, teleop)
|
||||
last_action = None
|
||||
self._apply_transition(
|
||||
old_phase,
|
||||
new_phase,
|
||||
engine,
|
||||
interpolator,
|
||||
ctx,
|
||||
last_action,
|
||||
)
|
||||
if new_phase == DAggerPhase.AUTONOMOUS:
|
||||
last_action = None
|
||||
|
||||
# Correction ended -> save episode (blocking if not streaming)
|
||||
if old_phase == DAggerPhase.CORRECTING and new_phase == DAggerPhase.PAUSED:
|
||||
@@ -679,9 +712,6 @@ class DAggerStrategy(RolloutStrategy):
|
||||
finally:
|
||||
logger.info("DAgger corrections-only loop ended — pausing engine")
|
||||
engine.pause()
|
||||
# TODO(Steven): re-enable once Teleoperator motor-control methods
|
||||
# are standardised across all teleop implementations.
|
||||
# teleop.disable_torque()
|
||||
with contextlib.suppress(Exception):
|
||||
with self._episode_lock:
|
||||
dataset.save_episode()
|
||||
@@ -698,36 +728,71 @@ class DAggerStrategy(RolloutStrategy):
|
||||
new_phase: DAggerPhase,
|
||||
engine,
|
||||
interpolator,
|
||||
robot: ThreadSafeRobot,
|
||||
teleop: Teleoperator,
|
||||
ctx: RolloutContext,
|
||||
prev_action: dict | None,
|
||||
) -> None:
|
||||
"""Execute side-effects for a validated phase transition."""
|
||||
"""Execute side-effects for a validated phase transition, including smooth handovers.
|
||||
|
||||
AUTONOMOUS -> PAUSED (actuated teleop):
|
||||
Pause the engine, then drive the leader arm to the follower's last
|
||||
commanded position so the operator takes over without a jerk.
|
||||
|
||||
PAUSED -> CORRECTING (non-actuated teleop):
|
||||
Slide the follower to the teleop's current pose so the robot meets
|
||||
the operator's hand rather than jumping to it on the first frame.
|
||||
|
||||
CORRECTING -> PAUSED (actuated teleop):
|
||||
Re-enable torque to hold position after correction.
|
||||
This will be potentially useful if cancelling the correction recording
|
||||
|
||||
PAUSED -> AUTONOMOUS:
|
||||
Reset and resume the inference engine.
|
||||
"""
|
||||
teleop = ctx.hardware.teleop
|
||||
robot = ctx.hardware.robot_wrapper
|
||||
|
||||
logger.info("Phase transition: %s -> %s", old_phase.value, new_phase.value)
|
||||
if old_phase == DAggerPhase.AUTONOMOUS and new_phase == DAggerPhase.PAUSED:
|
||||
logger.info("Pausing engine — robot holds position")
|
||||
logger.info("Pausing engine - robot holds position")
|
||||
engine.pause()
|
||||
obs = robot.get_observation()
|
||||
_robot_pos = {
|
||||
k: v for k, v in obs.items() if k.endswith(".pos") and k in robot.observation_features
|
||||
}
|
||||
# TODO(Steven): once Teleoperator motor-control methods are
|
||||
# standardised, drive the leader to the follower's pose here so the
|
||||
# operator does not need to pre-align the arm by hand. Until then
|
||||
# the user is responsible for the alignment.
|
||||
# _teleop_smooth_move_to(teleop, _robot_pos, duration_s=2.0, fps=50)
|
||||
|
||||
elif new_phase == DAggerPhase.CORRECTING:
|
||||
logger.info("Entering correction mode — human teleop control")
|
||||
# TODO(Steven): re-enable once Teleoperator motor-control methods
|
||||
# are standardised across all teleop implementations.
|
||||
# teleop.disable_torque()
|
||||
if _teleop_supports_feedback(teleop) and prev_action is not None:
|
||||
# TODO(Maxime): prev_action is in robot action key space (output of robot_action_processor).
|
||||
# send_feedback expects teleop feedback key space. For homogeneous setups (e.g. SO-101
|
||||
# leader + SO-101 follower) the keys are identical so this works. If the processor pipeline
|
||||
# does non-trivial key renaming (e.g. a rename_map on action keys), the interpolation in
|
||||
# _teleop_smooth_move_to silently no-ops and the arm doesn't move.
|
||||
logger.info("Smooth handover: moving leader arm to follower position")
|
||||
_teleop_smooth_move_to(teleop, prev_action)
|
||||
|
||||
elif old_phase == DAggerPhase.PAUSED and new_phase == DAggerPhase.CORRECTING:
|
||||
logger.info("Entering correction mode - human teleop control")
|
||||
if not _teleop_supports_feedback(teleop) and prev_action is not None:
|
||||
logger.info("Smooth handover: sliding follower to teleop position")
|
||||
obs = robot.get_observation()
|
||||
teleop_action = teleop.get_action()
|
||||
processed = ctx.processors.teleop_action_processor((teleop_action, obs))
|
||||
target = ctx.processors.robot_action_processor((processed, obs))
|
||||
_follower_smooth_move_to(robot, prev_action, target)
|
||||
|
||||
# unlock the teleop for human control
|
||||
if _teleop_supports_feedback(teleop):
|
||||
teleop.disable_torque()
|
||||
|
||||
elif old_phase == DAggerPhase.CORRECTING and new_phase == DAggerPhase.PAUSED:
|
||||
if _teleop_supports_feedback(teleop):
|
||||
teleop.enable_torque()
|
||||
|
||||
elif new_phase == DAggerPhase.AUTONOMOUS:
|
||||
logger.info("Resuming autonomous mode — resetting engine and interpolator")
|
||||
logger.info("Resuming autonomous mode - resetting engine and interpolator")
|
||||
interpolator.reset()
|
||||
engine.reset()
|
||||
engine.resume()
|
||||
|
||||
# release teleop before resuming the policy
|
||||
if _teleop_supports_feedback(teleop):
|
||||
teleop.disable_torque()
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Background push (shared by both modes)
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
@@ -277,9 +277,14 @@ def train(cfg: TrainPipelineConfig, accelerator: "Accelerator | None" = None):
|
||||
if cfg.peft is not None:
|
||||
if cfg.is_reward_model_training:
|
||||
raise ValueError("PEFT is only supported for policy training. ")
|
||||
logging.info("Using PEFT! Wrapping model.")
|
||||
peft_cli_overrides = dataclasses.asdict(cfg.peft)
|
||||
policy = policy.wrap_with_peft(peft_cli_overrides=peft_cli_overrides)
|
||||
from peft import PeftModel
|
||||
|
||||
if isinstance(policy, PeftModel):
|
||||
logging.info("PEFT adapter already loaded from checkpoint, skipping wrap_with_peft.")
|
||||
else:
|
||||
logging.info("Using PEFT! Wrapping model.")
|
||||
peft_cli_overrides = dataclasses.asdict(cfg.peft)
|
||||
policy = policy.wrap_with_peft(peft_cli_overrides=peft_cli_overrides)
|
||||
|
||||
# Wait for all processes to finish model creation before continuing
|
||||
accelerator.wait_for_everyone()
|
||||
|
||||
@@ -112,7 +112,7 @@ class OpenArmMini(Teleoperator):
|
||||
|
||||
@property
|
||||
def feedback_features(self) -> dict[str, type]:
|
||||
return {}
|
||||
return self.action_features
|
||||
|
||||
@property
|
||||
def is_connected(self) -> bool:
|
||||
@@ -348,8 +348,9 @@ class OpenArmMini(Teleoperator):
|
||||
if left_goals:
|
||||
self.bus_left.sync_write("Goal_Position", left_goals)
|
||||
|
||||
@check_if_not_connected
|
||||
def send_feedback(self, feedback: dict[str, float]) -> None:
|
||||
raise NotImplementedError("Feedback is not yet implemented for OpenArm Mini.")
|
||||
self.write_goal_positions(feedback)
|
||||
|
||||
@check_if_not_connected
|
||||
def disconnect(self) -> None:
|
||||
|
||||
@@ -59,7 +59,7 @@ class SOLeader(Teleoperator):
|
||||
|
||||
@property
|
||||
def feedback_features(self) -> dict[str, type]:
|
||||
return {}
|
||||
return self.action_features
|
||||
|
||||
@property
|
||||
def is_connected(self) -> bool:
|
||||
@@ -130,6 +130,12 @@ class SOLeader(Teleoperator):
|
||||
for motor in self.bus.motors:
|
||||
self.bus.write("Operating_Mode", motor, OperatingMode.POSITION.value)
|
||||
|
||||
def enable_torque(self) -> None:
|
||||
self.bus.enable_torque()
|
||||
|
||||
def disable_torque(self) -> None:
|
||||
self.bus.disable_torque()
|
||||
|
||||
def setup_motors(self) -> None:
|
||||
for motor in reversed(self.bus.motors):
|
||||
input(f"Connect the controller board to the '{motor}' motor only and press enter.")
|
||||
@@ -145,9 +151,11 @@ class SOLeader(Teleoperator):
|
||||
logger.debug(f"{self} read action: {dt_ms:.1f}ms")
|
||||
return action
|
||||
|
||||
@check_if_not_connected
|
||||
def send_feedback(self, feedback: dict[str, float]) -> None:
|
||||
# TODO: Implement force feedback
|
||||
raise NotImplementedError
|
||||
goals = {k.removesuffix(".pos"): v for k, v in feedback.items() if k.endswith(".pos")}
|
||||
if goals:
|
||||
self.bus.sync_write("Goal_Position", goals)
|
||||
|
||||
@check_if_not_connected
|
||||
def disconnect(self) -> None:
|
||||
|
||||
Reference in New Issue
Block a user