mirror of
https://github.com/huggingface/lerobot.git
synced 2026-05-21 03:30:10 +00:00
Only move teleop after space press
This commit is contained in:
+15
-7
@@ -126,19 +126,27 @@ python examples/rac/rac_data_collection.py \
|
|||||||
|
|
||||||
| Key | Action |
|
| Key | Action |
|
||||||
|-----|--------|
|
|-----|--------|
|
||||||
| **SPACE** | Start intervention (take control) |
|
| **SPACE** | Pause policy (teleop mirrors robot, no recording) |
|
||||||
|
| **c** | Take control (start correction, recording resumes) |
|
||||||
| **→** | End episode (save) |
|
| **→** | End episode (save) |
|
||||||
| **ESC** | Stop recording session |
|
| **←** | Re-record episode |
|
||||||
|
| **ESC** | Stop session and push to hub |
|
||||||
|
|
||||||
**The RaC Protocol:**
|
**The RaC Protocol:**
|
||||||
|
|
||||||
1. Watch the policy run autonomously
|
1. Watch the policy run autonomously
|
||||||
2. When you see imminent failure, press **SPACE** to intervene
|
2. When you see imminent failure, press **SPACE** to pause
|
||||||
3. **RECOVERY**: Teleoperate the robot back to a good in-distribution state
|
- Policy stops, teleoperator mirrors robot position
|
||||||
4. **CORRECTION**: Use teleoperator to complete the subtask
|
- Allows smooth handoff without jarring movements
|
||||||
5. Press **→** to save and end episode
|
- No frames recorded during pause
|
||||||
|
3. Press **c** to take control
|
||||||
|
- Teleoperator is now free to move
|
||||||
|
- **RECOVERY**: Teleoperate back to a good state
|
||||||
|
- **CORRECTION**: Complete the subtask
|
||||||
|
- All movements are recorded
|
||||||
|
4. Press **→** to save and end episode
|
||||||
|
|
||||||
The recovery segment (teleoperating back to good state) is recorded as training data - this teaches the policy how to recover from errors.
|
The recovery and correction segments teach the policy how to recover from errors.
|
||||||
|
|
||||||
### Step 3: (Optional) Compute SARM Rewards
|
### Step 3: (Optional) Compute SARM Rewards
|
||||||
|
|
||||||
|
|||||||
@@ -23,9 +23,11 @@ The recovery segment (teleoperating back to good state) is recorded as training
|
|||||||
this teaches the policy how to recover from errors.
|
this teaches the policy how to recover from errors.
|
||||||
|
|
||||||
Keyboard Controls:
|
Keyboard Controls:
|
||||||
SPACE - Start intervention (policy stops, human takes over)
|
SPACE - Pause policy (robot holds position, no recording)
|
||||||
|
c - Take control (start correction, recording resumes)
|
||||||
→ - End episode (save and continue to next)
|
→ - End episode (save and continue to next)
|
||||||
ESC - Stop recording session
|
← - Re-record episode
|
||||||
|
ESC - Stop recording and push dataset to hub
|
||||||
|
|
||||||
Usage:
|
Usage:
|
||||||
python examples/rac/rac_data_collection.py \
|
python examples/rac/rac_data_collection.py \
|
||||||
@@ -129,7 +131,8 @@ def init_rac_keyboard_listener():
|
|||||||
"exit_early": False,
|
"exit_early": False,
|
||||||
"rerecord_episode": False,
|
"rerecord_episode": False,
|
||||||
"stop_recording": False,
|
"stop_recording": False,
|
||||||
"intervention_active": False,
|
"policy_paused": False, # SPACE pressed - policy paused, teleop tracking robot
|
||||||
|
"correction_active": False, # 'c' pressed - human controlling, recording correction
|
||||||
}
|
}
|
||||||
|
|
||||||
if is_headless():
|
if is_headless():
|
||||||
@@ -141,12 +144,15 @@ def init_rac_keyboard_listener():
|
|||||||
def on_press(key):
|
def on_press(key):
|
||||||
try:
|
try:
|
||||||
if key == keyboard.Key.space:
|
if key == keyboard.Key.space:
|
||||||
if not events["intervention_active"]:
|
if not events["policy_paused"] and not events["correction_active"]:
|
||||||
print("\n[RaC] ▶ INTERVENTION - You have control")
|
print("\n[RaC] ⏸ PAUSED - Policy stopped, teleop tracking robot")
|
||||||
print(" 1. Teleoperate robot back to good state (RECOVERY)")
|
print(" Press 'c' to take control and start correction")
|
||||||
print(" 2. Complete the subtask (CORRECTION)")
|
events["policy_paused"] = True
|
||||||
print(" 3. Press → when done")
|
elif hasattr(key, 'char') and key.char == 'c':
|
||||||
events["intervention_active"] = True
|
if events["policy_paused"] and not events["correction_active"]:
|
||||||
|
print("\n[RaC] ▶ CORRECTION - You have control (recording)")
|
||||||
|
print(" Teleoperate to correct, press → when done")
|
||||||
|
events["correction_active"] = True
|
||||||
elif key == keyboard.Key.right:
|
elif key == keyboard.Key.right:
|
||||||
print("[RaC] → End episode")
|
print("[RaC] → End episode")
|
||||||
events["exit_early"] = True
|
events["exit_early"] = True
|
||||||
@@ -155,7 +161,7 @@ def init_rac_keyboard_listener():
|
|||||||
events["rerecord_episode"] = True
|
events["rerecord_episode"] = True
|
||||||
events["exit_early"] = True
|
events["exit_early"] = True
|
||||||
elif key == keyboard.Key.esc:
|
elif key == keyboard.Key.esc:
|
||||||
print("[RaC] ESC - Stop recording session")
|
print("[RaC] ESC - Stop recording, pushing to hub...")
|
||||||
events["stop_recording"] = True
|
events["stop_recording"] = True
|
||||||
events["exit_early"] = True
|
events["exit_early"] = True
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
@@ -201,10 +207,12 @@ def rac_rollout_loop(
|
|||||||
display_data: bool = True,
|
display_data: bool = True,
|
||||||
) -> dict:
|
) -> dict:
|
||||||
"""
|
"""
|
||||||
RaC rollout loop: policy runs until intervention, then human does recovery+correction.
|
RaC rollout loop with two-stage intervention:
|
||||||
|
|
||||||
The human intervention (recovery + correction) is recorded as training data.
|
1. Policy runs autonomously (recording)
|
||||||
This teaches the policy how to recover from errors.
|
2. SPACE: Policy pauses (NOT recording) - robot holds position
|
||||||
|
3. 'c': Human takes control (recording correction)
|
||||||
|
4. →: End episode
|
||||||
"""
|
"""
|
||||||
policy.reset()
|
policy.reset()
|
||||||
preprocessor.reset()
|
preprocessor.reset()
|
||||||
@@ -216,10 +224,11 @@ def rac_rollout_loop(
|
|||||||
stats = {
|
stats = {
|
||||||
"total_frames": 0,
|
"total_frames": 0,
|
||||||
"autonomous_frames": 0,
|
"autonomous_frames": 0,
|
||||||
"human_frames": 0,
|
"paused_frames": 0,
|
||||||
"intervention_occurred": False,
|
"correction_frames": 0,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
last_robot_action = None
|
||||||
timestamp = 0
|
timestamp = 0
|
||||||
start_t = time.perf_counter()
|
start_t = time.perf_counter()
|
||||||
|
|
||||||
@@ -228,13 +237,35 @@ def rac_rollout_loop(
|
|||||||
|
|
||||||
if events["exit_early"]:
|
if events["exit_early"]:
|
||||||
events["exit_early"] = False
|
events["exit_early"] = False
|
||||||
events["intervention_active"] = False
|
events["policy_paused"] = False
|
||||||
|
events["correction_active"] = False
|
||||||
break
|
break
|
||||||
|
|
||||||
obs = robot.get_observation()
|
obs = robot.get_observation()
|
||||||
obs_frame = build_dataset_frame(dataset.features, obs, prefix=OBS_STR)
|
obs_frame = build_dataset_frame(dataset.features, obs, prefix=OBS_STR)
|
||||||
|
|
||||||
if not events["intervention_active"]:
|
if events["correction_active"]:
|
||||||
|
# Human controlling - record correction data
|
||||||
|
robot_action = teleop.get_action()
|
||||||
|
robot.send_action(robot_action)
|
||||||
|
stats["correction_frames"] += 1
|
||||||
|
|
||||||
|
# Record this frame
|
||||||
|
action_frame = build_dataset_frame(dataset.features, robot_action, prefix=ACTION)
|
||||||
|
frame = {**obs_frame, **action_frame, "task": single_task}
|
||||||
|
frame_buffer.append(frame)
|
||||||
|
stats["total_frames"] += 1
|
||||||
|
|
||||||
|
elif events["policy_paused"]:
|
||||||
|
# Paused - hold last position, don't record
|
||||||
|
if last_robot_action is not None:
|
||||||
|
robot.send_action(last_robot_action)
|
||||||
|
stats["paused_frames"] += 1
|
||||||
|
robot_action = last_robot_action
|
||||||
|
# Don't record frames during pause
|
||||||
|
|
||||||
|
else:
|
||||||
|
# Normal policy execution - record
|
||||||
action_values = predict_action(
|
action_values = predict_action(
|
||||||
observation=obs_frame,
|
observation=obs_frame,
|
||||||
policy=policy,
|
policy=policy,
|
||||||
@@ -246,22 +277,18 @@ def rac_rollout_loop(
|
|||||||
robot_type=robot.robot_type,
|
robot_type=robot.robot_type,
|
||||||
)
|
)
|
||||||
robot_action: RobotAction = make_robot_action(action_values, dataset.features)
|
robot_action: RobotAction = make_robot_action(action_values, dataset.features)
|
||||||
|
robot.send_action(robot_action)
|
||||||
|
last_robot_action = robot_action
|
||||||
stats["autonomous_frames"] += 1
|
stats["autonomous_frames"] += 1
|
||||||
else:
|
|
||||||
stats["intervention_occurred"] = True
|
# Record this frame
|
||||||
robot_action = teleop.get_action()
|
action_frame = build_dataset_frame(dataset.features, robot_action, prefix=ACTION)
|
||||||
action_values = robot_action
|
frame = {**obs_frame, **action_frame, "task": single_task}
|
||||||
stats["human_frames"] += 1
|
frame_buffer.append(frame)
|
||||||
|
stats["total_frames"] += 1
|
||||||
|
|
||||||
robot.send_action(robot_action)
|
if display_data and robot_action is not None:
|
||||||
|
log_rerun_data(observation=obs, action=robot_action)
|
||||||
action_frame = build_dataset_frame(dataset.features, action_values, prefix=ACTION)
|
|
||||||
frame = {**obs_frame, **action_frame, "task": single_task}
|
|
||||||
frame_buffer.append(frame)
|
|
||||||
stats["total_frames"] += 1
|
|
||||||
|
|
||||||
if display_data:
|
|
||||||
log_rerun_data(observation=obs, action=action_values)
|
|
||||||
|
|
||||||
dt = time.perf_counter() - loop_start
|
dt = time.perf_counter() - loop_start
|
||||||
precise_sleep(1 / fps - dt)
|
precise_sleep(1 / fps - dt)
|
||||||
@@ -374,9 +401,11 @@ def rac_collect(cfg: RaCConfig) -> LeRobotDataset:
|
|||||||
print(" Policy runs autonomously until you intervene.")
|
print(" Policy runs autonomously until you intervene.")
|
||||||
print()
|
print()
|
||||||
print(" Controls:")
|
print(" Controls:")
|
||||||
print(" SPACE - Intervene (take control)")
|
print(" SPACE - Pause policy (robot holds position, no recording)")
|
||||||
|
print(" c - Take control (start correction, recording)")
|
||||||
print(" → - End episode (save)")
|
print(" → - End episode (save)")
|
||||||
print(" ESC - Stop recording session")
|
print(" ← - Re-record episode")
|
||||||
|
print(" ESC - Stop session and push to hub")
|
||||||
print("=" * 65 + "\n")
|
print("=" * 65 + "\n")
|
||||||
|
|
||||||
with VideoEncodingManager(dataset):
|
with VideoEncodingManager(dataset):
|
||||||
|
|||||||
@@ -23,9 +23,11 @@ The recovery segment (teleoperating back to good state) is recorded as training
|
|||||||
this teaches the policy how to recover from errors.
|
this teaches the policy how to recover from errors.
|
||||||
|
|
||||||
Keyboard Controls:
|
Keyboard Controls:
|
||||||
SPACE - Start intervention (policy stops, human takes over)
|
SPACE - Pause policy (teleop mirrors robot, no recording)
|
||||||
|
c - Take control (teleop free, recording correction)
|
||||||
→ - End episode (save and continue to next)
|
→ - End episode (save and continue to next)
|
||||||
ESC - Stop recording session
|
← - Re-record episode
|
||||||
|
ESC - Stop recording and push dataset to hub
|
||||||
|
|
||||||
Usage:
|
Usage:
|
||||||
python examples/rac/rac_data_collection_openarms.py \
|
python examples/rac/rac_data_collection_openarms.py \
|
||||||
@@ -135,7 +137,8 @@ def init_rac_keyboard_listener():
|
|||||||
"exit_early": False,
|
"exit_early": False,
|
||||||
"rerecord_episode": False,
|
"rerecord_episode": False,
|
||||||
"stop_recording": False,
|
"stop_recording": False,
|
||||||
"intervention_active": False,
|
"policy_paused": False, # SPACE pressed - policy paused, teleop tracking robot
|
||||||
|
"correction_active": False, # 'c' pressed - human controlling, recording correction
|
||||||
}
|
}
|
||||||
|
|
||||||
if is_headless():
|
if is_headless():
|
||||||
@@ -147,12 +150,15 @@ def init_rac_keyboard_listener():
|
|||||||
def on_press(key):
|
def on_press(key):
|
||||||
try:
|
try:
|
||||||
if key == keyboard.Key.space:
|
if key == keyboard.Key.space:
|
||||||
if not events["intervention_active"]:
|
if not events["policy_paused"] and not events["correction_active"]:
|
||||||
print("\n[RaC] ▶ INTERVENTION - You have control")
|
print("\n[RaC] ⏸ PAUSED - Policy stopped, teleop tracking robot")
|
||||||
print(" 1. Teleoperate robot back to good state (RECOVERY)")
|
print(" Press 'c' to take control and start correction")
|
||||||
print(" 2. Complete the subtask (CORRECTION)")
|
events["policy_paused"] = True
|
||||||
print(" 3. Press → when done")
|
elif hasattr(key, 'char') and key.char == 'c':
|
||||||
events["intervention_active"] = True
|
if events["policy_paused"] and not events["correction_active"]:
|
||||||
|
print("\n[RaC] ▶ CORRECTION - You have control (recording)")
|
||||||
|
print(" Teleoperate to correct, press → when done")
|
||||||
|
events["correction_active"] = True
|
||||||
elif key == keyboard.Key.right:
|
elif key == keyboard.Key.right:
|
||||||
print("[RaC] → End episode")
|
print("[RaC] → End episode")
|
||||||
events["exit_early"] = True
|
events["exit_early"] = True
|
||||||
@@ -161,7 +167,7 @@ def init_rac_keyboard_listener():
|
|||||||
events["rerecord_episode"] = True
|
events["rerecord_episode"] = True
|
||||||
events["exit_early"] = True
|
events["exit_early"] = True
|
||||||
elif key == keyboard.Key.esc:
|
elif key == keyboard.Key.esc:
|
||||||
print("[RaC] ESC - Stop recording session")
|
print("[RaC] ESC - Stop recording, pushing to hub...")
|
||||||
events["stop_recording"] = True
|
events["stop_recording"] = True
|
||||||
events["exit_early"] = True
|
events["exit_early"] = True
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
@@ -207,13 +213,14 @@ def rac_rollout_loop(
|
|||||||
display_data: bool = True,
|
display_data: bool = True,
|
||||||
) -> dict:
|
) -> dict:
|
||||||
"""
|
"""
|
||||||
RaC rollout loop: policy runs until intervention, then human does recovery+correction.
|
RaC rollout loop with two-stage intervention:
|
||||||
|
|
||||||
The human intervention (recovery + correction) is recorded as training data.
|
|
||||||
This teaches the policy how to recover from errors.
|
|
||||||
|
|
||||||
During autonomous execution, the teleoperator mirrors the robot's position so
|
1. Policy runs autonomously (recording)
|
||||||
intervention is seamless - the teleoperator is already in the same configuration.
|
2. SPACE: Policy pauses, teleop mirrors robot position (NOT recording)
|
||||||
|
3. 'c': Human takes control, teleop torque disabled (recording correction)
|
||||||
|
4. →: End episode
|
||||||
|
|
||||||
|
This allows smooth handoff - teleop tracks robot until human is ready.
|
||||||
"""
|
"""
|
||||||
policy.reset()
|
policy.reset()
|
||||||
preprocessor.reset()
|
preprocessor.reset()
|
||||||
@@ -225,13 +232,13 @@ def rac_rollout_loop(
|
|||||||
stats = {
|
stats = {
|
||||||
"total_frames": 0,
|
"total_frames": 0,
|
||||||
"autonomous_frames": 0,
|
"autonomous_frames": 0,
|
||||||
"human_frames": 0,
|
"paused_frames": 0,
|
||||||
"intervention_occurred": False,
|
"correction_frames": 0,
|
||||||
}
|
}
|
||||||
|
|
||||||
# Enable torque on teleoperator so it can track robot position
|
# Enable torque on teleoperator so it can track robot position
|
||||||
teleop.enable_torque()
|
teleop.enable_torque()
|
||||||
was_intervention_active = False
|
was_correction_active = False
|
||||||
|
|
||||||
timestamp = 0
|
timestamp = 0
|
||||||
start_t = time.perf_counter()
|
start_t = time.perf_counter()
|
||||||
@@ -241,20 +248,41 @@ def rac_rollout_loop(
|
|||||||
|
|
||||||
if events["exit_early"]:
|
if events["exit_early"]:
|
||||||
events["exit_early"] = False
|
events["exit_early"] = False
|
||||||
events["intervention_active"] = False
|
events["policy_paused"] = False
|
||||||
|
events["correction_active"] = False
|
||||||
break
|
break
|
||||||
|
|
||||||
# Detect transition to intervention mode
|
# Detect transition to correction mode (human takes control)
|
||||||
if events["intervention_active"] and not was_intervention_active:
|
if events["correction_active"] and not was_correction_active:
|
||||||
teleop.disable_torque()
|
teleop.disable_torque()
|
||||||
was_intervention_active = True
|
was_correction_active = True
|
||||||
|
|
||||||
obs = robot.get_observation()
|
obs = robot.get_observation()
|
||||||
# Filter to only include keys in observation_features (excludes .vel, .torque, _timing, etc.)
|
|
||||||
obs_filtered = {k: v for k, v in obs.items() if k in robot.observation_features}
|
obs_filtered = {k: v for k, v in obs.items() if k in robot.observation_features}
|
||||||
obs_frame = build_dataset_frame(dataset.features, obs_filtered, prefix=OBS_STR)
|
obs_frame = build_dataset_frame(dataset.features, obs_filtered, prefix=OBS_STR)
|
||||||
|
|
||||||
if not events["intervention_active"]:
|
if events["correction_active"]:
|
||||||
|
# Human controlling - record correction data
|
||||||
|
robot_action = teleop.get_action()
|
||||||
|
robot.send_action(robot_action)
|
||||||
|
stats["correction_frames"] += 1
|
||||||
|
|
||||||
|
# Record this frame
|
||||||
|
action_frame = build_dataset_frame(dataset.features, robot_action, prefix=ACTION)
|
||||||
|
frame = {**obs_frame, **action_frame, "task": single_task}
|
||||||
|
frame_buffer.append(frame)
|
||||||
|
stats["total_frames"] += 1
|
||||||
|
|
||||||
|
elif events["policy_paused"]:
|
||||||
|
# Paused - teleop tracks robot, but don't record
|
||||||
|
# Read current robot position and send to teleop
|
||||||
|
robot_action = {k: v for k, v in obs_filtered.items() if k.endswith(".pos")}
|
||||||
|
teleop.send_feedback(robot_action)
|
||||||
|
stats["paused_frames"] += 1
|
||||||
|
# Don't record frames during pause
|
||||||
|
|
||||||
|
else:
|
||||||
|
# Normal policy execution - record
|
||||||
action_values = predict_action(
|
action_values = predict_action(
|
||||||
observation=obs_frame,
|
observation=obs_frame,
|
||||||
policy=policy,
|
policy=policy,
|
||||||
@@ -266,22 +294,17 @@ def rac_rollout_loop(
|
|||||||
robot_type=robot.robot_type,
|
robot_type=robot.robot_type,
|
||||||
)
|
)
|
||||||
robot_action: RobotAction = make_robot_action(action_values, dataset.features)
|
robot_action: RobotAction = make_robot_action(action_values, dataset.features)
|
||||||
|
robot.send_action(robot_action)
|
||||||
stats["autonomous_frames"] += 1
|
stats["autonomous_frames"] += 1
|
||||||
|
|
||||||
# Mirror robot position to teleoperator for seamless intervention
|
# Mirror robot position to teleoperator
|
||||||
teleop.send_feedback(robot_action)
|
teleop.send_feedback(robot_action)
|
||||||
else:
|
|
||||||
stats["intervention_occurred"] = True
|
# Record this frame
|
||||||
robot_action = teleop.get_action()
|
action_frame = build_dataset_frame(dataset.features, robot_action, prefix=ACTION)
|
||||||
action_values = robot_action
|
frame = {**obs_frame, **action_frame, "task": single_task}
|
||||||
stats["human_frames"] += 1
|
frame_buffer.append(frame)
|
||||||
|
stats["total_frames"] += 1
|
||||||
robot.send_action(robot_action)
|
|
||||||
|
|
||||||
action_frame = build_dataset_frame(dataset.features, robot_action, prefix=ACTION)
|
|
||||||
frame = {**obs_frame, **action_frame, "task": single_task}
|
|
||||||
frame_buffer.append(frame)
|
|
||||||
stats["total_frames"] += 1
|
|
||||||
|
|
||||||
if display_data:
|
if display_data:
|
||||||
log_rerun_data(observation=obs_filtered, action=robot_action)
|
log_rerun_data(observation=obs_filtered, action=robot_action)
|
||||||
@@ -400,9 +423,11 @@ def rac_collect(cfg: RaCConfig) -> LeRobotDataset:
|
|||||||
print(" Policy runs autonomously until you intervene.")
|
print(" Policy runs autonomously until you intervene.")
|
||||||
print()
|
print()
|
||||||
print(" Controls:")
|
print(" Controls:")
|
||||||
print(" SPACE - Intervene (take control)")
|
print(" SPACE - Pause policy (teleop tracks robot, no recording)")
|
||||||
|
print(" c - Take control (start correction, recording)")
|
||||||
print(" → - End episode (save)")
|
print(" → - End episode (save)")
|
||||||
print(" ESC - Stop recording session")
|
print(" ← - Re-record episode")
|
||||||
|
print(" ESC - Stop session and push to hub")
|
||||||
print("=" * 65 + "\n")
|
print("=" * 65 + "\n")
|
||||||
|
|
||||||
with VideoEncodingManager(dataset):
|
with VideoEncodingManager(dataset):
|
||||||
|
|||||||
Reference in New Issue
Block a user