Only move teleop after space press

This commit is contained in:
Pepijn
2025-12-31 12:24:43 +01:00
parent a97255e3d1
commit f15872293d
3 changed files with 143 additions and 81 deletions
+15 -7
View File
@@ -126,19 +126,27 @@ python examples/rac/rac_data_collection.py \
| Key | Action | | Key | Action |
|-----|--------| |-----|--------|
| **SPACE** | Start intervention (take control) | | **SPACE** | Pause policy (teleop mirrors robot, no recording) |
| **c** | Take control (start correction, recording resumes) |
| **→** | End episode (save) | | **→** | End episode (save) |
| **ESC** | Stop recording session | | **** | Re-record episode |
| **ESC** | Stop session and push to hub |
**The RaC Protocol:** **The RaC Protocol:**
1. Watch the policy run autonomously 1. Watch the policy run autonomously
2. When you see imminent failure, press **SPACE** to intervene 2. When you see imminent failure, press **SPACE** to pause
3. **RECOVERY**: Teleoperate the robot back to a good in-distribution state - Policy stops, teleoperator mirrors robot position
4. **CORRECTION**: Use teleoperator to complete the subtask - Allows smooth handoff without jarring movements
5. Press **→** to save and end episode - No frames recorded during pause
3. Press **c** to take control
- Teleoperator is now free to move
- **RECOVERY**: Teleoperate back to a good state
- **CORRECTION**: Complete the subtask
- All movements are recorded
4. Press **→** to save and end episode
The recovery segment (teleoperating back to good state) is recorded as training data - this teaches the policy how to recover from errors. The recovery and correction segments teach the policy how to recover from errors.
### Step 3: (Optional) Compute SARM Rewards ### Step 3: (Optional) Compute SARM Rewards
+63 -34
View File
@@ -23,9 +23,11 @@ The recovery segment (teleoperating back to good state) is recorded as training
this teaches the policy how to recover from errors. this teaches the policy how to recover from errors.
Keyboard Controls: Keyboard Controls:
SPACE - Start intervention (policy stops, human takes over) SPACE - Pause policy (robot holds position, no recording)
c - Take control (start correction, recording resumes)
→ - End episode (save and continue to next) → - End episode (save and continue to next)
ESC - Stop recording session - Re-record episode
ESC - Stop recording and push dataset to hub
Usage: Usage:
python examples/rac/rac_data_collection.py \ python examples/rac/rac_data_collection.py \
@@ -129,7 +131,8 @@ def init_rac_keyboard_listener():
"exit_early": False, "exit_early": False,
"rerecord_episode": False, "rerecord_episode": False,
"stop_recording": False, "stop_recording": False,
"intervention_active": False, "policy_paused": False, # SPACE pressed - policy paused, teleop tracking robot
"correction_active": False, # 'c' pressed - human controlling, recording correction
} }
if is_headless(): if is_headless():
@@ -141,12 +144,15 @@ def init_rac_keyboard_listener():
def on_press(key): def on_press(key):
try: try:
if key == keyboard.Key.space: if key == keyboard.Key.space:
if not events["intervention_active"]: if not events["policy_paused"] and not events["correction_active"]:
print("\n[RaC] ▶ INTERVENTION - You have control") print("\n[RaC] ⏸ PAUSED - Policy stopped, teleop tracking robot")
print(" 1. Teleoperate robot back to good state (RECOVERY)") print(" Press 'c' to take control and start correction")
print(" 2. Complete the subtask (CORRECTION)") events["policy_paused"] = True
print(" 3. Press → when done") elif hasattr(key, 'char') and key.char == 'c':
events["intervention_active"] = True if events["policy_paused"] and not events["correction_active"]:
print("\n[RaC] ▶ CORRECTION - You have control (recording)")
print(" Teleoperate to correct, press → when done")
events["correction_active"] = True
elif key == keyboard.Key.right: elif key == keyboard.Key.right:
print("[RaC] → End episode") print("[RaC] → End episode")
events["exit_early"] = True events["exit_early"] = True
@@ -155,7 +161,7 @@ def init_rac_keyboard_listener():
events["rerecord_episode"] = True events["rerecord_episode"] = True
events["exit_early"] = True events["exit_early"] = True
elif key == keyboard.Key.esc: elif key == keyboard.Key.esc:
print("[RaC] ESC - Stop recording session") print("[RaC] ESC - Stop recording, pushing to hub...")
events["stop_recording"] = True events["stop_recording"] = True
events["exit_early"] = True events["exit_early"] = True
except Exception as e: except Exception as e:
@@ -201,10 +207,12 @@ def rac_rollout_loop(
display_data: bool = True, display_data: bool = True,
) -> dict: ) -> dict:
""" """
RaC rollout loop: policy runs until intervention, then human does recovery+correction. RaC rollout loop with two-stage intervention:
The human intervention (recovery + correction) is recorded as training data. 1. Policy runs autonomously (recording)
This teaches the policy how to recover from errors. 2. SPACE: Policy pauses (NOT recording) - robot holds position
3. 'c': Human takes control (recording correction)
4. →: End episode
""" """
policy.reset() policy.reset()
preprocessor.reset() preprocessor.reset()
@@ -216,10 +224,11 @@ def rac_rollout_loop(
stats = { stats = {
"total_frames": 0, "total_frames": 0,
"autonomous_frames": 0, "autonomous_frames": 0,
"human_frames": 0, "paused_frames": 0,
"intervention_occurred": False, "correction_frames": 0,
} }
last_robot_action = None
timestamp = 0 timestamp = 0
start_t = time.perf_counter() start_t = time.perf_counter()
@@ -228,13 +237,35 @@ def rac_rollout_loop(
if events["exit_early"]: if events["exit_early"]:
events["exit_early"] = False events["exit_early"] = False
events["intervention_active"] = False events["policy_paused"] = False
events["correction_active"] = False
break break
obs = robot.get_observation() obs = robot.get_observation()
obs_frame = build_dataset_frame(dataset.features, obs, prefix=OBS_STR) obs_frame = build_dataset_frame(dataset.features, obs, prefix=OBS_STR)
if not events["intervention_active"]: if events["correction_active"]:
# Human controlling - record correction data
robot_action = teleop.get_action()
robot.send_action(robot_action)
stats["correction_frames"] += 1
# Record this frame
action_frame = build_dataset_frame(dataset.features, robot_action, prefix=ACTION)
frame = {**obs_frame, **action_frame, "task": single_task}
frame_buffer.append(frame)
stats["total_frames"] += 1
elif events["policy_paused"]:
# Paused - hold last position, don't record
if last_robot_action is not None:
robot.send_action(last_robot_action)
stats["paused_frames"] += 1
robot_action = last_robot_action
# Don't record frames during pause
else:
# Normal policy execution - record
action_values = predict_action( action_values = predict_action(
observation=obs_frame, observation=obs_frame,
policy=policy, policy=policy,
@@ -246,22 +277,18 @@ def rac_rollout_loop(
robot_type=robot.robot_type, robot_type=robot.robot_type,
) )
robot_action: RobotAction = make_robot_action(action_values, dataset.features) robot_action: RobotAction = make_robot_action(action_values, dataset.features)
robot.send_action(robot_action)
last_robot_action = robot_action
stats["autonomous_frames"] += 1 stats["autonomous_frames"] += 1
else:
stats["intervention_occurred"] = True # Record this frame
robot_action = teleop.get_action() action_frame = build_dataset_frame(dataset.features, robot_action, prefix=ACTION)
action_values = robot_action frame = {**obs_frame, **action_frame, "task": single_task}
stats["human_frames"] += 1 frame_buffer.append(frame)
stats["total_frames"] += 1
robot.send_action(robot_action) if display_data and robot_action is not None:
log_rerun_data(observation=obs, action=robot_action)
action_frame = build_dataset_frame(dataset.features, action_values, prefix=ACTION)
frame = {**obs_frame, **action_frame, "task": single_task}
frame_buffer.append(frame)
stats["total_frames"] += 1
if display_data:
log_rerun_data(observation=obs, action=action_values)
dt = time.perf_counter() - loop_start dt = time.perf_counter() - loop_start
precise_sleep(1 / fps - dt) precise_sleep(1 / fps - dt)
@@ -374,9 +401,11 @@ def rac_collect(cfg: RaCConfig) -> LeRobotDataset:
print(" Policy runs autonomously until you intervene.") print(" Policy runs autonomously until you intervene.")
print() print()
print(" Controls:") print(" Controls:")
print(" SPACE - Intervene (take control)") print(" SPACE - Pause policy (robot holds position, no recording)")
print(" c - Take control (start correction, recording)")
print(" → - End episode (save)") print(" → - End episode (save)")
print(" ESC - Stop recording session") print(" - Re-record episode")
print(" ESC - Stop session and push to hub")
print("=" * 65 + "\n") print("=" * 65 + "\n")
with VideoEncodingManager(dataset): with VideoEncodingManager(dataset):
+65 -40
View File
@@ -23,9 +23,11 @@ The recovery segment (teleoperating back to good state) is recorded as training
this teaches the policy how to recover from errors. this teaches the policy how to recover from errors.
Keyboard Controls: Keyboard Controls:
SPACE - Start intervention (policy stops, human takes over) SPACE - Pause policy (teleop mirrors robot, no recording)
c - Take control (teleop free, recording correction)
- End episode (save and continue to next) - End episode (save and continue to next)
ESC - Stop recording session - Re-record episode
ESC - Stop recording and push dataset to hub
Usage: Usage:
python examples/rac/rac_data_collection_openarms.py \ python examples/rac/rac_data_collection_openarms.py \
@@ -135,7 +137,8 @@ def init_rac_keyboard_listener():
"exit_early": False, "exit_early": False,
"rerecord_episode": False, "rerecord_episode": False,
"stop_recording": False, "stop_recording": False,
"intervention_active": False, "policy_paused": False, # SPACE pressed - policy paused, teleop tracking robot
"correction_active": False, # 'c' pressed - human controlling, recording correction
} }
if is_headless(): if is_headless():
@@ -147,12 +150,15 @@ def init_rac_keyboard_listener():
def on_press(key): def on_press(key):
try: try:
if key == keyboard.Key.space: if key == keyboard.Key.space:
if not events["intervention_active"]: if not events["policy_paused"] and not events["correction_active"]:
print("\n[RaC] ▶ INTERVENTION - You have control") print("\n[RaC] ⏸ PAUSED - Policy stopped, teleop tracking robot")
print(" 1. Teleoperate robot back to good state (RECOVERY)") print(" Press 'c' to take control and start correction")
print(" 2. Complete the subtask (CORRECTION)") events["policy_paused"] = True
print(" 3. Press → when done") elif hasattr(key, 'char') and key.char == 'c':
events["intervention_active"] = True if events["policy_paused"] and not events["correction_active"]:
print("\n[RaC] ▶ CORRECTION - You have control (recording)")
print(" Teleoperate to correct, press → when done")
events["correction_active"] = True
elif key == keyboard.Key.right: elif key == keyboard.Key.right:
print("[RaC] → End episode") print("[RaC] → End episode")
events["exit_early"] = True events["exit_early"] = True
@@ -161,7 +167,7 @@ def init_rac_keyboard_listener():
events["rerecord_episode"] = True events["rerecord_episode"] = True
events["exit_early"] = True events["exit_early"] = True
elif key == keyboard.Key.esc: elif key == keyboard.Key.esc:
print("[RaC] ESC - Stop recording session") print("[RaC] ESC - Stop recording, pushing to hub...")
events["stop_recording"] = True events["stop_recording"] = True
events["exit_early"] = True events["exit_early"] = True
except Exception as e: except Exception as e:
@@ -207,13 +213,14 @@ def rac_rollout_loop(
display_data: bool = True, display_data: bool = True,
) -> dict: ) -> dict:
""" """
RaC rollout loop: policy runs until intervention, then human does recovery+correction. RaC rollout loop with two-stage intervention:
The human intervention (recovery + correction) is recorded as training data.
This teaches the policy how to recover from errors.
During autonomous execution, the teleoperator mirrors the robot's position so 1. Policy runs autonomously (recording)
intervention is seamless - the teleoperator is already in the same configuration. 2. SPACE: Policy pauses, teleop mirrors robot position (NOT recording)
3. 'c': Human takes control, teleop torque disabled (recording correction)
4. : End episode
This allows smooth handoff - teleop tracks robot until human is ready.
""" """
policy.reset() policy.reset()
preprocessor.reset() preprocessor.reset()
@@ -225,13 +232,13 @@ def rac_rollout_loop(
stats = { stats = {
"total_frames": 0, "total_frames": 0,
"autonomous_frames": 0, "autonomous_frames": 0,
"human_frames": 0, "paused_frames": 0,
"intervention_occurred": False, "correction_frames": 0,
} }
# Enable torque on teleoperator so it can track robot position # Enable torque on teleoperator so it can track robot position
teleop.enable_torque() teleop.enable_torque()
was_intervention_active = False was_correction_active = False
timestamp = 0 timestamp = 0
start_t = time.perf_counter() start_t = time.perf_counter()
@@ -241,20 +248,41 @@ def rac_rollout_loop(
if events["exit_early"]: if events["exit_early"]:
events["exit_early"] = False events["exit_early"] = False
events["intervention_active"] = False events["policy_paused"] = False
events["correction_active"] = False
break break
# Detect transition to intervention mode # Detect transition to correction mode (human takes control)
if events["intervention_active"] and not was_intervention_active: if events["correction_active"] and not was_correction_active:
teleop.disable_torque() teleop.disable_torque()
was_intervention_active = True was_correction_active = True
obs = robot.get_observation() obs = robot.get_observation()
# Filter to only include keys in observation_features (excludes .vel, .torque, _timing, etc.)
obs_filtered = {k: v for k, v in obs.items() if k in robot.observation_features} obs_filtered = {k: v for k, v in obs.items() if k in robot.observation_features}
obs_frame = build_dataset_frame(dataset.features, obs_filtered, prefix=OBS_STR) obs_frame = build_dataset_frame(dataset.features, obs_filtered, prefix=OBS_STR)
if not events["intervention_active"]: if events["correction_active"]:
# Human controlling - record correction data
robot_action = teleop.get_action()
robot.send_action(robot_action)
stats["correction_frames"] += 1
# Record this frame
action_frame = build_dataset_frame(dataset.features, robot_action, prefix=ACTION)
frame = {**obs_frame, **action_frame, "task": single_task}
frame_buffer.append(frame)
stats["total_frames"] += 1
elif events["policy_paused"]:
# Paused - teleop tracks robot, but don't record
# Read current robot position and send to teleop
robot_action = {k: v for k, v in obs_filtered.items() if k.endswith(".pos")}
teleop.send_feedback(robot_action)
stats["paused_frames"] += 1
# Don't record frames during pause
else:
# Normal policy execution - record
action_values = predict_action( action_values = predict_action(
observation=obs_frame, observation=obs_frame,
policy=policy, policy=policy,
@@ -266,22 +294,17 @@ def rac_rollout_loop(
robot_type=robot.robot_type, robot_type=robot.robot_type,
) )
robot_action: RobotAction = make_robot_action(action_values, dataset.features) robot_action: RobotAction = make_robot_action(action_values, dataset.features)
robot.send_action(robot_action)
stats["autonomous_frames"] += 1 stats["autonomous_frames"] += 1
# Mirror robot position to teleoperator for seamless intervention # Mirror robot position to teleoperator
teleop.send_feedback(robot_action) teleop.send_feedback(robot_action)
else:
stats["intervention_occurred"] = True # Record this frame
robot_action = teleop.get_action() action_frame = build_dataset_frame(dataset.features, robot_action, prefix=ACTION)
action_values = robot_action frame = {**obs_frame, **action_frame, "task": single_task}
stats["human_frames"] += 1 frame_buffer.append(frame)
stats["total_frames"] += 1
robot.send_action(robot_action)
action_frame = build_dataset_frame(dataset.features, robot_action, prefix=ACTION)
frame = {**obs_frame, **action_frame, "task": single_task}
frame_buffer.append(frame)
stats["total_frames"] += 1
if display_data: if display_data:
log_rerun_data(observation=obs_filtered, action=robot_action) log_rerun_data(observation=obs_filtered, action=robot_action)
@@ -400,9 +423,11 @@ def rac_collect(cfg: RaCConfig) -> LeRobotDataset:
print(" Policy runs autonomously until you intervene.") print(" Policy runs autonomously until you intervene.")
print() print()
print(" Controls:") print(" Controls:")
print(" SPACE - Intervene (take control)") print(" SPACE - Pause policy (teleop tracks robot, no recording)")
print(" c - Take control (start correction, recording)")
print(" → - End episode (save)") print(" → - End episode (save)")
print(" ESC - Stop recording session") print(" - Re-record episode")
print(" ESC - Stop session and push to hub")
print("=" * 65 + "\n") print("=" * 65 + "\n")
with VideoEncodingManager(dataset): with VideoEncodingManager(dataset):