From f15872293d04b4229c05c4bc9b63ea2f1a97f073 Mon Sep 17 00:00:00 2001 From: Pepijn Date: Wed, 31 Dec 2025 12:24:43 +0100 Subject: [PATCH] Only move teleop after space press --- docs/source/rac.mdx | 22 ++-- examples/rac/rac_data_collection.py | 97 +++++++++++------ examples/rac/rac_data_collection_openarms.py | 105 ++++++++++++------- 3 files changed, 143 insertions(+), 81 deletions(-) diff --git a/docs/source/rac.mdx b/docs/source/rac.mdx index 022637ebf..7da574c92 100644 --- a/docs/source/rac.mdx +++ b/docs/source/rac.mdx @@ -126,19 +126,27 @@ python examples/rac/rac_data_collection.py \ | Key | Action | |-----|--------| -| **SPACE** | Start intervention (take control) | +| **SPACE** | Pause policy (teleop mirrors robot, no recording) | +| **c** | Take control (start correction, recording resumes) | | **→** | End episode (save) | -| **ESC** | Stop recording session | +| **←** | Re-record episode | +| **ESC** | Stop session and push to hub | **The RaC Protocol:** 1. Watch the policy run autonomously -2. When you see imminent failure, press **SPACE** to intervene -3. **RECOVERY**: Teleoperate the robot back to a good in-distribution state -4. **CORRECTION**: Use teleoperator to complete the subtask -5. Press **→** to save and end episode +2. When you see imminent failure, press **SPACE** to pause + - Policy stops, teleoperator mirrors robot position + - Allows smooth handoff without jarring movements + - No frames recorded during pause +3. Press **c** to take control + - Teleoperator is now free to move + - **RECOVERY**: Teleoperate back to a good state + - **CORRECTION**: Complete the subtask + - All movements are recorded +4. Press **→** to save and end episode -The recovery segment (teleoperating back to good state) is recorded as training data - this teaches the policy how to recover from errors. +The recovery and correction segments teach the policy how to recover from errors. ### Step 3: (Optional) Compute SARM Rewards diff --git a/examples/rac/rac_data_collection.py b/examples/rac/rac_data_collection.py index 2f4018481..f9f929a49 100644 --- a/examples/rac/rac_data_collection.py +++ b/examples/rac/rac_data_collection.py @@ -23,9 +23,11 @@ The recovery segment (teleoperating back to good state) is recorded as training this teaches the policy how to recover from errors. Keyboard Controls: - SPACE - Start intervention (policy stops, human takes over) + SPACE - Pause policy (robot holds position, no recording) + c - Take control (start correction, recording resumes) → - End episode (save and continue to next) - ESC - Stop recording session + ← - Re-record episode + ESC - Stop recording and push dataset to hub Usage: python examples/rac/rac_data_collection.py \ @@ -129,7 +131,8 @@ def init_rac_keyboard_listener(): "exit_early": False, "rerecord_episode": False, "stop_recording": False, - "intervention_active": False, + "policy_paused": False, # SPACE pressed - policy paused, teleop tracking robot + "correction_active": False, # 'c' pressed - human controlling, recording correction } if is_headless(): @@ -141,12 +144,15 @@ def init_rac_keyboard_listener(): def on_press(key): try: if key == keyboard.Key.space: - if not events["intervention_active"]: - print("\n[RaC] ▶ INTERVENTION - You have control") - print(" 1. Teleoperate robot back to good state (RECOVERY)") - print(" 2. Complete the subtask (CORRECTION)") - print(" 3. Press → when done") - events["intervention_active"] = True + if not events["policy_paused"] and not events["correction_active"]: + print("\n[RaC] ⏸ PAUSED - Policy stopped, teleop tracking robot") + print(" Press 'c' to take control and start correction") + events["policy_paused"] = True + elif hasattr(key, 'char') and key.char == 'c': + if events["policy_paused"] and not events["correction_active"]: + print("\n[RaC] ▶ CORRECTION - You have control (recording)") + print(" Teleoperate to correct, press → when done") + events["correction_active"] = True elif key == keyboard.Key.right: print("[RaC] → End episode") events["exit_early"] = True @@ -155,7 +161,7 @@ def init_rac_keyboard_listener(): events["rerecord_episode"] = True events["exit_early"] = True elif key == keyboard.Key.esc: - print("[RaC] ESC - Stop recording session") + print("[RaC] ESC - Stop recording, pushing to hub...") events["stop_recording"] = True events["exit_early"] = True except Exception as e: @@ -201,10 +207,12 @@ def rac_rollout_loop( display_data: bool = True, ) -> dict: """ - RaC rollout loop: policy runs until intervention, then human does recovery+correction. - - The human intervention (recovery + correction) is recorded as training data. - This teaches the policy how to recover from errors. + RaC rollout loop with two-stage intervention: + + 1. Policy runs autonomously (recording) + 2. SPACE: Policy pauses (NOT recording) - robot holds position + 3. 'c': Human takes control (recording correction) + 4. →: End episode """ policy.reset() preprocessor.reset() @@ -216,10 +224,11 @@ def rac_rollout_loop( stats = { "total_frames": 0, "autonomous_frames": 0, - "human_frames": 0, - "intervention_occurred": False, + "paused_frames": 0, + "correction_frames": 0, } + last_robot_action = None timestamp = 0 start_t = time.perf_counter() @@ -228,13 +237,35 @@ def rac_rollout_loop( if events["exit_early"]: events["exit_early"] = False - events["intervention_active"] = False + events["policy_paused"] = False + events["correction_active"] = False break obs = robot.get_observation() obs_frame = build_dataset_frame(dataset.features, obs, prefix=OBS_STR) - if not events["intervention_active"]: + if events["correction_active"]: + # Human controlling - record correction data + robot_action = teleop.get_action() + robot.send_action(robot_action) + stats["correction_frames"] += 1 + + # Record this frame + action_frame = build_dataset_frame(dataset.features, robot_action, prefix=ACTION) + frame = {**obs_frame, **action_frame, "task": single_task} + frame_buffer.append(frame) + stats["total_frames"] += 1 + + elif events["policy_paused"]: + # Paused - hold last position, don't record + if last_robot_action is not None: + robot.send_action(last_robot_action) + stats["paused_frames"] += 1 + robot_action = last_robot_action + # Don't record frames during pause + + else: + # Normal policy execution - record action_values = predict_action( observation=obs_frame, policy=policy, @@ -246,22 +277,18 @@ def rac_rollout_loop( robot_type=robot.robot_type, ) robot_action: RobotAction = make_robot_action(action_values, dataset.features) + robot.send_action(robot_action) + last_robot_action = robot_action stats["autonomous_frames"] += 1 - else: - stats["intervention_occurred"] = True - robot_action = teleop.get_action() - action_values = robot_action - stats["human_frames"] += 1 + + # Record this frame + action_frame = build_dataset_frame(dataset.features, robot_action, prefix=ACTION) + frame = {**obs_frame, **action_frame, "task": single_task} + frame_buffer.append(frame) + stats["total_frames"] += 1 - robot.send_action(robot_action) - - action_frame = build_dataset_frame(dataset.features, action_values, prefix=ACTION) - frame = {**obs_frame, **action_frame, "task": single_task} - frame_buffer.append(frame) - stats["total_frames"] += 1 - - if display_data: - log_rerun_data(observation=obs, action=action_values) + if display_data and robot_action is not None: + log_rerun_data(observation=obs, action=robot_action) dt = time.perf_counter() - loop_start precise_sleep(1 / fps - dt) @@ -374,9 +401,11 @@ def rac_collect(cfg: RaCConfig) -> LeRobotDataset: print(" Policy runs autonomously until you intervene.") print() print(" Controls:") - print(" SPACE - Intervene (take control)") + print(" SPACE - Pause policy (robot holds position, no recording)") + print(" c - Take control (start correction, recording)") print(" → - End episode (save)") - print(" ESC - Stop recording session") + print(" ← - Re-record episode") + print(" ESC - Stop session and push to hub") print("=" * 65 + "\n") with VideoEncodingManager(dataset): diff --git a/examples/rac/rac_data_collection_openarms.py b/examples/rac/rac_data_collection_openarms.py index 8e53fa2ff..351b44c4d 100644 --- a/examples/rac/rac_data_collection_openarms.py +++ b/examples/rac/rac_data_collection_openarms.py @@ -23,9 +23,11 @@ The recovery segment (teleoperating back to good state) is recorded as training this teaches the policy how to recover from errors. Keyboard Controls: - SPACE - Start intervention (policy stops, human takes over) + SPACE - Pause policy (teleop mirrors robot, no recording) + c - Take control (teleop free, recording correction) → - End episode (save and continue to next) - ESC - Stop recording session + ← - Re-record episode + ESC - Stop recording and push dataset to hub Usage: python examples/rac/rac_data_collection_openarms.py \ @@ -135,7 +137,8 @@ def init_rac_keyboard_listener(): "exit_early": False, "rerecord_episode": False, "stop_recording": False, - "intervention_active": False, + "policy_paused": False, # SPACE pressed - policy paused, teleop tracking robot + "correction_active": False, # 'c' pressed - human controlling, recording correction } if is_headless(): @@ -147,12 +150,15 @@ def init_rac_keyboard_listener(): def on_press(key): try: if key == keyboard.Key.space: - if not events["intervention_active"]: - print("\n[RaC] ▶ INTERVENTION - You have control") - print(" 1. Teleoperate robot back to good state (RECOVERY)") - print(" 2. Complete the subtask (CORRECTION)") - print(" 3. Press → when done") - events["intervention_active"] = True + if not events["policy_paused"] and not events["correction_active"]: + print("\n[RaC] ⏸ PAUSED - Policy stopped, teleop tracking robot") + print(" Press 'c' to take control and start correction") + events["policy_paused"] = True + elif hasattr(key, 'char') and key.char == 'c': + if events["policy_paused"] and not events["correction_active"]: + print("\n[RaC] ▶ CORRECTION - You have control (recording)") + print(" Teleoperate to correct, press → when done") + events["correction_active"] = True elif key == keyboard.Key.right: print("[RaC] → End episode") events["exit_early"] = True @@ -161,7 +167,7 @@ def init_rac_keyboard_listener(): events["rerecord_episode"] = True events["exit_early"] = True elif key == keyboard.Key.esc: - print("[RaC] ESC - Stop recording session") + print("[RaC] ESC - Stop recording, pushing to hub...") events["stop_recording"] = True events["exit_early"] = True except Exception as e: @@ -207,13 +213,14 @@ def rac_rollout_loop( display_data: bool = True, ) -> dict: """ - RaC rollout loop: policy runs until intervention, then human does recovery+correction. - - The human intervention (recovery + correction) is recorded as training data. - This teaches the policy how to recover from errors. + RaC rollout loop with two-stage intervention: - During autonomous execution, the teleoperator mirrors the robot's position so - intervention is seamless - the teleoperator is already in the same configuration. + 1. Policy runs autonomously (recording) + 2. SPACE: Policy pauses, teleop mirrors robot position (NOT recording) + 3. 'c': Human takes control, teleop torque disabled (recording correction) + 4. →: End episode + + This allows smooth handoff - teleop tracks robot until human is ready. """ policy.reset() preprocessor.reset() @@ -225,13 +232,13 @@ def rac_rollout_loop( stats = { "total_frames": 0, "autonomous_frames": 0, - "human_frames": 0, - "intervention_occurred": False, + "paused_frames": 0, + "correction_frames": 0, } # Enable torque on teleoperator so it can track robot position teleop.enable_torque() - was_intervention_active = False + was_correction_active = False timestamp = 0 start_t = time.perf_counter() @@ -241,20 +248,41 @@ def rac_rollout_loop( if events["exit_early"]: events["exit_early"] = False - events["intervention_active"] = False + events["policy_paused"] = False + events["correction_active"] = False break - # Detect transition to intervention mode - if events["intervention_active"] and not was_intervention_active: + # Detect transition to correction mode (human takes control) + if events["correction_active"] and not was_correction_active: teleop.disable_torque() - was_intervention_active = True + was_correction_active = True obs = robot.get_observation() - # Filter to only include keys in observation_features (excludes .vel, .torque, _timing, etc.) obs_filtered = {k: v for k, v in obs.items() if k in robot.observation_features} obs_frame = build_dataset_frame(dataset.features, obs_filtered, prefix=OBS_STR) - if not events["intervention_active"]: + if events["correction_active"]: + # Human controlling - record correction data + robot_action = teleop.get_action() + robot.send_action(robot_action) + stats["correction_frames"] += 1 + + # Record this frame + action_frame = build_dataset_frame(dataset.features, robot_action, prefix=ACTION) + frame = {**obs_frame, **action_frame, "task": single_task} + frame_buffer.append(frame) + stats["total_frames"] += 1 + + elif events["policy_paused"]: + # Paused - teleop tracks robot, but don't record + # Read current robot position and send to teleop + robot_action = {k: v for k, v in obs_filtered.items() if k.endswith(".pos")} + teleop.send_feedback(robot_action) + stats["paused_frames"] += 1 + # Don't record frames during pause + + else: + # Normal policy execution - record action_values = predict_action( observation=obs_frame, policy=policy, @@ -266,22 +294,17 @@ def rac_rollout_loop( robot_type=robot.robot_type, ) robot_action: RobotAction = make_robot_action(action_values, dataset.features) + robot.send_action(robot_action) stats["autonomous_frames"] += 1 - # Mirror robot position to teleoperator for seamless intervention + # Mirror robot position to teleoperator teleop.send_feedback(robot_action) - else: - stats["intervention_occurred"] = True - robot_action = teleop.get_action() - action_values = robot_action - stats["human_frames"] += 1 - - robot.send_action(robot_action) - - action_frame = build_dataset_frame(dataset.features, robot_action, prefix=ACTION) - frame = {**obs_frame, **action_frame, "task": single_task} - frame_buffer.append(frame) - stats["total_frames"] += 1 + + # Record this frame + action_frame = build_dataset_frame(dataset.features, robot_action, prefix=ACTION) + frame = {**obs_frame, **action_frame, "task": single_task} + frame_buffer.append(frame) + stats["total_frames"] += 1 if display_data: log_rerun_data(observation=obs_filtered, action=robot_action) @@ -400,9 +423,11 @@ def rac_collect(cfg: RaCConfig) -> LeRobotDataset: print(" Policy runs autonomously until you intervene.") print() print(" Controls:") - print(" SPACE - Intervene (take control)") + print(" SPACE - Pause policy (teleop tracks robot, no recording)") + print(" c - Take control (start correction, recording)") print(" → - End episode (save)") - print(" ESC - Stop recording session") + print(" ← - Re-record episode") + print(" ESC - Stop session and push to hub") print("=" * 65 + "\n") with VideoEncodingManager(dataset):