From f15872293d04b4229c05c4bc9b63ea2f1a97f073 Mon Sep 17 00:00:00 2001
From: Pepijn <pepijn@huggingface.co>
Date: Wed, 31 Dec 2025 12:24:43 +0100
Subject: [PATCH] Only move teleop after space press

---
 docs/source/rac.mdx                          |  22 ++--
 examples/rac/rac_data_collection.py          |  97 +++++++++++------
 examples/rac/rac_data_collection_openarms.py | 105 ++++++++++++-------
 3 files changed, 143 insertions(+), 81 deletions(-)

diff --git a/docs/source/rac.mdx b/docs/source/rac.mdx
index 022637ebf..7da574c92 100644
--- a/docs/source/rac.mdx
+++ b/docs/source/rac.mdx
@@ -126,19 +126,27 @@ python examples/rac/rac_data_collection.py \
 
 | Key | Action |
 |-----|--------|
-| **SPACE** | Start intervention (take control) |
+| **SPACE** | Pause policy (teleop mirrors robot, no recording) |
+| **c** | Take control (start correction, recording resumes) |
 | **→** | End episode (save) |
-| **ESC** | Stop recording session |
+| **←** | Re-record episode |
+| **ESC** | Stop session and push to hub |
 
 **The RaC Protocol:**
 
 1. Watch the policy run autonomously
-2. When you see imminent failure, press **SPACE** to intervene
-3. **RECOVERY**: Teleoperate the robot back to a good in-distribution state
-4. **CORRECTION**: Use teleoperator to complete the subtask
-5. Press **→** to save and end episode
+2. When you see imminent failure, press **SPACE** to pause
+   - Policy stops, teleoperator mirrors robot position
+   - Allows smooth handoff without jarring movements
+   - No frames recorded during pause
+3. Press **c** to take control
+   - Teleoperator is now free to move
+   - **RECOVERY**: Teleoperate back to a good state
+   - **CORRECTION**: Complete the subtask
+   - All movements are recorded
+4. Press **→** to save and end episode
 
-The recovery segment (teleoperating back to good state) is recorded as training data - this teaches the policy how to recover from errors.
+The recovery and correction segments teach the policy how to recover from errors.
 
 ### Step 3: (Optional) Compute SARM Rewards
 
diff --git a/examples/rac/rac_data_collection.py b/examples/rac/rac_data_collection.py
index 2f4018481..f9f929a49 100644
--- a/examples/rac/rac_data_collection.py
+++ b/examples/rac/rac_data_collection.py
@@ -23,9 +23,11 @@ The recovery segment (teleoperating back to good state) is recorded as training
 this teaches the policy how to recover from errors.
 
 Keyboard Controls:
-    SPACE  - Start intervention (policy stops, human takes over)
+    SPACE  - Pause policy (robot holds position, no recording)
+    c      - Take control (start correction, recording resumes)
     →      - End episode (save and continue to next)
-    ESC    - Stop recording session
+    ←      - Re-record episode
+    ESC    - Stop recording and push dataset to hub
 
 Usage:
     python examples/rac/rac_data_collection.py \
@@ -129,7 +131,8 @@ def init_rac_keyboard_listener():
         "exit_early": False,
         "rerecord_episode": False,
         "stop_recording": False,
-        "intervention_active": False,
+        "policy_paused": False,      # SPACE pressed - policy paused, teleop tracking robot
+        "correction_active": False,  # 'c' pressed - human controlling, recording correction
     }
 
     if is_headless():
@@ -141,12 +144,15 @@ def init_rac_keyboard_listener():
     def on_press(key):
         try:
             if key == keyboard.Key.space:
-                if not events["intervention_active"]:
-                    print("\n[RaC] ▶ INTERVENTION - You have control")
-                    print("      1. Teleoperate robot back to good state (RECOVERY)")
-                    print("      2. Complete the subtask (CORRECTION)")
-                    print("      3. Press → when done")
-                    events["intervention_active"] = True
+                if not events["policy_paused"] and not events["correction_active"]:
+                    print("\n[RaC] ⏸ PAUSED - Policy stopped, teleop tracking robot")
+                    print("      Press 'c' to take control and start correction")
+                    events["policy_paused"] = True
+            elif hasattr(key, 'char') and key.char == 'c':
+                if events["policy_paused"] and not events["correction_active"]:
+                    print("\n[RaC] ▶ CORRECTION - You have control (recording)")
+                    print("      Teleoperate to correct, press → when done")
+                    events["correction_active"] = True
             elif key == keyboard.Key.right:
                 print("[RaC] → End episode")
                 events["exit_early"] = True
@@ -155,7 +161,7 @@ def init_rac_keyboard_listener():
                 events["rerecord_episode"] = True
                 events["exit_early"] = True
             elif key == keyboard.Key.esc:
-                print("[RaC] ESC - Stop recording session")
+                print("[RaC] ESC - Stop recording, pushing to hub...")
                 events["stop_recording"] = True
                 events["exit_early"] = True
         except Exception as e:
@@ -201,10 +207,12 @@ def rac_rollout_loop(
     display_data: bool = True,
 ) -> dict:
     """
-    RaC rollout loop: policy runs until intervention, then human does recovery+correction.
-
-    The human intervention (recovery + correction) is recorded as training data.
-    This teaches the policy how to recover from errors.
+    RaC rollout loop with two-stage intervention:
+    
+    1. Policy runs autonomously (recording)
+    2. SPACE: Policy pauses (NOT recording) - robot holds position
+    3. 'c': Human takes control (recording correction)
+    4. →: End episode
     """
     policy.reset()
     preprocessor.reset()
@@ -216,10 +224,11 @@ def rac_rollout_loop(
     stats = {
         "total_frames": 0,
         "autonomous_frames": 0,
-        "human_frames": 0,
-        "intervention_occurred": False,
+        "paused_frames": 0,
+        "correction_frames": 0,
     }
 
+    last_robot_action = None
     timestamp = 0
     start_t = time.perf_counter()
 
@@ -228,13 +237,35 @@ def rac_rollout_loop(
 
         if events["exit_early"]:
             events["exit_early"] = False
-            events["intervention_active"] = False
+            events["policy_paused"] = False
+            events["correction_active"] = False
             break
 
         obs = robot.get_observation()
         obs_frame = build_dataset_frame(dataset.features, obs, prefix=OBS_STR)
 
-        if not events["intervention_active"]:
+        if events["correction_active"]:
+            # Human controlling - record correction data
+            robot_action = teleop.get_action()
+            robot.send_action(robot_action)
+            stats["correction_frames"] += 1
+            
+            # Record this frame
+            action_frame = build_dataset_frame(dataset.features, robot_action, prefix=ACTION)
+            frame = {**obs_frame, **action_frame, "task": single_task}
+            frame_buffer.append(frame)
+            stats["total_frames"] += 1
+            
+        elif events["policy_paused"]:
+            # Paused - hold last position, don't record
+            if last_robot_action is not None:
+                robot.send_action(last_robot_action)
+            stats["paused_frames"] += 1
+            robot_action = last_robot_action
+            # Don't record frames during pause
+            
+        else:
+            # Normal policy execution - record
             action_values = predict_action(
                 observation=obs_frame,
                 policy=policy,
@@ -246,22 +277,18 @@ def rac_rollout_loop(
                 robot_type=robot.robot_type,
             )
             robot_action: RobotAction = make_robot_action(action_values, dataset.features)
+            robot.send_action(robot_action)
+            last_robot_action = robot_action
             stats["autonomous_frames"] += 1
-        else:
-            stats["intervention_occurred"] = True
-            robot_action = teleop.get_action()
-            action_values = robot_action
-            stats["human_frames"] += 1
+            
+            # Record this frame
+            action_frame = build_dataset_frame(dataset.features, robot_action, prefix=ACTION)
+            frame = {**obs_frame, **action_frame, "task": single_task}
+            frame_buffer.append(frame)
+            stats["total_frames"] += 1
 
-        robot.send_action(robot_action)
-
-        action_frame = build_dataset_frame(dataset.features, action_values, prefix=ACTION)
-        frame = {**obs_frame, **action_frame, "task": single_task}
-        frame_buffer.append(frame)
-        stats["total_frames"] += 1
-
-        if display_data:
-            log_rerun_data(observation=obs, action=action_values)
+        if display_data and robot_action is not None:
+            log_rerun_data(observation=obs, action=robot_action)
 
         dt = time.perf_counter() - loop_start
         precise_sleep(1 / fps - dt)
@@ -374,9 +401,11 @@ def rac_collect(cfg: RaCConfig) -> LeRobotDataset:
         print("  Policy runs autonomously until you intervene.")
         print()
         print("  Controls:")
-        print("    SPACE  - Intervene (take control)")
+        print("    SPACE  - Pause policy (robot holds position, no recording)")
+        print("    c      - Take control (start correction, recording)")
         print("    →      - End episode (save)")
-        print("    ESC    - Stop recording session")
+        print("    ←      - Re-record episode")
+        print("    ESC    - Stop session and push to hub")
         print("=" * 65 + "\n")
 
         with VideoEncodingManager(dataset):
diff --git a/examples/rac/rac_data_collection_openarms.py b/examples/rac/rac_data_collection_openarms.py
index 8e53fa2ff..351b44c4d 100644
--- a/examples/rac/rac_data_collection_openarms.py
+++ b/examples/rac/rac_data_collection_openarms.py
@@ -23,9 +23,11 @@ The recovery segment (teleoperating back to good state) is recorded as training
 this teaches the policy how to recover from errors.
 
 Keyboard Controls:
-    SPACE  - Start intervention (policy stops, human takes over)
+    SPACE  - Pause policy (teleop mirrors robot, no recording)
+    c      - Take control (teleop free, recording correction)
     →      - End episode (save and continue to next)
-    ESC    - Stop recording session
+    ←      - Re-record episode
+    ESC    - Stop recording and push dataset to hub
 
 Usage:
     python examples/rac/rac_data_collection_openarms.py \
@@ -135,7 +137,8 @@ def init_rac_keyboard_listener():
         "exit_early": False,
         "rerecord_episode": False,
         "stop_recording": False,
-        "intervention_active": False,
+        "policy_paused": False,      # SPACE pressed - policy paused, teleop tracking robot
+        "correction_active": False,  # 'c' pressed - human controlling, recording correction
     }
 
     if is_headless():
@@ -147,12 +150,15 @@ def init_rac_keyboard_listener():
     def on_press(key):
         try:
             if key == keyboard.Key.space:
-                if not events["intervention_active"]:
-                    print("\n[RaC] ▶ INTERVENTION - You have control")
-                    print("      1. Teleoperate robot back to good state (RECOVERY)")
-                    print("      2. Complete the subtask (CORRECTION)")
-                    print("      3. Press → when done")
-                    events["intervention_active"] = True
+                if not events["policy_paused"] and not events["correction_active"]:
+                    print("\n[RaC] ⏸ PAUSED - Policy stopped, teleop tracking robot")
+                    print("      Press 'c' to take control and start correction")
+                    events["policy_paused"] = True
+            elif hasattr(key, 'char') and key.char == 'c':
+                if events["policy_paused"] and not events["correction_active"]:
+                    print("\n[RaC] ▶ CORRECTION - You have control (recording)")
+                    print("      Teleoperate to correct, press → when done")
+                    events["correction_active"] = True
             elif key == keyboard.Key.right:
                 print("[RaC] → End episode")
                 events["exit_early"] = True
@@ -161,7 +167,7 @@ def init_rac_keyboard_listener():
                 events["rerecord_episode"] = True
                 events["exit_early"] = True
             elif key == keyboard.Key.esc:
-                print("[RaC] ESC - Stop recording session")
+                print("[RaC] ESC - Stop recording, pushing to hub...")
                 events["stop_recording"] = True
                 events["exit_early"] = True
         except Exception as e:
@@ -207,13 +213,14 @@ def rac_rollout_loop(
     display_data: bool = True,
 ) -> dict:
     """
-    RaC rollout loop: policy runs until intervention, then human does recovery+correction.
-
-    The human intervention (recovery + correction) is recorded as training data.
-    This teaches the policy how to recover from errors.
+    RaC rollout loop with two-stage intervention:
     
-    During autonomous execution, the teleoperator mirrors the robot's position so
-    intervention is seamless - the teleoperator is already in the same configuration.
+    1. Policy runs autonomously (recording)
+    2. SPACE: Policy pauses, teleop mirrors robot position (NOT recording)
+    3. 'c': Human takes control, teleop torque disabled (recording correction)
+    4. →: End episode
+    
+    This allows smooth handoff - teleop tracks robot until human is ready.
     """
     policy.reset()
     preprocessor.reset()
@@ -225,13 +232,13 @@ def rac_rollout_loop(
     stats = {
         "total_frames": 0,
         "autonomous_frames": 0,
-        "human_frames": 0,
-        "intervention_occurred": False,
+        "paused_frames": 0,
+        "correction_frames": 0,
     }
 
     # Enable torque on teleoperator so it can track robot position
     teleop.enable_torque()
-    was_intervention_active = False
+    was_correction_active = False
 
     timestamp = 0
     start_t = time.perf_counter()
@@ -241,20 +248,41 @@ def rac_rollout_loop(
 
         if events["exit_early"]:
             events["exit_early"] = False
-            events["intervention_active"] = False
+            events["policy_paused"] = False
+            events["correction_active"] = False
             break
 
-        # Detect transition to intervention mode
-        if events["intervention_active"] and not was_intervention_active:
+        # Detect transition to correction mode (human takes control)
+        if events["correction_active"] and not was_correction_active:
             teleop.disable_torque()
-            was_intervention_active = True
+            was_correction_active = True
 
         obs = robot.get_observation()
-        # Filter to only include keys in observation_features (excludes .vel, .torque, _timing, etc.)
         obs_filtered = {k: v for k, v in obs.items() if k in robot.observation_features}
         obs_frame = build_dataset_frame(dataset.features, obs_filtered, prefix=OBS_STR)
 
-        if not events["intervention_active"]:
+        if events["correction_active"]:
+            # Human controlling - record correction data
+            robot_action = teleop.get_action()
+            robot.send_action(robot_action)
+            stats["correction_frames"] += 1
+            
+            # Record this frame
+            action_frame = build_dataset_frame(dataset.features, robot_action, prefix=ACTION)
+            frame = {**obs_frame, **action_frame, "task": single_task}
+            frame_buffer.append(frame)
+            stats["total_frames"] += 1
+            
+        elif events["policy_paused"]:
+            # Paused - teleop tracks robot, but don't record
+            # Read current robot position and send to teleop
+            robot_action = {k: v for k, v in obs_filtered.items() if k.endswith(".pos")}
+            teleop.send_feedback(robot_action)
+            stats["paused_frames"] += 1
+            # Don't record frames during pause
+            
+        else:
+            # Normal policy execution - record
             action_values = predict_action(
                 observation=obs_frame,
                 policy=policy,
@@ -266,22 +294,17 @@ def rac_rollout_loop(
                 robot_type=robot.robot_type,
             )
             robot_action: RobotAction = make_robot_action(action_values, dataset.features)
+            robot.send_action(robot_action)
             stats["autonomous_frames"] += 1
             
-            # Mirror robot position to teleoperator for seamless intervention
+            # Mirror robot position to teleoperator
             teleop.send_feedback(robot_action)
-        else:
-            stats["intervention_occurred"] = True
-            robot_action = teleop.get_action()
-            action_values = robot_action
-            stats["human_frames"] += 1
-
-        robot.send_action(robot_action)
-
-        action_frame = build_dataset_frame(dataset.features, robot_action, prefix=ACTION)
-        frame = {**obs_frame, **action_frame, "task": single_task}
-        frame_buffer.append(frame)
-        stats["total_frames"] += 1
+            
+            # Record this frame
+            action_frame = build_dataset_frame(dataset.features, robot_action, prefix=ACTION)
+            frame = {**obs_frame, **action_frame, "task": single_task}
+            frame_buffer.append(frame)
+            stats["total_frames"] += 1
 
         if display_data:
             log_rerun_data(observation=obs_filtered, action=robot_action)
@@ -400,9 +423,11 @@ def rac_collect(cfg: RaCConfig) -> LeRobotDataset:
         print("  Policy runs autonomously until you intervene.")
         print()
         print("  Controls:")
-        print("    SPACE  - Intervene (take control)")
+        print("    SPACE  - Pause policy (teleop tracks robot, no recording)")
+        print("    c      - Take control (start correction, recording)")
         print("    →      - End episode (save)")
-        print("    ESC    - Stop recording session")
+        print("    ←      - Re-record episode")
+        print("    ESC    - Stop session and push to hub")
         print("=" * 65 + "\n")
 
         with VideoEncodingManager(dataset):