Only move teleop after space press

2026-05-15 16:49:55 +00:00 · 2025-12-31 12:24:43 +01:00
parent a97255e3d1
commit f15872293d
3 changed files with 143 additions and 81 deletions
@@ -23,9 +23,11 @@ The recovery segment (teleoperating back to good state) is recorded as training
 this teaches the policy how to recover from errors.

 Keyboard Controls:
-    SPACE  - Start intervention (policy stops, human takes over)
+    SPACE  - Pause policy (robot holds position, no recording)
+    c      - Take control (start correction, recording resumes)
    →      - End episode (save and continue to next)
-    ESC    - Stop recording session
+    ←      - Re-record episode
+    ESC    - Stop recording and push dataset to hub

 Usage:
    python examples/rac/rac_data_collection.py \
@@ -129,7 +131,8 @@ def init_rac_keyboard_listener():
        "exit_early": False,
        "rerecord_episode": False,
        "stop_recording": False,
-        "intervention_active": False,
+        "policy_paused": False,      # SPACE pressed - policy paused, teleop tracking robot
+        "correction_active": False,  # 'c' pressed - human controlling, recording correction
    }

    if is_headless():
@@ -141,12 +144,15 @@ def init_rac_keyboard_listener():
    def on_press(key):
        try:
            if key == keyboard.Key.space:
-                if not events["intervention_active"]:
-                    print("\n[RaC] ▶ INTERVENTION - You have control")
-                    print("      1. Teleoperate robot back to good state (RECOVERY)")
-                    print("      2. Complete the subtask (CORRECTION)")
-                    print("      3. Press → when done")
-                    events["intervention_active"] = True
+                if not events["policy_paused"] and not events["correction_active"]:
+                    print("\n[RaC] ⏸ PAUSED - Policy stopped, teleop tracking robot")
+                    print("      Press 'c' to take control and start correction")
+                    events["policy_paused"] = True
+            elif hasattr(key, 'char') and key.char == 'c':
+                if events["policy_paused"] and not events["correction_active"]:
+                    print("\n[RaC] ▶ CORRECTION - You have control (recording)")
+                    print("      Teleoperate to correct, press → when done")
+                    events["correction_active"] = True
            elif key == keyboard.Key.right:
                print("[RaC] → End episode")
                events["exit_early"] = True
@@ -155,7 +161,7 @@ def init_rac_keyboard_listener():
                events["rerecord_episode"] = True
                events["exit_early"] = True
            elif key == keyboard.Key.esc:
-                print("[RaC] ESC - Stop recording session")
+                print("[RaC] ESC - Stop recording, pushing to hub...")
                events["stop_recording"] = True
                events["exit_early"] = True
        except Exception as e:
@@ -201,10 +207,12 @@ def rac_rollout_loop(
    display_data: bool = True,
 ) -> dict:
    """
-    RaC rollout loop: policy runs until intervention, then human does recovery+correction.
-
-    The human intervention (recovery + correction) is recorded as training data.
-    This teaches the policy how to recover from errors.
+    RaC rollout loop with two-stage intervention:
+    
+    1. Policy runs autonomously (recording)
+    2. SPACE: Policy pauses (NOT recording) - robot holds position
+    3. 'c': Human takes control (recording correction)
+    4. →: End episode
    """
    policy.reset()
    preprocessor.reset()
@@ -216,10 +224,11 @@ def rac_rollout_loop(
    stats = {
        "total_frames": 0,
        "autonomous_frames": 0,
-        "human_frames": 0,
-        "intervention_occurred": False,
+        "paused_frames": 0,
+        "correction_frames": 0,
    }

+    last_robot_action = None
    timestamp = 0
    start_t = time.perf_counter()

@@ -228,13 +237,35 @@ def rac_rollout_loop(

        if events["exit_early"]:
            events["exit_early"] = False
-            events["intervention_active"] = False
+            events["policy_paused"] = False
+            events["correction_active"] = False
            break

        obs = robot.get_observation()
        obs_frame = build_dataset_frame(dataset.features, obs, prefix=OBS_STR)

-        if not events["intervention_active"]:
+        if events["correction_active"]:
+            # Human controlling - record correction data
+            robot_action = teleop.get_action()
+            robot.send_action(robot_action)
+            stats["correction_frames"] += 1
+            
+            # Record this frame
+            action_frame = build_dataset_frame(dataset.features, robot_action, prefix=ACTION)
+            frame = {**obs_frame, **action_frame, "task": single_task}
+            frame_buffer.append(frame)
+            stats["total_frames"] += 1
+            
+        elif events["policy_paused"]:
+            # Paused - hold last position, don't record
+            if last_robot_action is not None:
+                robot.send_action(last_robot_action)
+            stats["paused_frames"] += 1
+            robot_action = last_robot_action
+            # Don't record frames during pause
+            
+        else:
+            # Normal policy execution - record
            action_values = predict_action(
                observation=obs_frame,
                policy=policy,
@@ -246,22 +277,18 @@ def rac_rollout_loop(
                robot_type=robot.robot_type,
            )
            robot_action: RobotAction = make_robot_action(action_values, dataset.features)
+            robot.send_action(robot_action)
+            last_robot_action = robot_action
            stats["autonomous_frames"] += 1
-        else:
-            stats["intervention_occurred"] = True
-            robot_action = teleop.get_action()
-            action_values = robot_action
-            stats["human_frames"] += 1
+            
+            # Record this frame
+            action_frame = build_dataset_frame(dataset.features, robot_action, prefix=ACTION)
+            frame = {**obs_frame, **action_frame, "task": single_task}
+            frame_buffer.append(frame)
+            stats["total_frames"] += 1

-        robot.send_action(robot_action)
-
-        action_frame = build_dataset_frame(dataset.features, action_values, prefix=ACTION)
-        frame = {**obs_frame, **action_frame, "task": single_task}
-        frame_buffer.append(frame)
-        stats["total_frames"] += 1
-
-        if display_data:
-            log_rerun_data(observation=obs, action=action_values)
+        if display_data and robot_action is not None:
+            log_rerun_data(observation=obs, action=robot_action)

        dt = time.perf_counter() - loop_start
        precise_sleep(1 / fps - dt)
@@ -374,9 +401,11 @@ def rac_collect(cfg: RaCConfig) -> LeRobotDataset:
        print("  Policy runs autonomously until you intervene.")
        print()
        print("  Controls:")
-        print("    SPACE  - Intervene (take control)")
+        print("    SPACE  - Pause policy (robot holds position, no recording)")
+        print("    c      - Take control (start correction, recording)")
        print("    →      - End episode (save)")
-        print("    ESC    - Stop recording session")
+        print("    ←      - Re-record episode")
+        print("    ESC    - Stop session and push to hub")
        print("=" * 65 + "\n")

        with VideoEncodingManager(dataset):
@@ -23,9 +23,11 @@ The recovery segment (teleoperating back to good state) is recorded as training
 this teaches the policy how to recover from errors.

 Keyboard Controls:
-    SPACE  - Start intervention (policy stops, human takes over)
+    SPACE  - Pause policy (teleop mirrors robot, no recording)
+    c      - Take control (teleop free, recording correction)
    →      - End episode (save and continue to next)
-    ESC    - Stop recording session
+    ←      - Re-record episode
+    ESC    - Stop recording and push dataset to hub

 Usage:
    python examples/rac/rac_data_collection_openarms.py \
@@ -135,7 +137,8 @@ def init_rac_keyboard_listener():
        "exit_early": False,
        "rerecord_episode": False,
        "stop_recording": False,
-        "intervention_active": False,
+        "policy_paused": False,      # SPACE pressed - policy paused, teleop tracking robot
+        "correction_active": False,  # 'c' pressed - human controlling, recording correction
    }

    if is_headless():
@@ -147,12 +150,15 @@ def init_rac_keyboard_listener():
    def on_press(key):
        try:
            if key == keyboard.Key.space:
-                if not events["intervention_active"]:
-                    print("\n[RaC] ▶ INTERVENTION - You have control")
-                    print("      1. Teleoperate robot back to good state (RECOVERY)")
-                    print("      2. Complete the subtask (CORRECTION)")
-                    print("      3. Press → when done")
-                    events["intervention_active"] = True
+                if not events["policy_paused"] and not events["correction_active"]:
+                    print("\n[RaC] ⏸ PAUSED - Policy stopped, teleop tracking robot")
+                    print("      Press 'c' to take control and start correction")
+                    events["policy_paused"] = True
+            elif hasattr(key, 'char') and key.char == 'c':
+                if events["policy_paused"] and not events["correction_active"]:
+                    print("\n[RaC] ▶ CORRECTION - You have control (recording)")
+                    print("      Teleoperate to correct, press → when done")
+                    events["correction_active"] = True
            elif key == keyboard.Key.right:
                print("[RaC] → End episode")
                events["exit_early"] = True
@@ -161,7 +167,7 @@ def init_rac_keyboard_listener():
                events["rerecord_episode"] = True
                events["exit_early"] = True
            elif key == keyboard.Key.esc:
-                print("[RaC] ESC - Stop recording session")
+                print("[RaC] ESC - Stop recording, pushing to hub...")
                events["stop_recording"] = True
                events["exit_early"] = True
        except Exception as e:
@@ -207,13 +213,14 @@ def rac_rollout_loop(
    display_data: bool = True,
 ) -> dict:
    """
-    RaC rollout loop: policy runs until intervention, then human does recovery+correction.
-
-    The human intervention (recovery + correction) is recorded as training data.
-    This teaches the policy how to recover from errors.
+    RaC rollout loop with two-stage intervention:
    
-    During autonomous execution, the teleoperator mirrors the robot's position so
-    intervention is seamless - the teleoperator is already in the same configuration.
+    1. Policy runs autonomously (recording)
+    2. SPACE: Policy pauses, teleop mirrors robot position (NOT recording)
+    3. 'c': Human takes control, teleop torque disabled (recording correction)
+    4. →: End episode
+    
+    This allows smooth handoff - teleop tracks robot until human is ready.
    """
    policy.reset()
    preprocessor.reset()
@@ -225,13 +232,13 @@ def rac_rollout_loop(
    stats = {
        "total_frames": 0,
        "autonomous_frames": 0,
-        "human_frames": 0,
-        "intervention_occurred": False,
+        "paused_frames": 0,
+        "correction_frames": 0,
    }

    # Enable torque on teleoperator so it can track robot position
    teleop.enable_torque()
-    was_intervention_active = False
+    was_correction_active = False

    timestamp = 0
    start_t = time.perf_counter()
@@ -241,20 +248,41 @@ def rac_rollout_loop(

        if events["exit_early"]:
            events["exit_early"] = False
-            events["intervention_active"] = False
+            events["policy_paused"] = False
+            events["correction_active"] = False
            break

-        # Detect transition to intervention mode
-        if events["intervention_active"] and not was_intervention_active:
+        # Detect transition to correction mode (human takes control)
+        if events["correction_active"] and not was_correction_active:
            teleop.disable_torque()
-            was_intervention_active = True
+            was_correction_active = True

        obs = robot.get_observation()
-        # Filter to only include keys in observation_features (excludes .vel, .torque, _timing, etc.)
        obs_filtered = {k: v for k, v in obs.items() if k in robot.observation_features}
        obs_frame = build_dataset_frame(dataset.features, obs_filtered, prefix=OBS_STR)

-        if not events["intervention_active"]:
+        if events["correction_active"]:
+            # Human controlling - record correction data
+            robot_action = teleop.get_action()
+            robot.send_action(robot_action)
+            stats["correction_frames"] += 1
+            
+            # Record this frame
+            action_frame = build_dataset_frame(dataset.features, robot_action, prefix=ACTION)
+            frame = {**obs_frame, **action_frame, "task": single_task}
+            frame_buffer.append(frame)
+            stats["total_frames"] += 1
+            
+        elif events["policy_paused"]:
+            # Paused - teleop tracks robot, but don't record
+            # Read current robot position and send to teleop
+            robot_action = {k: v for k, v in obs_filtered.items() if k.endswith(".pos")}
+            teleop.send_feedback(robot_action)
+            stats["paused_frames"] += 1
+            # Don't record frames during pause
+            
+        else:
+            # Normal policy execution - record
            action_values = predict_action(
                observation=obs_frame,
                policy=policy,
@@ -266,22 +294,17 @@ def rac_rollout_loop(
                robot_type=robot.robot_type,
            )
            robot_action: RobotAction = make_robot_action(action_values, dataset.features)
+            robot.send_action(robot_action)
            stats["autonomous_frames"] += 1
            
-            # Mirror robot position to teleoperator for seamless intervention
+            # Mirror robot position to teleoperator
            teleop.send_feedback(robot_action)
-        else:
-            stats["intervention_occurred"] = True
-            robot_action = teleop.get_action()
-            action_values = robot_action
-            stats["human_frames"] += 1
-
-        robot.send_action(robot_action)
-
-        action_frame = build_dataset_frame(dataset.features, robot_action, prefix=ACTION)
-        frame = {**obs_frame, **action_frame, "task": single_task}
-        frame_buffer.append(frame)
-        stats["total_frames"] += 1
+            
+            # Record this frame
+            action_frame = build_dataset_frame(dataset.features, robot_action, prefix=ACTION)
+            frame = {**obs_frame, **action_frame, "task": single_task}
+            frame_buffer.append(frame)
+            stats["total_frames"] += 1

        if display_data:
            log_rerun_data(observation=obs_filtered, action=robot_action)
@@ -400,9 +423,11 @@ def rac_collect(cfg: RaCConfig) -> LeRobotDataset:
        print("  Policy runs autonomously until you intervene.")
        print()
        print("  Controls:")
-        print("    SPACE  - Intervene (take control)")
+        print("    SPACE  - Pause policy (teleop tracks robot, no recording)")
+        print("    c      - Take control (start correction, recording)")
        print("    →      - End episode (save)")
-        print("    ESC    - Stop recording session")
+        print("    ←      - Re-record episode")
+        print("    ESC    - Stop session and push to hub")
        print("=" * 65 + "\n")

        with VideoEncodingManager(dataset):