diff --git a/docs/source/rac.mdx b/docs/source/rac.mdx index 7da574c92..bcfb03f9d 100644 --- a/docs/source/rac.mdx +++ b/docs/source/rac.mdx @@ -134,13 +134,13 @@ python examples/rac/rac_data_collection.py \ **The RaC Protocol:** -1. Watch the policy run autonomously +1. Watch the policy run autonomously (teleop is idle/free) 2. When you see imminent failure, press **SPACE** to pause - - Policy stops, teleoperator mirrors robot position - - Allows smooth handoff without jarring movements + - Policy stops + - Teleoperator moves to match robot position (torque enabled) - No frames recorded during pause 3. Press **c** to take control - - Teleoperator is now free to move + - Teleoperator torque disabled, free to move - **RECOVERY**: Teleoperate back to a good state - **CORRECTION**: Complete the subtask - All movements are recorded diff --git a/examples/rac/rac_data_collection.py b/examples/rac/rac_data_collection.py index f9f929a49..edf7768bc 100644 --- a/examples/rac/rac_data_collection.py +++ b/examples/rac/rac_data_collection.py @@ -9,10 +9,10 @@ RaC improves upon standard data collection (BC) and prior human-in-the-loop meth (DAgger, HG-DAgger) by explicitly collecting recovery and correction behaviors: The workflow: -1. Policy runs autonomously until human presses SPACE to intervene -2. On intervention: human teleoperates the robot back to a good state (RECOVERY) -3. Human provides CORRECTION with teleoperator to complete the subtask -4. Press -> to end episode (save and continue to next) +1. Policy runs autonomously +2. Press SPACE to pause - robot holds position +3. Press 'c' to take control - human provides RECOVERY + CORRECTION +4. Press → to end episode (save and continue to next) 5. Reset, then do next rollout Key RaC Rules: diff --git a/examples/rac/rac_data_collection_openarms.py b/examples/rac/rac_data_collection_openarms.py index 351b44c4d..78d8a5f4b 100644 --- a/examples/rac/rac_data_collection_openarms.py +++ b/examples/rac/rac_data_collection_openarms.py @@ -9,10 +9,10 @@ RaC improves upon standard data collection (BC) and prior human-in-the-loop meth (DAgger, HG-DAgger) by explicitly collecting recovery and correction behaviors: The workflow: -1. Policy runs autonomously until human presses SPACE to intervene -2. On intervention: human teleoperates the robot back to a good state (RECOVERY) -3. Human provides CORRECTION with teleoperator to complete the subtask -4. Press -> to end episode (save and continue to next) +1. Policy runs autonomously (teleop is idle/free) +2. Press SPACE to pause - teleop moves to match robot position +3. Press 'c' to take control - teleop is free, human provides RECOVERY + CORRECTION +4. Press → to end episode (save and continue to next) 5. Reset, then do next rollout Key RaC Rules: @@ -215,12 +215,12 @@ def rac_rollout_loop( """ RaC rollout loop with two-stage intervention: - 1. Policy runs autonomously (recording) + 1. Policy runs autonomously (recording) - teleop free/idle 2. SPACE: Policy pauses, teleop mirrors robot position (NOT recording) 3. 'c': Human takes control, teleop torque disabled (recording correction) 4. →: End episode - This allows smooth handoff - teleop tracks robot until human is ready. + This allows smooth handoff - teleop tracks robot only when paused. """ policy.reset() preprocessor.reset() @@ -236,8 +236,9 @@ def rac_rollout_loop( "correction_frames": 0, } - # Enable torque on teleoperator so it can track robot position - teleop.enable_torque() + # Start with teleop torque disabled - only enable when paused to track robot + teleop.disable_torque() + was_paused = False was_correction_active = False timestamp = 0 @@ -252,7 +253,12 @@ def rac_rollout_loop( events["correction_active"] = False break - # Detect transition to correction mode (human takes control) + # Detect transition to paused state (enable torque to track robot) + if events["policy_paused"] and not was_paused: + teleop.enable_torque() + was_paused = True + + # Detect transition to correction mode (disable torque for human control) if events["correction_active"] and not was_correction_active: teleop.disable_torque() was_correction_active = True @@ -274,15 +280,13 @@ def rac_rollout_loop( stats["total_frames"] += 1 elif events["policy_paused"]: - # Paused - teleop tracks robot, but don't record - # Read current robot position and send to teleop + # Paused - teleop tracks robot position, don't record robot_action = {k: v for k, v in obs_filtered.items() if k.endswith(".pos")} teleop.send_feedback(robot_action) stats["paused_frames"] += 1 - # Don't record frames during pause else: - # Normal policy execution - record + # Normal policy execution - record (teleop is free/idle) action_values = predict_action( observation=obs_frame, policy=policy, @@ -297,9 +301,6 @@ def rac_rollout_loop( robot.send_action(robot_action) stats["autonomous_frames"] += 1 - # Mirror robot position to teleoperator - teleop.send_feedback(robot_action) - # Record this frame action_frame = build_dataset_frame(dataset.features, robot_action, prefix=ACTION) frame = {**obs_frame, **action_frame, "task": single_task}