Fix RaC data collection with RTC by disabling torch.compile by default

2026-05-15 08:39:49 +00:00 · 2026-01-15 17:06:58 +01:00
parent 3316301693
commit d1f50babaa
26 changed files with 553 additions and 20 deletions
@@ -44,13 +44,13 @@ from lerobot.utils.utils import log_say
 from lerobot.utils.visualization_utils import init_rerun


-HF_MODEL_ID = "lerobot-data-collection/three-folds-pi0"  # TODO: Replace with your trained model
-HF_EVAL_DATASET_ID = "lerobot-data-collection/three-folds-pi0_eval7"  # TODO: Replace with your eval dataset name
-TASK_DESCRIPTION = "three-folds-dataset"  # TODO: Replace with your task, this should match!!
+HF_MODEL_ID = "lerobot-data-collection/level1_rac2_100k"  # TODO: Replace with your trained model
+HF_EVAL_DATASET_ID = "lerobot-data-collection/three-folds-pi0_eval_raccc3"  # TODO: Replace with your eval dataset name
+TASK_DESCRIPTION = "Fold the T-shirt properly" # TODO: Replace with your task, this should match!!

 NUM_EPISODES = 1
 FPS = 30
-EPISODE_TIME_SEC = 300
+EPISODE_TIME_SEC = 1000
 RESET_TIME_SEC = 60

 # Robot CAN interfaces
@@ -58,15 +58,15 @@ FOLLOWER_LEFT_PORT = "can0"
 FOLLOWER_RIGHT_PORT = "can1"

 # If enabled, you can manually reset the environment between evaluation episodes
-USE_LEADER_FOR_RESETS = True  # Set to False if you don't want to use leader
+USE_LEADER_FOR_RESETS = False  # Set to False if you don't want to use leader
 LEADER_LEFT_PORT = "can2"
 LEADER_RIGHT_PORT = "can3"

 # Camera configuration
 CAMERA_CONFIG = {
-    "left_wrist": OpenCVCameraConfig(index_or_path="/dev/video5", width=640, height=480, fps=FPS),
-    "right_wrist": OpenCVCameraConfig(index_or_path="/dev/video1", width=640, height=480, fps=FPS),
-    "base": OpenCVCameraConfig(index_or_path="/dev/video3", width=640, height=480, fps=FPS),
+    "left_wrist": OpenCVCameraConfig(index_or_path="/dev/video0", width=1280, height=720, fps=FPS),
+    "right_wrist": OpenCVCameraConfig(index_or_path="/dev/video5", width=1280, height=720, fps=FPS),
+    "base": OpenCVCameraConfig(index_or_path="/dev/video2", width=640, height=480, fps=FPS),
 }

 def main():
@@ -73,22 +73,22 @@ logger = logging.getLogger(__name__)
 # Default Configuration Constants
 # ============================================================================

-DEFAULT_HF_MODEL_ID = "lerobot-data-collection/three-folds-pi0"
-DEFAULT_HF_EVAL_DATASET_ID = "lerobot-data-collection/three-folds-pi0_eval_rtc"
-DEFAULT_TASK_DESCRIPTION = "three-folds-dataset"
+DEFAULT_HF_MODEL_ID = "lerobot-data-collection/level1_rac3_100k"
+DEFAULT_HF_EVAL_DATASET_ID = "lerobot-data-collection/test"
+DEFAULT_TASK_DESCRIPTION = "Fold the T-shirt properly"

 DEFAULT_NUM_EPISODES = 1
 DEFAULT_FPS = 30
-DEFAULT_EPISODE_TIME_SEC = 300
+DEFAULT_EPISODE_TIME_SEC = 1000
 DEFAULT_RESET_TIME_SEC = 60

 DEFAULT_FOLLOWER_LEFT_PORT = "can0"
 DEFAULT_FOLLOWER_RIGHT_PORT = "can1"

 DEFAULT_CAMERA_CONFIG = {
-    "left_wrist": OpenCVCameraConfig(index_or_path="/dev/video5", width=640, height=480, fps=DEFAULT_FPS),
-    "right_wrist": OpenCVCameraConfig(index_or_path="/dev/video1", width=640, height=480, fps=DEFAULT_FPS),
-    "base": OpenCVCameraConfig(index_or_path="/dev/video3", width=640, height=480, fps=DEFAULT_FPS),
+    "left_wrist": OpenCVCameraConfig(index_or_path="/dev/video0", width=1280, height=720, fps=DEFAULT_FPS),
+    "right_wrist": OpenCVCameraConfig(index_or_path="/dev/video4", width=1280, height=720, fps=DEFAULT_FPS),
+    "base": OpenCVCameraConfig(index_or_path="/dev/video2", width=640, height=480, fps=DEFAULT_FPS),
 }


@@ -141,9 +141,9 @@ class OpenArmsRTCEvalConfig(HubMixin):
    rtc: RTCConfig = field(
        default_factory=lambda: RTCConfig(
            enabled=True,
-            execution_horizon=10,
-            max_guidance_weight=10.0,
-            prefix_attention_schedule=RTCAttentionSchedule.EXP,
+            execution_horizon=20,
+            max_guidance_weight=5.0,
+            prefix_attention_schedule=RTCAttentionSchedule.LINEAR,
        )
    )

@@ -167,7 +167,7 @@ class OpenArmsRTCEvalConfig(HubMixin):
    record_dataset: bool = True
    push_to_hub: bool = True

-    interpolation: bool = False
+    interpolation: bool = True

    use_torch_compile: bool = False
    torch_compile_backend: str = "inductor"
@@ -130,6 +130,10 @@ class RaCRTCConfig:
    resume: bool = False
    device: str = "cuda"
    action_queue_size_to_get_new_actions: int = 30
+    
+    # torch.compile is disabled by default for real-time inference, because the
+    # first inference call with compile enabled takes minutes to compile kernels.
+    use_torch_compile: bool = False

    def __post_init__(self):
        policy_path = parser.get_path_arg("policy")
@@ -735,7 +739,14 @@ def rac_rtc_collect(cfg: RaCRTCConfig) -> LeRobotDataset:
        # Load policy
        logger.info(f"Loading policy from: {cfg.policy.pretrained_path}")
        policy_class = get_policy_class(cfg.policy.type)
-        policy = policy_class.from_pretrained(cfg.policy.pretrained_path)
+        
+        # Override compile_model (pi0/pi05 policies only) for real-time inference; the first compile takes minutes
+        policy_config = PreTrainedConfig.from_pretrained(cfg.policy.pretrained_path)
+        if cfg.policy.type in ["pi05", "pi0"]:
+            policy_config.compile_model = cfg.use_torch_compile
+            logger.info(f"Set compile_model={cfg.use_torch_compile} for real-time inference")
+        
+        policy = policy_class.from_pretrained(cfg.policy.pretrained_path, config=policy_config)
        policy.config.rtc_config = cfg.rtc
        policy.init_rtc_processor()
        policy = policy.to(cfg.device)