fix(profiling): use bf16 dtype and gradient checkpointing for pi0/pi05

Enable --policy.dtype=bfloat16 and --policy.gradient_checkpointing=true
for pi0, pi0_fast, and pi05 profiling specs. Combined with use_amp=true,
this brings the 4B-param VLA models well within the 22GB GPU budget.
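For reference, the three specs below all add the same two flags; a sketch of the equivalent pi0 command line is shown here. The `python -m lerobot.scripts.train` entrypoint is an assumption (use whichever script the profiling specs actually launch); the flags themselves are taken verbatim from the diff.

```shell
# Sketch of the pi0 profiling invocation with the new flags
# (entrypoint name assumed; flags mirror the spec changes below).
# --policy.dtype=bfloat16 loads the policy weights in bf16, and
# --policy.gradient_checkpointing=true trades recompute for activation memory;
# together with --policy.use_amp=true this is what fits the model in 22GB.
python -m lerobot.scripts.train \
  --dataset.episodes=[0] \
  --policy.path=lerobot/pi0_base \
  --policy.device=cuda \
  --policy.dtype=bfloat16 \
  --policy.n_action_steps=30 \
  --policy.use_amp=true \
  --policy.gradient_checkpointing=true \
  --batch_size=1
```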

Made-with: Cursor
Author: Pepijn
Date: 2026-04-16 15:35:25 +02:00
Commit: a4544ffea7 (parent: dbe01b0444)
@@ -58,8 +58,10 @@
     "--dataset.episodes=[0]",
     "--policy.path=lerobot/pi0_base",
     "--policy.device=cuda",
+    "--policy.dtype=bfloat16",
     "--policy.n_action_steps=30",
     "--policy.use_amp=true",
+    "--policy.gradient_checkpointing=true",
     "--batch_size=1",
     "--rename_map={\"observation.images.front\": \"observation.images.base_0_rgb\", \"observation.images.wrist\": \"observation.images.left_wrist_0_rgb\"}"
 ]
@@ -71,8 +73,10 @@
     "--dataset.episodes=[0]",
     "--policy.path=lerobot/pi0fast-base",
     "--policy.device=cuda",
+    "--policy.dtype=bfloat16",
     "--policy.n_action_steps=30",
     "--policy.use_amp=true",
+    "--policy.gradient_checkpointing=true",
     "--batch_size=1",
     "--rename_map={\"observation.images.front\": \"observation.images.base_0_rgb\", \"observation.images.wrist\": \"observation.images.left_wrist_0_rgb\"}"
 ]
@@ -84,8 +88,10 @@
     "--dataset.episodes=[0]",
     "--policy.path=lerobot/pi05_base",
     "--policy.device=cuda",
+    "--policy.dtype=bfloat16",
     "--policy.n_action_steps=30",
     "--policy.use_amp=true",
+    "--policy.gradient_checkpointing=true",
     "--batch_size=1",
     "--policy.normalization_mapping={\"ACTION\": \"MEAN_STD\", \"STATE\": \"MEAN_STD\", \"VISUAL\": \"IDENTITY\"}",
     "--rename_map={\"observation.images.front\": \"observation.images.base_0_rgb\", \"observation.images.wrist\": \"observation.images.left_wrist_0_rgb\"}"