Files
lerobot/profiling/model_profiling_specs.json
T
Pepijn a4544ffea7 fix(profiling): use bf16 dtype and gradient checkpointing for pi0/pi05
Enable --policy.dtype=bfloat16 and --policy.gradient_checkpointing=true
for pi0, pi0_fast, and pi05 profiling specs. Combined with use_amp=true,
this brings the 4B-param VLA models well within the 22GB GPU budget.

Made-with: Cursor
2026-04-16 15:35:25 +02:00

142 lines
4.5 KiB
JSON

{
  "act": {
    "steps": 12,
    "train_args": [
      "--dataset.repo_id=lerobot/pusht",
      "--dataset.episodes=[0]",
      "--policy.type=act",
      "--policy.device=cuda",
      "--batch_size=4",
      "--cudnn_deterministic=true"
    ]
  },
  "diffusion": {
    "steps": 12,
    "train_args": [
      "--dataset.repo_id=lerobot/pusht",
      "--dataset.episodes=[0]",
      "--policy.type=diffusion",
      "--policy.device=cuda",
      "--batch_size=4",
      "--cudnn_deterministic=true"
    ]
  },
  "groot": {
    "steps": 12,
    "train_args": [
      "--dataset.repo_id=lerobot/libero_plus",
      "--dataset.episodes=[0]",
      "--policy.type=groot",
      "--policy.base_model_path=nvidia/GR00T-N1.5-3B",
      "--policy.tune_diffusion_model=true",
      "--policy.tune_projector=true",
      "--policy.tune_llm=false",
      "--policy.tune_visual=false",
      "--policy.use_bf16=true",
      "--policy.device=cuda",
      "--batch_size=1",
      "--rename_map={\"observation.images.image\": \"observation.images.camera1\", \"observation.images.image2\": \"observation.images.camera2\"}"
    ]
  },
  "multi_task_dit": {
    "steps": 12,
    "train_args": [
      "--dataset.repo_id=lerobot/pusht",
      "--dataset.episodes=[0]",
      "--policy.type=multi_task_dit",
      "--policy.device=cuda",
      "--policy.horizon=32",
      "--policy.n_action_steps=30",
      "--batch_size=4",
      "--cudnn_deterministic=true"
    ]
  },
  "pi0": {
    "steps": 12,
    "train_args": [
      "--dataset.repo_id=lerobot/libero_plus",
      "--dataset.episodes=[0]",
      "--policy.path=lerobot/pi0_base",
      "--policy.device=cuda",
      "--policy.dtype=bfloat16",
      "--policy.n_action_steps=30",
      "--policy.use_amp=true",
      "--policy.gradient_checkpointing=true",
      "--batch_size=1",
      "--rename_map={\"observation.images.front\": \"observation.images.base_0_rgb\", \"observation.images.wrist\": \"observation.images.left_wrist_0_rgb\"}"
    ]
  },
  "pi0_fast": {
    "steps": 12,
    "train_args": [
      "--dataset.repo_id=lerobot/libero_plus",
      "--dataset.episodes=[0]",
      "--policy.path=lerobot/pi0fast-base",
      "--policy.device=cuda",
      "--policy.dtype=bfloat16",
      "--policy.n_action_steps=30",
      "--policy.use_amp=true",
      "--policy.gradient_checkpointing=true",
      "--batch_size=1",
      "--rename_map={\"observation.images.front\": \"observation.images.base_0_rgb\", \"observation.images.wrist\": \"observation.images.left_wrist_0_rgb\"}"
    ]
  },
  "pi05": {
    "steps": 12,
    "train_args": [
      "--dataset.repo_id=lerobot/libero_plus",
      "--dataset.episodes=[0]",
      "--policy.path=lerobot/pi05_base",
      "--policy.device=cuda",
      "--policy.dtype=bfloat16",
      "--policy.n_action_steps=30",
      "--policy.use_amp=true",
      "--policy.gradient_checkpointing=true",
      "--batch_size=1",
      "--policy.normalization_mapping={\"ACTION\": \"MEAN_STD\", \"STATE\": \"MEAN_STD\", \"VISUAL\": \"IDENTITY\"}",
      "--rename_map={\"observation.images.front\": \"observation.images.base_0_rgb\", \"observation.images.wrist\": \"observation.images.left_wrist_0_rgb\"}"
    ]
  },
  "smolvla": {
    "steps": 12,
    "train_args": [
      "--dataset.repo_id=lerobot/libero_plus",
      "--dataset.episodes=[0]",
      "--policy.path=lerobot/smolvla_base",
      "--policy.load_vlm_weights=true",
      "--policy.freeze_vision_encoder=false",
      "--policy.train_expert_only=false",
      "--policy.empty_cameras=1",
      "--policy.device=cuda",
      "--batch_size=1",
      "--rename_map={\"observation.images.front\": \"observation.images.camera1\", \"observation.images.wrist\": \"observation.images.camera2\"}"
    ]
  },
  "wall_x": {
    "steps": 12,
    "train_args": [
      "--dataset.repo_id=lerobot/aloha_sim_insertion_human",
      "--dataset.episodes=[0]",
      "--policy.type=wall_x",
      "--policy.pretrained_name_or_path=x-square-robot/wall-oss-flow",
      "--policy.prediction_mode=diffusion",
      "--policy.attn_implementation=eager",
      "--policy.device=cuda",
      "--batch_size=1"
    ]
  },
  "xvla": {
    "steps": 12,
    "train_args": [
      "--dataset.repo_id=lerobot/libero_plus",
      "--dataset.episodes=[0]",
      "--policy.path=lerobot/xvla-widowx",
      "--policy.action_mode=auto",
      "--policy.empty_cameras=1",
      "--policy.device=cuda",
      "--batch_size=1",
      "--rename_map={\"observation.images.image\": \"observation.images.camera1\", \"observation.images.image2\": \"observation.images.camera2\"}"
    ]
  }
}