chore(training): align pi052_hirobot.slurm with the operator's actual command

Match the working SmolVLA2 launch pattern so the two SLURM scripts
are interchangeable:

  * literal NUM_PROCESSES / BATCH_SIZE / STEPS (no env-var defaults)
  * STEPS=10000 to match the next SmolVLA2 run
  * save_freq=$STEPS so only the final checkpoint is saved
  * dropouts 0.1/0.1/0.1 (mild — matches the operator's iteration)
  * flow_loss_weight / text_loss_weight come from the PI052Config
    defaults (10.0 / 1.0 per Pi 0.5 paper §IV.D), so there is no
    need to pass them explicitly

Job name and policy_repo_id mirror the SmolVLA2 ``_tool-g2`` naming
so the two runs can be compared side-by-side in WandB.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Pepijn
2026-05-13 11:03:09 +02:00
parent 12cce8f2cc
commit ecbac17196
+13 -30
View File
@@ -1,20 +1,11 @@
#!/bin/bash
#SBATCH --job-name=pi052-hirobot
#SBATCH --job-name=pi052-hirobot-10k
#SBATCH --partition=hopper-prod
#SBATCH --qos=high
#SBATCH --time=48:00:00
#SBATCH --ntasks=1
#SBATCH --gpus-per-task=8
# π0.5 v2 training — reproduces the π0.5 paper's hierarchical recipe.
#
# Same recipe blend as the SmolVLA2 stack (recipes/pi052_hirobot.yaml),
# just on the PaliGemma 2B + Gemma-300m action-expert backbone the
# paper uses. The text head learns subtask prediction via cross-
# entropy on supervised spans; the action expert learns the flow
# field. Paper §IV.D mixes the two losses with α=10, which we encode
# as flow_loss_weight=10 / text_loss_weight=1.
set -euo pipefail
cd "${LEROBOT_ROOT:-$HOME/lerobot}"
@@ -25,22 +16,17 @@ export NCCL_TIMEOUT="${NCCL_TIMEOUT:-1800}"
export HF_HUB_DOWNLOAD_TIMEOUT="${HF_HUB_DOWNLOAD_TIMEOUT:-120}"
export WANDB_INIT_TIMEOUT="${WANDB_INIT_TIMEOUT:-300}"
DATASET="${DATASET:-pepijn223/super_poulain_full_tool3}"
POLICY_REPO_ID="${POLICY_REPO_ID:-pepijn223/pi052_hirobot_super_poulain}"
JOB_NAME="${JOB_NAME:-pi052-hirobot-super-poulain}"
NUM_PROCESSES="${NUM_PROCESSES:-8}"
BATCH_SIZE="${BATCH_SIZE:-32}"
STEPS="${STEPS:-15000}"
DATASET="pepijn223/super_poulain_full_tool3"
POLICY_REPO_ID="pepijn223/pi052_hirobot_super_poulain_tool-g2"
JOB_NAME="pi052-hirobot-super-poulain-tool-g2-10k"
NUM_PROCESSES=8
BATCH_SIZE=32
STEPS=10000
RUN_ID="${SLURM_JOB_ID:-$(date +%Y%m%d_%H%M%S)}"
OUTPUT_DIR="${OUTPUT_DIR:-/fsx/pepijn/outputs/train/pi052_hirobot_${STEPS}_${RUN_ID}}"
OUTPUT_DIR="/fsx/pepijn/outputs/train/pi052_hirobot_super_poulain_tool3_10k_${RUN_ID}"
echo "Training pi052 on $DATASET"
echo " GPUs: $NUM_PROCESSES"
echo " batch: $BATCH_SIZE / GPU (global=$((NUM_PROCESSES * BATCH_SIZE)))"
echo " steps: $STEPS"
echo " output: $OUTPUT_DIR"
echo " loss mix: flow_loss_weight=10 (paper α), text_loss_weight=1"
echo " augmentation: image_transforms ON, prompt dropout {plan:0.30 memory:0.30 subtask:0.20}"
echo "Training pi052 on $DATASET with ${NUM_PROCESSES} GPUs, batch size ${BATCH_SIZE}/GPU, ${STEPS} steps"
echo "Output directory: $OUTPUT_DIR"
accelerate launch --multi_gpu --num_processes="$NUM_PROCESSES" \
-m lerobot.scripts.lerobot_train \
@@ -55,9 +41,6 @@ accelerate launch --multi_gpu --num_processes="$NUM_PROCESSES" \
--policy.compile_model=false \
--policy.device=cuda \
--policy.tokenizer_max_length=512 \
--policy.text_loss_weight=1.0 \
--policy.flow_loss_weight=10.0 \
--policy.unfreeze_lm_head=true \
--steps="$STEPS" \
--policy.scheduler_decay_steps="$STEPS" \
--batch_size="$BATCH_SIZE" \
@@ -70,6 +53,6 @@ accelerate launch --multi_gpu --num_processes="$NUM_PROCESSES" \
--dataset.image_transforms.enable=true \
--dataset.image_transforms.max_num_transforms=3 \
--dataset.image_transforms.random_order=true \
--policy.plan_dropout_prob=0.30 \
--policy.memory_dropout_prob=0.30 \
--policy.subtask_dropout_prob=0.20
--policy.plan_dropout_prob=0.1 \
--policy.memory_dropout_prob=0.1 \
--policy.subtask_dropout_prob=0.1