From 4908433f9ad2639d5cd6eec2897977add1832395 Mon Sep 17 00:00:00 2001 From: Pepijn Date: Tue, 12 May 2026 18:45:38 +0200 Subject: [PATCH] chore(training): align smolvla2_hirobot.slurm with what's actually run MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Match the operator's current training command for the _tool6 retrain: * default POLICY_REPO_ID / JOB_NAME point at the tool6 iteration; DATASET is unchanged (train on super_poulain_full_tool3 → produce smolvla2_hirobot_super_poulain_tool6) * STEPS default 2000 (short enough to iterate; bump to 10k for full) * save_freq=$STEPS so the only checkpoint is the final one * OUTPUT_DIR includes step count so successive runs don't clobber * Drop the wider augmentation envelope I added earlier — back to default ColorJitter ranges (brightness ±20% etc) since the high_level_subtask recipe fix (current-subtask supervision) is expected to fix the LM-head collapse on its own; the augmentation is just the standard regulariser, not a load-bearing widener. * prompt-dropout fractions stay at the original 0.15 / 0.15 / 0.20. Co-Authored-By: Claude Opus 4.7 (1M context) --- examples/training/smolvla2_hirobot.slurm | 63 ++++++++++-------------- 1 file changed, 27 insertions(+), 36 deletions(-) diff --git a/examples/training/smolvla2_hirobot.slurm b/examples/training/smolvla2_hirobot.slurm index 35faf8a27..c03022ce3 100644 --- a/examples/training/smolvla2_hirobot.slurm +++ b/examples/training/smolvla2_hirobot.slurm @@ -6,27 +6,23 @@ #SBATCH --ntasks=1 #SBATCH --gpus-per-task=8 -# SmolVLA2 training on an annotated dataset, with image augmentation -# and per-component prompt dropout enabled — the two regularisers -# that move the model away from the "text_loss=6e-6 memorised one -# epoch worth of frames" failure mode toward "learns concepts, not -# pixels". +# SmolVLA2 training on an annotated dataset.
# -# What the regularisers do: +# The high_level_subtask recipe (recipes/smolvla2_hirobot.yaml) was +# fixed in PR3 to supervise the LM head with the *current* active +# subtask span at every frame, not the next-span target which is +# empty on stable phases. With the old recipe the head learned to +# emit ``\n`` on every chunk boundary; the new one supervises a +# real, scene-grounded string at every frame. # -# * --dataset.image_transforms.enable=true: applies torchvision -# v2 ColorJitter (brightness/contrast/saturation/hue), -# SharpnessJitter and RandomAffine per frame at training time. -# Set max_num_transforms to control how many are sampled per -# frame; defaults to 3 of the 6. -# * --policy.plan_dropout_prob / memory / subtask: at training, -# randomly drop the context messages that carry the named -# binding so the model is forced to handle missing/stale context. -# Mirrors Pi0.7's prompt-component dropout (§V.E). +# Two regularisers are still on: # -# Expected effect: text_loss plateaus higher (~0.5-2.0 instead of -# ~1e-5) and the model handles slight prompt/scene drift at -# inference instead of collapsing to memorised fragments. +# * --dataset.image_transforms.enable=true: torchvision-v2 +# ColorJitter + SharpnessJitter + RandomAffine per frame; default +# envelope (brightness ±20% etc). +# * --policy.{plan,memory,subtask}_dropout_prob: randomly drop the +# context messages carrying the named recipe binding so the model +# handles missing/stale context. Mirrors Pi0.7 §V.E. 
set -euo pipefail @@ -39,20 +35,20 @@ export HF_HUB_DOWNLOAD_TIMEOUT="${HF_HUB_DOWNLOAD_TIMEOUT:-120}" export WANDB_INIT_TIMEOUT="${WANDB_INIT_TIMEOUT:-300}" DATASET="${DATASET:-pepijn223/super_poulain_full_tool3}" -POLICY_REPO_ID="${POLICY_REPO_ID:-pepijn223/smolvla2_hirobot_super_poulain_tool4}" -JOB_NAME="${JOB_NAME:-smolvla2-hirobot-super-poulain-tool4}" +POLICY_REPO_ID="${POLICY_REPO_ID:-pepijn223/smolvla2_hirobot_super_poulain_tool6}" +JOB_NAME="${JOB_NAME:-smolvla2-hirobot-super-poulain-tool6}" NUM_PROCESSES="${NUM_PROCESSES:-8}" BATCH_SIZE="${BATCH_SIZE:-32}" -STEPS="${STEPS:-10000}" +STEPS="${STEPS:-2000}" RUN_ID="${SLURM_JOB_ID:-$(date +%Y%m%d_%H%M%S)}" -OUTPUT_DIR="${OUTPUT_DIR:-/fsx/pepijn/outputs/train/smolvla2_hirobot_${RUN_ID}}" +OUTPUT_DIR="${OUTPUT_DIR:-/fsx/pepijn/outputs/train/smolvla2_hirobot_super_poulain_tool3_${STEPS}_${RUN_ID}}" echo "Training smolvla2 on $DATASET" echo " GPUs: $NUM_PROCESSES" echo " batch: $BATCH_SIZE / GPU (global=$((NUM_PROCESSES * BATCH_SIZE)))" echo " steps: $STEPS" echo " output: $OUTPUT_DIR" -echo " augmentation: image_transforms ON (wide), prompt dropout {plan:0.20 memory:0.20 subtask:0.30}" +echo " augmentation: image_transforms ON, prompt dropout {plan:0.15 memory:0.15 subtask:0.20}" accelerate launch --multi_gpu --num_processes="$NUM_PROCESSES" \ -m lerobot.scripts.lerobot_train \ @@ -61,17 +57,6 @@ accelerate launch --multi_gpu --num_processes="$NUM_PROCESSES" \ --dataset.repo_id="$DATASET" \ --dataset.revision=main \ --dataset.video_backend=pyav \ - --dataset.image_transforms.enable=true \ - --dataset.image_transforms.max_num_transforms=4 \ - --dataset.image_transforms.random_order=true \ - --dataset.image_transforms.tfs.brightness.kwargs='{"brightness": [0.5, 1.6]}' \ - --dataset.image_transforms.tfs.contrast.kwargs='{"contrast": [0.6, 1.5]}' \ - --dataset.image_transforms.tfs.saturation.kwargs='{"saturation": [0.3, 1.7]}' \ - --dataset.image_transforms.tfs.hue.kwargs='{"hue": [-0.1, 0.1]}' \ - 
--dataset.image_transforms.tfs.affine.kwargs='{"degrees": [-15.0, 15.0], "translate": [0.15, 0.15]}' \ - --policy.plan_dropout_prob=0.20 \ - --policy.memory_dropout_prob=0.20 \ - --policy.subtask_dropout_prob=0.30 \ --output_dir="$OUTPUT_DIR" \ --job_name="$JOB_NAME" \ --policy.repo_id="$POLICY_REPO_ID" \ @@ -85,5 +70,11 @@ accelerate launch --multi_gpu --num_processes="$NUM_PROCESSES" \ --wandb.disable_artifact=true \ --wandb.project=hirobot \ --log_freq=100 \ - --save_freq=1000 \ - --num_workers=0 + --save_freq="$STEPS" \ + --num_workers=0 \ + --dataset.image_transforms.enable=true \ + --dataset.image_transforms.max_num_transforms=3 \ + --dataset.image_transforms.random_order=true \ + --policy.plan_dropout_prob=0.15 \ + --policy.memory_dropout_prob=0.15 \ + --policy.subtask_dropout_prob=0.20