From 4170d1b6f182f5fe3a2b3021d2f3373a4dada1c2 Mon Sep 17 00:00:00 2001 From: Pepijn Date: Tue, 14 Oct 2025 14:48:18 +0200 Subject: [PATCH] cleanup --- docs/source/multi_gpu_training.mdx | 5 ----- src/lerobot/scripts/lerobot_train.py | 8 +++++++- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/docs/source/multi_gpu_training.mdx b/docs/source/multi_gpu_training.mdx index 5d8319acb..a1318e772 100644 --- a/docs/source/multi_gpu_training.mdx +++ b/docs/source/multi_gpu_training.mdx @@ -22,7 +22,6 @@ You can specify all parameters directly in the command without running `accelera accelerate launch \ --multi_gpu \ --num_processes=2 \ - --mixed_precision=fp16 \ $(which lerobot-train) \ --dataset.repo_id=${HF_USER}/my_dataset \ --policy.type=act \ @@ -75,10 +74,6 @@ When you launch training with accelerate: 3. **Gradient synchronization**: Gradients are synchronized across GPUs during backpropagation 4. **Single process logging**: Only the main process logs to wandb and saves checkpoints -## Mixed Precision Training - -For faster training, you can enable mixed precision (fp16 or bf16). This is configured during `accelerate config` or by passing `--mixed_precision=fp16` to `accelerate launch`. LeRobot's `use_amp` setting is automatically handled when using accelerate. - ## Learning Rate and Training Steps Scaling **Important:** LeRobot does **NOT** automatically scale learning rates or training steps based on the number of GPUs. This gives you full control over your training hyperparameters. diff --git a/src/lerobot/scripts/lerobot_train.py b/src/lerobot/scripts/lerobot_train.py index baf52c400..04b837269 100644 --- a/src/lerobot/scripts/lerobot_train.py +++ b/src/lerobot/scripts/lerobot_train.py @@ -145,8 +145,14 @@ def train(cfg: TrainPipelineConfig, accelerator: Accelerator | None = None): # It will automatically detect if running in distributed mode or single-process mode # We set step_scheduler_with_optimizer=False to prevent accelerate from adjusting # the lr_scheduler steps based on the num_processes + # We set find_unused_parameters=True to handle models with conditional computation paths if accelerator is None: - accelerator = Accelerator(step_scheduler_with_optimizer=False) + from accelerate.utils import DistributedDataParallelKwargs + ddp_kwargs = DistributedDataParallelKwargs(find_unused_parameters=True) + accelerator = Accelerator( + step_scheduler_with_optimizer=False, + kwargs_handlers=[ddp_kwargs] + ) # Determine if this is the main process (for logging and checkpointing) # When using accelerate, only the main process should log to avoid duplicate outputs