From b42d124007f21af512d667aeb58d5b09c53dce8e Mon Sep 17 00:00:00 2001 From: Maxime Ellerbach Date: Mon, 15 Jun 2026 14:50:23 +0000 Subject: [PATCH] cleanup --- docs/source/multi_gpu_training.mdx | 6 +++--- src/lerobot/scripts/lerobot_train.py | 1 - 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/docs/source/multi_gpu_training.mdx b/docs/source/multi_gpu_training.mdx index 181390485..0bec365c9 100644 --- a/docs/source/multi_gpu_training.mdx +++ b/docs/source/multi_gpu_training.mdx @@ -138,10 +138,10 @@ num_machines: 1 num_processes: 4 fsdp_config: fsdp_version: 1 - fsdp_sharding_strategy: FULL_SHARD # params + grads + optimizer (ZeRO-3) + fsdp_sharding_strategy: FULL_SHARD # params + grads + optimizer (ZeRO-3) fsdp_auto_wrap_policy: TRANSFORMER_BASED_WRAP - fsdp_transformer_layer_cls_to_wrap: # repeated block class to shard - fsdp_use_orig_params: true # required: optimizer is built pre-prepare + fsdp_transformer_layer_cls_to_wrap: # repeated block class to shard + fsdp_use_orig_params: true # required: optimizer is built pre-prepare fsdp_state_dict_type: FULL_STATE_DICT ``` diff --git a/src/lerobot/scripts/lerobot_train.py b/src/lerobot/scripts/lerobot_train.py index f60aae8b6..add506887 100644 --- a/src/lerobot/scripts/lerobot_train.py +++ b/src/lerobot/scripts/lerobot_train.py @@ -198,7 +198,6 @@ def train(cfg: TrainPipelineConfig, accelerator: "Accelerator | None" = None): # We set step_scheduler_with_optimizer=False to prevent accelerate from adjusting the lr_scheduler steps based on the num_processes # We set find_unused_parameters=True to handle models with conditional computation if accelerator is None: - ddp_kwargs = DistributedDataParallelKwargs(find_unused_parameters=True) # Accelerate auto-detects the device based on the available hardware and ignores the policy.device setting. # Force the device to be CPU when the active config's device is set to CPU (works for both policy and reward model training).