diff --git a/docs/source/multi_gpu_training.mdx b/docs/source/multi_gpu_training.mdx
index 4cda9cb01..a6cd540bc 100644
--- a/docs/source/multi_gpu_training.mdx
+++ b/docs/source/multi_gpu_training.mdx
@@ -16,15 +16,15 @@ Or install it with the LeRobot accelerate extra:
 pip install lerobot[accelerate]
 ```
 
-## Configuration
+## Configuration (Optional)
 
-Configure accelerate for your hardware setup by running:
+You can optionally configure accelerate for your hardware setup by running:
 
 ```bash
 accelerate config
 ```
 
-This interactive setup will ask you questions about your training environment (number of GPUs, mixed precision settings, etc.). For a simple multi-GPU setup on a single machine, you can use these recommended settings:
+This interactive setup will ask you questions about your training environment (number of GPUs, mixed precision settings, etc.) and save the configuration for future use. For a simple multi-GPU setup on a single machine, you can use these recommended settings:
 
 - Compute environment: This machine
 - Number of machines: 1
@@ -32,26 +32,15 @@ This interactive setup will ask you questions about your training environment (n
 - GPU ids to use: (leave empty to use all)
 - Mixed precision: fp16 or bf16 (recommended for faster training)
 
+**Note:** You can skip this step and pass these settings directly as flags to `accelerate launch` (see Option 1 below).
+
 ## Training with Multiple GPUs
 
 You can launch training in two ways:
 
-### Option 1: Using accelerate config (recommended)
+### Option 1: Without config (specify parameters directly)
 
-If you ran `accelerate config`, simply launch with:
-
-```bash
-accelerate launch $(which lerobot-train) \
-  --dataset.repo_id=${HF_USER}/my_dataset \
-  --policy.type=act \
-  --output_dir=outputs/train/act_multi_gpu \
-  --job_name=act_multi_gpu \
-  --wandb.enable=true
-```
-
-### Option 2: Without config (specify parameters directly)
-
-If you prefer not to run `accelerate config`, you can specify all parameters in the command:
+You can specify all parameters directly in the command without running `accelerate config`:
 
 ```bash
 accelerate launch \
@@ -61,6 +50,7 @@ accelerate launch \
   $(which lerobot-train) \
   --dataset.repo_id=${HF_USER}/my_dataset \
   --policy.type=act \
+  --policy.repo_id=${HF_USER}/my_trained_policy \
   --output_dir=outputs/train/act_multi_gpu \
   --job_name=act_multi_gpu \
   --wandb.enable=true
@@ -71,6 +61,20 @@ accelerate launch \
 - `--num_processes=2`: Number of GPUs to use
 - `--mixed_precision=fp16`: Use fp16 mixed precision (or `bf16` if supported)
 
+### Option 2: Using accelerate config
+
+If you prefer to save your configuration, run `accelerate config` once and then simply launch with:
+
+```bash
+accelerate launch $(which lerobot-train) \
+  --dataset.repo_id=${HF_USER}/my_dataset \
+  --policy.type=act \
+  --policy.repo_id=${HF_USER}/my_trained_policy \
+  --output_dir=outputs/train/act_multi_gpu \
+  --job_name=act_multi_gpu \
+  --wandb.enable=true
+```
+
 ## How It Works
 
 When you launch training with accelerate:
diff --git a/src/lerobot/scripts/lerobot_train.py b/src/lerobot/scripts/lerobot_train.py
index 6202990ed..65e86cb46 100644
--- a/src/lerobot/scripts/lerobot_train.py
+++ b/src/lerobot/scripts/lerobot_train.py
@@ -163,17 +163,20 @@ def train(cfg: TrainPipelineConfig, accelerator: Callable | None = None):
         cfg: A `TrainPipelineConfig` object containing all training configurations.
     """
     cfg.validate()
-    logging.info(pformat(cfg.to_dict()))
-
+    
     if accelerator and not accelerator.is_main_process:
         # Disable logging on non-main processes.
         cfg.wandb.enable = False
 
+    if not accelerator or accelerator.is_main_process:
+        logging.info(pformat(cfg.to_dict()))
+
     if cfg.wandb.enable and cfg.wandb.project:
         wandb_logger = WandBLogger(cfg)
     else:
         wandb_logger = None
-        logging.info(colored("Logs will be saved locally.", "yellow", attrs=["bold"]))
+        if not accelerator or accelerator.is_main_process:
+            logging.info(colored("Logs will be saved locally.", "yellow", attrs=["bold"]))
 
     if cfg.seed is not None:
         set_seed(cfg.seed, accelerator=accelerator)
@@ -183,7 +186,8 @@ def train(cfg: TrainPipelineConfig, accelerator: Callable | None = None):
     torch.backends.cudnn.benchmark = True
     torch.backends.cuda.matmul.allow_tf32 = True
 
-    logging.info("Creating dataset")
+    if not accelerator or accelerator.is_main_process:
+        logging.info("Creating dataset")
     dataset = make_dataset(cfg)
 
     # Create environment used for evaluating checkpoints during training on simulation data.
@@ -191,10 +195,12 @@ def train(cfg: TrainPipelineConfig, accelerator: Callable | None = None):
     # using the eval.py instead, with gym_dora environment and dora-rs.
     eval_env = None
     if cfg.eval_freq > 0 and cfg.env is not None:
-        logging.info("Creating env")
+        if not accelerator or accelerator.is_main_process:
+            logging.info("Creating env")
         eval_env = make_env(cfg.env, n_envs=cfg.eval.batch_size, use_async_envs=cfg.eval.use_async_envs)
 
-    logging.info("Creating policy")
+    if not accelerator or accelerator.is_main_process:
+        logging.info("Creating policy")
     policy = make_policy(
         cfg=cfg.policy,
         ds_meta=dataset.meta,
@@ -232,7 +238,8 @@ def train(cfg: TrainPipelineConfig, accelerator: Callable | None = None):
         **postprocessor_kwargs,
     )
 
-    logging.info("Creating optimizer and scheduler")
+    if not accelerator or accelerator.is_main_process:
+        logging.info("Creating optimizer and scheduler")
     optimizer, lr_scheduler = make_optimizer_and_scheduler(cfg, policy)
     grad_scaler = GradScaler(device.type, enabled=cfg.policy.use_amp)
 
@@ -304,6 +311,7 @@ def train(cfg: TrainPipelineConfig, accelerator: Callable | None = None):
 
     if not accelerator or accelerator.is_main_process:
         logging.info("Start offline training on a fixed dataset")
+        
     for _ in range(step, cfg.steps):
         start_time = time.perf_counter()
         batch = next(dl_iter)