From 2b2ff1936686c1067733ca95dd473fe0e3573db0 Mon Sep 17 00:00:00 2001 From: fracapuano Date: Fri, 21 Nov 2025 09:28:36 +0000 Subject: [PATCH] fix the number of workers to prevent contention --- examples/behavior_1k/upload.slurm | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/examples/behavior_1k/upload.slurm b/examples/behavior_1k/upload.slurm index 393d13135..fc0baed26 100644 --- a/examples/behavior_1k/upload.slurm +++ b/examples/behavior_1k/upload.slurm @@ -2,10 +2,10 @@ #SBATCH -J b1k-upload #SBATCH -p hopper-cpu #SBATCH --qos=high -#SBATCH -c 8 +#SBATCH -c 1 #SBATCH -t 48:00:00 -#SBATCH --mem=8G -#SBATCH --array=0-49%8 +#SBATCH --mem=4G +#SBATCH --array=0-49%2 #SBATCH -D /admin/home/francesco_capuano/lerobot #SBATCH -o /admin/home/francesco_capuano/lerobot/examples/behavior_1k/logs/%x-%A_%a.out #SBATCH -e /admin/home/francesco_capuano/lerobot/examples/behavior_1k/logs/%x-%A_%a.err @@ -28,8 +28,14 @@ TASK_ID=${SLURM_ARRAY_TASK_ID} # Configuration ROOT_PATH="/fsx/francesco_capuano/behavior1k-v3" HF_USER="fracapuano" +# Limit upload workers to reduce network contention (default in HF Hub is 4) +# For I/O-bound uploads, 2-4 workers per task is optimal +NUM_WORKERS=2 + +echo "Task ${TASK_ID}: uploading with ${NUM_WORKERS} workers from ${ROOT_PATH}" python examples/behavior_1k/upload_folders.py \ --task-id ${TASK_ID} \ --root-path ${ROOT_PATH} \ - --hf-user ${HF_USER} + --hf-user ${HF_USER} \ + --num-workers ${NUM_WORKERS}