fix: slurm job for parallel conversion on nodes

This commit is contained in:
fracapuano
2025-11-10 17:40:15 +00:00
parent 09e2a55901
commit edbfa3d3e6
+13 -5
View File
@@ -6,11 +6,14 @@
# NOTE(review): this span reads like a rendered diff hunk whose +/- markers
# were stripped, so superseded lines and their replacements appear side by
# side (two pairs of -o/-e directives, two OMP_NUM_THREADS exports).
# Confirm against the real script which lines are live before running it.
#SBATCH -c 1 # CPUs per conversion (tune as needed)
#SBATCH -t 2:00:00 # Time per conversion
#SBATCH --mem=3G # ~1.75GB for task 0, ~doubled for safety
# Log file patterns: %x = job name, %A = array master job id, %a = array task
# index, giving one stdout and one stderr file per array task.
# The two relative "logs/..." paths are presumably the pre-change version of
# the absolute paths further down -- TODO confirm.
#SBATCH -o logs/%x-%A_%a.out # per-task logs
#SBATCH -e logs/%x-%A_%a.err
# -D sets the working directory of the job to the lerobot checkout; the
# absolute -o/-e paths below place the logs under examples/behavior_1k/logs
# inside that same checkout.
#SBATCH -D /admin/home/francesco_capuano/lerobot
#SBATCH -o /admin/home/francesco_capuano/lerobot/examples/behavior_1k/logs/%x-%A_%a.out
#SBATCH -e /admin/home/francesco_capuano/lerobot/examples/behavior_1k/logs/%x-%A_%a.err
# Strict mode: exit on command failure (-e), on expansion of an unset
# variable (-u), and when any stage of a pipeline fails (pipefail).
set -euo pipefail
# Unguarded expansion: with `set -u` active this aborts if
# SLURM_CPUS_PER_TASK is not set. Presumably the pre-change line that the
# `:-1` variant below replaces -- TODO confirm.
export OMP_NUM_THREADS=$SLURM_CPUS_PER_TASK # avoid BLAS oversubscription
# Trace every command as it runs (bash writes the trace to stderr).
set -x
# Ask Python not to buffer stdout/stderr.
export PYTHONUNBUFFERED=1
# Same export as above, but falling back to 1 thread when
# SLURM_CPUS_PER_TASK is unset or empty.
export OMP_NUM_THREADS=${SLURM_CPUS_PER_TASK:-1} # avoid BLAS oversubscription
# Path passed to the converter as --data-path.
DATA_PATH="/fsx/francesco_capuano/.cache/behavior-1k/2025-challenge-demos"
# Parent directory under which each task's OUT_DIR is created.
BASE_OUT="/fsx/francesco_capuano/behavior1k"
@@ -20,11 +23,16 @@ i="${SLURM_ARRAY_TASK_ID}"
# Per-task output location: "<BASE_OUT>/behavior1k-task" followed by the
# task index $i zero-padded to four digits.
OUT_DIR="$(printf "%s/behavior1k-task%04d" "$BASE_OUT" "$i")"
# activate your env if needed
# NOTE(review): two ways of sourcing the same rc file follow. The bare form
# is presumably the pre-change line of a marker-stripped diff, and the
# second form is its replacement, which hides stderr and ignores a non-zero
# return -- TODO confirm which one is live.
source ~/.bashrc
# NOTE(review): `set -u` is in effect; an rc file or conda.sh that expands
# an unset variable may still abort a non-interactive shell despite the
# `|| true` -- TODO confirm on the target nodes.
source "$HOME/.bashrc" 2>/dev/null || true
# If the rc file did not make a `conda` command available, try the conda.sh
# of two install locations under $HOME; each attempt is best-effort.
if ! command -v conda >/dev/null 2>&1; then
source "$HOME/miniconda3/etc/profile.d/conda.sh" 2>/dev/null || true
source "$HOME/anaconda3/etc/profile.d/conda.sh" 2>/dev/null || true
fi
# Switch to the conda environment named "lerobot".
conda activate lerobot
# Run the converter for task $i. The script path is relative, so it depends
# on the job's working directory.
# NOTE(review): --force-conversion appears twice below. The first occurrence
# has no trailing backslash and is presumably the pre-change last line; the
# second continues the command with --push-to-hub. Executed literally, the
# command would end at the first occurrence and the remaining two lines
# would be parsed as a separate command -- TODO confirm against the real file.
python examples/behavior_1k/convert_to_lerobot_v3.py \
--data-path "$DATA_PATH" \
--new-repo "$OUT_DIR" \
--task-id "$i" \
--force-conversion
--force-conversion \
--push-to-hub