#!/bin/bash #SBATCH --time=24:00:00 #SBATCH --partition=hopper-cpu #SBATCH --cpus-per-task=96 #SBATCH --output=/fsx/jade_choghari/logs/launcher_%j.out #SBATCH --error=/fsx/jade_choghari/logs/launcher_%j.err # Activate conda environment # Load conda source /fsx/jade_choghari/miniforge3/etc/profile.d/conda.sh conda activate lerobot set -e # Exit on error # Input dataset REPO_ID="lerobot" ROOT="/fsx/jade_choghari/vlabench-primitive/" # Output paths OUTPUT_DIR="/fsx/jade_choghari/vlabench-primitive-encoded" OUTPUT_REPO_ID="vlabench-primitive-encoded" LOGS_DIR="/fsx/jade_choghari/logs/convert_video" # Video encoding settings VCODEC="libsvtav1" PIX_FMT="yuv420p" GOP_SIZE=2 CRF=30 FAST_DECODE=0 # Parallelization settings NUM_WORKERS=24 # Number of parallel SLURM workers NUM_IMAGE_WORKERS=4 # Threads per worker for image saving # SLURM settings PARTITION="hopper-cpu" # Change to your CPU partition name CPUS_PER_TASK=32 # CPUs per worker MEM_PER_CPU="4G" # Memory per CPU TIME_LIMIT="24:00:00" # Time limit per job ############################################################################### # STEP 1: Parallel Video Conversion ############################################################################### rm -rf "${OUTPUT_DIR}" mkdir -p "${OUTPUT_DIR}" echo "==============================================" echo "STEP 1: Starting parallel video conversion" echo " Workers: ${NUM_WORKERS}" echo " Input: ${REPO_ID} (root: ${ROOT})" echo " Output: ${OUTPUT_DIR}" echo "==============================================" python /admin/home/jade_choghari/lerobot/examples/port_datasets/slurm_convert_to_video.py\ --repo-id "${REPO_ID}" \ --root "${ROOT}" \ --output-dir "${OUTPUT_DIR}" \ --output-repo-id "${OUTPUT_REPO_ID}" \ --vcodec "${VCODEC}" \ --pix-fmt "${PIX_FMT}" \ --g ${GOP_SIZE} \ --crf ${CRF} \ --fast-decode ${FAST_DECODE} \ --num-image-workers ${NUM_IMAGE_WORKERS} \ --logs-dir "${LOGS_DIR}" \ --job-name "convert_video" \ --slurm 1 \ --workers ${NUM_WORKERS} \ --partition "${PARTITION}" \ --cpus-per-task ${CPUS_PER_TASK} \ --mem-per-cpu "${MEM_PER_CPU}" \ --time-limit "${TIME_LIMIT}" echo "" echo "✓ Parallel conversion jobs submitted!" echo " Monitor with: squeue -u \$USER" echo " Check logs in: ${LOGS_DIR}/convert_video" echo "" echo "Wait for all jobs to complete before running Step 2." echo "You can check completion with: squeue -u \$USER | grep convert_video" echo "" echo "After all jobs complete, run Step 2 to aggregate shards:" echo " bash convert_to_video_parallel.sh aggregate" ############################################################################### # STEP 2: Aggregate Shards (run this after Step 1 completes) ############################################################################### if [ "$1" == "aggregate" ]; then echo "" echo "==============================================" echo "STEP 2: Aggregating video shards" echo " Shards: ${NUM_WORKERS}" echo " Input: ${OUTPUT_DIR}/shard_XXXX" echo " Output: ${OUTPUT_DIR}_final" echo "==============================================" python slurm_aggregate_video_shards.py \ --shards-dir "${OUTPUT_DIR}" \ --output-dir "${OUTPUT_DIR}_final" \ --output-repo-id "${OUTPUT_REPO_ID}" \ --num-shards ${NUM_WORKERS} \ --logs-dir "${LOGS_DIR}" \ --job-name "aggregate_video" \ --slurm 1 \ --partition "${PARTITION}" \ --cpus-per-task 16 \ --mem-per-cpu "8G" \ --time-limit "08:00:00" echo "" echo "✓ Aggregation job submitted!" echo " Monitor with: squeue -u \$USER | grep aggregate_video" echo " Check logs in: ${LOGS_DIR}/aggregate_video" echo "" echo "After completion, your final dataset will be in:" echo " ${OUTPUT_DIR}_final" fi ############################################################################### # Helpful information ############################################################################### if [ "$1" != "aggregate" ]; then echo "" echo "==============================================" echo "WORKFLOW SUMMARY" echo "==============================================" echo "" echo "1. Step 1 is now running - it will:" echo " - Split episodes across ${NUM_WORKERS} workers" echo " - Each worker converts its episodes to video" echo " - Creates shard datasets in ${OUTPUT_DIR}/shard_XXXX" echo "" echo "2. After Step 1 completes, run Step 2:" echo " bash convert_to_video_parallel.sh aggregate" echo "" echo "3. Step 2 will merge all shards into a single dataset" echo "" echo "==============================================" fi