#!/bin/bash

# Example script for converting RT-1 dataset using SLURM
# Make sure to modify the paths and parameters according to your setup

# ---------------------------------------------------------------------------
# User settings -- the values below are placeholders; edit them for your setup.
# ---------------------------------------------------------------------------
# Raw dataset directory, forwarded to the conversion script as --raw-dir.
RAW_DIR='/path/to/datasets/fractal20220817_data/0.1.0'
# Target repository id, forwarded as --repo-id (conversion and aggregation).
REPO_ID='your_username/rt1_lerobot'
# Directory forwarded to the conversion script as --logs-dir.
LOGS_DIR='/path/to/logs'
# Name of the SLURM partition to submit to, forwarded as --partition.
PARTITION='cpu'

# Step 1: Convert dataset using distributed processing
#
# Runs the sharded port script with the settings from the Configuration
# section. The script path is relative, so invoke this file from the directory
# that contains `examples/`.
#
# If the python command exits non-zero, stop here with the same exit status
# instead of falling through to the "jobs submitted" message below.
echo "Starting RT-1 dataset conversion..."
python examples/port_datasets/slurm_port_shards.py \
    --raw-dir "$RAW_DIR" \
    --repo-id "$REPO_ID" \
    --dataset-type rlds \
    --logs-dir "$LOGS_DIR" \
    --job-name rt1_conversion \
    --workers 32 \
    --num-shards 32 \
    --partition "$PARTITION" \
    --cpus-per-task 4 \
    --mem-per-cpu 2G \
    --slurm 1 || {
    # Capture the status first: any later command would overwrite $?.
    status=$?
    echo "Error: slurm_port_shards.py failed with exit status ${status}; aborting." >&2
    exit "$status"
}

# Step 2: Nothing is awaited here. Print how to watch the queue and the
# aggregation command to run by hand once the jobs are done.
# Single-quoted lines are emitted verbatim; the double-quoted ones keep the
# literal "$USER" (escaped) and expand $REPO_ID into the suggested command.
printf '%s\n' \
    "Conversion jobs submitted. Monitor with 'squeue -u \$USER'" \
    'Once all jobs complete, run the aggregation step:' \
    '' \
    'python examples/port_datasets/slurm_aggregate_shards.py \' \
    "    --repo-id $REPO_ID \\" \
    '    --push-to-hub'

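# Optional: uncomment the lines below to aggregate automatically once your
# SLURM queue is empty.
# Caveats: the loop polls `squeue -u $USER`, so it waits for *all* of your
# queued/running jobs (not only the conversion jobs), and an empty queue does
# not by itself prove that every shard finished successfully -- check the job
# logs before pushing to the hub.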

# echo "Waiting for jobs to complete..."
# while [ $(squeue -u $USER -h | wc -l) -gt 0 ]; do
#     echo "Jobs still running, waiting 60 seconds..."
#     sleep 60
# done

# echo "All jobs completed. Starting aggregation..."
# python examples/port_datasets/slurm_aggregate_shards.py \
#     --repo-id "$REPO_ID" \
#     --push-to-hub