mirror of
https://github.com/huggingface/lerobot.git
synced 2026-05-16 17:20:05 +00:00
48 lines
1.5 KiB
Bash
48 lines
1.5 KiB
Bash
#!/bin/bash
#
# Example script for converting the RT-1 dataset using SLURM.
# Make sure to modify the paths and parameters according to your setup.
#
# Flow:
#   1. Launch the sharded conversion through slurm_port_shards.py.
#   2. Print the aggregation command to run by hand once every shard is done
#      (an optional automatic variant is kept commented out at the bottom).

# Stop at the first failing command, on use of an unset variable, and on a
# failure anywhere in a pipeline. Without this, a failed submission in step 1
# would still be followed by the "Conversion jobs submitted" message.
set -euo pipefail

# Configuration
RAW_DIR="/path/to/datasets/fractal20220817_data/0.1.0"
REPO_ID="your_username/rt1_lerobot"
LOGS_DIR="/path/to/logs"
PARTITION="cpu" # Your SLURM partition name

# Step 1: Convert dataset using distributed processing
echo "Starting RT-1 dataset conversion..."
python examples/port_datasets/slurm_port_shards.py \
    --raw-dir "$RAW_DIR" \
    --repo-id "$REPO_ID" \
    --dataset-type rlds \
    --logs-dir "$LOGS_DIR" \
    --job-name rt1_conversion \
    --workers 32 \
    --num-shards 32 \
    --partition "$PARTITION" \
    --cpus-per-task 4 \
    --mem-per-cpu 2G \
    --slurm 1

# Step 2: Wait for jobs to complete (you can monitor with squeue)
# Only reached when the command above exited successfully (see `set -e`).
# `\$USER` is escaped on purpose so the hint prints the literal variable name.
echo "Conversion jobs submitted. Monitor with 'squeue -u \$USER'"
echo "Once all jobs complete, run the aggregation step:"
echo ""
echo "python examples/port_datasets/slurm_aggregate_shards.py \\"
echo " --repo-id $REPO_ID \\"
echo " --push-to-hub"

# Uncomment the following lines if you want to automatically aggregate
# (but make sure all shards are complete first)
#
# NOTE: the loop below polls every job owned by $USER, not only the jobs of
# this conversion, so it keeps waiting while any unrelated job is queued.

# echo "Waiting for jobs to complete..."
# while [ "$(squeue -u "$USER" -h | wc -l)" -gt 0 ]; do
#     echo "Jobs still running, waiting 60 seconds..."
#     sleep 60
# done

# echo "All jobs completed. Starting aggregation..."
# python examples/port_datasets/slurm_aggregate_shards.py \
#     --repo-id "$REPO_ID" \
#     --push-to-hub