From f875566e1dfb6398e3d871f3bbe442903ef7d77d Mon Sep 17 00:00:00 2001
From: fracapuano
Date: Thu, 13 Nov 2025 14:43:30 +0000
Subject: [PATCH] add: downloading data utils

---
Notes (this section is not part of the commit message or of the diff):

What the patch does
  * Adds examples/behavior_1k/download.slurm, a batch job that activates the
    "lerobot" conda environment and runs examples/behavior_1k/download_data.py
    with --repo-id, --local-dir and --max-workers 64.
  * Turns the hard-coded repo id and local directory in download_data.py into
    command-line options: --repo-id (required), --local-dir (required),
    --max-workers (int, default 8) and --force-download (flag).
  * With --force-download the script removes --local-dir via
    shutil.rmtree(..., ignore_errors=True) before calling snapshot_download,
    and also forwards force_download=True to it.
  * snapshot_download is now called with ignore_patterns=["annotations/*"],
    so files matching that pattern are not downloaded.

Review notes (to be confirmed by the author)
  * NOTE(review): the "#SBATCH -c" / "#SBATCH -t" trailing comments say
    "per conversion"; this job downloads data, so they look carried over
    from a conversion job -- confirm and reword.
  * NOTE(review): "-c 64" and "--max-workers 64" are set independently;
    presumably they are meant to stay in sync -- confirm.
  * NOTE(review): "conda activate" runs after "set -euo pipefail"; some
    activation scripts read unset variables -- verify activation works
    with "set -u" on the target cluster.
  * NOTE(review): the -D/-o/-e paths and --local-dir are absolute,
    user-specific paths; the logs/ directory presumably has to exist before
    submission -- verify.
  * NOTE(review): --force-download deletes whatever directory --local-dir
    points at, without confirmation; ignore_errors=True also hides a failed
    deletion -- confirm this is intended.

 examples/behavior_1k/download.slurm   | 27 +++++++++++++++++++++++++++
 examples/behavior_1k/download_data.py | 22 ++++++++++++++++++++--
 2 files changed, 47 insertions(+), 2 deletions(-)
 create mode 100644 examples/behavior_1k/download.slurm

diff --git a/examples/behavior_1k/download.slurm b/examples/behavior_1k/download.slurm
new file mode 100644
index 000000000..655819129
--- /dev/null
+++ b/examples/behavior_1k/download.slurm
@@ -0,0 +1,27 @@
+#!/bin/bash
+#SBATCH -J b1k-download
+#SBATCH -p hopper-cpu
+#SBATCH --qos=high
+#SBATCH -c 64 # CPUs per conversion (tune as needed)
+#SBATCH -t 20:00:00 # Time per conversion
+#SBATCH -D /admin/home/francesco_capuano/lerobot
+#SBATCH -o /admin/home/francesco_capuano/lerobot/examples/behavior_1k/logs/%x-%A.out
+#SBATCH -e /admin/home/francesco_capuano/lerobot/examples/behavior_1k/logs/%x-%A.err
+
+set -euo pipefail
+set -x
+export PYTHONUNBUFFERED=1
+export OMP_NUM_THREADS=${SLURM_CPUS_PER_TASK:-1}
+
+# activate your env if needed
+source "$HOME/.bashrc" 2>/dev/null || true
+if ! command -v conda >/dev/null 2>&1; then
+  source "$HOME/miniconda3/etc/profile.d/conda.sh" 2>/dev/null || true
+  source "$HOME/anaconda3/etc/profile.d/conda.sh" 2>/dev/null || true
+fi
+conda activate lerobot
+
+python examples/behavior_1k/download_data.py \
+  --repo-id "behavior-1k/2025-challenge-demos" \
+  --local-dir "/fsx/francesco_capuano/behavior1k-2025-v21" \
+  --max-workers 64
diff --git a/examples/behavior_1k/download_data.py b/examples/behavior_1k/download_data.py
index 4c5266af2..2433ea12c 100644
--- a/examples/behavior_1k/download_data.py
+++ b/examples/behavior_1k/download_data.py
@@ -1,8 +1,26 @@
+import shutil
+
 from huggingface_hub import snapshot_download
 
 if __name__ == "__main__":
+    import argparse
+
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--repo-id", type=str, required=True)
+    parser.add_argument("--max-workers", type=int, default=8)
+    parser.add_argument("--local-dir", type=str, required=True)
+    parser.add_argument("--force-download", action="store_true")
+
+    args = parser.parse_args()
+
+    if args.force_download:
+        shutil.rmtree(args.local_dir, ignore_errors=True)
+
     snapshot_download(
-        repo_id="behavior-1k/2025-challenge-demos",
+        repo_id=args.repo_id,
         repo_type="dataset",
-        local_dir="/fsx/francesco_capuano/behavior1k-2025-v21",
+        force_download=args.force_download,
+        max_workers=args.max_workers,
+        local_dir=args.local_dir,
+        ignore_patterns=["annotations/*"], # NOTE(fracapuano): Dropping textual annotations right now
     )