diff --git a/examples/behavior_1k/upload.slurm b/examples/behavior_1k/upload.slurm
new file mode 100644
index 000000000..393d13135
--- /dev/null
+++ b/examples/behavior_1k/upload.slurm
@@ -0,0 +1,42 @@
+#!/bin/bash
+#SBATCH -J b1k-upload
+#SBATCH -p hopper-cpu
+#SBATCH --qos=high
+#SBATCH -c 8
+#SBATCH -t 48:00:00
+#SBATCH --mem=8G
+#SBATCH --array=0-49%8
+#SBATCH -D /admin/home/francesco_capuano/lerobot
+#SBATCH -o /admin/home/francesco_capuano/lerobot/examples/behavior_1k/logs/%x-%A_%a.out
+#SBATCH -e /admin/home/francesco_capuano/lerobot/examples/behavior_1k/logs/%x-%A_%a.err
+
+# Uploads one behavior1k task folder per array element (task ids 0-49, at most 8 running at once).
+# NOTE: the logs/ directory named in -o/-e must already exist at submit time; SLURM will not create it.
+
+set -euo pipefail
+set -x
+export PYTHONUNBUFFERED=1
+export OMP_NUM_THREADS=${SLURM_CPUS_PER_TASK:-1}
+
+# Shell init files and conda's activation hooks may expand unset variables; under `set -u`
+# that aborts a non-interactive shell even behind `|| true`, so relax nounset while sourcing.
+set +u
+source "$HOME/.bashrc" 2>/dev/null || true
+if ! command -v conda >/dev/null 2>&1; then
+    source "$HOME/miniconda3/etc/profile.d/conda.sh" 2>/dev/null || true
+    source "$HOME/anaconda3/etc/profile.d/conda.sh" 2>/dev/null || true
+fi
+conda activate lerobot
+set -u
+
+# Each array element uploads the task whose index equals its array task id.
+TASK_ID=${SLURM_ARRAY_TASK_ID:?this script must be submitted as a SLURM array job}
+
+# Configuration
+ROOT_PATH="/fsx/francesco_capuano/behavior1k-v3"
+HF_USER="fracapuano"
+
+python examples/behavior_1k/upload_folders.py \
+    --task-id "${TASK_ID}" \
+    --root-path "${ROOT_PATH}" \
+    --hf-user "${HF_USER}"
diff --git a/examples/behavior_1k/upload_folders.py b/examples/behavior_1k/upload_folders.py
new file mode 100644
index 000000000..d8fe16eb8
--- /dev/null
+++ b/examples/behavior_1k/upload_folders.py
@@ -0,0 +1,136 @@
+"""Upload a dataset folder to the Hugging Face Hub using ``upload_large_folder``.
+
+Two invocation modes are supported:
+
+* task mode: ``--task-id N --root-path ROOT [--hf-user USER]`` uploads
+  ``ROOT/behavior1k-taskNNNN`` to the dataset repo ``USER/behavior1k-taskNNNN``;
+* explicit mode: ``--folder-path PATH --repo-id USER/NAME`` uploads ``PATH`` as given.
+"""
+
+import argparse
+import os
+from pathlib import Path
+
+from huggingface_hub import HfApi, upload_large_folder
+
+# Last-resort namespace for task mode, as advertised in the --hf-user help text.
+DEFAULT_HF_USER = "fracapuano"
+
+
+def build_parser() -> argparse.ArgumentParser:
+    """Return the command-line parser for this script."""
+    parser = argparse.ArgumentParser(
+        description="Upload a folder to Hugging Face Hub using upload_large_folder"
+    )
+    parser.add_argument(
+        "--folder-path",
+        type=str,
+        required=False,
+        help="Path to the folder to upload (used if task-id is not provided)",
+    )
+    parser.add_argument(
+        "--repo-id",
+        type=str,
+        required=False,
+        help="Repository ID on Hugging Face Hub (e.g., 'username/repo-name'). If task-id is provided, will be constructed as '{hf-user}/behavior1k-task{task_id:04d}'",
+    )
+    parser.add_argument(
+        "--task-id",
+        type=int,
+        required=False,
+        help="Task index to upload (e.g., 0, 1, 2, ...). When provided, folder-path is constructed from root-path.",
+    )
+    parser.add_argument(
+        "--root-path",
+        type=str,
+        required=False,
+        help="Root path containing task folders (e.g., /fsx/user/behavior1k-v3). Used with --task-id to construct folder path.",
+    )
+    parser.add_argument(
+        "--hf-user",
+        type=str,
+        default=None,
+        help="Hugging Face username for constructing repo-id with task-id (default: from HF_USER env var or 'fracapuano')",
+    )
+    parser.add_argument(
+        "--create-repo", action="store_true", help="Create the repository if it doesn't exist"
+    )
+    return parser
+
+
+def resolve_target(args: argparse.Namespace) -> tuple[Path, str]:
+    """Work out which local folder to upload and which dataset repo receives it.
+
+    Args:
+        args: Parsed command-line arguments produced by ``build_parser()``.
+
+    Returns:
+        A ``(folder_path, repo_id)`` pair.
+
+    Raises:
+        ValueError: If the arguments do not describe a complete upload target, or if
+            the resolved folder does not exist or is not a directory.
+    """
+    if args.task_id is not None:
+        if not args.root_path:
+            raise ValueError("--root-path is required when --task-id is provided")
+
+        # Honour the documented fallback chain instead of formatting ``None`` into the repo id.
+        hf_user = args.hf_user or os.environ.get("HF_USER") or DEFAULT_HF_USER
+        task_folder_name = f"behavior1k-task{args.task_id:04d}"
+        folder_path = Path(args.root_path) / task_folder_name
+        repo_id = f"{hf_user}/{task_folder_name}"
+
+        print(f"Task mode: uploading task {args.task_id}")
+    else:
+        if not args.folder_path or not args.repo_id:
+            raise ValueError(
+                "Either --task-id with --root-path, or both --folder-path and --repo-id must be provided"
+            )
+
+        folder_path = Path(args.folder_path)
+        repo_id = args.repo_id
+
+    if not folder_path.exists():
+        raise ValueError(f"Folder path does not exist: {folder_path}")
+    if not folder_path.is_dir():
+        raise ValueError(f"Path is not a directory: {folder_path}")
+
+    return folder_path, repo_id
+
+
+def main() -> None:
+    """Parse the command line, optionally create the dataset repo, and upload the folder."""
+    args = build_parser().parse_args()
+    folder_path, repo_id = resolve_target(args)
+
+    print(f"Uploading folder: {folder_path}")
+    print(f"Repository: {repo_id}")
+
+    if args.create_repo:
+        print(f"Creating repository {repo_id}...")
+        try:
+            HfApi().create_repo(repo_id=repo_id, repo_type="dataset", exist_ok=True)
+            print("Repository created or already exists. Updating its contents")
+        except Exception as e:
+            # Deliberately best-effort: keep going and let the upload surface any real problem.
+            print(f"Warning: Could not create repository: {e}")
+
+    print("Starting upload...")
+    try:
+        upload_large_folder(
+            folder_path=str(folder_path),
+            repo_id=repo_id,
+            repo_type="dataset",
+        )
+    except Exception as e:
+        print(f"✗ Upload failed: {e}")
+        raise
+
+    # upload_large_folder() returns None, so report the repo location rather than a commit URL.
+    print("✓ Upload completed successfully!")
+    print(f"Repository URL: https://huggingface.co/datasets/{repo_id}")
+
+
+if __name__ == "__main__":
+    main()