mirror of
https://github.com/huggingface/lerobot.git
synced 2026-05-16 00:59:46 +00:00
add: utils for stabler, large scale upload (ds.push_to_hub may fail)
This commit is contained in:
@@ -0,0 +1,35 @@
|
||||
#!/bin/bash
#SBATCH -J b1k-upload
#SBATCH -p hopper-cpu
#SBATCH --qos=high
#SBATCH -c 8
#SBATCH -t 48:00:00
#SBATCH --mem=8G
#SBATCH --array=0-49%8
#SBATCH -D /admin/home/francesco_capuano/lerobot
#SBATCH -o /admin/home/francesco_capuano/lerobot/examples/behavior_1k/logs/%x-%A_%a.out
#SBATCH -e /admin/home/francesco_capuano/lerobot/examples/behavior_1k/logs/%x-%A_%a.err

# Uploads one behavior1k task folder per array task to the Hugging Face Hub.
# Array indices 0-49 map one-to-one onto task ids; at most 8 run concurrently.
# NOTE: the logs/ directory referenced by -o/-e should exist before submission.

set -euo pipefail
set -x

export PYTHONUNBUFFERED=1
export OMP_NUM_THREADS=${SLURM_CPUS_PER_TASK:-1}

# Shell init files and conda's activation hooks commonly read unset variables,
# which aborts a non-interactive shell under `set -u`; relax it while they run.
set +u
source "$HOME/.bashrc" 2>/dev/null || true
if ! command -v conda >/dev/null 2>&1; then
    source "$HOME/miniconda3/etc/profile.d/conda.sh" 2>/dev/null || true
    source "$HOME/anaconda3/etc/profile.d/conda.sh" 2>/dev/null || true
fi
conda activate lerobot
set -u

# The SLURM_ARRAY_TASK_ID will be used as the task-id; fail with a clear
# message when the script is not running as part of a job array.
TASK_ID="${SLURM_ARRAY_TASK_ID:?SLURM_ARRAY_TASK_ID is not set; submit this script with sbatch as a job array}"

# Configuration (each value can be overridden from the submitting environment)
ROOT_PATH="${ROOT_PATH:-/fsx/francesco_capuano/behavior1k-v3}"
HF_USER="${HF_USER:-fracapuano}"

# Quote every expansion so paths or names containing spaces stay one argument.
python examples/behavior_1k/upload_folders.py \
    --task-id "${TASK_ID}" \
    --root-path "${ROOT_PATH}" \
    --hf-user "${HF_USER}"
|
||||
@@ -0,0 +1,101 @@
|
||||
import argparse
|
||||
from pathlib import Path
|
||||
|
||||
from huggingface_hub import HfApi, upload_large_folder
|
||||
|
||||
|
||||
def _build_parser():
    """Build the argument parser for the folder-upload command line."""
    parser = argparse.ArgumentParser(
        description="Upload a folder to Hugging Face Hub using upload_large_folder"
    )
    parser.add_argument(
        "--folder-path",
        type=str,
        required=False,
        help="Path to the folder to upload (used if task-id is not provided)",
    )
    parser.add_argument(
        "--repo-id",
        type=str,
        required=False,
        help="Repository ID on Hugging Face Hub (e.g., 'username/repo-name'). If task-id is provided, will be constructed as '{hf-user}/behavior1k-task{task_id:04d}'",
    )
    parser.add_argument(
        "--task-id",
        type=int,
        required=False,
        help="Task index to upload (e.g., 0, 1, 2, ...). When provided, folder-path is constructed from root-path.",
    )
    parser.add_argument(
        "--root-path",
        type=str,
        required=False,
        help="Root path containing task folders (e.g., /fsx/user/behavior1k-v3). Used with --task-id to construct folder path.",
    )
    parser.add_argument(
        "--hf-user",
        type=str,
        default=None,
        help="Hugging Face username for constructing repo-id with task-id (default: from HF_USER env var or 'fracapuano')",
    )
    parser.add_argument(
        "--create-repo", action="store_true", help="Create the repository if it doesn't exist"
    )
    return parser


def _resolve_hf_user(cli_value):
    """Return the Hub username: the --hf-user flag, else $HF_USER, else 'fracapuano'."""
    import os  # local import: only this helper needs it

    return cli_value or os.environ.get("HF_USER") or "fracapuano"


def _resolve_target(args):
    """Return ``(folder_path, repo_id)`` for the parsed arguments.

    Task mode (``--task-id`` given) derives both values from ``--root-path``
    and the Hub username; otherwise ``--folder-path`` and ``--repo-id`` are
    used verbatim.

    Raises:
        ValueError: if the argument combination is incomplete or the task id
            is negative.
    """
    if args.task_id is None:
        if not args.folder_path or not args.repo_id:
            raise ValueError(
                "Either --task-id with --root-path, or both --folder-path and --repo-id must be provided"
            )
        return Path(args.folder_path), args.repo_id

    if not args.root_path:
        raise ValueError("--root-path is required when --task-id is provided")
    if args.task_id < 0:
        # A negative id would format as e.g. "behavior1k-task-001".
        raise ValueError(f"--task-id must be non-negative, got {args.task_id}")

    task_folder_name = f"behavior1k-task{args.task_id:04d}"
    folder_path = Path(args.root_path) / task_folder_name
    # Without the fallback, an omitted --hf-user produced "None/<task folder>".
    repo_id = f"{_resolve_hf_user(args.hf_user)}/{task_folder_name}"

    print(f"Task mode: uploading task {args.task_id}")
    return folder_path, repo_id


def main(argv=None):
    """Upload one local folder to a Hugging Face dataset repository.

    Args:
        argv: Optional list of command-line arguments; ``None`` (the default)
            makes argparse read ``sys.argv[1:]``.

    Raises:
        ValueError: if the arguments are incomplete or the folder to upload
            does not exist or is not a directory.
        Exception: any error raised by ``upload_large_folder`` is re-raised
            after being reported.
    """
    args = _build_parser().parse_args(argv)

    # Construct folder path and repo ID based on task-id or use provided values
    folder_path, repo_id = _resolve_target(args)

    # Validate folder path
    if not folder_path.exists():
        raise ValueError(f"Folder path does not exist: {folder_path}")
    if not folder_path.is_dir():
        raise ValueError(f"Path is not a directory: {folder_path}")

    print(f"Uploading folder: {folder_path}")
    print(f"Repository: {repo_id}")

    # Create repository if requested
    if args.create_repo:
        api = HfApi()
        print(f"Creating repository {repo_id}...")
        try:
            api.create_repo(repo_id=repo_id, repo_type="dataset", exist_ok=True)
            print("Repository created or already exists. Updating its contents")
        except Exception as e:
            # Deliberately best-effort: the upload below is still attempted.
            print(f"Warning: Could not create repository: {e}")

    # Upload the folder
    print("Starting upload...")
    try:
        upload_large_folder(
            folder_path=str(folder_path),
            repo_id=repo_id,
            repo_type="dataset",
        )
    except Exception as e:
        print(f"✗ Upload failed: {e}")
        raise

    print("✓ Upload completed successfully!")
    # upload_large_folder returns None (it pushes many commits), so report
    # the dataset location instead of a non-existent commit URL.
    print(f"Dataset URL: https://huggingface.co/datasets/{repo_id}")


if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user