mirror of
https://github.com/huggingface/lerobot.git
synced 2026-05-21 11:39:50 +00:00
add: utils for stabler, large scale upload (ds.push_to_hub may fail)
This commit is contained in:
@@ -0,0 +1,35 @@
|
|||||||
|
#!/bin/bash
# SLURM array job: every array task uploads one BEHAVIOR-1K task folder to the
# Hugging Face Hub by calling examples/behavior_1k/upload_folders.py with its
# array index as --task-id.
#
# NOTE: the logs/ directory referenced by -o/-e below should exist before
# running `sbatch`; SLURM is not expected to create missing parent directories.
#SBATCH -J b1k-upload
#SBATCH -p hopper-cpu
#SBATCH --qos=high
#SBATCH -c 8
#SBATCH -t 48:00:00
#SBATCH --mem=8G
#SBATCH --array=0-49%8
#SBATCH -D /admin/home/francesco_capuano/lerobot
#SBATCH -o /admin/home/francesco_capuano/lerobot/examples/behavior_1k/logs/%x-%A_%a.out
#SBATCH -e /admin/home/francesco_capuano/lerobot/examples/behavior_1k/logs/%x-%A_%a.err

set -euo pipefail
set -x

export PYTHONUNBUFFERED=1
export OMP_NUM_THREADS="${SLURM_CPUS_PER_TASK:-1}"

# Shell init files and conda activation hooks may reference unset variables.
# Under `set -u` that aborts a non-interactive shell even behind `|| true`,
# so nounset is relaxed for the environment setup and restored afterwards.
set +u
source "$HOME/.bashrc" 2>/dev/null || true
if ! command -v conda >/dev/null 2>&1; then
    for conda_sh in \
        "$HOME/miniconda3/etc/profile.d/conda.sh" \
        "$HOME/anaconda3/etc/profile.d/conda.sh"; do
        if [[ -f "$conda_sh" ]]; then
            source "$conda_sh" 2>/dev/null || true
        fi
    done
fi
conda activate lerobot
set -u

# The SLURM_ARRAY_TASK_ID is used as the task-id; fail with a clear message
# when the script is not running as part of an array job.
TASK_ID="${SLURM_ARRAY_TASK_ID:?SLURM_ARRAY_TASK_ID is not set; submit this script with sbatch as an array job}"

# Configuration (each value can be overridden from the submission environment)
ROOT_PATH="${ROOT_PATH:-/fsx/francesco_capuano/behavior1k-v3}"
HF_USER="${HF_USER:-fracapuano}"

python examples/behavior_1k/upload_folders.py \
    --task-id "${TASK_ID}" \
    --root-path "${ROOT_PATH}" \
    --hf-user "${HF_USER}"
|
||||||
@@ -0,0 +1,101 @@
|
|||||||
|
import argparse
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from huggingface_hub import HfApi, upload_large_folder
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Upload one dataset folder to the Hugging Face Hub with ``upload_large_folder``.

    Two invocation modes are supported:

    * Task mode (``--task-id`` together with ``--root-path``): the folder is
      ``<root-path>/behavior1k-task<NNNN>`` and the repository id is
      ``<hf-user>/behavior1k-task<NNNN>``.
    * Explicit mode (``--folder-path`` together with ``--repo-id``).

    The Hugging Face username is taken from ``--hf-user``, then from the
    ``HF_USER`` environment variable, then falls back to ``'fracapuano'``.

    Raises:
        ValueError: If the arguments are incomplete, or the resolved folder
            does not exist or is not a directory.
        Exception: Any error raised during the upload is reported on stdout
            and re-raised unchanged.
    """
    import os  # local import: only needed for the HF_USER fallback

    parser = argparse.ArgumentParser(
        description="Upload a folder to Hugging Face Hub using upload_large_folder"
    )
    parser.add_argument(
        "--folder-path",
        type=str,
        required=False,
        help="Path to the folder to upload (used if task-id is not provided)",
    )
    parser.add_argument(
        "--repo-id",
        type=str,
        required=False,
        help="Repository ID on Hugging Face Hub (e.g., 'username/repo-name'). If task-id is provided, will be constructed as '{hf-user}/behavior1k-task{task_id:04d}'",
    )
    parser.add_argument(
        "--task-id",
        type=int,
        required=False,
        help="Task index to upload (e.g., 0, 1, 2, ...). When provided, folder-path is constructed from root-path.",
    )
    parser.add_argument(
        "--root-path",
        type=str,
        required=False,
        help="Root path containing task folders (e.g., /fsx/user/behavior1k-v3). Used with --task-id to construct folder path.",
    )
    parser.add_argument(
        "--hf-user",
        type=str,
        default=None,
        help="Hugging Face username for constructing repo-id with task-id (default: from HF_USER env var or 'fracapuano')",
    )
    parser.add_argument(
        "--create-repo", action="store_true", help="Create the repository if it doesn't exist"
    )

    args = parser.parse_args()

    # Construct folder path and repo ID based on task-id or use provided values
    if args.task_id is not None:
        if not args.root_path:
            raise ValueError("--root-path is required when --task-id is provided")

        # Apply the fallback chain promised by the --hf-user help text; without
        # it an omitted flag would yield the repo id "None/behavior1k-task...".
        hf_user = args.hf_user or os.environ.get("HF_USER") or "fracapuano"

        task_folder_name = f"behavior1k-task{args.task_id:04d}"
        folder_path = Path(args.root_path) / task_folder_name
        repo_id = f"{hf_user}/{task_folder_name}"

        print(f"Task mode: uploading task {args.task_id}")
    else:
        if not args.folder_path or not args.repo_id:
            raise ValueError(
                "Either --task-id with --root-path, or both --folder-path and --repo-id must be provided"
            )

        folder_path = Path(args.folder_path)
        repo_id = args.repo_id

    # Validate folder path
    if not folder_path.exists():
        raise ValueError(f"Folder path does not exist: {folder_path}")
    if not folder_path.is_dir():
        raise ValueError(f"Path is not a directory: {folder_path}")

    print(f"Uploading folder: {folder_path}")
    print(f"Repository: {repo_id}")

    # Create repository if requested
    if args.create_repo:
        api = HfApi()
        print(f"Creating repository {repo_id}...")
        try:
            api.create_repo(repo_id=repo_id, repo_type="dataset", exist_ok=True)
            print("Repository created or already exists. Updating its contents")
        except Exception as e:
            # Deliberately best-effort: a failure here is only reported, and
            # the upload below is still attempted.
            print(f"Warning: Could not create repository: {e}")

    # Upload the folder
    print("Starting upload...")
    try:
        result = upload_large_folder(
            folder_path=str(folder_path),
            repo_id=repo_id,
            repo_type="dataset",
        )
        print("✓ Upload completed successfully!")
        # NOTE(review): upload_large_folder is documented as returning None, so
        # only print a URL when a value actually comes back - confirm against
        # the installed huggingface_hub version.
        if result is not None:
            print(f"Commit URL: {result}")
    except Exception as e:
        print(f"✗ Upload failed: {e}")
        raise


if __name__ == "__main__":
    main()
|
||||||
Reference in New Issue
Block a user