feat(benchmarks): add LIBERO training benchmark pipeline

Single-script benchmark that trains and evaluates all 9 LeRobot policies
on LIBERO. Each SLURM job self-publishes its result row to a HuggingFace
leaderboard dataset — no separate collection step needed.

Policies: pi0, pi0_fast, pi05, groot, act, diffusion, smolvla, xvla,
multi_task_dit. 5000 steps, batch size 32 per GPU (256 effective on
8-GPU jobs), with per-policy GPU allocation and default LR/scheduler
presets.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
Author: Pepijn
Date: 2026-04-09 17:01:49 +02:00
Commit: fd00e38851 (parent: 4dbbcca496)

2 changed files with 666 additions and 0 deletions
@@ -0,0 +1,60 @@
# LeRobot LIBERO Training Benchmark
Train and evaluate all LeRobot policies on [LIBERO](https://libero-project.github.io/) and publish results as a HuggingFace leaderboard dataset.
## Policies
| Policy | Base Model | GPUs | LR | Chunk | Notes |
| -------------- | -------------------- | ---- | ------ | ----- | ------------------------------------- |
| pi0 | lerobot/pi0_base | 8 | 2.5e-5 | 30 | PaliGemma + Gemma flow matching |
| pi0_fast | lerobot/pi0fast-base | 8 | 2.5e-5 | 30 | Requires tokenizer pre-training |
| pi05 | lerobot/pi05_base | 8 | 2.5e-5 | 30 | Quantiles normalization |
| groot | nvidia/GR00T-N1.5-3B | 8 | 1e-4 | 30 | bf16, diffusion head + projector only |
| act | From scratch | 1 | 1e-5 | 30 | ResNet-18, lightweight |
| diffusion | From scratch | 1 | 1e-4 | 32\* | U-Net, horizon must be divisible by 8 |
| smolvla | lerobot/smolvla_base | 8 | 1e-4 | 30 | SmolVLM2-500M |
| xvla           | lerobot/xvla-widowx  | 4    | 1e-4   | 32    | Florence2 + CLIP                      |
| multi_task_dit | From scratch         | 1    | 2e-5   | 32\*  | CLIP + DiT                            |
\* These policies (diffusion, multi_task_dit) use `horizon` rather than `chunk_size`; it is set to 32, the nearest valid value to 30 (the diffusion U-Net needs a horizon divisible by 8). xvla uses a plain `chunk_size` of 32.
## Training spec
- **Steps**: 5,000 per policy
- **Batch size**: 32 per GPU (effective batch size 256 on 8-GPU jobs, 128 for 4-GPU xvla, 32 for single-GPU policies; see the sketch after this list)
- **Dataset**: `lerobot/libero` (libero_spatial)
- **Evaluation**: 20 episodes after training
- **Optimizer/LR**: each policy's default optimizer and scheduler preset (LR column in the table above)
- **Results**: each SLURM job publishes its own row to the HF leaderboard dataset automatically
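For reference, the per-policy sample budget follows directly from these numbers. A quick sketch that mirrors the `total_samples_seen` field each job publishes (GPU counts taken from the table above):

```python
steps, per_gpu_bs = 5_000, 32  # training spec above

# Effective batch size scales with each policy's GPU allocation.
for policy, gpus in {"pi0": 8, "xvla": 4, "act": 1}.items():
    eff_bs = per_gpu_bs * gpus
    print(f"{policy}: effective BS {eff_bs}, {steps * eff_bs:,} samples seen")
```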
## Quick start
### 1. Generate SLURM scripts
```bash
python benchmarks/libero/run_benchmark.py \
--output_dir /scratch/lerobot-benchmark \
--hub_org lerobot
```
### 2. Submit jobs
```bash
# If using pi0_fast, train the tokenizer first:
sbatch /scratch/lerobot-benchmark/slurm_scripts/00_tokenizer.sh
# Wait for it to finish, then submit pi0_fast.
# All other policies can run in parallel. Skip the tokenizer script here,
# otherwise the [0-9][0-9] glob would submit 00_tokenizer.sh a second time:
for script in /scratch/lerobot-benchmark/slurm_scripts/[0-9][0-9]_*.sh; do
    [[ "$script" == *tokenizer* || "$script" == *pi0_fast* ]] && continue
    sbatch "$script"
done
```
Each job publishes its result to `lerobot/benchmark-libero` on the Hub when it finishes.
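Once a few jobs have finished, the leaderboard can be read back with the `datasets` library. A minimal sketch (column names follow the row schema published by `run_benchmark.py`; `eval_success_rate` is `None` for rows whose eval step failed):

```python
from datasets import load_dataset

# Pull the leaderboard split that every finished SLURM job appends to.
board = load_dataset("lerobot/benchmark-libero", split="train")

# Rank policies by success rate; rows with a failed eval carry None.
for row in sorted(board, key=lambda r: r["eval_success_rate"] or -1.0, reverse=True):
    print(
        f"{row['policy_type']:>15}  "
        f"SR={row['eval_success_rate']}  "
        f"train={row['training_time_s']:.0f}s  "
        f"gpus={row['num_gpus']}"
    )
```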
## Prerequisites
- SLURM cluster with CUDA GPUs (A100 80GB recommended for VLM policies)
- `pip install lerobot[pi,smolvla,groot,xvla,multi_task_dit,libero] datasets`
- `huggingface-cli login`
@@ -0,0 +1,606 @@
#!/usr/bin/env python
"""Generate SLURM sbatch scripts for training all LeRobot policies on LIBERO.
Each generated script trains one policy, evaluates it, and publishes its
results row to a HuggingFace leaderboard dataset — no separate collection
step needed.
Usage:
# Generate scripts for all policies:
python benchmarks/libero/run_benchmark.py \\
--output_dir /scratch/lerobot-benchmark --hub_org lerobot
# Generate for a subset:
python benchmarks/libero/run_benchmark.py \\
--policies pi0 smolvla act \\
--output_dir /scratch/lerobot-benchmark --hub_org lerobot
"""
from __future__ import annotations
import argparse
import json
import subprocess
import textwrap
import uuid
from dataclasses import dataclass, field
from datetime import UTC, datetime
from pathlib import Path
# ──────────────────────────────────────────────────────────────────────
# Policy benchmark configs
# ──────────────────────────────────────────────────────────────────────
@dataclass
class PolicyBenchmarkConfig:
"""Training configuration for a single policy on a benchmark."""
policy_type: str
policy_path: str | None = None
num_gpus: int = 1
chunk_size: int | None = None # Set on policies that use chunk_size (not horizon)
extra_policy_args: dict[str, str] = field(default_factory=dict)
needs_tokenizer: bool = False
tokenizer_args: dict[str, str] = field(default_factory=dict)
COMMON_TRAINING_ARGS: dict[str, str] = {
"dataset.repo_id": "lerobot/libero",
"dataset.use_imagenet_stats": "false",
"env.type": "libero",
"env.task": "libero_spatial",
"steps": "5000",
"batch_size": "32",
"eval_freq": "0",
"save_freq": "5000",
"save_checkpoint": "true",
"log_freq": "100",
"wandb.enable": "true",
"policy.push_to_hub": "true",
"rename_map": (
'{"observation.images.image":"observation.images.camera1",'
'"observation.images.image2":"observation.images.camera2"}'
),
}
EVAL_ARGS: dict[str, str] = {
"env.type": "libero",
"env.task": "libero_spatial",
"eval.n_episodes": "20",
"eval.batch_size": "10",
}
POLICY_CONFIGS: dict[str, PolicyBenchmarkConfig] = {
"pi0": PolicyBenchmarkConfig(
policy_type="pi0",
policy_path="lerobot/pi0_base",
num_gpus=8,
chunk_size=30,
extra_policy_args={
"policy.n_action_steps": "30",
"policy.scheduler_decay_steps": "5000",
},
),
"pi0_fast": PolicyBenchmarkConfig(
policy_type="pi0_fast",
policy_path="lerobot/pi0fast-base",
num_gpus=8,
chunk_size=30,
extra_policy_args={
"policy.n_action_steps": "30",
"policy.scheduler_decay_steps": "5000",
},
needs_tokenizer=True,
tokenizer_args={
"repo_id": "lerobot/libero",
"action_horizon": "30",
"encoded_dims": "0:7",
"normalization_mode": "QUANTILES",
"vocab_size": "1024",
"scale": "10.0",
"push_to_hub": "true",
},
),
"pi05": PolicyBenchmarkConfig(
policy_type="pi05",
policy_path="lerobot/pi05_base",
num_gpus=8,
chunk_size=30,
extra_policy_args={
"policy.n_action_steps": "30",
"policy.scheduler_decay_steps": "5000",
},
),
"groot": PolicyBenchmarkConfig(
policy_type="groot",
policy_path=None,
num_gpus=8,
chunk_size=30,
extra_policy_args={
"policy.n_action_steps": "30",
"policy.base_model_path": "nvidia/GR00T-N1.5-3B",
"policy.tune_diffusion_model": "true",
"policy.tune_projector": "true",
"policy.tune_llm": "false",
"policy.tune_visual": "false",
"policy.use_bf16": "true",
},
),
"act": PolicyBenchmarkConfig(
policy_type="act",
policy_path=None,
num_gpus=1,
chunk_size=30,
extra_policy_args={"policy.n_action_steps": "30"},
),
"diffusion": PolicyBenchmarkConfig(
policy_type="diffusion",
policy_path=None,
num_gpus=1,
chunk_size=None,
extra_policy_args={
"policy.horizon": "32",
"policy.n_action_steps": "30",
"policy.n_obs_steps": "2",
},
),
"smolvla": PolicyBenchmarkConfig(
policy_type="smolvla",
policy_path="lerobot/smolvla_base",
num_gpus=8,
chunk_size=30,
extra_policy_args={
"policy.n_action_steps": "30",
"policy.load_vlm_weights": "true",
"policy.freeze_vision_encoder": "false",
"policy.train_expert_only": "false",
"policy.scheduler_decay_steps": "5000",
},
),
"xvla": PolicyBenchmarkConfig(
policy_type="xvla",
policy_path="lerobot/xvla-widowx",
num_gpus=4,
chunk_size=32,
extra_policy_args={
"policy.n_action_steps": "32",
"policy.scheduler_decay_steps": "5000",
},
),
"multi_task_dit": PolicyBenchmarkConfig(
policy_type="multi_task_dit",
policy_path=None,
num_gpus=1,
chunk_size=None,
extra_policy_args={
"policy.horizon": "32",
"policy.n_action_steps": "30",
},
),
}
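# diffusion and multi_task_dit leave chunk_size=None and set policy.horizon via
# extra_policy_args instead; _training_cli_args emits --policy.chunk_size only
# when chunk_size is not None.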
ALL_POLICY_NAMES = list(POLICY_CONFIGS.keys())
# Host memory (GB) to request via SLURM --mem, sized to each policy's GPU footprint
GPU_MEM_ESTIMATES: dict[str, int] = {
"pi0": 320,
"pi0_fast": 320,
"pi05": 280,
"groot": 320,
"act": 64,
"diffusion": 64,
"smolvla": 160,
"xvla": 160,
"multi_task_dit": 64,
}
# ──────────────────────────────────────────────────────────────────────
# SLURM script generation
# ──────────────────────────────────────────────────────────────────────
def _cli_args(args: dict[str, str]) -> str:
"""Build a backslash-continued CLI arg string with proper shell quoting."""
lines = []
for key, value in args.items():
if any(c in str(value) for c in ["{", "}", " ", '"', "'"]):
lines.append(f" --{key}='{value}'")
else:
lines.append(f" --{key}={value}")
return " \\\n".join(lines)
def _training_cli_args(
policy_name: str,
output_dir: Path,
hub_org: str,
benchmark_uuid: str,
) -> str:
cfg = POLICY_CONFIGS[policy_name]
args: dict[str, str] = {}
args.update(COMMON_TRAINING_ARGS)
args["policy.type"] = cfg.policy_type
if cfg.policy_path:
args["policy.path"] = cfg.policy_path
if cfg.chunk_size is not None:
args["policy.chunk_size"] = str(cfg.chunk_size)
args.update(cfg.extra_policy_args)
args["output_dir"] = str(output_dir / "train" / policy_name)
args["policy.repo_id"] = f"{hub_org}/{policy_name}_libero"
args["wandb.project"] = "lerobot-libero-benchmark"
args["wandb.run_name"] = f"{policy_name}_{benchmark_uuid[:8]}"
return _cli_args(args)
def _publish_snippet(
policy_name: str,
output_dir: Path,
hub_org: str,
benchmark_uuid: str,
hub_dataset: str,
) -> str:
"""Inline Python that each SLURM job runs to publish its own result row."""
cfg = POLICY_CONFIGS[policy_name]
steps = int(COMMON_TRAINING_ARGS["steps"])
bs = int(COMMON_TRAINING_ARGS["batch_size"])
eff_bs = bs * cfg.num_gpus
train_dir = output_dir / "train" / policy_name
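    # NOTE: the generated snippet publishes via read-modify-write (load_dataset,
    # append one row, push_to_hub), so two jobs finishing at the same moment can
    # race on the Hub dataset; benchmark_result.json is always written locally
    # first so no result is lost.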
return textwrap.dedent(f"""\
python3 -c "
import json, re
from pathlib import Path
from datetime import datetime, timezone
timing = {{}}
tp = Path('{output_dir}/logs/{policy_name}_timing.txt')
if tp.exists():
for ln in tp.read_text().splitlines():
if '=' in ln:
k, _, v = ln.partition('=')
timing[k.strip()] = v.strip()
# Parse eval results
eval_sr, eval_per_task, eval_n = None, '{{}}', 0
eval_dir = Path('{train_dir}/eval_results')
if eval_dir.exists():
for jf in eval_dir.glob('**/*.json'):
try:
d = json.loads(jf.read_text())
except Exception:
continue
if 'avg_success_rate' in d:
eval_sr = d['avg_success_rate']
elif 'eval_info' in d and 'avg_success_rate' in d.get('eval_info', {{}}):
eval_sr = d['eval_info']['avg_success_rate']
pt = {{k: v for k, v in d.items() if 'success_rate' in k and k != 'avg_success_rate'}}
if pt:
eval_per_task = json.dumps(pt)
if 'n_episodes' in d:
eval_n = d['n_episodes']
# Parse final loss from SLURM stdout
final_loss = None
for lf in sorted(Path('{output_dir}/logs').glob('{policy_name}_[0-9]*.out'), reverse=True):  # [0-9] keeps pi0 from also matching pi0_fast logs
losses = re.findall(r'\\\"loss\\\"\\s*:\\s*([\\d.e+-]+)', lf.read_text())
if losses:
final_loss = float(losses[-1])
break
# Parse peak GPU mem
peak_mem = 0.0
csv_p = Path('{output_dir}/logs/{policy_name}_gpu_mem.csv')
if csv_p.exists():
for ln in csv_p.read_text().splitlines():
parts = ln.strip().split(',')
if len(parts) >= 2:
try:
peak_mem = max(peak_mem, float(parts[1].strip()))
except ValueError:
pass
# Parse train config for optimizer details
lr, opt_wd, opt_type, sched_type, sched_warmup, sched_decay = 0.0, 0.0, 'AdamW', '', 0, 0
freeze_ve, train_eo, grad_ckpt = False, False, False
cfg_path = Path('{train_dir}/checkpoints/{steps:06d}/pretrained_model/train_config.json')
if cfg_path.exists():
tc = json.loads(cfg_path.read_text())
    o = tc.get('optimizer', {{}})
    lr = o.get('lr', 0.0)
    opt_wd = o.get('weight_decay', 0.0)
    opt_type = o.get('type', opt_type)
s = tc.get('scheduler', {{}})
sched_type = s.get('type', '')
sched_warmup = s.get('num_warmup_steps', 0)
sched_decay = s.get('num_decay_steps', 0)
p = tc.get('policy', {{}})
freeze_ve = p.get('freeze_vision_encoder', False)
train_eo = p.get('train_expert_only', False)
grad_ckpt = p.get('gradient_checkpointing', False)
row = {{
'benchmark_uuid': '{benchmark_uuid}',
'policy_type': '{policy_name}',
'policy_repo_id': '{hub_org}/{policy_name}_libero',
'base_model_repo_id': '{cfg.policy_path or ""}',
'dataset_repo_id': '{COMMON_TRAINING_ARGS["dataset.repo_id"]}',
'env_type': '{COMMON_TRAINING_ARGS["env.type"]}',
'env_task': '{COMMON_TRAINING_ARGS["env.task"]}',
'steps': {steps},
'batch_size_per_gpu': {bs},
'num_gpus': {cfg.num_gpus},
'effective_batch_size': {eff_bs},
'total_samples_seen': {steps * eff_bs},
'chunk_size': {cfg.chunk_size or 0},
'learning_rate': lr,
    'optimizer_type': opt_type,
'optimizer_weight_decay': opt_wd,
'scheduler_type': sched_type,
'scheduler_warmup_steps': sched_warmup,
'scheduler_decay_steps': sched_decay,
'freeze_vision_encoder': freeze_ve,
'train_expert_only': train_eo,
'gradient_checkpointing': grad_ckpt,
'eval_success_rate': eval_sr,
'eval_success_rate_per_task': eval_per_task,
'eval_n_episodes': eval_n,
'final_train_loss': final_loss,
'training_time_s': float(timing.get('TRAINING_TIME_S', 0)),
'peak_gpu_memory_mb': peak_mem or float(timing.get('MAX_GPU_MEM_MB', 0)),
'gpu_type': timing.get('GPU_TYPE', 'unknown'),
'lerobot_commit': timing.get('LEROBOT_COMMIT', 'unknown'),
'timestamp': datetime.now(timezone.utc).isoformat(),
}}
# Save locally
Path('{train_dir}/benchmark_result.json').write_text(json.dumps(row, indent=2, default=str))
# Push to HF dataset
try:
from datasets import Dataset, load_dataset
try:
existing = load_dataset('{hub_dataset}', split='train')
rows = existing.to_list() + [row]
except Exception:
rows = [row]
Dataset.from_list(rows).push_to_hub('{hub_dataset}', split='train')
print('Published result to {hub_dataset}')
except ImportError:
print('datasets library not installed — result saved locally only')
except Exception as e:
print(f'Failed to push to hub: {{e}} — result saved locally')
"
""")
def _generate_sbatch_script(
policy_name: str,
output_dir: Path,
hub_org: str,
benchmark_uuid: str,
hub_dataset: str,
lerobot_commit: str,
) -> str:
cfg = POLICY_CONFIGS[policy_name]
steps = int(COMMON_TRAINING_ARGS["steps"])
log_dir = output_dir / "logs"
train_dir = output_dir / "train" / policy_name
checkpoint_path = train_dir / f"checkpoints/{steps:06d}/pretrained_model"
training_args = _training_cli_args(policy_name, output_dir, hub_org, benchmark_uuid)
eval_args = _cli_args(EVAL_ARGS)
publish = _publish_snippet(policy_name, output_dir, hub_org, benchmark_uuid, hub_dataset)
return textwrap.dedent(f"""\
#!/bin/bash
#SBATCH --job-name=bench_{policy_name}
#SBATCH --nodes=1
#SBATCH --ntasks-per-node=1
#SBATCH --gres=gpu:{cfg.num_gpus}
#SBATCH --cpus-per-task={cfg.num_gpus * 8}
#SBATCH --mem={GPU_MEM_ESTIMATES.get(policy_name, 128)}G
#SBATCH --time=06:00:00
#SBATCH --output={log_dir}/{policy_name}_%j.out
#SBATCH --error={log_dir}/{policy_name}_%j.err
set -euo pipefail
echo "=========================================="
echo "LeRobot LIBERO Benchmark — {policy_name}"
echo "UUID: {benchmark_uuid}"
echo "Start: $(date -Iseconds)"
echo "Host: $(hostname) | GPUs: {cfg.num_gpus}"
echo "=========================================="
START_TIME=$(date +%s)
# GPU memory monitoring (every 30s)
nvidia-smi --query-gpu=index,memory.used,memory.total,gpu_name \\
--format=csv,noheader,nounits -l 30 \\
> "{log_dir}/{policy_name}_gpu_mem.csv" &
GPU_MONITOR_PID=$!
# ── Training ──────────────────────────────────────────────────
echo "[$(date -Iseconds)] Starting training..."
# Capture the exit code with `||` so `set -e` cannot abort the script before
# we record it; a bare `TRAIN_EXIT=$?` after the command would never see a
# failure.
TRAIN_EXIT=0
accelerate launch --num_processes={cfg.num_gpus} \\
    $(which lerobot-train) \\
{training_args} || TRAIN_EXIT=$?
TRAIN_END=$(date +%s)
echo "[$(date -Iseconds)] Training exit code: $TRAIN_EXIT"
# ── Evaluation ────────────────────────────────────────────────
EVAL_EXIT=1
if [ $TRAIN_EXIT -eq 0 ]; then
    echo "[$(date -Iseconds)] Starting evaluation..."
    # Same pattern as training: record the exit code without tripping `set -e`.
    EVAL_EXIT=0
    lerobot-eval \\
        --policy.path="{checkpoint_path}" \\
{eval_args} \\
        --output_dir="{train_dir}/eval_results" || EVAL_EXIT=$?
    echo "[$(date -Iseconds)] Eval exit code: $EVAL_EXIT"
else
    echo "[$(date -Iseconds)] Skipping eval — training failed."
fi
# ── Timing ────────────────────────────────────────────────────
END_TIME=$(date +%s)
kill $GPU_MONITOR_PID 2>/dev/null || true
cat > "{log_dir}/{policy_name}_timing.txt" <<TIMING_EOF
BENCHMARK_UUID={benchmark_uuid}
POLICY_TYPE={policy_name}
TRAINING_TIME_S=$((TRAIN_END - START_TIME))
TOTAL_TIME_S=$((END_TIME - START_TIME))
TRAIN_EXIT=$TRAIN_EXIT
EVAL_EXIT=$EVAL_EXIT
MAX_GPU_MEM_MB=$(awk -F',' '{{print $2}}' "{log_dir}/{policy_name}_gpu_mem.csv" 2>/dev/null | sort -n | tail -1)
GPU_TYPE=$(nvidia-smi --query-gpu=gpu_name --format=csv,noheader | head -1 | xargs)
LEROBOT_COMMIT={lerobot_commit}
TIMING_EOF
# ── Publish result to HF dataset ──────────────────────────────
echo "[$(date -Iseconds)] Publishing result..."
{publish}
echo "=========================================="
echo "Done: $(date -Iseconds)"
echo "Training: $((TRAIN_END - START_TIME))s | Total: $((END_TIME - START_TIME))s"
echo "=========================================="
""")
def _generate_tokenizer_script(
output_dir: Path,
hub_org: str,
benchmark_uuid: str,
) -> str:
cfg = POLICY_CONFIGS["pi0_fast"]
log_dir = output_dir / "logs"
tokenizer_hub_repo = f"{hub_org}/fast-tokenizer-libero"
tok_args = dict(cfg.tokenizer_args)
tok_args["hub_repo_id"] = tokenizer_hub_repo
return textwrap.dedent(f"""\
#!/bin/bash
#SBATCH --job-name=bench_tokenizer
#SBATCH --nodes=1
#SBATCH --ntasks-per-node=1
#SBATCH --gres=gpu:1
#SBATCH --cpus-per-task=8
#SBATCH --mem=64G
#SBATCH --time=01:00:00
#SBATCH --output={log_dir}/tokenizer_%j.out
#SBATCH --error={log_dir}/tokenizer_%j.err
set -euo pipefail
echo "LeRobot — FAST Tokenizer | UUID: {benchmark_uuid}"
lerobot-train-tokenizer \\
{_cli_args(tok_args)}
echo "Tokenizer pushed to: {tokenizer_hub_repo}"
""")
# ──────────────────────────────────────────────────────────────────────
# Main
# ──────────────────────────────────────────────────────────────────────
def main() -> None:
parser = argparse.ArgumentParser(description="Generate SLURM scripts for LeRobot LIBERO benchmark.")
parser.add_argument(
"--policies",
nargs="+",
default=ALL_POLICY_NAMES,
choices=ALL_POLICY_NAMES,
help="Policies to benchmark (default: all).",
)
parser.add_argument("--output_dir", type=Path, required=True, help="Root output directory.")
parser.add_argument("--hub_org", type=str, default="lerobot", help="HuggingFace org.")
parser.add_argument("--hub_dataset", type=str, default=None, help="HF dataset repo for results.")
parser.add_argument("--uuid", type=str, default=None, help="Override benchmark UUID.")
args = parser.parse_args()
benchmark_uuid = args.uuid or str(uuid.uuid4())
output_dir: Path = args.output_dir.resolve()
policies: list[str] = args.policies
hub_org: str = args.hub_org
hub_dataset: str = args.hub_dataset or f"{hub_org}/benchmark-libero"
try:
commit = subprocess.check_output(["git", "rev-parse", "HEAD"], text=True).strip()
except (subprocess.CalledProcessError, FileNotFoundError):
commit = "unknown"
scripts_dir = output_dir / "slurm_scripts"
log_dir = output_dir / "logs"
scripts_dir.mkdir(parents=True, exist_ok=True)
log_dir.mkdir(parents=True, exist_ok=True)
for p in policies:
(output_dir / "train" / p).mkdir(parents=True, exist_ok=True)
generated: dict[str, Path] = {}
# Tokenizer job for pi0_fast
tokenizer_path = None
if "pi0_fast" in policies:
script = _generate_tokenizer_script(output_dir, hub_org, benchmark_uuid)
tokenizer_path = scripts_dir / "00_tokenizer.sh"
tokenizer_path.write_text(script)
tokenizer_path.chmod(0o755)
generated["tokenizer"] = tokenizer_path
tokenizer_hub_repo = f"{hub_org}/fast-tokenizer-libero"
POLICY_CONFIGS["pi0_fast"].extra_policy_args["policy.action_tokenizer_name"] = tokenizer_hub_repo
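        # This mutation happens before the per-policy loop below, so the
        # generated pi0_fast script already points at the tokenizer repo that
        # the 00_tokenizer job will push.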
# Per-policy scripts
for i, name in enumerate(sorted(policies), start=1):
script = _generate_sbatch_script(name, output_dir, hub_org, benchmark_uuid, hub_dataset, commit)
path = scripts_dir / f"{i:02d}_{name}.sh"
path.write_text(script)
path.chmod(0o755)
generated[name] = path
# Manifest
manifest = {
"benchmark_uuid": benchmark_uuid,
"timestamp": datetime.now(UTC).isoformat(),
"lerobot_commit": commit,
"hub_org": hub_org,
"hub_dataset": hub_dataset,
"policies": policies,
"output_dir": str(output_dir),
"scripts": {k: str(v) for k, v in generated.items()},
}
manifest_path = output_dir / "benchmark_manifest.json"
manifest_path.write_text(json.dumps(manifest, indent=2))
# Instructions
print("=" * 60)
print("LeRobot LIBERO Benchmark — Scripts Generated")
print(f"UUID: {benchmark_uuid}")
print(f"Output: {output_dir}")
print(f"Results dataset: {hub_dataset}")
print("=" * 60)
print()
for _name, path in sorted(generated.items()):
print(f" {path}")
print()
if tokenizer_path:
print("IMPORTANT: pi0_fast requires tokenizer training FIRST.")
print(f" 1. sbatch {tokenizer_path}")
print(" 2. Wait for completion")
print(f" 3. sbatch {generated.get('pi0_fast', 'N/A')}")
print(" 4. All other policies can run in parallel")
else:
print("All scripts can be submitted in parallel.")
print()
print("Each job publishes its result to the HF dataset automatically.")
if __name__ == "__main__":
main()