mirror of
https://github.com/huggingface/lerobot.git
synced 2026-05-18 18:20:08 +00:00
feat(benchmarks): add matrix runner and leaderboard
This commit is contained in:
@@ -0,0 +1 @@
|
||||
# Copyright 2026 The HuggingFace Inc. team. All rights reserved.
|
||||
@@ -0,0 +1,156 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
# Copyright 2026 The HuggingFace Inc. team. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
"""Publish benchmark rows and lightweight artifacts to a Hub dataset."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
from datetime import UTC, datetime
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
from lerobot.utils.history_repo import UploadTarget, make_hub_file_url, upload_targets, utc_timestamp_slug
|
||||
|
||||
|
||||
def load_json_if_exists(path: Path) -> dict[str, Any] | None:
|
||||
if not path.exists():
|
||||
return None
|
||||
return json.loads(path.read_text())
|
||||
|
||||
|
||||
def find_latest_train_config_path(run_root: Path) -> Path | None:
|
||||
checkpoints_dir = run_root / "train" / "checkpoints"
|
||||
if not checkpoints_dir.exists():
|
||||
return None
|
||||
candidates = sorted(
|
||||
checkpoints_dir.glob("*/pretrained_model/train_config.json"),
|
||||
key=lambda path: path.parts[-3],
|
||||
)
|
||||
return candidates[-1] if candidates else None
|
||||
|
||||
|
||||
def parse_args() -> argparse.Namespace:
|
||||
parser = argparse.ArgumentParser(description=__doc__)
|
||||
parser.add_argument("--benchmark", required=True)
|
||||
parser.add_argument("--policy", required=True)
|
||||
parser.add_argument("--run_root", required=True, type=Path)
|
||||
parser.add_argument("--results_repo", required=True)
|
||||
parser.add_argument("--git_commit", required=True)
|
||||
parser.add_argument("--num_gpus", required=True, type=int)
|
||||
parser.add_argument("--microbatch_per_gpu", required=True, type=int)
|
||||
parser.add_argument("--gradient_accumulation_steps", required=True, type=int)
|
||||
parser.add_argument("--effective_batch_size", required=True, type=int)
|
||||
parser.add_argument("--train_wall_time_s", required=True, type=float)
|
||||
parser.add_argument("--eval_wall_time_s", required=True, type=float)
|
||||
parser.add_argument("--slurm_job_id", default="")
|
||||
parser.add_argument("--docker_image", required=True)
|
||||
return parser.parse_args()
|
||||
|
||||
|
||||
def build_row(args: argparse.Namespace) -> tuple[dict[str, Any], list[UploadTarget]]:
|
||||
now = datetime.now(UTC)
|
||||
created_at = now.isoformat()
|
||||
timestamp = utc_timestamp_slug(now)
|
||||
run_id = f"{timestamp}__{args.benchmark}__{args.policy}__{args.slurm_job_id or 'manual'}"
|
||||
eval_info = load_json_if_exists(args.run_root / "eval" / "eval_info.json") or {}
|
||||
train_config_path = find_latest_train_config_path(args.run_root)
|
||||
train_config = load_json_if_exists(train_config_path) or {}
|
||||
|
||||
artifact_prefix = f"artifacts/{args.benchmark}/{args.policy}/{run_id}"
|
||||
row_path_in_repo = f"rows/{args.benchmark}/{args.policy}/{run_id}.json"
|
||||
|
||||
row = {
|
||||
"schema_version": 1,
|
||||
"created_at": created_at,
|
||||
"run_id": run_id,
|
||||
"benchmark": args.benchmark,
|
||||
"policy": args.policy,
|
||||
"git_commit": args.git_commit,
|
||||
"slurm_job_id": args.slurm_job_id or None,
|
||||
"docker_image": args.docker_image,
|
||||
"resources": {
|
||||
"num_gpus": args.num_gpus,
|
||||
"microbatch_per_gpu": args.microbatch_per_gpu,
|
||||
"gradient_accumulation_steps": args.gradient_accumulation_steps,
|
||||
"effective_batch_size": args.effective_batch_size,
|
||||
},
|
||||
"timings": {
|
||||
"train_wall_time_s": args.train_wall_time_s,
|
||||
"eval_wall_time_s": args.eval_wall_time_s,
|
||||
"total_wall_time_s": args.train_wall_time_s + args.eval_wall_time_s,
|
||||
},
|
||||
"eval": {
|
||||
"overall": eval_info.get("overall", {}),
|
||||
"per_group": eval_info.get("per_group", {}),
|
||||
"per_task_count": len(eval_info.get("per_task", [])),
|
||||
},
|
||||
"paths": {
|
||||
"run_root": str(args.run_root),
|
||||
"train_dir": str(args.run_root / "train"),
|
||||
"eval_dir": str(args.run_root / "eval"),
|
||||
},
|
||||
"train_config": train_config,
|
||||
"artifact_urls": {
|
||||
"row": make_hub_file_url(args.results_repo, row_path_in_repo),
|
||||
},
|
||||
}
|
||||
|
||||
row_path = args.run_root / "benchmark_row.json"
|
||||
row_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
upload_list = [UploadTarget(local_path=row_path, path_in_repo=row_path_in_repo)]
|
||||
|
||||
eval_info_path = args.run_root / "eval" / "eval_info.json"
|
||||
if eval_info_path.exists():
|
||||
row["artifact_urls"]["eval_info"] = make_hub_file_url(
|
||||
args.results_repo, f"{artifact_prefix}/eval_info.json"
|
||||
)
|
||||
upload_list.append(
|
||||
UploadTarget(local_path=eval_info_path, path_in_repo=f"{artifact_prefix}/eval_info.json")
|
||||
)
|
||||
|
||||
if train_config_path is not None and train_config_path.exists():
|
||||
row["artifact_urls"]["train_config"] = make_hub_file_url(
|
||||
args.results_repo, f"{artifact_prefix}/train_config.json"
|
||||
)
|
||||
upload_list.append(
|
||||
UploadTarget(local_path=train_config_path, path_in_repo=f"{artifact_prefix}/train_config.json")
|
||||
)
|
||||
|
||||
row_path.write_text(json.dumps(row, indent=2, sort_keys=True))
|
||||
return row, upload_list
|
||||
|
||||
|
||||
def main() -> int:
|
||||
args = parse_args()
|
||||
row, upload_list = build_row(args)
|
||||
uploaded = upload_targets(
|
||||
repo_id=args.results_repo,
|
||||
targets=upload_list,
|
||||
repo_type="dataset",
|
||||
private=False,
|
||||
commit_message=f"Add benchmark row {row['run_id']}",
|
||||
)
|
||||
row["uploaded_paths"] = uploaded
|
||||
row_path = args.run_root / "benchmark_row.json"
|
||||
row_path.write_text(json.dumps(row, indent=2, sort_keys=True))
|
||||
print(json.dumps(row, indent=2, sort_keys=True))
|
||||
return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
raise SystemExit(main())
|
||||
@@ -0,0 +1,647 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
# Copyright 2026 The HuggingFace Inc. team. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
"""Generate lightweight SLURM jobs for policy x benchmark benchmarking."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import math
|
||||
import subprocess
|
||||
from dataclasses import asdict, dataclass, field
|
||||
from datetime import UTC, datetime
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
from lerobot.utils.history_repo import utc_timestamp_slug
|
||||
|
||||
MAX_GPUS = 8
|
||||
MIN_GPUS = 1
|
||||
DEFAULT_STEPS = 20_000
|
||||
DEFAULT_EFFECTIVE_BATCH_SIZE = 256
|
||||
DEFAULT_MICROBATCH_PER_GPU = 32
|
||||
DEFAULT_EVAL_BATCH_SIZE = 1
|
||||
DEFAULT_CPUS_PER_GPU = 8
|
||||
DEFAULT_MEMORY_PER_GPU_GB = 40
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class BenchmarkSpec:
|
||||
name: str
|
||||
dataset_repo_id: str
|
||||
docker_image: str
|
||||
eval_env_type: str
|
||||
eval_task: str
|
||||
eval_n_episodes: int
|
||||
train_steps: int = DEFAULT_STEPS
|
||||
effective_batch_size: int = DEFAULT_EFFECTIVE_BATCH_SIZE
|
||||
train_extra_args: dict[str, Any] = field(default_factory=dict)
|
||||
eval_extra_args: dict[str, Any] = field(default_factory=dict)
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class PolicySpec:
|
||||
name: str
|
||||
policy_type: str
|
||||
num_gpus: int
|
||||
policy_path: str | None = None
|
||||
microbatch_per_gpu: int = DEFAULT_MICROBATCH_PER_GPU
|
||||
extra_train_args: dict[str, Any] = field(default_factory=dict)
|
||||
extra_eval_args: dict[str, Any] = field(default_factory=dict)
|
||||
needs_tokenizer: bool = False
|
||||
tokenizer_args: dict[str, Any] = field(default_factory=dict)
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class PlannedJob:
|
||||
benchmark: str
|
||||
policy: str
|
||||
run_rel: str
|
||||
num_gpus: int
|
||||
microbatch_per_gpu: int
|
||||
gradient_accumulation_steps: int
|
||||
effective_batch_size: int
|
||||
docker_image: str
|
||||
train_args: dict[str, Any]
|
||||
eval_args: dict[str, Any]
|
||||
tokenizer_args: dict[str, Any] | None
|
||||
script_path: str
|
||||
|
||||
|
||||
BENCHMARKS: dict[str, BenchmarkSpec] = {
|
||||
"libero_plus": BenchmarkSpec(
|
||||
name="libero_plus",
|
||||
dataset_repo_id="lerobot/libero_plus",
|
||||
docker_image="lerobot-benchmark-libero-plus:latest",
|
||||
eval_env_type="libero_plus",
|
||||
eval_task="libero_spatial,libero_object,libero_goal,libero_10",
|
||||
eval_n_episodes=10,
|
||||
train_extra_args={
|
||||
"rename_map": {
|
||||
"observation.images.image": "observation.images.camera1",
|
||||
"observation.images.image2": "observation.images.camera2",
|
||||
},
|
||||
},
|
||||
eval_extra_args={
|
||||
"env.camera_name_mapping": {
|
||||
"agentview_image": "camera1",
|
||||
"robot0_eye_in_hand_image": "camera2",
|
||||
},
|
||||
"env.max_parallel_tasks": 1,
|
||||
"eval.batch_size": DEFAULT_EVAL_BATCH_SIZE,
|
||||
"eval.use_async_envs": False,
|
||||
"eval.max_episodes_rendered": 0,
|
||||
"policy.device": "cuda",
|
||||
},
|
||||
),
|
||||
"robomme": BenchmarkSpec(
|
||||
name="robomme",
|
||||
dataset_repo_id="lerobot/robomme",
|
||||
docker_image="lerobot-benchmark-robomme:latest",
|
||||
eval_env_type="robomme",
|
||||
eval_task=(
|
||||
"BinFill,PickXtimes,SwingXtimes,StopCube,VideoUnmask,VideoUnmaskSwap,"
|
||||
"ButtonUnmask,ButtonUnmaskSwap,PickHighlight,VideoRepick,VideoPlaceButton,"
|
||||
"VideoPlaceOrder,MoveCube,InsertPeg,PatternLock,RouteStick"
|
||||
),
|
||||
eval_n_episodes=50,
|
||||
train_extra_args={
|
||||
"rename_map": {
|
||||
"observation.images.image": "observation.images.camera1",
|
||||
"observation.images.wrist_image": "observation.images.camera2",
|
||||
},
|
||||
},
|
||||
eval_extra_args={
|
||||
"env.dataset_split": "test",
|
||||
"env.max_parallel_tasks": 1,
|
||||
"rename_map": {
|
||||
"observation.images.image": "observation.images.camera1",
|
||||
"observation.images.wrist_image": "observation.images.camera2",
|
||||
},
|
||||
"eval.batch_size": DEFAULT_EVAL_BATCH_SIZE,
|
||||
"eval.use_async_envs": False,
|
||||
"eval.max_episodes_rendered": 0,
|
||||
"policy.device": "cuda",
|
||||
},
|
||||
),
|
||||
}
|
||||
|
||||
|
||||
POLICIES: dict[str, PolicySpec] = {
|
||||
"pi0": PolicySpec(
|
||||
name="pi0",
|
||||
policy_type="pi0",
|
||||
policy_path="lerobot/pi0_base",
|
||||
num_gpus=8,
|
||||
extra_train_args={
|
||||
"policy.n_action_steps": 30,
|
||||
"policy.scheduler_decay_steps": DEFAULT_STEPS,
|
||||
"policy.empty_cameras": 0,
|
||||
},
|
||||
),
|
||||
"pi0_fast": PolicySpec(
|
||||
name="pi0_fast",
|
||||
policy_type="pi0_fast",
|
||||
policy_path="lerobot/pi0fast-base",
|
||||
num_gpus=8,
|
||||
extra_train_args={
|
||||
"policy.n_action_steps": 30,
|
||||
"policy.scheduler_decay_steps": DEFAULT_STEPS,
|
||||
"policy.empty_cameras": 0,
|
||||
},
|
||||
needs_tokenizer=True,
|
||||
tokenizer_args={
|
||||
"action_horizon": 30,
|
||||
"encoded_dims": "0:7",
|
||||
"normalization_mode": "QUANTILES",
|
||||
"vocab_size": 1024,
|
||||
"scale": 10.0,
|
||||
"push_to_hub": True,
|
||||
},
|
||||
),
|
||||
"pi05": PolicySpec(
|
||||
name="pi05",
|
||||
policy_type="pi05",
|
||||
policy_path="lerobot/pi05_base",
|
||||
num_gpus=8,
|
||||
extra_train_args={
|
||||
"policy.n_action_steps": 30,
|
||||
"policy.scheduler_decay_steps": DEFAULT_STEPS,
|
||||
"policy.empty_cameras": 0,
|
||||
},
|
||||
),
|
||||
"groot": PolicySpec(
|
||||
name="groot",
|
||||
policy_type="groot",
|
||||
num_gpus=8,
|
||||
extra_train_args={
|
||||
"policy.n_action_steps": 30,
|
||||
"policy.base_model_path": "nvidia/GR00T-N1.5-3B",
|
||||
"policy.tune_diffusion_model": True,
|
||||
"policy.tune_projector": True,
|
||||
"policy.tune_llm": False,
|
||||
"policy.tune_visual": False,
|
||||
"policy.use_bf16": True,
|
||||
},
|
||||
),
|
||||
"act": PolicySpec(
|
||||
name="act",
|
||||
policy_type="act",
|
||||
num_gpus=1,
|
||||
extra_train_args={
|
||||
"policy.n_action_steps": 30,
|
||||
},
|
||||
),
|
||||
"diffusion": PolicySpec(
|
||||
name="diffusion",
|
||||
policy_type="diffusion",
|
||||
num_gpus=1,
|
||||
extra_train_args={
|
||||
"policy.horizon": 32,
|
||||
"policy.n_action_steps": 30,
|
||||
"policy.n_obs_steps": 2,
|
||||
},
|
||||
),
|
||||
"smolvla": PolicySpec(
|
||||
name="smolvla",
|
||||
policy_type="smolvla",
|
||||
policy_path="lerobot/smolvla_base",
|
||||
num_gpus=8,
|
||||
extra_train_args={
|
||||
"policy.n_action_steps": 30,
|
||||
"policy.load_vlm_weights": True,
|
||||
"policy.freeze_vision_encoder": False,
|
||||
"policy.train_expert_only": False,
|
||||
"policy.scheduler_decay_steps": DEFAULT_STEPS,
|
||||
"policy.empty_cameras": 1,
|
||||
},
|
||||
),
|
||||
"xvla": PolicySpec(
|
||||
name="xvla",
|
||||
policy_type="xvla",
|
||||
policy_path="lerobot/xvla-widowx",
|
||||
num_gpus=4,
|
||||
extra_train_args={
|
||||
"policy.n_action_steps": 32,
|
||||
"policy.scheduler_decay_steps": DEFAULT_STEPS,
|
||||
"policy.empty_cameras": 1,
|
||||
},
|
||||
),
|
||||
"multi_task_dit": PolicySpec(
|
||||
name="multi_task_dit",
|
||||
policy_type="multi_task_dit",
|
||||
num_gpus=1,
|
||||
extra_train_args={
|
||||
"policy.horizon": 32,
|
||||
"policy.n_action_steps": 30,
|
||||
},
|
||||
),
|
||||
}
|
||||
|
||||
|
||||
def normalize_repo_id(hub_org: str, repo_or_id: str) -> str:
|
||||
return repo_or_id if "/" in repo_or_id else f"{hub_org}/{repo_or_id}"
|
||||
|
||||
|
||||
def get_requested_names(
|
||||
requested: list[str] | None,
|
||||
available: dict[str, Any],
|
||||
*,
|
||||
kind: str,
|
||||
) -> list[str]:
|
||||
if not requested:
|
||||
return list(available)
|
||||
unknown = sorted(set(requested) - set(available))
|
||||
if unknown:
|
||||
raise ValueError(f"Unknown {kind}: {', '.join(unknown)}. Available: {', '.join(available)}")
|
||||
return requested
|
||||
|
||||
|
||||
def compute_gradient_accumulation_steps(
|
||||
*,
|
||||
effective_batch_size: int,
|
||||
num_gpus: int,
|
||||
microbatch_per_gpu: int,
|
||||
) -> int:
|
||||
per_step_batch = num_gpus * microbatch_per_gpu
|
||||
if effective_batch_size % per_step_batch != 0:
|
||||
raise ValueError(
|
||||
f"Cannot reach effective batch {effective_batch_size} with {num_gpus=} and "
|
||||
f"{microbatch_per_gpu=}."
|
||||
)
|
||||
return effective_batch_size // per_step_batch
|
||||
|
||||
|
||||
def make_run_slug() -> str:
|
||||
return utc_timestamp_slug()
|
||||
|
||||
|
||||
def shell_value(value: Any) -> str:
|
||||
if isinstance(value, bool):
|
||||
value = "true" if value else "false"
|
||||
elif isinstance(value, (dict, list)):
|
||||
value = json.dumps(value, sort_keys=True)
|
||||
else:
|
||||
value = str(value)
|
||||
escaped = (
|
||||
value.replace("\\", "\\\\")
|
||||
.replace('"', '\\"')
|
||||
.replace("$", "\\$")
|
||||
.replace("`", "\\`")
|
||||
)
|
||||
return f'"{escaped}"'
|
||||
|
||||
|
||||
def format_cli_args(args: dict[str, Any]) -> str:
|
||||
lines = []
|
||||
for key, value in args.items():
|
||||
lines.append(f" --{key}={shell_value(value)}")
|
||||
return " \\\n".join(lines)
|
||||
|
||||
|
||||
def build_train_args(
|
||||
*,
|
||||
benchmark: BenchmarkSpec,
|
||||
policy: PolicySpec,
|
||||
train_dir: str,
|
||||
gradient_accumulation_steps: int,
|
||||
) -> dict[str, Any]:
|
||||
args: dict[str, Any] = {
|
||||
"dataset.repo_id": benchmark.dataset_repo_id,
|
||||
"output_dir": train_dir,
|
||||
"steps": benchmark.train_steps,
|
||||
"batch_size": policy.microbatch_per_gpu,
|
||||
"gradient_accumulation_steps": gradient_accumulation_steps,
|
||||
"eval_freq": 0,
|
||||
"save_freq": benchmark.train_steps,
|
||||
"save_checkpoint": True,
|
||||
"log_freq": 100,
|
||||
"wandb.enable": False,
|
||||
"policy.push_to_hub": False,
|
||||
"policy.device": "cuda",
|
||||
}
|
||||
if policy.policy_path:
|
||||
args["policy.path"] = policy.policy_path
|
||||
else:
|
||||
args["policy.type"] = policy.policy_type
|
||||
args.update(benchmark.train_extra_args)
|
||||
args.update(policy.extra_train_args)
|
||||
return args
|
||||
|
||||
|
||||
def build_eval_args(
|
||||
*,
|
||||
benchmark: BenchmarkSpec,
|
||||
policy: PolicySpec,
|
||||
checkpoint_path: str,
|
||||
eval_dir: str,
|
||||
) -> dict[str, Any]:
|
||||
args: dict[str, Any] = {
|
||||
"policy.path": checkpoint_path,
|
||||
"env.type": benchmark.eval_env_type,
|
||||
"env.task": benchmark.eval_task,
|
||||
"eval.n_episodes": benchmark.eval_n_episodes,
|
||||
"output_dir": eval_dir,
|
||||
}
|
||||
args.update(benchmark.eval_extra_args)
|
||||
args.update(policy.extra_eval_args)
|
||||
return args
|
||||
|
||||
|
||||
def plan_jobs(
|
||||
*,
|
||||
output_dir: Path,
|
||||
hub_org: str,
|
||||
results_repo: str,
|
||||
policies: list[str],
|
||||
benchmarks: list[str],
|
||||
) -> list[PlannedJob]:
|
||||
_ = hub_org
|
||||
_ = results_repo
|
||||
scripts_dir = output_dir / "slurm"
|
||||
jobs: list[PlannedJob] = []
|
||||
for benchmark_name in benchmarks:
|
||||
benchmark = BENCHMARKS[benchmark_name]
|
||||
for policy_name in policies:
|
||||
policy = POLICIES[policy_name]
|
||||
num_gpus = max(MIN_GPUS, min(policy.num_gpus, MAX_GPUS))
|
||||
run_rel = f"runs/{benchmark_name}/{policy_name}/{make_run_slug()}"
|
||||
run_root = f"/benchmark-output/{run_rel}"
|
||||
gradient_accumulation_steps = compute_gradient_accumulation_steps(
|
||||
effective_batch_size=benchmark.effective_batch_size,
|
||||
num_gpus=num_gpus,
|
||||
microbatch_per_gpu=policy.microbatch_per_gpu,
|
||||
)
|
||||
train_dir = f"{run_root}/train"
|
||||
checkpoint_path = f"{train_dir}/checkpoints/{benchmark.train_steps:06d}/pretrained_model"
|
||||
eval_dir = f"{run_root}/eval"
|
||||
train_args = build_train_args(
|
||||
benchmark=benchmark,
|
||||
policy=policy,
|
||||
train_dir=train_dir,
|
||||
gradient_accumulation_steps=gradient_accumulation_steps,
|
||||
)
|
||||
eval_args = build_eval_args(
|
||||
benchmark=benchmark,
|
||||
policy=policy,
|
||||
checkpoint_path=checkpoint_path,
|
||||
eval_dir=eval_dir,
|
||||
)
|
||||
tokenizer_args = None
|
||||
if policy.needs_tokenizer:
|
||||
tokenizer_repo_id = f"{hub_org}/{policy_name}-{benchmark_name}-tokenizer"
|
||||
tokenizer_args = {
|
||||
"repo_id": benchmark.dataset_repo_id,
|
||||
"output_dir": f"{run_root}/tokenizer",
|
||||
"hub_repo_id": tokenizer_repo_id,
|
||||
**policy.tokenizer_args,
|
||||
}
|
||||
train_args["policy.action_tokenizer_name"] = tokenizer_repo_id
|
||||
script_path = str(scripts_dir / f"{benchmark_name}__{policy_name}.sbatch")
|
||||
jobs.append(
|
||||
PlannedJob(
|
||||
benchmark=benchmark_name,
|
||||
policy=policy_name,
|
||||
run_rel=run_rel,
|
||||
num_gpus=num_gpus,
|
||||
microbatch_per_gpu=policy.microbatch_per_gpu,
|
||||
gradient_accumulation_steps=gradient_accumulation_steps,
|
||||
effective_batch_size=benchmark.effective_batch_size,
|
||||
docker_image=benchmark.docker_image,
|
||||
train_args=train_args,
|
||||
eval_args=eval_args,
|
||||
tokenizer_args=tokenizer_args,
|
||||
script_path=script_path,
|
||||
)
|
||||
)
|
||||
return jobs
|
||||
|
||||
|
||||
def render_sbatch_script(
|
||||
*,
|
||||
job: PlannedJob,
|
||||
output_dir: Path,
|
||||
results_repo_id: str,
|
||||
git_commit: str,
|
||||
) -> str:
|
||||
host_output_dir = output_dir.resolve()
|
||||
run_root = f"/benchmark-output/{job.run_rel}"
|
||||
host_run_root = host_output_dir / job.run_rel
|
||||
cpus_per_task = max(DEFAULT_CPUS_PER_GPU, DEFAULT_CPUS_PER_GPU * job.num_gpus)
|
||||
mem_gb = max(DEFAULT_MEMORY_PER_GPU_GB, DEFAULT_MEMORY_PER_GPU_GB * job.num_gpus)
|
||||
gpu_ids_expr = "${GPU_IDS}"
|
||||
train_cli = format_cli_args(job.train_args)
|
||||
eval_cli = format_cli_args(job.eval_args)
|
||||
tokenizer_command = ""
|
||||
if job.tokenizer_args:
|
||||
tokenizer_cli = format_cli_args(job.tokenizer_args)
|
||||
tokenizer_command = f"""
|
||||
docker run --rm --gpus all \\
|
||||
--shm-size=16g \\
|
||||
-e CUDA_VISIBLE_DEVICES={gpu_ids_expr} \\
|
||||
-e HF_TOKEN="${{HF_TOKEN:-}}" \\
|
||||
-e HF_USER_TOKEN="${{HF_TOKEN:-}}" \\
|
||||
-e HF_HOME=/tmp/hf \\
|
||||
-v "{host_output_dir}:/benchmark-output" \\
|
||||
-w /lerobot \\
|
||||
"{job.docker_image}" \\
|
||||
bash -lc '
|
||||
set -euo pipefail
|
||||
if [[ -n "${{HF_TOKEN:-}}" ]]; then
|
||||
hf auth login --token "${{HF_TOKEN}}" --add-to-git-credential 2>/dev/null || true
|
||||
fi
|
||||
lerobot-train-tokenizer \\
|
||||
{tokenizer_cli}
|
||||
'
|
||||
"""
|
||||
return f"""#!/bin/bash
|
||||
#SBATCH --job-name=bench-{job.benchmark}-{job.policy}
|
||||
#SBATCH --gres=gpu:{job.num_gpus}
|
||||
#SBATCH --cpus-per-task={cpus_per_task}
|
||||
#SBATCH --mem={mem_gb}G
|
||||
#SBATCH --output={output_dir.resolve()}/logs/{job.benchmark}__{job.policy}__%j.out
|
||||
#SBATCH --error={output_dir.resolve()}/logs/{job.benchmark}__{job.policy}__%j.err
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
HF_TOKEN="${{HF_TOKEN:-${{HF_USER_TOKEN:-}}}}"
|
||||
GPU_IDS="$(seq -s, 0 $(({job.num_gpus} - 1)))"
|
||||
RUN_ROOT="{run_root}"
|
||||
|
||||
mkdir -p "{host_output_dir}/logs"
|
||||
mkdir -p "{host_run_root.parent}"
|
||||
|
||||
{tokenizer_command}
|
||||
|
||||
TRAIN_START="$(date +%s)"
|
||||
docker run --rm --gpus all \\
|
||||
--shm-size=16g \\
|
||||
-e CUDA_VISIBLE_DEVICES="${{GPU_IDS}}" \\
|
||||
-e HF_TOKEN="${{HF_TOKEN:-}}" \\
|
||||
-e HF_USER_TOKEN="${{HF_TOKEN:-}}" \\
|
||||
-e HF_HOME=/tmp/hf \\
|
||||
-v "{host_output_dir}:/benchmark-output" \\
|
||||
-w /lerobot \\
|
||||
"{job.docker_image}" \\
|
||||
bash -lc '
|
||||
set -euo pipefail
|
||||
if [[ -n "${{HF_TOKEN:-}}" ]]; then
|
||||
hf auth login --token "${{HF_TOKEN}}" --add-to-git-credential 2>/dev/null || true
|
||||
fi
|
||||
accelerate launch --num_processes={job.num_gpus} $(which lerobot-train) \\
|
||||
{train_cli}
|
||||
'
|
||||
TRAIN_END="$(date +%s)"
|
||||
|
||||
EVAL_START="$(date +%s)"
|
||||
docker run --rm --gpus all \\
|
||||
--shm-size=16g \\
|
||||
-e CUDA_VISIBLE_DEVICES="${{GPU_IDS}}" \\
|
||||
-e HF_TOKEN="${{HF_TOKEN:-}}" \\
|
||||
-e HF_USER_TOKEN="${{HF_TOKEN:-}}" \\
|
||||
-e HF_HOME=/tmp/hf \\
|
||||
-v "{host_output_dir}:/benchmark-output" \\
|
||||
-w /lerobot \\
|
||||
"{job.docker_image}" \\
|
||||
bash -lc '
|
||||
set -euo pipefail
|
||||
if [[ -n "${{HF_TOKEN:-}}" ]]; then
|
||||
hf auth login --token "${{HF_TOKEN}}" --add-to-git-credential 2>/dev/null || true
|
||||
fi
|
||||
lerobot-eval \\
|
||||
{eval_cli}
|
||||
'
|
||||
EVAL_END="$(date +%s)"
|
||||
TRAIN_WALL_TIME_S="$((TRAIN_END - TRAIN_START))"
|
||||
EVAL_WALL_TIME_S="$((EVAL_END - EVAL_START))"
|
||||
|
||||
docker run --rm --gpus all \\
|
||||
--shm-size=16g \\
|
||||
-e CUDA_VISIBLE_DEVICES="${{GPU_IDS}}" \\
|
||||
-e HF_TOKEN="${{HF_TOKEN:-}}" \\
|
||||
-e HF_USER_TOKEN="${{HF_TOKEN:-}}" \\
|
||||
-e HF_HOME=/tmp/hf \\
|
||||
-e RUN_ROOT="${{RUN_ROOT}}" \\
|
||||
-e TRAIN_WALL_TIME_S="${{TRAIN_WALL_TIME_S}}" \\
|
||||
-e EVAL_WALL_TIME_S="${{EVAL_WALL_TIME_S}}" \\
|
||||
-v "{host_output_dir}:/benchmark-output" \\
|
||||
-w /lerobot \\
|
||||
"{job.docker_image}" \\
|
||||
bash -lc '
|
||||
set -euo pipefail
|
||||
if [[ -n "${{HF_TOKEN:-}}" ]]; then
|
||||
hf auth login --token "${{HF_TOKEN}}" --add-to-git-credential 2>/dev/null || true
|
||||
fi
|
||||
uv run python benchmarks/publish_benchmark_result.py \\
|
||||
--benchmark={job.benchmark} \\
|
||||
--policy={job.policy} \\
|
||||
--run_root="${{RUN_ROOT}}" \\
|
||||
--results_repo={results_repo_id} \\
|
||||
--git_commit={git_commit} \\
|
||||
--num_gpus={job.num_gpus} \\
|
||||
--microbatch_per_gpu={job.microbatch_per_gpu} \\
|
||||
--gradient_accumulation_steps={job.gradient_accumulation_steps} \\
|
||||
--effective_batch_size={job.effective_batch_size} \\
|
||||
--train_wall_time_s="${{TRAIN_WALL_TIME_S}}" \\
|
||||
--eval_wall_time_s="${{EVAL_WALL_TIME_S}}" \\
|
||||
--slurm_job_id="${{SLURM_JOB_ID:-}}" \\
|
||||
--docker_image={job.docker_image}
|
||||
'
|
||||
"""
|
||||
|
||||
|
||||
def write_manifest(
|
||||
*,
|
||||
output_dir: Path,
|
||||
jobs: list[PlannedJob],
|
||||
git_commit: str,
|
||||
hub_org: str,
|
||||
results_repo: str,
|
||||
) -> Path:
|
||||
manifest = {
|
||||
"generated_at": datetime.now(UTC).isoformat(),
|
||||
"git_commit": git_commit,
|
||||
"hub_org": hub_org,
|
||||
"results_repo": results_repo,
|
||||
"jobs": [asdict(job) for job in jobs],
|
||||
}
|
||||
manifest_path = output_dir / "manifest.json"
|
||||
manifest_path.write_text(json.dumps(manifest, indent=2, sort_keys=True))
|
||||
return manifest_path
|
||||
|
||||
|
||||
def parse_args() -> argparse.Namespace:
|
||||
parser = argparse.ArgumentParser(description=__doc__)
|
||||
parser.add_argument("--policies", nargs="*", default=None)
|
||||
parser.add_argument("--benchmarks", nargs="*", default=None)
|
||||
parser.add_argument("--output_dir", required=True, type=Path)
|
||||
parser.add_argument("--hub_org", required=True)
|
||||
parser.add_argument("--results_repo", required=True)
|
||||
parser.add_argument("--submit", action="store_true")
|
||||
return parser.parse_args()
|
||||
|
||||
|
||||
def get_git_commit() -> str:
|
||||
return subprocess.check_output(["git", "rev-parse", "HEAD"], text=True).strip()
|
||||
|
||||
|
||||
def main() -> int:
|
||||
args = parse_args()
|
||||
args.output_dir.mkdir(parents=True, exist_ok=True)
|
||||
(args.output_dir / "slurm").mkdir(parents=True, exist_ok=True)
|
||||
(args.output_dir / "logs").mkdir(parents=True, exist_ok=True)
|
||||
|
||||
selected_policies = get_requested_names(args.policies, POLICIES, kind="policies")
|
||||
selected_benchmarks = get_requested_names(args.benchmarks, BENCHMARKS, kind="benchmarks")
|
||||
git_commit = get_git_commit()
|
||||
results_repo_id = normalize_repo_id(args.hub_org, args.results_repo)
|
||||
|
||||
jobs = plan_jobs(
|
||||
output_dir=args.output_dir,
|
||||
hub_org=args.hub_org,
|
||||
results_repo=results_repo_id,
|
||||
policies=selected_policies,
|
||||
benchmarks=selected_benchmarks,
|
||||
)
|
||||
|
||||
for job in jobs:
|
||||
script = render_sbatch_script(
|
||||
job=job,
|
||||
output_dir=args.output_dir,
|
||||
results_repo_id=results_repo_id,
|
||||
git_commit=git_commit,
|
||||
)
|
||||
script_path = Path(job.script_path)
|
||||
script_path.write_text(script)
|
||||
script_path.chmod(0o755)
|
||||
if args.submit:
|
||||
subprocess.run(["sbatch", str(script_path)], check=True)
|
||||
|
||||
manifest_path = write_manifest(
|
||||
output_dir=args.output_dir,
|
||||
jobs=jobs,
|
||||
git_commit=git_commit,
|
||||
hub_org=args.hub_org,
|
||||
results_repo=results_repo_id,
|
||||
)
|
||||
print(f"Wrote {len(jobs)} benchmark jobs to {args.output_dir}")
|
||||
print(f"Manifest: {manifest_path}")
|
||||
return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
raise SystemExit(main())
|
||||
Reference in New Issue
Block a user