mirror of
https://github.com/huggingface/lerobot.git
synced 2026-05-18 10:10:08 +00:00
feat(benchmarks): add matrix runner and leaderboard
This commit is contained in:
@@ -0,0 +1,142 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
# Copyright 2026 The HuggingFace Inc. team. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import json
|
||||
|
||||
from benchmarks.run_benchmark_matrix import (
|
||||
PlannedJob,
|
||||
compute_gradient_accumulation_steps,
|
||||
plan_jobs,
|
||||
render_sbatch_script,
|
||||
write_manifest,
|
||||
)
|
||||
|
||||
|
||||
def _one_job(job_list: list[PlannedJob]) -> PlannedJob:
|
||||
assert len(job_list) == 1
|
||||
return job_list[0]
|
||||
|
||||
|
||||
def test_compute_gradient_accumulation_steps_for_fixed_effective_batch():
    """Check accumulation steps for 8, 4 and 1 GPUs at an effective batch of 256."""
    # Each pair is (num_gpus, expected gradient accumulation steps); the
    # per-GPU micro-batch is held at 32 for every case.
    expected_steps_by_gpu_count = ((8, 1), (4, 2), (1, 8))

    for gpu_count, expected_steps in expected_steps_by_gpu_count:
        actual_steps = compute_gradient_accumulation_steps(
            effective_batch_size=256,
            num_gpus=gpu_count,
            microbatch_per_gpu=32,
        )
        assert actual_steps == expected_steps
|
||||
|
||||
|
||||
def test_plan_jobs_filters_libero_plus_only(tmp_path):
    """Planning two policies on ``libero_plus`` alone yields one job per policy."""
    planned = plan_jobs(
        output_dir=tmp_path,
        hub_org="lerobot",
        results_repo="lerobot/benchmark-history",
        policies=["pi0", "act"],
        benchmarks=["libero_plus"],
    )

    planned_benchmarks = [entry.benchmark for entry in planned]
    assert planned_benchmarks == ["libero_plus", "libero_plus"]

    planned_policies = [entry.policy for entry in planned]
    assert planned_policies == ["pi0", "act"]
|
||||
|
||||
|
||||
def test_plan_jobs_includes_libero_plus_and_robomme(tmp_path):
    """One policy over two benchmarks yields two jobs, each with effective batch 256."""
    planned = plan_jobs(
        output_dir=tmp_path,
        hub_org="lerobot",
        results_repo="lerobot/benchmark-history",
        policies=["pi0"],
        benchmarks=["libero_plus", "robomme"],
    )

    planned_benchmarks = [entry.benchmark for entry in planned]
    assert planned_benchmarks == ["libero_plus", "robomme"]

    # Both planned jobs are expected to share the same effective batch size.
    for position in (0, 1):
        assert planned[position].effective_batch_size == 256
|
||||
|
||||
|
||||
def test_plan_jobs_sets_expected_gpu_and_accumulation(tmp_path):
    """Each policy is planned with its expected GPU count and accumulation steps."""
    planned = plan_jobs(
        output_dir=tmp_path,
        hub_org="lerobot",
        results_repo="lerobot/benchmark-history",
        policies=["pi0", "xvla", "act"],
        benchmarks=["robomme"],
    )
    job_for_policy = {entry.policy: entry for entry in planned}

    # (policy, expected num_gpus, expected gradient_accumulation_steps)
    expectations = (
        ("pi0", 8, 1),
        ("xvla", 4, 2),
        ("act", 1, 8),
    )
    for policy_name, expected_gpus, expected_steps in expectations:
        assert job_for_policy[policy_name].num_gpus == expected_gpus
        assert job_for_policy[policy_name].gradient_accumulation_steps == expected_steps
|
||||
|
||||
|
||||
def test_render_sbatch_script_contains_train_eval_and_publish(tmp_path):
    """The rendered sbatch script for ``pi0_fast`` on ``robomme`` has the expected fragments."""
    planned = plan_jobs(
        output_dir=tmp_path,
        hub_org="lerobot",
        results_repo="lerobot/benchmark-history",
        policies=["pi0_fast"],
        benchmarks=["robomme"],
    )
    single_job = _one_job(planned)

    rendered = render_sbatch_script(
        job=single_job,
        output_dir=tmp_path,
        results_repo_id="lerobot/benchmark-history",
        git_commit="deadbeef",
    )

    # The script must not reference the Dockerfile path.
    assert "docker/Dockerfile" not in rendered

    # Fragments that must all appear in the rendered script.
    required_fragments = (
        "lerobot-benchmark-robomme:latest",
        '--dataset.repo_id="lerobot/robomme"',
        '--env.type="robomme"',
        "--gradient_accumulation_steps=1",
        "lerobot-train-tokenizer",
        "benchmarks/publish_benchmark_result.py",
    )
    for fragment in required_fragments:
        assert fragment in rendered
|
||||
|
||||
|
||||
def test_write_manifest_records_job_metadata(tmp_path):
    """The written manifest is JSON carrying the commit, results repo and job list."""
    results_repo_id = "lerobot/benchmark-history"
    planned = plan_jobs(
        output_dir=tmp_path,
        hub_org="lerobot",
        results_repo=results_repo_id,
        policies=["pi0"],
        benchmarks=["libero_plus", "robomme"],
    )

    written_path = write_manifest(
        output_dir=tmp_path,
        jobs=planned,
        git_commit="deadbeef",
        hub_org="lerobot",
        results_repo=results_repo_id,
    )

    # Read the manifest back from disk rather than trusting an in-memory value.
    manifest_text = written_path.read_text()
    manifest = json.loads(manifest_text)

    assert manifest["git_commit"] == "deadbeef"
    assert manifest["results_repo"] == "lerobot/benchmark-history"

    recorded_benchmarks = [entry["benchmark"] for entry in manifest["jobs"]]
    assert recorded_benchmarks == ["libero_plus", "robomme"]
|
||||
@@ -0,0 +1,123 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
# Copyright 2026 The HuggingFace Inc. team. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import sys
|
||||
from types import ModuleType
|
||||
from unittest.mock import MagicMock
|
||||
|
||||
import numpy as np
|
||||
|
||||
|
||||
def _install_robomme_stub():
|
||||
stub = ModuleType("robomme")
|
||||
wrapper_stub = ModuleType("robomme.env_record_wrapper")
|
||||
|
||||
class FakeBuilder:
|
||||
def __init__(self, **kwargs):
|
||||
pass
|
||||
|
||||
def make_env_for_episode(self, episode_idx: int, max_steps: int):
|
||||
env = MagicMock()
|
||||
obs = {
|
||||
"front_rgb_list": [np.zeros((256, 256, 3), dtype=np.uint8)],
|
||||
"wrist_rgb_list": [np.zeros((256, 256, 3), dtype=np.uint8)],
|
||||
"joint_state_list": [np.zeros(7, dtype=np.float32)],
|
||||
"gripper_state_list": [np.zeros(2, dtype=np.float32)],
|
||||
}
|
||||
env.reset.return_value = (obs, {"status": "ongoing", "task_goal": "pick the cube"})
|
||||
env.step.return_value = (obs, 0.0, False, False, {"status": "ongoing", "task_goal": ""})
|
||||
return env
|
||||
|
||||
wrapper_stub.BenchmarkEnvBuilder = FakeBuilder
|
||||
stub.env_record_wrapper = wrapper_stub
|
||||
sys.modules["robomme"] = stub
|
||||
sys.modules["robomme.env_record_wrapper"] = wrapper_stub
|
||||
|
||||
|
||||
def _uninstall_robomme_stub():
|
||||
sys.modules.pop("robomme", None)
|
||||
sys.modules.pop("robomme.env_record_wrapper", None)
|
||||
|
||||
|
||||
def test_robomme_env_config_defaults():
    """A default-constructed ``RoboMMEEnv`` config carries the expected field values."""
    from lerobot.envs.configs import RoboMMEEnv

    default_cfg = RoboMMEEnv()

    # (field name, expected default) checked in declaration order.
    expected_defaults = (
        ("task", "PickXtimes"),
        ("fps", 10),
        ("episode_length", 300),
        ("action_space", "joint_angle"),
        ("dataset_split", "test"),
    )
    for field_name, expected_value in expected_defaults:
        assert getattr(default_cfg, field_name) == expected_value

    assert default_cfg.task_ids is None
|
||||
|
||||
|
||||
def test_robomme_features_map():
    """The default ``RoboMMEEnv.features_map`` maps env keys to the expected feature names."""
    from lerobot.envs.configs import RoboMMEEnv
    from lerobot.utils.constants import ACTION, OBS_IMAGES, OBS_STATE

    default_cfg = RoboMMEEnv()

    # (env key, expected mapped feature name)
    expected_pairs = (
        (ACTION, ACTION),
        ("image", f"{OBS_IMAGES}.image"),
        ("wrist_image", f"{OBS_IMAGES}.wrist_image"),
        (OBS_STATE, OBS_STATE),
    )
    for env_key, feature_name in expected_pairs:
        assert default_cfg.features_map[env_key] == feature_name
|
||||
|
||||
|
||||
def test_convert_obs_list_format():
    """``_convert_obs`` reads the last entry of each ``*_list`` observation field."""
    _install_robomme_stub()
    try:
        from lerobot.envs.robomme import RoboMMEGymEnv

        # Bypass ``__init__`` so no real environment is constructed.
        gym_env = RoboMMEGymEnv.__new__(RoboMMEGymEnv)

        image_shape = (256, 256, 3)
        latest_front = np.full(image_shape, 42, dtype=np.uint8)
        latest_wrist = np.full(image_shape, 7, dtype=np.uint8)
        latest_joints = np.arange(7, dtype=np.float32)
        latest_gripper = np.array([0.5, 0.5], dtype=np.float32)

        # Each list holds an all-zero older entry followed by the latest one.
        raw_observation = {
            "front_rgb_list": [np.zeros_like(latest_front), latest_front],
            "wrist_rgb_list": [np.zeros_like(latest_wrist), latest_wrist],
            "joint_state_list": [np.zeros(7, dtype=np.float32), latest_joints],
            "gripper_state_list": [np.zeros(2, dtype=np.float32), latest_gripper],
        }

        converted = gym_env._convert_obs(raw_observation)

        np.testing.assert_array_equal(converted["image"], latest_front)
        np.testing.assert_array_equal(converted["wrist_image"], latest_wrist)

        state_vector = converted["state"]
        assert state_vector.shape == (8,)
        np.testing.assert_array_almost_equal(state_vector[:7], latest_joints)
        assert state_vector[7] == latest_gripper[0]
    finally:
        _uninstall_robomme_stub()
|
||||
|
||||
|
||||
def test_create_robomme_envs_multi_task():
    """A comma-separated task string produces one result entry per task name."""
    _install_robomme_stub()
    try:
        from lerobot.envs.robomme import create_robomme_envs

        # Stand-in environment class; every call returns a fresh mock.
        fake_env_cls = MagicMock(return_value=MagicMock())

        envs_by_task = create_robomme_envs(
            task="PickXtimes,BinFill,StopCube",
            n_envs=1,
            env_cls=fake_env_cls,
        )

        expected_task_names = {"PickXtimes", "BinFill", "StopCube"}
        assert set(envs_by_task.keys()) == expected_task_names
    finally:
        _uninstall_robomme_stub()
|
||||
Reference in New Issue
Block a user