mirror of
https://github.com/huggingface/lerobot.git
synced 2026-05-15 16:49:55 +00:00
452d9abaa4
- spaces/health-dashboard/app.py: Gradio Space that queries the GitHub Actions API directly (no extra datastore). Shows benchmark status badges, success-rate and duration trend charts, and embeds the latest rollout video per benchmark. Results cached 5 min in-memory; video files cached on disk by artifact ID so downloads only happen once. - spaces/health-dashboard/requirements.txt + README.md: Space card with setup instructions for the GITHUB_RO_TOKEN secret (actions:read, metadata:read only). - scripts/ci/parse_eval_metrics.py: runs on the CI host after each eval, reads eval_info.json written by lerobot-eval, extracts pc_success and n_episodes, and writes metrics.json to the artifacts dir. - .github/workflows/benchmark_tests.yml: add "Parse … metrics" and "Upload … metrics" steps (if: always()) after each eval so the dashboard has data even when the eval fails. The Space should be deployed as a private Space under the huggingface org. Required secret: GITHUB_RO_TOKEN (fine-grained, read-only). Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
233 lines
8.3 KiB
YAML
233 lines
8.3 KiB
YAML
# Copyright 2025 The HuggingFace Inc. team. All rights reserved.
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
|
|
# Integration tests: build an isolated Docker image per benchmark and run a
|
|
# 1-episode smoke eval. Each benchmark gets its own image so incompatible
|
|
# dependency trees (e.g. hf-libero vs metaworld==3.0.0) can never collide.
|
|
#
|
|
# To add a new benchmark:
|
|
# 1. Add docker/Dockerfile.benchmark.<name> (install only lerobot[<name>])
|
|
# 2. Copy one of the jobs below and adjust the image name and eval command.
|
|
name: Benchmark Integration Tests

on:
  # Manual trigger from the Actions tab.
  workflow_dispatch:

  # Pushes to the listed branches, restricted to files that can change the
  # outcome of a benchmark eval (env code, the eval script, the benchmark
  # Dockerfiles, this workflow, and the dependency manifest).
  push:
    branches: [feat/benchmark-ci, main]
    paths:
      - 'src/lerobot/envs/**'
      - 'src/lerobot/scripts/lerobot_eval.py'
      - 'docker/Dockerfile.benchmark.*'
      - '.github/workflows/benchmark_tests.yml'
      - 'pyproject.toml'

  # Pull requests targeting main, using the same path filter as pushes.
  pull_request:
    branches: [main]
    paths:
      - 'src/lerobot/envs/**'
      - 'src/lerobot/scripts/lerobot_eval.py'
      - 'docker/Dockerfile.benchmark.*'
      - '.github/workflows/benchmark_tests.yml'
      - 'pyproject.toml'
|
|
|
|
# Least privilege: the workflow token can only read repository contents.
permissions:
  contents: read

# Workflow-level environment variables, available to every job.
# Both values are quoted so they stay strings (3.12 would otherwise be a float).
env:
  UV_VERSION: '0.8.0'
  PYTHON_VERSION: '3.12'

# One concurrency group per workflow + PR source branch: a newer run cancels
# the one still in flight. When `github.head_ref` is empty the group falls
# back to the run id, which is unique, so those runs never cancel each other.
concurrency:
  group: '${{ github.workflow }}-${{ github.head_ref || github.run_id }}'
  cancel-in-progress: true
|
|
|
|
jobs:
  # ── LIBERO ────────────────────────────────────────────────────────────────
  # Isolated image: lerobot[libero] only (hf-libero, dm-control, mujoco chain)
  #
  # Flow: build the benchmark image, run a 1-episode eval inside it with the
  # host directory /tmp/libero-artifacts mounted at /artifacts, then parse the
  # eval output on the host and upload the rollout video and metrics.json.
  libero-integration-test:
    name: Libero — build image + 1-episode eval
    runs-on:
      group: aws-g6-4xlarge-plus
    env:
      # Exposed to every step as an environment variable; steps forward it
      # into containers by name so the value never appears on a command line.
      HF_USER_TOKEN: ${{ secrets.LEROBOT_HF_USER }}

    steps:
      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
        with:
          persist-credentials: false
          lfs: true

      # /tmp on this runner can outlive a single run (the build cache below
      # relies on that), and the parse/upload steps run with `if: always()`.
      # Start from an empty directory so a failed build or eval can never
      # publish the previous run's video or metrics.json. The removal is
      # best-effort: leftovers owned by another user must not fail the job.
      - name: Reset Libero artifacts dir
        run: |
          rm -rf /tmp/libero-artifacts 2>/dev/null || true
          mkdir -p /tmp/libero-artifacts && chmod 777 /tmp/libero-artifacts

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3 # zizmor: ignore[unpinned-uses]
        with:
          cache-binary: false

      # Build the benchmark-specific image and load it into the local Docker
      # daemon (push: false, load: true). The layer cache is read from and
      # written to /tmp/.buildx-cache-libero on the runner, so it is reused
      # across re-runs on the same machine.
      - name: Build Libero benchmark image
        uses: docker/build-push-action@v6 # zizmor: ignore[unpinned-uses]
        with:
          context: .
          file: docker/Dockerfile.benchmark.libero
          push: false
          load: true
          tags: lerobot-benchmark-libero:ci
          cache-from: type=local,src=/tmp/.buildx-cache-libero
          cache-to: type=local,dest=/tmp/.buildx-cache-libero,mode=max

      # Credential check only: the container is removed (--rm) and nothing is
      # mounted, so the login itself does not persist to later steps.
      # The token is forwarded by name (`-e HF_USER_TOKEN`) and expanded by the
      # shell *inside* the container; the single-quoted script keeps the host
      # shell from splicing the secret into the container's argv.
      - name: Login to Hugging Face
        if: env.HF_USER_TOKEN != ''
        run: |
          docker run --rm \
            -e HF_HOME=/tmp/hf \
            -e HF_USER_TOKEN \
            lerobot-benchmark-libero:ci \
            bash -c 'hf auth login --token "$HF_USER_TOKEN" --add-to-git-credential && hf auth whoami'

      # Runs lerobot-eval for one episode on the GPU. Login inside the
      # container is best-effort (`|| true`), so the step's exit status is the
      # exit status of lerobot-eval. Eval output lands in /artifacts, i.e.
      # /tmp/libero-artifacts on the host.
      - name: Run Libero smoke eval (1 episode)
        run: |
          mkdir -p /tmp/libero-artifacts && chmod 777 /tmp/libero-artifacts
          docker run --rm --gpus all \
            --shm-size=4g \
            -e HF_HOME=/tmp/hf \
            -e HF_USER_TOKEN \
            -e HF_HUB_DOWNLOAD_TIMEOUT=300 \
            -v /tmp/libero-artifacts:/artifacts \
            lerobot-benchmark-libero:ci \
            bash -c "
              hf auth login --token \"\$HF_USER_TOKEN\" --add-to-git-credential 2>/dev/null || true
              lerobot-eval \
                --policy.path=pepijn223/smolvla_libero \
                --env.type=libero \
                --env.task=libero_spatial \
                --eval.batch_size=1 \
                --eval.n_episodes=1 \
                --eval.use_async_envs=false \
                --policy.device=cuda \
                '--env.camera_name_mapping={\"agentview_image\": \"camera1\", \"robot0_eye_in_hand_image\": \"camera2\"}' \
                --policy.empty_cameras=1 \
                --output_dir=/artifacts
            "

      # Runs on the host (not in the container) against the mounted artifacts
      # directory. `always()` keeps this step running after a failed eval.
      - name: Parse Libero eval metrics
        if: always()
        run: |
          python scripts/ci/parse_eval_metrics.py \
            --artifacts-dir /tmp/libero-artifacts \
            --env libero \
            --task libero_spatial \
            --policy pepijn223/smolvla_libero

      # A missing path only produces a warning, so a failed eval does not turn
      # the upload steps into additional failures.
      - name: Upload Libero rollout video
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: libero-rollout-video
          path: /tmp/libero-artifacts/videos/
          if-no-files-found: warn

      - name: Upload Libero eval metrics
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: libero-metrics
          path: /tmp/libero-artifacts/metrics.json
          if-no-files-found: warn
|
|
|
|
# ── METAWORLD ─────────────────────────────────────────────────────────────
# Isolated image: lerobot[metaworld] only (metaworld==3.0.0, mujoco>=3 chain)
#
# Flow: build the benchmark image, run a 1-episode eval inside it with the
# host directory /tmp/metaworld-artifacts mounted at /artifacts, then parse
# the eval output on the host and upload the rollout video and metrics.json.
  metaworld-integration-test:
    name: MetaWorld — build image + 1-episode eval
    runs-on:
      group: aws-g6-4xlarge-plus
    env:
      # Exposed to every step as an environment variable; the eval step
      # forwards it into the container by name so the value never appears on
      # a command line.
      HF_USER_TOKEN: ${{ secrets.LEROBOT_HF_USER }}

    steps:
      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
        with:
          persist-credentials: false
          lfs: true

      # /tmp on this runner can outlive a single run (the build cache below
      # relies on that), and the parse/upload steps run with `if: always()`.
      # Start from an empty directory so a failed build or eval can never
      # publish the previous run's video or metrics.json. The removal is
      # best-effort: leftovers owned by another user must not fail the job.
      - name: Reset MetaWorld artifacts dir
        run: |
          rm -rf /tmp/metaworld-artifacts 2>/dev/null || true
          mkdir -p /tmp/metaworld-artifacts && chmod 777 /tmp/metaworld-artifacts

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3 # zizmor: ignore[unpinned-uses]
        with:
          cache-binary: false

      # Build the benchmark-specific image and load it into the local Docker
      # daemon (push: false, load: true). The layer cache is read from and
      # written to /tmp/.buildx-cache-metaworld on the runner.
      - name: Build MetaWorld benchmark image
        uses: docker/build-push-action@v6 # zizmor: ignore[unpinned-uses]
        with:
          context: .
          file: docker/Dockerfile.benchmark.metaworld
          push: false
          load: true
          tags: lerobot-benchmark-metaworld:ci
          cache-from: type=local,src=/tmp/.buildx-cache-metaworld
          cache-to: type=local,dest=/tmp/.buildx-cache-metaworld,mode=max

      # Runs lerobot-eval for one episode on the GPU. Login inside the
      # container is best-effort (`|| true`), so the step's exit status is the
      # exit status of lerobot-eval. The token is forwarded by name
      # (`-e HF_USER_TOKEN`) rather than spelled out on the docker command
      # line. Eval output lands in /artifacts, i.e. /tmp/metaworld-artifacts
      # on the host.
      - name: Run MetaWorld smoke eval (1 episode)
        run: |
          mkdir -p /tmp/metaworld-artifacts && chmod 777 /tmp/metaworld-artifacts
          docker run --rm --gpus all \
            --shm-size=4g \
            -e HF_HOME=/tmp/hf \
            -e HF_USER_TOKEN \
            -e HF_HUB_DOWNLOAD_TIMEOUT=300 \
            -v /tmp/metaworld-artifacts:/artifacts \
            lerobot-benchmark-metaworld:ci \
            bash -c "
              hf auth login --token \"\$HF_USER_TOKEN\" --add-to-git-credential 2>/dev/null || true
              lerobot-eval \
                --policy.path=pepijn223/smolvla_metaworld \
                --env.type=metaworld \
                --env.task=metaworld-push-v3 \
                --eval.batch_size=1 \
                --eval.n_episodes=1 \
                --eval.use_async_envs=false \
                --policy.device=cuda \
                '--rename_map={\"observation.image\": \"observation.images.camera1\"}' \
                --policy.empty_cameras=2 \
                --output_dir=/artifacts
            "

      # Runs on the host (not in the container) against the mounted artifacts
      # directory. `always()` keeps this step running after a failed eval.
      - name: Parse MetaWorld eval metrics
        if: always()
        run: |
          python scripts/ci/parse_eval_metrics.py \
            --artifacts-dir /tmp/metaworld-artifacts \
            --env metaworld \
            --task metaworld-push-v3 \
            --policy pepijn223/smolvla_metaworld

      # A missing path only produces a warning, so a failed eval does not turn
      # the upload steps into additional failures.
      - name: Upload MetaWorld rollout video
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: metaworld-rollout-video
          path: /tmp/metaworld-artifacts/videos/
          if-no-files-found: warn

      - name: Upload MetaWorld eval metrics
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: metaworld-metrics
          path: /tmp/metaworld-artifacts/metrics.json
          if-no-files-found: warn
|