mirror of
https://github.com/huggingface/lerobot.git
synced 2026-05-15 08:39:49 +00:00
fix(ci): address PR review feedback for benchmark smoke tests
Security:
- Remove the "Login to Hugging Face" step — it was a no-op (ephemeral --rm container) that exposed the HF token via a CLI argument visible in docker inspect and /proc/*/cmdline. The eval step already re-authenticates via an env var.

Functional:
- Remove feat/benchmark-ci from the push trigger branches (it won't exist post-merge).

Dockerfiles:
- Pin uv to 0.8.0 (it was unpinned, fetching whatever the latest release ships).
- Add a comment explaining the chmod +x ptxas workaround (Triton packaging bug — it ships ptxas without the execute bit).

Scripts:
- parse_eval_metrics.py: add a note that it runs on the bare host and must stay stdlib-only.
- parse_eval_metrics.py: add a NaN guard for avg_sum_reward and eval_s (previously only pc_success was guarded).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -31,7 +31,6 @@ on:
|
||||
|
||||
push:
|
||||
branches:
|
||||
- feat/benchmark-ci
|
||||
- main
|
||||
paths:
|
||||
- "src/lerobot/envs/**"
|
||||
@@ -101,14 +100,6 @@ jobs:
|
||||
load: true
|
||||
tags: lerobot-benchmark-libero:ci
|
||||
|
||||
- name: Login to Hugging Face
|
||||
if: env.HF_USER_TOKEN != ''
|
||||
run: |
|
||||
docker run --rm \
|
||||
-e HF_HOME=/tmp/hf \
|
||||
lerobot-benchmark-libero:ci \
|
||||
bash -c "hf auth login --token '$HF_USER_TOKEN' --add-to-git-credential && hf auth whoami"
|
||||
|
||||
- name: Run Libero smoke eval (1 episode)
|
||||
run: |
|
||||
# Named container (no --rm) so we can docker cp artifacts out.
|
||||
|
||||
@@ -43,7 +43,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
python${PYTHON_VERSION} \
|
||||
python${PYTHON_VERSION}-venv \
|
||||
python${PYTHON_VERSION}-dev \
|
||||
&& curl -LsSf https://astral.sh/uv/install.sh | sh \
|
||||
&& curl -LsSf https://astral.sh/uv/0.8.0/install.sh | sh \
|
||||
&& mv /root/.local/bin/uv /usr/local/bin/uv \
|
||||
&& useradd --create-home --shell /bin/bash user_lerobot \
|
||||
&& usermod -aG sudo user_lerobot \
|
||||
@@ -84,6 +84,9 @@ snapshot_download(repo_id='lerobot/libero-assets', repo_type='dataset', \
|
||||
printf "assets: /home/user_lerobot/.libero/assets\nbddl_files: ${LIBERO_DIR}/bddl_files\ndatasets: ${LIBERO_DIR}/../datasets\ninit_states: ${LIBERO_DIR}/init_files\n" \
|
||||
> /home/user_lerobot/.libero/config.yaml
|
||||
|
||||
# Workaround: Triton ships ptxas without the execute bit set.
|
||||
# Without this chmod, any JIT compilation (e.g. torch.compile) fails
|
||||
# with "Permission denied". See the Triton issue tracker (https://github.com/triton-lang/triton/issues); the specific issue number was garbled here — TODO: restore it.
|
||||
RUN chmod +x /lerobot/.venv/lib/python${PYTHON_VERSION}/site-packages/triton/backends/nvidia/bin/ptxas
|
||||
|
||||
# ── Source layer (rebuilds in seconds on code-only changes) ─────────────────
|
||||
|
||||
@@ -43,7 +43,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
python${PYTHON_VERSION} \
|
||||
python${PYTHON_VERSION}-venv \
|
||||
python${PYTHON_VERSION}-dev \
|
||||
&& curl -LsSf https://astral.sh/uv/install.sh | sh \
|
||||
&& curl -LsSf https://astral.sh/uv/0.8.0/install.sh | sh \
|
||||
&& mv /root/.local/bin/uv /usr/local/bin/uv \
|
||||
&& useradd --create-home --shell /bin/bash user_lerobot \
|
||||
&& usermod -aG sudo user_lerobot \
|
||||
@@ -69,6 +69,9 @@ RUN mkdir -p src/lerobot && touch src/lerobot/__init__.py src/lerobot/py.typed
|
||||
|
||||
RUN uv sync --locked --extra metaworld --extra smolvla --no-cache
|
||||
|
||||
# Workaround: Triton ships ptxas without the execute bit set.
|
||||
# Without this chmod, any JIT compilation (e.g. torch.compile) fails
|
||||
# with "Permission denied". See the Triton issue tracker (https://github.com/triton-lang/triton/issues); the specific issue number was garbled here — TODO: restore it.
|
||||
RUN chmod +x /lerobot/.venv/lib/python${PYTHON_VERSION}/site-packages/triton/backends/nvidia/bin/ptxas
|
||||
|
||||
# ── Source layer (rebuilds in seconds on code-only changes) ─────────────────
|
||||
|
||||
@@ -19,6 +19,9 @@ Reads eval_info.json written by lerobot-eval --output_dir and extracts the
|
||||
key metrics needed by the health dashboard. Handles both single-task and
|
||||
multi-task eval output formats.
|
||||
|
||||
NOTE: This script runs on the bare CI runner (not inside Docker), so it
|
||||
must use only Python stdlib modules. Do not add third-party imports.
|
||||
|
||||
Usage:
|
||||
python scripts/ci/parse_eval_metrics.py \\
|
||||
--artifacts-dir /tmp/libero-artifacts \\
|
||||
@@ -54,12 +57,19 @@ def _extract_metrics(info: dict) -> tuple[float | None, int | None, float | None
|
||||
n = agg.get("n_episodes")
|
||||
reward = agg.get("avg_sum_reward")
|
||||
eval_s = agg.get("eval_s")
|
||||
|
||||
def _safe_float(v: float | int | None) -> float | None:
|
||||
if v is None:
|
||||
return None
|
||||
f = float(v)
|
||||
return None if math.isnan(f) else f
|
||||
|
||||
if pc is not None and not math.isnan(pc):
|
||||
return (
|
||||
float(pc),
|
||||
int(n) if n is not None else None,
|
||||
float(reward) if reward is not None else None,
|
||||
float(eval_s) if eval_s is not None else None,
|
||||
_safe_float(reward),
|
||||
_safe_float(eval_s),
|
||||
)
|
||||
|
||||
return None, None, None, None
|
||||
|
||||
Reference in New Issue
Block a user