mirror of
https://github.com/huggingface/lerobot.git
synced 2026-05-27 06:29:47 +00:00
fix(ci): address PR review feedback for benchmark smoke tests
Security:
- Remove the "Login to Hugging Face" step. It was a no-op (it ran in an ephemeral --rm container) and it exposed the HF token as a CLI argument, visible via docker inspect and /proc/*/cmdline. The eval step already re-authenticates via an environment variable.
Functional:
- Remove feat/benchmark-ci from the push trigger branches (the branch won't exist post-merge).
Dockerfiles:
- Pin uv to 0.8.0 (it was unpinned, fetching whatever the latest release happened to be).
- Add a comment explaining the chmod +x ptxas workaround (Triton packaging bug: ptxas ships without the execute bit).
Scripts:
- parse_eval_metrics.py: add a note that it runs on the bare host and must stay stdlib-only.
- parse_eval_metrics.py: add a NaN guard for avg_sum_reward and eval_s (previously only pc_success was guarded).
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -31,7 +31,6 @@ on:
|
|||||||
|
|
||||||
push:
|
push:
|
||||||
branches:
|
branches:
|
||||||
- feat/benchmark-ci
|
|
||||||
- main
|
- main
|
||||||
paths:
|
paths:
|
||||||
- "src/lerobot/envs/**"
|
- "src/lerobot/envs/**"
|
||||||
@@ -101,14 +100,6 @@ jobs:
|
|||||||
load: true
|
load: true
|
||||||
tags: lerobot-benchmark-libero:ci
|
tags: lerobot-benchmark-libero:ci
|
||||||
|
|
||||||
- name: Login to Hugging Face
|
|
||||||
if: env.HF_USER_TOKEN != ''
|
|
||||||
run: |
|
|
||||||
docker run --rm \
|
|
||||||
-e HF_HOME=/tmp/hf \
|
|
||||||
lerobot-benchmark-libero:ci \
|
|
||||||
bash -c "hf auth login --token '$HF_USER_TOKEN' --add-to-git-credential && hf auth whoami"
|
|
||||||
|
|
||||||
- name: Run Libero smoke eval (1 episode)
|
- name: Run Libero smoke eval (1 episode)
|
||||||
run: |
|
run: |
|
||||||
# Named container (no --rm) so we can docker cp artifacts out.
|
# Named container (no --rm) so we can docker cp artifacts out.
|
||||||
|
|||||||
@@ -43,7 +43,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
|
|||||||
python${PYTHON_VERSION} \
|
python${PYTHON_VERSION} \
|
||||||
python${PYTHON_VERSION}-venv \
|
python${PYTHON_VERSION}-venv \
|
||||||
python${PYTHON_VERSION}-dev \
|
python${PYTHON_VERSION}-dev \
|
||||||
&& curl -LsSf https://astral.sh/uv/install.sh | sh \
|
&& curl -LsSf https://astral.sh/uv/0.8.0/install.sh | sh \
|
||||||
&& mv /root/.local/bin/uv /usr/local/bin/uv \
|
&& mv /root/.local/bin/uv /usr/local/bin/uv \
|
||||||
&& useradd --create-home --shell /bin/bash user_lerobot \
|
&& useradd --create-home --shell /bin/bash user_lerobot \
|
||||||
&& usermod -aG sudo user_lerobot \
|
&& usermod -aG sudo user_lerobot \
|
||||||
@@ -84,6 +84,9 @@ snapshot_download(repo_id='lerobot/libero-assets', repo_type='dataset', \
|
|||||||
printf "assets: /home/user_lerobot/.libero/assets\nbddl_files: ${LIBERO_DIR}/bddl_files\ndatasets: ${LIBERO_DIR}/../datasets\ninit_states: ${LIBERO_DIR}/init_files\n" \
|
printf "assets: /home/user_lerobot/.libero/assets\nbddl_files: ${LIBERO_DIR}/bddl_files\ndatasets: ${LIBERO_DIR}/../datasets\ninit_states: ${LIBERO_DIR}/init_files\n" \
|
||||||
> /home/user_lerobot/.libero/config.yaml
|
> /home/user_lerobot/.libero/config.yaml
|
||||||
|
|
||||||
|
# Workaround: Triton ships ptxas without the execute bit set.
|
||||||
|
# Without this chmod, any JIT compilation (e.g. torch.compile) fails
|
||||||
|
# with "Permission denied". See the Triton issue tracker: https://github.com/triton-lang/triton/issues
|
||||||
RUN chmod +x /lerobot/.venv/lib/python${PYTHON_VERSION}/site-packages/triton/backends/nvidia/bin/ptxas
|
RUN chmod +x /lerobot/.venv/lib/python${PYTHON_VERSION}/site-packages/triton/backends/nvidia/bin/ptxas
|
||||||
|
|
||||||
# ── Source layer (rebuilds in seconds on code-only changes) ─────────────────
|
# ── Source layer (rebuilds in seconds on code-only changes) ─────────────────
|
||||||
|
|||||||
@@ -43,7 +43,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
|
|||||||
python${PYTHON_VERSION} \
|
python${PYTHON_VERSION} \
|
||||||
python${PYTHON_VERSION}-venv \
|
python${PYTHON_VERSION}-venv \
|
||||||
python${PYTHON_VERSION}-dev \
|
python${PYTHON_VERSION}-dev \
|
||||||
&& curl -LsSf https://astral.sh/uv/install.sh | sh \
|
&& curl -LsSf https://astral.sh/uv/0.8.0/install.sh | sh \
|
||||||
&& mv /root/.local/bin/uv /usr/local/bin/uv \
|
&& mv /root/.local/bin/uv /usr/local/bin/uv \
|
||||||
&& useradd --create-home --shell /bin/bash user_lerobot \
|
&& useradd --create-home --shell /bin/bash user_lerobot \
|
||||||
&& usermod -aG sudo user_lerobot \
|
&& usermod -aG sudo user_lerobot \
|
||||||
@@ -69,6 +69,9 @@ RUN mkdir -p src/lerobot && touch src/lerobot/__init__.py src/lerobot/py.typed
|
|||||||
|
|
||||||
RUN uv sync --locked --extra metaworld --extra smolvla --no-cache
|
RUN uv sync --locked --extra metaworld --extra smolvla --no-cache
|
||||||
|
|
||||||
|
# Workaround: Triton ships ptxas without the execute bit set.
|
||||||
|
# Without this chmod, any JIT compilation (e.g. torch.compile) fails
|
||||||
|
# with "Permission denied". See the Triton issue tracker: https://github.com/triton-lang/triton/issues
|
||||||
RUN chmod +x /lerobot/.venv/lib/python${PYTHON_VERSION}/site-packages/triton/backends/nvidia/bin/ptxas
|
RUN chmod +x /lerobot/.venv/lib/python${PYTHON_VERSION}/site-packages/triton/backends/nvidia/bin/ptxas
|
||||||
|
|
||||||
# ── Source layer (rebuilds in seconds on code-only changes) ─────────────────
|
# ── Source layer (rebuilds in seconds on code-only changes) ─────────────────
|
||||||
|
|||||||
@@ -19,6 +19,9 @@ Reads eval_info.json written by lerobot-eval --output_dir and extracts the
|
|||||||
key metrics needed by the health dashboard. Handles both single-task and
|
key metrics needed by the health dashboard. Handles both single-task and
|
||||||
multi-task eval output formats.
|
multi-task eval output formats.
|
||||||
|
|
||||||
|
NOTE: This script runs on the bare CI runner (not inside Docker), so it
|
||||||
|
must use only Python stdlib modules. Do not add third-party imports.
|
||||||
|
|
||||||
Usage:
|
Usage:
|
||||||
python scripts/ci/parse_eval_metrics.py \\
|
python scripts/ci/parse_eval_metrics.py \\
|
||||||
--artifacts-dir /tmp/libero-artifacts \\
|
--artifacts-dir /tmp/libero-artifacts \\
|
||||||
@@ -54,12 +57,19 @@ def _extract_metrics(info: dict) -> tuple[float | None, int | None, float | None
|
|||||||
n = agg.get("n_episodes")
|
n = agg.get("n_episodes")
|
||||||
reward = agg.get("avg_sum_reward")
|
reward = agg.get("avg_sum_reward")
|
||||||
eval_s = agg.get("eval_s")
|
eval_s = agg.get("eval_s")
|
||||||
|
|
||||||
|
def _safe_float(v: float | int | None) -> float | None:
|
||||||
|
if v is None:
|
||||||
|
return None
|
||||||
|
f = float(v)
|
||||||
|
return None if math.isnan(f) else f
|
||||||
|
|
||||||
if pc is not None and not math.isnan(pc):
|
if pc is not None and not math.isnan(pc):
|
||||||
return (
|
return (
|
||||||
float(pc),
|
float(pc),
|
||||||
int(n) if n is not None else None,
|
int(n) if n is not None else None,
|
||||||
float(reward) if reward is not None else None,
|
_safe_float(reward),
|
||||||
float(eval_s) if eval_s is not None else None,
|
_safe_float(eval_s),
|
||||||
)
|
)
|
||||||
|
|
||||||
return None, None, None, None
|
return None, None, None, None
|
||||||
|
|||||||
Reference in New Issue
Block a user