From 2665d4a5acc450a6291c1f818128e7ebeabd06c9 Mon Sep 17 00:00:00 2001 From: Pepijn Date: Mon, 13 Apr 2026 13:38:28 +0200 Subject: [PATCH] =?UTF-8?q?fix:=20address=20PR=20review=20feedback=20?= =?UTF-8?q?=E2=80=94=20broken=20link,=20NaN=20guard,=20zizmor=20tags,=20fo?= =?UTF-8?q?rk=20skip?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Remove broken Triton issue link from Dockerfile.benchmark.libero - Add module-level _safe_int helper to guard n_episodes against NaN - Move _safe_float to module level alongside _safe_int - Add # zizmor: ignore[unpinned-uses] to all upload-artifact@v4 steps - Add if: env.HF_USER_TOKEN != '' to Libero smoke eval for fork PRs Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> --- .github/workflows/benchmark_tests.yml | 11 ++++++----- docker/Dockerfile.benchmark.libero | 2 +- scripts/ci/parse_eval_metrics.py | 22 +++++++++++++++------- 3 files changed, 22 insertions(+), 13 deletions(-) diff --git a/.github/workflows/benchmark_tests.yml b/.github/workflows/benchmark_tests.yml index 6a72f06b4..4fe34a8fa 100644 --- a/.github/workflows/benchmark_tests.yml +++ b/.github/workflows/benchmark_tests.yml @@ -102,6 +102,7 @@ jobs: tags: lerobot-benchmark-libero:ci - name: Run Libero smoke eval (1 episode) + if: env.HF_USER_TOKEN != '' run: | # Named container (no --rm) so we can docker cp artifacts out. 
# Output to /tmp inside the container — /artifacts doesn't exist @@ -148,7 +149,7 @@ jobs: - name: Upload Libero rollout video if: always() - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@v4 # zizmor: ignore[unpinned-uses] with: name: libero-rollout-video path: /tmp/libero-artifacts/videos/ @@ -156,7 +157,7 @@ jobs: - name: Upload Libero eval metrics if: always() - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@v4 # zizmor: ignore[unpinned-uses] with: name: libero-metrics path: /tmp/libero-artifacts/metrics.json @@ -210,7 +211,7 @@ jobs: - name: Upload Libero train-smoke eval video if: always() - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@v4 # zizmor: ignore[unpinned-uses] with: name: libero-train-smoke-video path: /tmp/libero-train-smoke-artifacts/eval/ @@ -295,7 +296,7 @@ jobs: - name: Upload MetaWorld rollout video if: always() - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@v4 # zizmor: ignore[unpinned-uses] with: name: metaworld-rollout-video path: /tmp/metaworld-artifacts/videos/ @@ -303,7 +304,7 @@ jobs: - name: Upload MetaWorld eval metrics if: always() - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@v4 # zizmor: ignore[unpinned-uses] with: name: metaworld-metrics path: /tmp/metaworld-artifacts/metrics.json diff --git a/docker/Dockerfile.benchmark.libero b/docker/Dockerfile.benchmark.libero index a16179c7d..03fb64323 100644 --- a/docker/Dockerfile.benchmark.libero +++ b/docker/Dockerfile.benchmark.libero @@ -90,7 +90,7 @@ snapshot_download(repo_id='lerobot/libero-assets', repo_type='dataset', \ # Workaround: Triton ships ptxas without the execute bit set. # Without this chmod, any JIT compilation (e.g. torch.compile) fails -# with "Permission denied". See: https://github.com/triton-lang/triton/issues/2due +# with "Permission denied". 
RUN chmod +x /lerobot/.venv/lib/python${PYTHON_VERSION}/site-packages/triton/backends/nvidia/bin/ptxas # ── Source layer (rebuilds in seconds on code-only changes) ───────────────── diff --git a/scripts/ci/parse_eval_metrics.py b/scripts/ci/parse_eval_metrics.py index 7666a7a5a..897d9e81b 100644 --- a/scripts/ci/parse_eval_metrics.py +++ b/scripts/ci/parse_eval_metrics.py @@ -42,6 +42,20 @@ import sys from pathlib import Path +def _safe_float(v: float | int | None) -> float | None: + if v is None: + return None + f = float(v) + return None if math.isnan(f) else f + + +def _safe_int(v: float | int | None) -> int | None: + if v is None: + return None + f = float(v) + return None if math.isnan(f) else int(f) + + def _extract_metrics(info: dict) -> tuple[float | None, int | None, float | None, float | None]: """Extract (pc_success, n_episodes, avg_sum_reward, eval_s) from eval_info.json. @@ -58,16 +72,10 @@ def _extract_metrics(info: dict) -> tuple[float | None, int | None, float | None reward = agg.get("avg_sum_reward") eval_s = agg.get("eval_s") - def _safe_float(v: float | int | None) -> float | None: - if v is None: - return None - f = float(v) - return None if math.isnan(f) else f - if pc is not None and not math.isnan(pc): return ( float(pc), - int(n) if n is not None else None, + _safe_int(n), _safe_float(reward), _safe_float(eval_s), )