diff --git a/.github/workflows/benchmark_tests.yml b/.github/workflows/benchmark_tests.yml index 6a72f06b4..4fe34a8fa 100644 --- a/.github/workflows/benchmark_tests.yml +++ b/.github/workflows/benchmark_tests.yml @@ -102,6 +102,7 @@ jobs: tags: lerobot-benchmark-libero:ci - name: Run Libero smoke eval (1 episode) + if: env.HF_USER_TOKEN != '' run: | # Named container (no --rm) so we can docker cp artifacts out. # Output to /tmp inside the container — /artifacts doesn't exist @@ -148,7 +149,7 @@ jobs: - name: Upload Libero rollout video if: always() - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@v4 # zizmor: ignore[unpinned-uses] with: name: libero-rollout-video path: /tmp/libero-artifacts/videos/ @@ -156,7 +157,7 @@ jobs: - name: Upload Libero eval metrics if: always() - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@v4 # zizmor: ignore[unpinned-uses] with: name: libero-metrics path: /tmp/libero-artifacts/metrics.json @@ -210,7 +211,7 @@ jobs: - name: Upload Libero train-smoke eval video if: always() - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@v4 # zizmor: ignore[unpinned-uses] with: name: libero-train-smoke-video path: /tmp/libero-train-smoke-artifacts/eval/ @@ -295,7 +296,7 @@ jobs: - name: Upload MetaWorld rollout video if: always() - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@v4 # zizmor: ignore[unpinned-uses] with: name: metaworld-rollout-video path: /tmp/metaworld-artifacts/videos/ @@ -303,7 +304,7 @@ jobs: - name: Upload MetaWorld eval metrics if: always() - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@v4 # zizmor: ignore[unpinned-uses] with: name: metaworld-metrics path: /tmp/metaworld-artifacts/metrics.json diff --git a/docker/Dockerfile.benchmark.libero b/docker/Dockerfile.benchmark.libero index a16179c7d..03fb64323 100644 --- a/docker/Dockerfile.benchmark.libero +++ b/docker/Dockerfile.benchmark.libero @@ -90,7 +90,7 @@ snapshot_download(repo_id='lerobot/libero-assets', repo_type='dataset', \ # Workaround: Triton ships ptxas without the execute bit set. # Without this chmod, any JIT compilation (e.g. torch.compile) fails -# with "Permission denied". See: https://github.com/triton-lang/triton/issues/2due +# with "Permission denied". RUN chmod +x /lerobot/.venv/lib/python${PYTHON_VERSION}/site-packages/triton/backends/nvidia/bin/ptxas # ── Source layer (rebuilds in seconds on code-only changes) ───────────────── diff --git a/scripts/ci/parse_eval_metrics.py b/scripts/ci/parse_eval_metrics.py index 7666a7a5a..897d9e81b 100644 --- a/scripts/ci/parse_eval_metrics.py +++ b/scripts/ci/parse_eval_metrics.py @@ -42,6 +42,20 @@ import sys from pathlib import Path +def _safe_float(v: float | int | None) -> float | None: + if v is None: + return None + f = float(v) + return None if math.isnan(f) else f + + +def _safe_int(v: float | int | None) -> int | None: + if v is None: + return None + f = float(v) + return None if math.isnan(f) else int(f) + + def _extract_metrics(info: dict) -> tuple[float | None, int | None, float | None, float | None]: """Extract (pc_success, n_episodes, avg_sum_reward, eval_s) from eval_info.json. @@ -58,16 +72,10 @@ def _extract_metrics(info: dict) -> tuple[float | None, int | None, float | None reward = agg.get("avg_sum_reward") eval_s = agg.get("eval_s") - def _safe_float(v: float | int | None) -> float | None: - if v is None: - return None - f = float(v) - return None if math.isnan(f) else f - if pc is not None and not math.isnan(pc): return ( float(pc), - int(n) if n is not None else None, + _safe_int(n), _safe_float(reward), _safe_float(eval_s), )