diff --git a/.github/workflows/benchmark_tests.yml b/.github/workflows/benchmark_tests.yml
index eed3d4800..5a4b58bc5 100644
--- a/.github/workflows/benchmark_tests.yml
+++ b/.github/workflows/benchmark_tests.yml
@@ -137,6 +137,15 @@ jobs:
           docker cp libero-eval:/tmp/eval-artifacts/. /tmp/libero-artifacts/ 2>/dev/null || true
           docker rm -f libero-eval || true
 
+      - name: Parse Libero eval metrics
+        if: always()
+        run: |
+          python3 scripts/ci/parse_eval_metrics.py \
+            --artifacts-dir /tmp/libero-artifacts \
+            --env libero \
+            --task libero_spatial \
+            --policy pepijn223/smolvla_libero
+
       - name: Upload Libero rollout video
         if: always()
         uses: actions/upload-artifact@v4
@@ -145,6 +154,14 @@ jobs:
           path: /tmp/libero-artifacts/videos/
           if-no-files-found: warn
 
+      - name: Upload Libero eval metrics
+        if: always()
+        uses: actions/upload-artifact@v4
+        with:
+          name: libero-metrics
+          path: /tmp/libero-artifacts/metrics.json
+          if-no-files-found: warn
+
   # ── METAWORLD ─────────────────────────────────────────────────────────────
   # Isolated image: lerobot[metaworld] only (metaworld==3.0.0, mujoco>=3 chain)
   metaworld-integration-test:
@@ -206,6 +223,15 @@ jobs:
           docker cp metaworld-eval:/tmp/eval-artifacts/. /tmp/metaworld-artifacts/ 2>/dev/null || true
           docker rm -f metaworld-eval || true
 
+      - name: Parse MetaWorld eval metrics
+        if: always()
+        run: |
+          python3 scripts/ci/parse_eval_metrics.py \
+            --artifacts-dir /tmp/metaworld-artifacts \
+            --env metaworld \
+            --task metaworld-push-v3 \
+            --policy pepijn223/smolvla_metaworld
+
       - name: Upload MetaWorld rollout video
         if: always()
         uses: actions/upload-artifact@v4
@@ -213,3 +239,11 @@ jobs:
           name: metaworld-rollout-video
           path: /tmp/metaworld-artifacts/videos/
           if-no-files-found: warn
+
+      - name: Upload MetaWorld eval metrics
+        if: always()
+        uses: actions/upload-artifact@v4
+        with:
+          name: metaworld-metrics
+          path: /tmp/metaworld-artifacts/metrics.json
+          if-no-files-found: warn
diff --git a/scripts/ci/parse_eval_metrics.py b/scripts/ci/parse_eval_metrics.py
new file mode 100644
index 000000000..b10be7170
--- /dev/null
+++ b/scripts/ci/parse_eval_metrics.py
@@ -0,0 +1,117 @@
+#!/usr/bin/env python3
+# Copyright 2025 The HuggingFace Inc. team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Parse lerobot-eval output into a small metrics.json artifact.
+
+Reads eval_info.json written by lerobot-eval --output_dir and extracts the
+key metrics needed by the health dashboard. Handles both single-task and
+multi-task eval output formats.
+
+Usage:
+    python scripts/ci/parse_eval_metrics.py \\
+        --artifacts-dir /tmp/libero-artifacts \\
+        --env libero \\
+        --task libero_spatial \\
+        --policy pepijn223/smolvla_libero
+
+Writes <artifacts-dir>/metrics.json. The CI workflow then uploads this file
+as a GitHub Actions artifact named "<env>-metrics".
+"""
+
+from __future__ import annotations
+
+import argparse
+import json
+import math
+import sys
+from pathlib import Path
+
+
+def _extract_metrics(info: dict) -> tuple[float | None, int | None, float | None, float | None]:
+    """Extract (pc_success, n_episodes, avg_sum_reward, eval_s) from eval_info.json.
+
+    Handles two output shapes:
+    - Single-task: {"aggregated": {"pc_success": 80.0, ...}}
+    - Multi-task: {"overall": {"pc_success": 80.0, "n_episodes": 5, ...}}
+    """
+    for key in ("aggregated", "overall"):
+        if key not in info:
+            continue
+        agg = info[key]
+        pc = agg.get("pc_success")
+        n = agg.get("n_episodes")
+        reward = agg.get("avg_sum_reward")
+        eval_s = agg.get("eval_s")
+        if pc is not None and not math.isnan(pc):
+            return (
+                float(pc),
+                int(n) if n is not None else None,
+                float(reward) if reward is not None else None,
+                float(eval_s) if eval_s is not None else None,
+            )
+
+    return None, None, None, None
+
+
+def main() -> int:
+    parser = argparse.ArgumentParser(
+        description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter
+    )
+    parser.add_argument("--artifacts-dir", required=True, help="Path to the mounted artifacts volume")
+    parser.add_argument("--env", required=True, help="Environment name (e.g. libero)")
+    parser.add_argument("--task", required=True, help="Task name (e.g. libero_spatial)")
+    parser.add_argument("--policy", required=True, help="Policy hub path (e.g. pepijn223/smolvla_libero)")
+    args = parser.parse_args()
+
+    artifacts_dir = Path(args.artifacts_dir)
+    eval_info_path = artifacts_dir / "eval_info.json"
+
+    pc_success: float | None = None
+    n_episodes: int | None = None
+    avg_sum_reward: float | None = None
+    eval_s: float | None = None
+
+    if eval_info_path.exists():
+        try:
+            info = json.loads(eval_info_path.read_text())
+            pc_success, n_episodes, avg_sum_reward, eval_s = _extract_metrics(info)
+        except (json.JSONDecodeError, KeyError, TypeError) as exc:
+            print(f"[parse_eval_metrics] Warning: could not parse eval_info.json: {exc}", file=sys.stderr)
+    else:
+        print(
+            f"[parse_eval_metrics] Warning: {eval_info_path} not found — eval may have failed.",
+            file=sys.stderr,
+        )
+
+    metrics = {
+        "env": args.env,
+        "task": args.task,
+        "policy": args.policy,
+        "pc_success": pc_success,
+        "n_episodes": n_episodes,
+        "avg_sum_reward": avg_sum_reward,
+        "eval_s": eval_s,
+    }
+
+    out_path = artifacts_dir / "metrics.json"
+    out_path.write_text(json.dumps(metrics, indent=2))
+    print(f"[parse_eval_metrics] Written: {out_path}")
+    print(json.dumps(metrics, indent=2))
+
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())
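Reviewer note: below is a minimal, illustrative check of the two eval_info.json shapes _extract_metrics accepts. The numeric values are made up for the example, not real eval results, and it assumes you run it from the repo root so scripts/ci is importable:

    import sys

    sys.path.insert(0, "scripts/ci")
    from parse_eval_metrics import _extract_metrics

    # Single-task shape: metrics live under "aggregated".
    single = {"aggregated": {"pc_success": 80.0, "n_episodes": 5, "avg_sum_reward": 1.8, "eval_s": 312.4}}
    # Multi-task shape: metrics live under "overall".
    multi = {"overall": {"pc_success": 60.0, "n_episodes": 10, "avg_sum_reward": 0.9, "eval_s": 640.0}}

    print(_extract_metrics(single))  # -> (80.0, 5, 1.8, 312.4)
    print(_extract_metrics(multi))   # -> (60.0, 10, 0.9, 640.0)

Either way the function returns (None, None, None, None) when neither key holds a non-NaN pc_success, which is what lets the CI step still emit a metrics.json (with null fields) after a failed eval.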