From 43d3ba1d4ea12c65fbad5bbd348747c463c38a22 Mon Sep 17 00:00:00 2001
From: Pepijn <pepijn@huggingface.co>
Date: Tue, 28 Apr 2026 17:43:47 +0200
Subject: [PATCH] fix(annotate): detect server ready via stdout banner, not
 /v1/models polls

`transformers serve` rescans the HF cache on every /v1/models request,
which can exceed the 2s urllib timeout, so the readiness probe keeps
failing even after Uvicorn is fully up. Watch the streamed server output
for 'Uvicorn running' / 'Application startup complete' instead.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 .../steerable_pipeline/vlm_client.py          | 19 +++++++++++--------
 1 file changed, 11 insertions(+), 8 deletions(-)

diff --git a/src/lerobot/annotations/steerable_pipeline/vlm_client.py b/src/lerobot/annotations/steerable_pipeline/vlm_client.py
index 8c85f2736..fe32d3f8e 100644
--- a/src/lerobot/annotations/steerable_pipeline/vlm_client.py
+++ b/src/lerobot/annotations/steerable_pipeline/vlm_client.py
@@ -356,11 +356,20 @@ def _spawn_inference_server(config: VlmConfig) -> str:
         bufsize=1,
     )
 
+    # Watch the server output for the uvicorn readiness banner. This is
+    # more reliable than polling /v1/models because transformers serve
+    # rescans its cache on every model-list request, which can exceed the
+    # urllib timeout and keep the probe failing until the ready deadline.
+    ready_event = threading.Event()
+    ready_markers = ("Uvicorn running", "Application startup complete")
+
     def _stream_output() -> None:
         assert proc.stdout is not None
         for line in proc.stdout:
             sys.stdout.write(f"[server] {line}")
             sys.stdout.flush()
+            if any(marker in line for marker in ready_markers):
+                ready_event.set()
 
     threading.Thread(target=_stream_output, daemon=True).start()
 
@@ -377,20 +386,14 @@ def _spawn_inference_server(config: VlmConfig) -> str:
     atexit.register(_shutdown)
 
     deadline = time.monotonic() + config.serve_ready_timeout_s
-    health_url = api_base.rstrip("/") + "/models"
     while time.monotonic() < deadline:
         if proc.poll() is not None:
             raise RuntimeError(
                 f"[server] inference server exited unexpectedly with rc={proc.returncode}. "
                 f"See [server] log lines above for the cause."
             )
-        try:
-            with urllib.request.urlopen(health_url, timeout=2) as resp:
-                if resp.status == 200:
-                    return api_base
-        except Exception:  # noqa: BLE001  - intentional broad except
-            pass
-        time.sleep(2)
+        if ready_event.wait(timeout=2):
+            return api_base
     proc.terminate()
     raise RuntimeError(
         f"[server] did not become ready within {config.serve_ready_timeout_s}s"