fix(annotate): detect server ready via stdout banner, not /v1/models polls

transformers serve rescans the HF cache on every /v1/models request
which exceeds the 2s urllib timeout, leaving the probe loop spinning
even after Uvicorn is fully up. Watch the streamed server output for
'Uvicorn running' / 'Application startup complete' instead.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Pepijn
2026-04-28 17:43:47 +02:00
parent d1849d1b63
commit 43d3ba1d4e
@@ -356,11 +356,20 @@ def _spawn_inference_server(config: VlmConfig) -> str:
bufsize=1,
)
# Watch the server output for the uvicorn readiness banner. This is
# more reliable than polling /v1/models because transformers serve
# rescans its cache on every model-list request, which can exceed
# the urllib timeout and trigger an infinite probe loop.
ready_event = threading.Event()
ready_markers = ("Uvicorn running", "Application startup complete")
def _stream_output() -> None:
    """Mirror the child server's stdout and signal readiness.

    Runs on a daemon thread. Each line from the subprocess is echoed to
    our own stdout with a ``[server]`` prefix (flushed immediately so the
    log interleaves in real time), and ``ready_event`` is set as soon as
    a line contains one of the uvicorn startup banners in
    ``ready_markers``. The loop keeps streaming after readiness so the
    full server log remains visible.

    NOTE(review): this only watches stdout — assumes the Popen call above
    merges stderr into stdout (e.g. ``stderr=subprocess.STDOUT``), since
    uvicorn logs its banner to stderr by default. Confirm against the
    Popen arguments.
    """
    # Popen was created with a pipe; assert narrows the Optional for mypy.
    assert proc.stdout is not None
    # Iterating the pipe yields lines (text mode, bufsize=1 => line-buffered);
    # the loop ends when the child closes its end (process exit).
    for line in proc.stdout:
        sys.stdout.write(f"[server] {line}")
        sys.stdout.flush()
        # Either banner line means uvicorn has bound its socket and the
        # parent's wait loop may return the API base URL.
        if any(marker in line for marker in ready_markers):
            ready_event.set()
threading.Thread(target=_stream_output, daemon=True).start()
@@ -377,20 +386,14 @@ def _spawn_inference_server(config: VlmConfig) -> str:
atexit.register(_shutdown)
deadline = time.monotonic() + config.serve_ready_timeout_s
health_url = api_base.rstrip("/") + "/models"
while time.monotonic() < deadline:
if proc.poll() is not None:
raise RuntimeError(
f"[server] inference server exited unexpectedly with rc={proc.returncode}. "
f"See [server] log lines above for the cause."
)
try:
with urllib.request.urlopen(health_url, timeout=2) as resp:
if resp.status == 200:
return api_base
except Exception: # noqa: BLE001 - intentional broad except
pass
time.sleep(2)
if ready_event.wait(timeout=2):
return api_base
proc.terminate()
raise RuntimeError(
f"[server] did not become ready within {config.serve_ready_timeout_s}s"