From 43d3ba1d4ea12c65fbad5bbd348747c463c38a22 Mon Sep 17 00:00:00 2001 From: Pepijn Date: Tue, 28 Apr 2026 17:43:47 +0200 Subject: [PATCH] fix(annotate): detect server ready via stdout banner, not /v1/models polls transformers serve rescans the HF cache on every /v1/models request, which exceeds the 2s urllib timeout, leaving the probe loop spinning even after Uvicorn is fully up. Watch the streamed server output for 'Uvicorn running' / 'Application startup complete' instead. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../steerable_pipeline/vlm_client.py | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/src/lerobot/annotations/steerable_pipeline/vlm_client.py b/src/lerobot/annotations/steerable_pipeline/vlm_client.py index 8c85f2736..fe32d3f8e 100644 --- a/src/lerobot/annotations/steerable_pipeline/vlm_client.py +++ b/src/lerobot/annotations/steerable_pipeline/vlm_client.py @@ -356,11 +356,20 @@ def _spawn_inference_server(config: VlmConfig) -> str: bufsize=1, ) + # Watch the server output for the uvicorn readiness banner. This is + # more reliable than polling /v1/models because transformers serve + # rescans its cache on every model-list request, which can exceed + # the urllib timeout and stall the probe loop until the deadline. 
+ ready_event = threading.Event() + ready_markers = ("Uvicorn running", "Application startup complete") + def _stream_output() -> None: assert proc.stdout is not None for line in proc.stdout: sys.stdout.write(f"[server] {line}") sys.stdout.flush() + if any(marker in line for marker in ready_markers): + ready_event.set() threading.Thread(target=_stream_output, daemon=True).start() @@ -377,20 +386,14 @@ def _spawn_inference_server(config: VlmConfig) -> str: atexit.register(_shutdown) deadline = time.monotonic() + config.serve_ready_timeout_s - health_url = api_base.rstrip("/") + "/models" while time.monotonic() < deadline: if proc.poll() is not None: raise RuntimeError( f"[server] inference server exited unexpectedly with rc={proc.returncode}. " f"See [server] log lines above for the cause." ) - try: - with urllib.request.urlopen(health_url, timeout=2) as resp: - if resp.status == 200: - return api_base - except Exception: # noqa: BLE001 - intentional broad except - pass - time.sleep(2) + if ready_event.wait(timeout=2): + return api_base proc.terminate() raise RuntimeError( f"[server] did not become ready within {config.serve_ready_timeout_s}s"