diff --git a/src/lerobot/annotations/steerable_pipeline/vlm_client.py b/src/lerobot/annotations/steerable_pipeline/vlm_client.py index e55e9bae4..0cb002d2c 100644 --- a/src/lerobot/annotations/steerable_pipeline/vlm_client.py +++ b/src/lerobot/annotations/steerable_pipeline/vlm_client.py @@ -455,7 +455,16 @@ def _spawn_parallel_inference_servers(config: VlmConfig) -> list[str]: api_bases: list[str] = [] procs: list[subprocess.Popen] = [] ready_events: list[threading.Event] = [] - ready_markers = ("Uvicorn running", "Application startup complete") + # Multiple readiness signals — uvicorn's own banner is suppressed at + # ``--uvicorn-log-level warning``, so we also accept vllm's own + # "Starting vLLM API server" line and the route-listing line. The + # HTTP probe below is the ultimate fallback. + ready_markers = ( + "Uvicorn running", + "Application startup complete", + "Starting vLLM API server", + "Available routes are", + ) # Single lock for all server-stream threads so multibyte chars from # different servers don't interleave and tear UTF-8 sequences. print_lock = threading.Lock() @@ -506,6 +515,16 @@ def _spawn_parallel_inference_servers(config: VlmConfig) -> list[str]: threading.Thread(target=_stream, args=(i, proc, ready), daemon=True).start() + def _probe(idx: int, base: str, ev: threading.Event, p: subprocess.Popen) -> None: + while not ev.is_set() and p.poll() is None: + if _server_is_up(base): + print(f"[server-{idx}] ready (http probe)", flush=True) + ev.set() + return + time.sleep(2) + + threading.Thread(target=_probe, args=(i, api_base, ready, proc), daemon=True).start() + def _shutdown() -> None: for i, p in enumerate(procs): if p.poll() is None: @@ -588,7 +607,23 @@ def _spawn_inference_server(config: VlmConfig) -> str: # rescans its cache on every model-list request, which can exceed # the urllib timeout and trigger an infinite probe loop. ready_event = threading.Event() - ready_markers = ("Uvicorn running", "Application startup complete") + # See _spawn_parallel_inference_servers for why we accept these. + ready_markers = ( + "Uvicorn running", + "Application startup complete", + "Starting vLLM API server", + "Available routes are", + ) + + def _probe() -> None: + while not ready_event.is_set() and proc.poll() is None: + if _server_is_up(api_base): + print("[server] ready (http probe)", flush=True) + ready_event.set() + return + time.sleep(2) + + threading.Thread(target=_probe, daemon=True).start() def _stream_output() -> None: # Read raw chunks instead of iterating lines so tqdm progress