fix(annotate): visible auto_serve via stdout prints + live server log stream

The previous logger-based output never appeared, leaving users in the dark when auto_serve silently no-op'd. Switch to print(flush=True) so the spawn decision is unmistakable, and stream the server's stdout to the parent terminal in real-time on a background thread so model-load progress and errors surface immediately. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-22 20:19:43 +00:00 · 2026-04-28 17:34:36 +02:00
parent 1a4358ed8b
commit d1849d1b63
1 changed files with 31 additions and 11 deletions
@@ -276,8 +276,18 @@ def _make_openai_client(config: VlmConfig) -> VlmClient:
        ) from exc
    api_base = config.api_base
-    if config.auto_serve and not _server_is_up(api_base):
+    print(
-        api_base = _spawn_inference_server(config)
+        f"[lerobot-annotate] backend=openai model={config.model_id} "
        f"api_base={api_base} auto_serve={config.auto_serve}",
        flush=True,
    )
    if config.auto_serve:
        if _server_is_up(api_base):
            print(f"[lerobot-annotate] reusing server already up at {api_base}", flush=True)
        else:
            print("[lerobot-annotate] no server reachable; spawning one", flush=True)
            api_base = _spawn_inference_server(config)
            print(f"[lerobot-annotate] server ready at {api_base}", flush=True)
    client = OpenAI(base_url=api_base, api_key=config.api_key)
@@ -315,17 +325,21 @@ def _spawn_inference_server(config: VlmConfig) -> str:
    """Spawn ``transformers serve`` (or ``serve_command``), wait until it
    accepts ``/v1/models``, and register a shutdown hook.
    Streams the server's stdout/stderr to the parent terminal in
    real-time on a background thread so users can see model-load
    progress and errors as they happen.
    Returns the full ``api_base`` URL the OpenAI client should use.
    """
    import atexit  # noqa: PLC0415
    import logging  # noqa: PLC0415
    import shlex  # noqa: PLC0415
    import signal  # noqa: PLC0415
    import subprocess  # noqa: PLC0415
    import sys  # noqa: PLC0415
    import threading  # noqa: PLC0415
    import time  # noqa: PLC0415
    import urllib.request  # noqa: PLC0415
    log = logging.getLogger(__name__)
    cmd = config.serve_command
    if not cmd:
        cmd = (
@@ -333,7 +347,7 @@ def _spawn_inference_server(config: VlmConfig) -> str:
            f"--port {config.serve_port} --continuous-batching"
        )
    api_base = f"http://localhost:{config.serve_port}/v1"
-    log.info("auto_serve: launching: %s", cmd)
+    print(f"[server] launching: {cmd}", flush=True)
    proc = subprocess.Popen(
        shlex.split(cmd),
        stdout=subprocess.PIPE,
@@ -342,9 +356,17 @@ def _spawn_inference_server(config: VlmConfig) -> str:
        bufsize=1,
    )
    def _stream_output() -> None:
        assert proc.stdout is not None
        for line in proc.stdout:
            sys.stdout.write(f"[server] {line}")
            sys.stdout.flush()
    threading.Thread(target=_stream_output, daemon=True).start()
    def _shutdown() -> None:
        if proc.poll() is None:
-            log.info("auto_serve: stopping pid=%s", proc.pid)
+            print(f"[server] stopping pid={proc.pid}", flush=True)
            proc.send_signal(signal.SIGINT)
            try:
                proc.wait(timeout=15)
@@ -358,22 +380,20 @@ def _spawn_inference_server(config: VlmConfig) -> str:
    health_url = api_base.rstrip("/") + "/models"
    while time.monotonic() < deadline:
        if proc.poll() is not None:
            tail = proc.stdout.read() if proc.stdout else ""
            raise RuntimeError(
-                f"auto_serve: inference server exited (rc={proc.returncode}). "
+                f"[server] inference server exited unexpectedly with rc={proc.returncode}. "
-                f"Tail of output:\n{tail}"
+                f"See [server] log lines above for the cause."
            )
        try:
            with urllib.request.urlopen(health_url, timeout=2) as resp:
                if resp.status == 200:
                    log.info("auto_serve: server ready at %s", api_base)
                    return api_base
        except Exception:  # noqa: BLE001  - intentional broad except
            pass
        time.sleep(2)
    proc.terminate()
    raise RuntimeError(
-        f"auto_serve: server did not become ready within {config.serve_ready_timeout_s}s"
+        f"[server] did not become ready within {config.serve_ready_timeout_s}s"
    )