mirror of
https://github.com/huggingface/lerobot.git
synced 2026-05-22 20:19:43 +00:00
fix(annotate): visible auto_serve via stdout prints + live server log stream
The previous logger-based output never appeared, leaving users in the dark when auto_serve silently did nothing. Switch to print(flush=True) so the spawn decision is unmistakable, and stream the server's stdout to the parent terminal in real time on a background thread so model-load progress and errors surface immediately.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -276,8 +276,18 @@ def _make_openai_client(config: VlmConfig) -> VlmClient:
|
|||||||
) from exc
|
) from exc
|
||||||
|
|
||||||
api_base = config.api_base
|
api_base = config.api_base
|
||||||
if config.auto_serve and not _server_is_up(api_base):
|
print(
|
||||||
api_base = _spawn_inference_server(config)
|
f"[lerobot-annotate] backend=openai model={config.model_id} "
|
||||||
|
f"api_base={api_base} auto_serve={config.auto_serve}",
|
||||||
|
flush=True,
|
||||||
|
)
|
||||||
|
if config.auto_serve:
|
||||||
|
if _server_is_up(api_base):
|
||||||
|
print(f"[lerobot-annotate] reusing server already up at {api_base}", flush=True)
|
||||||
|
else:
|
||||||
|
print("[lerobot-annotate] no server reachable; spawning one", flush=True)
|
||||||
|
api_base = _spawn_inference_server(config)
|
||||||
|
print(f"[lerobot-annotate] server ready at {api_base}", flush=True)
|
||||||
|
|
||||||
client = OpenAI(base_url=api_base, api_key=config.api_key)
|
client = OpenAI(base_url=api_base, api_key=config.api_key)
|
||||||
|
|
||||||
@@ -315,17 +325,21 @@ def _spawn_inference_server(config: VlmConfig) -> str:
|
|||||||
"""Spawn ``transformers serve`` (or ``serve_command``), wait until it
|
"""Spawn ``transformers serve`` (or ``serve_command``), wait until it
|
||||||
accepts ``/v1/models``, and register a shutdown hook.
|
accepts ``/v1/models``, and register a shutdown hook.
|
||||||
|
|
||||||
|
Streams the server's stdout/stderr to the parent terminal in
|
||||||
|
real-time on a background thread so users can see model-load
|
||||||
|
progress and errors as they happen.
|
||||||
|
|
||||||
Returns the full ``api_base`` URL the OpenAI client should use.
|
Returns the full ``api_base`` URL the OpenAI client should use.
|
||||||
"""
|
"""
|
||||||
import atexit # noqa: PLC0415
|
import atexit # noqa: PLC0415
|
||||||
import logging # noqa: PLC0415
|
|
||||||
import shlex # noqa: PLC0415
|
import shlex # noqa: PLC0415
|
||||||
import signal # noqa: PLC0415
|
import signal # noqa: PLC0415
|
||||||
import subprocess # noqa: PLC0415
|
import subprocess # noqa: PLC0415
|
||||||
|
import sys # noqa: PLC0415
|
||||||
|
import threading # noqa: PLC0415
|
||||||
import time # noqa: PLC0415
|
import time # noqa: PLC0415
|
||||||
import urllib.request # noqa: PLC0415
|
import urllib.request # noqa: PLC0415
|
||||||
|
|
||||||
log = logging.getLogger(__name__)
|
|
||||||
cmd = config.serve_command
|
cmd = config.serve_command
|
||||||
if not cmd:
|
if not cmd:
|
||||||
cmd = (
|
cmd = (
|
||||||
@@ -333,7 +347,7 @@ def _spawn_inference_server(config: VlmConfig) -> str:
|
|||||||
f"--port {config.serve_port} --continuous-batching"
|
f"--port {config.serve_port} --continuous-batching"
|
||||||
)
|
)
|
||||||
api_base = f"http://localhost:{config.serve_port}/v1"
|
api_base = f"http://localhost:{config.serve_port}/v1"
|
||||||
log.info("auto_serve: launching: %s", cmd)
|
print(f"[server] launching: {cmd}", flush=True)
|
||||||
proc = subprocess.Popen(
|
proc = subprocess.Popen(
|
||||||
shlex.split(cmd),
|
shlex.split(cmd),
|
||||||
stdout=subprocess.PIPE,
|
stdout=subprocess.PIPE,
|
||||||
@@ -342,9 +356,17 @@ def _spawn_inference_server(config: VlmConfig) -> str:
|
|||||||
bufsize=1,
|
bufsize=1,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def _stream_output() -> None:
|
||||||
|
assert proc.stdout is not None
|
||||||
|
for line in proc.stdout:
|
||||||
|
sys.stdout.write(f"[server] {line}")
|
||||||
|
sys.stdout.flush()
|
||||||
|
|
||||||
|
threading.Thread(target=_stream_output, daemon=True).start()
|
||||||
|
|
||||||
def _shutdown() -> None:
|
def _shutdown() -> None:
|
||||||
if proc.poll() is None:
|
if proc.poll() is None:
|
||||||
log.info("auto_serve: stopping pid=%s", proc.pid)
|
print(f"[server] stopping pid={proc.pid}", flush=True)
|
||||||
proc.send_signal(signal.SIGINT)
|
proc.send_signal(signal.SIGINT)
|
||||||
try:
|
try:
|
||||||
proc.wait(timeout=15)
|
proc.wait(timeout=15)
|
||||||
@@ -358,22 +380,20 @@ def _spawn_inference_server(config: VlmConfig) -> str:
|
|||||||
health_url = api_base.rstrip("/") + "/models"
|
health_url = api_base.rstrip("/") + "/models"
|
||||||
while time.monotonic() < deadline:
|
while time.monotonic() < deadline:
|
||||||
if proc.poll() is not None:
|
if proc.poll() is not None:
|
||||||
tail = proc.stdout.read() if proc.stdout else ""
|
|
||||||
raise RuntimeError(
|
raise RuntimeError(
|
||||||
f"auto_serve: inference server exited (rc={proc.returncode}). "
|
f"[server] inference server exited unexpectedly with rc={proc.returncode}. "
|
||||||
f"Tail of output:\n{tail}"
|
f"See [server] log lines above for the cause."
|
||||||
)
|
)
|
||||||
try:
|
try:
|
||||||
with urllib.request.urlopen(health_url, timeout=2) as resp:
|
with urllib.request.urlopen(health_url, timeout=2) as resp:
|
||||||
if resp.status == 200:
|
if resp.status == 200:
|
||||||
log.info("auto_serve: server ready at %s", api_base)
|
|
||||||
return api_base
|
return api_base
|
||||||
except Exception: # noqa: BLE001 - intentional broad except
|
except Exception: # noqa: BLE001 - intentional broad except
|
||||||
pass
|
pass
|
||||||
time.sleep(2)
|
time.sleep(2)
|
||||||
proc.terminate()
|
proc.terminate()
|
||||||
raise RuntimeError(
|
raise RuntimeError(
|
||||||
f"auto_serve: server did not become ready within {config.serve_ready_timeout_s}s"
|
f"[server] did not become ready within {config.serve_ready_timeout_s}s"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user