fix(annotate): auto_serve defaults to True; probe before spawning

Default auto_serve to True so lerobot-annotate can drive the entire
flow with one command. Probe api_base/models first — if a server is
already reachable (user started one manually, or it's a remote
endpoint), skip the spawn.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Pepijn
2026-04-28 17:14:48 +02:00
parent 8b9c598cf4
commit 11597d4f71
2 changed files with 21 additions and 6 deletions
@@ -83,11 +83,14 @@ class VlmConfig:
"""Base URL for the ``openai`` backend."""
api_key: str = "EMPTY"
"""API key for the ``openai`` backend; ``EMPTY`` works for local servers."""
auto_serve: bool = False
"""When True with ``backend=openai``, the CLI spawns the inference
server itself (default: ``transformers serve``), waits for it to be
ready, runs the pipeline, and tears it down on exit. Override the
command via ``serve_command``."""
auto_serve: bool = True
"""When True with ``backend=openai``, the CLI probes ``api_base``
first; if no server answers, it spawns one (default:
``transformers serve``), waits for it to be ready, runs the
pipeline, and tears it down on exit. Default ``True`` so a single
``lerobot-annotate`` call can drive the whole flow. Set to ``False``
if you want to fail fast when no server is reachable (e.g. you're
pointing at a remote endpoint that should already be up)."""
serve_port: int = 8000
"""Port the auto-spawned server binds to. Sets ``api_base`` automatically."""
serve_command: str | None = None
@@ -276,7 +276,7 @@ def _make_openai_client(config: VlmConfig) -> VlmClient:
) from exc
api_base = config.api_base
if config.auto_serve:
if config.auto_serve and not _server_is_up(api_base):
api_base = _spawn_inference_server(config)
client = OpenAI(base_url=api_base, api_key=config.api_key)
@@ -299,6 +299,18 @@ def _make_openai_client(config: VlmConfig) -> VlmClient:
return _GenericTextClient(_gen, config)
def _server_is_up(api_base: str) -> bool:
"""Return True if ``api_base/models`` answers 200 within 2 seconds."""
import urllib.request # noqa: PLC0415
url = api_base.rstrip("/") + "/models"
try:
with urllib.request.urlopen(url, timeout=2) as resp:
return resp.status == 200
except Exception: # noqa: BLE001
return False
def _spawn_inference_server(config: VlmConfig) -> str:
"""Spawn ``transformers serve`` (or ``serve_command``), wait until it
accepts ``/v1/models``, and register a shutdown hook.