fix(annotate): auto_serve defaults to True; probe before spawning

Default `auto_serve` to True so that a single `lerobot-annotate` command can drive the entire flow. Before spawning, probe `<api_base>/models`: if a server is already reachable (the user started one manually, or `api_base` points at a remote endpoint), skip the spawn.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-15 08:39:49 +00:00 · 2026-04-28 17:14:48 +02:00
parent 8b9c598cf4
commit 11597d4f71
2 changed files with 21 additions and 6 deletions
@@ -83,11 +83,14 @@ class VlmConfig:
    """Base URL for the ``openai`` backend."""
    api_key: str = "EMPTY"
    """API key for the ``openai`` backend; ``EMPTY`` works for local servers."""
-    auto_serve: bool = False
-    """When True with ``backend=openai``, the CLI spawns the inference
-    server itself (default: ``transformers serve``), waits for it to be
-    ready, runs the pipeline, and tears it down on exit. Override the
-    command via ``serve_command``."""
+    auto_serve: bool = True
+    """When True with ``backend=openai``, the CLI probes ``api_base``
+    first; if no server answers, it spawns one (default:
+    ``transformers serve``), waits for it to be ready, runs the
+    pipeline, and tears it down on exit. Default ``True`` so a single
+    ``lerobot-annotate`` call can drive the whole flow. Set to ``False``
+    if you want to fail fast when no server is reachable (e.g. you're
+    pointing at a remote endpoint that should already be up)."""
    serve_port: int = 8000
    """Port the auto-spawned server binds to. Sets ``api_base`` automatically."""
    serve_command: str | None = None
@@ -276,7 +276,7 @@ def _make_openai_client(config: VlmConfig) -> VlmClient:
        ) from exc

    api_base = config.api_base
-    if config.auto_serve:
+    if config.auto_serve and not _server_is_up(api_base):
        api_base = _spawn_inference_server(config)

    client = OpenAI(base_url=api_base, api_key=config.api_key)
@@ -299,6 +299,18 @@ def _make_openai_client(config: VlmConfig) -> VlmClient:
    return _GenericTextClient(_gen, config)


+def _server_is_up(api_base: str) -> bool:
+    """Return True if ``api_base/models`` answers 200 within 2 seconds."""
+    import urllib.request  # noqa: PLC0415
+
+    url = api_base.rstrip("/") + "/models"
+    try:
+        with urllib.request.urlopen(url, timeout=2) as resp:
+            return resp.status == 200
+    except Exception:  # noqa: BLE001
+        return False
+
+
 def _spawn_inference_server(config: VlmConfig) -> str:
    """Spawn ``transformers serve`` (or ``serve_command``), wait until it
    accepts ``/v1/models``, and register a shutdown hook.