From 11597d4f7149745531848c978cc3c72476233098 Mon Sep 17 00:00:00 2001
From: Pepijn
Date: Tue, 28 Apr 2026 17:14:48 +0200
Subject: [PATCH] fix(annotate): auto_serve defaults to True; probe before spawning
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Default auto_serve to True so lerobot-annotate can drive the entire flow
with one command. Probe api_base/models first — if a server is already
reachable (user started one manually, or it's a remote endpoint), skip
the spawn.

Co-Authored-By: Claude Opus 4.7 (1M context)
---
 .../annotations/steerable_pipeline/config.py     | 13 ++++++++-----
 .../annotations/steerable_pipeline/vlm_client.py | 14 +++++++++++++-
 2 files changed, 21 insertions(+), 6 deletions(-)

diff --git a/src/lerobot/annotations/steerable_pipeline/config.py b/src/lerobot/annotations/steerable_pipeline/config.py
index 3126602be..dcabc8345 100644
--- a/src/lerobot/annotations/steerable_pipeline/config.py
+++ b/src/lerobot/annotations/steerable_pipeline/config.py
@@ -83,11 +83,14 @@ class VlmConfig:
     """Base URL for the ``openai`` backend."""
     api_key: str = "EMPTY"
     """API key for the ``openai`` backend; ``EMPTY`` works for local servers."""
-    auto_serve: bool = False
-    """When True with ``backend=openai``, the CLI spawns the inference
-    server itself (default: ``transformers serve``), waits for it to be
-    ready, runs the pipeline, and tears it down on exit. Override the
-    command via ``serve_command``."""
+    auto_serve: bool = True
+    """When True with ``backend=openai``, the CLI probes ``api_base``
+    first; if no server answers, it spawns one (default:
+    ``transformers serve``), waits for it to be ready, runs the
+    pipeline, and tears it down on exit. Default ``True`` so a single
+    ``lerobot-annotate`` call can drive the whole flow. Set to ``False``
+    if you want to fail fast when no server is reachable (e.g. you're
+    pointing at a remote endpoint that should already be up)."""
     serve_port: int = 8000
     """Port the auto-spawned server binds to. Sets ``api_base`` automatically."""
     serve_command: str | None = None
diff --git a/src/lerobot/annotations/steerable_pipeline/vlm_client.py b/src/lerobot/annotations/steerable_pipeline/vlm_client.py
index b5503a6bb..0c25c5cf8 100644
--- a/src/lerobot/annotations/steerable_pipeline/vlm_client.py
+++ b/src/lerobot/annotations/steerable_pipeline/vlm_client.py
@@ -276,7 +276,7 @@ def _make_openai_client(config: VlmConfig) -> VlmClient:
         ) from exc
 
     api_base = config.api_base
-    if config.auto_serve:
+    if config.auto_serve and not _server_is_up(api_base):
         api_base = _spawn_inference_server(config)
 
     client = OpenAI(base_url=api_base, api_key=config.api_key)
@@ -299,6 +299,18 @@ def _make_openai_client(config: VlmConfig) -> VlmClient:
     return _GenericTextClient(_gen, config)
 
 
+def _server_is_up(api_base: str) -> bool:
+    """Return True if ``api_base/models`` answers 200 within 2 seconds."""
+    import urllib.request  # noqa: PLC0415
+
+    url = api_base.rstrip("/") + "/models"
+    try:
+        with urllib.request.urlopen(url, timeout=2) as resp:
+            return resp.status == 200
+    except Exception:  # noqa: BLE001
+        return False
+
+
 def _spawn_inference_server(config: VlmConfig) -> str:
     """Spawn ``transformers serve`` (or ``serve_command``), wait until it
     accepts ``/v1/models``, and register a shutdown hook.
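
Below the patch, a standalone sketch of the probe-then-spawn behaviour the new
docstring describes, assuming only the standard library. The probe() helper
copies the shape of _server_is_up (GET on api_base/models, 2-second timeout,
200 means the server is up); DummyModelsHandler and probe() are hypothetical
names used only for this demo and are not part of the patched module.

# Sketch: a throwaway HTTP server answers /models with 200, so the probe
# reports "up" and auto_serve would skip spawning a new inference server.
import threading
import urllib.request
from http.server import BaseHTTPRequestHandler, HTTPServer


class DummyModelsHandler(BaseHTTPRequestHandler):
    def do_GET(self):
        # Mimic an OpenAI-compatible server: /models exists, everything else 404s.
        if self.path.endswith("/models"):
            self.send_response(200)
            self.end_headers()
            self.wfile.write(b'{"object": "list", "data": []}')
        else:
            self.send_response(404)
            self.end_headers()

    def log_message(self, *args):
        pass  # keep the demo output quiet


def probe(api_base: str, timeout: float = 2.0) -> bool:
    # Same check as _server_is_up: 200 from api_base/models within the timeout.
    url = api_base.rstrip("/") + "/models"
    try:
        with urllib.request.urlopen(url, timeout=timeout) as resp:
            return resp.status == 200
    except Exception:
        return False


if __name__ == "__main__":
    server = HTTPServer(("127.0.0.1", 0), DummyModelsHandler)  # port 0 = any free port
    port = server.server_address[1]
    threading.Thread(target=server.serve_forever, daemon=True).start()

    print(probe(f"http://127.0.0.1:{port}"))      # True  -> the spawn is skipped
    print(probe(f"http://127.0.0.1:{port + 1}"))  # False -> a server gets spawned
    server.shutdown()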