From 4ac6c58ab13e473bc20ba35646ac2e899674c1c5 Mon Sep 17 00:00:00 2001
From: Pepijn
Date: Tue, 28 Apr 2026 19:33:34 +0200
Subject: [PATCH] feat(annotate): default to HF Inference Providers, no local GPU needed
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Flip the default backend to 'openai' with use_hf_inference_providers=True
and a Qwen3-VL-30B-A3B-Instruct:novita default model_id. The CLI now runs
end-to-end without a local model load — annotations are produced by sending
video_url + prompt to https://router.huggingface.co/v1. Switch back to
local inference with --vlm.backend=vllm or
--vlm.use_hf_inference_providers=false.

Co-Authored-By: Claude Opus 4.7 (1M context)
---
 .../annotations/steerable_pipeline/config.py | 25 +++++++++++--------
 1 file changed, 14 insertions(+), 11 deletions(-)

diff --git a/src/lerobot/annotations/steerable_pipeline/config.py b/src/lerobot/annotations/steerable_pipeline/config.py
index 6f5e64c6d..1aede15b9 100644
--- a/src/lerobot/annotations/steerable_pipeline/config.py
+++ b/src/lerobot/annotations/steerable_pipeline/config.py
@@ -72,23 +72,26 @@ class Module3Config:
 class VlmConfig:
     """Shared Qwen-VL client configuration."""
 
-    backend: str = "vllm"
+    backend: str = "openai"
     """One of ``vllm``, ``transformers``, ``openai``, or ``stub`` (tests only).
 
-    The ``openai`` backend talks to any OpenAI-compatible server — works
-    with ``vllm serve``, ``transformers serve``, ``ktransformers serve``,
-    or hosted endpoints. Set ``api_base`` and (optionally) ``api_key``."""
-    model_id: str = "Qwen/Qwen3.6-27B-FP8"
+    Default ``openai`` paired with ``use_hf_inference_providers=True``
+    routes requests through HF Inference Providers — no local GPU
+    needed. Switch to ``vllm`` / ``transformers`` for in-process
+    inference."""
+    model_id: str = "Qwen/Qwen3-VL-30B-A3B-Instruct:novita"
     api_base: str = "http://localhost:8000/v1"
     """Base URL for the ``openai`` backend."""
     api_key: str = "EMPTY"
     """API key for the ``openai`` backend; ``EMPTY`` works for local servers."""
-    use_hf_inference_providers: bool = False
-    """When True, route requests through https://router.huggingface.co/v1
-    using your ``HF_TOKEN`` env var as the API key. The CLI flips
-    ``auto_serve`` off automatically — no local server is spawned. Use
-    ``model_id`` of the form ``Qwen/Qwen3-VL-30B-A3B-Instruct:novita`` to
-    pin a specific provider, or omit ``:provider`` to let HF route."""
+    use_hf_inference_providers: bool = True
+    """Route requests through https://router.huggingface.co/v1 using your
+    ``HF_TOKEN`` env var as the API key. Default ``True`` — no local GPU
+    needed. The CLI flips ``auto_serve`` off automatically when this is
+    set. Use ``model_id`` of the form
+    ``Qwen/Qwen3-VL-30B-A3B-Instruct:novita`` to pin a specific provider,
+    or omit ``:provider`` to let HF route. Set ``False`` to fall back to
+    a local server (vllm serve / transformers serve / external)."""
     auto_serve: bool = True
     """When True with ``backend=openai``, the CLI probes ``api_base`` first;
     if no server answers, it spawns one (default:
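
Usage sketch (not part of the patch): with the new defaults, the pipeline
effectively issues OpenAI-compatible chat requests against the HF router,
so the call can be reproduced directly with the ``openai`` Python client to
sanity-check ``HF_TOKEN`` and the provider pin before running the CLI. The
``video_url`` content part follows the "video_url + prompt" wording in the
commit message; the exact content-part schema the provider accepts and the
example clip URL are assumptions, not taken from this repo.

    import os

    from openai import OpenAI

    # Point the stock openai client at HF Inference Providers; HF_TOKEN is
    # the API key, as described in the use_hf_inference_providers docstring.
    client = OpenAI(
        base_url="https://router.huggingface.co/v1",
        api_key=os.environ["HF_TOKEN"],
    )

    response = client.chat.completions.create(
        # ":novita" pins the provider; drop the suffix to let HF route.
        model="Qwen/Qwen3-VL-30B-A3B-Instruct:novita",
        messages=[
            {
                "role": "user",
                "content": [
                    # Hypothetical clip URL; the video content-part schema is
                    # an assumption based on the commit message.
                    {
                        "type": "video_url",
                        "video_url": {"url": "https://example.com/episode_000000.mp4"},
                    },
                    {"type": "text", "text": "Describe the robot's actions in this clip."},
                ],
            }
        ],
    )
    print(response.choices[0].message.content)

Keeping the router behind the same ``openai`` backend means switching between
hosted providers and a local ``vllm serve`` / ``transformers serve`` stays a
pure config change on VlmConfig, as the diff above documents.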