mirror of
https://github.com/huggingface/lerobot.git
synced 2026-05-17 01:30:14 +00:00
refactor(annotate): drop HF Inference Providers code path
The default backend is now a local OpenAI-compatible server (vllm / transformers), which the CLI spawns automatically when auto_serve is enabled. This removes the use_hf_inference_providers config flag and the router.huggingface.co routing branch.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -82,23 +82,13 @@ class VlmConfig:
|
||||
backend: str = "openai"
|
||||
"""One of ``vllm``, ``transformers``, ``openai``, or ``stub`` (tests only).
|
||||
|
||||
Default ``openai`` paired with ``use_hf_inference_providers=True``
|
||||
routes requests through HF Inference Providers — no local GPU
|
||||
needed. Switch to ``vllm`` / ``transformers`` for in-process
|
||||
inference."""
|
||||
model_id: str = "Qwen/Qwen3-VL-30B-A3B-Instruct:novita"
|
||||
Default ``openai`` talks to a local OpenAI-compatible server (vllm /
|
||||
transformers) which the CLI auto-spawns when ``auto_serve=True``."""
|
||||
model_id: str = "Qwen/Qwen2.5-VL-7B-Instruct"
|
||||
api_base: str = "http://localhost:8000/v1"
|
||||
"""Base URL for the ``openai`` backend."""
|
||||
api_key: str = "EMPTY"
|
||||
"""API key for the ``openai`` backend; ``EMPTY`` works for local servers."""
|
||||
use_hf_inference_providers: bool = True
|
||||
"""Route requests through https://router.huggingface.co/v1 using your
|
||||
``HF_TOKEN`` env var as the API key. Default ``True`` — no local GPU
|
||||
needed. The CLI flips ``auto_serve`` off automatically when this is
|
||||
set. Use ``model_id`` of the form
|
||||
``Qwen/Qwen3-VL-30B-A3B-Instruct:novita`` to pin a specific provider,
|
||||
or omit ``:provider`` to let HF route. Set ``False`` to fall back to
|
||||
a local server (vllm serve / transformers serve / external)."""
|
||||
auto_serve: bool = True
|
||||
"""When True with ``backend=openai``, the CLI probes ``api_base``
|
||||
first; if no server answers, it spawns one (default:
|
||||
|
||||
@@ -336,48 +336,25 @@ def _make_openai_client(config: VlmConfig) -> VlmClient:
|
||||
auto_serve = config.auto_serve
|
||||
api_bases: list[str] = [api_base]
|
||||
|
||||
if config.use_hf_inference_providers:
|
||||
api_base = "https://router.huggingface.co/v1"
|
||||
token = os.environ.get("HF_TOKEN") or os.environ.get("HUGGINGFACE_API_KEY") or ""
|
||||
if not token:
|
||||
try:
|
||||
from huggingface_hub import get_token # noqa: PLC0415
|
||||
|
||||
token = get_token() or ""
|
||||
except Exception: # noqa: BLE001
|
||||
token = ""
|
||||
if not token:
|
||||
raise RuntimeError(
|
||||
"use_hf_inference_providers=True needs an HF token. Either set "
|
||||
"HF_TOKEN in the environment, or run `huggingface-cli login` once."
|
||||
print(
|
||||
f"[lerobot-annotate] backend=openai model={config.model_id} "
|
||||
f"api_base={api_base} auto_serve={auto_serve}",
|
||||
flush=True,
|
||||
)
|
||||
if auto_serve:
|
||||
if config.parallel_servers > 1:
|
||||
print(
|
||||
f"[lerobot-annotate] spawning {config.parallel_servers} parallel servers",
|
||||
flush=True,
|
||||
)
|
||||
api_key = token
|
||||
auto_serve = False
|
||||
print(
|
||||
f"[lerobot-annotate] HF Inference Providers: routing model={config.model_id} "
|
||||
f"via {api_base}",
|
||||
flush=True,
|
||||
)
|
||||
else:
|
||||
print(
|
||||
f"[lerobot-annotate] backend=openai model={config.model_id} "
|
||||
f"api_base={api_base} auto_serve={auto_serve}",
|
||||
flush=True,
|
||||
)
|
||||
if auto_serve:
|
||||
if config.parallel_servers > 1:
|
||||
print(
|
||||
f"[lerobot-annotate] spawning {config.parallel_servers} parallel servers",
|
||||
flush=True,
|
||||
)
|
||||
api_bases = _spawn_parallel_inference_servers(config)
|
||||
elif _server_is_up(api_base):
|
||||
print(f"[lerobot-annotate] reusing server already up at {api_base}", flush=True)
|
||||
else:
|
||||
print("[lerobot-annotate] no server reachable; spawning one", flush=True)
|
||||
api_base = _spawn_inference_server(config)
|
||||
api_bases = [api_base]
|
||||
print(f"[lerobot-annotate] server ready at {api_base}", flush=True)
|
||||
api_bases = _spawn_parallel_inference_servers(config)
|
||||
elif _server_is_up(api_base):
|
||||
print(f"[lerobot-annotate] reusing server already up at {api_base}", flush=True)
|
||||
else:
|
||||
print("[lerobot-annotate] no server reachable; spawning one", flush=True)
|
||||
api_base = _spawn_inference_server(config)
|
||||
api_bases = [api_base]
|
||||
print(f"[lerobot-annotate] server ready at {api_base}", flush=True)
|
||||
|
||||
clients = [OpenAI(base_url=base, api_key=api_key) for base in api_bases]
|
||||
client = clients[0]
|
||||
|
||||
Reference in New Issue
Block a user