mirror of
https://github.com/huggingface/lerobot.git
synced 2026-05-27 14:39:43 +00:00
Compare commits
4 Commits
72d0fc0dce
...
1927077fea
| Author | SHA1 | Date | |
|---|---|---|---|
| 1927077fea | |||
| 327ce89423 | |||
| b56cfe7eb9 | |||
| e06a7c79f7 |
+1
-1
@@ -302,7 +302,7 @@ lerobot-annotate="lerobot.scripts.lerobot_annotate:main"
|
|||||||
|
|
||||||
# ---------------- Tool Configurations ----------------
|
# ---------------- Tool Configurations ----------------
|
||||||
[tool.setuptools.package-data]
|
[tool.setuptools.package-data]
|
||||||
lerobot = ["envs/*.json"]
|
lerobot = ["envs/*.json", "annotations/steerable_pipeline/prompts/*.txt"]
|
||||||
|
|
||||||
[tool.setuptools.packages.find]
|
[tool.setuptools.packages.find]
|
||||||
where = ["src"]
|
where = ["src"]
|
||||||
|
|||||||
@@ -18,6 +18,7 @@ from __future__ import annotations
|
|||||||
|
|
||||||
from dataclasses import dataclass, field
|
from dataclasses import dataclass, field
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
@@ -107,10 +108,17 @@ class VlmConfig:
|
|||||||
this command (if present) is substituted per-replica."""
|
this command (if present) is substituted per-replica."""
|
||||||
parallel_servers: int = 1
|
parallel_servers: int = 1
|
||||||
"""When >1, spawn this many independent inference servers (each pinned
|
"""When >1, spawn this many independent inference servers (each pinned
|
||||||
to one GPU via ``CUDA_VISIBLE_DEVICES`` and listening on
|
to a GPU via ``CUDA_VISIBLE_DEVICES`` and listening on
|
||||||
``serve_port + i``) and round-robin client requests across them.
|
``serve_port + i``) and round-robin client requests across them.
|
||||||
Useful when DP/TP NCCL setup is broken on the node — single-GPU
|
Useful when DP/TP NCCL setup is broken on the node — single-GPU
|
||||||
replicas don't need cross-GPU communication."""
|
replicas don't need cross-GPU communication. When
|
||||||
|
``parallel_servers > num_gpus``, replicas are round-robin-assigned
|
||||||
|
to GPUs (e.g. 4 replicas on 2 GPUs → 0,1,0,1)."""
|
||||||
|
num_gpus: int = 0
|
||||||
|
"""How many physical GPUs are available for round-robin replica
|
||||||
|
placement. ``0`` means "use the value of ``parallel_servers``" (one GPU per replica,
|
||||||
|
backward-compatible default). Set this to ``2`` with
|
||||||
|
``parallel_servers=4`` to pack 2 replicas per GPU."""
|
||||||
client_concurrency: int = 16
|
client_concurrency: int = 16
|
||||||
"""Maximum number of in-flight chat requests the client issues in
|
"""Maximum number of in-flight chat requests the client issues in
|
||||||
parallel. vllm batches them internally for free, so bumping this
|
parallel. vllm batches them internally for free, so bumping this
|
||||||
@@ -140,6 +148,12 @@ class VlmConfig:
|
|||||||
camera_key: str | None = None
|
camera_key: str | None = None
|
||||||
"""Override the camera stream used for keyframe attachment. ``None`` picks
|
"""Override the camera stream used for keyframe attachment. ``None`` picks
|
||||||
the first ``observation.images.*`` key the dataset declares."""
|
the first ``observation.images.*`` key the dataset declares."""
|
||||||
|
chat_template_kwargs: dict[str, Any] | None = None
|
||||||
|
"""Forwarded as ``extra_body.chat_template_kwargs`` on every chat call.
|
||||||
|
Use this to pass model-specific template flags such as
|
||||||
|
``{"enable_thinking": false}`` for Qwen3.5/Qwen3.6 to suppress the
|
||||||
|
reasoning preamble that otherwise eats the entire ``max_new_tokens``
|
||||||
|
budget before any JSON is emitted."""
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
|
|||||||
@@ -225,8 +225,11 @@ def episode_clip_path(
|
|||||||
"""Extract the episode's subclip to ``cache_dir/ep_{idx:06d}.mp4``.
|
"""Extract the episode's subclip to ``cache_dir/ep_{idx:06d}.mp4``.
|
||||||
|
|
||||||
Returns ``None`` if the dataset has no video tracks. Skips re-extract
|
Returns ``None`` if the dataset has no video tracks. Skips re-extract
|
||||||
when the cached clip already exists. Uses ``ffmpeg`` via subprocess
|
when the cached clip already exists. Re-encodes to H.264
|
||||||
with stream-copy where possible (no re-encode) for speed.
|
(libx264) so the resulting mp4 is decodable by every downstream
|
||||||
|
video processor — stream-copy would inherit the source codec
|
||||||
|
(often AV1 in modern LeRobot datasets), which vllm's libav build
|
||||||
|
cannot decode.
|
||||||
"""
|
"""
|
||||||
import subprocess # noqa: PLC0415
|
import subprocess # noqa: PLC0415
|
||||||
|
|
||||||
@@ -253,12 +256,19 @@ def episode_clip_path(
|
|||||||
f"{to_timestamp:.3f}",
|
f"{to_timestamp:.3f}",
|
||||||
"-i",
|
"-i",
|
||||||
str(src),
|
str(src),
|
||||||
"-c",
|
"-c:v",
|
||||||
"copy",
|
"libx264",
|
||||||
|
"-preset",
|
||||||
|
"ultrafast",
|
||||||
|
"-crf",
|
||||||
|
"23",
|
||||||
|
"-pix_fmt",
|
||||||
|
"yuv420p",
|
||||||
|
"-an",
|
||||||
str(out_path),
|
str(out_path),
|
||||||
]
|
]
|
||||||
try:
|
try:
|
||||||
subprocess.run(cmd, check=True, timeout=120)
|
subprocess.run(cmd, check=True, timeout=300)
|
||||||
except (subprocess.CalledProcessError, subprocess.TimeoutExpired, FileNotFoundError):
|
except (subprocess.CalledProcessError, subprocess.TimeoutExpired, FileNotFoundError):
|
||||||
return None
|
return None
|
||||||
return out_path if out_path.exists() and out_path.stat().st_size > 0 else None
|
return out_path if out_path.exists() and out_path.stat().st_size > 0 else None
|
||||||
|
|||||||
@@ -380,10 +380,13 @@ def _make_openai_client(config: VlmConfig) -> VlmClient:
|
|||||||
"max_tokens": max_tok,
|
"max_tokens": max_tok,
|
||||||
"temperature": temp,
|
"temperature": temp,
|
||||||
}
|
}
|
||||||
|
extra_body: dict[str, Any] = {}
|
||||||
if send_mm_kwargs and mm_kwargs:
|
if send_mm_kwargs and mm_kwargs:
|
||||||
kwargs["extra_body"] = {
|
extra_body["mm_processor_kwargs"] = {**mm_kwargs, "do_sample_frames": True}
|
||||||
"mm_processor_kwargs": {**mm_kwargs, "do_sample_frames": True}
|
if config.chat_template_kwargs:
|
||||||
}
|
extra_body["chat_template_kwargs"] = config.chat_template_kwargs
|
||||||
|
if extra_body:
|
||||||
|
kwargs["extra_body"] = extra_body
|
||||||
with rr_lock:
|
with rr_lock:
|
||||||
chosen = clients[rr_counter["i"] % len(clients)]
|
chosen = clients[rr_counter["i"] % len(clients)]
|
||||||
rr_counter["i"] += 1
|
rr_counter["i"] += 1
|
||||||
@@ -453,10 +456,12 @@ def _spawn_parallel_inference_servers(config: VlmConfig) -> list[str]:
|
|||||||
f"--uvicorn-log-level warning"
|
f"--uvicorn-log-level warning"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
num_gpus = config.num_gpus if config.num_gpus > 0 else n
|
||||||
for i in range(n):
|
for i in range(n):
|
||||||
port = config.serve_port + i
|
port = config.serve_port + i
|
||||||
|
gpu = i % num_gpus
|
||||||
env = _os.environ.copy()
|
env = _os.environ.copy()
|
||||||
env["CUDA_VISIBLE_DEVICES"] = str(i)
|
env["CUDA_VISIBLE_DEVICES"] = str(gpu)
|
||||||
cmd = base_cmd
|
cmd = base_cmd
|
||||||
if "{port}" in cmd:
|
if "{port}" in cmd:
|
||||||
cmd = cmd.replace("{port}", str(port))
|
cmd = cmd.replace("{port}", str(port))
|
||||||
@@ -464,7 +469,7 @@ def _spawn_parallel_inference_servers(config: VlmConfig) -> list[str]:
|
|||||||
cmd = f"{cmd} --port {port}"
|
cmd = f"{cmd} --port {port}"
|
||||||
api_base = f"http://localhost:{port}/v1"
|
api_base = f"http://localhost:{port}/v1"
|
||||||
api_bases.append(api_base)
|
api_bases.append(api_base)
|
||||||
print(f"[server-{i}] launching on GPU {i} port {port}: {cmd}", flush=True)
|
print(f"[server-{i}] launching on GPU {gpu} port {port}: {cmd}", flush=True)
|
||||||
proc = subprocess.Popen(
|
proc = subprocess.Popen(
|
||||||
shlex.split(cmd),
|
shlex.split(cmd),
|
||||||
stdout=subprocess.PIPE,
|
stdout=subprocess.PIPE,
|
||||||
|
|||||||
Reference in New Issue
Block a user