Merge branch 'feat/language-annotation-pipeline' into feat/smolvla-on-steerable

Resolves conflicts from 66 commits on the base branch: * pyproject.toml — keep base's transformers>=5.4.0,<5.6.0; add the sentencepiece-dep entry pi052 (FAST action tokenizer) needs. * policies/__init__.py — keep pi052 export; drop the RewardClassifierConfig export that base removed. * policies/factory.py — docstring list resolution (keep pi052; drop reward_classifier, removed by base). * annotations/steerable_pipeline/executor.py — adopt base's renamed _ensure_annotation_metadata_in_info (it already advertises the say tool); drop pi052's older _ensure_tools_in_info call. * configs/train.py — keep pi052's vqa_target_fraction; adopt base's SampleWeightingConfig (legacy RA-BC inline params already covered by the migration shim base added). * scripts/lerobot_train.py — merge pi052's per-policy processor rebuild + dataset_repo_id pass-through with base's active_cfg / is_reward_model_training tightening, and re-route vqa-weighted sampler to active_cfg.drop_n_last_frames. * datasets/language_render.py — adopt base's _select_one + timestamp tolerance (drops pi052's stale _select_latest / per-style sort_key). * tests — adopt base's parametrized per-camera blend + tolerance test; drop pi052 tests that overlap with base's tighter rewrites; keep pi052's flow-only / VQA-blend coverage; add a test_canonical_recipe_loads check on subtask_mem_vqa_speech.yaml. * policies/pi052/processor_pi052.py — import RenderMessagesStep directly from render_messages_processor (base intentionally dropped it from lerobot.processor's re-exports). * uv.lock — regenerated cleanly from base + pi052's pocket-tts / beartype. All 67 touched tests pass (30 pi052 + 37 recipe / language-render / pipeline / render-messages). Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-07-23 01:41:54 +00:00 · 2026-05-25 14:47:09 +02:00
parent 67bdf4690e 471b2b1b1d
commit 1ff10b935c
210 changed files with 14334 additions and 5728 deletions
@@ -128,7 +128,7 @@ dataset_viz = ["lerobot[dataset]", "lerobot[viz]"]
 av-dep = ["av>=15.0.0,<16.0.0"]
 pygame-dep = ["pygame>=2.5.1,<2.7.0"]
 placo-dep = ["placo>=0.9.6,<0.9.17"]
-transformers-dep = ["transformers==5.3.0"] # TODO(Steven): https://github.com/huggingface/lerobot/pull/3249
+transformers-dep = ["transformers>=5.4.0,<5.6.0"]
 sentencepiece-dep = ["sentencepiece>=0.2.0,<0.3.0"] # FAST action tokenizer backend (pi052, pi0_fast)
 grpcio-dep = ["grpcio==1.73.1", "protobuf>=6.31.1,<6.32.0"]
 can-dep = ["python-can>=4.2.0,<5.0.0"]
@@ -195,18 +195,22 @@ groot = [
 ]
 sarm = ["lerobot[transformers-dep]", "pydantic>=2.0.0,<3.0.0", "faker>=33.0.0,<35.0.0", "lerobot[matplotlib-dep]", "lerobot[qwen-vl-utils-dep]"]
 xvla = ["lerobot[transformers-dep]"]
+eo1 = ["lerobot[transformers-dep]", "lerobot[qwen-vl-utils-dep]"]
 hilserl = ["lerobot[transformers-dep]", "gym-hil>=0.1.13,<0.2.0", "lerobot[grpcio-dep]", "lerobot[placo-dep]"]

 # Features
 async = ["lerobot[grpcio-dep]", "lerobot[matplotlib-dep]"]
 peft = ["lerobot[transformers-dep]", "lerobot[peft-dep]"]

-# Annotation pipeline (lerobot-annotate). datatrove is mandatory; vllm is
-# the preferred backend on Linux, with a transformers fallback elsewhere.
+# Annotation pipeline (lerobot-annotate). vllm is the preferred backend
+# on Linux, with a transformers fallback elsewhere; openai is the default
+# backend and talks to any OpenAI-compatible server (``vllm serve`` /
+# ``transformers serve`` / hosted endpoints). Distributed execution is
+# delegated to Hugging Face Jobs (see examples/annotations/run_hf_job.py).
 annotations = [
    "lerobot[dataset]",
    "lerobot[transformers-dep]",
-    "datatrove>=0.4.0,<2.0.0",
+    "openai>=1.40,<2.0",
    "vllm>=0.6.0,<1.0.0; sys_platform == 'linux'",
 ]

@@ -308,6 +312,7 @@ lerobot-imgtransform-viz="lerobot.scripts.lerobot_imgtransform_viz:main"
 lerobot-edit-dataset="lerobot.scripts.lerobot_edit_dataset:main"
 lerobot-setup-can="lerobot.scripts.lerobot_setup_can:main"
 lerobot-annotate="lerobot.scripts.lerobot_annotate:main"
+lerobot-rollout="lerobot.scripts.lerobot_rollout:main"
 # Interactive hierarchical-VLA runtime for PI052 (PaliGemma backbone).
 lerobot-pi052-runtime="lerobot.scripts.lerobot_pi052_runtime:main"