Files
lerobot/tests/configs/test_recipe.py
T
Pepijn 1ff10b935c Merge branch 'feat/language-annotation-pipeline' into feat/smolvla-on-steerable
Resolves conflicts from 66 commits on the base branch:

* pyproject.toml — keep base's transformers>=5.4.0,<5.6.0; add the
  sentencepiece-dep entry pi052 (FAST action tokenizer) needs.
* policies/__init__.py — keep pi052 export; drop the
  RewardClassifierConfig export that base removed.
* policies/factory.py — docstring list resolution (keep pi052; drop
  reward_classifier, removed by base).
* annotations/steerable_pipeline/executor.py — adopt base's renamed
  _ensure_annotation_metadata_in_info (it already advertises the say
  tool); drop pi052's older _ensure_tools_in_info call.
* configs/train.py — keep pi052's vqa_target_fraction; adopt base's
  SampleWeightingConfig (legacy RA-BC inline params already covered
  by the migration shim base added).
* scripts/lerobot_train.py — merge pi052's per-policy processor
  rebuild + dataset_repo_id pass-through with base's active_cfg /
  is_reward_model_training tightening, and re-route vqa-weighted
  sampler to active_cfg.drop_n_last_frames.
* datasets/language_render.py — adopt base's _select_one + timestamp
  tolerance (drops pi052's stale _select_latest / per-style sort_key).
* tests — adopt base's parametrized per-camera blend + tolerance
  test; drop pi052 tests that overlap with base's tighter rewrites;
  keep pi052's flow-only / VQA-blend coverage; add a
  test_canonical_recipe_loads check on subtask_mem_vqa_speech.yaml.
* policies/pi052/processor_pi052.py — import RenderMessagesStep
  directly from render_messages_processor (base intentionally
  dropped it from lerobot.processor's re-exports).
* uv.lock — regenerated cleanly from base + pi052's pocket-tts /
  beartype.

All 67 touched tests pass (30 pi052 + 37 recipe / language-render /
pipeline / render-messages).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-25 14:47:09 +02:00

178 lines
6.4 KiB
Python

#!/usr/bin/env python
from pathlib import Path
from textwrap import dedent
import pytest
from lerobot.configs.recipe import MessageTurn, TrainingRecipe, load_recipe
def _minimal_message_turn(content: str = "${task}") -> MessageTurn:
return MessageTurn(role="user", content=content, stream="high_level")
def _minimal_target_turn() -> MessageTurn:
return MessageTurn(role="assistant", content="ok", stream="high_level", target=True)
# ── Message-recipe validation ────────────────────────────────────────
def test_message_recipe_validates_unknown_binding():
with pytest.raises(ValueError, match="unknown binding"):
TrainingRecipe(
messages=[
MessageTurn(role="user", content="${missing}", stream="high_level"),
_minimal_target_turn(),
]
)
def test_canonical_recipe_loads():
"""The canonical PI052 blend YAML loads + validates."""
recipe = TrainingRecipe.from_yaml(
Path("src/lerobot/configs/recipes/subtask_mem_vqa_speech.yaml")
)
assert recipe.blend is not None
assert sum(c.weight for c in recipe.blend.values()) == pytest.approx(1.0)
def test_message_turn_requires_a_stream():
"""Every turn must declare a stream — None is rejected at construction.
Previously this only failed at render time (``_validate_rendered``);
catching it here means a malformed recipe YAML errors at load instead
of at the first training sample.
"""
with pytest.raises(ValueError, match="missing a stream"):
MessageTurn(role="user", content="${task}")
def test_message_recipe_requires_at_least_one_target():
with pytest.raises(ValueError, match="target"):
TrainingRecipe(
messages=[
_minimal_message_turn(),
MessageTurn(role="assistant", content="no target", stream="high_level"),
]
)
def test_recipe_rejects_both_messages_and_blend():
with pytest.raises(ValueError, match="only one"):
TrainingRecipe(
messages=[_minimal_message_turn(), _minimal_target_turn()],
blend={"a": TrainingRecipe(weight=1.0, messages=[_minimal_target_turn()])},
)
def test_recipe_rejects_neither_messages_nor_blend():
with pytest.raises(ValueError, match="must set one"):
TrainingRecipe()
# ── Blend validation ─────────────────────────────────────────────────
def test_blend_must_be_non_empty():
with pytest.raises(ValueError, match="at least one component"):
TrainingRecipe(blend={})
def test_blend_component_must_define_weight():
with pytest.raises(ValueError, match="weight"):
TrainingRecipe(blend={"a": TrainingRecipe(messages=[_minimal_target_turn()])})
def test_blend_component_weight_must_be_positive():
with pytest.raises(ValueError, match="positive weight"):
TrainingRecipe(blend={"a": TrainingRecipe(weight=0.0, messages=[_minimal_target_turn()])})
def test_blend_component_must_define_messages():
# A bare TrainingRecipe(weight=1.0) would itself raise; build it without
# going through __post_init__ to exercise the blend-level validator.
bad = TrainingRecipe.__new__(TrainingRecipe)
bad.messages = None
bad.bindings = None
bad.blend = None
bad.weight = 1.0
with pytest.raises(ValueError, match="must define messages"):
TrainingRecipe(blend={"a": bad})
def test_blend_components_cannot_themselves_define_a_blend():
inner = TrainingRecipe(blend={"x": TrainingRecipe(weight=1.0, messages=[_minimal_target_turn()])})
# Force-bypass the inner component's normal validation so the test
# exercises the outer blend's "no nested blends" rule directly.
nested = TrainingRecipe.__new__(TrainingRecipe)
nested.messages = None
nested.bindings = None
nested.blend = inner.blend
nested.weight = 1.0
with pytest.raises(ValueError, match="cannot itself define a blend"):
TrainingRecipe(blend={"outer": nested})
# ── from_dict / from_yaml round-trips ────────────────────────────────
def test_from_dict_with_nested_blend():
recipe = TrainingRecipe.from_dict(
{
"blend": {
"a": {
"weight": 1.0,
"messages": [
{"role": "user", "content": "${task}", "stream": "high_level"},
{"role": "assistant", "content": "a", "stream": "high_level", "target": True},
],
},
"b": {
"weight": 2.0,
"messages": [
{"role": "user", "content": "${task}", "stream": "high_level"},
{"role": "assistant", "content": "b", "stream": "high_level", "target": True},
],
},
}
}
)
assert recipe.blend is not None
assert set(recipe.blend) == {"a", "b"}
assert recipe.blend["b"].weight == 2.0
# Inner messages were promoted to MessageTurn instances.
assert isinstance(recipe.blend["a"].messages[0], MessageTurn)
def test_from_yaml_round_trips_through_load_recipe(tmp_path: Path):
yaml_text = dedent(
"""
bindings:
custom: "active_at(t, style=subtask)"
messages:
- {role: user, content: "${task}: ${custom}", stream: high_level}
- {role: assistant, content: "ok", stream: high_level, target: true}
"""
).strip()
path = tmp_path / "recipe.yaml"
path.write_text(yaml_text)
via_classmethod = TrainingRecipe.from_yaml(path)
via_helper = load_recipe(path)
assert via_classmethod.bindings == {"custom": "active_at(t, style=subtask)"}
assert via_classmethod.messages[1].target is True
# ``load_recipe`` is just a wrapper, but assert the two paths agree
# on the structural result so a future divergence is caught here.
assert via_helper.bindings == via_classmethod.bindings
assert len(via_helper.messages) == len(via_classmethod.messages)
def test_from_yaml_rejects_non_mapping(tmp_path: Path):
path = tmp_path / "bad.yaml"
path.write_text("- just\n- a\n- list\n")
with pytest.raises(ValueError, match="mapping at the top level"):
TrainingRecipe.from_yaml(path)