Merge remote-tracking branch 'origin/main' into feat/language-annotation-pipeline

# Conflicts:
#	uv.lock
This commit is contained in:
Pepijn
2026-06-02 17:36:07 +02:00
46 changed files with 17465 additions and 18 deletions
File diff suppressed because it is too large Load Diff
+340
View File
@@ -0,0 +1,340 @@
# Copyright 2026 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for Robometer reward model."""
from __future__ import annotations
from types import SimpleNamespace
import pytest
import torch
from lerobot.configs.rewards import RewardModelConfig
from lerobot.rewards.factory import get_reward_model_class, make_reward_model_config
from lerobot.rewards.robometer import RobometerConfig
from lerobot.rewards.robometer.configuration_robometer import ROBOMETER_SPECIAL_TOKENS
from lerobot.rewards.robometer.modeling_robometer import (
ROBOMETER_FEATURE_PREFIX,
convert_bins_to_continuous,
decode_progress_outputs,
)
from tests.utils import skip_if_package_missing
# Length of the fake tokenizer used in `_patch_build`. The deterministic
# resize target derived in ``RobometerConfig.__post_init__`` is therefore
# ``_FAKE_TOKENIZER_LEN + len(ROBOMETER_SPECIAL_TOKENS)``.
_FAKE_TOKENIZER_LEN = 100
_EXPECTED_RESIZED_VOCAB = _FAKE_TOKENIZER_LEN + len(ROBOMETER_SPECIAL_TOKENS)
class _FakeQwenConfig:
"""Stand-in for a Qwen3-VL config (the `model.config` attribute).
``to_dict`` matches HF's ``PretrainedConfig.to_dict`` closely enough for
``RobometerConfig.__post_init__`` to snapshot a meaningful ``vlm_config``
into the saved ``config.json`` and for the reload path to round-trip
through ``AutoConfig.for_model``.
"""
def __init__(self, hidden_dim: int = 8, vocab_size: int = _FAKE_TOKENIZER_LEN) -> None:
# `vocab_size` here is the *pre-resize* value the fake backbone advertises.
# `__post_init__` is expected to overwrite it with `len(tokenizer) + 5`.
self.text_config = SimpleNamespace(hidden_size=hidden_dim, vocab_size=vocab_size)
self._hidden_dim = hidden_dim
self._vocab_size = vocab_size
def to_dict(self) -> dict:
return {
"model_type": "fake_qwen",
"text_config": {
"hidden_size": self._hidden_dim,
"vocab_size": self._vocab_size,
},
}
class _FakeEmbeddings(torch.nn.Module):
def __init__(self, num_embeddings: int = _FAKE_TOKENIZER_LEN) -> None:
super().__init__()
self.num_embeddings = num_embeddings
class _FakeBaseModel(torch.nn.Module):
"""Stand-in for the Qwen3-VL backbone during tests.
Provides the minimum surface `RobometerRewardModel.__init__` and
`_compute_rbm_logits` rely on: a `parameters()` iterator (for dtype +
device), a `config.text_config.hidden_size`, a `config.to_dict()` so
`_save_pretrained` can snapshot `vlm_config`,
`get_input_embeddings()` / `resize_token_embeddings()` so the fresh-init
embed resize is a no-op, and a forward that returns a `SimpleNamespace`
with a `hidden_states` tuple.
"""
def __init__(self, hidden_dim: int = 8) -> None:
super().__init__()
self._param = torch.nn.Parameter(torch.zeros(1))
self.hidden_dim = hidden_dim
self.config = _FakeQwenConfig(hidden_dim)
self._embeddings = _FakeEmbeddings()
def get_input_embeddings(self) -> _FakeEmbeddings:
return self._embeddings
def resize_token_embeddings(self, new_size: int) -> None:
self._embeddings.num_embeddings = new_size
def forward(self, **kwargs): # noqa: ARG002 - intentional kwargs sink
input_ids = kwargs["input_ids"]
return SimpleNamespace(
hidden_states=(torch.zeros(input_ids.shape[0], input_ids.shape[1], self.hidden_dim),),
last_hidden_state=torch.zeros(input_ids.shape[0], input_ids.shape[1], self.hidden_dim),
)
class _FakeTokenizer:
"""Minimal stand-in for an HF tokenizer.
``RobometerConfig.__post_init__`` uses ``len(tokenizer)`` to compute the
deterministic resize target ``len(tokenizer) + len(ROBOMETER_SPECIAL_TOKENS)``,
so a working ``__len__`` is all we need.
"""
def __init__(self, length: int = _FAKE_TOKENIZER_LEN) -> None:
self._length = length
def __len__(self) -> int:
return self._length
def _patch_build(monkeypatch) -> None:
"""Stub out the HF AutoX calls so Robometer construction stays cheap in tests.
Covers (EO-1 style no model-side override hooks):
* ``AutoConfig.from_pretrained`` (config side) used by
``RobometerConfig.__post_init__`` to snapshot the backbone config.
* ``AutoTokenizer.from_pretrained`` (config side) used by
``__post_init__`` to compute ``len(tokenizer) + 5``.
* ``AutoConfig.for_model`` used by
``RobometerConfig.vlm_backbone_config`` when rebuilding for ``from_config``.
* ``AutoModelForImageTextToText.from_pretrained`` fresh-training path
(``pretrained_path is None``).
* ``AutoModelForImageTextToText.from_config`` checkpoint-reload path
(``pretrained_path`` is set).
"""
from lerobot.rewards.robometer import configuration_robometer, modeling_robometer
monkeypatch.setattr(
modeling_robometer.AutoModelForImageTextToText,
"from_pretrained",
lambda *args, **kwargs: _FakeBaseModel(hidden_dim=8),
)
monkeypatch.setattr(
modeling_robometer.AutoModelForImageTextToText,
"from_config",
lambda *args, **kwargs: _FakeBaseModel(hidden_dim=8),
)
monkeypatch.setattr(
configuration_robometer.AutoConfig,
"for_model",
lambda *args, **kwargs: _FakeQwenConfig(hidden_dim=8),
)
monkeypatch.setattr(
configuration_robometer.AutoConfig,
"from_pretrained",
lambda *args, **kwargs: _FakeQwenConfig(hidden_dim=8),
)
monkeypatch.setattr(
configuration_robometer.AutoTokenizer,
"from_pretrained",
lambda *args, **kwargs: _FakeTokenizer(length=_FAKE_TOKENIZER_LEN),
)
def _make_batch(features: dict[str, torch.Tensor]) -> dict[str, torch.Tensor]:
"""Build a `compute_reward`-ready batch using Robometer's namespaced keys."""
return {f"{ROBOMETER_FEATURE_PREFIX}{key}": value for key, value in features.items()}
@skip_if_package_missing("transformers")
def test_robometer_config_registered(monkeypatch):
_patch_build(monkeypatch)
assert "robometer" in RewardModelConfig.get_known_choices()
assert RewardModelConfig.get_choice_class("robometer") is RobometerConfig
assert isinstance(make_reward_model_config("robometer", device="cpu"), RobometerConfig)
def test_robometer_factory_returns_in_tree_class():
from lerobot.rewards.robometer.modeling_robometer import RobometerRewardModel
assert get_reward_model_class("robometer") is RobometerRewardModel
def test_convert_bins_to_continuous_returns_expected_values():
# Two frames: first peaks at bin 0 (center 0.0), second peaks at bin 9 (center 1.0).
bin_logits = torch.full((2, 10), -10.0)
bin_logits[0, 0] = 10.0
bin_logits[1, -1] = 10.0
values = convert_bins_to_continuous(bin_logits)
assert values.shape == (2,)
assert torch.allclose(values, torch.tensor([0.0, 1.0]), atol=1e-3)
def test_decode_progress_outputs_returns_last_frame_values():
progress = torch.tensor([[0.1, 0.9], [0.4, 0.6]])
success_logits = torch.tensor([[0.0, 5.0], [0.0, -5.0]])
outputs = decode_progress_outputs(progress, success_logits, is_discrete_mode=False)
assert outputs["progress_pred"] == [pytest.approx([0.1, 0.9]), pytest.approx([0.4, 0.6])]
assert outputs["success_probs"][0][-1] == pytest.approx(torch.sigmoid(torch.tensor(5.0)).item(), abs=1e-3)
assert outputs["success_probs"][1][-1] == pytest.approx(
torch.sigmoid(torch.tensor(-5.0)).item(), abs=1e-3
)
def test_decode_progress_outputs_discrete_mode_softmaxes_over_bins():
# 2 frames, peaks at bin 0 and bin 9 → continuous predictions 0.0 and 1.0
bin_logits = torch.full((1, 2, 10), -10.0)
bin_logits[0, 0, 0] = 10.0
bin_logits[0, 1, -1] = 10.0
outputs = decode_progress_outputs(bin_logits, success_logits=None, is_discrete_mode=True)
assert outputs["success_probs"] == []
assert outputs["progress_pred"][0] == pytest.approx([0.0, 1.0], abs=1e-3)
@skip_if_package_missing("transformers")
def test_robometer_post_init_overwrites_vocab_size_with_tokenizer_length(monkeypatch):
"""``RobometerConfig.__post_init__`` must overwrite the backbone's stale
``text_config.vocab_size`` (which on the real Qwen3-VL config is the
padded embedding size, ``151,936``) with ``len(tokenizer) + 5``. This is
the contract that makes the published ``Robometer-4B`` checkpoint load
byte-equivalently."""
_patch_build(monkeypatch)
cfg = RobometerConfig(device="cpu", progress_loss_type="l2")
assert cfg.vlm_config["text_config"]["vocab_size"] == _EXPECTED_RESIZED_VOCAB
@skip_if_package_missing("transformers")
def test_robometer_compute_reward_reads_pre_encoded_inputs(monkeypatch):
from lerobot.rewards.robometer.modeling_robometer import RobometerRewardModel
progress = torch.tensor([[0.1, 0.9], [0.4, 0.6]])
success_logits = torch.tensor([[0.0, 5.0], [0.0, -5.0]])
_patch_build(monkeypatch)
cfg = RobometerConfig(device="cpu", reward_output="progress", progress_loss_type="l2")
model = RobometerRewardModel(cfg)
# Bypass the Qwen3-VL forward + head extraction with deterministic logits.
monkeypatch.setattr(model, "_compute_rbm_logits", lambda _inputs: (progress, success_logits))
batch = _make_batch({"input_ids": torch.zeros(2, 2, dtype=torch.long)})
rewards = model.compute_reward(batch)
assert torch.allclose(rewards, torch.tensor([0.9, 0.6]))
@skip_if_package_missing("transformers")
def test_robometer_compute_reward_can_return_binary_success(monkeypatch):
from lerobot.rewards.robometer.modeling_robometer import RobometerRewardModel
progress = torch.tensor([[0.1, 0.9], [0.4, 0.6]])
success_logits = torch.tensor([[0.0, 5.0], [0.0, -5.0]]) # sigmoid(5) > 0.5; sigmoid(-5) < 0.5
_patch_build(monkeypatch)
cfg = RobometerConfig(
device="cpu",
reward_output="success",
success_threshold=0.5,
progress_loss_type="l2",
)
model = RobometerRewardModel(cfg)
monkeypatch.setattr(model, "_compute_rbm_logits", lambda _inputs: (progress, success_logits))
batch = _make_batch({"input_ids": torch.zeros(2, 2, dtype=torch.long)})
rewards = model.compute_reward(batch)
assert torch.equal(rewards, torch.tensor([1.0, 0.0]))
@skip_if_package_missing("transformers")
def test_robometer_compute_reward_errors_when_inputs_missing(monkeypatch):
from lerobot.rewards.robometer.modeling_robometer import RobometerRewardModel
_patch_build(monkeypatch)
cfg = RobometerConfig(device="cpu", progress_loss_type="l2")
model = RobometerRewardModel(cfg)
with pytest.raises(KeyError, match=r"observation\.robometer\.input_ids"):
model.compute_reward({})
@skip_if_package_missing("transformers")
def test_robometer_save_pretrained_roundtrips(monkeypatch, tmp_path):
"""Saving and reloading a Robometer model in LeRobot HF format must produce
a single ``model.safetensors`` + ``config.json`` (no Hydra ``config.yaml``),
must round-trip user-tunable config fields, and must persist all three
prediction heads (``progress_head``, ``success_head``, ``preference_head``)
so the published ``Robometer-4B`` checkpoint loads byte-equivalently.
"""
from huggingface_hub.constants import CONFIG_NAME, SAFETENSORS_SINGLE_FILE
from safetensors.torch import load_file
from lerobot.rewards.robometer.modeling_robometer import RobometerRewardModel
_patch_build(monkeypatch)
cfg = RobometerConfig(
device="cpu",
pretrained_path="robometer/Robometer-4B",
# Knobs the user might tweak — must survive the round-trip.
image_key="observation.images.cam_top",
task_key="task",
reward_output="success",
success_threshold=0.7,
progress_loss_type="l2",
)
model = RobometerRewardModel(cfg)
model.save_pretrained(str(tmp_path))
# Exactly the files LeRobot's HubMixin promises.
assert (tmp_path / CONFIG_NAME).exists()
assert (tmp_path / SAFETENSORS_SINGLE_FILE).exists()
assert not (tmp_path / "config.yaml").exists() # we want HF-style, not Hydra
# All three heads must be present in the saved safetensors. The preference
# head is unused at inference but the published checkpoint expects its
# rows — losing it would silently break weight loading.
state = load_file(str(tmp_path / SAFETENSORS_SINGLE_FILE))
assert any(k.startswith("progress_head.") for k in state), "progress_head weights missing"
assert any(k.startswith("success_head.") for k in state), "success_head weights missing"
assert any(k.startswith("preference_head.") for k in state), "preference_head weights missing"
# Reload from the local directory: no Hub fetch, no YAML overlay. The
# base class drives subclass dispatch via the `type` field in config.json.
reloaded_cfg = RewardModelConfig.from_pretrained(str(tmp_path))
assert isinstance(reloaded_cfg, RobometerConfig)
reloaded_cfg.pretrained_path = str(tmp_path) # mimic lerobot-train's `validate()`
reloaded = RobometerRewardModel.from_pretrained(str(tmp_path), config=reloaded_cfg)
assert reloaded.config.image_key == "observation.images.cam_top"
assert reloaded.config.task_key == "task"
assert reloaded.config.reward_output == "success"
assert reloaded.config.success_threshold == 0.7
assert reloaded.config.progress_loss_type == "l2" # came back from config.json
+296
View File
@@ -0,0 +1,296 @@
# Copyright 2026 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for the TOPReward reward model."""
from __future__ import annotations
from types import SimpleNamespace
import pytest
import torch
from lerobot.configs.rewards import RewardModelConfig
from lerobot.rewards.factory import get_reward_model_class, make_reward_model_config
from lerobot.rewards.topreward import TOPRewardConfig
from lerobot.rewards.topreward.processor_topreward import TOPREWARD_FEATURE_PREFIX, TOPREWARD_INPUT_KEYS
from tests.utils import skip_if_package_missing
class _FakeQwenModel(torch.nn.Module):
"""Stand-in for ``Qwen3VLForConditionalGeneration``.
Returns a ``SimpleNamespace`` with ``logits`` of a controlled shape so
the log-prob extraction path in ``compute_reward`` can be exercised
without downloading real VLM weights.
"""
def __init__(self) -> None:
super().__init__()
self._param = torch.nn.Parameter(torch.zeros(1))
self._reward_value: float = -1.5
@classmethod
def from_pretrained(cls, *args, **kwargs): # noqa: ARG003
return cls()
def forward( # noqa: ARG002
self, input_ids, attention_mask=None, labels=None, logits_to_keep=0, **kwargs
):
batch_size, seq_len = input_ids.shape
vocab_size = 1000
logits = torch.zeros(batch_size, seq_len, vocab_size)
# Place a controlled log-prob at the target token position so the
# model returns a predictable reward value.
# The label-masked suffix is the last token.
# After the causal-LM shift (logits[:, :-1], labels[:, 1:]) the scored
# position is logits[:, -2, :] predicting labels[:, -1].
# We set logits so that log_softmax at the target token ≈ _reward_value.
for i in range(batch_size):
target_idx = int(input_ids[i, -1].item())
logits[i, -2, target_idx] = self._reward_value * -10 # high logit -> high log-prob
if logits_to_keep:
logits = logits[:, -logits_to_keep:, :]
return SimpleNamespace(logits=logits)
def _patch_build(monkeypatch) -> None:
"""Stub out HF AutoX so TOPReward construction is cheap and offline."""
from lerobot.rewards.topreward import modeling_topreward
monkeypatch.setattr(modeling_topreward, "Qwen3VLForConditionalGeneration", _FakeQwenModel)
def _make_batch(
input_ids: torch.Tensor,
attention_mask: torch.Tensor | None = None,
labels: torch.Tensor | None = None,
*,
omit: str | None = None,
) -> dict[str, torch.Tensor]:
"""Build a ``compute_reward``-ready batch using TOPReward's namespaced keys."""
batch_size, seq_len = input_ids.shape
if attention_mask is None:
attention_mask = torch.ones(batch_size, seq_len, dtype=torch.long)
batch: dict[str, torch.Tensor] = {}
if labels is not None:
batch[f"{TOPREWARD_FEATURE_PREFIX}labels"] = labels
batch.update(
{
f"{TOPREWARD_FEATURE_PREFIX}input_ids": input_ids,
f"{TOPREWARD_FEATURE_PREFIX}attention_mask": attention_mask,
f"{TOPREWARD_FEATURE_PREFIX}pixel_values_videos": torch.zeros(
batch_size, 1536, dtype=torch.float32
),
f"{TOPREWARD_FEATURE_PREFIX}video_grid_thw": torch.ones(batch_size, 3, dtype=torch.long),
f"{TOPREWARD_FEATURE_PREFIX}mm_token_type_ids": torch.zeros_like(input_ids),
}
)
if omit is not None:
batch.pop(f"{TOPREWARD_FEATURE_PREFIX}{omit}", None)
return batch
def _terminal_labels(input_ids: torch.Tensor) -> torch.Tensor:
labels = torch.full_like(input_ids, -100)
labels[:, -1] = input_ids[:, -1]
return labels
# ---------------------------------------------------------------------------
# Registry + factory
# ---------------------------------------------------------------------------
def test_topreward_config_registered():
assert "topreward" in RewardModelConfig.get_known_choices()
assert RewardModelConfig.get_choice_class("topreward") is TOPRewardConfig
assert isinstance(make_reward_model_config("topreward", device="cpu"), TOPRewardConfig)
def test_topreward_factory_returns_in_tree_class():
from lerobot.rewards.topreward.modeling_topreward import TOPRewardModel
assert get_reward_model_class("topreward") is TOPRewardModel
# ---------------------------------------------------------------------------
# Config validation
# ---------------------------------------------------------------------------
def test_topreward_config_rejects_zero_max_frames():
with pytest.raises(ValueError, match="max_frames must be >= 1"):
TOPRewardConfig(device="cpu", max_frames=0)
def test_topreward_config_rejects_non_positive_fps():
with pytest.raises(ValueError, match="fps must be > 0"):
TOPRewardConfig(device="cpu", fps=0.0)
def test_topreward_config_rejects_suffix_without_instruction_placeholder():
with pytest.raises(ValueError, match=r"\{instruction\}"):
TOPRewardConfig(device="cpu", prompt_suffix_template="no placeholder here")
# ---------------------------------------------------------------------------
# compute_reward
# ---------------------------------------------------------------------------
@skip_if_package_missing("transformers")
def test_topreward_compute_reward_returns_one_scalar_per_sample(monkeypatch):
"""``compute_reward`` must return a ``(B,)`` float32 tensor with one
log-prob reward per sample, consuming pre-encoded Qwen-VL tensors."""
from lerobot.rewards.topreward.modeling_topreward import TOPRewardModel
_patch_build(monkeypatch)
cfg = TOPRewardConfig(device="cpu")
model = TOPRewardModel(cfg)
input_ids = torch.randint(0, 100, (2, 10))
attention_mask = torch.ones(2, 10, dtype=torch.long)
labels = _terminal_labels(input_ids)
batch = _make_batch(input_ids, attention_mask, labels)
rewards = model.compute_reward(batch)
assert rewards.shape == (2,)
assert rewards.dtype == torch.float32
@skip_if_package_missing("transformers")
def test_topreward_compute_reward_applies_success_threshold(monkeypatch):
"""When ``success_threshold`` is finite, the model returns binary success."""
from lerobot.rewards.topreward.modeling_topreward import TOPRewardModel
_patch_build(monkeypatch)
cfg = TOPRewardConfig(device="cpu", success_threshold=0.0)
model = TOPRewardModel(cfg)
input_ids = torch.randint(0, 100, (2, 10))
attention_mask = torch.ones(2, 10, dtype=torch.long)
labels = _terminal_labels(input_ids)
batch = _make_batch(input_ids, attention_mask, labels)
rewards = model.compute_reward(batch)
assert rewards.shape == (2,)
assert set(rewards.tolist()).issubset({0.0, 1.0})
@skip_if_package_missing("transformers")
def test_topreward_compute_reward_errors_when_inputs_missing(monkeypatch):
from lerobot.rewards.topreward.modeling_topreward import TOPRewardModel
_patch_build(monkeypatch)
cfg = TOPRewardConfig(device="cpu")
model = TOPRewardModel(cfg)
with pytest.raises(KeyError, match=r"observation\.topreward\.input_ids"):
model.compute_reward(_make_batch(torch.randint(0, 100, (1, 10)), omit="input_ids"))
@skip_if_package_missing("transformers")
def test_topreward_compute_reward_errors_when_labels_missing(monkeypatch):
from lerobot.rewards.topreward.modeling_topreward import TOPRewardModel
_patch_build(monkeypatch)
cfg = TOPRewardConfig(device="cpu")
model = TOPRewardModel(cfg)
input_ids = torch.randint(0, 100, (1, 10))
with pytest.raises(KeyError, match=r"observation\.topreward\.labels"):
model.compute_reward(_make_batch(input_ids, labels=None))
@skip_if_package_missing("transformers")
def test_topreward_compute_reward_requires_all_encoder_keys(monkeypatch):
from lerobot.rewards.topreward.modeling_topreward import TOPRewardModel
_patch_build(monkeypatch)
cfg = TOPRewardConfig(device="cpu")
model = TOPRewardModel(cfg)
input_ids = torch.randint(0, 100, (1, 10))
labels = _terminal_labels(input_ids)
required_encoder_keys = set(TOPREWARD_INPUT_KEYS) - {"input_ids", "labels"}
for key in required_encoder_keys:
with pytest.raises(KeyError, match=rf"observation\.topreward\.{key}"):
model.compute_reward(_make_batch(input_ids, labels=labels, omit=key))
# ---------------------------------------------------------------------------
# Save / load — config-only checkpoint
# ---------------------------------------------------------------------------
@skip_if_package_missing("transformers")
def test_topreward_save_pretrained_writes_only_config_json(monkeypatch, tmp_path):
from huggingface_hub.constants import CONFIG_NAME, SAFETENSORS_SINGLE_FILE
from lerobot.rewards.topreward.modeling_topreward import TOPRewardModel
_patch_build(monkeypatch)
cfg = TOPRewardConfig(
device="cpu",
vlm_name="Qwen/Qwen3-VL-8B-Instruct",
fps=4.0,
image_key="observation.images.front",
)
model = TOPRewardModel(cfg)
model.save_pretrained(str(tmp_path))
assert (tmp_path / CONFIG_NAME).exists()
assert not (tmp_path / SAFETENSORS_SINGLE_FILE).exists()
@skip_if_package_missing("transformers")
def test_topreward_from_pretrained_local_dir_roundtrips_config(monkeypatch, tmp_path):
from lerobot.rewards.topreward.modeling_topreward import TOPRewardModel
_patch_build(monkeypatch)
cfg = TOPRewardConfig(
device="cpu",
vlm_name="Qwen/Qwen3-VL-8B-Instruct",
fps=4.0,
image_key="observation.images.front",
add_chat_template=True,
success_threshold=-1.5,
)
TOPRewardModel(cfg).save_pretrained(str(tmp_path))
reloaded = TOPRewardModel.from_pretrained(str(tmp_path))
assert isinstance(reloaded.config, TOPRewardConfig)
assert reloaded.config.vlm_name == "Qwen/Qwen3-VL-8B-Instruct"
assert reloaded.config.fps == 4.0
assert reloaded.config.image_key == "observation.images.front"
assert reloaded.config.add_chat_template is True
assert reloaded.config.success_threshold == -1.5
@skip_if_package_missing("transformers")
def test_topreward_is_not_trainable(monkeypatch):
from lerobot.rewards.topreward.modeling_topreward import TOPRewardModel
_patch_build(monkeypatch)
cfg = TOPRewardConfig(device="cpu")
model = TOPRewardModel(cfg)
assert model.is_trainable is False
with pytest.raises(NotImplementedError, match="not trainable"):
model.forward({"x": torch.zeros(1)})
+354
View File
@@ -0,0 +1,354 @@
# Copyright 2026 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for Robometer's pre-processing helpers and encoder step.
Covers the pure helpers (``_video_to_numpy`` and ``_expand_tasks``) directly,
and exercises :class:`RobometerEncoderProcessorStep` with a stubbed
``AutoProcessor`` so we don't need to download Qwen-VL just to test the
dataclass plumbing (``transform_features`` / ``get_config``).
The full ``__call__`` path that runs ``process_vision_info`` + the Qwen
processor is intentionally *not* covered here it is essentially HF glue
that's exercised by the integration / parity scripts.
"""
from __future__ import annotations
from typing import Any
import numpy as np
import pytest
import torch
from lerobot.configs import FeatureType, PipelineFeatureType, PolicyFeature
from lerobot.rewards.robometer.processor_robometer import (
PROGRESS_PROMPT,
_expand_tasks,
_frames_to_pil,
_video_to_numpy,
)
from tests.utils import skip_if_package_missing
def _skip_if_robometer_extras_missing(func):
"""Apply both optional-dependency guards in one shot.
``RobometerEncoderProcessorStep.__post_init__`` calls
``require_package("transformers", ...)`` *and*
``require_package("qwen-vl-utils", ...)``, so both need to be present
before we can instantiate the step.
"""
func = skip_if_package_missing("qwen-vl-utils", import_name="qwen_vl_utils")(func)
func = skip_if_package_missing("transformers")(func)
return func
# ---------------------------------------------------------------------------
# _video_to_numpy — pure tensor → uint8 (T, H, W, C) conversion
# ---------------------------------------------------------------------------
def test_video_to_numpy_chw_float_is_converted_to_thwc_uint8():
video = torch.rand(4, 3, 8, 8) # (T, C, H, W) floats in [0, 1]
array = _video_to_numpy(video, max_frames=None)
assert array.shape == (4, 8, 8, 3)
assert array.dtype == np.uint8
assert array.min() >= 0 and array.max() <= 255
def test_video_to_numpy_already_thwc_uint8_passes_through():
video = torch.randint(0, 256, (3, 8, 8, 3), dtype=torch.uint8) # (T, H, W, C)
array = _video_to_numpy(video, max_frames=None)
assert array.shape == (3, 8, 8, 3)
assert array.dtype == np.uint8
def test_video_to_numpy_max_frames_tail_crops_recent_frames():
"""``max_frames`` should keep the **last** K frames (most recent)."""
video = torch.zeros(10, 3, 4, 4)
for t in range(10):
video[t] = t / 9.0 # marker: 0 at t=0, ≈1 at t=9
array = _video_to_numpy(video, max_frames=3)
assert array.shape == (3, 4, 4, 3)
# The first kept frame is t=7 → marker ≈ 7/9 → uint8 ≈ 198
assert int(array[0, 0, 0, 0]) == int(round(7 / 9 * 255))
# The last kept frame is t=9 → marker = 1.0 → uint8 = 255
assert int(array[-1, 0, 0, 0]) == 255
def test_video_to_numpy_rejects_3d_input():
with pytest.raises(ValueError, match="Expected channel dim"):
_video_to_numpy(torch.zeros(4, 8, 8), max_frames=None)
def test_video_to_numpy_floats_above_one_pass_through_without_rescaling():
"""If ``array.max() > 1`` the helper assumes the tensor is already in the
[0, 255] range (uint8-as-float), so values pass through unchanged."""
video = torch.full((1, 3, 2, 2), 5.0)
array = _video_to_numpy(video, max_frames=None)
assert array.shape == (1, 2, 2, 3)
assert int(array.max()) == 5
def test_video_to_numpy_clips_very_large_floats_to_uint8_max():
"""Out-of-uint8-range floats are clipped at 255 before the cast."""
video = torch.full((1, 3, 2, 2), 300.0)
array = _video_to_numpy(video, max_frames=None)
assert int(array.max()) == 255
# ---------------------------------------------------------------------------
# _expand_tasks — string / list / tuple broadcasting to batch size
# ---------------------------------------------------------------------------
def test_expand_tasks_string_is_broadcast_to_batch_size():
assert _expand_tasks("pick up", batch_size=3, default=None) == ["pick up", "pick up", "pick up"]
def test_expand_tasks_list_of_matching_size_passes_through():
assert _expand_tasks(["a", "b", "c"], batch_size=3, default=None) == ["a", "b", "c"]
def test_expand_tasks_tuple_is_normalised_to_list():
assert _expand_tasks(("a", "b"), batch_size=2, default=None) == ["a", "b"]
def test_expand_tasks_single_element_list_is_broadcast():
assert _expand_tasks(["only one"], batch_size=3, default=None) == ["only one"] * 3
def test_expand_tasks_size_mismatch_raises():
with pytest.raises(ValueError, match="Expected 3 tasks"):
_expand_tasks(["a", "b"], batch_size=3, default=None)
def test_expand_tasks_missing_uses_default():
assert _expand_tasks(None, batch_size=2, default="fallback") == ["fallback", "fallback"]
def test_expand_tasks_missing_without_default_raises():
with pytest.raises(KeyError, match="task description"):
_expand_tasks(None, batch_size=1, default=None)
def test_expand_tasks_wrong_type_raises():
with pytest.raises(TypeError, match="must be a string or list"):
_expand_tasks(42, batch_size=1, default=None)
# ---------------------------------------------------------------------------
# _frames_to_pil — uint8 (T, H, W, C) → list[PIL.Image]
# ---------------------------------------------------------------------------
def test_frames_to_pil_returns_one_image_per_frame():
frames = np.zeros((4, 8, 8, 3), dtype=np.uint8)
images = _frames_to_pil(frames)
assert len(images) == 4
assert all(img.size == (8, 8) for img in images)
def test_frames_to_pil_casts_floats_to_uint8():
frames = np.full((2, 4, 4, 3), 200.0, dtype=np.float32)
images = _frames_to_pil(frames)
assert len(images) == 2
# PIL converted from clipped uint8 - sanity check pixel values come through.
assert np.asarray(images[0]).dtype == np.uint8
def test_frames_to_pil_rejects_non_4d_input():
with pytest.raises(ValueError, match=r"\(T,H,W,C\)"):
_frames_to_pil(np.zeros((4, 8, 8), dtype=np.uint8))
# ---------------------------------------------------------------------------
# Encoder step plumbing — exercise dataclass surface with a stubbed AutoProcessor
# ---------------------------------------------------------------------------
class _FakeTokenizer:
"""Tokenizer surface the encoder step touches in ``__post_init__``."""
def __init__(self) -> None:
self.pad_token: str | None = None
self.eos_token = "<|endoftext|>"
self._vocab: dict[str, int] = {"<|endoftext|>": 0}
self.added: list[str] = []
def get_vocab(self) -> dict[str, int]:
return self._vocab
def add_special_tokens(self, payload: dict[str, Any]) -> int:
for token in payload.get("additional_special_tokens", []):
if token not in self._vocab:
self._vocab[token] = len(self._vocab)
self.added.append(token)
return len(self.added)
class _FakeAutoProcessor:
"""Stand-in returned by ``AutoProcessor.from_pretrained`` during tests."""
def __init__(self) -> None:
self.tokenizer = _FakeTokenizer()
self.image_processor = None
self.video_processor = None
@classmethod
def from_pretrained(cls, *args, **kwargs): # noqa: ARG003
return cls()
def _build_step(monkeypatch, **overrides):
from lerobot.rewards.robometer import processor_robometer
monkeypatch.setattr(processor_robometer, "AutoProcessor", _FakeAutoProcessor)
return processor_robometer.RobometerEncoderProcessorStep(**overrides)
@_skip_if_robometer_extras_missing
def test_encoder_step_registers_special_tokens_on_tokenizer(monkeypatch):
"""``__post_init__`` must register Robometer's five special tokens on the
tokenizer that ships with the chosen Qwen-VL checkpoint."""
from lerobot.rewards.robometer.configuration_robometer import ROBOMETER_SPECIAL_TOKENS
step = _build_step(monkeypatch)
vocab = step._processor.tokenizer.get_vocab()
for token in ROBOMETER_SPECIAL_TOKENS:
assert token in vocab, f"{token} not registered on the tokenizer"
@_skip_if_robometer_extras_missing
def test_encoder_step_sets_pad_token_to_eos_when_missing(monkeypatch):
"""Qwen tokenizers ship without a pad token; the step must reuse EOS so
batched processing doesn't crash on padding."""
step = _build_step(monkeypatch)
assert step._processor.tokenizer.pad_token == "<|endoftext|>"
@_skip_if_robometer_extras_missing
def test_encoder_step_get_config_roundtrips_user_fields(monkeypatch):
"""``get_config`` must serialise every user-tunable field — these are what
the processor pipeline saves under ``preprocessor_config.json``."""
step = _build_step(
monkeypatch,
base_model_id="Qwen/Qwen3-VL-4B-Instruct",
image_key="observation.images.cam_top",
task_key="task",
default_task="do the thing",
max_frames=12,
use_multi_image=True,
use_per_frame_progress_token=True,
max_length=2048,
)
cfg = step.get_config()
assert cfg == {
"base_model_id": "Qwen/Qwen3-VL-4B-Instruct",
"image_key": "observation.images.cam_top",
"task_key": "task",
"default_task": "do the thing",
"max_frames": 12,
"use_multi_image": True,
"use_per_frame_progress_token": True,
"max_length": 2048,
}
@_skip_if_robometer_extras_missing
def test_encoder_step_transform_features_is_identity(monkeypatch):
"""The encoder step writes Qwen tensors into ``observation`` at call time,
but it does **not** advertise new typed features at pipeline-build time
the downstream model consumes them via the ``ROBOMETER_FEATURE_PREFIX``
namespace, not via the typed feature map.
"""
step = _build_step(monkeypatch)
features = {
PipelineFeatureType.OBSERVATION: {
"observation.images.top": PolicyFeature(shape=(3, 224, 224), type=FeatureType.VISUAL),
}
}
assert step.transform_features(features) == features
@_skip_if_robometer_extras_missing
def test_encoder_step_build_conversation_inserts_prog_token_per_frame(monkeypatch):
"""In multi-image mode with per-frame progress tokens, the conversation
must alternate ``image`` and ``<|prog_token|>`` text entries, one pair
per frame, after the task prompt."""
step = _build_step(
monkeypatch,
use_multi_image=True,
use_per_frame_progress_token=True,
)
frames = np.zeros((3, 8, 8, 3), dtype=np.uint8)
conversation = step._build_conversation(frames, task="pick up the cube")
assert len(conversation) == 1 and conversation[0]["role"] == "user"
content = conversation[0]["content"]
# First entry is the task prompt.
assert content[0] == {"type": "text", "text": PROGRESS_PROMPT.format(task="pick up the cube")}
# Then 3 (image, <|prog_token|>) pairs.
expected_tail = [
item
for _ in range(3)
for item in (
{"type": "image"}, # value asserted below
{"type": "text", "text": "<|prog_token|>"},
)
]
assert len(content) == 1 + len(expected_tail)
for got, exp in zip(content[1:], expected_tail, strict=True):
assert got["type"] == exp["type"]
if exp["type"] == "text":
assert got["text"] == exp["text"]
@_skip_if_robometer_extras_missing
def test_encoder_step_build_conversation_video_mode_uses_single_video_entry(monkeypatch):
"""When ``use_multi_image=False``, frames are bundled into a single
``video`` content entry instead of individual ``image`` entries."""
step = _build_step(
monkeypatch,
use_multi_image=False,
use_per_frame_progress_token=False,
)
frames = np.zeros((4, 8, 8, 3), dtype=np.uint8)
conversation = step._build_conversation(frames, task="pour the water")
content = conversation[0]["content"]
# Exactly two entries: the prompt and one video entry.
assert len(content) == 2
assert content[0]["type"] == "text"
assert content[1]["type"] == "video"
# The video entry carries all four frames.
assert len(content[1]["video"]) == 4
+80
View File
@@ -0,0 +1,80 @@
# Copyright 2026 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""End-to-end TOPReward smoke test with the real Qwen3-VL model."""
import os
import pytest
import torch
pytest.importorskip("transformers")
from lerobot.rewards.topreward.configuration_topreward import TOPRewardConfig # noqa: E402
from lerobot.rewards.topreward.modeling_topreward import TOPRewardModel # noqa: E402
from lerobot.rewards.topreward.processor_topreward import ( # noqa: E402
TOPREWARD_FEATURE_PREFIX,
TOPREWARD_INPUT_KEYS,
make_topreward_pre_post_processors,
)
from tests.utils import require_cuda # noqa: E402
pytestmark = pytest.mark.skipif(
os.environ.get("CI") == "true" or os.environ.get("GITHUB_ACTIONS") == "true",
reason="This test requires downloading and loading Qwen3-VL and is not meant for CI",
)
def _make_dummy_topreward_batch(image_key: str, task_key: str) -> dict[str, object]:
num_frames = 4
image_size = 64
frames = torch.zeros(1, num_frames, 3, image_size, image_size, dtype=torch.uint8)
for frame_idx in range(num_frames):
frames[0, frame_idx, 0].fill_(min(frame_idx * 48, 255))
frames[0, frame_idx, 1].fill_(96)
frames[0, frame_idx, 2].fill_(192)
return {
image_key: frames,
task_key: ["pick up the red cube"],
}
@require_cuda
def test_topreward_full_qwen3vl_preprocessor_to_compute_reward():
cfg = TOPRewardConfig(
vlm_name="Qwen/Qwen3-VL-8B-Instruct",
device="cuda",
max_frames=4,
fps=2.0,
max_input_length=4096,
)
preprocessor, _ = make_topreward_pre_post_processors(cfg)
encoded_batch = preprocessor(_make_dummy_topreward_batch(cfg.image_key, cfg.task_key))
for key in TOPREWARD_INPUT_KEYS:
assert f"{TOPREWARD_FEATURE_PREFIX}{key}" in encoded_batch
model = TOPRewardModel(cfg)
try:
model.to(cfg.device)
model.eval()
rewards = model.compute_reward(encoded_batch)
finally:
del model
torch.cuda.empty_cache()
assert rewards.shape == (1,)
assert rewards.dtype == torch.float32
assert torch.isfinite(rewards).all()
+246
View File
@@ -0,0 +1,246 @@
# Copyright 2026 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for TOPReward's pre-processing helpers and encoder step."""
from __future__ import annotations
import pytest
import torch
from lerobot.configs import FeatureType, PipelineFeatureType, PolicyFeature
from lerobot.rewards.topreward.processor_topreward import (
TOPREWARD_FEATURE_PREFIX,
TOPREWARD_INPUT_KEYS,
_expand_tasks,
_prepare_video_batch,
)
from lerobot.types import TransitionKey
from tests.utils import skip_if_package_missing
# ---------------------------------------------------------------------------
# _prepare_video_batch — raw image/video batch -> (B, T, C, H, W) uint8
# ---------------------------------------------------------------------------
def test_prepare_video_batch_batched_chw_float_is_converted_to_uint8():
video = torch.rand(2, 4, 3, 8, 8)
tensor = _prepare_video_batch(video, max_frames=None)
assert tensor.shape == (2, 4, 3, 8, 8)
assert tensor.dtype == torch.uint8
assert tensor.min() >= 0 and tensor.max() <= 255
def test_prepare_video_batch_batched_thwc_uint8_is_permuted_to_channel_first():
video = torch.randint(0, 256, (2, 3, 8, 8, 3), dtype=torch.uint8)
tensor = _prepare_video_batch(video, max_frames=None)
assert tensor.shape == (2, 3, 3, 8, 8)
assert tensor.dtype == torch.uint8
def test_prepare_video_batch_max_frames_tail_crops_recent_frames():
video = torch.zeros(1, 10, 3, 4, 4)
for t in range(10):
video[:, t] = t / 9.0
tensor = _prepare_video_batch(video, max_frames=3)
assert tensor.shape == (1, 3, 3, 4, 4)
assert int(tensor[0, 0, 0, 0, 0]) == int(7 / 9 * 255)
assert int(tensor[0, -1, 0, 0, 0]) == 255
def test_prepare_video_batch_rejects_3d_input():
with pytest.raises(ValueError, match="Expected TOPReward frames"):
_prepare_video_batch(torch.zeros(4, 8, 8), max_frames=None)
def test_prepare_video_batch_floats_above_one_are_rescaled_and_clipped():
video = torch.full((1, 1, 3, 2, 2), 5.0)
tensor = _prepare_video_batch(video, max_frames=None)
assert tensor.shape == (1, 1, 3, 2, 2)
assert int(tensor.max()) == 255
def test_prepare_video_batch_clips_very_large_floats_to_uint8_max():
video = torch.full((1, 1, 3, 2, 2), 300.0)
tensor = _prepare_video_batch(video, max_frames=None)
assert int(tensor.max()) == 255
# ---------------------------------------------------------------------------
# _expand_tasks — string / list / tuple broadcasting to batch size
# ---------------------------------------------------------------------------
def test_expand_tasks_string_is_broadcast_to_batch_size():
assert _expand_tasks("pick up", batch_size=3, default=None) == ["pick up", "pick up", "pick up"]
def test_expand_tasks_list_of_matching_size_passes_through():
assert _expand_tasks(["a", "b", "c"], batch_size=3, default=None) == ["a", "b", "c"]
def test_expand_tasks_tuple_is_normalised_to_list():
assert _expand_tasks(("a", "b"), batch_size=2, default=None) == ["a", "b"]
def test_expand_tasks_single_element_list_is_broadcast():
assert _expand_tasks(["only one"], batch_size=3, default=None) == ["only one"] * 3
def test_expand_tasks_size_mismatch_raises():
with pytest.raises(ValueError, match="Expected 3 tasks"):
_expand_tasks(["a", "b"], batch_size=3, default=None)
def test_expand_tasks_missing_uses_default():
assert _expand_tasks(None, batch_size=2, default="fallback") == ["fallback", "fallback"]
def test_expand_tasks_missing_without_default_raises():
with pytest.raises(KeyError, match="task description"):
_expand_tasks(None, batch_size=1, default=None)
def test_expand_tasks_wrong_type_raises():
with pytest.raises(TypeError, match="must be a string or list"):
_expand_tasks(42, batch_size=1, default=None)
# ---------------------------------------------------------------------------
# Encoder step — stubbed AutoProcessor
# ---------------------------------------------------------------------------
def _skip_if_topreward_extras_missing(func):
func = skip_if_package_missing("transformers")(func)
return func
class _FakeTokenizer:
eos_token = "<|endoftext|>"
pad_token = "<|endoftext|>"
def __call__(self, *args, **kwargs):
return {"input_ids": torch.zeros(1, 10, dtype=torch.long)}
class _FakeAutoProcessor:
def __init__(self) -> None:
self.tokenizer = _FakeTokenizer()
@classmethod
def from_pretrained(cls, *args, **kwargs): # noqa: ARG003
return cls()
def apply_chat_template(self, messages, **kwargs): # noqa: ARG002
return "fake_prompt_text"
def __call__(self, text=None, images=None, videos=None, **kwargs): # noqa: ARG002
seq_len = 10
batch_size = len(text) if isinstance(text, list) else 1
return {
"input_ids": torch.randint(0, 100, (batch_size, seq_len)),
"attention_mask": torch.ones(batch_size, seq_len, dtype=torch.long),
"pixel_values_videos": torch.zeros(batch_size, 1536, dtype=torch.float32),
"video_grid_thw": torch.ones(batch_size, 3, dtype=torch.long),
"mm_token_type_ids": torch.zeros(batch_size, seq_len, dtype=torch.long),
}
def _build_step(monkeypatch, **overrides):
from lerobot.rewards.topreward import processor_topreward
monkeypatch.setattr(processor_topreward, "AutoProcessor", _FakeAutoProcessor)
return processor_topreward.TOPRewardEncoderProcessorStep(**overrides)
def _make_transition(observation: dict, complementary: dict | None = None) -> dict:
transition: dict = {TransitionKey.OBSERVATION: observation}
if complementary is not None:
transition[TransitionKey.COMPLEMENTARY_DATA] = complementary
return transition
@_skip_if_topreward_extras_missing
def test_encoder_step_emits_input_ids_and_labels(monkeypatch):
"""The processor must emit Qwen-VL tensors including ``input_ids`` and
``labels`` under the ``observation.topreward.*`` namespace."""
step = _build_step(monkeypatch)
frames_batch = torch.zeros(2, 4, 3, 8, 8)
out = step(
_make_transition(
observation={"observation.images.top": frames_batch},
complementary={"task": ["pick", "place"]},
)
)
obs_out = out[TransitionKey.OBSERVATION]
for key in TOPREWARD_INPUT_KEYS:
assert f"{TOPREWARD_FEATURE_PREFIX}{key}" in obs_out
input_ids = obs_out[f"{TOPREWARD_FEATURE_PREFIX}input_ids"]
labels = obs_out[f"{TOPREWARD_FEATURE_PREFIX}labels"]
assert labels.dtype == torch.long
assert labels.shape == (2, 10)
assert labels[:, :-1].eq(-100).all()
assert labels[:, -1].equal(input_ids[:, -1])
@_skip_if_topreward_extras_missing
def test_encoder_step_get_config_roundtrips_user_fields(monkeypatch):
step = _build_step(
monkeypatch,
vlm_name="Qwen/Qwen3-VL-8B-Instruct",
image_key="observation.images.cam_top",
task_key="task",
default_task="do the thing",
max_frames=8,
fps=4.0,
add_chat_template=True,
max_length=2048,
)
cfg = step.get_config()
assert cfg["vlm_name"] == "Qwen/Qwen3-VL-8B-Instruct"
assert cfg["image_key"] == "observation.images.cam_top"
assert cfg["default_task"] == "do the thing"
assert cfg["max_frames"] == 8
assert cfg["fps"] == 4.0
assert cfg["add_chat_template"] is True
assert cfg["max_length"] == 2048
@_skip_if_topreward_extras_missing
def test_encoder_step_transform_features_is_identity(monkeypatch):
step = _build_step(monkeypatch)
features = {
PipelineFeatureType.OBSERVATION: {
"observation.images.top": PolicyFeature(shape=(3, 224, 224), type=FeatureType.VISUAL),
}
}
assert step.transform_features(features) == features
@_skip_if_topreward_extras_missing
def test_encoder_step_rejects_missing_image_key(monkeypatch):
step = _build_step(monkeypatch, image_key="observation.images.top")
with pytest.raises(KeyError, match="image key"):
step(_make_transition(observation={}, complementary={"task": "pick"}))
+109 -7
View File
@@ -1,10 +1,14 @@
"""Tests for policy.path support in YAML config files (issue #2957)."""
import json
import sys
import tempfile
from dataclasses import dataclass, field
from unittest.mock import patch
import yaml
from lerobot.configs import parser
from lerobot.configs.parser import (
_config_path_args,
_config_yaml_overrides,
@@ -16,7 +20,8 @@ from lerobot.configs.parser import (
def test_extract_path_fields_from_yaml():
"""Test that policy.path is extracted from a YAML config and removed."""
"""Test that policy.path is extracted from a YAML config and the policy block
is removed entirely (siblings are captured separately as cli_overrides)."""
config = {
"dataset": {"repo_id": "lerobot/pusht"},
"policy": {"type": "smolvla", "path": "lerobot/smolvla_base", "push_to_hub": False},
@@ -26,26 +31,33 @@ def test_extract_path_fields_from_yaml():
config_path = f.name
_config_path_args.clear()
_config_yaml_overrides.clear()
cleaned_path = extract_path_fields_from_config(config_path, ["policy"])
# Path should be extracted and stored
assert _config_path_args["policy"] == "lerobot/smolvla_base"
# Cleaned config should not have the path field
# Cleaned config should not have the policy block at all -- draccus must not
# try to decode it as PreTrainedConfig; the actual config comes from
# from_pretrained(path) with the captured overrides applied on top.
with open(cleaned_path) as f:
cleaned = yaml.safe_load(f)
assert "path" not in cleaned["policy"]
assert cleaned["policy"]["type"] == "smolvla"
assert cleaned["policy"]["push_to_hub"] is False
assert "policy" not in cleaned
# Original dataset should be untouched
assert cleaned["dataset"]["repo_id"] == "lerobot/pusht"
# Sibling overrides (excluding type/path) captured for from_pretrained.
overrides = get_yaml_overrides("policy")
assert any("push_to_hub=false" in o for o in overrides)
_config_path_args.clear()
_config_yaml_overrides.clear()
def test_extract_path_fields_from_json():
"""Test that policy.path is extracted from a JSON config."""
"""Test that policy.path is extracted from a JSON config and the policy
block is removed entirely."""
config = {
"policy": {"type": "act", "path": "some/local/path"},
}
@@ -54,15 +66,17 @@ def test_extract_path_fields_from_json():
config_path = f.name
_config_path_args.clear()
_config_yaml_overrides.clear()
cleaned_path = extract_path_fields_from_config(config_path, ["policy"])
assert _config_path_args["policy"] == "some/local/path"
with open(cleaned_path) as f:
cleaned = json.load(f)
assert "path" not in cleaned["policy"]
assert "policy" not in cleaned
_config_path_args.clear()
_config_yaml_overrides.clear()
def test_extract_no_path_returns_original():
@@ -216,3 +230,91 @@ def test_flatten_nested_with_bools():
args = _flatten_to_cli_args(d)
assert "--optimizer.use_warmup=true" in args
assert "--optimizer.lr=0.01" in args
def test_extract_removes_field_with_siblings_and_no_type():
"""Regression: when policy.path has siblings but no type:, the entire policy
block must still be removed from the cleaned config. Otherwise draccus tries
to decode the leftover dict as PreTrainedConfig and crashes on the missing
type discriminator.
"""
config = {
"dataset": {"repo_id": "lerobot/pusht"},
"policy": {
"path": "lerobot/smolvla_base",
"n_action_steps": 10,
"dtype": "bfloat16",
},
}
with tempfile.NamedTemporaryFile(mode="w", suffix=".yaml", delete=False) as f:
yaml.dump(config, f)
config_path = f.name
_config_path_args.clear()
_config_yaml_overrides.clear()
cleaned_path = extract_path_fields_from_config(config_path, ["policy"])
with open(cleaned_path) as f:
cleaned = yaml.safe_load(f) or {}
assert "policy" not in cleaned, "policy block should be fully removed when path is present"
assert cleaned["dataset"]["repo_id"] == "lerobot/pusht"
assert _config_path_args["policy"] == "lerobot/smolvla_base"
overrides = get_yaml_overrides("policy")
assert any("n_action_steps=10" in o for o in overrides)
assert any("dtype=bfloat16" in o for o in overrides)
_config_path_args.clear()
_config_yaml_overrides.clear()
@dataclass
class _DummyNested:
foo: int = 0
@dataclass
class _DummyConfig:
nested: _DummyNested = field(default_factory=_DummyNested)
other: str = "default"
@classmethod
def __get_path_fields__(cls):
return ["nested"]
def test_wrap_uses_cleaned_config_for_draccus_parse():
"""Regression: wrap() updates config_path_cli to point at the cleaned temp
file but must propagate that to the draccus.parse fallback branch. Without
the fix, cli_args still contains --config_path=<original> and draccus reads
the original YAML with `path:` still in it, crashing on the unknown field.
"""
config = {
"nested": {"path": "some/checkpoint", "foo": 42},
"other": "set-via-yaml",
}
with tempfile.NamedTemporaryFile(mode="w", suffix=".yaml", delete=False) as f:
yaml.dump(config, f)
config_path = f.name
_config_path_args.clear()
_config_yaml_overrides.clear()
captured: dict = {}
@parser.wrap()
def main(cfg: _DummyConfig) -> _DummyConfig:
captured["cfg"] = cfg
return cfg
with patch.object(sys, "argv", ["prog", f"--config_path={config_path}"]):
main()
assert captured["cfg"].other == "set-via-yaml"
assert _config_path_args["nested"] == "some/checkpoint"
# Cleaned config dropped `nested:` entirely; defaults stand for this wrapper
# class (a real PreTrainedConfig would now load the checkpoint and apply
# the captured yaml_overrides via from_pretrained()).
assert captured["cfg"].nested.foo == 0
_config_path_args.clear()
_config_yaml_overrides.clear()