# Copyright 2026 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Tests for the reward model base classes and registry."""

import json
from dataclasses import dataclass
from pathlib import Path
from types import SimpleNamespace

import pytest
import torch

from lerobot.configs.rewards import RewardModelConfig
from lerobot.optim.optimizers import AdamWConfig
from lerobot.rewards.pretrained import PreTrainedRewardModel


# Minimal config registered under "_dummy_hub_reward"; it only supplies an
# optimizer preset so the dummy model below can be instantiated in tests.
@RewardModelConfig.register_subclass(name="_dummy_hub_reward")
@dataclass
class _DummyHubRewardConfig(RewardModelConfig):
    def get_optimizer_preset(self):
        return AdamWConfig(lr=1e-4)


# Smallest possible reward model: a single learnable scalar. It implements
# ``compute_reward`` only and deliberately does not override ``forward``.
class _DummyHubReward(PreTrainedRewardModel):
    config_class = _DummyHubRewardConfig
    name = "_dummy_hub_reward"

    def __init__(self, config):
        super().__init__(config)
        self.bias = torch.nn.Parameter(torch.zeros(1))

    def compute_reward(self, batch):
        return self.bias.expand(1)


def test_reward_model_config_registry():
    """Verify that classifier and sarm are registered."""
    known = RewardModelConfig.get_known_choices()
    assert "reward_classifier" in known
    assert "sarm" in known


def test_reward_model_config_lookup():
    """Verify that we can look up configs by name."""
    cls = RewardModelConfig.get_choice_class("reward_classifier")
    from lerobot.rewards.classifier.configuration_classifier import RewardClassifierConfig

    assert cls is RewardClassifierConfig


def test_factory_get_reward_model_class():
    """Test the get_reward_model_class factory."""
    from lerobot.rewards.factory import get_reward_model_class

    cls = get_reward_model_class("sarm")
    from lerobot.rewards.sarm.modeling_sarm import SARMRewardModel

    assert cls is SARMRewardModel


def test_factory_unknown_raises():
    """Unknown name should raise ValueError."""
    from lerobot.rewards.factory import get_reward_model_class

    with pytest.raises(ValueError, match="not available"):
        get_reward_model_class("nonexistent_reward_model")


def test_pretrained_reward_model_requires_config_class():
    """Subclass without config_class should fail."""
    # The class statement itself is expected to raise, hence it lives inside
    # the ``pytest.raises`` block.
    with pytest.raises(TypeError, match="must define 'config_class'"):

        class BadModel(PreTrainedRewardModel):
            name = "bad"

            def compute_reward(self, batch):
                pass


def test_pretrained_reward_model_requires_name():
    """Subclass without name should fail."""
    # The class statement itself is expected to raise, hence it lives inside
    # the ``pytest.raises`` block.
    with pytest.raises(TypeError, match="must define 'name'"):

        class BadModel(PreTrainedRewardModel):
            config_class = RewardModelConfig

            def compute_reward(self, batch):
                pass


def test_non_trainable_forward_raises():
    """Non-trainable model should raise on forward()."""
    # ``dataclass`` and ``AdamWConfig`` are module-level imports; they are not
    # re-imported here to avoid shadowing the module-level names.

    @dataclass
    class DummyConfig(RewardModelConfig):
        def get_optimizer_preset(self):
            return AdamWConfig(lr=1e-4)

    class DummyReward(PreTrainedRewardModel):
        config_class = DummyConfig
        name = "dummy_test"

        def compute_reward(self, batch):
            return torch.zeros(1)

    config = DummyConfig()
    model = DummyReward(config)

    # ``forward`` is not overridden by DummyReward, so calling it must fail.
    with pytest.raises(NotImplementedError, match="not trainable"):
        model.forward({"x": torch.zeros(1)})


# ---------------------------------------------------------------------------
# Trainable vs zero-shot (general-purpose) reward models.
# The proposal explicitly supports models like TOPReward that wrap a pretrained
# VLM and produce a reward signal without any training step. These tests pin
# the contract that lets such models coexist with trainable ones.
# ---------------------------------------------------------------------------


def test_is_trainable_false_when_forward_not_overridden():
    """A reward model that only implements ``compute_reward`` is zero-shot."""
    model, _ = _make_dummy_reward_model()
    assert model.is_trainable is False


def test_is_trainable_true_when_forward_overridden():
    """Overriding ``forward`` flips ``is_trainable`` to True."""

    class _TrainableReward(_DummyHubReward):
        name = "_trainable_dummy_reward"

        def forward(self, batch):
            loss = (self.bias**2).sum()
            return loss, {}

    # Register a fresh config subclass so the subclass check passes.
    @RewardModelConfig.register_subclass(name="_trainable_dummy_reward")
    @dataclass
    class _TrainableConfig(_DummyHubRewardConfig):
        pass

    _TrainableReward.config_class = _TrainableConfig

    model = _TrainableReward(_TrainableConfig())
    assert model.is_trainable is True


# ---------------------------------------------------------------------------
# RewardModelConfig.from_pretrained
# ---------------------------------------------------------------------------


def test_reward_model_config_from_pretrained_raises_when_config_missing(tmp_path):
    """``from_pretrained`` must surface a clear ``FileNotFoundError`` when the
    target directory exists but does not contain ``config.json``, instead of
    crashing later inside ``draccus.parse``.
    """
    # tmp_path exists but has no config.json
    with pytest.raises(FileNotFoundError, match="config.json not found"):
        RewardModelConfig.from_pretrained(tmp_path)


def test_reward_model_config_from_pretrained_roundtrip(tmp_path):
    """Round-trip: save a RewardClassifierConfig, reload it, fields must match."""
    from lerobot.rewards.classifier.configuration_classifier import RewardClassifierConfig

    original = RewardClassifierConfig(
        num_classes=3,
        hidden_dim=128,
        latent_dim=64,
        num_cameras=1,
        learning_rate=5e-4,
    )
    original._save_pretrained(tmp_path)

    loaded = RewardModelConfig.from_pretrained(tmp_path)

    assert isinstance(loaded, RewardClassifierConfig)
    assert loaded.num_classes == 3
    assert loaded.hidden_dim == 128
    assert loaded.latent_dim == 64
    assert loaded.num_cameras == 1
    assert loaded.learning_rate == 5e-4


# ---------------------------------------------------------------------------
# TrainPipelineConfig — reward model training path
# ---------------------------------------------------------------------------


def test_train_pipeline_config_path_fields_includes_reward_model():
    """``--reward_model.path=local/dir`` requires ``reward_model`` to be listed
    as a draccus path-field on ``TrainPipelineConfig``."""
    from lerobot.configs.train import TrainPipelineConfig

    fields = TrainPipelineConfig.__get_path_fields__()
    assert "policy" in fields
    assert "reward_model" in fields


def test_train_pipeline_config_trainable_config_returns_reward_model_when_set():
    """When only ``reward_model`` is set, ``trainable_config`` (used by the
    trainer for e.g. ``.device``) must return it — not ``None`` from ``policy``."""
    from lerobot.configs.default import DatasetConfig
    from lerobot.configs.train import TrainPipelineConfig
    from lerobot.rewards.classifier.configuration_classifier import RewardClassifierConfig

    reward_cfg = RewardClassifierConfig(device="cpu")
    cfg = TrainPipelineConfig(
        dataset=DatasetConfig(repo_id="user/repo"),
        reward_model=reward_cfg,
    )

    assert cfg.is_reward_model_training is True
    assert cfg.trainable_config is reward_cfg
    # This is what lerobot_train.py uses to decide force_cpu; ``cfg.policy.device``
    # would AttributeError here because policy is None.
    assert cfg.trainable_config.device == "cpu"


def test_train_pipeline_config_trainable_config_returns_policy_when_set():
    """Mirror of the reward-model case: when only ``policy`` is set,
    ``trainable_config`` must return it."""
    from lerobot.configs.default import DatasetConfig
    from lerobot.configs.train import TrainPipelineConfig
    from lerobot.policies.diffusion.configuration_diffusion import DiffusionConfig

    policy_cfg = DiffusionConfig(device="cpu")
    cfg = TrainPipelineConfig(
        dataset=DatasetConfig(repo_id="user/repo"),
        policy=policy_cfg,
    )

    assert cfg.is_reward_model_training is False
    assert cfg.trainable_config is policy_cfg
    assert cfg.trainable_config.device == "cpu"


def _write_legacy_rabc_train_config(tmp_path, *, use_rabc):
    """Save a train config under ``tmp_path`` and rewrite it to the legacy RA-BC layout.

    A fresh ``TrainPipelineConfig`` (diffusion policy on CPU) is saved, then its
    JSON payload is edited on disk: the ``sample_weighting`` entry is dropped
    and the legacy top-level ``use_rabc`` / ``rabc_*`` keys are added.

    Args:
        tmp_path: Directory the config file is written to.
        use_rabc: Value stored under the legacy ``use_rabc`` key.
    """
    from lerobot.configs.default import DatasetConfig
    from lerobot.configs.train import TRAIN_CONFIG_NAME, TrainPipelineConfig
    from lerobot.policies.diffusion.configuration_diffusion import DiffusionConfig

    cfg = TrainPipelineConfig(
        dataset=DatasetConfig(repo_id="user/repo"),
        policy=DiffusionConfig(device="cpu"),
    )
    cfg._save_pretrained(tmp_path)

    config_path = tmp_path / TRAIN_CONFIG_NAME
    payload = json.loads(config_path.read_text(encoding="utf-8"))

    # Emulate a config written before ``sample_weighting`` existed.
    payload.pop("sample_weighting", None)
    payload.update(
        {
            "use_rabc": use_rabc,
            "rabc_progress_path": "hf://datasets/user/repo/sarm_progress.parquet",
            "rabc_kappa": 0.05,
            "rabc_epsilon": 1e-5,
            "rabc_head_mode": "dense",
        }
    )

    config_path.write_text(json.dumps(payload), encoding="utf-8")


def test_train_pipeline_config_from_pretrained_migrates_legacy_rabc_fields(tmp_path):
    """Legacy top-level RA-BC fields should be migrated into ``sample_weighting``."""
    from lerobot.configs.train import TrainPipelineConfig

    _write_legacy_rabc_train_config(tmp_path, use_rabc=True)

    loaded = TrainPipelineConfig.from_pretrained(tmp_path)

    assert loaded.sample_weighting is not None
    assert loaded.sample_weighting.type == "rabc"
    assert loaded.sample_weighting.progress_path == "hf://datasets/user/repo/sarm_progress.parquet"
    assert loaded.sample_weighting.kappa == 0.05
    assert loaded.sample_weighting.epsilon == 1e-5
    assert loaded.sample_weighting.head_mode == "dense"


def test_train_pipeline_config_from_pretrained_strips_legacy_rabc_when_disabled(tmp_path):
    """Legacy RA-BC fields should be ignored when ``use_rabc`` was false."""
    from lerobot.configs.train import TrainPipelineConfig

    _write_legacy_rabc_train_config(tmp_path, use_rabc=False)

    loaded = TrainPipelineConfig.from_pretrained(tmp_path)

    assert loaded.sample_weighting is None


# ---------------------------------------------------------------------------
# PreTrainedRewardModel hub upload: push_model_to_hub + generate_model_card.
# We test the generation side (offline) fully, and the upload side with HfApi
# mocked so nothing actually hits the network.
# ---------------------------------------------------------------------------


def _make_dummy_reward_model(**config_kwargs):
    """Build a ``_DummyHubReward`` from keyword overrides for its config.

    Returns a ``(model, config_class)`` pair.
    """
    dummy_config = _DummyHubRewardConfig(**config_kwargs)
    return _DummyHubReward(dummy_config), _DummyHubRewardConfig


@pytest.fixture
def _offline_model_card(monkeypatch):
    """``ModelCard.validate`` does a live ``POST`` to huggingface.co — replace
    it with a no-op so tests can run offline."""
    from huggingface_hub import ModelCard

    def _skip_validation(self, *args, **kwargs):
        return None

    monkeypatch.setattr(ModelCard, "validate", _skip_validation)


def test_reward_model_generate_model_card_renders_expected_fields(_offline_model_card):
    """``generate_model_card`` must produce a card with the right metadata and
    body, using the dedicated reward-model template."""
    model, _ = _make_dummy_reward_model(license="mit", tags=["robot", "sim"])

    model_card = model.generate_model_card(
        dataset_repo_id="user/my_dataset",
        model_type=model.config.type,
        license=model.config.license,
        tags=model.config.tags,
    )

    # Metadata (YAML header) — ModelCardData fields.
    metadata = model_card.data
    assert metadata.license == "mit"
    assert metadata.library_name == "lerobot"
    assert metadata.pipeline_tag == "robotics"
    assert "reward-model" in metadata.tags
    assert model.config.type in metadata.tags
    assert metadata.model_name == model.config.type
    assert metadata.datasets == "user/my_dataset"

    # Body — specific to the reward-model template, NOT the policy one.
    rendered = str(model_card)
    assert "Reward Model Card" in rendered
    assert "This reward model has been trained" in rendered
    assert "--reward_model.type=" in rendered  # reward-model-specific usage block


def test_reward_model_generate_model_card_uses_default_license(_offline_model_card):
    """When config.license is None the card falls back to apache-2.0."""
    model, _ = _make_dummy_reward_model()

    model_card = model.generate_model_card(
        dataset_repo_id="user/my_dataset",
        model_type=model.config.type,
        license=model.config.license,
        tags=None,
    )

    assert model_card.data.license == "apache-2.0"


def test_reward_model_push_model_to_hub_uploads_expected_files(monkeypatch, _offline_model_card):
    """``push_model_to_hub`` must:
    1. create the repo,
    2. assemble a temp folder with weights + config.json + train_config.json + README.md,
    3. call ``api.upload_folder`` on that folder.

    All network calls are mocked.
    """
    from huggingface_hub.constants import CONFIG_NAME

    from lerobot.configs.default import DatasetConfig
    from lerobot.configs.train import TRAIN_CONFIG_NAME, TrainPipelineConfig
    from lerobot.rewards import pretrained as reward_pretrained

    model, _ = _make_dummy_reward_model(repo_id="user/my_reward", license="apache-2.0")

    # Point the reward model's train config at a dummy dataset repo.
    train_cfg = TrainPipelineConfig(
        dataset=DatasetConfig(repo_id="user/my_dataset"),
        reward_model=model.config,
    )

    # Everything the fake API is asked to do is recorded here for the asserts.
    recorded: dict = {}
    commit_info_stub = SimpleNamespace(
        repo_url=SimpleNamespace(url="https://huggingface.co/user/my_reward")
    )

    class _FakeHfApi:
        def create_repo(self, repo_id, private=None, exist_ok=False):
            recorded["create_repo_id"] = repo_id
            recorded["create_private"] = private
            return SimpleNamespace(repo_id=repo_id)

        def upload_folder(self, *, repo_id, repo_type, folder_path, commit_message, **_kwargs):
            recorded["upload_repo_id"] = repo_id
            recorded["upload_repo_type"] = repo_type
            recorded["commit_message"] = commit_message
            # Snapshot files assembled in the temp folder — this is the real
            # contract we care about.
            file_names = [entry.name for entry in Path(folder_path).iterdir()]
            file_names.sort()
            recorded["files"] = file_names
            return commit_info_stub

    def _build_fake_api(*args, **kwargs):
        return _FakeHfApi()

    monkeypatch.setattr(reward_pretrained, "HfApi", _build_fake_api)

    model.push_model_to_hub(train_cfg)

    assert recorded["create_repo_id"] == "user/my_reward"
    assert recorded["upload_repo_id"] == "user/my_reward"
    assert recorded["upload_repo_type"] == "model"
    assert recorded["commit_message"] == "Upload reward model weights, train config and readme"

    # Minimum required files that must be uploaded with a reward model.
    uploaded_files = recorded["files"]
    assert CONFIG_NAME in uploaded_files  # config.json
    assert TRAIN_CONFIG_NAME in uploaded_files  # train_config.json
    assert "README.md" in uploaded_files
    assert any(file_name.endswith(".safetensors") for file_name in uploaded_files)