lerobot/tests/rewards/test_reward_model_base.py

Khalil Meftah 8a3d64033f Reward models refactor (#3142)
* feat(rewards): add RewardModelConfig and PreTrainedRewardModel base classes

* refactor(rewards): migrate Classifier from policies/sac/reward_model/ to rewards/classifier/

* refactor(rewards): migrate SARM from policies/sarm/ to rewards/sarm/

* refactor(rewards): add rewards/factory.py and remove reward model code from policies/factory.py

* refactor(rewards): update imports and delete old reward model locations

* test(rewards): add reward model tests and update existing test imports

* fix(rewards): restore full Classifier and SARM implementations

* test(rewards): restore missing CUDA and mixed precision classifier processor tests

* refactor(lerobot_train.py): remove rabc-specific configuration and replace it with a generic sampler weight class in lerobot_train

* refactor(lerobot_train.py): add missing sampling weight script

* linter + missing files

* add testing for the sampler weighter

* revert some useless changes, improve typing

* update docs

* add automatic detection of the progress path

* remove type exp

* improve comment

* fix: move rabc.py to rewards/sarm/ and update import paths

* refactor(imports): update reward model imports to new module structure

* refactor(imports): update reward model imports to reflect new module structure

* refactor(imports): conditionally import pandas based on availability

* feat(configs): add reward_model field to TrainPipelineConfig and Hub fields to RewardModelConfig

* refactor(policies): remove reward model branches from policy factory and __init__

* refactor(rewards): expand __init__ facade and fix SARMConfig __post_init__ crash

* feat(train): route reward model training through rewards/factory instead of policies/factory

* refactor(train): streamline reward model training logic

* fix(rewards): ensure FileNotFoundError is raised for missing config_file

* refactor(train): update __get_path_fields__ to include reward_model for config loading

* refactor(classifier): remove redundant input normalization in predict_reward method

* fix(train): raise ValueError for non-trainable reward models in train function

* refactor(pretrained_rm): add model card template

* refactor(tests): reward models

* refactor(sarm): update reset method and remove unused action prediction methods

* refactor(wandb): differentiate tags for reward model and policy training in cfg_to_group function

* fix(train): raise ValueError for PEFT usage in reward model training

* refactor(rewards): enhance RewardModelConfig with device handling and delta indices properties

---------

Co-authored-by: Michel Aractingi <michel.aractingi@huggingface.co>
2026-04-28 17:56:24 +02:00

374 lines · 13 KiB · Python

# Copyright 2026 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for the reward model base classes and registry."""
from dataclasses import dataclass
from pathlib import Path
from types import SimpleNamespace
import pytest
import torch
from lerobot.configs.rewards import RewardModelConfig
from lerobot.optim.optimizers import AdamWConfig
from lerobot.rewards.pretrained import PreTrainedRewardModel
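
# Minimal hub-ready dummy shared by the tests below. ``register_subclass`` is
# what makes the config discoverable through ``get_known_choices`` and
# ``get_choice_class``; implementing only ``compute_reward`` (no ``forward``
# override) keeps the model zero-shot.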
@RewardModelConfig.register_subclass(name="_dummy_hub_reward")
@dataclass
class _DummyHubRewardConfig(RewardModelConfig):
    def get_optimizer_preset(self):
        return AdamWConfig(lr=1e-4)


class _DummyHubReward(PreTrainedRewardModel):
    config_class = _DummyHubRewardConfig
    name = "_dummy_hub_reward"

    def __init__(self, config):
        super().__init__(config)
        self.bias = torch.nn.Parameter(torch.zeros(1))

    def compute_reward(self, batch):
        return self.bias.expand(1)


def test_reward_model_config_registry():
    """Verify that classifier and sarm are registered."""
    known = RewardModelConfig.get_known_choices()
    assert "reward_classifier" in known
    assert "sarm" in known


def test_reward_model_config_lookup():
    """Verify that we can look up configs by name."""
    cls = RewardModelConfig.get_choice_class("reward_classifier")

    from lerobot.rewards.classifier.configuration_classifier import RewardClassifierConfig

    assert cls is RewardClassifierConfig


def test_factory_get_reward_model_class():
    """Test the get_reward_model_class factory."""
    from lerobot.rewards.factory import get_reward_model_class

    cls = get_reward_model_class("sarm")

    from lerobot.rewards.sarm.modeling_sarm import SARMRewardModel

    assert cls is SARMRewardModel


def test_factory_unknown_raises():
    """Unknown name should raise ValueError."""
    from lerobot.rewards.factory import get_reward_model_class

    with pytest.raises(ValueError, match="not available"):
        get_reward_model_class("nonexistent_reward_model")
def test_pretrained_reward_model_requires_config_class():
    """Subclass without config_class should fail."""
    with pytest.raises(TypeError, match="must define 'config_class'"):

        class BadModel(PreTrainedRewardModel):
            name = "bad"

            def compute_reward(self, batch):
                pass


def test_pretrained_reward_model_requires_name():
    """Subclass without name should fail."""
    with pytest.raises(TypeError, match="must define 'name'"):

        class BadModel(PreTrainedRewardModel):
            config_class = RewardModelConfig

            def compute_reward(self, batch):
                pass


def test_non_trainable_forward_raises():
    """Non-trainable model should raise on forward()."""

    @dataclass
    class DummyConfig(RewardModelConfig):
        def get_optimizer_preset(self):
            return AdamWConfig(lr=1e-4)

    class DummyReward(PreTrainedRewardModel):
        config_class = DummyConfig
        name = "dummy_test"

        def compute_reward(self, batch):
            return torch.zeros(1)

    config = DummyConfig()
    model = DummyReward(config)
    with pytest.raises(NotImplementedError, match="not trainable"):
        model.forward({"x": torch.zeros(1)})


# ---------------------------------------------------------------------------
# Trainable vs zero-shot (general-purpose) reward models.
# The proposal explicitly supports models like TOPReward that wrap a pretrained
# VLM and produce a reward signal without any training step. These tests pin
# the contract that lets such models coexist with trainable ones.
# ---------------------------------------------------------------------------
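
# For reference, a zero-shot model looks roughly like this minimal sketch
# (``TOPRewardLike``, ``_SomeRegisteredConfig`` and ``self.vlm`` are
# illustrative names, not the actual TOPReward implementation):
#
#     class TOPRewardLike(PreTrainedRewardModel):
#         config_class = _SomeRegisteredConfig
#         name = "top_reward_like"
#
#         def compute_reward(self, batch):
#             # Reward comes straight from the wrapped pretrained VLM; with no
#             # ``forward`` override, ``is_trainable`` stays False.
#             return self.vlm.score(batch)

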
def test_is_trainable_false_when_forward_not_overridden():
    """A reward model that only implements ``compute_reward`` is zero-shot."""
    model, _ = _make_dummy_reward_model()
    assert model.is_trainable is False


def test_is_trainable_true_when_forward_overridden():
    """Overriding ``forward`` flips ``is_trainable`` to True."""

    class _TrainableReward(_DummyHubReward):
        name = "_trainable_dummy_reward"

        def forward(self, batch):
            loss = (self.bias**2).sum()
            return loss, {}

    # Register a fresh config subclass so the subclass check passes.
    @RewardModelConfig.register_subclass(name="_trainable_dummy_reward")
    @dataclass
    class _TrainableConfig(_DummyHubRewardConfig):
        pass

    _TrainableReward.config_class = _TrainableConfig
    model = _TrainableReward(_TrainableConfig())
    assert model.is_trainable is True
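
# (``is_trainable`` is therefore derived from whether a subclass overrides
# ``forward``; the two tests above pin exactly that contract.)

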
# ---------------------------------------------------------------------------
# RewardModelConfig.from_pretrained
# ---------------------------------------------------------------------------
def test_reward_model_config_from_pretrained_raises_when_config_missing(tmp_path):
    """``from_pretrained`` must surface a clear ``FileNotFoundError`` when the
    target directory exists but does not contain ``config.json``, instead of
    crashing later inside ``draccus.parse``.
    """
    # tmp_path exists but has no config.json
    with pytest.raises(FileNotFoundError, match="config.json not found"):
        RewardModelConfig.from_pretrained(tmp_path)


def test_reward_model_config_from_pretrained_roundtrip(tmp_path):
    """Round-trip: save a RewardClassifierConfig, reload it, fields must match."""
    from lerobot.rewards.classifier.configuration_classifier import RewardClassifierConfig

    original = RewardClassifierConfig(
        num_classes=3,
        hidden_dim=128,
        latent_dim=64,
        num_cameras=1,
        learning_rate=5e-4,
    )
    original._save_pretrained(tmp_path)

    loaded = RewardModelConfig.from_pretrained(tmp_path)

    assert isinstance(loaded, RewardClassifierConfig)
    assert loaded.num_classes == 3
    assert loaded.hidden_dim == 128
    assert loaded.latent_dim == 64
    assert loaded.num_cameras == 1
    assert loaded.learning_rate == 5e-4
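
# Downstream, the polymorphic load composes with the factory. A sketch (the
# hub repo-id form is an assumption mirroring the local-directory form tested
# above, and ``cfg.type`` is the registered name):
#
#     cfg = RewardModelConfig.from_pretrained("user/my_reward")  # or a local dir
#     model = get_reward_model_class(cfg.type)(cfg)

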
# ---------------------------------------------------------------------------
# TrainPipelineConfig — reward model training path
# ---------------------------------------------------------------------------
def test_train_pipeline_config_path_fields_includes_reward_model():
    """``--reward_model.path=local/dir`` requires ``reward_model`` to be listed
    as a draccus path-field on ``TrainPipelineConfig``."""
    from lerobot.configs.train import TrainPipelineConfig

    fields = TrainPipelineConfig.__get_path_fields__()
    assert "policy" in fields
    assert "reward_model" in fields
def test_train_pipeline_config_trainable_config_returns_reward_model_when_set():
    """When only ``reward_model`` is set, ``trainable_config`` (used by the
    trainer for e.g. ``.device``) must return it, not ``None`` from ``policy``."""
    from lerobot.configs.default import DatasetConfig
    from lerobot.configs.train import TrainPipelineConfig
    from lerobot.rewards.classifier.configuration_classifier import RewardClassifierConfig

    reward_cfg = RewardClassifierConfig(device="cpu")
    cfg = TrainPipelineConfig(
        dataset=DatasetConfig(repo_id="user/repo"),
        reward_model=reward_cfg,
    )

    assert cfg.is_reward_model_training is True
    assert cfg.trainable_config is reward_cfg
    # This is what lerobot_train.py uses to decide force_cpu; ``cfg.policy.device``
    # would raise AttributeError here because policy is None.
    assert cfg.trainable_config.device == "cpu"


def test_train_pipeline_config_trainable_config_returns_policy_when_set():
    """Mirror of the reward-model case: when only ``policy`` is set,
    ``trainable_config`` must return it."""
    from lerobot.configs.default import DatasetConfig
    from lerobot.configs.train import TrainPipelineConfig
    from lerobot.policies.diffusion.configuration_diffusion import DiffusionConfig

    policy_cfg = DiffusionConfig(device="cpu")
    cfg = TrainPipelineConfig(
        dataset=DatasetConfig(repo_id="user/repo"),
        policy=policy_cfg,
    )

    assert cfg.is_reward_model_training is False
    assert cfg.trainable_config is policy_cfg
    assert cfg.trainable_config.device == "cpu"


# ---------------------------------------------------------------------------
# PreTrainedRewardModel hub upload: push_model_to_hub + generate_model_card.
# We test the generation side (offline) fully, and the upload side with HfApi
# mocked so nothing actually hits the network.
# ---------------------------------------------------------------------------
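
# Expected layout of the temp folder handed to ``api.upload_folder`` (the
# weights filename is asserted only by its suffix, so the exact name may vary):
#
#     config.json        <- CONFIG_NAME, serialized RewardModelConfig
#     train_config.json  <- TRAIN_CONFIG_NAME, serialized TrainPipelineConfig
#     README.md          <- generated model card
#     *.safetensors      <- model weights

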
def _make_dummy_reward_model(**config_kwargs):
    return _DummyHubReward(_DummyHubRewardConfig(**config_kwargs)), _DummyHubRewardConfig


@pytest.fixture
def _offline_model_card(monkeypatch):
    """``ModelCard.validate`` does a live ``POST`` to huggingface.co; bypass it
    so tests can run offline."""
    from huggingface_hub import ModelCard

    monkeypatch.setattr(ModelCard, "validate", lambda self, *a, **kw: None)


def test_reward_model_generate_model_card_renders_expected_fields(_offline_model_card):
    """``generate_model_card`` must produce a card with the right metadata and
    body, using the dedicated reward-model template."""
    model, _ = _make_dummy_reward_model(
        license="mit",
        tags=["robot", "sim"],
    )

    card = model.generate_model_card(
        dataset_repo_id="user/my_dataset",
        model_type=model.config.type,
        license=model.config.license,
        tags=model.config.tags,
    )

    # Metadata (YAML header): ModelCardData fields.
    assert card.data.license == "mit"
    assert card.data.library_name == "lerobot"
    assert card.data.pipeline_tag == "robotics"
    assert "reward-model" in card.data.tags
    assert model.config.type in card.data.tags
    assert card.data.model_name == model.config.type
    assert card.data.datasets == "user/my_dataset"

    # Body: specific to the reward-model template, NOT the policy one.
    body = str(card)
    assert "Reward Model Card" in body
    assert "This reward model has been trained" in body
    assert "--reward_model.type=" in body  # reward-model-specific usage block


def test_reward_model_generate_model_card_uses_default_license(_offline_model_card):
    """When config.license is None the card falls back to apache-2.0."""
    model, _ = _make_dummy_reward_model()
    card = model.generate_model_card(
        dataset_repo_id="user/my_dataset",
        model_type=model.config.type,
        license=model.config.license,
        tags=None,
    )
    assert card.data.license == "apache-2.0"


def test_reward_model_push_model_to_hub_uploads_expected_files(monkeypatch, _offline_model_card):
    """``push_model_to_hub`` must:

    1. create the repo,
    2. assemble a temp folder with weights + config.json + train_config.json + README.md,
    3. call ``api.upload_folder`` on that folder.

    All network calls are mocked.
    """
    from huggingface_hub.constants import CONFIG_NAME

    from lerobot.configs.default import DatasetConfig
    from lerobot.configs.train import TRAIN_CONFIG_NAME, TrainPipelineConfig

    model, _ = _make_dummy_reward_model(
        repo_id="user/my_reward",
        license="apache-2.0",
    )
    # Point the reward model's train config at a dummy dataset repo.
    train_cfg = TrainPipelineConfig(
        dataset=DatasetConfig(repo_id="user/my_dataset"),
        reward_model=model.config,
    )

    uploaded: dict = {}
    fake_commit_info = SimpleNamespace(repo_url=SimpleNamespace(url="https://huggingface.co/user/my_reward"))

    class _FakeHfApi:
        def create_repo(self, repo_id, private=None, exist_ok=False):
            uploaded["create_repo_id"] = repo_id
            uploaded["create_private"] = private
            return SimpleNamespace(repo_id=repo_id)

        def upload_folder(self, *, repo_id, repo_type, folder_path, commit_message, **_kwargs):
            uploaded["upload_repo_id"] = repo_id
            uploaded["upload_repo_type"] = repo_type
            uploaded["commit_message"] = commit_message
            # Snapshot the files assembled in the temp folder: this is the real
            # contract we care about.
            uploaded["files"] = sorted(p.name for p in Path(folder_path).iterdir())
            return fake_commit_info

    from lerobot.rewards import pretrained as reward_pretrained

    monkeypatch.setattr(reward_pretrained, "HfApi", lambda *a, **kw: _FakeHfApi())

    model.push_model_to_hub(train_cfg)

    assert uploaded["create_repo_id"] == "user/my_reward"
    assert uploaded["upload_repo_id"] == "user/my_reward"
    assert uploaded["upload_repo_type"] == "model"
    assert uploaded["commit_message"] == "Upload reward model weights, train config and readme"
    # Minimum required files that must be uploaded with a reward model.
    assert CONFIG_NAME in uploaded["files"]  # config.json
    assert TRAIN_CONFIG_NAME in uploaded["files"]  # train_config.json
    assert "README.md" in uploaded["files"]
    assert any(name.endswith(".safetensors") for name in uploaded["files"])