mirror of https://github.com/huggingface/lerobot.git
synced 2026-05-11 14:49:43 +00:00
commit 8a3d64033f
* feat(rewards): add RewardModelConfig and PreTrainedRewardModel base classes
* refactor(rewards): migrate Classifier from policies/sac/reward_model/ to rewards/classifier/
* refactor(rewards): migrate SARM from policies/sarm/ to rewards/sarm/
* refactor(rewards): add rewards/factory.py and remove reward model code from policies/factory.py
* refactor(rewards): update imports and delete old reward model locations
* test(rewards): add reward model tests and update existing test imports
* fix(rewards): restore full Classifier and SARM implementations
* test(rewards): restore missing CUDA and mixed precision classifier processor tests
* refactor(lerobot_train.py): remove rabc specific configuration and replace it with a generic samplerweight class in lerobot_train
* refactor(lerobot_train.py): add missing sampling weight script
* linter + missing files
* add testing for sampl weighter
* revert some useless changes, improve typing
* update docs
* add automatic detection of the progress path
* remove type exp
* improve comment
* fix: move rabc.py to rewards/sarm/ and update import paths
* refactor(imports): update reward model imports to new module structure
* refactor(imports): update reward model imports to reflect new module structure
* refactor(imports): conditionally import pandas based on availability
* feat(configs): add reward_model field to TrainPipelineConfig and Hub fields to RewardModelConfig
* refactor(policies): remove reward model branches from policy factory and __init__
* refactor(rewards): expand __init__ facade and fix SARMConfig __post_init__ crash
* feat(train): route reward model training through rewards/factory instead of policies/factory
* refactor(train): streamline reward model training logic
* fix(rewards): ensure FileNotFoundError is raised for missing config_file
* refactor(train): update __get_path_fields__ to include reward_model for config loading
* refactor(classifier): remove redundant input normalization in predict_reward method
* fix(train): raise ValueError for non-trainable reward models in train function
* refactor(pretrained_rm): add model card template
* refactor(tests): reward models
* refactor(sarm): update reset method and remove unused action prediction methods
* refactor(wandb): differentiate tags for reward model and policy training in cfg_to_group function
* fix(train): raise ValueError for PEFT usage in reward model training
* refactor(rewards): enhance RewardModelConfig with device handling and delta indices properties

---------

Co-authored-by: Michel Aractingi <michel.aractingi@huggingface.co>
374 lines
13 KiB
Python
# Copyright 2026 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Tests for the reward model base classes and registry."""

from dataclasses import dataclass
from pathlib import Path
from types import SimpleNamespace

import pytest
import torch

from lerobot.configs.rewards import RewardModelConfig
from lerobot.optim.optimizers import AdamWConfig
from lerobot.rewards.pretrained import PreTrainedRewardModel


@RewardModelConfig.register_subclass(name="_dummy_hub_reward")
@dataclass
class _DummyHubRewardConfig(RewardModelConfig):
    def get_optimizer_preset(self):
        return AdamWConfig(lr=1e-4)


class _DummyHubReward(PreTrainedRewardModel):
    config_class = _DummyHubRewardConfig
    name = "_dummy_hub_reward"

    def __init__(self, config):
        super().__init__(config)
        self.bias = torch.nn.Parameter(torch.zeros(1))

    def compute_reward(self, batch):
        return self.bias.expand(1)
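

# A hedged sketch (not part of the migrated suite): exercise the dummy's
# zero-shot path directly. Assumes ``_DummyHubRewardConfig()`` is
# default-constructible, as ``DummyConfig()`` is in
# ``test_non_trainable_forward_raises`` below.
def test_dummy_compute_reward_returns_tensor():
    """``compute_reward`` should yield a reward tensor even though the dummy
    model is not trainable."""
    model = _DummyHubReward(_DummyHubRewardConfig())
    reward = model.compute_reward({"x": torch.zeros(1)})
    assert isinstance(reward, torch.Tensor)
    assert reward.shape == (1,)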


def test_reward_model_config_registry():
    """Verify that classifier and sarm are registered."""
    known = RewardModelConfig.get_known_choices()
    assert "reward_classifier" in known
    assert "sarm" in known


def test_reward_model_config_lookup():
    """Verify that we can look up configs by name."""
    cls = RewardModelConfig.get_choice_class("reward_classifier")
    from lerobot.rewards.classifier.configuration_classifier import RewardClassifierConfig

    assert cls is RewardClassifierConfig


def test_factory_get_reward_model_class():
    """Test the get_reward_model_class factory."""
    from lerobot.rewards.factory import get_reward_model_class

    cls = get_reward_model_class("sarm")
    from lerobot.rewards.sarm.modeling_sarm import SARMRewardModel

    assert cls is SARMRewardModel


def test_factory_unknown_raises():
    """Unknown name should raise ValueError."""
    from lerobot.rewards.factory import get_reward_model_class

    with pytest.raises(ValueError, match="not available"):
        get_reward_model_class("nonexistent_reward_model")
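

# Illustrative end-to-end factory flow (sketch only; assumes the classifier
# config is default-constructible, which the tests above do not pin down):
#
#     cls = get_reward_model_class("reward_classifier")
#     cfg = RewardModelConfig.get_choice_class("reward_classifier")()
#     model = cls(cfg)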


def test_pretrained_reward_model_requires_config_class():
    """Subclass without config_class should fail."""
    with pytest.raises(TypeError, match="must define 'config_class'"):

        class BadModel(PreTrainedRewardModel):
            name = "bad"

            def compute_reward(self, batch):
                pass


def test_pretrained_reward_model_requires_name():
    """Subclass without name should fail."""
    with pytest.raises(TypeError, match="must define 'name'"):

        class BadModel(PreTrainedRewardModel):
            config_class = RewardModelConfig

            def compute_reward(self, batch):
                pass


def test_non_trainable_forward_raises():
    """A non-trainable model should raise on forward()."""

    # ``dataclass`` and ``AdamWConfig`` are already imported at module level,
    # so the redundant local imports have been dropped.
    @dataclass
    class DummyConfig(RewardModelConfig):
        def get_optimizer_preset(self):
            return AdamWConfig(lr=1e-4)

    class DummyReward(PreTrainedRewardModel):
        config_class = DummyConfig
        name = "dummy_test"

        def compute_reward(self, batch):
            return torch.zeros(1)

    config = DummyConfig()
    model = DummyReward(config)

    with pytest.raises(NotImplementedError, match="not trainable"):
        model.forward({"x": torch.zeros(1)})


# ---------------------------------------------------------------------------
# Trainable vs zero-shot (general-purpose) reward models.
# The proposal explicitly supports models like TOPReward that wrap a pretrained
# VLM and produce a reward signal without any training step. These tests pin
# the contract that lets such models coexist with trainable ones.
# ---------------------------------------------------------------------------


def test_is_trainable_false_when_forward_not_overridden():
    """A reward model that only implements ``compute_reward`` is zero-shot."""
    model, _ = _make_dummy_reward_model()
    assert model.is_trainable is False


def test_is_trainable_true_when_forward_overridden():
    """Overriding ``forward`` flips ``is_trainable`` to True."""

    class _TrainableReward(_DummyHubReward):
        name = "_trainable_dummy_reward"

        def forward(self, batch):
            loss = (self.bias**2).sum()
            return loss, {}

    # Register a fresh config subclass so the subclass check passes.
    @RewardModelConfig.register_subclass(name="_trainable_dummy_reward")
    @dataclass
    class _TrainableConfig(_DummyHubRewardConfig):
        pass

    _TrainableReward.config_class = _TrainableConfig
    model = _TrainableReward(_TrainableConfig())
    assert model.is_trainable is True
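

# A hedged sketch of the dispatch a trainer can build on ``is_trainable``
# (illustrative; the actual lerobot_train.py branching may differ):
def test_trainability_dispatch_sketch():
    model, _ = _make_dummy_reward_model()
    batch = {"x": torch.zeros(1)}
    if model.is_trainable:
        loss, _ = model.forward(batch)
        assert torch.is_tensor(loss)
    else:
        reward = model.compute_reward(batch)
        assert reward.shape == (1,)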


# ---------------------------------------------------------------------------
# RewardModelConfig.from_pretrained
# ---------------------------------------------------------------------------


def test_reward_model_config_from_pretrained_raises_when_config_missing(tmp_path):
    """``from_pretrained`` must surface a clear ``FileNotFoundError`` when the
    target directory exists but does not contain ``config.json``, instead of
    crashing later inside ``draccus.parse``.
    """
    # tmp_path exists but has no config.json
    with pytest.raises(FileNotFoundError, match="config.json not found"):
        RewardModelConfig.from_pretrained(tmp_path)


def test_reward_model_config_from_pretrained_roundtrip(tmp_path):
    """Round-trip: save a RewardClassifierConfig, reload it, fields must match."""
    from lerobot.rewards.classifier.configuration_classifier import RewardClassifierConfig

    original = RewardClassifierConfig(
        num_classes=3,
        hidden_dim=128,
        latent_dim=64,
        num_cameras=1,
        learning_rate=5e-4,
    )
    original._save_pretrained(tmp_path)

    loaded = RewardModelConfig.from_pretrained(tmp_path)

    assert isinstance(loaded, RewardClassifierConfig)
    assert loaded.num_classes == 3
    assert loaded.hidden_dim == 128
    assert loaded.latent_dim == 64
    assert loaded.num_cameras == 1
    assert loaded.learning_rate == 5e-4
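

# The same round trip should hold for any registered config subclass; a sketch
# with the dummy config (the subdirectory name is hypothetical):
#
#     _DummyHubRewardConfig()._save_pretrained(tmp_path / "dummy")
#     loaded = RewardModelConfig.from_pretrained(tmp_path / "dummy")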


# ---------------------------------------------------------------------------
# TrainPipelineConfig: reward model training path
# ---------------------------------------------------------------------------


def test_train_pipeline_config_path_fields_includes_reward_model():
    """``--reward_model.path=local/dir`` requires ``reward_model`` to be listed
    as a draccus path-field on ``TrainPipelineConfig``."""
    from lerobot.configs.train import TrainPipelineConfig

    fields = TrainPipelineConfig.__get_path_fields__()
    assert "policy" in fields
    assert "reward_model" in fields
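

# Illustrative command line this path-field registration enables (flag values
# are hypothetical):
#
#     --dataset.repo_id=user/repo --reward_model.path=outputs/reward_ckpt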


def test_train_pipeline_config_trainable_config_returns_reward_model_when_set():
    """When only ``reward_model`` is set, ``trainable_config`` (used by the
    trainer for e.g. ``.device``) must return it, not ``None`` from ``policy``."""
    from lerobot.configs.default import DatasetConfig
    from lerobot.configs.train import TrainPipelineConfig
    from lerobot.rewards.classifier.configuration_classifier import RewardClassifierConfig

    reward_cfg = RewardClassifierConfig(device="cpu")
    cfg = TrainPipelineConfig(
        dataset=DatasetConfig(repo_id="user/repo"),
        reward_model=reward_cfg,
    )

    assert cfg.is_reward_model_training is True
    assert cfg.trainable_config is reward_cfg
    # This is what lerobot_train.py uses to decide force_cpu; ``cfg.policy.device``
    # would raise AttributeError here because policy is None.
    assert cfg.trainable_config.device == "cpu"


def test_train_pipeline_config_trainable_config_returns_policy_when_set():
    """Mirror of the reward-model case: when only ``policy`` is set,
    ``trainable_config`` must return it."""
    from lerobot.configs.default import DatasetConfig
    from lerobot.configs.train import TrainPipelineConfig
    from lerobot.policies.diffusion.configuration_diffusion import DiffusionConfig

    policy_cfg = DiffusionConfig(device="cpu")
    cfg = TrainPipelineConfig(
        dataset=DatasetConfig(repo_id="user/repo"),
        policy=policy_cfg,
    )

    assert cfg.is_reward_model_training is False
    assert cfg.trainable_config is policy_cfg
    assert cfg.trainable_config.device == "cpu"
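

# Sketch of the trainer-side access pattern this contract supports
# (illustrative, not the actual lerobot_train.py code):
#
#     trainable = cfg.trainable_config
#     use_cpu = trainable.device == "cpu"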


# ---------------------------------------------------------------------------
# PreTrainedRewardModel hub upload: push_model_to_hub + generate_model_card.
# We test the generation side (offline) fully, and the upload side with HfApi
# mocked so nothing actually hits the network.
# ---------------------------------------------------------------------------


def _make_dummy_reward_model(**config_kwargs):
    return _DummyHubReward(_DummyHubRewardConfig(**config_kwargs)), _DummyHubRewardConfig


@pytest.fixture
def _offline_model_card(monkeypatch):
    """``ModelCard.validate`` does a live ``POST`` to huggingface.co; bypass it
    so tests can run offline."""
    from huggingface_hub import ModelCard

    monkeypatch.setattr(ModelCard, "validate", lambda self, *a, **kw: None)


def test_reward_model_generate_model_card_renders_expected_fields(_offline_model_card):
    """``generate_model_card`` must produce a card with the right metadata and
    body, using the dedicated reward-model template."""
    model, _ = _make_dummy_reward_model(
        license="mit",
        tags=["robot", "sim"],
    )

    card = model.generate_model_card(
        dataset_repo_id="user/my_dataset",
        model_type=model.config.type,
        license=model.config.license,
        tags=model.config.tags,
    )

    # Metadata (YAML header): ModelCardData fields.
    assert card.data.license == "mit"
    assert card.data.library_name == "lerobot"
    assert card.data.pipeline_tag == "robotics"
    assert "reward-model" in card.data.tags
    assert model.config.type in card.data.tags
    assert card.data.model_name == model.config.type
    assert card.data.datasets == "user/my_dataset"

    # Body: specific to the reward-model template, NOT the policy one.
    body = str(card)
    assert "Reward Model Card" in body
    assert "This reward model has been trained" in body
    assert "--reward_model.type=" in body  # reward-model-specific usage block


def test_reward_model_generate_model_card_uses_default_license(_offline_model_card):
    """When config.license is None the card falls back to apache-2.0."""
    model, _ = _make_dummy_reward_model()

    card = model.generate_model_card(
        dataset_repo_id="user/my_dataset",
        model_type=model.config.type,
        license=model.config.license,
        tags=None,
    )

    assert card.data.license == "apache-2.0"


def test_reward_model_push_model_to_hub_uploads_expected_files(monkeypatch, _offline_model_card):
    """``push_model_to_hub`` must:

    1. create the repo,
    2. assemble a temp folder with weights + config.json + train_config.json + README.md,
    3. call ``api.upload_folder`` on that folder.

    All network calls are mocked.
    """
    from huggingface_hub.constants import CONFIG_NAME

    from lerobot.configs.default import DatasetConfig
    from lerobot.configs.train import TRAIN_CONFIG_NAME, TrainPipelineConfig

    model, _ = _make_dummy_reward_model(
        repo_id="user/my_reward",
        license="apache-2.0",
    )
    # Point the reward model's train config at a dummy dataset repo.
    train_cfg = TrainPipelineConfig(
        dataset=DatasetConfig(repo_id="user/my_dataset"),
        reward_model=model.config,
    )

    uploaded: dict = {}
    fake_commit_info = SimpleNamespace(repo_url=SimpleNamespace(url="https://huggingface.co/user/my_reward"))

    class _FakeHfApi:
        def create_repo(self, repo_id, private=None, exist_ok=False):
            uploaded["create_repo_id"] = repo_id
            uploaded["create_private"] = private
            return SimpleNamespace(repo_id=repo_id)

        def upload_folder(self, *, repo_id, repo_type, folder_path, commit_message, **_kwargs):
            uploaded["upload_repo_id"] = repo_id
            uploaded["upload_repo_type"] = repo_type
            uploaded["commit_message"] = commit_message
            # Snapshot files assembled in the temp folder; this is the real
            # contract we care about.
            uploaded["files"] = sorted(p.name for p in Path(folder_path).iterdir())
            return fake_commit_info

    from lerobot.rewards import pretrained as reward_pretrained

    monkeypatch.setattr(reward_pretrained, "HfApi", lambda *a, **kw: _FakeHfApi())

    model.push_model_to_hub(train_cfg)

    assert uploaded["create_repo_id"] == "user/my_reward"
    assert uploaded["upload_repo_id"] == "user/my_reward"
    assert uploaded["upload_repo_type"] == "model"
    assert uploaded["commit_message"] == "Upload reward model weights, train config and readme"
    # Minimum required files that must be uploaded with a reward model.
    assert CONFIG_NAME in uploaded["files"]  # config.json
    assert TRAIN_CONFIG_NAME in uploaded["files"]  # train_config.json
    assert "README.md" in uploaded["files"]
    assert any(name.endswith(".safetensors") for name in uploaded["files"])