Files
lerobot/tests/rl/test_trainer.py
T
2026-04-27 16:53:34 +02:00

128 lines
3.9 KiB
Python

#!/usr/bin/env python
# Copyright 2026 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import pytest
pytest.importorskip("datasets", reason="datasets is required (install lerobot[dataset])")
import torch # noqa: E402
from torch import Tensor # noqa: E402
from lerobot.rl.algorithms.base import RLAlgorithm # noqa: E402
from lerobot.rl.algorithms.configs import TrainingStats # noqa: E402
from lerobot.rl.trainer import RLTrainer # noqa: E402
from lerobot.utils.constants import ACTION, OBS_STATE # noqa: E402
class _DummyRLAlgorithmConfig:
"""Dummy config for testing."""
class _DummyRLAlgorithm(RLAlgorithm):
config_class = _DummyRLAlgorithmConfig
name = "dummy_rl_algorithm"
def __init__(self):
self.configure_calls = 0
self.update_calls = 0
def select_action(self, observation: dict[str, Tensor]) -> Tensor:
return torch.zeros(1)
def configure_data_iterator(
self,
data_mixer,
batch_size: int,
*,
async_prefetch: bool = True,
queue_size: int = 2,
):
self.configure_calls += 1
return data_mixer.get_iterator(
batch_size=batch_size,
async_prefetch=async_prefetch,
queue_size=queue_size,
)
def make_optimizers_and_scheduler(self):
return {}
def update(self, batch_iterator):
self.update_calls += 1
_ = next(batch_iterator)
return TrainingStats(losses={"dummy": 1.0})
def load_weights(self, weights, device="cpu") -> None:
_ = (weights, device)
class _SimpleMixer:
def get_iterator(self, batch_size: int, async_prefetch: bool = True, queue_size: int = 2):
_ = (async_prefetch, queue_size)
while True:
yield {
"state": {OBS_STATE: torch.randn(batch_size, 3)},
ACTION: torch.randn(batch_size, 2),
"reward": torch.randn(batch_size),
"next_state": {OBS_STATE: torch.randn(batch_size, 3)},
"done": torch.zeros(batch_size),
"truncated": torch.zeros(batch_size),
"complementary_info": None,
}
def test_trainer_lazy_iterator_lifecycle_and_reset():
algo = _DummyRLAlgorithm()
mixer = _SimpleMixer()
trainer = RLTrainer(algorithm=algo, data_mixer=mixer, batch_size=4)
# First call builds iterator once.
trainer.training_step()
assert algo.configure_calls == 1
assert algo.update_calls == 1
# Second call reuses existing iterator.
trainer.training_step()
assert algo.configure_calls == 1
assert algo.update_calls == 2
# Explicit reset forces lazy rebuild on next step.
trainer.reset_data_iterator()
trainer.training_step()
assert algo.configure_calls == 2
assert algo.update_calls == 3
def test_trainer_set_data_mixer_resets_by_default():
algo = _DummyRLAlgorithm()
mixer_a = _SimpleMixer()
mixer_b = _SimpleMixer()
trainer = RLTrainer(algorithm=algo, data_mixer=mixer_a, batch_size=2)
trainer.training_step()
assert algo.configure_calls == 1
trainer.set_data_mixer(mixer_b, reset=True)
trainer.training_step()
assert algo.configure_calls == 2
def test_algorithm_optimization_step_contract_defaults():
algo = _DummyRLAlgorithm()
assert algo.optimization_step == 0
algo.optimization_step = 11
assert algo.optimization_step == 11