feat(rl): consolidate HIL-SERL checkpoint into HF-style components

Make  and  s, add abstract
`state_dict` / `load_state_dict` for algorithm-owned tensors (critics,
target nets, ), and persist them as a sibling
component next to . Replace the pickled
 side-file with an enriched
carrying both  and , so resume restores actor +
critics + target nets + temperature + optimizers + RNG + counters from
plain HF-standard files.
This commit is contained in:
Khalil Meftah
2026-05-08 21:24:23 +02:00
parent b1b2708e2f
commit 0944b84279
8 changed files with 382 additions and 24 deletions
+6
View File
@@ -68,6 +68,12 @@ class _DummyRLAlgorithm(RLAlgorithm):
def load_weights(self, weights, device="cpu") -> None:
    """No-op stub: accept ``weights`` and ``device`` and ignore both."""
    # Explicitly discard the arguments so they do not read as unused.
    del weights, device
def state_dict(self) -> dict[str, torch.Tensor]:
    """Return a fresh, empty mapping; this stub holds no tensors."""
    empty_state: dict[str, torch.Tensor] = {}
    return empty_state
def load_state_dict(self, state_dict, device="cpu") -> None:
    """No-op stub: accept ``state_dict`` and ``device`` and ignore both."""
    # Explicitly discard the arguments so they do not read as unused.
    del state_dict, device
class _SimpleMixer:
def get_iterator(self, batch_size: int, async_prefetch: bool = True, queue_size: int = 2):