fix(sac): make temperature a property to fix checkpoint resume bug (#2877)

* fix(sac): make temperature a property to fix checkpoint resume bug

  Temperature was stored as a plain float and was not restored after loading a checkpoint, causing incorrect loss computations until update_temperature() was called. It is now a property that is always computed from log_alpha, ensuring correct behavior after checkpoint loading.

* simplify docstrings
2026-07-12 12:32:02 +00:00 · 2026-01-30 12:23:22 +01:00
parent 3409ef0dc2
commit 04cbf669cf
3 changed files with 8 additions and 9 deletions
@@ -441,12 +441,13 @@ def test_sac_policy_with_predefined_entropy():


 def test_sac_policy_update_temperature():
+    """Test that temperature property is always in sync with log_alpha."""
    config = create_default_config(continuous_action_dim=10, state_dim=10)
    policy = SACPolicy(config=config)

    assert policy.temperature == pytest.approx(1.0)
    policy.log_alpha.data = torch.tensor([math.log(0.1)])
-    policy.update_temperature()
+    # Temperature property automatically reflects log_alpha changes
    assert policy.temperature == pytest.approx(0.1)