Fix eval action conversion for bf16 policies

2026-06-26 20:57:28 +00:00 · 2026-06-13 10:51:33 +08:00
parent fa984990c0
commit 4cfa762da8
2 changed files with 20 additions and 1 deletion
@@ -191,7 +191,7 @@ def rollout(
        action = action_transition[ACTION]

        # Convert to CPU / numpy.
-        action_numpy: np.ndarray = action.to("cpu").numpy()
+        action_numpy = _action_to_env_numpy(action)
        assert action_numpy.ndim == 2, "Action dimensions should be (batch, action_dim)"

        # Apply the next action.
@@ -261,6 +261,11 @@ def rollout(
    return ret


+def _action_to_env_numpy(action: Tensor) -> np.ndarray:
+    """Return policy actions as a detached, CPU-resident float32 NumPy array for Gym environments."""
+    return action.detach().to(device="cpu", dtype=torch.float32).numpy()
+
+
 def eval_policy(
    env: gym.vector.VectorEnv,
    policy: PreTrainedPolicy,
@@ -0,0 +1,14 @@
+import numpy as np
+import torch
+
+from lerobot.scripts.lerobot_eval import _action_to_env_numpy
+
+
+def test_action_to_env_numpy_casts_bfloat16_to_float32():
+    action = torch.tensor([[0.5, -1.0]], dtype=torch.bfloat16)  # values exactly representable in bfloat16
+
+    action_numpy = _action_to_env_numpy(action)
+
+    assert action_numpy.shape == (1, 2)
+    assert action_numpy.dtype == np.float32
+    np.testing.assert_allclose(action_numpy, np.array([[0.5, -1.0]], dtype=np.float32))