Fix eval action conversion for bf16 policies

This commit is contained in:
javadcc_mac
2026-06-13 10:51:33 +08:00
parent fa984990c0
commit 4cfa762da8
2 changed files with 20 additions and 1 deletions
+6 -1
View File
@@ -191,7 +191,7 @@ def rollout(
action = action_transition[ACTION]
# Convert to CPU / numpy.
action_numpy: np.ndarray = action.to("cpu").numpy()
action_numpy = _action_to_env_numpy(action)
assert action_numpy.ndim == 2, "Action dimensions should be (batch, action_dim)"
# Apply the next action.
@@ -261,6 +261,11 @@ def rollout(
return ret
def _action_to_env_numpy(action: Tensor) -> np.ndarray:
"""Convert policy actions to a NumPy array accepted by Gym environments."""
return action.detach().to(device="cpu", dtype=torch.float32).numpy()
def eval_policy(
env: gym.vector.VectorEnv,
policy: PreTrainedPolicy,
+14
View File
@@ -0,0 +1,14 @@
import numpy as np
import torch
from lerobot.scripts.lerobot_eval import _action_to_env_numpy
def test_action_to_env_numpy_casts_bfloat16_to_float32():
    """A bfloat16 action batch must come back as float32 with shape and values intact."""
    # Both sample values are exactly representable in bfloat16, so the
    # round-trip through the cast is expected to reproduce them.
    expected = np.array([[0.5, -1.0]], dtype=np.float32)
    bf16_action = torch.tensor([[0.5, -1.0]], dtype=torch.bfloat16)

    converted = _action_to_env_numpy(bf16_action)

    assert converted.shape == (1, 2)
    assert converted.dtype == np.float32
    np.testing.assert_allclose(converted, expected)