Convert policy actions to float32 NumPy arrays before stepping the environment.

rollout() previously called action.to("cpu").numpy() directly. This patch routes
the conversion through a new helper, _action_to_env_numpy(), which detaches the
tensor, moves it to the CPU and casts it to float32. The added test exercises
the helper with a bfloat16 input.

NOTE(review): this patch was recovered from a copy whose line breaks had been
lost. The indentation of context lines was reconstructed from Python syntax and
the hunk line counts; confirm it against the target file (or apply with
`git apply --ignore-whitespace`) before relying on it.

diff --git a/src/lerobot/scripts/lerobot_eval.py b/src/lerobot/scripts/lerobot_eval.py
index d45483d21..440c96d3c 100644
--- a/src/lerobot/scripts/lerobot_eval.py
+++ b/src/lerobot/scripts/lerobot_eval.py
@@ -191,7 +191,7 @@ def rollout(
         action = action_transition[ACTION]
 
         # Convert to CPU / numpy.
-        action_numpy: np.ndarray = action.to("cpu").numpy()
+        action_numpy = _action_to_env_numpy(action)
         assert action_numpy.ndim == 2, "Action dimensions should be (batch, action_dim)"
 
         # Apply the next action.
@@ -261,6 +261,11 @@ def rollout(
     return ret
 
 
+def _action_to_env_numpy(action: Tensor) -> np.ndarray:
+    """Convert policy actions to a NumPy array accepted by Gym environments."""
+    return action.detach().to(device="cpu", dtype=torch.float32).numpy()
+
+
 def eval_policy(
     env: gym.vector.VectorEnv,
     policy: PreTrainedPolicy,
diff --git a/tests/scripts/test_lerobot_eval.py b/tests/scripts/test_lerobot_eval.py
new file mode 100644
index 000000000..fd41296c5
--- /dev/null
+++ b/tests/scripts/test_lerobot_eval.py
@@ -0,0 +1,14 @@
+import numpy as np
+import torch
+
+from lerobot.scripts.lerobot_eval import _action_to_env_numpy
+
+
+def test_action_to_env_numpy_casts_bfloat16_to_float32():
+    action = torch.tensor([[0.5, -1.0]], dtype=torch.bfloat16)
+
+    action_numpy = _action_to_env_numpy(action)
+
+    assert action_numpy.shape == (1, 2)
+    assert action_numpy.dtype == np.float32
+    np.testing.assert_allclose(action_numpy, np.array([[0.5, -1.0]], dtype=np.float32))