diff --git a/src/lerobot/scripts/eval.py b/src/lerobot/scripts/eval.py
index 87058abb8..e3014d924 100644
--- a/src/lerobot/scripts/eval.py
+++ b/src/lerobot/scripts/eval.py
@@ -57,6 +57,7 @@ from dataclasses import asdict
 from pathlib import Path
 from pprint import pformat
+from typing import Any
 
 import einops
 import gymnasium as gym
@@ -71,7 +72,9 @@ from lerobot.configs.eval import EvalPipelineConfig
 from lerobot.envs.factory import make_env
 from lerobot.envs.utils import add_envs_task, check_env_attributes_and_types, preprocess_observation
-from lerobot.policies.factory import make_policy
+from lerobot.policies.factory import make_policy, make_pre_post_processors
 from lerobot.policies.pretrained import PreTrainedPolicy
+from lerobot.policies.utils import get_device_from_parameters
 from lerobot.processor.core import TransitionKey
 from lerobot.processor.pipeline import PolicyProcessorPipeline
 from lerobot.utils.io_utils import write_video
@@ -88,6 +91,8 @@ def rollout(
     policy: PreTrainedPolicy,
+    preprocessor: PolicyProcessorPipeline[dict[str, Any]],
+    postprocessor: PolicyProcessorPipeline[dict[str, Any]],
     seeds: list[int] | None = None,
     return_observations: bool = False,
     render_callback: Callable[[gym.vector.VectorEnv], None] | None = None,
@@ -151,6 +156,7 @@ def rollout(
     while not np.all(done):
         # Numpy array to tensor and changing dictionary keys to LeRobot policy format.
         observation = preprocess_observation(observation)
+        observation = preprocessor(observation)
 
         if return_observations:
             all_observations.append(deepcopy(observation))
@@ -161,9 +167,11 @@ def rollout(
         with torch.inference_mode():
             action = policy.select_action(observation)
+        action: torch.Tensor = postprocessor({TransitionKey.ACTION: action})[TransitionKey.ACTION]
 
         # Convert to CPU / numpy.
         action: np.ndarray = action.to("cpu").numpy()
         assert action.ndim == 2, "Action dimensions should be (batch, action_dim)"
 
         # Apply the next action.
@@ -222,6 +230,8 @@ def eval_policy(
     policy: PreTrainedPolicy,
+    preprocessor: PolicyProcessorPipeline,
+    postprocessor: PolicyProcessorPipeline,
     n_episodes: int,
     max_episodes_rendered: int = 0,
     videos_dir: Path | None = None,
@@ -298,6 +308,10 @@ def eval_policy(
             start_seed + (batch_ix * env.num_envs), start_seed + ((batch_ix + 1) * env.num_envs)
         )
         rollout_data = rollout(
             env=env,
             policy=policy,
+            preprocessor=preprocessor,
+            postprocessor=postprocessor,
@@ -484,13 +498,22 @@ def eval_main(cfg: EvalPipelineConfig):
         env_cfg=cfg.env,
     )
 
+    policy.eval()
+    preprocessor, postprocessor = make_pre_post_processors(
+        policy_cfg=cfg.policy, pretrained_path=cfg.policy.pretrained_path
+    )
 
     with torch.no_grad(), torch.autocast(device_type=device.type) if cfg.policy.use_amp else nullcontext():
         info = eval_policy(
+            env=env,
+            policy=policy,
+            preprocessor=preprocessor,
+            postprocessor=postprocessor,
+            n_episodes=cfg.eval.n_episodes,
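
Usage sketch (not part of the diff): one way the new processor pipelines thread through evaluation after this change. Per step, rollout now runs preprocess_observation, then the preprocessor pipeline, then policy.select_action, then the postprocessor on the action. The factory names and kwargs below are taken from the diff; run_eval itself and the exact make_policy / eval_policy keyword sets are assumptions, not confirmed API.

from contextlib import nullcontext

import torch

from lerobot.configs.eval import EvalPipelineConfig
from lerobot.envs.factory import make_env
from lerobot.policies.factory import make_policy, make_pre_post_processors
from lerobot.scripts.eval import eval_policy


def run_eval(cfg: EvalPipelineConfig, device: torch.device) -> dict:
    # Hypothetical wrapper mirroring eval_main: build the env and policy first.
    env = make_env(cfg.env)  # assumed signature
    policy = make_policy(cfg=cfg.policy, env_cfg=cfg.env)  # env_cfg kwarg appears in the diff
    policy.eval()

    # New in this change: pre/post processor pipelines are loaded from the
    # pretrained checkpoint and passed down through eval_policy into rollout.
    preprocessor, postprocessor = make_pre_post_processors(
        policy_cfg=cfg.policy, pretrained_path=cfg.policy.pretrained_path
    )

    with torch.no_grad(), torch.autocast(device_type=device.type) if cfg.policy.use_amp else nullcontext():
        info = eval_policy(
            env=env,
            policy=policy,
            preprocessor=preprocessor,
            postprocessor=postprocessor,
            n_episodes=cfg.eval.n_episodes,
        )
    env.close()
    return info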