This commit is contained in:
Pepijn
2025-09-11 17:49:36 +02:00
8 changed files with 585 additions and 297 deletions
+58
View File
@@ -0,0 +1,58 @@
#!/bin/bash
# Evaluate a pretrained LeRobot policy on one or more LIBERO task suites.
#
# All Hugging Face / W&B caches and temporary files are redirected to the RAID
# volume, W&B runs offline, and MuJoCo renders headlessly through EGL.
# Run from the repository root (the eval script path below is relative).
set -euo pipefail

# --- storage / caches --------------------------------------------------------
RAID=/raid/jade
export HF_HOME="$RAID/.cache/huggingface"
# NOTE(review): newer transformers releases reportedly prefer HF_HOME over
# TRANSFORMERS_CACHE; kept so older installs still honour the location.
export TRANSFORMERS_CACHE="$HF_HOME/transformers"
export HF_DATASETS_CACHE="$HF_HOME/datasets"
export HF_LEROBOT_HOME="$HF_HOME/lerobot"
export WANDB_CACHE_DIR="$RAID/.cache/wandb"
export TMPDIR="$RAID/.cache/tmp"
mkdir -p -- "$TMPDIR"

# --- runtime environment -----------------------------------------------------
export WANDB_MODE=offline
# Datasets stay offline, but Hub access is left enabled: HF_HUB_OFFLINE is
# explicitly cleared so a value inherited from the calling shell cannot block
# model downloads.
export HF_DATASETS_OFFLINE=1
unset HF_HUB_OFFLINE
export TOKENIZERS_PARALLELISM=false
export MUJOCO_GL=egl
export CUDA_VISIBLE_DEVICES=2

# --- configuration -----------------------------------------------------------
# Alternative checkpoint (local training run):
# POLICY_PATH="/raid/jade/logs/lerobot/lerobot_2_HuggingFaceVLA_libero_smolvla_lr1e-4bs32steps100000/checkpoints/100000/pretrained_model"
POLICY_PATH="/raid/jade/models/smolvlamust"
TASK=libero_spatial,libero_object # comma-separated list of LIBERO suites
ENV_TYPE="libero"
BATCH_SIZE=1
N_EPISODES=1

# --- run evaluation ----------------------------------------------------------
python src/lerobot/scripts/eval.py \
  --policy.path="$POLICY_PATH" \
  --env.type="$ENV_TYPE" \
  --eval.batch_size="$BATCH_SIZE" \
  --eval.n_episodes="$N_EPISODES" \
  --env.multitask_eval=True \
  --env.task="$TASK"

# Alternative entry point, kept for reference:
# python examples/evaluate_libero.py \
# --policy_path "$POLICY_PATH" \
# --task_suite_name "$TASK" \
# --num_steps_wait 10 \
# --num_trials_per_task 10 \
# --video_out_path "data/libero/videos" \
# --device "cuda" \
# --seed 7
-2
View File
@@ -320,8 +320,6 @@ class LiberoEnv(EnvConfig):
@property @property
def gym_kwargs(self) -> dict: def gym_kwargs(self) -> dict:
return { return {
# "task": self.task,
"obs_type": self.obs_type, "obs_type": self.obs_type,
"render_mode": self.render_mode, "render_mode": self.render_mode,
# "max_episode_steps": self.episode_length,
} }
+14 -12
View File
@@ -57,15 +57,14 @@ def make_env(
""" """
if n_envs < 1: if n_envs < 1:
raise ValueError("`n_envs must be at least 1") raise ValueError("`n_envs` must be at least 1")
# batched version of the env that returns an observation of shape (b, c)
env_cls = gym.vector.AsyncVectorEnv if use_async_envs else gym.vector.SyncVectorEnv env_cls = gym.vector.AsyncVectorEnv if use_async_envs else gym.vector.SyncVectorEnv
if "libero" in cfg.type: if "libero" in cfg.type:
from lerobot.envs.libero import create_libero_envs from lerobot.envs.libero import create_libero_envs
env = create_libero_envs( return create_libero_envs(
task=cfg.task, task=cfg.task,
n_envs=n_envs, n_envs=n_envs,
camera_name=cfg.camera_name, camera_name=cfg.camera_name,
@@ -74,19 +73,22 @@ def make_env(
env_cls=env_cls, env_cls=env_cls,
multitask_eval=cfg.multitask_eval, multitask_eval=cfg.multitask_eval,
) )
else:
package_name = f"gym_{cfg.type}" package_name = f"gym_{cfg.type}"
try: try:
importlib.import_module(package_name) importlib.import_module(package_name)
except ModuleNotFoundError as e: except ModuleNotFoundError as e:
print( raise ModuleNotFoundError(
f"{package_name} is not installed. Please install it with `pip install 'lerobot[{cfg.type}]'`" f'{package_name} is not installed. Install with: pip install "lerobot[{cfg.type}]"'
) ) from e
raise e
gym_handle = f"{package_name}/{cfg.task}" gym_handle = f"{package_name}/{cfg.task}"
env = env_cls(
[lambda: gym.make(gym_handle, disable_env_checker=True, **cfg.gym_kwargs) for _ in range(n_envs)]
)
return env def _make_one():
return gym.make(gym_handle, disable_env_checker=True, **(cfg.gym_kwargs or {}))
vec = env_cls([_make_one for _ in range(n_envs)])
# normalize to {suite: {task_id: vec_env}} for consistency
suite_name = cfg.type # e.g., "pusht", "aloha"
return {suite_name: {0: vec}}
+276 -104
View File
@@ -1,7 +1,10 @@
from __future__ import annotations
import logging
import math import math
import os import os
from collections import defaultdict from collections import defaultdict
from collections.abc import Callable from collections.abc import Callable, Iterable, Mapping, Sequence
from itertools import chain from itertools import chain
from typing import Any from typing import Any
@@ -12,98 +15,164 @@ from gymnasium import spaces
from libero.libero import benchmark, get_libero_path from libero.libero import benchmark, get_libero_path
from libero.libero.envs import OffScreenRenderEnv from libero.libero.envs import OffScreenRenderEnv
logger = logging.getLogger(__name__)
# ---- Helpers -----------------------------------------------------------------
def _parse_camera_names(camera_name: str | Sequence[str]) -> list[str]:
"""Normalize camera_name into a non-empty list of strings."""
if isinstance(camera_name, str):
cams = [c.strip() for c in camera_name.split(",") if c.strip()]
elif isinstance(camera_name, (list, tuple)):
cams = [str(c).strip() for c in camera_name if str(c).strip()]
else:
raise TypeError(f"camera_name must be str or sequence[str], got {type(camera_name).__name__}")
if not cams:
raise ValueError("camera_name resolved to an empty list.")
return cams
def _get_suite(name: str):
    """Look up the LIBERO suite registered under ``name`` and instantiate it.

    Raises:
        ValueError: If ``name`` is not a key of the benchmark dictionary, or if
            the instantiated suite has a missing or empty ``tasks`` attribute.
    """
    registry = benchmark.get_benchmark_dict()
    if name in registry:
        suite = registry[name]()
        if getattr(suite, "tasks", None):
            return suite
        raise ValueError(f"Suite '{name}' has no tasks.")
    raise ValueError(f"Unknown LIBERO suite '{name}'. Available: {', '.join(sorted(registry.keys()))}")
def _select_task_ids(total_tasks: int, task_ids: Iterable[int] | None) -> list[int]:
"""Validate/normalize task ids. If None → all tasks."""
if task_ids is None:
return list(range(total_tasks))
ids = sorted({int(t) for t in task_ids})
for t in ids:
if t < 0 or t >= total_tasks:
raise ValueError(f"task_id {t} out of range [0, {total_tasks - 1}].")
return ids
def _make_env_fns(
*,
suite,
suite_name: str,
task_id: int,
n_envs: int,
camera_names: list[str],
init_states: bool,
gym_kwargs: Mapping[str, Any],
LiberoEnv: type, # injected to avoid forward ref issues if needed
) -> list[Callable[[], LiberoEnv]]:
"""Build n_envs factory callables for a single (suite, task_id)."""
joined_cams = ",".join(camera_names) # keep backward-compat: downstream expects a string
fns: list[Callable[[], LiberoEnv]] = []
for i in range(n_envs):
def _mk(
i=i,
suite=suite,
task_id=task_id,
suite_name=suite_name,
joined_cams=joined_cams,
init_states=init_states,
gym_kwargs=dict(gym_kwargs),
):
return LiberoEnv(
task_suite=suite,
task_id=task_id,
task_suite_name=suite_name,
camera_name=joined_cams,
init_states=init_states,
episode_index=i,
**gym_kwargs,
)
fns.append(_mk)
return fns
# ---- Main API ----------------------------------------------------------------
def create_libero_envs( def create_libero_envs(
task: str, task: str,
n_envs: int, n_envs: int,
gym_kwargs: dict[str, Any] = None, gym_kwargs: dict[str, Any] | None = None,
camera_name: str = "agentview_image,robot0_eye_in_hand_image", camera_name: str | Sequence[str] = "agentview_image,robot0_eye_in_hand_image",
init_states: bool = True, init_states: bool = True,
env_cls: Callable = None, env_cls: Callable[[Sequence[Callable[[], Any]]], Any] | None = None,
multitask_eval: bool = True, multitask_eval: bool = True, # kept for signature compatibility; return type is consistent regardless
) -> dict[str, dict[str, Any]]: ) -> dict[str, dict[int, Any]]:
""" """
Here n_envs is per task and equal to the number of rollouts. Create vectorized LIBERO environments with a consistent return shape.
Returns:
dict[str, dict[str, list[LiberoEnv]]]: keys are task_suite and values are list of LiberoEnv envs.
"""
print("num envs", n_envs)
print("multitask_eval", multitask_eval)
print("gym_kwargs", gym_kwargs)
if gym_kwargs is None:
gym_kwargs = {}
if not multitask_eval: Returns:
benchmark_dict = benchmark.get_benchmark_dict() dict[suite_name][task_id] -> vec_env (env_cls([...]) with exactly n_envs factories)
task_suite = benchmark_dict[task]() # can also choose libero_spatial, libero_object, libero_10 etc. Notes:
tasks_id = list(range(len(task_suite.tasks))) - n_envs is the number of rollouts *per task* (episode_index = 0..n_envs-1).
episode_indices = [0 for i in range(len(tasks_id))] - `task` can be a single suite or a comma-separated list of suites.
if len(tasks_id) == 1: - You may pass `task_ids` (list[int]) inside `gym_kwargs` to restrict tasks per suite.
tasks_id = [tasks_id[0] for _ in range(n_envs)] """
episode_indices = list(range(n_envs)) if env_cls is None or not callable(env_cls):
elif len(tasks_id) < n_envs and n_envs % len(tasks_id) == 0: raise ValueError("env_cls must be a callable that wraps a list of environment factory callables.")
n_repeat = n_envs // len(tasks_id) if not isinstance(n_envs, int) or n_envs <= 0:
print("n_repeat", n_repeat) raise ValueError(f"n_envs must be a positive int; got {n_envs}.")
episode_indices = []
for _ in range(len(tasks_id)): gym_kwargs = dict(gym_kwargs or {})
episode_indices.extend(list(range(n_repeat))) task_ids_filter = gym_kwargs.pop("task_ids", None) # optional: limit to specific tasks
tasks_id = list(chain.from_iterable([[item] * n_repeat for item in tasks_id]))
elif n_envs < len(tasks_id): # Avoid circular import/type issues: assume LiberoEnv is defined in this module
tasks_id = tasks_id[:n_envs] try:
episode_indices = list(range(n_envs))[:n_envs] LiberoEnv # type: ignore[name-defined]
print(f"WARNING: n_envs < len(tasks_id), evaluating only on {tasks_id}") except NameError:
print(f"Creating Libero envs with task ids {tasks_id} from suite {task}") # If LiberoEnv is in the same file, this won't run. If it's elsewhere, import here.
assert n_envs == len(tasks_id), ( exit()
f"len(n_envs) and tasks_id should be the same, got {n_envs} and {len(tasks_id)}" # from .libero_env import LiberoEnv # adjust if your class lives in another module
camera_names = _parse_camera_names(camera_name)
suite_names = [s.strip() for s in str(task).split(",") if s.strip()]
if not suite_names:
raise ValueError("`task` must contain at least one LIBERO suite name.")
logger.info(
"Creating LIBERO envs | suites=%s | n_envs(per task)=%d | init_states=%s | multitask_eval=%s",
suite_names,
n_envs,
init_states,
bool(multitask_eval),
) )
return env_cls( if task_ids_filter is not None:
[ logger.info("Restricting to task_ids=%s", task_ids_filter)
lambda i=i: LiberoEnv(
task_suite=task_suite, out: dict[str, dict[int, Any]] = defaultdict(dict)
task_id=tasks_id[i],
task_suite_name=task, for suite_name in suite_names:
camera_name=camera_name, suite = _get_suite(suite_name)
total = len(suite.tasks)
selected = _select_task_ids(total, task_ids_filter)
if not selected:
raise ValueError(f"No tasks selected for suite '{suite_name}' (available: {total}).")
for tid in selected:
fns = _make_env_fns(
suite=suite,
suite_name=suite_name,
task_id=tid,
n_envs=n_envs,
camera_names=camera_names,
init_states=init_states, init_states=init_states,
episode_index=episode_indices[i], gym_kwargs=gym_kwargs,
**gym_kwargs, LiberoEnv=LiberoEnv,
) )
for i in range(n_envs) out[suite_name][tid] = env_cls(fns)
] logger.debug("Built vec env | suite=%s | task_id=%d | n_envs=%d", suite_name, tid, n_envs)
)
else: # return plain dicts for predictability
envs = defaultdict(dict) return {suite: dict(task_map) for suite, task_map in out.items()}
benchmark_dict = benchmark.get_benchmark_dict()
task = task.split(",")
for _task in task:
task_suite = benchmark_dict[
_task
]() # can also choose libero_spatial, libero_object, libero_10 etc.
tasks_ids = list(range(len(task_suite.tasks)))
for tasks_id in tasks_ids:
episode_indices = list(range(n_envs))
print(
f"Creating Libero envs with task ids {tasks_id} from suite {_task}, episode_indices: {episode_indices}"
)
envs_list = [
(
lambda i=i,
task_suite=task_suite,
tasks_id=tasks_id,
_task=_task,
episode_indices=episode_indices: LiberoEnv(
task_suite=task_suite,
task_id=tasks_id,
task_suite_name=_task,
camera_name=camera_name,
init_states=init_states,
episode_index=episode_indices[i],
**gym_kwargs,
)
)
for i in range(n_envs)
]
envs[_task][tasks_id] = env_cls(envs_list)
return envs
def quat2axisangle(quat): def quat2axisangle(quat):
@@ -199,17 +268,15 @@ class LiberoEnv(gym.Env):
self.episode_index = episode_index self.episode_index = episode_index
self._env = self._make_envs_task(task_suite, self.task_id) self._env = self._make_envs_task(task_suite, self.task_id)
if task_suite_name == "libero_spatial": TASK_SUITE_MAX_STEPS: dict[str, int] = {
max_steps = 220 # longest training demo has 193 steps "libero_spatial": 220, # longest training demo has 193 steps
elif task_suite_name == "libero_object": "libero_object": 280, # longest training demo has 254 steps
max_steps = 280 # longest training demo has 254 steps "libero_goal": 300, # longest training demo has 270 steps
elif task_suite_name == "libero_goal": "libero_10": 520, # longest training demo has 505 steps
max_steps = 300 # longest training demo has 270 steps "libero_90": 400, # longest training demo has 373 steps
elif task_suite_name == "libero_10": }
max_steps = 520 # longest training demo has 505 steps default_steps = 500
elif task_suite_name == "libero_90": self._max_episode_steps = TASK_SUITE_MAX_STEPS.get(task_suite_name, default_steps)
max_steps = 400 # longest training demo has 373 steps
self._max_episode_steps = max_steps
images = {} images = {}
for cam in self.camera_name: for cam in self.camera_name:
@@ -221,7 +288,11 @@ class LiberoEnv(gym.Env):
) )
if self.obs_type == "state": if self.obs_type == "state":
raise NotImplementedError() raise NotImplementedError(
"The 'state' observation type is not supported in LiberoEnv. "
"Please switch to an image-based obs_type (e.g. 'pixels', 'pixels_agent_pos')."
)
elif self.obs_type == "pixels": elif self.obs_type == "pixels":
self.observation_space = spaces.Dict( self.observation_space = spaces.Dict(
{ {
@@ -245,9 +316,8 @@ class LiberoEnv(gym.Env):
def render(self): def render(self):
raw_obs = self._env.env._get_observations() raw_obs = self._env.env._get_observations()
formatted = self._format_raw_obs(raw_obs) image = self._format_raw_obs(raw_obs)["pixels"]["image"]
# grab the "main" camera return image
return formatted["pixels"]["image"]
def _make_envs_task(self, task_suite, task_id: int = 0): def _make_envs_task(self, task_suite, task_id: int = 0):
task = task_suite.get_task(task_id) task = task_suite.get_task(task_id)
@@ -277,7 +347,6 @@ class LiberoEnv(gym.Env):
image = raw_obs[camera_name] image = raw_obs[camera_name]
image = image[::-1, ::-1] # rotate 180 degrees image = image[::-1, ::-1] # rotate 180 degrees
images[self.camera_name_mapping[camera_name]] = image images[self.camera_name_mapping[camera_name]] = image
# images = image if len(images) == 1 else images
state = np.concatenate( state = np.concatenate(
( (
raw_obs["robot0_eef_pos"], raw_obs["robot0_eef_pos"],
@@ -287,7 +356,10 @@ class LiberoEnv(gym.Env):
) )
agent_pos = state agent_pos = state
if self.obs_type == "state": if self.obs_type == "state":
raise NotImplementedError() raise NotImplementedError(
"The 'state' observation type is not supported in LiberoEnv. "
"Please switch to an image-based obs_type (e.g. 'pixels', 'pixels_agent_pos')."
)
elif self.obs_type == "pixels": elif self.obs_type == "pixels":
obs = {"pixels": images.copy()} obs = {"pixels": images.copy()}
elif self.obs_type == "pixels_agent_pos": elif self.obs_type == "pixels_agent_pos":
@@ -310,16 +382,116 @@ class LiberoEnv(gym.Env):
return observation, info return observation, info
def step(self, action): def step(self, action):
assert action.ndim == 1 if action.ndim != 1:
action[-1] = 1.0 - action[-1] raise ValueError(
f"Expected action to be 1-D (shape (action_dim,)), "
f"but got shape {action.shape} with ndim={action.ndim}"
)
raw_obs, reward, done, info = self._env.step(action) raw_obs, reward, done, info = self._env.step(action)
is_success = self._env.check_success() is_success = self._env.check_success()
terminated = done or is_success terminated = done or is_success
info["is_success"] = is_success info["is_success"] = done # is_success
observation = self._format_raw_obs(raw_obs) observation = self._format_raw_obs(raw_obs)
if done:
self.reset()
print(self.task, self.task_id, done, is_success)
truncated = False truncated = False
# note if it is unable to complete get libero error after many steps
return observation, reward, terminated, truncated, info return observation, reward, terminated, truncated, info
def close(self): def close(self):
self._env.close() self._env.close()
# NOTE(review): the body below matches the pre-refactor create_libero_envs shown
# earlier in this diff; it looks like a retained legacy copy — confirm whether
# anything still calls it.
def create_libero_envs1(
task: str,
n_envs: int,
gym_kwargs: dict[str, Any] = None,
camera_name: str = "agentview_image,robot0_eye_in_hand_image",
init_states: bool = True,
env_cls: Callable = None,
multitask_eval: bool = True,
) -> dict[str, dict[str, Any]]:
"""
Build LIBERO environments wrapped by ``env_cls``.

The return shape depends on ``multitask_eval``:

- ``multitask_eval=False``: ``task`` names a single suite and ONE ``env_cls``
  instance holding ``n_envs`` factories is returned (not a dict).
- ``multitask_eval=True``: ``task`` is a comma-separated list of suites and a
  mapping ``suite name -> task id -> env_cls instance`` is returned; here
  ``n_envs`` is per task and equal to the number of rollouts.

``env_cls`` defaults to None but is called unconditionally, so callers must
supply it.
"""
print("num envs", n_envs)
print("multitask_eval", multitask_eval)
print("gym_kwargs", gym_kwargs)
if gym_kwargs is None:
gym_kwargs = {}
if not multitask_eval:
# Single-suite mode: spread the n_envs slots over the suite's tasks.
benchmark_dict = benchmark.get_benchmark_dict()
task_suite = benchmark_dict[task]() # can also choose libero_spatial, libero_object, libero_10 etc.
tasks_id = list(range(len(task_suite.tasks)))
episode_indices = [0 for i in range(len(tasks_id))]
if len(tasks_id) == 1:
# One task only: every slot runs it, each with its own episode index.
tasks_id = [tasks_id[0] for _ in range(n_envs)]
episode_indices = list(range(n_envs))
elif len(tasks_id) < n_envs and n_envs % len(tasks_id) == 0:
# More slots than tasks, evenly divisible: repeat each task n_repeat times.
n_repeat = n_envs // len(tasks_id)
print("n_repeat", n_repeat)
episode_indices = []
for _ in range(len(tasks_id)):
episode_indices.extend(list(range(n_repeat)))
tasks_id = list(chain.from_iterable([[item] * n_repeat for item in tasks_id]))
elif n_envs < len(tasks_id):
# Fewer slots than tasks: only the first n_envs tasks are evaluated.
tasks_id = tasks_id[:n_envs]
episode_indices = list(range(n_envs))[:n_envs]
print(f"WARNING: n_envs < len(tasks_id), evaluating only on {tasks_id}")
print(f"Creating Libero envs with task ids {tasks_id} from suite {task}")
# No branch above handles n_envs > len(tasks_id) with a non-zero remainder
# (and more than one task), so that combination fails this assertion.
assert n_envs == len(tasks_id), (
f"len(n_envs) and tasks_id should be the same, got {n_envs} and {len(tasks_id)}"
)
return env_cls(
[
# i=i binds the loop value at definition time instead of at call time.
lambda i=i: LiberoEnv(
task_suite=task_suite,
task_id=tasks_id[i],
task_suite_name=task,
camera_name=camera_name,
init_states=init_states,
episode_index=episode_indices[i],
**gym_kwargs,
)
for i in range(n_envs)
]
)
else:
# Multi-suite mode: one env_cls instance per (suite, task id).
envs = defaultdict(dict)
benchmark_dict = benchmark.get_benchmark_dict()
# Rebinds `task` from the comma-separated string to a list of suite names.
task = task.split(",")
for _task in task:
task_suite = benchmark_dict[
_task
]() # can also choose libero_spatial, libero_object, libero_10 etc.
tasks_ids = list(range(len(task_suite.tasks)))
for tasks_id in tasks_ids:
episode_indices = list(range(n_envs))
print(
f"Creating Libero envs with task ids {tasks_id} from suite {_task}, episode_indices: {episode_indices}"
)
envs_list = [
(
# Default arguments freeze the current loop values for each factory.
lambda i=i,
task_suite=task_suite,
tasks_id=tasks_id,
_task=_task,
episode_indices=episode_indices: LiberoEnv(
task_suite=task_suite,
task_id=tasks_id,
task_suite_name=_task,
camera_name=camera_name,
init_states=init_states,
episode_index=episode_indices[i],
**gym_kwargs,
)
)
for i in range(n_envs)
]
envs[_task][tasks_id] = env_cls(envs_list)
return envs
+46
View File
@@ -134,3 +134,49 @@ def add_envs_task(env: gym.vector.VectorEnv, observation: dict[str, Any]) -> dic
num_envs = observation[list(observation.keys())[0]].shape[0] num_envs = observation[list(observation.keys())[0]].shape[0]
observation["task"] = ["" for _ in range(num_envs)] observation["task"] = ["" for _ in range(num_envs)]
return observation return observation
def _close_single_env(env: Any) -> None:
"""Try to close a single env object if it exposes .close()."""
try:
close_fn = getattr(env, "close", None)
if callable(close_fn):
close_fn()
except Exception as exc:
# Best-effort close: log but don't raise
LOG.debug("Exception while closing env %s: %s", env, exc)
def close_envs(env_or_collection: Any) -> None:
"""
Close a single env or any nested structure of envs.
Accepts:
- a single env with .close()
- a Mapping of things (e.g. dict)
- a Sequence of things (list/tuple) but NOT str/bytes
- nested combinations of the above
This is intentionally permissive and best-effort: it will swallow exceptions
encountered while closing individual envs and continue.
"""
# Guard: single object with close()
if hasattr(env_or_collection, "close") and not isinstance(env_or_collection, (Mapping, Sequence)):
_close_single_env(env_or_collection)
return
# Mapping (e.g., {suite: {task_id: vec_env}})
if isinstance(env_or_collection, Mapping):
for v in env_or_collection.values():
close_envs(v)
return
# Sequence (list/tuple) but skip str/bytes
if isinstance(env_or_collection, Sequence) and not isinstance(env_or_collection, (str, bytes)):
for v in env_or_collection:
close_envs(v)
return
# Fallback: try to close if possible
if hasattr(env_or_collection, "close"):
_close_single_env(env_or_collection)
-2
View File
@@ -186,7 +186,5 @@ def make_policy(
policy.to(cfg.device) policy.to(cfg.device)
assert isinstance(policy, nn.Module) assert isinstance(policy, nn.Module)
# policy = torch.compile(policy, mode="reduce-overhead") # policy = torch.compile(policy, mode="reduce-overhead")
return policy return policy
+138 -109
View File
@@ -46,17 +46,19 @@ Note that in both examples, the repo/folder should contain at least `config.json
You can learn about the CLI options for this script in the `EvalPipelineConfig` in lerobot/configs/eval.py You can learn about the CLI options for this script in the `EvalPipelineConfig` in lerobot/configs/eval.py
""" """
import concurrent import concurrent.futures as cf
import json import json
import logging import logging
import threading import threading
import time import time
from collections.abc import Callable from collections import defaultdict
from collections.abc import Callable, Iterator
from contextlib import nullcontext from contextlib import nullcontext
from copy import deepcopy from copy import deepcopy
from dataclasses import asdict from dataclasses import asdict
from pathlib import Path from pathlib import Path
from pprint import pformat from pprint import pformat
from typing import TypedDict
import einops import einops
import gymnasium as gym import gymnasium as gym
@@ -69,7 +71,11 @@ from tqdm import trange
from lerobot.configs import parser from lerobot.configs import parser
from lerobot.configs.eval import EvalPipelineConfig from lerobot.configs.eval import EvalPipelineConfig
from lerobot.envs.factory import make_env from lerobot.envs.factory import make_env
from lerobot.envs.utils import add_envs_task, check_env_attributes_and_types, preprocess_observation from lerobot.envs.utils import (
add_envs_task,
check_env_attributes_and_types,
preprocess_observation,
)
from lerobot.policies.factory import make_policy from lerobot.policies.factory import make_policy
from lerobot.policies.pretrained import PreTrainedPolicy from lerobot.policies.pretrained import PreTrainedPolicy
from lerobot.policies.utils import get_device_from_parameters from lerobot.policies.utils import get_device_from_parameters
@@ -466,7 +472,9 @@ def eval_main(cfg: EvalPipelineConfig):
# Check device is available # Check device is available
device = get_safe_torch_device(cfg.policy.device, log=True) device = get_safe_torch_device(cfg.policy.device, log=True)
# login to hf
# login()
torch.backends.cudnn.benchmark = True torch.backends.cudnn.benchmark = True
torch.backends.cuda.matmul.allow_tf32 = True torch.backends.cuda.matmul.allow_tf32 = True
set_seed(cfg.seed) set_seed(cfg.seed)
@@ -474,18 +482,18 @@ def eval_main(cfg: EvalPipelineConfig):
logging.info(colored("Output dir:", "yellow", attrs=["bold"]) + f" {cfg.output_dir}") logging.info(colored("Output dir:", "yellow", attrs=["bold"]) + f" {cfg.output_dir}")
logging.info("Making environment.") logging.info("Making environment.")
env = make_env(cfg.env, n_envs=cfg.eval.batch_size, use_async_envs=cfg.eval.use_async_envs) envs = make_env(cfg.env, n_envs=cfg.eval.batch_size, use_async_envs=cfg.eval.use_async_envs)
logging.info("Making policy.") logging.info("Making policy.")
policy = make_policy( policy = make_policy(
cfg=cfg.policy, cfg=cfg.policy,
env_cfg=cfg.env, env_cfg=cfg.env,
) )
policy.eval() policy.eval()
with torch.no_grad(), torch.autocast(device_type=device.type) if cfg.policy.use_amp else nullcontext(): with torch.no_grad(), torch.autocast(device_type=device.type) if cfg.policy.use_amp else nullcontext():
if cfg.env.multitask_eval: info = eval_policy_all(
info = eval_policy_multitask( envs,
env,
policy, policy,
cfg.eval.n_episodes, cfg.eval.n_episodes,
max_episodes_rendered=10, max_episodes_rendered=10,
@@ -503,21 +511,10 @@ def eval_main(cfg: EvalPipelineConfig):
continue # Skip the overall stats since we already printed it continue # Skip the overall stats since we already printed it
print(f"\nAggregated Metrics for {task_group}:") print(f"\nAggregated Metrics for {task_group}:")
print(task_group_info["aggregated"]) print(task_group_info["aggregated"])
for _task_group, v in env.items(): # Close all vec envs
for _env in v.values(): for _suite, task_map in envs.items():
_env.close() for _vec in task_map.values():
else: _vec.close()
info = eval_policy(
env,
policy,
cfg.eval.n_episodes,
max_episodes_rendered=10,
videos_dir=Path(cfg.output_dir) / "videos",
start_seed=cfg.seed,
)
print(info["aggregated"])
env.close()
# Save info # Save info
with open(Path(cfg.output_dir) / "eval_info.json", "w") as f: with open(Path(cfg.output_dir) / "eval_info.json", "w") as f:
json.dump(info, f, indent=2) json.dump(info, f, indent=2)
@@ -525,9 +522,20 @@ def eval_main(cfg: EvalPipelineConfig):
logging.info("End of eval") logging.info("End of eval")
def eval_policy_multitask( # ---- typed payload returned by one task eval ----
envs: dict[str, dict[str, gym.vector.VectorEnv]], class TaskMetrics(TypedDict):
policy, sum_rewards: list[float]
max_rewards: list[float]
successes: list[bool]
video_paths: list[str]
ACC_KEYS = ("sum_rewards", "max_rewards", "successes", "video_paths")
def eval_policy_all(
envs: dict[str, dict[int, gym.vector.VectorEnv]],
policy: PreTrainedPolicy,
n_episodes: int, n_episodes: int,
max_episodes_rendered: int = 0, max_episodes_rendered: int = 0,
videos_dir: Path | None = None, videos_dir: Path | None = None,
@@ -536,126 +544,147 @@ def eval_policy_multitask(
max_parallel_tasks: int = 5, max_parallel_tasks: int = 5,
verbose: bool = True, verbose: bool = True,
) -> dict: ) -> dict:
"""
Evaluate a policy over a dict-of-dicts of vectorized envs:
envs[suite_name][task_id] -> gym.vector.VectorEnv
Returns a dict with per-suite aggregates and an 'overall' block.
"""
global_start = time.time() global_start = time.time()
results = {}
overall_rewards, overall_max_rewards, overall_successes = [], [], [] # inner: evaluate a single (suite, task)
overall_video_paths = [] def eval_one(
overall_episode_data = None task_group: str,
task_id: int,
env: gym.vector.VectorEnv,
*,
policy: PreTrainedPolicy,
n_episodes: int,
max_episodes_rendered: int,
videos_dir: Path | None,
return_episode_data: bool,
start_seed: int | None,
) -> TaskMetrics:
"""Evaluates one task_id of one suite using the provided vec env."""
if verbose:
print(f"Evaluating: task_group={task_group}, task_id={task_id} ...")
def eval_task(task_group, task_id, env): task_videos_dir = None
"""Evaluates a single task in parallel."""
print(f"Evaluating: task_group: {task_group}, task_id: {task_id} ...")
if videos_dir is not None: if videos_dir is not None:
task_videos_dir = videos_dir / f"{task_group}_{task_id}" task_videos_dir = videos_dir / f"{task_group}_{task_id}"
task_videos_dir.mkdir(parents=True, exist_ok=True) task_videos_dir.mkdir(parents=True, exist_ok=True)
task_result = eval_policy( task_result = eval_policy(
env, env=env,
policy, policy=policy,
n_episodes, n_episodes=n_episodes,
max_episodes_rendered, max_episodes_rendered=max_episodes_rendered,
task_videos_dir, videos_dir=task_videos_dir,
return_episode_data, return_episode_data=return_episode_data,
start_seed, start_seed=start_seed,
) )
per_episode = task_result["per_episode"] per_episode = task_result["per_episode"]
return { return TaskMetrics(
"task_group": task_group, sum_rewards=[ep["sum_reward"] for ep in per_episode],
"task_id": task_id, max_rewards=[ep["max_reward"] for ep in per_episode],
"sum_rewards": [ep["sum_reward"] for ep in per_episode], successes=[ep["success"] for ep in per_episode],
"max_rewards": [ep["max_reward"] for ep in per_episode], video_paths=task_result.get("video_paths", []),
"successes": [ep["success"] for ep in per_episode], )
"video_paths": task_result.get("video_paths", []),
}
task_group_results = {} # result producer: sequential or threaded, same consumer
def iter_task_results() -> Iterator[tuple[str, int, TaskMetrics]]:
if max_parallel_tasks == 1: if max_parallel_tasks == 1:
# sequential mode (safe for colab / EGL)
for task_group, tasks in envs.items(): for task_group, tasks in envs.items():
for task_id, env in tasks.items(): for task_id, vec in tasks.items():
task_result = eval_task(task_group, task_id, env) yield (
if task_group not in task_group_results: task_group,
task_group_results[task_group] = { task_id,
"sum_rewards": [], eval_one(
"max_rewards": [], task_group,
"successes": [], task_id,
"video_paths": [], vec,
} policy=policy,
task_group_results[task_group]["sum_rewards"].extend(task_result["sum_rewards"]) n_episodes=n_episodes,
task_group_results[task_group]["max_rewards"].extend(task_result["max_rewards"]) max_episodes_rendered=max_episodes_rendered,
task_group_results[task_group]["successes"].extend(task_result["successes"]) videos_dir=videos_dir,
task_group_results[task_group]["video_paths"].extend(task_result["video_paths"]) return_episode_data=return_episode_data,
start_seed=start_seed,
),
)
else: else:
with concurrent.futures.ThreadPoolExecutor(max_workers=max_parallel_tasks) as executor: with cf.ThreadPoolExecutor(max_workers=max_parallel_tasks) as executor:
future_to_task = { fut2key: dict[cf.Future, tuple[str, int]] = {}
executor.submit(eval_task, task_group, task_id, env): (task_group, task_id) for task_group, tasks in envs.items():
for task_group, tasks in envs.items() for task_id, vec in tasks.items():
for task_id, env in tasks.items() fut = executor.submit(
} eval_one,
task_group,
task_id,
vec,
policy=policy,
n_episodes=n_episodes,
max_episodes_rendered=max_episodes_rendered,
videos_dir=videos_dir,
return_episode_data=return_episode_data,
start_seed=start_seed,
)
fut2key[fut] = (task_group, task_id)
for fut in cf.as_completed(fut2key):
task_group, task_id = fut2key[fut]
yield task_group, task_id, fut.result()
task_group_results = {} # single accumulator path on the main thread
group_acc: dict[str, dict[str, list]] = defaultdict(lambda: {k: [] for k in ACC_KEYS})
overall: dict[str, list] = {k: [] for k in ACC_KEYS}
for future in concurrent.futures.as_completed(future_to_task): for task_group, task_id, metrics in iter_task_results():
task_result = future.result() acc = group_acc[task_group]
task_group = task_result["task_group"] for k in ACC_KEYS:
acc[k].extend(metrics[k])
overall[k].extend(metrics[k])
if task_group not in task_group_results: # build outputs
task_group_results[task_group] = { results: dict[str, dict] = {}
"sum_rewards": [], for task_group, data in group_acc.items():
"max_rewards": [],
"successes": [],
"video_paths": [],
}
task_group_results[task_group]["sum_rewards"].extend(task_result["sum_rewards"])
task_group_results[task_group]["max_rewards"].extend(task_result["max_rewards"])
task_group_results[task_group]["successes"].extend(task_result["successes"])
task_group_results[task_group]["video_paths"].extend(task_result["video_paths"])
# Process results per task group
for task_group, data in task_group_results.items():
suite_rewards = data["sum_rewards"] suite_rewards = data["sum_rewards"]
suite_max_rewards = data["max_rewards"] suite_max = data["max_rewards"]
suite_successes = data["successes"] suite_succ = data["successes"]
suite_video_paths = data["video_paths"] suite_vids = data["video_paths"]
suite_eval_s = time.time() - global_start suite_eval_s = time.time() - global_start
suite_eval_ep_s = suite_eval_s / max(1, len(suite_rewards)) suite_eval_ep_s = suite_eval_s / max(1, len(suite_rewards))
results[task_group] = { results[task_group] = {
"aggregated": { "aggregated": {
"avg_sum_reward": float(np.nanmean(suite_rewards)), "avg_sum_reward": float(np.nanmean(suite_rewards)) if suite_rewards else float("nan"),
"avg_max_reward": float(np.nanmean(suite_max_rewards)), "avg_max_reward": float(np.nanmean(suite_max)) if suite_max else float("nan"),
"pc_success": float(np.nanmean(suite_successes) * 100), "pc_success": float(np.nanmean(suite_succ) * 100) if suite_succ else float("nan"),
"eval_s": suite_eval_s, "eval_s": suite_eval_s,
"eval_ep_s": suite_eval_ep_s, "eval_ep_s": suite_eval_ep_s,
}, },
"video_paths": suite_video_paths, "video_paths": suite_vids,
"episodes": None, # Modify if episode data is needed "episodes": None,
} }
overall_rewards.extend(suite_rewards)
overall_max_rewards.extend(suite_max_rewards)
overall_successes.extend(suite_successes)
overall_video_paths.extend(suite_video_paths)
# Global metrics
global_eval_s = time.time() - global_start global_eval_s = time.time() - global_start
global_eval_ep_s = global_eval_s / max(1, len(overall_rewards)) global_eval_ep_s = global_eval_s / max(1, len(overall["sum_rewards"]))
results["overall"] = { results["overall"] = {
"aggregated": { "aggregated": {
"avg_sum_reward": float(np.nanmean(overall_rewards)), "avg_sum_reward": float(np.nanmean(overall["sum_rewards"]))
"avg_max_reward": float(np.nanmean(overall_max_rewards)), if overall["sum_rewards"]
"pc_success": float(np.nanmean(overall_successes) * 100), else float("nan"),
"avg_max_reward": float(np.nanmean(overall["max_rewards"]))
if overall["max_rewards"]
else float("nan"),
"pc_success": float(np.nanmean(overall["successes"]) * 100)
if overall["successes"]
else float("nan"),
"eval_s": global_eval_s, "eval_s": global_eval_s,
"eval_ep_s": global_eval_ep_s, "eval_ep_s": global_eval_ep_s,
}, },
"video_paths": overall_video_paths, "video_paths": overall["video_paths"],
"episodes": overall_episode_data, "episodes": None,
} }
return results return results
+20 -35
View File
@@ -30,11 +30,12 @@ from lerobot.datasets.factory import make_dataset
from lerobot.datasets.sampler import EpisodeAwareSampler from lerobot.datasets.sampler import EpisodeAwareSampler
from lerobot.datasets.utils import cycle from lerobot.datasets.utils import cycle
from lerobot.envs.factory import make_env from lerobot.envs.factory import make_env
from lerobot.envs.utils import close_envs
from lerobot.optim.factory import make_optimizer_and_scheduler from lerobot.optim.factory import make_optimizer_and_scheduler
from lerobot.policies.factory import make_policy from lerobot.policies.factory import make_policy
from lerobot.policies.pretrained import PreTrainedPolicy from lerobot.policies.pretrained import PreTrainedPolicy
from lerobot.policies.utils import get_device_from_parameters from lerobot.policies.utils import get_device_from_parameters
from lerobot.scripts.eval import eval_policy, eval_policy_multitask from lerobot.scripts.eval import eval_policy_all
from lerobot.utils.logging_utils import AverageMeter, MetricsTracker from lerobot.utils.logging_utils import AverageMeter, MetricsTracker
from lerobot.utils.random_utils import set_seed from lerobot.utils.random_utils import set_seed
from lerobot.utils.train_utils import ( from lerobot.utils.train_utils import (
@@ -126,7 +127,6 @@ def train(cfg: TrainPipelineConfig):
logging.info("Creating dataset") logging.info("Creating dataset")
dataset = make_dataset(cfg) dataset = make_dataset(cfg)
# Create environment used for evaluating checkpoints during training on simulation data. # Create environment used for evaluating checkpoints during training on simulation data.
# On real-world data, no need to create an environment as evaluations are done outside train.py, # On real-world data, no need to create an environment as evaluations are done outside train.py,
# using the eval.py instead, with gym_dora environment and dora-rs. # using the eval.py instead, with gym_dora environment and dora-rs.
@@ -140,7 +140,6 @@ def train(cfg: TrainPipelineConfig):
cfg=cfg.policy, cfg=cfg.policy,
ds_meta=dataset.meta, ds_meta=dataset.meta,
) )
logging.info("Creating optimizer and scheduler") logging.info("Creating optimizer and scheduler")
optimizer, lr_scheduler = make_optimizer_and_scheduler(cfg, policy) optimizer, lr_scheduler = make_optimizer_and_scheduler(cfg, policy)
grad_scaler = GradScaler(device.type, enabled=cfg.policy.use_amp) grad_scaler = GradScaler(device.type, enabled=cfg.policy.use_amp)
@@ -203,7 +202,6 @@ def train(cfg: TrainPipelineConfig):
start_time = time.perf_counter() start_time = time.perf_counter()
batch = next(dl_iter) batch = next(dl_iter)
train_tracker.dataloading_s = time.perf_counter() - start_time train_tracker.dataloading_s = time.perf_counter() - start_time
for key in batch: for key in batch:
if isinstance(batch[key], torch.Tensor): if isinstance(batch[key], torch.Tensor):
batch[key] = batch[key].to(device, non_blocking=device.type == "cuda") batch[key] = batch[key].to(device, non_blocking=device.type == "cuda")
@@ -251,34 +249,27 @@ def train(cfg: TrainPipelineConfig):
torch.no_grad(), torch.no_grad(),
torch.autocast(device_type=device.type) if cfg.policy.use_amp else nullcontext(), torch.autocast(device_type=device.type) if cfg.policy.use_amp else nullcontext(),
): ):
if cfg.env.multitask_eval: eval_info = eval_policy_all(
eval_info = eval_policy_multitask( eval_env, # dict[suite][task_id] -> vec_env
eval_env,
policy, policy,
cfg.eval.n_episodes, cfg.eval.n_episodes,
videos_dir=cfg.output_dir / "eval" / f"videos_step_{step_id}", videos_dir=videos_dir,
max_episodes_rendered=4, max_episodes_rendered=4,
start_seed=cfg.seed, start_seed=cfg.seed,
max_parallel_tasks=cfg.env.max_parallel_tasks, max_parallel_tasks=cfg.env.max_parallel_tasks,
verbose=False,
) )
aggregated = eval_info["overall"]["aggregated"]
# Print per-suite stats, log?
for task_group, task_group_info in eval_info.items():
if task_group == "overall":
continue # Skip the overall stats since we already printed it
print(f"\nAggregated Metrics for {task_group}:")
print(task_group_info["aggregated"])
else:
eval_info = eval_policy(
eval_env,
policy,
cfg.eval.n_episodes,
videos_dir=cfg.output_dir / "eval" / f"videos_step_{step_id}",
max_episodes_rendered=4,
start_seed=cfg.seed,
)
aggregated = eval_info["aggregated"]
# overall metrics (suite-agnostic)
aggregated = eval_info["overall"]["aggregated"]
# optional: per-suite logging
for suite, suite_info in eval_info.items():
if suite == "overall":
continue
logging.info("Suite %s aggregated: %s", suite, suite_info["aggregated"])
# meters/tracker
eval_metrics = { eval_metrics = {
"avg_sum_reward": AverageMeter("∑rwrd", ":.3f"), "avg_sum_reward": AverageMeter("∑rwrd", ":.3f"),
"pc_success": AverageMeter("success", ":.1f"), "pc_success": AverageMeter("success", ":.1f"),
@@ -287,22 +278,16 @@ def train(cfg: TrainPipelineConfig):
eval_tracker = MetricsTracker( eval_tracker = MetricsTracker(
cfg.batch_size, dataset.num_frames, dataset.num_episodes, eval_metrics, initial_step=step cfg.batch_size, dataset.num_frames, dataset.num_episodes, eval_metrics, initial_step=step
) )
eval_tracker.eval_s = aggregated.pop("eval_s") eval_tracker.eval_s = aggregated.get("eval_s", 0.0)
eval_tracker.avg_sum_reward = aggregated.pop("avg_sum_reward") eval_tracker.avg_sum_reward = aggregated.get("avg_sum_reward", float("nan"))
eval_tracker.pc_success = aggregated.pop("pc_success") eval_tracker.pc_success = aggregated.get("pc_success", float("nan"))
logging.info(eval_tracker)
if wandb_logger: if wandb_logger:
wandb_log_dict = {**eval_tracker.to_dict(), **eval_info} wandb_log_dict = {**eval_tracker.to_dict(), **eval_info}
wandb_logger.log_dict(wandb_log_dict, step, mode="eval") wandb_logger.log_dict(wandb_log_dict, step, mode="eval")
wandb_logger.log_video(eval_info["video_paths"][0], step, mode="eval") wandb_logger.log_video(eval_info["video_paths"][0], step, mode="eval")
if eval_env: if eval_env:
if cfg.env.multitask_eval: close_envs(eval_env)
for _task_group, envs_dict in eval_env.items():
for _idx, env in envs_dict.items():
env.close()
else:
eval_env.close()
logging.info("End of training") logging.info("End of training")
if cfg.policy.push_to_hub: if cfg.policy.push_to_hub: