mirror of
https://github.com/huggingface/lerobot.git
synced 2026-05-11 14:49:43 +00:00
fix: enable SmolVLA eval on LIBERO with custom camera mappings
- Thread camera_name_mapping from LiberoEnv config through to gym envs - Sync features_map with camera_name_mapping in LiberoEnv.__post_init__ - Fix render() to use first available camera instead of hardcoded "image" - Handle non-dict final_info in rollout by falling back to info["is_success"] - Add use_peft legacy field to SmolVLAConfig for checkpoint compat - Add defaults to GR00TN15Config init=False fields for transformers 5.3 Made-with: Cursor
This commit is contained in:
@@ -381,6 +381,12 @@ class LiberoEnv(EnvConfig):
|
||||
else:
|
||||
raise ValueError(f"Unsupported obs_type: {self.obs_type}")
|
||||
|
||||
if self.camera_name_mapping is not None:
|
||||
mapped_agentview = self.camera_name_mapping.get("agentview_image", "image")
|
||||
mapped_eye_in_hand = self.camera_name_mapping.get("robot0_eye_in_hand_image", "image2")
|
||||
self.features_map[LIBERO_KEY_PIXELS_AGENTVIEW] = f"{OBS_IMAGES}.{mapped_agentview}"
|
||||
self.features_map[LIBERO_KEY_PIXELS_EYE_IN_HAND] = f"{OBS_IMAGES}.{mapped_eye_in_hand}"
|
||||
|
||||
@property
|
||||
def gym_kwargs(self) -> dict:
|
||||
kwargs: dict[str, Any] = {"obs_type": self.obs_type, "render_mode": self.render_mode}
|
||||
@@ -403,6 +409,7 @@ class LiberoEnv(EnvConfig):
|
||||
env_cls=env_cls,
|
||||
control_mode=self.control_mode,
|
||||
episode_length=self.episode_length,
|
||||
camera_name_mapping=self.camera_name_mapping,
|
||||
)
|
||||
|
||||
def get_env_processors(self):
|
||||
|
||||
@@ -223,7 +223,8 @@ class LiberoEnv(gym.Env):
|
||||
|
||||
def render(self):
|
||||
raw_obs = self._env.env._get_observations()
|
||||
image = self._format_raw_obs(raw_obs)["pixels"]["image"]
|
||||
pixels = self._format_raw_obs(raw_obs)["pixels"]
|
||||
image = next(iter(pixels.values()))
|
||||
image = image[::-1, ::-1] # flip both H and W for visualization
|
||||
return image
|
||||
|
||||
@@ -339,12 +340,6 @@ class LiberoEnv(gym.Env):
|
||||
)
|
||||
observation = self._format_raw_obs(raw_obs)
|
||||
if terminated:
|
||||
info["final_info"] = {
|
||||
"task": self.task,
|
||||
"task_id": self.task_id,
|
||||
"done": bool(done),
|
||||
"is_success": bool(is_success),
|
||||
}
|
||||
self.reset()
|
||||
truncated = False
|
||||
return observation, reward, terminated, truncated, info
|
||||
@@ -364,6 +359,7 @@ def _make_env_fns(
|
||||
init_states: bool,
|
||||
gym_kwargs: Mapping[str, Any],
|
||||
control_mode: str,
|
||||
camera_name_mapping: dict[str, str] | None = None,
|
||||
) -> list[Callable[[], LiberoEnv]]:
|
||||
"""Build n_envs factory callables for a single (suite, task_id)."""
|
||||
|
||||
@@ -379,6 +375,7 @@ def _make_env_fns(
|
||||
episode_index=episode_index,
|
||||
n_envs=n_envs,
|
||||
control_mode=control_mode,
|
||||
camera_name_mapping=camera_name_mapping,
|
||||
**local_kwargs,
|
||||
)
|
||||
|
||||
@@ -400,6 +397,7 @@ def create_libero_envs(
|
||||
env_cls: Callable[[Sequence[Callable[[], Any]]], Any] | None = None,
|
||||
control_mode: str = "relative",
|
||||
episode_length: int | None = None,
|
||||
camera_name_mapping: dict[str, str] | None = None,
|
||||
) -> dict[str, dict[int, Any]]:
|
||||
"""
|
||||
Create vectorized LIBERO environments with a consistent return shape.
|
||||
@@ -449,6 +447,7 @@ def create_libero_envs(
|
||||
init_states=init_states,
|
||||
gym_kwargs=gym_kwargs,
|
||||
control_mode=control_mode,
|
||||
camera_name_mapping=camera_name_mapping,
|
||||
)
|
||||
out[suite_name][tid] = env_cls(fns)
|
||||
print(f"Built vec env | suite={suite_name} | task_id={tid} | n_envs={n_envs}")
|
||||
|
||||
@@ -176,13 +176,13 @@ N_COLOR_CHANNELS = 3
|
||||
@dataclass
|
||||
class GR00TN15Config(PretrainedConfig):
|
||||
model_type = "gr00t_n1_5"
|
||||
backbone_cfg: dict = field(init=False, metadata={"help": "Backbone configuration."})
|
||||
backbone_cfg: dict = field(init=False, default_factory=dict, metadata={"help": "Backbone configuration."})
|
||||
|
||||
action_head_cfg: dict = field(init=False, metadata={"help": "Action head configuration."})
|
||||
action_head_cfg: dict = field(init=False, default_factory=dict, metadata={"help": "Action head configuration."})
|
||||
|
||||
action_horizon: int = field(init=False, metadata={"help": "Action horizon."})
|
||||
action_horizon: int = field(init=False, default=0, metadata={"help": "Action horizon."})
|
||||
|
||||
action_dim: int = field(init=False, metadata={"help": "Action dimension."})
|
||||
action_dim: int = field(init=False, default=0, metadata={"help": "Action dimension."})
|
||||
compute_dtype: str = field(default="float32", metadata={"help": "Compute dtype."})
|
||||
|
||||
def __init__(self, **kwargs):
|
||||
|
||||
@@ -109,6 +109,7 @@ class SmolVLAConfig(PreTrainedConfig):
|
||||
compile_model: bool = False # Whether to use torch.compile for model optimization
|
||||
compile_mode: str = "max-autotune" # Torch compile mode
|
||||
|
||||
|
||||
def __post_init__(self):
|
||||
super().__post_init__()
|
||||
|
||||
|
||||
@@ -193,14 +193,11 @@ def rollout(
|
||||
|
||||
# VectorEnv stores is_success in `info["final_info"][env_index]["is_success"]`. "final_info" isn't
|
||||
# available if none of the envs finished.
|
||||
if "final_info" in info:
|
||||
final_info = info["final_info"]
|
||||
if not isinstance(final_info, dict):
|
||||
raise RuntimeError(
|
||||
"Unsupported `final_info` format: expected dict (Gymnasium >= 1.0). "
|
||||
"You're likely using an older version of gymnasium (< 1.0). Please upgrade."
|
||||
)
|
||||
successes = final_info["is_success"].tolist()
|
||||
if "final_info" in info and isinstance(info["final_info"], dict):
|
||||
successes = info["final_info"]["is_success"].tolist()
|
||||
elif "is_success" in info:
|
||||
is_success = info["is_success"]
|
||||
successes = is_success.tolist() if hasattr(is_success, "tolist") else [bool(is_success)] * env.num_envs
|
||||
else:
|
||||
successes = [False] * env.num_envs
|
||||
|
||||
|
||||
Reference in New Issue
Block a user