From cc46497f4c77d525a0c5989abe7c5d984dfd0da9 Mon Sep 17 00:00:00 2001 From: "Jade Choghari (jchoghar)" Date: Wed, 20 Aug 2025 06:55:05 -0400 Subject: [PATCH] fix renaming issues with cams --- examples/test.sh | 5 +-- src/lerobot/configs/policies.py | 8 +++- src/lerobot/constants.py | 4 -- src/lerobot/envs/configs.py | 4 +- src/lerobot/envs/libero.py | 12 ++++-- src/lerobot/envs/utils.py | 66 +++++++++++++++++---------------- src/lerobot/policies/factory.py | 1 - src/lerobot/scripts/eval.py | 6 +-- src/lerobot/scripts/train.py | 2 + 9 files changed, 59 insertions(+), 49 deletions(-) diff --git a/examples/test.sh b/examples/test.sh index 5dfcd581e..d6f29f35e 100644 --- a/examples/test.sh +++ b/examples/test.sh @@ -1,6 +1,5 @@ #!/bin/bash -# Example evaluation script for LeRobot policies unset LEROBOT_HOME unset HF_LEROBOT_HOME # === CONFIGURATION === @@ -12,11 +11,11 @@ N_EPISODES=1 USE_AMP=false DEVICE=cuda -# === RUN EVALUATION === +# RUN EVALUATION python src/lerobot/scripts/eval.py \ --policy.path="$POLICY_PATH" \ --env.type="$ENV_TYPE" \ --eval.batch_size="$BATCH_SIZE" \ --eval.n_episodes="$N_EPISODES" \ - --env.multitask_eval=False \ + --env.multitask_eval=True \ --env.task=$TASK \ diff --git a/src/lerobot/configs/policies.py b/src/lerobot/configs/policies.py index f5fa727cf..8ae69fc62 100644 --- a/src/lerobot/configs/policies.py +++ b/src/lerobot/configs/policies.py @@ -124,7 +124,13 @@ class PreTrainedConfig(draccus.ChoiceRegistry, HubMixin, abc.ABC): if ft.type is FeatureType.STATE and ft_name == OBS_STATE: return ft return None - + + @property + def robot_state_feature_key(self) -> PolicyFeature | None: + for key, ft in self.input_features.items(): + if ft.type is FeatureType.STATE: + return key + return None @property def env_state_feature(self) -> PolicyFeature | None: for _, ft in self.input_features.items(): diff --git a/src/lerobot/constants.py b/src/lerobot/constants.py index 3d6f9edc3..bc5b2013c 100644 --- a/src/lerobot/constants.py +++ b/src/lerobot/constants.py @@ -21,10 +21,6 @@ OBS_ENV_STATE = "observation.environment_state" OBS_STATE = "observation.state" OBS_IMAGE = "observation.image" OBS_IMAGE_2 = "observation.image2" -OBS_IMAGE = "image" -OBS_IMAGE_2 = "image2" -# OBS_IMAGE = "image" -# OBS_IMAGE_2 = "wrist_image" OBS_IMAGES = "observation.images" ACTION = "action" REWARD = "next.reward" diff --git a/src/lerobot/envs/configs.py b/src/lerobot/envs/configs.py index 47a23eb04..da3e2d5bf 100644 --- a/src/lerobot/envs/configs.py +++ b/src/lerobot/envs/configs.py @@ -295,8 +295,8 @@ class LiberoEnv(EnvConfig): default_factory=lambda: { "action": ACTION, "agent_pos": OBS_STATE, - "pixels/agentview_image": f"observation.images.{OBS_IMAGE}", - "pixels/robot0_eye_in_hand_image": f"observation.images.{OBS_IMAGE_2}", + "pixels/agentview_image": f"{OBS_IMAGE}", + "pixels/robot0_eye_in_hand_image": f"{OBS_IMAGE_2}", } ) diff --git a/src/lerobot/envs/libero.py b/src/lerobot/envs/libero.py index 10bc4aa9f..b83958d6c 100644 --- a/src/lerobot/envs/libero.py +++ b/src/lerobot/envs/libero.py @@ -180,8 +180,8 @@ class LiberoEnv(gym.Env): ) # agentview_image (main) or robot0_eye_in_hand_image (wrist) # TODO: jadechoghari, check mapping self.camera_name_mapping = { - "agentview_image": OBS_IMAGE, - "robot0_eye_in_hand_image": OBS_IMAGE_2, + "agentview_image": "image", + "robot0_eye_in_hand_image": "image2", } self.num_steps_wait = ( @@ -234,10 +234,16 @@ class LiberoEnv(gym.Env): self.action_space = spaces.Box(low=-1, high=1, shape=(7,), dtype=np.float32) - def render(self): + def render1(self): raw_obs = self._env.env._get_observations() image = self._format_raw_obs(raw_obs)["pixels"][OBS_IMAGE] return image + def render(self): + raw_obs = self._env.env._get_observations() + formatted = self._format_raw_obs(raw_obs) + # grab the "main" camera + return formatted["pixels"]["image"] + def _make_envs_task(self, task_suite, task_id: int = 0): task = task_suite.get_task(task_id) diff --git a/src/lerobot/envs/utils.py b/src/lerobot/envs/utils.py index 2cf9efcfe..cd76230d1 100644 --- a/src/lerobot/envs/utils.py +++ b/src/lerobot/envs/utils.py @@ -26,61 +26,63 @@ from lerobot.configs.types import FeatureType, PolicyFeature from lerobot.envs.configs import EnvConfig from lerobot.utils.utils import get_channel_first_image_shape - -def preprocess_observation(observations: dict[str, np.ndarray]) -> dict[str, Tensor]: - # TODO(aliberts, rcadene): refactor this to use features from the environment (no hardcoding) +def preprocess_observation( + observations: dict[str, np.ndarray], cfg: dict[str, Any] = None +) -> dict[str, Tensor]: """Convert environment observation to LeRobot format observation. Args: - observation: Dictionary of observation batches from a Gym vector environment. + observations: Dictionary of observation batches from a Gym vector environment. + cfg: Policy config containing expected feature keys. Returns: - Dictionary of observation batches with keys renamed to LeRobot format and values as tensors. + Dictionary of observation batches with keys renamed to match policy expectations. """ - # map to expected inputs for the policy return_observations = {} + + # expected keys from policy + policy_img_keys = list(cfg.image_features.keys()) if cfg else ["observation.image"] + state_key = cfg.robot_state_feature_key if cfg else "observation.state" + + # handle images if "pixels" in observations: if isinstance(observations["pixels"], dict): - imgs = {f"observation.images.{key}": img for key, img in observations["pixels"].items()} + env_img_keys = list(observations["pixels"].keys()) + imgs = observations["pixels"] else: - imgs = {"observation.image": observations["pixels"]} + env_img_keys = ["pixels"] + imgs = {"pixels": observations["pixels"]} + + # build rename map env_key -> policy_key + rename_map = dict(zip(env_img_keys, policy_img_keys)) for imgkey, img in imgs.items(): - # TODO(aliberts, rcadene): use transforms.ToTensor()? + target_key = rename_map.get(imgkey, imgkey) + img = torch.from_numpy(img) - # When preprocessing observations in a non-vectorized environment, we need to add a batch dimension. - # This is the case for human-in-the-loop RL where there is only one environment. - if img.ndim == 3: - img = img.unsqueeze(0) - # sanity check that images are channel last + # sanity checks _, h, w, c = img.shape - assert c < h and c < w, f"expect channel last images, but instead got {img.shape=}" + assert c < h and c < w, f"expect channel last images, got {img.shape=}" + assert img.dtype == torch.uint8, f"expect torch.uint8, got {img.dtype=}" - # sanity check that images are uint8 - assert img.dtype == torch.uint8, f"expect torch.uint8, but instead {img.dtype=}" - - # convert to channel first of type float32 in range [0,1] + # channel last → channel first, normalize img = einops.rearrange(img, "b h w c -> b c h w").contiguous() - img = img.type(torch.float32) - img /= 255 + img = img.float() / 255.0 - return_observations[imgkey] = img + return_observations[target_key] = img + # handle state if "environment_state" in observations: - env_state = torch.from_numpy(observations["environment_state"]).float() - if env_state.dim() == 1: - env_state = env_state.unsqueeze(0) + return_observations["observation.environment_state"] = torch.from_numpy( + observations["environment_state"] + ).float() - return_observations["observation.environment_state"] = env_state + return_observations[state_key] = torch.from_numpy(observations["agent_pos"]).float() - # TODO(rcadene): enable pixels only baseline with `obs_type="pixels"` in environment by removing - agent_pos = torch.from_numpy(observations["agent_pos"]).float() - if agent_pos.dim() == 1: - agent_pos = agent_pos.unsqueeze(0) - return_observations["observation.state"] = agent_pos + if "task" in observations: + return_observations["task"] = observations["task"] return return_observations - def env_to_policy_features(env_cfg: EnvConfig) -> dict[str, PolicyFeature]: # TODO(aliberts, rcadene): remove this hardcoding of keys and just use the nested keys as is # (need to also refactor preprocess_observation and externalize normalization from policies) diff --git a/src/lerobot/policies/factory.py b/src/lerobot/policies/factory.py index cab752112..4b8eeffd1 100644 --- a/src/lerobot/policies/factory.py +++ b/src/lerobot/policies/factory.py @@ -168,7 +168,6 @@ def make_policy( else: # Make a fresh policy. policy = policy_cls(**kwargs) - policy.to(cfg.device) assert isinstance(policy, nn.Module) diff --git a/src/lerobot/scripts/eval.py b/src/lerobot/scripts/eval.py index 7e086821f..37ef6348f 100644 --- a/src/lerobot/scripts/eval.py +++ b/src/lerobot/scripts/eval.py @@ -146,7 +146,8 @@ def rollout( check_env_attributes_and_types(env) while not np.all(done) and step < max_steps: # Numpy array to tensor and changing dictionary keys to LeRobot policy format. - observation = preprocess_observation(observation) + # observation = preprocess_observation(observation) + observation = preprocess_observation(observation, cfg=policy.config) if return_observations: all_observations.append(deepcopy(observation)) @@ -159,7 +160,6 @@ def rollout( observation = add_envs_task(env, observation) with torch.inference_mode(): action = policy.select_action(observation) - observation["observation.images.image"] # Convert to CPU / numpy. action = action.to("cpu").numpy() assert action.ndim == 2, "Action dimensions should be (batch, action_dim)" @@ -198,7 +198,7 @@ def rollout( # Track the final observation. if return_observations: - observation = preprocess_observation(observation) + observation = preprocess_observation(observation, cfg=policy.config) all_observations.append(deepcopy(observation)) # Stack the sequence along the first dimension so that we have (batch, sequence, *) tensors. diff --git a/src/lerobot/scripts/train.py b/src/lerobot/scripts/train.py index 6c07a1ae8..82f78e595 100644 --- a/src/lerobot/scripts/train.py +++ b/src/lerobot/scripts/train.py @@ -269,6 +269,7 @@ def train(cfg: TrainPipelineConfig): continue # Skip the overall stats since we already printed it print(f"\nAggregated Metrics for {task_group}:") print(task_group_info["aggregated"]) + breakpoint() else: print("START EVAL") eval_info = eval_policy( @@ -279,6 +280,7 @@ def train(cfg: TrainPipelineConfig): max_episodes_rendered=4, start_seed=cfg.seed, ) + breakpoint() aggregated = eval_info["aggregated"] print("END EVAL")