Fix renaming issues with camera keys

This commit is contained in:
Jade Choghari (jchoghar)
2025-08-20 06:55:05 -04:00
parent 5d25f5bd40
commit cc46497f4c
9 changed files with 59 additions and 49 deletions
+2 -3
View File
@@ -1,6 +1,5 @@
#!/bin/bash #!/bin/bash
# Example evaluation script for LeRobot policies
unset LEROBOT_HOME unset LEROBOT_HOME
unset HF_LEROBOT_HOME unset HF_LEROBOT_HOME
# === CONFIGURATION === # === CONFIGURATION ===
@@ -12,11 +11,11 @@ N_EPISODES=1
USE_AMP=false USE_AMP=false
DEVICE=cuda DEVICE=cuda
# === RUN EVALUATION === # RUN EVALUATION
python src/lerobot/scripts/eval.py \ python src/lerobot/scripts/eval.py \
--policy.path="$POLICY_PATH" \ --policy.path="$POLICY_PATH" \
--env.type="$ENV_TYPE" \ --env.type="$ENV_TYPE" \
--eval.batch_size="$BATCH_SIZE" \ --eval.batch_size="$BATCH_SIZE" \
--eval.n_episodes="$N_EPISODES" \ --eval.n_episodes="$N_EPISODES" \
--env.multitask_eval=False \ --env.multitask_eval=True \
--env.task=$TASK \ --env.task=$TASK \
+6
View File
@@ -126,6 +126,12 @@ class PreTrainedConfig(draccus.ChoiceRegistry, HubMixin, abc.ABC):
return None return None
@property @property
def robot_state_feature_key(self) -> str | None:
    """Return the key of the first input feature typed as robot state.

    Returns:
        The ``input_features`` key whose feature type is ``FeatureType.STATE``,
        or ``None`` when no input feature has that type. Note that this is the
        key (a string), not the ``PolicyFeature`` itself.
    """
    # Only the first matching key is reported; iteration follows dict order.
    return next(
        (key for key, ft in self.input_features.items() if ft.type is FeatureType.STATE),
        None,
    )
@property
def env_state_feature(self) -> PolicyFeature | None: def env_state_feature(self) -> PolicyFeature | None:
for _, ft in self.input_features.items(): for _, ft in self.input_features.items():
if ft.type is FeatureType.ENV: if ft.type is FeatureType.ENV:
-4
View File
@@ -21,10 +21,6 @@ OBS_ENV_STATE = "observation.environment_state"
OBS_STATE = "observation.state" OBS_STATE = "observation.state"
OBS_IMAGE = "observation.image" OBS_IMAGE = "observation.image"
OBS_IMAGE_2 = "observation.image2" OBS_IMAGE_2 = "observation.image2"
OBS_IMAGE = "image"
OBS_IMAGE_2 = "image2"
# OBS_IMAGE = "image"
# OBS_IMAGE_2 = "wrist_image"
OBS_IMAGES = "observation.images" OBS_IMAGES = "observation.images"
ACTION = "action" ACTION = "action"
REWARD = "next.reward" REWARD = "next.reward"
+2 -2
View File
@@ -295,8 +295,8 @@ class LiberoEnv(EnvConfig):
default_factory=lambda: { default_factory=lambda: {
"action": ACTION, "action": ACTION,
"agent_pos": OBS_STATE, "agent_pos": OBS_STATE,
"pixels/agentview_image": f"observation.images.{OBS_IMAGE}", "pixels/agentview_image": f"{OBS_IMAGE}",
"pixels/robot0_eye_in_hand_image": f"observation.images.{OBS_IMAGE_2}", "pixels/robot0_eye_in_hand_image": f"{OBS_IMAGE_2}",
} }
) )
+9 -3
View File
@@ -180,8 +180,8 @@ class LiberoEnv(gym.Env):
) # agentview_image (main) or robot0_eye_in_hand_image (wrist) ) # agentview_image (main) or robot0_eye_in_hand_image (wrist)
# TODO: jadechoghari, check mapping # TODO: jadechoghari, check mapping
self.camera_name_mapping = { self.camera_name_mapping = {
"agentview_image": OBS_IMAGE, "agentview_image": "image",
"robot0_eye_in_hand_image": OBS_IMAGE_2, "robot0_eye_in_hand_image": "image2",
} }
self.num_steps_wait = ( self.num_steps_wait = (
@@ -234,10 +234,16 @@ class LiberoEnv(gym.Env):
self.action_space = spaces.Box(low=-1, high=1, shape=(7,), dtype=np.float32) self.action_space = spaces.Box(low=-1, high=1, shape=(7,), dtype=np.float32)
def render1(self):
    """Deprecated alias of ``render``, kept so existing callers keep working.

    The previous body looked the frame up under ``OBS_IMAGE``, while the camera
    name mapping of this class uses the plain key ``"image"`` for the main
    camera. Delegating avoids keeping two lookups that can drift apart.
    """
    return self.render()


def render(self):
    """Return the current frame of the main camera.

    Returns:
        The entry stored under ``["pixels"]["image"]`` of the formatted
        observation (presumably an HxWxC image array; confirm against
        ``_format_raw_obs``).
    """
    raw_obs = self._env.env._get_observations()
    formatted = self._format_raw_obs(raw_obs)
    # "image" is the name given to the agentview (main) camera in
    # camera_name_mapping; "image2" would be the wrist camera.
    return formatted["pixels"]["image"]
def _make_envs_task(self, task_suite, task_id: int = 0): def _make_envs_task(self, task_suite, task_id: int = 0):
task = task_suite.get_task(task_id) task = task_suite.get_task(task_id)
+34 -32
View File
@@ -26,61 +26,63 @@ from lerobot.configs.types import FeatureType, PolicyFeature
from lerobot.envs.configs import EnvConfig from lerobot.envs.configs import EnvConfig
from lerobot.utils.utils import get_channel_first_image_shape from lerobot.utils.utils import get_channel_first_image_shape
def preprocess_observation(
def preprocess_observation(observations: dict[str, np.ndarray]) -> dict[str, Tensor]: observations: dict[str, np.ndarray], cfg: dict[str, Any] = None
# TODO(aliberts, rcadene): refactor this to use features from the environment (no hardcoding) ) -> dict[str, Tensor]:
"""Convert environment observation to LeRobot format observation. """Convert environment observation to LeRobot format observation.
Args: Args:
observation: Dictionary of observation batches from a Gym vector environment. observations: Dictionary of observation batches from a Gym vector environment.
cfg: Policy config containing expected feature keys.
Returns: Returns:
Dictionary of observation batches with keys renamed to LeRobot format and values as tensors. Dictionary of observation batches with keys renamed to match policy expectations.
""" """
# map to expected inputs for the policy
return_observations = {} return_observations = {}
# expected keys from policy
policy_img_keys = list(cfg.image_features.keys()) if cfg else ["observation.image"]
state_key = cfg.robot_state_feature_key if cfg else "observation.state"
# handle images
if "pixels" in observations: if "pixels" in observations:
if isinstance(observations["pixels"], dict): if isinstance(observations["pixels"], dict):
imgs = {f"observation.images.{key}": img for key, img in observations["pixels"].items()} env_img_keys = list(observations["pixels"].keys())
imgs = observations["pixels"]
else: else:
imgs = {"observation.image": observations["pixels"]} env_img_keys = ["pixels"]
imgs = {"pixels": observations["pixels"]}
# build rename map env_key -> policy_key
rename_map = dict(zip(env_img_keys, policy_img_keys))
for imgkey, img in imgs.items(): for imgkey, img in imgs.items():
# TODO(aliberts, rcadene): use transforms.ToTensor()? target_key = rename_map.get(imgkey, imgkey)
img = torch.from_numpy(img) img = torch.from_numpy(img)
# When preprocessing observations in a non-vectorized environment, we need to add a batch dimension. # sanity checks
# This is the case for human-in-the-loop RL where there is only one environment.
if img.ndim == 3:
img = img.unsqueeze(0)
# sanity check that images are channel last
_, h, w, c = img.shape _, h, w, c = img.shape
assert c < h and c < w, f"expect channel last images, but instead got {img.shape=}" assert c < h and c < w, f"expect channel last images, got {img.shape=}"
assert img.dtype == torch.uint8, f"expect torch.uint8, got {img.dtype=}"
# sanity check that images are uint8 # channel last → channel first, normalize
assert img.dtype == torch.uint8, f"expect torch.uint8, but instead {img.dtype=}"
# convert to channel first of type float32 in range [0,1]
img = einops.rearrange(img, "b h w c -> b c h w").contiguous() img = einops.rearrange(img, "b h w c -> b c h w").contiguous()
img = img.type(torch.float32) img = img.float() / 255.0
img /= 255
return_observations[imgkey] = img return_observations[target_key] = img
# handle state
if "environment_state" in observations: if "environment_state" in observations:
env_state = torch.from_numpy(observations["environment_state"]).float() return_observations["observation.environment_state"] = torch.from_numpy(
if env_state.dim() == 1: observations["environment_state"]
env_state = env_state.unsqueeze(0) ).float()
return_observations["observation.environment_state"] = env_state return_observations[state_key] = torch.from_numpy(observations["agent_pos"]).float()
# TODO(rcadene): enable pixels only baseline with `obs_type="pixels"` in environment by removing if "task" in observations:
agent_pos = torch.from_numpy(observations["agent_pos"]).float() return_observations["task"] = observations["task"]
if agent_pos.dim() == 1:
agent_pos = agent_pos.unsqueeze(0)
return_observations["observation.state"] = agent_pos
return return_observations return return_observations
def env_to_policy_features(env_cfg: EnvConfig) -> dict[str, PolicyFeature]: def env_to_policy_features(env_cfg: EnvConfig) -> dict[str, PolicyFeature]:
# TODO(aliberts, rcadene): remove this hardcoding of keys and just use the nested keys as is # TODO(aliberts, rcadene): remove this hardcoding of keys and just use the nested keys as is
# (need to also refactor preprocess_observation and externalize normalization from policies) # (need to also refactor preprocess_observation and externalize normalization from policies)
-1
View File
@@ -168,7 +168,6 @@ def make_policy(
else: else:
# Make a fresh policy. # Make a fresh policy.
policy = policy_cls(**kwargs) policy = policy_cls(**kwargs)
policy.to(cfg.device) policy.to(cfg.device)
assert isinstance(policy, nn.Module) assert isinstance(policy, nn.Module)
+3 -3
View File
@@ -146,7 +146,8 @@ def rollout(
check_env_attributes_and_types(env) check_env_attributes_and_types(env)
while not np.all(done) and step < max_steps: while not np.all(done) and step < max_steps:
# Numpy array to tensor and changing dictionary keys to LeRobot policy format. # Numpy array to tensor and changing dictionary keys to LeRobot policy format.
observation = preprocess_observation(observation) # observation = preprocess_observation(observation)
observation = preprocess_observation(observation, cfg=policy.config)
if return_observations: if return_observations:
all_observations.append(deepcopy(observation)) all_observations.append(deepcopy(observation))
@@ -159,7 +160,6 @@ def rollout(
observation = add_envs_task(env, observation) observation = add_envs_task(env, observation)
with torch.inference_mode(): with torch.inference_mode():
action = policy.select_action(observation) action = policy.select_action(observation)
observation["observation.images.image"]
# Convert to CPU / numpy. # Convert to CPU / numpy.
action = action.to("cpu").numpy() action = action.to("cpu").numpy()
assert action.ndim == 2, "Action dimensions should be (batch, action_dim)" assert action.ndim == 2, "Action dimensions should be (batch, action_dim)"
@@ -198,7 +198,7 @@ def rollout(
# Track the final observation. # Track the final observation.
if return_observations: if return_observations:
observation = preprocess_observation(observation) observation = preprocess_observation(observation, cfg=policy.config)
all_observations.append(deepcopy(observation)) all_observations.append(deepcopy(observation))
# Stack the sequence along the first dimension so that we have (batch, sequence, *) tensors. # Stack the sequence along the first dimension so that we have (batch, sequence, *) tensors.
+2
View File
@@ -269,6 +269,7 @@ def train(cfg: TrainPipelineConfig):
continue # Skip the overall stats since we already printed it continue # Skip the overall stats since we already printed it
print(f"\nAggregated Metrics for {task_group}:") print(f"\nAggregated Metrics for {task_group}:")
print(task_group_info["aggregated"]) print(task_group_info["aggregated"])
breakpoint()
else: else:
print("START EVAL") print("START EVAL")
eval_info = eval_policy( eval_info = eval_policy(
@@ -279,6 +280,7 @@ def train(cfg: TrainPipelineConfig):
max_episodes_rendered=4, max_episodes_rendered=4,
start_seed=cfg.seed, start_seed=cfg.seed,
) )
breakpoint()
aggregated = eval_info["aggregated"] aggregated = eval_info["aggregated"]
print("END EVAL") print("END EVAL")