From 8d2c66abd2013a2ece5b68096b589f81e2819b3b Mon Sep 17 00:00:00 2001
From: "Jade Choghari (jchoghar)"
Date: Mon, 25 Aug 2025 06:25:02 -0400
Subject: [PATCH] final refactor/fix

---
 examples/5_train_libero.sh                 |  2 +-
 examples/{test.sh => 6_evaluate_libero.sh} |  6 +-
 src/lerobot/envs/configs.py                |  4 +-
 src/lerobot/envs/factory.py                |  4 +-
 src/lerobot/envs/utils.py                  | 66 ++++++++++------------
 src/lerobot/scripts/eval.py                | 31 +++-------
 src/lerobot/scripts/train.py               |  9 +--
 7 files changed, 47 insertions(+), 75 deletions(-)
 rename examples/{test.sh => 6_evaluate_libero.sh} (72%)

diff --git a/examples/5_train_libero.sh b/examples/5_train_libero.sh
index 5c7fe5d0b..bfaf6a331 100755
--- a/examples/5_train_libero.sh
+++ b/examples/5_train_libero.sh
@@ -1,7 +1,7 @@
 #!/bin/bash
 
 # config
-REPO_ID=yzembodied/libero_10_image_task_1
+REPO_ID=jadechoghari/smol-libero
 TASK=libero_10
 OUTPUT_DIR=./outputs/
 
diff --git a/examples/test.sh b/examples/6_evaluate_libero.sh
similarity index 72%
rename from examples/test.sh
rename to examples/6_evaluate_libero.sh
index d6f29f35e..2552e4602 100644
--- a/examples/test.sh
+++ b/examples/6_evaluate_libero.sh
@@ -2,14 +2,12 @@
 unset LEROBOT_HOME
 unset HF_LEROBOT_HOME
 
-# === CONFIGURATION ===
-POLICY_PATH="ganatrask/lerobot-pi0-libero-object" # or outputs/train/.../pretrained_model
+# CONFIGURATION
+POLICY_PATH="ganatrask/lerobot-pi0-libero-object"
 TASK=libero_object
 ENV_TYPE="libero"
 BATCH_SIZE=1
 N_EPISODES=1
-USE_AMP=false
-DEVICE=cuda
 
 # RUN EVALUATION
 python src/lerobot/scripts/eval.py \
diff --git a/src/lerobot/envs/configs.py b/src/lerobot/envs/configs.py
index da3e2d5bf..b7dc030b5 100644
--- a/src/lerobot/envs/configs.py
+++ b/src/lerobot/envs/configs.py
@@ -295,8 +295,8 @@ class LiberoEnv(EnvConfig):
         default_factory=lambda: {
             "action": ACTION,
             "agent_pos": OBS_STATE,
-            "pixels/agentview_image": f"{OBS_IMAGE}",
-            "pixels/robot0_eye_in_hand_image": f"{OBS_IMAGE_2}",
+            "pixels/agentview_image": f"{OBS_IMAGES}.image",
+            "pixels/robot0_eye_in_hand_image": f"{OBS_IMAGES}.image2",
         }
     )
 
diff --git a/src/lerobot/envs/factory.py b/src/lerobot/envs/factory.py
index cb897e68d..09233d4e9 100644
--- a/src/lerobot/envs/factory.py
+++ b/src/lerobot/envs/factory.py
@@ -41,12 +41,12 @@ def make_env(cfg: EnvConfig, n_envs: int = 1, use_async_envs: bool = False) -> g
     Args:
         cfg (EnvConfig): the config of the environment to instantiate.
         n_envs (int, optional): The number of parallelized env to return. Defaults to 1.
-        use_async_envs (bool, optional): Wether to return an AsyncVectorEnv or a SyncVectorEnv. Defaults to
+        use_async_envs (bool, optional): Whether to return an AsyncVectorEnv or a SyncVectorEnv. Defaults to
             False.
 
     Raises:
         ValueError: if n_envs < 1
-        ModuleNotFoundError: If the requested env package is not intalled
+        ModuleNotFoundError: If the requested env package is not installed
 
     Returns:
         gym.vector.VectorEnv: The parallelized gym.env instance.
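The configs.py hunk above reroutes the two LIBERO camera streams from the single-image constants (OBS_IMAGE, OBS_IMAGE_2) to suffixed keys under the plural OBS_IMAGES prefix, so multiple cameras can be addressed uniformly. Below is a minimal sketch of the resulting key translation; the constant values ("observation.images", "observation.state") and the to_policy_key helper are assumptions for illustration, not code from this patch:

    # sketch: how the remapped LIBERO features_map translates env keys
    OBS_IMAGES = "observation.images"  # assumed value of the imported constant
    OBS_STATE = "observation.state"    # assumed value of the imported constant

    features_map = {
        "agent_pos": OBS_STATE,
        "pixels/agentview_image": f"{OBS_IMAGES}.image",
        "pixels/robot0_eye_in_hand_image": f"{OBS_IMAGES}.image2",
    }

    def to_policy_key(env_key: str) -> str:
        # fall back to the raw env key when no mapping is registered
        return features_map.get(env_key, env_key)

    assert to_policy_key("pixels/agentview_image") == "observation.images.image"
    assert to_policy_key("pixels/robot0_eye_in_hand_image") == "observation.images.image2"
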
diff --git a/src/lerobot/envs/utils.py b/src/lerobot/envs/utils.py
index 2ad5364a3..4ac17cdf2 100644
--- a/src/lerobot/envs/utils.py
+++ b/src/lerobot/envs/utils.py
@@ -26,65 +26,59 @@ from lerobot.configs.types import FeatureType, PolicyFeature
 from lerobot.envs.configs import EnvConfig
 from lerobot.utils.utils import get_channel_first_image_shape
 
-
-def preprocess_observation(
-    observations: dict[str, np.ndarray], cfg: dict[str, Any] = None
-) -> dict[str, Tensor]:
+def preprocess_observation(observations: dict[str, np.ndarray]) -> dict[str, Tensor]:
+    # TODO(aliberts, rcadene): refactor this to use features from the environment (no hardcoding)
     """Convert environment observation to LeRobot format observation.
 
     Args:
-        observations: Dictionary of observation batches from a Gym vector environment.
-        cfg: Policy config containing expected feature keys.
+        observation: Dictionary of observation batches from a Gym vector environment.
 
     Returns:
-        Dictionary of observation batches with keys renamed to match policy expectations.
+        Dictionary of observation batches with keys renamed to LeRobot format and values as tensors.
     """
+    # map to expected inputs for the policy
    return_observations = {}
-
-    # expected keys from policy
-    policy_img_keys = list(cfg.image_features.keys()) if cfg else ["observation.image"]
-    state_key = cfg.robot_state_feature_key if cfg else "observation.state"
-
-    # handle images
     if "pixels" in observations:
         if isinstance(observations["pixels"], dict):
-            env_img_keys = list(observations["pixels"].keys())
-            imgs = observations["pixels"]
+            imgs = {f"observation.images.{key}": img for key, img in observations["pixels"].items()}
         else:
-            env_img_keys = ["pixels"]
-            imgs = {"pixels": observations["pixels"]}
-
-        # build rename map env_key -> policy_key
-        rename_map = dict(zip(env_img_keys, policy_img_keys, strict=False))
+            imgs = {"observation.image": observations["pixels"]}
 
         for imgkey, img in imgs.items():
-            target_key = rename_map.get(imgkey, imgkey)
-
+            # TODO(aliberts, rcadene): use transforms.ToTensor()?
             img = torch.from_numpy(img)
 
-            # sanity checks
+            # When preprocessing observations in a non-vectorized environment, we need to add a batch dimension.
+            # This is the case for human-in-the-loop RL where there is only one environment.
+            if img.ndim == 3:
+                img = img.unsqueeze(0)
+            # sanity check that images are channel last
             _, h, w, c = img.shape
-            assert c < h and c < w, f"expect channel last images, got {img.shape=}"
-            assert img.dtype == torch.uint8, f"expect torch.uint8, got {img.dtype=}"
+            assert c < h and c < w, f"expect channel last images, but instead got {img.shape=}"
 
-            # channel last → channel first, normalize
+            # sanity check that images are uint8
+            assert img.dtype == torch.uint8, f"expect torch.uint8, but instead {img.dtype=}"
+
+            # convert to channel first of type float32 in range [0,1]
             img = einops.rearrange(img, "b h w c -> b c h w").contiguous()
-            img = img.float() / 255.0
+            img = img.type(torch.float32)
+            img /= 255
 
-            return_observations[target_key] = img
+            return_observations[imgkey] = img
 
-    # handle state
     if "environment_state" in observations:
-        return_observations["observation.environment_state"] = torch.from_numpy(
-            observations["environment_state"]
-        ).float()
+        env_state = torch.from_numpy(observations["environment_state"]).float()
+        if env_state.dim() == 1:
+            env_state = env_state.unsqueeze(0)
 
-    return_observations[state_key] = torch.from_numpy(observations["agent_pos"]).float()
+        return_observations["observation.environment_state"] = env_state
 
-    if "task" in observations:
-        return_observations["task"] = observations["task"]
+    # TODO(rcadene): enable pixels only baseline with `obs_type="pixels"` in environment by removing
+    # the requirement for "agent_pos"
+    agent_pos = torch.from_numpy(observations["agent_pos"]).float()
+    if agent_pos.dim() == 1:
+        agent_pos = agent_pos.unsqueeze(0)
+    return_observations["observation.state"] = agent_pos
 
     return return_observations
 
-
 def env_to_policy_features(env_cfg: EnvConfig) -> dict[str, PolicyFeature]:
     # TODO(aliberts, rcadene): remove this hardcoding of keys and just use the nested keys as is
     # (need to also refactor preprocess_observation and externalize normalization from policies)
diff --git a/src/lerobot/scripts/eval.py b/src/lerobot/scripts/eval.py
index 37ef6348f..98a19c94e 100644
--- a/src/lerobot/scripts/eval.py
+++ b/src/lerobot/scripts/eval.py
@@ -62,6 +62,7 @@ import einops
 import gymnasium as gym
 import numpy as np
 import torch
+from termcolor import colored
 from torch import Tensor, nn
 from tqdm import trange
 
@@ -73,6 +74,7 @@ from lerobot.policies.factory import make_policy
 from lerobot.policies.pretrained import PreTrainedPolicy
 from lerobot.policies.utils import get_device_from_parameters
 from lerobot.utils.io_utils import write_video
+from lerobot.utils.random_utils import set_seed
 from lerobot.utils.utils import (
     get_safe_torch_device,
     init_logging,
@@ -146,8 +148,7 @@ def rollout(
     check_env_attributes_and_types(env)
     while not np.all(done) and step < max_steps:
         # Numpy array to tensor and changing dictionary keys to LeRobot policy format.
-        # observation = preprocess_observation(observation)
-        observation = preprocess_observation(observation, cfg=policy.config)
+        observation = preprocess_observation(observation)
         if return_observations:
             all_observations.append(deepcopy(observation))
 
@@ -459,24 +460,8 @@ def _compile_episode_data(
 
     return data_dict
 
-
-def set_global_seed(seed):
-    """Set seed for reproducibility."""
-    import random
-
-    random.seed(seed)
-    np.random.seed(seed)
-    torch.manual_seed(seed)
-    if torch.cuda.is_available():
-        torch.cuda.manual_seed_all(seed)
-
-
-def log_output_dir(out_dir):
-    logging.info("Output dir:" + f" {out_dir}")
-
-
 @parser.wrap()
-def eval(cfg: EvalPipelineConfig):
+def eval_main(cfg: EvalPipelineConfig):
     logging.info(pformat(asdict(cfg)))
 
     # Check device is available
@@ -484,9 +469,9 @@
     torch.backends.cudnn.benchmark = True
     torch.backends.cuda.matmul.allow_tf32 = True
 
-    set_global_seed(cfg.seed)
+    set_seed(cfg.seed)
 
-    log_output_dir(cfg.output_dir)
+    logging.info(colored("Output dir:", "yellow", attrs=["bold"]) + f" {cfg.output_dir}")
 
     logging.info("Making environment.")
     env = make_env(cfg.env, n_envs=cfg.eval.batch_size, use_async_envs=cfg.eval.use_async_envs)
@@ -494,11 +479,9 @@
     logging.info("Making policy.")
     policy = make_policy(
         cfg=cfg.policy,
-        # device=device,
         env_cfg=cfg.env,
     )
     policy.eval()
-
     with torch.no_grad(), torch.autocast(device_type=device.type) if cfg.policy.use_amp else nullcontext():
         if cfg.env.multitask_eval:
             info = eval_policy_multitask(
@@ -663,4 +646,4 @@ def eval_policy_multitask(
 
 if __name__ == "__main__":
     init_logging()
-    eval()
+    eval_main()
diff --git a/src/lerobot/scripts/train.py b/src/lerobot/scripts/train.py
index 82f78e595..a79ac88cd 100644
--- a/src/lerobot/scripts/train.py
+++ b/src/lerobot/scripts/train.py
@@ -186,7 +186,6 @@ def train(cfg: TrainPipelineConfig):
     dl_iter = cycle(dataloader)
 
     policy.train()
-
     train_metrics = {
         "loss": AverageMeter("loss", ":.3f"),
         "grad_norm": AverageMeter("grdn", ":.3f"),
@@ -263,15 +262,14 @@
                     max_parallel_tasks=cfg.env.max_parallel_tasks,
                 )
                 aggregated = eval_info["overall"]["aggregated"]
-                # Print per-suite stats
+                # Print per-suite stats, log?
                 for task_group, task_group_info in eval_info.items():
                     if task_group == "overall":
                         continue  # Skip the overall stats since we already printed it
                     print(f"\nAggregated Metrics for {task_group}:")
                     print(task_group_info["aggregated"])
-                    breakpoint()
+                breakpoint()
             else:
-                print("START EVAL")
                 eval_info = eval_policy(
                     eval_env,
                     policy,
@@ -280,9 +278,8 @@
                     max_episodes_rendered=4,
                     start_seed=cfg.seed,
                 )
-                breakpoint()
                 aggregated = eval_info["aggregated"]
-                print("END EVAL")
+                breakpoint()
 
             eval_metrics = {
                 "avg_sum_reward": AverageMeter("∑rwrd", ":.3f"),
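Taken together, eval.py and train.py now call the reverted preprocess_observation without a policy config, relying on the hardcoded LeRobot keys shown in the utils.py hunk. A self-contained sketch of the expected round trip, assuming lerobot is installed with the repo layout used in this patch; the 224x224 resolution and the 8-dim state are illustrative placeholders, not values taken from this patch:

    import numpy as np

    from lerobot.envs.utils import preprocess_observation

    # fake batch of 2 vector-env observations with two named camera streams
    obs = {
        "pixels": {
            "agentview_image": np.zeros((2, 224, 224, 3), dtype=np.uint8),
            "robot0_eye_in_hand_image": np.zeros((2, 224, 224, 3), dtype=np.uint8),
        },
        "agent_pos": np.zeros((2, 8), dtype=np.float32),
    }

    out = preprocess_observation(obs)
    # dict pixels are renamed to "observation.images.<key>" and converted to
    # channel-first float32 in [0, 1]; "agent_pos" becomes "observation.state"
    assert out["observation.images.agentview_image"].shape == (2, 3, 224, 224)
    assert out["observation.images.agentview_image"].dtype.is_floating_point
    assert out["observation.state"].shape == (2, 8)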