From 92adf2218f9d8dc189a92caa576704ee2c9dd569 Mon Sep 17 00:00:00 2001
From: "Jade Choghari (jchoghar)"
Date: Tue, 2 Sep 2025 05:18:46 -0400
Subject: [PATCH] iterate on review

---
 examples/5_train_libero.sh      |  6 +++---
 examples/6_evaluate_libero.sh   |  2 +-
 src/lerobot/configs/policies.py |  7 -------
 src/lerobot/envs/factory.py     |  5 ++++-
 src/lerobot/envs/libero.py      | 12 ++++++++----
 src/lerobot/scripts/eval.py     | 14 +++++---------
 src/lerobot/scripts/train.py    |  1 -
 7 files changed, 21 insertions(+), 26 deletions(-)

diff --git a/examples/5_train_libero.sh b/examples/5_train_libero.sh
index 6a0440c4d..1f2cf7bb8 100755
--- a/examples/5_train_libero.sh
+++ b/examples/5_train_libero.sh
@@ -2,7 +2,7 @@
 
 # config
 REPO_ID=jadechoghari/smol-libero3
-TASK=libero_10
+TASK=libero_10,libero_spatial
 OUTPUT_DIR=./outputs/
 
 # clean previous run
@@ -13,7 +13,7 @@ STEPS=100000
 BATCH_SIZE=4
 EVAL_FREQ=1
 SAVE_FREQ=10000
-NUM_WORKERS=0
+NUM_WORKERS=4
 
 # model params
 POLICY=smolvla
@@ -48,6 +48,6 @@ python src/lerobot/scripts/train.py \
   --save_freq=$SAVE_FREQ \
   --num_workers=$NUM_WORKERS \
   --policy.repo_id=$VLM_REPO_ID \
-  --env.multitask_eval=False \
+  --env.multitask_eval=True \
   --eval.batch_size=1 \
   --eval.n_episodes=1 \

diff --git a/examples/6_evaluate_libero.sh b/examples/6_evaluate_libero.sh
index e7eec7cc3..97b2c17ee 100644
--- a/examples/6_evaluate_libero.sh
+++ b/examples/6_evaluate_libero.sh
@@ -4,7 +4,7 @@ unset LEROBOT_HOME
 unset HF_LEROBOT_HOME
 # CONFIGURATION
 POLICY_PATH="ganatrask/lerobot-pi0-libero-object"
-TASK=libero_object
+TASK=libero_object,libero_spatial
 ENV_TYPE="libero"
 BATCH_SIZE=1
 N_EPISODES=1

diff --git a/src/lerobot/configs/policies.py b/src/lerobot/configs/policies.py
index a7112aec6..f5fa727cf 100644
--- a/src/lerobot/configs/policies.py
+++ b/src/lerobot/configs/policies.py
@@ -125,13 +125,6 @@ class PreTrainedConfig(draccus.ChoiceRegistry, HubMixin, abc.ABC):
                 return ft
         return None
 
-    @property
-    def robot_state_feature_key(self) -> PolicyFeature | None:
-        for key, ft in self.input_features.items():
-            if ft.type is FeatureType.STATE:
-                return key
-        return None
-
     @property
     def env_state_feature(self) -> PolicyFeature | None:
         for _, ft in self.input_features.items():

diff --git a/src/lerobot/envs/factory.py b/src/lerobot/envs/factory.py
index 09233d4e9..649d8a017 100644
--- a/src/lerobot/envs/factory.py
+++ b/src/lerobot/envs/factory.py
@@ -35,7 +35,7 @@ def make_env_config(env_type: str, **kwargs) -> EnvConfig:
     raise ValueError(f"Policy type '{env_type}' is not available.")
 
 
-def make_env(cfg: EnvConfig, n_envs: int = 1, use_async_envs: bool = False) -> gym.vector.VectorEnv | None:
+def make_env(cfg: EnvConfig, n_envs: int = 1, use_async_envs: bool = False) -> gym.vector.VectorEnv | dict[str, dict[int, gym.vector.VectorEnv]]:
     """Makes a gym vector environment according to the config.
 
     Args:
@@ -50,6 +50,9 @@ def make_env(cfg: EnvConfig, n_envs: int = 1, use_async_envs: bool = False) -> g
     Returns:
         gym.vector.VectorEnv: The parallelized gym.env instance.
+        dict[str, dict[int, gym.vector.VectorEnv]]: A mapping from task suite
+            name to task id to vectorized environment (returned when multitask eval is enabled).
+
     """
     if n_envs < 1:
         raise ValueError("`n_envs must be at least 1")
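Note on the widened `make_env` return type above: with `--env.multitask_eval=True` the factory returns the nested mapping rather than a single vector env, so callers have to branch on both shapes. A minimal sketch of such a branch (the `close_all` helper is hypothetical; its nested loop mirrors the `train.py` cleanup at the end of this patch):

    def close_all(eval_env, multitask_eval: bool) -> None:
        """Close either a plain vector env or the nested multitask mapping."""
        if multitask_eval:
            # dict[str, dict[int, gym.vector.VectorEnv]]:
            # task suite name -> task id -> vectorized env
            for _task_group, envs_dict in eval_env.items():
                for _task_id, env in envs_dict.items():
                    env.close()
        else:
            eval_env.close()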
diff --git a/src/lerobot/envs/libero.py b/src/lerobot/envs/libero.py
index 5905a3334..a897eb42d 100644
--- a/src/lerobot/envs/libero.py
+++ b/src/lerobot/envs/libero.py
@@ -79,7 +79,6 @@ def create_libero_envs(
         _task
     ]()  # can also choose libero_spatial, libero_object, libero_10 etc.
     tasks_ids = list(range(len(task_suite.tasks)))
-    # tasks_ids = [0] # FIXME(mshukor): debug
     for tasks_id in tasks_ids:
         episode_indices = list(range(n_envs))
         print(
@@ -148,7 +147,7 @@
 def get_libero_dummy_action():
     """Get dummy/no-op action, used to roll out the simulation while the robot does nothing."""
     return [0, 0, 0, 0, 0, 0, -1]
 
-
+ACTION_DIM = 8
 class LiberoEnv(gym.Env):
     metadata = {"render_modes": ["rgb_array"], "render_fps": 80}
@@ -179,7 +178,12 @@ class LiberoEnv(gym.Env):
         self.camera_name = camera_name.split(
             ","
         )  # agentview_image (main) or robot0_eye_in_hand_image (wrist)
-        # TODO: jadechoghari, check mapping
+
+        # Map raw camera names to "image" and "image2".
+        # The preprocessing step `preprocess_observation` then prefixes these with
+        # `observation.images.`, following the LeRobot convention (e.g.,
+        # `observation.images.image`, `observation.images.image2`), so the policy
+        # receives observations in a consistent format regardless of camera naming.
         self.camera_name_mapping = {
             "agentview_image": "image",
             "robot0_eye_in_hand_image": "image2",
@@ -227,7 +231,7 @@
             "agent_pos": spaces.Box(
                 low=-1000.0,
                 high=1000.0,
-                shape=(8,),  # TODO: jadechoghari, check compatible
+                shape=(ACTION_DIM,),
                 dtype=np.float64,
             ),
         }
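To make the camera-mapping comment above concrete: raw LIBERO camera keys are renamed before `preprocess_observation` adds the `observation.images.` prefix. A toy sketch assuming the `camera_name_mapping` defined above (`rename_cameras` and `raw_obs` are hypothetical stand-ins for logic that lives inside `LiberoEnv`):

    import numpy as np

    camera_name_mapping = {
        "agentview_image": "image",
        "robot0_eye_in_hand_image": "image2",
    }

    def rename_cameras(raw_obs: dict) -> dict:
        # "agentview_image" -> "image"; preprocess_observation later turns
        # this key into "observation.images.image".
        return {camera_name_mapping.get(k, k): v for k, v in raw_obs.items()}

    obs = rename_cameras({"agentview_image": np.zeros((256, 256, 3), dtype=np.uint8)})
    assert set(obs) == {"image"}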
diff --git a/src/lerobot/scripts/eval.py b/src/lerobot/scripts/eval.py
index 3965b6e74..3145bed35 100644
--- a/src/lerobot/scripts/eval.py
+++ b/src/lerobot/scripts/eval.py
@@ -178,8 +178,9 @@ def rollout(
         successes = [False] * env.num_envs
 
         # Keep track of which environments are done so far.
-        # done = terminated | truncated | done
-        # TODO: jadechoghari changed, this is cleaner
+        # Mark the episode as done if we reach the maximum step limit.
+        # This ensures that the rollout always terminates cleanly at `max_steps`,
+        # and allows logging/saving (e.g., videos) to be triggered consistently.
         done = terminated | truncated | done
         if step + 1 == max_steps:
             done = np.ones_like(done, dtype=bool)
@@ -191,8 +192,7 @@
         step += 1
 
         running_success_rate = (
-            # einops.reduce(torch.stack(all_successes, dim=1), "b n -> b", "any").numpy().mean() #TODO: changed by jade
-            einops.reduce(torch.stack(all_successes, dim=1), "b n -> b", "max")
+            einops.reduce(torch.stack(all_successes, dim=1), "b n -> b", "any").numpy().mean()
         )
         progbar.set_postfix({"running_success_rate": f"{running_success_rate.item() * 100:.1f}%"})
         progbar.update()
@@ -321,8 +321,7 @@ def eval_policy(
         sum_rewards.extend(batch_sum_rewards.tolist())
         batch_max_rewards = einops.reduce((rollout_data["reward"] * mask), "b n -> b", "max")
         max_rewards.extend(batch_max_rewards.tolist())
-        # batch_successes = einops.reduce((rollout_data["success"] * mask), "b n -> b", "any")
-        batch_successes = einops.reduce((rollout_data["success"] * mask).float(), "b n -> b", "max")
+        batch_successes = einops.reduce((rollout_data["success"] * mask), "b n -> b", "any")
         all_successes.extend(batch_successes.tolist())
         if seeds:
             all_seeds.extend(seeds)
@@ -495,7 +494,6 @@ def eval_main(cfg: EvalPipelineConfig):
         max_parallel_tasks=cfg.env.max_parallel_tasks,
         verbose=False,
     )
-    # Print overall stats
     print("Overall Aggregated Metrics:")
     print(info["overall"]["aggregated"])
 
@@ -548,7 +546,6 @@ def eval_policy_multitask(
     def eval_task(task_group, task_id, env):
         """Evaluates a single task in parallel."""
         print(f"Evaluating: task_group: {task_group}, task_id: {task_id} ...")
-        # jadechoghari : added multi video eval support
         if videos_dir is not None:
             task_videos_dir = videos_dir / f"{task_group}_{task_id}"
             task_videos_dir.mkdir(parents=True, exist_ok=True)
@@ -560,7 +557,6 @@
             task_videos_dir,
             return_episode_data,
             start_seed,
-            # verbose=verbose,
         )
 
         per_episode = task_result["per_episode"]

diff --git a/src/lerobot/scripts/train.py b/src/lerobot/scripts/train.py
index 8e40ce554..b34f94364 100644
--- a/src/lerobot/scripts/train.py
+++ b/src/lerobot/scripts/train.py
@@ -299,7 +299,6 @@ def train(cfg: TrainPipelineConfig):
             wandb_logger.log_video(eval_info["video_paths"][0], step, mode="eval")
 
     if eval_env:
-        # added by jade, close all env in multi eval setup
        if cfg.env.multitask_eval:
            for _task_group, envs_dict in eval_env.items():
                for _idx, env in envs_dict.items():
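A worked example of the success reduction restored in `eval.py` above: per-step success flags stacked to shape (batch, n_steps) collapse to one flag per episode with an "any" reduction, whose mean is the running success rate. Toy tensors only, assuming an einops version with boolean reductions (which the code above already relies on):

    import einops
    import torch

    # Two episodes, three steps each; only the second episode ever succeeds.
    all_successes = [
        torch.tensor([False, False]),
        torch.tensor([False, True]),
        torch.tensor([False, False]),
    ]
    stacked = torch.stack(all_successes, dim=1)              # shape (batch=2, n_steps=3)
    per_episode = einops.reduce(stacked, "b n -> b", "any")  # tensor([False, True])
    print(per_episode.numpy().mean())                        # 0.5 -> 50% success rate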