iterate on review

2026-07-24 02:06:15 +00:00 · 2025-09-02 05:18:46 -04:00
parent f3614dd812
commit 92adf2218f
7 changed files with 21 additions and 26 deletions
@@ -2,7 +2,7 @@
 # config
 REPO_ID=jadechoghari/smol-libero3
-TASK=libero_10
+TASK=libero_10,libero_spatial
 OUTPUT_DIR=./outputs/
 # clean previous run
@@ -13,7 +13,7 @@ STEPS=100000
 BATCH_SIZE=4
 EVAL_FREQ=1
 SAVE_FREQ=10000
-NUM_WORKERS=0
+NUM_WORKERS=4
 # model params
 POLICY=smolvla
@@ -48,6 +48,6 @@ python src/lerobot/scripts/train.py \
  --save_freq=$SAVE_FREQ \
  --num_workers=$NUM_WORKERS \
  --policy.repo_id=$VLM_REPO_ID \
-  --env.multitask_eval=False \
+  --env.multitask_eval=True \
  --eval.batch_size=1 \
  --eval.n_episodes=1 \
@@ -4,7 +4,7 @@ unset LEROBOT_HOME
 unset HF_LEROBOT_HOME
 # CONFIGURATION
 POLICY_PATH="ganatrask/lerobot-pi0-libero-object"
-TASK=libero_object
+TASK=libero_object,libero_spatial
 ENV_TYPE="libero"
 BATCH_SIZE=1
 N_EPISODES=1
@@ -125,13 +125,6 @@ class PreTrainedConfig(draccus.ChoiceRegistry, HubMixin, abc.ABC):
                return ft
        return None
    @property
    def robot_state_feature_key(self) -> PolicyFeature | None:
        for key, ft in self.input_features.items():
            if ft.type is FeatureType.STATE:
                return key
        return None
    @property
    def env_state_feature(self) -> PolicyFeature | None:
        for _, ft in self.input_features.items():
@@ -35,7 +35,7 @@ def make_env_config(env_type: str, **kwargs) -> EnvConfig:
        raise ValueError(f"Policy type '{env_type}' is not available.")
-def make_env(cfg: EnvConfig, n_envs: int = 1, use_async_envs: bool = False) -> gym.vector.VectorEnv | None:
+def make_env(cfg: EnvConfig, n_envs: int = 1, use_async_envs: bool = False) -> gym.vector.VectorEnv | dict[str, dict[int, gym.vector.VectorEnv]]:
    """Makes a gym vector environment according to the config.
    Args:
@@ -50,6 +50,9 @@ def make_env(cfg: EnvConfig, n_envs: int = 1, use_async_envs: bool = False) -> g
    Returns:
        gym.vector.VectorEnv: The parallelized gym.env instance.
        dict[str, dict[int, gym.vector.VectorEnv]]: A mapping from task suite
            names to indexed vectorized environments (when multitask eval is used).
    """
    if n_envs < 1:
        raise ValueError("`n_envs must be at least 1")
@@ -79,7 +79,6 @@ def create_libero_envs(
                _task
            ]()  # can also choose libero_spatial, libero_object, libero_10 etc.
            tasks_ids = list(range(len(task_suite.tasks)))
            # tasks_ids = [0] # FIXME(mshukor): debug
            for tasks_id in tasks_ids:
                episode_indices = list(range(n_envs))
                print(
@@ -148,7 +147,7 @@ def get_libero_dummy_action():
    """Get dummy/no-op action, used to roll out the simulation while the robot does nothing."""
    return [0, 0, 0, 0, 0, 0, -1]
-
+ACTION_DIM = 8
 class LiberoEnv(gym.Env):
    metadata = {"render_modes": ["rgb_array"], "render_fps": 80}
@@ -179,7 +178,12 @@ class LiberoEnv(gym.Env):
        self.camera_name = camera_name.split(
            ","
        )  # agentview_image (main) or robot0_eye_in_hand_image (wrist)
-        # TODO: jadechoghari, check mapping
+        
        # Map raw camera names to "image" and "image2".
        # The preprocessing step `preprocess_observation` will then prefix these with `.images.*`,
        # following the LeRobot convention (e.g., `observation.images.image`, `observation.images.image2`).
        # This ensures the policy consistently receives observations in the
        # expected format regardless of the original camera naming.
        self.camera_name_mapping = {
            "agentview_image": "image",
            "robot0_eye_in_hand_image": "image2",
@@ -227,7 +231,7 @@ class LiberoEnv(gym.Env):
                    "agent_pos": spaces.Box(
                        low=-1000.0,
                        high=1000.0,
-                        shape=(8,),  # TODO: jadechoghari, check compatible
+                        shape=(ACTION_DIM,),
                        dtype=np.float64,
                    ),
                }
@@ -178,8 +178,9 @@ def rollout(
            successes = [False] * env.num_envs
        # Keep track of which environments are done so far.
-        # done = terminated | truncated | done
+        # Mark the episode as done if we reach the maximum step limit.
-        # TODO: jadechoghari changed, this is cleaner
+        # This ensures that the rollout always terminates cleanly at `max_steps`,
        # and allows logging/saving (e.g., videos) to be triggered consistently.
        done = terminated | truncated | done
        if step + 1 == max_steps:
            done = np.ones_like(done, dtype=bool)
@@ -191,8 +192,7 @@ def rollout(
        step += 1
        running_success_rate = (
-            # einops.reduce(torch.stack(all_successes, dim=1), "b n -> b", "any").numpy().mean() #TODO: changed by jade
+            einops.reduce(torch.stack(all_successes, dim=1), "b n -> b", "any").numpy().mean()
            einops.reduce(torch.stack(all_successes, dim=1), "b n -> b", "max")
        )
        progbar.set_postfix({"running_success_rate": f"{running_success_rate.item() * 100:.1f}%"})
        progbar.update()
@@ -321,8 +321,7 @@ def eval_policy(
        sum_rewards.extend(batch_sum_rewards.tolist())
        batch_max_rewards = einops.reduce((rollout_data["reward"] * mask), "b n -> b", "max")
        max_rewards.extend(batch_max_rewards.tolist())
-        # batch_successes = einops.reduce((rollout_data["success"] * mask), "b n -> b", "any")
+        batch_successes = einops.reduce((rollout_data["success"] * mask), "b n -> b", "any")
        batch_successes = einops.reduce((rollout_data["success"] * mask).float(), "b n -> b", "max")
        all_successes.extend(batch_successes.tolist())
        if seeds:
            all_seeds.extend(seeds)
@@ -495,7 +494,6 @@ def eval_main(cfg: EvalPipelineConfig):
                max_parallel_tasks=cfg.env.max_parallel_tasks,
                verbose=False,
            )
            # Print overall stats
            print("Overall Aggregated Metrics:")
            print(info["overall"]["aggregated"])
@@ -548,7 +546,6 @@ def eval_policy_multitask(
    def eval_task(task_group, task_id, env):
        """Evaluates a single task in parallel."""
        print(f"Evaluating: task_group: {task_group}, task_id: {task_id} ...")
        # jadechoghari : added multi video eval support
        if videos_dir is not None:
            task_videos_dir = videos_dir / f"{task_group}_{task_id}"
            task_videos_dir.mkdir(parents=True, exist_ok=True)
@@ -560,7 +557,6 @@ def eval_policy_multitask(
            task_videos_dir,
            return_episode_data,
            start_seed,
            # verbose=verbose,
        )
        per_episode = task_result["per_episode"]
@@ -299,7 +299,6 @@ def train(cfg: TrainPipelineConfig):
                wandb_logger.log_video(eval_info["video_paths"][0], step, mode="eval")
    if eval_env:
        # added by jade, close all env in multi eval setup
        if cfg.env.multitask_eval:
            for _task_group, envs_dict in eval_env.items():
                for _idx, env in envs_dict.items():