From 92adf2218f9d8dc189a92caa576704ee2c9dd569 Mon Sep 17 00:00:00 2001
From: "Jade Choghari (jchoghar)"
Date: Tue, 2 Sep 2025 05:18:46 -0400
Subject: [PATCH] iterate on review

---
 examples/5_train_libero.sh      |  6 +++---
 examples/6_evaluate_libero.sh   |  2 +-
 src/lerobot/configs/policies.py |  7 -------
 src/lerobot/envs/factory.py     |  5 ++++-
 src/lerobot/envs/libero.py      | 12 ++++++++----
 src/lerobot/scripts/eval.py     | 14 +++++---------
 src/lerobot/scripts/train.py    |  1 -
 7 files changed, 21 insertions(+), 26 deletions(-)

diff --git a/examples/5_train_libero.sh b/examples/5_train_libero.sh
index 6a0440c4d..1f2cf7bb8 100755
--- a/examples/5_train_libero.sh
+++ b/examples/5_train_libero.sh
@@ -2,7 +2,7 @@
 
 # config
 REPO_ID=jadechoghari/smol-libero3
-TASK=libero_10
+TASK=libero_10,libero_spatial
 OUTPUT_DIR=./outputs/
 
 # clean previous run
@@ -13,7 +13,7 @@ STEPS=100000
 BATCH_SIZE=4
 EVAL_FREQ=1
 SAVE_FREQ=10000
-NUM_WORKERS=0
+NUM_WORKERS=4
 
 # model params
 POLICY=smolvla
@@ -48,6 +48,6 @@ python src/lerobot/scripts/train.py \
   --save_freq=$SAVE_FREQ \
   --num_workers=$NUM_WORKERS \
   --policy.repo_id=$VLM_REPO_ID \
-  --env.multitask_eval=False \
+  --env.multitask_eval=True \
   --eval.batch_size=1 \
   --eval.n_episodes=1 \

diff --git a/examples/6_evaluate_libero.sh b/examples/6_evaluate_libero.sh
index e7eec7cc3..97b2c17ee 100644
--- a/examples/6_evaluate_libero.sh
+++ b/examples/6_evaluate_libero.sh
@@ -4,7 +4,7 @@ unset LEROBOT_HOME
 unset HF_LEROBOT_HOME
 # CONFIGURATION
 POLICY_PATH="ganatrask/lerobot-pi0-libero-object"
-TASK=libero_object
+TASK=libero_object,libero_spatial
 ENV_TYPE="libero"
 BATCH_SIZE=1
 N_EPISODES=1

diff --git a/src/lerobot/configs/policies.py b/src/lerobot/configs/policies.py
index a7112aec6..f5fa727cf 100644
--- a/src/lerobot/configs/policies.py
+++ b/src/lerobot/configs/policies.py
@@ -125,13 +125,6 @@ class PreTrainedConfig(draccus.ChoiceRegistry, HubMixin, abc.ABC):
                 return ft
         return None
 
-    @property
-    def robot_state_feature_key(self) -> PolicyFeature | None:
-        for key, ft in self.input_features.items():
-            if ft.type is FeatureType.STATE:
-                return key
-        return None
-
     @property
     def env_state_feature(self) -> PolicyFeature | None:
         for _, ft in self.input_features.items():

diff --git a/src/lerobot/envs/factory.py b/src/lerobot/envs/factory.py
index 09233d4e9..649d8a017 100644
--- a/src/lerobot/envs/factory.py
+++ b/src/lerobot/envs/factory.py
@@ -35,7 +35,7 @@ def make_env_config(env_type: str, **kwargs) -> EnvConfig:
     raise ValueError(f"Policy type '{env_type}' is not available.")
 
 
-def make_env(cfg: EnvConfig, n_envs: int = 1, use_async_envs: bool = False) -> gym.vector.VectorEnv | None:
+def make_env(cfg: EnvConfig, n_envs: int = 1, use_async_envs: bool = False) -> gym.vector.VectorEnv | dict[str, dict[int, gym.vector.VectorEnv]]:
     """Makes a gym vector environment according to the config.
 
     Args:
@@ -50,6 +50,9 @@ def make_env(cfg: EnvConfig, n_envs: int = 1, use_async_envs: bool = False) -> g
     Returns:
         gym.vector.VectorEnv: The parallelized gym.env instance.
+        dict[str, dict[int, gym.vector.VectorEnv]]: A mapping from task suite
+            name to task id to vectorized environment (returned when multitask eval is enabled).
+
     """
     if n_envs < 1:
         raise ValueError("`n_envs must be at least 1")
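Note on the widened `make_env` return type above: with `--env.multitask_eval=True` the factory returns the nested mapping rather than a single vector env, so callers have to branch on both shapes. A minimal sketch of such a branch (the `close_all` helper is hypothetical; its nested loop mirrors the `train.py` cleanup at the end of this patch):

    def close_all(eval_env, multitask_eval: bool) -> None:
        """Close either a plain vector env or the nested multitask mapping."""
        if multitask_eval:
            # dict[str, dict[int, gym.vector.VectorEnv]]:
            # task suite name -> task id -> vectorized env
            for _task_group, envs_dict in eval_env.items():
                for _task_id, env in envs_dict.items():
                    env.close()
        else:
            eval_env.close()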
diff --git a/src/lerobot/envs/libero.py b/src/lerobot/envs/libero.py
index 5905a3334..a897eb42d 100644
--- a/src/lerobot/envs/libero.py
+++ b/src/lerobot/envs/libero.py
@@ -79,7 +79,6 @@ def create_libero_envs(
         _task
     ]()  # can also choose libero_spatial, libero_object, libero_10 etc.
     tasks_ids = list(range(len(task_suite.tasks)))
-    # tasks_ids = [0] # FIXME(mshukor): debug
     for tasks_id in tasks_ids:
         episode_indices = list(range(n_envs))
         print(
@@ -148,7 +147,7 @@
 def get_libero_dummy_action():
     """Get dummy/no-op action, used to roll out the simulation while the robot does nothing."""
     return [0, 0, 0, 0, 0, 0, -1]
 
-
+ACTION_DIM = 8
 class LiberoEnv(gym.Env):
     metadata = {"render_modes": ["rgb_array"], "render_fps": 80}
@@ -179,7 +178,12 @@ class LiberoEnv(gym.Env):
         self.camera_name = camera_name.split(
             ","
         )  # agentview_image (main) or robot0_eye_in_hand_image (wrist)
-        # TODO: jadechoghari, check mapping
+
+        # Map raw camera names to "image" and "image2".
+        # The preprocessing step `preprocess_observation` then prefixes these with
+        # `observation.images.`, following the LeRobot convention (e.g.,
+        # `observation.images.image`, `observation.images.image2`), so the policy
+        # receives observations in a consistent format regardless of camera naming.
         self.camera_name_mapping = {
             "agentview_image": "image",
             "robot0_eye_in_hand_image": "image2",
@@ -227,7 +231,7 @@
             "agent_pos": spaces.Box(
                 low=-1000.0,
                 high=1000.0,
-                shape=(8,),  # TODO: jadechoghari, check compatible
+                shape=(ACTION_DIM,),
                 dtype=np.float64,
             ),
         }
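To make the camera-mapping comment above concrete: raw LIBERO camera keys are renamed before `preprocess_observation` adds the `observation.images.` prefix. A toy sketch assuming the `camera_name_mapping` defined above (`rename_cameras` and `raw_obs` are hypothetical stand-ins for logic that lives inside `LiberoEnv`):

    import numpy as np

    camera_name_mapping = {
        "agentview_image": "image",
        "robot0_eye_in_hand_image": "image2",
    }

    def rename_cameras(raw_obs: dict) -> dict:
        # "agentview_image" -> "image"; preprocess_observation later turns
        # this key into "observation.images.image".
        return {camera_name_mapping.get(k, k): v for k, v in raw_obs.items()}

    obs = rename_cameras({"agentview_image": np.zeros((256, 256, 3), dtype=np.uint8)})
    assert set(obs) == {"image"}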
diff --git a/src/lerobot/scripts/eval.py b/src/lerobot/scripts/eval.py
index 3965b6e74..3145bed35 100644
--- a/src/lerobot/scripts/eval.py
+++ b/src/lerobot/scripts/eval.py
@@ -178,8 +178,9 @@ def rollout(
         successes = [False] * env.num_envs
 
         # Keep track of which environments are done so far.
-        # done = terminated | truncated | done
-        # TODO: jadechoghari changed, this is cleaner
+        # Mark the episode as done if we reach the maximum step limit.
+        # This ensures that the rollout always terminates cleanly at `max_steps`,
+        # and allows logging/saving (e.g., videos) to be triggered consistently.
         done = terminated | truncated | done
         if step + 1 == max_steps:
             done = np.ones_like(done, dtype=bool)
@@ -191,8 +192,7 @@
         step += 1
 
         running_success_rate = (
-            # einops.reduce(torch.stack(all_successes, dim=1), "b n -> b", "any").numpy().mean() #TODO: changed by jade
-            einops.reduce(torch.stack(all_successes, dim=1), "b n -> b", "max")
+            einops.reduce(torch.stack(all_successes, dim=1), "b n -> b", "any").numpy().mean()
         )
         progbar.set_postfix({"running_success_rate": f"{running_success_rate.item() * 100:.1f}%"})
         progbar.update()
@@ -321,8 +321,7 @@ def eval_policy(
         sum_rewards.extend(batch_sum_rewards.tolist())
         batch_max_rewards = einops.reduce((rollout_data["reward"] * mask), "b n -> b", "max")
         max_rewards.extend(batch_max_rewards.tolist())
-        # batch_successes = einops.reduce((rollout_data["success"] * mask), "b n -> b", "any")
-        batch_successes = einops.reduce((rollout_data["success"] * mask).float(), "b n -> b", "max")
+        batch_successes = einops.reduce((rollout_data["success"] * mask), "b n -> b", "any")
         all_successes.extend(batch_successes.tolist())
         if seeds:
             all_seeds.extend(seeds)
@@ -495,7 +494,6 @@ def eval_main(cfg: EvalPipelineConfig):
         max_parallel_tasks=cfg.env.max_parallel_tasks,
         verbose=False,
     )
-    # Print overall stats
     print("Overall Aggregated Metrics:")
     print(info["overall"]["aggregated"])
 
@@ -548,7 +546,6 @@ def eval_policy_multitask(
     def eval_task(task_group, task_id, env):
         """Evaluates a single task in parallel."""
         print(f"Evaluating: task_group: {task_group}, task_id: {task_id} ...")
-        # jadechoghari : added multi video eval support
         if videos_dir is not None:
             task_videos_dir = videos_dir / f"{task_group}_{task_id}"
             task_videos_dir.mkdir(parents=True, exist_ok=True)
@@ -560,7 +557,6 @@
             task_videos_dir,
             return_episode_data,
             start_seed,
-            # verbose=verbose,
         )
 
         per_episode = task_result["per_episode"]

diff --git a/src/lerobot/scripts/train.py b/src/lerobot/scripts/train.py
index 8e40ce554..b34f94364 100644
--- a/src/lerobot/scripts/train.py
+++ b/src/lerobot/scripts/train.py
@@ -299,7 +299,6 @@ def train(cfg: TrainPipelineConfig):
             wandb_logger.log_video(eval_info["video_paths"][0], step, mode="eval")
 
     if eval_env:
-        # added by jade, close all env in multi eval setup
        if cfg.env.multitask_eval:
            for _task_group, envs_dict in eval_env.items():
                for _idx, env in envs_dict.items():
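A worked example of the success reduction restored in `eval.py` above: per-step success flags stacked to shape (batch, n_steps) collapse to one flag per episode with an "any" reduction, whose mean is the running success rate. Toy tensors only, assuming an einops version with boolean reductions (which the code above already relies on):

    import einops
    import torch

    # Two episodes, three steps each; only the second episode ever succeeds.
    all_successes = [
        torch.tensor([False, False]),
        torch.tensor([False, True]),
        torch.tensor([False, False]),
    ]
    stacked = torch.stack(all_successes, dim=1)              # shape (batch=2, n_steps=3)
    per_episode = einops.reduce(stacked, "b n -> b", "any")  # tensor([False, True])
    print(per_episode.numpy().mean())                        # 0.5 -> 50% success rate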