iterate on review

This commit is contained in:
Jade Choghari (jchoghar)
2025-09-02 05:18:46 -04:00
parent f3614dd812
commit 92adf2218f
7 changed files with 21 additions and 26 deletions
+3 -3
View File
@@ -2,7 +2,7 @@
# config # config
REPO_ID=jadechoghari/smol-libero3 REPO_ID=jadechoghari/smol-libero3
TASK=libero_10 TASK=libero_10,libero_spatial
OUTPUT_DIR=./outputs/ OUTPUT_DIR=./outputs/
# clean previous run # clean previous run
@@ -13,7 +13,7 @@ STEPS=100000
BATCH_SIZE=4 BATCH_SIZE=4
EVAL_FREQ=1 EVAL_FREQ=1
SAVE_FREQ=10000 SAVE_FREQ=10000
NUM_WORKERS=0 NUM_WORKERS=4
# model params # model params
POLICY=smolvla POLICY=smolvla
@@ -48,6 +48,6 @@ python src/lerobot/scripts/train.py \
--save_freq=$SAVE_FREQ \ --save_freq=$SAVE_FREQ \
--num_workers=$NUM_WORKERS \ --num_workers=$NUM_WORKERS \
--policy.repo_id=$VLM_REPO_ID \ --policy.repo_id=$VLM_REPO_ID \
--env.multitask_eval=False \ --env.multitask_eval=True \
--eval.batch_size=1 \ --eval.batch_size=1 \
--eval.n_episodes=1 \ --eval.n_episodes=1 \
+1 -1
View File
@@ -4,7 +4,7 @@ unset LEROBOT_HOME
unset HF_LEROBOT_HOME unset HF_LEROBOT_HOME
# CONFIGURATION # CONFIGURATION
POLICY_PATH="ganatrask/lerobot-pi0-libero-object" POLICY_PATH="ganatrask/lerobot-pi0-libero-object"
TASK=libero_object TASK=libero_object,libero_spatial
ENV_TYPE="libero" ENV_TYPE="libero"
BATCH_SIZE=1 BATCH_SIZE=1
N_EPISODES=1 N_EPISODES=1
-7
View File
@@ -125,13 +125,6 @@ class PreTrainedConfig(draccus.ChoiceRegistry, HubMixin, abc.ABC):
return ft return ft
return None return None
@property
def robot_state_feature_key(self) -> PolicyFeature | None:
for key, ft in self.input_features.items():
if ft.type is FeatureType.STATE:
return key
return None
@property @property
def env_state_feature(self) -> PolicyFeature | None: def env_state_feature(self) -> PolicyFeature | None:
for _, ft in self.input_features.items(): for _, ft in self.input_features.items():
+4 -1
View File
@@ -35,7 +35,7 @@ def make_env_config(env_type: str, **kwargs) -> EnvConfig:
raise ValueError(f"Policy type '{env_type}' is not available.") raise ValueError(f"Policy type '{env_type}' is not available.")
def make_env(cfg: EnvConfig, n_envs: int = 1, use_async_envs: bool = False) -> gym.vector.VectorEnv | None: def make_env(cfg: EnvConfig, n_envs: int = 1, use_async_envs: bool = False) -> gym.vector.VectorEnv | dict[str, dict[int, gym.vector.VectorEnv]]:
"""Makes a gym vector environment according to the config. """Makes a gym vector environment according to the config.
Args: Args:
@@ -50,6 +50,9 @@ def make_env(cfg: EnvConfig, n_envs: int = 1, use_async_envs: bool = False) -> g
Returns: Returns:
gym.vector.VectorEnv: The parallelized gym.env instance. gym.vector.VectorEnv: The parallelized gym.env instance.
dict[str, dict[int, gym.vector.VectorEnv]]: A mapping from task suite
names to indexed vectorized environments (when multitask eval is used).
""" """
if n_envs < 1: if n_envs < 1:
raise ValueError("`n_envs must be at least 1") raise ValueError("`n_envs must be at least 1")
+8 -4
View File
@@ -79,7 +79,6 @@ def create_libero_envs(
_task _task
]() # can also choose libero_spatial, libero_object, libero_10 etc. ]() # can also choose libero_spatial, libero_object, libero_10 etc.
tasks_ids = list(range(len(task_suite.tasks))) tasks_ids = list(range(len(task_suite.tasks)))
# tasks_ids = [0] # FIXME(mshukor): debug
for tasks_id in tasks_ids: for tasks_id in tasks_ids:
episode_indices = list(range(n_envs)) episode_indices = list(range(n_envs))
print( print(
@@ -148,7 +147,7 @@ def get_libero_dummy_action():
"""Get dummy/no-op action, used to roll out the simulation while the robot does nothing.""" """Get dummy/no-op action, used to roll out the simulation while the robot does nothing."""
return [0, 0, 0, 0, 0, 0, -1] return [0, 0, 0, 0, 0, 0, -1]
ACTION_DIM = 8
class LiberoEnv(gym.Env): class LiberoEnv(gym.Env):
metadata = {"render_modes": ["rgb_array"], "render_fps": 80} metadata = {"render_modes": ["rgb_array"], "render_fps": 80}
@@ -179,7 +178,12 @@ class LiberoEnv(gym.Env):
self.camera_name = camera_name.split( self.camera_name = camera_name.split(
"," ","
) # agentview_image (main) or robot0_eye_in_hand_image (wrist) ) # agentview_image (main) or robot0_eye_in_hand_image (wrist)
# TODO: jadechoghari, check mapping
# Map raw camera names to "image" and "image2".
# The preprocessing step `preprocess_observation` will then prefix these with `observation.images.`,
# following the LeRobot convention (e.g., `observation.images.image`, `observation.images.image2`).
# This ensures the policy consistently receives observations in the
# expected format regardless of the original camera naming.
self.camera_name_mapping = { self.camera_name_mapping = {
"agentview_image": "image", "agentview_image": "image",
"robot0_eye_in_hand_image": "image2", "robot0_eye_in_hand_image": "image2",
@@ -227,7 +231,7 @@ class LiberoEnv(gym.Env):
"agent_pos": spaces.Box( "agent_pos": spaces.Box(
low=-1000.0, low=-1000.0,
high=1000.0, high=1000.0,
shape=(8,), # TODO: jadechoghari, check compatible shape=(ACTION_DIM,),
dtype=np.float64, dtype=np.float64,
), ),
} }
+5 -9
View File
@@ -178,8 +178,9 @@ def rollout(
successes = [False] * env.num_envs successes = [False] * env.num_envs
# Keep track of which environments are done so far. # Keep track of which environments are done so far.
# done = terminated | truncated | done # Mark the episode as done if we reach the maximum step limit.
# TODO: jadechoghari changed, this is cleaner # This ensures that the rollout always terminates cleanly at `max_steps`,
# and allows logging/saving (e.g., videos) to be triggered consistently.
done = terminated | truncated | done done = terminated | truncated | done
if step + 1 == max_steps: if step + 1 == max_steps:
done = np.ones_like(done, dtype=bool) done = np.ones_like(done, dtype=bool)
@@ -191,8 +192,7 @@ def rollout(
step += 1 step += 1
running_success_rate = ( running_success_rate = (
# einops.reduce(torch.stack(all_successes, dim=1), "b n -> b", "any").numpy().mean() #TODO: changed by jade einops.reduce(torch.stack(all_successes, dim=1), "b n -> b", "any").numpy().mean()
einops.reduce(torch.stack(all_successes, dim=1), "b n -> b", "max")
) )
progbar.set_postfix({"running_success_rate": f"{running_success_rate.item() * 100:.1f}%"}) progbar.set_postfix({"running_success_rate": f"{running_success_rate.item() * 100:.1f}%"})
progbar.update() progbar.update()
@@ -321,8 +321,7 @@ def eval_policy(
sum_rewards.extend(batch_sum_rewards.tolist()) sum_rewards.extend(batch_sum_rewards.tolist())
batch_max_rewards = einops.reduce((rollout_data["reward"] * mask), "b n -> b", "max") batch_max_rewards = einops.reduce((rollout_data["reward"] * mask), "b n -> b", "max")
max_rewards.extend(batch_max_rewards.tolist()) max_rewards.extend(batch_max_rewards.tolist())
# batch_successes = einops.reduce((rollout_data["success"] * mask), "b n -> b", "any") batch_successes = einops.reduce((rollout_data["success"] * mask), "b n -> b", "any")
batch_successes = einops.reduce((rollout_data["success"] * mask).float(), "b n -> b", "max")
all_successes.extend(batch_successes.tolist()) all_successes.extend(batch_successes.tolist())
if seeds: if seeds:
all_seeds.extend(seeds) all_seeds.extend(seeds)
@@ -495,7 +494,6 @@ def eval_main(cfg: EvalPipelineConfig):
max_parallel_tasks=cfg.env.max_parallel_tasks, max_parallel_tasks=cfg.env.max_parallel_tasks,
verbose=False, verbose=False,
) )
# Print overall stats
print("Overall Aggregated Metrics:") print("Overall Aggregated Metrics:")
print(info["overall"]["aggregated"]) print(info["overall"]["aggregated"])
@@ -548,7 +546,6 @@ def eval_policy_multitask(
def eval_task(task_group, task_id, env): def eval_task(task_group, task_id, env):
"""Evaluates a single task in parallel.""" """Evaluates a single task in parallel."""
print(f"Evaluating: task_group: {task_group}, task_id: {task_id} ...") print(f"Evaluating: task_group: {task_group}, task_id: {task_id} ...")
# jadechoghari : added multi video eval support
if videos_dir is not None: if videos_dir is not None:
task_videos_dir = videos_dir / f"{task_group}_{task_id}" task_videos_dir = videos_dir / f"{task_group}_{task_id}"
task_videos_dir.mkdir(parents=True, exist_ok=True) task_videos_dir.mkdir(parents=True, exist_ok=True)
@@ -560,7 +557,6 @@ def eval_policy_multitask(
task_videos_dir, task_videos_dir,
return_episode_data, return_episode_data,
start_seed, start_seed,
# verbose=verbose,
) )
per_episode = task_result["per_episode"] per_episode = task_result["per_episode"]
-1
View File
@@ -299,7 +299,6 @@ def train(cfg: TrainPipelineConfig):
wandb_logger.log_video(eval_info["video_paths"][0], step, mode="eval") wandb_logger.log_video(eval_info["video_paths"][0], step, mode="eval")
if eval_env: if eval_env:
# added by jade, close all env in multi eval setup
if cfg.env.multitask_eval: if cfg.env.multitask_eval:
for _task_group, envs_dict in eval_env.items(): for _task_group, envs_dict in eval_env.items():
for _idx, env in envs_dict.items(): for _idx, env in envs_dict.items():