mirror of
https://github.com/huggingface/lerobot.git
synced 2026-05-21 11:39:50 +00:00
iterate on review
This commit is contained in:
@@ -2,7 +2,7 @@
|
|||||||
|
|
||||||
# config
|
# config
|
||||||
REPO_ID=jadechoghari/smol-libero3
|
REPO_ID=jadechoghari/smol-libero3
|
||||||
TASK=libero_10
|
TASK=libero_10,libero_spatial
|
||||||
OUTPUT_DIR=./outputs/
|
OUTPUT_DIR=./outputs/
|
||||||
|
|
||||||
# clean previous run
|
# clean previous run
|
||||||
@@ -13,7 +13,7 @@ STEPS=100000
|
|||||||
BATCH_SIZE=4
|
BATCH_SIZE=4
|
||||||
EVAL_FREQ=1
|
EVAL_FREQ=1
|
||||||
SAVE_FREQ=10000
|
SAVE_FREQ=10000
|
||||||
NUM_WORKERS=0
|
NUM_WORKERS=4
|
||||||
|
|
||||||
# model params
|
# model params
|
||||||
POLICY=smolvla
|
POLICY=smolvla
|
||||||
@@ -48,6 +48,6 @@ python src/lerobot/scripts/train.py \
|
|||||||
--save_freq=$SAVE_FREQ \
|
--save_freq=$SAVE_FREQ \
|
||||||
--num_workers=$NUM_WORKERS \
|
--num_workers=$NUM_WORKERS \
|
||||||
--policy.repo_id=$VLM_REPO_ID \
|
--policy.repo_id=$VLM_REPO_ID \
|
||||||
--env.multitask_eval=False \
|
--env.multitask_eval=True \
|
||||||
--eval.batch_size=1 \
|
--eval.batch_size=1 \
|
||||||
--eval.n_episodes=1 \
|
--eval.n_episodes=1 \
|
||||||
|
|||||||
@@ -4,7 +4,7 @@ unset LEROBOT_HOME
|
|||||||
unset HF_LEROBOT_HOME
|
unset HF_LEROBOT_HOME
|
||||||
# CONFIGURATION
|
# CONFIGURATION
|
||||||
POLICY_PATH="ganatrask/lerobot-pi0-libero-object"
|
POLICY_PATH="ganatrask/lerobot-pi0-libero-object"
|
||||||
TASK=libero_object
|
TASK=libero_object,libero_spatial
|
||||||
ENV_TYPE="libero"
|
ENV_TYPE="libero"
|
||||||
BATCH_SIZE=1
|
BATCH_SIZE=1
|
||||||
N_EPISODES=1
|
N_EPISODES=1
|
||||||
|
|||||||
@@ -125,13 +125,6 @@ class PreTrainedConfig(draccus.ChoiceRegistry, HubMixin, abc.ABC):
|
|||||||
return ft
|
return ft
|
||||||
return None
|
return None
|
||||||
|
|
||||||
@property
|
|
||||||
def robot_state_feature_key(self) -> PolicyFeature | None:
|
|
||||||
for key, ft in self.input_features.items():
|
|
||||||
if ft.type is FeatureType.STATE:
|
|
||||||
return key
|
|
||||||
return None
|
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def env_state_feature(self) -> PolicyFeature | None:
|
def env_state_feature(self) -> PolicyFeature | None:
|
||||||
for _, ft in self.input_features.items():
|
for _, ft in self.input_features.items():
|
||||||
|
|||||||
@@ -35,7 +35,7 @@ def make_env_config(env_type: str, **kwargs) -> EnvConfig:
|
|||||||
raise ValueError(f"Policy type '{env_type}' is not available.")
|
raise ValueError(f"Policy type '{env_type}' is not available.")
|
||||||
|
|
||||||
|
|
||||||
def make_env(cfg: EnvConfig, n_envs: int = 1, use_async_envs: bool = False) -> gym.vector.VectorEnv | None:
|
def make_env(cfg: EnvConfig, n_envs: int = 1, use_async_envs: bool = False) -> gym.vector.VectorEnv | dict[str, dict[int, gym.vector.VectorEnv]]:
|
||||||
"""Makes a gym vector environment according to the config.
|
"""Makes a gym vector environment according to the config.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
@@ -50,6 +50,9 @@ def make_env(cfg: EnvConfig, n_envs: int = 1, use_async_envs: bool = False) -> g
|
|||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
gym.vector.VectorEnv: The parallelized gym.env instance.
|
gym.vector.VectorEnv: The parallelized gym.env instance.
|
||||||
|
dict[str, dict[int, gym.vector.VectorEnv]]: A mapping from task suite
|
||||||
|
names to indexed vectorized environments (when multitask eval is used).
|
||||||
|
|
||||||
"""
|
"""
|
||||||
if n_envs < 1:
|
if n_envs < 1:
|
||||||
raise ValueError("`n_envs must be at least 1")
|
raise ValueError("`n_envs must be at least 1")
|
||||||
|
|||||||
@@ -79,7 +79,6 @@ def create_libero_envs(
|
|||||||
_task
|
_task
|
||||||
]() # can also choose libero_spatial, libero_object, libero_10 etc.
|
]() # can also choose libero_spatial, libero_object, libero_10 etc.
|
||||||
tasks_ids = list(range(len(task_suite.tasks)))
|
tasks_ids = list(range(len(task_suite.tasks)))
|
||||||
# tasks_ids = [0] # FIXME(mshukor): debug
|
|
||||||
for tasks_id in tasks_ids:
|
for tasks_id in tasks_ids:
|
||||||
episode_indices = list(range(n_envs))
|
episode_indices = list(range(n_envs))
|
||||||
print(
|
print(
|
||||||
@@ -148,7 +147,7 @@ def get_libero_dummy_action():
|
|||||||
"""Get dummy/no-op action, used to roll out the simulation while the robot does nothing."""
|
"""Get dummy/no-op action, used to roll out the simulation while the robot does nothing."""
|
||||||
return [0, 0, 0, 0, 0, 0, -1]
|
return [0, 0, 0, 0, 0, 0, -1]
|
||||||
|
|
||||||
|
ACTION_DIM = 8
|
||||||
class LiberoEnv(gym.Env):
|
class LiberoEnv(gym.Env):
|
||||||
metadata = {"render_modes": ["rgb_array"], "render_fps": 80}
|
metadata = {"render_modes": ["rgb_array"], "render_fps": 80}
|
||||||
|
|
||||||
@@ -179,7 +178,12 @@ class LiberoEnv(gym.Env):
|
|||||||
self.camera_name = camera_name.split(
|
self.camera_name = camera_name.split(
|
||||||
","
|
","
|
||||||
) # agentview_image (main) or robot0_eye_in_hand_image (wrist)
|
) # agentview_image (main) or robot0_eye_in_hand_image (wrist)
|
||||||
# TODO: jadechoghari, check mapping
|
|
||||||
|
# Map raw camera names to "image" and "image2".
|
||||||
|
# The preprocessing step `preprocess_observation` will then prefix these with `.images.*`,
|
||||||
|
# following the LeRobot convention (e.g., `observation.images.image`, `observation.images.image2`).
|
||||||
|
# This ensures the policy consistently receives observations in the
|
||||||
|
# expected format regardless of the original camera naming.
|
||||||
self.camera_name_mapping = {
|
self.camera_name_mapping = {
|
||||||
"agentview_image": "image",
|
"agentview_image": "image",
|
||||||
"robot0_eye_in_hand_image": "image2",
|
"robot0_eye_in_hand_image": "image2",
|
||||||
@@ -227,7 +231,7 @@ class LiberoEnv(gym.Env):
|
|||||||
"agent_pos": spaces.Box(
|
"agent_pos": spaces.Box(
|
||||||
low=-1000.0,
|
low=-1000.0,
|
||||||
high=1000.0,
|
high=1000.0,
|
||||||
shape=(8,), # TODO: jadechoghari, check compatible
|
shape=(ACTION_DIM,),
|
||||||
dtype=np.float64,
|
dtype=np.float64,
|
||||||
),
|
),
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -178,8 +178,9 @@ def rollout(
|
|||||||
successes = [False] * env.num_envs
|
successes = [False] * env.num_envs
|
||||||
|
|
||||||
# Keep track of which environments are done so far.
|
# Keep track of which environments are done so far.
|
||||||
# done = terminated | truncated | done
|
# Mark the episode as done if we reach the maximum step limit.
|
||||||
# TODO: jadechoghari changed, this is cleaner
|
# This ensures that the rollout always terminates cleanly at `max_steps`,
|
||||||
|
# and allows logging/saving (e.g., videos) to be triggered consistently.
|
||||||
done = terminated | truncated | done
|
done = terminated | truncated | done
|
||||||
if step + 1 == max_steps:
|
if step + 1 == max_steps:
|
||||||
done = np.ones_like(done, dtype=bool)
|
done = np.ones_like(done, dtype=bool)
|
||||||
@@ -191,8 +192,7 @@ def rollout(
|
|||||||
|
|
||||||
step += 1
|
step += 1
|
||||||
running_success_rate = (
|
running_success_rate = (
|
||||||
# einops.reduce(torch.stack(all_successes, dim=1), "b n -> b", "any").numpy().mean() #TODO: changed by jade
|
einops.reduce(torch.stack(all_successes, dim=1), "b n -> b", "any").numpy().mean()
|
||||||
einops.reduce(torch.stack(all_successes, dim=1), "b n -> b", "max")
|
|
||||||
)
|
)
|
||||||
progbar.set_postfix({"running_success_rate": f"{running_success_rate.item() * 100:.1f}%"})
|
progbar.set_postfix({"running_success_rate": f"{running_success_rate.item() * 100:.1f}%"})
|
||||||
progbar.update()
|
progbar.update()
|
||||||
@@ -321,8 +321,7 @@ def eval_policy(
|
|||||||
sum_rewards.extend(batch_sum_rewards.tolist())
|
sum_rewards.extend(batch_sum_rewards.tolist())
|
||||||
batch_max_rewards = einops.reduce((rollout_data["reward"] * mask), "b n -> b", "max")
|
batch_max_rewards = einops.reduce((rollout_data["reward"] * mask), "b n -> b", "max")
|
||||||
max_rewards.extend(batch_max_rewards.tolist())
|
max_rewards.extend(batch_max_rewards.tolist())
|
||||||
# batch_successes = einops.reduce((rollout_data["success"] * mask), "b n -> b", "any")
|
batch_successes = einops.reduce((rollout_data["success"] * mask), "b n -> b", "any")
|
||||||
batch_successes = einops.reduce((rollout_data["success"] * mask).float(), "b n -> b", "max")
|
|
||||||
all_successes.extend(batch_successes.tolist())
|
all_successes.extend(batch_successes.tolist())
|
||||||
if seeds:
|
if seeds:
|
||||||
all_seeds.extend(seeds)
|
all_seeds.extend(seeds)
|
||||||
@@ -495,7 +494,6 @@ def eval_main(cfg: EvalPipelineConfig):
|
|||||||
max_parallel_tasks=cfg.env.max_parallel_tasks,
|
max_parallel_tasks=cfg.env.max_parallel_tasks,
|
||||||
verbose=False,
|
verbose=False,
|
||||||
)
|
)
|
||||||
# Print overall stats
|
|
||||||
print("Overall Aggregated Metrics:")
|
print("Overall Aggregated Metrics:")
|
||||||
print(info["overall"]["aggregated"])
|
print(info["overall"]["aggregated"])
|
||||||
|
|
||||||
@@ -548,7 +546,6 @@ def eval_policy_multitask(
|
|||||||
def eval_task(task_group, task_id, env):
|
def eval_task(task_group, task_id, env):
|
||||||
"""Evaluates a single task in parallel."""
|
"""Evaluates a single task in parallel."""
|
||||||
print(f"Evaluating: task_group: {task_group}, task_id: {task_id} ...")
|
print(f"Evaluating: task_group: {task_group}, task_id: {task_id} ...")
|
||||||
# jadechoghari : added multi video eval support
|
|
||||||
if videos_dir is not None:
|
if videos_dir is not None:
|
||||||
task_videos_dir = videos_dir / f"{task_group}_{task_id}"
|
task_videos_dir = videos_dir / f"{task_group}_{task_id}"
|
||||||
task_videos_dir.mkdir(parents=True, exist_ok=True)
|
task_videos_dir.mkdir(parents=True, exist_ok=True)
|
||||||
@@ -560,7 +557,6 @@ def eval_policy_multitask(
|
|||||||
task_videos_dir,
|
task_videos_dir,
|
||||||
return_episode_data,
|
return_episode_data,
|
||||||
start_seed,
|
start_seed,
|
||||||
# verbose=verbose,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
per_episode = task_result["per_episode"]
|
per_episode = task_result["per_episode"]
|
||||||
|
|||||||
@@ -299,7 +299,6 @@ def train(cfg: TrainPipelineConfig):
|
|||||||
wandb_logger.log_video(eval_info["video_paths"][0], step, mode="eval")
|
wandb_logger.log_video(eval_info["video_paths"][0], step, mode="eval")
|
||||||
|
|
||||||
if eval_env:
|
if eval_env:
|
||||||
# added by jade, close all env in multi eval setup
|
|
||||||
if cfg.env.multitask_eval:
|
if cfg.env.multitask_eval:
|
||||||
for _task_group, envs_dict in eval_env.items():
|
for _task_group, envs_dict in eval_env.items():
|
||||||
for _idx, env in envs_dict.items():
|
for _idx, env in envs_dict.items():
|
||||||
|
|||||||
Reference in New Issue
Block a user