mirror of
https://github.com/huggingface/lerobot.git
synced 2026-05-25 05:29:55 +00:00
iterate on review
This commit is contained in:
@@ -2,7 +2,7 @@
|
||||
|
||||
# config
|
||||
REPO_ID=jadechoghari/smol-libero3
|
||||
TASK=libero_10
|
||||
TASK=libero_10,libero_spatial
|
||||
OUTPUT_DIR=./outputs/
|
||||
|
||||
# clean previous run
|
||||
@@ -13,7 +13,7 @@ STEPS=100000
|
||||
BATCH_SIZE=4
|
||||
EVAL_FREQ=1
|
||||
SAVE_FREQ=10000
|
||||
NUM_WORKERS=0
|
||||
NUM_WORKERS=4
|
||||
|
||||
# model params
|
||||
POLICY=smolvla
|
||||
@@ -48,6 +48,6 @@ python src/lerobot/scripts/train.py \
|
||||
--save_freq=$SAVE_FREQ \
|
||||
--num_workers=$NUM_WORKERS \
|
||||
--policy.repo_id=$VLM_REPO_ID \
|
||||
--env.multitask_eval=False \
|
||||
--env.multitask_eval=True \
|
||||
--eval.batch_size=1 \
|
||||
--eval.n_episodes=1 \
|
||||
|
||||
@@ -4,7 +4,7 @@ unset LEROBOT_HOME
|
||||
unset HF_LEROBOT_HOME
|
||||
# CONFIGURATION
|
||||
POLICY_PATH="ganatrask/lerobot-pi0-libero-object"
|
||||
TASK=libero_object
|
||||
TASK=libero_object,libero_spatial
|
||||
ENV_TYPE="libero"
|
||||
BATCH_SIZE=1
|
||||
N_EPISODES=1
|
||||
|
||||
@@ -125,13 +125,6 @@ class PreTrainedConfig(draccus.ChoiceRegistry, HubMixin, abc.ABC):
|
||||
return ft
|
||||
return None
|
||||
|
||||
@property
|
||||
def robot_state_feature_key(self) -> PolicyFeature | None:
|
||||
for key, ft in self.input_features.items():
|
||||
if ft.type is FeatureType.STATE:
|
||||
return key
|
||||
return None
|
||||
|
||||
@property
|
||||
def env_state_feature(self) -> PolicyFeature | None:
|
||||
for _, ft in self.input_features.items():
|
||||
|
||||
@@ -35,7 +35,7 @@ def make_env_config(env_type: str, **kwargs) -> EnvConfig:
|
||||
raise ValueError(f"Policy type '{env_type}' is not available.")
|
||||
|
||||
|
||||
def make_env(cfg: EnvConfig, n_envs: int = 1, use_async_envs: bool = False) -> gym.vector.VectorEnv | None:
|
||||
def make_env(cfg: EnvConfig, n_envs: int = 1, use_async_envs: bool = False) -> gym.vector.VectorEnv | dict[str, dict[int, gym.vector.VectorEnv]]:
|
||||
"""Makes a gym vector environment according to the config.
|
||||
|
||||
Args:
|
||||
@@ -50,6 +50,9 @@ def make_env(cfg: EnvConfig, n_envs: int = 1, use_async_envs: bool = False) -> g
|
||||
|
||||
Returns:
|
||||
gym.vector.VectorEnv: The parallelized gym.env instance.
|
||||
dict[str, dict[int, gym.vector.VectorEnv]]: A mapping from task suite
|
||||
names to indexed vectorized environments (when multitask eval is used).
|
||||
|
||||
"""
|
||||
if n_envs < 1:
|
||||
raise ValueError("`n_envs must be at least 1")
|
||||
|
||||
@@ -79,7 +79,6 @@ def create_libero_envs(
|
||||
_task
|
||||
]() # can also choose libero_spatial, libero_object, libero_10 etc.
|
||||
tasks_ids = list(range(len(task_suite.tasks)))
|
||||
# tasks_ids = [0] # FIXME(mshukor): debug
|
||||
for tasks_id in tasks_ids:
|
||||
episode_indices = list(range(n_envs))
|
||||
print(
|
||||
@@ -148,7 +147,7 @@ def get_libero_dummy_action():
|
||||
"""Get dummy/no-op action, used to roll out the simulation while the robot does nothing."""
|
||||
return [0, 0, 0, 0, 0, 0, -1]
|
||||
|
||||
|
||||
ACTION_DIM = 8
|
||||
class LiberoEnv(gym.Env):
|
||||
metadata = {"render_modes": ["rgb_array"], "render_fps": 80}
|
||||
|
||||
@@ -179,7 +178,12 @@ class LiberoEnv(gym.Env):
|
||||
self.camera_name = camera_name.split(
|
||||
","
|
||||
) # agentview_image (main) or robot0_eye_in_hand_image (wrist)
|
||||
# TODO: jadechoghari, check mapping
|
||||
|
||||
# Map raw camera names to "image" and "image2".
|
||||
# The preprocessing step `preprocess_observation` will then prefix these with `observation.images.`,
|
||||
# following the LeRobot convention (e.g., `observation.images.image`, `observation.images.image2`).
|
||||
# This ensures the policy consistently receives observations in the
|
||||
# expected format regardless of the original camera naming.
|
||||
self.camera_name_mapping = {
|
||||
"agentview_image": "image",
|
||||
"robot0_eye_in_hand_image": "image2",
|
||||
@@ -227,7 +231,7 @@ class LiberoEnv(gym.Env):
|
||||
"agent_pos": spaces.Box(
|
||||
low=-1000.0,
|
||||
high=1000.0,
|
||||
shape=(8,), # TODO: jadechoghari, check compatible
|
||||
shape=(ACTION_DIM,),
|
||||
dtype=np.float64,
|
||||
),
|
||||
}
|
||||
|
||||
@@ -178,8 +178,9 @@ def rollout(
|
||||
successes = [False] * env.num_envs
|
||||
|
||||
# Keep track of which environments are done so far.
|
||||
# done = terminated | truncated | done
|
||||
# TODO: jadechoghari changed, this is cleaner
|
||||
# Mark the episode as done if we reach the maximum step limit.
|
||||
# This ensures that the rollout always terminates cleanly at `max_steps`,
|
||||
# and allows logging/saving (e.g., videos) to be triggered consistently.
|
||||
done = terminated | truncated | done
|
||||
if step + 1 == max_steps:
|
||||
done = np.ones_like(done, dtype=bool)
|
||||
@@ -191,8 +192,7 @@ def rollout(
|
||||
|
||||
step += 1
|
||||
running_success_rate = (
|
||||
# einops.reduce(torch.stack(all_successes, dim=1), "b n -> b", "any").numpy().mean() #TODO: changed by jade
|
||||
einops.reduce(torch.stack(all_successes, dim=1), "b n -> b", "max")
|
||||
einops.reduce(torch.stack(all_successes, dim=1), "b n -> b", "any").numpy().mean()
|
||||
)
|
||||
progbar.set_postfix({"running_success_rate": f"{running_success_rate.item() * 100:.1f}%"})
|
||||
progbar.update()
|
||||
@@ -321,8 +321,7 @@ def eval_policy(
|
||||
sum_rewards.extend(batch_sum_rewards.tolist())
|
||||
batch_max_rewards = einops.reduce((rollout_data["reward"] * mask), "b n -> b", "max")
|
||||
max_rewards.extend(batch_max_rewards.tolist())
|
||||
# batch_successes = einops.reduce((rollout_data["success"] * mask), "b n -> b", "any")
|
||||
batch_successes = einops.reduce((rollout_data["success"] * mask).float(), "b n -> b", "max")
|
||||
batch_successes = einops.reduce((rollout_data["success"] * mask), "b n -> b", "any")
|
||||
all_successes.extend(batch_successes.tolist())
|
||||
if seeds:
|
||||
all_seeds.extend(seeds)
|
||||
@@ -495,7 +494,6 @@ def eval_main(cfg: EvalPipelineConfig):
|
||||
max_parallel_tasks=cfg.env.max_parallel_tasks,
|
||||
verbose=False,
|
||||
)
|
||||
# Print overall stats
|
||||
print("Overall Aggregated Metrics:")
|
||||
print(info["overall"]["aggregated"])
|
||||
|
||||
@@ -548,7 +546,6 @@ def eval_policy_multitask(
|
||||
def eval_task(task_group, task_id, env):
|
||||
"""Evaluates a single task in parallel."""
|
||||
print(f"Evaluating: task_group: {task_group}, task_id: {task_id} ...")
|
||||
# jadechoghari : added multi video eval support
|
||||
if videos_dir is not None:
|
||||
task_videos_dir = videos_dir / f"{task_group}_{task_id}"
|
||||
task_videos_dir.mkdir(parents=True, exist_ok=True)
|
||||
@@ -560,7 +557,6 @@ def eval_policy_multitask(
|
||||
task_videos_dir,
|
||||
return_episode_data,
|
||||
start_seed,
|
||||
# verbose=verbose,
|
||||
)
|
||||
|
||||
per_episode = task_result["per_episode"]
|
||||
|
||||
@@ -299,7 +299,6 @@ def train(cfg: TrainPipelineConfig):
|
||||
wandb_logger.log_video(eval_info["video_paths"][0], step, mode="eval")
|
||||
|
||||
if eval_env:
|
||||
# added by jade, close all env in multi eval setup
|
||||
if cfg.env.multitask_eval:
|
||||
for _task_group, envs_dict in eval_env.items():
|
||||
for _idx, env in envs_dict.items():
|
||||
|
||||
Reference in New Issue
Block a user