diff --git a/examples/5_train_libero.sh b/examples/5_train_libero.sh
index cdde853fa..5c7fe5d0b 100755
--- a/examples/5_train_libero.sh
+++ b/examples/5_train_libero.sh
@@ -16,7 +16,7 @@ SAVE_FREQ=10000
 NUM_WORKERS=0
 
 # model params
-POLICY=smolvla
+POLICY=pi0
 USE_AMP=false
 OPTIMIZER_LR=1e-4
 PEFT_METHOD=lora
@@ -30,11 +30,13 @@ USE_IMAGENET_STATS=false
 ENABLE_IMG_TRANSFORM=true
 MAX_NUM_IMAGES=2
 MAX_IMAGE_DIM=1024
+unset LEROBOT_HOME
+unset HF_LEROBOT_HOME
 
 echo -e "\033[1;33m[WARNING]\033[0m LIBERO is not yet fully supported in this PR!"
+
 # launch
-PYTORCH_ENABLE_MPS_FALLBACK=1 DEVICE=cpu python src/lerobot/scripts/train.py \
-    --policy.device=cpu \
+python src/lerobot/scripts/train.py \
     --policy.type=$POLICY \
     --dataset.repo_id=$REPO_ID \
     --env.type=libero \
@@ -45,11 +47,7 @@ PYTORCH_ENABLE_MPS_FALLBACK=1 DEVICE=cpu python src/lerobot/scripts/train.py \
     --eval_freq=$EVAL_FREQ \
     --save_freq=$SAVE_FREQ \
     --num_workers=$NUM_WORKERS \
-    --policy.max_action_dim=$MAX_ACTION_DIM \
-    --policy.max_state_dim=$MAX_STATE_DIM \
-    --policy.use_amp=$USE_AMP \
-    --policy.optimizer_lr=$OPTIMIZER_LR \
-    --policy.load_vlm_weights=$LOAD_VLM_WEIGHTS \
     --policy.repo_id=$VLM_REPO_ID \
-    --env.multitask_eval=False \
+    --env.multitask_eval=True \
     --eval.batch_size=1 \
+    --eval.n_episodes=1
diff --git a/examples/test.sh b/examples/test.sh
new file mode 100644
index 000000000..5dfcd581e
--- /dev/null
+++ b/examples/test.sh
@@ -0,0 +1,22 @@
+#!/bin/bash
+
+# Example evaluation script for LeRobot policies
+unset LEROBOT_HOME
+unset HF_LEROBOT_HOME
+# === CONFIGURATION ===
+POLICY_PATH="ganatrask/lerobot-pi0-libero-object"  # or outputs/train/.../pretrained_model
+TASK=libero_object
+ENV_TYPE="libero"
+BATCH_SIZE=1
+N_EPISODES=1
+USE_AMP=false
+DEVICE=cuda
+
+# === RUN EVALUATION ===
+python src/lerobot/scripts/eval.py \
+    --policy.path="$POLICY_PATH" \
+    --env.type="$ENV_TYPE" \
+    --eval.batch_size="$BATCH_SIZE" \
+    --eval.n_episodes="$N_EPISODES" \
+    --env.multitask_eval=False \
+    --env.task=$TASK
diff --git a/obs_000000.png b/obs_000000.png
new file mode 100644
index 000000000..4f083783c
Binary files /dev/null and b/obs_000000.png differ
diff --git a/obs_000100.png b/obs_000100.png
new file mode 100644
index 000000000..f5a614487
Binary files /dev/null and b/obs_000100.png differ
diff --git a/obs_000200.png b/obs_000200.png
new file mode 100644
index 000000000..25fc8bb52
Binary files /dev/null and b/obs_000200.png differ
diff --git a/obs_000300.png b/obs_000300.png
new file mode 100644
index 000000000..09b2604e7
Binary files /dev/null and b/obs_000300.png differ
diff --git a/obs_000400.png b/obs_000400.png
new file mode 100644
index 000000000..42912b8ec
Binary files /dev/null and b/obs_000400.png differ
diff --git a/obs_000500.png b/obs_000500.png
new file mode 100644
index 000000000..b28399da2
Binary files /dev/null and b/obs_000500.png differ
diff --git a/src/lerobot/constants.py b/src/lerobot/constants.py
index 94a91a95a..3d6f9edc3 100644
--- a/src/lerobot/constants.py
+++ b/src/lerobot/constants.py
@@ -19,10 +19,9 @@ from huggingface_hub.constants import HF_HOME
 
 OBS_ENV_STATE = "observation.environment_state"
 OBS_STATE = "observation.state"
-# OBS_IMAGE = "observation.image"
-# OBS_IMAGE_2 = "observation.image2"
-OBS_IMAGE = "image"
-OBS_IMAGE_2 = "wrist_image"
+# Short LIBERO camera keys; the env features_map namespaces them under "observation.images.".
+OBS_IMAGE = "image"
+OBS_IMAGE_2 = "image2"
 OBS_IMAGES = "observation.images"
 ACTION = "action"
 REWARD = "next.reward"
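The constants above only reach a policy through the LIBERO features map changed in the next hunk, which namespaces them under observation.images. A minimal sketch of the resulting key mapping, using only values visible in this diff (OBS_IMAGE = "image", OBS_IMAGE_2 = "image2"):

    # Sketch: how the constants above compose into the policy-facing keys
    # consumed by eval.py. Mirrors LiberoEnv.features_map (next hunk).
    OBS_IMAGE = "image"
    OBS_IMAGE_2 = "image2"

    features_map = {
        "pixels/agentview_image": f"observation.images.{OBS_IMAGE}",
        "pixels/robot0_eye_in_hand_image": f"observation.images.{OBS_IMAGE_2}",
    }

    # The agent-view camera therefore arrives at the policy as:
    assert features_map["pixels/agentview_image"] == "observation.images.image"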
diff --git a/src/lerobot/envs/configs.py b/src/lerobot/envs/configs.py
index da3e2d5bf..47a23eb04 100644
--- a/src/lerobot/envs/configs.py
+++ b/src/lerobot/envs/configs.py
@@ -295,8 +295,8 @@ class LiberoEnv(EnvConfig):
         default_factory=lambda: {
             "action": ACTION,
             "agent_pos": OBS_STATE,
-            "pixels/agentview_image": f"{OBS_IMAGE}",
-            "pixels/robot0_eye_in_hand_image": f"{OBS_IMAGE_2}",
+            "pixels/agentview_image": f"observation.images.{OBS_IMAGE}",
+            "pixels/robot0_eye_in_hand_image": f"observation.images.{OBS_IMAGE_2}",
         }
     )
 
diff --git a/src/lerobot/envs/libero.py b/src/lerobot/envs/libero.py
index 121728abf..10bc4aa9f 100644
--- a/src/lerobot/envs/libero.py
+++ b/src/lerobot/envs/libero.py
@@ -32,6 +32,9 @@ def create_libero_envs(
     Returns:
         dict[str, dict[str, list[LiberoEnv]]]: keys are task_suite and values are list of LiberoEnv envs.
     """
+    print("num envs", n_envs)
+    print("multitask_eval", multitask_eval)
+    print("gym_kwargs", gym_kwargs)
     if gym_kwargs is None:
         gym_kwargs = {}
@@ -45,6 +48,7 @@ def create_libero_envs(
         episode_indices = list(range(n_envs))
     elif len(tasks_id) < n_envs and n_envs % len(tasks_id) == 0:
         n_repeat = n_envs // len(tasks_id)
+        print("n_repeat", n_repeat)
         episode_indices = []
         for i in range(len(tasks_id)):
             episode_indices.extend(list(range(n_repeat)))
@@ -313,11 +317,9 @@ class LiberoEnv(gym.Env):
     def step(self, action):
         assert action.ndim == 1
         raw_obs, reward, done, info = self._env.step(action)
-
         is_success = self._env.check_success()
         terminated = done or is_success
         info["is_success"] = is_success
-        print(f"[LiberoEnv.step] done={done}, is_success={is_success}, terminated={terminated}")
         observation = self._format_raw_obs(raw_obs)
         truncated = False  # note if it is unable to complete get libero error after many steps
diff --git a/src/lerobot/envs/utils.py b/src/lerobot/envs/utils.py
index 00676a011..2cf9efcfe 100644
--- a/src/lerobot/envs/utils.py
+++ b/src/lerobot/envs/utils.py
@@ -97,7 +97,6 @@ def env_to_policy_features(env_cfg: EnvConfig) -> dict[str, PolicyFeature]:
 
         policy_key = env_cfg.features_map[key]
         policy_features[policy_key] = feature
-
     return policy_features
 
 
diff --git a/src/lerobot/policies/factory.py b/src/lerobot/policies/factory.py
index ef56bdb61..cab752112 100644
--- a/src/lerobot/policies/factory.py
+++ b/src/lerobot/policies/factory.py
@@ -156,7 +156,6 @@ def make_policy(
                 "by default without stats from a dataset."
             )
         features = env_to_policy_features(env_cfg)
-
         cfg.output_features = {key: ft for key, ft in features.items() if ft.type is FeatureType.ACTION}
         cfg.input_features = {key: ft for key, ft in features.items() if key not in cfg.output_features}
         kwargs["config"] = cfg
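For the make_policy hunk above: once env_to_policy_features returns the flat feature dict, the config splits it into outputs (ACTION-typed features) and inputs (everything else). A minimal sketch of that split, assuming PolicyFeature and FeatureType from lerobot.configs.types and with made-up shapes:

    from lerobot.configs.types import FeatureType, PolicyFeature

    # Toy feature dict shaped like env_to_policy_features output; shapes are invented.
    features = {
        "action": PolicyFeature(type=FeatureType.ACTION, shape=(7,)),
        "observation.state": PolicyFeature(type=FeatureType.STATE, shape=(8,)),
        "observation.images.image": PolicyFeature(type=FeatureType.VISUAL, shape=(3, 256, 256)),
    }

    # The two comprehensions from make_policy:
    output_features = {k: ft for k, ft in features.items() if ft.type is FeatureType.ACTION}
    input_features = {k: ft for k, ft in features.items() if k not in output_features}

    assert list(output_features) == ["action"]
    assert set(input_features) == {"observation.state", "observation.images.image"}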
diff --git a/src/lerobot/scripts/eval.py b/src/lerobot/scripts/eval.py
index a3c7d822c..77b10ff5a 100644
--- a/src/lerobot/scripts/eval.py
+++ b/src/lerobot/scripts/eval.py
@@ -56,7 +56,7 @@ from copy import deepcopy
 from dataclasses import asdict
 from pathlib import Path
 from pprint import pformat
-
+import concurrent
 import einops
 import gymnasium as gym
 import numpy as np
@@ -156,10 +156,29 @@ def rollout(
         # Infer "task" from attributes of environments.
         # TODO: works with SyncVectorEnv but not AsyncVectorEnv
         observation = add_envs_task(env, observation)
+        if step % 100 == 0:
+            # lazy import: imageio is only needed for this debug dump
+            import imageio.v2 as imageio
+
+            img = observation["observation.images.image"]  # (1, 3, 256, 256)
+            if isinstance(img, torch.Tensor):
+                img = img.detach().cpu().numpy()
+
+            # drop the batch dim -> (3, 256, 256)
+            img = img[0]
+
+            # channel-first -> channel-last (256, 256, 3)
+            img = np.transpose(img, (1, 2, 0))
+
+            # scale [0, 1] floats to uint8
+            img = (img * 255).clip(0, 255).astype(np.uint8)
+
+            # dump the frame for inspection (source of the obs_*.png files above)
+            imageio.imwrite(f"obs_{step:06d}.png", img)
 
         with torch.inference_mode():
             action = policy.select_action(observation)
 
         # Convert to CPU / numpy.
         action = action.to("cpu").numpy()
         assert action.ndim == 2, "Action dimensions should be (batch, action_dim)"
@@ -177,7 +196,12 @@ def rollout(
             successes = [False] * env.num_envs
 
         # Keep track of which environments are done so far.
         done = terminated | truncated | done
+        # Force every environment to report done on the final step so the
+        # rollout always records a terminal state.
+        if step + 1 == max_steps:
+            done = np.ones_like(done, dtype=bool)
+
         all_actions.append(torch.from_numpy(action))
         all_rewards.append(torch.from_numpy(reward))
 
@@ -185,7 +209,6 @@ def rollout(
         all_successes.append(torch.tensor(successes))
 
         step += 1
-        print(step)
         running_success_rate = (
             # einops.reduce(torch.stack(all_successes, dim=1), "b n -> b", "any").numpy().mean() #TODO: changed by jade
             einops.reduce(torch.stack(all_successes, dim=1), "b n -> b", "max")
@@ -254,6 +277,7 @@ def eval_policy(
     # Determine how many batched rollouts we need to get n_episodes. Note that if n_episodes is not evenly
     # divisible by env.num_envs we end up discarding some data in the last batch.
     n_batches = n_episodes // env.num_envs + int((n_episodes % env.num_envs) != 0)
+    print("n_batches", n_batches)
 
     # Keep track of some metrics.
     sum_rewards = []
@@ -403,7 +427,6 @@ def eval_policy(
             "eval_ep_s": (time.time() - start) / n_episodes,
         },
     }
-
     if return_episode_data:
         info["episodes"] = episode_data
@@ -457,11 +480,27 @@ def _compile_episode_data(
 
     return data_dict
 
+
+def set_global_seed(seed):
+    """Set seed for reproducibility."""
+    import random
+
+    random.seed(seed)
+    np.random.seed(seed)
+    torch.manual_seed(seed)
+    if torch.cuda.is_available():
+        torch.cuda.manual_seed_all(seed)
+
+
+def log_output_dir(out_dir):
+    logging.info(f"Output dir: {out_dir}")
+
+
 @parser.wrap()
 def eval(cfg: EvalPipelineConfig):
     logging.info(pformat(asdict(cfg)))
 
     # Check device is available
-    device = get_safe_torch_device(cfg.device, log=True)
+    device = get_safe_torch_device(cfg.policy.device, log=True)
     torch.backends.cudnn.benchmark = True
     torch.backends.cuda.matmul.allow_tf32 = True
@@ -477,12 +516,11 @@ def eval(cfg: EvalPipelineConfig):
     logging.info("Making policy.")
     policy = make_policy(
         cfg=cfg.policy,
-        device=device,
         env_cfg=cfg.env,
     )
     policy.eval()
 
-    with torch.no_grad(), torch.autocast(device_type=device.type) if cfg.use_amp else nullcontext():
+    with torch.no_grad(), torch.autocast(device_type=device.type) if cfg.policy.use_amp else nullcontext():
         if cfg.env.multitask_eval:
             info = eval_policy_multitask(
                 env,
@@ -555,7 +593,6 @@ def eval_policy_multitask(
                 videos_dir,
                 return_episode_data,
                 start_seed,
-                verbose=verbose,
             )
 
         per_episode = task_result["per_episode"]
@@ -642,4 +679,4 @@ def eval_policy_multitask(
 
 if __name__ == "__main__":
     init_logging()
-    eval_main()
+    eval()
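The running success rate in rollout above stacks the per-step success flags and reduces over time with "max", so an episode counts as successful if any step succeeded. A self-contained sketch with toy values (2 envs, 2 steps; the real code applies the same reduction to bool tensors):

    import einops
    import torch

    # Success flags per step for 2 envs; env 0 succeeds at step 1.
    all_successes = [torch.tensor([False, False]), torch.tensor([True, False])]
    stacked = torch.stack(all_successes, dim=1).float()  # (envs, steps)
    per_episode = einops.reduce(stacked, "b n -> b", "max")  # any-step success
    print(per_episode.numpy().mean())  # 0.5 -> 50% running success rate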
diff --git a/src/lerobot/scripts/train.py b/src/lerobot/scripts/train.py
index 6cb476afb..9234f9a5b 100644
--- a/src/lerobot/scripts/train.py
+++ b/src/lerobot/scripts/train.py
@@ -278,6 +278,7 @@ def train(cfg: TrainPipelineConfig):
                     max_episodes_rendered=4,
                     start_seed=cfg.seed,
                 )
+                aggregated = eval_info["aggregated"]
 
             eval_metrics = {
                 "avg_sum_reward": AverageMeter("∑rwrd", ":.3f"),
@@ -287,9 +288,9 @@ def train(cfg: TrainPipelineConfig):
             eval_tracker = MetricsTracker(
                 cfg.batch_size, dataset.num_frames, dataset.num_episodes, eval_metrics, initial_step=step
             )
-            eval_tracker.eval_s = eval_info["aggregated"].pop("eval_s")
-            eval_tracker.avg_sum_reward = eval_info["aggregated"].pop("avg_sum_reward")
-            eval_tracker.pc_success = eval_info["aggregated"].pop("pc_success")
+            eval_tracker.eval_s = aggregated.pop("eval_s")
+            eval_tracker.avg_sum_reward = aggregated.pop("avg_sum_reward")
+            eval_tracker.pc_success = aggregated.pop("pc_success")
             logging.info(eval_tracker)
             if wandb_logger:
                 wandb_log_dict = {**eval_tracker.to_dict(), **eval_info}
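A short sketch of the aggregated handling above: the three tracker metrics are popped out of eval_info["aggregated"], so only the remaining aggregate keys (such as eval_ep_s from eval.py's info dict) flow into wandb_log_dict via **eval_info. Toy values shaped like eval_policy's output:

    # Toy eval_info mirroring the "aggregated" block built in eval.py.
    eval_info = {
        "aggregated": {
            "avg_sum_reward": 1.5,
            "pc_success": 80.0,
            "eval_s": 42.0,
            "eval_ep_s": 4.2,
        }
    }
    aggregated = eval_info["aggregated"]
    eval_s = aggregated.pop("eval_s")
    avg_sum_reward = aggregated.pop("avg_sum_reward")
    pc_success = aggregated.pop("pc_success")
    print(eval_info)  # {'aggregated': {'eval_ep_s': 4.2}}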