diff --git a/.gitignore b/.gitignore index c4d1f769f..7b29fd3b2 100644 --- a/.gitignore +++ b/.gitignore @@ -173,3 +173,4 @@ outputs/ # Dev folders .cache/* +src/libero diff --git a/.gitmodules b/.gitmodules deleted file mode 100644 index 3f68e9eb7..000000000 --- a/.gitmodules +++ /dev/null @@ -1,3 +0,0 @@ -[submodule "Libero"] - path = Libero - url = https://github.com/Lifelong-Robot-Learning/LIBERO.git diff --git a/Libero b/Libero deleted file mode 160000 index 8f1084e31..000000000 --- a/Libero +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 8f1084e3132a39270c3a13ebe37270a43ece2a01 diff --git a/examples/5_train_libero.sh b/examples/5_train_libero.sh old mode 100644 new mode 100755 index 0b8633cd6..cdde853fa --- a/examples/5_train_libero.sh +++ b/examples/5_train_libero.sh @@ -1,9 +1,9 @@ #!/bin/bash # config -REPO_ID=physical-intelligence/libero +REPO_ID=yzembodied/libero_10_image_task_1 TASK=libero_10 -OUTPUT_DIR=./outputs/train_run/smolvla2_libero +OUTPUT_DIR=./outputs/ # clean previous run rm -rf $OUTPUT_DIR @@ -11,7 +11,7 @@ rm -rf $OUTPUT_DIR # training params STEPS=100000 BATCH_SIZE=4 -EVAL_FREQ=2000 +EVAL_FREQ=1 SAVE_FREQ=10000 NUM_WORKERS=0 @@ -33,7 +33,8 @@ MAX_IMAGE_DIM=1024 echo -e "\033[1;33m[WARNING]\033[0m LIBERO is not yet fully supported in this PR!" 
# launch -MUJOCO_GL=egl python src/lerobot/scripts/train.py \ +PYTORCH_ENABLE_MPS_FALLBACK=1 DEVICE=cpu python src/lerobot/scripts/train.py \ + --policy.device=cpu \ --policy.type=$POLICY \ --dataset.repo_id=$REPO_ID \ --env.type=libero \ @@ -48,12 +49,7 @@ MUJOCO_GL=egl python src/lerobot/scripts/train.py \ --policy.max_state_dim=$MAX_STATE_DIM \ --policy.use_amp=$USE_AMP \ --policy.optimizer_lr=$OPTIMIZER_LR \ - --policy.peft_method=$PEFT_METHOD \ --policy.load_vlm_weights=$LOAD_VLM_WEIGHTS \ --policy.repo_id=$VLM_REPO_ID \ - --dataset.use_imagenet_stats=$USE_IMAGENET_STATS \ - --dataset.image_transforms.enable=$ENABLE_IMG_TRANSFORM \ - --dataset.max_num_images=$MAX_NUM_IMAGES \ - --dataset.max_image_dim=$MAX_IMAGE_DIM \ - # --policy.exclude_image_keys=wrist_image \ - # --policy.use_env_state=false + --env.multitask_eval=False \ + --eval.batch_size=1 \ diff --git a/src/lerobot/constants.py b/src/lerobot/constants.py index bc5b2013c..94a91a95a 100644 --- a/src/lerobot/constants.py +++ b/src/lerobot/constants.py @@ -19,8 +19,10 @@ from huggingface_hub.constants import HF_HOME OBS_ENV_STATE = "observation.environment_state" OBS_STATE = "observation.state" -OBS_IMAGE = "observation.image" -OBS_IMAGE_2 = "observation.image2" +# OBS_IMAGE = "observation.image" +# OBS_IMAGE_2 = "observation.image2" +OBS_IMAGE = "image" +OBS_IMAGE_2 = "wrist_image" OBS_IMAGES = "observation.images" ACTION = "action" REWARD = "next.reward" diff --git a/src/lerobot/envs/libero.py b/src/lerobot/envs/libero.py index 412923259..2c7ee0808 100644 --- a/src/lerobot/envs/libero.py +++ b/src/lerobot/envs/libero.py @@ -174,10 +174,12 @@ class LiberoEnv(gym.Env): self.camera_name = camera_name.split( "," ) # agentview_image (main) or robot0_eye_in_hand_image (wrist) + #TODO: jadechoghari, check mapping self.camera_name_mapping = { "agentview_image": OBS_IMAGE, "robot0_eye_in_hand_image": OBS_IMAGE_2, } + self.num_steps_wait = ( 10 # Do nothing for the first few timesteps to wait for the simulator 
def step(self, action):
    """Advance the wrapped simulator by one control step.

    Args:
        action: Array-like exposing ``ndim``; it must be one-dimensional and
            is forwarded unchanged to ``self._env.step``.

    Returns:
        A 5-tuple ``(observation, reward, terminated, truncated, info)``:
        ``observation`` is the raw observation passed through
        ``self._format_raw_obs``; ``reward`` and ``info`` come from
        ``self._env.step``; ``terminated`` is ``True`` when the wrapped env
        reports ``done`` or ``self._env.check_success()`` is truthy;
        ``truncated`` is always ``False``. ``info["is_success"]`` is set to
        the success flag.

    Raises:
        AssertionError: If ``action`` is not one-dimensional.
    """
    # Function-scope import: the module's import block is outside this view.
    import logging

    assert action.ndim == 1, f"expected a 1-D action, got ndim={action.ndim}"
    raw_obs, reward, done, info = self._env.step(action)

    is_success = self._env.check_success()
    # Normalise to a plain bool so callers never receive a numpy scalar or int.
    terminated = bool(done or is_success)
    info["is_success"] = is_success

    # Per-step diagnostics go through logging at DEBUG level instead of an
    # unconditional print, which flooded stdout on every simulator step.
    logging.getLogger(__name__).debug(
        "[LiberoEnv.step] done=%s, is_success=%s, terminated=%s",
        done,
        is_success,
        terminated,
    )

    observation = self._format_raw_obs(raw_obs)
    # This wrapper never reports truncation itself.
    # NOTE(review): the original author's comment says LIBERO raises an error
    # after many steps when the task is never completed -- presumably a
    # horizon limit inside the simulator; confirm and handle it in the caller.
    truncated = False
    return observation, reward, terminated, truncated, info
observation = preprocess_observation(observation) if return_observations: @@ -185,8 +185,11 @@ def rollout( all_successes.append(torch.tensor(successes)) step += 1 + print(step) running_success_rate = ( - einops.reduce(torch.stack(all_successes, dim=1), "b n -> b", "any").numpy().mean() + # einops.reduce(torch.stack(all_successes, dim=1), "b n -> b", "any").numpy().mean() #TODO: changed by jade + einops.reduce(torch.stack(all_successes, dim=1), "b n -> b", "max") + ) progbar.set_postfix({"running_success_rate": f"{running_success_rate.item() * 100:.1f}%"}) progbar.update() @@ -315,7 +318,8 @@ def eval_policy( sum_rewards.extend(batch_sum_rewards.tolist()) batch_max_rewards = einops.reduce((rollout_data["reward"] * mask), "b n -> b", "max") max_rewards.extend(batch_max_rewards.tolist()) - batch_successes = einops.reduce((rollout_data["success"] * mask), "b n -> b", "any") + # batch_successes = einops.reduce((rollout_data["success"] * mask), "b n -> b", "any") + batch_successes = einops.reduce((rollout_data["success"] * mask).float(), "b n -> b", "max") all_successes.extend(batch_successes.tolist()) if seeds: all_seeds.extend(seeds)