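Set up local CPU debugging for LIBERO training/eval (dataset repo, image keys, env step logging, success reduction)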

2026-07-23 17:56:07 +00:00 · 2025-08-10 01:32:28 -04:00
parent 4c19a71d7c
commit a25480d363
7 changed files with 35 additions and 22 deletions
@@ -173,3 +173,4 @@ outputs/
 # Dev folders
 .cache/*
 src/libero
@@ -1,3 +0,0 @@
 [submodule "Libero"]
 	path = Libero
 	url = https://github.com/Lifelong-Robot-Learning/LIBERO.git
@@ -1,9 +1,9 @@
 #!/bin/bash
 # config
-REPO_ID=physical-intelligence/libero
+REPO_ID=yzembodied/libero_10_image_task_1
 TASK=libero_10
-OUTPUT_DIR=./outputs/train_run/smolvla2_libero
+OUTPUT_DIR=./outputs/
 # Clean the previous run.
 # NOTE(review): this recursively deletes everything under OUTPUT_DIR, so
 # OUTPUT_DIR should point at a run-specific directory, not a shared outputs root.
 # The expansion is quoted so paths with spaces or glob characters are not split.
 rm -rf -- "$OUTPUT_DIR"
@@ -11,7 +11,7 @@ rm -rf $OUTPUT_DIR
 # training params
 STEPS=100000
 BATCH_SIZE=4
-EVAL_FREQ=2000
+EVAL_FREQ=1
 SAVE_FREQ=10000
 NUM_WORKERS=0
@@ -33,7 +33,8 @@ MAX_IMAGE_DIM=1024
 echo -e "\033[1;33m[WARNING]\033[0m LIBERO is not yet fully supported in this PR!"
 # launch
-MUJOCO_GL=egl python src/lerobot/scripts/train.py \
+PYTORCH_ENABLE_MPS_FALLBACK=1 DEVICE=cpu python src/lerobot/scripts/train.py \
  --policy.device=cpu \
  --policy.type=$POLICY \
  --dataset.repo_id=$REPO_ID \
  --env.type=libero \
@@ -48,12 +49,7 @@ MUJOCO_GL=egl python src/lerobot/scripts/train.py \
  --policy.max_state_dim=$MAX_STATE_DIM \
  --policy.use_amp=$USE_AMP \
  --policy.optimizer_lr=$OPTIMIZER_LR \
  --policy.peft_method=$PEFT_METHOD \
  --policy.load_vlm_weights=$LOAD_VLM_WEIGHTS \
  --policy.repo_id=$VLM_REPO_ID \
-  --dataset.use_imagenet_stats=$USE_IMAGENET_STATS \
+  --env.multitask_eval=False \
-  --dataset.image_transforms.enable=$ENABLE_IMG_TRANSFORM \
+  --eval.batch_size=1 \
  --dataset.max_num_images=$MAX_NUM_IMAGES \
  --dataset.max_image_dim=$MAX_IMAGE_DIM \
  # --policy.exclude_image_keys=wrist_image \
  # --policy.use_env_state=false
@@ -19,8 +19,10 @@ from huggingface_hub.constants import HF_HOME
 OBS_ENV_STATE = "observation.environment_state"
 OBS_STATE = "observation.state"
-OBS_IMAGE = "observation.image"
+# OBS_IMAGE = "observation.image"
-OBS_IMAGE_2 = "observation.image2"
+# OBS_IMAGE_2 = "observation.image2"
 OBS_IMAGE = "image"
 OBS_IMAGE_2 = "wrist_image"
 OBS_IMAGES = "observation.images"
 ACTION = "action"
 REWARD = "next.reward"
@@ -174,10 +174,12 @@ class LiberoEnv(gym.Env):
        self.camera_name = camera_name.split(
            ","
        )  # agentview_image (main) or robot0_eye_in_hand_image (wrist)
        #TODO: jadechoghari, check mapping
        self.camera_name_mapping = {
            "agentview_image": OBS_IMAGE,
            "robot0_eye_in_hand_image": OBS_IMAGE_2,
        }
        self.num_steps_wait = (
            10  # Do nothing for the first few timesteps to wait for the simulator drops objects
        )
@@ -220,7 +222,7 @@ class LiberoEnv(gym.Env):
                    "agent_pos": spaces.Box(
                        low=-1000.0,
                        high=1000.0,
-                        shape=(8,),
+                        shape=(8,), #TODO: jadechoghari, check compatible
                        dtype=np.float64,
                    ),
                }
@@ -293,7 +295,7 @@ class LiberoEnv(gym.Env):
        info = {"is_success": False}
        return observation, info
-    def step(self, action):
+    def step1(self, action):
        assert action.ndim == 1
        raw_obs, reward, done, info = self._env.step(action)
@@ -307,6 +309,18 @@ class LiberoEnv(gym.Env):
            print(self.task, self.task_id, done, is_success)
        truncated = False
        return observation, reward, terminated, truncated, info
    def step(self, action):
        """Advance the wrapped simulator by one action.

        Args:
            action: Action array; ``action.ndim`` must be 1. It is forwarded
                unchanged to ``self._env.step``.

        Returns:
            A ``(observation, reward, terminated, truncated, info)`` tuple.
            ``observation`` is the raw simulator observation passed through
            ``self._format_raw_obs``; ``terminated`` is truthy when the
            simulator reports ``done`` or ``self._env.check_success()`` is
            truthy; ``truncated`` is always ``False``; ``info`` is the
            simulator's info mapping with an added ``"is_success"`` entry.
        """
        import logging

        assert action.ndim == 1
        raw_obs, reward, done, info = self._env.step(action)
        is_success = self._env.check_success()
        terminated = done or is_success
        info["is_success"] = is_success
        # Logged at DEBUG level instead of printed: this runs on every
        # simulator step, and an unconditional print floods stdout during
        # rollouts. Enable DEBUG logging to get the per-step trace back.
        logging.getLogger(__name__).debug(
            "[LiberoEnv.step] done=%s, is_success=%s, terminated=%s",
            done,
            is_success,
            terminated,
        )
        observation = self._format_raw_obs(raw_obs)
        # This wrapper never truncates episodes itself.
        # NOTE(review): the original note here suggested that LIBERO raises an
        # error after many steps when the task is never completed - confirm.
        truncated = False
        return observation, reward, terminated, truncated, info
    def close(self):
        """Close the underlying environment held in ``self._env``."""
        simulator = self._env
        simulator.close()
@@ -143,7 +143,7 @@ def rollout(
        leave=False,
    )
    check_env_attributes_and_types(env)
-    while not np.all(done):
+    while not np.all(done) and step < max_steps:
        # Numpy array to tensor and changing dictionary keys to LeRobot policy format.
        observation = preprocess_observation(observation)
        if return_observations:
@@ -185,8 +185,11 @@ def rollout(
        all_successes.append(torch.tensor(successes))
        step += 1
        print(step)
        running_success_rate = (
-            einops.reduce(torch.stack(all_successes, dim=1), "b n -> b", "any").numpy().mean()
+            # einops.reduce(torch.stack(all_successes, dim=1), "b n -> b", "any").numpy().mean() #TODO: changed by jade
            einops.reduce(torch.stack(all_successes, dim=1), "b n -> b", "max")
        )
        progbar.set_postfix({"running_success_rate": f"{running_success_rate.item() * 100:.1f}%"})
        progbar.update()
@@ -315,7 +318,8 @@ def eval_policy(
        sum_rewards.extend(batch_sum_rewards.tolist())
        batch_max_rewards = einops.reduce((rollout_data["reward"] * mask), "b n -> b", "max")
        max_rewards.extend(batch_max_rewards.tolist())
-        batch_successes = einops.reduce((rollout_data["success"] * mask), "b n -> b", "any")
+        # batch_successes = einops.reduce((rollout_data["success"] * mask), "b n -> b", "any")
        batch_successes = einops.reduce((rollout_data["success"] * mask).float(), "b n -> b", "max")
        all_successes.extend(batch_successes.tolist())
        if seeds:
            all_seeds.extend(seeds)