add changes

This commit is contained in:
Jade Choghari
2025-08-10 01:32:28 -04:00
parent 4c19a71d7c
commit a25480d363
7 changed files with 35 additions and 22 deletions
+1
View File
@@ -173,3 +173,4 @@ outputs/
# Dev folders # Dev folders
.cache/* .cache/*
src/libero
-3
View File
@@ -1,3 +0,0 @@
[submodule "Libero"]
path = Libero
url = https://github.com/Lifelong-Robot-Learning/LIBERO.git
Submodule Libero deleted from 8f1084e313
Regular → Executable
+7 -11
View File
@@ -1,9 +1,9 @@
#!/bin/bash #!/bin/bash
# config # config
REPO_ID=physical-intelligence/libero REPO_ID=yzembodied/libero_10_image_task_1
TASK=libero_10 TASK=libero_10
OUTPUT_DIR=./outputs/train_run/smolvla2_libero OUTPUT_DIR=./outputs/
# clean previous run # clean previous run
rm -rf $OUTPUT_DIR rm -rf $OUTPUT_DIR
@@ -11,7 +11,7 @@ rm -rf $OUTPUT_DIR
# training params # training params
STEPS=100000 STEPS=100000
BATCH_SIZE=4 BATCH_SIZE=4
EVAL_FREQ=2000 EVAL_FREQ=1
SAVE_FREQ=10000 SAVE_FREQ=10000
NUM_WORKERS=0 NUM_WORKERS=0
@@ -33,7 +33,8 @@ MAX_IMAGE_DIM=1024
echo -e "\033[1;33m[WARNING]\033[0m LIBERO is not yet fully supported in this PR!" echo -e "\033[1;33m[WARNING]\033[0m LIBERO is not yet fully supported in this PR!"
# launch # launch
MUJOCO_GL=egl python src/lerobot/scripts/train.py \ PYTORCH_ENABLE_MPS_FALLBACK=1 DEVICE=cpu python src/lerobot/scripts/train.py \
--policy.device=cpu \
--policy.type=$POLICY \ --policy.type=$POLICY \
--dataset.repo_id=$REPO_ID \ --dataset.repo_id=$REPO_ID \
--env.type=libero \ --env.type=libero \
@@ -48,12 +49,7 @@ MUJOCO_GL=egl python src/lerobot/scripts/train.py \
--policy.max_state_dim=$MAX_STATE_DIM \ --policy.max_state_dim=$MAX_STATE_DIM \
--policy.use_amp=$USE_AMP \ --policy.use_amp=$USE_AMP \
--policy.optimizer_lr=$OPTIMIZER_LR \ --policy.optimizer_lr=$OPTIMIZER_LR \
--policy.peft_method=$PEFT_METHOD \
--policy.load_vlm_weights=$LOAD_VLM_WEIGHTS \ --policy.load_vlm_weights=$LOAD_VLM_WEIGHTS \
--policy.repo_id=$VLM_REPO_ID \ --policy.repo_id=$VLM_REPO_ID \
--dataset.use_imagenet_stats=$USE_IMAGENET_STATS \ --env.multitask_eval=False \
--dataset.image_transforms.enable=$ENABLE_IMG_TRANSFORM \ --eval.batch_size=1 \
--dataset.max_num_images=$MAX_NUM_IMAGES \
--dataset.max_image_dim=$MAX_IMAGE_DIM \
# --policy.exclude_image_keys=wrist_image \
# --policy.use_env_state=false
+4 -2
View File
@@ -19,8 +19,10 @@ from huggingface_hub.constants import HF_HOME
OBS_ENV_STATE = "observation.environment_state" OBS_ENV_STATE = "observation.environment_state"
OBS_STATE = "observation.state" OBS_STATE = "observation.state"
OBS_IMAGE = "observation.image" # OBS_IMAGE = "observation.image"
OBS_IMAGE_2 = "observation.image2" # OBS_IMAGE_2 = "observation.image2"
OBS_IMAGE = "image"
OBS_IMAGE_2 = "wrist_image"
OBS_IMAGES = "observation.images" OBS_IMAGES = "observation.images"
ACTION = "action" ACTION = "action"
REWARD = "next.reward" REWARD = "next.reward"
+16 -2
View File
@@ -174,10 +174,12 @@ class LiberoEnv(gym.Env):
self.camera_name = camera_name.split( self.camera_name = camera_name.split(
"," ","
) # agentview_image (main) or robot0_eye_in_hand_image (wrist) ) # agentview_image (main) or robot0_eye_in_hand_image (wrist)
#TODO: jadechoghari, check mapping
self.camera_name_mapping = { self.camera_name_mapping = {
"agentview_image": OBS_IMAGE, "agentview_image": OBS_IMAGE,
"robot0_eye_in_hand_image": OBS_IMAGE_2, "robot0_eye_in_hand_image": OBS_IMAGE_2,
} }
self.num_steps_wait = ( self.num_steps_wait = (
10 # Do nothing for the first few timesteps to wait for the simulator drops objects 10 # Do nothing for the first few timesteps to wait for the simulator drops objects
) )
@@ -220,7 +222,7 @@ class LiberoEnv(gym.Env):
"agent_pos": spaces.Box( "agent_pos": spaces.Box(
low=-1000.0, low=-1000.0,
high=1000.0, high=1000.0,
shape=(8,), shape=(8,), #TODO: jadechoghari, check compatible
dtype=np.float64, dtype=np.float64,
), ),
} }
@@ -293,7 +295,7 @@ class LiberoEnv(gym.Env):
info = {"is_success": False} info = {"is_success": False}
return observation, info return observation, info
def step(self, action): def step1(self, action):
assert action.ndim == 1 assert action.ndim == 1
raw_obs, reward, done, info = self._env.step(action) raw_obs, reward, done, info = self._env.step(action)
@@ -307,6 +309,18 @@ class LiberoEnv(gym.Env):
print(self.task, self.task_id, done, is_success) print(self.task, self.task_id, done, is_success)
truncated = False truncated = False
return observation, reward, terminated, truncated, info return observation, reward, terminated, truncated, info
def step(self, action):
    """Advance the wrapped environment by one action.

    Args:
        action: 1-D array-like action forwarded unchanged to
            ``self._env.step``. It must expose ``ndim``.

    Returns:
        A ``(observation, reward, terminated, truncated, info)`` tuple.
        ``observation`` is the raw observation passed through
        ``self._format_raw_obs``; ``terminated`` is true when the wrapped
        env reports ``done`` or ``check_success()`` is true; ``truncated``
        is always ``False``; ``info`` gains an ``"is_success"`` entry.

    Raises:
        AssertionError: if ``action`` is not one-dimensional.
    """
    # Function-scope import keeps this block self-contained; after the
    # first call it is a cheap lookup of an already-loaded module.
    import logging

    assert action.ndim == 1, f"expected a 1-D action, got ndim={action.ndim}"

    raw_obs, reward, done, info = self._env.step(action)
    is_success = self._env.check_success()
    # The episode ends either when the wrapped env says so or as soon as the
    # task's success condition is met.
    terminated = done or is_success
    info["is_success"] = is_success

    # Per-step diagnostics go through logging at DEBUG level instead of an
    # unconditional print, so evaluation loops do not flood stdout.
    logging.getLogger(__name__).debug(
        "[LiberoEnv.step] done=%s, is_success=%s, terminated=%s",
        done,
        is_success,
        terminated,
    )

    observation = self._format_raw_obs(raw_obs)
    # Truncation is never signalled from here.
    truncated = False
    # NOTE(review): the original author's note says that when the task cannot
    # be completed, LIBERO raises an error after many steps -- confirm and
    # make sure callers bound the episode length.
    return observation, reward, terminated, truncated, info
def close(self): def close(self):
self._env.close() self._env.close()
+7 -3
View File
@@ -143,7 +143,7 @@ def rollout(
leave=False, leave=False,
) )
check_env_attributes_and_types(env) check_env_attributes_and_types(env)
while not np.all(done): while not np.all(done) and step < max_steps:
# Numpy array to tensor and changing dictionary keys to LeRobot policy format. # Numpy array to tensor and changing dictionary keys to LeRobot policy format.
observation = preprocess_observation(observation) observation = preprocess_observation(observation)
if return_observations: if return_observations:
@@ -185,8 +185,11 @@ def rollout(
all_successes.append(torch.tensor(successes)) all_successes.append(torch.tensor(successes))
step += 1 step += 1
print(step)
running_success_rate = ( running_success_rate = (
einops.reduce(torch.stack(all_successes, dim=1), "b n -> b", "any").numpy().mean() # einops.reduce(torch.stack(all_successes, dim=1), "b n -> b", "any").numpy().mean() #TODO: changed by jade
einops.reduce(torch.stack(all_successes, dim=1), "b n -> b", "max")
) )
progbar.set_postfix({"running_success_rate": f"{running_success_rate.item() * 100:.1f}%"}) progbar.set_postfix({"running_success_rate": f"{running_success_rate.item() * 100:.1f}%"})
progbar.update() progbar.update()
@@ -315,7 +318,8 @@ def eval_policy(
sum_rewards.extend(batch_sum_rewards.tolist()) sum_rewards.extend(batch_sum_rewards.tolist())
batch_max_rewards = einops.reduce((rollout_data["reward"] * mask), "b n -> b", "max") batch_max_rewards = einops.reduce((rollout_data["reward"] * mask), "b n -> b", "max")
max_rewards.extend(batch_max_rewards.tolist()) max_rewards.extend(batch_max_rewards.tolist())
batch_successes = einops.reduce((rollout_data["success"] * mask), "b n -> b", "any") # batch_successes = einops.reduce((rollout_data["success"] * mask), "b n -> b", "any")
batch_successes = einops.reduce((rollout_data["success"] * mask).float(), "b n -> b", "max")
all_successes.extend(batch_successes.tolist()) all_successes.extend(batch_successes.tolist())
if seeds: if seeds:
all_seeds.extend(seeds) all_seeds.extend(seeds)