add changes

This commit is contained in:
Jade Choghari
2025-08-10 01:32:28 -04:00
parent 4c19a71d7c
commit a25480d363
7 changed files with 35 additions and 22 deletions
+1
View File
@@ -173,3 +173,4 @@ outputs/
# Dev folders
.cache/*
src/libero
-3
View File
@@ -1,3 +0,0 @@
[submodule "Libero"]
path = Libero
url = https://github.com/Lifelong-Robot-Learning/LIBERO.git
Submodule Libero deleted from 8f1084e313
Regular → Executable
+7 -11
View File
@@ -1,9 +1,9 @@
#!/bin/bash
# config
REPO_ID=physical-intelligence/libero
REPO_ID=yzembodied/libero_10_image_task_1
TASK=libero_10
OUTPUT_DIR=./outputs/train_run/smolvla2_libero
OUTPUT_DIR=./outputs/
# clean previous run
rm -rf $OUTPUT_DIR
@@ -11,7 +11,7 @@ rm -rf $OUTPUT_DIR
# training params
STEPS=100000
BATCH_SIZE=4
EVAL_FREQ=2000
EVAL_FREQ=1
SAVE_FREQ=10000
NUM_WORKERS=0
@@ -33,7 +33,8 @@ MAX_IMAGE_DIM=1024
echo -e "\033[1;33m[WARNING]\033[0m LIBERO is not yet fully supported in this PR!"
# launch
MUJOCO_GL=egl python src/lerobot/scripts/train.py \
PYTORCH_ENABLE_MPS_FALLBACK=1 DEVICE=cpu python src/lerobot/scripts/train.py \
--policy.device=cpu \
--policy.type=$POLICY \
--dataset.repo_id=$REPO_ID \
--env.type=libero \
@@ -48,12 +49,7 @@ MUJOCO_GL=egl python src/lerobot/scripts/train.py \
--policy.max_state_dim=$MAX_STATE_DIM \
--policy.use_amp=$USE_AMP \
--policy.optimizer_lr=$OPTIMIZER_LR \
--policy.peft_method=$PEFT_METHOD \
--policy.load_vlm_weights=$LOAD_VLM_WEIGHTS \
--policy.repo_id=$VLM_REPO_ID \
--dataset.use_imagenet_stats=$USE_IMAGENET_STATS \
--dataset.image_transforms.enable=$ENABLE_IMG_TRANSFORM \
--dataset.max_num_images=$MAX_NUM_IMAGES \
--dataset.max_image_dim=$MAX_IMAGE_DIM \
# --policy.exclude_image_keys=wrist_image \
# --policy.use_env_state=false
--env.multitask_eval=False \
--eval.batch_size=1 \
+4 -2
View File
@@ -19,8 +19,10 @@ from huggingface_hub.constants import HF_HOME
OBS_ENV_STATE = "observation.environment_state"
OBS_STATE = "observation.state"
OBS_IMAGE = "observation.image"
OBS_IMAGE_2 = "observation.image2"
# OBS_IMAGE = "observation.image"
# OBS_IMAGE_2 = "observation.image2"
OBS_IMAGE = "image"
OBS_IMAGE_2 = "wrist_image"
OBS_IMAGES = "observation.images"
ACTION = "action"
REWARD = "next.reward"
+16 -2
View File
@@ -174,10 +174,12 @@ class LiberoEnv(gym.Env):
self.camera_name = camera_name.split(
","
) # agentview_image (main) or robot0_eye_in_hand_image (wrist)
#TODO: jadechoghari, check mapping
self.camera_name_mapping = {
"agentview_image": OBS_IMAGE,
"robot0_eye_in_hand_image": OBS_IMAGE_2,
}
self.num_steps_wait = (
10 # Do nothing for the first few timesteps to wait for the simulator drops objects
)
@@ -220,7 +222,7 @@ class LiberoEnv(gym.Env):
"agent_pos": spaces.Box(
low=-1000.0,
high=1000.0,
shape=(8,),
shape=(8,), #TODO: jadechoghari, check compatible
dtype=np.float64,
),
}
@@ -293,7 +295,7 @@ class LiberoEnv(gym.Env):
info = {"is_success": False}
return observation, info
def step(self, action):
def step1(self, action):
assert action.ndim == 1
raw_obs, reward, done, info = self._env.step(action)
@@ -307,6 +309,18 @@ class LiberoEnv(gym.Env):
print(self.task, self.task_id, done, is_success)
truncated = False
return observation, reward, terminated, truncated, info
def step(self, action):
    """Advance the wrapped simulator by one step and report the outcome.

    Args:
        action: One-dimensional action array (anything exposing ``ndim``);
            it is forwarded unchanged to ``self._env.step``.

    Returns:
        A 5-tuple ``(observation, reward, terminated, truncated, info)``:
        ``observation`` is ``self._format_raw_obs`` applied to the raw
        simulator observation, ``reward`` is passed through from the
        wrapped env, ``terminated`` is True when the wrapped env reports
        ``done`` or ``check_success()`` is truthy, ``truncated`` is always
        False, and ``info`` is the wrapped env's info dict with an added
        ``"is_success"`` entry.

    Raises:
        AssertionError: If ``action`` is not one-dimensional.
    """
    # Function-scope import so this block does not depend on the module's
    # import section.
    import logging

    assert action.ndim == 1, f"expected a 1-D action, got ndim={action.ndim}"
    raw_obs, reward, done, info = self._env.step(action)

    # Coerce to plain bools so downstream consumers always receive Python
    # booleans, whatever scalar type the simulator returns.
    is_success = bool(self._env.check_success())
    terminated = bool(done) or is_success
    info["is_success"] = is_success

    # Per-step diagnostics go to the logger at DEBUG level instead of being
    # printed unconditionally on every simulator step.
    logging.getLogger(__name__).debug(
        "[LiberoEnv.step] done=%s, is_success=%s, terminated=%s",
        done,
        is_success,
        terminated,
    )

    observation = self._format_raw_obs(raw_obs)
    truncated = False
    # NOTE (original author): if the task is never completed, LIBERO
    # reportedly raises an error after many steps.
    return observation, reward, terminated, truncated, info
def close(self):
self._env.close()
+7 -3
View File
@@ -143,7 +143,7 @@ def rollout(
leave=False,
)
check_env_attributes_and_types(env)
while not np.all(done):
while not np.all(done) and step < max_steps:
# Numpy array to tensor and changing dictionary keys to LeRobot policy format.
observation = preprocess_observation(observation)
if return_observations:
@@ -185,8 +185,11 @@ def rollout(
all_successes.append(torch.tensor(successes))
step += 1
print(step)
running_success_rate = (
einops.reduce(torch.stack(all_successes, dim=1), "b n -> b", "any").numpy().mean()
# einops.reduce(torch.stack(all_successes, dim=1), "b n -> b", "any").numpy().mean() #TODO: changed by jade
einops.reduce(torch.stack(all_successes, dim=1), "b n -> b", "max")
)
progbar.set_postfix({"running_success_rate": f"{running_success_rate.item() * 100:.1f}%"})
progbar.update()
@@ -315,7 +318,8 @@ def eval_policy(
sum_rewards.extend(batch_sum_rewards.tolist())
batch_max_rewards = einops.reduce((rollout_data["reward"] * mask), "b n -> b", "max")
max_rewards.extend(batch_max_rewards.tolist())
batch_successes = einops.reduce((rollout_data["success"] * mask), "b n -> b", "any")
# batch_successes = einops.reduce((rollout_data["success"] * mask), "b n -> b", "any")
batch_successes = einops.reduce((rollout_data["success"] * mask).float(), "b n -> b", "max")
all_successes.extend(batch_successes.tolist())
if seeds:
all_seeds.extend(seeds)