mirror of
https://github.com/huggingface/lerobot.git
synced 2026-05-23 20:50:02 +00:00
add changes
This commit is contained in:
@@ -173,3 +173,4 @@ outputs/
|
||||
|
||||
# Dev folders
|
||||
.cache/*
|
||||
src/libero
|
||||
|
||||
@@ -1,3 +0,0 @@
|
||||
[submodule "Libero"]
|
||||
path = Libero
|
||||
url = https://github.com/Lifelong-Robot-Learning/LIBERO.git
|
||||
-1
Submodule Libero deleted from 8f1084e313
Regular → Executable
+7
-11
@@ -1,9 +1,9 @@
|
||||
#!/bin/bash
|
||||
|
||||
# config
|
||||
REPO_ID=physical-intelligence/libero
|
||||
REPO_ID=yzembodied/libero_10_image_task_1
|
||||
TASK=libero_10
|
||||
OUTPUT_DIR=./outputs/train_run/smolvla2_libero
|
||||
OUTPUT_DIR=./outputs/
|
||||
|
||||
# clean previous run
|
||||
rm -rf $OUTPUT_DIR
|
||||
@@ -11,7 +11,7 @@ rm -rf $OUTPUT_DIR
|
||||
# training params
|
||||
STEPS=100000
|
||||
BATCH_SIZE=4
|
||||
EVAL_FREQ=2000
|
||||
EVAL_FREQ=1
|
||||
SAVE_FREQ=10000
|
||||
NUM_WORKERS=0
|
||||
|
||||
@@ -33,7 +33,8 @@ MAX_IMAGE_DIM=1024
|
||||
|
||||
echo -e "\033[1;33m[WARNING]\033[0m LIBERO is not yet fully supported in this PR!"
|
||||
# launch
|
||||
MUJOCO_GL=egl python src/lerobot/scripts/train.py \
|
||||
PYTORCH_ENABLE_MPS_FALLBACK=1 DEVICE=cpu python src/lerobot/scripts/train.py \
|
||||
--policy.device=cpu \
|
||||
--policy.type=$POLICY \
|
||||
--dataset.repo_id=$REPO_ID \
|
||||
--env.type=libero \
|
||||
@@ -48,12 +49,7 @@ MUJOCO_GL=egl python src/lerobot/scripts/train.py \
|
||||
--policy.max_state_dim=$MAX_STATE_DIM \
|
||||
--policy.use_amp=$USE_AMP \
|
||||
--policy.optimizer_lr=$OPTIMIZER_LR \
|
||||
--policy.peft_method=$PEFT_METHOD \
|
||||
--policy.load_vlm_weights=$LOAD_VLM_WEIGHTS \
|
||||
--policy.repo_id=$VLM_REPO_ID \
|
||||
--dataset.use_imagenet_stats=$USE_IMAGENET_STATS \
|
||||
--dataset.image_transforms.enable=$ENABLE_IMG_TRANSFORM \
|
||||
--dataset.max_num_images=$MAX_NUM_IMAGES \
|
||||
--dataset.max_image_dim=$MAX_IMAGE_DIM \
|
||||
# --policy.exclude_image_keys=wrist_image \
|
||||
# --policy.use_env_state=false
|
||||
--env.multitask_eval=False \
|
||||
--eval.batch_size=1 \
|
||||
|
||||
@@ -19,8 +19,10 @@ from huggingface_hub.constants import HF_HOME
|
||||
|
||||
OBS_ENV_STATE = "observation.environment_state"
|
||||
OBS_STATE = "observation.state"
|
||||
OBS_IMAGE = "observation.image"
|
||||
OBS_IMAGE_2 = "observation.image2"
|
||||
# OBS_IMAGE = "observation.image"
|
||||
# OBS_IMAGE_2 = "observation.image2"
|
||||
OBS_IMAGE = "image"
|
||||
OBS_IMAGE_2 = "wrist_image"
|
||||
OBS_IMAGES = "observation.images"
|
||||
ACTION = "action"
|
||||
REWARD = "next.reward"
|
||||
|
||||
@@ -174,10 +174,12 @@ class LiberoEnv(gym.Env):
|
||||
self.camera_name = camera_name.split(
|
||||
","
|
||||
) # agentview_image (main) or robot0_eye_in_hand_image (wrist)
|
||||
#TODO: jadechoghari, check mapping
|
||||
self.camera_name_mapping = {
|
||||
"agentview_image": OBS_IMAGE,
|
||||
"robot0_eye_in_hand_image": OBS_IMAGE_2,
|
||||
}
|
||||
|
||||
self.num_steps_wait = (
|
||||
10 # Do nothing for the first few timesteps to wait for the simulator drops objects
|
||||
)
|
||||
@@ -220,7 +222,7 @@ class LiberoEnv(gym.Env):
|
||||
"agent_pos": spaces.Box(
|
||||
low=-1000.0,
|
||||
high=1000.0,
|
||||
shape=(8,),
|
||||
shape=(8,), #TODO: jadechoghari, check compatible
|
||||
dtype=np.float64,
|
||||
),
|
||||
}
|
||||
@@ -293,7 +295,7 @@ class LiberoEnv(gym.Env):
|
||||
info = {"is_success": False}
|
||||
return observation, info
|
||||
|
||||
def step(self, action):
|
||||
def step1(self, action):
|
||||
assert action.ndim == 1
|
||||
raw_obs, reward, done, info = self._env.step(action)
|
||||
|
||||
@@ -307,6 +309,18 @@ class LiberoEnv(gym.Env):
|
||||
print(self.task, self.task_id, done, is_success)
|
||||
truncated = False
|
||||
return observation, reward, terminated, truncated, info
|
||||
def step(self, action):
|
||||
assert action.ndim == 1
|
||||
raw_obs, reward, done, info = self._env.step(action)
|
||||
|
||||
is_success = self._env.check_success()
|
||||
terminated = done or is_success
|
||||
info["is_success"] = is_success
|
||||
print(f"[LiberoEnv.step] done={done}, is_success={is_success}, terminated={terminated}")
|
||||
observation = self._format_raw_obs(raw_obs)
|
||||
truncated = False
|
||||
# note if it is unable to complete get libero error after many steps
|
||||
return observation, reward, terminated, truncated, info
|
||||
|
||||
def close(self):
|
||||
self._env.close()
|
||||
|
||||
@@ -143,7 +143,7 @@ def rollout(
|
||||
leave=False,
|
||||
)
|
||||
check_env_attributes_and_types(env)
|
||||
while not np.all(done):
|
||||
while not np.all(done) and step < max_steps:
|
||||
# Numpy array to tensor and changing dictionary keys to LeRobot policy format.
|
||||
observation = preprocess_observation(observation)
|
||||
if return_observations:
|
||||
@@ -185,8 +185,11 @@ def rollout(
|
||||
all_successes.append(torch.tensor(successes))
|
||||
|
||||
step += 1
|
||||
print(step)
|
||||
running_success_rate = (
|
||||
einops.reduce(torch.stack(all_successes, dim=1), "b n -> b", "any").numpy().mean()
|
||||
# einops.reduce(torch.stack(all_successes, dim=1), "b n -> b", "any").numpy().mean() #TODO: changed by jade
|
||||
einops.reduce(torch.stack(all_successes, dim=1), "b n -> b", "max")
|
||||
|
||||
)
|
||||
progbar.set_postfix({"running_success_rate": f"{running_success_rate.item() * 100:.1f}%"})
|
||||
progbar.update()
|
||||
@@ -315,7 +318,8 @@ def eval_policy(
|
||||
sum_rewards.extend(batch_sum_rewards.tolist())
|
||||
batch_max_rewards = einops.reduce((rollout_data["reward"] * mask), "b n -> b", "max")
|
||||
max_rewards.extend(batch_max_rewards.tolist())
|
||||
batch_successes = einops.reduce((rollout_data["success"] * mask), "b n -> b", "any")
|
||||
# batch_successes = einops.reduce((rollout_data["success"] * mask), "b n -> b", "any")
|
||||
batch_successes = einops.reduce((rollout_data["success"] * mask).float(), "b n -> b", "max")
|
||||
all_successes.extend(batch_successes.tolist())
|
||||
if seeds:
|
||||
all_seeds.extend(seeds)
|
||||
|
||||
Reference in New Issue
Block a user