mirror of
https://github.com/huggingface/lerobot.git
synced 2026-05-20 19:19:56 +00:00
add changes
This commit is contained in:
@@ -173,3 +173,4 @@ outputs/
|
|||||||
|
|
||||||
# Dev folders
|
# Dev folders
|
||||||
.cache/*
|
.cache/*
|
||||||
|
src/libero
|
||||||
|
|||||||
@@ -1,3 +0,0 @@
|
|||||||
[submodule "Libero"]
|
|
||||||
path = Libero
|
|
||||||
url = https://github.com/Lifelong-Robot-Learning/LIBERO.git
|
|
||||||
-1
Submodule Libero deleted from 8f1084e313
Regular → Executable
+7
-11
@@ -1,9 +1,9 @@
|
|||||||
#!/bin/bash
|
#!/bin/bash
|
||||||
|
|
||||||
# config
|
# config
|
||||||
REPO_ID=physical-intelligence/libero
|
REPO_ID=yzembodied/libero_10_image_task_1
|
||||||
TASK=libero_10
|
TASK=libero_10
|
||||||
OUTPUT_DIR=./outputs/train_run/smolvla2_libero
|
OUTPUT_DIR=./outputs/
|
||||||
|
|
||||||
# clean previous run
|
# clean previous run
|
||||||
rm -rf $OUTPUT_DIR
|
rm -rf $OUTPUT_DIR
|
||||||
@@ -11,7 +11,7 @@ rm -rf $OUTPUT_DIR
|
|||||||
# training params
|
# training params
|
||||||
STEPS=100000
|
STEPS=100000
|
||||||
BATCH_SIZE=4
|
BATCH_SIZE=4
|
||||||
EVAL_FREQ=2000
|
EVAL_FREQ=1
|
||||||
SAVE_FREQ=10000
|
SAVE_FREQ=10000
|
||||||
NUM_WORKERS=0
|
NUM_WORKERS=0
|
||||||
|
|
||||||
@@ -33,7 +33,8 @@ MAX_IMAGE_DIM=1024
|
|||||||
|
|
||||||
echo -e "\033[1;33m[WARNING]\033[0m LIBERO is not yet fully supported in this PR!"
|
echo -e "\033[1;33m[WARNING]\033[0m LIBERO is not yet fully supported in this PR!"
|
||||||
# launch
|
# launch
|
||||||
MUJOCO_GL=egl python src/lerobot/scripts/train.py \
|
PYTORCH_ENABLE_MPS_FALLBACK=1 DEVICE=cpu python src/lerobot/scripts/train.py \
|
||||||
|
--policy.device=cpu \
|
||||||
--policy.type=$POLICY \
|
--policy.type=$POLICY \
|
||||||
--dataset.repo_id=$REPO_ID \
|
--dataset.repo_id=$REPO_ID \
|
||||||
--env.type=libero \
|
--env.type=libero \
|
||||||
@@ -48,12 +49,7 @@ MUJOCO_GL=egl python src/lerobot/scripts/train.py \
|
|||||||
--policy.max_state_dim=$MAX_STATE_DIM \
|
--policy.max_state_dim=$MAX_STATE_DIM \
|
||||||
--policy.use_amp=$USE_AMP \
|
--policy.use_amp=$USE_AMP \
|
||||||
--policy.optimizer_lr=$OPTIMIZER_LR \
|
--policy.optimizer_lr=$OPTIMIZER_LR \
|
||||||
--policy.peft_method=$PEFT_METHOD \
|
|
||||||
--policy.load_vlm_weights=$LOAD_VLM_WEIGHTS \
|
--policy.load_vlm_weights=$LOAD_VLM_WEIGHTS \
|
||||||
--policy.repo_id=$VLM_REPO_ID \
|
--policy.repo_id=$VLM_REPO_ID \
|
||||||
--dataset.use_imagenet_stats=$USE_IMAGENET_STATS \
|
--env.multitask_eval=False \
|
||||||
--dataset.image_transforms.enable=$ENABLE_IMG_TRANSFORM \
|
--eval.batch_size=1 \
|
||||||
--dataset.max_num_images=$MAX_NUM_IMAGES \
|
|
||||||
--dataset.max_image_dim=$MAX_IMAGE_DIM \
|
|
||||||
# --policy.exclude_image_keys=wrist_image \
|
|
||||||
# --policy.use_env_state=false
|
|
||||||
|
|||||||
@@ -19,8 +19,10 @@ from huggingface_hub.constants import HF_HOME
|
|||||||
|
|
||||||
OBS_ENV_STATE = "observation.environment_state"
|
OBS_ENV_STATE = "observation.environment_state"
|
||||||
OBS_STATE = "observation.state"
|
OBS_STATE = "observation.state"
|
||||||
OBS_IMAGE = "observation.image"
|
# OBS_IMAGE = "observation.image"
|
||||||
OBS_IMAGE_2 = "observation.image2"
|
# OBS_IMAGE_2 = "observation.image2"
|
||||||
|
OBS_IMAGE = "image"
|
||||||
|
OBS_IMAGE_2 = "wrist_image"
|
||||||
OBS_IMAGES = "observation.images"
|
OBS_IMAGES = "observation.images"
|
||||||
ACTION = "action"
|
ACTION = "action"
|
||||||
REWARD = "next.reward"
|
REWARD = "next.reward"
|
||||||
|
|||||||
@@ -174,10 +174,12 @@ class LiberoEnv(gym.Env):
|
|||||||
self.camera_name = camera_name.split(
|
self.camera_name = camera_name.split(
|
||||||
","
|
","
|
||||||
) # agentview_image (main) or robot0_eye_in_hand_image (wrist)
|
) # agentview_image (main) or robot0_eye_in_hand_image (wrist)
|
||||||
|
#TODO: jadechoghari, check mapping
|
||||||
self.camera_name_mapping = {
|
self.camera_name_mapping = {
|
||||||
"agentview_image": OBS_IMAGE,
|
"agentview_image": OBS_IMAGE,
|
||||||
"robot0_eye_in_hand_image": OBS_IMAGE_2,
|
"robot0_eye_in_hand_image": OBS_IMAGE_2,
|
||||||
}
|
}
|
||||||
|
|
||||||
self.num_steps_wait = (
|
self.num_steps_wait = (
|
||||||
10 # Do nothing for the first few timesteps to wait for the simulator drops objects
|
10 # Do nothing for the first few timesteps to wait for the simulator drops objects
|
||||||
)
|
)
|
||||||
@@ -220,7 +222,7 @@ class LiberoEnv(gym.Env):
|
|||||||
"agent_pos": spaces.Box(
|
"agent_pos": spaces.Box(
|
||||||
low=-1000.0,
|
low=-1000.0,
|
||||||
high=1000.0,
|
high=1000.0,
|
||||||
shape=(8,),
|
shape=(8,), #TODO: jadechoghari, check compatible
|
||||||
dtype=np.float64,
|
dtype=np.float64,
|
||||||
),
|
),
|
||||||
}
|
}
|
||||||
@@ -293,7 +295,7 @@ class LiberoEnv(gym.Env):
|
|||||||
info = {"is_success": False}
|
info = {"is_success": False}
|
||||||
return observation, info
|
return observation, info
|
||||||
|
|
||||||
def step(self, action):
|
def step1(self, action):
|
||||||
assert action.ndim == 1
|
assert action.ndim == 1
|
||||||
raw_obs, reward, done, info = self._env.step(action)
|
raw_obs, reward, done, info = self._env.step(action)
|
||||||
|
|
||||||
@@ -307,6 +309,18 @@ class LiberoEnv(gym.Env):
|
|||||||
print(self.task, self.task_id, done, is_success)
|
print(self.task, self.task_id, done, is_success)
|
||||||
truncated = False
|
truncated = False
|
||||||
return observation, reward, terminated, truncated, info
|
return observation, reward, terminated, truncated, info
|
||||||
|
def step(self, action):
|
||||||
|
assert action.ndim == 1
|
||||||
|
raw_obs, reward, done, info = self._env.step(action)
|
||||||
|
|
||||||
|
is_success = self._env.check_success()
|
||||||
|
terminated = done or is_success
|
||||||
|
info["is_success"] = is_success
|
||||||
|
print(f"[LiberoEnv.step] done={done}, is_success={is_success}, terminated={terminated}")
|
||||||
|
observation = self._format_raw_obs(raw_obs)
|
||||||
|
truncated = False
|
||||||
|
# note if it is unable to complete get libero error after many steps
|
||||||
|
return observation, reward, terminated, truncated, info
|
||||||
|
|
||||||
def close(self):
|
def close(self):
|
||||||
self._env.close()
|
self._env.close()
|
||||||
|
|||||||
@@ -143,7 +143,7 @@ def rollout(
|
|||||||
leave=False,
|
leave=False,
|
||||||
)
|
)
|
||||||
check_env_attributes_and_types(env)
|
check_env_attributes_and_types(env)
|
||||||
while not np.all(done):
|
while not np.all(done) and step < max_steps:
|
||||||
# Numpy array to tensor and changing dictionary keys to LeRobot policy format.
|
# Numpy array to tensor and changing dictionary keys to LeRobot policy format.
|
||||||
observation = preprocess_observation(observation)
|
observation = preprocess_observation(observation)
|
||||||
if return_observations:
|
if return_observations:
|
||||||
@@ -185,8 +185,11 @@ def rollout(
|
|||||||
all_successes.append(torch.tensor(successes))
|
all_successes.append(torch.tensor(successes))
|
||||||
|
|
||||||
step += 1
|
step += 1
|
||||||
|
print(step)
|
||||||
running_success_rate = (
|
running_success_rate = (
|
||||||
einops.reduce(torch.stack(all_successes, dim=1), "b n -> b", "any").numpy().mean()
|
# einops.reduce(torch.stack(all_successes, dim=1), "b n -> b", "any").numpy().mean() #TODO: changed by jade
|
||||||
|
einops.reduce(torch.stack(all_successes, dim=1), "b n -> b", "max")
|
||||||
|
|
||||||
)
|
)
|
||||||
progbar.set_postfix({"running_success_rate": f"{running_success_rate.item() * 100:.1f}%"})
|
progbar.set_postfix({"running_success_rate": f"{running_success_rate.item() * 100:.1f}%"})
|
||||||
progbar.update()
|
progbar.update()
|
||||||
@@ -315,7 +318,8 @@ def eval_policy(
|
|||||||
sum_rewards.extend(batch_sum_rewards.tolist())
|
sum_rewards.extend(batch_sum_rewards.tolist())
|
||||||
batch_max_rewards = einops.reduce((rollout_data["reward"] * mask), "b n -> b", "max")
|
batch_max_rewards = einops.reduce((rollout_data["reward"] * mask), "b n -> b", "max")
|
||||||
max_rewards.extend(batch_max_rewards.tolist())
|
max_rewards.extend(batch_max_rewards.tolist())
|
||||||
batch_successes = einops.reduce((rollout_data["success"] * mask), "b n -> b", "any")
|
# batch_successes = einops.reduce((rollout_data["success"] * mask), "b n -> b", "any")
|
||||||
|
batch_successes = einops.reduce((rollout_data["success"] * mask).float(), "b n -> b", "max")
|
||||||
all_successes.extend(batch_successes.tolist())
|
all_successes.extend(batch_successes.tolist())
|
||||||
if seeds:
|
if seeds:
|
||||||
all_seeds.extend(seeds)
|
all_seeds.extend(seeds)
|
||||||
|
|||||||
Reference in New Issue
Block a user