add multitask

This commit is contained in:
Jade Choghari (jchoghar)
2025-08-17 14:27:53 -04:00
parent c20bf75ba0
commit ac0993c2e3
15 changed files with 91 additions and 32 deletions
+7 -9
View File
@@ -16,7 +16,7 @@ SAVE_FREQ=10000
NUM_WORKERS=0
# model params
POLICY=smolvla
POLICY=pi0
USE_AMP=false
OPTIMIZER_LR=1e-4
PEFT_METHOD=lora
@@ -30,11 +30,13 @@ USE_IMAGENET_STATS=false
ENABLE_IMG_TRANSFORM=true
MAX_NUM_IMAGES=2
MAX_IMAGE_DIM=1024
unset LEROBOT_HOME
unset HF_LEROBOT_HOME
echo -e "\033[1;33m[WARNING]\033[0m LIBERO is not yet fully supported in this PR!"
# launch
PYTORCH_ENABLE_MPS_FALLBACK=1 DEVICE=cpu python src/lerobot/scripts/train.py \
--policy.device=cpu \
python src/lerobot/scripts/train.py \
--policy.type=$POLICY \
--dataset.repo_id=$REPO_ID \
--env.type=libero \
@@ -45,11 +47,7 @@ PYTORCH_ENABLE_MPS_FALLBACK=1 DEVICE=cpu python src/lerobot/scripts/train.py \
--eval_freq=$EVAL_FREQ \
--save_freq=$SAVE_FREQ \
--num_workers=$NUM_WORKERS \
--policy.max_action_dim=$MAX_ACTION_DIM \
--policy.max_state_dim=$MAX_STATE_DIM \
--policy.use_amp=$USE_AMP \
--policy.optimizer_lr=$OPTIMIZER_LR \
--policy.load_vlm_weights=$LOAD_VLM_WEIGHTS \
--policy.repo_id=$VLM_REPO_ID \
--env.multitask_eval=False \
--env.multitask_eval=True \
--eval.batch_size=1 \
--eval.n_episodes=1 \
+22
View File
@@ -0,0 +1,22 @@
#!/bin/bash
# Example evaluation script for LeRobot policies
unset LEROBOT_HOME
unset HF_LEROBOT_HOME
# === CONFIGURATION ===
POLICY_PATH="ganatrask/lerobot-pi0-libero-object" # or outputs/train/.../pretrained_model
TASK=libero_object
ENV_TYPE="libero"
BATCH_SIZE=1
N_EPISODES=1
USE_AMP=false
DEVICE=cuda
# === RUN EVALUATION ===
python src/lerobot/scripts/eval.py \
--policy.path="$POLICY_PATH" \
--env.type="$ENV_TYPE" \
--eval.batch_size="$BATCH_SIZE" \
--eval.n_episodes="$N_EPISODES" \
--env.multitask_eval=False \
--env.task=$TASK \
BIN
View File
Binary file not shown.

After

Width:  |  Height:  |  Size: 46 KiB

BIN
View File
Binary file not shown.

After

Width:  |  Height:  |  Size: 50 KiB

BIN
View File
Binary file not shown.

After

Width:  |  Height:  |  Size: 49 KiB

BIN
View File
Binary file not shown.

After

Width:  |  Height:  |  Size: 50 KiB

BIN
View File
Binary file not shown.

After

Width:  |  Height:  |  Size: 50 KiB

BIN
View File
Binary file not shown.

After

Width:  |  Height:  |  Size: 48 KiB

+5 -3
View File
@@ -19,10 +19,12 @@ from huggingface_hub.constants import HF_HOME
OBS_ENV_STATE = "observation.environment_state"
OBS_STATE = "observation.state"
# OBS_IMAGE = "observation.image"
# OBS_IMAGE_2 = "observation.image2"
OBS_IMAGE = "observation.image"
OBS_IMAGE_2 = "observation.image2"
OBS_IMAGE = "image"
OBS_IMAGE_2 = "wrist_image"
OBS_IMAGE_2 = "image2"
# OBS_IMAGE = "image"
# OBS_IMAGE_2 = "wrist_image"
OBS_IMAGES = "observation.images"
ACTION = "action"
REWARD = "next.reward"
+2 -2
View File
@@ -295,8 +295,8 @@ class LiberoEnv(EnvConfig):
default_factory=lambda: {
"action": ACTION,
"agent_pos": OBS_STATE,
"pixels/agentview_image": f"{OBS_IMAGE}",
"pixels/robot0_eye_in_hand_image": f"{OBS_IMAGE_2}",
"pixels/agentview_image": f"observation.images.{OBS_IMAGE}",
"pixels/robot0_eye_in_hand_image": f"observation.images.{OBS_IMAGE_2}",
}
)
+4 -2
View File
@@ -32,6 +32,9 @@ def create_libero_envs(
Returns:
dict[str, dict[str, list[LiberoEnv]]]: keys are task_suite and values are list of LiberoEnv envs.
"""
print("num envs", n_envs)
print("multitask_eval", multitask_eval)
print("gym_kwargs", gym_kwargs)
if gym_kwargs is None:
gym_kwargs = {}
@@ -45,6 +48,7 @@ def create_libero_envs(
episode_indices = list(range(n_envs))
elif len(tasks_id) < n_envs and n_envs % len(tasks_id) == 0:
n_repeat = n_envs // len(tasks_id)
print("n_repeat", n_repeat)
episode_indices = []
for i in range(len(tasks_id)):
episode_indices.extend(list(range(n_repeat)))
@@ -313,11 +317,9 @@ class LiberoEnv(gym.Env):
def step(self, action):
assert action.ndim == 1
raw_obs, reward, done, info = self._env.step(action)
is_success = self._env.check_success()
terminated = done or is_success
info["is_success"] = is_success
print(f"[LiberoEnv.step] done={done}, is_success={is_success}, terminated={terminated}")
observation = self._format_raw_obs(raw_obs)
truncated = False
# note if it is unable to complete get libero error after many steps
-1
View File
@@ -97,7 +97,6 @@ def env_to_policy_features(env_cfg: EnvConfig) -> dict[str, PolicyFeature]:
policy_key = env_cfg.features_map[key]
policy_features[policy_key] = feature
return policy_features
-1
View File
@@ -156,7 +156,6 @@ def make_policy(
"by default without stats from a dataset."
)
features = env_to_policy_features(env_cfg)
cfg.output_features = {key: ft for key, ft in features.items() if ft.type is FeatureType.ACTION}
cfg.input_features = {key: ft for key, ft in features.items() if key not in cfg.output_features}
kwargs["config"] = cfg
+42 -10
View File
@@ -56,7 +56,7 @@ from copy import deepcopy
from dataclasses import asdict
from pathlib import Path
from pprint import pformat
import concurrent
import einops
import gymnasium as gym
import numpy as np
@@ -156,10 +156,29 @@ def rollout(
# Infer "task" from attributes of environments.
# TODO: works with SyncVectorEnv but not AsyncVectorEnv
observation = add_envs_task(env, observation)
if step % 100 == 0:
import imageio.v2 as imageio
img = observation["observation.images.image"] # (1, 3, 256, 256)
if isinstance(img, torch.Tensor):
img = img.detach().cpu().numpy()
# remove batch → (3, 256, 256)
img = img[0]
# transpose → (256, 256, 3)
img = np.transpose(img, (1, 2, 0))
# scale + convert to uint8
img = (img * 255).clip(0, 255).astype(np.uint8)
# now works
imageio.imwrite(f"obs_{step:06d}.png", img)
with torch.inference_mode():
action = policy.select_action(observation)
observation['observation.images.image']
# Convert to CPU / numpy.
action = action.to("cpu").numpy()
assert action.ndim == 2, "Action dimensions should be (batch, action_dim)"
@@ -177,7 +196,12 @@ def rollout(
successes = [False] * env.num_envs
# Keep track of which environments are done so far.
# done = terminated | truncated | done
#TODO: jadechoghari changed, this is cleaner
done = terminated | truncated | done
if step + 1 == max_steps:
done = np.ones_like(done, dtype=bool)
all_actions.append(torch.from_numpy(action))
all_rewards.append(torch.from_numpy(reward))
@@ -185,7 +209,6 @@ def rollout(
all_successes.append(torch.tensor(successes))
step += 1
print(step)
running_success_rate = (
# einops.reduce(torch.stack(all_successes, dim=1), "b n -> b", "any").numpy().mean() #TODO: changed by jade
einops.reduce(torch.stack(all_successes, dim=1), "b n -> b", "max")
@@ -254,6 +277,7 @@ def eval_policy(
# Determine how many batched rollouts we need to get n_episodes. Note that if n_episodes is not evenly
# divisible by env.num_envs we end up discarding some data in the last batch.
n_batches = n_episodes // env.num_envs + int((n_episodes % env.num_envs) != 0)
print("n_batches", n_batches)
# Keep track of some metrics.
sum_rewards = []
@@ -403,7 +427,6 @@ def eval_policy(
"eval_ep_s": (time.time() - start) / n_episodes,
},
}
if return_episode_data:
info["episodes"] = episode_data
@@ -457,13 +480,22 @@ def _compile_episode_data(
return data_dict
def set_global_seed(seed):
"""Set seed for reproducibility."""
import random
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
if torch.cuda.is_available():
torch.cuda.manual_seed_all(seed)
def log_output_dir(out_dir):
logging.info("Output dir:"+ f" {out_dir}")
@parser.wrap()
def eval(cfg: EvalPipelineConfig):
logging.info(pformat(asdict(cfg)))
# Check device is available
device = get_safe_torch_device(cfg.device, log=True)
device = get_safe_torch_device(cfg.policy.device, log=True)
torch.backends.cudnn.benchmark = True
torch.backends.cuda.matmul.allow_tf32 = True
@@ -477,12 +509,12 @@ def eval(cfg: EvalPipelineConfig):
logging.info("Making policy.")
policy = make_policy(
cfg=cfg.policy,
device=device,
# device=device,
env_cfg=cfg.env,
)
policy.eval()
with torch.no_grad(), torch.autocast(device_type=device.type) if cfg.use_amp else nullcontext():
with torch.no_grad(), torch.autocast(device_type=device.type) if cfg.policy.use_amp else nullcontext():
if cfg.env.multitask_eval:
info = eval_policy_multitask(
env,
@@ -555,7 +587,7 @@ def eval_policy_multitask(
videos_dir,
return_episode_data,
start_seed,
verbose=verbose,
# verbose=verbose,
)
per_episode = task_result["per_episode"]
@@ -642,4 +674,4 @@ def eval_policy_multitask(
if __name__ == "__main__":
init_logging()
eval_main()
eval()
+8 -3
View File
@@ -269,7 +269,10 @@ def train(cfg: TrainPipelineConfig):
continue # Skip the overall stats since we already printed it
print(f"\nAggregated Metrics for {task_group}:")
print(task_group_info["aggregated"])
breakpoint()
else:
print("START EVAL")
breakpoint()
eval_info = eval_policy(
eval_env,
policy,
@@ -278,6 +281,8 @@ def train(cfg: TrainPipelineConfig):
max_episodes_rendered=4,
start_seed=cfg.seed,
)
aggregated = eval_info["aggregated"]
print("END EVAL")
eval_metrics = {
"avg_sum_reward": AverageMeter("∑rwrd", ":.3f"),
@@ -287,9 +292,9 @@ def train(cfg: TrainPipelineConfig):
eval_tracker = MetricsTracker(
cfg.batch_size, dataset.num_frames, dataset.num_episodes, eval_metrics, initial_step=step
)
eval_tracker.eval_s = eval_info["aggregated"].pop("eval_s")
eval_tracker.avg_sum_reward = eval_info["aggregated"].pop("avg_sum_reward")
eval_tracker.pc_success = eval_info["aggregated"].pop("pc_success")
eval_tracker.eval_s = aggregated.pop("eval_s")
eval_tracker.avg_sum_reward = aggregated.pop("avg_sum_reward")
eval_tracker.pc_success = aggregated.pop("pc_success")
logging.info(eval_tracker)
if wandb_logger:
wandb_log_dict = {**eval_tracker.to_dict(), **eval_info}