mirror of
https://github.com/huggingface/lerobot.git
synced 2026-05-22 12:09:42 +00:00
Merge branch 'train-smolvla' into add-multitraining
:wq a
This commit is contained in:
@@ -0,0 +1,90 @@
|
|||||||
|
#!/bin/bash
# smolvla training
#
# Launches src/lerobot/scripts/train.py for the smolvla policy on the
# dataset/environment configured below. Caches, temp files and logs are
# redirected under $RAID, and the Hugging Face / W&B clients are put in
# offline mode, so the VLM weights must already exist on local disk.

set -euo pipefail

# repo/env
cd ~/lerobot || exit 1
# conda activate lerobot
export LC_ALL=C

# Remove leftover core-* files (presumably core dumps from earlier runs).
rm -f core-*

# storage / caches (use RAID to avoid filling $HOME)
RAID=/raid/jade
export TRANSFORMERS_CACHE="$RAID/.cache/huggingface/transformers"
export HF_HOME="$RAID/.cache/huggingface"
export HF_DATASETS_CACHE="$RAID/.cache/huggingface/datasets"
export HF_LEROBOT_HOME="$RAID/.cache/huggingface/lerobot"
export WANDB_CACHE_DIR="$RAID/.cache/wandb"
export TMPDIR="$RAID/.cache/tmp"
mkdir -p "$TMPDIR"
export WANDB_MODE=offline
export HF_DATASETS_OFFLINE=1
export HF_HUB_OFFLINE=1
export TOKENIZERS_PARALLELISM=false
export MUJOCO_GL=egl

# will only use if accelerate is used
PORT=29522

# =================== CONFIG ===================
ENV=libero
TASK=libero_spatial
REPO_ID=physical-intelligence/libero

POLICY=smolvla
# Local copy of HuggingFaceTB/SmolVLM2-2.2B-Instruct. The hub is offline
# (HF_HUB_OFFLINE=1), so the model is referenced by path, not by hub ID.
# The script used to pass --policy.vlm_model_name twice (hub ID, then this
# path); it is now passed once with the path that was given last.
VLM="$RAID/.cache/huggingface/models/SmolVLM2-2.2B-Instruct"

# Optim / scheduling
LR=1e-4
DECAY_LR=2.5e-6
DECAY_STEPS=30000
USE_AMP=false
TRAIN_EXPERT_ONLY=true
N_ACTION_STEPS=1
SEED=1000

# Training loop
OFFLINE_STEPS=100000
BATCH_SIZE=32
EVAL_FREQ=0
SAVE_FREQ=300000
EVAL_BATCH_SIZE=1
NUM_EPISODES=1

# GPU selection 0, 1, 2, 3
export CUDA_VISIBLE_DEVICES=1

# naming/output dir ("/" in REPO_ID is replaced by "_" to keep a flat name)
TRAIN_DIR="$RAID/logs/lerobot/lerobot_${REPO_ID//\//_}_${POLICY}_lr${LR}bs${BATCH_SIZE}steps${OFFLINE_STEPS}"
echo "Training dir: $TRAIN_DIR"

# train
# Start from a clean output directory; ${VAR:?} aborts instead of running
# `rm -rf ""` if TRAIN_DIR ever ends up empty.
rm -rf "${TRAIN_DIR:?}"

python src/lerobot/scripts/train.py \
    --policy.type="$POLICY" \
    --policy.vlm_model_name="$VLM" \
    --dataset.repo_id="$REPO_ID" \
    --dataset.root="$HF_DATASETS_CACHE" \
    --env.type="$ENV" \
    --env.task="$TASK" \
    --output_dir="$TRAIN_DIR" \
    --batch_size="$BATCH_SIZE" \
    --steps="$OFFLINE_STEPS" \
    --eval_freq="$EVAL_FREQ" \
    --save_freq="$SAVE_FREQ" \
    --eval.batch_size="$EVAL_BATCH_SIZE" \
    --eval.n_episodes="$NUM_EPISODES" \
    --policy.use_amp="$USE_AMP" \
    --policy.optimizer_lr="$LR" \
    --policy.repo_id=None \
    --policy.scheduler_decay_lr="$DECAY_LR" \
    --policy.scheduler_decay_steps="$DECAY_STEPS" \
    --policy.n_action_steps="$N_ACTION_STEPS" \
    --policy.train_expert_only="$TRAIN_EXPERT_ONLY" \
    --seed="$SEED" \
    --wandb.enable=false
|
||||||
@@ -63,7 +63,7 @@ import torch.nn.functional as F # noqa: N812
|
|||||||
from torch import Tensor, nn
|
from torch import Tensor, nn
|
||||||
from transformers import AutoProcessor
|
from transformers import AutoProcessor
|
||||||
|
|
||||||
from lerobot.constants import ACTION, OBS_STATE
|
from lerobot.constants import ACTION
|
||||||
from lerobot.policies.normalize import (
|
from lerobot.policies.normalize import (
|
||||||
Normalize,
|
Normalize,
|
||||||
Unnormalize,
|
Unnormalize,
|
||||||
@@ -75,7 +75,8 @@ from lerobot.policies.utils import (
|
|||||||
populate_queues,
|
populate_queues,
|
||||||
)
|
)
|
||||||
from lerobot.utils.utils import get_safe_dtype
|
from lerobot.utils.utils import get_safe_dtype
|
||||||
|
OBS_STATE = 'state'
|
||||||
|
ACTION = 'actions'
|
||||||
# Matches ".soNNN", optionally followed by "-something", up to the "_buffer_" marker
|
# Matches ".soNNN", optionally followed by "-something", up to the "_buffer_" marker
|
||||||
_VARIANT_RE = re.compile(r"\.so\d+(?:-[\w]+)?_buffer_")
|
_VARIANT_RE = re.compile(r"\.so\d+(?:-[\w]+)?_buffer_")
|
||||||
|
|
||||||
@@ -824,12 +825,21 @@ class VLAFlowMatching(nn.Module):
|
|||||||
pad_masks = torch.cat(pad_masks, dim=1)
|
pad_masks = torch.cat(pad_masks, dim=1)
|
||||||
att_masks = torch.tensor(att_masks, dtype=embs.dtype, device=embs.device)
|
att_masks = torch.tensor(att_masks, dtype=embs.dtype, device=embs.device)
|
||||||
att_masks = att_masks[None, :].expand(bsize, len(att_masks))
|
att_masks = att_masks[None, :].expand(bsize, len(att_masks))
|
||||||
|
# added by jade
|
||||||
|
seq_len = pad_masks.shape[1]
|
||||||
|
if seq_len < self.config.chunk_size:
|
||||||
|
embs = pad_tensor(embs, self.config.chunk_size, pad_value=0)
|
||||||
|
pad_masks = pad_tensor(pad_masks, self.config.chunk_size, pad_value=0)
|
||||||
|
att_masks = pad_tensor(att_masks, self.config.chunk_size, pad_value=0)
|
||||||
return embs, pad_masks, att_masks
|
return embs, pad_masks, att_masks
|
||||||
|
|
||||||
def forward(
|
def forward(
|
||||||
self, images, img_masks, lang_tokens, lang_masks, state, actions, noise=None, time=None
|
self, images, img_masks, lang_tokens, lang_masks, state, actions, noise=None, time=None
|
||||||
) -> Tensor:
|
) -> Tensor:
|
||||||
"""Do a full training forward pass and compute the loss (batch_size x num_steps x num_motors)"""
|
"""Do a full training forward pass and compute the loss (batch_size x num_steps x num_motors)"""
|
||||||
|
#added by jade
|
||||||
|
if actions.ndim == 2:
|
||||||
|
actions = actions[:, None, :].expand(-1, self.config.chunk_size, -1)
|
||||||
if noise is None:
|
if noise is None:
|
||||||
noise = self.sample_noise(actions.shape, actions.device)
|
noise = self.sample_noise(actions.shape, actions.device)
|
||||||
|
|
||||||
@@ -857,7 +867,8 @@ class VLAFlowMatching(nn.Module):
|
|||||||
use_cache=False,
|
use_cache=False,
|
||||||
fill_kv_cache=False,
|
fill_kv_cache=False,
|
||||||
)
|
)
|
||||||
suffix_out = suffix_out[:, -self.config.chunk_size :]
|
# suffix_out = suffix_out[:, -self.config.chunk_size :]
|
||||||
|
suffix_out = suffix_out[:, -self.config.chunk_size:, :]
|
||||||
# Original openpi code, upcast attention output
|
# Original openpi code, upcast attention output
|
||||||
suffix_out = suffix_out.to(dtype=torch.float32)
|
suffix_out = suffix_out.to(dtype=torch.float32)
|
||||||
v_t = self.action_out_proj(suffix_out)
|
v_t = self.action_out_proj(suffix_out)
|
||||||
|
|||||||
@@ -77,7 +77,8 @@ class SmolVLMWithExpertModel(nn.Module):
|
|||||||
self.vlm = AutoModelForImageTextToText.from_pretrained(
|
self.vlm = AutoModelForImageTextToText.from_pretrained(
|
||||||
model_id,
|
model_id,
|
||||||
device_map="auto",
|
device_map="auto",
|
||||||
torch_dtype="bfloat16",
|
# torch_dtype="bfloat16",
|
||||||
|
torch_dtype=torch.float16,
|
||||||
low_cpu_mem_usage=True,
|
low_cpu_mem_usage=True,
|
||||||
)
|
)
|
||||||
config = self.vlm.config
|
config = self.vlm.config
|
||||||
|
|||||||
Reference in New Issue
Block a user