From 61e55830dadc2bbaa56feed5306c5d0d1282bad0 Mon Sep 17 00:00:00 2001
From: Jade Choghari
Date: Thu, 4 Sep 2025 12:12:10 +0200
Subject: [PATCH] add train

Add examples/7_train_libero_smolvla.sh, an offline launcher that trains the
smolvla policy on physical-intelligence/libero (task libero_spatial) on one
visible GPU, with caches, temp files and logs kept under $RAID.

Adapt the SmolVLA policy code for that run:
- modeling_smolvla.py: define the OBS_STATE / ACTION batch keys locally as
  "state" / "actions" instead of importing them from lerobot.constants; pad
  embs / pad_masks / att_masks up to chunk_size when the sequence is shorter;
  expand 2-D action batches to (batch, chunk_size, dim) in forward().
- smolvlm_with_expert.py: load the VLM weights in float16 instead of
  bfloat16.
---
Notes (not part of the commit message):
- The launcher used to pass --policy.vlm_model_name twice (hub id, then a
  hard-coded local path); it is now passed once, from $VLM, which points at
  the same local path derived from $RAID.
- The hunk that rewrote `suffix_out[:, -self.config.chunk_size :]` as
  `suffix_out[:, -self.config.chunk_size:, :]` was dropped: it does not
  change behaviour and only left commented-out code behind.
- The post-image blob hashes on the `index` lines are those of the original
  patch and were not recomputed.
- The From: header carries no e-mail address; `git am` may reject it, plain
  `git apply` is unaffected.

 examples/7_train_libero_smolvla.sh            | 90 +++++++++++++++++++
 .../policies/smolvla/modeling_smolvla.py      | 18 +++-
 .../policies/smolvla/smolvlm_with_expert.py   |  4 +-
 3 files changed, 110 insertions(+), 2 deletions(-)
 create mode 100644 examples/7_train_libero_smolvla.sh

diff --git a/examples/7_train_libero_smolvla.sh b/examples/7_train_libero_smolvla.sh
new file mode 100644
index 000000000..3943e3c96
--- /dev/null
+++ b/examples/7_train_libero_smolvla.sh
@@ -0,0 +1,90 @@
+#!/bin/bash
+# smolvla training
+
+set -euo pipefail
+
+# repo/env
+cd ~/lerobot || exit 1
+# conda activate lerobot
+export LC_ALL=C
+
+# clear stale core-* files (presumably core dumps) from the repo root
+rm -f core-*
+
+# storage / caches (use RAID to avoid filling $HOME); override with RAID=...
+RAID=${RAID:-/raid/jade}
+export TRANSFORMERS_CACHE="$RAID/.cache/huggingface/transformers"
+export HF_HOME="$RAID/.cache/huggingface"
+export HF_DATASETS_CACHE="$RAID/.cache/huggingface/datasets"
+export HF_LEROBOT_HOME="$RAID/.cache/huggingface/lerobot"
+export WANDB_CACHE_DIR="$RAID/.cache/wandb"
+export TMPDIR="$RAID/.cache/tmp"
+mkdir -p "$TMPDIR"
+export WANDB_MODE=offline
+export HF_DATASETS_OFFLINE=1
+export HF_HUB_OFFLINE=1
+export TOKENIZERS_PARALLELISM=false
+export MUJOCO_GL=egl
+
+# only relevant when launching through accelerate; unused by the command below
+PORT=29522
+
+# =================== CONFIG ===================
+ENV=libero
+TASK=libero_spatial
+REPO_ID=physical-intelligence/libero
+
+POLICY=smolvla
+# local copy of HuggingFaceTB/SmolVLM2-2.2B-Instruct (the hub is offline, see above)
+VLM="$RAID/.cache/huggingface/models/SmolVLM2-2.2B-Instruct"
+
+# Optim / scheduling
+LR=1e-4
+DECAY_LR=2.5e-6
+DECAY_STEPS=30000
+USE_AMP=false
+TRAIN_EXPERT_ONLY=true
+N_ACTION_STEPS=1
+SEED=1000
+
+# Training loop
+OFFLINE_STEPS=100000
+BATCH_SIZE=32
+EVAL_FREQ=0
+SAVE_FREQ=300000
+EVAL_BATCH_SIZE=1
+NUM_EPISODES=1
+
+# GPU selection 0, 1, 2, 3 (override with CUDA_VISIBLE_DEVICES=...)
+export CUDA_VISIBLE_DEVICES=${CUDA_VISIBLE_DEVICES:-1}
+
+# naming/output dir
+TRAIN_DIR="$RAID/logs/lerobot/lerobot_${REPO_ID//\//_}_${POLICY}_lr${LR}bs${BATCH_SIZE}steps${OFFLINE_STEPS}"
+echo "Training dir: $TRAIN_DIR"
+
+# train from scratch: any previous run in the same output dir is deleted
+rm -rf -- "$TRAIN_DIR"
+
+python src/lerobot/scripts/train.py \
+    --policy.type="$POLICY" \
+    --policy.vlm_model_name="$VLM" \
+    --dataset.repo_id="$REPO_ID" \
+    --dataset.root="$HF_DATASETS_CACHE" \
+    --env.type="$ENV" \
+    --env.task="$TASK" \
+    --output_dir="$TRAIN_DIR" \
+    --batch_size="$BATCH_SIZE" \
+    --steps="$OFFLINE_STEPS" \
+    --eval_freq="$EVAL_FREQ" \
+    --save_freq="$SAVE_FREQ" \
+    --eval.batch_size="$EVAL_BATCH_SIZE" \
+    --eval.n_episodes="$NUM_EPISODES" \
+    --policy.use_amp="$USE_AMP" \
+    --policy.optimizer_lr="$LR" \
+    --policy.repo_id=None \
+    --policy.scheduler_decay_lr="$DECAY_LR" \
+    --policy.scheduler_decay_steps="$DECAY_STEPS" \
+    --policy.n_action_steps="$N_ACTION_STEPS" \
+    --policy.train_expert_only="$TRAIN_EXPERT_ONLY" \
+    --seed="$SEED" \
+    --wandb.enable=false
diff --git a/src/lerobot/policies/smolvla/modeling_smolvla.py b/src/lerobot/policies/smolvla/modeling_smolvla.py
index 18f2fc58a..95ed993d2 100644
--- a/src/lerobot/policies/smolvla/modeling_smolvla.py
+++ b/src/lerobot/policies/smolvla/modeling_smolvla.py
@@ -63,7 +63,6 @@ import torch.nn.functional as F  # noqa: N812
 from torch import Tensor, nn
 from transformers import AutoProcessor
 
-from lerobot.constants import ACTION, OBS_STATE
 from lerobot.policies.normalize import (
     Normalize,
     Unnormalize,
@@ -75,7 +74,13 @@ from lerobot.policies.utils import (
     populate_queues,
 )
 from lerobot.utils.utils import get_safe_dtype
 
+# Batch keys, defined here instead of being imported from lerobot.constants.
+# NOTE(review): presumably the key names of the dataset trained on in
+# examples/7_train_libero_smolvla.sh; they now apply to every user of this module - confirm.
+OBS_STATE = "state"
+ACTION = "actions"
+
 
 # Matches ".soNNN", optionally followed by "-something", up to the "_buffer_" marker
 _VARIANT_RE = re.compile(r"\.so\d+(?:-[\w]+)?_buffer_")
@@ -824,12 +829,23 @@ class VLAFlowMatching(nn.Module):
         pad_masks = torch.cat(pad_masks, dim=1)
         att_masks = torch.tensor(att_masks, dtype=embs.dtype, device=embs.device)
         att_masks = att_masks[None, :].expand(bsize, len(att_masks))
+        # Pad embs and both masks with zeros up to chunk_size when the sequence is shorter;
+        # presumably so the last chunk_size positions sliced out in forward() always exist.
+        chunk_size = self.config.chunk_size
+        if pad_masks.shape[1] < chunk_size:
+            embs = pad_tensor(embs, chunk_size, pad_value=0)
+            pad_masks = pad_tensor(pad_masks, chunk_size, pad_value=0)
+            att_masks = pad_tensor(att_masks, chunk_size, pad_value=0)
         return embs, pad_masks, att_masks
 
     def forward(
         self, images, img_masks, lang_tokens, lang_masks, state, actions, noise=None, time=None
     ) -> Tensor:
         """Do a full training forward pass and compute the loss (batch_size x num_steps x num_motors)"""
+        # 2-D actions get a middle axis of length chunk_size, with the same values
+        # repeated along it, so the rest of forward() always sees a 3-D tensor.
+        if actions.ndim == 2:
+            actions = actions[:, None, :].expand(-1, self.config.chunk_size, -1)
         if noise is None:
             noise = self.sample_noise(actions.shape, actions.device)
 
diff --git a/src/lerobot/policies/smolvla/smolvlm_with_expert.py b/src/lerobot/policies/smolvla/smolvlm_with_expert.py
index f3d1a693a..f6a49dccf 100644
--- a/src/lerobot/policies/smolvla/smolvlm_with_expert.py
+++ b/src/lerobot/policies/smolvla/smolvlm_with_expert.py
@@ -77,7 +77,9 @@ class SmolVLMWithExpertModel(nn.Module):
             self.vlm = AutoModelForImageTextToText.from_pretrained(
                 model_id,
                 device_map="auto",
-                torch_dtype="bfloat16",
+                # NOTE(review): switched from bfloat16 to float16; the reason is not recorded here.
+                # float16 has a smaller exponent range than bfloat16 - confirm this is intended.
+                torch_dtype="float16",
                 low_cpu_mem_usage=True,
             )
             config = self.vlm.config