From 61e55830dadc2bbaa56feed5306c5d0d1282bad0 Mon Sep 17 00:00:00 2001
From: Jade Choghari <chogharijade@gmail.com>
Date: Thu, 4 Sep 2025 12:12:10 +0200
Subject: [PATCH] Add SmolVLA LIBERO training example script and related model tweaks

---
 examples/7_train_libero_smolvla.sh            | 90 +++++++++++++++++++
 .../policies/smolvla/modeling_smolvla.py      | 17 +++-
 .../policies/smolvla/smolvlm_with_expert.py   |  3 +-
 3 files changed, 106 insertions(+), 4 deletions(-)
 create mode 100644 examples/7_train_libero_smolvla.sh

diff --git a/examples/7_train_libero_smolvla.sh b/examples/7_train_libero_smolvla.sh
new file mode 100644
index 000000000..3943e3c96
--- /dev/null
+++ b/examples/7_train_libero_smolvla.sh
@@ -0,0 +1,90 @@
+#!/bin/bash
+# Train the SmolVLA policy on the LIBERO dataset: offline HF/W&B, one GPU, all caches on $RAID.
+
+set -euo pipefail
+
+# repo/env: run from the lerobot checkout so the relative train.py path below resolves
+cd ~/lerobot || exit 1
+# conda activate lerobot
+export LC_ALL=C
+
+
+rm -f core-*
+
+# storage / caches (use RAID to avoid filling $HOME)
+RAID=/raid/jade
+export TRANSFORMERS_CACHE=$RAID/.cache/huggingface/transformers
+export HF_HOME=$RAID/.cache/huggingface
+export HF_DATASETS_CACHE=$RAID/.cache/huggingface/datasets
+export HF_LEROBOT_HOME=$RAID/.cache/huggingface/lerobot
+export WANDB_CACHE_DIR=$RAID/.cache/wandb
+export TMPDIR=$RAID/.cache/tmp
+mkdir -p "$TMPDIR"
+export WANDB_MODE=offline
+export HF_DATASETS_OFFLINE=1
+export HF_HUB_OFFLINE=1
+export TOKENIZERS_PARALLELISM=false
+export MUJOCO_GL=egl
+
+# not referenced below; only needed if the job is launched through accelerate
+PORT=29522
+
+# =================== CONFIG ===================
+ENV=libero
+TASK=libero_spatial
+REPO_ID=physical-intelligence/libero
+
+POLICY=smolvla
+# Local checkpoint dir (presumably a snapshot of HuggingFaceTB/SmolVLM2-2.2B-Instruct); the hub is offline.
+VLM=$RAID/.cache/huggingface/models/SmolVLM2-2.2B-Instruct
+
+# Optim / scheduling
+LR=1e-4
+DECAY_LR=2.5e-6
+DECAY_STEPS=30000
+USE_AMP=false
+TRAIN_EXPERT_ONLY=true
+N_ACTION_STEPS=1
+SEED=1000
+
+# Training loop
+OFFLINE_STEPS=100000
+BATCH_SIZE=32
+EVAL_FREQ=0
+SAVE_FREQ=300000
+EVAL_BATCH_SIZE=1
+NUM_EPISODES=1
+
+# GPU selection 0, 1, 2, 3
+export CUDA_VISIBLE_DEVICES=1
+
+# naming/output dir: every "/" in REPO_ID becomes "_" so the run name is a single path component
+TRAIN_DIR=$RAID/logs/lerobot/lerobot_${REPO_ID//\//_}_${POLICY}_lr${LR}bs${BATCH_SIZE}steps${OFFLINE_STEPS}
+echo "Training dir: $TRAIN_DIR"
+
+# train: wipe any previous run with the same name; hub upload is disabled because HF_HUB_OFFLINE=1
+rm -rf "$TRAIN_DIR"
+
+python src/lerobot/scripts/train.py \
+  --policy.type="$POLICY" \
+  --policy.vlm_model_name="$VLM" \
+  --dataset.repo_id="$REPO_ID" \
+  --dataset.root="$HF_DATASETS_CACHE" \
+  --env.type="$ENV" \
+  --env.task="$TASK" \
+  --output_dir="$TRAIN_DIR" \
+  --batch_size="$BATCH_SIZE" \
+  --steps="$OFFLINE_STEPS" \
+  --eval_freq="$EVAL_FREQ" \
+  --save_freq="$SAVE_FREQ" \
+  --eval.batch_size="$EVAL_BATCH_SIZE" \
+  --eval.n_episodes="$NUM_EPISODES" \
+  --policy.use_amp="$USE_AMP" \
+  --policy.optimizer_lr="$LR" \
+  --policy.push_to_hub=false \
+  --policy.scheduler_decay_lr="$DECAY_LR" \
+  --policy.scheduler_decay_steps="$DECAY_STEPS" \
+  --policy.n_action_steps="$N_ACTION_STEPS" \
+  --policy.train_expert_only="$TRAIN_EXPERT_ONLY" \
+  --seed="$SEED" \
+  --wandb.enable=false
diff --git a/src/lerobot/policies/smolvla/modeling_smolvla.py b/src/lerobot/policies/smolvla/modeling_smolvla.py
index 18f2fc58a..95ed993d2 100644
--- a/src/lerobot/policies/smolvla/modeling_smolvla.py
+++ b/src/lerobot/policies/smolvla/modeling_smolvla.py
@@ -63,7 +63,7 @@ import torch.nn.functional as F  # noqa: N812
 from torch import Tensor, nn
 from transformers import AutoProcessor
 
-from lerobot.constants import ACTION, OBS_STATE
+from lerobot.constants import ACTION
 from lerobot.policies.normalize import (
     Normalize,
     Unnormalize,
@@ -75,7 +75,8 @@ from lerobot.policies.utils import (
     populate_queues,
 )
 from lerobot.utils.utils import get_safe_dtype
-
+OBS_STATE = 'state'  # NOTE(review): replaces the OBS_STATE previously imported from lerobot.constants — confirm this key matches the dataset
+ACTION = 'actions'  # NOTE(review): shadows the ACTION imported from lerobot.constants above (that import is now dead) — confirm intent
 # Matches ".soNNN", optionally followed by "-something", up to the "_buffer_" marker
 _VARIANT_RE = re.compile(r"\.so\d+(?:-[\w]+)?_buffer_")
 
@@ -824,12 +825,21 @@ class VLAFlowMatching(nn.Module):
         pad_masks = torch.cat(pad_masks, dim=1)
         att_masks = torch.tensor(att_masks, dtype=embs.dtype, device=embs.device)
         att_masks = att_masks[None, :].expand(bsize, len(att_masks))
+        # added by jade
+        seq_len = pad_masks.shape[1]
+        if seq_len < self.config.chunk_size:
+            embs = pad_tensor(embs, self.config.chunk_size, pad_value=0)
+            pad_masks = pad_tensor(pad_masks, self.config.chunk_size, pad_value=0)
+            att_masks = pad_tensor(att_masks, self.config.chunk_size, pad_value=0)
         return embs, pad_masks, att_masks
 
     def forward(
         self, images, img_masks, lang_tokens, lang_masks, state, actions, noise=None, time=None
     ) -> Tensor:
         """Do a full training forward pass and compute the loss (batch_size x num_steps x num_motors)"""
+        #added by jade
+        if actions.ndim == 2:
+            actions = actions[:, None, :].expand(-1, self.config.chunk_size, -1)
         if noise is None:
             noise = self.sample_noise(actions.shape, actions.device)
 
@@ -857,7 +867,8 @@ class VLAFlowMatching(nn.Module):
             use_cache=False,
             fill_kv_cache=False,
         )
-        suffix_out = suffix_out[:, -self.config.chunk_size :]
+        # suffix_out = suffix_out[:, -self.config.chunk_size :]
+        suffix_out = suffix_out[:, -self.config.chunk_size:, :]
         # Original openpi code, upcast attention output
         suffix_out = suffix_out.to(dtype=torch.float32)
         v_t = self.action_out_proj(suffix_out)
diff --git a/src/lerobot/policies/smolvla/smolvlm_with_expert.py b/src/lerobot/policies/smolvla/smolvlm_with_expert.py
index f3d1a693a..f6a49dccf 100644
--- a/src/lerobot/policies/smolvla/smolvlm_with_expert.py
+++ b/src/lerobot/policies/smolvla/smolvlm_with_expert.py
@@ -77,7 +77,8 @@ class SmolVLMWithExpertModel(nn.Module):
             self.vlm = AutoModelForImageTextToText.from_pretrained(
                 model_id,
                 device_map="auto",
-                torch_dtype="bfloat16",
+                # torch_dtype="bfloat16",
+                torch_dtype=torch.float16,
                 low_cpu_mem_usage=True,
             )
             config = self.vlm.config