diff --git a/examples/6_evaluate_libero.sh b/examples/6_evaluate_libero.sh index ad6ca0f13..c15d71c95 100644 --- a/examples/6_evaluate_libero.sh +++ b/examples/6_evaluate_libero.sh @@ -14,11 +14,11 @@ export HF_DATASETS_OFFLINE=1 export HF_HUB_OFFLINE=1 export TOKENIZERS_PARALLELISM=false export MUJOCO_GL=egl -export CUDA_VISIBLE_DEVICES=3 +export CUDA_VISIBLE_DEVICES=2 # CONFIGURATION POLICY_PATH="/raid/jade/logs/lerobot/lerobot_2_HuggingFaceVLA_libero_smolvla_lr1e-4bs32steps100000/checkpoints/100000/pretrained_model" -POLICY_PATH="/raid/jade/models/smolvlamust" +POLICY_PATH="/raid/jade/logs/lerobot/lerobot_new_HuggingfaceVLA_libero_smolvla_lr1e-4bs32steps100000/checkpoints/100000/pretrained_model" TASK=libero_spatial ENV_TYPE="libero" BATCH_SIZE=10 diff --git a/examples/9_evaluate_must.sh b/examples/9_evaluate_must.sh new file mode 100644 index 000000000..534153330 --- /dev/null +++ b/examples/9_evaluate_must.sh @@ -0,0 +1,2811 @@ +#!/bin/bash + +#SBATCH --job-name=lerobot_eval_smolpi0_libero_eval10ep_ca_sa2_16vlm_w075_smolvlm2b_lr7e5 +#SBATCH --nodes=1 +#SBATCH --ntasks=1 +#SBATCH --gpus-per-node=1 +#SBATCH --mail-type=END,FAIL +#SBATCH --output=/lustre/fswork/projects/rech/dyf/ugz83ue/logs/slurm/lerobot_eval_smolpi0_libero_eval10ep_ca_sa2_16vlm_w075_smolvlm2b_lr7e5.out +###SBATCH --nodelist=jean-zay-a101 +#SBATCH --cpus-per-task=45 +###SBATCH --exclusive +#SBATCH --time=15:00:00 +#SBATCH --mail-user=mustafa.shukor@isir.upmc.fr + +##SBATCH --partition=gpu_p2 +##SBATCH --qos=qos_gpu-t3 +###SBATCH -C v100-32g +##SBATCH -A dyf@v100 + +##SBATCH --partition=gpu_p5 +##SBATCH -C a100 +###SBATCH -A dyf@a100 +##SBATCH -A lqm@a100 +##SBATCH --qos=qos_gpu_a100-dev +##SBATCH --qos=qos_gpu_a100-t3 + +#SBATCH --partition=gpu_p6 +#SBATCH -C h100 +#SBATCH -A lqm@h100 +###SBATCH --qos=qos_gpu_h100-dev +#SBATCH --qos=qos_gpu_h100-t3 + +###SBATCH --begin=now+2hour + +# cd ~/lerobot_pi +# source ~/.bashrc +# source activate lerobot +# export LC_ALL=C + +# rm core-* +export 
CUDA_VISIBLE_DEVICES="${CUDA_VISIBLE_DEVICES:-3}" +# storage / caches: everything lives under $RAID. TRANSFORMERS_CACHE is the legacy variable name, newer transformers releases read HF_HOME +RAID=/raid/jade +export TRANSFORMERS_CACHE="$RAID/.cache/huggingface/transformers" +export HF_HOME="$RAID/.cache/huggingface" +export HF_DATASETS_CACHE="$RAID/.cache/huggingface/datasets" +export HF_LEROBOT_HOME="$RAID/.cache/huggingface/lerobot" +export WANDB_CACHE_DIR="$RAID/.cache/wandb" +export TMPDIR="$RAID/.cache/tmp" +mkdir -p -- "$TMPDIR" +export WANDB_MODE=offline +export HF_DATASETS_OFFLINE=1 +# export HF_HUB_OFFLINE=1 +export TOKENIZERS_PARALLELISM=false +export MUJOCO_GL=egl +# GPU: the export that opens this section defaults to device 3 and keeps any CUDA_VISIBLE_DEVICES already set by the caller or by Slurm, so it is not overridden again here + +PORT=29512 + +## WANDB_MODE=offline keeps runs local, upload them later with +## wandb sync wandb/offline-run-* + + +ENV=libero + +# TASK=libero_10 +TASK=libero_spatial +# TASK=libero_spatial +# TASK=libero_10 +# TASK=libero_spatial + + +POLICY_NAME=smolpi0 + +POLICY=smolpi0 +ENV=libero + + + + + + +CKPT_KEYS_MAPPING=model._orig_mod.//model. +LOAD_VLM_WEIGHTS=true +PEFT_METHOD=freeze +SELF_ATTN_ONLY_ACTIONS=false +CAUSAL_ATTENTION_ON_HISTORY=false + +PREDICT_RELATIVE_ACTIONS=false +RELATIVE_ACTIONS_MODE=first +SHUFFLE_CAMERA_POSITIONS=false + +VLM_IMG_SIZE=-1 +REGRESSION_LOSS=false + + +# ## Baseline for ablation study +# EXPERT_WIDTH_MULTIPLIER=0.5 +# PAST_OBS_KEYS="image" +# N_OBS_STEPS=1 +# NUM_EXPERT_LAYERS=0 +# CHUNK_SIZE=50 +# NUM_VLM_LAYERS=0 +# PAD_LANG_TO=max_length +# EVAL_CKPT=$WORK/logs/lerobot/lerobot_physical_intelligence_libero_smolpi0_lr1e-4_bs8_steps100000_gpus2_freeze32_onlyexpert_1act_promptfalse_imgtoktrue_nobs1_compiletrue_cross_attn_pref0_gap1_localimgfalse_reverseimgorderfalse_statetopreftrue/checkpoints/last/pretrained_model/ +# ADD_IMAGE_TOKENS=true +# ATTN_MODE=cross_attn +# STATE_TO_PREFIX=true +# CAUSAL_ACTION_ATTENTION_MASK=false +# SELF_ATTN_EVERY_N_LAYERS=0 +# VLM_NAME=HuggingFaceTB/SmolVLM2-500M-Video-Instruct + +# EXPERT_WIDTH_MULTIPLIER=0.5 +# PAST_OBS_KEYS="image" +# N_OBS_STEPS=1 +# NUM_EXPERT_LAYERS=0 +# CHUNK_SIZE=50 +# NUM_VLM_LAYERS=0 +# PAD_LANG_TO=max_length +# 
EVAL_CKPT=$WORK/logs/lerobot/lerobot_physical_intelligence_libero_smolpi0_lr1e-4_bs8_steps100000_gpus2_freeze32_onlyexpert_1act_promptfalse_imgtoktrue_nobs1_compiletrue_cross_attn/checkpoints/last/pretrained_model/ +# ADD_IMAGE_TOKENS=true +# ATTN_MODE=cross_attn +# STATE_TO_PREFIX=false +# CAUSAL_ACTION_ATTENTION_MASK=false +# SELF_ATTN_EVERY_N_LAYERS=0 +# VLM_NAME=HuggingFaceTB/SmolVLM2-500M-Video-Instruct + + +# EXPERT_WIDTH_MULTIPLIER=0.5 +# PAST_OBS_KEYS="image" +# N_OBS_STEPS=1 +# NUM_EXPERT_LAYERS=0 +# CHUNK_SIZE=50 +# NUM_VLM_LAYERS=0 +# PAD_LANG_TO=max_length +# EVAL_CKPT=$WORK/logs/lerobot/lerobot_physical_intelligence_libero_smolpi0_lr1e-4_bs8_steps100000_gpus2_freeze32_onlyexpert_1act_promptfalse_imgtoktrue_nobs1_compiletrue_self_attn/checkpoints/last/pretrained_model/ +# ADD_IMAGE_TOKENS=true +# ATTN_MODE=self_attn +# STATE_TO_PREFIX=false +# CAUSAL_ACTION_ATTENTION_MASK=false +# SELF_ATTN_EVERY_N_LAYERS=0 +# VLM_NAME=HuggingFaceTB/SmolVLM2-500M-Video-Instruct + + +# EXPERT_WIDTH_MULTIPLIER=0.5 +# PAST_OBS_KEYS="image" +# N_OBS_STEPS=1 +# NUM_EXPERT_LAYERS=0 +# CHUNK_SIZE=50 +# NUM_VLM_LAYERS=0 +# PAD_LANG_TO=longest +# EVAL_CKPT=$WORK/logs/lerobot/lerobot_physical_intelligence_libero_smolpi0_lr1e-4bs8steps100000gpus2freeze32_imgtoktrue_self_attn_gap1_localimgfalse_statetopreffalse_explay0_vlml0_causalacttrue_sa0/checkpoints/last/pretrained_model/ +# ADD_IMAGE_TOKENS=true +# ATTN_MODE=self_attn +# STATE_TO_PREFIX=false +# CAUSAL_ACTION_ATTENTION_MASK=true +# SELF_ATTN_EVERY_N_LAYERS=0 +# VLM_NAME=HuggingFaceTB/SmolVLM2-500M-Video-Instruct + +# EXPERT_WIDTH_MULTIPLIER=0.5 +# PAST_OBS_KEYS="image" +# N_OBS_STEPS=1 +# NUM_EXPERT_LAYERS=0 +# CHUNK_SIZE=50 +# NUM_VLM_LAYERS=0 +# PAD_LANG_TO=longest +# EVAL_CKPT=$WORK/logs/lerobot/lerobot_physical_intelligence_libero_smolpi0_lr5e-4bs8steps100000gpus2freeze32_imgtoktrue_cross_attn_gap1_localimgfalse_statetopreftrue_explay0_vlml0_causalactfalse_sa0_smolvlm2250/checkpoints/best/pretrained_model/ +# 
ADD_IMAGE_TOKENS=true +# ATTN_MODE=cross_attn +# STATE_TO_PREFIX=true +# CAUSAL_ACTION_ATTENTION_MASK=false +# SELF_ATTN_EVERY_N_LAYERS=0 +# VLM_NAME=HuggingFaceTB/SmolVLM2-256M-Video-Instruct + +# EXPERT_WIDTH_MULTIPLIER=0.5 +# PAST_OBS_KEYS="image" +# N_OBS_STEPS=1 +# NUM_EXPERT_LAYERS=0 +# CHUNK_SIZE=50 +# NUM_VLM_LAYERS=0 +# PAD_LANG_TO=longest +# EVAL_CKPT=$WORK/logs/lerobot/lerobot_physical_intelligence_libero_smolpi0_lr8e-5bs8steps100000gpus2freeze32_imgtoktrue_cross_attn_gap1_localimgfalse_statetopreftrue_explay0_vlml0_causalactfalse_sa0_smolvlm22b/checkpoints/best/pretrained_model/ +# ADD_IMAGE_TOKENS=true +# ATTN_MODE=cross_attn +# STATE_TO_PREFIX=true +# CAUSAL_ACTION_ATTENTION_MASK=false +# SELF_ATTN_EVERY_N_LAYERS=0 +# VLM_NAME=HuggingFaceTB/SmolVLM2-2.2B-Instruct + +# EXPERT_WIDTH_MULTIPLIER=0.5 +# PAST_OBS_KEYS="image" +# N_OBS_STEPS=1 +# NUM_EXPERT_LAYERS=0 +# CHUNK_SIZE=50 +# NUM_VLM_LAYERS=0 +# PAD_LANG_TO=longest +# EVAL_CKPT=$WORK/logs/lerobot/lerobot_physical_intelligence_libero_smolpi0_lr1e-4_bs8_steps100000_gpus2_freeze32_onlyexpert_1act_promptfalse_imgtoktrue_nobs1_compiletrue_self_attn_pref0_gap1_localimgfalse_reverseimgorderfalse_statetopreftrue_toklongest_explay0_vlml0_causalacttrue/checkpoints/last/pretrained_model/ +# ADD_IMAGE_TOKENS=true +# ATTN_MODE=self_attn +# STATE_TO_PREFIX=true +# CAUSAL_ACTION_ATTENTION_MASK=true +# SELF_ATTN_EVERY_N_LAYERS=0 +# VLM_NAME=HuggingFaceTB/SmolVLM2-500M-Video-Instruct + +# EXPERT_WIDTH_MULTIPLIER=0.5 +# PAST_OBS_KEYS="image" +# N_OBS_STEPS=1 +# NUM_EXPERT_LAYERS=0 +# CHUNK_SIZE=50 +# NUM_VLM_LAYERS=0 +# PAD_LANG_TO=longest +# EVAL_CKPT=$WORK/logs/lerobot/lerobot_physical_intelligence_libero_smolpi0_lr1e-4bs8steps100000gpus2freeze32_imgtoktrue_cross_attn_gap1_localimgfalse_statetopreftrue_explay0_vlml0_causalacttrue_sa2/checkpoints/best/pretrained_model/ +# ADD_IMAGE_TOKENS=true +# ATTN_MODE=cross_attn +# STATE_TO_PREFIX=true +# CAUSAL_ACTION_ATTENTION_MASK=true +# SELF_ATTN_EVERY_N_LAYERS=2 +# 
VLM_NAME=HuggingFaceTB/SmolVLM2-500M-Video-Instruct + +# EXPERT_WIDTH_MULTIPLIER=0.5 +# PAST_OBS_KEYS="image" +# N_OBS_STEPS=1 +# NUM_EXPERT_LAYERS=0 +# CHUNK_SIZE=50 +# NUM_VLM_LAYERS=0 +# PAD_LANG_TO=longest +# EVAL_CKPT=$WORK/logs/lerobot/lerobot_physical_intelligence_libero_smolpi0_lr1e-4_bs8_steps100000_gpus2_freeze32_onlyexpert_1act_promptfalse_imgtoktrue_nobs1_compiletrue_cross_attn_pref0_gap1_localimgfalse_reverseimgorderfalse_statetopreffalse/checkpoints/last/pretrained_model/ +# ADD_IMAGE_TOKENS=true +# ATTN_MODE=cross_attn +# STATE_TO_PREFIX=false +# CAUSAL_ACTION_ATTENTION_MASK=true +# SELF_ATTN_EVERY_N_LAYERS=0 +# VLM_NAME=HuggingFaceTB/SmolVLM2-500M-Video-Instruct + +# EXPERT_WIDTH_MULTIPLIER=0.5 +# PAST_OBS_KEYS="image" +# N_OBS_STEPS=1 +# NUM_EXPERT_LAYERS=0 +# CHUNK_SIZE=50 +# NUM_VLM_LAYERS=0 +# PAD_LANG_TO=longest +# EVAL_CKPT=$WORK/logs/lerobot/lerobot_physical_intelligence_libero_smolpi0_lr1e-4bs8steps100000gpus2freeze32_imgtoktrue_self_attn_gap1_localimgfalse_statetopreffalse_explay0_vlml0_causalacttrue_sa0_smolvlm2500/checkpoints/best/pretrained_model/ +# ADD_IMAGE_TOKENS=true +# ATTN_MODE=self_attn +# STATE_TO_PREFIX=false +# CAUSAL_ACTION_ATTENTION_MASK=true +# SELF_ATTN_EVERY_N_LAYERS=0 +# VLM_NAME=HuggingFaceTB/SmolVLM2-500M-Video-Instruct + +# EXPERT_WIDTH_MULTIPLIER=0.5 +# PAST_OBS_KEYS="image" +# N_OBS_STEPS=1 +# NUM_EXPERT_LAYERS=0 +# CHUNK_SIZE=50 +# NUM_VLM_LAYERS=0 +# PAD_LANG_TO=longest +# EVAL_CKPT=$WORK/logs/lerobot/lerobot_physical_intelligence_libero_smolpi0_lr1e-4bs8steps100000gpus2freeze32_imgtoktrue_self_attn_gap1_localimgfalse_statetopreftrue_explay0_vlml0_causalactfalse_sa0_smolvlm2500/checkpoints/best/pretrained_model/ +# ADD_IMAGE_TOKENS=true +# ATTN_MODE=self_attn +# STATE_TO_PREFIX=true +# CAUSAL_ACTION_ATTENTION_MASK=false +# SELF_ATTN_EVERY_N_LAYERS=0 +# VLM_NAME=HuggingFaceTB/SmolVLM2-500M-Video-Instruct + +# EXPERT_WIDTH_MULTIPLIER=0.5 +# PAST_OBS_KEYS="image" +# N_OBS_STEPS=1 +# NUM_EXPERT_LAYERS=0 +# 
CHUNK_SIZE=50 +# NUM_VLM_LAYERS=8 +# PAD_LANG_TO=longest +# EVAL_CKPT=$WORK/logs/lerobot/lerobot_physical_intelligence_libero_smolpi0_lr1e-4bs8steps100000gpus2freeze32_imgtoktrue_cross_attn_gap1_localimgfalse_statetopreftrue_explay0_vlml8_causalactfalse_sa0/checkpoints/best/pretrained_model/ +# ADD_IMAGE_TOKENS=true +# ATTN_MODE=cross_attn +# STATE_TO_PREFIX=true +# CAUSAL_ACTION_ATTENTION_MASK=false +# SELF_ATTN_EVERY_N_LAYERS=0 +# VLM_NAME=HuggingFaceTB/SmolVLM2-500M-Video-Instruct + +# EXPERT_WIDTH_MULTIPLIER=0.5 +# PAST_OBS_KEYS="image" +# N_OBS_STEPS=1 +# NUM_EXPERT_LAYERS=0 +# CHUNK_SIZE=50 +# NUM_VLM_LAYERS=16 +# PAD_LANG_TO=longest +# EVAL_CKPT=$WORK/logs/lerobot/lerobot_physical_intelligence_libero_smolpi0_lr1e-4bs8steps100000gpus2freeze32_imgtoktrue_cross_attn_gap1_localimgfalse_statetopreftrue_explay0_vlml16_causalactfalse_sa0/checkpoints/last/pretrained_model/ +# ADD_IMAGE_TOKENS=true +# ATTN_MODE=cross_attn +# STATE_TO_PREFIX=true +# CAUSAL_ACTION_ATTENTION_MASK=false +# SELF_ATTN_EVERY_N_LAYERS=0 +# VLM_NAME=HuggingFaceTB/SmolVLM2-500M-Video-Instruct + +# EXPERT_WIDTH_MULTIPLIER=0.5 +# PAST_OBS_KEYS="image" +# N_OBS_STEPS=1 +# NUM_EXPERT_LAYERS=0 +# CHUNK_SIZE=50 +# NUM_VLM_LAYERS=24 +# PAD_LANG_TO=longest +# EVAL_CKPT=$WORK/logs/lerobot/lerobot_physical_intelligence_libero_smolpi0_lr1e-4bs8steps100000gpus2freeze32_imgtoktrue_cross_attn_gap1_localimgfalse_statetopreftrue_explay0_vlml24_causalactfalse_sa0/checkpoints/best/pretrained_model/ +# ADD_IMAGE_TOKENS=true +# ATTN_MODE=cross_attn +# STATE_TO_PREFIX=true +# CAUSAL_ACTION_ATTENTION_MASK=false +# SELF_ATTN_EVERY_N_LAYERS=0 +# VLM_NAME=HuggingFaceTB/SmolVLM2-500M-Video-Instruct + +# EXPERT_WIDTH_MULTIPLIER=0.5 +# PAST_OBS_KEYS="image" +# N_OBS_STEPS=1 +# NUM_EXPERT_LAYERS=0 +# CHUNK_SIZE=50 +# NUM_VLM_LAYERS=0 +# PAD_LANG_TO=longest +# 
EVAL_CKPT=$WORK/logs/lerobot/lerobot_physical_intelligence_libero_smolpi0_lr1e-4bs8steps100000gpus2freeze32_imgtokfalse_cross_attn_gap1_localimgfalse_statetopreftrue_explay0_vlml0_causalactfalse_sa0_smolvlm2500/checkpoints/best/pretrained_model/ +# ADD_IMAGE_TOKENS=false +# ATTN_MODE=cross_attn +# STATE_TO_PREFIX=true +# CAUSAL_ACTION_ATTENTION_MASK=false +# SELF_ATTN_EVERY_N_LAYERS=0 +# VLM_NAME=HuggingFaceTB/SmolVLM2-500M-Video-Instruct + +# EXPERT_WIDTH_MULTIPLIER=0.5 +# PAST_OBS_KEYS="image" +# N_OBS_STEPS=1 +# NUM_EXPERT_LAYERS=0 +# CHUNK_SIZE=100 +# NUM_VLM_LAYERS=0 +# PAD_LANG_TO=longest +# EVAL_CKPT=$WORK/logs/lerobot/lerobot_physical_intelligence_libero_smolpi0_lr1e-4bs8steps100000gpus2freeze32_imgtoktrue_cross_attn_gap1_localimgfalse_statetopreftrue_explay0_vlml0_causalactfalse_sa0_smolvlm2500_chunk100/checkpoints/best/pretrained_model/ +# ADD_IMAGE_TOKENS=true +# ATTN_MODE=cross_attn +# STATE_TO_PREFIX=true +# CAUSAL_ACTION_ATTENTION_MASK=false +# SELF_ATTN_EVERY_N_LAYERS=0 +# VLM_NAME=HuggingFaceTB/SmolVLM2-500M-Video-Instruct + +# EXPERT_WIDTH_MULTIPLIER=0.5 +# PAST_OBS_KEYS="image" +# N_OBS_STEPS=1 +# NUM_EXPERT_LAYERS=0 +# CHUNK_SIZE=30 +# NUM_VLM_LAYERS=0 +# PAD_LANG_TO=longest +# EVAL_CKPT=$WORK/logs/lerobot/lerobot_physical_intelligence_libero_smolpi0_lr1e-4bs8steps100000gpus2freeze32_imgtoktrue_cross_attn_gap1_localimgfalse_statetopreftrue_explay0_vlml0_causalactfalse_sa0_smolvlm2500_chunk30/checkpoints/best/pretrained_model/ +# ADD_IMAGE_TOKENS=true +# ATTN_MODE=cross_attn +# STATE_TO_PREFIX=true +# CAUSAL_ACTION_ATTENTION_MASK=false +# SELF_ATTN_EVERY_N_LAYERS=0 +# VLM_NAME=HuggingFaceTB/SmolVLM2-500M-Video-Instruct + +# EXPERT_WIDTH_MULTIPLIER=0.5 +# PAST_OBS_KEYS="image" +# N_OBS_STEPS=1 +# NUM_EXPERT_LAYERS=0 +# CHUNK_SIZE=10 +# NUM_VLM_LAYERS=0 +# PAD_LANG_TO=longest +# 
EVAL_CKPT=$WORK/logs/lerobot/lerobot_physical_intelligence_libero_smolpi0_lr1e-4bs8steps100000gpus2freeze32_imgtoktrue_cross_attn_gap1_localimgfalse_statetopreftrue_explay0_vlml0_causalactfalse_sa0_smolvlm2500_chunk10/checkpoints/best/pretrained_model/ +# ADD_IMAGE_TOKENS=true +# ATTN_MODE=cross_attn +# STATE_TO_PREFIX=true +# CAUSAL_ACTION_ATTENTION_MASK=false +# SELF_ATTN_EVERY_N_LAYERS=0 +# VLM_NAME=HuggingFaceTB/SmolVLM2-500M-Video-Instruct + +# EXPERT_WIDTH_MULTIPLIER=0.5 +# PAST_OBS_KEYS="image" +# N_OBS_STEPS=1 +# NUM_EXPERT_LAYERS=0 +# CHUNK_SIZE=1 +# NUM_VLM_LAYERS=0 +# PAD_LANG_TO=longest +# EVAL_CKPT=$WORK/logs/lerobot/lerobot_physical_intelligence_libero_smolpi0_lr1e-4bs8steps100000gpus2freeze32_imgtoktrue_cross_attn_gap1_localimgfalse_statetopreftrue_explay0_vlml0_causalactfalse_sa0_smolvlm2500_chunk1/checkpoints/best/pretrained_model/ +# ADD_IMAGE_TOKENS=true +# ATTN_MODE=cross_attn +# STATE_TO_PREFIX=true +# CAUSAL_ACTION_ATTENTION_MASK=false +# SELF_ATTN_EVERY_N_LAYERS=0 +# VLM_NAME=HuggingFaceTB/SmolVLM2-500M-Video-Instruct + +# EXPERT_WIDTH_MULTIPLIER=0.5 +# PAST_OBS_KEYS="image" +# N_OBS_STEPS=1 +# NUM_EXPERT_LAYERS=16 +# CHUNK_SIZE=50 +# NUM_VLM_LAYERS=0 +# PAD_LANG_TO=longest +# EVAL_CKPT=$WORK/logs/lerobot/lerobot_physical_intelligence_libero_smolpi0_lr1e-4bs8steps100000gpus2freeze32_imgtoktrue_cross_attn_gap1_localimgfalse_statetopreftrue_explay16_vlml0_causalactfalse_sa0_smolvlm2500_chunk50_nobs1/checkpoints/best/pretrained_model/ +# ADD_IMAGE_TOKENS=true +# ATTN_MODE=cross_attn +# STATE_TO_PREFIX=true +# CAUSAL_ACTION_ATTENTION_MASK=false +# SELF_ATTN_EVERY_N_LAYERS=0 +# VLM_NAME=HuggingFaceTB/SmolVLM2-500M-Video-Instruct + + +# EXPERT_WIDTH_MULTIPLIER=0.5 +# PAST_OBS_KEYS="image" +# N_OBS_STEPS=2 +# NUM_EXPERT_LAYERS=0 +# CHUNK_SIZE=50 +# NUM_VLM_LAYERS=0 +# PAD_LANG_TO=longest +# 
EVAL_CKPT=$WORK/logs/lerobot/lerobot_physical_intelligence_libero_smolpi0_lr1e-4bs8steps100000gpus2freeze32_imgtoktrue_cross_attn_gap1_localimgfalse_statetopreftrue_explay0_vlml0_causalactfalse_sa0_smolvlm2500_chunk50_nobs2/checkpoints/best/pretrained_model/ +# ADD_IMAGE_TOKENS=true +# ATTN_MODE=cross_attn +# STATE_TO_PREFIX=true +# CAUSAL_ACTION_ATTENTION_MASK=false +# SELF_ATTN_EVERY_N_LAYERS=0 +# VLM_NAME=HuggingFaceTB/SmolVLM2-500M-Video-Instruct + +# EXPERT_WIDTH_MULTIPLIER=0.5 +# PAST_OBS_KEYS="image" +# N_OBS_STEPS=3 +# NUM_EXPERT_LAYERS=0 +# CHUNK_SIZE=50 +# NUM_VLM_LAYERS=0 +# PAD_LANG_TO=longest +# EVAL_CKPT=$WORK/logs/lerobot/lerobot_physical_intelligence_libero_smolpi0_lr1e-4_bs8_steps100000_gpus2_freeze32_onlyexpert_1act_promptfalse_imgtoktrue_nobs3_compiletrue_cross_attn_pref0_gap1_localimgfalse_reverseimgorderfalse_statetopreftrue_toklongest/checkpoints/last/pretrained_model/ +# ADD_IMAGE_TOKENS=true +# ATTN_MODE=cross_attn +# STATE_TO_PREFIX=true +# CAUSAL_ACTION_ATTENTION_MASK=false +# SELF_ATTN_EVERY_N_LAYERS=0 +# VLM_NAME=HuggingFaceTB/SmolVLM2-500M-Video-Instruct + +# EXPERT_WIDTH_MULTIPLIER=0.5 +# PAST_OBS_KEYS="observation.state" +# N_OBS_STEPS=3 +# NUM_EXPERT_LAYERS=0 +# CHUNK_SIZE=50 +# NUM_VLM_LAYERS=0 +# PAD_LANG_TO=longest +# EVAL_CKPT=$WORK/logs/lerobot/lerobot_physical_intelligence_libero_smolpi0_lr1e-4bs8steps100000gpus2freeze32_imgtoktrue_cross_attn_gap1_localimgfalse_statetopreftrue_explay0_vlml0_causalactfalse_sa0_smolvlm2500_chunk50_nobs3_paststates/checkpoints/best/pretrained_model/ +# ADD_IMAGE_TOKENS=true +# ATTN_MODE=cross_attn +# STATE_TO_PREFIX=true +# CAUSAL_ACTION_ATTENTION_MASK=false +# SELF_ATTN_EVERY_N_LAYERS=0 +# VLM_NAME=HuggingFaceTB/SmolVLM2-500M-Video-Instruct + +# EXPERT_WIDTH_MULTIPLIER=0.5 +# PAST_OBS_KEYS="observation.state,image" +# N_OBS_STEPS=3 +# NUM_EXPERT_LAYERS=0 +# CHUNK_SIZE=50 +# NUM_VLM_LAYERS=0 +# PAD_LANG_TO=longest +# 
EVAL_CKPT=$WORK/logs/lerobot/lerobot_physical_intelligence_libero_smolpi0_lr1e-4bs8steps100000gpus2freeze32_imgtoktrue_cross_attn_gap1_localimgfalse_statetopreftrue_explay0_vlml0_causalactfalse_sa0_smolvlm2500_chunk50_nobs3_paststatesimgs/checkpoints/best/pretrained_model/ +# ADD_IMAGE_TOKENS=true +# ATTN_MODE=cross_attn +# STATE_TO_PREFIX=true +# CAUSAL_ACTION_ATTENTION_MASK=false +# SELF_ATTN_EVERY_N_LAYERS=0 +# VLM_NAME=HuggingFaceTB/SmolVLM2-500M-Video-Instruct + + +# EXPERT_WIDTH_MULTIPLIER=1 +# PAST_OBS_KEYS="image" +# N_OBS_STEPS=1 +# NUM_EXPERT_LAYERS=0 +# CHUNK_SIZE=50 +# NUM_VLM_LAYERS=0 +# PAD_LANG_TO=longest +# EVAL_CKPT=$WORK/logs/lerobot/lerobot_physical_intelligence_libero_smolpi0_lr9.5e-5bs8steps100000gpus2freeze32_imgtoktrue_cross_attn_gap1_localimgfalse_statetopreftrue_explay0_vlml0_causalactfalse_sa0_smolvlm2500_chunk50_nobs1_expw1/checkpoints/best/pretrained_model/ +# ADD_IMAGE_TOKENS=true +# ATTN_MODE=cross_attn +# STATE_TO_PREFIX=true +# CAUSAL_ACTION_ATTENTION_MASK=false +# SELF_ATTN_EVERY_N_LAYERS=0 +# VLM_NAME=HuggingFaceTB/SmolVLM2-500M-Video-Instruct + + +# EXPERT_WIDTH_MULTIPLIER=0.75 +# PAST_OBS_KEYS="image" +# N_OBS_STEPS=1 +# NUM_EXPERT_LAYERS=0 +# CHUNK_SIZE=50 +# NUM_VLM_LAYERS=0 +# PAD_LANG_TO=longest +# EVAL_CKPT=$WORK/logs/lerobot/lerobot_physical_intelligence_libero_smolpi0_lr9.5e-5bs8steps100000gpus2freeze32_imgtoktrue_cross_attn_gap1_localimgfalse_statetopreftrue_explay0_vlml0_causalactfalse_sa0_smolvlm2500_chunk50_nobs1_expw0.75/checkpoints/best/pretrained_model/ +# ADD_IMAGE_TOKENS=true +# ATTN_MODE=cross_attn +# STATE_TO_PREFIX=true +# CAUSAL_ACTION_ATTENTION_MASK=false +# SELF_ATTN_EVERY_N_LAYERS=0 +# VLM_NAME=HuggingFaceTB/SmolVLM2-500M-Video-Instruct + + +# EXPERT_WIDTH_MULTIPLIER=0.25 +# PAST_OBS_KEYS="image" +# N_OBS_STEPS=1 +# NUM_EXPERT_LAYERS=0 +# CHUNK_SIZE=50 +# NUM_VLM_LAYERS=0 +# PAD_LANG_TO=longest +# 
EVAL_CKPT=$WORK/logs/lerobot/lerobot_physical_intelligence_libero_smolpi0_lr2e-4bs8steps100000gpus2freeze32_imgtoktrue_cross_attn_gap1_localimgfalse_statetopreftrue_explay0_vlml0_causalactfalse_sa0_smolvlm2500_chunk50_nobs1_expw0.25/checkpoints/best/pretrained_model/ +# ADD_IMAGE_TOKENS=true +# ATTN_MODE=cross_attn +# STATE_TO_PREFIX=true +# CAUSAL_ACTION_ATTENTION_MASK=false +# SELF_ATTN_EVERY_N_LAYERS=0 +# VLM_NAME=HuggingFaceTB/SmolVLM2-500M-Video-Instruct + + +# EXPERT_WIDTH_MULTIPLIER=0.5 +# PAST_OBS_KEYS="observation.state,image" +# N_OBS_STEPS=1 +# NUM_EXPERT_LAYERS=0 +# CHUNK_SIZE=50 +# NUM_VLM_LAYERS=16 +# PAD_LANG_TO=longest +# EVAL_CKPT=$WORK/logs/lerobot/lerobot_physical_intelligence_libero_smolpi0_lr1e-4bs8steps100000gpus2freeze32_imgtokfalse_cross_attn_gap1_localimgfalse_statetopreftrue_explay0_vlml16_causalactfalse_sa0_smolvlm2500_chunk50_nobs1_expw0.5/checkpoints/best/pretrained_model/ +# ADD_IMAGE_TOKENS=false +# ATTN_MODE=cross_attn +# STATE_TO_PREFIX=true +# CAUSAL_ACTION_ATTENTION_MASK=false +# SELF_ATTN_EVERY_N_LAYERS=0 +# VLM_NAME=HuggingFaceTB/SmolVLM2-500M-Video-Instruct + + +# EXPERT_WIDTH_MULTIPLIER=0.5 +# PAST_OBS_KEYS="observation.state,image" +# N_OBS_STEPS=1 +# NUM_EXPERT_LAYERS=0 +# CHUNK_SIZE=50 +# NUM_VLM_LAYERS=16 +# PAD_LANG_TO=longest +# EVAL_CKPT=$WORK/logs/lerobot/lerobot_physical_intelligence_libero_smolpi0_lr1e-4bs16steps100000gpus2freeze32_imgtokfalse_cross_attn_gap1_localimgfalse_statetopreftrue_explay0_vlml16_causalactfalse_sa0_smolvlm2500_chunk50_nobs1_expw0.5/checkpoints/best/pretrained_model/ +# ADD_IMAGE_TOKENS=false +# ATTN_MODE=cross_attn +# STATE_TO_PREFIX=true +# CAUSAL_ACTION_ATTENTION_MASK=false +# SELF_ATTN_EVERY_N_LAYERS=0 +# VLM_NAME=HuggingFaceTB/SmolVLM2-500M-Video-Instruct + + + +# EXPERT_WIDTH_MULTIPLIER=0.5 +# PAST_OBS_KEYS="observation.state,image" +# N_OBS_STEPS=1 +# NUM_EXPERT_LAYERS=0 +# CHUNK_SIZE=50 +# NUM_VLM_LAYERS=16 +# PAD_LANG_TO=longest +# 
EVAL_CKPT=$WORK/logs/lerobot/lerobot_physical_intelligence_libero_smolpi0_lr1e-4bs32steps100000gpus2freeze32_imgtokfalse_cross_attn_gap1_localimgfalse_statetopreftrue_explay0_vlml16_causalactfalse_sa0_smolvlm2500_chunk50_nobs1_expw0.5/checkpoints/best/pretrained_model/ +# ADD_IMAGE_TOKENS=false +# ATTN_MODE=cross_attn +# STATE_TO_PREFIX=true +# CAUSAL_ACTION_ATTENTION_MASK=false +# SELF_ATTN_EVERY_N_LAYERS=0 +# VLM_NAME=HuggingFaceTB/SmolVLM2-500M-Video-Instruct + + +# EXPERT_WIDTH_MULTIPLIER=0.5 +# PAST_OBS_KEYS="image" +# N_OBS_STEPS=1 +# NUM_EXPERT_LAYERS=0 +# CHUNK_SIZE=50 +# NUM_VLM_LAYERS=0 +# PAD_LANG_TO=longest +# EVAL_CKPT=$WORK/logs/lerobot/lerobot_physical_intelligence_libero_smolpi0_lr5e-4bs8steps100000gpus2freeze32_imgtoktrue_cross_attn_gap1_localimgfalse_statetopreftrue_explay0_vlml0_causalactfalse_sa0_smolvlm1250_chunk50_nobs1_expw0.5/checkpoints/best/pretrained_model/ +# ADD_IMAGE_TOKENS=true +# ATTN_MODE=cross_attn +# STATE_TO_PREFIX=true +# CAUSAL_ACTION_ATTENTION_MASK=false +# SELF_ATTN_EVERY_N_LAYERS=0 +# VLM_NAME=HuggingFaceTB/SmolVLM-256M-Instruct + + +# EXPERT_WIDTH_MULTIPLIER=0.5 +# PAST_OBS_KEYS="image" +# N_OBS_STEPS=1 +# NUM_EXPERT_LAYERS=0 +# CHUNK_SIZE=50 +# NUM_VLM_LAYERS=0 +# PAD_LANG_TO=longest +# EVAL_CKPT=$WORK/logs/lerobot/lerobot_physical_intelligence_libero_smolpi0_lr8e-5bs8steps100000gpus2freeze32_imgtoktrue_cross_attn_gap1_localimgfalse_statetopreftrue_explay0_vlml0_causalactfalse_sa0_smolvlm12b_chunk50_nobs1_expw0.5/checkpoints/best/pretrained_model/ +# ADD_IMAGE_TOKENS=true +# ATTN_MODE=cross_attn +# STATE_TO_PREFIX=true +# CAUSAL_ACTION_ATTENTION_MASK=false +# SELF_ATTN_EVERY_N_LAYERS=0 +# VLM_NAME=HuggingFaceTB/SmolVLM-Instruct + + + +# EXPERT_WIDTH_MULTIPLIER=0.5 +# PAST_OBS_KEYS="image" +# N_OBS_STEPS=1 +# NUM_EXPERT_LAYERS=0 +# CHUNK_SIZE=50 +# NUM_VLM_LAYERS=0 +# PAD_LANG_TO=longest +# 
EVAL_CKPT=$WORK/logs/lerobot/lerobot_physical_intelligence_libero_smolpi0_lr1e-4bs8steps100000gpus2freeze32_imgtoktrue_cross_attn_gap1_localimgfalse_statetopreftrue_explay0_vlml0_causalactfalse_sa0_smolvlm1500_chunk50_nobs1_expw0.5/checkpoints/best/pretrained_model/ +# ADD_IMAGE_TOKENS=true +# ATTN_MODE=cross_attn +# STATE_TO_PREFIX=true +# CAUSAL_ACTION_ATTENTION_MASK=false +# SELF_ATTN_EVERY_N_LAYERS=0 +# VLM_NAME=HuggingFaceTB/SmolVLM-500M-Instruct + + +# EXPERT_WIDTH_MULTIPLIER=0.5 +# PAST_OBS_KEYS="image" +# N_OBS_STEPS=1 +# NUM_EXPERT_LAYERS=0 +# CHUNK_SIZE=50 +# NUM_VLM_LAYERS=0 +# PAD_LANG_TO=longest +# EVAL_CKPT=$WORK/logs/lerobot/lerobot_physical_intelligence_libero_smolpi0_lr8e-5bs8steps100000gpus2freeze32_imgtokfalse_cross_attn_gap1_localimgfalse_statetopreftrue_explay0_vlml0_causalactfalse_sa0_smolvlm1500_chunk50_nobs1_expw0.5/checkpoints/best/pretrained_model/ +# ADD_IMAGE_TOKENS=false +# ATTN_MODE=cross_attn +# STATE_TO_PREFIX=true +# CAUSAL_ACTION_ATTENTION_MASK=false +# SELF_ATTN_EVERY_N_LAYERS=0 +# VLM_NAME=HuggingFaceTB/SmolVLM-500M-Instruct + + + +# EXPERT_WIDTH_MULTIPLIER=0.5 +# PAST_OBS_KEYS="image" +# N_OBS_STEPS=1 +# NUM_EXPERT_LAYERS=0 +# CHUNK_SIZE=50 +# NUM_VLM_LAYERS=0 +# PAD_LANG_TO=longest +# EVAL_CKPT=$WORK/logs/lerobot/lerobot_physical_intelligence_libero_smolpi0_lr8e-5bs8steps100000gpus2freeze32_imgtokfalse_cross_attn_gap1_localimgfalse_statetopreftrue_explay0_vlml0_causalactfalse_sa0_smolvlm1500_chunk50_nobs1_expw0.5/checkpoints/best/pretrained_model/ +# ADD_IMAGE_TOKENS=false +# ATTN_MODE=cross_attn +# STATE_TO_PREFIX=true +# CAUSAL_ACTION_ATTENTION_MASK=false +# SELF_ATTN_EVERY_N_LAYERS=0 +# VLM_NAME=HuggingFaceTB/SmolVLM-500M-Instruct + +# EXPERT_WIDTH_MULTIPLIER=0.5 +# PAST_OBS_KEYS="image" +# N_OBS_STEPS=1 +# NUM_EXPERT_LAYERS=0 +# CHUNK_SIZE=50 +# NUM_VLM_LAYERS=16 +# PAD_LANG_TO=longest +# 
EVAL_CKPT=$WORK/logs/lerobot/lerobot_physical_intelligence_libero_smolpi0_lr1e-4bs8steps100000gpus2freeze32_imgtoktrue_cross_attn_gap1_localimgfalse_statetopreftrue_explay0_vlml16_causalactfalse_sa0_smolvlm2500_chunk50_nobs1_expw0.5/checkpoints/best/pretrained_model/ +# ADD_IMAGE_TOKENS=true +# ATTN_MODE=cross_attn +# STATE_TO_PREFIX=true +# CAUSAL_ACTION_ATTENTION_MASK=false +# SELF_ATTN_EVERY_N_LAYERS=0 +# VLM_NAME=HuggingFaceTB/SmolVLM-500M-Instruct + + +# EXPERT_WIDTH_MULTIPLIER=0.5 +# PAST_OBS_KEYS="image" +# N_OBS_STEPS=1 +# NUM_EXPERT_LAYERS=0 +# CHUNK_SIZE=50 +# NUM_VLM_LAYERS=16 +# PAD_LANG_TO=longest +# EVAL_CKPT=$WORK/logs/lerobot/lerobot_physical_intelligence_libero_smolpi0_lr1e-4bs8steps100000gpus2freeze32_imgtoktrue_cross_attn_gap1_localimgfalse_statetopreftrue_explay0_vlml16_causalacttrue_sa2_smolvlm2500_chunk50_nobs1_expw0.5/checkpoints/best/pretrained_model/ +# ADD_IMAGE_TOKENS=true +# ATTN_MODE=cross_attn +# STATE_TO_PREFIX=true +# CAUSAL_ACTION_ATTENTION_MASK=true +# SELF_ATTN_EVERY_N_LAYERS=2 +# VLM_NAME=HuggingFaceTB/SmolVLM-500M-Instruct + + +# EXPERT_WIDTH_MULTIPLIER=0.5 +# PAST_OBS_KEYS="image" +# N_OBS_STEPS=1 +# NUM_EXPERT_LAYERS=0 +# CHUNK_SIZE=50 +# NUM_VLM_LAYERS=16 +# PAD_LANG_TO=longest +# EVAL_CKPT=$WORK/logs/lerobot/lerobot_physical_intelligence_libero_smolpi0_lr1e-4bs32steps100000gpus2freeze32_imgtoktrue_cross_attn_gap1_localimgfalse_statetopreftrue_explay0_vlml16_causalacttrue_sa2_smolvlm2500_chunk50_nobs1_expw0.5/checkpoints/best/pretrained_model/ +# ADD_IMAGE_TOKENS=true +# ATTN_MODE=cross_attn +# STATE_TO_PREFIX=true +# CAUSAL_ACTION_ATTENTION_MASK=true +# SELF_ATTN_EVERY_N_LAYERS=2 +# VLM_NAME=HuggingFaceTB/SmolVLM-500M-Instruct + + + +# EXPERT_WIDTH_MULTIPLIER=0.5 +# PAST_OBS_KEYS="image" +# N_OBS_STEPS=1 +# NUM_EXPERT_LAYERS=0 +# CHUNK_SIZE=50 +# NUM_VLM_LAYERS=16 +# PAD_LANG_TO=longest +# 
EVAL_CKPT=$WORK/logs/lerobot/lerobot_physical_intelligence_libero_smolpi0_lr1e-4bs32steps100000gpus2freeze32_imgtoktrue_cross_attn_gap1_localimgfalse_statetopreftrue_explay0_vlml16_causalactfalse_sa0_smolvlm2500_chunk50_nobs1_expw0.5/checkpoints/best/pretrained_model/ +# ADD_IMAGE_TOKENS=true +# ATTN_MODE=cross_attn +# STATE_TO_PREFIX=true +# CAUSAL_ACTION_ATTENTION_MASK=false +# SELF_ATTN_EVERY_N_LAYERS=0 +# VLM_NAME=HuggingFaceTB/SmolVLM-500M-Instruct + + +# EXPERT_WIDTH_MULTIPLIER=0.5 +# PAST_OBS_KEYS="image" +# N_OBS_STEPS=1 +# NUM_EXPERT_LAYERS=0 +# CHUNK_SIZE=50 +# NUM_VLM_LAYERS=0 +# PAD_LANG_TO=longest +# EVAL_CKPT=$WORK/logs/lerobot/lerobot_physical_intelligence_libero_smolpi0_lr1e-4bs8steps100000gpus2freeze32_imgtoktrue_cross_attn_gap1_localimgfalse_statetopreftrue_explay0_vlml0_causalactfalse_sa0_smolvlm2500_chunk50_nobs1_expw0.5_rep/checkpoints/best/pretrained_model/ +# ADD_IMAGE_TOKENS=true +# ATTN_MODE=cross_attn +# STATE_TO_PREFIX=true +# CAUSAL_ACTION_ATTENTION_MASK=false +# SELF_ATTN_EVERY_N_LAYERS=0 +# VLM_NAME=HuggingFaceTB/SmolVLM-500M-Instruct + +# EXPERT_WIDTH_MULTIPLIER=0.5 +# PAST_OBS_KEYS="image" +# N_OBS_STEPS=1 +# NUM_EXPERT_LAYERS=0 +# CHUNK_SIZE=50 +# NUM_VLM_LAYERS=16 +# PAD_LANG_TO=longest +# EVAL_CKPT=$WORK/logs/lerobot/lerobot_physical_intelligence_libero_smolpi0_lr1e-4bs32steps100000gpus2freeze32_imgtoktrue_cross_attn_gap1_localimgfalse_statetopreftrue_explay0_vlml16_causalacttrue_sa2_smolvlm2500_chunk50_nobs1_expw0.5/checkpoints/best/pretrained_model/ +# ADD_IMAGE_TOKENS=true +# ATTN_MODE=cross_attn +# STATE_TO_PREFIX=true +# CAUSAL_ACTION_ATTENTION_MASK=true +# SELF_ATTN_EVERY_N_LAYERS=2 +# VLM_NAME=HuggingFaceTB/SmolVLM-500M-Instruct + +# EXPERT_WIDTH_MULTIPLIER=0.5 +# PAST_OBS_KEYS="image" +# N_OBS_STEPS=1 +# NUM_EXPERT_LAYERS=0 +# CHUNK_SIZE=50 +# NUM_VLM_LAYERS=16 +# PAD_LANG_TO=longest +# 
EVAL_CKPT=$WORK/logs/lerobot/lerobot_physical_intelligence_libero_smolpi0_lr1e-4bs32steps100000gpus2freeze32_imgtoktrue_cross_attn_gap1_localimgfalse_statetopreftrue_explay0_vlml16_causalactfalse_sa2_smolvlm2500_chunk50_nobs1_expw0.5_rep/checkpoints/best/pretrained_model/ +# ADD_IMAGE_TOKENS=true +# ATTN_MODE=cross_attn +# STATE_TO_PREFIX=true +# CAUSAL_ACTION_ATTENTION_MASK=false +# SELF_ATTN_EVERY_N_LAYERS=2 +# VLM_NAME=HuggingFaceTB/SmolVLM-500M-Instruct + + +# EXPERT_WIDTH_MULTIPLIER=0.5 +# PAST_OBS_KEYS="image" +# N_OBS_STEPS=1 +# NUM_EXPERT_LAYERS=0 +# CHUNK_SIZE=50 +# NUM_VLM_LAYERS=16 +# PAD_LANG_TO=longest +# EVAL_CKPT=$WORK/logs/lerobot/lerobot_physical_intelligence_libero_smolpi0_lr1e-4bs32steps100000gpus2freeze32_imgtokfalse_cross_attn_gap1_localimgfalse_statetopreftrue_explay0_vlml16_causalacttrue_sa2_smolvlm2500_chunk50_nobs1_expw0.5/checkpoints/best/pretrained_model/ +# ADD_IMAGE_TOKENS=false +# ATTN_MODE=cross_attn +# STATE_TO_PREFIX=true +# CAUSAL_ACTION_ATTENTION_MASK=true +# SELF_ATTN_EVERY_N_LAYERS=2 +# VLM_NAME=HuggingFaceTB/SmolVLM-500M-Instruct + + +# EXPERT_WIDTH_MULTIPLIER=0.5 +# PAST_OBS_KEYS="image" +# N_OBS_STEPS=1 +# NUM_EXPERT_LAYERS=0 +# CHUNK_SIZE=50 +# NUM_VLM_LAYERS=0 +# PAD_LANG_TO=longest +# EVAL_CKPT=$WORK/logs/lerobot/lerobot_physical_intelligence_libero_smolpi0_lr1e-4bs8steps100000gpus2full8_imgtoktrue_cross_attn_gap1_localimgfalse_statetopreftrue_explay0_vlml0_causalacttrue_sa0_smolvlm2500_chunk50_nobs1_expw0.5_lrvlm1e-6/checkpoints/best/pretrained_model/ +# ADD_IMAGE_TOKENS=true +# ATTN_MODE=cross_attn +# STATE_TO_PREFIX=true +# CAUSAL_ACTION_ATTENTION_MASK=true +# SELF_ATTN_EVERY_N_LAYERS=0 +# VLM_NAME=HuggingFaceTB/SmolVLM-500M-Instruct + + +# EXPERT_WIDTH_MULTIPLIER=0.5 +# PAST_OBS_KEYS="image" +# N_OBS_STEPS=1 +# NUM_EXPERT_LAYERS=0 +# CHUNK_SIZE=50 +# NUM_VLM_LAYERS=0 +# PAD_LANG_TO=longest +# 
EVAL_CKPT=$WORK/logs/lerobot/lerobot_physical_intelligence_libero_smolpi0_lr1e-4bs8steps100000gpus2full8_imgtoktrue_cross_attn_gap1_localimgfalse_statetopreftrue_explay0_vlml0_causalacttrue_sa0_smolvlm2500_chunk50_nobs1_expw0.5_lrvlm1e-5/checkpoints/best/pretrained_model/ +# ADD_IMAGE_TOKENS=true +# ATTN_MODE=cross_attn +# STATE_TO_PREFIX=true +# CAUSAL_ACTION_ATTENTION_MASK=true +# SELF_ATTN_EVERY_N_LAYERS=0 +# VLM_NAME=HuggingFaceTB/SmolVLM-500M-Instruct + + +# EXPERT_WIDTH_MULTIPLIER=0.5 +# PAST_OBS_KEYS="image" +# N_OBS_STEPS=1 +# NUM_EXPERT_LAYERS=0 +# CHUNK_SIZE=50 +# NUM_VLM_LAYERS=0 +# PAD_LANG_TO=longest +# EVAL_CKPT=$WORK/logs/lerobot/lerobot_physical_intelligence_libero_smolpi0_lr1e-4bs8steps100000gpus2full8_imgtoktrue_cross_attn_gap1_localimgfalse_statetopreftrue_explay0_vlml0_causalacttrue_sa0_smolvlm2500_chunk50_nobs1_expw0.5_lrvlm1e-4/checkpoints/best/pretrained_model/ +# ADD_IMAGE_TOKENS=true +# ATTN_MODE=cross_attn +# STATE_TO_PREFIX=true +# CAUSAL_ACTION_ATTENTION_MASK=true +# SELF_ATTN_EVERY_N_LAYERS=0 +# VLM_NAME=HuggingFaceTB/SmolVLM-500M-Instruct + + +# PEFT_METHOD=lora +# PEFT_TARGET_MODEL=text +# LORA_TARGET_MODULES=q_proj,v_proj,k_proj +# LORA_R=32 +# EXPERT_WIDTH_MULTIPLIER=0.5 +# PAST_OBS_KEYS="image" +# N_OBS_STEPS=1 +# NUM_EXPERT_LAYERS=0 +# CHUNK_SIZE=50 +# NUM_VLM_LAYERS=0 +# PAD_LANG_TO=longest +# EVAL_CKPT=$WORK/logs/lerobot/lerobot_physical_intelligence_libero_smolpi0_lr1e-4bs8steps100000gpus2lora32_imgtoktrue_cross_attn_gap1_localimgfalse_statetopreftrue_explay0_vlml0_causalacttrue_sa0_smolvlm2500_chunk50_nobs1_expw0.5_lrvlm1e-4_loraqkv/checkpoints/best/pretrained_model/ +# ADD_IMAGE_TOKENS=true +# ATTN_MODE=cross_attn +# STATE_TO_PREFIX=true +# CAUSAL_ACTION_ATTENTION_MASK=true +# SELF_ATTN_EVERY_N_LAYERS=0 +# VLM_NAME=HuggingFaceTB/SmolVLM-500M-Instruct + + +# PEFT_METHOD=lora +# PEFT_TARGET_MODEL=text +# LORA_TARGET_MODULES=q_proj,v_proj,k_proj +# LORA_R=32 +# EXPERT_WIDTH_MULTIPLIER=0.5 +# PAST_OBS_KEYS="image" +# 
N_OBS_STEPS=1 +# NUM_EXPERT_LAYERS=0 +# CHUNK_SIZE=50 +# NUM_VLM_LAYERS=0 +# PAD_LANG_TO=longest +# EVAL_CKPT=$WORK/logs/lerobot/lerobot_physical_intelligence_libero_smolpi0_lr1e-4bs8steps100000gpus2lora32_imgtoktrue_cross_attn_gap1_localimgfalse_statetopreftrue_explay0_vlml0_causalacttrue_sa0_smolvlm2500_chunk50_nobs1_expw0.5_lrvlm1e-5_loraqkv/checkpoints/best/pretrained_model/ +# ADD_IMAGE_TOKENS=true +# ATTN_MODE=cross_attn +# STATE_TO_PREFIX=true +# CAUSAL_ACTION_ATTENTION_MASK=true +# SELF_ATTN_EVERY_N_LAYERS=0 +# VLM_NAME=HuggingFaceTB/SmolVLM-500M-Instruct + +# PEFT_METHOD=lora +# PEFT_TARGET_MODEL=text +# LORA_TARGET_MODULES=q_proj,v_proj,k_proj,up_proj,down_proj,gate_proj +# LORA_R=32 +# EXPERT_WIDTH_MULTIPLIER=0.5 +# PAST_OBS_KEYS="image" +# N_OBS_STEPS=1 +# NUM_EXPERT_LAYERS=0 +# CHUNK_SIZE=50 +# NUM_VLM_LAYERS=0 +# PAD_LANG_TO=longest +# EVAL_CKPT=$WORK/logs/lerobot/lerobot_physical_intelligence_libero_smolpi0_lr1e-4bs8steps100000gpus2lora32_imgtoktrue_cross_attn_gap1_localimgfalse_statetopreftrue_explay0_vlml0_causalacttrue_sa0_smolvlm2500_chunk50_nobs1_expw0.5_lrvlm1e-4/checkpoints/best/pretrained_model/ +# ADD_IMAGE_TOKENS=true +# ATTN_MODE=cross_attn +# STATE_TO_PREFIX=true +# CAUSAL_ACTION_ATTENTION_MASK=true +# SELF_ATTN_EVERY_N_LAYERS=0 +# VLM_NAME=HuggingFaceTB/SmolVLM-500M-Instruct + + +# PEFT_METHOD=lora +# PEFT_TARGET_MODEL=text +# LORA_TARGET_MODULES=q_proj,v_proj,k_proj,up_proj,down_proj,gate_proj +# LORA_R=32 +# EXPERT_WIDTH_MULTIPLIER=0.5 +# PAST_OBS_KEYS="image" +# N_OBS_STEPS=1 +# NUM_EXPERT_LAYERS=0 +# CHUNK_SIZE=50 +# NUM_VLM_LAYERS=0 +# PAD_LANG_TO=longest +# EVAL_CKPT=$WORK/logs/lerobot/lerobot_physical_intelligence_libero_smolpi0_lr1e-4bs8steps100000gpus2lora32_imgtoktrue_cross_attn_gap1_localimgfalse_statetopreftrue_explay0_vlml0_causalacttrue_sa0_smolvlm2500_chunk50_nobs1_expw0.5_lrvlm1e-5/checkpoints/best/pretrained_model/ +# ADD_IMAGE_TOKENS=true +# ATTN_MODE=cross_attn +# STATE_TO_PREFIX=true +# 
CAUSAL_ACTION_ATTENTION_MASK=true +# SELF_ATTN_EVERY_N_LAYERS=0 +# VLM_NAME=HuggingFaceTB/SmolVLM-500M-Instruct + + +# PEFT_METHOD=lora +# PEFT_TARGET_MODEL=text +# LORA_TARGET_MODULES=q_proj,v_proj,k_proj,up_proj,down_proj,gate_proj +# LORA_R=32 +# EXPERT_WIDTH_MULTIPLIER=0.5 +# PAST_OBS_KEYS="image" +# N_OBS_STEPS=1 +# NUM_EXPERT_LAYERS=0 +# CHUNK_SIZE=50 +# NUM_VLM_LAYERS=0 +# PAD_LANG_TO=longest +# EVAL_CKPT=$WORK/logs/lerobot/lerobot_physical_intelligence_libero_smolpi0_lr1e-4bs8steps100000gpus2lora32_imgtoktrue_cross_attn_gap1_localimgfalse_statetopreftrue_explay0_vlml0_causalacttrue_sa0_smolvlm2500_chunk50_nobs1_expw0.5_lrvlm1e-6/checkpoints/best/pretrained_model/ +# ADD_IMAGE_TOKENS=true +# ATTN_MODE=cross_attn +# STATE_TO_PREFIX=true +# CAUSAL_ACTION_ATTENTION_MASK=true +# SELF_ATTN_EVERY_N_LAYERS=0 +# VLM_NAME=HuggingFaceTB/SmolVLM-500M-Instruct + + +# EXPERT_WIDTH_MULTIPLIER=0.5 +# PAST_OBS_KEYS="image" +# N_OBS_STEPS=1 +# NUM_EXPERT_LAYERS=0 +# CHUNK_SIZE=50 +# NUM_VLM_LAYERS=0 +# PAD_LANG_TO=longest +# EVAL_CKPT=$WORK/logs/lerobot/lerobot_physical_intelligence_libero_smolpi0_lr1e-4bs8steps100000gpus2lora32_imgtoktrue_cross_attn_gap1_localimgfalse_statetopreftrue_explay0_vlml0_causalacttrue_sa0_smolvlm2500_chunk50_nobs1_expw0.5_lrvlm1e-5/checkpoints/best/pretrained_model/ +# ADD_IMAGE_TOKENS=true +# ATTN_MODE=cross_attn +# STATE_TO_PREFIX=true +# CAUSAL_ACTION_ATTENTION_MASK=true +# SELF_ATTN_EVERY_N_LAYERS=0 +# VLM_NAME=HuggingFaceTB/SmolVLM-500M-Instruct + + + +# EXPERT_WIDTH_MULTIPLIER=0.5 +# PAST_OBS_KEYS="image" +# N_OBS_STEPS=1 +# NUM_EXPERT_LAYERS=0 +# CHUNK_SIZE=50 +# NUM_VLM_LAYERS=16 +# PAD_LANG_TO=longest +# EVAL_CKPT=$WORK/logs/lerobot/lerobot_physical_intelligence_libero_smolpi0_lr1e-4bs32steps100000gpus2freeze32_imgtokfalse_cross_attn_gap1_localimgfalse_statetopreftrue_explay0_vlml16_causalacttrue_sa0_smolvlm2500_chunk50_nobs1_expw0.5_lrvlm1e-4_loraqkv/checkpoints/best/pretrained_model/ +# ADD_IMAGE_TOKENS=false +# ATTN_MODE=cross_attn +# 
STATE_TO_PREFIX=true +# CAUSAL_ACTION_ATTENTION_MASK=true +# SELF_ATTN_EVERY_N_LAYERS=0 +# VLM_NAME=HuggingFaceTB/SmolVLM-500M-Instruct + + + +# EXPERT_WIDTH_MULTIPLIER=0.5 +# PAST_OBS_KEYS="image" +# N_OBS_STEPS=1 +# NUM_EXPERT_LAYERS=0 +# CHUNK_SIZE=50 +# NUM_VLM_LAYERS=0 +# PAD_LANG_TO=longest +# EVAL_CKPT=$WORK/logs/lerobot/lerobot_physical_intelligence_libero_smolpi0_lr1e-4bs32steps100000gpus2freeze32_imgtokfalse_cross_attn_gap1_localimgfalse_statetopreftrue_explay0_vlml0_causalacttrue_sa2_smolvlm2500_chunk50_nobs1_expw0.5_lrvlm1e-4/checkpoints/best/pretrained_model/ +# ADD_IMAGE_TOKENS=false +# ATTN_MODE=cross_attn +# STATE_TO_PREFIX=true +# CAUSAL_ACTION_ATTENTION_MASK=true +# SELF_ATTN_EVERY_N_LAYERS=2 +# VLM_NAME=HuggingFaceTB/SmolVLM-500M-Instruct + + +# EXPERT_WIDTH_MULTIPLIER=0.5 +# PAST_OBS_KEYS="image" +# N_OBS_STEPS=1 +# NUM_EXPERT_LAYERS=0 +# CHUNK_SIZE=50 +# NUM_VLM_LAYERS=0 +# PAD_LANG_TO=longest +# EVAL_CKPT=$WORK/logs/lerobot/lerobot_physical_intelligence_libero_smolpi0_lr1e-4bs32steps100000gpus2freeze32_imgtoktrue_cross_attn_gap1_localimgfalse_statetopreftrue_explay0_vlml0_causalacttrue_sa2_smolvlm2500_chunk50_nobs1_expw0.5_lrvlm1e-4/checkpoints/best/pretrained_model/ +# ADD_IMAGE_TOKENS=true +# ATTN_MODE=cross_attn +# STATE_TO_PREFIX=true +# CAUSAL_ACTION_ATTENTION_MASK=true +# SELF_ATTN_EVERY_N_LAYERS=2 +# VLM_NAME=HuggingFaceTB/SmolVLM-500M-Instruct + + + +# EXPERT_WIDTH_MULTIPLIER=0.5 +# PAST_OBS_KEYS="image" +# N_OBS_STEPS=1 +# NUM_EXPERT_LAYERS=0 +# CHUNK_SIZE=50 +# NUM_VLM_LAYERS=16 +# PAD_LANG_TO=longest +# EVAL_CKPT=$WORK/logs/lerobot/lerobot_physical_intelligence_libero_smolpi0_lr1e-4bs16steps100000gpus2freeze32_imgtoktrue_cross_attn_gap1_localimgfalse_statetopreftrue_explay0_vlml16_causalacttrue_sa2_smolvlm2500_chunk50_nobs1_expw0.5_lrvlm1e-4/checkpoints/best/pretrained_model/ +# ADD_IMAGE_TOKENS=true +# ATTN_MODE=cross_attn +# STATE_TO_PREFIX=true +# CAUSAL_ACTION_ATTENTION_MASK=true +# SELF_ATTN_EVERY_N_LAYERS=2 +# 
VLM_NAME=HuggingFaceTB/SmolVLM-500M-Instruct + + +# EXPERT_WIDTH_MULTIPLIER=0.5 +# PAST_OBS_KEYS="image" +# N_OBS_STEPS=1 +# NUM_EXPERT_LAYERS=0 +# CHUNK_SIZE=50 +# NUM_VLM_LAYERS=0 +# PAD_LANG_TO=longest +# EVAL_CKPT=$WORK/logs/lerobot/lerobot_physical_intelligence_libero_smolpi0_lr1e-4bs16steps100000gpus2freeze32_imgtoktrue_cross_attn_gap1_localimgfalse_statetopreftrue_explay0_vlml0_causalacttrue_sa2_smolvlm2500_chunk50_nobs1_expw0.5_lrvlm1e-4/checkpoints/best/pretrained_model/ +# ADD_IMAGE_TOKENS=true +# ATTN_MODE=cross_attn +# STATE_TO_PREFIX=true +# CAUSAL_ACTION_ATTENTION_MASK=true +# SELF_ATTN_EVERY_N_LAYERS=2 +# VLM_NAME=HuggingFaceTB/SmolVLM-500M-Instruct + +# EXPERT_WIDTH_MULTIPLIER=0.5 +# PAST_OBS_KEYS="image" +# N_OBS_STEPS=1 +# NUM_EXPERT_LAYERS=0 +# CHUNK_SIZE=50 +# NUM_VLM_LAYERS=0 +# PAD_LANG_TO=longest +# EVAL_CKPT=$WORK/logs/lerobot/lerobot_physical_intelligence_libero_smolpi0_lr1e-4bs16steps100000gpus2freeze32_imgtokfalse_cross_attn_gap1_localimgfalse_statetopreftrue_explay0_vlml0_causalacttrue_sa2_smolvlm2500_chunk50_nobs1_expw0.5_lrvlm1e-4/checkpoints/best/pretrained_model/ +# ADD_IMAGE_TOKENS=false +# ATTN_MODE=cross_attn +# STATE_TO_PREFIX=true +# CAUSAL_ACTION_ATTENTION_MASK=true +# SELF_ATTN_EVERY_N_LAYERS=2 +# VLM_NAME=HuggingFaceTB/SmolVLM-500M-Instruct + + + +# EXPERT_WIDTH_MULTIPLIER=0.5 +# PAST_OBS_KEYS="image" +# N_OBS_STEPS=1 +# NUM_EXPERT_LAYERS=0 +# CHUNK_SIZE=50 +# NUM_VLM_LAYERS=0 +# PAD_LANG_TO=longest +# EVAL_CKPT=$WORK/logs/lerobot/lerobot_physical_intelligence_libero_smolpi0_lr1e-4bs8steps100000gpus2freeze32_imgtokfalse_cross_attn_gap1_localimgfalse_statetopreftrue_explay0_vlml0_causalacttrue_sa0_smolvlm2500_chunk50_nobs1_expw0.5_lrvlm1e-4/checkpoints/best/pretrained_model/ +# ADD_IMAGE_TOKENS=false +# ATTN_MODE=cross_attn +# STATE_TO_PREFIX=true +# CAUSAL_ACTION_ATTENTION_MASK=true +# SELF_ATTN_EVERY_N_LAYERS=0 +# VLM_NAME=HuggingFaceTB/SmolVLM-500M-Instruct + + +# SELF_ATTN_ONLY_ACTIONS=true +# EXPERT_WIDTH_MULTIPLIER=0.5 
+# PAST_OBS_KEYS="image" +# N_OBS_STEPS=1 +# NUM_EXPERT_LAYERS=0 +# CHUNK_SIZE=50 +# NUM_VLM_LAYERS=16 +# PAD_LANG_TO=longest +# EVAL_CKPT=$WORK/logs/lerobot/lerobot_physical_intelligence_libero_smolpi0_lr1e-4bs8steps100000gpus2freeze32_imgtoktrue_cross_attn_gap1_localimgfalse_statetopreftrue_explay0_vlml16_causalacttrue_sa2_smolvlm2500_chunk50_nobs1_expw0.5_lrvlm1e-4_saacttrue/checkpoints/best/pretrained_model/ +# ADD_IMAGE_TOKENS=true +# ATTN_MODE=cross_attn +# STATE_TO_PREFIX=true +# CAUSAL_ACTION_ATTENTION_MASK=true +# SELF_ATTN_EVERY_N_LAYERS=2 +# VLM_NAME=HuggingFaceTB/SmolVLM-500M-Instruct + + +# SELF_ATTN_ONLY_ACTIONS=false +# EXPERT_WIDTH_MULTIPLIER=0.5 +# PAST_OBS_KEYS="image" +# N_OBS_STEPS=1 +# NUM_EXPERT_LAYERS=0 +# CHUNK_SIZE=50 +# NUM_VLM_LAYERS=16 +# PAD_LANG_TO=max_length +# EVAL_CKPT=$WORK/logs/lerobot/lerobot_physical_intelligence_libero_smolpi0_lr1e-4bs8steps100000gpus2freeze32_imgtoktrue_cross_attn_gap1_localimgfalse_statetopreftrue_explay0_vlml16_causalacttrue_sa2_smolvlm2500_chunk50_nobs1_expw0.5_lrvlm1e-4_saactfalse_droptrue_max_length/checkpoints/best/pretrained_model/ +# ADD_IMAGE_TOKENS=true +# ATTN_MODE=cross_attn +# STATE_TO_PREFIX=true +# CAUSAL_ACTION_ATTENTION_MASK=true +# SELF_ATTN_EVERY_N_LAYERS=2 +# VLM_NAME=HuggingFaceTB/SmolVLM-500M-Instruct + + + +# SELF_ATTN_ONLY_ACTIONS=false +# EXPERT_WIDTH_MULTIPLIER=0.5 +# PAST_OBS_KEYS="image" +# N_OBS_STEPS=1 +# NUM_EXPERT_LAYERS=0 +# CHUNK_SIZE=50 +# NUM_VLM_LAYERS=16 +# PAD_LANG_TO=max_length +# EVAL_CKPT=$WORK/logs/lerobot/lerobot_physical_intelligence_libero_smolpi0_lr1e-4bs8steps100000gpus2freeze32_imgtoktrue_cross_attn_gap1_localimgfalse_statetopreftrue_explay0_vlml16_causalacttrue_sa2_smolvlm2500_chunk50_nobs1_expw0.5_lrvlm1e-4_saactfalse_dropfalse_max_length/checkpoints/best/pretrained_model/ +# ADD_IMAGE_TOKENS=true +# ATTN_MODE=cross_attn +# STATE_TO_PREFIX=true +# CAUSAL_ACTION_ATTENTION_MASK=true +# SELF_ATTN_EVERY_N_LAYERS=2 +# VLM_NAME=HuggingFaceTB/SmolVLM-500M-Instruct + 
+# SELF_ATTN_ONLY_ACTIONS=false +# EXPERT_WIDTH_MULTIPLIER=0.75 +# PAST_OBS_KEYS="image" +# N_OBS_STEPS=1 +# NUM_EXPERT_LAYERS=0 +# CHUNK_SIZE=50 +# NUM_VLM_LAYERS=16 +# PAD_LANG_TO=max_length +# EVAL_CKPT=$WORK/logs/lerobot/lerobot_physical_intelligence_libero_smolpi0_lr9.5e-5bs8steps100000gpus2freeze32_imgtoktrue_cross_attn_gap1_localimgfalse_statetopreftrue_explay0_vlml16_causalacttrue_sa2_smolvlm2500_chunk50_nobs1_expw0.75_lrvlm1e-4_saactfalse_dropfalse_max_length/checkpoints/best/pretrained_model/ +# ADD_IMAGE_TOKENS=true +# ATTN_MODE=cross_attn +# STATE_TO_PREFIX=true +# CAUSAL_ACTION_ATTENTION_MASK=true +# SELF_ATTN_EVERY_N_LAYERS=2 +# VLM_NAME=HuggingFaceTB/SmolVLM-500M-Instruct + + +# SELF_ATTN_ONLY_ACTIONS=false +# EXPERT_WIDTH_MULTIPLIER=0.75 +# PAST_OBS_KEYS="image" +# N_OBS_STEPS=1 +# NUM_EXPERT_LAYERS=0 +# CHUNK_SIZE=50 +# NUM_VLM_LAYERS=16 +# PAD_LANG_TO=longest +# EVAL_CKPT=$WORK/logs/lerobot/lerobot_physical_intelligence_libero_smolpi0_lr9.5e-5bs32steps100000gpus2freeze32_imgtoktrue_cross_attn_gap1_localimgfalse_statetopreftrue_explay0_vlml16_causalacttrue_sa2_smolvlm2500_chunk50_nobs1_expw0.75_lrvlm1e-4_saactfalse_dropfalse_longest/checkpoints/best/pretrained_model/ +# ADD_IMAGE_TOKENS=true +# ATTN_MODE=cross_attn +# STATE_TO_PREFIX=true +# CAUSAL_ACTION_ATTENTION_MASK=true +# SELF_ATTN_EVERY_N_LAYERS=2 +# VLM_NAME=HuggingFaceTB/SmolVLM-500M-Instruct + + +# SELF_ATTN_ONLY_ACTIONS=false +# EXPERT_WIDTH_MULTIPLIER=0.75 +# PAST_OBS_KEYS="image" +# N_OBS_STEPS=1 +# NUM_EXPERT_LAYERS=0 +# CHUNK_SIZE=50 +# NUM_VLM_LAYERS=16 +# PAD_LANG_TO=longest +# EVAL_CKPT=$WORK/logs/lerobot/lerobot_physical_intelligence_libero_smolpi0_lr1e-4bs32steps100000gpus2freeze32_imgtoktrue_cross_attn_gap1_localimgfalse_statetopreftrue_explay0_vlml16_causalacttrue_sa2_smolvlm2500_chunk50_nobs1_expw0.75_lrvlm1e-4_saactfalse_dropfalse_longest/checkpoints/best/pretrained_model/ +# ADD_IMAGE_TOKENS=true +# ATTN_MODE=cross_attn +# STATE_TO_PREFIX=true +# 
CAUSAL_ACTION_ATTENTION_MASK=true +# SELF_ATTN_EVERY_N_LAYERS=2 +# VLM_NAME=HuggingFaceTB/SmolVLM-500M-Instruct + + +# SELF_ATTN_ONLY_ACTIONS=false +# EXPERT_WIDTH_MULTIPLIER=0.5 +# PAST_OBS_KEYS="image" +# N_OBS_STEPS=1 +# NUM_EXPERT_LAYERS=0 +# CHUNK_SIZE=50 +# NUM_VLM_LAYERS=16 +# PAD_LANG_TO=longest +# EVAL_CKPT=$WORK/logs/lerobot/lerobot_physical_intelligence_libero_smolpi0_lr1e-4bs8steps100000gpus2freeze32_imgtoktrue_cross_attn_gap1_localimgfalse_statetopreftrue_explay0_vlml16_causalacttrue_sa2_smolvlm2500_chunk50_nobs1_expw0.5_lrvlm1e-4_longest_ptdroidfull/checkpoints/best/pretrained_model/ +# ADD_IMAGE_TOKENS=true +# ATTN_MODE=cross_attn +# STATE_TO_PREFIX=true +# CAUSAL_ACTION_ATTENTION_MASK=true +# SELF_ATTN_EVERY_N_LAYERS=2 +# VLM_NAME=HuggingFaceTB/SmolVLM-500M-Instruct + +# SELF_ATTN_ONLY_ACTIONS=false +# EXPERT_WIDTH_MULTIPLIER=0.5 +# PAST_OBS_KEYS="image" +# N_OBS_STEPS=1 +# NUM_EXPERT_LAYERS=0 +# CHUNK_SIZE=50 +# NUM_VLM_LAYERS=16 +# PAD_LANG_TO=longest +# EVAL_CKPT=$WORK/logs/lerobot/lerobot_physical_intelligence_libero_smolpi0_lr1e-4bs8steps100000gpus2freeze32_imgtoktrue_cross_attn_gap1_localimgfalse_statetopreftrue_explay0_vlml16_causalacttrue_sa2_smolvlm2500_chunk50_nobs1_expw0.5_lrvlm1e-4_longest_ptcomv3freeze/checkpoints/best/pretrained_model/ +# ADD_IMAGE_TOKENS=true +# ATTN_MODE=cross_attn +# STATE_TO_PREFIX=true +# CAUSAL_ACTION_ATTENTION_MASK=true +# SELF_ATTN_EVERY_N_LAYERS=2 +# VLM_NAME=HuggingFaceTB/SmolVLM-500M-Instruct + +# SELF_ATTN_ONLY_ACTIONS=false +# EXPERT_WIDTH_MULTIPLIER=0.5 +# PAST_OBS_KEYS="image" +# N_OBS_STEPS=1 +# NUM_EXPERT_LAYERS=0 +# CHUNK_SIZE=50 +# NUM_VLM_LAYERS=16 +# PAD_LANG_TO=longest +# EVAL_CKPT=$WORK/logs/lerobot/lerobot_physical_intelligence_libero_smolpi0_lr1e-4bs8steps100000gpus2freeze32_imgtoktrue_cross_attn_gap1_localimgfalse_statetopreftrue_explay0_vlml16_causalacttrue_sa2_smolvlm2500_chunk50_nobs1_expw0.5_lrvlm1e-4_longest_ptcomv1v2full/checkpoints/best/pretrained_model/ +# ADD_IMAGE_TOKENS=true +# 
ATTN_MODE=cross_attn +# STATE_TO_PREFIX=true +# CAUSAL_ACTION_ATTENTION_MASK=true +# SELF_ATTN_EVERY_N_LAYERS=2 +# VLM_NAME=HuggingFaceTB/SmolVLM-500M-Instruct + +# SELF_ATTN_ONLY_ACTIONS=false +# EXPERT_WIDTH_MULTIPLIER=0.5 +# PAST_OBS_KEYS="image" +# N_OBS_STEPS=1 +# NUM_EXPERT_LAYERS=0 +# CHUNK_SIZE=50 +# NUM_VLM_LAYERS=16 +# PAD_LANG_TO=longest +# EVAL_CKPT=$WORK/logs/lerobot/lerobot_physical_intelligence_libero_smolpi0_lr1e-4bs8steps100000gpus2freeze32_imgtoktrue_cross_attn_gap1_localimgfalse_statetopreftrue_explay0_vlml16_causalacttrue_sa2_smolvlm2500_chunk50_nobs1_expw0.5_lrvlm1e-4_longest_ptcomv1v2freeze/checkpoints/best/pretrained_model/ +# ADD_IMAGE_TOKENS=true +# ATTN_MODE=cross_attn +# STATE_TO_PREFIX=true +# CAUSAL_ACTION_ATTENTION_MASK=true +# SELF_ATTN_EVERY_N_LAYERS=2 +# VLM_NAME=HuggingFaceTB/SmolVLM-500M-Instruct + + + +# SELF_ATTN_ONLY_ACTIONS=false +# EXPERT_WIDTH_MULTIPLIER=0.5 +# PAST_OBS_KEYS="image" +# N_OBS_STEPS=1 +# NUM_EXPERT_LAYERS=0 +# CHUNK_SIZE=50 +# NUM_VLM_LAYERS=16 +# PAD_LANG_TO=longest +# EVAL_CKPT=$WORK/logs/lerobot/lerobot_physical_intelligence_libero_smolpi0_lr2e-4bs8steps100000gpus2freeze32_imgtoktrue_cross_attn_gap1_localimgfalse_statetopreftrue_explay0_vlml16_causalacttrue_sa2_smolvlm2500_chunk50_nobs1_expw0.5_lrvlm1e-4_longest_pt_trans1true/checkpoints/best/pretrained_model/ +# ADD_IMAGE_TOKENS=true +# ATTN_MODE=cross_attn +# STATE_TO_PREFIX=true +# CAUSAL_ACTION_ATTENTION_MASK=true +# SELF_ATTN_EVERY_N_LAYERS=2 +# VLM_NAME=HuggingFaceTB/SmolVLM-500M-Instruct + + +# SELF_ATTN_ONLY_ACTIONS=false +# EXPERT_WIDTH_MULTIPLIER=0.5 +# PAST_OBS_KEYS="image" +# N_OBS_STEPS=1 +# NUM_EXPERT_LAYERS=0 +# CHUNK_SIZE=50 +# NUM_VLM_LAYERS=16 +# PAD_LANG_TO=longest +# 
EVAL_CKPT=$WORK/logs/lerobot/lerobot_physical_intelligence_libero_smolpi0_lr2e-4bs8steps100000gpus2freeze32_imgtoktrue_cross_attn_gap1_localimgfalse_statetopreftrue_explay0_vlml16_causalacttrue_sa2_smolvlm2500_chunk50_nobs1_expw0.5_lrvlm1e-4_longest_pt_trans3true/checkpoints/best/pretrained_model/ +# ADD_IMAGE_TOKENS=true +# ATTN_MODE=cross_attn +# STATE_TO_PREFIX=true +# CAUSAL_ACTION_ATTENTION_MASK=true +# SELF_ATTN_EVERY_N_LAYERS=2 +# VLM_NAME=HuggingFaceTB/SmolVLM-500M-Instruct + +# SELF_ATTN_ONLY_ACTIONS=false +# EXPERT_WIDTH_MULTIPLIER=0.5 +# PAST_OBS_KEYS="image" +# N_OBS_STEPS=1 +# NUM_EXPERT_LAYERS=0 +# CHUNK_SIZE=50 +# NUM_VLM_LAYERS=0 +# PAD_LANG_TO=longest +# EVAL_CKPT=$WORK/logs/lerobot/lerobot_physical_intelligence_libero_smolpi0_lr1e-4bs32steps100000gpus2freeze32_imgtoktrue_self_attn_gap1_localimgfalse_statetopreftrue_explay0_vlml0_causalacttrue_sa0_smolvlm2500_chunk50_nobs1_expw0.5_lrvlm1e-4_longest_pt_trans1false/checkpoints/best/pretrained_model/ +# ADD_IMAGE_TOKENS=true +# ATTN_MODE=self_attn +# STATE_TO_PREFIX=true +# CAUSAL_ACTION_ATTENTION_MASK=true +# SELF_ATTN_EVERY_N_LAYERS=0 +# VLM_NAME=HuggingFaceTB/SmolVLM-500M-Instruct + + +# SELF_ATTN_ONLY_ACTIONS=false +# EXPERT_WIDTH_MULTIPLIER=0.5 +# PAST_OBS_KEYS="image" +# N_OBS_STEPS=1 +# NUM_EXPERT_LAYERS=0 +# CHUNK_SIZE=50 +# NUM_VLM_LAYERS=0 +# PAD_LANG_TO=longest +# EVAL_CKPT=$WORK/logs/lerobot/lerobot_physical_intelligence_libero_smolpi0_lr1e-4bs32steps100000gpus2freeze32_imgtoktrue_self_attn_gap1_localimgfalse_statetopreftrue_explay0_vlml0_causalactfalse_sa0_smolvlm2500_chunk50_nobs1_expw0.5_lrvlm1e-4_longest_pt_trans1false/checkpoints/best/pretrained_model/ +# ADD_IMAGE_TOKENS=true +# ATTN_MODE=self_attn +# STATE_TO_PREFIX=true +# CAUSAL_ACTION_ATTENTION_MASK=false +# SELF_ATTN_EVERY_N_LAYERS=0 +# VLM_NAME=HuggingFaceTB/SmolVLM-500M-Instruct + +# SELF_ATTN_ONLY_ACTIONS=false +# EXPERT_WIDTH_MULTIPLIER=0.5 +# PAST_OBS_KEYS="image" +# N_OBS_STEPS=1 +# NUM_EXPERT_LAYERS=0 +# CHUNK_SIZE=50 +# 
NUM_VLM_LAYERS=0 +# PAD_LANG_TO=longest +# EVAL_CKPT=$WORK/logs/lerobot/lerobot_physical_intelligence_libero_smolpi0_lr1e-4bs32steps100000gpus2freeze32_imgtoktrue_cross_attn_gap1_localimgfalse_statetopreftrue_explay0_vlml0_causalacttrue_sa2_smolvlm2500_chunk50_nobs1_expw0.5_lrvlm1e-4_longest_pt_trans1false/checkpoints/best/pretrained_model/ +# ADD_IMAGE_TOKENS=true +# ATTN_MODE=cross_attn +# STATE_TO_PREFIX=true +# CAUSAL_ACTION_ATTENTION_MASK=true +# SELF_ATTN_EVERY_N_LAYERS=2 +# VLM_NAME=HuggingFaceTB/SmolVLM-500M-Instruct + + +# SELF_ATTN_ONLY_ACTIONS=false +# EXPERT_WIDTH_MULTIPLIER=0.5 +# PAST_OBS_KEYS="image" +# N_OBS_STEPS=1 +# NUM_EXPERT_LAYERS=0 +# CHUNK_SIZE=50 +# NUM_VLM_LAYERS=0 +# PAD_LANG_TO=longest +# EVAL_CKPT=$WORK/logs/lerobot/lerobot_physical_intelligence_libero_smolpi0_lr1e-4bs32steps100000gpus2freeze32_imgtoktrue_cross_attn_gap1_localimgfalse_statetopreftrue_explay0_vlml0_causalacttrue_sa0_smolvlm2500_chunk50_nobs1_expw0.5_lrvlm1e-4_longest_pt_trans1false/checkpoints/best/pretrained_model/ +# ADD_IMAGE_TOKENS=true +# ATTN_MODE=cross_attn +# STATE_TO_PREFIX=true +# CAUSAL_ACTION_ATTENTION_MASK=true +# SELF_ATTN_EVERY_N_LAYERS=0 +# VLM_NAME=HuggingFaceTB/SmolVLM-500M-Instruct + + +# SELF_ATTN_ONLY_ACTIONS=false +# EXPERT_WIDTH_MULTIPLIER=0.75 +# PAST_OBS_KEYS="image" +# N_OBS_STEPS=1 +# NUM_EXPERT_LAYERS=0 +# CHUNK_SIZE=50 +# NUM_VLM_LAYERS=16 +# PAD_LANG_TO=longest +# EVAL_CKPT=$WORK/logs/lerobot/lerobot_physical_intelligence_libero_smolpi0_lr1e-4bs8steps100000gpus2freeze32_imgtoktrue_cross_attn_gap1_localimgfalse_statetopreftrue_explay0_vlml16_causalacttrue_sa2_smolvlm2500_chunk50_nobs1_expw0.75_lrvlm1e-4_longest_ptcomv3freeze25_trans1false/checkpoints/best/pretrained_model/ +# ADD_IMAGE_TOKENS=true +# ATTN_MODE=cross_attn +# STATE_TO_PREFIX=true +# CAUSAL_ACTION_ATTENTION_MASK=true +# SELF_ATTN_EVERY_N_LAYERS=2 +# VLM_NAME=HuggingFaceTB/SmolVLM-500M-Instruct + + +# SELF_ATTN_ONLY_ACTIONS=false +# EXPERT_WIDTH_MULTIPLIER=0.75 +# 
PAST_OBS_KEYS="image" +# N_OBS_STEPS=1 +# NUM_EXPERT_LAYERS=0 +# CHUNK_SIZE=50 +# NUM_VLM_LAYERS=16 +# PAD_LANG_TO=longest +# EVAL_CKPT=$WORK/logs/lerobot/lerobot_physical_intelligence_libero_smolpi0_lr1e-4bs8steps100000gpus2freeze32_imgtoktrue_cross_attn_gap1_localimgfalse_statetopreftrue_explay0_vlml16_causalacttrue_sa2_smolvlm2500_chunk50_nobs1_expw0.75_lrvlm1e-4_longest_ptcomv3freeze50_trans1false/checkpoints/best/pretrained_model/ +# ADD_IMAGE_TOKENS=true +# ATTN_MODE=cross_attn +# STATE_TO_PREFIX=true +# CAUSAL_ACTION_ATTENTION_MASK=true +# SELF_ATTN_EVERY_N_LAYERS=2 +# VLM_NAME=HuggingFaceTB/SmolVLM-500M-Instruct + +# SELF_ATTN_ONLY_ACTIONS=false +# EXPERT_WIDTH_MULTIPLIER=0.75 +# PAST_OBS_KEYS="image" +# N_OBS_STEPS=1 +# NUM_EXPERT_LAYERS=0 +# CHUNK_SIZE=50 +# NUM_VLM_LAYERS=16 +# PAD_LANG_TO=longest +# EVAL_CKPT=$WORK/logs/lerobot/lerobot_physical_intelligence_libero_smolpi0_lr1e-4bs8steps100000gpus2freeze32_imgtoktrue_cross_attn_gap1_localimgfalse_statetopreftrue_explay0_vlml16_causalacttrue_sa2_smolvlm2500_chunk50_nobs1_expw0.75_lrvlm1e-4_longest_ptcomv3freeze75_trans1false/checkpoints/best/pretrained_model/ +# ADD_IMAGE_TOKENS=true +# ATTN_MODE=cross_attn +# STATE_TO_PREFIX=true +# CAUSAL_ACTION_ATTENTION_MASK=true +# SELF_ATTN_EVERY_N_LAYERS=2 +# VLM_NAME=HuggingFaceTB/SmolVLM-500M-Instruct + +# SELF_ATTN_ONLY_ACTIONS=false +# EXPERT_WIDTH_MULTIPLIER=0.75 +# PAST_OBS_KEYS="image" +# N_OBS_STEPS=1 +# NUM_EXPERT_LAYERS=0 +# CHUNK_SIZE=50 +# NUM_VLM_LAYERS=16 +# PAD_LANG_TO=longest +# EVAL_CKPT=$WORK/logs/lerobot/lerobot_physical_intelligence_libero_smolpi0_lr1e-4bs8steps100000gpus2freeze32_imgtoktrue_cross_attn_gap1_localimgfalse_statetopreftrue_explay0_vlml16_causalacttrue_sa2_smolvlm2500_chunk50_nobs1_expw0.75_lrvlm1e-4_longest_ptcomv3freeze100_trans1false/checkpoints/best/pretrained_model/ +# ADD_IMAGE_TOKENS=true +# ATTN_MODE=cross_attn +# STATE_TO_PREFIX=true +# CAUSAL_ACTION_ATTENTION_MASK=true +# SELF_ATTN_EVERY_N_LAYERS=2 +# 
VLM_NAME=HuggingFaceTB/SmolVLM-500M-Instruct + +# SELF_ATTN_ONLY_ACTIONS=false +# EXPERT_WIDTH_MULTIPLIER=0.5 +# PAST_OBS_KEYS="image" +# N_OBS_STEPS=1 +# NUM_EXPERT_LAYERS=0 +# CHUNK_SIZE=50 +# NUM_VLM_LAYERS=16 +# PAD_LANG_TO=longest +# EVAL_CKPT=$WORK/logs/lerobot/lerobot_physical_intelligence_libero_smolpi0_lr1e-4bs8steps100000gpus2freeze32_imgtoktrue_cross_attn_gap1_localimgfalse_statetopreftrue_explay0_vlml16_causalacttrue_sa2_smolvlm2500_chunk50_nobs1_expw0.5_lrvlm1e-4_longest_pt_trans6true/checkpoints/best/pretrained_model/ +# ADD_IMAGE_TOKENS=true +# ATTN_MODE=cross_attn +# STATE_TO_PREFIX=true +# CAUSAL_ACTION_ATTENTION_MASK=true +# SELF_ATTN_EVERY_N_LAYERS=2 +# VLM_NAME=HuggingFaceTB/SmolVLM-500M-Instruct + +# SELF_ATTN_ONLY_ACTIONS=false +# EXPERT_WIDTH_MULTIPLIER=0.5 +# PAST_OBS_KEYS="image" +# N_OBS_STEPS=1 +# NUM_EXPERT_LAYERS=0 +# CHUNK_SIZE=50 +# NUM_VLM_LAYERS=16 +# PAD_LANG_TO=longest +# EVAL_CKPT=$WORK/logs/lerobot/lerobot_physical_intelligence_libero_smolpi0_lr1e-4bs8steps100000gpus2freeze32_imgtoktrue_cross_attn_gap1_localimgfalse_statetopreftrue_explay0_vlml16_causalacttrue_sa2_smolvlm2500_chunk50_nobs1_expw0.5_lrvlm1e-4_longest_pt_trans4true/checkpoints/best/pretrained_model/ +# ADD_IMAGE_TOKENS=true +# ATTN_MODE=cross_attn +# STATE_TO_PREFIX=true +# CAUSAL_ACTION_ATTENTION_MASK=true +# SELF_ATTN_EVERY_N_LAYERS=2 +# VLM_NAME=HuggingFaceTB/SmolVLM-500M-Instruct + +# SELF_ATTN_ONLY_ACTIONS=false +# EXPERT_WIDTH_MULTIPLIER=0.5 +# PAST_OBS_KEYS="image" +# N_OBS_STEPS=1 +# NUM_EXPERT_LAYERS=0 +# CHUNK_SIZE=50 +# NUM_VLM_LAYERS=16 +# PAD_LANG_TO=longest +# EVAL_CKPT=$WORK/logs/lerobot/lerobot_physical_intelligence_libero_smolpi0_lr1e-4bs8steps100000gpus2freeze32_imgtoktrue_cross_attn_gap1_localimgfalse_statetopreftrue_explay0_vlml16_causalacttrue_sa2_smolvlm2500_chunk50_nobs1_expw0.5_lrvlm1e-4_longest_pt_trans7true/checkpoints/best/pretrained_model/ +# ADD_IMAGE_TOKENS=true +# ATTN_MODE=cross_attn +# STATE_TO_PREFIX=true +# 
CAUSAL_ACTION_ATTENTION_MASK=true +# SELF_ATTN_EVERY_N_LAYERS=2 +# VLM_NAME=HuggingFaceTB/SmolVLM-500M-Instruct + +# SELF_ATTN_ONLY_ACTIONS=false +# EXPERT_WIDTH_MULTIPLIER=0.5 +# PAST_OBS_KEYS="image" +# N_OBS_STEPS=1 +# NUM_EXPERT_LAYERS=0 +# CHUNK_SIZE=50 +# NUM_VLM_LAYERS=16 +# PAD_LANG_TO=longest +# EVAL_CKPT=$WORK/logs/lerobot/lerobot_physical_intelligence_libero_smolpi0_lr1e-4bs8steps100000gpus2freeze32_imgtoktrue_cross_attn_gap1_localimgfalse_statetopreftrue_explay0_vlml16_causalacttrue_sa2_smolvlm2500_chunk50_nobs1_expw0.5_lrvlm1e-4_longest_pt_trans5true/checkpoints/best/pretrained_model/ +# ADD_IMAGE_TOKENS=true +# ATTN_MODE=cross_attn +# STATE_TO_PREFIX=true +# CAUSAL_ACTION_ATTENTION_MASK=true +# SELF_ATTN_EVERY_N_LAYERS=2 +# VLM_NAME=HuggingFaceTB/SmolVLM-500M-Instruct + + +# SELF_ATTN_ONLY_ACTIONS=false +# EXPERT_WIDTH_MULTIPLIER=0.5 +# PAST_OBS_KEYS="image" +# N_OBS_STEPS=1 +# NUM_EXPERT_LAYERS=0 +# CHUNK_SIZE=50 +# NUM_VLM_LAYERS=16 +# PAD_LANG_TO=longest +# EVAL_CKPT=$WORK/logs/lerobot/lerobot_physical_intelligence_libero_smolpi0_lr1e-4bs8steps100000gpus2freeze32_imgtoktrue_cross_attn_gap1_localimgfalse_statetopreftrue_explay0_vlml16_causalacttrue_sa2_smolvlm2500_chunk50_nobs1_expw0.5_lrvlm1e-4_longest_pt_trans2true/checkpoints/best/pretrained_model/ +# ADD_IMAGE_TOKENS=true +# ATTN_MODE=cross_attn +# STATE_TO_PREFIX=true +# CAUSAL_ACTION_ATTENTION_MASK=true +# SELF_ATTN_EVERY_N_LAYERS=2 +# VLM_NAME=HuggingFaceTB/SmolVLM-500M-Instruct + + + +# SELF_ATTN_ONLY_ACTIONS=false +# EXPERT_WIDTH_MULTIPLIER=0.75 +# PAST_OBS_KEYS="image" +# N_OBS_STEPS=1 +# NUM_EXPERT_LAYERS=0 +# CHUNK_SIZE=50 +# NUM_VLM_LAYERS=16 +# PAD_LANG_TO=longest +# EVAL_CKPT=$WORK/logs/lerobot/lerobot_physical_intelligence_libero_smolpi0_lr1e-4bs32steps100000gpus2freeze32_imgtoktrue_cross_attn_gap1_localimgfalse_statetopreftrue_explay0_vlml16_causalacttrue_sa2_smolvlm2500_chunk50_nobs1_expw0.75_lrvlm1e-4_longest_pt_trans1true/checkpoints/best/pretrained_model/ +# 
ADD_IMAGE_TOKENS=true +# ATTN_MODE=cross_attn +# STATE_TO_PREFIX=true +# CAUSAL_ACTION_ATTENTION_MASK=true +# SELF_ATTN_EVERY_N_LAYERS=2 +# VLM_NAME=HuggingFaceTB/SmolVLM-500M-Instruct + +# SELF_ATTN_ONLY_ACTIONS=false +# EXPERT_WIDTH_MULTIPLIER=0.5 +# PAST_OBS_KEYS="image" +# N_OBS_STEPS=1 +# NUM_EXPERT_LAYERS=0 +# CHUNK_SIZE=50 +# NUM_VLM_LAYERS=16 +# PAD_LANG_TO=longest +# EVAL_CKPT=$WORK/logs/lerobot/lerobot_physical_intelligence_libero_smolpi0_lr1e-4bs8steps100000gpus2freeze32_imgtoktrue_cross_attn_gap1_localimgfalse_statetopreftrue_explay0_vlml16_causalacttrue_sa2_smolvlm2500_chunk50_nobs1_expw0.5_lrvlm1e-4_longest_pt_trans8true/checkpoints/best/pretrained_model/ +# ADD_IMAGE_TOKENS=true +# ATTN_MODE=cross_attn +# STATE_TO_PREFIX=true +# CAUSAL_ACTION_ATTENTION_MASK=true +# SELF_ATTN_EVERY_N_LAYERS=2 +# VLM_NAME=HuggingFaceTB/SmolVLM-500M-Instruct + +# SELF_ATTN_ONLY_ACTIONS=false +# EXPERT_WIDTH_MULTIPLIER=0.5 +# PAST_OBS_KEYS="image" +# N_OBS_STEPS=1 +# NUM_EXPERT_LAYERS=0 +# CHUNK_SIZE=50 +# NUM_VLM_LAYERS=16 +# PAD_LANG_TO=longest +# EVAL_CKPT=$WORK/logs/lerobot/lerobot_physical_intelligence_libero_smolpi0_lr1e-4bs8steps100000gpus2freeze32_imgtoktrue_cross_attn_gap1_localimgfalse_statetopreftrue_explay0_vlml16_causalacttrue_sa2_smolvlm2500_chunk50_nobs1_expw0.5_lrvlm1e-4_longest_pt_trans9true/checkpoints/best/pretrained_model/ +# ADD_IMAGE_TOKENS=true +# ATTN_MODE=cross_attn +# STATE_TO_PREFIX=true +# CAUSAL_ACTION_ATTENTION_MASK=true +# SELF_ATTN_EVERY_N_LAYERS=2 +# VLM_NAME=HuggingFaceTB/SmolVLM-500M-Instruct + +# SELF_ATTN_ONLY_ACTIONS=false +# EXPERT_WIDTH_MULTIPLIER=0.5 +# PAST_OBS_KEYS="image" +# N_OBS_STEPS=1 +# NUM_EXPERT_LAYERS=0 +# CHUNK_SIZE=50 +# NUM_VLM_LAYERS=16 +# PAD_LANG_TO=longest +# 
EVAL_CKPT=$WORK/logs/lerobot/lerobot_physical_intelligence_libero_smolpi0_lr1e-4bs8steps100000gpus2freeze32_imgtoktrue_cross_attn_gap1_localimgfalse_statetopreftrue_explay0_vlml16_causalacttrue_sa2_smolvlm2500_chunk50_nobs1_expw0.5_lrvlm1e-4_longest_ptcomv1v2full/checkpoints/best/pretrained_model/ +# ADD_IMAGE_TOKENS=true +# ATTN_MODE=cross_attn +# STATE_TO_PREFIX=true +# CAUSAL_ACTION_ATTENTION_MASK=true +# SELF_ATTN_EVERY_N_LAYERS=2 +# VLM_NAME=HuggingFaceTB/SmolVLM-500M-Instruct + +# SELF_ATTN_ONLY_ACTIONS=false +# EXPERT_WIDTH_MULTIPLIER=0.25 +# PAST_OBS_KEYS="image" +# N_OBS_STEPS=1 +# NUM_EXPERT_LAYERS=0 +# CHUNK_SIZE=50 +# NUM_VLM_LAYERS=0 +# PAD_LANG_TO=longest +# EVAL_CKPT=$WORK/logs/lerobot/lerobot_physical_intelligence_libero_smolpi0_lr1e-4bs8steps100000gpus2freeze32_imgtoktrue_cross_attn_gap1_localimgfalse_statetopreftrue_explay0_vlml0_causalacttrue_sa0_smolvlm2500_chunk50_nobs1_expw0.25_lrvlm1e-4_longest_pt_trans0false/checkpoints/best/pretrained_model/ +# ADD_IMAGE_TOKENS=true +# ATTN_MODE=cross_attn +# STATE_TO_PREFIX=true +# CAUSAL_ACTION_ATTENTION_MASK=true +# SELF_ATTN_EVERY_N_LAYERS=0 +# VLM_NAME=HuggingFaceTB/SmolVLM-500M-Instruct + + +# SELF_ATTN_ONLY_ACTIONS=false +# EXPERT_WIDTH_MULTIPLIER=1 +# PAST_OBS_KEYS="image" +# N_OBS_STEPS=1 +# NUM_EXPERT_LAYERS=0 +# CHUNK_SIZE=50 +# NUM_VLM_LAYERS=0 +# PAD_LANG_TO=longest +# EVAL_CKPT=$WORK/logs/lerobot/lerobot_physical_intelligence_libero_smolpi0_lr1e-4bs8steps100000gpus2freeze32_imgtoktrue_cross_attn_gap1_localimgfalse_statetopreftrue_explay0_vlml0_causalacttrue_sa0_smolvlm2500_chunk50_nobs1_expw1_lrvlm1e-4_longest_pt_trans0false/checkpoints/best/pretrained_model/ +# ADD_IMAGE_TOKENS=true +# ATTN_MODE=cross_attn +# STATE_TO_PREFIX=true +# CAUSAL_ACTION_ATTENTION_MASK=true +# SELF_ATTN_EVERY_N_LAYERS=0 +# VLM_NAME=HuggingFaceTB/SmolVLM-500M-Instruct + +# SELF_ATTN_ONLY_ACTIONS=false +# EXPERT_WIDTH_MULTIPLIER=0.75 +# PAST_OBS_KEYS="image" +# N_OBS_STEPS=1 +# NUM_EXPERT_LAYERS=0 +# CHUNK_SIZE=50 +# 
NUM_VLM_LAYERS=0 +# PAD_LANG_TO=longest +# EVAL_CKPT=$WORK/logs/lerobot/lerobot_physical_intelligence_libero_smolpi0_lr1e-4bs8steps100000gpus2freeze32_imgtoktrue_cross_attn_gap1_localimgfalse_statetopreftrue_explay0_vlml0_causalacttrue_sa0_smolvlm2500_chunk50_nobs1_expw0.75_lrvlm1e-4_longest_pt_trans0false/checkpoints/best/pretrained_model/ +# ADD_IMAGE_TOKENS=true +# ATTN_MODE=cross_attn +# STATE_TO_PREFIX=true +# CAUSAL_ACTION_ATTENTION_MASK=true +# SELF_ATTN_EVERY_N_LAYERS=0 +# VLM_NAME=HuggingFaceTB/SmolVLM-500M-Instruct + +# SELF_ATTN_ONLY_ACTIONS=false +# EXPERT_WIDTH_MULTIPLIER=0.75 +# PAST_OBS_KEYS="image" +# N_OBS_STEPS=1 +# NUM_EXPERT_LAYERS=0 +# CHUNK_SIZE=50 +# NUM_VLM_LAYERS=0 +# PAD_LANG_TO=longest +# EVAL_CKPT=$WORK/logs/lerobot/lerobot_physical_intelligence_libero_smolpi0_lr1e-4bs32steps100000gpus2freeze32_imgtoktrue_cross_attn_gap1_localimgfalse_statetopreftrue_explay0_vlml0_causalacttrue_sa0_smolvlm2500_chunk50_nobs1_expw0.75_lrvlm1e-4_longest_pt_trans0false/checkpoints/best/pretrained_model/ +# ADD_IMAGE_TOKENS=true +# ATTN_MODE=cross_attn +# STATE_TO_PREFIX=true +# CAUSAL_ACTION_ATTENTION_MASK=true +# SELF_ATTN_EVERY_N_LAYERS=0 +# VLM_NAME=HuggingFaceTB/SmolVLM-500M-Instruct + +# SELF_ATTN_ONLY_ACTIONS=false +# EXPERT_WIDTH_MULTIPLIER=0.25 +# PAST_OBS_KEYS="image" +# N_OBS_STEPS=1 +# NUM_EXPERT_LAYERS=0 +# CHUNK_SIZE=50 +# NUM_VLM_LAYERS=0 +# PAD_LANG_TO=longest +# EVAL_CKPT=$WORK/logs/lerobot/lerobot_physical_intelligence_libero_smolpi0_lr1e-4bs32steps100000gpus2freeze32_imgtoktrue_cross_attn_gap1_localimgfalse_statetopreftrue_explay0_vlml0_causalacttrue_sa0_smolvlm2500_chunk50_nobs1_expw0.25_lrvlm1e-4_longest_pt_trans0false/checkpoints/best/pretrained_model/ +# ADD_IMAGE_TOKENS=true +# ATTN_MODE=cross_attn +# STATE_TO_PREFIX=true +# CAUSAL_ACTION_ATTENTION_MASK=true +# SELF_ATTN_EVERY_N_LAYERS=0 +# VLM_NAME=HuggingFaceTB/SmolVLM-500M-Instruct + +# SELF_ATTN_ONLY_ACTIONS=false +# EXPERT_WIDTH_MULTIPLIER=1 +# PAST_OBS_KEYS="image" +# 
N_OBS_STEPS=1 +# NUM_EXPERT_LAYERS=0 +# CHUNK_SIZE=50 +# NUM_VLM_LAYERS=0 +# PAD_LANG_TO=longest +# EVAL_CKPT=$WORK/logs/lerobot/lerobot_physical_intelligence_libero_smolpi0_lr1e-4bs32steps100000gpus2freeze32_imgtoktrue_cross_attn_gap1_localimgfalse_statetopreftrue_explay0_vlml0_causalacttrue_sa0_smolvlm2500_chunk50_nobs1_expw1_lrvlm1e-4_longest_pt_trans0false/checkpoints/best/pretrained_model/ +# ADD_IMAGE_TOKENS=true +# ATTN_MODE=cross_attn +# STATE_TO_PREFIX=true +# CAUSAL_ACTION_ATTENTION_MASK=true +# SELF_ATTN_EVERY_N_LAYERS=0 +# VLM_NAME=HuggingFaceTB/SmolVLM-500M-Instruct + + +# SELF_ATTN_ONLY_ACTIONS=false +# EXPERT_WIDTH_MULTIPLIER=0.75 +# PAST_OBS_KEYS="observation.state" +# N_OBS_STEPS=3 +# NUM_EXPERT_LAYERS=0 +# CHUNK_SIZE=50 +# NUM_VLM_LAYERS=16 +# PAD_LANG_TO=longest +# EVAL_CKPT=$WORK/logs/lerobot/lerobot_physical_intelligence_libero_smolpi0_lr1e-4bs8steps100000gpus2freeze32_imgtoktrue_cross_attn_gap1_localimgfalse_statetopreftrue_explay0_vlml16_causalacttrue_sa2_smolvlm2500_chunk50_nobs3statestrue_expw0.75_lrvlm1e-4_longest_pt_trans0false/checkpoints/best/pretrained_model/ +# ADD_IMAGE_TOKENS=true +# ATTN_MODE=cross_attn +# STATE_TO_PREFIX=true +# CAUSAL_ACTION_ATTENTION_MASK=true +# SELF_ATTN_EVERY_N_LAYERS=2 +# VLM_NAME=HuggingFaceTB/SmolVLM-500M-Instruct + + +# CAUSAL_ATTENTION_ON_HISTORY=true +# SELF_ATTN_ONLY_ACTIONS=false +# EXPERT_WIDTH_MULTIPLIER=0.75 +# PAST_OBS_KEYS="image,observation.state" +# N_OBS_STEPS=3 +# NUM_EXPERT_LAYERS=0 +# CHUNK_SIZE=50 +# NUM_VLM_LAYERS=16 +# PAD_LANG_TO=longest +# EVAL_CKPT=$WORK/logs/lerobot/lerobot_physical_intelligence_libero_smolpi0_lr1e-4bs8steps100000gpus2freeze32_imgtoktrue_cross_attn_gap1_localimgfalse_statetopreftrue_explay0_vlml16_causalacttrue_sa2_smolvlm2500_chunk50_nobs3_expw0.75_lrvlm1e-4_longest_pt_trans0false/checkpoints/best/pretrained_model/ +# ADD_IMAGE_TOKENS=true +# ATTN_MODE=cross_attn +# STATE_TO_PREFIX=true +# CAUSAL_ACTION_ATTENTION_MASK=true +# SELF_ATTN_EVERY_N_LAYERS=2 +# 
VLM_NAME=HuggingFaceTB/SmolVLM-500M-Instruct + + + +# CAUSAL_ATTENTION_ON_HISTORY=true +# SELF_ATTN_ONLY_ACTIONS=false +# EXPERT_WIDTH_MULTIPLIER=0.75 +# PAST_OBS_KEYS="image,observation.state" +# N_OBS_STEPS=1 +# NUM_EXPERT_LAYERS=0 +# CHUNK_SIZE=50 +# NUM_VLM_LAYERS=16 +# PAD_LANG_TO=longest +# EVAL_CKPT=$WORK/logs/lerobot/lerobot_physical_intelligence_libero_smolpi0_lr1e-4bs8steps100000gpus2freeze32_imgtoktrue_cross_attn_vlml16_causalacttrue_sa2_smolvlm2500_chunk50_nobs1statestrue_expw0.75_lrvlm1e-4_longest_pt_trans0false_decaylr1e-5100000/checkpoints/best/pretrained_model/ +# ADD_IMAGE_TOKENS=true +# ATTN_MODE=cross_attn +# STATE_TO_PREFIX=true +# CAUSAL_ACTION_ATTENTION_MASK=true +# SELF_ATTN_EVERY_N_LAYERS=2 +# VLM_NAME=HuggingFaceTB/SmolVLM-500M-Instruct + + + +# CAUSAL_ATTENTION_ON_HISTORY=true +# SELF_ATTN_ONLY_ACTIONS=false +# EXPERT_WIDTH_MULTIPLIER=0.75 +# PAST_OBS_KEYS="image,observation.state" +# N_OBS_STEPS=1 +# NUM_EXPERT_LAYERS=0 +# CHUNK_SIZE=50 +# NUM_VLM_LAYERS=16 +# PAD_LANG_TO=longest +# EVAL_CKPT=$WORK/logs/lerobot/lerobot_physical_intelligence_libero_smolpi0_lr1e-4bs8steps100000gpus2freeze32_imgtoktrue_cross_attn_vlml16_causalacttrue_sa2_smolvlm2500_chunk50_nobs1statestrue_expw0.75_lrvlm1e-4_longest_pt_trans0false_decaylr1e-530000/checkpoints/best/pretrained_model/ +# ADD_IMAGE_TOKENS=true +# ATTN_MODE=cross_attn +# STATE_TO_PREFIX=true +# CAUSAL_ACTION_ATTENTION_MASK=true +# SELF_ATTN_EVERY_N_LAYERS=2 +# VLM_NAME=HuggingFaceTB/SmolVLM-500M-Instruct + + +# CAUSAL_ATTENTION_ON_HISTORY=true +# SELF_ATTN_ONLY_ACTIONS=false +# EXPERT_WIDTH_MULTIPLIER=0.75 +# PAST_OBS_KEYS="image,observation.state" +# N_OBS_STEPS=1 +# NUM_EXPERT_LAYERS=0 +# CHUNK_SIZE=50 +# NUM_VLM_LAYERS=16 +# PAD_LANG_TO=longest +# 
EVAL_CKPT=$WORK/logs/lerobot/lerobot_physical_intelligence_libero_smolpi0_lr1e-4bs8steps100000gpus2freeze32_imgtoktrue_cross_attn_vlml16_causalacttrue_sa2_smolvlm2500_chunk50_nobs1statestrue_expw0.75_lrvlm1e-4_longest_pt_trans0false_decaylr5e-6100000/checkpoints/best/pretrained_model/ +# ADD_IMAGE_TOKENS=true +# ATTN_MODE=cross_attn +# STATE_TO_PREFIX=true +# CAUSAL_ACTION_ATTENTION_MASK=true +# SELF_ATTN_EVERY_N_LAYERS=2 +# VLM_NAME=HuggingFaceTB/SmolVLM-500M-Instruct + + +# CAUSAL_ATTENTION_ON_HISTORY=true +# SELF_ATTN_ONLY_ACTIONS=false +# EXPERT_WIDTH_MULTIPLIER=0.75 +# PAST_OBS_KEYS="image" +# N_OBS_STEPS=1 +# NUM_EXPERT_LAYERS=0 +# CHUNK_SIZE=50 +# NUM_VLM_LAYERS=16 +# PAD_LANG_TO=longest +# EVAL_CKPT=$WORK/logs/lerobot/lerobot_physical_intelligence_libero_smolpi0_lr1e-4bs8steps100000gpus2freeze32_imgtoktrue_cross_attn_vlml16_causalacttrue_sa2_smolvlm2500_chunk50_nobs1statestrue_expw0.75_lrvlm1e-4_longest_pt_trans0false_decaylr2.5e-6100000/checkpoints/best/pretrained_model/ +# ADD_IMAGE_TOKENS=true +# ATTN_MODE=cross_attn +# STATE_TO_PREFIX=true +# CAUSAL_ACTION_ATTENTION_MASK=true +# SELF_ATTN_EVERY_N_LAYERS=2 +# VLM_NAME=HuggingFaceTB/SmolVLM-500M-Instruct + +# CAUSAL_ATTENTION_ON_HISTORY=true +# SELF_ATTN_ONLY_ACTIONS=false +# EXPERT_WIDTH_MULTIPLIER=0.75 +# PAST_OBS_KEYS="image" +# N_OBS_STEPS=1 +# NUM_EXPERT_LAYERS=0 +# CHUNK_SIZE=50 +# NUM_VLM_LAYERS=16 +# PAD_LANG_TO=longest +# EVAL_CKPT=$WORK/logs/lerobot/lerobot_physical_intelligence_libero_smolpi0_lr1e-4bs8steps100000gpus2freeze32_imgtoktrue_cross_attn_vlml16_causalacttrue_sa2_smolvlm2500_chunk50_nobs1statestrue_expw0.75_lrvlm1e-4_longest_pt_trans0false_decaylr2.5e-630000/checkpoints/best/pretrained_model/ +# ADD_IMAGE_TOKENS=true +# ATTN_MODE=cross_attn +# STATE_TO_PREFIX=true +# CAUSAL_ACTION_ATTENTION_MASK=true +# SELF_ATTN_EVERY_N_LAYERS=2 +# VLM_NAME=HuggingFaceTB/SmolVLM-500M-Instruct + +# CAUSAL_ATTENTION_ON_HISTORY=true +# SELF_ATTN_ONLY_ACTIONS=false +# EXPERT_WIDTH_MULTIPLIER=0.75 +# 
PAST_OBS_KEYS="image" +# N_OBS_STEPS=1 +# NUM_EXPERT_LAYERS=0 +# CHUNK_SIZE=50 +# NUM_VLM_LAYERS=16 +# PAD_LANG_TO=longest +# EVAL_CKPT=$WORK/logs/lerobot/lerobot_physical_intelligence_libero_smolpi0_lr1e-4bs8steps100000gpus2freeze32_imgtoktrue_cross_attn_vlml16_causalacttrue_sa2_smolvlm2500_chunk50_nobs1statestrue_expw0.75_lrvlm1e-4_longest_pt_trans0true_decaylr2.5e-630000/checkpoints/best/pretrained_model/ +# ADD_IMAGE_TOKENS=true +# ATTN_MODE=cross_attn +# STATE_TO_PREFIX=true +# CAUSAL_ACTION_ATTENTION_MASK=true +# SELF_ATTN_EVERY_N_LAYERS=2 +# VLM_NAME=HuggingFaceTB/SmolVLM-500M-Instruct + + +# CAUSAL_ATTENTION_ON_HISTORY=true +# SELF_ATTN_ONLY_ACTIONS=false +# EXPERT_WIDTH_MULTIPLIER=0.75 +# PAST_OBS_KEYS="image" +# N_OBS_STEPS=1 +# NUM_EXPERT_LAYERS=0 +# CHUNK_SIZE=50 +# NUM_VLM_LAYERS=16 +# PAD_LANG_TO=longest +# EVAL_CKPT=$WORK/logs/lerobot/lerobot_physical_intelligence_libero_smolpi0_lr1e-4bs32steps100000gpus2freeze32_imgtoktrue_cross_attn_vlml16_causalacttrue_sa2_smolvlm2500_chunk50_nobs1statestrue_expw0.75_lrvlm1e-4_longest_pt_trans0false_decaylr1e-5200000/checkpoints/best/pretrained_model/ +# ADD_IMAGE_TOKENS=true +# ATTN_MODE=cross_attn +# STATE_TO_PREFIX=true +# CAUSAL_ACTION_ATTENTION_MASK=true +# SELF_ATTN_EVERY_N_LAYERS=2 +# VLM_NAME=HuggingFaceTB/SmolVLM-500M-Instruct + + +# CAUSAL_ATTENTION_ON_HISTORY=true +# SELF_ATTN_ONLY_ACTIONS=false +# EXPERT_WIDTH_MULTIPLIER=0.75 +# PAST_OBS_KEYS="image" +# N_OBS_STEPS=1 +# NUM_EXPERT_LAYERS=0 +# CHUNK_SIZE=50 +# NUM_VLM_LAYERS=16 +# PAD_LANG_TO=longest +# EVAL_CKPT=$WORK/logs/lerobot/lerobot_physical_intelligence_libero_smolpi0_lr1e-4bs32steps100000gpus2freeze32_imgtoktrue_cross_attn_vlml16_causalacttrue_sa2_smolvlm2500_chunk50_nobs1statestrue_expw0.75_lrvlm1e-4_longest_pt_trans0false_decaylr5e-6200000/checkpoints/best/pretrained_model/ +# ADD_IMAGE_TOKENS=true +# ATTN_MODE=cross_attn +# STATE_TO_PREFIX=true +# CAUSAL_ACTION_ATTENTION_MASK=true +# SELF_ATTN_EVERY_N_LAYERS=2 +# 
VLM_NAME=HuggingFaceTB/SmolVLM-500M-Instruct + +# CAUSAL_ATTENTION_ON_HISTORY=true +# SELF_ATTN_ONLY_ACTIONS=false +# EXPERT_WIDTH_MULTIPLIER=0.75 +# PAST_OBS_KEYS="image" +# N_OBS_STEPS=1 +# NUM_EXPERT_LAYERS=0 +# CHUNK_SIZE=50 +# NUM_VLM_LAYERS=16 +# PAD_LANG_TO=longest +# EVAL_CKPT=$WORK/logs/lerobot/lerobot_physical_intelligence_libero_smolpi0_lr1e-4bs32steps100000gpus2freeze32_imgtoktrue_cross_attn_vlml16_causalacttrue_sa2_smolvlm2500_chunk50_nobs1statestrue_expw0.75_lrvlm1e-4_longest_pt_trans0false_decaylr1e-530000/checkpoints/best/pretrained_model/ +# ADD_IMAGE_TOKENS=true +# ATTN_MODE=cross_attn +# STATE_TO_PREFIX=true +# CAUSAL_ACTION_ATTENTION_MASK=true +# SELF_ATTN_EVERY_N_LAYERS=2 +# VLM_NAME=HuggingFaceTB/SmolVLM-500M-Instruct + + +# CAUSAL_ATTENTION_ON_HISTORY=true +# SELF_ATTN_ONLY_ACTIONS=false +# EXPERT_WIDTH_MULTIPLIER=0.75 +# PAST_OBS_KEYS="image" +# N_OBS_STEPS=1 +# NUM_EXPERT_LAYERS=0 +# CHUNK_SIZE=50 +# NUM_VLM_LAYERS=16 +# PAD_LANG_TO=longest +# EVAL_CKPT=$WORK/logs/lerobot/lerobot_physical_intelligence_libero_smolpi0_lr1e-4bs32steps100000gpus2freeze32_imgtoktrue_cross_attn_vlml16_causalacttrue_sa2_smolvlm2500_chunk50_nobs1statestrue_expw0.75_lrvlm1e-4_longest_pt_trans0false_decaylr2.5e-6200000/checkpoints/best/pretrained_model/ +# ADD_IMAGE_TOKENS=true +# ATTN_MODE=cross_attn +# STATE_TO_PREFIX=true +# CAUSAL_ACTION_ATTENTION_MASK=true +# SELF_ATTN_EVERY_N_LAYERS=2 +# VLM_NAME=HuggingFaceTB/SmolVLM-500M-Instruct + + +# CAUSAL_ATTENTION_ON_HISTORY=true +# SELF_ATTN_ONLY_ACTIONS=false +# EXPERT_WIDTH_MULTIPLIER=0.75 +# PAST_OBS_KEYS="image" +# N_OBS_STEPS=1 +# NUM_EXPERT_LAYERS=0 +# CHUNK_SIZE=50 +# NUM_VLM_LAYERS=16 +# PAD_LANG_TO=longest +# EVAL_CKPT=$WORK/logs/lerobot/lerobot_physical_intelligence_libero_smolpi0_lr1e-4bs32steps100000gpus2freeze32_imgtoktrue_cross_attn_vlml16_causalacttrue_sa2_smolvlm2500_chunk50_nobs1statestrue_expw0.75_lrvlm1e-4_longest_pt_trans0false_decaylr2.5e-6100000/checkpoints/best/pretrained_model/ +# 
ADD_IMAGE_TOKENS=true +# ATTN_MODE=cross_attn +# STATE_TO_PREFIX=true +# CAUSAL_ACTION_ATTENTION_MASK=true +# SELF_ATTN_EVERY_N_LAYERS=2 +# VLM_NAME=HuggingFaceTB/SmolVLM-500M-Instruct + + +# CAUSAL_ATTENTION_ON_HISTORY=true +# SELF_ATTN_ONLY_ACTIONS=false +# EXPERT_WIDTH_MULTIPLIER=0.75 +# PAST_OBS_KEYS="image" +# N_OBS_STEPS=1 +# NUM_EXPERT_LAYERS=0 +# CHUNK_SIZE=50 +# NUM_VLM_LAYERS=16 +# PAD_LANG_TO=longest +# EVAL_CKPT=$WORK/logs/lerobot/lerobot_physical_intelligence_libero_smolpi0_lr1e-4bs32steps100000gpus2freeze32_imgtoktrue_cross_attn_vlml16_causalacttrue_sa2_smolvlm2500_chunk50_nobs1statestrue_expw0.75_lrvlm1e-4_longest_pt_trans0false_decaylr2.5e-630000/checkpoints/best/pretrained_model/ +# ADD_IMAGE_TOKENS=true +# ATTN_MODE=cross_attn +# STATE_TO_PREFIX=true +# CAUSAL_ACTION_ATTENTION_MASK=true +# SELF_ATTN_EVERY_N_LAYERS=2 +# VLM_NAME=HuggingFaceTB/SmolVLM-500M-Instruct + + +# CAUSAL_ATTENTION_ON_HISTORY=true +# SELF_ATTN_ONLY_ACTIONS=false +# EXPERT_WIDTH_MULTIPLIER=0.75 +# PAST_OBS_KEYS="image" +# N_OBS_STEPS=1 +# NUM_EXPERT_LAYERS=0 +# CHUNK_SIZE=50 +# NUM_VLM_LAYERS=16 +# PAD_LANG_TO=longest +# EVAL_CKPT=$WORK/logs/lerobot/lerobot_physical_intelligence_libero_smolpi0_lr1e-4bs32steps100000gpus2freeze32_imgtoktrue_cross_attn_vlml16_causalacttrue_sa2_smolvlm2500_chunk50_nobs1statestrue_expw0.75_lrvlm1e-4_longest_pt_trans0false_decaylr5e-6100000/checkpoints/best/pretrained_model/ +# ADD_IMAGE_TOKENS=true +# ATTN_MODE=cross_attn +# STATE_TO_PREFIX=true +# CAUSAL_ACTION_ATTENTION_MASK=true +# SELF_ATTN_EVERY_N_LAYERS=2 +# VLM_NAME=HuggingFaceTB/SmolVLM-500M-Instruct + + +# CAUSAL_ATTENTION_ON_HISTORY=true +# SELF_ATTN_ONLY_ACTIONS=false +# EXPERT_WIDTH_MULTIPLIER=0.75 +# PAST_OBS_KEYS="image" +# N_OBS_STEPS=1 +# NUM_EXPERT_LAYERS=0 +# CHUNK_SIZE=50 +# NUM_VLM_LAYERS=16 +# PAD_LANG_TO=longest +# 
EVAL_CKPT=$WORK/logs/lerobot/lerobot_physical_intelligence_libero_smolpi0_lr1e-4bs8steps100000gpus2freeze32_imgtoktrue_cross_attn_vlml16_causalacttrue_sa2_smolvla500base_chunk50_nobs1statestrue_expw0.75_lrvlm1e-4_longest_pt_trans0false_decaylr2.5e-630000_relactfalsefirst_camfalse/checkpoints/best/pretrained_model/ +# ADD_IMAGE_TOKENS=true +# ATTN_MODE=cross_attn +# STATE_TO_PREFIX=true +# CAUSAL_ACTION_ATTENTION_MASK=true +# SELF_ATTN_EVERY_N_LAYERS=2 +# VLM_NAME=HuggingFaceTB/SmolVLA-500M-Base + + + +# PREDICT_RELATIVE_ACTIONS=true +# RELATIVE_ACTIONS_MODE=relative +# CAUSAL_ATTENTION_ON_HISTORY=true +# SELF_ATTN_ONLY_ACTIONS=false +# EXPERT_WIDTH_MULTIPLIER=0.75 +# PAST_OBS_KEYS="image" +# N_OBS_STEPS=1 +# NUM_EXPERT_LAYERS=0 +# CHUNK_SIZE=50 +# NUM_VLM_LAYERS=16 +# PAD_LANG_TO=longest +# EVAL_CKPT=$WORK/logs/lerobot/lerobot_physical_intelligence_libero_smolpi0_lr1e-4bs32steps100000gpus2freeze32_imgtoktrue_cross_attn_vlml16_causalacttrue_sa2_smolvlm2500_chunk50_nobs1statestrue_expw0.75_lrvlm1e-4_longest_pt_trans0false_decaylr2.5e-630000_relacttruerelative/checkpoints/best/pretrained_model/ +# ADD_IMAGE_TOKENS=true +# ATTN_MODE=cross_attn +# STATE_TO_PREFIX=true +# CAUSAL_ACTION_ATTENTION_MASK=true +# SELF_ATTN_EVERY_N_LAYERS=2 +# VLM_NAME=HuggingFaceTB/SmolVLM-500M-Instruct + + + +# PREDICT_RELATIVE_ACTIONS=true +# RELATIVE_ACTIONS_MODE=first +# CAUSAL_ATTENTION_ON_HISTORY=true +# SELF_ATTN_ONLY_ACTIONS=false +# EXPERT_WIDTH_MULTIPLIER=0.75 +# PAST_OBS_KEYS="image" +# N_OBS_STEPS=1 +# NUM_EXPERT_LAYERS=0 +# CHUNK_SIZE=50 +# NUM_VLM_LAYERS=16 +# PAD_LANG_TO=longest +# EVAL_CKPT=$WORK/logs/lerobot/lerobot_physical_intelligence_libero_smolpi0_lr1e-4bs32steps100000gpus2freeze32_imgtoktrue_cross_attn_vlml16_causalacttrue_sa2_smolvlm2500_chunk50_nobs1statestrue_expw0.75_lrvlm1e-4_longest_pt_trans0false_decaylr2.5e-630000_relacttruefirst/checkpoints/best/pretrained_model/ +# ADD_IMAGE_TOKENS=true +# ATTN_MODE=cross_attn +# STATE_TO_PREFIX=true +# 
CAUSAL_ACTION_ATTENTION_MASK=true +# SELF_ATTN_EVERY_N_LAYERS=2 +# VLM_NAME=HuggingFaceTB/SmolVLM-500M-Instruct + + +# CAUSAL_ATTENTION_ON_HISTORY=true +# SELF_ATTN_ONLY_ACTIONS=false +# EXPERT_WIDTH_MULTIPLIER=0.75 +# PAST_OBS_KEYS="image" +# N_OBS_STEPS=1 +# NUM_EXPERT_LAYERS=0 +# CHUNK_SIZE=50 +# NUM_VLM_LAYERS=16 +# PAD_LANG_TO=longest +# EVAL_CKPT=$WORK/logs/lerobot/lerobot_physical_intelligence_libero_smolpi0_lr1e-4bs32steps100000gpus2freeze32_imgtoktrue_cross_attn_vlml16_causalacttrue_sa2_smolvla500base_chunk50_nobs1statestrue_expw0.75_lrvlm1e-4_longest_pt_trans0false_decaylr2.5e-630000_relactfalsefirst_camfalse/checkpoints/best/pretrained_model/ +# ADD_IMAGE_TOKENS=true +# ATTN_MODE=cross_attn +# STATE_TO_PREFIX=true +# CAUSAL_ACTION_ATTENTION_MASK=true +# SELF_ATTN_EVERY_N_LAYERS=2 +# VLM_NAME=HuggingFaceTB/SmolVLA-500M-Base + + +# CAUSAL_ATTENTION_ON_HISTORY=true +# SELF_ATTN_ONLY_ACTIONS=false +# EXPERT_WIDTH_MULTIPLIER=0.75 +# PAST_OBS_KEYS="image" +# N_OBS_STEPS=1 +# NUM_EXPERT_LAYERS=0 +# CHUNK_SIZE=50 +# NUM_VLM_LAYERS=16 +# PAD_LANG_TO=longest +# EVAL_CKPT=$WORK/logs/lerobot/lerobot_physical_intelligence_libero_smolpi0_lr8e-5bs8steps100000gpus2freeze32_imgtoktrue_cross_attn_vlml16_causalacttrue_sa2_smolvlm2500_chunk50_nobs1statestrue_expw0.75_lrvlm1e-4_longest_pt_trans0false_decaylr2.5e-6100000_relactfalsefirst_camfalse/checkpoints/best/pretrained_model/ +# ADD_IMAGE_TOKENS=true +# ATTN_MODE=cross_attn +# STATE_TO_PREFIX=true +# CAUSAL_ACTION_ATTENTION_MASK=true +# SELF_ATTN_EVERY_N_LAYERS=2 +# VLM_NAME=HuggingFaceTB/SmolVLM-500M-Instruct + + +# CAUSAL_ATTENTION_ON_HISTORY=true +# SELF_ATTN_ONLY_ACTIONS=false +# EXPERT_WIDTH_MULTIPLIER=0.75 +# PAST_OBS_KEYS="image" +# N_OBS_STEPS=1 +# NUM_EXPERT_LAYERS=0 +# CHUNK_SIZE=50 +# NUM_VLM_LAYERS=16 +# PAD_LANG_TO=longest +# 
EVAL_CKPT=$WORK/logs/lerobot/lerobot_physical_intelligence_libero_smolpi0_lr6e-5bs8steps100000gpus2freeze32_imgtoktrue_cross_attn_vlml16_causalacttrue_sa2_smolvlm2500_chunk50_nobs1statestrue_expw0.75_lrvlm1e-4_longest_pt_trans0false_decaylr2.5e-6100000_relactfalsefirst_camfalse/checkpoints/best/pretrained_model/ +# ADD_IMAGE_TOKENS=true +# ATTN_MODE=cross_attn +# STATE_TO_PREFIX=true +# CAUSAL_ACTION_ATTENTION_MASK=true +# SELF_ATTN_EVERY_N_LAYERS=2 +# VLM_NAME=HuggingFaceTB/SmolVLM-500M-Instruct + +# CAUSAL_ATTENTION_ON_HISTORY=true +# SELF_ATTN_ONLY_ACTIONS=false +# EXPERT_WIDTH_MULTIPLIER=0.75 +# PAST_OBS_KEYS="image" +# N_OBS_STEPS=1 +# NUM_EXPERT_LAYERS=0 +# CHUNK_SIZE=50 +# NUM_VLM_LAYERS=16 +# PAD_LANG_TO=longest +# EVAL_CKPT=$WORK/logs/lerobot/lerobot_physical_intelligence_libero_smolpi0_lr4e-5bs8steps100000gpus2freeze32_imgtoktrue_cross_attn_vlml16_causalacttrue_sa2_smolvlm2500_chunk50_nobs1statestrue_expw0.75_lrvlm1e-4_longest_pt_trans0false_decaylr2.5e-6100000_relactfalsefirst_camfalse/checkpoints/best/pretrained_model/ +# ADD_IMAGE_TOKENS=true +# ATTN_MODE=cross_attn +# STATE_TO_PREFIX=true +# CAUSAL_ACTION_ATTENTION_MASK=true +# SELF_ATTN_EVERY_N_LAYERS=2 +# VLM_NAME=HuggingFaceTB/SmolVLM-500M-Instruct + +# CAUSAL_ATTENTION_ON_HISTORY=true +# SELF_ATTN_ONLY_ACTIONS=false +# EXPERT_WIDTH_MULTIPLIER=0.75 +# PAST_OBS_KEYS="image" +# N_OBS_STEPS=1 +# NUM_EXPERT_LAYERS=0 +# CHUNK_SIZE=50 +# NUM_VLM_LAYERS=16 +# PAD_LANG_TO=longest +# EVAL_CKPT=$WORK/logs/lerobot/lerobot_physical_intelligence_libero_smolpi0_lr1e-4bs32steps100000gpus2freeze32_cross_attn_vlml16_sa2_smolvlm2500_chunk50_nobs1_expw0.75_lrvlm1e-4_longest_ptcomv1v2freezebs64transv0_trans0false_decaylr2.5e-630000_relactfalsefirst_camfalse_vim-1/checkpoints/best/pretrained_model/ +# ADD_IMAGE_TOKENS=true +# ATTN_MODE=cross_attn +# STATE_TO_PREFIX=true +# CAUSAL_ACTION_ATTENTION_MASK=true +# SELF_ATTN_EVERY_N_LAYERS=2 +# VLM_NAME=HuggingFaceTB/SmolVLM-500M-Instruct + +# 
CAUSAL_ATTENTION_ON_HISTORY=true +# SELF_ATTN_ONLY_ACTIONS=false +# EXPERT_WIDTH_MULTIPLIER=0.75 +# PAST_OBS_KEYS="image" +# N_OBS_STEPS=1 +# NUM_EXPERT_LAYERS=0 +# CHUNK_SIZE=50 +# NUM_VLM_LAYERS=16 +# PAD_LANG_TO=longest +# EVAL_CKPT=$WORK/logs/lerobot/lerobot_physical_intelligence_libero_smolpi0_lr1e-4bs32steps100000gpus2freeze32_cross_attn_vlml16_sa2_smolvlm2500_chunk50_nobs1_expw0.75_lrvlm1e-4_longest_pt_trans1true_decaylr2.5e-630000_relactfalsefirst_camfalse_vim-1/checkpoints/best/pretrained_model/ +# ADD_IMAGE_TOKENS=true +# ATTN_MODE=cross_attn +# STATE_TO_PREFIX=true +# CAUSAL_ACTION_ATTENTION_MASK=true +# SELF_ATTN_EVERY_N_LAYERS=2 +# VLM_NAME=HuggingFaceTB/SmolVLM-500M-Instruct + +# REGRESSION_LOSS=true +# CAUSAL_ATTENTION_ON_HISTORY=true +# SELF_ATTN_ONLY_ACTIONS=false +# EXPERT_WIDTH_MULTIPLIER=0.5 +# PAST_OBS_KEYS="image" +# N_OBS_STEPS=1 +# NUM_EXPERT_LAYERS=0 +# CHUNK_SIZE=50 +# NUM_VLM_LAYERS=0 +# PAD_LANG_TO=longest +# EVAL_CKPT=$WORK/logs/lerobot/lerobot_physical_intelligence_libero_smolpi0_lr1e-4bs8steps100000gpus2freeze32_cross_attn_vlml0_sa0_smolvlm2500_chunk50_nobs1_expw0.5_lrvlm1e-4_longest_pt_trans0false_decaylr2.5e-630000_relactfalsefirst_camfalse_vim-1_regtrue/checkpoints/best/pretrained_model/ +# ADD_IMAGE_TOKENS=true +# ATTN_MODE=cross_attn +# STATE_TO_PREFIX=true +# CAUSAL_ACTION_ATTENTION_MASK=true +# SELF_ATTN_EVERY_N_LAYERS=0 +# VLM_NAME=HuggingFaceTB/SmolVLM-500M-Instruct + +# REGRESSION_LOSS=true +# CAUSAL_ATTENTION_ON_HISTORY=true +# SELF_ATTN_ONLY_ACTIONS=false +# EXPERT_WIDTH_MULTIPLIER=0.75 +# PAST_OBS_KEYS="image" +# N_OBS_STEPS=1 +# NUM_EXPERT_LAYERS=0 +# CHUNK_SIZE=50 +# NUM_VLM_LAYERS=16 +# PAD_LANG_TO=longest +# EVAL_CKPT=$WORK/logs/lerobot/lerobot_physical_intelligence_libero_smolpi0_lr1e-4bs32steps100000gpus2freeze32_cross_attn_vlml16_sa2_smolvlm2500_chunk50_nobs1_expw0.75_lrvlm1e-4_longest_pt_trans0false_decaylr2.5e-630000_relactfalsefirst_camfalse_vim-1_regtrue/checkpoints/best/pretrained_model/ +# 
ADD_IMAGE_TOKENS=true +# ATTN_MODE=cross_attn +# STATE_TO_PREFIX=true +# CAUSAL_ACTION_ATTENTION_MASK=true +# SELF_ATTN_EVERY_N_LAYERS=2 +# VLM_NAME=HuggingFaceTB/SmolVLM-500M-Instruct + + +# CAUSAL_ATTENTION_ON_HISTORY=true +# SELF_ATTN_ONLY_ACTIONS=false +# EXPERT_WIDTH_MULTIPLIER=0.75 +# PAST_OBS_KEYS="image" +# N_OBS_STEPS=1 +# NUM_EXPERT_LAYERS=0 +# CHUNK_SIZE=50 +# NUM_VLM_LAYERS=16 +# PAD_LANG_TO=longest +# EVAL_CKPT=$WORK/logs/lerobot/lerobot_physical_intelligence_libero_smolpi0_lr1e-4bs32steps100000gpus2freeze32_cross_attn_vlml16_sa2_smolvlm2500_chunk50_nobs1_expw0.75_lrvlm1e-4_longest_pt_trans0false_decaylr2.5e-6100000_relactfalsefirst_camfalse_vim-1/checkpoints/best/pretrained_model/ +# ADD_IMAGE_TOKENS=true +# ATTN_MODE=cross_attn +# STATE_TO_PREFIX=true +# CAUSAL_ACTION_ATTENTION_MASK=true +# SELF_ATTN_EVERY_N_LAYERS=2 +# VLM_NAME=HuggingFaceTB/SmolVLM-500M-Instruct + +# CAUSAL_ATTENTION_ON_HISTORY=true +# SELF_ATTN_ONLY_ACTIONS=false +# EXPERT_WIDTH_MULTIPLIER=0.75 +# PAST_OBS_KEYS="image" +# N_OBS_STEPS=1 +# NUM_EXPERT_LAYERS=0 +# CHUNK_SIZE=50 +# NUM_VLM_LAYERS=16 +# PAD_LANG_TO=longest +# EVAL_CKPT=$WORK/logs/lerobot/lerobot_physical_intelligence_libero_smolpi0_lr2e-4bs32steps100000gpus2freeze32_cross_attn_vlml16_sa2_smolvlm2500_chunk50_nobs1_expw0.75_lrvlm1e-4_longest_pt_trans0false_decaylr2.5e-6100000_relactfalsefirst_camfalse_vim-1/checkpoints/best/pretrained_model/ +# ADD_IMAGE_TOKENS=true +# ATTN_MODE=cross_attn +# STATE_TO_PREFIX=true +# CAUSAL_ACTION_ATTENTION_MASK=true +# SELF_ATTN_EVERY_N_LAYERS=2 +# VLM_NAME=HuggingFaceTB/SmolVLM-500M-Instruct + + + +# CAUSAL_ATTENTION_ON_HISTORY=true +# SELF_ATTN_ONLY_ACTIONS=false +# EXPERT_WIDTH_MULTIPLIER=0.75 +# PAST_OBS_KEYS="image" +# N_OBS_STEPS=1 +# NUM_EXPERT_LAYERS=0 +# CHUNK_SIZE=50 +# NUM_VLM_LAYERS=16 +# PAD_LANG_TO=longest +# 
EVAL_CKPT=$WORK/logs/lerobot/lerobot_physical_intelligence_libero_smolpi0_lr3e-4bs32steps100000gpus2freeze32_cross_attn_vlml16_sa2_smolvlm2500_chunk50_nobs1_expw0.75_lrvlm1e-4_longest_pt_trans0false_decaylr2.5e-6100000_relactfalsefirst_camfalse_vim-1/checkpoints/best/pretrained_model/ +# ADD_IMAGE_TOKENS=true +# ATTN_MODE=cross_attn +# STATE_TO_PREFIX=true +# CAUSAL_ACTION_ATTENTION_MASK=true +# SELF_ATTN_EVERY_N_LAYERS=2 +# VLM_NAME=HuggingFaceTB/SmolVLM-500M-Instruct + + +# REGRESSION_LOSS=true +# CAUSAL_ATTENTION_ON_HISTORY=true +# SELF_ATTN_ONLY_ACTIONS=false +# EXPERT_WIDTH_MULTIPLIER=0.5 +# PAST_OBS_KEYS="image" +# N_OBS_STEPS=1 +# NUM_EXPERT_LAYERS=0 +# CHUNK_SIZE=50 +# NUM_VLM_LAYERS=0 +# PAD_LANG_TO=longest +# EVAL_CKPT=$WORK/logs/lerobot/lerobot_physical_intelligence_libero_smolpi0_lr9e-5bs8steps100000gpus2freeze32_cross_attn_vlml0_sa0_smolvlm2500_chunk50_nobs1_expw0.5_lrvlm1e-4_longest_pt_trans0false_decaylr2.5e-630000_relactfalsefirst_camfalse_vim-1_regtrue/checkpoints/best/pretrained_model/ +# ADD_IMAGE_TOKENS=true +# ATTN_MODE=cross_attn +# STATE_TO_PREFIX=true +# CAUSAL_ACTION_ATTENTION_MASK=true +# SELF_ATTN_EVERY_N_LAYERS=0 +# VLM_NAME=HuggingFaceTB/SmolVLM-500M-Instruct + + + +# REGRESSION_LOSS=true +# CAUSAL_ATTENTION_ON_HISTORY=true +# SELF_ATTN_ONLY_ACTIONS=false +# EXPERT_WIDTH_MULTIPLIER=0.5 +# PAST_OBS_KEYS="image" +# N_OBS_STEPS=1 +# NUM_EXPERT_LAYERS=0 +# CHUNK_SIZE=50 +# NUM_VLM_LAYERS=0 +# PAD_LANG_TO=longest +# EVAL_CKPT=$WORK/logs/lerobot/lerobot_physical_intelligence_libero_smolpi0_lr2e-4bs8steps100000gpus2freeze32_cross_attn_vlml0_sa0_smolvlm2500_chunk50_nobs1_expw0.5_lrvlm1e-4_longest_pt_trans0false_decaylr2.5e-630000_relactfalsefirst_camfalse_vim-1_regtrue/checkpoints/best/pretrained_model/ +# ADD_IMAGE_TOKENS=true +# ATTN_MODE=cross_attn +# STATE_TO_PREFIX=true +# CAUSAL_ACTION_ATTENTION_MASK=true +# SELF_ATTN_EVERY_N_LAYERS=0 +# VLM_NAME=HuggingFaceTB/SmolVLM-500M-Instruct + + +# CAUSAL_ATTENTION_ON_HISTORY=true +# 
SELF_ATTN_ONLY_ACTIONS=false +# EXPERT_WIDTH_MULTIPLIER=0.75 +# PAST_OBS_KEYS="image" +# N_OBS_STEPS=1 +# NUM_EXPERT_LAYERS=0 +# CHUNK_SIZE=50 +# NUM_VLM_LAYERS=16 +# PAD_LANG_TO=longest +# EVAL_CKPT=$WORK/logs/lerobot/lerobot_physical_intelligence_libero_smolpi0_lr9e-5bs32steps100000gpus2freeze32_cross_attn_vlml16_sa2_smolvlm2500_chunk50_nobs1_expw0.75_lrvlm1e-4_longest_pt_trans0false_decaylr2.5e-6100000_relactfalsefirst_camfalse_vim-1_regfalse/checkpoints/best/pretrained_model/ +# ADD_IMAGE_TOKENS=true +# ATTN_MODE=cross_attn +# STATE_TO_PREFIX=true +# CAUSAL_ACTION_ATTENTION_MASK=true +# SELF_ATTN_EVERY_N_LAYERS=2 +# VLM_NAME=HuggingFaceTB/SmolVLM-500M-Instruct + + +# CAUSAL_ATTENTION_ON_HISTORY=true +# SELF_ATTN_ONLY_ACTIONS=false +# EXPERT_WIDTH_MULTIPLIER=0.75 +# PAST_OBS_KEYS="image" +# N_OBS_STEPS=1 +# NUM_EXPERT_LAYERS=0 +# CHUNK_SIZE=50 +# NUM_VLM_LAYERS=16 +# PAD_LANG_TO=longest +# EVAL_CKPT=$WORK/logs/lerobot/lerobot_physical_intelligence_libero_smolpi0_lr5e-4bs8steps100000gpus2freeze32_cross_attn_vlml16_sa2_smolvlm2250_chunk50_nobs1_expw0.75_lrvlm1e-4_longest_pt_trans0false_decaylr2.5e-630000_relactfalsefirst_camfalse_vim-1_regfalse/checkpoints/best/pretrained_model/ +# ADD_IMAGE_TOKENS=true +# ATTN_MODE=cross_attn +# STATE_TO_PREFIX=true +# CAUSAL_ACTION_ATTENTION_MASK=true +# SELF_ATTN_EVERY_N_LAYERS=2 +# VLM_NAME=HuggingFaceTB/SmolVLM2-256M-Video-Instruct + +# CAUSAL_ATTENTION_ON_HISTORY=true +# SELF_ATTN_ONLY_ACTIONS=false +# EXPERT_WIDTH_MULTIPLIER=0.75 +# PAST_OBS_KEYS="image" +# N_OBS_STEPS=1 +# NUM_EXPERT_LAYERS=0 +# CHUNK_SIZE=50 +# NUM_VLM_LAYERS=16 +# PAD_LANG_TO=longest +# EVAL_CKPT=$WORK/logs/lerobot/lerobot_physical_intelligence_libero_smolpi0_lr8e-5bs8steps100000gpus2freeze32_cross_attn_vlml16_sa2_smolvlm22b_chunk50_nobs1_expw0.75_lrvlm1e-4_longest_pt_trans0false_decaylr2.5e-630000_relactfalsefirst_camfalse_vim-1_regfalse/checkpoints/best/pretrained_model/ +# ADD_IMAGE_TOKENS=true +# ATTN_MODE=cross_attn +# STATE_TO_PREFIX=true +# 
CAUSAL_ACTION_ATTENTION_MASK=true +# SELF_ATTN_EVERY_N_LAYERS=2 +# VLM_NAME=HuggingFaceTB/SmolVLM2-2.2B-Instruct + +# CAUSAL_ATTENTION_ON_HISTORY=true +# SELF_ATTN_ONLY_ACTIONS=false +# EXPERT_WIDTH_MULTIPLIER=0.75 +# PAST_OBS_KEYS="image" +# N_OBS_STEPS=1 +# NUM_EXPERT_LAYERS=0 +# CHUNK_SIZE=50 +# NUM_VLM_LAYERS=16 +# PAD_LANG_TO=longest +# EVAL_CKPT=$WORK/logs/lerobot/lerobot_physical_intelligence_libero_smolpi0_lr5e-4bs32steps100000gpus2freeze32_cross_attn_vlml16_sa2_smolvlm1250_chunk50_nobs1_expw0.75_lrvlm1e-4_longest_pt_trans0false_decaylr2.5e-630000_relactfalsefirst_camfalse_vim-1_regfalse/checkpoints/best/pretrained_model/ +# ADD_IMAGE_TOKENS=true +# ATTN_MODE=cross_attn +# STATE_TO_PREFIX=true +# CAUSAL_ACTION_ATTENTION_MASK=true +# SELF_ATTN_EVERY_N_LAYERS=2 +# VLM_NAME=HuggingFaceTB/SmolVLM2-256M-Video-Instruct + + +# CAUSAL_ATTENTION_ON_HISTORY=true +# SELF_ATTN_ONLY_ACTIONS=false +# EXPERT_WIDTH_MULTIPLIER=0.75 +# PAST_OBS_KEYS="image" +# N_OBS_STEPS=1 +# NUM_EXPERT_LAYERS=0 +# CHUNK_SIZE=50 +# NUM_VLM_LAYERS=16 +# PAD_LANG_TO=longest +# EVAL_CKPT=$WORK/logs/lerobot/lerobot_physical_intelligence_libero_smolpi0_lr4e-4bs32steps100000gpus2freeze32_cross_attn_vlml16_sa2_smolvlm1250_chunk50_nobs1_expw0.75_lrvlm1e-4_longest_pt_trans0false_decaylr2.5e-630000_relactfalsefirst_camfalse_vim-1_regfalse/checkpoints/best/pretrained_model/ +# ADD_IMAGE_TOKENS=true +# ATTN_MODE=cross_attn +# STATE_TO_PREFIX=true +# CAUSAL_ACTION_ATTENTION_MASK=true +# SELF_ATTN_EVERY_N_LAYERS=2 +# VLM_NAME=HuggingFaceTB/SmolVLM2-256M-Video-Instruct + + +# CAUSAL_ATTENTION_ON_HISTORY=true +# SELF_ATTN_ONLY_ACTIONS=false +# EXPERT_WIDTH_MULTIPLIER=0.75 +# PAST_OBS_KEYS="image" +# N_OBS_STEPS=1 +# NUM_EXPERT_LAYERS=0 +# CHUNK_SIZE=50 +# NUM_VLM_LAYERS=16 +# PAD_LANG_TO=longest +# 
EVAL_CKPT=$WORK/logs/lerobot/lerobot_physical_intelligence_libero_smolpi0_lr3e-4bs32steps100000gpus2freeze32_cross_attn_vlml16_sa2_smolvlm1250_chunk50_nobs1_expw0.75_lrvlm1e-4_longest_pt_trans0false_decaylr2.5e-630000_relactfalsefirst_camfalse_vim-1_regfalse/checkpoints/best/pretrained_model/ +# ADD_IMAGE_TOKENS=true +# ATTN_MODE=cross_attn +# STATE_TO_PREFIX=true +# CAUSAL_ACTION_ATTENTION_MASK=true +# SELF_ATTN_EVERY_N_LAYERS=2 +# VLM_NAME=HuggingFaceTB/SmolVLM2-256M-Video-Instruct + + +# CAUSAL_ATTENTION_ON_HISTORY=true +# SELF_ATTN_ONLY_ACTIONS=false +# EXPERT_WIDTH_MULTIPLIER=0.75 +# PAST_OBS_KEYS="image" +# N_OBS_STEPS=1 +# NUM_EXPERT_LAYERS=0 +# CHUNK_SIZE=50 +# NUM_VLM_LAYERS=16 +# PAD_LANG_TO=longest +# EVAL_CKPT=$WORK/logs/lerobot/lerobot_physical_intelligence_libero_smolpi0_lr6e-5bs32steps100000gpus2freeze32_cross_attn_vlml16_sa2_smolvlm12b_chunk50_nobs1_expw0.75_lrvlm1e-4_longest_pt_trans0false_decaylr2.5e-630000_relactfalsefirst_camfalse_vim-1_regfalse/checkpoints/best/pretrained_model/ +# ADD_IMAGE_TOKENS=true +# ATTN_MODE=cross_attn +# STATE_TO_PREFIX=true +# CAUSAL_ACTION_ATTENTION_MASK=true +# SELF_ATTN_EVERY_N_LAYERS=2 +# VLM_NAME=HuggingFaceTB/SmolVLM2-2.2B-Instruct + + +# CAUSAL_ATTENTION_ON_HISTORY=true +# SELF_ATTN_ONLY_ACTIONS=false +# EXPERT_WIDTH_MULTIPLIER=0.75 +# PAST_OBS_KEYS="image" +# N_OBS_STEPS=1 +# NUM_EXPERT_LAYERS=0 +# CHUNK_SIZE=50 +# NUM_VLM_LAYERS=16 +# PAD_LANG_TO=longest +# EVAL_CKPT=$WORK/logs/lerobot/lerobot_physical_intelligence_libero_smolpi0_lr1e-4bs32steps100000gpus2freeze32_cross_attn_vlml16_sa2_smolvlm12b_chunk50_nobs1_expw0.75_lrvlm1e-4_longest_pt_trans0false_decaylr2.5e-630000_relactfalsefirst_camfalse_vim-1_regfalse/checkpoints/best/pretrained_model/ +# ADD_IMAGE_TOKENS=true +# ATTN_MODE=cross_attn +# STATE_TO_PREFIX=true +# CAUSAL_ACTION_ATTENTION_MASK=true +# SELF_ATTN_EVERY_N_LAYERS=2 +# VLM_NAME=HuggingFaceTB/SmolVLM2-2.2B-Instruct + + + +# CAUSAL_ATTENTION_ON_HISTORY=true +# SELF_ATTN_ONLY_ACTIONS=false +# 
EXPERT_WIDTH_MULTIPLIER=0.75 +# PAST_OBS_KEYS="image" +# N_OBS_STEPS=1 +# NUM_EXPERT_LAYERS=0 +# CHUNK_SIZE=50 +# NUM_VLM_LAYERS=16 +# PAD_LANG_TO=longest +# EVAL_CKPT=$WORK/logs/lerobot/lerobot_physical_intelligence_libero_smolpi0_lr9e-5bs32steps100000gpus2freeze32_cross_attn_vlml16_sa2_smolvlm12b_chunk50_nobs1_expw0.75_lrvlm1e-4_longest_pt_trans0false_decaylr2.5e-630000_relactfalsefirst_camfalse_vim-1_regfalse/checkpoints/best/pretrained_model/ +# ADD_IMAGE_TOKENS=true +# ATTN_MODE=cross_attn +# STATE_TO_PREFIX=true +# CAUSAL_ACTION_ATTENTION_MASK=true +# SELF_ATTN_EVERY_N_LAYERS=2 +# VLM_NAME=HuggingFaceTB/SmolVLM2-2.2B-Instruct + + +# CAUSAL_ATTENTION_ON_HISTORY=true +# SELF_ATTN_ONLY_ACTIONS=false +# EXPERT_WIDTH_MULTIPLIER=0.75 +# PAST_OBS_KEYS="image" +# N_OBS_STEPS=1 +# NUM_EXPERT_LAYERS=0 +# CHUNK_SIZE=50 +# NUM_VLM_LAYERS=16 +# PAD_LANG_TO=longest +# EVAL_CKPT=$WORK/logs/lerobot/lerobot_physical_intelligence_libero_smolpi0_lr7e-5bs32steps100000gpus2freeze32_cross_attn_vlml16_sa2_smolvlm12b_chunk50_nobs1_expw0.75_lrvlm1e-4_longest_pt_trans0false_decaylr2.5e-630000_relactfalsefirst_camfalse_vim-1_regfalse/checkpoints/best/pretrained_model/ +# ADD_IMAGE_TOKENS=true +# ATTN_MODE=cross_attn +# STATE_TO_PREFIX=true +# CAUSAL_ACTION_ATTENTION_MASK=true +# SELF_ATTN_EVERY_N_LAYERS=2 +# VLM_NAME=HuggingFaceTB/SmolVLM2-2.2B-Instruct + + +# CAUSAL_ATTENTION_ON_HISTORY=true +# SELF_ATTN_ONLY_ACTIONS=false +# EXPERT_WIDTH_MULTIPLIER=0.75 +# PAST_OBS_KEYS="image" +# N_OBS_STEPS=1 +# NUM_EXPERT_LAYERS=0 +# CHUNK_SIZE=50 +# NUM_VLM_LAYERS=16 +# PAD_LANG_TO=longest +# EVAL_CKPT=$WORK/logs/lerobot/lerobot_physical_intelligence_libero_smolpi0_lr8e-5bs32steps100000gpus2freeze32_cross_attn_vlml16_sa2_smolvlm12b_chunk50_nobs1_expw0.75_lrvlm1e-4_longest_pt_trans0false_decaylr2.5e-630000_relactfalsefirst_camfalse_vim-1_regfalse/checkpoints/best/pretrained_model/ +# ADD_IMAGE_TOKENS=true +# ATTN_MODE=cross_attn +# STATE_TO_PREFIX=true +# CAUSAL_ACTION_ATTENTION_MASK=true +# 
SELF_ATTN_EVERY_N_LAYERS=2 +# VLM_NAME=HuggingFaceTB/SmolVLM2-2.2B-Instruct + + +# Active evaluation preset: unlike the commented-out presets around it, the assignments below are live. +# CAUSAL_ATTENTION_ON_HISTORY=true replaces the false default assigned earlier in this script; SELF_ATTN_ONLY_ACTIONS repeats its earlier value. +# EVAL_CKPT is an absolute path under /raid/jade, not a $WORK/logs/lerobot run directory like the commented-out presets. +# NOTE(review): the SBATCH job name mentions smolvlm2b and lr7e5 while VLM_NAME below is SmolVLM-500M-Instruct; confirm the intended checkpoint/model pairing. +CAUSAL_ATTENTION_ON_HISTORY=true +SELF_ATTN_ONLY_ACTIONS=false +EXPERT_WIDTH_MULTIPLIER=0.75 +PAST_OBS_KEYS="image" +N_OBS_STEPS=1 +NUM_EXPERT_LAYERS=0 +CHUNK_SIZE=50 +NUM_VLM_LAYERS=16 +PAD_LANG_TO=longest +EVAL_CKPT=/raid/jade/models/smolvlamust +ADD_IMAGE_TOKENS=true +ATTN_MODE=cross_attn +STATE_TO_PREFIX=true +CAUSAL_ACTION_ATTENTION_MASK=true +SELF_ATTN_EVERY_N_LAYERS=2 +VLM_NAME=HuggingFaceTB/SmolVLM-500M-Instruct + + +# CAUSAL_ATTENTION_ON_HISTORY=true +# SELF_ATTN_ONLY_ACTIONS=false +# EXPERT_WIDTH_MULTIPLIER=0.75 +# PAST_OBS_KEYS="image" +# N_OBS_STEPS=1 +# NUM_EXPERT_LAYERS=0 +# CHUNK_SIZE=50 +# NUM_VLM_LAYERS=16 +# PAD_LANG_TO=longest +# EVAL_CKPT=$WORK/logs/lerobot/lerobot_physical_intelligence_libero_smolpi0_lr6e-5bs32steps100000gpus2freeze32_cross_attn_vlml16_sa2_smolvlm22b_chunk50_nobs1_expw0.75_lrvlm1e-4_longest_pt_trans0false_decaylr2.5e-630000_relactfalsefirst_camfalse_vim-1_regfalse_compilefalse/checkpoints/best/pretrained_model/ +# ADD_IMAGE_TOKENS=true +# ATTN_MODE=cross_attn +# STATE_TO_PREFIX=true +# CAUSAL_ACTION_ATTENTION_MASK=true +# SELF_ATTN_EVERY_N_LAYERS=2 +# VLM_NAME=HuggingFaceTB/SmolVLM2-2.2B-Instruct + +# CAUSAL_ATTENTION_ON_HISTORY=true +# SELF_ATTN_ONLY_ACTIONS=false +# EXPERT_WIDTH_MULTIPLIER=0.75 +# PAST_OBS_KEYS="image" +# N_OBS_STEPS=1 +# NUM_EXPERT_LAYERS=0 +# CHUNK_SIZE=50 +# NUM_VLM_LAYERS=16 +# PAD_LANG_TO=longest +# EVAL_CKPT=$WORK/logs/lerobot/lerobot_physical_intelligence_libero_smolpi0_lr9e-5bs32steps100000gpus2freeze32_cross_attn_vlml16_sa2_smolvlm22b_chunk50_nobs1_expw0.75_lrvlm1e-4_longest_pt_trans0false_decaylr2.5e-630000_relactfalsefirst_camfalse_vim-1_regfalse_compilefalse/checkpoints/best/pretrained_model/ +# ADD_IMAGE_TOKENS=true +# ATTN_MODE=cross_attn +# STATE_TO_PREFIX=true +# CAUSAL_ACTION_ATTENTION_MASK=true +# SELF_ATTN_EVERY_N_LAYERS=2 +# VLM_NAME=HuggingFaceTB/SmolVLM2-2.2B-Instruct + + +# CAUSAL_ATTENTION_ON_HISTORY=true 
+# SELF_ATTN_ONLY_ACTIONS=false +# EXPERT_WIDTH_MULTIPLIER=0.75 +# PAST_OBS_KEYS="image" +# N_OBS_STEPS=1 +# NUM_EXPERT_LAYERS=0 +# CHUNK_SIZE=50 +# NUM_VLM_LAYERS=16 +# PAD_LANG_TO=longest +# EVAL_CKPT=$WORK/logs/lerobot/lerobot_physical_intelligence_libero_smolpi0_lr8e-5bs32steps100000gpus2freeze32_cross_attn_vlml16_sa2_smolvlm22b_chunk50_nobs1_expw0.75_lrvlm1e-4_longest_pt_trans0false_decaylr2.5e-630000_relactfalsefirst_camfalse_vim-1_regfalse_compilefalse/checkpoints/best/pretrained_model/ +# ADD_IMAGE_TOKENS=true +# ATTN_MODE=cross_attn +# STATE_TO_PREFIX=true +# CAUSAL_ACTION_ATTENTION_MASK=true +# SELF_ATTN_EVERY_N_LAYERS=2 +# VLM_NAME=HuggingFaceTB/SmolVLM2-2.2B-Instruct + +# CAUSAL_ATTENTION_ON_HISTORY=true +# SELF_ATTN_ONLY_ACTIONS=false +# EXPERT_WIDTH_MULTIPLIER=0.75 +# PAST_OBS_KEYS="image" +# N_OBS_STEPS=1 +# NUM_EXPERT_LAYERS=0 +# CHUNK_SIZE=50 +# NUM_VLM_LAYERS=16 +# PAD_LANG_TO=longest +# EVAL_CKPT=$WORK/logs/lerobot/lerobot_physical_intelligence_libero_smolpi0_lr1e-4bs32steps100000gpus2freeze32_cross_attn_vlml16_sa2_smolvlm22b_chunk50_nobs1_expw0.75_lrvlm1e-4_longest_pt_trans0false_decaylr2.5e-630000_relactfalsefirst_camfalse_vim-1_regfalse_compilefalse/checkpoints/best/pretrained_model/ +# ADD_IMAGE_TOKENS=true +# ATTN_MODE=cross_attn +# STATE_TO_PREFIX=true +# CAUSAL_ACTION_ATTENTION_MASK=true +# SELF_ATTN_EVERY_N_LAYERS=2 +# VLM_NAME=HuggingFaceTB/SmolVLM2-2.2B-Instruct + + + +# CAUSAL_ATTENTION_ON_HISTORY=true +# SELF_ATTN_ONLY_ACTIONS=false +# EXPERT_WIDTH_MULTIPLIER=0.75 +# PAST_OBS_KEYS="image" +# N_OBS_STEPS=1 +# NUM_EXPERT_LAYERS=0 +# CHUNK_SIZE=50 +# NUM_VLM_LAYERS=16 +# PAD_LANG_TO=longest +# EVAL_CKPT=$WORK/logs/lerobot/lerobot_physical_intelligence_libero_smolpi0_lr7e-4bs32steps100000gpus2freeze32_cross_attn_vlml16_sa2_smolvlm1250_chunk50_nobs1_expw0.75_lrvlm1e-4_longest_pt_trans0false_decaylr2.5e-630000_relactfalsefirst_camfalse_vim-1_regfalse/checkpoints/best/pretrained_model/ +# ADD_IMAGE_TOKENS=true +# ATTN_MODE=cross_attn +# 
STATE_TO_PREFIX=true +# CAUSAL_ACTION_ATTENTION_MASK=true +# SELF_ATTN_EVERY_N_LAYERS=2 +# VLM_NAME=HuggingFaceTB/SmolVLM2-256M-Video-Instruct + + +# CAUSAL_ATTENTION_ON_HISTORY=true +# SELF_ATTN_ONLY_ACTIONS=false +# EXPERT_WIDTH_MULTIPLIER=0.75 +# PAST_OBS_KEYS="image" +# N_OBS_STEPS=1 +# NUM_EXPERT_LAYERS=0 +# CHUNK_SIZE=50 +# NUM_VLM_LAYERS=16 +# PAD_LANG_TO=longest +# EVAL_CKPT=$WORK/logs/lerobot/lerobot_physical_intelligence_libero_smolpi0_lr1e-4bs32steps100000gpus2freeze32_cross_attn_vlml16_sa2_smolvlm2250_chunk50_nobs1_expw0.75_lrvlm1e-4_longest_pt_trans0false_decaylr2.5e-630000_relactfalsefirst_camfalse_vim-1_regfalse_compilefalse/checkpoints/best/pretrained_model/ +# ADD_IMAGE_TOKENS=true +# ATTN_MODE=cross_attn +# STATE_TO_PREFIX=true +# CAUSAL_ACTION_ATTENTION_MASK=true +# SELF_ATTN_EVERY_N_LAYERS=2 +# VLM_NAME=HuggingFaceTB/SmolVLM2-256M-Video-Instruct + + +# CAUSAL_ATTENTION_ON_HISTORY=true +# SELF_ATTN_ONLY_ACTIONS=false +# EXPERT_WIDTH_MULTIPLIER=0.75 +# PAST_OBS_KEYS="image" +# N_OBS_STEPS=1 +# NUM_EXPERT_LAYERS=0 +# CHUNK_SIZE=50 +# NUM_VLM_LAYERS=16 +# PAD_LANG_TO=longest +# EVAL_CKPT=$WORK/logs/lerobot/lerobot_physical_intelligence_libero_smolpi0_lr3e-4bs32steps100000gpus2freeze32_cross_attn_vlml16_sa2_smolvlm2250_chunk50_nobs1_expw0.75_lrvlm1e-4_longest_pt_trans0false_decaylr2.5e-630000_relactfalsefirst_camfalse_vim-1_regfalse_compilefalse/checkpoints/best/pretrained_model/ +# ADD_IMAGE_TOKENS=true +# ATTN_MODE=cross_attn +# STATE_TO_PREFIX=true +# CAUSAL_ACTION_ATTENTION_MASK=true +# SELF_ATTN_EVERY_N_LAYERS=2 +# VLM_NAME=HuggingFaceTB/SmolVLM2-256M-Video-Instruct + +# CAUSAL_ATTENTION_ON_HISTORY=true +# SELF_ATTN_ONLY_ACTIONS=false +# EXPERT_WIDTH_MULTIPLIER=0.75 +# PAST_OBS_KEYS="image" +# N_OBS_STEPS=1 +# NUM_EXPERT_LAYERS=0 +# CHUNK_SIZE=50 +# NUM_VLM_LAYERS=16 +# PAD_LANG_TO=longest +# 
EVAL_CKPT=$WORK/logs/lerobot/lerobot_physical_intelligence_libero_smolpi0_lr5e-4bs32steps100000gpus2freeze32_cross_attn_vlml16_sa2_smolvlm2250_chunk50_nobs1_expw0.75_lrvlm1e-4_longest_pt_trans0false_decaylr2.5e-630000_relactfalsefirst_camfalse_vim-1_regfalse_compilefalse/checkpoints/best/pretrained_model/ +# ADD_IMAGE_TOKENS=true +# ATTN_MODE=cross_attn +# STATE_TO_PREFIX=true +# CAUSAL_ACTION_ATTENTION_MASK=true +# SELF_ATTN_EVERY_N_LAYERS=2 +# VLM_NAME=HuggingFaceTB/SmolVLM2-256M-Video-Instruct + + +# CAUSAL_ATTENTION_ON_HISTORY=true +# SELF_ATTN_ONLY_ACTIONS=false +# EXPERT_WIDTH_MULTIPLIER=0.75 +# PAST_OBS_KEYS="image" +# N_OBS_STEPS=1 +# NUM_EXPERT_LAYERS=0 +# CHUNK_SIZE=50 +# NUM_VLM_LAYERS=16 +# PAD_LANG_TO=longest +# EVAL_CKPT=$WORK/logs/lerobot/lerobot_physical_intelligence_libero_smolpi0_lr7e-4bs32steps100000gpus2freeze32_cross_attn_vlml16_sa2_smolvlm2250_chunk50_nobs1_expw0.75_lrvlm1e-4_longest_pt_trans0false_decaylr2.5e-630000_relactfalsefirst_camfalse_vim-1_regfalse_compilefalse/checkpoints/best/pretrained_model/ +# ADD_IMAGE_TOKENS=true +# ATTN_MODE=cross_attn +# STATE_TO_PREFIX=true +# CAUSAL_ACTION_ATTENTION_MASK=true +# SELF_ATTN_EVERY_N_LAYERS=2 +# VLM_NAME=HuggingFaceTB/SmolVLM2-256M-Video-Instruct + +# CAUSAL_ATTENTION_ON_HISTORY=true +# SELF_ATTN_ONLY_ACTIONS=false +# EXPERT_WIDTH_MULTIPLIER=0.75 +# PAST_OBS_KEYS="image" +# N_OBS_STEPS=1 +# NUM_EXPERT_LAYERS=0 +# CHUNK_SIZE=50 +# NUM_VLM_LAYERS=16 +# PAD_LANG_TO=longest +# EVAL_CKPT=$WORK/logs/lerobot/lerobot_physical_intelligence_libero_smolpi0_lr6e-4bs32steps100000gpus2freeze32_cross_attn_vlml16_sa2_smolvlm2250_chunk50_nobs1_expw0.75_lrvlm1e-4_longest_pt_trans0false_decaylr2.5e-630000_relactfalsefirst_camfalse_vim-1_regfalse_compilefalse/checkpoints/best/pretrained_model/ +# ADD_IMAGE_TOKENS=true +# ATTN_MODE=cross_attn +# STATE_TO_PREFIX=true +# CAUSAL_ACTION_ATTENTION_MASK=true +# SELF_ATTN_EVERY_N_LAYERS=2 +# VLM_NAME=HuggingFaceTB/SmolVLM2-256M-Video-Instruct + +# 
CAUSAL_ATTENTION_ON_HISTORY=true +# SELF_ATTN_ONLY_ACTIONS=false +# EXPERT_WIDTH_MULTIPLIER=0.75 +# PAST_OBS_KEYS="image" +# N_OBS_STEPS=1 +# NUM_EXPERT_LAYERS=0 +# CHUNK_SIZE=50 +# NUM_VLM_LAYERS=16 +# PAD_LANG_TO=longest +# EVAL_CKPT=$WORK/logs/lerobot/lerobot_physical_intelligence_libero_smolpi0_lr4e-4bs32steps100000gpus2freeze32_cross_attn_vlml16_sa2_smolvlm2250_chunk50_nobs1_expw0.75_lrvlm1e-4_longest_pt_trans0false_decaylr2.5e-630000_relactfalsefirst_camfalse_vim-1_regfalse_compilefalse/checkpoints/best/pretrained_model/ +# ADD_IMAGE_TOKENS=true +# ATTN_MODE=cross_attn +# STATE_TO_PREFIX=true +# CAUSAL_ACTION_ATTENTION_MASK=true +# SELF_ATTN_EVERY_N_LAYERS=2 +# VLM_NAME=HuggingFaceTB/SmolVLM2-256M-Video-Instruct + + +# # TASK=libero_spatial +# MULTITASK_EVAL=false +# N_EPISODES=50 + +# TASK=libero_spatial,libero_object,libero_goal,libero_10 +MULTITASK_EVAL=true +# N_EPISODES=5 +N_EPISODES=1 + +# MAX_PARRALLEL_TASKS=5 +# MAX_PARRALLEL_TASKS=2 +MAX_PARRALLEL_TASKS=1 + +# NUM_EVALS=2 +# SEEDS=(1000 5000) +SEEDS=(5000) +# ACTION_STEPS_LIST=(1 10 30 50) +ACTION_STEPS_LIST=(1) +# ACTION_STEPS_LIST=(50) +TASK_LIST=(libero_spatial libero_object libero_goal libero_10) +TASK_LIST=(libero_spatial) +for SEED in "${SEEDS[@]}"; do + for N_ACTION_STEPS in "${ACTION_STEPS_LIST[@]}"; do + for TASK in "${TASK_LIST[@]}"; do + echo "$TASK Evaluating: $EVAL_CKPT | N_ACTION_STEPS=$N_ACTION_STEPS | EVAL SEED=$SEED" + python src/lerobot/scripts/eval.py \ + --output_dir=/raid/jade/logs/lerobot/tmp \ + --env.type=$ENV \ + --env.task=$TASK \ + --eval.batch_size=$N_EPISODES \ + --eval.n_episodes=$N_EPISODES \ + --seed=$SEED \ + --policy.use_amp=false \ + --policy.path=$EVAL_CKPT \ + --policy.n_action_steps=$N_ACTION_STEPS \ + --policy.checkpoint_path=$EVAL_CKPT \ + --env.multitask_eval=$MULTITASK_EVAL --env.max_parallel_tasks=$MAX_PARRALLEL_TASKS \ + --policy.add_image_special_tokens=$ADD_IMAGE_TOKENS \ + --policy.attention_mode=$ATTN_MODE \ + 
--policy.causal_action_attention_mask=$CAUSAL_ACTION_ATTENTION_MASK \ + --policy.state_to_prefix=$STATE_TO_PREFIX \ + --policy.self_attn_every_n_layers=$SELF_ATTN_EVERY_N_LAYERS \ + --policy.pad_language_to=$PAD_LANG_TO \ + --policy.load_vlm_weights=$LOAD_VLM_WEIGHTS \ + --policy.vlm_model_name=$VLM_NAME \ + --policy.num_vlm_layers=$NUM_VLM_LAYERS \ + --policy.chunk_size=$CHUNK_SIZE \ + --policy.n_obs_steps=$N_OBS_STEPS \ + --policy.past_obs_keys=$PAST_OBS_KEYS \ + --policy.num_expert_layers=$NUM_EXPERT_LAYERS \ + --policy.expert_width_multiplier=$EXPERT_WIDTH_MULTIPLIER \ + --policy.peft_method=$PEFT_METHOD \ + --policy.self_attn_only_actions=$SELF_ATTN_ONLY_ACTIONS \ + --policy.causal_attention_on_history=$CAUSAL_ATTENTION_ON_HISTORY \ + --policy.predict_relative_actions=$PREDICT_RELATIVE_ACTIONS --policy.relative_actions_mode=$RELATIVE_ACTIONS_MODE --policy.shuffle_camera_positions=$SHUFFLE_CAMERA_POSITIONS \ + --policy.vlm_img_size=$VLM_IMG_SIZE \ + --policy.regression_loss=$REGRESSION_LOSS + # --policy.peft_config.r=$LORA_R --policy.peft_config.target_modules=$LORA_TARGET_MODULES --policy.peft_method=$PEFT_METHOD --policy.peft_target_model=$PEFT_TARGET_MODEL + + echo "Done with: $EVAL_CKPT | Steps=$N_ACTION_STEPS | EVAL SEED=$SEED" + echo "------------------------------------------------------" + done + done +done + + +# ############################################################################################################################################ +# ############################################################################################################################################ +# ############################################################################################################################################ +# ########### Offline eval + + +# # ############################ +# # # Community datasets V1 +# # # 
REPO_ID=pranavsaroha/so100_legos4,pranavsaroha/so100_onelego2,jpata/so100_pick_place_tangerine,pranavsaroha/so100_onelego3,pranavsaroha/so100_carrot_2,pranavsaroha/so100_carrot_5,pandaRQ/pick_med_1,HITHY/so100_strawberry,vladfatu/so100_above,koenvanwijk/orange50-1,koenvanwijk/orange50-variation-2,FeiYjf/new_GtoR,CSCSXX/pick_place_cube_1.18,vladfatu/so100_office,dragon-95/so100_sorting,dragon-95/so100_sorting_1,nbaron99/so100_pick_and_place4,Beegbrain/pick_place_green_block,Ityl/so100_recording2,dragon-95/so100_sorting_2,dragon-95/so100_sorting_3,aractingi/push_cube_offline_data,HITHY/so100_peach3,HITHY/so100_peach4,shreyasgite/so100_legocube_50,shreyasgite/so100_base_env,triton7777/so100_dataset_mix,Deason11/Open_the_drawer_to_place_items,Deason11/PLACE_TAPE_PUSH_DRAWER,NONHUMAN-RESEARCH/SOARM100_TASK_VENDA,mikechambers/block_cup_14,samsam0510/tooth_extraction_3,samsam0510/tooth_extraction_4,samsam0510/cube_reorientation_2,samsam0510/cube_reorientation_4,samsam0510/glove_reorientation_1,DorayakiLin/so100_pick_charger_on_tissue,zijian2022/noticehuman3,liuhuanjim013/so100_th +# # # Inconsistent actions dim: Deason11/Open_the_drawer_to_place_items, Deason11/PLACE_TAPE_PUSH_DRAWER +# # # Filtered datasets +# # 
REPO_ID=pranavsaroha/so100_onelego2,pranavsaroha/so100_onelego3,pranavsaroha/so100_carrot_2,vladfatu/so100_above,koenvanwijk/orange50-1,CSCSXX/pick_place_cube_1.18,dragon-95/so100_sorting,dragon-95/so100_sorting_1,nbaron99/so100_pick_and_place4,Beegbrain/pick_place_green_block,dragon-95/so100_sorting_3,HITHY/so100_peach3,shreyasgite/so100_legocube_50,triton7777/so100_dataset_mix,NONHUMAN-RESEARCH/SOARM100_TASK_VENDA,mikechambers/block_cup_14,samsam0510/tooth_extraction_3,samsam0510/tooth_extraction_4,samsam0510/cube_reorientation_2,samsam0510/cube_reorientation_4,samsam0510/glove_reorientation_1,vladfatu/so100_office,pranavsaroha/so100_legos4,Ityl/so100_recording2,FeiYjf/new_GtoR,dragon-95/so100_sorting_2,HITHY/so100_peach4,jpata/so100_pick_place_tangerine,HITHY/so100_strawberry,shreyasgite/so100_base_env,koenvanwijk/orange50-variation-2,pranavsaroha/so100_carrot_5,pandaRQ/pick_med_1,aractingi/push_cube_offline_data,DorayakiLin/so100_pick_charger_on_tissue,zijian2022/noticehuman3,liuhuanjim013/so100_th +# # SAMPLING_WEIGHTS= +# # DATASET_NAME=so100_community_v1 + + +# # # Community datasets V2 +# # # Inconsistent actions: 1g0rrr/sam_openpi_solder1, 1g0rrr/sam_openpi03, 1g0rrr/sam_openpi_solder2 +# # # Other issues: pierfabre/rabbit bensprenger/right_arm_p_brick_in_box_with_y_noise_v0 pierfabre/horse pierfabre/pig2 pierfabre/pig3 pierfabre/cow2,pierfabre/sheep +# # # 
REPO_ID=Chojins/chess_game_009_white,sihyun77/suho_3_17_1,sihyun77/sihyun_3_17_2,sihyun77/suho_3_17_3,sihyun77/sihyun_3_17_5,Odog16/so100_cube_drop_pick_v1,sihyun77/sihyun_main_2,sihyun77/suho_main_2,Bartm3/dice2,sihyun77/sihyun_main_3,Loki0929/so100_duck,pietroom/holdthis,pietroom/actualeasytask,Beegbrain/pick_lemon_and_drop_in_bowl,Beegbrain/sweep_tissue_cube,zijian2022/321,gxy1111/so100_pick_place,Odog16/so100_cube_stacking_v1,sihyun77/mond_1,andlyu/so100_indoor_1,andlyu/so100_indoor_3,frk2/so100large,lirislab/sweep_tissue_cube,lirislab/lemon_into_bowl,lirislab/red_cube_into_green_lego_block,lirislab/red_cube_into_blue_cube,00ri/so100_battery,frk2/so100largediffcam,FsqZ/so100_1,ZGGZZG/so100_drop0,Chojins/chess_game_000_white_red,smanni/train_so100_fluffy_box,ganker5/so100_push_20250328,ganker5/so100_dataline_0328,ganker5/so100_color_0328,CrazyYhang/A1234-B-C_mvA2B,RasmusP/so100_Orange2Green,sixpigs1/so100_pick_cube_in_box,ganker5/so100_push_20250331,ganker5/so100_dataline_20250331,lirislab/put_caps_into_teabox,lirislab/close_top_drawer_teabox,lirislab/open_top_drawer_teabox,lirislab/unfold_bottom_right,lirislab/push_cup_target,lirislab/put_banana_bowl,Chojins/chess_game_001_blue_stereo,Chojins/chess_game_001_red_stereo,ganker5/so100_toy_20250402,Gano007/so100_medic,00ri/so100_battery_bin_center,paszea/so100_whale_2,lirislab/fold_bottom_right,lirislab/put_coffee_cap_teabox,therarelab/so100_pick_place_2,paszea/so100_whale_3,paszea/so100_whale_4,paszea/so100_lego,LemonadeDai/so100_coca,zijian2022/backgrounda,zijian2022/backgroundb,356c/so100_nut_sort_1,Mwuqiu/so100_0408_muti,aimihat/so100_tape,lirislab/so100_demo,356c/so100_duck_reposition_1,zijian2022/sort1,weiye11/so100_410_zwy,VoicAndrei/so100_banana_to_plate_only,sixpigs1/so100_stack_cube_error,isadev/bougies3,zijian2022/close3,bensprenger/left_arm_yellow_brick_in_box_v0,lirislab/guess_who_so100,bensprenger/left_arm_yellow_brick_in_box_with_purple_noise_v0,roboticshack/team16-can-stacking,zijian2022/insert2,robo
ticshack/team-7-right-arm-grasp-tape,Jiangeng/so100_413,roboticshack/team9-pick_cube_place_static_plate,AndrejOrsula/lerobot_double_ball_stacking_random,roboticshack/left-arm-grasp-lego-brick,roboticshack/team-7-left-arm-grasp-motor,roboticshack/team9-pick_chicken_place_plate,roboticshack/team13-two-balls-stacking,tkc79/so100_lego_box_1,roboticshack/team13-three-balls-stacking,pierfabre/chicken,roboticshack/team16-water-pouring,ad330/cubePlace,Jiafei1224/so100_pa222per,paszea/so100_lego_2cam,bensprenger/chess_game_001_blue_stereo,Mohamedal/put_banana,tkc79/so100_lego_box_2,samanthalhy/so100_herding_1,jlesein/TestBoulon7 +# # REPO_ID=pierfabre/rabbit,bensprenger/right_arm_p_brick_in_box_with_y_noise_v0,pierfabre/horse,pierfabre/pig2,pierfabre/pig3,pierfabre/cow2,pierfabre/sheep,Chojins/chess_game_009_white,sihyun77/suho_3_17_1,sihyun77/sihyun_3_17_2,sihyun77/suho_3_17_3,sihyun77/sihyun_3_17_5,Odog16/so100_cube_drop_pick_v1,sihyun77/sihyun_main_2,sihyun77/suho_main_2,Bartm3/dice2,sihyun77/sihyun_main_3,Loki0929/so100_duck,pietroom/holdthis,pietroom/actualeasytask,Beegbrain/pick_lemon_and_drop_in_bowl,Beegbrain/sweep_tissue_cube,zijian2022/321,gxy1111/so100_pick_place,Odog16/so100_cube_stacking_v1,sihyun77/mond_1,andlyu/so100_indoor_1,andlyu/so100_indoor_3,frk2/so100large,lirislab/sweep_tissue_cube,lirislab/lemon_into_bowl,lirislab/red_cube_into_green_lego_block,lirislab/red_cube_into_blue_cube,00ri/so100_battery,frk2/so100largediffcam,FsqZ/so100_1,ZGGZZG/so100_drop0,Chojins/chess_game_000_white_red,smanni/train_so100_fluffy_box,ganker5/so100_push_20250328,ganker5/so100_dataline_0328,ganker5/so100_color_0328,CrazyYhang/A1234-B-C_mvA2B,RasmusP/so100_Orange2Green,sixpigs1/so100_pick_cube_in_box,ganker5/so100_push_20250331,ganker5/so100_dataline_20250331,lirislab/put_caps_into_teabox,lirislab/close_top_drawer_teabox,lirislab/open_top_drawer_teabox,lirislab/unfold_bottom_right,lirislab/push_cup_target,lirislab/put_banana_bowl,Chojins/chess_game_001_blue_stereo,Chojins/ches
s_game_001_red_stereo,ganker5/so100_toy_20250402,Gano007/so100_medic,00ri/so100_battery_bin_center,paszea/so100_whale_2,lirislab/fold_bottom_right,lirislab/put_coffee_cap_teabox,therarelab/so100_pick_place_2,paszea/so100_whale_3,paszea/so100_whale_4,paszea/so100_lego,LemonadeDai/so100_coca,zijian2022/backgrounda,zijian2022/backgroundb,356c/so100_nut_sort_1,Mwuqiu/so100_0408_muti,aimihat/so100_tape,lirislab/so100_demo,356c/so100_duck_reposition_1,zijian2022/sort1,weiye11/so100_410_zwy,VoicAndrei/so100_banana_to_plate_only,sixpigs1/so100_stack_cube_error,isadev/bougies3,zijian2022/close3,bensprenger/left_arm_yellow_brick_in_box_v0,lirislab/guess_who_so100,bensprenger/left_arm_yellow_brick_in_box_with_purple_noise_v0,roboticshack/team16-can-stacking,zijian2022/insert2,roboticshack/team-7-right-arm-grasp-tape,Jiangeng/so100_413,roboticshack/team9-pick_cube_place_static_plate,AndrejOrsula/lerobot_double_ball_stacking_random,roboticshack/left-arm-grasp-lego-brick,roboticshack/team-7-left-arm-grasp-motor,roboticshack/team9-pick_chicken_place_plate,roboticshack/team13-two-balls-stacking,tkc79/so100_lego_box_1,roboticshack/team13-three-balls-stacking,pierfabre/chicken,roboticshack/team16-water-pouring,ad330/cubePlace,Jiafei1224/so100_pa222per,paszea/so100_lego_2cam,bensprenger/chess_game_001_blue_stereo,Mohamedal/put_banana,tkc79/so100_lego_box_2,samanthalhy/so100_herding_1,jlesein/TestBoulon7 +# # SAMPLING_WEIGHTS= +# # DATASET_NAME=so100_community_v2 + +# # Community datasets V1+V2 +# # 
REPO_ID=pierfabre/rabbit,bensprenger/right_arm_p_brick_in_box_with_y_noise_v0,pierfabre/horse,pierfabre/pig2,pierfabre/pig3,pierfabre/cow2,pierfabre/sheep,Chojins/chess_game_009_white,sihyun77/suho_3_17_1,sihyun77/sihyun_3_17_2,sihyun77/suho_3_17_3,sihyun77/sihyun_3_17_5,Odog16/so100_cube_drop_pick_v1,sihyun77/sihyun_main_2,sihyun77/suho_main_2,Bartm3/dice2,sihyun77/sihyun_main_3,Loki0929/so100_duck,pietroom/holdthis,pietroom/actualeasytask,Beegbrain/pick_lemon_and_drop_in_bowl,Beegbrain/sweep_tissue_cube,zijian2022/321,gxy1111/so100_pick_place,Odog16/so100_cube_stacking_v1,sihyun77/mond_1,andlyu/so100_indoor_1,andlyu/so100_indoor_3,frk2/so100large,lirislab/sweep_tissue_cube,lirislab/lemon_into_bowl,lirislab/red_cube_into_green_lego_block,lirislab/red_cube_into_blue_cube,00ri/so100_battery,frk2/so100largediffcam,FsqZ/so100_1,ZGGZZG/so100_drop0,Chojins/chess_game_000_white_red,smanni/train_so100_fluffy_box,ganker5/so100_push_20250328,ganker5/so100_dataline_0328,ganker5/so100_color_0328,CrazyYhang/A1234-B-C_mvA2B,RasmusP/so100_Orange2Green,sixpigs1/so100_pick_cube_in_box,ganker5/so100_push_20250331,ganker5/so100_dataline_20250331,lirislab/put_caps_into_teabox,lirislab/close_top_drawer_teabox,lirislab/open_top_drawer_teabox,lirislab/unfold_bottom_right,lirislab/push_cup_target,lirislab/put_banana_bowl,Chojins/chess_game_001_blue_stereo,Chojins/chess_game_001_red_stereo,ganker5/so100_toy_20250402,Gano007/so100_medic,00ri/so100_battery_bin_center,paszea/so100_whale_2,lirislab/fold_bottom_right,lirislab/put_coffee_cap_teabox,therarelab/so100_pick_place_2,paszea/so100_whale_3,paszea/so100_whale_4,paszea/so100_lego,LemonadeDai/so100_coca,zijian2022/backgrounda,zijian2022/backgroundb,356c/so100_nut_sort_1,Mwuqiu/so100_0408_muti,aimihat/so100_tape,lirislab/so100_demo,356c/so100_duck_reposition_1,zijian2022/sort1,weiye11/so100_410_zwy,VoicAndrei/so100_banana_to_plate_only,sixpigs1/so100_stack_cube_error,isadev/bougies3,zijian2022/close3,bensprenger/left_arm_yellow_brick_in_box
_v0,lirislab/guess_who_so100,bensprenger/left_arm_yellow_brick_in_box_with_purple_noise_v0,roboticshack/team16-can-stacking,zijian2022/insert2,roboticshack/team-7-right-arm-grasp-tape,Jiangeng/so100_413,roboticshack/team9-pick_cube_place_static_plate,AndrejOrsula/lerobot_double_ball_stacking_random,roboticshack/left-arm-grasp-lego-brick,roboticshack/team-7-left-arm-grasp-motor,roboticshack/team9-pick_chicken_place_plate,roboticshack/team13-two-balls-stacking,tkc79/so100_lego_box_1,roboticshack/team13-three-balls-stacking,pierfabre/chicken,roboticshack/team16-water-pouring,ad330/cubePlace,Jiafei1224/so100_pa222per,paszea/so100_lego_2cam,bensprenger/chess_game_001_blue_stereo,Mohamedal/put_banana,tkc79/so100_lego_box_2,samanthalhy/so100_herding_1,jlesein/TestBoulon7,pranavsaroha/so100_onelego2,pranavsaroha/so100_onelego3,pranavsaroha/so100_carrot_2,vladfatu/so100_above,koenvanwijk/orange50-1,CSCSXX/pick_place_cube_1.18,dragon-95/so100_sorting,dragon-95/so100_sorting_1,nbaron99/so100_pick_and_place4,Beegbrain/pick_place_green_block,dragon-95/so100_sorting_3,HITHY/so100_peach3,shreyasgite/so100_legocube_50,triton7777/so100_dataset_mix,NONHUMAN-RESEARCH/SOARM100_TASK_VENDA,mikechambers/block_cup_14,samsam0510/tooth_extraction_3,samsam0510/tooth_extraction_4,samsam0510/cube_reorientation_2,samsam0510/cube_reorientation_4,samsam0510/glove_reorientation_1,vladfatu/so100_office,pranavsaroha/so100_legos4,Ityl/so100_recording2,FeiYjf/new_GtoR,dragon-95/so100_sorting_2,HITHY/so100_peach4,jpata/so100_pick_place_tangerine,HITHY/so100_strawberry,shreyasgite/so100_base_env,koenvanwijk/orange50-variation-2,pranavsaroha/so100_carrot_5,pandaRQ/pick_med_1,aractingi/push_cube_offline_data,DorayakiLin/so100_pick_charger_on_tissue,zijian2022/noticehuman3,liuhuanjim013/so100_th +# REPO_ID=pierfabre/rabbit,bensprenger/right_arm_p_brick_in_box_with_y_noise_v0,pierfabre/horse,pierfabre/pig2 +# SAMPLING_WEIGHTS= + +# # # Community V3 +# # # issues, yskim2025/unitylerobot (version), 
cranberrysoft/so100 (don't exist),29 datasets different actions: nguyen-v/so100_rotate_red_button satvikahuja/mixer_on_off_new_1 ... +# # REPO_ID=satvikahuja/mixer_on_off_new_1,aergogo/so100_pick_place,andy309/so100_0314_fold_cloths,jchun/so100_pickplace_small_20250323_120056,astroyat/cube,Ofiroz91/so_100_cube2bowl,HappyPablo/dec3_data2,ZCM5115/so100_1210,francescocrivelli/orange_feeding,francescocrivelli/carrot_eating,0x00raghu/toffee_red,0x00raghu/toffee_red_2,0x00raghu/toffee_red_3__,0x00raghu/toffee_blue,0x00raghu/toffee_blue_2,0x00raghu/toffee_to_hand_1,0x00raghu/toffee_to_hand_2,liyitenga/so100_bi_hello,liyitenga/so100_bi_giveme5,ZCM5115/so100_2Arm3cameras_movebox,pranavsaroha/so100_carrot_1,pranavsaroha/so100_carrot_3,pranavsaroha/so100_carrot_4,maximilienroberti/so100_lego_red_box,pranavsaroha/so100_squishy,rabhishek100/so100_train_dataset,pranavsaroha/so100_squishy100,swarajgosavi/kikobot_pusht_real_v2,pandaRQ/pickmed,swarajgosavi/act_kikobot_pusht_real,pranavsaroha/so100_squishy2colors,pranavsaroha/so100_squishy2colors_1,Chojins/chess_game_001_white,jmrog/so100_sweet_pick,Chojins/chess_game_002_white,pranavsaroha/so100_squishy2colors_2_new,Chojins/chess_game_003_white,aractingi/pick_place_lego_cube,Chojins/chess_game_004_white,Chojins/chess_game_005_white,Chojins/chess_game_006_white,Chojins/chess_game_007_white,koenvanwijk/blue2,jlitch/so100multicam3,koenvanwijk/blue52,jlitch/so100multicam6,aractingi/pick_place_lego_cube_1,jlitch/so100multicam7,vladfatu/so100_ds,Chojins/chess_game_000_white,HITHY/so100-kiwi,HITHY/so100_peach1,HITHY/so100_redstrawberry,satvikahuja/orange_mixer_1,satvikahuja/mixer_on_off,satvikahuja/orange_pick_place_new1,satvikahuja/mixer_on_off_new,danmac1/real_real332,FeiYjf/Makalu_push,liyitenga/so100_pick_taffy1,chmadran/so100_dataset04,FeiYjf/Maklu_dataset,FeiYjf/new_Dataset,liyitenga/so100_pick_taffy2,satvikahuja/mixer_on_off_new_4,CSCSXX/pick_place_cube_1.17,liyitenga/so100_pick_taffy3,liyitenga/so100_pick_taffy4,yuz1wan/so100_pick_
pink,yuz1wan/so100_pick_wahaha,yuz1wan/so100_pp_pink,yuz1wan/so100_pour_cup,liyitenga/so100_pick_taffy5,liyitenga/so100_pick_taffy6,yuz1wan/so100_button,yuz1wan/so100_pickplace,liyitenga/so100_pick_taffy7,FeiYjf/push_gg,FeiYjf/push_0094,swarajgosavi/act_kikobot_block_real,liyitenga/so100_pick_taffy8,phospho-ai/OrangeBrick3Cameras,vaishanthr/toy_pick_place,SeanLMH/so100_picknplace_v2,pepijn223/yellow_lego_in_box1,DimiSch/so100_50ep_2,DimiSch/so100_50ep_3,SeanLMH/so100_picknplace,nbaron99/so100_pick_and_place2,chmadran/so100_dataset08,vaishanthr/toy_pickplace_50ep,Beegbrain/pick_place_green_block_lr,Ityl/so100_recording1,vaishanthr/toy_pickplace,ad330/so100_box_pickPlace,Beegbrain/so100_put_cube_cup,aractingi/push_green_cube_hf,aractingi/push_green_cube_hf_cropped_resized,carpit680/giraffe_task,carpit680/giraffe_sock_demo_1,DimiSch/so100_terra_50_2,carpit680/giraffe_sock_demo_2,aractingi/push_cube_to_face_reward,aractingi/push_cube_to_face_reward_cropped_resized,aractingi/push_cube_reward_data,aractingi/push_cube_reward_data_cropped_resized,aractingi/push_cube_offline_data_cropped_resized,aractingi/push_cube_front_side_reward,aractingi/push_cube_front_side_reward_cropped_resized,aractingi/push_cube_front_side_reward_long,aractingi/push_cube_front_side_reward_long_cropped_resized,aractingi/push_cube_reward,aractingi/push_cube_reward_cropped_resized,aractingi/push_cube_square_reward_cropped_resized,aractingi/push_cube_square_reward_1,aractingi/push_cube_square_reward_1_cropped_resized,aractingi/push_cube_square_light_reward,aractingi/push_cube_square_light_offline_demo,aractingi/push_cube_square_light_offline_demo_cropped_resized,denghj/dataset_red_tape01,aractingi/push_cube_square_offline_demo,aractingi/push_cube_square_offline_demo_cropped_resized,Beegbrain/stack_two_cubes,FeiYjf/Test_NNNN,LegrandFrederic/Orange-brick-lower-resolution,aractingi/pick_place_lego_cube_cropped_resized,aractingi/push_cube_overfit,aractingi/push_cube_overfit_cropped_resized,HITHY/so100_peac
h,zaringleb/so100_cube_2,andreasBihlmaier/dual_arm_transfer_2025_02_16,zaringleb/so100_cube_4_binary,1g0rrr/reward_pickplace1,1g0rrr/reward_pickplace1_cropped_resized,FeiYjf/Hold_Pieces,FeiYjf/Grab_Pieces,hegdearyandev/so100_eraser_cup_v1,jbraumann/so100_1902,liyitenga/so100_pick_taffy10,mikechambers/block_cup_5,zaringleb/so100_cube_5_linear,yuz1wan/so100_pickplace_0223_2,yuz1wan/so100_pickplace_0223_3,samsam0510/mj_data_temp,samsam0510/tape_insert_1,samsam0510/tape_insert_2,pengjunkun/so100_push_to_hole,Deason11/Random_Kitchen,1g0rrr/reward_dataset_name2,1g0rrr/reward_dataset_name2_cropped_resized,1g0rrr/offline_dataset_name2,1g0rrr/offline_dataset_name2_cropped_resized,aractingi/push_cube_simp_cropped_resized,danielkr452/so100_work6,Loki0929/so100_100,yuz1wan/so100_fold_0227_1,yuz1wan/so100_fold_0227_2,speedyyoshi/so100_grasp_pink_block,lirislab/stack_two_red_cubes,lirislab/red_cube_into_mug,lirislab/green_lego_block_into_mug,lirislab/green_lego_block_into_mug_easy,kevin510/lerobot-cat-toy-placement,NONHUMAN-RESEARCH/SOARM100_TASK_VENDA_BOX,wangjl1512/pour_water,airthebear/so100_GL,zijian2022/noticehuman1,zijian2022/noticehuman2,kantine/so100_kapla_tower6,zijian2022/noticehuman5,zijian2022/llm40,Ashton3/lerobot-aloha,zijian2022/noticehuman50,AaronNewman/screwdriver_task_batch1,AaronNewman/screwdriver_task_batch2,AaronNewman/screwdriver_task_batch3,zijian2022/noticehuman60,zijian2022/noticehuman70,Bartm3/tape_to_bin,liuhuanjim013/so100_th_1,Pi-robot/barbecue_flip,Pi-robot/barbecue_put,wangjl1512/doll,sshh11/so100_orange_50ep_1,sshh11/so100_orange_50ep_2,DorayakiLin/so100_pick_cube_in_box,Bartm3/tape_to_bin2,luke250305/play_dice_250311.1,andy309/so100_0311_1152,sihyun77/suho_so100,sihyun77/si_so100,shreyasgite/so100_base_left,sihyun77/suho_red,liuhuanjim013/so100_block,andy309/so100_0313_no_wrist_camera,zijian2022/l9,zijian2022/n1_2,DorayakiLin/so100_stack_cube,andy309/so100_0313_no_wrist_camera_with_two_arms_cloths,joaoocruz00/so100_makeitD1,zijian2022/l10_1,zijian
2022/l10_5,sihyun77/suho_red2,sihyun77/suho_angel,sihyun77/sihyun_king,acrampette/third_arm_01,Winster/so100_cube,1g0rrr/sam_openpi03,thedevansh/mar16_1336,hkphoooey/throw_stuffie,doujiangwang/task1_10epi_100000step,sihyun77/sihyun_3_17_1,acrampette/third_arm_02,imsyed00/so100_yellowbowl_pickplace_1,kumarhans/so100_tape_task,sihyun77/sihyun_main,doujiangwang/task2_10epi_100000step,kantine/industrial_robothon_buttons_expert,kantine/industrial_robothon_buttons_anomaly,kantine/industrial_robothon_hatchAndProbe_expert,kantine/industrial_robothon_hatchAndProbe_anomaly,Odog16/so100_tea_towel_folding_v1,zijian2022/so100_318,zijian2022/so100_318_1,Congying1112/so100_place_blue_bottle_with_two_cameras,Congying1112/so100_place_blue_bottle_with_two_cameras2,Congying1112/so100_place_blue_bottle_with_single_camera,pietroom/first_task_short,kantine/industrial_screws_sorting_expert,kantine/industrial_screws_sorting_anomaly,pietroom/second_task,zijian2022/c0,doujiangwang/task4_10epi_100000step,Congying1112/so100_switch_with_onhand_camera,HYAIYN/so100_get_orange_10epi,doujiangwang/task5_10epi_100000step,1g0rrr/sam_openpi_cube_low10,1g0rrr/sam_openpi_cube_top10,1g0rrr/sam_openpi_wire10,1g0rrr/sam_openpi_solder1,1g0rrr/sam_openpi_solder2,wcode/so100_put_pen_50,jchun/so100_pickplace_small_20250322_193929,bnarin/so100_tic_tac_toe_we_do_it_live,dc2ac/so100-t5,chmadran/so100_home_dataset,baladhurgesh97/so100_final_picking_3,bnarin/so100_tic_tac_toe_move_0_0,bnarin/so100_tic_tac_toe_move_1_0,bnarin/so100_tic_tac_toe_move_2_1,bnarin/so100_tic_tac_toe_move_4_0,zaringleb/so100_cube_6_2d,andlyu/so100_indoor_0,andlyu/so100_indoor_2,Winster/so100_sim,badwolf256/so100_twin_cam_duck,Congying1112/so100_simplepick_with_2_cameras_from_top,andlyu/so100_indoor_4,Zak-Y/so100_grap_dataset,kantine/domotic_pouringCoffee_expert,kantine/domotic_pouringCoffee_anomaly,lucasngoo/so100_strawberry_grape,kantine/domotic_makingCoffee_expert,kantine/domotic_makingCoffee_anomaly,ZGGZZG/so100_drop1,kantine/industrial_
soldering_expert,kantine/industrial_soldering_anomaly,Yotofu/so100_sweeper_shoes,kantine/domotic_dishTidyUp_expert,kantine/domotic_dishTidyUp_anomaly,kantine/domotic_groceriesSorting_expert,kantine/domotic_groceriesSorting_anomaly,badwolf256/so100_twin_cam_duck_v2,kantine/domotic_vegetagblesAndFruitsSorting_expert,kantine/domotic_vegetagblesAndFruitsSorting_anomaly,kantine/domotic_setTheTable_expert,kantine/domotic_setTheTable_anomaly,therarelab/so100_pick_place,abhisb/so100_51_ep,andlyu/so100_indoor_val_0,allenchienxxx/so100Test,lizi178119985/so100_jia,badwolf256/so100_twin_cam_duck_v3,andrewcole712/so100_tape_bin_place,Gano007/so100_lolo,Zak-Y/so100_three_cameras_dataset,Gano007/so100_doliprane,XXRRSSRR/so100_v3_num_episodes_50,zijian2022/assemblyarm2,ganker5/so100_action_20250403,andlyu/so100_indoor_val2,Gano007/so100_gano,paszea/so100_whale_grab,paszea/so100_whale,Clementppr/lerobot_pick_and_place_dataset_world_model,andlyu/so100_indoor_10,RasmusP/so100_dataset50ep_a,RasmusP/so100_dataset50ep,Gano007/so100_second,zaringleb/so100_cude_linear_and_2d_comb,dsfsg/grasp_pens,zijian2022/digitalfix,zijian2022/digitalfix2,zijian2022/digitalfix3,T1g3rGE/so100_pickplace_small_20250407_171912,sihyun77/mond_13,abokinala/sputnik_100_11_pick_place_container,dsfsg/bring_bottle,duthvik/sputnik_100_13_pick_place_container,abokinala/sputnik_100_12_pick_place_container,Mwuqiu/so100_0408,AK51/4090_01,356c/so100_rope_reposition_1,paszea/so100_lego_mix,abokinala/sputnik_100_14_pick_place_container,abokinala/sputnik_100_23_pick_place_surface,jiajun001/eraser00_2,jlesein/TestBoulon2,duthvik/sputnik_100_31_pour_liquid,duthvik/sputnik_100_24_pick_place_surface,duthvik/sputnik_100_25_pick_place_surface,duthvik/sputnik_100_17_pick_place_container,duthvik/sputnik_100_26_pick_place_surface,VoicAndrei/so100_banana_to_plate_rebel_full,isadev/bougies1,danaaubakirova/so100_task_1,danaaubakirova/so100_task_2,danaaubakirova/so100_task_3,danaaubakirova/so100_task_4,sixpigs1/so100_pick_cube_in_box_er
ror,sixpigs1/so100_push_cube_error,sixpigs1/so100_pull_cube_error,isadev/bougies2,therarelab/med_dis_rare_6,duthvik/sputnik_100_27_pick_place_surface,zijian2022/closer3,duthvik/sputnik_100_41_custom_tasks,duthvik/sputnik_100_42_custom_tasks,duthvik/sputnik_100_43_custom_tasks,duthvik/sputnik_100_44_custom_tasks,duthvik/sputnik_100_51_kitchen_tasks,duthvik/sputnik_100_52_kitchen_tasks,duthvik/sputnik_100_53_kitchen_tasks,duthvik/sputnik_100_45_custom_tasks,duthvik/sputnik_100_32_pour_liquid,duthvik/sputnik_100_29_pick_place_surface,duthvik/sputnik_100_18_pick_place_container,sixpigs1/so100_pull_cube_by_tool_error,sixpigs1/so100_insert_cylinder_error,abokinala/sputnik_100_54_kitchen_tasks,abokinala/sputnik_100_55_kitchen_tasks,m1b/so100_bluelego,abokinala/sputnik_100_46_custom_tasks,m1b/so100_bluelego_updt,kantine/flip_A0,kantine/flip_A1,kantine/flip_A2,kantine/flip_A3,lirislab/guess_who_no_cond,kantine/flip_A4,kantine/flip_A5,lirislab/guess_who_lighting,nguyen-v/so100_press_red_button,nguyen-v/so100_bimanual_grab_lemon_put_in_box2,pierfabre/cow,nguyen-v/press_red_button_new,nguyen-v/so100_rotate_red_button,raghav-katta-1/lerobot2,Cidoyi/so100_all_notes,roboticshack/team10-red-block,Cidoyi/so100_all_notes_1,roboticshack/team_5-QuiEstCe_everyBox,roboticshack/team11_pianobot,roboticshack/team2-guess_who_so100,roboticshack/team2-guess_who_so100_light,roboticshack/team2-guess_who_so100_edge_case,roboticshack/team2-guess_who_less_ligth,Cidoyi/so100_all_notes_3,dsfsg/grasp_pen_and_bottle,abokinala/sputnik_100_60_kitchen_tasks,abokinala/sputnik_100_58_kitchen_tasks,danaaubakirova/so100_v2_task_1,danaaubakirova/so100_v2_task_2,danaaubakirova/so100_v2_task_3,danaaubakirova/so100_v2_task_4,zijian2022/force1,zijian2022/force2,zijian2022/force3,jiajun001/eraser00_3,zijian2022/bi2,zijian2022/bi1,zijian2022/hand1,Setchii/so100_grab_ball,MossProphet/so100_square-1-2-3.2 +# # SAMPLING_WEIGHTS= +# # DATASET_NAME=so100_community_v3 + +# ########################## + +# ROBOT=so100 +# 
export TOKENIZERS_PARALLELISM=false +# export MUJOCO_GL=egl + + + +# SAMPLING_WEIGHTS= +# FEATURES_VERSION=2 +# NUM_IMAGE_TRANSFORMS=10 +# TRAIN_ON_ALL_FEATURES=true +# NORM_PER_ROBOT=true +# USE_IMAGENET_STATS=false + +# MAX_STATE_DIM=6 +# MAX_ACTION_DIM=6 +# MAX_NUM_IMAGES=3 +# MAX_IMAGE_DIM=256 + + +# SEED=5000 +# BATCH_SIZE=32 +# # EVAL_STEPS=1000 +# EVAL_STEPS=100 + + + + + +# SELF_ATTN_ONLY_ACTIONS=false +# EXPERT_WIDTH_MULTIPLIER=0.75 +# PAST_OBS_KEYS="image" +# N_OBS_STEPS=1 +# NUM_EXPERT_LAYERS=0 +# CHUNK_SIZE=50 +# NUM_VLM_LAYERS=16 +# PAD_LANG_TO=longest +# EVAL_CKPT=/lustre/fswork/projects/rech/dyf/ugz83ue/logs/lerobot/lerobot_so100_community_v1_v2_smolpi0_lr1e-4bs64steps200000gpus4freeze32_imgtoktrue_cross_attn_gap1_localimgfalse_statetopreftrue_explay0_vlml16_causalacttrue_sa2_smolvlm2500_chunk50_nobs1_expw0.75_feat2_lrvlm1e-4_droptrue_max_length/checkpoints/080000/pretrained_model/ +# ADD_IMAGE_TOKENS=true +# ATTN_MODE=cross_attn +# STATE_TO_PREFIX=true +# CAUSAL_ACTION_ATTENTION_MASK=true +# SELF_ATTN_EVERY_N_LAYERS=2 +# VLM_NAME=HuggingFaceTB/SmolVLM-500M-Instruct + + +# python lerobot/scripts/offline_inference.py \ +# --output_dir=$WORK/logs/lerobot/tmp \ +# --batch_size=$BATCH_SIZE \ +# --seed=$SEED \ +# --eval_steps=$EVAL_STEPS \ +# --use_amp=false \ +# --device=cuda \ +# --dataset.repo_id=$REPO_ID --dataset.local_files_only=true --dataset.sampling_weights=$SAMPLING_WEIGHTS --dataset.use_imagenet_stats=$USE_IMAGENET_STATS --policy.normalize_per_robot_type=$NORM_PER_ROBOT \ +# --dataset.image_transforms.max_num_transforms=$NUM_IMAGE_TRANSFORMS --dataset.image_transforms.enable=true --dataset.train_on_all_features=$TRAIN_ON_ALL_FEATURES \ +# --dataset.max_action_dim=$MAX_ACTION_DIM --dataset.max_state_dim=$MAX_STATE_DIM --dataset.max_num_images=$MAX_NUM_IMAGES --dataset.max_image_dim=$MAX_IMAGE_DIM --dataset.features_version=$FEATURES_VERSION \ +# --policy.type=$POLICY \ +# --policy.checkpoint_path=$EVAL_CKPT \ +# 
--policy.checkpoint_keys_mapping=$CKPT_KEYS_MAPPING \ +# --policy.add_image_special_tokens=$ADD_IMAGE_TOKENS \ +# --policy.attention_mode=$ATTN_MODE \ +# --policy.causal_action_attention_mask=$CAUSAL_ACTION_ATTENTION_MASK \ +# --policy.state_to_prefix=$STATE_TO_PREFIX \ +# --policy.self_attn_every_n_layers=$SELF_ATTN_EVERY_N_LAYERS \ +# --policy.vlm_model_name=$VLM_NAME \ +# --policy.pad_language_to=$PAD_LANG_TO \ +# --policy.load_vlm_weights=$LOAD_VLM_WEIGHTS \ +# --policy.num_vlm_layers=$NUM_VLM_LAYERS \ +# --policy.chunk_size=$CHUNK_SIZE \ +# --policy.n_obs_steps=$N_OBS_STEPS \ +# --policy.past_obs_keys=$PAST_OBS_KEYS \ +# --policy.num_expert_layers=$NUM_EXPERT_LAYERS \ +# --policy.expert_width_multiplier=$EXPERT_WIDTH_MULTIPLIER \ +# --policy.peft_method=$PEFT_METHOD \ +# --policy.self_attn_only_actions=$SELF_ATTN_ONLY_ACTIONS \ +# --policy.robot_type=$ROBOT + + + + +# MULTITASK_EVAL=true +# N_EPISODES=5 +# MAX_PARRALLEL_TASKS=1 +# ACTION_STEPS_LIST=(1) +# TASK_LIST=(libero_10) +# for N_ACTION_STEPS in "${ACTION_STEPS_LIST[@]}"; do +# for TASK in "${TASK_LIST[@]}"; do +# echo "$TASK Evaluating: $EVAL_CKPT | N_ACTION_STEPS=$N_ACTION_STEPS" +# python lerobot/scripts/eval.py \ +# --output_dir=$WORK/logs/lerobot/tmp \ +# --env.type=$ENV \ +# --env.task=$TASK \ +# --eval.batch_size=$N_EPISODES \ +# --eval.n_episodes=$N_EPISODES \ +# --use_amp=false \ +# --device=cuda \ +# --policy.n_action_steps=$N_ACTION_STEPS \ +# --policy.type=$POLICY \ +# --policy.checkpoint_path=$EVAL_CKPT \ +# --policy.checkpoint_keys_mapping=$CKPT_KEYS_MAPPING \ +# --env.multitask_eval=$MULTITASK_EVAL --env.max_parallel_tasks=$MAX_PARRALLEL_TASKS \ +# --policy.add_image_special_tokens=$ADD_IMAGE_TOKENS \ +# --policy.attention_mode=$ATTN_MODE \ +# --policy.causal_action_attention_mask=$CAUSAL_ACTION_ATTENTION_MASK \ +# --policy.state_to_prefix=$STATE_TO_PREFIX \ +# --policy.self_attn_every_n_layers=$SELF_ATTN_EVERY_N_LAYERS \ +# --policy.vlm_model_name=$VLM_NAME \ +# 
--policy.load_vlm_weights=$LOAD_VLM_WEIGHTS \ +# --policy.num_vlm_layers=$NUM_VLM_LAYERS \ +# --policy.chunk_size=$CHUNK_SIZE + +# echo "Done with: $EVAL_CKPT | Steps=$N_ACTION_STEPS" +# echo "------------------------------------------------------" +# done +# done + diff --git a/src/lerobot/envs/configs.py b/src/lerobot/envs/configs.py index 5c648de70..e965446db 100644 --- a/src/lerobot/envs/configs.py +++ b/src/lerobot/envs/configs.py @@ -320,8 +320,6 @@ class LiberoEnv(EnvConfig): @property def gym_kwargs(self) -> dict: return { - # "task": self.task, "obs_type": self.obs_type, "render_mode": self.render_mode, - # "max_episode_steps": self.episode_length, } diff --git a/src/lerobot/envs/factory.py b/src/lerobot/envs/factory.py index be49a9990..211b41714 100644 --- a/src/lerobot/envs/factory.py +++ b/src/lerobot/envs/factory.py @@ -56,37 +56,36 @@ def make_env( names to indexed vectorized environments (when multitask eval is used). """ - if n_envs < 1: - raise ValueError("`n_envs must be at least 1") + if n_envs < 1: + raise ValueError("`n_envs` must be at least 1") - # batched version of the env that returns an observation of shape (b, c) - env_cls = gym.vector.AsyncVectorEnv if use_async_envs else gym.vector.SyncVectorEnv + env_cls = gym.vector.AsyncVectorEnv if use_async_envs else gym.vector.SyncVectorEnv - if "libero" in cfg.type: - from lerobot.envs.libero import create_libero_envs + + if "libero" in cfg.type: + from lerobot.envs.libero import create_libero_envs + return create_libero_envs( + task=cfg.task, + n_envs=n_envs, + camera_name=cfg.camera_name, + init_states=cfg.init_states, + gym_kwargs=cfg.gym_kwargs, + env_cls=env_cls, + multitask_eval=cfg.multitask_eval, + ) - env = create_libero_envs( - task=cfg.task, - n_envs=n_envs, - camera_name=cfg.camera_name, - init_states=cfg.init_states, - gym_kwargs=cfg.gym_kwargs, - env_cls=env_cls, - multitask_eval=cfg.multitask_eval, - ) - else: + package_name = f"gym_{cfg.type}" try: 
importlib.import_module(package_name) except ModuleNotFoundError as e: - print( - f"{package_name} is not installed. Please install it with `pip install 'lerobot[{cfg.type}]'`" - ) - raise e + raise ModuleNotFoundError( + f"{package_name} is not installed. Install with: pip install \"lerobot[{cfg.type}]\"" + ) from e gym_handle = f"{package_name}/{cfg.task}" - env = env_cls( - [lambda: gym.make(gym_handle, disable_env_checker=True, **cfg.gym_kwargs) for _ in range(n_envs)] - ) + + def _make_one(): + return gym.make(gym_handle, disable_env_checker=True, **(cfg.gym_kwargs or {})) - return env + return env_cls([_make_one for _ in range(n_envs)]) diff --git a/src/lerobot/envs/libero copy.py b/src/lerobot/envs/libero copy.py new file mode 100644 index 000000000..83ccd2fb9 --- /dev/null +++ b/src/lerobot/envs/libero copy.py @@ -0,0 +1,326 @@ +import math +import os +from collections import defaultdict +from collections.abc import Callable +from itertools import chain +from typing import Any + +import gymnasium as gym +import numpy as np +import torch +from gymnasium import spaces +from libero.libero import benchmark, get_libero_path +from libero.libero.envs import OffScreenRenderEnv + + +def create_libero_envs( + task: str, + n_envs: int, + gym_kwargs: dict[str, Any] = None, + camera_name: str = "agentview_image,robot0_eye_in_hand_image", + init_states: bool = True, + env_cls: Callable = None, + multitask_eval: bool = True, +) -> dict[str, dict[str, Any]]: + """ + Here n_envs is per task and equal to the number of rollouts. + Returns: + dict[str, dict[str, list[LiberoEnv]]]: keys are task_suite and values are list of LiberoEnv envs. + """ + print("num envs", n_envs) + print("multitask_eval", multitask_eval) + print("gym_kwargs", gym_kwargs) + if gym_kwargs is None: + gym_kwargs = {} + + if not multitask_eval: + benchmark_dict = benchmark.get_benchmark_dict() + task_suite = benchmark_dict[task]() # can also choose libero_spatial, libero_object, libero_10 etc. 
+ tasks_id = list(range(len(task_suite.tasks))) + episode_indices = [0 for i in range(len(tasks_id))] + if len(tasks_id) == 1: + tasks_id = [tasks_id[0] for _ in range(n_envs)] + episode_indices = list(range(n_envs)) + elif len(tasks_id) < n_envs and n_envs % len(tasks_id) == 0: + n_repeat = n_envs // len(tasks_id) + print("n_repeat", n_repeat) + episode_indices = [] + for _ in range(len(tasks_id)): + episode_indices.extend(list(range(n_repeat))) + tasks_id = list(chain.from_iterable([[item] * n_repeat for item in tasks_id])) + elif n_envs < len(tasks_id): + tasks_id = tasks_id[:n_envs] + episode_indices = list(range(n_envs))[:n_envs] + print(f"WARNING: n_envs < len(tasks_id), evaluating only on {tasks_id}") + print(f"Creating Libero envs with task ids {tasks_id} from suite {task}") + assert n_envs == len(tasks_id), ( + f"len(n_envs) and tasks_id should be the same, got {n_envs} and {len(tasks_id)}" + ) + return env_cls( + [ + lambda i=i: LiberoEnv( + task_suite=task_suite, + task_id=tasks_id[i], + task_suite_name=task, + camera_name=camera_name, + init_states=init_states, + episode_index=episode_indices[i], + **gym_kwargs, + ) + for i in range(n_envs) + ] + ) + else: + envs = defaultdict(dict) + benchmark_dict = benchmark.get_benchmark_dict() + task = task.split(",") + for _task in task: + task_suite = benchmark_dict[ + _task + ]() # can also choose libero_spatial, libero_object, libero_10 etc. 
+ tasks_ids = list(range(len(task_suite.tasks))) + for tasks_id in tasks_ids: + episode_indices = list(range(n_envs)) + print( + f"Creating Libero envs with task ids {tasks_id} from suite {_task}, episode_indices: {episode_indices}" + ) + envs_list = [ + ( + lambda i=i, + task_suite=task_suite, + tasks_id=tasks_id, + _task=_task, + episode_indices=episode_indices: LiberoEnv( + task_suite=task_suite, + task_id=tasks_id, + task_suite_name=_task, + camera_name=camera_name, + init_states=init_states, + episode_index=episode_indices[i], + **gym_kwargs, + ) + ) + for i in range(n_envs) + ] + envs[_task][tasks_id] = env_cls(envs_list) + return envs + + +def quat2axisangle(quat): + """ + Copied from robosuite: https://github.com/ARISE-Initiative/robosuite/blob/eafb81f54ffc104f905ee48a16bb15f059176ad3/robosuite/utils/transform_utils.py#L490C1-L512C55 + + Converts quaternion to axis-angle format. + Returns a unit vector direction scaled by its angle in radians. + + Args: + quat (np.array): (x,y,z,w) vec4 float angles + + Returns: + np.array: (ax,ay,az) axis-angle exponential coordinates + """ + # clip quaternion + if quat[3] > 1.0: + quat[3] = 1.0 + elif quat[3] < -1.0: + quat[3] = -1.0 + + den = np.sqrt(1.0 - quat[3] * quat[3]) + if math.isclose(den, 0.0): + # This is (close to) a zero degree rotation, immediately return + return np.zeros(3) + + return (quat[:3] * 2.0 * math.acos(quat[3])) / den + + +def get_task_init_states(task_suite, i): + init_states_path = os.path.join( + get_libero_path("init_states"), + task_suite.tasks[i].problem_folder, + task_suite.tasks[i].init_states_file, + ) + init_states = torch.load(init_states_path, weights_only=False) # nosec B614 + return init_states + + +def get_libero_dummy_action(): + """Get dummy/no-op action, used to roll out the simulation while the robot does nothing.""" + return [0, 0, 0, 0, 0, 0, -1] + + +OBS_STATE_DIM = 8 +ACTION_DIM = 7 + + +class LiberoEnv(gym.Env): + metadata = {"render_modes": ["rgb_array"], "render_fps": 
80} + + def __init__( + self, + task_suite, + task_id, + task_suite_name, + camera_name="agentview_image,robot0_eye_in_hand_image", + obs_type="pixels", + render_mode="rgb_array", + observation_width=256, + observation_height=256, + visualization_width=640, + visualization_height=480, + init_states=True, + episode_index=0, + ): + super().__init__() + self.task_id = task_id + self.obs_type = obs_type + self.render_mode = render_mode + self.observation_width = observation_width + self.observation_height = observation_height + self.visualization_width = visualization_width + self.visualization_height = visualization_height + self.init_states = init_states + self.camera_name = camera_name.split( + "," + ) # agentview_image (main) or robot0_eye_in_hand_image (wrist) + + # Map raw camera names to "image1" and "image2". + # The preprocessing step `preprocess_observation` will then prefix these with `.images.*`, + # following the LeRobot convention (e.g., `observation.images.image`, `observation.images.image2`). + # This ensures the policy consistently receives observations in the + # expected format regardless of the original camera naming. 
+ self.camera_name_mapping = { + "agentview_image": "image", + "robot0_eye_in_hand_image": "image2", + } + + self.num_steps_wait = ( + 10 # Do nothing for the first few timesteps to wait for the simulator drops objects + ) + self.episode_index = episode_index + + self._env = self._make_envs_task(task_suite, self.task_id) + if task_suite_name == "libero_spatial": + max_steps = 220 # longest training demo has 193 steps + elif task_suite_name == "libero_object": + max_steps = 280 # longest training demo has 254 steps + elif task_suite_name == "libero_goal": + max_steps = 300 # longest training demo has 270 steps + elif task_suite_name == "libero_10": + max_steps = 520 # longest training demo has 505 steps + elif task_suite_name == "libero_90": + max_steps = 400 # longest training demo has 373 steps + self._max_episode_steps = max_steps + + images = {} + for cam in self.camera_name: + images[self.camera_name_mapping[cam]] = spaces.Box( + low=0, + high=255, + shape=(self.observation_height, self.observation_width, 3), + dtype=np.uint8, + ) + + if self.obs_type == "state": + raise NotImplementedError() + elif self.obs_type == "pixels": + self.observation_space = spaces.Dict( + { + "pixels": spaces.Dict(images), + } + ) + elif self.obs_type == "pixels_agent_pos": + self.observation_space = spaces.Dict( + { + "pixels": spaces.Dict(images), + "agent_pos": spaces.Box( + low=-1000.0, + high=1000.0, + shape=(OBS_STATE_DIM,), + dtype=np.float64, + ), + } + ) + + self.action_space = spaces.Box(low=-1, high=1, shape=(ACTION_DIM,), dtype=np.float32) + + def render(self): + raw_obs = self._env.env._get_observations() + image = self._format_raw_obs(raw_obs)["pixels"]["image"] + return image + + def _make_envs_task(self, task_suite, task_id: int = 0): + task = task_suite.get_task(task_id) + self.task = task.name + self.task_description = task.language + task_bddl_file = os.path.join(get_libero_path("bddl_files"), task.problem_folder, task.bddl_file) + + env_args = { + 
"bddl_file_name": task_bddl_file, + "camera_heights": self.observation_height, + "camera_widths": self.observation_width, + } + env = OffScreenRenderEnv(**env_args) + env.reset() + if self.init_states: + init_states = get_task_init_states( + task_suite, task_id + ) # for benchmarking purpose, we fix the a set of initial states FIXME(mshukor): should be in the reset()? + init_state_id = self.episode_index # episode index + env.set_init_state(init_states[init_state_id]) + + return env + + def _format_raw_obs(self, raw_obs): + images = {} + for camera_name in self.camera_name: + image = raw_obs[camera_name] + image = image[::-1, ::-1] # rotate 180 degrees + images[self.camera_name_mapping[camera_name]] = image + state = np.concatenate( + ( + raw_obs["robot0_eef_pos"], + quat2axisangle(raw_obs["robot0_eef_quat"]), + raw_obs["robot0_gripper_qpos"], + ) + ) + agent_pos = state + if self.obs_type == "state": + raise NotImplementedError() + elif self.obs_type == "pixels": + obs = {"pixels": images.copy()} + elif self.obs_type == "pixels_agent_pos": + obs = { + "pixels": images.copy(), + "agent_pos": agent_pos, + } + return obs + + def reset(self, seed=None, **kwargs): + super().reset(seed=seed) + + self._env.seed(seed) + raw_obs = self._env.reset() + # Do nothing for the first few timesteps to wait for the simulator drops objects + for _ in range(self.num_steps_wait): + raw_obs, _, _, _ = self._env.step(get_libero_dummy_action()) + observation = self._format_raw_obs(raw_obs) + info = {"is_success": False} + return observation, info + + def step(self, action): + assert action.ndim == 1 + raw_obs, reward, done, info = self._env.step(action) + + is_success = self._env.check_success() + terminated = done or is_success + info["is_success"] = done # is_success + + observation = self._format_raw_obs(raw_obs) + if done: + self.reset() + print(self.task, self.task_id, done, is_success) + truncated = False + return observation, reward, terminated, truncated, info + + def 
close(self): + self._env.close() diff --git a/src/lerobot/envs/libero.py b/src/lerobot/envs/libero.py index f815228e7..83ccd2fb9 100644 --- a/src/lerobot/envs/libero.py +++ b/src/lerobot/envs/libero.py @@ -245,9 +245,8 @@ class LiberoEnv(gym.Env): def render(self): raw_obs = self._env.env._get_observations() - formatted = self._format_raw_obs(raw_obs) - # grab the "main" camera - return formatted["pixels"]["image"] + image = self._format_raw_obs(raw_obs)["pixels"]["image"] + return image def _make_envs_task(self, task_suite, task_id: int = 0): task = task_suite.get_task(task_id) @@ -277,7 +276,6 @@ class LiberoEnv(gym.Env): image = raw_obs[camera_name] image = image[::-1, ::-1] # rotate 180 degrees images[self.camera_name_mapping[camera_name]] = image - # images = image if len(images) == 1 else images state = np.concatenate( ( raw_obs["robot0_eef_pos"], @@ -311,14 +309,17 @@ class LiberoEnv(gym.Env): def step(self, action): assert action.ndim == 1 - action[-1] = 1.0 - action[-1] raw_obs, reward, done, info = self._env.step(action) + is_success = self._env.check_success() terminated = done or is_success - info["is_success"] = is_success + info["is_success"] = done # is_success + observation = self._format_raw_obs(raw_obs) + if done: + self.reset() + print(self.task, self.task_id, done, is_success) truncated = False - # note if it is unable to complete get libero error after many steps return observation, reward, terminated, truncated, info def close(self): diff --git a/src/lerobot/envs/libero2.py b/src/lerobot/envs/libero2.py new file mode 100644 index 000000000..1e794072c --- /dev/null +++ b/src/lerobot/envs/libero2.py @@ -0,0 +1,308 @@ +import math +import os +from collections import defaultdict +from itertools import chain +from typing import Any, Callable + +import gymnasium as gym +import numpy as np +import torch +from gymnasium import spaces +from libero.libero import benchmark, get_libero_path +from libero.libero.envs import OffScreenRenderEnv + + +OBS_IMAGE 
= "observation.image" +OBS_IMAGE_2 = "observation.image2" +def create_libero_envs( + task: str, + n_envs: int, + gym_kwargs: dict[str, Any] = None, + camera_name: str = "agentview_image,robot0_eye_in_hand_image", + init_states: bool = True, + env_cls: Callable = None, + multitask_eval: bool = True, +) -> dict[str, dict[str, Any]]: + """ + Here n_envs is per task and equal to the number of rollouts. + Returns: + dict[str, dict[str, list[LiberoEnv]]]: keys are task_suite and values are list of LiberoEnv envs. + """ + if gym_kwargs is None: + gym_kwargs = {} + + if not multitask_eval: + benchmark_dict = benchmark.get_benchmark_dict() + task_suite = benchmark_dict[task]() # can also choose libero_spatial, libero_object, libero_10 etc. + tasks_id = list(range(len(task_suite.tasks))) + episode_indices = [0 for i in range(len(tasks_id))] + if len(tasks_id) == 1: + tasks_id = [tasks_id[0] for _ in range(n_envs)] + episode_indices = list(range(n_envs)) + elif len(tasks_id) < n_envs and n_envs % len(tasks_id) == 0: + n_repeat = n_envs // len(tasks_id) + episode_indices = [] + for i in range(len(tasks_id)): + episode_indices.extend(list(range(n_repeat))) + tasks_id = list(chain.from_iterable([[item] * n_repeat for item in tasks_id])) + elif n_envs < len(tasks_id): + tasks_id = tasks_id[:n_envs] + episode_indices = list(range(n_envs))[:n_envs] + print(f"WARNING: n_envs < len(tasks_id), evaluating only on {tasks_id}") + print(f"Creating Libero envs with task ids {tasks_id} from suite {task}") + assert n_envs == len( + tasks_id + ), f"len(n_envs) and tasks_id should be the same, got {n_envs} and {len(tasks_id)}" + return env_cls( + [ + lambda i=i: LiberoEnv( + task_suite=task_suite, + task_id=tasks_id[i], + task_suite_name=task, + camera_name=camera_name, + init_states=init_states, + episode_index=episode_indices[i], + **gym_kwargs, + ) + for i in range(n_envs) + ] + ) + else: + envs = defaultdict(dict) + benchmark_dict = benchmark.get_benchmark_dict() + task = task.split(",") + 
for _task in task: + task_suite = benchmark_dict[ + _task + ]() # can also choose libero_spatial, libero_object, libero_10 etc. + tasks_ids = list(range(len(task_suite.tasks))) + # tasks_ids = [0] # FIXME(mshukor): debug + for tasks_id in tasks_ids: + episode_indices = list(range(n_envs)) + print( + f"Creating Libero envs with task ids {tasks_id} from suite {_task}, episode_indices: {episode_indices}" + ) + envs_list = [ + lambda i=i: LiberoEnv( + task_suite=task_suite, + task_id=tasks_id, + task_suite_name=_task, + camera_name=camera_name, + init_states=init_states, + episode_index=episode_indices[i], + **gym_kwargs, + ) + for i in range(n_envs) + ] + envs[_task][tasks_id] = env_cls(envs_list) + return envs + + +def quat2axisangle(quat): + """ + Copied from robosuite: https://github.com/ARISE-Initiative/robosuite/blob/eafb81f54ffc104f905ee48a16bb15f059176ad3/robosuite/utils/transform_utils.py#L490C1-L512C55 + + Converts quaternion to axis-angle format. + Returns a unit vector direction scaled by its angle in radians. 
+ + Args: + quat (np.array): (x,y,z,w) vec4 float angles + + Returns: + np.array: (ax,ay,az) axis-angle exponential coordinates + """ + # clip quaternion + if quat[3] > 1.0: + quat[3] = 1.0 + elif quat[3] < -1.0: + quat[3] = -1.0 + + den = np.sqrt(1.0 - quat[3] * quat[3]) + if math.isclose(den, 0.0): + # This is (close to) a zero degree rotation, immediately return + return np.zeros(3) + + return (quat[:3] * 2.0 * math.acos(quat[3])) / den + + +def get_task_init_states(task_suite, i): + init_states_path = os.path.join( + get_libero_path("init_states"), + task_suite.tasks[i].problem_folder, + task_suite.tasks[i].init_states_file, + ) + init_states = torch.load(init_states_path, weights_only=False) + return init_states + + +def get_libero_dummy_action(): + """Get dummy/no-op action, used to roll out the simulation while the robot does nothing.""" + return [0, 0, 0, 0, 0, 0, -1] + + +class LiberoEnv(gym.Env): + metadata = {"render_modes": ["rgb_array"], "render_fps": 80} + + def __init__( + self, + task_suite, + task_id, + task_suite_name, + camera_name="agentview_image,robot0_eye_in_hand_image", + obs_type="pixels", + render_mode="rgb_array", + observation_width=256, + observation_height=256, + visualization_width=640, + visualization_height=480, + init_states=True, + episode_index=0, + ): + super().__init__() + self.task_id = task_id + self.obs_type = obs_type + self.render_mode = render_mode + self.observation_width = observation_width + self.observation_height = observation_height + self.visualization_width = visualization_width + self.visualization_height = visualization_height + self.init_states = init_states + self.camera_name = camera_name.split( + "," + ) # agentview_image (main) or robot0_eye_in_hand_image (wrist) + self.camera_name_mapping = { + "agentview_image": OBS_IMAGE, + "robot0_eye_in_hand_image": OBS_IMAGE_2, + } + self.num_steps_wait = ( + 10 # Do nothing for the first few timesteps to wait for the simulator drops objects + ) + self.episode_index = 
episode_index + + self._env = self._make_envs_task(task_suite, self.task_id) + if task_suite_name == "libero_spatial": + max_steps = 220 # longest training demo has 193 steps + elif task_suite_name == "libero_object": + max_steps = 280 # longest training demo has 254 steps + elif task_suite_name == "libero_goal": + max_steps = 300 # longest training demo has 270 steps + elif task_suite_name == "libero_10": + max_steps = 520 # longest training demo has 505 steps + elif task_suite_name == "libero_90": + max_steps = 400 # longest training demo has 373 steps + self._max_episode_steps = max_steps + + images = {} + for cam in self.camera_name: + images[self.camera_name_mapping[cam]] = spaces.Box( + low=0, + high=255, + shape=(self.observation_height, self.observation_width, 3), + dtype=np.uint8, + ) + + if self.obs_type == "state": + raise NotImplementedError() + elif self.obs_type == "pixels": + self.observation_space = spaces.Dict( + { + "pixels": spaces.Dict(images), + } + ) + elif self.obs_type == "pixels_agent_pos": + self.observation_space = spaces.Dict( + { + "pixels": spaces.Dict(images), + "agent_pos": spaces.Box( + low=-1000.0, + high=1000.0, + shape=(8,), + dtype=np.float64, + ), + } + ) + + self.action_space = spaces.Box(low=-1, high=1, shape=(7,), dtype=np.float32) + + def render(self): + raw_obs = self._env.env._get_observations() + image = self._format_raw_obs(raw_obs)["pixels"][OBS_IMAGE] + return image + + def _make_envs_task(self, task_suite, task_id: int = 0): + task = task_suite.get_task(task_id) + self.task = task.name + self.task_description = task.language + task_bddl_file = os.path.join(get_libero_path("bddl_files"), task.problem_folder, task.bddl_file) + + env_args = { + "bddl_file_name": task_bddl_file, + "camera_heights": self.observation_height, + "camera_widths": self.observation_width, + } + env = OffScreenRenderEnv(**env_args) + env.reset() + if self.init_states: + init_states = get_task_init_states( + task_suite, task_id + ) # for 
benchmarking purpose, we fix the a set of initial states FIXME(mshukor): should be in the reset()? + init_state_id = self.episode_index # episode index + env.set_init_state(init_states[init_state_id]) + + return env + + def _format_raw_obs(self, raw_obs): + images = {} + for camera_name in self.camera_name: + image = raw_obs[camera_name] + image = image[::-1, ::-1] # rotate 180 degrees + images[self.camera_name_mapping[camera_name]] = image + # images = image if len(images) == 1 else images + state = np.concatenate( + ( + raw_obs["robot0_eef_pos"], + quat2axisangle(raw_obs["robot0_eef_quat"]), + raw_obs["robot0_gripper_qpos"], + ) + ) + agent_pos = state + if self.obs_type == "state": + raise NotImplementedError() + elif self.obs_type == "pixels": + obs = {"pixels": images.copy()} + elif self.obs_type == "pixels_agent_pos": + obs = { + "pixels": images.copy(), + "agent_pos": agent_pos, + } + return obs + + def reset(self, seed=None, **kwargs): + super().reset(seed=seed) + + self._env.seed(seed) + raw_obs = self._env.reset() + # Do nothing for the first few timesteps to wait for the simulator drops objects + for _ in range(self.num_steps_wait): + raw_obs, _, _, _ = self._env.step(get_libero_dummy_action()) + observation = self._format_raw_obs(raw_obs) + info = {"is_success": False} + return observation, info + + def step(self, action): + assert action.ndim == 1 + raw_obs, reward, done, info = self._env.step(action) + + is_success = self._env.check_success() + terminated = done or is_success + info["is_success"] = done # is_success + + observation = self._format_raw_obs(raw_obs) + if done: + self.reset() + print(self.task, self.task_id, done, is_success) + truncated = False + return observation, reward, terminated, truncated, info + + def close(self): + self._env.close() diff --git a/src/lerobot/envs/utils.py b/src/lerobot/envs/utils.py index 2cf9efcfe..5ae252dbe 100644 --- a/src/lerobot/envs/utils.py +++ b/src/lerobot/envs/utils.py @@ -80,7 +80,56 @@ def 
preprocess_observation(observations: dict[str, np.ndarray]) -> dict[str, Ten return return_observations +def preprocess_observation1( + observations: dict[str, np.ndarray], cfg: dict[str, Any] = None +) -> dict[str, Tensor]: + # TODO(aliberts, rcadene): refactor this to use features from the environment (no hardcoding) + """Convert environment observation to LeRobot format observation. + Args: + observation: Dictionary of observation batches from a Gym vector environment. + Returns: + Dictionary of observation batches with keys renamed to LeRobot format and values as tensors. + """ + # map to expected inputs for the policy + return_observations = {} + image_key = list(cfg.image_features.keys())[0] if cfg else "observation.image" + state_key = cfg.robot_state_feature_key if cfg else "observation.state" + if "pixels" in observations: + if isinstance(observations["pixels"], dict): + # imgs = {f"{image_key}.{key}": img for key, img in observations["pixels"].items()} + imgs = observations["pixels"] # keys should be OBS_IMAGE, OBS_IMAGE_2, OBS_IMAGE_3 + else: + imgs = {f"{image_key}": observations["pixels"]} + for imgkey, img in imgs.items(): + # TODO(aliberts, rcadene): use transforms.ToTensor()? 
+ img = torch.from_numpy(img) + + # sanity check that images are channel last + _, h, w, c = img.shape + assert c < h and c < w, f"expect channel last images, but instead got {img.shape=}" + + # sanity check that images are uint8 + assert img.dtype == torch.uint8, f"expect torch.uint8, but instead {img.dtype=}" + + # convert to channel first of type float32 in range [0,1] + img = einops.rearrange(img, "b h w c -> b c h w").contiguous() + img = img.type(torch.float32) + img /= 255 + + return_observations[imgkey] = img + + if "environment_state" in observations: + return_observations["observation.environment_state"] = torch.from_numpy( + observations["environment_state"] + ).float() + + # TODO(rcadene): enable pixels only baseline with `obs_type="pixels"` in environment by removing + # requirement for "agent_pos" + return_observations[state_key] = torch.from_numpy(observations["agent_pos"]).float() + if "task" in observations: + return_observations["task"] = observations["task"] + return return_observations def env_to_policy_features(env_cfg: EnvConfig) -> dict[str, PolicyFeature]: # TODO(aliberts, rcadene): remove this hardcoding of keys and just use the nested keys as is # (need to also refactor preprocess_observation and externalize normalization from policies) diff --git a/src/lerobot/policies/factory.py b/src/lerobot/policies/factory.py index 79461d3a9..b7d92d988 100644 --- a/src/lerobot/policies/factory.py +++ b/src/lerobot/policies/factory.py @@ -177,6 +177,6 @@ def make_policy( policy = policy_cls(**kwargs) policy.to(cfg.device) assert isinstance(policy, nn.Module) + breakpoint() # policy = torch.compile(policy, mode="reduce-overhead") - return policy diff --git a/src/lerobot/policies/smolpi0/modeling_smolpi0.py b/src/lerobot/policies/smolpi0/modeling_smolpi0.py index fa2d3d5a7..9a128f7b6 100644 --- a/src/lerobot/policies/smolpi0/modeling_smolpi0.py +++ b/src/lerobot/policies/smolpi0/modeling_smolpi0.py @@ -51,7 +51,9 @@ policy = 
Pi0Policy.from_pretrained("lerobot/pi0") import math from collections import deque - +import os +import re +import safetensors import torch import torch.nn.functional as F # noqa: N812 from torch import Tensor, nn @@ -169,7 +171,72 @@ def resize_with_pad(img, width, height, pad_value=-1): padded_img = F.pad(resized_img, (pad_width, 0, pad_height, 0), value=pad_value) return padded_img +_VARIANT_RE = re.compile(r"\.so\d+(?:-[\w]+)?_buffer_") +def canonicalise(k: str) -> str: + """ + Remove dataset-variant markers like '.so100-blue_' or '.so100_' from a + normalisation-buffer key. + """ + return _VARIANT_RE.sub(".buffer_", k) +def standardise_state_dict( + checkpoint: dict[str, torch.Tensor], ref_keys: set[str], *, verbose: bool = True +) -> tuple[dict[str, torch.Tensor], list[str]]: + """ + • Re-keys `checkpoint ` so that every entry matches the *reference* key set. + • If several variant keys collapse to the same canonical name we keep the + first one and log the collision. + • Returns the new dict + a list of entries that could not be matched. + """ + out, collisions, unmatched = {}, {}, [] + + for k, v in checkpoint.items(): + canon = canonicalise(k) + if canon in ref_keys: + if canon in out: # duplicate after collapsing + collisions.setdefault(canon, []).append(k) + else: + out[canon] = v + else: + unmatched.append(k) + + if verbose: + for canon, variants in collisions.items(): + print(f"[standardise_state_dict] '{canon}' ← {variants}") + if unmatched: + print(f"[standardise_state_dict] kept {len(unmatched)} unmatched keys") + + out.update({k: checkpoint[k] for k in unmatched}) + return out, unmatched + +def load_smolvla( + model: torch.nn.Module, + filename: str | os.PathLike, + *, + device: str = "cpu", + checkpoint_keys_mapping: str = "", +) -> torch.nn.Module: + state_dict = safetensors.torch.load_file(filename, device=device) + + # Optional user-supplied renames (e.g. 
"model._orig_mod.//model.") + if checkpoint_keys_mapping and "//" in checkpoint_keys_mapping: + state_dict = rename_checkpoint_keys(state_dict, checkpoint_keys_mapping) + + state_dict, _ = standardise_state_dict(state_dict, set(model.state_dict().keys())) + + # HACK(aliberts): to not overwrite normalization parameters as they should come from the dataset + norm_keys = ("normalize_inputs", "normalize_targets", "unnormalize_outputs") + state_dict = {k: v for k, v in state_dict.items() if not k.startswith(norm_keys)} + + missing, unexpected = model.load_state_dict(state_dict, strict=False) + if not all(key.startswith(norm_keys) for key in missing) or unexpected: + raise RuntimeError( + "SmolVLA %d missing / %d unexpected keys", + len(missing), + len(unexpected), + ) + + return model def pad_vector(vector, new_dim): """Can be (batch_size x sequence_length x features_dimension) or (batch_size x features_dimension) @@ -219,7 +286,27 @@ def aloha_gripper_to_angular(value): # The values 0.4 and 1.5 were measured on an actual Trossen robot. return normalize(value, min_val=0.4, max_val=1.5) +def rename_checkpoint_keys(checkpoint: dict, rename_str: str): + """ + Renames keys in a checkpoint dictionary based on the given rename string. + Args: + checkpoint (dict): The checkpoint dictionary. + rename_str (str): A string specifying key mappings in the format "old1//new1,old2//new2". + + Returns: + dict: The modified checkpoint with renamed keys. + """ + + rename_dict = dict(pair.split("//") for pair in rename_str.split(",")) + + new_checkpoint = {} + for k, v in checkpoint.items(): + for old_key, new_key in rename_dict.items(): + if old_key in k: + k = k.replace(old_key, new_key) + new_checkpoint[k] = v + return new_checkpoint def aloha_gripper_from_angular(value): # Convert from the gripper position used by pi0 to the gripper position that is used by Aloha. # Note that the units are still angular but the range is different. 
# Method of the policy class whose header lies outside this chunk.
# HACK(aliberts, danaaubakirova): we overwrite this classmethod here to fix smolVLA-specific issues
@classmethod
def _load_as_safetensor(
    cls,
    model: "SmolVLAPolicy",
    model_file: str,
    map_location: str,
    strict: bool,
    **kwargs,
):
    """Load ``model_file`` into ``model`` and return the loaded model.

    The checkpoint is first loaded with ``safetensors.torch.load_model``
    (honouring ``strict``), then passed through ``load_smolvla`` with the
    fixed "model._orig_mod.//model." key mapping.

    Args:
        model: Policy instance to load the weights into.
        model_file: Path of the safetensors file.
        map_location: Device used for both loading passes.
        strict: Forwarded to ``safetensors.torch.load_model``.
        **kwargs: Accepted and ignored.

    Returns:
        Whatever ``load_smolvla`` returns (the loaded model).
    """
    # Import the submodule explicitly: a bare `import safetensors` does not
    # guarantee that `safetensors.torch` has been loaded.
    import safetensors.torch

    safetensors.torch.load_model(model, model_file, strict=strict, device=map_location)
    return load_smolvla(
        model,
        model_file,
        device=map_location,
        checkpoint_keys_mapping="model._orig_mod.//model.",
    )
diff --git a/src/lerobot/policies/smolvla/modeling_smolvla.py b/src/lerobot/policies/smolvla/modeling_smolvla.py index 9b7e3520a..1c07d98e5 100644 --- a/src/lerobot/policies/smolvla/modeling_smolvla.py +++ b/src/lerobot/policies/smolvla/modeling_smolvla.py @@ -1027,7 +1027,7 @@ from lerobot.policies.utils import ( populate_queues, ) from lerobot.utils.utils import get_safe_dtype - +# OBS_STATE = 'state' # Matches ".soNNN", optionally followed by "-something", up to the "_buffer_" marker _VARIANT_RE = re.compile(r"\.so\d+(?:-[\w]+)?_buffer_") @@ -1347,6 +1347,7 @@ class SmolVLAPolicy(PreTrainedPolicy): # Unpad actions original_action_dim = self.config.action_feature.shape[0] + original_action_dim = 7 actions = actions[:, :, :original_action_dim] actions = self.unnormalize_outputs({ACTION: actions})[ACTION] diff --git a/src/lerobot/scripts/eval.py b/src/lerobot/scripts/eval.py index 92a3bf833..6fbae645b 100644 --- a/src/lerobot/scripts/eval.py +++ b/src/lerobot/scripts/eval.py @@ -69,7 +69,7 @@ from tqdm import trange from lerobot.configs import parser from lerobot.configs.eval import EvalPipelineConfig from lerobot.envs.factory import make_env -from lerobot.envs.utils import add_envs_task, check_env_attributes_and_types, preprocess_observation +from lerobot.envs.utils import add_envs_task, check_env_attributes_and_types, preprocess_observation, preprocess_observation1 from lerobot.policies.factory import make_policy from lerobot.policies.pretrained import PreTrainedPolicy from lerobot.policies.utils import get_device_from_parameters @@ -125,6 +125,10 @@ def rollout( # Reset the policy and environments. policy.reset() + # added by jade + # for k in list(policy.config.input_features.keys()): + # if k.startswith("observation.image"): + # policy.config.input_features["observation.images." 
+ k.split("observation.", 1)[1]] = policy.config.input_features.pop(k) observation, info = env.reset(seed=seeds) if render_callback is not None: render_callback(env) @@ -149,6 +153,7 @@ def rollout( while not np.all(done) and step < max_steps: # Numpy array to tensor and changing dictionary keys to LeRobot policy format. observation = preprocess_observation(observation) + # observation = preprocess_observation1(observation) if return_observations: all_observations.append(deepcopy(observation)) @@ -159,6 +164,26 @@ def rollout( # Infer "task" from attributes of environments. # TODO: works with SyncVectorEnv but not AsyncVectorEnv observation = add_envs_task(env, observation) + # breakpoint() + # observation = { + # k.replace("observation.images.", "observation.") if k.startswith("observation.images.") else k: v + # for k, v in observation.items() + # # } + # if "observation.image" in observation: + # observation["image"] = observation.pop("observation.image").to( + # device, non_blocking=device.type == "cuda" + # ) + + # if "observation.image2" in observation: + # observation["wrist_image"] = observation.pop("observation.image2").to( + # device, non_blocking=device.type == "cuda" + # ) + + # if "observation.state" in observation: + # observation["state"] = observation.pop("observation.state").to( + # device, non_blocking=device.type == "cuda" + # ) + with torch.inference_mode(): action = policy.select_action(observation) # Convert to CPU / numpy. 
def load_smolvla(
    cfg,
    dataset_repo: str,
    policy=None,
    *,
    root: str = "/raid/jade/.cache/huggingface/datasets/",
    device: str | None = None,
):
    """Attach dataset normalisation stats to a policy and move it to a device.

    Args:
        cfg: Policy config; only used to build a policy when ``policy`` is None.
        dataset_repo: Repo id handed to ``LeRobotDataset``.
        policy: Already-built policy. When omitted, one is created with
            ``make_policy(cfg=cfg, ds_meta=dataset.meta)`` so that the older
            two-argument call style keeps working.
        root: Local dataset root handed to ``LeRobotDataset``.
        device: Target device. Defaults to "cuda" when available, else "cpu".

    Returns:
        ``(policy, dataset)`` with the policy moved to ``device``.
    """
    from lerobot.datasets.lerobot_dataset import LeRobotDataset

    dataset = LeRobotDataset(dataset_repo, root=root)
    if policy is None:
        policy = make_policy(cfg=cfg, ds_meta=dataset.meta)
    _inject_normalization_stats(policy=policy, dataset_meta=dataset.meta)  # only needed if stats are missing
    if device is None:
        # Avoid a hard failure on machines without CUDA.
        device = "cuda" if torch.cuda.is_available() else "cpu"
    return policy.to(device), dataset