mirror of https://github.com/huggingface/lerobot.git
synced 2026-05-16 09:09:48 +00:00

Commit: new things
@@ -1,14 +1,45 @@
#!/bin/bash
unset LEROBOT_HOME
unset HF_LEROBOT_HOME

# storage / caches
RAID=/raid/jade
export TRANSFORMERS_CACHE=$RAID/.cache/huggingface/transformers
export HF_HOME=$RAID/.cache/huggingface
export HF_DATASETS_CACHE=$RAID/.cache/huggingface/datasets
export HF_LEROBOT_HOME=$RAID/.cache/huggingface/lerobot
export WANDB_CACHE_DIR=$RAID/.cache/wandb
export TMPDIR=$RAID/.cache/tmp
mkdir -p "$TMPDIR"
export WANDB_MODE=offline
export HF_DATASETS_OFFLINE=1
export HF_HUB_OFFLINE=1
export TOKENIZERS_PARALLELISM=false
export MUJOCO_GL=egl
export CUDA_VISIBLE_DEVICES=3

# CONFIGURATION
# The last POLICY_PATH assignment wins; earlier ones are kept for quick switching.
POLICY_PATH="bicmol/smolvla-libero"
POLICY_PATH="/raid/jade/logs/lerobot/lerobot_2_HuggingFaceVLA_libero_smolvla_lr1e-4bs32steps100000/checkpoints/100000/pretrained_model"
POLICY_PATH="/raid/jade/models/smolvlamust"
TASK=libero_spatial
ENV_TYPE="libero"
BATCH_SIZE=10
N_EPISODES=10
N_ACTION_STEPS=1

# Re-enable hub access so the policy repo can be resolved.
unset HF_HUB_OFFLINE

# RUN EVALUATION
python src/lerobot/scripts/eval.py \
    --policy.path="$POLICY_PATH" \
@@ -17,3 +48,11 @@ python src/lerobot/scripts/eval.py \
    --eval.n_episodes="$N_EPISODES" \
    --env.multitask_eval=False \
    --env.task=$TASK

# Alternative: standalone LIBERO evaluator
# python examples/evaluate_libero.py \
#     --policy_path "$POLICY_PATH" \
#     --task_suite_name "$TASK" \
#     --num_steps_wait 10 \
#     --num_trials_per_task 10 \
#     --video_out_path "data/libero/videos" \
#     --device "cuda" \
#     --seed 7
@@ -0,0 +1,76 @@
#!/bin/bash

# storage / caches
RAID=/raid/jade
export TRANSFORMERS_CACHE=$RAID/.cache/huggingface/transformers
export HF_HOME=$RAID/.cache/huggingface
export HF_DATASETS_CACHE=$RAID/.cache/huggingface/datasets
export HF_LEROBOT_HOME=$RAID/.cache/huggingface/lerobot
export WANDB_CACHE_DIR=$RAID/.cache/wandb
export TMPDIR=$RAID/.cache/tmp
mkdir -p "$TMPDIR"
export WANDB_MODE=offline
export HF_DATASETS_OFFLINE=1
export HF_HUB_OFFLINE=1
export TOKENIZERS_PARALLELISM=false
export MUJOCO_GL=egl
export CUDA_VISIBLE_DEVICES=3

# CONFIGURATION
# The last POLICY_PATH assignment wins.
POLICY_PATH="/raid/jade/logs/lerobot/lerobot_2_HuggingFaceVLA_libero_smolvla_lr1e-4bs32steps100000/checkpoints/100000/pretrained_model"
POLICY_PATH="AustineJohnBreaker/smolvla_stratch_libero_spatial"
TASK=libero_spatial
ENV_TYPE="libero"
BATCH_SIZE=10
N_EPISODES=10
USE_AMP=false
N_ACTION_STEPS=1
ATTN_MODE=cross_attn  # NOTE: was undefined in the original script; value assumed from the matching training config
SELF_ATTN_EVERY_N_LAYERS=2
VLM_NAME=HuggingFaceTB/SmolVLM-500M-Instruct
PAD_LANG_TO=longest
LOAD_VLM_WEIGHTS=true
NUM_VLM_LAYERS=16
CHUNK_SIZE=50
N_OBS_STEPS=1
NUM_EXPERT_LAYERS=0
EXPERT_WIDTH_MULTIPLIER=0.5
ADD_IMAGE_TOKENS=true

# Re-enable hub access so the policy repo can be resolved.
unset HF_HUB_OFFLINE

# RUN EVALUATION
# NOTE: a commented-out flag inside a backslash continuation silently truncates
# the command, so the unused flag is kept outside it:
# --policy.add_image_special_tokens=$ADD_IMAGE_TOKENS
python src/lerobot/scripts/eval.py \
    --policy.path="$POLICY_PATH" \
    --env.type="$ENV_TYPE" \
    --eval.batch_size="$BATCH_SIZE" \
    --eval.n_episodes="$N_EPISODES" \
    --env.multitask_eval=False \
    --env.task=$TASK \
    --policy.use_amp=$USE_AMP \
    --policy.n_action_steps=$N_ACTION_STEPS \
    --policy.attention_mode=$ATTN_MODE \
    --policy.self_attn_every_n_layers=$SELF_ATTN_EVERY_N_LAYERS \
    --policy.vlm_model_name=$VLM_NAME \
    --policy.pad_language_to=$PAD_LANG_TO \
    --policy.load_vlm_weights=$LOAD_VLM_WEIGHTS \
    --policy.num_vlm_layers=$NUM_VLM_LAYERS \
    --policy.chunk_size=$CHUNK_SIZE \
    --policy.n_obs_steps=$N_OBS_STEPS \
    --policy.num_expert_layers=$NUM_EXPERT_LAYERS \
    --policy.expert_width_multiplier=$EXPERT_WIDTH_MULTIPLIER
@@ -0,0 +1,93 @@
#!/bin/bash
# smolvla training with accelerate

set -euo pipefail

# repo/env
cd ~/lerobot || exit 1
# conda activate lerobot
export LC_ALL=C

rm -f core-*

# storage / caches
RAID=/raid/jade
export TRANSFORMERS_CACHE=$RAID/.cache/huggingface/transformers
export HF_HOME=$RAID/.cache/huggingface
export HF_DATASETS_CACHE=$RAID/.cache/huggingface/datasets
export HF_LEROBOT_HOME=$RAID/.cache/huggingface/lerobot
export WANDB_CACHE_DIR=$RAID/.cache/wandb
export TMPDIR=$RAID/.cache/tmp
mkdir -p "$TMPDIR"
export WANDB_MODE=offline
export HF_DATASETS_OFFLINE=1
export HF_HUB_OFFLINE=1
export TOKENIZERS_PARALLELISM=false
export MUJOCO_GL=egl

# CONFIG
ENV=libero
TASK=libero_spatial
REPO_ID=physical-intelligence/libero

POLICY=smolvla
VLM=HuggingFaceTB/SmolVLM2-500M-Instruct

# Optim / scheduling
LR=1e-4
DECAY_LR=2.5e-6
DECAY_STEPS=30000
USE_AMP=true  # set to true for mixed precision (bf16)
TRAIN_EXPERT_ONLY=true
N_ACTION_STEPS=1
SEED=1000

# Training loop
OFFLINE_STEPS=100000
BATCH_SIZE=32
EVAL_FREQ=0
SAVE_FREQ=20000
EVAL_BATCH_SIZE=1
NUM_EPISODES=1

# number of gpus to use
NUM_PROCESSES=2
export CUDA_VISIBLE_DEVICES=1,3
PORT=29522

# naming/output dir
# ${REPO_ID//\//_} replaces every "/" in REPO_ID with "_" so the path stays flat.
TRAIN_DIR=$RAID/logs/lerobot/lerobot_2_${REPO_ID//\//_}_${POLICY}_lr${LR}bs${BATCH_SIZE}steps${OFFLINE_STEPS}
echo "Training dir: $TRAIN_DIR"

rm -rf "$TRAIN_DIR"

# RUN
python -m accelerate.commands.launch \
    --num_processes $NUM_PROCESSES \
    --num_machines 1 \
    --main_process_port $PORT \
    --mixed_precision=$( [ "$USE_AMP" = true ] && echo "bf16" || echo "no" ) \
    src/lerobot/scripts/train_accelerate.py \
    --policy.type=$POLICY \
    --policy.use_amp=True \
    --policy.vlm_model_name=$VLM \
    --dataset.repo_id=$REPO_ID \
    --dataset.root=$HF_DATASETS_CACHE \
    --env.type=$ENV \
    --env.task=$TASK \
    --output_dir=$TRAIN_DIR \
    --batch_size=$BATCH_SIZE \
    --steps=$OFFLINE_STEPS \
    --eval_freq=$EVAL_FREQ \
    --save_freq=$SAVE_FREQ \
    --eval.batch_size=$EVAL_BATCH_SIZE \
    --eval.n_episodes=$NUM_EPISODES \
    --policy.optimizer_lr=$LR \
    --policy.repo_id=None \
    --policy.scheduler_decay_lr=$DECAY_LR \
    --policy.scheduler_decay_steps=$DECAY_STEPS \
    --policy.n_action_steps=$N_ACTION_STEPS \
    --policy.train_expert_only=$TRAIN_EXPERT_ONLY \
    --seed=$SEED \
    --wandb.enable=false
@@ -21,8 +21,8 @@ export WANDB_CACHE_DIR=$RAID/.cache/wandb
export TMPDIR=$RAID/.cache/tmp
mkdir -p $TMPDIR
export WANDB_MODE=offline
export HF_DATASETS_OFFLINE=1
export HF_HUB_OFFLINE=1
# export HF_DATASETS_OFFLINE=1
# export HF_HUB_OFFLINE=1
export TOKENIZERS_PARALLELISM=false
export MUJOCO_GL=egl

@@ -31,11 +31,11 @@ PORT=29522

# =================== CONFIG ===================
ENV=libero
TASK=libero_spatial
TASK=libero_object
REPO_ID=physical-intelligence/libero

ROOT=$RAID
POLICY=smolvla
VLM=HuggingFaceTB/SmolVLM2-2.2B-Instruct
VLM=HuggingFaceTB/SmolVLM2-500M-Instruct

# Optim / scheduling
LR=1e-4
@@ -55,10 +55,10 @@ EVAL_BATCH_SIZE=1
NUM_EPISODES=1

# GPU selection 0, 1, 2, 3
export CUDA_VISIBLE_DEVICES=1
export CUDA_VISIBLE_DEVICES=0

# naming/output dir
TRAIN_DIR=$RAID/logs/lerobot/lerobot_${REPO_ID//\//_}_${POLICY}_lr${LR}bs${BATCH_SIZE}steps${OFFLINE_STEPS}
TRAIN_DIR=$RAID/logs/lerobot/lerobot_solo_${REPO_ID//\//_}_${POLICY}_lr${LR}bs${BATCH_SIZE}steps${OFFLINE_STEPS}
echo "Training dir: $TRAIN_DIR"

# train
@@ -68,7 +68,6 @@ python src/lerobot/scripts/train.py \
    --policy.type=$POLICY \
    --policy.vlm_model_name=$VLM \
    --dataset.repo_id=$REPO_ID \
    --dataset.root=$HF_DATASETS_CACHE \
    --env.type=$ENV \
    --env.task=$TASK \
    --output_dir=$TRAIN_DIR \
@@ -85,6 +84,6 @@ python src/lerobot/scripts/train.py \
    --policy.scheduler_decay_steps=$DECAY_STEPS \
    --policy.n_action_steps=$N_ACTION_STEPS \
    --policy.train_expert_only=$TRAIN_EXPERT_ONLY \
    --policy.vlm_model_name=/raid/jade/.cache/huggingface/models/SmolVLM2-2.2B-Instruct \
    --policy.vlm_model_name=$VLM \
    --seed=$SEED \
    --wandb.enable=false
@@ -0,0 +1,141 @@
#!/bin/bash
# smolvla training with accelerate

set -euo pipefail

# repo/env
cd ~/lerobot || exit 1
# conda activate lerobot
export LC_ALL=C

rm -f core-*

# storage / caches
RAID=/raid/jade
export TRANSFORMERS_CACHE=$RAID/.cache/huggingface/transformers
export HF_HOME=$RAID/.cache/huggingface
export HF_DATASETS_CACHE=$RAID/.cache/huggingface/datasets
export HF_LEROBOT_HOME=$RAID/.cache/huggingface/lerobot
export WANDB_CACHE_DIR=$RAID/.cache/wandb
export TMPDIR=$RAID/.cache/tmp
mkdir -p "$TMPDIR"
export WANDB_MODE=offline
# export HF_DATASETS_OFFLINE=1
# export HF_HUB_OFFLINE=1
export TOKENIZERS_PARALLELISM=false
export MUJOCO_GL=egl

# CONFIG
ENV=libero
TASK=libero_spatial
REPO_ID=HuggingfaceVLA/libero

POLICY=smolvla
VLM=HuggingFaceTB/SmolVLM2-500M-Instruct

# Optim / scheduling
LR=1e-4
DECAY_LR=2.5e-6
DECAY_STEPS=30000
USE_AMP=true  # set to true for mixed precision
TRAIN_EXPERT_ONLY=true
N_ACTION_STEPS=1
SEED=1000
LOAD_VLM_WEIGHTS=true

# Training loop
OFFLINE_STEPS=100000
BATCH_SIZE=32
EVAL_FREQ=0
SAVE_FREQ=20000
EVAL_BATCH_SIZE=1
NUM_EPISODES=1
ADD_IMAGE_TOKENS=true
N_OBS_STEPS=1
ATTN_MODE=cross_attn
EXPERT_WIDTH_MULTIPLIER=0.5

# number of gpus to use
NUM_PROCESSES=2
NUM_VLM_LAYERS=0
SELF_ATTN_EVERY_N_LAYERS=2
CHUNK_SIZE=50
export CUDA_VISIBLE_DEVICES=0
PORT=29522
PREFIX_LENGTH=0

# naming/output dir
TRAIN_DIR=$RAID/logs/lerobot/lerobot_new_${REPO_ID//\//_}_${POLICY}_lr${LR}bs${BATCH_SIZE}steps${OFFLINE_STEPS}
echo "Training dir: $TRAIN_DIR"

rm -rf "$TRAIN_DIR"

# RUN
# Multi-GPU accelerate variant, kept for reference:
# python -m accelerate.commands.launch \
#     --num_processes $NUM_PROCESSES \
#     --num_machines 1 \
#     --main_process_port $PORT \
#     --mixed_precision=$( [ "$USE_AMP" = true ] && echo "bf16" || echo "no" ) \
#     src/lerobot/scripts/train_accelerate.py \
#     --policy.type=$POLICY \
#     --policy.use_amp=True \
#     --policy.vlm_model_name=$VLM \
#     --dataset.repo_id=$REPO_ID \
#     --dataset.root=$HF_DATASETS_CACHE \
#     --env.type=$ENV \
#     --env.task=$TASK \
#     --output_dir=$TRAIN_DIR \
#     --batch_size=$BATCH_SIZE \
#     --steps=$OFFLINE_STEPS \
#     --eval_freq=$EVAL_FREQ \
#     --save_freq=$SAVE_FREQ \
#     --eval.batch_size=$EVAL_BATCH_SIZE \
#     --eval.n_episodes=$NUM_EPISODES \
#     --policy.optimizer_lr=$LR \
#     --policy.repo_id=None \
#     --policy.scheduler_decay_lr=$DECAY_LR \
#     --policy.scheduler_decay_steps=$DECAY_STEPS \
#     --policy.n_action_steps=$N_ACTION_STEPS \
#     --policy.train_expert_only=$TRAIN_EXPERT_ONLY \
#     --policy.n_obs_steps=$N_OBS_STEPS \
#     --policy.attention_mode=$ATTN_MODE \
#     --policy.prefix_length=$PREFIX_LENGTH \
#     --policy.num_vlm_layers=$NUM_VLM_LAYERS \
#     --policy.chunk_size=$CHUNK_SIZE \
#     --policy.expert_width_multiplier=$EXPERT_WIDTH_MULTIPLIER \
#     --policy.self_attn_every_n_layers=$SELF_ATTN_EVERY_N_LAYERS \
#     --seed=$SEED \
#     --wandb.enable=false

python src/lerobot/scripts/train.py \
    --policy.type=$POLICY \
    --policy.use_amp=False \
    --policy.vlm_model_name=$VLM \
    --dataset.repo_id=$REPO_ID \
    --dataset.root='/raid/jade/.cache/huggingface/lerobot/HuggingFaceVLA/libero/data' \
    --env.type=$ENV \
    --env.task=$TASK \
    --output_dir=$TRAIN_DIR \
    --batch_size=$BATCH_SIZE \
    --steps=$OFFLINE_STEPS \
    --eval_freq=$EVAL_FREQ \
    --save_freq=$SAVE_FREQ \
    --eval.batch_size=$EVAL_BATCH_SIZE \
    --eval.n_episodes=$NUM_EPISODES \
    --policy.optimizer_lr=$LR \
    --policy.repo_id=None \
    --policy.scheduler_decay_lr=$DECAY_LR \
    --policy.scheduler_decay_steps=$DECAY_STEPS \
    --policy.n_action_steps=$N_ACTION_STEPS \
    --policy.train_expert_only=$TRAIN_EXPERT_ONLY \
    --policy.n_obs_steps=$N_OBS_STEPS \
    --policy.attention_mode=$ATTN_MODE \
    --policy.prefix_length=$PREFIX_LENGTH \
    --policy.num_vlm_layers=$NUM_VLM_LAYERS \
    --policy.chunk_size=$CHUNK_SIZE \
    --policy.load_vlm_weights=$LOAD_VLM_WEIGHTS \
    --policy.expert_width_multiplier=$EXPERT_WIDTH_MULTIPLIER \
    --policy.self_attn_every_n_layers=$SELF_ATTN_EVERY_N_LAYERS \
    --seed=$SEED \
    --wandb.enable=false
@@ -0,0 +1,27 @@
from huggingface_hub import HfApi

api = HfApi()

# api.upload_large_folder(
#     repo_id="HuggingFaceVLA/libero",
#     repo_type="dataset",
#     folder_path="/raid/jade/.cache/huggingface/lerobot/HuggingFaceVLA/libero",
# )

api.upload_large_folder(
    repo_id="HuggingFaceVLA/metaworld_mt50",
    repo_type="dataset",
    folder_path="/raid/jade/.cache/huggingface/lerobot/metaworld_mt50",
)

# repo_id = "HuggingFaceVLA/libero"
# # Upload extra files
# api.upload_file(
#     repo_id=repo_id,
#     repo_type="dataset",
#     path_or_fileobj="/raid/jade/libero_converted/README.md",
#     path_in_repo="README.md",
# )

# api.upload_folder(
#     repo_id=repo_id,
#     repo_type="dataset",
#     folder_path="/raid/jade/libero_converted/meta",
#     path_in_repo="meta",
# )
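upload_large_folder uploads in resumable chunks, but it expects the target dataset repo to already exist. If it might not, a guard like the following sketch creates it first (same repo id and folder as above, using only documented huggingface_hub calls):

from huggingface_hub import HfApi

api = HfApi()
# create_repo with exist_ok=True is a no-op if the dataset repo is already there
api.create_repo("HuggingFaceVLA/metaworld_mt50", repo_type="dataset", exist_ok=True)
api.upload_large_folder(
    repo_id="HuggingFaceVLA/metaworld_mt50",
    repo_type="dataset",
    folder_path="/raid/jade/.cache/huggingface/lerobot/metaworld_mt50",
)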
@@ -0,0 +1,35 @@
import pyarrow.parquet as pq

# First parquet (cached HF version); the second assignment below overwrites it
# with the converted LeRobot copy.
meta1 = pq.read_metadata("/raid/jade/.cache/huggingface/datasets/data/chunk-000/episode_000000.parquet")
meta1 = pq.read_metadata("/raid/jade/.cache/huggingface/lerobot/HuggingFaceVLA/libero/data/chunk-000/episode_000019.parquet")
print("First parquet key_value_metadata:")
print(meta1.metadata)  # low-level file metadata

# Second parquet (the converted version)
print("Second")
meta2 = pq.read_metadata("/raid/jade/.cache/huggingface/lerobot/HuggingFaceVLA/libero/data/chunk-000/episode_000019.parquet")
print("\nSecond parquet key_value_metadata:")
# print(meta2.metadata)

# from datasets import load_dataset
# root_dir = "/raid/jade/libero_converted"

# # Load all parquet files under the root_dir recursively
# ds = load_dataset("parquet", data_files=f"{root_dir}/**/*.parquet")

# print(ds)                    # prints split info
# print(ds["train"].features)  # check schema/features

# # Peek at one row
# example = ds["train"][0]
# print(example.keys())
# print(type(example["observation.images.image"]))
# print(type(example["observation.images.image2"]))

for ep in ["episode_000019.parquet", "episode_000021.parquet", "episode_000026.parquet"]:
    path = f"/raid/jade/.cache/huggingface/lerobot/HuggingFaceVLA/libero/data/chunk-000/{ep}"
    schema = pq.read_schema(path)
    print(ep, schema.names)
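The interesting part of the key-value metadata is the b"huggingface" entry, which stores the datasets features spec as JSON. A small sketch for decoding it (path reused from the loop above):

import json

import pyarrow.parquet as pq

path = "/raid/jade/.cache/huggingface/lerobot/HuggingFaceVLA/libero/data/chunk-000/episode_000019.parquet"
kv = pq.read_schema(path).metadata or {}
if b"huggingface" in kv:
    features = json.loads(kv[b"huggingface"])["info"]["features"]
    # print each column name with its declared feature type
    for name, spec in features.items():
        print(name, spec.get("_type"))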
@@ -0,0 +1,253 @@
#!/usr/bin/env python3
"""
Convert local LeRobot datasets from v2.0 to v2.1 format.
This script adapts the official converter to work with local datasets.
"""

import sys
import argparse
import logging
from pathlib import Path

# Add lerobot to path
sys.path.insert(0, '/home/jade_choghari/lerobot/src')

from lerobot.datasets.lerobot_dataset import CODEBASE_VERSION, LeRobotDataset
from lerobot.datasets.utils import EPISODES_STATS_PATH, STATS_PATH, load_stats, write_info
from lerobot.datasets.v21.convert_stats import check_aggregate_stats, convert_stats

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


def convert_local_dataset(
    dataset_path: str,
    num_workers: int = 4,
    skip_if_converted: bool = True,
):
    """
    Convert a local dataset from v2.0 to v2.1 format.

    Args:
        dataset_path: Path to the local dataset directory
        num_workers: Number of workers for parallel processing
        skip_if_converted: Skip if already has a non-empty episodes_stats.jsonl
    """
    dataset_path = Path(dataset_path)

    print(f"🔄 Converting local dataset: {dataset_path}")

    # Check if already converted
    episodes_stats_path = dataset_path / "meta" / "episodes_stats.jsonl"
    if episodes_stats_path.exists() and skip_if_converted:
        # Check if file is empty
        if episodes_stats_path.stat().st_size == 0:
            print("  ⚠️ episodes_stats.jsonl is empty, will regenerate")
        else:
            # Check if file has content
            with open(episodes_stats_path, 'r') as f:
                content = f.read().strip()
            if not content:
                print("  ⚠️ episodes_stats.jsonl has no content, will regenerate")
            else:
                print("  ⏭️ Already has episodes_stats.jsonl, skipping")
                return True

    try:
        # Check if this is a v2.0 dataset that needs conversion
        stats_path = dataset_path / "meta" / "stats.json"

        if not episodes_stats_path.exists() and stats_path.exists():
            print("  🔄 Detected v2.0 dataset, creating temporary episodes_stats.jsonl...")
            # Create empty episodes_stats.jsonl to allow loading
            episodes_stats_path.touch()
            created_temp_file = True
        else:
            created_temp_file = False

        # Load dataset from local path
        print("  📂 Loading dataset from local path...")
        # Use a dummy repo_id since we're loading locally
        dummy_repo_id = f"{dataset_path.parent.name}/{dataset_path.name}"
        dataset = LeRobotDataset(
            dummy_repo_id,
            root=str(dataset_path),
            # video_backend="pyav",
            # local_files_only=True
        )

        # Remove temporary file if we created it
        if created_temp_file and episodes_stats_path.exists() and episodes_stats_path.stat().st_size == 0:
            episodes_stats_path.unlink()
            print("  🗑️ Removed temporary episodes_stats.jsonl")

        # Remove existing episodes_stats if present (ensure clean conversion)
        if episodes_stats_path.exists():
            episodes_stats_path.unlink()
            print("  🗑️ Removed existing episodes_stats.jsonl")

        # Check if video directory exists before conversion
        videos_dir = dataset_path / "videos"
        if not videos_dir.exists():
            print("  ⚠️ No videos directory found - will skip video statistics")

        # Convert stats
        print("  📊 Computing episode statistics...")
        convert_stats(dataset, num_workers=num_workers)

        # Load reference stats for validation if they exist
        stats_path = dataset.root / STATS_PATH
        if stats_path.exists():
            print("  ✅ Validating against reference stats...")
            try:
                ref_stats = load_stats(dataset.root)
                check_aggregate_stats(dataset, ref_stats)
                print("  ✅ Stats validation passed!")
            except AssertionError as e:
                print(f"  ⚠️ Stats validation failed with minor differences: {e}")
                print("  ⚠️ This is likely due to floating-point precision, continuing anyway...")
                # Check if the error is just a small numerical difference
                if "Max absolute difference:" in str(e) and "Max relative difference:" in str(e):
                    print("  ✅ Treating as acceptable numerical precision difference")
                else:
                    raise

            # Remove old stats.json file
            print("  🗑️ Removing old stats.json")
            stats_path.unlink()
        else:
            print("  ⚠️ No reference stats found, skipping validation")

        # Update codebase version
        dataset.meta.info["codebase_version"] = CODEBASE_VERSION
        write_info(dataset.meta.info, dataset.root)

        print("  ✅ Successfully converted to v2.1")
        return True

    except Exception as e:
        print(f"  ❌ Failed to convert: {e}")
        logger.exception("Conversion failed")
        return False


def convert_multiple_datasets(
    base_dirs: list[str],
    max_datasets: int | None = None,
    num_workers: int = 4,
):
    """Convert multiple datasets from base directories."""

    datasets_to_convert = []

    # Scan for datasets needing conversion
    for base_dir in base_dirs:
        base_path = Path(base_dir)
        if not base_path.exists():
            print(f"⚠️ Directory not found: {base_dir}")
            continue

        print(f"🔍 Scanning: {base_dir}")

        # Walk through author/dataset structure
        for author_dir in sorted(base_path.iterdir()):
            if not author_dir.is_dir():
                continue

            for dataset_dir in sorted(author_dir.iterdir()):
                if not dataset_dir.is_dir():
                    continue

                # Check if needs conversion
                episodes_stats_path = dataset_dir / "meta" / "episodes_stats.jsonl"
                info_path = dataset_dir / "meta" / "info.json"

                needs_conversion = False
                if info_path.exists():
                    if not episodes_stats_path.exists():
                        needs_conversion = True
                        print(f"  📝 Found (missing): {author_dir.name}/{dataset_dir.name}")
                    else:
                        # Check if episodes_stats file is empty
                        try:
                            if episodes_stats_path.stat().st_size == 0:
                                needs_conversion = True
                                print(f"  📝 Found (empty): {author_dir.name}/{dataset_dir.name}")
                            else:
                                # Check if file has content
                                with open(episodes_stats_path, 'r') as f:
                                    content = f.read().strip()
                                if not content:
                                    needs_conversion = True
                                    print(f"  📝 Found (no content): {author_dir.name}/{dataset_dir.name}")
                        except Exception:
                            # If we can't read the file, consider it needing conversion
                            needs_conversion = True
                            print(f"  📝 Found (read error): {author_dir.name}/{dataset_dir.name}")

                if needs_conversion:
                    datasets_to_convert.append(dataset_dir)

    if not datasets_to_convert:
        print("🎉 No datasets need conversion!")
        return

    if max_datasets:
        datasets_to_convert = datasets_to_convert[:max_datasets]

    print(f"\n🚀 Converting {len(datasets_to_convert)} datasets...")

    successful = 0
    failed = 0

    for i, dataset_path in enumerate(datasets_to_convert, 1):
        print(f"\n[{i}/{len(datasets_to_convert)}] {dataset_path.parent.name}/{dataset_path.name}")

        success = convert_local_dataset(dataset_path, num_workers=num_workers)
        if success:
            successful += 1
        else:
            failed += 1

    print("\n📊 Conversion Summary:")
    print(f"  ✅ Successful: {successful}")
    print(f"  ❌ Failed: {failed}")
    print(f"  📈 Success rate: {successful}/{len(datasets_to_convert)} ({100 * successful / len(datasets_to_convert):.1f}%)")


def main():
    parser = argparse.ArgumentParser(description="Convert local LeRobot datasets to v2.1 format")
    parser.add_argument("--dataset", type=str, help="Single dataset path to convert")
    parser.add_argument("--base-dirs", nargs="+",
                        default=["/fsx/dana_aubakirova/vla/community_dataset_v1"],
                        help="Base directories to scan for datasets")
    parser.add_argument("--max-datasets", type=int, help="Maximum number of datasets to convert")
    parser.add_argument("--num-workers", type=int, default=4, help="Number of workers for stats computation")
    parser.add_argument("--all", action="store_true", help="Convert all datasets in base directories")

    args = parser.parse_args()

    if args.dataset:
        # Convert single dataset
        success = convert_local_dataset(args.dataset, num_workers=args.num_workers)
        if success:
            print(f"\n🎉 Successfully converted: {args.dataset}")
        else:
            print(f"\n💥 Failed to convert: {args.dataset}")
            sys.exit(1)

    elif args.all:
        # Convert all datasets
        convert_multiple_datasets(
            args.base_dirs,
            max_datasets=args.max_datasets,
            num_workers=args.num_workers,
        )

    else:
        parser.print_help()


if __name__ == "__main__":
    main()
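Example invocations, as a sketch (the script filename is assumed; use whatever name the file is saved under):

# Convert one dataset in place with 8 stats workers:
#   python convert_local_v20_to_v21.py --dataset /raid/jade/.cache/huggingface/lerobot/HuggingFaceVLA/libero --num-workers 8
# Scan the default base dirs and convert up to 5 datasets that still lack episode stats:
#   python convert_local_v20_to_v21.py --all --max-datasets 5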
@@ -0,0 +1,126 @@
import os
import pyarrow.parquet as pq
import tempfile
import shutil

# Root directory of converted data
root_dir = "/raid/jade/libero_converted"

# No renaming
rename_map = {}

# Hugging Face features metadata (constant across all files)
HF_METADATA = {
    b"huggingface": b'{"info": {"features": {"observation.images.image": {"_type": "Image"}, "observation.images.image2": {"_type": "Image"}, "state": {"feature": {"dtype": "float32", "_type": "Value"}, "length": 8, "_type": "Sequence"}, "actions": {"feature": {"dtype": "float32", "_type": "Value"}, "length": 7, "_type": "Sequence"}, "timestamp": {"dtype": "float32", "_type": "Value"}, "frame_index": {"dtype": "int64", "_type": "Value"}, "episode_index": {"dtype": "int64", "_type": "Value"}, "index": {"dtype": "int64", "_type": "Value"}, "task_index": {"dtype": "int64", "_type": "Value"}}}}'
}


def patch_parquet(parquet_path, hf_metadata):
    try:
        table = pq.read_table(parquet_path)

        # Merge metadata
        new_meta = dict(table.schema.metadata or {})
        new_meta.update(hf_metadata)

        # Apply metadata to table
        table = table.replace_schema_metadata(new_meta)

        # Write safely via temp file, then replace the original
        tmp_fd, tmp_path = tempfile.mkstemp(suffix=".parquet")
        os.close(tmp_fd)
        pq.write_table(table, tmp_path)
        shutil.move(tmp_path, parquet_path)

        print(f"✅ Patched: {parquet_path}")
        return True
    except Exception as e:
        print(f"❌ Failed on {parquet_path}: {e}")
        return False


# Walk through all chunk dirs and patch parquet files
for dirpath, _, filenames in os.walk(root_dir):
    for fname in filenames:
        if fname.endswith(".parquet"):
            fpath = os.path.join(dirpath, fname)
            patch_parquet(fpath, HF_METADATA)
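Hand-writing the metadata blob as a bytes literal is easy to get wrong. A less error-prone sketch builds the same feature spec as a Python dict and serializes it with json.dumps:

import json

features = {
    "observation.images.image": {"_type": "Image"},
    "observation.images.image2": {"_type": "Image"},
    "state": {"feature": {"dtype": "float32", "_type": "Value"}, "length": 8, "_type": "Sequence"},
    "actions": {"feature": {"dtype": "float32", "_type": "Value"}, "length": 7, "_type": "Sequence"},
    "timestamp": {"dtype": "float32", "_type": "Value"},
    "frame_index": {"dtype": "int64", "_type": "Value"},
    "episode_index": {"dtype": "int64", "_type": "Value"},
    "index": {"dtype": "int64", "_type": "Value"},
    "task_index": {"dtype": "int64", "_type": "Value"},
}
# Equivalent to the bytes literal above, but constructed programmatically
HF_METADATA = {b"huggingface": json.dumps({"info": {"features": features}}).encode("utf-8")}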
#!/usr/bin/env python3
import os
import pyarrow.parquet as pq
import tempfile
import shutil

# Explicit list of files to patch
FILES_TO_PATCH = [
    "/raid/jade/.cache/huggingface/lerobot/HuggingFaceVLA/libero/data/chunk-000/episode_000021.parquet",
    "/raid/jade/.cache/huggingface/lerobot/HuggingFaceVLA/libero/data/chunk-000/episode_000022.parquet",
    "/raid/jade/.cache/huggingface/lerobot/HuggingFaceVLA/libero/data/chunk-000/episode_000023.parquet",
    "/raid/jade/.cache/huggingface/lerobot/HuggingFaceVLA/libero/data/chunk-000/episode_000024.parquet",
    "/raid/jade/.cache/huggingface/lerobot/HuggingFaceVLA/libero/data/chunk-000/episode_000025.parquet",
]

# Column renaming map
rename_map = {
    "image": "observation.images.image",
    "image2": "observation.images.image2",
    "actions": "action",
}

# Hugging Face features metadata (constant across all files).
# NOTE: feature names must match the columns *after* renaming, hence "action"
# rather than "actions" here.
HF_METADATA = {
    b"huggingface": b'{"info": {"features": {'
    b'"observation.images.image": {"_type": "Image"}, '
    b'"observation.images.image2": {"_type": "Image"}, '
    b'"state": {"feature": {"dtype": "float32", "_type": "Value"}, "length": 8, "_type": "Sequence"}, '
    b'"action": {"feature": {"dtype": "float32", "_type": "Value"}, "length": 7, "_type": "Sequence"}, '
    b'"timestamp": {"dtype": "float32", "_type": "Value"}, '
    b'"frame_index": {"dtype": "int64", "_type": "Value"}, '
    b'"episode_index": {"dtype": "int64", "_type": "Value"}, '
    b'"index": {"dtype": "int64", "_type": "Value"}, '
    b'"task_index": {"dtype": "int64", "_type": "Value"}}}}'
}


def patch_parquet(parquet_path, hf_metadata, rename_map):
    try:
        # Load parquet table
        table = pq.read_table(parquet_path)

        # If renaming is needed
        if rename_map:
            new_names = [rename_map.get(name, name) for name in table.schema.names]
            table = table.rename_columns(new_names)

        # Merge schema metadata
        new_meta = dict(table.schema.metadata or {})
        new_meta.update(hf_metadata)

        # Replace metadata in table
        table = table.replace_schema_metadata(new_meta)

        # Write safely via temp file
        tmp_fd, tmp_path = tempfile.mkstemp(suffix=".parquet")
        os.close(tmp_fd)
        pq.write_table(table, tmp_path)

        # Replace original file
        shutil.move(tmp_path, parquet_path)

        print(f"✅ Patched: {parquet_path}")
        return True
    except Exception as e:
        print(f"❌ Failed on {parquet_path}: {e}")
        return False


if __name__ == "__main__":
    for fpath in FILES_TO_PATCH:
        if os.path.exists(fpath):
            patch_parquet(fpath, HF_METADATA, rename_map)
        else:
            print(f"⚠️ File not found: {fpath}")
@@ -0,0 +1,255 @@
"""
This script demonstrates how to evaluate a pretrained smolVLA policy on the LIBERO benchmark.
"""

import dataclasses
import logging
import math
import pathlib

import cv2
import draccus
import imageio
import numpy as np
import torch
from libero.libero import benchmark, get_libero_path
from libero.libero.envs import OffScreenRenderEnv
from tqdm import tqdm

from lerobot.policies.smolvla.modeling_smolvla import SmolVLAPolicy
# from lerobot.policies.pi0.modeling_pi0 import PI0Policy  # alternative policy, unused here

LIBERO_DUMMY_ACTION = [0.0] * 6 + [-1.0]
LIBERO_ENV_RESOLUTION = 256  # resolution used to render training data


@dataclasses.dataclass
class Args:
    """
    Evaluation arguments for smolVLA on LIBERO.
    """

    # --- Hugging Face arguments ---
    policy_path: str = "lerobot/smolvla_base"
    """Path to the pretrained policy on the Hugging Face Hub or local directory."""

    # --- LIBERO environment-specific parameters ---
    task_suite_name: str = "libero_spatial"
    """Task suite. Options: libero_spatial, libero_object, libero_goal, libero_10, libero_90"""
    num_steps_wait: int = 10
    """Number of steps to wait for objects to stabilize in sim."""
    num_trials_per_task: int = 50
    """Number of rollouts per task."""

    # --- Evaluation arguments ---
    video_out_path: str = "data/libero/videos"
    """Path to save videos."""
    device: str = "cuda"
    """Device to use for evaluation."""

    seed: int = 7
    """Random seed (for reproducibility)."""


@draccus.wrap()
def eval_libero(args: Args) -> None:
    # Set random seed
    torch.manual_seed(args.seed)
    np.random.seed(args.seed)

    # --- Load Policy ---
    policy = SmolVLAPolicy.from_pretrained(args.policy_path)
    policy.to(args.device)
    policy.eval()

    # --- Initialize LIBERO task suite ---
    benchmark_dict = benchmark.get_benchmark_dict()
    try:
        task_suite = benchmark_dict[args.task_suite_name]()
    except KeyError:
        raise ValueError(
            f"Unknown task suite: {args.task_suite_name}. "
            f"Available options are: {list(benchmark_dict.keys())}"
        )
    num_tasks_in_suite = task_suite.n_tasks
    logging.info(f"Task suite: {args.task_suite_name}")

    pathlib.Path(args.video_out_path).mkdir(parents=True, exist_ok=True)

    if args.task_suite_name == "libero_spatial":
        max_steps = 220  # longest training demo has 193 steps
    elif args.task_suite_name == "libero_object":
        max_steps = 280  # longest training demo has 254 steps
    elif args.task_suite_name == "libero_goal":
        max_steps = 300  # longest training demo has 270 steps
    elif args.task_suite_name == "libero_10":
        max_steps = 520  # longest training demo has 505 steps
    elif args.task_suite_name == "libero_90":
        max_steps = 400  # longest training demo has 373 steps
    else:
        # Fallback for custom task suites
        max_steps = 520

    # --- Evaluation Loop ---
    total_episodes, total_successes = 0, 0
    for task_id in tqdm(range(num_tasks_in_suite), desc="Tasks"):
        # Get task
        task = task_suite.get_task(task_id)

        # Get default LIBERO initial states
        initial_states = task_suite.get_task_init_states(task_id)

        # Initialize LIBERO environment and task description
        env, task_description = _get_libero_env(task, LIBERO_ENV_RESOLUTION, args.seed)

        # Start episodes
        task_episodes, task_successes = 0, 0
        for episode_idx in tqdm(
            range(min(args.num_trials_per_task, len(initial_states))),
            desc=f"Task {task_id}: {task.language}",
            leave=False,
        ):
            logging.info(f"\nTask: {task_description}")

            # Reset environment and policy
            env.reset()
            policy.reset()

            # Set initial states
            obs = env.set_init_state(initial_states[episode_idx])

            # IMPORTANT: Do nothing for the first few timesteps because the simulator drops objects
            # and we need to wait for them to fall
            for _ in range(args.num_steps_wait):
                obs, _, _, _ = env.step(LIBERO_DUMMY_ACTION)

            # Setup
            t = 0
            frames = []
            done = False

            logging.info(f"Starting episode {task_episodes + 1}...")
            while t < max_steps:
                try:
                    # Get preprocessed images
                    # IMPORTANT: [::-1, ::-1] flips both image axes, i.e. rotates
                    # 180 degrees, to match the train-time preprocessing
                    wrist_img = np.ascontiguousarray(obs["robot0_eye_in_hand_image"][::-1, ::-1])
                    agentview_image = np.ascontiguousarray(obs["agentview_image"][::-1, ::-1])
                    frames.append(agentview_image)

                    # Prepare observations dict
                    state = np.concatenate(
                        (
                            obs["robot0_eef_pos"],
                            _quat2axisangle(obs["robot0_eef_quat"]),
                            obs["robot0_gripper_qpos"],
                        )
                    )
                    observation = {
                        "observation.images.image": torch.from_numpy(agentview_image / 255.0)
                        .permute(2, 0, 1)
                        .to(torch.float32)
                        .to(args.device)
                        .unsqueeze(0),
                        "observation.images.image2": torch.from_numpy(wrist_img / 255.0)
                        .permute(2, 0, 1)
                        .to(torch.float32)
                        .to(args.device)
                        .unsqueeze(0),
                        "observation.state": torch.from_numpy(state).to(torch.float32).to(args.device).unsqueeze(0),
                        "task": task_description,
                    }

                    # Query model to get action
                    with torch.inference_mode():
                        action_tensor = policy.select_action(observation)
                    action = action_tensor.cpu().numpy()[0]
                    # Invert the gripper channel: the env and the training data use
                    # opposite open/close conventions.
                    action[-1] = 1 - action[-1]

                    # Execute action in environment
                    obs, _, done, _ = env.step(action)
                    if done:
                        task_successes += 1
                        total_successes += 1
                        break
                    t += 1

                except Exception as e:
                    logging.error(f"Caught exception: {e}")
                    break

            task_episodes += 1
            total_episodes += 1

            # Save a replay video of the episode
            suffix = "success" if done else "failure"
            task_segment = task_description.replace(" ", "_").replace("/", "_")
            video_path = (
                pathlib.Path(args.video_out_path)
                / f"rollout_task_{task_id}_episode_{episode_idx}_{task_segment}_{suffix}.mp4"
            )
            fps = 30
            writer = imageio.get_writer(video_path, fps=fps)
            for image in frames:
                writer.append_data(image)
            writer.close()
            logging.info(f"Saved video to {video_path}")

            # Log current results
            logging.info(f"Success: {done}")
            if total_episodes > 0:
                logging.info(f"# episodes completed so far: {total_episodes}")
                logging.info(f"# successes: {total_successes} ({total_successes / total_episodes * 100:.1f}%)")

        # Log final results for the task
        if task_episodes > 0:
            logging.info(f"Task {task_id} success rate: {float(task_successes) / float(task_episodes):.2f}")
        if total_episodes > 0:
            logging.info(f"Cumulative success rate: {float(total_successes) / float(total_episodes):.2f}")

    logging.info("--- Evaluation finished ---")
    if total_episodes > 0:
        logging.info(f"Total success rate: {float(total_successes) / float(total_episodes):.2f}")
    logging.info(f"Total episodes: {total_episodes}")
    logging.info(f"Total successes: {total_successes}")
    cv2.destroyAllWindows()


def _get_libero_env(task, resolution, seed):
    """Initializes and returns the LIBERO environment, along with the task description."""
    task_description = task.language
    task_bddl_file = pathlib.Path(get_libero_path("bddl_files")) / task.problem_folder / task.bddl_file
    env_args = {
        "bddl_file_name": str(task_bddl_file),
        "camera_heights": resolution,
        "camera_widths": resolution,
    }
    env = OffScreenRenderEnv(**env_args)
    env.seed(seed)  # IMPORTANT: seed seems to affect object positions even when using fixed initial state
    return env, task_description


def _quat2axisangle(quat):
    """
    Copied from robosuite:
    https://github.com/ARISE-Initiative/robosuite/blob/eafb81f54ffc104f905ee48a16bb15f059176ad3/robosuite/utils/transform_utils.py#L490C1-L512C55
    """
    # clip quaternion
    if quat[3] > 1.0:
        quat[3] = 1.0
    elif quat[3] < -1.0:
        quat[3] = -1.0

    den = np.sqrt(1.0 - quat[3] * quat[3])
    if math.isclose(den, 0.0):
        # This is (close to) a zero degree rotation, immediately return
        return np.zeros(3)

    return (quat[:3] * 2.0 * math.acos(quat[3])) / den
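A quick sanity check on _quat2axisangle: for an (x, y, z, w) quaternion with w = cos(theta/2) and the vector part equal to axis * sin(theta/2), the function returns axis * theta. Sketch:

import math
import numpy as np

# 90-degree rotation about z: vector part [0, 0, sin(45deg)], scalar part cos(45deg)
q = np.array([0.0, 0.0, math.sin(math.pi / 4), math.cos(math.pi / 4)])
# _quat2axisangle(q) should be approximately [0, 0, pi/2]
print(_quat2axisangle(q))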
if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)
    eval_libero()
@@ -0,0 +1,8 @@
imageio[ffmpeg]
numpy==1.22.4
tqdm
tyro
PyYaml
opencv-python==4.6.0.66
robosuite==1.4.1
matplotlib==3.5.3
@@ -0,0 +1,70 @@
#!/usr/bin/env python3
import os
import pyarrow.parquet as pq
import tempfile
import shutil

FILES_TO_PATCH = [
    "/raid/jade/.cache/huggingface/lerobot/HuggingFaceVLA/libero/data/chunk-000/episode_000021.parquet",
    "/raid/jade/.cache/huggingface/lerobot/HuggingFaceVLA/libero/data/chunk-000/episode_000022.parquet",
    "/raid/jade/.cache/huggingface/lerobot/HuggingFaceVLA/libero/data/chunk-000/episode_000023.parquet",
    "/raid/jade/.cache/huggingface/lerobot/HuggingFaceVLA/libero/data/chunk-000/episode_000024.parquet",
    "/raid/jade/.cache/huggingface/lerobot/HuggingFaceVLA/libero/data/chunk-000/episode_000025.parquet",
]

# Column renaming map
rename_map = {
    "wrist_image": "observation.images.image2",
    "actions": "action",
}

# Hugging Face metadata
HF_METADATA = {
    b"huggingface": b'{"info": {"features": {'
    b'"observation.images.image": {"_type": "Image"}, '
    b'"observation.images.image2": {"_type": "Image"}, '
    b'"state": {"feature": {"dtype": "float32", "_type": "Value"}, "length": 8, "_type": "Sequence"}, '
    b'"action": {"feature": {"dtype": "float32", "_type": "Value"}, "length": 7, "_type": "Sequence"}, '
    b'"timestamp": {"dtype": "float32", "_type": "Value"}, '
    b'"frame_index": {"dtype": "int64", "_type": "Value"}, '
    b'"episode_index": {"dtype": "int64", "_type": "Value"}, '
    b'"index": {"dtype": "int64", "_type": "Value"}, '
    b'"task_index": {"dtype": "int64", "_type": "Value"}}}}'
}


def patch_parquet(parquet_path, hf_metadata, rename_map):
    try:
        table = pq.read_table(parquet_path)

        # Apply column renames if needed
        if rename_map:
            new_names = [rename_map.get(name, name) for name in table.schema.names]
            table = table.rename_columns(new_names)

        # Merge schema metadata
        new_meta = dict(table.schema.metadata or {})
        new_meta.update(hf_metadata)

        # Replace metadata
        table = table.replace_schema_metadata(new_meta)

        # Write via temp file
        tmp_fd, tmp_path = tempfile.mkstemp(suffix=".parquet")
        os.close(tmp_fd)
        pq.write_table(table, tmp_path)

        shutil.move(tmp_path, parquet_path)
        print(f"✅ Patched: {parquet_path}")
        return True
    except Exception as e:
        print(f"❌ Failed on {parquet_path}: {e}")
        return False


if __name__ == "__main__":
    for fpath in FILES_TO_PATCH:
        if os.path.exists(fpath):
            patch_parquet(fpath, HF_METADATA, rename_map)
        else:
            print(f"⚠️ File not found: {fpath}")
@@ -0,0 +1,64 @@
#!/usr/bin/env python3
import os
import pyarrow.parquet as pq
import tempfile
import shutil

# Root directory containing all parquet files
ROOT_DIR = "/raid/jade/.cache/huggingface/lerobot/HuggingFaceVLA/libero/data"

# Column renaming map (normalize schema to what training expects)
rename_map = {
    "state": "observation.state",
}

# Hugging Face metadata (aligned with expected feature names)
HF_METADATA = {
    b"huggingface": b'{"info": {"features": {'
    b'"observation.images.image": {"_type": "Image"}, '
    b'"observation.images.image2": {"_type": "Image"}, '
    b'"observation.state": {"feature": {"dtype": "float32", "_type": "Value"}, "length": 8, "_type": "Sequence"}, '
    b'"action": {"feature": {"dtype": "float32", "_type": "Value"}, "length": 7, "_type": "Sequence"}, '
    b'"timestamp": {"dtype": "float32", "_type": "Value"}, '
    b'"frame_index": {"dtype": "int64", "_type": "Value"}, '
    b'"episode_index": {"dtype": "int64", "_type": "Value"}, '
    b'"index": {"dtype": "int64", "_type": "Value"}, '
    b'"task_index": {"dtype": "int64", "_type": "Value"}}}}'
}


def patch_parquet(parquet_path, hf_metadata, rename_map):
    try:
        # Read the parquet table
        table = pq.read_table(parquet_path)

        # Apply renames if necessary
        if rename_map:
            new_names = [rename_map.get(name, name) for name in table.schema.names]
            if new_names != table.schema.names:
                table = table.rename_columns(new_names)

        # Update metadata
        new_meta = dict(table.schema.metadata or {})
        new_meta.update(hf_metadata)
        table = table.replace_schema_metadata(new_meta)

        # Write to temp file then move back over the original
        tmp_fd, tmp_path = tempfile.mkstemp(suffix=".parquet")
        os.close(tmp_fd)
        pq.write_table(table, tmp_path)
        shutil.move(tmp_path, parquet_path)

        # Debug print
        print(f"✅ Patched: {parquet_path}")
        print("  Columns:", table.schema.names)
        return True
    except Exception as e:
        print(f"❌ Failed on {parquet_path}: {e}")
        return False


if __name__ == "__main__":
    for dirpath, _, filenames in os.walk(ROOT_DIR):
        for fname in filenames:
            if fname.endswith(".parquet"):
                fpath = os.path.join(dirpath, fname)
                patch_parquet(fpath, HF_METADATA, rename_map)
@@ -0,0 +1,3 @@
from huggingface_hub import HfApi

hub_api = HfApi()
hub_api.create_tag("HuggingFaceVLA/libero", tag="v2.1", repo_type="dataset")
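The tag matters because LeRobot loads hub datasets pinned to the codebase-version tag (v2.1 here), so an untagged repo can fail to resolve after the conversion. If the tag already exists from an earlier push, create_tag will error; a re-tagging sketch using documented huggingface_hub calls:

from huggingface_hub import HfApi

hub_api = HfApi()
try:
    # drop a stale tag first so it can be re-pointed at the new revision
    hub_api.delete_tag("HuggingFaceVLA/libero", tag="v2.1", repo_type="dataset")
except Exception:
    pass  # tag did not exist yet
hub_api.create_tag("HuggingFaceVLA/libero", tag="v2.1", repo_type="dataset")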