#!/bin/bash

# Quick test to verify the fix for task_indices length mismatch
# This should now work correctly even with --num-samples < full dataset length

# Paths used by the test. Each can be overridden from the environment; the
# defaults are the locations this script has always used. The output directory
# is defined once so the annotation step and the verification step cannot
# drift apart.
data_dir="${PGEN_TEST_DATA_DIR:-/fsx/jade_choghari/.cache/huggingface/lerobot/lerobot/svla_so101_pickplace}"
output_dir="${PGEN_TEST_OUTPUT_DIR:-/fsx/jade_choghari/outputs/pgen_test_fixed}"

echo "Testing annotate_pgen.py with --num-samples=100 on full dataset..."

# Capture the exit status immediately: every later command (including a `[`
# test) overwrites $?, which would make the reported error code meaningless.
annotate_status=0
python examples/dataset/annotate_pgen.py \
    --data-dir "$data_dir" \
    --model Qwen/Qwen3-VL-30B-A3B-Instruct \
    --num-samples 100 \
    --sample-interval 1.0 \
    --output-dir "$output_dir" || annotate_status=$?

if [ "$annotate_status" -ne 0 ]; then
    echo "✗ FAILED: Script exited with error code $annotate_status" >&2
    # Propagate the failure so callers (CI, wrapper scripts) can detect it.
    exit "$annotate_status"
fi

echo "✓ SUCCESS: Script completed without errors!"
echo ""
echo "Verifying output..."

# Spot-check that the annotated dataset exposes task_index_high_level and that
# a sample of frames can be read. Indices beyond the dataset length are
# skipped. The dataset root is passed as an argument (sys.argv[1]) rather than
# being repeated inside the Python source.
python -c "
import sys

from lerobot.datasets.lerobot_dataset import LeRobotDataset

ds = LeRobotDataset(repo_id='local_test', root=sys.argv[1])
print(f'Dataset has {len(ds)} frames')
print(f'Features: {list(ds.features.keys())}')

# Check that task_index_high_level exists
assert 'task_index_high_level' in ds.features, 'task_index_high_level not in features!'

# Sample some frames
for idx in [0, 50, 99, 100, 500, 1000, 11938]:
    if idx < len(ds):
        frame = ds[idx]
        task_idx = frame['task_index_high_level'].item()
        print(f'Frame {idx}: task_index_high_level = {task_idx}')

print('✓ All checks passed!')
" "$output_dir" || exit "$?"