mirror of
https://github.com/huggingface/lerobot.git
synced 2026-05-16 00:59:46 +00:00
43 lines
1.3 KiB
Bash
Executable File
43 lines
1.3 KiB
Bash
Executable File
#!/bin/bash
#
# Example script to run synthetic data generation with a Qwen VLM.
# This generates user prompts and robot utterances for hierarchical policy
# training.
#
# Usage:
#   Run from the repository root (the Python script path below is relative).
#   Any arguments given to this script are forwarded unchanged to
#   annotate_pgen.py, e.g. pass --push-to-hub to push to the hub after
#   generation.
#
# Environment overrides (each falls back to the default shown below):
#   REPO_ID, MODEL, OUTPUT_DIR, BATCH_SIZE, TEMPERATURE, SAMPLE_INTERVAL

# Abort on command failure, on use of an unset variable, and on a failure in
# any stage of a pipeline.
set -euo pipefail

# Configuration
REPO_ID="${REPO_ID:-lerobot/svla_so101_pickplace}"
MODEL="${MODEL:-Qwen/Qwen3-VL-30B-A3B-Instruct}"
# Alternative: MODEL="Qwen/Qwen2-VL-7B-Instruct"

# NOTE: the default output directory is a user-specific absolute path; set
# OUTPUT_DIR in the environment to write somewhere else.
OUTPUT_DIR="${OUTPUT_DIR:-/fsx/jade_choghari/outputs/pgen_annotations1}"
BATCH_SIZE="${BATCH_SIZE:-32}"
TEMPERATURE="${TEMPERATURE:-0.9}"
# Generate dialogue every 5 seconds (all episodes processed).
SAMPLE_INTERVAL="${SAMPLE_INTERVAL:-5.0}"

# Run synthetic data generation (processes ALL episodes).
# "$@" forwards extra flags from the command line; it expands to nothing when
# the script is called without arguments.
python examples/dataset/annotate_pgen.py \
  --repo-id "$REPO_ID" \
  --model "$MODEL" \
  --output-dir "$OUTPUT_DIR" \
  --temperature "$TEMPERATURE" \
  --batch-size "$BATCH_SIZE" \
  --sample-interval "$SAMPLE_INTERVAL" \
  --num-image-views-per-sample 1 \
  "$@"

# Sampling density vs. speed:
#   A larger sample interval means fewer samples and a faster run. The default
#   above samples every 5 seconds; use e.g. SAMPLE_INTERVAL=1.0 for dialogue
#   every second (slower).

# To push to hub after generation:
#   Add the --push-to-hub flag (it can be passed as an argument to this script).

# Efficient batch processing in video mode (several episodes at once;
# presumably the count is set by --video-batch-size - confirm against
# annotate_pgen.py):
# python examples/dataset/annotate_pgen.py \
#   --repo-id "$REPO_ID" \
#   --model "$MODEL" \
#   --output-dir "$OUTPUT_DIR" \
#   --video-mode \
#   --video-key observation.images.up \
#   --video-batch-size "$BATCH_SIZE" \
#   --sample-interval 1.0