add docs

2026-05-23 12:40:08 +00:00 · 2026-01-19 10:36:25 +00:00
parent fd917e4fa0
commit b864c13dfb
3 changed files with 477 additions and 0 deletions
@@ -0,0 +1,50 @@
+#!/bin/bash
+
+# Example script to run synthetic data generation with Qwen VLM
+# This generates user prompts and robot utterances for hierarchical policy training
+
+# Configuration
+REPO_ID="lerobot/libero_video"
+MODEL="Qwen/Qwen3-VL-30B-A3B-Instruct"
+# or: MODEL="Qwen/Qwen2-VL-7B-Instruct"
+
+
+OUTPUT_DIR="/fsx/jade_choghari/outputs/libero-annotate"
+
+BATCH_SIZE=16
+TEMPERATURE=0.9
+SAMPLE_INTERVAL=5.0  # generate dialogue every 1 second (all episodes processed)
+
+# Run subtask annotation
+python /admin/home/jade_choghari/lerobot/src/lerobot/policies/pi05_full/annotate/subtask_annotate.py \
+    --repo-id "$REPO_ID" \
+    --video-key observation.images.image \
+    --output-dir "$OUTPUT_DIR" \
+    --output-repo-id "jadechoghari/libero-annotate" \
+    --batch-size "$BATCH_SIZE" \
+# run synthetic data generation (all episodes processed)
+# python examples/dataset/annotate_pgen.py \
+#     --repo-id "$REPO_ID" \
+#     --model "$MODEL" \
+#     --output-dir "$OUTPUT_DIR" \
+#     --temperature "$TEMPERATURE" \
+#     --batch-size "$BATCH_SIZE" \
+#     --sample-interval "$SAMPLE_INTERVAL" \
+#     --image-key observation.images.base \
+#     --num-image-views-per-sample 1
+
+# for faster testing, increase sample interval:
+# --sample-interval 5.0  # Samples every 5 seconds (much faster)
+
+# to push to hub after generation:
+# add --push-to-hub flag
+
+# efficient batch processing: several episodes at once (count set by --video-batch-size)
+# python examples/dataset/annotate_pgen.py \
+#     --repo-id "$REPO_ID" \
+#     --model "$MODEL" \
+#     --output-dir "$OUTPUT_DIR" \
+#     --video-mode \
+#     --video-key observation.images.up \
+#     --video-batch-size "$BATCH_SIZE" \
+#     --sample-interval 1.0