diff --git a/examples/dataset/annotate.py b/examples/dataset/annotate.py
index 895145def..065873d5d 100644
--- a/examples/dataset/annotate.py
+++ b/examples/dataset/annotate.py
@@ -1057,14 +1057,50 @@ def save_skill_annotations(
     update_frame_task_indices(dataset, annotations, skill_to_task_idx)
 
     # Step 3: Also save the raw skill annotations as JSON for reference
-    skills_data = {
-        "coarse_description": annotations[next(iter(annotations))].description,
-        "skill_to_task_index": skill_to_task_idx,
-        "episodes": {str(ep_idx): ann.to_dict() for ep_idx, ann in annotations.items()},
-    }
-
     skills_path = output_path or (dataset.root / "meta" / "skills.json")
     skills_path.parent.mkdir(parents=True, exist_ok=True)
+
+    # Load existing skills data if the file exists and is non-empty
+    existing_skills_data = None
+    if skills_path.exists():
+        try:
+            with open(skills_path, "r") as f:
+                existing_skills_data = json.load(f)
+            if existing_skills_data and len(existing_skills_data.get("episodes", {})) > 0:
+                console.print(f"[cyan]Found existing skills.json with {len(existing_skills_data.get('episodes', {}))} episodes, merging...[/cyan]")
+        except (json.JSONDecodeError, IOError):
+            console.print("[yellow]Warning: Could not load existing skills.json, will create a new file[/yellow]")
+            existing_skills_data = None
+
+    # Prepare the new annotations keyed by episode index
+    new_episodes = {str(ep_idx): ann.to_dict() for ep_idx, ann in annotations.items()}
+
+    # Merge with existing data if available
+    if existing_skills_data:
+        # Preserve existing episodes that are not being updated
+        merged_episodes = existing_skills_data.get("episodes", {}).copy()
+        merged_episodes.update(new_episodes)
+
+        # Merge skill_to_task_index mappings
+        merged_skill_to_task = existing_skills_data.get("skill_to_task_index", {}).copy()
+        merged_skill_to_task.update(skill_to_task_idx)
+
+        # Keep the existing coarse_description if present, otherwise use the new one
+        coarse_desc = existing_skills_data.get("coarse_description", annotations[next(iter(annotations))].description)
+
+        skills_data = {
+            "coarse_description": coarse_desc,
+            "skill_to_task_index": merged_skill_to_task,
+            "episodes": merged_episodes,
+        }
+        console.print(f"[cyan]Updated {len(new_episodes)} episode(s), total episodes in skills.json: {len(merged_episodes)}[/cyan]")
+    else:
+        # No existing data, create a new file
+        skills_data = {
+            "coarse_description": annotations[next(iter(annotations))].description,
+            "skill_to_task_index": skill_to_task_idx,
+            "episodes": new_episodes,
+        }
 
     with open(skills_path, "w") as f:
         json.dump(skills_data, f, indent=2)
diff --git a/examples/dataset/run.sh b/examples/dataset/run.sh
index d2f92fe7d..164cda5ae 100644
--- a/examples/dataset/run.sh
+++ b/examples/dataset/run.sh
@@ -1,5 +1,5 @@
 python examples/dataset/annotate.py \
     --repo-id lerobot/svla_so101_pickplace \
     --video-key observation.images.side \
     --model Qwen/Qwen3-VL-30B-A3B-Instruct \
-    
\ No newline at end of file
+    --episodes 3 5 7 44
\ No newline at end of file
diff --git a/examples/dataset/run_pgen.sh b/examples/dataset/run_pgen.sh
index e64f03205..7cb86dd76 100755
--- a/examples/dataset/run_pgen.sh
+++ b/examples/dataset/run_pgen.sh
@@ -10,9 +10,9 @@
 MODEL="Qwen/Qwen3-VL-30B-A3B-Instruct"
 OUTPUT_DIR="/fsx/jade_choghari/outputs/pgen_annotations"
 
-BATCH_SIZE=1
-TEMPERATURE=0.7
-SAMPLE_INTERVAL=1.0 # Generate dialogue every 1 second (all episodes processed)
+BATCH_SIZE=32
+TEMPERATURE=0.9
+SAMPLE_INTERVAL=5.0 # Generate dialogue every 5 seconds (all episodes processed)
 
 # Run synthetic data generation (processes ALL episodes)
 python examples/dataset/annotate_pgen.py \
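For reviewers, a minimal standalone sketch of the merge semantics the annotate.py hunk introduces. The function merge_skills_json and its signature are illustrative assumptions for this note, not part of the repository's API:

# Hypothetical helper mirroring the merge logic in save_skill_annotations;
# not part of the patch itself.
def merge_skills_json(existing, new_episodes, new_skill_to_task, default_description):
    if not existing:
        return {
            "coarse_description": default_description,
            "skill_to_task_index": new_skill_to_task,
            "episodes": new_episodes,
        }
    # New annotations win on collision; untouched episodes are preserved.
    merged_episodes = {**existing.get("episodes", {}), **new_episodes}
    merged_skills = {**existing.get("skill_to_task_index", {}), **new_skill_to_task}
    return {
        # An existing coarse_description takes precedence over the new one.
        "coarse_description": existing.get("coarse_description", default_description),
        "skill_to_task_index": merged_skills,
        "episodes": merged_episodes,
    }

# Re-annotating only episode "3" keeps the earlier episode "0" entry intact,
# which is what makes partial runs like `--episodes 3 5 7 44` safe to repeat:
old = {
    "coarse_description": "pick and place",
    "skill_to_task_index": {"grasp": 1},
    "episodes": {"0": {"skills": []}},
}
new = merge_skills_json(old, {"3": {"skills": []}}, {"place": 2}, "ignored")
assert set(new["episodes"]) == {"0", "3"}
assert new["coarse_description"] == "pick and place"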