diff --git a/examples/dataset_annotation/subtask_annotation.py b/examples/dataset_annotation/subtask_annotation.py
index cb9fb6350..5689b6a09 100644
--- a/examples/dataset_annotation/subtask_annotation.py
+++ b/examples/dataset_annotation/subtask_annotation.py
@@ -43,7 +43,7 @@ Usage:
     pip install transformers torch qwen-vl-utils accelerate
 
     # Annotate and push to hub:
-    python subtask_annotation_local.py \\
+    python subtask_annotation.py \\
         --repo-id pepijn223/mydataset \\
         --subtasks "reach,grasp,lift,place" \\
         --video-key observation.images.base \\
@@ -65,7 +65,7 @@ from rich.console import Console
 from rich.panel import Panel
 from rich.progress import Progress, SpinnerColumn, TextColumn
 from rich.tree import Tree
-from transformers import Qwen2VLForConditionalGeneration, AutoProcessor
+from transformers import Qwen3VLMoeForConditionalGeneration, AutoProcessor
 
 from lerobot.datasets.lerobot_dataset import LeRobotDataset
 
@@ -158,7 +158,7 @@ class VideoAnnotator:
         self.console.print(f"[cyan]Loading model: {model_name}...[/cyan]")
 
         # Load model and processor
-        self.model = Qwen2VLForConditionalGeneration.from_pretrained(
+        self.model = Qwen3VLMoeForConditionalGeneration.from_pretrained(
             model_name,
             torch_dtype=torch_dtype,
             device_map=device,
@@ -743,15 +743,15 @@ def main():
         epilog="""
 Examples:
   # List available cameras:
-  python subtask_annotation_local.py --repo-id pepijn223/mydataset --subtasks "reach,grasp" --max-episodes 0
+  python subtask_annotation.py --repo-id pepijn223/mydataset --subtasks "reach,grasp" --max-episodes 0
 
   # Annotate with specific camera:
-  python subtask_annotation_local.py --repo-id pepijn223/mydataset --subtasks "reach,grasp" --video-key observation.images.top --push-to-hub
+  python subtask_annotation.py --repo-id pepijn223/mydataset --subtasks "reach,grasp" --video-key observation.images.top --push-to-hub
 
-  # Use smaller model (7B instead of 30B):
-  python subtask_annotation_local.py --repo-id pepijn223/mydataset --subtasks "reach,grasp" --video-key observation.images.top --model Qwen/Qwen2-VL-7B-Instruct --push-to-hub
+  # Use custom model:
+  python subtask_annotation.py --repo-id pepijn223/mydataset --subtasks "reach,grasp" --video-key observation.images.top --model Qwen/Qwen3-VL-30B-A3B-Instruct --push-to-hub
 
-Note: The 7B model requires ~16GB VRAM. Use 2B model (~8GB VRAM) if needed.
+Note: The 30B model requires ~60GB VRAM. Make sure you have sufficient GPU memory.
         """
     )
     parser.add_argument(
@@ -783,7 +783,7 @@ Note: The 7B model requires ~16GB VRAM. Use 2B model (~8GB VRAM) if needed.
         "--model",
         type=str,
         default="Qwen/Qwen3-VL-30B-A3B-Instruct",
-        help="Model to use (default: Qwen/Qwen3-VL-30B-A3B-Instruct). Other options: Qwen/Qwen2-VL-2B-Instruct, Qwen/Qwen2-VL-7B-Instruct",
+        help="Qwen3-VL model to use (default: Qwen/Qwen3-VL-30B-A3B-Instruct)",
     )
     parser.add_argument(
         "--skip-existing",
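
Quick sanity check for the model swap above: a minimal standalone sketch that
loads the new class the same way the script does. It assumes a transformers
release recent enough to ship Qwen3-VL support (and hence
Qwen3VLMoeForConditionalGeneration), and enough GPU memory for the 30B MoE
checkpoint (~60GB, per the updated epilog note). The bfloat16 dtype and
device_map="auto" are illustrative choices, not values taken from the diff:

    # Minimal sketch: load the Qwen3-VL MoE model as the annotator does.
    # The class, model name, and from_pretrained keywords mirror the diff;
    # dtype and device placement here are assumptions for illustration.
    import torch
    from transformers import AutoProcessor, Qwen3VLMoeForConditionalGeneration

    model_name = "Qwen/Qwen3-VL-30B-A3B-Instruct"
    model = Qwen3VLMoeForConditionalGeneration.from_pretrained(
        model_name,
        torch_dtype=torch.bfloat16,  # half precision to stay within ~60GB VRAM
        device_map="auto",           # shard across available GPUs if needed
    )
    processor = AutoProcessor.from_pretrained(model_name)

If this loads without an import or OOM error, the environment is ready for
the annotation runs shown in the epilog examples.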