diff --git a/src/lerobot/data_processing/data_annotations/vlm_annotations.py b/src/lerobot/data_processing/data_annotations/vlm_annotations.py
index 237c8af51..010b26504 100644
--- a/src/lerobot/data_processing/data_annotations/vlm_annotations.py
+++ b/src/lerobot/data_processing/data_annotations/vlm_annotations.py
@@ -19,7 +19,9 @@ import re
 from abc import ABC, abstractmethod
 from pathlib import Path
 
+import cv2
 import torch
+from PIL import Image
 
 from lerobot.data_processing.data_annotations.subtask_annotations import Skill
 from lerobot.utils.constants import (
@@ -87,6 +89,21 @@ class BaseVLM(ABC):
         pass
 
 
+def _load_video_frames(video_path: Path) -> list[Image.Image]:
+    """Read all frames as RGB PIL Images (bypassing torchcodec's fps defaults); OSError if unopenable."""
+    frames: list[Image.Image] = []
+    cap = cv2.VideoCapture(str(video_path))
+    try:
+        if not cap.isOpened():  # a bad path does not raise above; unchecked, we would return []
+            raise OSError(f"Could not open video file: {video_path}")
+        while (grabbed := cap.read())[0]:  # read() -> (ok, frame); ok turns False at end of stream
+            # OpenCV decodes to BGR channel order, while PIL expects RGB.
+            frames.append(Image.fromarray(cv2.cvtColor(grabbed[1], cv2.COLOR_BGR2RGB)))
+    finally:
+        cap.release()
+    return frames
+
+
 def create_skill_segmentation_prompt(
     coarse_goal: str | None = None,
     subtask_labels: list[str] | None = None,
@@ -149,7 +166,7 @@ class Qwen2VL(BaseVLM):
             {
                 "role": "user",
                 "content": [
-                    {"type": "video", "video": str(video_path), "fps": 1.0},
+                    {"type": "video", "video": _load_video_frames(video_path)},
                     {
                         "type": "text",
                         "text": f"Video duration: {duration_str} (exactly {episode_duration:.1f} seconds). Segment into atomic skills. Last skill must end at {episode_duration:.1f}.",
@@ -198,7 +215,7 @@ class Qwen2VL(BaseVLM):
                 {
                     "role": "user",
                     "content": [
-                        {"type": "video", "video": str(video_path), "fps": 1.0},
+                        {"type": "video", "video": _load_video_frames(video_path)},
                         {
                             "type": "text",
                             "text": f"Video duration: {duration_str} (exactly {duration:.1f} seconds). Segment into atomic skills. Last skill must end at {duration:.1f}.",
@@ -322,7 +339,7 @@ class Qwen3VL(BaseVLM):
             {
                 "role": "user",
                 "content": [
-                    {"type": "video", "video": str(video_path), "fps": 1.0},
+                    {"type": "video", "video": _load_video_frames(video_path)},
                     {
                         "type": "text",
                         "text": f"Video duration: {duration_str} (exactly {episode_duration:.1f} seconds). Segment into atomic skills. Last skill must end at {episode_duration:.1f}.",
@@ -370,7 +387,7 @@ class Qwen3VL(BaseVLM):
                 {
                     "role": "user",
                     "content": [
-                        {"type": "video", "video": str(video_path), "fps": 1.0},
+                        {"type": "video", "video": _load_video_frames(video_path)},
                         {
                             "type": "text",
                             "text": f"Video duration: {duration_str} (exactly {duration:.1f} seconds). Segment into atomic skills. Last skill must end at {duration:.1f}.",
@@ -486,7 +503,7 @@ class Qwen35VL(BaseVLM):
             {
                 "role": "user",
                 "content": [
-                    {"type": "video", "video": str(video_path), "fps": 1.0},
+                    {"type": "video", "video": _load_video_frames(video_path)},
                     {
                         "type": "text",
                         "text": f"Video duration: {duration_str} (exactly {episode_duration:.1f} seconds). Segment into atomic skills. Last skill must end at {episode_duration:.1f}.",
@@ -534,7 +551,7 @@ class Qwen35VL(BaseVLM):
                 {
                     "role": "user",
                     "content": [
-                        {"type": "video", "video": str(video_path), "fps": 1.0},
+                        {"type": "video", "video": _load_video_frames(video_path)},
                         {
                             "type": "text",
                             "text": f"Video duration: {duration_str} (exactly {duration:.1f} seconds). Segment into atomic skills. Last skill must end at {duration:.1f}.",