diff --git a/src/lerobot/annotations/steerable_pipeline/frames.py b/src/lerobot/annotations/steerable_pipeline/frames.py
index 64ee14caf..5a6a5879c 100644
--- a/src/lerobot/annotations/steerable_pipeline/frames.py
+++ b/src/lerobot/annotations/steerable_pipeline/frames.py
@@ -36,7 +36,7 @@ from typing import Any, Protocol
 import PIL.Image
 import torch
 
-from lerobot.configs import VideoEncoderConfig
+from lerobot.configs import RGBEncoderConfig
 from lerobot.datasets.video_utils import decode_video_frames, reencode_video
 
 from .reader import EpisodeRecord, snap_to_frame
@@ -164,7 +164,9 @@ class VideoFrameProvider:
         # only for video-stored cameras. Image-stored cameras (also in
         # ``camera_keys``) would KeyError, so restrict the list — and the
         # default — to video keys.
-        keys = list(self._meta.video_keys)
+        # Drop depth keys from the video keys: depth cameras are excluded from annotation for now.
+        depth_keys = set(self._meta.depth_keys)
+        keys = [key for key in self._meta.video_keys if key not in depth_keys]
         # Last-resort fallback: if metadata didn't surface any video keys but
         # the caller explicitly named a camera (``--vlm.camera_key=...``),
         # trust them — the key is by definition known to exist on the dataset.
@@ -276,7 +278,7 @@ class VideoFrameProvider:
         from_timestamp = float(ep[f"videos/{self.camera_key}/from_timestamp"])
         to_timestamp = float(ep[f"videos/{self.camera_key}/to_timestamp"])
         src = self.root / self._meta.get_video_file_path(record.episode_index, self.camera_key)
-        encoder = VideoEncoderConfig(vcodec="h264", pix_fmt="yuv420p", g=None, crf=23, preset="ultrafast")
+        encoder = RGBEncoderConfig(vcodec="h264", pix_fmt="yuv420p", g=None, crf=23, preset="ultrafast")
         try:
             reencode_video(
                 src,