feat(annotate): Module 1 samples image frames at a configurable fps

Replace the fixed frame count (previously max_video_frames) with a sampling rate, frames_per_second (default 1.0 fps). A 30 s episode now sends 30 frames and a 5 s episode sends 5. The count is never less than 1 frame and is capped at max_video_frames (default raised from 32 to 128) to avoid blowing up the payload on long episodes. Override with --module_1.frames_per_second=2.0 for denser sampling, or --module_1.frames_per_second=0.5 for sparser.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-20 19:19:56 +00:00 · 2026-04-28 19:48:25 +02:00
parent 33053e6996
commit 3dee7bf762
2 changed files with 13 additions and 3 deletions
@@ -32,7 +32,14 @@ class Module1Config:
    """
    enabled: bool = True
-    max_video_frames: int = 32
+    frames_per_second: float = 1.0
    """Sample one image-frame per ``1/fps`` seconds across the episode for
    Module 1's subtask-decomposition prompt. ``1.0`` = 1 fps. Capped by
    ``max_video_frames`` to avoid blowing up the request payload."""
    max_video_frames: int = 128
    """Hard cap on the number of frames Module 1 sends. With ``fps=1`` and
    a 30 s episode this yields 30 frames. Bumped from 32 since each frame
    is small (~30-100 KB PNG when base64'd)."""
    min_subtask_seconds: float = 1.5
    plan_max_steps: int = 8
    use_video_url: bool = False
@@ -175,9 +175,12 @@ class PlanSubtasksMemoryModule:
                else []
            )
        else:
-            video_frames = self.frame_provider.video_for_episode(
+            target_count = max(
-                record, self.config.max_video_frames
+                1,
                int(round(episode_duration * self.config.frames_per_second)),
            )
            target_count = min(target_count, self.config.max_video_frames)
            video_frames = self.frame_provider.video_for_episode(record, target_count)
            video_block = to_video_block(video_frames)
        content = [*video_block, {"type": "text", "text": prompt}]
        messages = [{"role": "user", "content": content}]