From 3dee7bf762ceacb6e027040aa88cf60fe971d6e9 Mon Sep 17 00:00:00 2001 From: Pepijn Date: Tue, 28 Apr 2026 19:48:25 +0200 Subject: [PATCH] feat(annotate): Module 1 samples image frames at fps rate Replace the fixed max_video_frames count with a rate (default 1 fps). A 30 s episode now sends 30 frames; a 5 s episode sends 5; capped at max_video_frames (default 128) to avoid blowing up the payload on long episodes. Override with --module_1.frames_per_second=2.0 for denser sampling, or --module_1.frames_per_second=0.5 for sparser. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/lerobot/annotations/steerable_pipeline/config.py | 9 ++++++++- .../steerable_pipeline/modules/plan_subtasks_memory.py | 7 +++++-- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/src/lerobot/annotations/steerable_pipeline/config.py b/src/lerobot/annotations/steerable_pipeline/config.py index 1aede15b9..b6c463ea6 100644 --- a/src/lerobot/annotations/steerable_pipeline/config.py +++ b/src/lerobot/annotations/steerable_pipeline/config.py @@ -32,7 +32,14 @@ class Module1Config: """ enabled: bool = True - max_video_frames: int = 32 + frames_per_second: float = 1.0 + """Sample one image-frame per ``1/fps`` seconds across the episode for + Module 1's subtask-decomposition prompt. ``1.0`` = 1 fps. Capped by + ``max_video_frames`` to avoid blowing up the request payload.""" + max_video_frames: int = 128 + """Hard cap on the number of frames Module 1 sends. With ``fps=1`` and + a 30 s episode this yields 30 frames. Bumped from 32 since each frame + is small (~30-100 KB PNG when base64'd).""" min_subtask_seconds: float = 1.5 plan_max_steps: int = 8 use_video_url: bool = False diff --git a/src/lerobot/annotations/steerable_pipeline/modules/plan_subtasks_memory.py b/src/lerobot/annotations/steerable_pipeline/modules/plan_subtasks_memory.py index dafddb70a..6c74b3134 100644 --- a/src/lerobot/annotations/steerable_pipeline/modules/plan_subtasks_memory.py +++ b/src/lerobot/annotations/steerable_pipeline/modules/plan_subtasks_memory.py @@ -175,9 +175,12 @@ class PlanSubtasksMemoryModule: else [] ) else: - video_frames = self.frame_provider.video_for_episode( - record, self.config.max_video_frames + target_count = max( + 1, + int(round(episode_duration * self.config.frames_per_second)), ) + target_count = min(target_count, self.config.max_video_frames) + video_frames = self.frame_provider.video_for_episode(record, target_count) video_block = to_video_block(video_frames) content = [*video_block, {"type": "text", "text": prompt}] messages = [{"role": "user", "content": content}]