diff --git a/src/lerobot/annotations/steerable_pipeline/config.py b/src/lerobot/annotations/steerable_pipeline/config.py index 1aede15b9..b6c463ea6 100644 --- a/src/lerobot/annotations/steerable_pipeline/config.py +++ b/src/lerobot/annotations/steerable_pipeline/config.py @@ -32,7 +32,14 @@ class Module1Config: """ enabled: bool = True - max_video_frames: int = 32 + frames_per_second: float = 1.0 + """Sample one image-frame per ``1/frames_per_second`` seconds across the episode for + Module 1's subtask-decomposition prompt. ``1.0`` means one frame per second. Capped by + ``max_video_frames`` to avoid blowing up the request payload.""" + max_video_frames: int = 128 + """Hard cap on the number of frames Module 1 sends. With ``frames_per_second=1.0`` and + a 30 s episode this yields 30 frames. Bumped from 32 since each frame + is small (~30-100 KB PNG when base64'd).""" min_subtask_seconds: float = 1.5 plan_max_steps: int = 8 use_video_url: bool = False diff --git a/src/lerobot/annotations/steerable_pipeline/modules/plan_subtasks_memory.py b/src/lerobot/annotations/steerable_pipeline/modules/plan_subtasks_memory.py index dafddb70a..6c74b3134 100644 --- a/src/lerobot/annotations/steerable_pipeline/modules/plan_subtasks_memory.py +++ b/src/lerobot/annotations/steerable_pipeline/modules/plan_subtasks_memory.py @@ -175,9 +175,12 @@ class PlanSubtasksMemoryModule: else [] ) else: - video_frames = self.frame_provider.video_for_episode( - record, self.config.max_video_frames + target_count = max( + 1, + int(round(episode_duration * self.config.frames_per_second)), ) + target_count = min(target_count, self.config.max_video_frames) + video_frames = self.frame_provider.video_for_episode(record, target_count) video_block = to_video_block(video_frames) content = [*video_block, {"type": "text", "text": prompt}] messages = [{"role": "user", "content": content}]