mirror of
https://github.com/huggingface/lerobot.git
synced 2026-05-20 19:19:56 +00:00
feat(annotate): Module 1 samples image frames at fps rate
Replace the fixed max_video_frames count with a sampling rate, frames_per_second (default 1.0 fps). A 30 s episode now sends 30 frames and a 5 s episode sends 5; the count is never fewer than 1 and is capped at max_video_frames (default raised from 32 to 128) to avoid blowing up the payload on long episodes. Override with --module_1.frames_per_second=2.0 for denser sampling, or --module_1.frames_per_second=0.5 for sparser sampling. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -32,7 +32,14 @@ class Module1Config:
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
enabled: bool = True
|
enabled: bool = True
|
||||||
max_video_frames: int = 32
|
frames_per_second: float = 1.0
|
||||||
|
"""Sample one image-frame per ``1/fps`` seconds across the episode for
|
||||||
|
Module 1's subtask-decomposition prompt. ``1.0`` = 1 fps. Capped by
|
||||||
|
``max_video_frames`` to avoid blowing up the request payload."""
|
||||||
|
max_video_frames: int = 128
|
||||||
|
"""Hard cap on the number of frames Module 1 sends. With ``fps=1`` and
|
||||||
|
a 30 s episode this yields 30 frames. Bumped from 32 since each frame
|
||||||
|
is small (~30-100 KB PNG when base64'd)."""
|
||||||
min_subtask_seconds: float = 1.5
|
min_subtask_seconds: float = 1.5
|
||||||
plan_max_steps: int = 8
|
plan_max_steps: int = 8
|
||||||
use_video_url: bool = False
|
use_video_url: bool = False
|
||||||
|
|||||||
@@ -175,9 +175,12 @@ class PlanSubtasksMemoryModule:
|
|||||||
else []
|
else []
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
video_frames = self.frame_provider.video_for_episode(
|
target_count = max(
|
||||||
record, self.config.max_video_frames
|
1,
|
||||||
|
int(round(episode_duration * self.config.frames_per_second)),
|
||||||
)
|
)
|
||||||
|
target_count = min(target_count, self.config.max_video_frames)
|
||||||
|
video_frames = self.frame_provider.video_for_episode(record, target_count)
|
||||||
video_block = to_video_block(video_frames)
|
video_block = to_video_block(video_frames)
|
||||||
content = [*video_block, {"type": "text", "text": prompt}]
|
content = [*video_block, {"type": "text", "text": prompt}]
|
||||||
messages = [{"role": "user", "content": content}]
|
messages = [{"role": "user", "content": content}]
|
||||||
|
|||||||
Reference in New Issue
Block a user