mirror of
https://github.com/huggingface/lerobot.git
synced 2026-05-21 19:49:49 +00:00
precommit
This commit is contained in:
@@ -13,28 +13,28 @@ import numpy as np
|
|||||||
import pandas as pd
|
import pandas as pd
|
||||||
from huggingface_hub import snapshot_download
|
from huggingface_hub import snapshot_download
|
||||||
|
|
||||||
|
|
||||||
DATASETS = [
|
DATASETS = [
|
||||||
{"repo_id": "lerobot-data-collection/level2_final_quality3", "episode": 1100},
|
{"repo_id": "lerobot-data-collection/level2_final_quality3", "episode": 1100},
|
||||||
]
|
]
|
||||||
CAMERA_KEY = "observation.images.base" # None = auto-select first camera, or set e.g. "observation.images.top"
|
CAMERA_KEY = (
|
||||||
|
"observation.images.base" # None = auto-select first camera, or set e.g. "observation.images.top"
|
||||||
|
)
|
||||||
OUTPUT_DIR = Path("/Users/pepijnkooijmans/Documents/GitHub_local/progress_videos")
|
OUTPUT_DIR = Path("/Users/pepijnkooijmans/Documents/GitHub_local/progress_videos")
|
||||||
OUTPUT_DIR.mkdir(exist_ok=True)
|
OUTPUT_DIR.mkdir(exist_ok=True)
|
||||||
|
|
||||||
# Progress line spans the full video height
|
# Progress line spans the full video height
|
||||||
GRAPH_Y_TOP_FRAC = 0.01
|
GRAPH_Y_TOP_FRAC = 0.01
|
||||||
GRAPH_Y_BOT_FRAC = 0.99
|
GRAPH_Y_BOT_FRAC = 0.99
|
||||||
LINE_THICKNESS = 3
|
LINE_THICKNESS = 3
|
||||||
SHADOW_THICKNESS = 6 # white edge thickness
|
SHADOW_THICKNESS = 6 # white edge thickness
|
||||||
REF_ALPHA = 0.45 # opacity of the 1.0 reference line
|
REF_ALPHA = 0.45 # opacity of the 1.0 reference line
|
||||||
FILL_ALPHA = 0.55 # opacity of the grey fill under the line
|
FILL_ALPHA = 0.55 # opacity of the grey fill under the line
|
||||||
SCORE_FONT_SCALE = 0.8
|
SCORE_FONT_SCALE = 0.8
|
||||||
TASK_FONT_SCALE = 0.55
|
TASK_FONT_SCALE = 0.55
|
||||||
|
|
||||||
|
|
||||||
def download_episode(repo_id: str, episode: int) -> Path:
|
def download_episode(repo_id: str, episode: int) -> Path:
|
||||||
"""Download only the files needed for this episode."""
|
"""Download only the files needed for this episode."""
|
||||||
safe_ep = f"{episode:06d}"
|
|
||||||
# We need: meta/, sarm_progress.parquet, and the relevant video/data chunks.
|
# We need: meta/, sarm_progress.parquet, and the relevant video/data chunks.
|
||||||
# We'll download meta + sarm first, then figure out chunks.
|
# We'll download meta + sarm first, then figure out chunks.
|
||||||
print(f"\n[1/5] Downloading metadata for {repo_id} …")
|
print(f"\n[1/5] Downloading metadata for {repo_id} …")
|
||||||
@@ -79,29 +79,32 @@ def load_episode_meta(local: Path, episode: int) -> dict:
|
|||||||
row = row.iloc[0]
|
row = row.iloc[0]
|
||||||
|
|
||||||
# Extract video chunk/file index for first camera
|
# Extract video chunk/file index for first camera
|
||||||
cam_key = first_cam.replace(".", "/") # some datasets store as nested key
|
|
||||||
# Try both dot and slash variants of the key
|
# Try both dot and slash variants of the key
|
||||||
chunk_col = f"videos/{first_cam}/chunk_index"
|
chunk_col = f"videos/{first_cam}/chunk_index"
|
||||||
file_col = f"videos/{first_cam}/file_index"
|
file_col = f"videos/{first_cam}/file_index"
|
||||||
ts_col = f"videos/{first_cam}/from_timestamp"
|
ts_col = f"videos/{first_cam}/from_timestamp"
|
||||||
to_col = f"videos/{first_cam}/to_timestamp"
|
to_col = f"videos/{first_cam}/to_timestamp"
|
||||||
|
|
||||||
# Some datasets use different column naming
|
# Some datasets use different column naming
|
||||||
if chunk_col not in row.index:
|
if chunk_col not in row.index:
|
||||||
# Try without the 'videos/' prefix
|
# Try without the 'videos/' prefix
|
||||||
chunk_col = f"{first_cam}/chunk_index"
|
chunk_col = f"{first_cam}/chunk_index"
|
||||||
file_col = f"{first_cam}/file_index"
|
file_col = f"{first_cam}/file_index"
|
||||||
ts_col = f"{first_cam}/from_timestamp"
|
ts_col = f"{first_cam}/from_timestamp"
|
||||||
to_col = f"{first_cam}/to_timestamp"
|
to_col = f"{first_cam}/to_timestamp"
|
||||||
if chunk_col not in row.index:
|
if chunk_col not in row.index:
|
||||||
raise RuntimeError(f"Cannot find video metadata columns for {first_cam}.\nAvailable: {list(row.index)}")
|
raise RuntimeError(
|
||||||
|
f"Cannot find video metadata columns for {first_cam}.\nAvailable: {list(row.index)}"
|
||||||
|
)
|
||||||
|
|
||||||
chunk_idx = int(row[chunk_col])
|
chunk_idx = int(row[chunk_col])
|
||||||
file_idx = int(row[file_col])
|
file_idx = int(row[file_col])
|
||||||
from_ts = float(row[ts_col])
|
from_ts = float(row[ts_col])
|
||||||
to_ts = float(row[to_col])
|
to_ts = float(row[to_col])
|
||||||
|
|
||||||
video_template = info.get("video_path", "videos/{video_key}/chunk-{chunk_index:03d}/file-{file_index:03d}.mp4")
|
video_template = info.get(
|
||||||
|
"video_path", "videos/{video_key}/chunk-{chunk_index:03d}/file-{file_index:03d}.mp4"
|
||||||
|
)
|
||||||
video_rel = video_template.format(
|
video_rel = video_template.format(
|
||||||
video_key=first_cam,
|
video_key=first_cam,
|
||||||
chunk_index=chunk_idx,
|
chunk_index=chunk_idx,
|
||||||
@@ -197,13 +200,20 @@ def extract_episode_clip(video_path: Path, from_ts: float, to_ts: float, out_pat
|
|||||||
duration = to_ts - from_ts
|
duration = to_ts - from_ts
|
||||||
print(f"[3/5] Extracting clip [{from_ts:.3f}s → {to_ts:.3f}s] ({duration:.2f}s) …")
|
print(f"[3/5] Extracting clip [{from_ts:.3f}s → {to_ts:.3f}s] ({duration:.2f}s) …")
|
||||||
cmd = [
|
cmd = [
|
||||||
"ffmpeg", "-y",
|
"ffmpeg",
|
||||||
"-ss", str(from_ts),
|
"-y",
|
||||||
"-i", str(video_path),
|
"-ss",
|
||||||
"-t", str(duration),
|
str(from_ts),
|
||||||
"-c:v", "libx264",
|
"-i",
|
||||||
"-preset", "fast",
|
str(video_path),
|
||||||
"-crf", "18",
|
"-t",
|
||||||
|
str(duration),
|
||||||
|
"-c:v",
|
||||||
|
"libx264",
|
||||||
|
"-preset",
|
||||||
|
"fast",
|
||||||
|
"-crf",
|
||||||
|
"18",
|
||||||
"-an",
|
"-an",
|
||||||
str(out_path),
|
str(out_path),
|
||||||
]
|
]
|
||||||
@@ -226,7 +236,6 @@ def precompute_pixels(
|
|||||||
"""
|
"""
|
||||||
frame_indices = progress_data[:, 0].astype(float)
|
frame_indices = progress_data[:, 0].astype(float)
|
||||||
progress_vals = np.clip(progress_data[:, 1].astype(float), 0.0, 1.0)
|
progress_vals = np.clip(progress_data[:, 1].astype(float), 0.0, 1.0)
|
||||||
n = len(frame_indices)
|
|
||||||
|
|
||||||
y_top = int(frame_h * GRAPH_Y_TOP_FRAC)
|
y_top = int(frame_h * GRAPH_Y_TOP_FRAC)
|
||||||
y_bot = int(frame_h * GRAPH_Y_BOT_FRAC)
|
y_bot = int(frame_h * GRAPH_Y_BOT_FRAC)
|
||||||
@@ -254,10 +263,13 @@ def prerender_fill(
|
|||||||
"""Pre-render the full grey fill polygon under the curve as a BGRA image."""
|
"""Pre-render the full grey fill polygon under the curve as a BGRA image."""
|
||||||
y_bot = int(frame_h * GRAPH_Y_BOT_FRAC)
|
y_bot = int(frame_h * GRAPH_Y_BOT_FRAC)
|
||||||
fill_img = np.zeros((frame_h, frame_w, 4), dtype=np.uint8)
|
fill_img = np.zeros((frame_h, frame_w, 4), dtype=np.uint8)
|
||||||
poly = np.concatenate([
|
poly = np.concatenate(
|
||||||
pixels,
|
[
|
||||||
[[pixels[-1][0], y_bot], [pixels[0][0], y_bot]],
|
pixels,
|
||||||
], axis=0).astype(np.int32)
|
[[pixels[-1][0], y_bot], [pixels[0][0], y_bot]],
|
||||||
|
],
|
||||||
|
axis=0,
|
||||||
|
).astype(np.int32)
|
||||||
cv2.fillPoly(fill_img, [poly], color=(128, 128, 128, int(255 * FILL_ALPHA)))
|
cv2.fillPoly(fill_img, [poly], color=(128, 128, 128, int(255 * FILL_ALPHA)))
|
||||||
return fill_img
|
return fill_img
|
||||||
|
|
||||||
@@ -270,9 +282,9 @@ def alpha_composite(base: np.ndarray, overlay_bgra: np.ndarray, x_max: int) -> N
|
|||||||
roi_o = overlay_bgra[:, :x_max]
|
roi_o = overlay_bgra[:, :x_max]
|
||||||
alpha = roi_o[:, :, 3:4].astype(np.float32) / 255.0
|
alpha = roi_o[:, :, 3:4].astype(np.float32) / 255.0
|
||||||
roi_b[:] = np.clip(
|
roi_b[:] = np.clip(
|
||||||
roi_o[:, :, :3].astype(np.float32) * alpha
|
roi_o[:, :, :3].astype(np.float32) * alpha + roi_b.astype(np.float32) * (1.0 - alpha),
|
||||||
+ roi_b.astype(np.float32) * (1.0 - alpha),
|
0,
|
||||||
0, 255,
|
255,
|
||||||
).astype(np.uint8)
|
).astype(np.uint8)
|
||||||
|
|
||||||
|
|
||||||
@@ -302,17 +314,14 @@ def composite_video(
|
|||||||
n_total = int(cv2.VideoCapture(str(clip_path)).get(cv2.CAP_PROP_FRAME_COUNT))
|
n_total = int(cv2.VideoCapture(str(clip_path)).get(cv2.CAP_PROP_FRAME_COUNT))
|
||||||
pixels = precompute_pixels(progress_data, n_total, frame_w, frame_h)
|
pixels = precompute_pixels(progress_data, n_total, frame_w, frame_h)
|
||||||
|
|
||||||
y_top = int(frame_h * GRAPH_Y_TOP_FRAC)
|
y_ref = int(frame_h * GRAPH_Y_TOP_FRAC)
|
||||||
y_bot = int(frame_h * GRAPH_Y_BOT_FRAC)
|
|
||||||
y_ref = y_top
|
|
||||||
|
|
||||||
# Pre-render fill polygon (line is drawn per-frame with live color)
|
# Pre-render fill polygon (line is drawn per-frame with live color)
|
||||||
fill_img = prerender_fill(pixels, frame_w, frame_h)
|
fill_img = prerender_fill(pixels, frame_w, frame_h)
|
||||||
|
|
||||||
# 1.0 reference line overlay (full width, drawn once)
|
# 1.0 reference line overlay (full width, drawn once)
|
||||||
ref_img = np.zeros((frame_h, frame_w, 4), dtype=np.uint8)
|
ref_img = np.zeros((frame_h, frame_w, 4), dtype=np.uint8)
|
||||||
cv2.line(ref_img, (0, y_ref), (frame_w - 1, y_ref),
|
cv2.line(ref_img, (0, y_ref), (frame_w - 1, y_ref), (200, 200, 200, int(255 * REF_ALPHA)), 1, cv2.LINE_AA)
|
||||||
(200, 200, 200, int(255 * REF_ALPHA)), 1, cv2.LINE_AA)
|
|
||||||
|
|
||||||
frame_indices = progress_data[:, 0].astype(int)
|
frame_indices = progress_data[:, 0].astype(int)
|
||||||
progress_vals = progress_data[:, 1].astype(float)
|
progress_vals = progress_data[:, 1].astype(float)
|
||||||
@@ -343,33 +352,52 @@ def composite_video(
|
|||||||
t_cur = (n_drawn - 1) / max(len(progress_vals) - 1, 1)
|
t_cur = (n_drawn - 1) / max(len(progress_vals) - 1, 1)
|
||||||
line_col = progress_color(t_cur)
|
line_col = progress_color(t_cur)
|
||||||
pts = pixels[:n_drawn].reshape(-1, 1, 2).astype(np.int32)
|
pts = pixels[:n_drawn].reshape(-1, 1, 2).astype(np.int32)
|
||||||
cv2.polylines(frame, [pts], isClosed=False,
|
cv2.polylines(
|
||||||
color=(255, 255, 255), thickness=SHADOW_THICKNESS,
|
frame,
|
||||||
lineType=cv2.LINE_AA)
|
[pts],
|
||||||
cv2.polylines(frame, [pts], isClosed=False,
|
isClosed=False,
|
||||||
color=line_col, thickness=LINE_THICKNESS,
|
color=(255, 255, 255),
|
||||||
lineType=cv2.LINE_AA)
|
thickness=SHADOW_THICKNESS,
|
||||||
|
lineType=cv2.LINE_AA,
|
||||||
|
)
|
||||||
|
cv2.polylines(
|
||||||
|
frame, [pts], isClosed=False, color=line_col, thickness=LINE_THICKNESS, lineType=cv2.LINE_AA
|
||||||
|
)
|
||||||
|
|
||||||
# 4. score — bottom right
|
# 4. score — bottom right
|
||||||
if n_drawn > 0:
|
if n_drawn > 0:
|
||||||
score = float(progress_vals[min(n_drawn, len(progress_vals)) - 1])
|
score = float(progress_vals[min(n_drawn, len(progress_vals)) - 1])
|
||||||
score_text = f"{score:.2f}"
|
score_text = f"{score:.2f}"
|
||||||
(tw, th), _ = cv2.getTextSize(score_text, cv2.FONT_HERSHEY_SIMPLEX,
|
(tw, th), _ = cv2.getTextSize(score_text, cv2.FONT_HERSHEY_SIMPLEX, SCORE_FONT_SCALE, 2)
|
||||||
SCORE_FONT_SCALE, 2)
|
|
||||||
sx = frame_w - tw - 12
|
sx = frame_w - tw - 12
|
||||||
sy = frame_h - 12
|
sy = frame_h - 12
|
||||||
# coloured score matching current gradient position
|
# coloured score matching current gradient position
|
||||||
t_cur = (n_drawn - 1) / max(len(progress_vals) - 1, 1)
|
t_cur = (n_drawn - 1) / max(len(progress_vals) - 1, 1)
|
||||||
score_col = progress_color(t_cur)
|
score_col = progress_color(t_cur)
|
||||||
cv2.putText(frame, score_text, (sx, sy), cv2.FONT_HERSHEY_SIMPLEX,
|
cv2.putText(
|
||||||
SCORE_FONT_SCALE, (0, 0, 0), 4, cv2.LINE_AA)
|
frame,
|
||||||
cv2.putText(frame, score_text, (sx, sy), cv2.FONT_HERSHEY_SIMPLEX,
|
score_text,
|
||||||
SCORE_FONT_SCALE, score_col, 2, cv2.LINE_AA)
|
(sx, sy),
|
||||||
|
cv2.FONT_HERSHEY_SIMPLEX,
|
||||||
|
SCORE_FONT_SCALE,
|
||||||
|
(0, 0, 0),
|
||||||
|
4,
|
||||||
|
cv2.LINE_AA,
|
||||||
|
)
|
||||||
|
cv2.putText(
|
||||||
|
frame,
|
||||||
|
score_text,
|
||||||
|
(sx, sy),
|
||||||
|
cv2.FONT_HERSHEY_SIMPLEX,
|
||||||
|
SCORE_FONT_SCALE,
|
||||||
|
score_col,
|
||||||
|
2,
|
||||||
|
cv2.LINE_AA,
|
||||||
|
)
|
||||||
|
|
||||||
# 5. task name — top centre
|
# 5. task name — top centre
|
||||||
if task_name:
|
if task_name:
|
||||||
(tw, _), _ = cv2.getTextSize(task_name, cv2.FONT_HERSHEY_SIMPLEX,
|
(tw, _), _ = cv2.getTextSize(task_name, cv2.FONT_HERSHEY_SIMPLEX, TASK_FONT_SCALE, 1)
|
||||||
TASK_FONT_SCALE, 1)
|
|
||||||
tx = max((frame_w - tw) // 2, 4)
|
tx = max((frame_w - tw) // 2, 4)
|
||||||
draw_text_outlined(frame, task_name, (tx, 22), TASK_FONT_SCALE)
|
draw_text_outlined(frame, task_name, (tx, 22), TASK_FONT_SCALE)
|
||||||
|
|
||||||
@@ -385,21 +413,38 @@ def composite_video(
|
|||||||
# Convert to GIF: full resolution, 12fps, 128-color diff palette (<40MB)
|
# Convert to GIF: full resolution, 12fps, 128-color diff palette (<40MB)
|
||||||
gif_path = out_path.with_suffix(".gif")
|
gif_path = out_path.with_suffix(".gif")
|
||||||
palette = out_path.parent / "_palette.png"
|
palette = out_path.parent / "_palette.png"
|
||||||
r1 = subprocess.run([
|
r1 = subprocess.run( # nosec B607
|
||||||
"ffmpeg", "-y", "-i", str(tmp_path),
|
[
|
||||||
"-vf", f"fps=10,scale={frame_w}:-1:flags=lanczos,palettegen=max_colors=128:stats_mode=diff",
|
"ffmpeg",
|
||||||
"-update", "1",
|
"-y",
|
||||||
str(palette),
|
"-i",
|
||||||
], capture_output=True, text=True)
|
str(tmp_path),
|
||||||
|
"-vf",
|
||||||
|
f"fps=10,scale={frame_w}:-1:flags=lanczos,palettegen=max_colors=128:stats_mode=diff",
|
||||||
|
"-update",
|
||||||
|
"1",
|
||||||
|
str(palette),
|
||||||
|
],
|
||||||
|
capture_output=True,
|
||||||
|
text=True,
|
||||||
|
)
|
||||||
if r1.returncode != 0:
|
if r1.returncode != 0:
|
||||||
print(f" WARNING: palettegen failed:\n{r1.stderr[-500:]}")
|
print(f" WARNING: palettegen failed:\n{r1.stderr[-500:]}")
|
||||||
r2 = subprocess.run([
|
r2 = subprocess.run( # nosec B607
|
||||||
"ffmpeg", "-y",
|
[
|
||||||
"-i", str(tmp_path), "-i", str(palette),
|
"ffmpeg",
|
||||||
"-filter_complex",
|
"-y",
|
||||||
f"fps=10,scale={frame_w}:-1:flags=lanczos[v];[v][1:v]paletteuse=dither=bayer:bayer_scale=3",
|
"-i",
|
||||||
str(gif_path),
|
str(tmp_path),
|
||||||
], capture_output=True, text=True)
|
"-i",
|
||||||
|
str(palette),
|
||||||
|
"-filter_complex",
|
||||||
|
f"fps=10,scale={frame_w}:-1:flags=lanczos[v];[v][1:v]paletteuse=dither=bayer:bayer_scale=3",
|
||||||
|
str(gif_path),
|
||||||
|
],
|
||||||
|
capture_output=True,
|
||||||
|
text=True,
|
||||||
|
)
|
||||||
if r2.returncode != 0:
|
if r2.returncode != 0:
|
||||||
print(f" WARNING: gif encode failed:\n{r2.stderr[-500:]}")
|
print(f" WARNING: gif encode failed:\n{r2.stderr[-500:]}")
|
||||||
tmp_path.unlink(missing_ok=True)
|
tmp_path.unlink(missing_ok=True)
|
||||||
@@ -409,9 +454,9 @@ def composite_video(
|
|||||||
|
|
||||||
def process_dataset(repo_id: str, episode: int):
|
def process_dataset(repo_id: str, episode: int):
|
||||||
safe_name = repo_id.replace("/", "_")
|
safe_name = repo_id.replace("/", "_")
|
||||||
print(f"\n{'='*60}")
|
print(f"\n{'=' * 60}")
|
||||||
print(f"Processing: {repo_id} | episode {episode}")
|
print(f"Processing: {repo_id} | episode {episode}")
|
||||||
print(f"{'='*60}")
|
print(f"{'=' * 60}")
|
||||||
|
|
||||||
# 1. Download metadata
|
# 1. Download metadata
|
||||||
local = download_episode(repo_id, episode)
|
local = download_episode(repo_id, episode)
|
||||||
@@ -448,8 +493,15 @@ def process_dataset(repo_id: str, episode: int):
|
|||||||
|
|
||||||
# 7. Composite (draw line directly on frames)
|
# 7. Composite (draw line directly on frames)
|
||||||
out_path = OUTPUT_DIR / f"{safe_name}_ep{episode}_progress.mp4"
|
out_path = OUTPUT_DIR / f"{safe_name}_ep{episode}_progress.mp4"
|
||||||
final = composite_video(clip_path, progress_data, out_path, actual_fps, frame_h, frame_w,
|
final = composite_video(
|
||||||
task_name=ep_meta.get("task_name", ""))
|
clip_path,
|
||||||
|
progress_data,
|
||||||
|
out_path,
|
||||||
|
actual_fps,
|
||||||
|
frame_h,
|
||||||
|
frame_w,
|
||||||
|
task_name=ep_meta.get("task_name", ""),
|
||||||
|
)
|
||||||
clip_path.unlink(missing_ok=True)
|
clip_path.unlink(missing_ok=True)
|
||||||
print(f"\n✓ Done: {final}")
|
print(f"\n✓ Done: {final}")
|
||||||
return final
|
return final
|
||||||
@@ -464,9 +516,11 @@ if __name__ == "__main__":
|
|||||||
results.append(out)
|
results.append(out)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"\nERROR processing {cfg['repo_id']}: {e}")
|
print(f"\nERROR processing {cfg['repo_id']}: {e}")
|
||||||
import traceback; traceback.print_exc()
|
import traceback
|
||||||
|
|
||||||
print("\n" + "="*60)
|
traceback.print_exc()
|
||||||
|
|
||||||
|
print("\n" + "=" * 60)
|
||||||
print("Output files:")
|
print("Output files:")
|
||||||
for r in results:
|
for r in results:
|
||||||
print(f" {r}")
|
print(f" {r}")
|
||||||
|
|||||||
Reference in New Issue
Block a user