From 6370949e5cf48e64eb8bc1811f51c17a242c66e6 Mon Sep 17 00:00:00 2001 From: Pepijn Date: Mon, 23 Mar 2026 20:15:15 -0700 Subject: [PATCH] feat(viz): add dataset quality visualization tools Add three new analysis scripts for dataset quality insight: - create_frame_grid.py: random frame grid JPG for visual inspection - workspace_density.py: 3D TCP trajectory clustering with K-means - action_consistency.py: KNN-based action-state consistency analysis with action chunk support (default chunk=30) matching policy learning Also update create_progress_videos.py with configurable camera selection. Made-with: Cursor --- .../visualization_tools/action_consistency.py | 510 ++++++++++++++++++ .../visualization_tools/create_frame_grid.py | 178 ++++++ .../create_progress_videos.py | 2 +- .../visualization_tools/workspace_density.py | 496 +++++++++++++++++ 4 files changed, 1185 insertions(+), 1 deletion(-) create mode 100644 examples/dataset/visualization_tools/action_consistency.py create mode 100644 examples/dataset/visualization_tools/create_frame_grid.py create mode 100644 examples/dataset/visualization_tools/workspace_density.py diff --git a/examples/dataset/visualization_tools/action_consistency.py b/examples/dataset/visualization_tools/action_consistency.py new file mode 100644 index 000000000..b2be567f2 --- /dev/null +++ b/examples/dataset/visualization_tools/action_consistency.py @@ -0,0 +1,510 @@ +""" +Action-state consistency analysis for imitation learning datasets. +For each frame, finds K nearest neighbors in state space (from other episodes) +and measures the variance of corresponding actions. High variance at similar +states = contradictory supervision for the policy. + +Outputs a comparison figure with histograms, per-episode curves, and spatial +heatmaps showing where demonstrations conflict. +""" + +import json +from pathlib import Path + +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd +from huggingface_hub import snapshot_download +from matplotlib.colors import LinearSegmentedColormap +from scipy.spatial import cKDTree + +DATASETS = [ + {"repo_id": "lerobot-data-collection/level2_final_quality3", "label": "HQ curated"}, + {"repo_id": "lerobot-data-collection/level12_rac_2_2026-02-08_1", "label": "Full collection"}, +] +OUTPUT_DIR = Path("/Users/pepijnkooijmans/Documents/GitHub_local/progress_videos") +OUTPUT_DIR.mkdir(exist_ok=True) + +MAX_FRAMES = 10_000 +K_NEIGHBORS = 50 +ACTION_CHUNK_SIZE = 30 +SEED = 42 +DPI = 150 + +CONSISTENCY_CMAP = LinearSegmentedColormap.from_list( + "consistency", ["#0a2e0a", "#1a8e1a", "#88cc22", "#ffaa22", "#ff2222"] +) + +# FK chains from OpenArm bimanual URDF (same as workspace_density.py). +LEFT_CHAIN = [ + ((-np.pi / 2, 0, 0), (0, 0.031, 0.698), None), + ((0, 0, 0), (0, 0, 0.0625), (0, 0, 1)), + ((-np.pi / 2, 0, 0), (-0.0301, 0, 0.06), (-1, 0, 0)), + ((0, 0, 0), (0.0301, 0, 0.06625), (0, 0, 1)), + ((0, 0, 0), (0, 0.0315, 0.15375), (0, 1, 0)), + ((0, 0, 0), (0, -0.0315, 0.0955), (0, 0, 1)), + ((0, 0, 0), (0.0375, 0, 0.1205), (1, 0, 0)), + ((0, 0, 0), (-0.0375, 0, 0), (0, -1, 0)), + ((0, 0, 0), (0, 0, 0.1001), None), + ((0, 0, 0), (0, 0, 0.08), None), +] +RIGHT_CHAIN = [ + ((np.pi / 2, 0, 0), (0, -0.031, 0.698), None), + ((0, 0, 0), (0, 0, 0.0625), (0, 0, 1)), + ((np.pi / 2, 0, 0), (-0.0301, 0, 0.06), (-1, 0, 0)), + ((0, 0, 0), (0.0301, 0, 0.06625), (0, 0, 1)), + ((0, 0, 0), (0, 0.0315, 0.15375), (0, 1, 0)), + ((0, 0, 0), (0, -0.0315, 0.0955), (0, 0, 1)), + ((0, 0, 0), (0.0375, 0, 0.1205), (1, 0, 0)), + ((0, 0, 0), (-0.0375, 0, 0), (0, 1, 0)), + ((0, 0, 0), (0, 0, 0.1001), None), + ((0, 0, 0), (0, 0, 0.08), None), +] + + +# ── FK math ───────────────────────────────────────────── + + +def _rot_x(a: float) -> np.ndarray: + c, s = np.cos(a), np.sin(a) + return np.array([[1, 0, 0], [0, c, -s], [0, s, c]]) + + +def _rot_y(a: float) -> np.ndarray: + c, s = np.cos(a), np.sin(a) + return np.array([[c, 0, s], [0, 1, 0], [-s, 0, c]]) + + +def _rot_z(a: float) -> np.ndarray: + c, s = np.cos(a), np.sin(a) + return np.array([[c, -s, 0], [s, c, 0], [0, 0, 1]]) + + +def _tf(rpy: tuple, xyz: tuple) -> np.ndarray: + r, p, y = rpy + mat = np.eye(4) + mat[:3, :3] = _rot_z(y) @ _rot_y(p) @ _rot_x(r) + mat[:3, 3] = xyz + return mat + + +def _batch_axis_rot(axis: tuple, angles: np.ndarray) -> np.ndarray: + n = len(angles) + ax = np.asarray(axis, dtype=np.float64) + ax = ax / np.linalg.norm(ax) + x, y, z = ax + c = np.cos(angles) + s = np.sin(angles) + t = 1 - c + rot = np.zeros((n, 4, 4)) + rot[:, 0, 0] = t * x * x + c + rot[:, 0, 1] = t * x * y - s * z + rot[:, 0, 2] = t * x * z + s * y + rot[:, 1, 0] = t * x * y + s * z + rot[:, 1, 1] = t * y * y + c + rot[:, 1, 2] = t * y * z - s * x + rot[:, 2, 0] = t * x * z - s * y + rot[:, 2, 1] = t * y * z + s * x + rot[:, 2, 2] = t * z * z + c + rot[:, 3, 3] = 1.0 + return rot + + +def batch_fk(chain: list, joint_angles: np.ndarray) -> np.ndarray: + n = joint_angles.shape[0] + tf_batch = np.tile(np.eye(4), (n, 1, 1)) + qi = 0 + for rpy, xyz, axis in chain: + tf_batch = tf_batch @ _tf(rpy, xyz) + if axis is not None: + rot = _batch_axis_rot(axis, joint_angles[:, qi]) + tf_batch = np.einsum("nij,njk->nik", tf_batch, rot) + qi += 1 + return tf_batch[:, :3, 3] + + +# ── Data helpers ──────────────────────────────────────── + + +def _flatten_names(obj: object) -> list[str]: + if isinstance(obj, dict): + out: list[str] = [] + for v in obj.values(): + out.extend(_flatten_names(v)) + return out + if isinstance(obj, (list, tuple)): + out = [] + for item in obj: + if isinstance(item, (list, tuple, dict)): + out.extend(_flatten_names(item)) + else: + out.append(str(item)) + return out + return [str(obj)] + + +def _detect_and_convert(vals: np.ndarray) -> np.ndarray: + mx = np.max(np.abs(vals)) + if mx > 360: + print(f" Unit detection: servo ticks (max={mx:.0f})") + return (vals - 2048) / 2048 * np.pi + if mx > 6.3: + print(f" Unit detection: degrees (max={mx:.1f})") + return np.deg2rad(vals) + print(f" Unit detection: radians (max={mx:.3f})") + return vals.astype(np.float64) + + +def _find_joint_indices(features: dict, state_col: str, n_dim: int) -> tuple[list[int], list[int]]: + feat = features.get("observation.state", features.get(state_col, {})) + names = _flatten_names(feat.get("names", [])) + left_idx: list[int] = [] + right_idx: list[int] = [] + if names and len(names) == n_dim: + names_l = [n.lower() for n in names] + print(f" Feature names: {names[:4]}…{names[-4:]}") + for j in range(1, 8): + for i, nm in enumerate(names_l): + if f"left_joint_{j}" in nm and i not in left_idx: + left_idx.append(i) + break + for i, nm in enumerate(names_l): + if f"right_joint_{j}" in nm and i not in right_idx: + right_idx.append(i) + break + if len(left_idx) == 7 and len(right_idx) == 7: + print(f" Matched by name: left={left_idx} right={right_idx}") + return left_idx, right_idx + if n_dim >= 16: + print(" Falling back to positional: [0:7]=left, [8:15]=right") + return list(range(7)), list(range(8, 15)) + if n_dim >= 14: + print(" Falling back to positional: [0:7]=left, [7:14]=right") + return list(range(7)), list(range(7, 14)) + raise RuntimeError(f"State dim {n_dim} too small for bimanual 7-DOF robot") + + +def download_data(repo_id: str) -> Path: + print(f" Downloading {repo_id} (parquet only) …") + return Path( + snapshot_download( + repo_id=repo_id, + repo_type="dataset", + allow_patterns=["meta/**", "data/**"], + ignore_patterns=["*.mp4", "videos/**"], + ) + ) + + +# ── Data loading ──────────────────────────────────────── + + +def _build_action_chunks( + actions: np.ndarray, episode_ids: np.ndarray, chunk_size: int +) -> tuple[np.ndarray, np.ndarray]: + """ + Build action chunks: for each frame, concatenate the next chunk_size actions + from the same episode. Returns (action_chunks, valid_mask). + Frames too close to episode end to form a full chunk are marked invalid. + """ + n = len(actions) + act_dim = actions.shape[1] + chunks = np.zeros((n, chunk_size * act_dim), dtype=np.float64) + valid = np.zeros(n, dtype=bool) + + for i in range(n): + end = i + chunk_size + if end > n: + continue + # All frames in the chunk must belong to the same episode + if episode_ids[i] != episode_ids[end - 1]: + continue + chunks[i] = actions[i:end].ravel() + valid[i] = True + + return chunks, valid + + +def load_state_action_data(local: Path, max_frames: int, chunk_size: int, rng: np.random.Generator) -> dict: + """ + Load observation.state and action columns, build action chunks of size + chunk_size (matching what the policy learns), subsample, normalize. + """ + info = json.loads((local / "meta" / "info.json").read_text()) + features = info.get("features", {}) + + dfs = [pd.read_parquet(pq) for pq in sorted((local / "data").glob("**/*.parquet"))] + df = pd.concat(dfs, ignore_index=True) + n_total = len(df) + print(f" Total frames: {n_total:,}") + + state_col = next((c for c in df.columns if "observation.state" in c), None) + action_col = next((c for c in df.columns if c == "action"), None) + if state_col is None: + raise RuntimeError(f"No observation.state column. Available: {list(df.columns)}") + if action_col is None: + raise RuntimeError(f"No action column. Available: {list(df.columns)}") + + ep_col = next((c for c in df.columns if c == "episode_index"), None) + if ep_col is None: + raise RuntimeError(f"No episode_index column. Available: {list(df.columns)}") + + state_all = np.stack(df[state_col].values).astype(np.float64) + action_all = np.stack(df[action_col].values).astype(np.float64) + episode_all = df[ep_col].values.astype(np.int64) + + n_dim = state_all.shape[1] + act_dim = action_all.shape[1] + print(f" State dim: {n_dim} Action dim: {act_dim} Chunk size: {chunk_size}") + print(f" Action chunk dim: {chunk_size * act_dim}") + + left_idx, right_idx = _find_joint_indices(features, state_col, n_dim) + + # Build action chunks within episode boundaries + print(" Building action chunks …") + action_chunks, valid = _build_action_chunks(action_all, episode_all, chunk_size) + valid_idx = np.where(valid)[0] + print(f" Valid frames (with full action chunk): {len(valid_idx):,} / {n_total:,}") + + # Subsample from valid frames only + if len(valid_idx) > max_frames: + chosen = np.sort(rng.choice(valid_idx, max_frames, replace=False)) + else: + chosen = valid_idx + print(f" Using {len(chosen):,} frames") + + state_raw = state_all[chosen] + action_raw = action_chunks[chosen] + episode_ids = episode_all[chosen] + + # Z-score normalize for fair KNN distance + state_mean = state_raw.mean(axis=0) + state_std = state_raw.std(axis=0) + state_std[state_std < 1e-8] = 1.0 + state_norm = (state_raw - state_mean) / state_std + + action_mean = action_raw.mean(axis=0) + action_std = action_raw.std(axis=0) + action_std[action_std < 1e-8] = 1.0 + action_norm = (action_raw - action_mean) / action_std + + return { + "state_raw": state_raw, + "state_norm": state_norm, + "action_raw": action_raw, + "action_norm": action_norm, + "episode_ids": episode_ids, + "left_joint_idx": left_idx, + "right_joint_idx": right_idx, + "n_total": n_total, + } + + +# ── KNN consistency ───────────────────────────────────── + + +def compute_consistency( + state_norm: np.ndarray, + action_norm: np.ndarray, + episode_ids: np.ndarray, + k: int, +) -> np.ndarray: + """ + For each frame, find K nearest neighbors in state space from *other* episodes. + Return per-frame action variance (mean across action dims). + """ + n = len(state_norm) + print(f" Building KD-tree on {n:,} state vectors …") + tree = cKDTree(state_norm) + + # Query extra neighbors to have room after filtering same-episode + k_query = min(k * 3, n - 1) + print(f" Querying {k_query} neighbors per frame …") + dists, indices = tree.query(state_norm, k=k_query + 1) + + # indices[:, 0] is the point itself — skip it + indices = indices[:, 1:] + + print(" Computing cross-episode action variance …") + variance = np.zeros(n) + for i in range(n): + ep_i = episode_ids[i] + neighbors = indices[i] + cross_ep = neighbors[episode_ids[neighbors] != ep_i][:k] + if len(cross_ep) < 2: + variance[i] = 0.0 + continue + neighbor_actions = action_norm[cross_ep] + variance[i] = np.mean(np.var(neighbor_actions, axis=0)) + + return variance + + +# ── Visualization ─────────────────────────────────────── + + +def render(results: list[dict], out_path: Path) -> None: + n_ds = len(results) + fig, axes = plt.subplots(3, n_ds, figsize=(9 * n_ds, 18), facecolor="#0d1117") + if n_ds == 1: + axes = axes[:, np.newaxis] + + headline_parts = [] + + for col, r in enumerate(results): + variance = r["variance"] + episode_ids = r["episode_ids"] + tcp_xz = r["tcp_xz"] + label = r["label"] + + median_var = np.median(variance) + mean_var = np.mean(variance) + headline_parts.append(f"{label}: median={median_var:.3f}, mean={mean_var:.3f}") + + # Row 0: Histogram of per-frame action variance + ax = axes[0, col] + ax.set_facecolor("#0d1117") + nonzero = variance[variance > 0] + if len(nonzero) > 0: + bins = np.logspace(np.log10(nonzero.min().clip(1e-6)), np.log10(nonzero.max()), 60) + ax.hist(nonzero, bins=bins, color="#4363d8", alpha=0.8, edgecolor="#222") + ax.set_xscale("log") + ax.axvline(median_var, color="#ff6600", linewidth=2, label=f"median={median_var:.3f}") + ax.axvline(mean_var, color="#ff2222", linewidth=2, linestyle="--", label=f"mean={mean_var:.3f}") + ax.set_xlabel("Action variance (log scale)", color="#888", fontsize=10) + ax.set_ylabel("Frame count", color="#888", fontsize=10) + ax.set_title(f"{label}\nPer-frame action variance distribution", color="white", fontsize=12, pad=10) + ax.tick_params(colors="#555", labelsize=8) + for spine in ax.spines.values(): + spine.set_color("#333") + ax.legend(fontsize=9, facecolor="#1a1a2e", edgecolor="#333", labelcolor="white") + + # Row 1: Per-episode mean inconsistency curve (sorted) + ax = axes[1, col] + ax.set_facecolor("#0d1117") + unique_eps = np.unique(episode_ids) + ep_means = np.array([variance[episode_ids == ep].mean() for ep in unique_eps]) + sorted_means = np.sort(ep_means)[::-1] + ep_x = np.arange(len(sorted_means)) + + p90 = np.percentile(ep_means, 90) + above_p90 = np.sum(ep_means > p90) + + ax.fill_between(ep_x, sorted_means, alpha=0.3, color="#4363d8") + ax.plot(ep_x, sorted_means, color="#4363d8", linewidth=1.2) + ax.axhline( + np.median(ep_means), color="#ff6600", linewidth=1.5, label=f"median={np.median(ep_means):.3f}" + ) + ax.axhline( + p90, color="#ff2222", linewidth=1, linestyle=":", label=f"p90={p90:.3f} ({above_p90} eps above)" + ) + ax.set_xlabel("Episode rank (worst → best)", color="#888", fontsize=10) + ax.set_ylabel("Mean action variance", color="#888", fontsize=10) + ax.set_title( + f"{label}\nPer-episode inconsistency ({len(unique_eps):,} episodes)", + color="white", + fontsize=12, + pad=10, + ) + ax.tick_params(colors="#555", labelsize=8) + for spine in ax.spines.values(): + spine.set_color("#333") + ax.legend(fontsize=9, facecolor="#1a1a2e", edgecolor="#333", labelcolor="white") + + # Row 2: Spatial heatmap (XZ side view) colored by local action variance + ax = axes[2, col] + ax.set_facecolor("#0d1117") + order = np.argsort(variance) + pts = tcp_xz[order] + var_sorted = variance[order] + + vmin = np.percentile(variance[variance > 0], 5) if np.any(variance > 0) else 0 + vmax = np.percentile(variance[variance > 0], 95) if np.any(variance > 0) else 1 + + sc = ax.scatter( + pts[:, 0], + pts[:, 1], + c=var_sorted, + cmap=CONSISTENCY_CMAP, + s=0.5, + alpha=0.6, + vmin=vmin, + vmax=vmax, + rasterized=True, + ) + ax.set_xlabel("X (m)", color="#888", fontsize=10) + ax.set_ylabel("Z (m)", color="#888", fontsize=10) + ax.set_title( + f"{label}\nAction variance by TCP position (XZ side)", + color="white", + fontsize=12, + pad=10, + ) + ax.tick_params(colors="#555", labelsize=8) + for spine in ax.spines.values(): + spine.set_color("#333") + ax.set_aspect("equal") + cbar = fig.colorbar(sc, ax=ax, shrink=0.8, pad=0.02) + cbar.set_label("Action variance", color="white", fontsize=9) + cbar.ax.tick_params(colors="#aaa", labelsize=7) + + fig.suptitle( + f"Action-State Consistency Analysis (action chunk = {ACTION_CHUNK_SIZE})\n" + + " | ".join(headline_parts), + color="white", + fontsize=15, + y=0.99, + ) + plt.tight_layout(rect=[0, 0, 1, 0.96]) + plt.savefig(out_path, dpi=DPI, bbox_inches="tight", facecolor=fig.get_facecolor()) + plt.close() + print(f"\n✓ Saved: {out_path}") + + +# ── Main ──────────────────────────────────────────────── + + +def main() -> None: + rng = np.random.default_rng(SEED) + results = [] + + for ds in DATASETS: + repo_id, label = ds["repo_id"], ds["label"] + print(f"\n{'=' * 60}") + print(f" {label}: {repo_id}") + print(f"{'=' * 60}") + + local = download_data(repo_id) + data = load_state_action_data(local, MAX_FRAMES, ACTION_CHUNK_SIZE, rng) + + variance = compute_consistency( + data["state_norm"], data["action_norm"], data["episode_ids"], K_NEIGHBORS + ) + print( + f" Variance stats: median={np.median(variance):.4f} mean={np.mean(variance):.4f} " + f"p90={np.percentile(variance, 90):.4f}" + ) + + # Compute FK for spatial heatmap (left arm TCP, XZ projection) + print(" Computing FK for spatial heatmap …") + left_raw = data["state_raw"][:, data["left_joint_idx"]] + left_rad = _detect_and_convert(left_raw) + left_tcp = batch_fk(LEFT_CHAIN, left_rad) + tcp_xz = left_tcp[:, [0, 2]] + + results.append( + { + "label": label, + "variance": variance, + "episode_ids": data["episode_ids"], + "tcp_xz": tcp_xz, + "n_total": data["n_total"], + } + ) + + out = OUTPUT_DIR / "action_consistency_comparison.jpg" + render(results, out) + + +if __name__ == "__main__": + main() diff --git a/examples/dataset/visualization_tools/create_frame_grid.py b/examples/dataset/visualization_tools/create_frame_grid.py new file mode 100644 index 000000000..cb4c5ae44 --- /dev/null +++ b/examples/dataset/visualization_tools/create_frame_grid.py @@ -0,0 +1,178 @@ +""" +Create a JPG grid of random frames sampled from a LeRobot video dataset. +Downloads metadata + video chunks from HuggingFace, picks random frames, +decodes them, and tiles into a single image. +""" + +import json +import random +from pathlib import Path + +import cv2 +import numpy as np +import pandas as pd +from huggingface_hub import snapshot_download + +REPO_ID = "lerobot-data-collection/level2_final_quality3" +CAMERA_KEY = "observation.images.base" +GRID_COLS = 15 +GRID_ROWS = 10 +THUMB_WIDTH = 160 +OUTPUT_DIR = Path("/Users/pepijnkooijmans/Documents/GitHub_local/progress_videos") +OUTPUT_DIR.mkdir(exist_ok=True) +SEED = 1 + + +def download_metadata(repo_id: str) -> Path: + """Download only metadata (no videos yet).""" + print(f"[1/3] Downloading metadata for {repo_id} …") + return Path( + snapshot_download( + repo_id=repo_id, + repo_type="dataset", + allow_patterns=["meta/**"], + ignore_patterns=["*.mp4"], + ) + ) + + +def load_video_info(local: Path) -> tuple[str, list[dict], int]: + """Parse info.json and episode parquets. Returns (camera_key, episode_rows, fps).""" + info = json.loads((local / "meta" / "info.json").read_text()) + fps = info["fps"] + features = info["features"] + + video_keys = [k for k, v in features.items() if v.get("dtype") == "video"] + if not video_keys: + raise RuntimeError("No video keys found in dataset features") + + if CAMERA_KEY is not None: + if CAMERA_KEY not in video_keys: + raise RuntimeError(f"CAMERA_KEY='{CAMERA_KEY}' not found. Available: {video_keys}") + cam = CAMERA_KEY + else: + cam = video_keys[0] + print(f" camera='{cam}' all_cams={video_keys} fps={fps}") + + ep_rows = [] + for pq in sorted((local / "meta" / "episodes").glob("**/*.parquet")): + ep_rows.append(pd.read_parquet(pq)) + ep_df = pd.concat(ep_rows, ignore_index=True) + + video_template = info.get( + "video_path", + "videos/{video_key}/chunk-{chunk_index:03d}/file-{file_index:03d}.mp4", + ) + + chunk_col = f"videos/{cam}/chunk_index" + file_col = f"videos/{cam}/file_index" + ts_from = f"videos/{cam}/from_timestamp" + ts_to = f"videos/{cam}/to_timestamp" + if chunk_col not in ep_df.columns: + chunk_col = f"{cam}/chunk_index" + file_col = f"{cam}/file_index" + ts_from = f"{cam}/from_timestamp" + ts_to = f"{cam}/to_timestamp" + + episodes = [] + for _, row in ep_df.iterrows(): + ci = int(row[chunk_col]) + fi = int(row[file_col]) + episodes.append( + { + "episode_index": int(row["episode_index"]), + "chunk_index": ci, + "file_index": fi, + "from_ts": float(row[ts_from]), + "to_ts": float(row[ts_to]), + "video_rel": video_template.format(video_key=cam, chunk_index=ci, file_index=fi), + } + ) + return cam, episodes, fps + + +def pick_random_frames(episodes: list[dict], fps: int, n: int, rng: random.Random) -> list[dict]: + """Pick n random (episode, timestamp) pairs, return sorted by video file for efficient access.""" + picks = [] + for _ in range(n): + ep = rng.choice(episodes) + duration = ep["to_ts"] - ep["from_ts"] + if duration <= 0: + continue + t = ep["from_ts"] + rng.random() * duration + picks.append({**ep, "seek_ts": t}) + picks.sort(key=lambda p: (p["video_rel"], p["seek_ts"])) + return picks + + +def download_video_files(repo_id: str, local: Path, picks: list[dict]) -> None: + """Download only the video files we need.""" + needed = sorted({p["video_rel"] for p in picks}) + print(f"[2/3] Downloading {len(needed)} video file(s) …") + snapshot_download( + repo_id=repo_id, + repo_type="dataset", + local_dir=str(local), + allow_patterns=needed, + ) + + +def extract_frame(video_path: Path, seek_ts: float) -> np.ndarray | None: + """Decode a single frame at the given timestamp.""" + cap = cv2.VideoCapture(str(video_path)) + cap.set(cv2.CAP_PROP_POS_MSEC, seek_ts * 1000.0) + ret, frame = cap.read() + cap.release() + return frame if ret else None + + +def build_grid(frames: list[np.ndarray], cols: int, thumb_w: int) -> np.ndarray: + """Resize frames to uniform thumbnails and tile into a grid.""" + if not frames: + raise RuntimeError("No frames decoded") + + h0, w0 = frames[0].shape[:2] + thumb_h = int(thumb_w * h0 / w0) + + thumbs = [cv2.resize(f, (thumb_w, thumb_h), interpolation=cv2.INTER_AREA) for f in frames] + + rows = [] + for i in range(0, len(thumbs), cols): + row_thumbs = thumbs[i : i + cols] + while len(row_thumbs) < cols: + row_thumbs.append(np.zeros_like(row_thumbs[0])) + rows.append(np.hstack(row_thumbs)) + return np.vstack(rows) + + +def main() -> None: + rng = random.Random(SEED) + n_frames = GRID_COLS * GRID_ROWS + + local = download_metadata(REPO_ID) + cam, episodes, fps = load_video_info(local) + picks = pick_random_frames(episodes, fps, n_frames, rng) + download_video_files(REPO_ID, local, picks) + + print(f"[3/3] Decoding {n_frames} frames …") + frames: list[np.ndarray] = [] + for p in picks: + vp = local / p["video_rel"] + if not vp.exists(): + print(f" SKIP: {p['video_rel']} not found") + continue + frame = extract_frame(vp, p["seek_ts"]) + if frame is not None: + frames.append(frame) + + print(f" Decoded {len(frames)}/{n_frames} frames") + grid = build_grid(frames, GRID_COLS, THUMB_WIDTH) + + safe_name = REPO_ID.replace("/", "_") + out_path = OUTPUT_DIR / f"{safe_name}_grid_{GRID_COLS}x{GRID_ROWS}.jpg" + cv2.imwrite(str(out_path), grid, [cv2.IMWRITE_JPEG_QUALITY, 92]) + print(f"\n✓ Saved: {out_path} ({grid.shape[1]}×{grid.shape[0]})") + + +if __name__ == "__main__": + main() diff --git a/examples/dataset/visualization_tools/create_progress_videos.py b/examples/dataset/visualization_tools/create_progress_videos.py index 20e50dffc..9f673cbe6 100644 --- a/examples/dataset/visualization_tools/create_progress_videos.py +++ b/examples/dataset/visualization_tools/create_progress_videos.py @@ -14,7 +14,7 @@ import pandas as pd from huggingface_hub import snapshot_download DATASETS = [ - {"repo_id": "lerobot-data-collection/level2_final_quality3", "episode": 1100}, + {"repo_id": "lerobot-data-collection/level2_final_quality3", "episode": 250}, ] CAMERA_KEY = ( "observation.images.base" # None = auto-select first camera, or set e.g. "observation.images.top" diff --git a/examples/dataset/visualization_tools/workspace_density.py b/examples/dataset/visualization_tools/workspace_density.py new file mode 100644 index 000000000..602772a95 --- /dev/null +++ b/examples/dataset/visualization_tools/workspace_density.py @@ -0,0 +1,496 @@ +""" +Visualize end-effector workspace density and trajectory clusters for OpenArm datasets. +Downloads joint position data (no videos) from HuggingFace, computes forward +kinematics per episode, clusters trajectories with K-means, and renders +2D projections comparing dataset coverage and multimodality. +""" + +import json +from pathlib import Path + +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd +from huggingface_hub import snapshot_download +from sklearn.cluster import KMeans + +DATASETS = [ + {"repo_id": "lerobot-data-collection/level2_final_quality3", "label": "HQ curated"}, + {"repo_id": "lerobot-data-collection/level12_rac_2_2026-02-08_1", "label": "Full collection"}, +] +OUTPUT_DIR = Path("/Users/pepijnkooijmans/Documents/GitHub_local/progress_videos") +OUTPUT_DIR.mkdir(exist_ok=True) + +N_CLUSTERS = 10 +WAYPOINTS = 50 +SEED = 42 +DPI = 180 + +CLUSTER_COLORS = [ + "#e6194b", + "#3cb44b", + "#4363d8", + "#f58231", + "#911eb4", + "#42d4f4", + "#f032e6", + "#bfef45", + "#fabed4", + "#dcbeff", + "#9a6324", + "#fffac8", + "#800000", + "#aaffc3", + "#808000", + "#ffd8b1", + "#000075", + "#a9a9a9", +] + +# FK chains extracted from OpenArm bimanual URDF. +# Each entry: (rpy, xyz, revolute_axis_or_None). +LEFT_CHAIN = [ + ((-np.pi / 2, 0, 0), (0, 0.031, 0.698), None), + ((0, 0, 0), (0, 0, 0.0625), (0, 0, 1)), + ((-np.pi / 2, 0, 0), (-0.0301, 0, 0.06), (-1, 0, 0)), + ((0, 0, 0), (0.0301, 0, 0.06625), (0, 0, 1)), + ((0, 0, 0), (0, 0.0315, 0.15375), (0, 1, 0)), + ((0, 0, 0), (0, -0.0315, 0.0955), (0, 0, 1)), + ((0, 0, 0), (0.0375, 0, 0.1205), (1, 0, 0)), + ((0, 0, 0), (-0.0375, 0, 0), (0, -1, 0)), + ((0, 0, 0), (0, 0, 0.1001), None), + ((0, 0, 0), (0, 0, 0.08), None), +] +RIGHT_CHAIN = [ + ((np.pi / 2, 0, 0), (0, -0.031, 0.698), None), + ((0, 0, 0), (0, 0, 0.0625), (0, 0, 1)), + ((np.pi / 2, 0, 0), (-0.0301, 0, 0.06), (-1, 0, 0)), + ((0, 0, 0), (0.0301, 0, 0.06625), (0, 0, 1)), + ((0, 0, 0), (0, 0.0315, 0.15375), (0, 1, 0)), + ((0, 0, 0), (0, -0.0315, 0.0955), (0, 0, 1)), + ((0, 0, 0), (0.0375, 0, 0.1205), (1, 0, 0)), + ((0, 0, 0), (-0.0375, 0, 0), (0, 1, 0)), + ((0, 0, 0), (0, 0, 0.1001), None), + ((0, 0, 0), (0, 0, 0.08), None), +] + + +# ── FK math ───────────────────────────────────────────── + + +def _rot_x(a: float) -> np.ndarray: + c, s = np.cos(a), np.sin(a) + return np.array([[1, 0, 0], [0, c, -s], [0, s, c]]) + + +def _rot_y(a: float) -> np.ndarray: + c, s = np.cos(a), np.sin(a) + return np.array([[c, 0, s], [0, 1, 0], [-s, 0, c]]) + + +def _rot_z(a: float) -> np.ndarray: + c, s = np.cos(a), np.sin(a) + return np.array([[c, -s, 0], [s, c, 0], [0, 0, 1]]) + + +def _tf(rpy: tuple, xyz: tuple) -> np.ndarray: + """Build a 4x4 homogeneous transform from URDF rpy + xyz.""" + r, p, y = rpy + mat = np.eye(4) + mat[:3, :3] = _rot_z(y) @ _rot_y(p) @ _rot_x(r) + mat[:3, 3] = xyz + return mat + + +def _batch_axis_rot(axis: tuple, angles: np.ndarray) -> np.ndarray: + """Batched Rodrigues rotation: (n,) angles around a fixed axis → (n, 4, 4).""" + n = len(angles) + ax = np.asarray(axis, dtype=np.float64) + ax = ax / np.linalg.norm(ax) + x, y, z = ax + c = np.cos(angles) + s = np.sin(angles) + t = 1 - c + rot = np.zeros((n, 4, 4)) + rot[:, 0, 0] = t * x * x + c + rot[:, 0, 1] = t * x * y - s * z + rot[:, 0, 2] = t * x * z + s * y + rot[:, 1, 0] = t * x * y + s * z + rot[:, 1, 1] = t * y * y + c + rot[:, 1, 2] = t * y * z - s * x + rot[:, 2, 0] = t * x * z - s * y + rot[:, 2, 1] = t * y * z + s * x + rot[:, 2, 2] = t * z * z + c + rot[:, 3, 3] = 1.0 + return rot + + +def batch_fk(chain: list, joint_angles: np.ndarray) -> np.ndarray: + """Vectorized FK: (n, 7) radians → (n, 3) TCP positions in world frame.""" + n = joint_angles.shape[0] + tf_batch = np.tile(np.eye(4), (n, 1, 1)) + qi = 0 + for rpy, xyz, axis in chain: + tf_batch = tf_batch @ _tf(rpy, xyz) + if axis is not None: + rot = _batch_axis_rot(axis, joint_angles[:, qi]) + tf_batch = np.einsum("nij,njk->nik", tf_batch, rot) + qi += 1 + return tf_batch[:, :3, 3] + + +# ── Data loading ──────────────────────────────────────── + + +def _flatten_names(obj: object) -> list[str]: + """Recursively flatten a names structure (list, dict, or nested) into a flat string list.""" + if isinstance(obj, dict): + out: list[str] = [] + for v in obj.values(): + out.extend(_flatten_names(v)) + return out + if isinstance(obj, (list, tuple)): + out = [] + for item in obj: + if isinstance(item, (list, tuple, dict)): + out.extend(_flatten_names(item)) + else: + out.append(str(item)) + return out + return [str(obj)] + + +def _detect_and_convert(vals: np.ndarray) -> np.ndarray: + """Auto-detect servo ticks / degrees / radians and convert to radians.""" + mx = np.max(np.abs(vals)) + if mx > 360: + print(f" Unit detection: servo ticks (max={mx:.0f})") + return (vals - 2048) / 2048 * np.pi + if mx > 6.3: + print(f" Unit detection: degrees (max={mx:.1f})") + return np.deg2rad(vals) + print(f" Unit detection: radians (max={mx:.3f})") + return vals.astype(np.float64) + + +def _find_joint_indices(features: dict, state_col: str, n_dim: int) -> tuple[list[int], list[int]]: + """Try to find left/right joint indices from info.json feature names.""" + feat = features.get("observation.state", features.get(state_col, {})) + names = _flatten_names(feat.get("names", [])) + + left_idx: list[int] = [] + right_idx: list[int] = [] + if names and len(names) == n_dim: + names_l = [n.lower() for n in names] + print(f" Feature names: {names[:4]}…{names[-4:]}") + for j in range(1, 8): + for i, nm in enumerate(names_l): + if f"left_joint_{j}" in nm and i not in left_idx: + left_idx.append(i) + break + for i, nm in enumerate(names_l): + if f"right_joint_{j}" in nm and i not in right_idx: + right_idx.append(i) + break + + if len(left_idx) == 7 and len(right_idx) == 7: + print(f" Matched by name: left={left_idx} right={right_idx}") + return left_idx, right_idx + if n_dim >= 16: + print(" Falling back to positional: [0:7]=left, [8:15]=right") + return list(range(7)), list(range(8, 15)) + if n_dim >= 14: + print(" Falling back to positional: [0:7]=left, [7:14]=right") + return list(range(7)), list(range(7, 14)) + raise RuntimeError(f"State dim {n_dim} too small for bimanual 7-DOF robot") + + +def download_data(repo_id: str) -> Path: + print(f" Downloading {repo_id} (parquet only) …") + return Path( + snapshot_download( + repo_id=repo_id, + repo_type="dataset", + allow_patterns=["meta/**", "data/**"], + ignore_patterns=["*.mp4", "videos/**"], + ) + ) + + +def resample_trajectory(traj: np.ndarray, n_waypoints: int) -> np.ndarray: + """Resample a (F, 3) trajectory to exactly n_waypoints via linear interpolation.""" + f = traj.shape[0] + if f == n_waypoints: + return traj + old_t = np.linspace(0, 1, f) + new_t = np.linspace(0, 1, n_waypoints) + return np.column_stack([np.interp(new_t, old_t, traj[:, d]) for d in range(3)]) + + +def load_episode_trajectories(local: Path) -> list[dict]: + """ + Load per-episode joint data, compute FK, return list of trajectory dicts. + Each dict: {"left_tcp": (F,3), "right_tcp": (F,3), "episode_index": int}. + Uses all episodes in the dataset for a fair comparison. + """ + info = json.loads((local / "meta" / "info.json").read_text()) + features = info.get("features", {}) + + dfs = [pd.read_parquet(pq) for pq in sorted((local / "data").glob("**/*.parquet"))] + df = pd.concat(dfs, ignore_index=True) + print(f" Total frames: {len(df):,}") + + state_col = next((c for c in df.columns if "observation.state" in c), None) + if state_col is None: + raise RuntimeError(f"No observation.state column. Available: {list(df.columns)}") + + first = df[state_col].iloc[0] + if not hasattr(first, "__len__"): + raise RuntimeError(f"observation.state is scalar ({type(first)}), expected array") + + state = np.stack(df[state_col].values).astype(np.float64) + n_dim = state.shape[1] + print(f" State dim: {n_dim} max|val|: {np.max(np.abs(state)):.1f}") + + left_idx, right_idx = _find_joint_indices(features, state_col, n_dim) + + ep_col = next((c for c in df.columns if c == "episode_index"), None) + if ep_col is None: + raise RuntimeError(f"No episode_index column. Available: {list(df.columns)}") + + episode_ids = df[ep_col].values + unique_eps = np.unique(episode_ids) + print(f" Episodes: {len(unique_eps):,}") + + left_raw = state[:, left_idx] + right_raw = state[:, right_idx] + left_all = _detect_and_convert(left_raw) + right_all = _detect_and_convert(right_raw) + + print(" Computing FK per episode …") + trajectories = [] + for ep_id in unique_eps: + mask = episode_ids == ep_id + left_tcp = batch_fk(LEFT_CHAIN, left_all[mask]) + right_tcp = batch_fk(RIGHT_CHAIN, right_all[mask]) + if len(left_tcp) < 3: + continue + trajectories.append({"left_tcp": left_tcp, "right_tcp": right_tcp, "episode_index": int(ep_id)}) + + print(f" Valid trajectories: {len(trajectories):,}") + return trajectories + + +# ── Clustering ────────────────────────────────────────── + + +def cluster_trajectories( + trajectories: list[dict], n_clusters: int, n_waypoints: int +) -> tuple[np.ndarray, np.ndarray]: + """ + K-means on resampled trajectory features. + Combines left+right TCP into a single feature vector per episode. + Returns (labels, centroid_trajs (k, waypoints, 6), spread_per_cluster (k,) in metres). + Spread = mean per-waypoint Euclidean distance from each trajectory to its centroid. + """ + feat_vecs = [] + for t in trajectories: + left_rs = resample_trajectory(t["left_tcp"], n_waypoints) + right_rs = resample_trajectory(t["right_tcp"], n_waypoints) + feat_vecs.append(np.concatenate([left_rs.ravel(), right_rs.ravel()])) + feat_matrix = np.array(feat_vecs) + + k = min(n_clusters, len(feat_vecs)) + km = KMeans(n_clusters=k, n_init=10, random_state=SEED) + labels = km.fit_predict(feat_matrix) + + centroids_flat = km.cluster_centers_ + centroid_trajs = np.zeros((k, n_waypoints, 6)) + for ci in range(k): + left_flat = centroids_flat[ci, : n_waypoints * 3] + right_flat = centroids_flat[ci, n_waypoints * 3 :] + centroid_trajs[ci, :, :3] = left_flat.reshape(n_waypoints, 3) + centroid_trajs[ci, :, 3:] = right_flat.reshape(n_waypoints, 3) + + # Mean per-waypoint distance to centroid (in metres) for each cluster + spread = np.zeros(k) + for ci in range(k): + members = np.where(labels == ci)[0] + if len(members) == 0: + continue + centroid_left = centroid_trajs[ci, :, :3] + centroid_right = centroid_trajs[ci, :, 3:] + dists = [] + for mi in members: + t = trajectories[mi] + left_rs = resample_trajectory(t["left_tcp"], n_waypoints) + right_rs = resample_trajectory(t["right_tcp"], n_waypoints) + d_left = np.linalg.norm(left_rs - centroid_left, axis=1).mean() + d_right = np.linalg.norm(right_rs - centroid_right, axis=1).mean() + dists.append((d_left + d_right) / 2) + spread[ci] = np.mean(dists) + + return labels, centroid_trajs, spread + + +# ── Visualization ─────────────────────────────────────── + +PROJ_VIEWS = [ + ("XZ (side)", 0, 2, "X (m)", "Z (m)"), + ("XY (top)", 0, 1, "X (m)", "Y (m)"), + ("YZ (front)", 1, 2, "Y (m)", "Z (m)"), +] + + +def render(results: list[dict], out_path: Path) -> None: + """ + 2-row × 3-col grid per dataset (3 projections × 2 datasets). + Trajectory lines colored by cluster, centroid trajectories drawn thick. + """ + n_ds = len(results) + n_proj = len(PROJ_VIEWS) + fig, axes = plt.subplots(n_ds, n_proj, figsize=(7 * n_proj, 7 * n_ds), facecolor="#0d1117") + if n_ds == 1: + axes = axes[np.newaxis, :] + + for row, r in enumerate(results): + trajectories = r["trajectories"] + labels = r["labels"] + centroids = r["centroids"] + k = centroids.shape[0] + + cluster_sizes = np.bincount(labels, minlength=k) + size_order = np.argsort(-cluster_sizes) + pcts = cluster_sizes / len(labels) * 100 + spread = r["spread"] + + for col, (view_name, dim_a, dim_b, xlabel, ylabel) in enumerate(PROJ_VIEWS): + ax = axes[row, col] + ax.set_facecolor("#0d1117") + + for ti, traj in enumerate(trajectories): + color = CLUSTER_COLORS[labels[ti] % len(CLUSTER_COLORS)] + for tcp_key in ("left_tcp", "right_tcp"): + pts = traj[tcp_key] + ax.plot(pts[:, dim_a], pts[:, dim_b], color=color, alpha=0.12, linewidth=0.4) + + for ci in range(k): + color = CLUSTER_COLORS[ci % len(CLUSTER_COLORS)] + left_c = centroids[ci, :, :3] + right_c = centroids[ci, :, 3:] + lw = 1.5 + 2.0 * cluster_sizes[ci] / cluster_sizes.max() + for c_pts in (left_c, right_c): + ax.plot( + c_pts[:, dim_a], + c_pts[:, dim_b], + color=color, + linewidth=lw, + alpha=0.95, + zorder=10, + ) + ax.plot( + c_pts[0, dim_a], + c_pts[0, dim_b], + "o", + color=color, + markersize=4, + zorder=11, + ) + ax.plot( + c_pts[-1, dim_a], + c_pts[-1, dim_b], + "s", + color=color, + markersize=4, + zorder=11, + ) + + ax.set_xlabel(xlabel, color="#888", fontsize=9) + ax.set_ylabel(ylabel, color="#888", fontsize=9) + ax.tick_params(colors="#555", labelsize=7) + for spine in ax.spines.values(): + spine.set_color("#333") + ax.set_aspect("equal") + + mean_spread_cm = np.average(spread, weights=cluster_sizes) * 100 + if col == 0: + ax.set_title( + f"{r['label']} ({r['n_episodes']:,} episodes, {k} clusters, " + f"avg spread {mean_spread_cm:.1f}cm)", + color="white", + fontsize=11, + pad=10, + ) + else: + ax.set_title(view_name, color="#aaa", fontsize=10, pad=8) + + # Cluster size + spread legend on the rightmost panel + legend_ax = axes[row, -1] + for ci in size_order: + color = CLUSTER_COLORS[ci % len(CLUSTER_COLORS)] + spread_cm = spread[ci] * 100 + label = f"C{ci}: {cluster_sizes[ci]} eps ({pcts[ci]:.0f}%) ±{spread_cm:.1f}cm" + legend_ax.plot([], [], color=color, linewidth=3, label=label) + legend_ax.legend( + loc="upper right", + fontsize=7, + frameon=True, + facecolor="#1a1a2e", + edgecolor="#333", + labelcolor="white", + handlelength=1.5, + ) + + fig.suptitle( + "End-Effector Trajectory Clusters (FK · K-means)", + color="white", + fontsize=16, + y=0.98, + ) + plt.tight_layout(rect=[0, 0, 1, 0.95]) + plt.savefig(out_path, dpi=DPI, bbox_inches="tight", facecolor=fig.get_facecolor()) + plt.close() + print(f"\n✓ Saved: {out_path}") + + +# ── Main ──────────────────────────────────────────────── + + +def main() -> None: + results = [] + + for ds in DATASETS: + repo_id, label = ds["repo_id"], ds["label"] + print(f"\n{'=' * 60}") + print(f" {label}: {repo_id}") + print(f"{'=' * 60}") + + local = download_data(repo_id) + trajectories = load_episode_trajectories(local) + labels, centroids, spread = cluster_trajectories(trajectories, N_CLUSTERS, WAYPOINTS) + + cluster_sizes = np.bincount(labels, minlength=centroids.shape[0]) + print(f" Cluster sizes: {sorted(cluster_sizes, reverse=True)}") + for ci in np.argsort(-cluster_sizes): + print( + f" C{ci}: {cluster_sizes[ci]} eps ({cluster_sizes[ci] / len(labels) * 100:.0f}%) " + f"spread ±{spread[ci] * 100:.1f}cm" + ) + + results.append( + { + "label": label, + "trajectories": trajectories, + "labels": labels, + "centroids": centroids, + "spread": spread, + "n_episodes": len(trajectories), + } + ) + + out = OUTPUT_DIR / "workspace_trajectory_clusters.jpg" + render(results, out) + + +if __name__ == "__main__": + main()