Files
lerobot/scripts/parity_robometer.py
T
2026-05-17 14:59:23 +02:00

233 lines
8.5 KiB
Python

#!/usr/bin/env python
# Copyright 2026 The HuggingFace Inc. team. All rights reserved.
"""Functional parity check: LeRobot Robometer vs. upstream Robometer.
Runs the in-tree :class:`RobometerRewardModel` on the same frames + task that
upstream Robometer was run on, and compares per-frame progress / success
predictions against reference outputs saved by upstream's
``scripts/example_inference_local.py``.
Workflow:
1. In the upstream Robometer environment (where ``robometer`` is importable),
run::
python third_party/robometer/scripts/example_inference_local.py \\
--model-path robometer/Robometer-4B \\
--video /path/to/episode.mp4 \\
--task "Open the drawer" \\
--fps 1.0 \\
--out /tmp/robometer_upstream.npy
This produces:
- ``/tmp/robometer_upstream.npy`` (progress predictions)
- ``/tmp/robometer_upstream_success_probs.npy`` (success probabilities)
2. Extract the exact same frames the upstream script used, save as ``.npz``::
# quick helper: extract frames at the same fps and save as .npz
python -c "
from third_party.robometer.scripts.example_inference_local import load_frames_input
import numpy as np
frames = load_frames_input('/path/to/episode.mp4', fps=1.0, max_frames=512)
np.savez('/tmp/robometer_frames.npz', frames=frames)
"
3. In this LeRobot env, run this script::
uv run python scripts/parity_robometer.py \\
--frames /tmp/robometer_frames.npz \\
--task "Open the drawer" \\
--upstream-progress /tmp/robometer_upstream.npy \\
--upstream-success /tmp/robometer_upstream_success_probs.npy \\
--lerobot-model lilkm/robometer-4b
"""
from __future__ import annotations
import argparse
import sys
import numpy as np
import torch
from lerobot.configs.rewards import RewardModelConfig
from lerobot.rewards.robometer import RobometerConfig, RobometerRewardModel
from lerobot.rewards.robometer.modeling_robometer import decode_progress_outputs
from lerobot.rewards.robometer.processor_robometer import RobometerEncoderProcessorStep
def _load_frames(path: str) -> np.ndarray:
    """Load video frames from a ``.npy`` / ``.npz`` file as ``(T, H, W, C)`` uint8.

    For ``.npz`` archives the ``"frames"`` entry is used when present;
    otherwise the first array stored in the archive is taken.

    Args:
        path: Path to the frames file; the extension selects the loader.

    Returns:
        A 4D ``uint8`` array whose last axis has 1 or 3 entries. Input whose
        last axis is not 1 or 3 but whose second axis is, is transposed from
        ``(T, C, H, W)`` to ``(T, H, W, C)``.

    Raises:
        ValueError: If the extension is not ``.npy`` / ``.npz``, the archive
            contains no arrays, the array is not 4D, or neither the last nor
            the second axis looks like a channel axis.
    """
    if path.endswith(".npy"):
        frames = np.load(path)
    elif path.endswith(".npz"):
        with np.load(path, allow_pickle=False) as npz:
            names = npz.files
            if not names:
                # An empty archive used to surface as a bare StopIteration.
                raise ValueError(f"No arrays found in {path!r}.")
            key = "frames" if "frames" in names else names[0]
            # Copy so the returned array is independent of the archive object.
            frames = npz[key].copy()
    else:
        raise ValueError(f"Frames must be .npy or .npz (got {path!r}).")

    if frames.dtype != np.uint8:
        # NOTE(review): values are clipped to 0..255 and truncated, so float
        # frames normalised to [0, 1] would collapse to 0/1 — confirm inputs
        # are already in the 0..255 range.
        frames = np.clip(frames, 0, 255).astype(np.uint8)

    if frames.ndim != 4:
        raise ValueError(f"Frames must be 4D (T,H,W,C); got shape {frames.shape}.")

    if frames.shape[-1] not in (1, 3):
        # Probably (T,C,H,W) — move the channel axis to the end.
        if frames.shape[1] not in (1, 3):
            raise ValueError(f"Cannot interpret frame channel layout: {frames.shape}.")
        frames = frames.transpose(0, 2, 3, 1)
    return frames
def _run_lerobot(
    frames: np.ndarray,
    task: str,
    model_path: str,
    device: str,
) -> tuple[np.ndarray, np.ndarray]:
    """Score one episode with the in-tree Robometer model.

    The frames and task are encoded as a single sample, pushed through the
    model's logit computation, and decoded into per-frame predictions.

    Args:
        frames: Frames of the episode, passed to the encoder as-is.
        task: Task instruction string paired with the frames.
        model_path: Hub repo id or local path handed to ``from_pretrained``.
        device: Device string stored in the model config.

    Returns:
        ``(progress, success)`` as float32 arrays taken from the first decoded
        sample; ``success`` is an empty array when the decoder yields no
        success probabilities.
    """
    # max_frames=None is passed to both the config and the encoder.
    config = RobometerConfig(pretrained_path=model_path, device=device, max_frames=None)
    model = RobometerRewardModel.from_pretrained(model_path, config=config)
    model.eval()

    model_cfg = model.config
    processor = RobometerEncoderProcessorStep(
        base_model_id=model_cfg.base_model_id,
        use_multi_image=model_cfg.use_multi_image,
        use_per_frame_progress_token=model_cfg.use_per_frame_progress_token,
        max_frames=None,
    )
    encoded = processor.encode_samples([(frames, task)])

    # Move everything that supports ``.to`` onto the device the weights live on.
    target_device = next(model.model.parameters()).device
    inputs = {}
    for name, item in encoded.items():
        if hasattr(item, "to"):
            item = item.to(target_device)
        inputs[name] = item

    with torch.no_grad():
        progress_logits, success_logits = model._compute_rbm_logits(inputs)

    decoded = decode_progress_outputs(
        progress_logits,
        success_logits,
        is_discrete_mode=model_cfg.use_discrete_progress,
    )

    progress = np.asarray(decoded["progress_pred"][0], dtype=np.float32)
    if decoded["success_probs"]:
        success = np.asarray(decoded["success_probs"][0], dtype=np.float32)
    else:
        success = np.array([], dtype=np.float32)
    return progress, success
def _compare(name: str, lerobot: np.ndarray, upstream: np.ndarray, atol: float, rtol: float) -> bool:
    """Print a diff report for two prediction arrays and say whether they match.

    Args:
        name: Label printed in the section header.
        lerobot: Predictions produced by the LeRobot pipeline.
        upstream: Reference predictions from upstream.
        atol: Absolute tolerance forwarded to ``np.allclose``.
        rtol: Relative tolerance forwarded to ``np.allclose``.

    Returns:
        ``False`` on a shape mismatch; otherwise the result of
        ``np.allclose(lerobot, upstream, atol=atol, rtol=rtol)``. Two empty
        arrays of equal shape count as a (vacuous) match.
    """
    print(f"\n=== {name} ===")
    if lerobot.shape != upstream.shape:
        print(f"shape mismatch: lerobot={lerobot.shape} upstream={upstream.shape}")
        return False

    print(f"shape : {lerobot.shape}")
    if lerobot.size == 0:
        # max()/mean() raise on zero-size arrays; there is nothing to diff.
        print("(empty arrays — no statistics to report)")
        return True

    abs_diff = np.abs(lerobot - upstream)
    # The small epsilon keeps the division finite where upstream is exactly 0.
    rel_diff = abs_diff / (np.abs(upstream) + 1e-12)
    print(f"max |Δ| : {abs_diff.max():.3e}")
    print(f"mean |Δ| : {abs_diff.mean():.3e}")
    print(f"max rel |Δ| : {rel_diff.max():.3e}")
    print(f"lerobot[:5] : {lerobot[:5]}")
    print(f"upstream[:5] : {upstream[:5]}")

    within_tol = bool(np.allclose(lerobot, upstream, atol=atol, rtol=rtol))
    print(f"allclose(atol={atol}, rtol={rtol}) -> {within_tol}")
    return within_tol
def _build_parser() -> argparse.ArgumentParser:
    """Build the command-line parser for the parity check."""
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument(
        "--frames",
        required=True,
        help=".npy / .npz file with the exact frames upstream was run on (T,H,W,C uint8).",
    )
    parser.add_argument("--task", required=True, help="Task instruction string.")
    parser.add_argument(
        "--upstream-progress",
        required=True,
        help="Reference progress .npy saved by upstream example_inference_local.py.",
    )
    parser.add_argument(
        "--upstream-success",
        default=None,
        help="Optional reference success_probs .npy. If omitted, success comparison is skipped.",
    )
    parser.add_argument(
        "--lerobot-model",
        default="lilkm/robometer-4b",
        help="LeRobot-format Robometer Hub repo id or local path.",
    )
    parser.add_argument(
        "--device",
        default="cuda" if torch.cuda.is_available() else "cpu",
        help="Device for the LeRobot model (default: cuda if available).",
    )
    parser.add_argument(
        "--atol",
        type=float,
        default=1e-3,
        help="Absolute tolerance for allclose (default: 1e-3; bf16 round-trip headroom).",
    )
    parser.add_argument(
        "--rtol",
        type=float,
        default=1e-2,
        help="Relative tolerance for allclose (default: 1e-2).",
    )
    parser.add_argument(
        "--out-prefix",
        default="lerobot_robometer_outputs",
        help="Save the LeRobot outputs as <prefix>_progress.npy / <prefix>_success.npy.",
    )
    return parser


def main() -> int:
    """Run the parity check from the command line.

    Loads the frames and upstream reference outputs, runs the LeRobot
    pipeline, saves its outputs next to ``--out-prefix`` and compares them
    with the references.

    Returns:
        ``0`` when every comparison is within tolerance, ``1`` when a
        comparison fails (including the case where a success reference was
        given but the model produced no success predictions), and ``2`` when
        the model id does not resolve to a ``RobometerConfig``.
    """
    args = _build_parser().parse_args()

    # 0. Sanity: confirm the LeRobot config is a RobometerConfig.
    cfg = RewardModelConfig.from_pretrained(args.lerobot_model)
    if not isinstance(cfg, RobometerConfig):
        print(f"ERROR: {args.lerobot_model!r} does not resolve to a RobometerConfig.", file=sys.stderr)
        return 2

    # 1. Load frames + task + upstream reference outputs.
    frames = _load_frames(args.frames)
    upstream_progress = np.load(args.upstream_progress).astype(np.float32)
    upstream_success = None
    if args.upstream_success is not None:
        upstream_success = np.load(args.upstream_success).astype(np.float32)
    print(f"Loaded {frames.shape[0]} frames at {frames.shape[1:]}, task={args.task!r}")
    print(f"LeRobot model: {args.lerobot_model} device: {args.device}")

    # 2. Run LeRobot pipeline and persist its outputs.
    progress, success = _run_lerobot(frames, args.task, args.lerobot_model, args.device)
    progress_path = f"{args.out_prefix}_progress.npy"
    np.save(progress_path, progress)
    saved_paths = [progress_path]
    if success.size > 0:
        success_path = f"{args.out_prefix}_success.npy"
        np.save(success_path, success)
        saved_paths.append(success_path)
    # Report only the files that were actually written.
    print(f"Saved LeRobot outputs to {' / '.join(saved_paths)}")

    # 3. Compare to upstream references.
    progress_ok = _compare("progress", progress, upstream_progress, args.atol, args.rtol)
    if upstream_success is None:
        success_ok = True
        print("\n(skipping success comparison — upstream success file not provided)")
    elif success.size == 0:
        # A reference exists but there is nothing to compare it with: that is
        # a parity failure, not a skipped check.
        success_ok = False
        print("\n=== success_probs ===")
        print("upstream success reference provided but LeRobot produced no success predictions")
    else:
        success_ok = _compare("success_probs", success, upstream_success, args.atol, args.rtol)

    print()
    if progress_ok and success_ok:
        print("Parity check passed.")
        return 0
    print("Parity check FAILED.")
    return 1
# Script entry point: main()'s return value becomes the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())