mirror of
https://github.com/huggingface/lerobot.git
synced 2026-05-21 19:49:49 +00:00
Add Robometer reward model
This commit is contained in:
@@ -0,0 +1,164 @@
|
||||
#!/usr/bin/env python
|
||||
# Copyright 2026 The HuggingFace Inc. team. All rights reserved.
|
||||
"""Pinpoint exactly which rows of ``embed_tokens`` / ``lm_head`` differ.
|
||||
|
||||
Useful follow-up to ``scripts/verify_robometer_export.py`` when the verifier
|
||||
reports a small tail of differing keys but you want to know whether the
|
||||
diff is:
|
||||
|
||||
1. Concentrated in the 5 special-token rows added by ``resize_token_embeddings``
|
||||
(expected non-determinism: mean-resize sampling differs between runs).
|
||||
2. Spread across the full vocabulary (would point to a real loading bug).
|
||||
|
||||
Also confirms whether ``apply_upstream_checkpoint`` actually overwrites the
|
||||
embed/lm-head tensors when loading the upstream state dict (vs. silently
|
||||
skipping them due to a key mismatch).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import sys
|
||||
|
||||
import torch
|
||||
from safetensors.torch import load_file
|
||||
|
||||
from lerobot.configs.rewards import RewardModelConfig
|
||||
from lerobot.rewards.robometer import RobometerConfig, RobometerRewardModel
|
||||
from lerobot.rewards.robometer._upstream_loader import (
|
||||
_download_robometer_snapshot,
|
||||
_remap_state_dict_keys,
|
||||
_resolve_checkpoint_safetensors_files,
|
||||
apply_upstream_checkpoint,
|
||||
)
|
||||
|
||||
# State-dict keys of the two tensors this script compares row by row.
# The names follow the model's parameter naming: the language model's token
# embedding weight and the LM head weight.
EMBED_KEY = "model.model.language_model.embed_tokens.weight"
LMHEAD_KEY = "model.lm_head.weight"
|
||||
|
||||
|
||||
def _load_upstream(path: str) -> RobometerRewardModel:
    """Build a CPU Robometer model and load an upstream checkpoint into it.

    Args:
        path: Upstream checkpoint location. It is used both as the config's
            ``pretrained_path`` and as the source handed to
            ``apply_upstream_checkpoint``.

    Returns:
        The populated model, switched to eval mode.
    """
    model = RobometerRewardModel(RobometerConfig(pretrained_path=path, device="cpu"))
    apply_upstream_checkpoint(model, path)
    model.eval()
    return model
|
||||
|
||||
|
||||
def _load_lerobot(path: str) -> RobometerRewardModel:
    """Load a LeRobot-format Robometer checkpoint onto the CPU.

    Args:
        path: Checkpoint location passed to both ``RewardModelConfig.from_pretrained``
            and ``RobometerRewardModel.from_pretrained``.

    Returns:
        The model produced by ``RobometerRewardModel.from_pretrained``.

    Raises:
        TypeError: If the stored config is not a ``RobometerConfig``.
    """
    config = RewardModelConfig.from_pretrained(path)
    if isinstance(config, RobometerConfig):
        # Force the config to point at this checkpoint and stay on CPU before loading.
        config.pretrained_path = path
        config.device = "cpu"
        return RobometerRewardModel.from_pretrained(path, config=config)
    raise TypeError(f"Expected RobometerConfig, got {type(config)}")
|
||||
|
||||
|
||||
def _inspect_upstream_state_dict(upstream_path: str, model: RobometerRewardModel) -> None:
    """Dump the upstream state-dict view of the embed/lm-head tensors.

    Loads the raw upstream safetensors (pre-remap), runs the remapper, and
    reports whether the embed/lm-head keys survive into the merged dict that
    eventually hits ``model.load_state_dict``.

    Args:
        upstream_path: Upstream checkpoint identifier handed to
            ``_download_robometer_snapshot``.
        model: Model whose ``state_dict()`` keys define the expected key set and
            which is passed to ``_remap_state_dict_keys``.

    The report is printed to stdout; nothing is returned.
    """
    snapshot_dir = _download_robometer_snapshot(upstream_path)
    files = _resolve_checkpoint_safetensors_files(snapshot_dir)

    # Merge every shard into one dict; on duplicate keys a later shard wins.
    merged: dict[str, torch.Tensor] = {}
    for path in files:
        merged.update(load_file(str(path)))
    remapped = _remap_state_dict_keys(merged, model)

    # Build both key sets once instead of re-materialising them for every report line.
    expected = set(model.state_dict())
    remapped_keys = set(remapped)

    print(f"\n=== Upstream state-dict inspection (snapshot at {snapshot_dir}) ===")
    print(f"raw keys (before remap) : {len(merged)}")
    print(f"keys after remap : {len(remapped)}")
    print(f"model expects (state_dict): {len(expected)}")

    present_after_remap = remapped_keys & expected
    print(f"keys present after remap : {len(present_after_remap)}")

    missing_keys = expected - remapped_keys
    print(f"keys missing from remap : {len(missing_keys)}")
    if missing_keys:
        # Sort before sampling: set iteration order of strings varies between
        # interpreter runs, which would make two reports impossible to compare.
        sample = sorted(missing_keys)[:10]
        print(f" sample missing keys : {sample}")

    unexpected_keys = remapped_keys - expected
    print(f"keys unexpected by model : {len(unexpected_keys)}")
    if unexpected_keys:
        sample = sorted(unexpected_keys)[:10]
        print(f" sample unexpected keys : {sample}")

    for key in (EMBED_KEY, LMHEAD_KEY):
        present = key in remapped
        shape = tuple(remapped[key].shape) if present else None
        print(f" {key:60s} present={present}, shape={shape}")
|
||||
|
||||
|
||||
def _diff_embed(name: str, a: torch.Tensor, b: torch.Tensor, special_token_count: int) -> None:
    """Print a per-row comparison of two weight matrices.

    Both tensors are cast to float32 and compared element-wise. The report
    contains the global maximum absolute difference, the number of rows that
    differ, and (when any row differs) how those rows split between the
    trailing ``special_token_count`` rows and the rows before them.

    Args:
        name: Label printed in the report header.
        a: First tensor; its row norms are printed as ``upstream_norm``.
        b: Second tensor; its row norms are printed as ``lerobot_norm``.
        special_token_count: Number of trailing rows treated as special-token
            rows. Values outside ``[0, number of rows]`` are clamped.

    The report is printed to stdout; nothing is returned. A shape mismatch, or
    an input that is not a non-empty 2-D tensor, is reported and skipped.
    """
    a = a.float()
    b = b.float()
    if a.shape != b.shape:
        print(f"❌ {name} shape mismatch: {tuple(a.shape)} vs {tuple(b.shape)}")
        return
    if a.ndim != 2 or a.numel() == 0:
        # The row-wise reductions below need at least one row and one column.
        print(f"❌ {name} is not a non-empty 2-D tensor: shape {tuple(a.shape)}")
        return

    abs_diff = (a - b).abs()
    per_row_max = abs_diff.max(dim=1).values
    # Detect differing rows with ``!=`` rather than ``per_row_max > 0``: a NaN
    # difference compares False against 0 and would be silently dropped.
    row_differs = (a != b).any(dim=1)
    nz_rows = row_differs.nonzero(as_tuple=True)[0].tolist()

    print(f"\n=== {name} (shape {tuple(a.shape)}) ===")
    print(f"global max|Δ| = {abs_diff.max().item():.3e}")
    print(f"rows with any diff = {len(nz_rows)}")
    if not nz_rows:
        return

    print(f" first nonzero rows = {nz_rows[:10]}")
    print(f" last nonzero rows = {nz_rows[-10:]}")

    vocab_size = a.shape[0]
    # Clamp so an oversized or negative count cannot produce negative row
    # indices, which would wrap around or raise when indexing.
    special_count = min(max(special_token_count, 0), vocab_size)
    base_vocab = vocab_size - special_count

    # Special rows are the contiguous tail, so an integer comparison is enough.
    in_special = sum(1 for r in nz_rows if r >= base_vocab)
    out_special = len(nz_rows) - in_special
    print(
        f" diffs in special-token rows ({base_vocab}..{vocab_size - 1}): {in_special}/{special_count}"
    )
    print(f" diffs in base-vocab rows (0..{base_vocab - 1}) : {out_special}")
    for r in range(base_vocab, vocab_size):
        print(
            f" row {r}: max|Δ|={per_row_max[r].item():.3e}, "
            f"upstream_norm={a[r].norm().item():.3e}, lerobot_norm={b[r].norm().item():.3e}"
        )
|
||||
|
||||
|
||||
def main() -> int:
    """Parse the CLI, load both checkpoints and print the embed/lm-head diff.

    Reads ``--upstream``, ``--lerobot`` and ``--special-token-count`` from the
    command line, loads one model from each location, prints the upstream
    state-dict inspection, then diffs ``EMBED_KEY`` and ``LMHEAD_KEY`` between
    the two state dicts.

    Returns:
        Always ``0``.
    """
    cli = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    for flag in ("--upstream", "--lerobot"):
        cli.add_argument(flag, required=True)
    cli.add_argument(
        "--special-token-count",
        type=int,
        default=5,
        help="Number of special tokens Robometer adds. Defaults to len(ROBOMETER_SPECIAL_TOKENS)=5.",
    )
    opts = cli.parse_args()

    print(f"Loading upstream: {opts.upstream}")
    upstream_model = _load_upstream(opts.upstream)
    print(f"Loading LeRobot-format: {opts.lerobot}")
    lerobot_model = _load_lerobot(opts.lerobot)

    _inspect_upstream_state_dict(opts.upstream, upstream_model)

    upstream_sd = upstream_model.state_dict()
    lerobot_sd = lerobot_model.state_dict()

    for key in (EMBED_KEY, LMHEAD_KEY):
        in_upstream = key in upstream_sd
        in_lerobot = key in lerobot_sd
        if in_upstream and in_lerobot:
            _diff_embed(key, upstream_sd[key], lerobot_sd[key], opts.special_token_count)
        else:
            # Report which side lacks the tensor and move on to the next key.
            print(f"❌ key missing: {key} (upstream={in_upstream}, lerobot={in_lerobot})")

    return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Use main()'s return value as the process exit status.
    raise SystemExit(main())
|
||||
Reference in New Issue
Block a user