From f6a13b133891729d1c69149a2f4456747b699a9a Mon Sep 17 00:00:00 2001 From: Khalil Meftah Date: Sun, 17 May 2026 14:59:23 +0200 Subject: [PATCH] Add Robometer reward model --- .../create_robometer_progress_videos.py | 244 +++++++++ pyproject.toml | 2 + scripts/debug_robometer_embed_diff.py | 164 ++++++ scripts/extract_libero_episode_for_parity.py | 168 ++++++ scripts/parity_robometer.py | 232 +++++++++ scripts/verify_robometer_export.py | 149 ++++++ src/lerobot/configs/rewards.py | 13 +- src/lerobot/rewards/__init__.py | 2 + src/lerobot/rewards/factory.py | 18 +- src/lerobot/rewards/robometer/__init__.py | 19 + .../rewards/robometer/_upstream_loader.py | 229 ++++++++ .../robometer/configuration_robometer.py | 162 ++++++ .../rewards/robometer/modeling_robometer.py | 493 ++++++++++++++++++ .../rewards/robometer/processor_robometer.py | 348 +++++++++++++ .../scripts/lerobot_export_robometer.py | 151 ++++++ .../templates/lerobot_modelcard_template.md | 2 - .../lerobot_rewardmodel_modelcard_template.md | 2 + tests/rewards/test_robometer.py | 299 +++++++++++ uv.lock | 14 +- 19 files changed, 2701 insertions(+), 10 deletions(-) create mode 100644 examples/dataset/create_robometer_progress_videos.py create mode 100644 scripts/debug_robometer_embed_diff.py create mode 100644 scripts/extract_libero_episode_for_parity.py create mode 100644 scripts/parity_robometer.py create mode 100644 scripts/verify_robometer_export.py create mode 100644 src/lerobot/rewards/robometer/__init__.py create mode 100644 src/lerobot/rewards/robometer/_upstream_loader.py create mode 100644 src/lerobot/rewards/robometer/configuration_robometer.py create mode 100644 src/lerobot/rewards/robometer/modeling_robometer.py create mode 100644 src/lerobot/rewards/robometer/processor_robometer.py create mode 100644 src/lerobot/scripts/lerobot_export_robometer.py create mode 100644 tests/rewards/test_robometer.py diff --git a/examples/dataset/create_robometer_progress_videos.py b/examples/dataset/create_robometer_progress_videos.py new file mode 100644 index 000000000..ac90108b9 --- /dev/null +++ b/examples/dataset/create_robometer_progress_videos.py @@ -0,0 +1,244 @@ +#!/usr/bin/env python + +# Copyright 2026 The HuggingFace Inc. team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Create videos with a Robometer progress overlay for one LeRobot dataset episode. + +This is a lightweight smoke-test utility for Robometer checkpoints. It downloads +one episode video, samples a small number of frames, runs Robometer on those +frames, and reuses the progress overlay renderer from +``examples/dataset/create_progress_videos.py``. + +Example: + + uv run python examples/dataset/create_robometer_progress_videos.py \\ + --repo-id lerobot/aloha_mobile_cabinet \\ + --episode 0 \\ + --reward-model-path lilkm/robometer-4b \\ + --device cuda +""" + +from __future__ import annotations + +import argparse +import logging +from pathlib import Path + +import cv2 +import numpy as np +import torch + +from examples.dataset.create_progress_videos import ( + composite_progress_video, + convert_mp4_to_gif, + download_episode_metadata, + download_video_file, + load_episode_meta, +) +from lerobot.rewards.robometer import RobometerConfig, RobometerRewardModel +from lerobot.rewards.robometer.modeling_robometer import decode_progress_outputs +from lerobot.rewards.robometer.processor_robometer import RobometerEncoderProcessorStep +from lerobot.utils.utils import init_logging + + +def _default_device() -> str: + return "cuda" if torch.cuda.is_available() else "cpu" + + +def sample_episode_frames( + video_path: Path, + *, + from_timestamp: float, + to_timestamp: float, + fps: float, + num_frames: int, +) -> tuple[np.ndarray, np.ndarray]: + """Sample RGB frames uniformly from an episode video segment. + + Returns: + ``(frames, frame_indices)`` where ``frames`` is ``(T,H,W,C)`` uint8 RGB + and ``frame_indices`` are local episode frame indices used for overlay. + """ + if num_frames <= 0: + raise ValueError(f"num_frames must be positive, got {num_frames}") + + duration_seconds = to_timestamp - from_timestamp + total_frames = max(int(round(duration_seconds * fps)), 1) + frame_indices = np.linspace(0, total_frames - 1, num=min(num_frames, total_frames), dtype=int) + + capture = cv2.VideoCapture(str(video_path)) + frames: list[np.ndarray] = [] + try: + for frame_idx in frame_indices: + timestamp = from_timestamp + frame_idx / fps + capture.set(cv2.CAP_PROP_POS_MSEC, timestamp * 1000) + ret, frame_bgr = capture.read() + if not ret: + logging.warning("Could not read frame %d at %.3fs", frame_idx, timestamp) + continue + frames.append(cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2RGB)) + finally: + capture.release() + + if not frames: + raise RuntimeError(f"No frames could be sampled from {video_path}") + + return np.stack(frames), frame_indices[: len(frames)] + + +def predict_robometer_progress( + frames: np.ndarray, + *, + task: str, + reward_model_path: str, + device: str, +) -> list[float]: + """Run Robometer and return per-sampled-frame progress predictions.""" + config = RobometerConfig(pretrained_path=reward_model_path, device=device, max_frames=None) + model = RobometerRewardModel.from_pretrained(reward_model_path, config=config) + + encoder = RobometerEncoderProcessorStep( + base_model_id=model.config.base_model_id, + use_multi_image=model.config.use_multi_image, + use_per_frame_progress_token=model.config.use_per_frame_progress_token, + max_frames=None, + ) + batch = encoder.encode_samples([(frames, task)]) + + model_device = next(model.model.parameters()).device + inputs = {key: value.to(model_device) if hasattr(value, "to") else value for key, value in batch.items()} + + model.eval() + with torch.no_grad(): + progress_logits, success_logits = model._compute_rbm_logits(inputs) + + decoded = decode_progress_outputs( + progress_logits, + success_logits, + is_discrete_mode=model.config.use_discrete_progress, + ) + return decoded["progress_pred"][0] + + +def process_dataset( + repo_id: str, + episode: int, + reward_model_path: str, + device: str, + camera_key: str | None, + output_dir: Path, + num_frames: int, + task: str | None = None, + create_gif: bool = False, +) -> Path: + safe_name = repo_id.replace("/", "_") + logging.info("Processing %s episode %d with Robometer %s", repo_id, episode, reward_model_path) + + local_path = download_episode_metadata(repo_id, episode) + episode_meta = load_episode_meta(local_path, episode, camera_key) + video_path = download_video_file(repo_id, local_path, episode_meta["video_rel"]) + + task_name = task or episode_meta.get("task_name", "") + if not task_name: + raise ValueError("No task found in dataset metadata. Pass --task explicitly.") + + frames, frame_indices = sample_episode_frames( + video_path, + from_timestamp=episode_meta["from_ts"], + to_timestamp=episode_meta["to_ts"], + fps=episode_meta["fps"], + num_frames=num_frames, + ) + logging.info("Sampled %d frames for Robometer inference", len(frames)) + + progress = predict_robometer_progress( + frames, + task=task_name, + reward_model_path=reward_model_path, + device=device, + ) + progress_data = np.stack([frame_indices, np.asarray(progress, dtype=np.float32)], axis=1) + logging.info("Progress predictions: %s", [round(float(value), 3) for value in progress]) + + output_path = output_dir / f"{safe_name}_ep{episode}_robometer_progress.mp4" + final_path = composite_progress_video( + video_path=video_path, + from_timestamp=episode_meta["from_ts"], + to_timestamp=episode_meta["to_ts"], + progress_data=progress_data, + output_path=output_path, + fps=episode_meta["fps"], + task_name=task_name, + ) + + if create_gif: + final_path = convert_mp4_to_gif(final_path) + return final_path + + +def main() -> None: + parser = argparse.ArgumentParser( + description="Create MP4/GIF videos with Robometer progress overlay for dataset episodes." + ) + parser.add_argument("--repo-id", required=True, help="Hugging Face LeRobot dataset repo id.") + parser.add_argument("--episode", type=int, required=True, help="Episode index to visualize.") + parser.add_argument( + "--reward-model-path", + default="lilkm/robometer-4b", + help="Robometer checkpoint path or Hub repo id (e.g. lilkm/robometer-4b).", + ) + parser.add_argument("--device", default=_default_device(), help="Torch device for Robometer inference.") + parser.add_argument( + "--camera-key", + default=None, + help="Camera observation key (e.g. observation.images.top). Auto-selects first camera if omitted.", + ) + parser.add_argument( + "--task", default=None, help="Task description override if dataset metadata lacks one." + ) + parser.add_argument( + "--num-frames", + type=int, + default=8, + help="Number of episode frames to sample for Robometer inference.", + ) + parser.add_argument( + "--output-dir", + type=Path, + default=Path("progress_videos"), + help="Directory to write output files.", + ) + parser.add_argument("--gif", action="store_true", help="Also generate a GIF from the MP4 output.") + args = parser.parse_args() + + init_logging() + args.output_dir.mkdir(parents=True, exist_ok=True) + + result = process_dataset( + repo_id=args.repo_id, + episode=args.episode, + reward_model_path=args.reward_model_path, + device=args.device, + camera_key=args.camera_key, + output_dir=args.output_dir, + num_frames=args.num_frames, + task=args.task, + create_gif=args.gif, + ) + logging.info("Output: %s", result) + + +if __name__ == "__main__": + main() diff --git a/pyproject.toml b/pyproject.toml index f983134ab..0f5fa2362 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -204,6 +204,7 @@ groot = [ "flash-attn>=2.5.9,<3.0.0 ; sys_platform != 'darwin'" ] sarm = ["lerobot[transformers-dep]", "pydantic>=2.0.0,<3.0.0", "faker>=33.0.0,<35.0.0", "lerobot[matplotlib-dep]", "lerobot[qwen-vl-utils-dep]"] +robometer = ["lerobot[transformers-dep]", "lerobot[qwen-vl-utils-dep]", "lerobot[peft-dep]"] xvla = ["lerobot[transformers-dep]"] eo1 = ["lerobot[transformers-dep]", "lerobot[qwen-vl-utils-dep]"] hilserl = ["lerobot[transformers-dep]", "lerobot[dataset]", "gym-hil>=0.1.13,<0.2.0", "lerobot[grpcio-dep]", "lerobot[placo-dep]"] @@ -302,6 +303,7 @@ lerobot-imgtransform-viz="lerobot.scripts.lerobot_imgtransform_viz:main" lerobot-edit-dataset="lerobot.scripts.lerobot_edit_dataset:main" lerobot-setup-can="lerobot.scripts.lerobot_setup_can:main" lerobot-rollout="lerobot.scripts.lerobot_rollout:main" +lerobot-export-robometer="lerobot.scripts.lerobot_export_robometer:main" # ---------------- Tool Configurations ---------------- diff --git a/scripts/debug_robometer_embed_diff.py b/scripts/debug_robometer_embed_diff.py new file mode 100644 index 000000000..e55c9bc95 --- /dev/null +++ b/scripts/debug_robometer_embed_diff.py @@ -0,0 +1,164 @@ +#!/usr/bin/env python +# Copyright 2026 The HuggingFace Inc. team. All rights reserved. +"""Pinpoint exactly which rows of ``embed_tokens`` / ``lm_head`` differ. + +Useful follow-up to ``scripts/verify_robometer_export.py`` when the verifier +reports a small tail of differing keys but you want to know whether the +diff is: + +1. Concentrated in the 5 special-token rows added by ``resize_token_embeddings`` + (expected non-determinism: mean-resize sampling differs between runs). +2. Spread across the full vocabulary (would point to a real loading bug). + +Also confirms whether ``apply_upstream_checkpoint`` actually overwrites the +embed/lm-head tensors when loading the upstream state dict (vs. silently +skipping them due to a key mismatch). +""" + +from __future__ import annotations + +import argparse +import sys + +import torch +from safetensors.torch import load_file + +from lerobot.configs.rewards import RewardModelConfig +from lerobot.rewards.robometer import RobometerConfig, RobometerRewardModel +from lerobot.rewards.robometer._upstream_loader import ( + _download_robometer_snapshot, + _remap_state_dict_keys, + _resolve_checkpoint_safetensors_files, + apply_upstream_checkpoint, +) + +EMBED_KEY = "model.model.language_model.embed_tokens.weight" +LMHEAD_KEY = "model.lm_head.weight" + + +def _load_upstream(path: str) -> RobometerRewardModel: + cfg = RobometerConfig(pretrained_path=path, device="cpu") + model = RobometerRewardModel(cfg) + apply_upstream_checkpoint(model, path) + model.eval() + return model + + +def _load_lerobot(path: str) -> RobometerRewardModel: + cfg = RewardModelConfig.from_pretrained(path) + if not isinstance(cfg, RobometerConfig): + raise TypeError(f"Expected RobometerConfig, got {type(cfg)}") + cfg.pretrained_path = path + cfg.device = "cpu" + return RobometerRewardModel.from_pretrained(path, config=cfg) + + +def _inspect_upstream_state_dict(upstream_path: str, model: RobometerRewardModel) -> None: + """Dump the upstream state-dict view of the embed/lm-head tensors. + + Loads the raw upstream safetensors (pre-remap), runs the remapper, and + reports whether the embed/lm-head keys survive into the merged dict that + eventually hits ``model.load_state_dict``. + """ + snapshot_dir = _download_robometer_snapshot(upstream_path) + files = _resolve_checkpoint_safetensors_files(snapshot_dir) + merged: dict[str, torch.Tensor] = {} + for path in files: + merged.update(load_file(str(path))) + remapped = _remap_state_dict_keys(merged, model) + + print(f"\n=== Upstream state-dict inspection (snapshot at {snapshot_dir}) ===") + print(f"raw keys (before remap) : {len(merged)}") + print(f"keys after remap : {len(remapped)}") + print(f"model expects (state_dict): {len(model.state_dict())}") + + expected = set(model.state_dict()) + present_after_remap = set(remapped) & expected + print(f"keys present after remap : {len(present_after_remap)}") + + missing_keys = expected - set(remapped) + print(f"keys missing from remap : {len(missing_keys)}") + if missing_keys: + sample = list(missing_keys)[:10] + print(f" sample missing keys : {sample}") + + unexpected_keys = set(remapped) - expected + print(f"keys unexpected by model : {len(unexpected_keys)}") + if unexpected_keys: + sample = list(unexpected_keys)[:10] + print(f" sample unexpected keys : {sample}") + + for key in (EMBED_KEY, LMHEAD_KEY): + present = key in remapped + shape = tuple(remapped[key].shape) if present else None + print(f" {key:60s} present={present}, shape={shape}") + + +def _diff_embed(name: str, a: torch.Tensor, b: torch.Tensor, special_token_count: int) -> None: + a = a.float() + b = b.float() + if a.shape != b.shape: + print(f"❌ {name} shape mismatch: {tuple(a.shape)} vs {tuple(b.shape)}") + return + + abs_diff = (a - b).abs() + per_row_max = abs_diff.max(dim=1).values + nz_rows = (per_row_max > 0).nonzero(as_tuple=True)[0].tolist() + print(f"\n=== {name} (shape {tuple(a.shape)}) ===") + print(f"global max|Δ| = {abs_diff.max().item():.3e}") + print(f"rows with any diff = {len(nz_rows)}") + if nz_rows: + first = nz_rows[:10] + last = nz_rows[-10:] + print(f" first nonzero rows = {first}") + print(f" last nonzero rows = {last}") + vocab_size = a.shape[0] + base_vocab = vocab_size - special_token_count + special_rows = list(range(base_vocab, vocab_size)) + in_special = [r for r in nz_rows if r in special_rows] + out_special = [r for r in nz_rows if r not in special_rows] + print( + f" diffs in special-token rows ({base_vocab}..{vocab_size - 1}): {len(in_special)}/{special_token_count}" + ) + print(f" diffs in base-vocab rows (0..{base_vocab - 1}) : {len(out_special)}") + for r in special_rows: + print( + f" row {r}: max|Δ|={per_row_max[r].item():.3e}, " + f"upstream_norm={a[r].norm().item():.3e}, lerobot_norm={b[r].norm().item():.3e}" + ) + + +def main() -> int: + parser = argparse.ArgumentParser( + description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter + ) + parser.add_argument("--upstream", required=True) + parser.add_argument("--lerobot", required=True) + parser.add_argument( + "--special-token-count", + type=int, + default=5, + help="Number of special tokens Robometer adds. Defaults to len(ROBOMETER_SPECIAL_TOKENS)=5.", + ) + args = parser.parse_args() + + print(f"Loading upstream: {args.upstream}") + upstream = _load_upstream(args.upstream) + print(f"Loading LeRobot-format: {args.lerobot}") + lerobot = _load_lerobot(args.lerobot) + + _inspect_upstream_state_dict(args.upstream, upstream) + + sd_u, sd_l = upstream.state_dict(), lerobot.state_dict() + + for key in (EMBED_KEY, LMHEAD_KEY): + if key not in sd_u or key not in sd_l: + print(f"❌ key missing: {key} (upstream={key in sd_u}, lerobot={key in sd_l})") + continue + _diff_embed(key, sd_u[key], sd_l[key], args.special_token_count) + + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/scripts/extract_libero_episode_for_parity.py b/scripts/extract_libero_episode_for_parity.py new file mode 100644 index 000000000..be4b6069b --- /dev/null +++ b/scripts/extract_libero_episode_for_parity.py @@ -0,0 +1,168 @@ +#!/usr/bin/env python +# Copyright 2026 The HuggingFace Inc. team. All rights reserved. +"""Extract one LIBERO episode for Robometer parity testing. + +Loads a LeRobot LIBERO (or any video-bearing LeRobot) dataset, picks one +episode, samples ``--num-frames`` frames uniformly across its duration +(matching upstream Robometer's default of 8 frames), and saves them to +``.npz`` plus a sidecar ``.txt`` task file. + +The ``.npz`` layout (``frames`` key, ``(T, H, W, C) uint8``) is what upstream +``example_inference_local.py`` consumes, so the same file feeds both pipelines +and frame sampling cannot drift. + +Workflow: + +1. Run this script (LeRobot env) to produce ``frames.npz`` + ``task.txt``. +2. Pass them to upstream ``scripts/example_inference_local.py`` + (upstream env) to produce reference progress / success outputs. +3. Pass the same ``frames.npz`` to ``scripts/parity_robometer.py`` + (LeRobot env) to compare both sides. + +Example: + + uv run python scripts/extract_libero_episode_for_parity.py \\ + --repo-id lerobot/libero_10_image \\ + --episode 0 \\ + --num-frames 8 \\ + --out-dir /tmp/libero_ep0 +""" + +from __future__ import annotations + +import argparse +import sys +from pathlib import Path + +import numpy as np +import torch + +from lerobot.configs.types import FeatureType +from lerobot.datasets.lerobot_dataset import LeRobotDataset + + +def _pick_visual_feature(features: dict, requested: str | None) -> str: + """Return a visual feature key, preferring ``requested`` when given.""" + visual_keys = [ + key + for key, ft in features.items() + if getattr(ft, "type", None) == FeatureType.VISUAL or ft.get("dtype", "") == "video" + ] + if not visual_keys: + raise ValueError(f"Dataset has no visual feature; available: {list(features)}") + if requested is not None: + if requested not in visual_keys: + raise ValueError(f"Camera key {requested!r} not in dataset visual features {visual_keys}") + return requested + return visual_keys[0] + + +def _frame_uint8_hwc(tensor: torch.Tensor) -> np.ndarray: + """Convert a LeRobotDataset video frame to ``uint8`` ``(H, W, C)`` RGB.""" + arr = tensor.detach().cpu().numpy() + if arr.ndim == 3 and arr.shape[0] in (1, 3): + arr = arr.transpose(1, 2, 0) + if arr.dtype != np.uint8: + arr = np.clip(arr * 255.0 if arr.max() <= 1.0 + 1e-3 else arr, 0, 255).astype(np.uint8) + if arr.shape[-1] == 1: + arr = np.repeat(arr, 3, axis=-1) + return arr + + +def main() -> int: + parser = argparse.ArgumentParser( + description=__doc__, + formatter_class=argparse.RawDescriptionHelpFormatter, + ) + parser.add_argument( + "--repo-id", + default="lerobot/libero_10_image", + help="LeRobot LIBERO (or other) dataset repo id (default: lerobot/libero_10_image).", + ) + parser.add_argument("--episode", type=int, default=0, help="Episode index.") + parser.add_argument( + "--camera-key", + default=None, + help="Visual feature key (e.g. observation.images.image). Auto-selects first if omitted.", + ) + parser.add_argument( + "--num-frames", + type=int, + default=8, + help="Number of frames to sample uniformly (default: 8 — Robometer's training-time default).", + ) + parser.add_argument( + "--out-dir", + type=Path, + default=Path("outputs/robometer_parity/libero"), + help="Directory to write frames.npz / task.txt / frame_indices.npy.", + ) + args = parser.parse_args() + + print(f"Loading {args.repo_id} (episode {args.episode})...") + dataset = LeRobotDataset(args.repo_id, episodes=[args.episode]) + + camera_key = _pick_visual_feature(dataset.features, args.camera_key) + print(f"Using camera key: {camera_key}") + + ep_from = int(dataset.episode_data_index["from"][0].item()) + ep_to = int(dataset.episode_data_index["to"][0].item()) + total_frames = ep_to - ep_from + if total_frames <= 0: + print(f"ERROR: episode {args.episode} has no frames.", file=sys.stderr) + return 1 + print(f"Episode has {total_frames} frames; sampling {args.num_frames} uniformly.") + + indices = np.linspace(0, total_frames - 1, num=min(args.num_frames, total_frames), dtype=int) + frames: list[np.ndarray] = [] + task: str = "" + for offset in indices: + sample = dataset[ep_from + int(offset)] + frame_tensor = sample[camera_key] + frames.append(_frame_uint8_hwc(frame_tensor)) + if not task: + task = sample.get("task", "") or "" + + if not task: + print("ERROR: episode has no task description in metadata.", file=sys.stderr) + return 1 + + frames_array = np.stack(frames) + + args.out_dir.mkdir(parents=True, exist_ok=True) + frames_path = args.out_dir / "frames.npz" + task_path = args.out_dir / "task.txt" + indices_path = args.out_dir / "frame_indices.npy" + + np.savez(frames_path, frames=frames_array) + task_path.write_text(task + "\n", encoding="utf-8") + np.save(indices_path, indices) + + print() + print(f"Wrote {frames_path} (shape={frames_array.shape}, dtype={frames_array.dtype})") + print(f"Wrote {task_path} (task={task!r})") + print(f"Wrote {indices_path} (frame_indices={indices.tolist()})") + print() + print("Next steps:") + print(" # in upstream env (where `robometer` is importable):") + print( + f" python third_party/robometer/scripts/example_inference_local.py \\\n" + f" --model-path robometer/Robometer-4B \\\n" + f" --video {frames_path} \\\n" + f' --task "{task}" \\\n' + f" --out {args.out_dir / 'upstream.npy'}" + ) + print() + print(" # back in LeRobot env:") + print( + f" uv run python scripts/parity_robometer.py \\\n" + f" --frames {frames_path} \\\n" + f' --task "{task}" \\\n' + f" --upstream-progress {args.out_dir / 'upstream.npy'} \\\n" + f" --upstream-success {args.out_dir / 'upstream_success_probs.npy'}" + ) + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/scripts/parity_robometer.py b/scripts/parity_robometer.py new file mode 100644 index 000000000..62cfaed34 --- /dev/null +++ b/scripts/parity_robometer.py @@ -0,0 +1,232 @@ +#!/usr/bin/env python +# Copyright 2026 The HuggingFace Inc. team. All rights reserved. +"""Functional parity check: LeRobot Robometer vs. upstream Robometer. + +Runs the in-tree :class:`RobometerRewardModel` on the same frames + task that +upstream Robometer was run on, and compares per-frame progress / success +predictions against reference outputs saved by upstream's +``scripts/example_inference_local.py``. + +Workflow: + +1. In the upstream Robometer environment (where ``robometer`` is importable), + run:: + + python third_party/robometer/scripts/example_inference_local.py \\ + --model-path robometer/Robometer-4B \\ + --video /path/to/episode.mp4 \\ + --task "Open the drawer" \\ + --fps 1.0 \\ + --out /tmp/robometer_upstream.npy + + This produces: + - ``/tmp/robometer_upstream.npy`` (progress predictions) + - ``/tmp/robometer_upstream_success_probs.npy`` (success probabilities) + +2. Extract the exact same frames the upstream script used, save as ``.npz``:: + + # quick helper: extract frames at the same fps and save as .npz + python -c " + from third_party.robometer.scripts.example_inference_local import load_frames_input + import numpy as np + frames = load_frames_input('/path/to/episode.mp4', fps=1.0, max_frames=512) + np.savez('/tmp/robometer_frames.npz', frames=frames) + " + +3. In this LeRobot env, run this script:: + + uv run python scripts/parity_robometer.py \\ + --frames /tmp/robometer_frames.npz \\ + --task "Open the drawer" \\ + --upstream-progress /tmp/robometer_upstream.npy \\ + --upstream-success /tmp/robometer_upstream_success_probs.npy \\ + --lerobot-model lilkm/robometer-4b +""" + +from __future__ import annotations + +import argparse +import sys + +import numpy as np +import torch + +from lerobot.configs.rewards import RewardModelConfig +from lerobot.rewards.robometer import RobometerConfig, RobometerRewardModel +from lerobot.rewards.robometer.modeling_robometer import decode_progress_outputs +from lerobot.rewards.robometer.processor_robometer import RobometerEncoderProcessorStep + + +def _load_frames(path: str) -> np.ndarray: + """Load frames from .npy/.npz. Expects (T, H, W, C) uint8.""" + if path.endswith(".npy"): + frames = np.load(path) + elif path.endswith(".npz"): + with np.load(path, allow_pickle=False) as npz: + frames = npz["frames"].copy() if "frames" in npz else next(iter(npz.values())).copy() + else: + raise ValueError(f"Frames must be .npy or .npz (got {path!r}).") + + if frames.dtype != np.uint8: + frames = np.clip(frames, 0, 255).astype(np.uint8) + if frames.ndim != 4: + raise ValueError(f"Frames must be 4D (T,H,W,C); got shape {frames.shape}.") + if frames.shape[-1] not in (1, 3): + # Probably (T,C,H,W) — transpose + if frames.shape[1] in (1, 3): + frames = frames.transpose(0, 2, 3, 1) + else: + raise ValueError(f"Cannot interpret frame channel layout: {frames.shape}.") + return frames + + +def _run_lerobot( + frames: np.ndarray, + task: str, + model_path: str, + device: str, +) -> tuple[np.ndarray, np.ndarray]: + """Run LeRobot's Robometer on the given frames; return (progress, success).""" + cfg = RobometerConfig(pretrained_path=model_path, device=device, max_frames=None) + model = RobometerRewardModel.from_pretrained(model_path, config=cfg) + + encoder = RobometerEncoderProcessorStep( + base_model_id=model.config.base_model_id, + use_multi_image=model.config.use_multi_image, + use_per_frame_progress_token=model.config.use_per_frame_progress_token, + max_frames=None, + ) + batch = encoder.encode_samples([(frames, task)]) + + model_device = next(model.model.parameters()).device + inputs = {key: value.to(model_device) if hasattr(value, "to") else value for key, value in batch.items()} + + model.eval() + with torch.no_grad(): + progress_logits, success_logits = model._compute_rbm_logits(inputs) + + decoded = decode_progress_outputs( + progress_logits, + success_logits, + is_discrete_mode=model.config.use_discrete_progress, + ) + progress = np.asarray(decoded["progress_pred"][0], dtype=np.float32) + success = ( + np.asarray(decoded["success_probs"][0], dtype=np.float32) + if decoded["success_probs"] + else np.array([], dtype=np.float32) + ) + return progress, success + + +def _compare(name: str, lerobot: np.ndarray, upstream: np.ndarray, atol: float, rtol: float) -> bool: + print(f"\n=== {name} ===") + if lerobot.shape != upstream.shape: + print(f"shape mismatch: lerobot={lerobot.shape} upstream={upstream.shape}") + return False + + abs_diff = np.abs(lerobot - upstream) + rel_diff = abs_diff / (np.abs(upstream) + 1e-12) + print(f"shape : {lerobot.shape}") + print(f"max |Δ| : {abs_diff.max():.3e}") + print(f"mean |Δ| : {abs_diff.mean():.3e}") + print(f"max rel |Δ| : {rel_diff.max():.3e}") + print(f"lerobot[:5] : {lerobot[:5]}") + print(f"upstream[:5] : {upstream[:5]}") + + within_tol = bool(np.allclose(lerobot, upstream, atol=atol, rtol=rtol)) + print(f"allclose(atol={atol}, rtol={rtol}) -> {within_tol}") + return within_tol + + +def main() -> int: + parser = argparse.ArgumentParser( + description=__doc__, + formatter_class=argparse.RawDescriptionHelpFormatter, + ) + parser.add_argument( + "--frames", + required=True, + help=".npy / .npz file with the exact frames upstream was run on (T,H,W,C uint8).", + ) + parser.add_argument("--task", required=True, help="Task instruction string.") + parser.add_argument( + "--upstream-progress", + required=True, + help="Reference progress .npy saved by upstream example_inference_local.py.", + ) + parser.add_argument( + "--upstream-success", + default=None, + help="Optional reference success_probs .npy. If omitted, success comparison is skipped.", + ) + parser.add_argument( + "--lerobot-model", + default="lilkm/robometer-4b", + help="LeRobot-format Robometer Hub repo id or local path.", + ) + parser.add_argument( + "--device", + default="cuda" if torch.cuda.is_available() else "cpu", + help="Device for the LeRobot model (default: cuda if available).", + ) + parser.add_argument( + "--atol", + type=float, + default=1e-3, + help="Absolute tolerance for allclose (default: 1e-3; bf16 round-trip headroom).", + ) + parser.add_argument( + "--rtol", + type=float, + default=1e-2, + help="Relative tolerance for allclose (default: 1e-2).", + ) + parser.add_argument( + "--out-prefix", + default="lerobot_robometer_outputs", + help="Save the LeRobot outputs as _progress.npy / _success.npy.", + ) + args = parser.parse_args() + + # 0. Sanity: confirm the LeRobot config is a RobometerConfig. + cfg = RewardModelConfig.from_pretrained(args.lerobot_model) + if not isinstance(cfg, RobometerConfig): + print(f"ERROR: {args.lerobot_model!r} does not resolve to a RobometerConfig.", file=sys.stderr) + return 2 + + # 1. Load frames + task + upstream reference outputs. + frames = _load_frames(args.frames) + upstream_progress = np.load(args.upstream_progress).astype(np.float32) + upstream_success = ( + np.load(args.upstream_success).astype(np.float32) if args.upstream_success is not None else None + ) + + print(f"Loaded {frames.shape[0]} frames at {frames.shape[1:]}, task={args.task!r}") + print(f"LeRobot model: {args.lerobot_model} device: {args.device}") + + # 2. Run LeRobot pipeline. + progress, success = _run_lerobot(frames, args.task, args.lerobot_model, args.device) + np.save(f"{args.out_prefix}_progress.npy", progress) + if success.size > 0: + np.save(f"{args.out_prefix}_success.npy", success) + print(f"Saved LeRobot outputs to {args.out_prefix}_progress.npy / _success.npy") + + # 3. Compare to upstream references. + progress_ok = _compare("progress", progress, upstream_progress, args.atol, args.rtol) + if upstream_success is not None and success.size > 0: + success_ok = _compare("success_probs", success, upstream_success, args.atol, args.rtol) + else: + success_ok = True + print("\n(skipping success comparison — upstream success file not provided)") + + print() + if progress_ok and success_ok: + print("Parity check passed.") + return 0 + print("Parity check FAILED.") + return 1 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/scripts/verify_robometer_export.py b/scripts/verify_robometer_export.py new file mode 100644 index 000000000..682c5f4e3 --- /dev/null +++ b/scripts/verify_robometer_export.py @@ -0,0 +1,149 @@ +#!/usr/bin/env python +# Copyright 2026 The HuggingFace Inc. team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 + +"""Verify that a LeRobot-format Robometer is byte-equivalent to its upstream source. + +Run this once after publishing a LeRobot-format Robometer to the Hub, before +flipping the default `RobometerConfig.pretrained_path` to it. It loads both +the upstream snapshot and the re-exported copy, compares state dicts, and +prints a clear pass/fail summary. + +Example: + + python scripts/verify_robometer_export.py \\ + --upstream robometer/Robometer-4B \\ + --lerobot lerobot/robometer-4b + + python scripts/verify_robometer_export.py \\ + --upstream robometer/Robometer-4B \\ + --lerobot ./robometer-4b-lerobot # local folder also works +""" + +from __future__ import annotations + +import argparse +import sys + +from lerobot.configs.rewards import RewardModelConfig +from lerobot.rewards.robometer import RobometerConfig, RobometerRewardModel +from lerobot.rewards.robometer._upstream_loader import apply_upstream_checkpoint + + +def _load_upstream(path: str) -> RobometerRewardModel: + # Fresh ``RobometerConfig`` (``vlm_config=None``) triggers + # ``RobometerRewardModel.__init__``'s upstream-matching path: download + # base Qwen, resize for ROBOMETER_SPECIAL_TOKENS. The subsequent + # ``apply_upstream_checkpoint`` call resizes again if the checkpoint's + # vocab differs (e.g. upstream was trained against an older Qwen). + cfg = RobometerConfig(pretrained_path=path, device="cpu") + model = RobometerRewardModel(cfg) + apply_upstream_checkpoint(model, path) + model.eval() + return model + + +def _load_lerobot(path: str) -> RobometerRewardModel: + cfg = RewardModelConfig.from_pretrained(path) + if not isinstance(cfg, RobometerConfig): + raise TypeError(f"Expected RobometerConfig in LeRobot export, got {type(cfg)}") + cfg.pretrained_path = path + cfg.device = "cpu" + return RobometerRewardModel.from_pretrained(path, config=cfg) + + +def compare_state_dicts(a: RobometerRewardModel, b: RobometerRewardModel) -> bool: + sd_a, sd_b = a.state_dict(), b.state_dict() + keys_a, keys_b = set(sd_a), set(sd_b) + + missing = keys_a - keys_b + extra = keys_b - keys_a + if missing: + print(f"❌ {len(missing)} keys missing in LeRobot-format model (sample: {list(missing)[:5]})") + if extra: + print(f"❌ {len(extra)} extra keys in LeRobot-format model (sample: {list(extra)[:5]})") + if missing or extra: + return False + + diff_summary: list[tuple[str, float]] = [] + for key in sorted(keys_a): + ta, tb = sd_a[key], sd_b[key] + if ta.shape != tb.shape: + print(f"❌ shape mismatch at {key}: {tuple(ta.shape)} vs {tuple(tb.shape)}") + return False + # Compare in float to avoid bfloat16 equality quirks. + max_abs = (ta.float() - tb.float()).abs().max().item() + if max_abs > 0: + diff_summary.append((key, max_abs)) + + if not diff_summary: + print(f"✅ All {len(keys_a)} parameters identical") + return True + + # Some keys differ; show worst offenders. + diff_summary.sort(key=lambda kv: kv[1], reverse=True) + print(f"⚠️ {len(diff_summary)} keys differ. Top 10 by max abs diff:") + for key, value in diff_summary[:10]: + print(f" {key:60s} max|Δ| = {value:.3e}") + + # Tolerance: bf16 round-trips can introduce ULP-level noise but no real + # change. Allow up to 1e-3 absolute difference; anything larger is a real + # divergence. + worst = diff_summary[0][1] + if worst < 1e-3: + print(f"✅ Worst diff {worst:.3e} is within bf16 round-trip tolerance") + return True + print(f"❌ Worst diff {worst:.3e} exceeds tolerance (1e-3)") + return False + + +def main() -> int: + parser = argparse.ArgumentParser( + description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter + ) + parser.add_argument("--upstream", required=True, help="Upstream Robometer repo id or local path.") + parser.add_argument("--lerobot", required=True, help="LeRobot-format Robometer repo id or local path.") + args = parser.parse_args() + + print(f"Loading upstream: {args.upstream}") + upstream = _load_upstream(args.upstream) + print(f"Loading LeRobot-format: {args.lerobot}") + lerobot = _load_lerobot(args.lerobot) + + print("\n=== Config comparison ===") + config_ok = True + for field in [ + "base_model_id", + "torch_dtype", + "use_multi_image", + "use_per_frame_progress_token", + "average_temporal_patches", + "frame_pooling", + "frame_pooling_attn_temperature", + "progress_loss_type", + "progress_discrete_bins", + ]: + a, b = getattr(upstream.config, field), getattr(lerobot.config, field) + field_ok = a == b + config_ok = config_ok and field_ok + ok = "✅" if field_ok else "❌" + print(f" {ok} {field}: upstream={a!r}, lerobot={b!r}") + + print("\n=== State-dict comparison ===") + state_dict_ok = compare_state_dicts(upstream, lerobot) + + print() + if config_ok and state_dict_ok: + print("🎉 Verification passed — safe to flip the default.") + return 0 + print("⛔ Verification failed — DO NOT flip the default.") + return 1 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/src/lerobot/configs/rewards.py b/src/lerobot/configs/rewards.py index d495160bf..42acac94b 100644 --- a/src/lerobot/configs/rewards.py +++ b/src/lerobot/configs/rewards.py @@ -89,9 +89,16 @@ class RewardModelConfig(draccus.ChoiceRegistry, HubMixin, abc.ABC): def reward_delta_indices(self) -> list | None: # type: ignore[type-arg] return None - @abc.abstractmethod - def get_optimizer_preset(self) -> OptimizerConfig: - raise NotImplementedError + def get_optimizer_preset(self) -> OptimizerConfig | None: + """Default optimizer for this reward model, or ``None`` for zero-shot models. + + Trainable reward models (e.g. SARM, Classifier) must override this with a + concrete optimizer config. Zero-shot reward models (e.g. Robometer) leave + the default ``None`` — they error out earlier via the + :attr:`~lerobot.rewards.pretrained.PreTrainedRewardModel.is_trainable` + check in ``lerobot-train``. + """ + return None def get_scheduler_preset(self) -> LRSchedulerConfig | None: return None diff --git a/src/lerobot/rewards/__init__.py b/src/lerobot/rewards/__init__.py index 203fe2ee1..a6a98c3c6 100644 --- a/src/lerobot/rewards/__init__.py +++ b/src/lerobot/rewards/__init__.py @@ -20,11 +20,13 @@ from .factory import ( make_reward_pre_post_processors as make_reward_pre_post_processors, ) from .pretrained import PreTrainedRewardModel as PreTrainedRewardModel +from .robometer.configuration_robometer import RobometerConfig as RobometerConfig from .sarm.configuration_sarm import SARMConfig as SARMConfig __all__ = [ # Configuration classes "RewardClassifierConfig", + "RobometerConfig", "SARMConfig", # Base class "PreTrainedRewardModel", diff --git a/src/lerobot/rewards/factory.py b/src/lerobot/rewards/factory.py index f6716f3fb..d8f810c15 100644 --- a/src/lerobot/rewards/factory.py +++ b/src/lerobot/rewards/factory.py @@ -24,6 +24,7 @@ from lerobot.configs.rewards import RewardModelConfig from lerobot.processor import PolicyAction, PolicyProcessorPipeline from lerobot.rewards.classifier.configuration_classifier import RewardClassifierConfig from lerobot.rewards.pretrained import PreTrainedRewardModel +from lerobot.rewards.robometer.configuration_robometer import RobometerConfig from lerobot.rewards.sarm.configuration_sarm import SARMConfig @@ -36,7 +37,7 @@ def get_reward_model_class(name: str) -> type[PreTrainedRewardModel]: Args: name: The name of the reward model. Supported names are "reward_classifier", - "sarm". + "sarm", "robometer". Returns: The reward model class corresponding to the given name. @@ -52,6 +53,10 @@ def get_reward_model_class(name: str) -> type[PreTrainedRewardModel]: from lerobot.rewards.sarm.modeling_sarm import SARMRewardModel return SARMRewardModel + elif name == "robometer": + from lerobot.rewards.robometer.modeling_robometer import RobometerRewardModel + + return RobometerRewardModel else: try: return _get_reward_model_cls_from_name(name=name) @@ -68,7 +73,7 @@ def make_reward_model_config(reward_type: str, **kwargs) -> RewardModelConfig: Args: reward_type: The type of the reward model. Supported types include - "reward_classifier", "sarm". + "reward_classifier", "sarm", "robometer". **kwargs: Keyword arguments to be passed to the configuration class constructor. Returns: @@ -81,6 +86,8 @@ def make_reward_model_config(reward_type: str, **kwargs) -> RewardModelConfig: return RewardClassifierConfig(**kwargs) elif reward_type == "sarm": return SARMConfig(**kwargs) + elif reward_type == "robometer": + return RobometerConfig(**kwargs) else: try: config_cls = RewardModelConfig.get_choice_class(reward_type) @@ -160,6 +167,13 @@ def make_reward_pre_post_processors( dataset_stats=kwargs.get("dataset_stats"), dataset_meta=kwargs.get("dataset_meta"), ) + elif isinstance(reward_cfg, RobometerConfig): + from lerobot.rewards.robometer.processor_robometer import make_robometer_pre_post_processors + + return make_robometer_pre_post_processors( + config=reward_cfg, + dataset_stats=kwargs.get("dataset_stats"), + ) else: try: diff --git a/src/lerobot/rewards/robometer/__init__.py b/src/lerobot/rewards/robometer/__init__.py new file mode 100644 index 000000000..d20d92d37 --- /dev/null +++ b/src/lerobot/rewards/robometer/__init__.py @@ -0,0 +1,19 @@ +# Copyright 2026 The HuggingFace Inc. team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .configuration_robometer import RobometerConfig +from .modeling_robometer import RobometerRewardModel +from .processor_robometer import make_robometer_pre_post_processors + +__all__ = ["RobometerConfig", "RobometerRewardModel", "make_robometer_pre_post_processors"] diff --git a/src/lerobot/rewards/robometer/_upstream_loader.py b/src/lerobot/rewards/robometer/_upstream_loader.py new file mode 100644 index 000000000..05f1a918b --- /dev/null +++ b/src/lerobot/rewards/robometer/_upstream_loader.py @@ -0,0 +1,229 @@ +# Copyright 2026 The HuggingFace Inc. team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Upstream/legacy Robometer checkpoint loader. + +This module is **only** used by the one-time conversion tooling +(:mod:`lerobot.scripts.lerobot_export_robometer` and +``scripts/verify_robometer_export.py``). It supports: + +- Sharded upstream checkpoints (``model-0000X-of-Y.safetensors`` + index). +- PEFT/LoRA adapter checkpoints (``adapter_config.json`` + adapter weights). +- Local snapshot directories or Hugging Face Hub repo ids. + +Once :class:`~lerobot.rewards.robometer.RobometerRewardModel` is loaded +through this module, calling ``save_pretrained`` writes the canonical +LeRobot-native layout (single ``model.safetensors`` + ``config.json``) that +the base loader understands. + +The runtime path +(:meth:`~lerobot.rewards.pretrained.PreTrainedRewardModel.from_pretrained`) +does **not** import this file. It is safe to delete once you no longer need +the conversion tooling. +""" + +from __future__ import annotations + +import json +import logging +from pathlib import Path +from typing import Any + +from huggingface_hub import snapshot_download +from safetensors.torch import load_file +from torch import Tensor, nn + +from lerobot.utils.import_utils import require_package + +logger = logging.getLogger(__name__) + + +def _download_robometer_snapshot( + pretrained_path: str, + *, + hub_token: str | None = None, +) -> Path: + """Resolve a Robometer snapshot directory. + + - If ``pretrained_path`` is an existing local directory, return it directly. + - Otherwise treat ``pretrained_path`` as a Hugging Face repo id (optionally + with ``@revision``) and download it via ``snapshot_download``. + """ + local_candidate = Path(pretrained_path) + if local_candidate.is_dir(): + return local_candidate + + if "@" in pretrained_path: + repo_id, revision = pretrained_path.split("@", 1) + else: + repo_id, revision = pretrained_path, None + + return Path( + snapshot_download( + repo_id=repo_id, + revision=revision, + token=hub_token, + allow_patterns=[ + "*.json", + "*.safetensors", + "*.bin", + "*.txt", + "*.model", + "tokenizer*", + "special_tokens_map.json", + ], + ) + ) + + +def _maybe_apply_peft(base_model: Any, snapshot_dir: Path) -> Any: + adapter_config = snapshot_dir / "adapter_config.json" + if not adapter_config.exists(): + return base_model + + require_package("peft", extra="peft-dep") + from peft import PeftModel + + return PeftModel.from_pretrained(base_model, str(snapshot_dir)) + + +def _remap_state_dict_keys(state_dict: dict[str, Tensor], model: nn.Module) -> dict[str, Tensor]: + """Try a few common prefix swaps so PEFT-wrapped checkpoints load cleanly.""" + model_keys = set(model.state_dict().keys()) + remapped: dict[str, Tensor] = {} + + for key, value in state_dict.items(): + if key in model_keys: + remapped[key] = value + continue + + candidates: list[str] = [] + if key.startswith("model.model."): + candidates.append(key.replace("model.model.", "model.base_model.model.model.", 1)) + candidates.append(key.replace("model.model.", "model.", 1)) + if key.startswith("model."): + candidates.append(f"model.{key}") + candidates.append(key.replace("model.", "", 1)) + else: + candidates.append(f"model.{key}") + if key.startswith("model.") and not key.startswith("model.base_model."): + parts = key.split(".", 1) + if len(parts) == 2: + candidates.append(f"model.base_model.{parts[1]}") + + for candidate in candidates: + if candidate in model_keys: + remapped[candidate] = value + break + else: + remapped[key] = value + + return remapped + + +def _resolve_checkpoint_safetensors_files(snapshot_dir: Path) -> list[Path]: + """Pick the safetensors files that hold the full model weights. + + When ``model.safetensors.index.json`` is present, only the files it lists are + loaded. Otherwise any ``model*.safetensors`` shards are preferred over + sidecar files. Falls back to every ``*.safetensors`` in the snapshot. + """ + index_path = snapshot_dir / "model.safetensors.index.json" + if index_path.exists(): + with index_path.open() as f: + weight_map = json.load(f).get("weight_map", {}) + indexed = sorted( + {snapshot_dir / name for name in weight_map.values() if (snapshot_dir / name).exists()} + ) + if indexed: + return indexed + + model_shards = sorted(snapshot_dir.glob("model*.safetensors")) + if model_shards: + return model_shards + + return sorted(snapshot_dir.glob("*.safetensors")) + + +def apply_upstream_checkpoint( + model: nn.Module, + pretrained_path: str, + *, + hub_token: str | None = None, +) -> None: + """Load an upstream (sharded / PEFT) Robometer checkpoint into ``model``. + + Downloads the snapshot, optionally applies PEFT wrapping, merges sharded + ``.safetensors`` files in memory, remaps PEFT-prefixed keys, and loads them + into ``model`` non-strictly. ``model`` must already be constructed with the + matching Robometer architecture (e.g. via + :class:`~lerobot.rewards.robometer.RobometerRewardModel` ``__init__``). + """ + snapshot_dir = _download_robometer_snapshot(pretrained_path, hub_token=hub_token) + + # PEFT adapter checkpoints wrap the base model before weight loading so the + # remapper can place adapter tensors at the right prefix. + base_model = getattr(model, "model", None) + if base_model is not None: + wrapped = _maybe_apply_peft(base_model, snapshot_dir) + if wrapped is not base_model: + model.model = wrapped + + files = _resolve_checkpoint_safetensors_files(snapshot_dir) + if not files: + logger.warning("No *.safetensors files in %s; using freshly initialised heads", snapshot_dir) + return + + merged: dict[str, Tensor] = {} + for path in files: + merged.update(load_file(str(path))) + + remapped = _remap_state_dict_keys(merged, model) + + # Defensive vocab-match. With the corrected resize logic + # (``_resize_embeddings_for_robometer`` uses ``len(tokenizer) + 5``), + # a freshly built ``RobometerRewardModel`` should already share the same + # vocabulary as the upstream checkpoint (e.g. 151,674 for + # ``robometer/Robometer-4B``). This block stays in place as a safety net + # in case a future upstream variant uses a different vocab — we never + # want ``load_state_dict`` to trip on a silent shape mismatch. + base_model = getattr(model, "model", None) + if base_model is not None and hasattr(base_model, "get_input_embeddings"): + for key in ( + "model.model.language_model.embed_tokens.weight", + "model.language_model.embed_tokens.weight", + "model.embed_tokens.weight", + ): + tensor = remapped.get(key) + if tensor is None: + continue + ckpt_vocab = int(tensor.shape[0]) + current_vocab = int(base_model.get_input_embeddings().num_embeddings) + if ckpt_vocab != current_vocab: + logger.info( + "Resizing model embed table %d -> %d to match upstream checkpoint vocab " + "(upstream was trained against a different Qwen revision).", + current_vocab, + ckpt_vocab, + ) + base_model.resize_token_embeddings(ckpt_vocab) + break + + missing, unexpected = model.load_state_dict(remapped, strict=False) + if missing: + logger.debug("Robometer checkpoint missing %d keys (sample: %s)", len(missing), missing[:5]) + if unexpected: + logger.debug( + "Robometer checkpoint had %d unexpected keys (sample: %s)", len(unexpected), unexpected[:5] + ) diff --git a/src/lerobot/rewards/robometer/configuration_robometer.py b/src/lerobot/rewards/robometer/configuration_robometer.py new file mode 100644 index 000000000..6136c0e60 --- /dev/null +++ b/src/lerobot/rewards/robometer/configuration_robometer.py @@ -0,0 +1,162 @@ +# Copyright 2026 The HuggingFace Inc. team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import annotations + +from copy import deepcopy +from dataclasses import dataclass, field +from typing import TYPE_CHECKING, Any + +from lerobot.configs import FeatureType, NormalizationMode, PolicyFeature +from lerobot.configs.rewards import RewardModelConfig +from lerobot.utils.constants import OBS_IMAGES +from lerobot.utils.import_utils import _transformers_available, require_package + +if TYPE_CHECKING or _transformers_available: + from transformers import AutoConfig, AutoTokenizer +else: + AutoConfig = None # type: ignore[assignment] + AutoTokenizer = None # type: ignore[assignment] + + +@RewardModelConfig.register_subclass("robometer") +@dataclass +class RobometerConfig(RewardModelConfig): + """Configuration for the Robometer reward model.""" + + pretrained_path: str | None = "lilkm/Robometer-4B" + image_key: str = OBS_IMAGES + ".top" + task_key: str = "task" + default_task: str | None = None + + max_frames: int | None = 8 + reward_output: str = "progress" # "progress" or "success" + success_threshold: float = 0.5 + + license: str | None = "apache-2.0" + tags: list[str] | None = field( + default_factory=lambda: ["reward-model", "vision-language", "qwen3-vl", "zero-shot"] + ) + + base_model_id: str = "Qwen/Qwen3-VL-4B-Instruct" + torch_dtype: str = "bfloat16" + use_multi_image: bool = True + use_per_frame_progress_token: bool = True + average_temporal_patches: bool = True + frame_pooling: str = "mean" # "mean" | "boundary" | "attention" + frame_pooling_attn_temperature: float = 1.0 + progress_loss_type: str = "discrete" # "l1" | "l2" | "discrete" + progress_discrete_bins: int = 10 + + # Serialised Qwen backbone config (post-resize). Always populated by + # ``__post_init__`` from ``base_model_id`` + ``len(tokenizer) + 5``, so it + # is never ``None`` after construction (EO-1 style). Saved into + # ``config.json`` automatically by the base ``_save_pretrained``. + vlm_config: dict[str, Any] | None = None + + input_features: dict[str, PolicyFeature] = field(default_factory=dict) + output_features: dict[str, PolicyFeature] = field(default_factory=dict) + normalization_mapping: dict[str, NormalizationMode] = field( + default_factory=lambda: { + "VISUAL": NormalizationMode.IDENTITY, + "REWARD": NormalizationMode.IDENTITY, + } + ) + + def __post_init__(self) -> None: + super().__post_init__() + if self.reward_output not in {"progress", "success"}: + raise ValueError(f"reward_output must be 'progress' or 'success', got {self.reward_output!r}") + if self.max_frames is not None and self.max_frames < 1: + raise ValueError(f"max_frames must be >= 1, got {self.max_frames}") + if self.frame_pooling not in {"mean", "boundary", "attention"}: + raise ValueError(f"frame_pooling must be mean/boundary/attention; got {self.frame_pooling!r}") + if self.frame_pooling_attn_temperature <= 0: + raise ValueError("frame_pooling_attn_temperature must be > 0") + if self.progress_loss_type not in {"l1", "l2", "discrete"}: + raise ValueError(f"progress_loss_type must be l1/l2/discrete; got {self.progress_loss_type!r}") + if self.use_per_frame_progress_token and not self.use_multi_image: + raise ValueError("use_per_frame_progress_token=True requires use_multi_image=True") + + if self.image_key not in self.input_features: + self.input_features[self.image_key] = PolicyFeature(shape=(3, 224, 224), type=FeatureType.VISUAL) + self.output_features.setdefault("progress", PolicyFeature(shape=(1,), type=FeatureType.REWARD)) + self.output_features.setdefault("success", PolicyFeature(shape=(1,), type=FeatureType.REWARD)) + + # Deterministically populate ``vlm_config`` so it is never ``None`` + # after construction (mirrors EO-1's ``__post_init__`` snapshot). + # The target vocab matches upstream Robometer's runtime resize + # ``base_model.resize_token_embeddings(len(processor.tokenizer))`` — + # see ``third_party/robometer/.../setup_utils.py`` — + # i.e. ``len(tokenizer) + len(ROBOMETER_SPECIAL_TOKENS)``. + # + # For ``Qwen/Qwen3-VL-4B-Instruct`` this gives 151,669 + 5 = 151,674, + # which is exactly the published ``robometer/Robometer-4B`` checkpoint + # vocab. NB: ``text_config.vocab_size`` in the raw Qwen config is the + # padded embedding-table size (151,936), not the tokenizer length — + # we override it with the tokenizer-driven value to stay consistent + # with upstream. + if self.vlm_config is None: + require_package("transformers", extra="robometer") + # Local import avoids a top-level cycle (modeling_robometer imports + # this module). ``ROBOMETER_SPECIAL_TOKENS`` is the single source + # of truth for the resize delta. + from lerobot.rewards.robometer.modeling_robometer import ROBOMETER_SPECIAL_TOKENS + + vlm = AutoConfig.from_pretrained(self.base_model_id).to_dict() + tokenizer = AutoTokenizer.from_pretrained(self.base_model_id) + text_config = vlm.get("text_config") + if not isinstance(text_config, dict): + raise ValueError( + f"Backbone config for {self.base_model_id!r} has no nested `text_config`; " + "Robometer expects a Qwen-VL-style config." + ) + text_config["vocab_size"] = len(tokenizer) + len(ROBOMETER_SPECIAL_TOKENS) + self.vlm_config = vlm + + @property + def use_discrete_progress(self) -> bool: + """Whether the progress head outputs distribution logits over bins.""" + return self.progress_loss_type.lower() == "discrete" + + @property + def vlm_backbone_config(self): + """Reconstruct the Qwen backbone config from :attr:`vlm_config`. + + ``vlm_config`` is always populated after :meth:`__post_init__` + (either fresh, computed from the tokenizer, or loaded from a saved + ``config.json`` via draccus). + """ + require_package("transformers", extra="robometer") + config_dict = deepcopy(self.vlm_config) + model_type = config_dict.pop("model_type", None) + if model_type is None: + raise ValueError("vlm_config must include `model_type` to reconstruct the backbone config") + return AutoConfig.for_model(model_type, **config_dict) + + @property + def observation_delta_indices(self) -> list[int] | None: + return None + + @property + def action_delta_indices(self) -> None: + return None + + @property + def reward_delta_indices(self) -> None: + return None + + def validate_features(self) -> None: + if self.image_key not in self.input_features: + raise ValueError(f"Robometer requires image input feature {self.image_key!r}") diff --git a/src/lerobot/rewards/robometer/modeling_robometer.py b/src/lerobot/rewards/robometer/modeling_robometer.py new file mode 100644 index 000000000..9edd2483e --- /dev/null +++ b/src/lerobot/rewards/robometer/modeling_robometer.py @@ -0,0 +1,493 @@ +# Copyright 2026 The HuggingFace Inc. team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Robometer reward model. + +- Qwen3-VL backbone (default: ``Qwen/Qwen3-VL-4B-Instruct``). +- Progress + success heads at inference; the preference head is preserved in the + state dict but not queried. +""" + +from __future__ import annotations + +import logging +from typing import TYPE_CHECKING, Any + +import torch +from torch import Tensor, nn + +from lerobot.rewards.pretrained import PreTrainedRewardModel +from lerobot.rewards.robometer.configuration_robometer import RobometerConfig +from lerobot.utils.constants import OBS_PREFIX +from lerobot.utils.import_utils import _transformers_available, require_package + +if TYPE_CHECKING or _transformers_available: + from transformers import AutoModelForImageTextToText +else: + AutoModelForImageTextToText = None # type: ignore[assignment] + +logger = logging.getLogger(__name__) + +# Namespace for Robometer's pre-encoded Qwen-VL observation tensors. The +# processor writes both Qwen-VL tensors and Robometer-specific token ids / +# metadata here; the model reads them at inference (no tokenizer needed in +# the model — EO1-style separation). +ROBOMETER_FEATURE_PREFIX = f"{OBS_PREFIX}robometer." +ROBOMETER_QWEN_INPUT_KEYS = ( + "input_ids", + "attention_mask", + "pixel_values", + "pixel_values_videos", + "image_grid_thw", + "video_grid_thw", + "second_per_grid_ts", +) +ROBOMETER_METADATA_KEYS = ( + "prog_token_id", + "vision_start_token_id", + "vision_end_token_id", + "video_merge_size", +) +ROBOMETER_INPUT_KEYS = ROBOMETER_QWEN_INPUT_KEYS + ROBOMETER_METADATA_KEYS + +# Order matters: the released checkpoint resized `embed_tokens` after adding +# these tokens in this order, so changing the set or order would silently +# misalign the saved embedding rows with their token ids. `<|reward_token|>` +# and `<|sim_token|>` are vestigial (never read by any head) but still occupy +# rows the checkpoint expects. +ROBOMETER_SPECIAL_TOKENS = ( + "<|split_token|>", + "<|reward_token|>", + "<|pref_token|>", + "<|sim_token|>", + "<|prog_token|>", +) + + +def convert_bins_to_continuous(bin_logits: Tensor) -> Tensor: + """Collapse per-bin logits into a single value in ``[0, 1]``. + + The discrete progress head outputs ``num_bins`` logits per frame. Bins are + evenly spaced centers in ``[0, 1]``; the continuous prediction is the + softmax-weighted mean of those centers. + """ + bin_probs = torch.softmax(bin_logits, dim=-1) + num_bins = bin_logits.shape[-1] + bin_centers = torch.linspace(0.0, 1.0, num_bins, device=bin_logits.device, dtype=bin_logits.dtype) + return (bin_probs * bin_centers).sum(dim=-1) + + +def squeeze_last_safe(x: Tensor) -> Tensor: + """Drop a trailing singleton dim only when present. + + Matches the upstream helper of the same name in + ``robometer.models.rbm`` (kept module-level and non-underscored to mirror + upstream). + """ + return x.squeeze(-1) if x.ndim > 1 and x.shape[-1] == 1 else x + + +def _torch_dtype(name: str) -> torch.dtype: + dtype = getattr(torch, name, None) + if isinstance(dtype, torch.dtype): + return dtype + raise ValueError(f"Unknown torch dtype: {name!r}") + + +class RobometerPredictionHead(nn.Sequential): + """Small MLP head used for Robometer's progress / success / preference outputs. + + Subclasses ``nn.Sequential`` (not ``nn.Module``) so the ``state_dict`` keys + stay flat (``progress_head.0.weight``, ``progress_head.1.weight``, ...) and + remain byte-compatible with the published ``lilkm/robometer-4b`` checkpoint. + """ + + def __init__(self, hidden_dim: int, output_size: int, *, dropout: float, with_sigmoid: bool) -> None: + layers: list[nn.Module] = [ + nn.Linear(hidden_dim, hidden_dim // 2), + nn.LayerNorm(hidden_dim // 2), + nn.GELU(), + nn.Dropout(dropout), + nn.Linear(hidden_dim // 2, output_size), + ] + if with_sigmoid: + layers.append(nn.Sigmoid()) + super().__init__(*layers) + + +def decode_progress_outputs( + progress_logits: Tensor | None, + success_logits: Tensor | None, + *, + is_discrete_mode: bool, +) -> dict[str, list[list[float]]]: + """Decode RBM head outputs into per-frame floats. + + Args: + progress_logits: ``(B, T)`` (continuous) or ``(B, T, num_bins)`` (discrete). + success_logits: ``(B, T)`` raw logits, ``sigmoid``-ed to probabilities. + is_discrete_mode: if True the progress logits get a softmax over bins + and are projected onto bin centers via :func:`convert_bins_to_continuous`. + + Returns: + Dict with ``progress_pred`` and ``success_probs``, each a list of + length ``B`` of per-frame float lists. + """ + progress_pred: list[list[float]] = [] + success_probs: list[list[float]] = [] + + if progress_logits is not None: + for sample_logits in progress_logits: + if is_discrete_mode: + continuous = convert_bins_to_continuous(sample_logits.detach().float().cpu()) + progress_pred.append(continuous.flatten().tolist()) + else: + progress_pred.append(sample_logits.detach().float().cpu().flatten().tolist()) + + if success_logits is not None: + for sample_logits in success_logits: + success_probs.append(torch.sigmoid(sample_logits.detach().float().cpu()).flatten().tolist()) + + return {"progress_pred": progress_pred, "success_probs": success_probs} + + +class RobometerRewardModel(PreTrainedRewardModel): + """Robometer reward model: Qwen3-VL backbone + progress/success heads.""" + + name = "robometer" + config_class = RobometerConfig + + def __init__(self, config: RobometerConfig, *, dropout: float = 0.1) -> None: + require_package("transformers", extra="robometer") + super().__init__(config) + self.config = config + + # Two backbone-build paths (EO-1 style, branched on ``pretrained_path``): + # + # - Fresh training (``pretrained_path is None``): download the base + # Qwen weights and resize the embed table to match + # ``vlm_config.text_config.vocab_size`` — populated deterministically + # in ``RobometerConfig.__post_init__`` as + # ``len(tokenizer) + len(ROBOMETER_SPECIAL_TOKENS)``, mirroring + # upstream Robometer's ``_add_special_tokens_and_resize`` in + # ``third_party/robometer/.../setup_utils.py``. + # + # - Loading a saved checkpoint (``pretrained_path`` is set): rebuild + # the empty architecture from ``vlm_config`` via + # ``AutoModelForImageTextToText.from_config`` so the subsequent + # ``model.safetensors`` load is a direct fill of the right shape — + # no redundant Qwen weight download. + torch_dtype = _torch_dtype(config.torch_dtype) + if config.pretrained_path is None: + self.model = AutoModelForImageTextToText.from_pretrained( + config.base_model_id, + dtype=torch_dtype, + trust_remote_code=True, + ) + target_vocab = config.vlm_config["text_config"]["vocab_size"] + self.model.resize_token_embeddings(target_vocab) + else: + self.model = AutoModelForImageTextToText.from_config( + config.vlm_backbone_config, + dtype=torch_dtype, + trust_remote_code=True, + ) + + # All Qwen-VL backbones Robometer supports expose `text_config.hidden_size`. + # Falls back to the top-level `hidden_size` so future non-multimodal + # variants would still resolve. + backbone_config = self.model.config + text_config = getattr(backbone_config, "text_config", None) + hidden_size = getattr(text_config, "hidden_size", None) if text_config is not None else None + if hidden_size is None: + hidden_size = getattr(backbone_config, "hidden_size", None) + if hidden_size is None: + raise AttributeError( + f"Could not infer hidden_size from backbone config of {config.base_model_id}" + ) + hidden_dim = int(hidden_size) + + # Robometer's three prediction heads + frame-pool attention. The + # preference head is preserved to match the published state-dict layout + # even though only progress + success are consumed at inference, and + # `frame_pool_attn` is always allocated so checkpoints trained with + # `frame_pooling="attention"` load without remapping. + progress_output = config.progress_discrete_bins if config.use_discrete_progress else 1 + self.progress_head = RobometerPredictionHead( + hidden_dim, + progress_output, + dropout=dropout, + with_sigmoid=not config.use_discrete_progress, + ) + self.preference_head = RobometerPredictionHead(hidden_dim, 1, dropout=dropout, with_sigmoid=False) + self.success_head = RobometerPredictionHead(hidden_dim, 1, dropout=dropout, with_sigmoid=False) + self.frame_pool_attn = nn.Linear(hidden_dim, 1, bias=False) + + # Match the dtype of the loaded base model so weight loading is a no-op cast. + model_dtype = next(self.model.parameters()).dtype + self.progress_head.to(dtype=model_dtype) + self.preference_head.to(dtype=model_dtype) + self.success_head.to(dtype=model_dtype) + self.frame_pool_attn.to(dtype=model_dtype) + + def compute_reward(self, batch: dict[str, Tensor]) -> Tensor: + inputs = { + key: batch[f"{ROBOMETER_FEATURE_PREFIX}{key}"] + for key in ROBOMETER_INPUT_KEYS + if f"{ROBOMETER_FEATURE_PREFIX}{key}" in batch + } + if "input_ids" not in inputs: + raise KeyError( + f"Robometer batch missing pre-encoded inputs (expected " + f"`{ROBOMETER_FEATURE_PREFIX}input_ids`). Make sure the " + "RobometerEncoderProcessorStep ran before `compute_reward`." + ) + + device = next(self.model.parameters()).device + inputs = {key: value.to(device) if hasattr(value, "to") else value for key, value in inputs.items()} + + self.eval() + with torch.no_grad(): + progress_logits, success_logits = self._compute_rbm_logits(inputs) + + decoded = decode_progress_outputs( + progress_logits, + success_logits, + is_discrete_mode=self.config.use_discrete_progress, + ) + values = ( + decoded["success_probs"] if self.config.reward_output == "success" else decoded["progress_pred"] + ) + + rewards = torch.stack([torch.as_tensor(seq, dtype=torch.float32)[-1] for seq in values]) + if self.config.reward_output == "success": + rewards = (rewards > self.config.success_threshold).float() + return rewards.to(self.config.device or "cpu") + + def _compute_rbm_logits( + self, + inputs: dict[str, Any], + ) -> tuple[Tensor, Tensor]: + """Run the Qwen3-VL backbone and apply Robometer's heads. + + ``inputs`` is the encoded batch produced by + :class:`RobometerEncoderProcessorStep`. It carries Qwen tensors as well + as Robometer-specific metadata (``prog_token_id``, + ``vision_start_token_id``, ``vision_end_token_id``, ``video_merge_size``) + — the metadata is popped here so the rest can be forwarded straight to + the Qwen model. + + Returns ``(progress_logits, success_logits)``. Shapes: + + - ``progress_logits``: ``(B, T)`` (continuous) or ``(B, T, num_bins)`` (discrete). + - ``success_logits``: ``(B, T)`` raw logits (sigmoid happens at decode time). + """ + prog_token_id = inputs.pop("prog_token_id", None) + vision_start_token_id = inputs.pop("vision_start_token_id", None) + vision_end_token_id = inputs.pop("vision_end_token_id", None) + video_merge_size = inputs.pop("video_merge_size", 14) + + # Qwen3-VL doesn't reliably populate `last_hidden_state`; ask for the + # full hidden-state tuple and take the last layer. This matches the + # `is_qwen3` path in upstream Robometer's `RBM.forward_qwen` (main). + outputs = self.model(**inputs, output_hidden_states=True, return_dict=True) + hidden_state = ( + outputs.hidden_states[-1] + if getattr(outputs, "hidden_states", None) + else outputs.last_hidden_state + ) + + input_ids = inputs["input_ids"] + if self.config.use_per_frame_progress_token: + if prog_token_id is None: + raise KeyError("`prog_token_id` missing in batch (run RobometerEncoderProcessorStep first)") + return self._process_token_extraction(hidden_state, input_ids, prog_token_id=prog_token_id) + if self.config.use_multi_image: + if vision_start_token_id is None or vision_end_token_id is None: + raise KeyError( + "`vision_start_token_id` / `vision_end_token_id` missing in batch " + "(run RobometerEncoderProcessorStep first)" + ) + return self._process_multi_image_frames( + hidden_state, + input_ids, + start_id=vision_start_token_id, + end_id=vision_end_token_id, + ) + video_grid_thw = inputs.get("video_grid_thw") + if video_grid_thw is None: + raise ValueError("video_grid_thw is required for video-mode Robometer inference") + if vision_start_token_id is None: + raise KeyError("`vision_start_token_id` missing in batch") + return self._process_video_frames( + hidden_state, + input_ids, + video_grid_thw, + start_id=vision_start_token_id, + merge_size=video_merge_size, + ) + + def _apply_heads_to_hidden_states(self, frame_embeddings: Tensor) -> tuple[Tensor, Tensor]: + """Apply progress + success heads to a tensor of frame embeddings. + + Mirrors upstream ``RBM._apply_heads_to_hidden_states``. + """ + progress_out = self.progress_head(frame_embeddings) + progress = progress_out if self.config.use_discrete_progress else squeeze_last_safe(progress_out) + success = squeeze_last_safe(self.success_head(frame_embeddings)) + return progress, success + + def _process_token_extraction( + self, + hidden_state: Tensor, + input_ids: Tensor, + *, + prog_token_id: int, + ) -> tuple[Tensor, Tensor]: + """Per-frame progress/success from ``<|prog_token|>`` positions. + + Mirrors the progress-sample branch of upstream + ``RBM._process_token_extraction``. + """ + token_mask = input_ids == prog_token_id + batch_indices, positions = token_mask.nonzero(as_tuple=True) + if positions.numel() == 0: + raise ValueError("`<|prog_token|>` not found in any sequence") + + per_sample_hidden = [ + hidden_state[i, positions[batch_indices == i]] for i in range(input_ids.shape[0]) + ] + progress_list, success_list = [], [] + for embeddings in per_sample_hidden: + if embeddings.shape[0] == 0: + raise ValueError("`<|prog_token|>` missing in a sequence") + progress, success = self._apply_heads_to_hidden_states(embeddings) + progress_list.append(progress) + success_list.append(success) + + return torch.stack(progress_list), torch.stack(success_list) + + def _process_multi_image_frames( + self, + hidden_state: Tensor, + input_ids: Tensor, + *, + start_id: int, + end_id: int, + ) -> tuple[Tensor, Tensor]: + """Per-frame progress/success in multi-image mode (Qwen-VL). + + Mirrors upstream ``RBM._process_multi_image_frames`` (progress-sample + branch only — we don't run preference at inference). + """ + progress_list, success_list = [], [] + for batch_idx in range(input_ids.shape[0]): + seq_ids = input_ids[batch_idx] + seq_hidden = hidden_state[batch_idx] + frame_embeddings = self._extract_hidden_states_from_token_pairs( + seq_hidden, seq_ids, start_id, end_id + ) + progress, success = self._apply_heads_to_hidden_states(frame_embeddings) + progress_list.append(progress) + success_list.append(success) + + return torch.stack(progress_list), torch.stack(success_list) + + def _extract_hidden_states_from_token_pairs( + self, + hidden_state: Tensor, + input_ids: Tensor, + start_id: int, + end_id: int, + ) -> Tensor: + start_positions = (input_ids == start_id).nonzero(as_tuple=True)[0] + end_positions = (input_ids == end_id).nonzero(as_tuple=True)[0] + if start_positions.numel() == 0: + raise ValueError("`<|vision_start|>` not found in sequence") + if start_positions.numel() != end_positions.numel(): + raise ValueError( + f"Mismatched vision token counts: {start_positions.numel()} start vs " + f"{end_positions.numel()} end" + ) + + frames: list[Tensor] = [] + for start, end in zip(start_positions.tolist(), end_positions.tolist(), strict=True): + if start >= end: + raise ValueError(f"Invalid vision token pair: start={start} end={end}") + patch_tokens = hidden_state[start + 1 : end] + if patch_tokens.shape[0] == 0: + frames.append((hidden_state[start] + hidden_state[end]) / 2.0) + continue + + pooling = self.config.frame_pooling + if pooling == "mean": + frames.append(patch_tokens.mean(dim=0)) + elif pooling == "boundary": + frames.append(patch_tokens[-1]) + else: # attention + scores = ( + self.frame_pool_attn(patch_tokens).squeeze(-1) + / self.config.frame_pooling_attn_temperature + ) + weights = torch.softmax(scores, dim=0).unsqueeze(-1) + frames.append((weights * patch_tokens).sum(dim=0)) + + return torch.stack(frames) + + def _process_video_frames( + self, + hidden_state: Tensor, + input_ids: Tensor, + video_grid_thw: Tensor, + *, + start_id: int, + merge_size: int, + ) -> tuple[Tensor, Tensor]: + """Per-frame progress/success in video mode (Qwen-VL). + + Mirrors upstream ``RBM._process_video_frames`` / + ``RBM._extract_progress_from_trajectory`` (progress-sample branch + only — preference is not run at inference). In particular, + ``average_temporal_patches=False`` reads the *boundary* token at + ``cursor + tokens_per_frame`` to match upstream byte-for-byte. + """ + progress_list, success_list = [], [] + for batch_idx in range(input_ids.shape[0]): + seq_ids = input_ids[batch_idx] + seq_hidden = hidden_state[batch_idx] + start_positions = (seq_ids == start_id).nonzero(as_tuple=True)[0] + if start_positions.numel() == 0: + raise ValueError("`<|vision_start|>` not found in sequence") + t_dim, h_dim, w_dim = (int(x) for x in video_grid_thw[batch_idx].tolist()) + tokens_per_frame = (h_dim * w_dim) // (merge_size**2) + + cursor = start_positions[0].item() + frame_embeddings: list[Tensor] = [] + for _ in range(t_dim): + if self.config.average_temporal_patches: + patch = seq_hidden[cursor : cursor + tokens_per_frame] + frame_embeddings.append(patch.mean(dim=0)) + else: + # Upstream takes the position *one past* the patch span as + # the per-frame boundary; see + # `RBM._extract_progress_from_trajectory`. + frame_embeddings.append(seq_hidden[cursor + tokens_per_frame]) + cursor += tokens_per_frame + + stacked = torch.stack(frame_embeddings) + progress, success = self._apply_heads_to_hidden_states(stacked) + progress_list.append(progress) + success_list.append(success) + + return torch.stack(progress_list), torch.stack(success_list) diff --git a/src/lerobot/rewards/robometer/processor_robometer.py b/src/lerobot/rewards/robometer/processor_robometer.py new file mode 100644 index 000000000..b4905179d --- /dev/null +++ b/src/lerobot/rewards/robometer/processor_robometer.py @@ -0,0 +1,348 @@ +# Copyright 2026 The HuggingFace Inc. team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Robometer pre/post processing pipelines.""" + +from __future__ import annotations + +from dataclasses import dataclass, field +from typing import TYPE_CHECKING, Any + +import numpy as np +import torch +from PIL import Image +from torch import Tensor + +from lerobot.configs import PipelineFeatureType, PolicyFeature +from lerobot.processor import ( + AddBatchDimensionProcessorStep, + DeviceProcessorStep, + PolicyAction, + PolicyProcessorPipeline, + ProcessorStep, + ProcessorStepRegistry, + policy_action_to_transition, +) +from lerobot.rewards.robometer.configuration_robometer import RobometerConfig +from lerobot.rewards.robometer.modeling_robometer import ( + ROBOMETER_FEATURE_PREFIX, + ROBOMETER_SPECIAL_TOKENS, +) +from lerobot.types import EnvTransition, TransitionKey +from lerobot.utils.constants import ( + OBS_IMAGES, + POLICY_POSTPROCESSOR_DEFAULT_NAME, + POLICY_PREPROCESSOR_DEFAULT_NAME, +) +from lerobot.utils.import_utils import _transformers_available, require_package + +if TYPE_CHECKING or _transformers_available: + from transformers import AutoProcessor +else: + AutoProcessor = None + +PROGRESS_PROMPT = ( + "The task for the robot is '{task}'. Given the trajectory video, predict " + "the task progress at each frame, how far along the robot is towards " + "completing the task, a float between 0 and 1, where 0 is the starting " + "state and 1 is when the task is completed. If the robot is not " + "performing the same task, predict 0 progress." +) + + +def _frames_to_pil(frames: np.ndarray) -> list[Image.Image]: + """Convert ``(T, H, W, C)`` uint8 frames to a list of PIL images.""" + if frames.ndim != 4: + raise ValueError(f"Expected (T,H,W,C) frames; got shape {frames.shape}") + if frames.dtype != np.uint8: + frames = np.clip(frames, 0, 255).astype(np.uint8) + return [Image.fromarray(frames[i]) for i in range(frames.shape[0])] + + +def _video_to_numpy(video: Tensor, *, max_frames: int | None) -> np.ndarray: + """Convert one trajectory tensor to a ``(T, H, W, C) uint8`` numpy array.""" + if max_frames is not None: + video = video[-max_frames:] + if video.shape[1] in (1, 3): + video = video.permute(0, 2, 3, 1) + elif video.shape[-1] not in (1, 3): + raise ValueError(f"Expected channel dim of size 1 or 3, got shape {tuple(video.shape)}") + + array = video.detach().cpu().numpy() + if np.issubdtype(array.dtype, np.floating) and array.size > 0 and array.max() <= 1.0: + array = array * 255.0 + return np.clip(array, 0, 255).astype(np.uint8) + + +def _expand_tasks(task: Any, *, batch_size: int, default: str | None) -> list[str]: + if task is None: + task = default + if task is None: + raise KeyError("Robometer expected a task description in complementary data") + if isinstance(task, str): + return [task] * batch_size + if isinstance(task, tuple): + task = list(task) + if not (isinstance(task, list) and all(isinstance(item, str) for item in task)): + raise TypeError(f"Robometer task must be a string or list of strings, got {type(task)}") + if len(task) == 1 and batch_size > 1: + return task * batch_size + if len(task) != batch_size: + raise ValueError(f"Expected {batch_size} tasks, got {len(task)}") + return task + + +@dataclass +@ProcessorStepRegistry.register(name="robometer_encoder") +class RobometerEncoderProcessorStep(ProcessorStep): + """Encode raw frames + task into Qwen-VL tensors for the Robometer model. + + Loads a :class:`~transformers.AutoProcessor` matching ``base_model_id`` and + registers Robometer's special tokens on the tokenizer. The matching + embedding resize happens model-side in + :meth:`RobometerRewardModel.__init__`. This step owns the tokenizer — the + model itself never needs one — and is the EO1-style boundary between + pre-processing and modeling. + + At call time the step reads: + + - ``observation[image_key]``: ``(B, T, C, H, W)`` or ``(B, C, H, W)`` frames. + - ``complementary_data[task_key]``: a string or list of strings. + + and writes ``observation[f"{ROBOMETER_FEATURE_PREFIX}"]`` for: + + - the Qwen-VL processor outputs: ``input_ids``, ``attention_mask``, + ``pixel_values``, ``image_grid_thw``, ``video_grid_thw``, ... + - Robometer-specific token ids consumed by the model heads: + ``prog_token_id``, ``vision_start_token_id``, ``vision_end_token_id``, + ``video_merge_size``. + """ + + base_model_id: str = "Qwen/Qwen3-VL-4B-Instruct" + image_key: str = OBS_IMAGES + ".top" + task_key: str = "task" + default_task: str | None = None + max_frames: int | None = 8 + use_multi_image: bool = True + use_per_frame_progress_token: bool = True + max_length: int = 1024 + + _processor: Any = field(default=None, init=False, repr=False) + + def __post_init__(self) -> None: + require_package("transformers", extra="robometer") + require_package("qwen-vl-utils", extra="robometer", import_name="qwen_vl_utils") + + self._processor = AutoProcessor.from_pretrained( + self.base_model_id, + trust_remote_code=True, + do_sample_frames=False, + padding_side="right", + ) + + # Register Robometer's special tokens on the tokenizer. The matching + # embedding resize happens model-side in `RobometerRewardModel.__init__`. + tokenizer = self._processor.tokenizer + # Qwen tokenizers may not define a pad token, but batched prompts/videos + # require padding, so reuse EOS as the padding token. + if tokenizer.pad_token is None: + tokenizer.pad_token = tokenizer.eos_token + for token in ROBOMETER_SPECIAL_TOKENS: + if token not in tokenizer.get_vocab(): + tokenizer.add_special_tokens({"additional_special_tokens": [token]}) + + def __call__(self, transition: EnvTransition) -> EnvTransition: + observation = transition.get(TransitionKey.OBSERVATION) + complementary = transition.get(TransitionKey.COMPLEMENTARY_DATA) or {} + if not isinstance(observation, dict): + raise ValueError("RobometerEncoderProcessorStep requires an observation dict") + + if self.image_key not in observation: + raise KeyError(f"Robometer expected image key {self.image_key!r} in observation") + + frames = observation[self.image_key] + tensor = frames.detach().cpu() if isinstance(frames, Tensor) else torch.as_tensor(frames) + if tensor.ndim == 4: + tensor = tensor.unsqueeze(1) + elif tensor.ndim != 5: + raise ValueError( + f"Expected Robometer frames with shape (B,C,H,W) or (B,T,C,H,W); got {tuple(tensor.shape)}" + ) + + batch_size = tensor.shape[0] + tasks = _expand_tasks( + complementary.get(self.task_key, self.default_task), + batch_size=batch_size, + default=self.default_task, + ) + + samples = [ + (_video_to_numpy(tensor[i], max_frames=self.max_frames), tasks[i]) for i in range(batch_size) + ] + encoded = self.encode_samples(samples) + + new_observation = dict(observation) + for key, value in encoded.items(): + new_observation[f"{ROBOMETER_FEATURE_PREFIX}{key}"] = value + + new_transition = transition.copy() + new_transition[TransitionKey.OBSERVATION] = new_observation + return new_transition + + def encode_samples(self, samples: list[tuple[np.ndarray, str]]) -> dict[str, Tensor]: + """Run the Qwen-VL processor on a list of ``(frames, task)`` samples. + + Used internally by ``__call__`` and exposed for callers that want to + run the encoder on a single trajectory without building an + :class:`EnvTransition` (see ``examples/dataset/create_robometer_progress_videos.py``). + """ + from qwen_vl_utils import process_vision_info + + conversations = [self._build_conversation(frames, task) for frames, task in samples] + + texts = [ + self._processor.apply_chat_template( + msg, + tokenize=False, + add_generation_prompt=False, + add_vision_id=True, + enable_thinking=False, + fps=1, + ) + for msg in conversations + ] + + process_kwargs: dict[str, Any] = { + "return_video_kwargs": True, + "return_video_metadata": True, + } + image_processor = getattr(self._processor, "image_processor", None) + if image_processor is not None and hasattr(image_processor, "patch_size"): + process_kwargs["image_patch_size"] = image_processor.patch_size + + image_inputs, video_inputs, video_kwargs = process_vision_info(conversations, **process_kwargs) + + videos: list[Any] | None = None + video_metadatas: list[Any] | None = None + if video_inputs: + if isinstance(video_inputs[0], tuple) and len(video_inputs[0]) == 2: + videos_seq, metadatas_seq = zip(*video_inputs, strict=False) + videos = list(videos_seq) + video_metadatas = list(metadatas_seq) + else: + videos = list(video_inputs) + + processor_kwargs: dict[str, Any] = { + "text": texts, + "images": image_inputs, + "padding": True, + "truncation": False, + "max_length": self.max_length, + "return_tensors": "pt", + "do_resize": False, + } + if videos is not None: + processor_kwargs["videos"] = videos + if video_metadatas is not None: + processor_kwargs["video_metadata"] = video_metadatas + if video_kwargs: + processor_kwargs.update(video_kwargs) + + encoded = self._processor(**processor_kwargs) + + # Write Robometer-specific token ids and the video patch merge size into + # the encoded batch so `RobometerRewardModel` doesn't need its own + # tokenizer at inference (EO1-style separation: the processor owns the + # tokenizer, the model owns the backbone and heads). + tokenizer = self._processor.tokenizer + encoded["prog_token_id"] = tokenizer.convert_tokens_to_ids("<|prog_token|>") + encoded["vision_start_token_id"] = tokenizer.convert_tokens_to_ids("<|vision_start|>") + encoded["vision_end_token_id"] = tokenizer.convert_tokens_to_ids("<|vision_end|>") + video_processor = getattr(self._processor, "video_processor", None) + encoded["video_merge_size"] = int(getattr(video_processor, "merge_size", 14)) + return encoded + + def _build_conversation(self, frames: np.ndarray, task: str) -> list[dict[str, Any]]: + pil_frames = _frames_to_pil(frames) + prompt = PROGRESS_PROMPT.format(task=task) + content: list[dict[str, Any]] = [{"type": "text", "text": prompt}] + + if self.use_multi_image: + for image in pil_frames: + content.append({"type": "image", "image": image}) + if self.use_per_frame_progress_token: + content.append({"type": "text", "text": "<|prog_token|>"}) + else: + content.append({"type": "video", "video": pil_frames, "sample_fps": 1.0}) + + return [{"role": "user", "content": content}] + + def transform_features( + self, features: dict[PipelineFeatureType, dict[str, PolicyFeature]] + ) -> dict[PipelineFeatureType, dict[str, PolicyFeature]]: + # The Qwen-VL processor produces variable-length sequence tensors that + # don't fit the static `PolicyFeature(shape=...)` mould; we deliberately + # do not advertise the new observation keys here. + return features + + def get_config(self) -> dict[str, Any]: + return { + "base_model_id": self.base_model_id, + "image_key": self.image_key, + "task_key": self.task_key, + "default_task": self.default_task, + "max_frames": self.max_frames, + "use_multi_image": self.use_multi_image, + "use_per_frame_progress_token": self.use_per_frame_progress_token, + "max_length": self.max_length, + } + + +def make_robometer_pre_post_processors( + config: RobometerConfig, + dataset_stats: dict[str, dict[str, Any]] | None = None, +) -> tuple[ + PolicyProcessorPipeline[dict[str, Any], dict[str, Any]], + PolicyProcessorPipeline[PolicyAction, PolicyAction], +]: + """Pipeline that pre-encodes frames + task into Qwen-VL tensors. + + The preprocessor adds a batch dimension if needed, runs Robometer's + encoder, and moves everything to the configured device. The + postprocessor is the identity since Robometer outputs a single reward + tensor (no action to un-normalise). + """ + del dataset_stats # Robometer has its own normalisation inside the Qwen-VL processor. + + preprocessor = PolicyProcessorPipeline[dict[str, Any], dict[str, Any]]( + steps=[ + AddBatchDimensionProcessorStep(), + RobometerEncoderProcessorStep( + base_model_id=config.base_model_id, + image_key=config.image_key, + task_key=config.task_key, + default_task=config.default_task, + max_frames=config.max_frames, + use_multi_image=config.use_multi_image, + use_per_frame_progress_token=config.use_per_frame_progress_token, + ), + DeviceProcessorStep(device=config.device or "cpu"), + ], + name=POLICY_PREPROCESSOR_DEFAULT_NAME, + ) + postprocessor = PolicyProcessorPipeline( + name=POLICY_POSTPROCESSOR_DEFAULT_NAME, + to_transition=policy_action_to_transition, + ) + return preprocessor, postprocessor diff --git a/src/lerobot/scripts/lerobot_export_robometer.py b/src/lerobot/scripts/lerobot_export_robometer.py new file mode 100644 index 000000000..2d7fbd911 --- /dev/null +++ b/src/lerobot/scripts/lerobot_export_robometer.py @@ -0,0 +1,151 @@ +#!/usr/bin/env python +# Copyright 2026 The HuggingFace Inc. team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Re-save a Robometer checkpoint in LeRobot HF format. + +LeRobot's reward model format is ``config.json`` (a draccus-encoded +:class:`~lerobot.rewards.robometer.RobometerConfig`) plus a single +``model.safetensors`` containing the merged base + heads weights. The +released checkpoint at ``lilkm/robometer-4b`` already follows this layout; +this script is for converting other Robometer variants (e.g. a future +upstream release or a local training run) into the same format. + +Example: + +.. code-block:: shell + + lerobot-export-robometer \\ + --src robometer/Robometer-4B \\ + --dst ./robometer-4b-lerobot +""" + +from __future__ import annotations + +import argparse +import logging +from pathlib import Path + +from lerobot.rewards.robometer import RobometerConfig, RobometerRewardModel +from lerobot.rewards.robometer._upstream_loader import apply_upstream_checkpoint +from lerobot.utils.utils import init_logging + + +def export_robometer_to_lerobot( + src: str, + dst: str | Path, + *, + device: str = "cpu", + dataset_repo_id: str = "", + write_model_card: bool = True, +) -> Path: + """Load Robometer from ``src`` and re-save it under ``dst`` in LeRobot HF format. + + Produces ``config.json``, ``model.safetensors``, and (optionally) ``README.md``. + + Args: + src: Upstream source. Hugging Face repo id (``"robometer/Robometer-4B"``, + optionally ``"...@revision"``) or a local snapshot directory. + dst: Output directory. ``config.json`` and ``model.safetensors`` are + written here. + device: Where to place the model during loading. Defaults to CPU; use + ``"cuda"`` if you want to verify on GPU before saving. + dataset_repo_id: Hugging Face dataset id the model was trained on + (e.g. ``"robometer/RBM-1M"``). Written into the model card's + ``datasets:`` metadata. Leave empty if not applicable. + write_model_card: Generate a ``README.md`` using LeRobot's reward + model card template. Disable if you want to write the README + yourself. + + Returns: + The resolved output directory. + """ + # A fresh ``RobometerConfig`` has ``vlm_config=None``, which routes + # ``__init__`` through the upstream-matching path: download base Qwen, + # resize embeddings per ``ROBOMETER_SPECIAL_TOKENS``. ``apply_upstream_checkpoint`` + # then resizes again (if needed) to match the upstream checkpoint's vocab + # and overlays its weights. ``_save_pretrained`` snapshots the resulting + # post-resize architecture into ``vlm_config`` for fast future loads. + cfg = RobometerConfig(pretrained_path=src, device=device) + model = RobometerRewardModel(cfg) + apply_upstream_checkpoint(model, src) + model.to(device) + model.eval() + + dst = Path(dst) + dst.mkdir(parents=True, exist_ok=True) + model.save_pretrained(str(dst)) + + if write_model_card: + card = model.generate_model_card( + dataset_repo_id=dataset_repo_id, + model_type=model.config.type, + license=model.config.license, + tags=model.config.tags, + ) + card.save(str(dst / "README.md")) + + return dst + + +def _parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser( + description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter + ) + parser.add_argument( + "--src", + default="robometer/Robometer-4B", + help="Upstream Robometer source (HF repo id or local directory).", + ) + parser.add_argument( + "--dst", + required=True, + help="Output directory for the LeRobot-format checkpoint.", + ) + parser.add_argument( + "--device", + default="cpu", + help="Torch device to load the model on (default: cpu). Conversion only " + "needs CPU; use cuda if you also want to smoke-test inference.", + ) + parser.add_argument( + "--dataset", + default="", + help="Optional Hugging Face dataset id used for training " + "(e.g. `robometer/RBM-1M`). Written into the auto-generated model card's " + "`datasets:` metadata.", + ) + parser.add_argument( + "--no-readme", + action="store_true", + help="Skip writing README.md. Use if you want to author the model card by hand.", + ) + return parser.parse_args() + + +def main() -> None: + init_logging() + args = _parse_args() + out = export_robometer_to_lerobot( + src=args.src, + dst=args.dst, + device=args.device, + dataset_repo_id=args.dataset, + write_model_card=not args.no_readme, + ) + logging.info("Saved LeRobot-format Robometer checkpoint to %s", out) + + +if __name__ == "__main__": + main() diff --git a/src/lerobot/templates/lerobot_modelcard_template.md b/src/lerobot/templates/lerobot_modelcard_template.md index f0dd0da07..b93e83b6e 100644 --- a/src/lerobot/templates/lerobot_modelcard_template.md +++ b/src/lerobot/templates/lerobot_modelcard_template.md @@ -41,8 +41,6 @@ For more details, see the [Physical Intelligence π₀ blog post](https://www.ph For more details, see the [Physical Intelligence π₀.₅ blog post](https://www.physicalintelligence.company/blog/pi05). {% elif model_name == "gaussian_actor" %} This is a Gaussian Actor policy (Gaussian policy with a tanh squash) — the policy-side component used by [Soft Actor-Critic (SAC)](https://huggingface.co/papers/1801.01290) and related maximum-entropy continuous-control algorithms. -{% elif model_name == "reward_classifier" %} -A reward classifier is a lightweight neural network that scores observations or trajectories for task success, providing a learned reward signal or offline evaluation when explicit rewards are unavailable. {% else %} _Model type not recognized — please update this template._ {% endif %} diff --git a/src/lerobot/templates/lerobot_rewardmodel_modelcard_template.md b/src/lerobot/templates/lerobot_rewardmodel_modelcard_template.md index 933bf7586..691e0801b 100644 --- a/src/lerobot/templates/lerobot_rewardmodel_modelcard_template.md +++ b/src/lerobot/templates/lerobot_rewardmodel_modelcard_template.md @@ -13,6 +13,8 @@ A reward classifier is a lightweight neural network that scores observations or trajectories for task success, providing a learned reward signal or offline evaluation when explicit rewards are unavailable. {% elif model_name == "sarm" %} A Success-Aware Reward Model (SARM) predicts a dense reward signal from observations, typically used downstream for reinforcement learning or human-in-the-loop fine-tuning when task success is not directly observable. +{% elif model_name == "robometer" %} +Robometer is a zero-shot general-purpose robotic reward model built on a fine-tuned Qwen3-VL backbone with progress, preference, and success heads. Given a video and a task description it outputs a per-frame progress signal in [0, 1] and a per-frame success probability — suitable for offline reward labelling and for low-frequency reward signals during RL fine-tuning of robot policies. {% else %} _Reward model type not recognized — please update this template._ {% endif %} diff --git a/tests/rewards/test_robometer.py b/tests/rewards/test_robometer.py new file mode 100644 index 000000000..3077df351 --- /dev/null +++ b/tests/rewards/test_robometer.py @@ -0,0 +1,299 @@ +# Copyright 2026 The HuggingFace Inc. team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests for the in-tree Robometer reward model.""" + +from __future__ import annotations + +from types import SimpleNamespace + +import pytest +import torch + +from lerobot.configs.rewards import RewardModelConfig +from lerobot.rewards.factory import get_reward_model_class, make_reward_model_config +from lerobot.rewards.robometer import RobometerConfig +from lerobot.rewards.robometer.modeling_robometer import ( + ROBOMETER_FEATURE_PREFIX, + convert_bins_to_continuous, + decode_progress_outputs, +) + + +class _FakeQwenConfig: + """Stand-in for a Qwen3-VL config (the `model.config` attribute). + + ``to_dict`` matches HF's ``PretrainedConfig.to_dict`` closely enough for + ``RobometerRewardModel._save_pretrained`` to snapshot a meaningful + ``vlm_config`` into the saved ``config.json`` and for the reload path + to round-trip through ``AutoConfig.for_model``. + """ + + def __init__(self, hidden_dim: int = 8, vocab_size: int = 100) -> None: + self.text_config = SimpleNamespace(hidden_size=hidden_dim, vocab_size=vocab_size) + self._hidden_dim = hidden_dim + self._vocab_size = vocab_size + + def to_dict(self) -> dict: + return { + "model_type": "fake_qwen", + "text_config": { + "hidden_size": self._hidden_dim, + "vocab_size": self._vocab_size, + }, + } + + +class _FakeEmbeddings(torch.nn.Module): + def __init__(self, num_embeddings: int = 100) -> None: + super().__init__() + self.num_embeddings = num_embeddings + + +class _FakeBaseModel(torch.nn.Module): + """Stand-in for the Qwen3-VL backbone during tests. + + Provides the minimum surface `RobometerRewardModel.__init__` and + `_compute_rbm_logits` rely on: a `parameters()` iterator (for dtype + + device), a `config.text_config.hidden_size`, a `config.to_dict()` so + `_save_pretrained` can snapshot `vlm_config`, + `get_input_embeddings()` / `resize_token_embeddings()` so the fresh-init + embed resize is a no-op, and a forward that returns a `SimpleNamespace` + with a `hidden_states` tuple. + """ + + def __init__(self, hidden_dim: int = 8) -> None: + super().__init__() + self._param = torch.nn.Parameter(torch.zeros(1)) + self.hidden_dim = hidden_dim + self.config = _FakeQwenConfig(hidden_dim) + self._embeddings = _FakeEmbeddings() + + def get_input_embeddings(self) -> _FakeEmbeddings: + return self._embeddings + + def resize_token_embeddings(self, new_size: int) -> None: + self._embeddings.num_embeddings = new_size + + def forward(self, **kwargs): # noqa: ARG002 - intentional kwargs sink + input_ids = kwargs["input_ids"] + return SimpleNamespace( + hidden_states=(torch.zeros(input_ids.shape[0], input_ids.shape[1], self.hidden_dim),), + last_hidden_state=torch.zeros(input_ids.shape[0], input_ids.shape[1], self.hidden_dim), + ) + + +class _FakeTokenizer: + """Minimal stand-in for an HF tokenizer. + + ``RobometerConfig.__post_init__`` uses ``len(tokenizer)`` to compute the + deterministic resize target ``len(tokenizer) + len(ROBOMETER_SPECIAL_TOKENS)``, + so a working ``__len__`` is all we need. + """ + + def __init__(self, length: int = 100) -> None: + self._length = length + + def __len__(self) -> int: + return self._length + + +def _patch_build(monkeypatch) -> None: + """Stub out the HF AutoX calls so Robometer construction stays cheap in tests. + + Covers (EO-1 style — no model-side override hooks): + * ``AutoConfig.from_pretrained`` (config side) — used by + ``RobometerConfig.__post_init__`` to snapshot the backbone config. + * ``AutoTokenizer.from_pretrained`` (config side) — used by + ``__post_init__`` to compute ``len(tokenizer) + 5``. + * ``AutoConfig.for_model`` — used by + ``RobometerConfig.vlm_backbone_config`` when rebuilding for ``from_config``. + * ``AutoModelForImageTextToText.from_pretrained`` — fresh-training path + (``pretrained_path is None``). + * ``AutoModelForImageTextToText.from_config`` — checkpoint-reload path + (``pretrained_path`` is set). + """ + from lerobot.rewards.robometer import configuration_robometer, modeling_robometer + + monkeypatch.setattr( + modeling_robometer.AutoModelForImageTextToText, + "from_pretrained", + lambda *args, **kwargs: _FakeBaseModel(hidden_dim=8), + ) + monkeypatch.setattr( + modeling_robometer.AutoModelForImageTextToText, + "from_config", + lambda *args, **kwargs: _FakeBaseModel(hidden_dim=8), + ) + monkeypatch.setattr( + configuration_robometer.AutoConfig, + "for_model", + lambda *args, **kwargs: _FakeQwenConfig(hidden_dim=8), + ) + monkeypatch.setattr( + configuration_robometer.AutoConfig, + "from_pretrained", + lambda *args, **kwargs: _FakeQwenConfig(hidden_dim=8), + ) + monkeypatch.setattr( + configuration_robometer.AutoTokenizer, + "from_pretrained", + lambda *args, **kwargs: _FakeTokenizer(length=100), + ) + + +def _make_batch(features: dict[str, torch.Tensor]) -> dict[str, torch.Tensor]: + """Build a `compute_reward`-ready batch using Robometer's namespaced keys.""" + return {f"{ROBOMETER_FEATURE_PREFIX}{key}": value for key, value in features.items()} + + +def test_robometer_config_registered(monkeypatch): + _patch_build(monkeypatch) + assert "robometer" in RewardModelConfig.get_known_choices() + assert RewardModelConfig.get_choice_class("robometer") is RobometerConfig + assert isinstance(make_reward_model_config("robometer", device="cpu"), RobometerConfig) + + +def test_robometer_factory_returns_in_tree_class(): + from lerobot.rewards.robometer.modeling_robometer import RobometerRewardModel + + assert get_reward_model_class("robometer") is RobometerRewardModel + + +def test_convert_bins_to_continuous_returns_expected_values(): + # Two frames: first peaks at bin 0 (center 0.0), second peaks at bin 9 (center 1.0). + bin_logits = torch.full((2, 10), -10.0) + bin_logits[0, 0] = 10.0 + bin_logits[1, -1] = 10.0 + values = convert_bins_to_continuous(bin_logits) + assert values.shape == (2,) + assert torch.allclose(values, torch.tensor([0.0, 1.0]), atol=1e-3) + + +def test_decode_progress_outputs_returns_last_frame_values(): + progress = torch.tensor([[0.1, 0.9], [0.4, 0.6]]) + success_logits = torch.tensor([[0.0, 5.0], [0.0, -5.0]]) + + outputs = decode_progress_outputs(progress, success_logits, is_discrete_mode=False) + + assert outputs["progress_pred"] == [pytest.approx([0.1, 0.9]), pytest.approx([0.4, 0.6])] + assert outputs["success_probs"][0][-1] == pytest.approx(torch.sigmoid(torch.tensor(5.0)).item(), abs=1e-3) + assert outputs["success_probs"][1][-1] == pytest.approx( + torch.sigmoid(torch.tensor(-5.0)).item(), abs=1e-3 + ) + + +def test_decode_progress_outputs_discrete_mode_softmaxes_over_bins(): + # 2 frames, peaks at bin 0 and bin 9 → continuous predictions 0.0 and 1.0 + bin_logits = torch.full((1, 2, 10), -10.0) + bin_logits[0, 0, 0] = 10.0 + bin_logits[0, 1, -1] = 10.0 + + outputs = decode_progress_outputs(bin_logits, success_logits=None, is_discrete_mode=True) + + assert outputs["success_probs"] == [] + assert outputs["progress_pred"][0] == pytest.approx([0.0, 1.0], abs=1e-3) + + +def test_robometer_compute_reward_reads_pre_encoded_inputs(monkeypatch): + from lerobot.rewards.robometer.modeling_robometer import RobometerRewardModel + + progress = torch.tensor([[0.1, 0.9], [0.4, 0.6]]) + success_logits = torch.tensor([[0.0, 5.0], [0.0, -5.0]]) + _patch_build(monkeypatch) + + cfg = RobometerConfig(device="cpu", reward_output="progress", progress_loss_type="l2") + model = RobometerRewardModel(cfg) + # Bypass the Qwen3-VL forward + head extraction with deterministic logits. + monkeypatch.setattr(model, "_compute_rbm_logits", lambda _inputs: (progress, success_logits)) + + batch = _make_batch({"input_ids": torch.zeros(2, 2, dtype=torch.long)}) + rewards = model.compute_reward(batch) + + assert torch.allclose(rewards, torch.tensor([0.9, 0.6])) + + +def test_robometer_compute_reward_can_return_binary_success(monkeypatch): + from lerobot.rewards.robometer.modeling_robometer import RobometerRewardModel + + progress = torch.tensor([[0.1, 0.9], [0.4, 0.6]]) + success_logits = torch.tensor([[0.0, 5.0], [0.0, -5.0]]) # sigmoid(5) > 0.5; sigmoid(-5) < 0.5 + _patch_build(monkeypatch) + + cfg = RobometerConfig( + device="cpu", + reward_output="success", + success_threshold=0.5, + progress_loss_type="l2", + ) + model = RobometerRewardModel(cfg) + monkeypatch.setattr(model, "_compute_rbm_logits", lambda _inputs: (progress, success_logits)) + + batch = _make_batch({"input_ids": torch.zeros(2, 2, dtype=torch.long)}) + rewards = model.compute_reward(batch) + + assert torch.equal(rewards, torch.tensor([1.0, 0.0])) + + +def test_robometer_compute_reward_errors_when_inputs_missing(monkeypatch): + from lerobot.rewards.robometer.modeling_robometer import RobometerRewardModel + + _patch_build(monkeypatch) + + cfg = RobometerConfig(device="cpu", progress_loss_type="l2") + model = RobometerRewardModel(cfg) + + with pytest.raises(KeyError, match=r"observation\.robometer\.input_ids"): + model.compute_reward({}) + + +def test_robometer_save_pretrained_roundtrips(monkeypatch, tmp_path): + """Saving and reloading a Robometer model in LeRobot HF format must produce + a single ``model.safetensors`` + ``config.json`` (no Hydra ``config.yaml``). + """ + from huggingface_hub.constants import CONFIG_NAME, SAFETENSORS_SINGLE_FILE + + from lerobot.rewards.robometer.modeling_robometer import RobometerRewardModel + + _patch_build(monkeypatch) + cfg = RobometerConfig( + device="cpu", + pretrained_path="robometer/Robometer-4B", + # Knobs the user might tweak — must survive the round-trip. + image_key="observation.images.cam_top", + task_key="task", + reward_output="success", + success_threshold=0.7, + progress_loss_type="l2", + ) + model = RobometerRewardModel(cfg) + model.save_pretrained(str(tmp_path)) + + # Exactly the files LeRobot's HubMixin promises. + assert (tmp_path / CONFIG_NAME).exists() + assert (tmp_path / SAFETENSORS_SINGLE_FILE).exists() + assert not (tmp_path / "config.yaml").exists() # we want HF-style, not Hydra + + # Reload from the local directory: no Hub fetch, no YAML overlay. The + # base class drives subclass dispatch via the `type` field in config.json. + reloaded_cfg = RewardModelConfig.from_pretrained(str(tmp_path)) + assert isinstance(reloaded_cfg, RobometerConfig) + reloaded_cfg.pretrained_path = str(tmp_path) # mimic lerobot-train's `validate()` + reloaded = RobometerRewardModel.from_pretrained(str(tmp_path), config=reloaded_cfg) + + assert reloaded.config.image_key == "observation.images.cam_top" + assert reloaded.config.task_key == "task" + assert reloaded.config.reward_output == "success" + assert reloaded.config.success_threshold == 0.7 + assert reloaded.config.progress_loss_type == "l2" # came back from config.json diff --git a/uv.lock b/uv.lock index 408a9a351..ea3bf4443 100644 --- a/uv.lock +++ b/uv.lock @@ -1,5 +1,5 @@ version = 1 -revision = 2 +revision = 3 requires-python = ">=3.12" resolution-markers = [ "(python_full_version >= '3.15' and platform_machine == 'AMD64' and sys_platform == 'linux') or (python_full_version >= '3.15' and platform_machine == 'x86_64' and sys_platform == 'linux')", @@ -1142,7 +1142,7 @@ name = "decord" version = "0.6.0" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "numpy", marker = "(platform_machine != 'arm64' and sys_platform == 'darwin') or (platform_machine == 'AMD64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')" }, + { name = "numpy", marker = "(platform_machine != 'arm64' and platform_machine != 's390x' and sys_platform == 'darwin') or (platform_machine == 'AMD64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux') or (platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux')" }, ] wheels = [ { url = "https://files.pythonhosted.org/packages/11/79/936af42edf90a7bd4e41a6cac89c913d4b47fa48a26b042d5129a9242ee3/decord-0.6.0-py3-none-manylinux2010_x86_64.whl", hash = "sha256:51997f20be8958e23b7c4061ba45d0efcd86bffd5fe81c695d0befee0d442976", size = 13602299, upload-time = "2021-06-14T21:30:55.486Z" }, @@ -2972,6 +2972,11 @@ qwen-vl-utils-dep = [ reachy2 = [ { name = "reachy2-sdk" }, ] +robometer = [ + { name = "peft" }, + { name = "qwen-vl-utils" }, + { name = "transformers" }, +] robstride = [ { name = "python-can" }, ] @@ -3122,6 +3127,7 @@ requires-dist = [ { name = "lerobot", extras = ["peft"], marker = "extra == 'all'" }, { name = "lerobot", extras = ["peft-dep"], marker = "extra == 'groot'" }, { name = "lerobot", extras = ["peft-dep"], marker = "extra == 'peft'" }, + { name = "lerobot", extras = ["peft-dep"], marker = "extra == 'robometer'" }, { name = "lerobot", extras = ["peft-dep"], marker = "extra == 'wallx'" }, { name = "lerobot", extras = ["phone"], marker = "extra == 'all'" }, { name = "lerobot", extras = ["pi"], marker = "extra == 'all'" }, @@ -3139,6 +3145,7 @@ requires-dist = [ { name = "lerobot", extras = ["pyzmq-dep"], marker = "extra == 'lekiwi'" }, { name = "lerobot", extras = ["pyzmq-dep"], marker = "extra == 'unitree-g1'" }, { name = "lerobot", extras = ["qwen-vl-utils-dep"], marker = "extra == 'eo1'" }, + { name = "lerobot", extras = ["qwen-vl-utils-dep"], marker = "extra == 'robometer'" }, { name = "lerobot", extras = ["qwen-vl-utils-dep"], marker = "extra == 'sarm'" }, { name = "lerobot", extras = ["qwen-vl-utils-dep"], marker = "extra == 'wallx'" }, { name = "lerobot", extras = ["reachy2"], marker = "extra == 'all'" }, @@ -3160,6 +3167,7 @@ requires-dist = [ { name = "lerobot", extras = ["transformers-dep"], marker = "extra == 'multi-task-dit'" }, { name = "lerobot", extras = ["transformers-dep"], marker = "extra == 'peft'" }, { name = "lerobot", extras = ["transformers-dep"], marker = "extra == 'pi'" }, + { name = "lerobot", extras = ["transformers-dep"], marker = "extra == 'robometer'" }, { name = "lerobot", extras = ["transformers-dep"], marker = "extra == 'sarm'" }, { name = "lerobot", extras = ["transformers-dep"], marker = "extra == 'smolvla'" }, { name = "lerobot", extras = ["transformers-dep"], marker = "extra == 'wallx'" }, @@ -3227,7 +3235,7 @@ requires-dist = [ { name = "transformers", marker = "extra == 'transformers-dep'", specifier = ">=5.4.0,<5.6.0" }, { name = "wandb", marker = "extra == 'training'", specifier = ">=0.24.0,<0.25.0" }, ] -provides-extras = ["dataset", "training", "hardware", "viz", "core-scripts", "evaluation", "dataset-viz", "av-dep", "pygame-dep", "placo-dep", "transformers-dep", "grpcio-dep", "can-dep", "peft-dep", "scipy-dep", "diffusers-dep", "qwen-vl-utils-dep", "matplotlib-dep", "pyserial-dep", "deepdiff-dep", "pynput-dep", "pyzmq-dep", "feetech", "dynamixel", "damiao", "robstride", "openarms", "gamepad", "hopejr", "lekiwi", "unitree-g1", "reachy2", "kinematics", "intelrealsense", "phone", "diffusion", "wallx", "pi", "smolvla", "multi-task-dit", "groot", "sarm", "xvla", "eo1", "hilserl", "async", "peft", "dev", "notebook", "test", "video-benchmark", "aloha", "pusht", "libero", "metaworld", "all"] +provides-extras = ["dataset", "training", "hardware", "viz", "core-scripts", "evaluation", "dataset-viz", "av-dep", "pygame-dep", "placo-dep", "transformers-dep", "grpcio-dep", "can-dep", "peft-dep", "scipy-dep", "diffusers-dep", "qwen-vl-utils-dep", "matplotlib-dep", "pyserial-dep", "deepdiff-dep", "pynput-dep", "pyzmq-dep", "feetech", "dynamixel", "damiao", "robstride", "openarms", "gamepad", "hopejr", "lekiwi", "unitree-g1", "reachy2", "kinematics", "intelrealsense", "phone", "diffusion", "wallx", "pi", "smolvla", "multi-task-dit", "groot", "sarm", "robometer", "xvla", "eo1", "hilserl", "async", "peft", "dev", "notebook", "test", "video-benchmark", "aloha", "pusht", "libero", "metaworld", "all"] [[package]] name = "librt"