From c6a51b9b60cd461cbc7caf602d1d8b7292e7764f Mon Sep 17 00:00:00 2001 From: Pepijn Date: Mon, 8 Jun 2026 11:22:42 +0200 Subject: [PATCH] refactor(lingbot_va): drop hardcoded action quantiles; source from checkpoint The LIBERO/RoboTwin action (un)normalization quantiles were hardcoded as module constants in processor_lingbot_va.py. They are already serialized into each checkpoint's policy_postprocessor.json (via LingBotVAActionUnnormalizeStep.get_config) and restored on load by PolicyProcessorPipeline.from_pretrained, so the constants are dead at eval/load time for the released checkpoints (verified: libero_long/robotwin/base all carry their quantiles on the Hub). - Remove LIBERO_ACTION_Q01/Q99, ROBOTWIN_ACTION_Q01/Q99 and _default_action_quantiles. - make_lingbot_va_pre_post_processors now defaults a fresh (unconverted) build to a neutral [-1, 1] mapping (identity rescale); real per-benchmark stats come from the saved checkpoint (or postprocessor_overrides), analogous to dataset-stats normalization. - Update the config doc comment to point at the checkpoint as the source of truth. - Tests: replace the LIBERO-default assertion with a neutral-default check, and add a save_pretrained/from_pretrained round-trip guard for the quantile serialization. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- .../lingbot_va/configuration_lingbot_va.py | 5 +- .../lingbot_va/processor_lingbot_va.py | 67 ++++--------------- tests/policies/lingbot_va/test_processor.py | 29 ++++++-- 3 files changed, 39 insertions(+), 62 deletions(-) diff --git a/src/lerobot/policies/lingbot_va/configuration_lingbot_va.py b/src/lerobot/policies/lingbot_va/configuration_lingbot_va.py index 0afe51a54..1f54bd322 100644 --- a/src/lerobot/policies/lingbot_va/configuration_lingbot_va.py +++ b/src/lerobot/policies/lingbot_va/configuration_lingbot_va.py @@ -102,8 +102,9 @@ class LingBotVAConfig(PreTrainedConfig): max_sequence_length: int = 512 # UMT5 prompt length # Subset of the 30-d action space actually used by the benchmark (LIBERO = 7-DoF). - # The fixed action (un)normalization quantiles live in the post-processor - # (``LingBotVAActionUnnormalizeStep`` in ``processor_lingbot_va.py``), not here. + # The fixed action (un)normalization quantiles are neither stored here nor hardcoded in the + # processor: they are serialized into the checkpoint's ``policy_postprocessor.json`` + # (``LingBotVAActionUnnormalizeStep``) and restored on load by ``from_pretrained``. used_action_channel_ids: list[int] = field(default_factory=lambda: list(range(7))) # Opt-in: VAE-decode the predicted video latents and stash them on diff --git a/src/lerobot/policies/lingbot_va/processor_lingbot_va.py b/src/lerobot/policies/lingbot_va/processor_lingbot_va.py index 217926508..2e616f702 100644 --- a/src/lerobot/policies/lingbot_va/processor_lingbot_va.py +++ b/src/lerobot/policies/lingbot_va/processor_lingbot_va.py @@ -47,59 +47,14 @@ from lerobot.utils.constants import ( from .configuration_lingbot_va import LingBotVAConfig -# Upstream LIBERO action-normalization quantiles (single 7-DoF arm + gripper). -# Verbatim from wan_va/configs/va_libero_cfg.py (channels 0-6 of a 30-dim action space). 
-# These are the fixed (un)normalization stats baked into the released LIBERO checkpoint; they -# live here (in the processor) and are serialized into the saved post-processor config. -LIBERO_ACTION_Q01 = [ - -0.6589285731315613, - -0.84375, - -0.9375, - -0.12107142806053162, - -0.15964286029338837, - -0.26571428775787354, - -1.0, -] -LIBERO_ACTION_Q99 = [ - 0.8999999761581421, - 0.8544642925262451, - 0.9375, - 0.17142857611179352, - 0.1842857152223587, - 0.34392857551574707, - 1.0, -] - - -# Upstream RoboTwin action quantiles, reordered to the model's used-channel layout -# [left xyz+quat (0-6), left gripper (28), right xyz+quat (7-13), right gripper (29)] = 16 channels. -# Verbatim from wan_va/configs/va_robotwin_cfg.py ``norm_stat`` (quaternion + gripper channels use the -# neutral [-1, 1] / [0, 1] mapping). Positions are quantile-scaled; rotations pass through. -ROBOTWIN_ACTION_Q01 = [ - -0.06172713458538055, -3.6716461181640625e-05, -0.08783501386642456, -1.0, -1.0, -1.0, -1.0, - 0.0, - -0.3547105032205582, -1.3113021850585938e-06, -0.11975435614585876, -1.0, -1.0, -1.0, -1.0, - 0.0, -] # fmt: skip -ROBOTWIN_ACTION_Q99 = [ - 0.3462600058317184, 0.39966784834861746, 0.14745532035827624, 1.0, 1.0, 1.0, 1.0, - 1.0, - 0.034201726913452024, 0.39142737388610793, 0.1792279863357542, 1.0, 1.0, 1.0, 1.0, - 1.0, -] # fmt: skip - - -def _default_action_quantiles(n_used: int) -> tuple[list[float], list[float]]: - """Return the fixed (q01, q99) for the used action channels, by benchmark channel count. - - LIBERO = 7 (single 7-DoF arm), RoboTwin = 16 (dual-arm eef pose + grippers). Falls back to a - neutral ``[-1, 1]`` mapping (no rescale) for any other channel count. 
- """ - if n_used == len(LIBERO_ACTION_Q01): - return list(LIBERO_ACTION_Q01), list(LIBERO_ACTION_Q99) - if n_used == len(ROBOTWIN_ACTION_Q01): - return list(ROBOTWIN_ACTION_Q01), list(ROBOTWIN_ACTION_Q99) - return [-1.0] * n_used, [1.0] * n_used +# LingBot-VA applies a *fixed* per-channel action quantile (un)normalization rather than +# dataset-derived stats. The benchmark-specific quantiles (LIBERO 7-DoF, RoboTwin 16-d eef) are +# deliberately NOT hardcoded here: they are serialized into each checkpoint's +# ``policy_postprocessor.json`` (via ``LingBotVAActionUnnormalizeStep.get_config``) and restored on +# load by ``PolicyProcessorPipeline.from_pretrained``. A freshly built (unconverted) policy defaults +# to a neutral ``[-1, 1]`` mapping (identity rescale); the real stats always come from the checkpoint +# (or via ``postprocessor_overrides``). To regenerate a checkpoint from scratch, source the quantiles +# from the upstream ``wan_va/configs/va_{libero,robotwin}_cfg.py`` and pass them through. @dataclass @@ -148,9 +103,11 @@ def make_lingbot_va_pre_post_processors( DeviceProcessorStep(device=config.device), ] - action_q01, action_q99 = _default_action_quantiles(len(config.used_action_channel_ids)) + # Fresh-build default: neutral [-1, 1] mapping (identity rescale). The real per-benchmark + # quantiles are restored from the checkpoint's saved post-processor config by from_pretrained. 
+ n_used = len(config.used_action_channel_ids) output_steps: list[ProcessorStep] = [ - LingBotVAActionUnnormalizeStep(action_q01=action_q01, action_q99=action_q99), + LingBotVAActionUnnormalizeStep(action_q01=[-1.0] * n_used, action_q99=[1.0] * n_used), DeviceProcessorStep(device="cpu"), ] diff --git a/tests/policies/lingbot_va/test_processor.py b/tests/policies/lingbot_va/test_processor.py index 7be875648..f1ec5013b 100644 --- a/tests/policies/lingbot_va/test_processor.py +++ b/tests/policies/lingbot_va/test_processor.py @@ -21,10 +21,10 @@ import torch from lerobot.configs.types import FeatureType, PolicyFeature from lerobot.policies.lingbot_va.configuration_lingbot_va import LingBotVAConfig from lerobot.policies.lingbot_va.processor_lingbot_va import ( - LIBERO_ACTION_Q01, LingBotVAActionUnnormalizeStep, make_lingbot_va_pre_post_processors, ) +from lerobot.processor import PolicyProcessorPipeline from lerobot.utils.constants import ( OBS_IMAGES, POLICY_POSTPROCESSOR_DEFAULT_NAME, @@ -73,10 +73,29 @@ def test_make_pre_post_processors_names_and_steps() -> None: assert any(isinstance(s, LingBotVAActionUnnormalizeStep) for s in post.steps) -def test_postprocessor_applies_unnormalization() -> None: +def test_freshly_built_postprocessor_is_neutral() -> None: + # A fresh (unconverted) policy defaults to a neutral [-1, 1] mapping (identity rescale): the real + # per-benchmark quantiles are NOT hardcoded, they are restored from the saved checkpoint on load. cfg = _make_config() _, post = make_lingbot_va_pre_post_processors(cfg, dataset_stats=None) - # A normalized action of all -1 should map back to q01 (the LIBERO 7-DoF default quantiles). 
- normed = torch.full((1, len(cfg.used_action_channel_ids)), -1.0) + normed = torch.tensor([[0.3, -0.5, 1.0, -1.0, 0.0, 0.7, -0.2]]) out = post(normed) - assert torch.allclose(out, torch.tensor(LIBERO_ACTION_Q01).unsqueeze(0), atol=1e-4) + assert torch.allclose(out, normed, atol=1e-4) + + +def test_postprocessor_quantiles_survive_save_load(tmp_path) -> None: + # Regression guard for the Hub mechanism this policy relies on: the benchmark quantiles live in + # the serialized post-processor config and must round-trip through save_pretrained/from_pretrained. + q01 = [-0.6, -0.8, -0.9, -0.1, -0.15, -0.25, -1.0] + q99 = [0.9, 0.85, 0.9, 0.17, 0.18, 0.34, 1.0] + post = PolicyProcessorPipeline[torch.Tensor, torch.Tensor]( + steps=[LingBotVAActionUnnormalizeStep(action_q01=q01, action_q99=q99)], + name=POLICY_POSTPROCESSOR_DEFAULT_NAME, + ) + post.save_pretrained(tmp_path) + loaded = PolicyProcessorPipeline.from_pretrained( + tmp_path, config_filename=f"{POLICY_POSTPROCESSOR_DEFAULT_NAME}.json" + ) + step = next(s for s in loaded.steps if isinstance(s, LingBotVAActionUnnormalizeStep)) + assert step.action_q01 == q01 + assert step.action_q99 == q99