From c6a51b9b60cd461cbc7caf602d1d8b7292e7764f Mon Sep 17 00:00:00 2001
From: Pepijn <pepijn@huggingface.co>
Date: Mon, 8 Jun 2026 11:22:42 +0200
Subject: [PATCH] refactor(lingbot_va): drop hardcoded action quantiles; source
 from checkpoint

The LIBERO/RoboTwin action (un)normalization quantiles were hardcoded as module
constants in processor_lingbot_va.py. They are already serialized into each
checkpoint's policy_postprocessor.json (via LingBotVAActionUnnormalizeStep.get_config)
and restored on load by PolicyProcessorPipeline.from_pretrained, so the constants are
dead at eval/load time for the released checkpoints (verified: libero_long/robotwin/base
all carry their quantiles on the Hub).

- Remove LIBERO_ACTION_Q01/Q99, ROBOTWIN_ACTION_Q01/Q99 and _default_action_quantiles.
- make_lingbot_va_pre_post_processors now gives a fresh (unconverted) build a neutral
  [-1, 1] mapping (identity rescale) by default; real per-benchmark stats come from the
  saved checkpoint (or postprocessor_overrides), analogous to dataset-stats normalization.
- Update the config doc comment to point at the checkpoint as the source of truth.
- Tests: replace the LIBERO-default assertion with a neutral-default check, and add a
  save_pretrained/from_pretrained round-trip guard for the quantile serialization.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 .../lingbot_va/configuration_lingbot_va.py    |  5 +-
 .../lingbot_va/processor_lingbot_va.py        | 67 ++++---------------
 tests/policies/lingbot_va/test_processor.py   | 29 ++++++--
 3 files changed, 39 insertions(+), 62 deletions(-)

diff --git a/src/lerobot/policies/lingbot_va/configuration_lingbot_va.py b/src/lerobot/policies/lingbot_va/configuration_lingbot_va.py
index 0afe51a54..1f54bd322 100644
--- a/src/lerobot/policies/lingbot_va/configuration_lingbot_va.py
+++ b/src/lerobot/policies/lingbot_va/configuration_lingbot_va.py
@@ -102,8 +102,9 @@ class LingBotVAConfig(PreTrainedConfig):
     max_sequence_length: int = 512  # UMT5 prompt length
 
     # Subset of the 30-d action space actually used by the benchmark (LIBERO = 7-DoF).
-    # The fixed action (un)normalization quantiles live in the post-processor
-    # (``LingBotVAActionUnnormalizeStep`` in ``processor_lingbot_va.py``), not here.
+    # The fixed action (un)normalization quantiles are neither stored here nor hardcoded in the
+    # processor: they are serialized into the checkpoint's ``policy_postprocessor.json``
+    # (``LingBotVAActionUnnormalizeStep``) and restored on load by ``from_pretrained``.
     used_action_channel_ids: list[int] = field(default_factory=lambda: list(range(7)))
 
     # Opt-in: VAE-decode the predicted video latents and stash them on
diff --git a/src/lerobot/policies/lingbot_va/processor_lingbot_va.py b/src/lerobot/policies/lingbot_va/processor_lingbot_va.py
index 217926508..2e616f702 100644
--- a/src/lerobot/policies/lingbot_va/processor_lingbot_va.py
+++ b/src/lerobot/policies/lingbot_va/processor_lingbot_va.py
@@ -47,59 +47,14 @@ from lerobot.utils.constants import (
 
 from .configuration_lingbot_va import LingBotVAConfig
 
-# Upstream LIBERO action-normalization quantiles (single 7-DoF arm + gripper).
-# Verbatim from wan_va/configs/va_libero_cfg.py (channels 0-6 of a 30-dim action space).
-# These are the fixed (un)normalization stats baked into the released LIBERO checkpoint; they
-# live here (in the processor) and are serialized into the saved post-processor config.
-LIBERO_ACTION_Q01 = [
-    -0.6589285731315613,
-    -0.84375,
-    -0.9375,
-    -0.12107142806053162,
-    -0.15964286029338837,
-    -0.26571428775787354,
-    -1.0,
-]
-LIBERO_ACTION_Q99 = [
-    0.8999999761581421,
-    0.8544642925262451,
-    0.9375,
-    0.17142857611179352,
-    0.1842857152223587,
-    0.34392857551574707,
-    1.0,
-]
-
-
-# Upstream RoboTwin action quantiles, reordered to the model's used-channel layout
-# [left xyz+quat (0-6), left gripper (28), right xyz+quat (7-13), right gripper (29)] = 16 channels.
-# Verbatim from wan_va/configs/va_robotwin_cfg.py ``norm_stat`` (quaternion + gripper channels use the
-# neutral [-1, 1] / [0, 1] mapping). Positions are quantile-scaled; rotations pass through.
-ROBOTWIN_ACTION_Q01 = [
-    -0.06172713458538055, -3.6716461181640625e-05, -0.08783501386642456, -1.0, -1.0, -1.0, -1.0,
-    0.0,
-    -0.3547105032205582, -1.3113021850585938e-06, -0.11975435614585876, -1.0, -1.0, -1.0, -1.0,
-    0.0,
-]  # fmt: skip
-ROBOTWIN_ACTION_Q99 = [
-    0.3462600058317184, 0.39966784834861746, 0.14745532035827624, 1.0, 1.0, 1.0, 1.0,
-    1.0,
-    0.034201726913452024, 0.39142737388610793, 0.1792279863357542, 1.0, 1.0, 1.0, 1.0,
-    1.0,
-]  # fmt: skip
-
-
-def _default_action_quantiles(n_used: int) -> tuple[list[float], list[float]]:
-    """Return the fixed (q01, q99) for the used action channels, by benchmark channel count.
-
-    LIBERO = 7 (single 7-DoF arm), RoboTwin = 16 (dual-arm eef pose + grippers). Falls back to a
-    neutral ``[-1, 1]`` mapping (no rescale) for any other channel count.
-    """
-    if n_used == len(LIBERO_ACTION_Q01):
-        return list(LIBERO_ACTION_Q01), list(LIBERO_ACTION_Q99)
-    if n_used == len(ROBOTWIN_ACTION_Q01):
-        return list(ROBOTWIN_ACTION_Q01), list(ROBOTWIN_ACTION_Q99)
-    return [-1.0] * n_used, [1.0] * n_used
+# LingBot-VA applies a *fixed* per-channel action quantile (un)normalization rather than
+# dataset-derived stats. The benchmark-specific quantiles (LIBERO 7-DoF, RoboTwin 16-d eef) are
+# deliberately NOT hardcoded here: they are serialized into each checkpoint's
+# ``policy_postprocessor.json`` (via ``LingBotVAActionUnnormalizeStep.get_config``) and restored on
+# load by ``PolicyProcessorPipeline.from_pretrained``. A freshly built (unconverted) policy defaults
+# to a neutral ``[-1, 1]`` mapping (identity rescale); the real stats always come from the checkpoint
+# (or via ``postprocessor_overrides``). To regenerate a checkpoint from scratch, take the quantiles from
+# upstream ``wan_va/configs/va_{libero,robotwin}_cfg.py`` and pass them as ``action_q01``/``action_q99``.
 
 
 @dataclass
@@ -148,9 +103,11 @@ def make_lingbot_va_pre_post_processors(
         DeviceProcessorStep(device=config.device),
     ]
 
-    action_q01, action_q99 = _default_action_quantiles(len(config.used_action_channel_ids))
+    # Fresh-build default: neutral [-1, 1] mapping (identity rescale). The real per-benchmark
+    # quantiles are restored from the checkpoint's saved post-processor config by from_pretrained.
+    n_used = len(config.used_action_channel_ids)
     output_steps: list[ProcessorStep] = [
-        LingBotVAActionUnnormalizeStep(action_q01=action_q01, action_q99=action_q99),
+        LingBotVAActionUnnormalizeStep(action_q01=[-1.0] * n_used, action_q99=[1.0] * n_used),
         DeviceProcessorStep(device="cpu"),
     ]
 
diff --git a/tests/policies/lingbot_va/test_processor.py b/tests/policies/lingbot_va/test_processor.py
index 7be875648..f1ec5013b 100644
--- a/tests/policies/lingbot_va/test_processor.py
+++ b/tests/policies/lingbot_va/test_processor.py
@@ -21,10 +21,10 @@ import torch
 from lerobot.configs.types import FeatureType, PolicyFeature
 from lerobot.policies.lingbot_va.configuration_lingbot_va import LingBotVAConfig
 from lerobot.policies.lingbot_va.processor_lingbot_va import (
-    LIBERO_ACTION_Q01,
     LingBotVAActionUnnormalizeStep,
     make_lingbot_va_pre_post_processors,
 )
+from lerobot.processor import PolicyProcessorPipeline
 from lerobot.utils.constants import (
     OBS_IMAGES,
     POLICY_POSTPROCESSOR_DEFAULT_NAME,
@@ -73,10 +73,29 @@ def test_make_pre_post_processors_names_and_steps() -> None:
     assert any(isinstance(s, LingBotVAActionUnnormalizeStep) for s in post.steps)
 
 
-def test_postprocessor_applies_unnormalization() -> None:
+def test_freshly_built_postprocessor_is_neutral() -> None:
+    # A fresh (unconverted) policy defaults to a neutral [-1, 1] mapping (identity rescale): the real
+    # per-benchmark quantiles are NOT hardcoded; they are restored from the saved checkpoint on load.
     cfg = _make_config()
     _, post = make_lingbot_va_pre_post_processors(cfg, dataset_stats=None)
-    # A normalized action of all -1 should map back to q01 (the LIBERO 7-DoF default quantiles).
-    normed = torch.full((1, len(cfg.used_action_channel_ids)), -1.0)
+    normed = torch.tensor([[0.3, -0.5, 1.0, -1.0, 0.0, 0.7, -0.2]])
     out = post(normed)
-    assert torch.allclose(out, torch.tensor(LIBERO_ACTION_Q01).unsqueeze(0), atol=1e-4)
+    assert torch.allclose(out, normed, atol=1e-4)
+
+
+def test_postprocessor_quantiles_survive_save_load(tmp_path) -> None:
+    # Regression guard for the Hub mechanism this policy relies on: the benchmark quantiles live in
+    # the serialized post-processor config and must round-trip through save_pretrained/from_pretrained.
+    q01 = [-0.6, -0.8, -0.9, -0.1, -0.15, -0.25, -1.0]
+    q99 = [0.9, 0.85, 0.9, 0.17, 0.18, 0.34, 1.0]
+    post = PolicyProcessorPipeline[torch.Tensor, torch.Tensor](
+        steps=[LingBotVAActionUnnormalizeStep(action_q01=q01, action_q99=q99)],
+        name=POLICY_POSTPROCESSOR_DEFAULT_NAME,
+    )
+    post.save_pretrained(tmp_path)
+    loaded = PolicyProcessorPipeline.from_pretrained(
+        tmp_path, config_filename=f"{POLICY_POSTPROCESSOR_DEFAULT_NAME}.json"
+    )
+    step = next(s for s in loaded.steps if isinstance(s, LingBotVAActionUnnormalizeStep))
+    assert step.action_q01 == q01
+    assert step.action_q99 == q99