refactor(lingbot_va): drop hardcoded action quantiles; source from checkpoint

The LIBERO/RoboTwin action (un)normalization quantiles were hardcoded as module constants in processor_lingbot_va.py. They are already serialized into each checkpoint's policy_postprocessor.json (via LingBotVAActionUnnormalizeStep.get_config) and restored on load by PolicyProcessorPipeline.from_pretrained, so the constants are dead at eval/load time for the released checkpoints (verified: libero_long/robotwin/base all carry their quantiles on the Hub).

- Remove LIBERO_ACTION_Q01/Q99, ROBOTWIN_ACTION_Q01/Q99 and _default_action_quantiles.
- make_lingbot_va_pre_post_processors now defaults a fresh (unconverted) build to a neutral [-1, 1] mapping (identity rescale); real per-benchmark stats come from the saved checkpoint (or postprocessor_overrides), analogous to dataset-stats normalization.
- Update the config doc comment to point at the checkpoint as the source of truth.
- Tests: replace the LIBERO-default assertion with a neutral-default check, and add a save_pretrained/from_pretrained round-trip guard for the quantile serialization.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-17 16:27:04 +00:00 · 2026-06-08 11:22:42 +02:00
parent ab49c71c22
commit c6a51b9b60
3 changed files with 39 additions and 62 deletions
@@ -102,8 +102,9 @@ class LingBotVAConfig(PreTrainedConfig):
    max_sequence_length: int = 512  # UMT5 prompt length

    # Subset of the 30-d action space actually used by the benchmark (LIBERO = 7-DoF).
-    # The fixed action (un)normalization quantiles live in the post-processor
-    # (``LingBotVAActionUnnormalizeStep`` in ``processor_lingbot_va.py``), not here.
+    # The fixed action (un)normalization quantiles are neither stored here nor hardcoded in
+    # the processor: they are serialized into the checkpoint's ``policy_postprocessor.json``
+    # (``LingBotVAActionUnnormalizeStep``) and restored on load by ``from_pretrained``.
    used_action_channel_ids: list[int] = field(default_factory=lambda: list(range(7)))

    # Opt-in: VAE-decode the predicted video latents and stash them on
@@ -47,59 +47,14 @@ from lerobot.utils.constants import (

 from .configuration_lingbot_va import LingBotVAConfig

-# Upstream LIBERO action-normalization quantiles (single 7-DoF arm + gripper).
-# Verbatim from wan_va/configs/va_libero_cfg.py (channels 0-6 of a 30-dim action space).
-# These are the fixed (un)normalization stats baked into the released LIBERO checkpoint; they
-# live here (in the processor) and are serialized into the saved post-processor config.
-LIBERO_ACTION_Q01 = [
-    -0.6589285731315613,
-    -0.84375,
-    -0.9375,
-    -0.12107142806053162,
-    -0.15964286029338837,
-    -0.26571428775787354,
-    -1.0,
-]
-LIBERO_ACTION_Q99 = [
-    0.8999999761581421,
-    0.8544642925262451,
-    0.9375,
-    0.17142857611179352,
-    0.1842857152223587,
-    0.34392857551574707,
-    1.0,
-]
-
-
-# Upstream RoboTwin action quantiles, reordered to the model's used-channel layout
-# [left xyz+quat (0-6), left gripper (28), right xyz+quat (7-13), right gripper (29)] = 16 channels.
-# Verbatim from wan_va/configs/va_robotwin_cfg.py ``norm_stat`` (quaternion + gripper channels use the
-# neutral [-1, 1] / [0, 1] mapping). Positions are quantile-scaled; rotations pass through.
-ROBOTWIN_ACTION_Q01 = [
-    -0.06172713458538055, -3.6716461181640625e-05, -0.08783501386642456, -1.0, -1.0, -1.0, -1.0,
-    0.0,
-    -0.3547105032205582, -1.3113021850585938e-06, -0.11975435614585876, -1.0, -1.0, -1.0, -1.0,
-    0.0,
-]  # fmt: skip
-ROBOTWIN_ACTION_Q99 = [
-    0.3462600058317184, 0.39966784834861746, 0.14745532035827624, 1.0, 1.0, 1.0, 1.0,
-    1.0,
-    0.034201726913452024, 0.39142737388610793, 0.1792279863357542, 1.0, 1.0, 1.0, 1.0,
-    1.0,
-]  # fmt: skip
-
-
-def _default_action_quantiles(n_used: int) -> tuple[list[float], list[float]]:
-    """Return the fixed (q01, q99) for the used action channels, by benchmark channel count.
-
-    LIBERO = 7 (single 7-DoF arm), RoboTwin = 16 (dual-arm eef pose + grippers). Falls back to a
-    neutral ``[-1, 1]`` mapping (no rescale) for any other channel count.
-    """
-    if n_used == len(LIBERO_ACTION_Q01):
-        return list(LIBERO_ACTION_Q01), list(LIBERO_ACTION_Q99)
-    if n_used == len(ROBOTWIN_ACTION_Q01):
-        return list(ROBOTWIN_ACTION_Q01), list(ROBOTWIN_ACTION_Q99)
-    return [-1.0] * n_used, [1.0] * n_used
+# LingBot-VA applies a *fixed* per-channel action quantile (un)normalization rather than
+# dataset-derived stats. The benchmark-specific quantiles (LIBERO 7-DoF, RoboTwin 16-d eef) are
+# deliberately NOT hardcoded here: they are serialized into each checkpoint's
+# ``policy_postprocessor.json`` (via ``LingBotVAActionUnnormalizeStep.get_config``) and restored on
+# load by ``PolicyProcessorPipeline.from_pretrained``. A freshly built (unconverted) policy defaults
+# to a neutral ``[-1, 1]`` mapping (identity rescale); the real stats come from the checkpoint
+# (or from ``postprocessor_overrides``). To regenerate a checkpoint from scratch, source the quantiles
+# from the upstream ``wan_va/configs/va_{libero,robotwin}_cfg.py`` and pass them through.


@dataclass
@@ -148,9 +103,11 @@ def make_lingbot_va_pre_post_processors(
        DeviceProcessorStep(device=config.device),
    ]

-    action_q01, action_q99 = _default_action_quantiles(len(config.used_action_channel_ids))
+    # Fresh-build default: neutral [-1, 1] mapping (identity rescale). The real per-benchmark
+    # quantiles are restored from the checkpoint's saved post-processor config by from_pretrained.
+    n_used = len(config.used_action_channel_ids)
    output_steps: list[ProcessorStep] = [
-        LingBotVAActionUnnormalizeStep(action_q01=action_q01, action_q99=action_q99),
+        LingBotVAActionUnnormalizeStep(action_q01=[-1.0] * n_used, action_q99=[1.0] * n_used),
        DeviceProcessorStep(device="cpu"),
    ]

@@ -21,10 +21,10 @@ import torch
 from lerobot.configs.types import FeatureType, PolicyFeature
 from lerobot.policies.lingbot_va.configuration_lingbot_va import LingBotVAConfig
 from lerobot.policies.lingbot_va.processor_lingbot_va import (
-    LIBERO_ACTION_Q01,
    LingBotVAActionUnnormalizeStep,
    make_lingbot_va_pre_post_processors,
 )
+from lerobot.processor import PolicyProcessorPipeline
 from lerobot.utils.constants import (
    OBS_IMAGES,
    POLICY_POSTPROCESSOR_DEFAULT_NAME,
@@ -73,10 +73,29 @@ def test_make_pre_post_processors_names_and_steps() -> None:
    assert any(isinstance(s, LingBotVAActionUnnormalizeStep) for s in post.steps)


-def test_postprocessor_applies_unnormalization() -> None:
+def test_freshly_built_postprocessor_is_neutral() -> None:
+    # A fresh (unconverted) policy defaults to a neutral [-1, 1] mapping (identity rescale): the real
+    # per-benchmark quantiles are NOT hardcoded; they are restored from the saved checkpoint on load.
    cfg = _make_config()
    _, post = make_lingbot_va_pre_post_processors(cfg, dataset_stats=None)
-    # A normalized action of all -1 should map back to q01 (the LIBERO 7-DoF default quantiles).
-    normed = torch.full((1, len(cfg.used_action_channel_ids)), -1.0)
+    normed = torch.tensor([[0.3, -0.5, 1.0, -1.0, 0.0, 0.7, -0.2]])
    out = post(normed)
-    assert torch.allclose(out, torch.tensor(LIBERO_ACTION_Q01).unsqueeze(0), atol=1e-4)
+    assert torch.allclose(out, normed, atol=1e-4)
+
+
+def test_postprocessor_quantiles_survive_save_load(tmp_path) -> None:
+    # Regression guard for the Hub mechanism this policy relies on: the benchmark quantiles live in
+    # the serialized post-processor config and must round-trip through save_pretrained/from_pretrained.
+    q01 = [-0.6, -0.8, -0.9, -0.1, -0.15, -0.25, -1.0]
+    q99 = [0.9, 0.85, 0.9, 0.17, 0.18, 0.34, 1.0]
+    post = PolicyProcessorPipeline[torch.Tensor, torch.Tensor](
+        steps=[LingBotVAActionUnnormalizeStep(action_q01=q01, action_q99=q99)],
+        name=POLICY_POSTPROCESSOR_DEFAULT_NAME,
+    )
+    post.save_pretrained(tmp_path)
+    loaded = PolicyProcessorPipeline.from_pretrained(
+        tmp_path, config_filename=f"{POLICY_POSTPROCESSOR_DEFAULT_NAME}.json"
+    )
+    step = next(s for s in loaded.steps if isinstance(s, LingBotVAActionUnnormalizeStep))
+    assert step.action_q01 == q01
+    assert step.action_q99 == q99