refactor(lingbot_va): drop hardcoded action quantiles; source from checkpoint

The LIBERO/RoboTwin action (un)normalization quantiles were hardcoded as module
constants in processor_lingbot_va.py. They are already serialized into each
checkpoint's policy_postprocessor.json (via LingBotVAActionUnnormalizeStep.get_config)
and restored on load by PolicyProcessorPipeline.from_pretrained, so the constants are
dead at eval/load time for the released checkpoints (verified: libero_long/robotwin/base
all carry their quantiles on the Hub).

- Remove LIBERO_ACTION_Q01/Q99, ROBOTWIN_ACTION_Q01/Q99 and _default_action_quantiles.
- make_lingbot_va_pre_post_processors now defaults a fresh (unconverted) build to a
  neutral [-1, 1] mapping (identity rescale); real per-benchmark stats come from the
  saved checkpoint (or postprocessor_overrides), analogous to dataset-stats normalization.
- Update the config doc comment to point at the checkpoint as the source of truth.
- Tests: replace the LIBERO-default assertion with a neutral-default check, and add a
  save_pretrained/from_pretrained round-trip guard for the quantile serialization.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
Pepijn
2026-06-08 11:22:42 +02:00
parent ab49c71c22
commit c6a51b9b60
3 changed files with 39 additions and 62 deletions
@@ -102,8 +102,9 @@ class LingBotVAConfig(PreTrainedConfig):
max_sequence_length: int = 512 # UMT5 prompt length
# Subset of the 30-d action space actually used by the benchmark (LIBERO = 7-DoF).
# The fixed action (un)normalization quantiles live in the post-processor
# (``LingBotVAActionUnnormalizeStep`` in ``processor_lingbot_va.py``), not here.
# The fixed action (un)normalization quantiles are neither stored here nor hardcoded in the
# processor: they are serialized into the checkpoint's ``policy_postprocessor.json``
# (``LingBotVAActionUnnormalizeStep``) and restored on load by ``from_pretrained``.
used_action_channel_ids: list[int] = field(default_factory=lambda: list(range(7)))
# Opt-in: VAE-decode the predicted video latents and stash them on
@@ -47,59 +47,14 @@ from lerobot.utils.constants import (
from .configuration_lingbot_va import LingBotVAConfig
# LIBERO action-normalization quantiles for the single 7-DoF arm + gripper setup, copied
# verbatim from upstream wan_va/configs/va_libero_cfg.py (channels 0-6 of a 30-dim action space).
# They are the fixed (un)normalization stats baked into the released LIBERO checkpoint; the
# processor owns them and they are serialized into the saved post-processor config.
LIBERO_ACTION_Q01 = [
    -0.6589285731315613, -0.84375, -0.9375, -0.12107142806053162,
    -0.15964286029338837, -0.26571428775787354, -1.0,
]  # fmt: skip
LIBERO_ACTION_Q99 = [
    0.8999999761581421, 0.8544642925262451, 0.9375, 0.17142857611179352,
    0.1842857152223587, 0.34392857551574707, 1.0,
]  # fmt: skip

# RoboTwin action quantiles, copied verbatim from the ``norm_stat`` entry of upstream
# wan_va/configs/va_robotwin_cfg.py and reordered to the model's used-channel layout
# [left xyz+quat (0-6), left gripper (28), right xyz+quat (7-13), right gripper (29)] = 16 channels.
# Positions are quantile-scaled; quaternion channels use the neutral [-1, 1] mapping (rotations
# pass through) and gripper channels use [0, 1].
_NEUTRAL_QUAT_Q01 = [-1.0] * 4
_NEUTRAL_QUAT_Q99 = [1.0] * 4
_GRIPPER_Q01 = 0.0
_GRIPPER_Q99 = 1.0

ROBOTWIN_ACTION_Q01 = [
    # left arm: xyz, then quaternion
    -0.06172713458538055, -3.6716461181640625e-05, -0.08783501386642456, *_NEUTRAL_QUAT_Q01,
    # left gripper
    _GRIPPER_Q01,
    # right arm: xyz, then quaternion
    -0.3547105032205582, -1.3113021850585938e-06, -0.11975435614585876, *_NEUTRAL_QUAT_Q01,
    # right gripper
    _GRIPPER_Q01,
]  # fmt: skip
ROBOTWIN_ACTION_Q99 = [
    # left arm: xyz, then quaternion
    0.3462600058317184, 0.39966784834861746, 0.14745532035827624, *_NEUTRAL_QUAT_Q99,
    # left gripper
    _GRIPPER_Q99,
    # right arm: xyz, then quaternion
    0.034201726913452024, 0.39142737388610793, 0.1792279863357542, *_NEUTRAL_QUAT_Q99,
    # right gripper
    _GRIPPER_Q99,
]  # fmt: skip
def _default_action_quantiles(n_used: int) -> tuple[list[float], list[float]]:
    """Look up the fixed ``(q01, q99)`` pair for a given number of used action channels.

    The benchmark is inferred from the channel count alone: a count equal to
    ``len(LIBERO_ACTION_Q01)`` (7, single 7-DoF arm) selects the LIBERO quantiles, and a count
    equal to ``len(ROBOTWIN_ACTION_Q01)`` (16, dual-arm eef pose + grippers) selects the RoboTwin
    ones. Any other count yields the neutral ``[-1, 1]`` mapping, i.e. no rescale.

    Args:
        n_used: Number of action channels the policy actually uses.

    Returns:
        A ``(q01, q99)`` tuple of newly created lists, each of length ``n_used``; the module-level
        constants are copied, never returned directly.
    """
    # Checked in order, so LIBERO wins if both tables ever had the same length.
    known_tables = (
        (LIBERO_ACTION_Q01, LIBERO_ACTION_Q99),
        (ROBOTWIN_ACTION_Q01, ROBOTWIN_ACTION_Q99),
    )
    for lower, upper in known_tables:
        if len(lower) == n_used:
            return list(lower), list(upper)
    # Unknown channel count: fall back to the identity rescale.
    return [-1.0] * n_used, [1.0] * n_used
# LingBot-VA applies a *fixed* per-channel action quantile (un)normalization rather than
# dataset-derived stats. The benchmark-specific quantiles (LIBERO 7-DoF, RoboTwin 16-d eef) are
# deliberately NOT hardcoded here: they are serialized into each checkpoint's
# ``policy_postprocessor.json`` (via ``LingBotVAActionUnnormalizeStep.get_config``) and restored on
# load by ``PolicyProcessorPipeline.from_pretrained``. A freshly built (unconverted) policy defaults
# to a neutral ``[-1, 1]`` mapping (identity rescale), so the real stats must come from the
# checkpoint or be supplied via ``postprocessor_overrides``; a freshly built post-processor leaves
# the predicted actions un-rescaled. To regenerate a checkpoint from scratch, take the quantiles
# from the upstream ``wan_va/configs/va_{libero,robotwin}_cfg.py`` and pass them to
# ``LingBotVAActionUnnormalizeStep`` as ``action_q01`` / ``action_q99``.
@dataclass
@@ -148,9 +103,11 @@ def make_lingbot_va_pre_post_processors(
DeviceProcessorStep(device=config.device),
]
action_q01, action_q99 = _default_action_quantiles(len(config.used_action_channel_ids))
# Fresh-build default: neutral [-1, 1] mapping (identity rescale). The real per-benchmark
# quantiles are restored from the checkpoint's saved post-processor config by from_pretrained.
n_used = len(config.used_action_channel_ids)
output_steps: list[ProcessorStep] = [
LingBotVAActionUnnormalizeStep(action_q01=action_q01, action_q99=action_q99),
LingBotVAActionUnnormalizeStep(action_q01=[-1.0] * n_used, action_q99=[1.0] * n_used),
DeviceProcessorStep(device="cpu"),
]
+24 -5
View File
@@ -21,10 +21,10 @@ import torch
from lerobot.configs.types import FeatureType, PolicyFeature
from lerobot.policies.lingbot_va.configuration_lingbot_va import LingBotVAConfig
from lerobot.policies.lingbot_va.processor_lingbot_va import (
LIBERO_ACTION_Q01,
LingBotVAActionUnnormalizeStep,
make_lingbot_va_pre_post_processors,
)
from lerobot.processor import PolicyProcessorPipeline
from lerobot.utils.constants import (
OBS_IMAGES,
POLICY_POSTPROCESSOR_DEFAULT_NAME,
@@ -73,10 +73,29 @@ def test_make_pre_post_processors_names_and_steps() -> None:
assert any(isinstance(s, LingBotVAActionUnnormalizeStep) for s in post.steps)
def test_postprocessor_applies_unnormalization() -> None:
def test_freshly_built_postprocessor_is_neutral() -> None:
# A fresh (unconverted) policy defaults to a neutral [-1, 1] mapping (identity rescale): the real
# per-benchmark quantiles are NOT hardcoded, they are restored from the saved checkpoint on load.
cfg = _make_config()
_, post = make_lingbot_va_pre_post_processors(cfg, dataset_stats=None)
# A normalized action of all -1 should map back to q01 (the LIBERO 7-DoF default quantiles).
normed = torch.full((1, len(cfg.used_action_channel_ids)), -1.0)
normed = torch.tensor([[0.3, -0.5, 1.0, -1.0, 0.0, 0.7, -0.2]])
out = post(normed)
assert torch.allclose(out, torch.tensor(LIBERO_ACTION_Q01).unsqueeze(0), atol=1e-4)
assert torch.allclose(out, normed, atol=1e-4)
def test_postprocessor_quantiles_survive_save_load(tmp_path) -> None:
    """Check that the action quantiles round-trip through save_pretrained/from_pretrained.

    Regression guard for the Hub mechanism this policy relies on: the benchmark quantiles live in
    the serialized post-processor config, so they must come back unchanged after a save/load cycle.
    """
    expected_q01 = [-0.6, -0.8, -0.9, -0.1, -0.15, -0.25, -1.0]
    expected_q99 = [0.9, 0.85, 0.9, 0.17, 0.18, 0.34, 1.0]

    # Build a minimal post-processor holding only the unnormalize step, then persist it.
    unnormalize = LingBotVAActionUnnormalizeStep(action_q01=expected_q01, action_q99=expected_q99)
    pipeline = PolicyProcessorPipeline[torch.Tensor, torch.Tensor](
        steps=[unnormalize],
        name=POLICY_POSTPROCESSOR_DEFAULT_NAME,
    )
    pipeline.save_pretrained(tmp_path)

    # Reload from the directory just written, addressing the config file by its default name.
    config_name = f"{POLICY_POSTPROCESSOR_DEFAULT_NAME}.json"
    restored = PolicyProcessorPipeline.from_pretrained(tmp_path, config_filename=config_name)

    restored_step = next(
        candidate
        for candidate in restored.steps
        if isinstance(candidate, LingBotVAActionUnnormalizeStep)
    )
    assert restored_step.action_q01 == expected_q01
    assert restored_step.action_q99 == expected_q99