mirror of
https://github.com/huggingface/lerobot.git
synced 2026-06-17 16:27:04 +00:00
refactor(lingbot_va): drop hardcoded action quantiles; source from checkpoint
The LIBERO/RoboTwin action (un)normalization quantiles were hardcoded as module constants in processor_lingbot_va.py. They are already serialized into each checkpoint's policy_postprocessor.json (via LingBotVAActionUnnormalizeStep.get_config) and restored on load by PolicyProcessorPipeline.from_pretrained, so the constants are dead at eval/load time for the released checkpoints (verified: libero_long/robotwin/base all carry their quantiles on the Hub).

- Remove LIBERO_ACTION_Q01/Q99, ROBOTWIN_ACTION_Q01/Q99 and _default_action_quantiles.
- make_lingbot_va_pre_post_processors now defaults a fresh (unconverted) build to a neutral [-1, 1] mapping (identity rescale); real per-benchmark stats come from the saved checkpoint (or postprocessor_overrides), analogous to dataset-stats normalization.
- Update the config doc comment to point at the checkpoint as the source of truth.
- Tests: replace the LIBERO-default assertion with a neutral-default check, and add a save_pretrained/from_pretrained round-trip guard for the quantile serialization.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -102,8 +102,9 @@ class LingBotVAConfig(PreTrainedConfig):
|
||||
max_sequence_length: int = 512 # UMT5 prompt length
|
||||
|
||||
# Subset of the 30-d action space actually used by the benchmark (LIBERO = 7-DoF).
|
||||
# The fixed action (un)normalization quantiles live in the post-processor
|
||||
# (``LingBotVAActionUnnormalizeStep`` in ``processor_lingbot_va.py``), not here.
|
||||
# The fixed action (un)normalization quantiles are not stored here nor hardcoded in the
|
||||
# processor: they are serialized into the checkpoint's ``policy_postprocessor.json``
|
||||
# (``LingBotVAActionUnnormalizeStep``) and restored on load by ``from_pretrained``.
|
||||
used_action_channel_ids: list[int] = field(default_factory=lambda: list(range(7)))
|
||||
|
||||
# Opt-in: VAE-decode the predicted video latents and stash them on
|
||||
|
||||
@@ -47,59 +47,14 @@ from lerobot.utils.constants import (
|
||||
|
||||
from .configuration_lingbot_va import LingBotVAConfig
|
||||
|
||||
# Upstream LIBERO action-normalization quantiles (single 7-DoF arm + gripper).
|
||||
# Verbatim from wan_va/configs/va_libero_cfg.py (channels 0-6 of a 30-dim action space).
|
||||
# These are the fixed (un)normalization stats baked into the released LIBERO checkpoint; they
|
||||
# live here (in the processor) and are serialized into the saved post-processor config.
|
||||
LIBERO_ACTION_Q01 = [
|
||||
-0.6589285731315613,
|
||||
-0.84375,
|
||||
-0.9375,
|
||||
-0.12107142806053162,
|
||||
-0.15964286029338837,
|
||||
-0.26571428775787354,
|
||||
-1.0,
|
||||
]
|
||||
LIBERO_ACTION_Q99 = [
|
||||
0.8999999761581421,
|
||||
0.8544642925262451,
|
||||
0.9375,
|
||||
0.17142857611179352,
|
||||
0.1842857152223587,
|
||||
0.34392857551574707,
|
||||
1.0,
|
||||
]
|
||||
|
||||
|
||||
# Upstream RoboTwin action quantiles, reordered to the model's used-channel layout
|
||||
# [left xyz+quat (0-6), left gripper (28), right xyz+quat (7-13), right gripper (29)] = 16 channels.
|
||||
# Verbatim from wan_va/configs/va_robotwin_cfg.py ``norm_stat`` (quaternion + gripper channels use the
|
||||
# neutral [-1, 1] / [0, 1] mapping). Positions are quantile-scaled; rotations pass through.
|
||||
ROBOTWIN_ACTION_Q01 = [
|
||||
-0.06172713458538055, -3.6716461181640625e-05, -0.08783501386642456, -1.0, -1.0, -1.0, -1.0,
|
||||
0.0,
|
||||
-0.3547105032205582, -1.3113021850585938e-06, -0.11975435614585876, -1.0, -1.0, -1.0, -1.0,
|
||||
0.0,
|
||||
] # fmt: skip
|
||||
ROBOTWIN_ACTION_Q99 = [
|
||||
0.3462600058317184, 0.39966784834861746, 0.14745532035827624, 1.0, 1.0, 1.0, 1.0,
|
||||
1.0,
|
||||
0.034201726913452024, 0.39142737388610793, 0.1792279863357542, 1.0, 1.0, 1.0, 1.0,
|
||||
1.0,
|
||||
] # fmt: skip
|
||||
|
||||
|
||||
def _default_action_quantiles(n_used: int) -> tuple[list[float], list[float]]:
|
||||
"""Return the fixed (q01, q99) for the used action channels, by benchmark channel count.
|
||||
|
||||
LIBERO = 7 (single 7-DoF arm), RoboTwin = 16 (dual-arm eef pose + grippers). Falls back to a
|
||||
neutral ``[-1, 1]`` mapping (no rescale) for any other channel count.
|
||||
"""
|
||||
if n_used == len(LIBERO_ACTION_Q01):
|
||||
return list(LIBERO_ACTION_Q01), list(LIBERO_ACTION_Q99)
|
||||
if n_used == len(ROBOTWIN_ACTION_Q01):
|
||||
return list(ROBOTWIN_ACTION_Q01), list(ROBOTWIN_ACTION_Q99)
|
||||
return [-1.0] * n_used, [1.0] * n_used
|
||||
# LingBot-VA applies a *fixed* per-channel action quantile (un)normalization rather than
|
||||
# dataset-derived stats. The benchmark-specific quantiles (LIBERO 7-DoF, RoboTwin 16-d eef) are
|
||||
# deliberately NOT hardcoded here: they are serialized into each checkpoint's
|
||||
# ``policy_postprocessor.json`` (via ``LingBotVAActionUnnormalizeStep.get_config``) and restored on
|
||||
# load by ``PolicyProcessorPipeline.from_pretrained``. A freshly built (unconverted) policy defaults
|
||||
# to a neutral ``[-1, 1]`` mapping (identity rescale); the real stats always come from the checkpoint
|
||||
# (or via ``postprocessor_overrides``). To regenerate a checkpoint from scratch, source the quantiles
|
||||
# from the upstream ``wan_va/configs/va_{libero,robotwin}_cfg.py`` and pass them through.
|
||||
|
||||
|
||||
@dataclass
|
||||
@@ -148,9 +103,11 @@ def make_lingbot_va_pre_post_processors(
|
||||
DeviceProcessorStep(device=config.device),
|
||||
]
|
||||
|
||||
action_q01, action_q99 = _default_action_quantiles(len(config.used_action_channel_ids))
|
||||
# Fresh-build default: neutral [-1, 1] mapping (identity rescale). The real per-benchmark
|
||||
# quantiles are restored from the checkpoint's saved post-processor config by from_pretrained.
|
||||
n_used = len(config.used_action_channel_ids)
|
||||
output_steps: list[ProcessorStep] = [
|
||||
LingBotVAActionUnnormalizeStep(action_q01=action_q01, action_q99=action_q99),
|
||||
LingBotVAActionUnnormalizeStep(action_q01=[-1.0] * n_used, action_q99=[1.0] * n_used),
|
||||
DeviceProcessorStep(device="cpu"),
|
||||
]
|
||||
|
||||
|
||||
@@ -21,10 +21,10 @@ import torch
|
||||
from lerobot.configs.types import FeatureType, PolicyFeature
|
||||
from lerobot.policies.lingbot_va.configuration_lingbot_va import LingBotVAConfig
|
||||
from lerobot.policies.lingbot_va.processor_lingbot_va import (
|
||||
LIBERO_ACTION_Q01,
|
||||
LingBotVAActionUnnormalizeStep,
|
||||
make_lingbot_va_pre_post_processors,
|
||||
)
|
||||
from lerobot.processor import PolicyProcessorPipeline
|
||||
from lerobot.utils.constants import (
|
||||
OBS_IMAGES,
|
||||
POLICY_POSTPROCESSOR_DEFAULT_NAME,
|
||||
@@ -73,10 +73,29 @@ def test_make_pre_post_processors_names_and_steps() -> None:
|
||||
assert any(isinstance(s, LingBotVAActionUnnormalizeStep) for s in post.steps)
|
||||
|
||||
|
||||
def test_postprocessor_applies_unnormalization() -> None:
|
||||
def test_freshly_built_postprocessor_is_neutral() -> None:
|
||||
# A fresh (unconverted) policy defaults to a neutral [-1, 1] mapping (identity rescale): the real
|
||||
# per-benchmark quantiles are NOT hardcoded, they are restored from the saved checkpoint on load.
|
||||
cfg = _make_config()
|
||||
_, post = make_lingbot_va_pre_post_processors(cfg, dataset_stats=None)
|
||||
# A normalized action of all -1 should map back to q01 (the LIBERO 7-DoF default quantiles).
|
||||
normed = torch.full((1, len(cfg.used_action_channel_ids)), -1.0)
|
||||
normed = torch.tensor([[0.3, -0.5, 1.0, -1.0, 0.0, 0.7, -0.2]])
|
||||
out = post(normed)
|
||||
assert torch.allclose(out, torch.tensor(LIBERO_ACTION_Q01).unsqueeze(0), atol=1e-4)
|
||||
assert torch.allclose(out, normed, atol=1e-4)
|
||||
|
||||
|
||||
def test_postprocessor_quantiles_survive_save_load(tmp_path) -> None:
|
||||
# Regression guard for the Hub mechanism this policy relies on: the benchmark quantiles live in
|
||||
# the serialized post-processor config and must round-trip through save_pretrained/from_pretrained.
|
||||
q01 = [-0.6, -0.8, -0.9, -0.1, -0.15, -0.25, -1.0]
|
||||
q99 = [0.9, 0.85, 0.9, 0.17, 0.18, 0.34, 1.0]
|
||||
post = PolicyProcessorPipeline[torch.Tensor, torch.Tensor](
|
||||
steps=[LingBotVAActionUnnormalizeStep(action_q01=q01, action_q99=q99)],
|
||||
name=POLICY_POSTPROCESSOR_DEFAULT_NAME,
|
||||
)
|
||||
post.save_pretrained(tmp_path)
|
||||
loaded = PolicyProcessorPipeline.from_pretrained(
|
||||
tmp_path, config_filename=f"{POLICY_POSTPROCESSOR_DEFAULT_NAME}.json"
|
||||
)
|
||||
step = next(s for s in loaded.steps if isinstance(s, LingBotVAActionUnnormalizeStep))
|
||||
assert step.action_q01 == q01
|
||||
assert step.action_q99 == q99
|
||||
|
||||
Reference in New Issue
Block a user