feat(processors): Introduce processors for various policy types

- Added `make_processor` function to create processor instances for different policy types, including `tdmpc`, `diffusion`, `act`, `vqbet`, `pi0`, `pi0fast`, `sac`, and `reward_classifier`.
- Implemented corresponding processor files for each policy type, encapsulating normalization and unnormalization steps.
- Updated existing policies to remove direct normalization dependencies, enhancing modularity and clarity.
- Enhanced test coverage to validate the integration of new processors with existing policy configurations.
2026-07-25 02:36:11 +00:00 · 2025-07-07 17:35:47 +02:00
parent 20f2910b63
commit b72274066e
20 changed files with 498 additions and 176 deletions
@@ -35,7 +35,6 @@ from torchvision.ops.misc import FrozenBatchNorm2d

 from lerobot.constants import ACTION, OBS_IMAGES
 from lerobot.policies.act.configuration_act import ACTConfig
-from lerobot.policies.normalize import Normalize, Unnormalize
 from lerobot.policies.pretrained import PreTrainedPolicy


@@ -51,27 +50,16 @@ class ACTPolicy(PreTrainedPolicy):
    def __init__(
        self,
        config: ACTConfig,
-        dataset_stats: dict[str, dict[str, Tensor]] | None = None,
    ):
        """
        Args:
            config: Policy configuration class instance or None, in which case the default instantiation of
                    the configuration class is used.
-            dataset_stats: Dataset statistics to be used for normalization. If not passed here, it is expected
-                that they will be passed with a call to `load_state_dict` before the policy is used.
        """
        super().__init__(config)
        config.validate_features()
        self.config = config

-        self.normalize_inputs = Normalize(config.input_features, config.normalization_mapping, dataset_stats)
-        self.normalize_targets = Normalize(
-            config.output_features, config.normalization_mapping, dataset_stats
-        )
-        self.unnormalize_outputs = Unnormalize(
-            config.output_features, config.normalization_mapping, dataset_stats
-        )
-
        self.model = ACT(config)

        if config.temporal_ensemble_coeff is not None:
@@ -137,23 +125,19 @@ class ACTPolicy(PreTrainedPolicy):
        """Predict a chunk of actions given environment observations."""
        self.eval()

-        batch = self.normalize_inputs(batch)
        if self.config.image_features:
            batch = dict(batch)  # shallow copy so that adding a key doesn't modify the original
            batch[OBS_IMAGES] = [batch[key] for key in self.config.image_features]

        actions = self.model(batch)[0]
-        actions = self.unnormalize_outputs({ACTION: actions})[ACTION]
        return actions

    def forward(self, batch: dict[str, Tensor]) -> tuple[Tensor, dict]:
        """Run the batch through the model and compute the loss for training or validation."""
-        batch = self.normalize_inputs(batch)
        if self.config.image_features:
            batch = dict(batch)  # shallow copy so that adding a key doesn't modify the original
            batch[OBS_IMAGES] = [batch[key] for key in self.config.image_features]

-        batch = self.normalize_targets(batch)
        actions_hat, (mu_hat, log_sigma_x2_hat) = self.model(batch)

        l1_loss = (
@@ -303,7 +287,7 @@ class ACT(nn.Module):
                                └───────────────────────┘
    """

-    def __init__(self, config: ACTConfig):
+    def __init__(self, config: ACTConfig, dataset_stats=None):
        # BERT style VAE encoder with input tokens [cls, robot_state, *action_sequence].
        # The cls token forms parameters of the latent's distribution (like this [*means, *log_variances]).
        super().__init__()
@@ -0,0 +1,44 @@
+#!/usr/bin/env python
+
+# Copyright 2024 Tony Z. Zhao and The HuggingFace Inc. team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import torch
+
+from lerobot.policies.act.configuration_act import ACTConfig
+from lerobot.processor import (
+    NormalizerProcessor,
+    RobotProcessor,
+    UnnormalizerProcessor,
+)
+
+
+def make_act_processor(
+    config: ACTConfig, dataset_stats: dict[str, dict[str, torch.Tensor]] | None = None
+) -> tuple[RobotProcessor, RobotProcessor]:  # returned as (act_preprocessor, act_postprocessor)
+    input_steps = [  # every step receives the same norm_map and the (possibly None) dataset_stats
+        NormalizerProcessor(  # step over the policy's input features
+            features=config.input_features, norm_map=config.normalization_mapping, stats=dataset_stats
+        ),
+        NormalizerProcessor(  # step over the output features (presumably training targets; confirm)
+            features=config.output_features, norm_map=config.normalization_mapping, stats=dataset_stats
+        ),
+    ]
+    output_steps = [  # only the output features are handled on the way out
+        UnnormalizerProcessor(
+            features=config.output_features, norm_map=config.normalization_mapping, stats=dataset_stats
+        ),
+    ]
+    return RobotProcessor(steps=input_steps, name="act_preprocessor"), RobotProcessor(
+        steps=output_steps, name="act_postprocessor"
+    )
@@ -35,7 +35,6 @@ from torch import Tensor, nn

 from lerobot.constants import ACTION, OBS_ENV_STATE, OBS_IMAGES, OBS_STATE
 from lerobot.policies.diffusion.configuration_diffusion import DiffusionConfig
-from lerobot.policies.normalize import Normalize, Unnormalize
 from lerobot.policies.pretrained import PreTrainedPolicy
 from lerobot.policies.utils import (
    get_device_from_parameters,
@@ -57,7 +56,6 @@ class DiffusionPolicy(PreTrainedPolicy):
    def __init__(
        self,
        config: DiffusionConfig,
-        dataset_stats: dict[str, dict[str, Tensor]] | None = None,
    ):
        """
        Args:
@@ -70,14 +68,6 @@ class DiffusionPolicy(PreTrainedPolicy):
        config.validate_features()
        self.config = config

-        self.normalize_inputs = Normalize(config.input_features, config.normalization_mapping, dataset_stats)
-        self.normalize_targets = Normalize(
-            config.output_features, config.normalization_mapping, dataset_stats
-        )
-        self.unnormalize_outputs = Unnormalize(
-            config.output_features, config.normalization_mapping, dataset_stats
-        )
-
        # queues are populated during rollout of the policy, they contain the n latest observations and actions
        self._queues = None

@@ -106,9 +96,6 @@ class DiffusionPolicy(PreTrainedPolicy):
        batch = {k: torch.stack(list(self._queues[k]), dim=1) for k in batch if k in self._queues}
        actions = self.diffusion.generate_actions(batch)

-        # TODO(rcadene): make above methods return output dictionary?
-        actions = self.unnormalize_outputs({ACTION: actions})[ACTION]
-
        return actions

    @torch.no_grad()
@@ -153,11 +140,9 @@ class DiffusionPolicy(PreTrainedPolicy):

    def forward(self, batch: dict[str, Tensor]) -> tuple[Tensor, None]:
        """Run the batch through the model and compute the loss for training or validation."""
-        batch = self.normalize_inputs(batch)
        if self.config.image_features:
            batch = dict(batch)  # shallow copy so that adding a key doesn't modify the original
            batch[OBS_IMAGES] = torch.stack([batch[key] for key in self.config.image_features], dim=-4)
-        batch = self.normalize_targets(batch)
        loss = self.diffusion.compute_loss(batch)
        # no output_dict so returning None
        return loss, None
@@ -0,0 +1,45 @@
+#!/usr/bin/env python
+
+# Copyright 2024 Columbia Artificial Intelligence, Robotics Lab,
+# and The HuggingFace Inc. team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import torch
+
+from lerobot.policies.diffusion.configuration_diffusion import DiffusionConfig
+from lerobot.processor import (
+    NormalizerProcessor,
+    RobotProcessor,
+    UnnormalizerProcessor,
+)
+
+
+def make_diffusion_processor(
+    config: DiffusionConfig, dataset_stats: dict[str, dict[str, torch.Tensor]] | None = None
+) -> tuple[RobotProcessor, RobotProcessor]:  # returned as (diffusion_preprocessor, diffusion_postprocessor)
+    input_steps = [  # every step receives the same norm_map and the (possibly None) dataset_stats
+        NormalizerProcessor(  # step over the policy's input features
+            features=config.input_features, norm_map=config.normalization_mapping, stats=dataset_stats
+        ),
+        NormalizerProcessor(  # step over the output features (presumably training targets; confirm)
+            features=config.output_features, norm_map=config.normalization_mapping, stats=dataset_stats
+        ),
+    ]
+    output_steps = [  # only the output features are handled on the way out
+        UnnormalizerProcessor(
+            features=config.output_features, norm_map=config.normalization_mapping, stats=dataset_stats
+        ),
+    ]
+    return RobotProcessor(steps=input_steps, name="diffusion_preprocessor"), RobotProcessor(
+        steps=output_steps, name="diffusion_postprocessor"
+    )
@@ -34,6 +34,7 @@ from lerobot.policies.sac.reward_model.configuration_classifier import RewardCla
 from lerobot.policies.smolvla.configuration_smolvla import SmolVLAConfig
 from lerobot.policies.tdmpc.configuration_tdmpc import TDMPCConfig
 from lerobot.policies.vqbet.configuration_vqbet import VQBeTConfig
+from lerobot.processor.pipeline import RobotProcessor


 def get_policy_class(name: str) -> PreTrainedPolicy:
@@ -101,6 +102,83 @@ def make_policy_config(policy_type: str, **kwargs) -> PreTrainedConfig:
        raise ValueError(f"Policy type '{policy_type}' is not available.")


+def make_processor(
+    policy_cfg: PreTrainedConfig,
+    pretrained_path: str | None = None,
+    **kwargs,
+) -> tuple[RobotProcessor, RobotProcessor]:
+    """Make the pair of processors for a given policy configuration.
+
+    Dispatches on `policy_cfg.type` to the matching policy-specific factory, which is imported
+    lazily inside the selected branch.
+
+    Args:
+        policy_cfg: Policy configuration; its `type` field (e.g., "act", "diffusion") selects the factory.
+        pretrained_path: Optional path to load a pretrained processor from. Loading is not
+            implemented yet, so any truthy value raises `NotImplementedError`.
+        **kwargs: Additional keyword arguments forwarded unchanged to the policy-specific factory.
+
+    Returns:
+        tuple[RobotProcessor, RobotProcessor]: The two processors returned by the selected factory.
+
+    Raises:
+        NotImplementedError: If `pretrained_path` is set or the policy type has no processor.
+    """
+    if pretrained_path:
+        # Load a pretrained processor
+        # TODO(azouitine): Handle this case.
+        raise NotImplementedError("Loading a pretrained processor is not implemented.")
+
+    # Create a new processor based on policy type
+    if policy_cfg.type == "tdmpc":
+        from lerobot.policies.tdmpc.processor_tdmpc import make_tdmpc_processor
+
+        return make_tdmpc_processor(policy_cfg, **kwargs)
+
+    elif policy_cfg.type == "diffusion":
+        from lerobot.policies.diffusion.processor_diffusion import make_diffusion_processor
+
+        return make_diffusion_processor(policy_cfg, **kwargs)
+
+    elif policy_cfg.type == "act":
+        from lerobot.policies.act.processor_act import make_act_processor
+
+        return make_act_processor(policy_cfg, **kwargs)
+
+    elif policy_cfg.type == "vqbet":
+        from lerobot.policies.vqbet.processor_vqbet import make_vqbet_processor
+
+        return make_vqbet_processor(policy_cfg, **kwargs)
+
+    elif policy_cfg.type == "pi0":
+        from lerobot.policies.pi0.processor_pi0 import make_pi0_processor
+
+        return make_pi0_processor(policy_cfg, **kwargs)
+
+    elif policy_cfg.type == "pi0fast":
+        from lerobot.policies.pi0fast.processor_pi0fast import make_pi0fast_processor
+
+        return make_pi0fast_processor(policy_cfg, **kwargs)
+
+    elif policy_cfg.type == "sac":
+        from lerobot.policies.sac.processor_sac import make_sac_processor
+
+        return make_sac_processor(policy_cfg, **kwargs)
+
+    elif policy_cfg.type == "reward_classifier":
+        from lerobot.policies.sac.reward_model.processor_classifier import make_classifier_processor
+
+        return make_classifier_processor(policy_cfg, **kwargs)
+
+    elif policy_cfg.type == "smolvla":
+        from lerobot.policies.smolvla.processor_smolvla import make_smolvla_processor
+
+        return make_smolvla_processor(policy_cfg, **kwargs)
+
+    else:
+        raise NotImplementedError(f"Processor for policy type '{policy_cfg.type}' is not implemented.")
+
+
 def make_policy(
    cfg: PreTrainedConfig,
    ds_meta: LeRobotDatasetMetadata | None = None,
@@ -147,7 +225,6 @@ def make_policy(
    kwargs = {}
    if ds_meta is not None:
        features = dataset_to_policy_features(ds_meta.features)
-        kwargs["dataset_stats"] = ds_meta.stats
    else:
        if not cfg.pretrained_path:
            logging.warning(
@@ -59,7 +59,6 @@ from torch import Tensor, nn
 from transformers import AutoTokenizer

 from lerobot.constants import ACTION, OBS_STATE
-from lerobot.policies.normalize import Normalize, Unnormalize
 from lerobot.policies.pi0.configuration_pi0 import PI0Config
 from lerobot.policies.pi0.paligemma_with_expert import (
    PaliGemmaWithExpertConfig,
@@ -223,7 +222,6 @@ class PI0Policy(PreTrainedPolicy):
    def __init__(
        self,
        config: PI0Config,
-        dataset_stats: dict[str, dict[str, Tensor]] | None = None,
    ):
        """
        Args:
@@ -236,14 +234,8 @@ class PI0Policy(PreTrainedPolicy):
        super().__init__(config)
        config.validate_features()
        self.config = config
-        self.normalize_inputs = Normalize(config.input_features, config.normalization_mapping, dataset_stats)
-        self.normalize_targets = Normalize(
-            config.output_features, config.normalization_mapping, dataset_stats
-        )
-        self.unnormalize_outputs = Unnormalize(
-            config.output_features, config.normalization_mapping, dataset_stats
-        )

+        # TODO(azouitine): Add tokenizer to pipeline
        self.language_tokenizer = AutoTokenizer.from_pretrained("google/paligemma-3b-pt-224")
        self.model = PI0FlowMatching(config)

@@ -377,8 +369,6 @@ class PI0Policy(PreTrainedPolicy):
        if self.config.adapt_to_pi_aloha:
            batch[OBS_STATE] = self._pi_aloha_decode_state(batch[OBS_STATE])

-        batch = self.normalize_inputs(batch)
-
        # Action queue logic for n_action_steps > 1. When the action_queue is depleted, populate it by
        # querying the policy.
        if len(self._action_queue) == 0:
@@ -394,8 +384,6 @@ class PI0Policy(PreTrainedPolicy):
            original_action_dim = self.config.action_feature.shape[0]
            actions = actions[:, :, :original_action_dim]

-            actions = self.unnormalize_outputs({"action": actions})["action"]
-
            if self.config.adapt_to_pi_aloha:
                actions = self._pi_aloha_encode_actions(actions)

@@ -410,9 +398,6 @@ class PI0Policy(PreTrainedPolicy):
            batch[OBS_STATE] = self._pi_aloha_decode_state(batch[OBS_STATE])
            batch[ACTION] = self._pi_aloha_encode_actions_inv(batch[ACTION])

-        batch = self.normalize_inputs(batch)
-        batch = self.normalize_targets(batch)
-
        images, img_masks = self.prepare_images(batch)
        state = self.prepare_state(batch)
        lang_tokens, lang_masks = self.prepare_language(batch)
@@ -0,0 +1,45 @@
+#!/usr/bin/env python
+
+# Copyright 2025 Physical Intelligence and The HuggingFace Inc. team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import torch
+
+from lerobot.policies.pi0.configuration_pi0 import PI0Config
+from lerobot.processor import (
+    NormalizerProcessor,
+    RobotProcessor,
+    UnnormalizerProcessor,
+)
+
+
+def make_pi0_processor(
+    config: PI0Config, dataset_stats: dict[str, dict[str, torch.Tensor]] | None = None
+) -> tuple[RobotProcessor, RobotProcessor]:  # returned as (pi0_preprocessor, pi0_postprocessor)
+    input_steps = [  # every step receives the same norm_map and the (possibly None) dataset_stats
+        NormalizerProcessor(  # step over the policy's input features
+            features=config.input_features, norm_map=config.normalization_mapping, stats=dataset_stats
+        ),
+        NormalizerProcessor(  # step over the output features (presumably training targets; confirm)
+            features=config.output_features, norm_map=config.normalization_mapping, stats=dataset_stats
+        ),
+    ]
+    output_steps = [  # only the output features are handled on the way out
+        UnnormalizerProcessor(
+            features=config.output_features, norm_map=config.normalization_mapping, stats=dataset_stats
+        ),
+    ]
+    return RobotProcessor(steps=input_steps, name="pi0_preprocessor"), RobotProcessor(
+        steps=output_steps, name="pi0_postprocessor"
+    )
@@ -58,7 +58,6 @@ from transformers.cache_utils import HybridCache, StaticCache
 from transformers.models.auto import CONFIG_MAPPING

 from lerobot.constants import ACTION, OBS_STATE
-from lerobot.policies.normalize import Normalize, Unnormalize
 from lerobot.policies.pi0fast.configuration_pi0fast import PI0FASTConfig
 from lerobot.policies.pretrained import PreTrainedPolicy

@@ -146,14 +145,6 @@ class PI0FASTPolicy(PreTrainedPolicy):
        config.validate_features()
        self.config = config

-        self.normalize_inputs = Normalize(config.input_features, config.normalization_mapping, dataset_stats)
-        self.normalize_targets = Normalize(
-            config.output_features, config.normalization_mapping, dataset_stats
-        )
-        self.unnormalize_outputs = Unnormalize(
-            config.output_features, config.normalization_mapping, dataset_stats
-        )
-
        self.language_tokenizer = AutoProcessor.from_pretrained("google/paligemma-3b-pt-224")
        self.model = PI0FAST(config)

@@ -221,8 +212,6 @@ class PI0FASTPolicy(PreTrainedPolicy):
        if self.config.adapt_to_pi_aloha:
            batch[OBS_STATE] = self._pi_aloha_decode_state(batch[OBS_STATE])

-        batch = self.normalize_inputs(batch)
-
        # Action queue logic for n_action_steps > 1. When the action_queue is depleted, populate it by
        # querying the policy.
        if len(self._action_queue) == 0:
@@ -235,8 +224,6 @@ class PI0FASTPolicy(PreTrainedPolicy):
            ]  # self.config.max_action_dim  # self.config.action_feature.shape[0]
            actions = actions[:, :, :original_action_dim]

-            actions = self.unnormalize_outputs({"action": actions})["action"]
-
            if self.config.adapt_to_pi_aloha:
                actions = self._pi_aloha_encode_actions(actions)

@@ -249,8 +236,6 @@ class PI0FASTPolicy(PreTrainedPolicy):
        if self.config.adapt_to_pi_aloha:
            batch[OBS_STATE] = self._pi_aloha_decode_state(batch[OBS_STATE])
            batch[ACTION] = self._pi_aloha_encode_actions_inv(batch[ACTION])
-        batch = self.normalize_inputs(batch)
-        batch = self.normalize_targets(batch)
        loss_dict = self.model.forward(batch)
        return loss_dict["loss"], loss_dict

@@ -0,0 +1,45 @@
+#!/usr/bin/env python
+
+# Copyright 2025 Physical Intelligence and The HuggingFace Inc. team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import torch
+
+from lerobot.policies.pi0fast.configuration_pi0fast import PI0FASTConfig
+from lerobot.processor import (
+    NormalizerProcessor,
+    RobotProcessor,
+    UnnormalizerProcessor,
+)
+
+
+def make_pi0fast_processor(
+    config: PI0FASTConfig, dataset_stats: dict[str, dict[str, torch.Tensor]] | None = None
+) -> tuple[RobotProcessor, RobotProcessor]:
+    """Build the (preprocessor, postprocessor) pair for the PI0FAST policy.
+
+    The preprocessor runs a normalizer over the input features and one over the output
+    features; the postprocessor runs an unnormalizer over the output features.
+    """
+    # Every step is configured with the same normalization mapping and dataset statistics.
+    norm = {"norm_map": config.normalization_mapping, "stats": dataset_stats}
+    input_steps = [
+        NormalizerProcessor(features=config.input_features, **norm),
+        NormalizerProcessor(features=config.output_features, **norm),
+    ]
+    output_steps = [UnnormalizerProcessor(features=config.output_features, **norm)]
+    return (
+        RobotProcessor(steps=input_steps, name="pi0fast_preprocessor"),
+        RobotProcessor(steps=output_steps, name="pi0fast_postprocessor"),
+    )
@@ -28,7 +28,6 @@ import torch.nn.functional as F  # noqa: N812
 from torch import Tensor
 from torch.distributions import MultivariateNormal, TanhTransform, Transform, TransformedDistribution

-from lerobot.policies.normalize import NormalizeBuffer
 from lerobot.policies.pretrained import PreTrainedPolicy
 from lerobot.policies.sac.configuration_sac import SACConfig, is_image_feature
 from lerobot.policies.utils import get_device_from_parameters
@@ -45,7 +44,6 @@ class SACPolicy(
    def __init__(
        self,
        config: SACConfig | None = None,
-        dataset_stats: dict[str, dict[str, Tensor]] | None = None,
    ):
        super().__init__(config)
        config.validate_features()
@@ -53,7 +51,6 @@ class SACPolicy(

        # Determine action dimension and initialize all components
        continuous_action_dim = config.output_features["action"].shape[0]
-        self._init_normalization(dataset_stats)
        self._init_encoders()
        self._init_critics(continuous_action_dim)
        self._init_actor(continuous_action_dim)
@@ -88,8 +85,7 @@ class SACPolicy(

        observations_features = None
        if self.shared_encoder and self.actor.encoder.has_images:
-            # Cache and normalize image features
-            observations_features = self.actor.encoder.get_cached_image_features(batch, normalize=True)
+            observations_features = self.actor.encoder.get_cached_image_features(batch)

        actions, _, _ = self.actor(batch, observations_features)

@@ -391,28 +387,12 @@ class SACPolicy(
        actor_loss = ((self.temperature * log_probs) - min_q_preds).mean()
        return actor_loss

-    def _init_normalization(self, dataset_stats):
-        """Initialize input/output normalization modules."""
-        self.normalize_inputs = nn.Identity()
-        self.normalize_targets = nn.Identity()
-        if self.config.dataset_stats is not None:
-            params = _convert_normalization_params_to_tensor(self.config.dataset_stats)
-            self.normalize_inputs = NormalizeBuffer(
-                self.config.input_features, self.config.normalization_mapping, params
-            )
-            stats = dataset_stats or params
-            self.normalize_targets = NormalizeBuffer(
-                self.config.output_features, self.config.normalization_mapping, stats
-            )
-
    def _init_encoders(self):
        """Initialize shared or separate encoders for actor and critic."""
        self.shared_encoder = self.config.shared_encoder
-        self.encoder_critic = SACObservationEncoder(self.config, self.normalize_inputs)
+        self.encoder_critic = SACObservationEncoder(self.config)
        self.encoder_actor = (
-            self.encoder_critic
-            if self.shared_encoder
-            else SACObservationEncoder(self.config, self.normalize_inputs)
+            self.encoder_critic if self.shared_encoder else SACObservationEncoder(self.config)
        )

    def _init_critics(self, continuous_action_dim):
@@ -424,9 +404,7 @@ class SACPolicy(
            )
            for _ in range(self.config.num_critics)
        ]
-        self.critic_ensemble = CriticEnsemble(
-            encoder=self.encoder_critic, ensemble=heads, output_normalization=self.normalize_targets
-        )
+        self.critic_ensemble = CriticEnsemble(encoder=self.encoder_critic, ensemble=heads)
        target_heads = [
            CriticHead(
                input_dim=self.encoder_critic.output_dim + continuous_action_dim,
@@ -434,9 +412,7 @@ class SACPolicy(
            )
            for _ in range(self.config.num_critics)
        ]
-        self.critic_target = CriticEnsemble(
-            encoder=self.encoder_critic, ensemble=target_heads, output_normalization=self.normalize_targets
-        )
+        self.critic_target = CriticEnsemble(encoder=self.encoder_critic, ensemble=target_heads)
        self.critic_target.load_state_dict(self.critic_ensemble.state_dict())

        if self.config.use_torch_compile:
@@ -490,10 +466,9 @@ class SACPolicy(
 class SACObservationEncoder(nn.Module):
    """Encode image and/or state vector observations."""

-    def __init__(self, config: SACConfig, input_normalizer: nn.Module) -> None:
+    def __init__(self, config: SACConfig) -> None:
        super().__init__()
        self.config = config
-        self.input_normalization = input_normalizer
        self._init_image_layers()
        self._init_state_layers()
        self._compute_output_dim()
@@ -568,11 +543,10 @@ class SACObservationEncoder(nn.Module):
    def forward(
        self, obs: dict[str, Tensor], cache: dict[str, Tensor] | None = None, detach: bool = False
    ) -> Tensor:
-        obs = self.input_normalization(obs)
        parts = []
        if self.has_images:
            if cache is None:
-                cache = self.get_cached_image_features(obs, normalize=False)
+                cache = self.get_cached_image_features(obs)
            parts.append(self._encode_images(cache, detach))
        if self.has_env:
            parts.append(self.env_encoder(obs["observation.environment_state"]))
@@ -585,7 +559,7 @@ class SACObservationEncoder(nn.Module):
            "No parts to concatenate, you should have at least one image or environment state or state"
        )

-    def get_cached_image_features(self, obs: dict[str, Tensor], normalize: bool = False) -> dict[str, Tensor]:
+    def get_cached_image_features(self, obs: dict[str, Tensor]) -> dict[str, Tensor]:
        """Extract and optionally cache image features from observations.

        This function processes image observations through the vision encoder once and returns
@@ -597,26 +571,17 @@ class SACObservationEncoder(nn.Module):
        - The vision encoder forward pass is typically the main computational bottleneck during training and inference
        - Caching these features can provide 2-4x speedup in training and inference

-        Normalization behavior:
-        - When called from inside forward(): set normalize=False since inputs are already normalized
-        - When called from outside forward(): set normalize=True to ensure proper input normalization
-
        Usage patterns:
-        - Called in select_action() with normalize=True
+        - Called in select_action()
        - Called in learner.py's get_observation_features() to pre-compute features for all policy components
-        - Called internally by forward() with normalize=False
+        - Called internally by forward()

        Args:
            obs: Dictionary of observation tensors containing image keys
-            normalize: Whether to normalize observations before encoding
-                      Set to True when calling directly from outside the encoder's forward method
-                      Set to False when calling from within forward() where inputs are already normalized

        Returns:
            Dictionary mapping image keys to their corresponding encoded features
        """
-        if normalize:
-            obs = self.input_normalization(obs)
        batched = torch.cat([obs[k] for k in self.image_keys], dim=0)
        out = self.image_encoder(batched)
        chunks = torch.chunk(out, len(self.image_keys), dim=0)
@@ -747,7 +712,6 @@ class CriticEnsemble(nn.Module):
    Args:
        encoder (SACObservationEncoder): encoder for observations.
        ensemble (List[CriticHead]): list of critic heads.
-        output_normalization (nn.Module): normalization layer for actions.
        init_final (float | None): optional initializer scale for final layers.

    Forward returns a tensor of shape (num_critics, batch_size) containing Q-values.
@@ -757,13 +721,11 @@ class CriticEnsemble(nn.Module):
        self,
        encoder: SACObservationEncoder,
        ensemble: list[CriticHead],
-        output_normalization: nn.Module,
        init_final: float | None = None,
    ):
        super().__init__()
        self.encoder = encoder
        self.init_final = init_final
-        self.output_normalization = output_normalization
        self.critics = nn.ModuleList(ensemble)

    def forward(
@@ -775,11 +737,6 @@ class CriticEnsemble(nn.Module):
        device = get_device_from_parameters(self)
        # Move each tensor in observations to device
        observations = {k: v.to(device) for k, v in observations.items()}
-        # NOTE: We normalize actions it helps for sample efficiency
-        actions: dict[str, torch.tensor] = {"action": actions}
-        # NOTE: Normalization layer took dict in input and outputs a dict that why
-        actions = self.output_normalization(actions)["action"]
-        actions = actions.to(device)

        obs_enc = self.encoder(observations, cache=observation_features)

@@ -0,0 +1,46 @@
+#!/usr/bin/env python
+
+# Copyright 2024 The HuggingFace Inc. team.
+# All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import torch
+
+from lerobot.policies.sac.configuration_sac import SACConfig
+from lerobot.processor import (
+    NormalizerProcessor,
+    RobotProcessor,
+    UnnormalizerProcessor,
+)
+
+
+def make_sac_processor(
+    config: SACConfig, dataset_stats: dict[str, dict[str, torch.Tensor]] | None = None
+) -> tuple[RobotProcessor, RobotProcessor]:  # returned as (sac_preprocessor, sac_postprocessor)
+    input_steps = [  # every step receives the same norm_map and the (possibly None) dataset_stats
+        NormalizerProcessor(  # step over the policy's input features
+            features=config.input_features, norm_map=config.normalization_mapping, stats=dataset_stats
+        ),
+        NormalizerProcessor(  # step over the output features (presumably the action; confirm)
+            features=config.output_features, norm_map=config.normalization_mapping, stats=dataset_stats
+        ),
+    ]
+    output_steps = [  # only the output features are handled on the way out
+        UnnormalizerProcessor(
+            features=config.output_features, norm_map=config.normalization_mapping, stats=dataset_stats
+        ),
+    ]
+    return RobotProcessor(steps=input_steps, name="sac_preprocessor"), RobotProcessor(
+        steps=output_steps, name="sac_postprocessor"
+    )
@@ -20,7 +20,6 @@ import torch
 from torch import Tensor, nn

 from lerobot.constants import OBS_IMAGE, REWARD
-from lerobot.policies.normalize import Normalize, Unnormalize
 from lerobot.policies.pretrained import PreTrainedPolicy
 from lerobot.policies.sac.reward_model.configuration_classifier import RewardClassifierConfig

@@ -108,22 +107,12 @@ class Classifier(PreTrainedPolicy):
    def __init__(
        self,
        config: RewardClassifierConfig,
-        dataset_stats: dict[str, dict[str, Tensor]] | None = None,
    ):
        from transformers import AutoModel

        super().__init__(config)
        self.config = config

-        # Initialize normalization (standardized with the policy framework)
-        self.normalize_inputs = Normalize(config.input_features, config.normalization_mapping, dataset_stats)
-        self.normalize_targets = Normalize(
-            config.output_features, config.normalization_mapping, dataset_stats
-        )
-        self.unnormalize_outputs = Unnormalize(
-            config.output_features, config.normalization_mapping, dataset_stats
-        )
-
        # Set up encoder
        encoder = AutoModel.from_pretrained(self.config.model_name, trust_remote_code=True)
        # Extract vision model if we're given a multimodal model
@@ -247,10 +236,6 @@ class Classifier(PreTrainedPolicy):

    def forward(self, batch: dict[str, Tensor]) -> tuple[Tensor, dict[str, Tensor]]:
        """Standard forward pass for training compatible with train.py."""
-        # Normalize inputs if needed
-        batch = self.normalize_inputs(batch)
-        batch = self.normalize_targets(batch)
-
        # Extract images and labels
        images, labels = self.extract_images_and_labels(batch)

@@ -0,0 +1,40 @@
+#!/usr/bin/env python
+
+# Copyright 2025 The HuggingFace Inc. team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import torch
+
+from lerobot.policies.sac.reward_model.configuration_classifier import RewardClassifierConfig
+from lerobot.processor import (
+    IdentityProcessor,
+    NormalizerProcessor,
+    RobotProcessor,
+)
+
+
+def make_classifier_processor(
+    config: RewardClassifierConfig, dataset_stats: dict[str, dict[str, torch.Tensor]] | None = None
+) -> tuple[RobotProcessor, RobotProcessor]:
+    """Build the reward-classifier (preprocessor, postprocessor) `RobotProcessor` pair.
+
+    The preprocessor normalizes `config.input_features`, then `config.output_features`, using
+    `config.normalization_mapping` and `dataset_stats`; the postprocessor is a single `IdentityProcessor`.
+    """
+    shared = {"norm_map": config.normalization_mapping, "stats": dataset_stats}
+    normalize_inputs = NormalizerProcessor(features=config.input_features, **shared)
+    normalize_targets = NormalizerProcessor(features=config.output_features, **shared)
+    passthrough = IdentityProcessor()
+    preprocessor = RobotProcessor(steps=[normalize_inputs, normalize_targets], name="classifier_preprocessor")
+    postprocessor = RobotProcessor(steps=[passthrough], name="classifier_postprocessor")
+    return preprocessor, postprocessor
@@ -64,10 +64,6 @@ from torch import Tensor, nn
 from transformers import AutoProcessor

 from lerobot.constants import ACTION, OBS_STATE
-from lerobot.policies.normalize import (
-    Normalize,
-    Unnormalize,
-)
 from lerobot.policies.pretrained import PreTrainedPolicy
 from lerobot.policies.smolvla.configuration_smolvla import SmolVLAConfig
 from lerobot.policies.smolvla.smolvlm_with_expert import SmolVLMWithExpertModel
@@ -326,26 +322,16 @@ class SmolVLAPolicy(PreTrainedPolicy):
    def __init__(
        self,
        config: SmolVLAConfig,
-        dataset_stats: dict[str, dict[str, Tensor]] | None = None,
    ):
        """
        Args:
            config: Policy configuration class instance or None, in which case the default instantiation of
                    the configuration class is used.
-            dataset_stats: Dataset statistics to be used for normalization. If not passed here, it is expected
-                that they will be passed with a call to `load_state_dict` before the policy is used.
        """

        super().__init__(config)
        config.validate_features()
        self.config = config
-        self.normalize_inputs = Normalize(config.input_features, config.normalization_mapping, dataset_stats)
-        self.normalize_targets = Normalize(
-            config.output_features, config.normalization_mapping, dataset_stats
-        )
-        self.unnormalize_outputs = Unnormalize(
-            config.output_features, config.normalization_mapping, dataset_stats
-        )

        self.language_tokenizer = AutoProcessor.from_pretrained(self.config.vlm_model_name).tokenizer
        self.model = VLAFlowMatching(config)
@@ -408,8 +394,6 @@ class SmolVLAPolicy(PreTrainedPolicy):
        if self.config.adapt_to_pi_aloha:
            batch[OBS_STATE] = self._pi_aloha_decode_state(batch[OBS_STATE])

-        batch = self.normalize_inputs(batch)
-
        return batch

    @torch.no_grad()
@@ -450,8 +434,7 @@ class SmolVLAPolicy(PreTrainedPolicy):
        if self.config.adapt_to_pi_aloha:
            batch[OBS_STATE] = self._pi_aloha_decode_state(batch[OBS_STATE])
            batch[ACTION] = self._pi_aloha_encode_actions_inv(batch[ACTION])
-        batch = self.normalize_inputs(batch)
-        batch = self.normalize_targets(batch)
+
        images, img_masks = self.prepare_images(batch)
        state = self.prepare_state(batch)
        lang_tokens, lang_masks = self.prepare_language(batch)
@@ -0,0 +1,44 @@
+#!/usr/bin/env python
+
+# Copyright 2025 HuggingFace Inc. team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import torch
+
+from lerobot.policies.smolvla.configuration_smolvla import SmolVLAConfig
+from lerobot.processor import (
+    NormalizerProcessor,
+    RobotProcessor,
+    UnnormalizerProcessor,
+)
+
+
+def make_smolvla_processor(
+    config: SmolVLAConfig, dataset_stats: dict[str, dict[str, torch.Tensor]] | None = None
+) -> tuple[RobotProcessor, RobotProcessor]:
+    """Build the SmolVLA (preprocessor, postprocessor) pair of `RobotProcessor` pipelines.
+
+    The preprocessor runs a `NormalizerProcessor` over `config.input_features` followed by one over
+    `config.output_features`; the postprocessor runs an `UnnormalizerProcessor` over
+    `config.output_features`. Every step uses `config.normalization_mapping` and `dataset_stats`.
+    """
+    # Keyword arguments common to all three (un)normalization steps.
+    shared = {"norm_map": config.normalization_mapping, "stats": dataset_stats}
+    normalize_inputs = NormalizerProcessor(features=config.input_features, **shared)
+    normalize_targets = NormalizerProcessor(features=config.output_features, **shared)
+    unnormalize_outputs = UnnormalizerProcessor(features=config.output_features, **shared)
+
+    # Preprocessor steps are listed inputs-first, then targets.
+    preprocessor = RobotProcessor(steps=[normalize_inputs, normalize_targets], name="smolvla_preprocessor")
+    postprocessor = RobotProcessor(steps=[unnormalize_outputs], name="smolvla_postprocessor")
+    return preprocessor, postprocessor
@@ -36,7 +36,6 @@ import torch.nn.functional as F  # noqa: N812
 from torch import Tensor

 from lerobot.constants import ACTION, OBS_ENV_STATE, OBS_IMAGE, OBS_STATE, REWARD
-from lerobot.policies.normalize import Normalize, Unnormalize
 from lerobot.policies.pretrained import PreTrainedPolicy
 from lerobot.policies.tdmpc.configuration_tdmpc import TDMPCConfig
 from lerobot.policies.utils import get_device_from_parameters, get_output_shape, populate_queues
@@ -63,26 +62,19 @@ class TDMPCPolicy(PreTrainedPolicy):
    config_class = TDMPCConfig
    name = "tdmpc"

-    def __init__(self, config: TDMPCConfig, dataset_stats: dict[str, dict[str, Tensor]] | None = None):
+    def __init__(
+        self,
+        config: TDMPCConfig,
+    ):
        """
        Args:
            config: Policy configuration class instance or None, in which case the default instantiation of
                the configuration class is used.
-            dataset_stats: Dataset statistics to be used for normalization. If not passed here, it is expected
-                that they will be passed with a call to `load_state_dict` before the policy is used.
        """
        super().__init__(config)
        config.validate_features()
        self.config = config

-        self.normalize_inputs = Normalize(config.input_features, config.normalization_mapping, dataset_stats)
-        self.normalize_targets = Normalize(
-            config.output_features, config.normalization_mapping, dataset_stats
-        )
-        self.unnormalize_outputs = Unnormalize(
-            config.output_features, config.normalization_mapping, dataset_stats
-        )
-
        self.model = TDMPCTOLD(config)
        self.model_target = deepcopy(self.model)
        for param in self.model_target.parameters():
@@ -320,11 +312,9 @@ class TDMPCPolicy(PreTrainedPolicy):
        """
        device = get_device_from_parameters(self)

-        batch = self.normalize_inputs(batch)
        if self.config.image_features:
            batch = dict(batch)  # shallow copy so that adding a key doesn't modify the original
            batch[OBS_IMAGE] = batch[next(iter(self.config.image_features))]
-        batch = self.normalize_targets(batch)

        info = {}

@@ -0,0 +1,45 @@
+#!/usr/bin/env python
+
+# Copyright 2024 Nicklas Hansen, Xiaolong Wang, Hao Su,
+# and The HuggingFace Inc. team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import torch
+
+from lerobot.policies.tdmpc.configuration_tdmpc import TDMPCConfig
+from lerobot.processor import (
+    NormalizerProcessor,
+    RobotProcessor,
+    UnnormalizerProcessor,
+)
+
+
+def make_tdmpc_processor(
+    config: TDMPCConfig, dataset_stats: dict[str, dict[str, torch.Tensor]] | None = None
+) -> tuple[RobotProcessor, RobotProcessor]:
+    """Build the TD-MPC (preprocessor, postprocessor) pair of `RobotProcessor` pipelines.
+
+    The preprocessor runs a `NormalizerProcessor` over `config.input_features` followed by one over
+    `config.output_features`; the postprocessor runs an `UnnormalizerProcessor` over
+    `config.output_features`. Every step uses `config.normalization_mapping` and `dataset_stats`.
+    """
+    # Keyword arguments common to all three (un)normalization steps.
+    shared = {"norm_map": config.normalization_mapping, "stats": dataset_stats}
+    normalize_inputs = NormalizerProcessor(features=config.input_features, **shared)
+    normalize_targets = NormalizerProcessor(features=config.output_features, **shared)
+    unnormalize_outputs = UnnormalizerProcessor(features=config.output_features, **shared)
+
+    # Preprocessor steps are listed inputs-first, then targets.
+    preprocessor = RobotProcessor(steps=[normalize_inputs, normalize_targets], name="tdmpc_preprocessor")
+    postprocessor = RobotProcessor(steps=[unnormalize_outputs], name="tdmpc_postprocessor")
+    return preprocessor, postprocessor
@@ -28,7 +28,6 @@ import torchvision
 from torch import Tensor, nn

 from lerobot.constants import ACTION, OBS_IMAGES, OBS_STATE
-from lerobot.policies.normalize import Normalize, Unnormalize
 from lerobot.policies.pretrained import PreTrainedPolicy
 from lerobot.policies.utils import get_device_from_parameters, get_output_shape, populate_queues
 from lerobot.policies.vqbet.configuration_vqbet import VQBeTConfig
@@ -48,7 +47,6 @@ class VQBeTPolicy(PreTrainedPolicy):
    def __init__(
        self,
        config: VQBeTConfig | None = None,
-        dataset_stats: dict[str, dict[str, Tensor]] | None = None,
    ):
        """
        Args:
@@ -61,14 +59,6 @@ class VQBeTPolicy(PreTrainedPolicy):
        config.validate_features()
        self.config = config

-        self.normalize_inputs = Normalize(config.input_features, config.normalization_mapping, dataset_stats)
-        self.normalize_targets = Normalize(
-            config.output_features, config.normalization_mapping, dataset_stats
-        )
-        self.unnormalize_outputs = Unnormalize(
-            config.output_features, config.normalization_mapping, dataset_stats
-        )
-
        self.vqbet = VQBeTModel(config)

        self.reset()
@@ -165,10 +155,8 @@ class VQBeTPolicy(PreTrainedPolicy):

    def forward(self, batch: dict[str, Tensor]) -> tuple[Tensor, dict]:
        """Run the batch through the model and compute the loss for training or validation."""
-        batch = self.normalize_inputs(batch)
        batch = dict(batch)  # shallow copy so that adding a key doesn't modify the original
        batch[OBS_IMAGES] = torch.stack([batch[key] for key in self.config.image_features], dim=-4)
-        batch = self.normalize_targets(batch)
        # VQ-BeT discretizes action using VQ-VAE before training BeT (please refer to section 3.2 in the VQ-BeT paper https://huggingface.co/papers/2403.03181)
        if not self.vqbet.action_head.vqvae_model.discretized.item():
            # loss: total loss of training RVQ
@@ -0,0 +1,46 @@
+#!/usr/bin/env python
+
+# Copyright 2024 Seungjae Lee and Yibin Wang and Haritheja Etukuru
+# and H. Jin Kim and Nur Muhammad Mahi Shafiullah and Lerrel Pinto
+# and The HuggingFace Inc. team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import torch
+
+from lerobot.policies.vqbet.configuration_vqbet import VQBeTConfig
+from lerobot.processor import (
+    NormalizerProcessor,
+    RobotProcessor,
+    UnnormalizerProcessor,
+)
+
+
+def make_vqbet_processor(
+    config: VQBeTConfig, dataset_stats: dict[str, dict[str, torch.Tensor]] | None = None
+) -> tuple[RobotProcessor, RobotProcessor]:
+    """Build the VQ-BeT (preprocessor, postprocessor) pair of `RobotProcessor` pipelines.
+
+    The preprocessor runs a `NormalizerProcessor` over `config.input_features` followed by one over
+    `config.output_features`; the postprocessor runs an `UnnormalizerProcessor` over
+    `config.output_features`. Every step uses `config.normalization_mapping` and `dataset_stats`.
+    """
+    # Keyword arguments common to all three (un)normalization steps.
+    shared = {"norm_map": config.normalization_mapping, "stats": dataset_stats}
+    normalize_inputs = NormalizerProcessor(features=config.input_features, **shared)
+    normalize_targets = NormalizerProcessor(features=config.output_features, **shared)
+    unnormalize_outputs = UnnormalizerProcessor(features=config.output_features, **shared)
+
+    # Preprocessor steps are listed inputs-first, then targets.
+    preprocessor = RobotProcessor(steps=[normalize_inputs, normalize_targets], name="vqbet_preprocessor")
+    postprocessor = RobotProcessor(steps=[unnormalize_outputs], name="vqbet_postprocessor")
+    return preprocessor, postprocessor
@@ -39,6 +39,7 @@ from lerobot.policies.factory import (
    get_policy_class,
    make_policy,
    make_policy_config,
+    make_processor,
 )
 from lerobot.policies.normalize import Normalize, Unnormalize
 from lerobot.policies.pretrained import PreTrainedPolicy
@@ -151,6 +152,7 @@ def test_policy(ds_repo_id, env_name, env_kwargs, policy_name, policy_kwargs):

    # Check that we can make the policy object.
    dataset = make_dataset(train_cfg)
+    preprocessor, _ = make_processor(train_cfg.policy, None)
    policy = make_policy(train_cfg.policy, ds_meta=dataset.meta)
    assert isinstance(policy, PreTrainedPolicy)

@@ -224,6 +226,7 @@ def test_act_backbone_lr():
    assert cfg.policy.optimizer_lr_backbone == 0.001

    dataset = make_dataset(cfg)
+    preprocessor, _ = make_processor(cfg.policy, None)
    policy = make_policy(cfg.policy, ds_meta=dataset.meta)
    optimizer, _ = make_optimizer_and_scheduler(cfg, policy)
    assert len(optimizer.param_groups) == 2