From b72274066e8432b65838459fb768800c4a0237a0 Mon Sep 17 00:00:00 2001 From: Adil Zouitine Date: Mon, 7 Jul 2025 17:35:47 +0200 Subject: [PATCH] feat(processors): Introduce processors for various policy types - Added `make_processor` function to create processor instances for different policy types, including `tdmpc`, `diffusion`, `act`, `vqbet`, `pi0`, `pi0fast`, `sac`, `smolvla`, and `reward_classifier`. - Implemented corresponding processor files for each policy type, encapsulating normalization and unnormalization steps. - Updated existing policies to remove direct normalization dependencies, enhancing modularity and clarity. - Enhanced test coverage to validate the integration of new processors with existing policy configurations. --- src/lerobot/policies/act/modeling_act.py | 18 +---- src/lerobot/policies/act/processor_act.py | 44 +++++++++++ .../policies/diffusion/modeling_diffusion.py | 15 ---- .../policies/diffusion/processor_diffusion.py | 45 +++++++++++ src/lerobot/policies/factory.py | 79 ++++++++++++++++++- src/lerobot/policies/pi0/modeling_pi0.py | 17 +--- src/lerobot/policies/pi0/processor_pi0.py | 45 +++++++++++ .../policies/pi0fast/modeling_pi0fast.py | 15 ---- .../policies/pi0fast/processor_pi0fast.py | 45 +++++++++++ src/lerobot/policies/sac/modeling_sac.py | 63 +++------------ src/lerobot/policies/sac/processor_sac.py | 46 +++++++++++ .../sac/reward_model/modeling_classifier.py | 15 ---- .../sac/reward_model/processor_classifier.py | 40 ++++++++++ .../policies/smolvla/modeling_smolvla.py | 19 +---- .../policies/smolvla/processor_smolvla.py | 44 +++++++++++ src/lerobot/policies/tdmpc/modeling_tdmpc.py | 18 +---- src/lerobot/policies/tdmpc/processor_tdmpc.py | 45 +++++++++++ src/lerobot/policies/vqbet/modeling_vqbet.py | 12 --- src/lerobot/policies/vqbet/processor_vqbet.py | 46 +++++++++++ tests/policies/test_policies.py | 3 + 20 files changed, 498 insertions(+), 176 deletions(-) create mode 100644 src/lerobot/policies/act/processor_act.py create mode 100644
src/lerobot/policies/diffusion/processor_diffusion.py create mode 100644 src/lerobot/policies/pi0/processor_pi0.py create mode 100644 src/lerobot/policies/pi0fast/processor_pi0fast.py create mode 100644 src/lerobot/policies/sac/processor_sac.py create mode 100644 src/lerobot/policies/sac/reward_model/processor_classifier.py create mode 100644 src/lerobot/policies/smolvla/processor_smolvla.py create mode 100644 src/lerobot/policies/tdmpc/processor_tdmpc.py create mode 100644 src/lerobot/policies/vqbet/processor_vqbet.py diff --git a/src/lerobot/policies/act/modeling_act.py b/src/lerobot/policies/act/modeling_act.py index cfd549b25..6a4ad82ce 100644 --- a/src/lerobot/policies/act/modeling_act.py +++ b/src/lerobot/policies/act/modeling_act.py @@ -35,7 +35,6 @@ from torchvision.ops.misc import FrozenBatchNorm2d from lerobot.constants import ACTION, OBS_IMAGES from lerobot.policies.act.configuration_act import ACTConfig -from lerobot.policies.normalize import Normalize, Unnormalize from lerobot.policies.pretrained import PreTrainedPolicy @@ -51,27 +50,16 @@ class ACTPolicy(PreTrainedPolicy): def __init__( self, config: ACTConfig, - dataset_stats: dict[str, dict[str, Tensor]] | None = None, ): """ Args: config: Policy configuration class instance or None, in which case the default instantiation of the configuration class is used. - dataset_stats: Dataset statistics to be used for normalization. If not passed here, it is expected - that they will be passed with a call to `load_state_dict` before the policy is used. 
""" super().__init__(config) config.validate_features() self.config = config - self.normalize_inputs = Normalize(config.input_features, config.normalization_mapping, dataset_stats) - self.normalize_targets = Normalize( - config.output_features, config.normalization_mapping, dataset_stats - ) - self.unnormalize_outputs = Unnormalize( - config.output_features, config.normalization_mapping, dataset_stats - ) - self.model = ACT(config) if config.temporal_ensemble_coeff is not None: @@ -137,23 +125,19 @@ class ACTPolicy(PreTrainedPolicy): """Predict a chunk of actions given environment observations.""" self.eval() - batch = self.normalize_inputs(batch) if self.config.image_features: batch = dict(batch) # shallow copy so that adding a key doesn't modify the original batch[OBS_IMAGES] = [batch[key] for key in self.config.image_features] actions = self.model(batch)[0] - actions = self.unnormalize_outputs({ACTION: actions})[ACTION] return actions def forward(self, batch: dict[str, Tensor]) -> tuple[Tensor, dict]: """Run the batch through the model and compute the loss for training or validation.""" - batch = self.normalize_inputs(batch) if self.config.image_features: batch = dict(batch) # shallow copy so that adding a key doesn't modify the original batch[OBS_IMAGES] = [batch[key] for key in self.config.image_features] - batch = self.normalize_targets(batch) actions_hat, (mu_hat, log_sigma_x2_hat) = self.model(batch) l1_loss = ( @@ -303,7 +287,7 @@ class ACT(nn.Module): └───────────────────────┘ """ - def __init__(self, config: ACTConfig): + def __init__(self, config: ACTConfig, dataset_stats=None): # BERT style VAE encoder with input tokens [cls, robot_state, *action_sequence]. # The cls token forms parameters of the latent's distribution (like this [*means, *log_variances]). 
super().__init__() diff --git a/src/lerobot/policies/act/processor_act.py b/src/lerobot/policies/act/processor_act.py new file mode 100644 index 000000000..b339212e1 --- /dev/null +++ b/src/lerobot/policies/act/processor_act.py @@ -0,0 +1,44 @@ +#!/usr/bin/env python + +# Copyright 2024 Tony Z. Zhao and The HuggingFace Inc. team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import torch + +from lerobot.policies.act.configuration_act import ACTConfig +from lerobot.processor import ( + NormalizerProcessor, + RobotProcessor, + UnnormalizerProcessor, +) + + +def make_act_processor( + config: ACTConfig, dataset_stats: dict[str, dict[str, torch.Tensor]] | None = None +) -> tuple[RobotProcessor, RobotProcessor]: + input_steps = [ + NormalizerProcessor( + features=config.input_features, norm_map=config.normalization_mapping, stats=dataset_stats + ), + NormalizerProcessor( + features=config.output_features, norm_map=config.normalization_mapping, stats=dataset_stats + ), + ] + output_steps = [ + UnnormalizerProcessor( + features=config.output_features, norm_map=config.normalization_mapping, stats=dataset_stats + ), + ] + return RobotProcessor(steps=input_steps, name="act_preprocessor"), RobotProcessor( + steps=output_steps, name="act_postprocessor" + ) diff --git a/src/lerobot/policies/diffusion/modeling_diffusion.py b/src/lerobot/policies/diffusion/modeling_diffusion.py index 85d4d5981..6c608874f 100644 --- 
a/src/lerobot/policies/diffusion/modeling_diffusion.py +++ b/src/lerobot/policies/diffusion/modeling_diffusion.py @@ -35,7 +35,6 @@ from torch import Tensor, nn from lerobot.constants import ACTION, OBS_ENV_STATE, OBS_IMAGES, OBS_STATE from lerobot.policies.diffusion.configuration_diffusion import DiffusionConfig -from lerobot.policies.normalize import Normalize, Unnormalize from lerobot.policies.pretrained import PreTrainedPolicy from lerobot.policies.utils import ( get_device_from_parameters, @@ -57,7 +56,6 @@ class DiffusionPolicy(PreTrainedPolicy): def __init__( self, config: DiffusionConfig, - dataset_stats: dict[str, dict[str, Tensor]] | None = None, ): """ Args: @@ -70,14 +68,6 @@ class DiffusionPolicy(PreTrainedPolicy): config.validate_features() self.config = config - self.normalize_inputs = Normalize(config.input_features, config.normalization_mapping, dataset_stats) - self.normalize_targets = Normalize( - config.output_features, config.normalization_mapping, dataset_stats - ) - self.unnormalize_outputs = Unnormalize( - config.output_features, config.normalization_mapping, dataset_stats - ) - # queues are populated during rollout of the policy, they contain the n latest observations and actions self._queues = None @@ -106,9 +96,6 @@ class DiffusionPolicy(PreTrainedPolicy): batch = {k: torch.stack(list(self._queues[k]), dim=1) for k in batch if k in self._queues} actions = self.diffusion.generate_actions(batch) - # TODO(rcadene): make above methods return output dictionary? 
- actions = self.unnormalize_outputs({ACTION: actions})[ACTION] - return actions @torch.no_grad() @@ -153,11 +140,9 @@ class DiffusionPolicy(PreTrainedPolicy): def forward(self, batch: dict[str, Tensor]) -> tuple[Tensor, None]: """Run the batch through the model and compute the loss for training or validation.""" - batch = self.normalize_inputs(batch) if self.config.image_features: batch = dict(batch) # shallow copy so that adding a key doesn't modify the original batch[OBS_IMAGES] = torch.stack([batch[key] for key in self.config.image_features], dim=-4) - batch = self.normalize_targets(batch) loss = self.diffusion.compute_loss(batch) # no output_dict so returning None return loss, None diff --git a/src/lerobot/policies/diffusion/processor_diffusion.py b/src/lerobot/policies/diffusion/processor_diffusion.py new file mode 100644 index 000000000..6db3ef98b --- /dev/null +++ b/src/lerobot/policies/diffusion/processor_diffusion.py @@ -0,0 +1,45 @@ +#!/usr/bin/env python + +# Copyright 2024 Columbia Artificial Intelligence, Robotics Lab, +# and The HuggingFace Inc. team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import torch + +from lerobot.policies.diffusion.configuration_diffusion import DiffusionConfig +from lerobot.processor import ( + NormalizerProcessor, + RobotProcessor, + UnnormalizerProcessor, +) + + +def make_diffusion_processor( + config: DiffusionConfig, dataset_stats: dict[str, dict[str, torch.Tensor]] | None = None +) -> tuple[RobotProcessor, RobotProcessor]: + input_steps = [ + NormalizerProcessor( + features=config.input_features, norm_map=config.normalization_mapping, stats=dataset_stats + ), + NormalizerProcessor( + features=config.output_features, norm_map=config.normalization_mapping, stats=dataset_stats + ), + ] + output_steps = [ + UnnormalizerProcessor( + features=config.output_features, norm_map=config.normalization_mapping, stats=dataset_stats + ), + ] + return RobotProcessor(steps=input_steps, name="diffusion_preprocessor"), RobotProcessor( + steps=output_steps, name="diffusion_postprocessor" + ) diff --git a/src/lerobot/policies/factory.py b/src/lerobot/policies/factory.py index ef56bdb61..b3fdf0626 100644 --- a/src/lerobot/policies/factory.py +++ b/src/lerobot/policies/factory.py @@ -34,6 +34,7 @@ from lerobot.policies.sac.reward_model.configuration_classifier import RewardCla from lerobot.policies.smolvla.configuration_smolvla import SmolVLAConfig from lerobot.policies.tdmpc.configuration_tdmpc import TDMPCConfig from lerobot.policies.vqbet.configuration_vqbet import VQBeTConfig +from lerobot.processor.pipeline import RobotProcessor def get_policy_class(name: str) -> PreTrainedPolicy: @@ -101,6 +102,83 @@ def make_policy_config(policy_type: str, **kwargs) -> PreTrainedConfig: raise ValueError(f"Policy type '{policy_type}' is not available.") +def make_processor( + policy_cfg: PreTrainedConfig, + pretrained_path: str | None = None, + **kwargs, +) -> tuple[RobotProcessor, RobotProcessor]: + """Make a processor instance for a given policy type. + + This function creates the appropriate processor configuration based on the policy type. 
+ Each policy type has its own processor with specific preprocessing steps. + + Args: + policy_cfg: The policy configuration; its `type` selects the processor (e.g., "act", "diffusion"). + pretrained_path: Optional path to a pretrained processor. Loading is not implemented yet: + any truthy value raises NotImplementedError. + **kwargs: Additional keyword arguments forwarded to the policy-specific factory (e.g. `dataset_stats`). + + Returns: + tuple[RobotProcessor, RobotProcessor]: The (preprocessor, postprocessor) pair. + + Raises: + NotImplementedError: If `pretrained_path` is set, or the policy type has no processor implemented. + """ + if pretrained_path: + # Load a pretrained processor + # TODO(azouitine): Handle this case. + raise NotImplementedError("Loading a pretrained processor is not implemented.") + + # Create a new processor based on policy type + if policy_cfg.type == "tdmpc": + from lerobot.policies.tdmpc.processor_tdmpc import make_tdmpc_processor + + return make_tdmpc_processor(policy_cfg, **kwargs) + + elif policy_cfg.type == "diffusion": + from lerobot.policies.diffusion.processor_diffusion import make_diffusion_processor + + return make_diffusion_processor(policy_cfg, **kwargs) + + elif policy_cfg.type == "act": + from lerobot.policies.act.processor_act import make_act_processor + + return make_act_processor(policy_cfg, **kwargs) + + elif policy_cfg.type == "vqbet": + from lerobot.policies.vqbet.processor_vqbet import make_vqbet_processor + + return make_vqbet_processor(policy_cfg, **kwargs) + + elif policy_cfg.type == "pi0": + from lerobot.policies.pi0.processor_pi0 import make_pi0_processor + + return make_pi0_processor(policy_cfg, **kwargs) + + elif policy_cfg.type == "pi0fast": + from lerobot.policies.pi0fast.processor_pi0fast import make_pi0fast_processor + + return make_pi0fast_processor(policy_cfg, **kwargs) + + elif policy_cfg.type == "sac": + from lerobot.policies.sac.processor_sac import make_sac_processor + + return make_sac_processor(policy_cfg, **kwargs) + + elif policy_cfg.type == "reward_classifier": +
from lerobot.policies.sac.reward_model.processor_classifier import make_classifier_processor + + return make_classifier_processor(policy_cfg, **kwargs) + + elif policy_cfg.type == "smolvla": + from lerobot.policies.smolvla.processor_smolvla import make_smolvla_processor + + return make_smolvla_processor(policy_cfg, **kwargs) + + else: + raise NotImplementedError(f"Processor for policy type '{policy_cfg.type}' is not implemented.") + + def make_policy( cfg: PreTrainedConfig, ds_meta: LeRobotDatasetMetadata | None = None, @@ -147,7 +225,6 @@ def make_policy( kwargs = {} if ds_meta is not None: features = dataset_to_policy_features(ds_meta.features) - kwargs["dataset_stats"] = ds_meta.stats else: if not cfg.pretrained_path: logging.warning( diff --git a/src/lerobot/policies/pi0/modeling_pi0.py b/src/lerobot/policies/pi0/modeling_pi0.py index e56946ac8..3c0dcc111 100644 --- a/src/lerobot/policies/pi0/modeling_pi0.py +++ b/src/lerobot/policies/pi0/modeling_pi0.py @@ -59,7 +59,6 @@ from torch import Tensor, nn from transformers import AutoTokenizer from lerobot.constants import ACTION, OBS_STATE -from lerobot.policies.normalize import Normalize, Unnormalize from lerobot.policies.pi0.configuration_pi0 import PI0Config from lerobot.policies.pi0.paligemma_with_expert import ( PaliGemmaWithExpertConfig, @@ -223,7 +222,6 @@ class PI0Policy(PreTrainedPolicy): def __init__( self, config: PI0Config, - dataset_stats: dict[str, dict[str, Tensor]] | None = None, ): """ Args: @@ -236,14 +234,8 @@ class PI0Policy(PreTrainedPolicy): super().__init__(config) config.validate_features() self.config = config - self.normalize_inputs = Normalize(config.input_features, config.normalization_mapping, dataset_stats) - self.normalize_targets = Normalize( - config.output_features, config.normalization_mapping, dataset_stats - ) - self.unnormalize_outputs = Unnormalize( - config.output_features, config.normalization_mapping, dataset_stats - ) + # TODO(azouitine): Add tokenizer to pipeline 
self.language_tokenizer = AutoTokenizer.from_pretrained("google/paligemma-3b-pt-224") self.model = PI0FlowMatching(config) @@ -377,8 +369,6 @@ class PI0Policy(PreTrainedPolicy): if self.config.adapt_to_pi_aloha: batch[OBS_STATE] = self._pi_aloha_decode_state(batch[OBS_STATE]) - batch = self.normalize_inputs(batch) - # Action queue logic for n_action_steps > 1. When the action_queue is depleted, populate it by # querying the policy. if len(self._action_queue) == 0: @@ -394,8 +384,6 @@ class PI0Policy(PreTrainedPolicy): original_action_dim = self.config.action_feature.shape[0] actions = actions[:, :, :original_action_dim] - actions = self.unnormalize_outputs({"action": actions})["action"] - if self.config.adapt_to_pi_aloha: actions = self._pi_aloha_encode_actions(actions) @@ -410,9 +398,6 @@ class PI0Policy(PreTrainedPolicy): batch[OBS_STATE] = self._pi_aloha_decode_state(batch[OBS_STATE]) batch[ACTION] = self._pi_aloha_encode_actions_inv(batch[ACTION]) - batch = self.normalize_inputs(batch) - batch = self.normalize_targets(batch) - images, img_masks = self.prepare_images(batch) state = self.prepare_state(batch) lang_tokens, lang_masks = self.prepare_language(batch) diff --git a/src/lerobot/policies/pi0/processor_pi0.py b/src/lerobot/policies/pi0/processor_pi0.py new file mode 100644 index 000000000..8255971c8 --- /dev/null +++ b/src/lerobot/policies/pi0/processor_pi0.py @@ -0,0 +1,45 @@ +#!/usr/bin/env python + +# Copyright 2025 Physical Intelligence and The HuggingFace Inc. team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +import torch + +from lerobot.policies.pi0.configuration_pi0 import PI0Config +from lerobot.processor import ( + NormalizerProcessor, + RobotProcessor, + UnnormalizerProcessor, +) + + +def make_pi0_processor( + config: PI0Config, dataset_stats: dict[str, dict[str, torch.Tensor]] | None = None +) -> tuple[RobotProcessor, RobotProcessor]: + input_steps = [ + NormalizerProcessor( + features=config.input_features, norm_map=config.normalization_mapping, stats=dataset_stats + ), + NormalizerProcessor( + features=config.output_features, norm_map=config.normalization_mapping, stats=dataset_stats + ), + ] + output_steps = [ + UnnormalizerProcessor( + features=config.output_features, norm_map=config.normalization_mapping, stats=dataset_stats + ), + ] + return RobotProcessor(steps=input_steps, name="pi0_preprocessor"), RobotProcessor( + steps=output_steps, name="pi0_postprocessor" + ) diff --git a/src/lerobot/policies/pi0fast/modeling_pi0fast.py b/src/lerobot/policies/pi0fast/modeling_pi0fast.py index 80e10bc02..902b755ab 100644 --- a/src/lerobot/policies/pi0fast/modeling_pi0fast.py +++ b/src/lerobot/policies/pi0fast/modeling_pi0fast.py @@ -58,7 +58,6 @@ from transformers.cache_utils import HybridCache, StaticCache from transformers.models.auto import CONFIG_MAPPING from lerobot.constants import ACTION, OBS_STATE -from lerobot.policies.normalize import Normalize, Unnormalize from lerobot.policies.pi0fast.configuration_pi0fast import PI0FASTConfig from lerobot.policies.pretrained import PreTrainedPolicy @@ -146,14 +145,6 @@ class PI0FASTPolicy(PreTrainedPolicy): config.validate_features() self.config = config - self.normalize_inputs = Normalize(config.input_features, config.normalization_mapping, dataset_stats) - self.normalize_targets = Normalize( - config.output_features, config.normalization_mapping, dataset_stats - ) - self.unnormalize_outputs = Unnormalize( - 
config.output_features, config.normalization_mapping, dataset_stats - ) - self.language_tokenizer = AutoProcessor.from_pretrained("google/paligemma-3b-pt-224") self.model = PI0FAST(config) @@ -221,8 +212,6 @@ class PI0FASTPolicy(PreTrainedPolicy): if self.config.adapt_to_pi_aloha: batch[OBS_STATE] = self._pi_aloha_decode_state(batch[OBS_STATE]) - batch = self.normalize_inputs(batch) - # Action queue logic for n_action_steps > 1. When the action_queue is depleted, populate it by # querying the policy. if len(self._action_queue) == 0: @@ -235,8 +224,6 @@ class PI0FASTPolicy(PreTrainedPolicy): ] # self.config.max_action_dim # self.config.action_feature.shape[0] actions = actions[:, :, :original_action_dim] - actions = self.unnormalize_outputs({"action": actions})["action"] - if self.config.adapt_to_pi_aloha: actions = self._pi_aloha_encode_actions(actions) @@ -249,8 +236,6 @@ class PI0FASTPolicy(PreTrainedPolicy): if self.config.adapt_to_pi_aloha: batch[OBS_STATE] = self._pi_aloha_decode_state(batch[OBS_STATE]) batch[ACTION] = self._pi_aloha_encode_actions_inv(batch[ACTION]) - batch = self.normalize_inputs(batch) - batch = self.normalize_targets(batch) loss_dict = self.model.forward(batch) return loss_dict["loss"], loss_dict diff --git a/src/lerobot/policies/pi0fast/processor_pi0fast.py b/src/lerobot/policies/pi0fast/processor_pi0fast.py new file mode 100644 index 000000000..8255971c8 --- /dev/null +++ b/src/lerobot/policies/pi0fast/processor_pi0fast.py @@ -0,0 +1,45 @@ +#!/usr/bin/env python + +# Copyright 2025 Physical Intelligence and The HuggingFace Inc. team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch + +from lerobot.policies.pi0fast.configuration_pi0fast import PI0FASTConfig +from lerobot.processor import ( + NormalizerProcessor, + RobotProcessor, + UnnormalizerProcessor, +) + + +def make_pi0fast_processor( + config: PI0FASTConfig, dataset_stats: dict[str, dict[str, torch.Tensor]] | None = None +) -> tuple[RobotProcessor, RobotProcessor]: + input_steps = [ + NormalizerProcessor( + features=config.input_features, norm_map=config.normalization_mapping, stats=dataset_stats + ), + NormalizerProcessor( + features=config.output_features, norm_map=config.normalization_mapping, stats=dataset_stats + ), + ] + output_steps = [ + UnnormalizerProcessor( + features=config.output_features, norm_map=config.normalization_mapping, stats=dataset_stats + ), + ] + return RobotProcessor(steps=input_steps, name="pi0fast_preprocessor"), RobotProcessor( + steps=output_steps, name="pi0fast_postprocessor" + ) diff --git a/src/lerobot/policies/sac/modeling_sac.py b/src/lerobot/policies/sac/modeling_sac.py index 878f3cdd8..fcaf02a4b 100644 --- a/src/lerobot/policies/sac/modeling_sac.py +++ b/src/lerobot/policies/sac/modeling_sac.py @@ -28,7 +28,6 @@ import torch.nn.functional as F # noqa: N812 from torch import Tensor from torch.distributions import MultivariateNormal, TanhTransform, Transform, TransformedDistribution -from lerobot.policies.normalize import NormalizeBuffer from lerobot.policies.pretrained import PreTrainedPolicy from lerobot.policies.sac.configuration_sac import SACConfig, is_image_feature from lerobot.policies.utils import get_device_from_parameters @@ -45,7 +44,6 @@ class
SACPolicy( def __init__( self, config: SACConfig | None = None, - dataset_stats: dict[str, dict[str, Tensor]] | None = None, ): super().__init__(config) config.validate_features() @@ -53,7 +51,6 @@ class SACPolicy( # Determine action dimension and initialize all components continuous_action_dim = config.output_features["action"].shape[0] - self._init_normalization(dataset_stats) self._init_encoders() self._init_critics(continuous_action_dim) self._init_actor(continuous_action_dim) @@ -88,8 +85,7 @@ class SACPolicy( observations_features = None if self.shared_encoder and self.actor.encoder.has_images: - # Cache and normalize image features - observations_features = self.actor.encoder.get_cached_image_features(batch, normalize=True) + observations_features = self.actor.encoder.get_cached_image_features(batch) actions, _, _ = self.actor(batch, observations_features) @@ -391,28 +387,12 @@ class SACPolicy( actor_loss = ((self.temperature * log_probs) - min_q_preds).mean() return actor_loss - def _init_normalization(self, dataset_stats): - """Initialize input/output normalization modules.""" - self.normalize_inputs = nn.Identity() - self.normalize_targets = nn.Identity() - if self.config.dataset_stats is not None: - params = _convert_normalization_params_to_tensor(self.config.dataset_stats) - self.normalize_inputs = NormalizeBuffer( - self.config.input_features, self.config.normalization_mapping, params - ) - stats = dataset_stats or params - self.normalize_targets = NormalizeBuffer( - self.config.output_features, self.config.normalization_mapping, stats - ) - def _init_encoders(self): """Initialize shared or separate encoders for actor and critic.""" self.shared_encoder = self.config.shared_encoder - self.encoder_critic = SACObservationEncoder(self.config, self.normalize_inputs) + self.encoder_critic = SACObservationEncoder(self.config) self.encoder_actor = ( - self.encoder_critic - if self.shared_encoder - else SACObservationEncoder(self.config, self.normalize_inputs) 
+ self.encoder_critic if self.shared_encoder else SACObservationEncoder(self.config) ) def _init_critics(self, continuous_action_dim): @@ -424,9 +404,7 @@ class SACPolicy( ) for _ in range(self.config.num_critics) ] - self.critic_ensemble = CriticEnsemble( - encoder=self.encoder_critic, ensemble=heads, output_normalization=self.normalize_targets - ) + self.critic_ensemble = CriticEnsemble(encoder=self.encoder_critic, ensemble=heads) target_heads = [ CriticHead( input_dim=self.encoder_critic.output_dim + continuous_action_dim, @@ -434,9 +412,7 @@ class SACPolicy( ) for _ in range(self.config.num_critics) ] - self.critic_target = CriticEnsemble( - encoder=self.encoder_critic, ensemble=target_heads, output_normalization=self.normalize_targets - ) + self.critic_target = CriticEnsemble(encoder=self.encoder_critic, ensemble=target_heads) self.critic_target.load_state_dict(self.critic_ensemble.state_dict()) if self.config.use_torch_compile: @@ -490,10 +466,9 @@ class SACPolicy( class SACObservationEncoder(nn.Module): """Encode image and/or state vector observations.""" - def __init__(self, config: SACConfig, input_normalizer: nn.Module) -> None: + def __init__(self, config: SACConfig) -> None: super().__init__() self.config = config - self.input_normalization = input_normalizer self._init_image_layers() self._init_state_layers() self._compute_output_dim() @@ -568,11 +543,10 @@ class SACObservationEncoder(nn.Module): def forward( self, obs: dict[str, Tensor], cache: dict[str, Tensor] | None = None, detach: bool = False ) -> Tensor: - obs = self.input_normalization(obs) parts = [] if self.has_images: if cache is None: - cache = self.get_cached_image_features(obs, normalize=False) + cache = self.get_cached_image_features(obs) parts.append(self._encode_images(cache, detach)) if self.has_env: parts.append(self.env_encoder(obs["observation.environment_state"])) @@ -585,7 +559,7 @@ class SACObservationEncoder(nn.Module): "No parts to concatenate, you should have at least one 
image or environment state or state" ) - def get_cached_image_features(self, obs: dict[str, Tensor], normalize: bool = False) -> dict[str, Tensor]: + def get_cached_image_features(self, obs: dict[str, Tensor]) -> dict[str, Tensor]: """Extract and optionally cache image features from observations. This function processes image observations through the vision encoder once and returns @@ -597,26 +571,17 @@ class SACObservationEncoder(nn.Module): - The vision encoder forward pass is typically the main computational bottleneck during training and inference - Caching these features can provide 2-4x speedup in training and inference - Normalization behavior: - - When called from inside forward(): set normalize=False since inputs are already normalized - - When called from outside forward(): set normalize=True to ensure proper input normalization - Usage patterns: - - Called in select_action() with normalize=True + - Called in select_action() - Called in learner.py's get_observation_features() to pre-compute features for all policy components - - Called internally by forward() with normalize=False + - Called internally by forward() Args: obs: Dictionary of observation tensors containing image keys - normalize: Whether to normalize observations before encoding - Set to True when calling directly from outside the encoder's forward method - Set to False when calling from within forward() where inputs are already normalized Returns: Dictionary mapping image keys to their corresponding encoded features """ - if normalize: - obs = self.input_normalization(obs) batched = torch.cat([obs[k] for k in self.image_keys], dim=0) out = self.image_encoder(batched) chunks = torch.chunk(out, len(self.image_keys), dim=0) @@ -747,7 +712,6 @@ class CriticEnsemble(nn.Module): Args: encoder (SACObservationEncoder): encoder for observations. ensemble (List[CriticHead]): list of critic heads. - output_normalization (nn.Module): normalization layer for actions. 
init_final (float | None): optional initializer scale for final layers. Forward returns a tensor of shape (num_critics, batch_size) containing Q-values. @@ -757,13 +721,11 @@ class CriticEnsemble(nn.Module): self, encoder: SACObservationEncoder, ensemble: list[CriticHead], - output_normalization: nn.Module, init_final: float | None = None, ): super().__init__() self.encoder = encoder self.init_final = init_final - self.output_normalization = output_normalization self.critics = nn.ModuleList(ensemble) def forward( @@ -775,11 +737,6 @@ class CriticEnsemble(nn.Module): device = get_device_from_parameters(self) # Move each tensor in observations to device observations = {k: v.to(device) for k, v in observations.items()} - # NOTE: We normalize actions it helps for sample efficiency - actions: dict[str, torch.tensor] = {"action": actions} - # NOTE: Normalization layer took dict in input and outputs a dict that why - actions = self.output_normalization(actions)["action"] - actions = actions.to(device) obs_enc = self.encoder(observations, cache=observation_features) diff --git a/src/lerobot/policies/sac/processor_sac.py b/src/lerobot/policies/sac/processor_sac.py new file mode 100644 index 000000000..7c93f3f91 --- /dev/null +++ b/src/lerobot/policies/sac/processor_sac.py @@ -0,0 +1,46 @@ +#!/usr/bin/env python + +# Copyright 2024 The HuggingFace Inc. team. +# All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import torch + +from lerobot.policies.sac.configuration_sac import SACConfig +from lerobot.processor import ( + NormalizerProcessor, + RobotProcessor, + UnnormalizerProcessor, +) + + +def make_sac_processor( + config: SACConfig, dataset_stats: dict[str, dict[str, torch.Tensor]] | None = None +) -> tuple[RobotProcessor, RobotProcessor]: + input_steps = [ + NormalizerProcessor( + features=config.input_features, norm_map=config.normalization_mapping, stats=dataset_stats + ), + NormalizerProcessor( + features=config.output_features, norm_map=config.normalization_mapping, stats=dataset_stats + ), + ] + output_steps = [ + UnnormalizerProcessor( + features=config.output_features, norm_map=config.normalization_mapping, stats=dataset_stats + ), + ] + return RobotProcessor(steps=input_steps, name="sac_preprocessor"), RobotProcessor( + steps=output_steps, name="sac_postprocessor" + ) diff --git a/src/lerobot/policies/sac/reward_model/modeling_classifier.py b/src/lerobot/policies/sac/reward_model/modeling_classifier.py index cadd1c9f2..ca501c3a7 100644 --- a/src/lerobot/policies/sac/reward_model/modeling_classifier.py +++ b/src/lerobot/policies/sac/reward_model/modeling_classifier.py @@ -20,7 +20,6 @@ import torch from torch import Tensor, nn from lerobot.constants import OBS_IMAGE, REWARD -from lerobot.policies.normalize import Normalize, Unnormalize from lerobot.policies.pretrained import PreTrainedPolicy from lerobot.policies.sac.reward_model.configuration_classifier import RewardClassifierConfig @@ -108,22 +107,12 @@ class Classifier(PreTrainedPolicy): def __init__( self, config: RewardClassifierConfig, - dataset_stats: dict[str, dict[str, Tensor]] | None = None, ): from transformers import AutoModel super().__init__(config) self.config = config - # Initialize normalization (standardized with the policy framework) - self.normalize_inputs = Normalize(config.input_features, config.normalization_mapping, dataset_stats) - self.normalize_targets = Normalize( - 
config.output_features, config.normalization_mapping, dataset_stats - ) - self.unnormalize_outputs = Unnormalize( - config.output_features, config.normalization_mapping, dataset_stats - ) - # Set up encoder encoder = AutoModel.from_pretrained(self.config.model_name, trust_remote_code=True) # Extract vision model if we're given a multimodal model @@ -247,10 +236,6 @@ class Classifier(PreTrainedPolicy): def forward(self, batch: dict[str, Tensor]) -> tuple[Tensor, dict[str, Tensor]]: """Standard forward pass for training compatible with train.py.""" - # Normalize inputs if needed - batch = self.normalize_inputs(batch) - batch = self.normalize_targets(batch) - # Extract images and labels images, labels = self.extract_images_and_labels(batch) diff --git a/src/lerobot/policies/sac/reward_model/processor_classifier.py b/src/lerobot/policies/sac/reward_model/processor_classifier.py new file mode 100644 index 000000000..394e85a64 --- /dev/null +++ b/src/lerobot/policies/sac/reward_model/processor_classifier.py @@ -0,0 +1,40 @@ +#!/usr/bin/env python + +# Copyright 2025 The HuggingFace Inc. team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License.
+import torch + +from lerobot.policies.sac.reward_model.configuration_classifier import RewardClassifierConfig +from lerobot.processor import ( + IdentityProcessor, + NormalizerProcessor, + RobotProcessor, +) + + +def make_classifier_processor( + config: RewardClassifierConfig, dataset_stats: dict[str, dict[str, torch.Tensor]] | None = None +) -> tuple[RobotProcessor, RobotProcessor]: + input_steps = [ + NormalizerProcessor( + features=config.input_features, norm_map=config.normalization_mapping, stats=dataset_stats + ), + NormalizerProcessor( + features=config.output_features, norm_map=config.normalization_mapping, stats=dataset_stats + ), + ] + output_steps = [IdentityProcessor()] + return RobotProcessor(steps=input_steps, name="classifier_preprocessor"), RobotProcessor( + steps=output_steps, name="classifier_postprocessor" + ) diff --git a/src/lerobot/policies/smolvla/modeling_smolvla.py b/src/lerobot/policies/smolvla/modeling_smolvla.py index 469645e84..f4f158951 100644 --- a/src/lerobot/policies/smolvla/modeling_smolvla.py +++ b/src/lerobot/policies/smolvla/modeling_smolvla.py @@ -64,10 +64,6 @@ from torch import Tensor, nn from transformers import AutoProcessor from lerobot.constants import ACTION, OBS_STATE -from lerobot.policies.normalize import ( - Normalize, - Unnormalize, -) from lerobot.policies.pretrained import PreTrainedPolicy from lerobot.policies.smolvla.configuration_smolvla import SmolVLAConfig from lerobot.policies.smolvla.smolvlm_with_expert import SmolVLMWithExpertModel @@ -326,26 +322,16 @@ class SmolVLAPolicy(PreTrainedPolicy): def __init__( self, config: SmolVLAConfig, - dataset_stats: dict[str, dict[str, Tensor]] | None = None, ): """ Args: config: Policy configuration class instance or None, in which case the default instantiation of the configuration class is used. - dataset_stats: Dataset statistics to be used for normalization. 
If not passed here, it is expected - that they will be passed with a call to `load_state_dict` before the policy is used. """ super().__init__(config) config.validate_features() self.config = config - self.normalize_inputs = Normalize(config.input_features, config.normalization_mapping, dataset_stats) - self.normalize_targets = Normalize( - config.output_features, config.normalization_mapping, dataset_stats - ) - self.unnormalize_outputs = Unnormalize( - config.output_features, config.normalization_mapping, dataset_stats - ) self.language_tokenizer = AutoProcessor.from_pretrained(self.config.vlm_model_name).tokenizer self.model = VLAFlowMatching(config) @@ -408,8 +394,6 @@ class SmolVLAPolicy(PreTrainedPolicy): if self.config.adapt_to_pi_aloha: batch[OBS_STATE] = self._pi_aloha_decode_state(batch[OBS_STATE]) - batch = self.normalize_inputs(batch) - return batch @torch.no_grad() @@ -450,8 +434,7 @@ class SmolVLAPolicy(PreTrainedPolicy): if self.config.adapt_to_pi_aloha: batch[OBS_STATE] = self._pi_aloha_decode_state(batch[OBS_STATE]) batch[ACTION] = self._pi_aloha_encode_actions_inv(batch[ACTION]) - batch = self.normalize_inputs(batch) - batch = self.normalize_targets(batch) + images, img_masks = self.prepare_images(batch) state = self.prepare_state(batch) lang_tokens, lang_masks = self.prepare_language(batch) diff --git a/src/lerobot/policies/smolvla/processor_smolvla.py b/src/lerobot/policies/smolvla/processor_smolvla.py new file mode 100644 index 000000000..df1a51b4d --- /dev/null +++ b/src/lerobot/policies/smolvla/processor_smolvla.py @@ -0,0 +1,44 @@ +#!/usr/bin/env python + +# Copyright 2025 HuggingFace Inc. team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import torch + +from lerobot.policies.smolvla.configuration_smolvla import SmolVLAConfig +from lerobot.processor import ( + NormalizerProcessor, + RobotProcessor, + UnnormalizerProcessor, +) + + +def make_smolvla_processor( + config: SmolVLAConfig, dataset_stats: dict[str, dict[str, torch.Tensor]] | None = None +) -> tuple[RobotProcessor, RobotProcessor]: + input_steps = [ + NormalizerProcessor( + features=config.input_features, norm_map=config.normalization_mapping, stats=dataset_stats + ), + NormalizerProcessor( + features=config.output_features, norm_map=config.normalization_mapping, stats=dataset_stats + ), + ] + output_steps = [ + UnnormalizerProcessor( + features=config.output_features, norm_map=config.normalization_mapping, stats=dataset_stats + ), + ] + return RobotProcessor(steps=input_steps, name="smolvla_preprocessor"), RobotProcessor( + steps=output_steps, name="smolvla_postprocessor" + ) diff --git a/src/lerobot/policies/tdmpc/modeling_tdmpc.py b/src/lerobot/policies/tdmpc/modeling_tdmpc.py index 7ba88e5e6..92518d4ad 100644 --- a/src/lerobot/policies/tdmpc/modeling_tdmpc.py +++ b/src/lerobot/policies/tdmpc/modeling_tdmpc.py @@ -36,7 +36,6 @@ import torch.nn.functional as F # noqa: N812 from torch import Tensor from lerobot.constants import ACTION, OBS_ENV_STATE, OBS_IMAGE, OBS_STATE, REWARD -from lerobot.policies.normalize import Normalize, Unnormalize from lerobot.policies.pretrained import PreTrainedPolicy from lerobot.policies.tdmpc.configuration_tdmpc import TDMPCConfig from lerobot.policies.utils import get_device_from_parameters, 
get_output_shape, populate_queues @@ -63,26 +62,19 @@ class TDMPCPolicy(PreTrainedPolicy): config_class = TDMPCConfig name = "tdmpc" - def __init__(self, config: TDMPCConfig, dataset_stats: dict[str, dict[str, Tensor]] | None = None): + def __init__( + self, + config: TDMPCConfig, + ): """ Args: config: Policy configuration class instance or None, in which case the default instantiation of the configuration class is used. - dataset_stats: Dataset statistics to be used for normalization. If not passed here, it is expected - that they will be passed with a call to `load_state_dict` before the policy is used. """ super().__init__(config) config.validate_features() self.config = config - self.normalize_inputs = Normalize(config.input_features, config.normalization_mapping, dataset_stats) - self.normalize_targets = Normalize( - config.output_features, config.normalization_mapping, dataset_stats - ) - self.unnormalize_outputs = Unnormalize( - config.output_features, config.normalization_mapping, dataset_stats - ) - self.model = TDMPCTOLD(config) self.model_target = deepcopy(self.model) for param in self.model_target.parameters(): @@ -320,11 +312,9 @@ class TDMPCPolicy(PreTrainedPolicy): """ device = get_device_from_parameters(self) - batch = self.normalize_inputs(batch) if self.config.image_features: batch = dict(batch) # shallow copy so that adding a key doesn't modify the original batch[OBS_IMAGE] = batch[next(iter(self.config.image_features))] - batch = self.normalize_targets(batch) info = {} diff --git a/src/lerobot/policies/tdmpc/processor_tdmpc.py b/src/lerobot/policies/tdmpc/processor_tdmpc.py new file mode 100644 index 000000000..cb906a12e --- /dev/null +++ b/src/lerobot/policies/tdmpc/processor_tdmpc.py @@ -0,0 +1,45 @@ +#!/usr/bin/env python + +# Copyright 2024 Nicklas Hansen, Xiaolong Wang, Hao Su, +# and The HuggingFace Inc. team. All rights reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import torch + +from lerobot.policies.tdmpc.configuration_tdmpc import TDMPCConfig +from lerobot.processor import ( + NormalizerProcessor, + RobotProcessor, + UnnormalizerProcessor, +) + + +def make_tdmpc_processor( + config: TDMPCConfig, dataset_stats: dict[str, dict[str, torch.Tensor]] | None = None +) -> tuple[RobotProcessor, RobotProcessor]: + input_steps = [ + NormalizerProcessor( + features=config.input_features, norm_map=config.normalization_mapping, stats=dataset_stats + ), + NormalizerProcessor( + features=config.output_features, norm_map=config.normalization_mapping, stats=dataset_stats + ), + ] + output_steps = [ + UnnormalizerProcessor( + features=config.output_features, norm_map=config.normalization_mapping, stats=dataset_stats + ), + ] + return RobotProcessor(steps=input_steps, name="tdmpc_preprocessor"), RobotProcessor( + steps=output_steps, name="tdmpc_postprocessor" + ) diff --git a/src/lerobot/policies/vqbet/modeling_vqbet.py b/src/lerobot/policies/vqbet/modeling_vqbet.py index feb65bb4c..06c9bcfb9 100644 --- a/src/lerobot/policies/vqbet/modeling_vqbet.py +++ b/src/lerobot/policies/vqbet/modeling_vqbet.py @@ -28,7 +28,6 @@ import torchvision from torch import Tensor, nn from lerobot.constants import ACTION, OBS_IMAGES, OBS_STATE -from lerobot.policies.normalize import Normalize, Unnormalize from lerobot.policies.pretrained import PreTrainedPolicy from lerobot.policies.utils import 
get_device_from_parameters, get_output_shape, populate_queues from lerobot.policies.vqbet.configuration_vqbet import VQBeTConfig @@ -48,7 +47,6 @@ class VQBeTPolicy(PreTrainedPolicy): def __init__( self, config: VQBeTConfig | None = None, - dataset_stats: dict[str, dict[str, Tensor]] | None = None, ): """ Args: @@ -61,14 +59,6 @@ class VQBeTPolicy(PreTrainedPolicy): config.validate_features() self.config = config - self.normalize_inputs = Normalize(config.input_features, config.normalization_mapping, dataset_stats) - self.normalize_targets = Normalize( - config.output_features, config.normalization_mapping, dataset_stats - ) - self.unnormalize_outputs = Unnormalize( - config.output_features, config.normalization_mapping, dataset_stats - ) - self.vqbet = VQBeTModel(config) self.reset() @@ -165,10 +155,8 @@ class VQBeTPolicy(PreTrainedPolicy): def forward(self, batch: dict[str, Tensor]) -> tuple[Tensor, dict]: """Run the batch through the model and compute the loss for training or validation.""" - batch = self.normalize_inputs(batch) batch = dict(batch) # shallow copy so that adding a key doesn't modify the original batch[OBS_IMAGES] = torch.stack([batch[key] for key in self.config.image_features], dim=-4) - batch = self.normalize_targets(batch) # VQ-BeT discretizes action using VQ-VAE before training BeT (please refer to section 3.2 in the VQ-BeT paper https://huggingface.co/papers/2403.03181) if not self.vqbet.action_head.vqvae_model.discretized.item(): # loss: total loss of training RVQ diff --git a/src/lerobot/policies/vqbet/processor_vqbet.py b/src/lerobot/policies/vqbet/processor_vqbet.py new file mode 100644 index 000000000..96cbca392 --- /dev/null +++ b/src/lerobot/policies/vqbet/processor_vqbet.py @@ -0,0 +1,46 @@ +#!/usr/bin/env python + +# Copyright 2024 Seungjae Lee and Yibin Wang and Haritheja Etukuru +# and H. Jin Kim and Nur Muhammad Mahi Shafiullah and Lerrel Pinto +# and The HuggingFace Inc. team. All rights reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import torch + +from lerobot.policies.vqbet.configuration_vqbet import VQBeTConfig +from lerobot.processor import ( + NormalizerProcessor, + RobotProcessor, + UnnormalizerProcessor, +) + + +def make_vqbet_processor( + config: VQBeTConfig, dataset_stats: dict[str, dict[str, torch.Tensor]] | None = None +) -> tuple[RobotProcessor, RobotProcessor]: + input_steps = [ + NormalizerProcessor( + features=config.input_features, norm_map=config.normalization_mapping, stats=dataset_stats + ), + NormalizerProcessor( + features=config.output_features, norm_map=config.normalization_mapping, stats=dataset_stats + ), + ] + output_steps = [ + UnnormalizerProcessor( + features=config.output_features, norm_map=config.normalization_mapping, stats=dataset_stats + ), + ] + return RobotProcessor(steps=input_steps, name="vqbet_preprocessor"), RobotProcessor( + steps=output_steps, name="vqbet_postprocessor" + ) diff --git a/tests/policies/test_policies.py b/tests/policies/test_policies.py index da7573d7c..79249f672 100644 --- a/tests/policies/test_policies.py +++ b/tests/policies/test_policies.py @@ -39,6 +39,7 @@ from lerobot.policies.factory import ( get_policy_class, make_policy, make_policy_config, + make_processor, ) from lerobot.policies.normalize import Normalize, Unnormalize from lerobot.policies.pretrained import PreTrainedPolicy @@ -151,6 +152,7 @@ def test_policy(ds_repo_id, env_name, env_kwargs, policy_name, policy_kwargs): # Check 
that we can make the policy object. dataset = make_dataset(train_cfg) + preprocessor, _ = make_processor(train_cfg.policy, None) policy = make_policy(train_cfg.policy, ds_meta=dataset.meta) assert isinstance(policy, PreTrainedPolicy) @@ -224,6 +226,7 @@ def test_act_backbone_lr(): assert cfg.policy.optimizer_lr_backbone == 0.001 dataset = make_dataset(cfg) + preprocessor, _ = make_processor(cfg.policy, None) policy = make_policy(cfg.policy, ds_meta=dataset.meta) optimizer, _ = make_optimizer_and_scheduler(cfg, policy) assert len(optimizer.param_groups) == 2