refactor(processors): Improve Normalization Processor Performance and Device/Dtype Adaptability (#1880)

* refactor(processors): reorder processor steps for consistency across implementations

- Updated the order of processor steps in multiple files to ensure consistency, placing AddBatchDimensionProcessorStep and DeviceProcessorStep before NormalizerProcessorStep.
- Adjusted related test assertions to reflect the new order of steps in the preprocessor, enhancing clarity and maintainability.

* refactor(normalization): remove dtype specification in tensor conversion for adaptation logic

- Updated tensor conversion in the _NormalizationMixin class to remove explicit dtype specification, allowing for automatic adaptation of tensor types.
- Adjusted related tests to ensure proper functionality with the new tensor conversion logic, verifying that normalizers adapt correctly to input types.
This commit is contained in:
Adil Zouitine
2025-09-08 10:46:35 +02:00
committed by GitHub
parent f1cfdfced9
commit d32006440c
17 changed files with 677 additions and 72 deletions
+75 -5
View File
@@ -123,11 +123,11 @@ def test_make_smolvla_processor_basic():
# Check steps in preprocessor
assert len(preprocessor.steps) == 6
assert isinstance(preprocessor.steps[0], RenameProcessorStep)
assert isinstance(preprocessor.steps[1], NormalizerProcessorStep)
assert isinstance(preprocessor.steps[2], AddBatchDimensionProcessorStep)
assert isinstance(preprocessor.steps[3], SmolVLANewLineProcessor)
# Step 4 would be TokenizerProcessorStep but it's mocked
assert isinstance(preprocessor.steps[5], DeviceProcessorStep)
assert isinstance(preprocessor.steps[1], AddBatchDimensionProcessorStep)
assert isinstance(preprocessor.steps[2], SmolVLANewLineProcessor)
# Step 3 would be TokenizerProcessorStep but it's mocked
assert isinstance(preprocessor.steps[4], DeviceProcessorStep)
assert isinstance(preprocessor.steps[5], NormalizerProcessorStep)
# Check steps in postprocessor
assert len(postprocessor.steps) == 2
@@ -404,3 +404,73 @@ def test_smolvla_newline_processor_transform_features():
}
result = processor.transform_features(features)
assert result == features # Should return unchanged
@pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA not available")
def test_smolvla_processor_bfloat16_device_float32_normalizer():
    """A float32-configured NormalizerProcessorStep must auto-adapt when the
    upstream DeviceProcessorStep casts tensors to bfloat16, so the whole
    pipeline output ends up in bfloat16.
    """
    config = create_default_config()
    config.device = "cuda"
    stats = create_default_stats()

    with patch(
        "lerobot.policies.smolvla.processor_smolvla.TokenizerProcessorStep", MockTokenizerProcessorStep
    ):
        preprocessor, _ = make_smolvla_pre_post_processors(
            config,
            stats,
            preprocessor_kwargs={"to_transition": lambda x: x, "to_output": lambda x: x},
            postprocessor_kwargs={"to_transition": lambda x: x, "to_output": lambda x: x},
        )

    # Rebuild the pipeline: the device step casts to bfloat16, while the
    # normalizer is deliberately left configured for float32 so that its
    # automatic dtype adaptation has to kick in.
    def _swap(step):
        if isinstance(step, DeviceProcessorStep):
            return DeviceProcessorStep(device=config.device, float_dtype="bfloat16")
        if isinstance(step, NormalizerProcessorStep):
            return NormalizerProcessorStep(
                features=step.features,
                norm_map=step.norm_map,
                stats=step.stats,
                device=config.device,
                dtype=torch.float32,  # Deliberately configured as float32
            )
        return step

    preprocessor.steps = [_swap(step) for step in preprocessor.steps]

    # SmolVLA places NormalizerProcessorStep last in the preprocessor (index 5).
    normalizer_step = preprocessor.steps[5]
    assert normalizer_step.dtype == torch.float32

    # Test data covering both a state vector and a visual observation.
    observation = {
        OBS_STATE: torch.randn(8, dtype=torch.float32),
        OBS_IMAGE: torch.randn(3, 224, 224, dtype=torch.float32),
    }
    transition = create_transition(
        observation,
        torch.randn(7, dtype=torch.float32),
        complementary_data={"task": "test bfloat16 adaptation"},
    )

    processed = preprocessor(transition)

    # DeviceProcessor casts to bfloat16 and the normalizer follows suit.
    assert processed[TransitionKey.OBSERVATION][OBS_STATE].dtype == torch.bfloat16
    # IDENTITY-normalized images still receive the dtype conversion.
    assert processed[TransitionKey.OBSERVATION][OBS_IMAGE].dtype == torch.bfloat16
    assert processed[TransitionKey.ACTION].dtype == torch.bfloat16

    # The normalizer's internal state adapted as well.
    assert normalizer_step.dtype == torch.bfloat16
    # State stats exist because OBS_STATE has a real normalization mode.
    for stat_tensor in normalizer_step._tensor_stats[OBS_STATE].values():
        assert stat_tensor.dtype == torch.bfloat16
    # OBS_IMAGE uses IDENTITY normalization, so it has no stats to inspect.