refactor(processors): Improve Normalization Processor Performance and Device/Dtype Adaptability (#1880)

* refactor(processors): reorder processor steps for consistency across implementations
  - Updated the order of processor steps in multiple files to ensure consistency, placing AddBatchDimensionProcessorStep and DeviceProcessorStep before NormalizerProcessorStep.
  - Adjusted related test assertions to reflect the new order of steps in the preprocessor, enhancing clarity and maintainability.
* refactor(normalization): remove dtype specification in tensor conversion for adaptation logic
  - Updated tensor conversion in the _NormalizationMixin class to remove explicit dtype specification, allowing for automatic adaptation of tensor types.
  - Adjusted related tests to ensure proper functionality with the new tensor conversion logic, verifying that normalizers adapt correctly to input types.
2026-07-05 17:17:01 +00:00 · 2025-09-08 10:46:35 +02:00
parent f1cfdfced9
commit d32006440c
17 changed files with 677 additions and 72 deletions
@@ -95,9 +95,9 @@ def test_make_tdmpc_processor_basic():
    # Check steps in preprocessor
    assert len(preprocessor.steps) == 4
    assert isinstance(preprocessor.steps[0], RenameProcessorStep)
-    assert isinstance(preprocessor.steps[1], NormalizerProcessorStep)
-    assert isinstance(preprocessor.steps[2], AddBatchDimensionProcessorStep)
-    assert isinstance(preprocessor.steps[3], DeviceProcessorStep)
+    assert isinstance(preprocessor.steps[1], AddBatchDimensionProcessorStep)
+    assert isinstance(preprocessor.steps[2], DeviceProcessorStep)
+    assert isinstance(preprocessor.steps[3], NormalizerProcessorStep)

    # Check steps in postprocessor
    assert len(postprocessor.steps) == 2
@@ -331,9 +331,24 @@ def test_tdmpc_processor_mixed_precision():
    )

    # Replace DeviceProcessorStep with one that uses float16
-    for i, step in enumerate(preprocessor.steps):
+    modified_steps = []
+    for step in preprocessor.steps:
        if isinstance(step, DeviceProcessorStep):
-            preprocessor.steps[i] = DeviceProcessorStep(device=config.device, float_dtype="float16")
+            modified_steps.append(DeviceProcessorStep(device=config.device, float_dtype="float16"))
+        elif isinstance(step, NormalizerProcessorStep):
+            # Update normalizer to use the same device as the device processor
+            modified_steps.append(
+                NormalizerProcessorStep(
+                    features=step.features,
+                    norm_map=step.norm_map,
+                    stats=step.stats,
+                    device=config.device,
+                    dtype=torch.float16,  # Match the float16 dtype
+                )
+            )
+        else:
+            modified_steps.append(step)
+    preprocessor.steps = modified_steps

    # Create test data
    observation = {
@@ -410,3 +425,67 @@ def test_tdmpc_processor_edge_cases():
    processed = preprocessor(transition)
    assert processed[TransitionKey.OBSERVATION][OBS_IMAGE].shape == (1, 3, 224, 224)
    assert OBS_STATE not in processed[TransitionKey.OBSERVATION]
+
+
+@pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA not available")
+def test_tdmpc_processor_bfloat16_device_float32_normalizer():
+    """Check that a float32-configured NormalizerProcessorStep adapts to the bfloat16 tensors produced by DeviceProcessorStep, so the pipeline outputs bfloat16."""
+    config = create_default_config()
+    config.device = "cuda"
+    stats = create_default_stats()
+
+    preprocessor, _ = make_tdmpc_pre_post_processors(
+        config,
+        stats,
+        preprocessor_kwargs={"to_transition": lambda x: x, "to_output": lambda x: x},
+    )
+
+    # Rebuild the step list: bfloat16 device step, float32 normalizer, every other step kept as-is
+    modified_steps = []
+    for step in preprocessor.steps:
+        if isinstance(step, DeviceProcessorStep):
+            # Replacement device step is configured with float_dtype="bfloat16"
+            modified_steps.append(DeviceProcessorStep(device=config.device, float_dtype="bfloat16"))
+        elif isinstance(step, NormalizerProcessorStep):
+            # Normalizer is rebuilt as float32; the assertions below expect it to adapt to bfloat16
+            modified_steps.append(
+                NormalizerProcessorStep(
+                    features=step.features,
+                    norm_map=step.norm_map,
+                    stats=step.stats,
+                    device=config.device,
+                    dtype=torch.float32,  # Deliberately mismatched with the device step's bfloat16
+                )
+            )
+        else:
+            modified_steps.append(step)
+    preprocessor.steps = modified_steps
+
+    # Before any data flows through, the normalizer must still report float32
+    normalizer_step = preprocessor.steps[3]  # index 3 is expected to be the NormalizerProcessorStep (last step)
+    assert normalizer_step.dtype == torch.float32
+
+    # Unbatched float32 inputs: a state vector, an image, and an action
+    observation = {
+        OBS_STATE: torch.randn(12, dtype=torch.float32),
+        OBS_IMAGE: torch.randn(3, 224, 224, dtype=torch.float32),
+    }
+    action = torch.randn(6, dtype=torch.float32)
+    transition = create_transition(observation, action)
+
+    # Run the transition through every preprocessor step
+    processed = preprocessor(transition)
+
+    # State, image and action must all come out of the pipeline as bfloat16
+    assert processed[TransitionKey.OBSERVATION][OBS_STATE].dtype == torch.bfloat16
+    assert (
+        processed[TransitionKey.OBSERVATION][OBS_IMAGE].dtype == torch.bfloat16
+    )  # IDENTITY normalization still gets dtype conversion
+    assert processed[TransitionKey.ACTION].dtype == torch.bfloat16
+
+    # The normalizer's own dtype and cached stats must have switched to bfloat16 as well
+    assert normalizer_step.dtype == torch.bfloat16
+    # Check state stats (has normalization)
+    for stat_tensor in normalizer_step._tensor_stats[OBS_STATE].values():
+        assert stat_tensor.dtype == torch.bfloat16
+    # OBS_IMAGE uses IDENTITY normalization, so no stats to check