refactor(processors): Improve Normalization Processor Performance and Device/Dtype Adaptability (#1880)

* refactor(processors): reorder processor steps for consistency across implementations

- Updated the order of processor steps in multiple files to ensure consistency, placing AddBatchDimensionProcessorStep and DeviceProcessorStep before NormalizerProcessorStep.
- Adjusted related test assertions to reflect the new order of steps in the preprocessor, enhancing clarity and maintainability.

* refactor(normalization): remove dtype specification in tensor conversion for adaptation logic

- Updated tensor conversion in the _NormalizationMixin class to remove explicit dtype specification, allowing for automatic adaptation of tensor types.
- Adjusted related tests to ensure proper functionality with the new tensor conversion logic, verifying that normalizers adapt correctly to input types.
This commit is contained in:
Adil Zouitine
2025-09-08 10:46:35 +02:00
committed by GitHub
parent f1cfdfced9
commit d32006440c
17 changed files with 677 additions and 72 deletions
+75 -5
View File
@@ -123,11 +123,11 @@ def test_make_smolvla_processor_basic():
# Check steps in preprocessor
assert len(preprocessor.steps) == 6
assert isinstance(preprocessor.steps[0], RenameProcessorStep)
assert isinstance(preprocessor.steps[1], NormalizerProcessorStep)
assert isinstance(preprocessor.steps[2], AddBatchDimensionProcessorStep)
assert isinstance(preprocessor.steps[3], SmolVLANewLineProcessor)
# Step 4 would be TokenizerProcessorStep but it's mocked
assert isinstance(preprocessor.steps[5], DeviceProcessorStep)
assert isinstance(preprocessor.steps[1], AddBatchDimensionProcessorStep)
assert isinstance(preprocessor.steps[2], SmolVLANewLineProcessor)
# Step 3 would be TokenizerProcessorStep but it's mocked
assert isinstance(preprocessor.steps[4], DeviceProcessorStep)
assert isinstance(preprocessor.steps[5], NormalizerProcessorStep)
# Check steps in postprocessor
assert len(postprocessor.steps) == 2
@@ -404,3 +404,73 @@ def test_smolvla_newline_processor_transform_features():
}
result = processor.transform_features(features)
assert result == features # Should return unchanged
@pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA not available")
def test_smolvla_processor_bfloat16_device_float32_normalizer():
    """A float32-configured NormalizerProcessorStep must auto-adapt when the
    upstream DeviceProcessorStep casts tensors to bfloat16, so the whole
    pipeline output ends up in bfloat16.
    """
    config = create_default_config()
    config.device = "cuda"
    stats = create_default_stats()

    with patch(
        "lerobot.policies.smolvla.processor_smolvla.TokenizerProcessorStep", MockTokenizerProcessorStep
    ):
        preprocessor, _ = make_smolvla_pre_post_processors(
            config,
            stats,
            preprocessor_kwargs={"to_transition": lambda x: x, "to_output": lambda x: x},
            postprocessor_kwargs={"to_transition": lambda x: x, "to_output": lambda x: x},
        )

    # Rebuild the pipeline: the device step casts to bfloat16, while the
    # normalizer is deliberately left configured for float32 so that its
    # automatic dtype adaptation has to kick in.
    def _swap(step):
        if isinstance(step, DeviceProcessorStep):
            return DeviceProcessorStep(device=config.device, float_dtype="bfloat16")
        if isinstance(step, NormalizerProcessorStep):
            return NormalizerProcessorStep(
                features=step.features,
                norm_map=step.norm_map,
                stats=step.stats,
                device=config.device,
                dtype=torch.float32,  # Deliberately configured as float32
            )
        return step

    preprocessor.steps = [_swap(step) for step in preprocessor.steps]

    # SmolVLA places NormalizerProcessorStep last in the preprocessor (index 5).
    normalizer_step = preprocessor.steps[5]
    assert normalizer_step.dtype == torch.float32

    # Test data covering both a state vector and a visual observation.
    observation = {
        OBS_STATE: torch.randn(8, dtype=torch.float32),
        OBS_IMAGE: torch.randn(3, 224, 224, dtype=torch.float32),
    }
    transition = create_transition(
        observation,
        torch.randn(7, dtype=torch.float32),
        complementary_data={"task": "test bfloat16 adaptation"},
    )

    processed = preprocessor(transition)

    # DeviceProcessor casts to bfloat16 and the normalizer follows suit.
    assert processed[TransitionKey.OBSERVATION][OBS_STATE].dtype == torch.bfloat16
    # IDENTITY-normalized images still receive the dtype conversion.
    assert processed[TransitionKey.OBSERVATION][OBS_IMAGE].dtype == torch.bfloat16
    assert processed[TransitionKey.ACTION].dtype == torch.bfloat16

    # The normalizer's internal state adapted as well.
    assert normalizer_step.dtype == torch.bfloat16
    # State stats exist because OBS_STATE has a real normalization mode.
    for stat_tensor in normalizer_step._tensor_stats[OBS_STATE].values():
        assert stat_tensor.dtype == torch.bfloat16
    # OBS_IMAGE uses IDENTITY normalization, so it has no stats to inspect.