feat(processor): enhance type safety with generic DataProcessorPipeline for policy and robot pipelines (#1915)

* refactor(processor): enhance type annotations for processors in record, replay, teleoperate, and control utils

- Updated type annotations for preprocessor and postprocessor parameters in record_loop and predict_action functions to specify the expected dictionary types.
- Adjusted robot_action_processor type in ReplayConfig and TeleoperateConfig to improve clarity and maintainability.
- Ensured consistency in type definitions across multiple files, enhancing overall code readability.

* refactor(processor): enhance type annotations for RobotProcessorPipeline in various files

- Updated type annotations for RobotProcessorPipeline instances in evaluate.py, record.py, replay.py, teleoperate.py, and other related files to specify input and output types more clearly.
- Introduced new type conversions for PolicyAction and EnvTransition to improve type safety and maintainability across the processing pipelines.
- Ensured consistency in type definitions, enhancing overall code readability and reducing potential runtime errors.

* refactor(processor): update transition handling in processors to use transition_to_batch

- Replaced passing raw transitions (via identity_transition converters) with batches built by transition_to_batch in various processor tests and implementations, to ensure consistent batching of input data.
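- Updated assertions in tests to reflect the flat batch structure returned by the processors (e.g. processed[OBS_STATE] instead of processed[TransitionKey.OBSERVATION][OBS_STATE]), enhancing clarity and maintainability.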
- Improved overall code readability by standardizing the way transitions are processed across different processor types.

* refactor(tests): standardize transition key usage in processor tests

- Updated assertions in processor test files to reference actions through TransitionKey (e.g. TransitionKey.ACTION.value), enhancing consistency across tests.
- Replaced direct string references with TransitionKey constants for improved readability and maintainability.
- Ensured that all relevant tests reflect these changes, contributing to a more uniform approach in handling transitions.
This commit is contained in:
Adil Zouitine
2025-09-11 13:36:04 +02:00
committed by GitHub
parent a2489ab0da
commit 376a6457cf
29 changed files with 671 additions and 786 deletions
+55 -64
View File
@@ -33,7 +33,7 @@ from lerobot.processor import (
TransitionKey,
UnnormalizerProcessorStep,
)
from lerobot.processor.converters import create_transition, identity_transition
from lerobot.processor.converters import create_transition, transition_to_batch
def create_default_config():
@@ -93,28 +93,26 @@ def test_act_processor_normalization():
preprocessor, postprocessor = make_act_pre_post_processors(
config,
stats,
preprocessor_kwargs={"to_transition": identity_transition, "to_output": identity_transition},
postprocessor_kwargs={"to_transition": identity_transition, "to_output": identity_transition},
)
# Create test data
observation = {OBS_STATE: torch.randn(7)}
action = torch.randn(4)
transition = create_transition(observation, action)
batch = transition_to_batch(transition)
# Process through preprocessor
processed = preprocessor(transition)
processed = preprocessor(batch)
# Check that data is normalized and batched
assert processed[TransitionKey.OBSERVATION][OBS_STATE].shape == (1, 7)
assert processed[TransitionKey.ACTION].shape == (1, 4)
assert processed[OBS_STATE].shape == (1, 7)
assert processed[TransitionKey.ACTION.value].shape == (1, 4)
# Process action through postprocessor
action_transition = create_transition(action=processed[TransitionKey.ACTION])
postprocessed = postprocessor(action_transition)
postprocessed = postprocessor(processed[TransitionKey.ACTION.value])
# Check that action is unnormalized
assert postprocessed[TransitionKey.ACTION].shape == (1, 4)
assert postprocessed.shape == (1, 4)
@pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA not available")
@@ -127,28 +125,26 @@ def test_act_processor_cuda():
preprocessor, postprocessor = make_act_pre_post_processors(
config,
stats,
preprocessor_kwargs={"to_transition": identity_transition, "to_output": identity_transition},
postprocessor_kwargs={"to_transition": identity_transition, "to_output": identity_transition},
)
# Create CPU data
observation = {OBS_STATE: torch.randn(7)}
action = torch.randn(4)
transition = create_transition(observation, action)
batch = transition_to_batch(transition)
# Process through preprocessor
processed = preprocessor(transition)
processed = preprocessor(batch)
# Check that data is on CUDA
assert processed[TransitionKey.OBSERVATION][OBS_STATE].device.type == "cuda"
assert processed[TransitionKey.ACTION].device.type == "cuda"
assert processed[OBS_STATE].device.type == "cuda"
assert processed[TransitionKey.ACTION.value].device.type == "cuda"
# Process through postprocessor
action_transition = create_transition(action=processed[TransitionKey.ACTION])
postprocessed = postprocessor(action_transition)
postprocessed = postprocessor(processed[TransitionKey.ACTION.value])
# Check that action is back on CPU
assert postprocessed[TransitionKey.ACTION].device.type == "cpu"
assert postprocessed.device.type == "cpu"
@pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA not available")
@@ -161,8 +157,6 @@ def test_act_processor_accelerate_scenario():
preprocessor, postprocessor = make_act_pre_post_processors(
config,
stats,
preprocessor_kwargs={"to_transition": identity_transition, "to_output": identity_transition},
postprocessor_kwargs={"to_transition": identity_transition, "to_output": identity_transition},
)
# Simulate Accelerate: data already on GPU
@@ -170,13 +164,14 @@ def test_act_processor_accelerate_scenario():
observation = {OBS_STATE: torch.randn(1, 7).to(device)} # Already batched and on GPU
action = torch.randn(1, 4).to(device)
transition = create_transition(observation, action)
batch = transition_to_batch(transition)
# Process through preprocessor
processed = preprocessor(transition)
processed = preprocessor(batch)
# Check that data stays on same GPU (not moved unnecessarily)
assert processed[TransitionKey.OBSERVATION][OBS_STATE].device == device
assert processed[TransitionKey.ACTION].device == device
assert processed[OBS_STATE].device == device
assert processed[TransitionKey.ACTION.value].device == device
@pytest.mark.skipif(torch.cuda.device_count() < 2, reason="Requires at least 2 GPUs")
@@ -189,7 +184,6 @@ def test_act_processor_multi_gpu():
preprocessor, postprocessor = make_act_pre_post_processors(
config,
stats,
preprocessor_kwargs={"to_transition": identity_transition, "to_output": identity_transition},
)
# Simulate data on different GPU (like in multi-GPU training)
@@ -197,13 +191,14 @@ def test_act_processor_multi_gpu():
observation = {OBS_STATE: torch.randn(1, 7).to(device)}
action = torch.randn(1, 4).to(device)
transition = create_transition(observation, action)
batch = transition_to_batch(transition)
# Process through preprocessor
processed = preprocessor(transition)
processed = preprocessor(batch)
# Check that data stays on cuda:1 (not moved to cuda:0)
assert processed[TransitionKey.OBSERVATION][OBS_STATE].device == device
assert processed[TransitionKey.ACTION].device == device
assert processed[OBS_STATE].device == device
assert processed[TransitionKey.ACTION.value].device == device
def test_act_processor_without_stats():
@@ -213,8 +208,6 @@ def test_act_processor_without_stats():
preprocessor, postprocessor = make_act_pre_post_processors(
config,
dataset_stats=None,
preprocessor_kwargs={"to_transition": identity_transition, "to_output": identity_transition},
postprocessor_kwargs={"to_transition": identity_transition, "to_output": identity_transition},
)
# Should still create processors, but normalization won't have stats
@@ -225,8 +218,9 @@ def test_act_processor_without_stats():
observation = {OBS_STATE: torch.randn(7)}
action = torch.randn(4)
transition = create_transition(observation, action)
batch = transition_to_batch(transition)
processed = preprocessor(transition)
processed = preprocessor(batch)
assert processed is not None
@@ -238,8 +232,6 @@ def test_act_processor_save_and_load():
preprocessor, postprocessor = make_act_pre_post_processors(
config,
stats,
preprocessor_kwargs={"to_transition": identity_transition, "to_output": identity_transition},
postprocessor_kwargs={"to_transition": identity_transition, "to_output": identity_transition},
)
with tempfile.TemporaryDirectory() as tmpdir:
@@ -247,18 +239,17 @@ def test_act_processor_save_and_load():
preprocessor.save_pretrained(tmpdir)
# Load preprocessor
loaded_preprocessor = DataProcessorPipeline.from_pretrained(
tmpdir, to_transition=identity_transition, to_output=identity_transition
)
loaded_preprocessor = DataProcessorPipeline.from_pretrained(tmpdir)
# Test that loaded processor works
observation = {OBS_STATE: torch.randn(7)}
action = torch.randn(4)
transition = create_transition(observation, action)
batch = transition_to_batch(transition)
processed = loaded_preprocessor(transition)
assert processed[TransitionKey.OBSERVATION][OBS_STATE].shape == (1, 7)
assert processed[TransitionKey.ACTION].shape == (1, 4)
processed = loaded_preprocessor(batch)
assert processed[OBS_STATE].shape == (1, 7)
assert processed[TransitionKey.ACTION.value].shape == (1, 4)
def test_act_processor_device_placement_preservation():
@@ -271,18 +262,17 @@ def test_act_processor_device_placement_preservation():
preprocessor, _ = make_act_pre_post_processors(
config,
stats,
preprocessor_kwargs={"to_transition": identity_transition, "to_output": identity_transition},
postprocessor_kwargs={"to_transition": identity_transition, "to_output": identity_transition},
)
# Process CPU data
observation = {OBS_STATE: torch.randn(7)}
action = torch.randn(4)
transition = create_transition(observation, action)
batch = transition_to_batch(transition)
processed = preprocessor(transition)
assert processed[TransitionKey.OBSERVATION][OBS_STATE].device.type == "cpu"
assert processed[TransitionKey.ACTION].device.type == "cpu"
processed = preprocessor(batch)
assert processed[OBS_STATE].device.type == "cpu"
assert processed[TransitionKey.ACTION.value].device.type == "cpu"
@pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA not available")
@@ -296,8 +286,6 @@ def test_act_processor_mixed_precision():
preprocessor, postprocessor = make_act_pre_post_processors(
config,
stats,
preprocessor_kwargs={"to_transition": identity_transition, "to_output": identity_transition},
postprocessor_kwargs={"to_transition": identity_transition, "to_output": identity_transition},
)
# Replace DeviceProcessorStep with one that uses float16
@@ -307,11 +295,12 @@ def test_act_processor_mixed_precision():
modified_steps.append(DeviceProcessorStep(device=config.device, float_dtype="float16"))
elif isinstance(step, NormalizerProcessorStep):
# Update normalizer to use the same device as the device processor
norm_step = step # Now type checker knows this is NormalizerProcessorStep
modified_steps.append(
NormalizerProcessorStep(
features=step.features,
norm_map=step.norm_map,
stats=step.stats,
features=norm_step.features,
norm_map=norm_step.norm_map,
stats=norm_step.stats,
device=config.device,
dtype=torch.float16, # Match the float16 dtype
)
@@ -324,13 +313,14 @@ def test_act_processor_mixed_precision():
observation = {OBS_STATE: torch.randn(7, dtype=torch.float32)}
action = torch.randn(4, dtype=torch.float32)
transition = create_transition(observation, action)
batch = transition_to_batch(transition)
# Process through preprocessor
processed = preprocessor(transition)
processed = preprocessor(batch)
# Check that data is converted to float16
assert processed[TransitionKey.OBSERVATION][OBS_STATE].dtype == torch.float16
assert processed[TransitionKey.ACTION].dtype == torch.float16
assert processed[OBS_STATE].dtype == torch.float16
assert processed[TransitionKey.ACTION.value].dtype == torch.float16
def test_act_processor_batch_consistency():
@@ -341,26 +331,26 @@ def test_act_processor_batch_consistency():
preprocessor, postprocessor = make_act_pre_post_processors(
config,
stats,
preprocessor_kwargs={"to_transition": identity_transition, "to_output": identity_transition},
postprocessor_kwargs={"to_transition": identity_transition, "to_output": identity_transition},
)
# Test single sample (unbatched)
observation = {OBS_STATE: torch.randn(7)}
action = torch.randn(4)
transition = create_transition(observation, action)
batch = transition_to_batch(transition)
processed = preprocessor(transition)
assert processed[TransitionKey.OBSERVATION][OBS_STATE].shape[0] == 1 # Batched
processed = preprocessor(batch)
assert processed["observation.state"].shape[0] == 1 # Batched
# Test already batched data
observation_batched = {OBS_STATE: torch.randn(8, 7)} # Batch of 8
action_batched = torch.randn(8, 4)
transition_batched = create_transition(observation_batched, action_batched)
batch_batched = transition_to_batch(transition_batched)
processed_batched = preprocessor(transition_batched)
assert processed_batched[TransitionKey.OBSERVATION][OBS_STATE].shape[0] == 8
assert processed_batched[TransitionKey.ACTION].shape[0] == 8
processed_batched = preprocessor(batch_batched)
assert processed_batched[OBS_STATE].shape[0] == 8
assert processed_batched[TransitionKey.ACTION.value].shape[0] == 8
@pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA not available")
@@ -373,7 +363,6 @@ def test_act_processor_bfloat16_device_float32_normalizer():
preprocessor, _ = make_act_pre_post_processors(
config,
stats,
preprocessor_kwargs={"to_transition": identity_transition, "to_output": identity_transition},
)
# Modify the pipeline to use bfloat16 device processor with float32 normalizer
@@ -384,11 +373,12 @@ def test_act_processor_bfloat16_device_float32_normalizer():
modified_steps.append(DeviceProcessorStep(device=config.device, float_dtype="bfloat16"))
elif isinstance(step, NormalizerProcessorStep):
# Normalizer stays configured as float32 (will auto-adapt to bfloat16)
norm_step = step # Now type checker knows this is NormalizerProcessorStep
modified_steps.append(
NormalizerProcessorStep(
features=step.features,
norm_map=step.norm_map,
stats=step.stats,
features=norm_step.features,
norm_map=norm_step.norm_map,
stats=norm_step.stats,
device=config.device,
dtype=torch.float32, # Deliberately configured as float32
)
@@ -405,13 +395,14 @@ def test_act_processor_bfloat16_device_float32_normalizer():
observation = {OBS_STATE: torch.randn(7, dtype=torch.float32)} # Start with float32
action = torch.randn(4, dtype=torch.float32)
transition = create_transition(observation, action)
batch = transition_to_batch(transition)
# Process through full pipeline
processed = preprocessor(transition)
processed = preprocessor(batch)
# Verify: DeviceProcessor → bfloat16, NormalizerProcessor adapts → final output is bfloat16
assert processed[TransitionKey.OBSERVATION][OBS_STATE].dtype == torch.bfloat16
assert processed[TransitionKey.ACTION].dtype == torch.bfloat16
assert processed[OBS_STATE].dtype == torch.bfloat16
assert processed[TransitionKey.ACTION.value].dtype == torch.bfloat16
# Verify normalizer automatically adapted its internal state
assert normalizer_step.dtype == torch.bfloat16
+61 -64
View File
@@ -31,7 +31,7 @@ from lerobot.processor import (
NormalizerProcessorStep,
TransitionKey,
)
from lerobot.processor.converters import create_transition, identity_transition
from lerobot.processor.converters import create_transition, transition_to_batch
def create_default_config():
@@ -93,8 +93,6 @@ def test_classifier_processor_normalization():
preprocessor, postprocessor = make_classifier_processor(
config,
stats,
preprocessor_kwargs={"to_transition": identity_transition, "to_output": identity_transition},
postprocessor_kwargs={"to_transition": identity_transition, "to_output": identity_transition},
)
# Create test data
@@ -104,14 +102,15 @@ def test_classifier_processor_normalization():
}
action = torch.randn(1) # Dummy action/reward
transition = create_transition(observation, action)
batch = transition_to_batch(transition)
# Process through preprocessor
processed = preprocessor(transition)
processed = preprocessor(batch)
# Check that data is processed
assert processed[TransitionKey.OBSERVATION][OBS_STATE].shape == (10,)
assert processed[TransitionKey.OBSERVATION][OBS_IMAGE].shape == (3, 224, 224)
assert processed[TransitionKey.ACTION].shape == (1,)
assert processed[OBS_STATE].shape == (10,)
assert processed[OBS_IMAGE].shape == (3, 224, 224)
assert processed[TransitionKey.ACTION.value].shape == (1,)
@pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA not available")
@@ -124,8 +123,6 @@ def test_classifier_processor_cuda():
preprocessor, postprocessor = make_classifier_processor(
config,
stats,
preprocessor_kwargs={"to_transition": identity_transition, "to_output": identity_transition},
postprocessor_kwargs={"to_transition": identity_transition, "to_output": identity_transition},
)
# Create CPU data
@@ -136,20 +133,22 @@ def test_classifier_processor_cuda():
action = torch.randn(1)
transition = create_transition(observation, action)
batch = transition_to_batch(transition)
# Process through preprocessor
processed = preprocessor(transition)
processed = preprocessor(batch)
# Check that data is on CUDA
assert processed[TransitionKey.OBSERVATION][OBS_STATE].device.type == "cuda"
assert processed[TransitionKey.OBSERVATION][OBS_IMAGE].device.type == "cuda"
assert processed[TransitionKey.ACTION].device.type == "cuda"
assert processed[OBS_STATE].device.type == "cuda"
assert processed[OBS_IMAGE].device.type == "cuda"
assert processed[TransitionKey.ACTION.value].device.type == "cuda"
# Process through postprocessor
reward_transition = create_transition(action=processed[TransitionKey.ACTION])
postprocessed = postprocessor(reward_transition)
postprocessed = postprocessor(processed[TransitionKey.ACTION.value])
# Check that output is back on CPU
assert postprocessed[TransitionKey.ACTION].device.type == "cpu"
assert postprocessed.device.type == "cpu"
@pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA not available")
@@ -162,8 +161,6 @@ def test_classifier_processor_accelerate_scenario():
preprocessor, postprocessor = make_classifier_processor(
config,
stats,
preprocessor_kwargs={"to_transition": identity_transition, "to_output": identity_transition},
postprocessor_kwargs={"to_transition": identity_transition, "to_output": identity_transition},
)
# Simulate Accelerate: data already on GPU
@@ -175,13 +172,16 @@ def test_classifier_processor_accelerate_scenario():
action = torch.randn(1).to(device)
transition = create_transition(observation, action)
batch = transition_to_batch(transition)
# Process through preprocessor
processed = preprocessor(transition)
processed = preprocessor(batch)
# Check that data stays on same GPU
assert processed[TransitionKey.OBSERVATION][OBS_STATE].device == device
assert processed[TransitionKey.OBSERVATION][OBS_IMAGE].device == device
assert processed[TransitionKey.ACTION].device == device
assert processed[OBS_STATE].device == device
assert processed[OBS_IMAGE].device == device
assert processed[TransitionKey.ACTION.value].device == device
@pytest.mark.skipif(torch.cuda.device_count() < 2, reason="Requires at least 2 GPUs")
@@ -202,13 +202,16 @@ def test_classifier_processor_multi_gpu():
action = torch.randn(1).to(device)
transition = create_transition(observation, action)
batch = transition_to_batch(transition)
# Process through preprocessor
processed = preprocessor(transition)
processed = preprocessor(batch)
# Check that data stays on cuda:1
assert processed[TransitionKey.OBSERVATION][OBS_STATE].device == device
assert processed[TransitionKey.OBSERVATION][OBS_IMAGE].device == device
assert processed[TransitionKey.ACTION].device == device
assert processed[OBS_STATE].device == device
assert processed[OBS_IMAGE].device == device
assert processed[TransitionKey.ACTION.value].device == device
def test_classifier_processor_without_stats():
@@ -229,7 +232,9 @@ def test_classifier_processor_without_stats():
action = torch.randn(1)
transition = create_transition(observation, action)
processed = preprocessor(transition)
batch = transition_to_batch(transition)
processed = preprocessor(batch)
assert processed is not None
@@ -238,22 +243,14 @@ def test_classifier_processor_save_and_load():
config = create_default_config()
stats = create_default_stats()
# Get the steps from the factory function
factory_preprocessor, factory_postprocessor = make_classifier_processor(config, stats)
# Create new processors with EnvTransition input/output
preprocessor = DataProcessorPipeline(
factory_preprocessor.steps, to_transition=identity_transition, to_output=identity_transition
)
preprocessor, postprocessor = make_classifier_processor(config, stats)
with tempfile.TemporaryDirectory() as tmpdir:
# Save preprocessor
preprocessor.save_pretrained(tmpdir)
# Load preprocessor
loaded_preprocessor = DataProcessorPipeline.from_pretrained(
tmpdir, to_transition=identity_transition, to_output=identity_transition
)
loaded_preprocessor = DataProcessorPipeline.from_pretrained(tmpdir)
# Test that loaded processor works
observation = {
@@ -262,11 +259,12 @@ def test_classifier_processor_save_and_load():
}
action = torch.randn(1)
transition = create_transition(observation, action)
batch = transition_to_batch(transition)
processed = loaded_preprocessor(transition)
assert processed[TransitionKey.OBSERVATION][OBS_STATE].shape == (10,)
assert processed[TransitionKey.OBSERVATION][OBS_IMAGE].shape == (3, 224, 224)
assert processed[TransitionKey.ACTION].shape == (1,)
processed = loaded_preprocessor(batch)
assert processed[OBS_STATE].shape == (10,)
assert processed[OBS_IMAGE].shape == (3, 224, 224)
assert processed[TransitionKey.ACTION.value].shape == (1,)
@pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA not available")
@@ -276,21 +274,16 @@ def test_classifier_processor_mixed_precision():
config.device = "cuda"
stats = create_default_stats()
# Get the steps from the factory function
factory_preprocessor, factory_postprocessor = make_classifier_processor(config, stats)
preprocessor, postprocessor = make_classifier_processor(config, stats)
# Replace DeviceProcessorStep with one that uses float16
modified_steps = []
for step in factory_preprocessor.steps:
for step in preprocessor.steps:
if isinstance(step, DeviceProcessorStep):
modified_steps.append(DeviceProcessorStep(device=config.device, float_dtype="float16"))
else:
modified_steps.append(step)
# Create new processors with EnvTransition input/output
preprocessor = DataProcessorPipeline(
modified_steps, to_transition=identity_transition, to_output=identity_transition
)
preprocessor.steps = modified_steps
# Create test data
observation = {
@@ -300,13 +293,16 @@ def test_classifier_processor_mixed_precision():
action = torch.randn(1, dtype=torch.float32)
transition = create_transition(observation, action)
batch = transition_to_batch(transition)
# Process through preprocessor
processed = preprocessor(transition)
processed = preprocessor(batch)
# Check that data is converted to float16
assert processed[TransitionKey.OBSERVATION][OBS_STATE].dtype == torch.float16
assert processed[TransitionKey.OBSERVATION][OBS_IMAGE].dtype == torch.float16
assert processed[TransitionKey.ACTION].dtype == torch.float16
assert processed[OBS_STATE].dtype == torch.float16
assert processed[OBS_IMAGE].dtype == torch.float16
assert processed[TransitionKey.ACTION.value].dtype == torch.float16
def test_classifier_processor_batch_data():
@@ -317,8 +313,6 @@ def test_classifier_processor_batch_data():
preprocessor, postprocessor = make_classifier_processor(
config,
stats,
preprocessor_kwargs={"to_transition": identity_transition, "to_output": identity_transition},
postprocessor_kwargs={"to_transition": identity_transition, "to_output": identity_transition},
)
# Test with batched data
@@ -330,13 +324,16 @@ def test_classifier_processor_batch_data():
action = torch.randn(batch_size, 1)
transition = create_transition(observation, action)
batch = transition_to_batch(transition)
# Process through preprocessor
processed = preprocessor(transition)
processed = preprocessor(batch)
# Check that batch dimension is preserved
assert processed[TransitionKey.OBSERVATION][OBS_STATE].shape == (batch_size, 10)
assert processed[TransitionKey.OBSERVATION][OBS_IMAGE].shape == (batch_size, 3, 224, 224)
assert processed[TransitionKey.ACTION].shape == (batch_size, 1)
assert processed[OBS_STATE].shape == (batch_size, 10)
assert processed[OBS_IMAGE].shape == (batch_size, 3, 224, 224)
assert processed[TransitionKey.ACTION.value].shape == (batch_size, 1)
def test_classifier_processor_postprocessor_identity():
@@ -347,17 +344,17 @@ def test_classifier_processor_postprocessor_identity():
preprocessor, postprocessor = make_classifier_processor(
config,
stats,
preprocessor_kwargs={"to_transition": identity_transition, "to_output": identity_transition},
postprocessor_kwargs={"to_transition": identity_transition, "to_output": identity_transition},
)
# Create test data for postprocessor
reward = torch.tensor([[0.8], [0.3], [0.9]]) # Batch of rewards/predictions
transition = create_transition(action=reward)
_ = transition_to_batch(transition)
# Process through postprocessor
processed = postprocessor(transition)
processed = postprocessor(reward)
# IdentityProcessor should leave values unchanged (except device)
assert torch.allclose(processed[TransitionKey.ACTION].cpu(), reward.cpu())
assert processed[TransitionKey.ACTION].device.type == "cpu"
assert torch.allclose(processed.cpu(), reward.cpu())
assert processed.device.type == "cpu"
+66 -121
View File
@@ -33,7 +33,7 @@ from lerobot.processor import (
TransitionKey,
UnnormalizerProcessorStep,
)
from lerobot.processor.converters import create_transition, identity_transition
from lerobot.processor.converters import create_transition, transition_to_batch
def create_default_config():
@@ -96,8 +96,6 @@ def test_diffusion_processor_with_images():
preprocessor, postprocessor = make_diffusion_pre_post_processors(
config,
stats,
preprocessor_kwargs={"to_transition": identity_transition, "to_output": identity_transition},
postprocessor_kwargs={"to_transition": identity_transition, "to_output": identity_transition},
)
# Create test data with images
@@ -108,13 +106,16 @@ def test_diffusion_processor_with_images():
action = torch.randn(6)
transition = create_transition(observation, action)
batch = transition_to_batch(transition)
# Process through preprocessor
processed = preprocessor(transition)
processed = preprocessor(batch)
# Check that data is batched
assert processed[TransitionKey.OBSERVATION][OBS_STATE].shape == (1, 7)
assert processed[TransitionKey.OBSERVATION][OBS_IMAGE].shape == (1, 3, 224, 224)
assert processed[TransitionKey.ACTION].shape == (1, 6)
assert processed[OBS_STATE].shape == (1, 7)
assert processed[OBS_IMAGE].shape == (1, 3, 224, 224)
assert processed[TransitionKey.ACTION.value].shape == (1, 6)
@pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA not available")
@@ -127,8 +128,6 @@ def test_diffusion_processor_cuda():
preprocessor, postprocessor = make_diffusion_pre_post_processors(
config,
stats,
preprocessor_kwargs={"to_transition": identity_transition, "to_output": identity_transition},
postprocessor_kwargs={"to_transition": identity_transition, "to_output": identity_transition},
)
# Create CPU data
@@ -139,20 +138,22 @@ def test_diffusion_processor_cuda():
action = torch.randn(6)
transition = create_transition(observation, action)
batch = transition_to_batch(transition)
# Process through preprocessor
processed = preprocessor(transition)
processed = preprocessor(batch)
# Check that data is on CUDA
assert processed[TransitionKey.OBSERVATION][OBS_STATE].device.type == "cuda"
assert processed[TransitionKey.OBSERVATION][OBS_IMAGE].device.type == "cuda"
assert processed[TransitionKey.ACTION].device.type == "cuda"
assert processed[OBS_STATE].device.type == "cuda"
assert processed[OBS_IMAGE].device.type == "cuda"
assert processed[TransitionKey.ACTION.value].device.type == "cuda"
# Process through postprocessor
action_transition = create_transition(action=processed[TransitionKey.ACTION])
postprocessed = postprocessor(action_transition)
postprocessed = postprocessor(processed[TransitionKey.ACTION.value])
# Check that action is back on CPU
assert postprocessed[TransitionKey.ACTION].device.type == "cpu"
assert postprocessed.device.type == "cpu"
@pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA not available")
@@ -165,8 +166,6 @@ def test_diffusion_processor_accelerate_scenario():
preprocessor, postprocessor = make_diffusion_pre_post_processors(
config,
stats,
preprocessor_kwargs={"to_transition": identity_transition, "to_output": identity_transition},
postprocessor_kwargs={"to_transition": identity_transition, "to_output": identity_transition},
)
# Simulate Accelerate: data already on GPU
@@ -178,13 +177,16 @@ def test_diffusion_processor_accelerate_scenario():
action = torch.randn(1, 6).to(device)
transition = create_transition(observation, action)
batch = transition_to_batch(transition)
# Process through preprocessor
processed = preprocessor(transition)
processed = preprocessor(batch)
# Check that data stays on same GPU
assert processed[TransitionKey.OBSERVATION][OBS_STATE].device == device
assert processed[TransitionKey.OBSERVATION][OBS_IMAGE].device == device
assert processed[TransitionKey.ACTION].device == device
assert processed[OBS_STATE].device == device
assert processed[OBS_IMAGE].device == device
assert processed[TransitionKey.ACTION.value].device == device
@pytest.mark.skipif(torch.cuda.device_count() < 2, reason="Requires at least 2 GPUs")
@@ -205,13 +207,16 @@ def test_diffusion_processor_multi_gpu():
action = torch.randn(1, 6).to(device)
transition = create_transition(observation, action)
batch = transition_to_batch(transition)
# Process through preprocessor
processed = preprocessor(transition)
processed = preprocessor(batch)
# Check that data stays on cuda:1
assert processed[TransitionKey.OBSERVATION][OBS_STATE].device == device
assert processed[TransitionKey.OBSERVATION][OBS_IMAGE].device == device
assert processed[TransitionKey.ACTION].device == device
assert processed[OBS_STATE].device == device
assert processed[OBS_IMAGE].device == device
assert processed[TransitionKey.ACTION.value].device == device
def test_diffusion_processor_without_stats():
@@ -221,7 +226,6 @@ def test_diffusion_processor_without_stats():
preprocessor, postprocessor = make_diffusion_pre_post_processors(
config,
dataset_stats=None,
preprocessor_kwargs={"to_transition": identity_transition, "to_output": identity_transition},
)
# Should still create processors
@@ -236,7 +240,9 @@ def test_diffusion_processor_without_stats():
action = torch.randn(6)
transition = create_transition(observation, action)
processed = preprocessor(transition)
batch = transition_to_batch(transition)
processed = preprocessor(batch)
assert processed is not None
@@ -245,22 +251,14 @@ def test_diffusion_processor_save_and_load():
config = create_default_config()
stats = create_default_stats()
# Get the steps from the factory function
factory_preprocessor, factory_postprocessor = make_diffusion_pre_post_processors(config, stats)
# Create new processors with EnvTransition input/output
preprocessor = DataProcessorPipeline(
factory_preprocessor.steps, to_transition=identity_transition, to_output=identity_transition
)
preprocessor, postprocessor = make_diffusion_pre_post_processors(config, stats)
with tempfile.TemporaryDirectory() as tmpdir:
# Save preprocessor
preprocessor.save_pretrained(tmpdir)
# Load preprocessor
loaded_preprocessor = DataProcessorPipeline.from_pretrained(
tmpdir, to_transition=identity_transition, to_output=identity_transition
)
loaded_preprocessor = DataProcessorPipeline.from_pretrained(tmpdir)
# Test that loaded processor works
observation = {
@@ -269,62 +267,12 @@ def test_diffusion_processor_save_and_load():
}
action = torch.randn(6)
transition = create_transition(observation, action)
batch = transition_to_batch(transition)
processed = loaded_preprocessor(transition)
assert processed[TransitionKey.OBSERVATION][OBS_STATE].shape == (1, 7)
assert processed[TransitionKey.OBSERVATION][OBS_IMAGE].shape == (1, 3, 224, 224)
assert processed[TransitionKey.ACTION].shape == (1, 6)
@pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA not available")
def test_diffusion_processor_mixed_precision():
    """Check that a float16 diffusion preprocessor emits float16 tensors.

    The factory pipeline is rebuilt with its device step and normalizer step
    swapped for float16 variants; every other step is reused as-is. A float32
    transition is then pushed through and the dtypes of the state, image and
    action tensors are verified.
    """
    config = create_default_config()
    config.device = "cuda"
    stats = create_default_stats()

    # Only the preprocessor's steps are needed; the postprocessor is discarded.
    factory_preprocessor, _ = make_diffusion_pre_post_processors(config, stats)

    def _to_half_precision(step):
        """Return a float16 replacement for device/normalizer steps, else the step itself."""
        if isinstance(step, DeviceProcessorStep):
            return DeviceProcessorStep(device=config.device, float_dtype="float16")
        if isinstance(step, NormalizerProcessorStep):
            # Same features/norm_map/stats as the factory normalizer, but placed on
            # the configured device and matching the float16 dtype.
            return NormalizerProcessorStep(
                features=step.features,
                norm_map=step.norm_map,
                stats=step.stats,
                device=config.device,
                dtype=torch.float16,
            )
        return step

    half_precision_steps = [_to_half_precision(step) for step in factory_preprocessor.steps]

    # New pipeline with EnvTransition input/output.
    preprocessor = DataProcessorPipeline(
        half_precision_steps,
        to_transition=identity_transition,
        to_output=identity_transition,
    )

    # Float32 inputs, so any float16 output must come from the pipeline.
    observation = {
        OBS_STATE: torch.randn(7, dtype=torch.float32),
        OBS_IMAGE: torch.randn(3, 224, 224, dtype=torch.float32),
    }
    action = torch.randn(6, dtype=torch.float32)
    processed = preprocessor(create_transition(observation, action))

    # All three tensors must have been converted to float16.
    assert processed[TransitionKey.OBSERVATION][OBS_STATE].dtype == torch.float16
    assert processed[TransitionKey.OBSERVATION][OBS_IMAGE].dtype == torch.float16
    assert processed[TransitionKey.ACTION].dtype == torch.float16
processed = loaded_preprocessor(batch)
assert processed[OBS_STATE].shape == (1, 7)
assert processed[OBS_IMAGE].shape == (1, 3, 224, 224)
assert processed[TransitionKey.ACTION.value].shape == (1, 6)
def test_diffusion_processor_identity_normalization():
@@ -335,8 +283,6 @@ def test_diffusion_processor_identity_normalization():
preprocessor, postprocessor = make_diffusion_pre_post_processors(
config,
stats,
preprocessor_kwargs={"to_transition": identity_transition, "to_output": identity_transition},
postprocessor_kwargs={"to_transition": identity_transition, "to_output": identity_transition},
)
# Create test data
@@ -348,12 +294,15 @@ def test_diffusion_processor_identity_normalization():
action = torch.randn(6)
transition = create_transition(observation, action)
batch = transition_to_batch(transition)
# Process through preprocessor
processed = preprocessor(transition)
processed = preprocessor(batch)
# Image should not be normalized (IDENTITY mode)
# Just batched
assert torch.allclose(processed[TransitionKey.OBSERVATION][OBS_IMAGE][0], image_value, rtol=1e-5)
assert torch.allclose(processed[OBS_IMAGE][0], image_value, rtol=1e-5)
def test_diffusion_processor_batch_consistency():
@@ -364,8 +313,6 @@ def test_diffusion_processor_batch_consistency():
preprocessor, postprocessor = make_diffusion_pre_post_processors(
config,
stats,
preprocessor_kwargs={"to_transition": identity_transition, "to_output": identity_transition},
postprocessor_kwargs={"to_transition": identity_transition, "to_output": identity_transition},
)
# Test with different batch sizes
@@ -377,13 +324,15 @@ def test_diffusion_processor_batch_consistency():
action = torch.randn(batch_size, 6) if batch_size > 1 else torch.randn(6)
transition = create_transition(observation, action)
processed = preprocessor(transition)
batch = transition_to_batch(transition)
processed = preprocessor(batch)
# Check correct batch size
expected_batch = batch_size if batch_size > 1 else 1
assert processed[TransitionKey.OBSERVATION][OBS_STATE].shape[0] == expected_batch
assert processed[TransitionKey.OBSERVATION][OBS_IMAGE].shape[0] == expected_batch
assert processed[TransitionKey.ACTION].shape[0] == expected_batch
assert processed[OBS_STATE].shape[0] == expected_batch
assert processed[OBS_IMAGE].shape[0] == expected_batch
assert processed[TransitionKey.ACTION.value].shape[0] == expected_batch
@pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA not available")
@@ -393,36 +342,32 @@ def test_diffusion_processor_bfloat16_device_float32_normalizer():
config.device = "cuda"
stats = create_default_stats()
# Get the steps from the factory function
factory_preprocessor, _ = make_diffusion_pre_post_processors(config, stats)
preprocessor, _ = make_diffusion_pre_post_processors(config, stats)
# Modify the pipeline to use bfloat16 device processor with float32 normalizer
modified_steps = []
for step in factory_preprocessor.steps:
for step in preprocessor.steps:
if isinstance(step, DeviceProcessorStep):
# Device processor converts to bfloat16
modified_steps.append(DeviceProcessorStep(device=config.device, float_dtype="bfloat16"))
elif isinstance(step, NormalizerProcessorStep):
# Normalizer stays configured as float32 (will auto-adapt to bfloat16)
norm_step = step # Now type checker knows this is NormalizerProcessorStep
modified_steps.append(
NormalizerProcessorStep(
features=step.features,
norm_map=step.norm_map,
stats=step.stats,
features=norm_step.features,
norm_map=norm_step.norm_map,
stats=norm_step.stats,
device=config.device,
dtype=torch.float32, # Deliberately configured as float32
)
)
else:
modified_steps.append(step)
# Create new processor with modified steps
preprocessor = DataProcessorPipeline(
modified_steps, to_transition=identity_transition, to_output=identity_transition
)
preprocessor.steps = modified_steps
# Verify initial normalizer configuration
normalizer_step = modified_steps[3] # NormalizerProcessorStep
normalizer_step = preprocessor.steps[3] # NormalizerProcessorStep
assert normalizer_step.dtype == torch.float32
# Create test data with both state and visual observations
@@ -433,15 +378,15 @@ def test_diffusion_processor_bfloat16_device_float32_normalizer():
action = torch.randn(6, dtype=torch.float32)
transition = create_transition(observation, action)
batch = transition_to_batch(transition)
# Process through full pipeline
processed = preprocessor(transition)
processed = preprocessor(batch)
# Verify: DeviceProcessor → bfloat16, NormalizerProcessor adapts → final output is bfloat16
assert processed[TransitionKey.OBSERVATION][OBS_STATE].dtype == torch.bfloat16
assert (
processed[TransitionKey.OBSERVATION][OBS_IMAGE].dtype == torch.bfloat16
) # IDENTITY normalization still gets dtype conversion
assert processed[TransitionKey.ACTION].dtype == torch.bfloat16
assert processed[OBS_STATE].dtype == torch.bfloat16
assert processed[OBS_IMAGE].dtype == torch.bfloat16 # IDENTITY normalization still gets dtype conversion
assert processed[TransitionKey.ACTION.value].dtype == torch.bfloat16
# Verify normalizer automatically adapted its internal state
assert normalizer_step.dtype == torch.bfloat16
+25 -34
View File
@@ -34,7 +34,7 @@ from lerobot.processor import (
TransitionKey,
UnnormalizerProcessorStep,
)
from lerobot.processor.converters import create_transition, identity_transition
from lerobot.processor.converters import create_transition, transition_to_batch
class MockTokenizerProcessorStep(ProcessorStep):
@@ -91,8 +91,6 @@ def test_make_pi0_processor_basic():
preprocessor, postprocessor = make_pi0_pre_post_processors(
config,
stats,
preprocessor_kwargs={"to_transition": identity_transition, "to_output": identity_transition},
postprocessor_kwargs={"to_transition": identity_transition, "to_output": identity_transition},
)
# Check processor names
@@ -195,8 +193,6 @@ def test_pi0_processor_cuda():
preprocessor, postprocessor = make_pi0_pre_post_processors(
config,
stats,
preprocessor_kwargs={"to_transition": identity_transition, "to_output": identity_transition},
postprocessor_kwargs={"to_transition": identity_transition, "to_output": identity_transition},
)
# Create CPU data
@@ -206,14 +202,15 @@ def test_pi0_processor_cuda():
}
action = torch.randn(6)
transition = create_transition(observation, action, complementary_data={"task": "test task"})
batch = transition_to_batch(transition)
# Process through preprocessor
processed = preprocessor(transition)
processed = preprocessor(batch)
# Check that data is on CUDA
assert processed[TransitionKey.OBSERVATION][OBS_STATE].device.type == "cuda"
assert processed[TransitionKey.OBSERVATION][OBS_IMAGE].device.type == "cuda"
assert processed[TransitionKey.ACTION].device.type == "cuda"
assert processed[OBS_STATE].device.type == "cuda"
assert processed[OBS_IMAGE].device.type == "cuda"
assert processed[TransitionKey.ACTION.value].device.type == "cuda"
@pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA not available")
@@ -250,8 +247,6 @@ def test_pi0_processor_accelerate_scenario():
preprocessor, postprocessor = make_pi0_pre_post_processors(
config,
stats,
preprocessor_kwargs={"to_transition": identity_transition, "to_output": identity_transition},
postprocessor_kwargs={"to_transition": identity_transition, "to_output": identity_transition},
)
# Simulate Accelerate: data already on GPU and batched
@@ -262,14 +257,15 @@ def test_pi0_processor_accelerate_scenario():
}
action = torch.randn(1, 6).to(device)
transition = create_transition(observation, action, complementary_data={"task": ["test task"]})
batch = transition_to_batch(transition)
# Process through preprocessor
processed = preprocessor(transition)
processed = preprocessor(batch)
# Check that data stays on same GPU
assert processed[TransitionKey.OBSERVATION][OBS_STATE].device == device
assert processed[TransitionKey.OBSERVATION][OBS_IMAGE].device == device
assert processed[TransitionKey.ACTION].device == device
assert processed[OBS_STATE].device == device
assert processed[OBS_IMAGE].device == device
assert processed[TransitionKey.ACTION.value].device == device
@pytest.mark.skipif(torch.cuda.device_count() < 2, reason="Requires at least 2 GPUs")
@@ -306,8 +302,6 @@ def test_pi0_processor_multi_gpu():
preprocessor, postprocessor = make_pi0_pre_post_processors(
config,
stats,
preprocessor_kwargs={"to_transition": identity_transition, "to_output": identity_transition},
postprocessor_kwargs={"to_transition": identity_transition, "to_output": identity_transition},
)
# Simulate data on different GPU
@@ -318,14 +312,15 @@ def test_pi0_processor_multi_gpu():
}
action = torch.randn(1, 6).to(device)
transition = create_transition(observation, action, complementary_data={"task": ["test task"]})
batch = transition_to_batch(transition)
# Process through preprocessor
processed = preprocessor(transition)
processed = preprocessor(batch)
# Check that data stays on cuda:1
assert processed[TransitionKey.OBSERVATION][OBS_STATE].device == device
assert processed[TransitionKey.OBSERVATION][OBS_IMAGE].device == device
assert processed[TransitionKey.ACTION].device == device
assert processed[OBS_STATE].device == device
assert processed[OBS_IMAGE].device == device
assert processed[TransitionKey.ACTION.value].device == device
def test_pi0_processor_without_stats():
@@ -337,8 +332,6 @@ def test_pi0_processor_without_stats():
preprocessor, postprocessor = make_pi0_pre_post_processors(
config,
dataset_stats=None,
preprocessor_kwargs={"to_transition": identity_transition, "to_output": identity_transition},
postprocessor_kwargs={"to_transition": identity_transition, "to_output": identity_transition},
)
# Should still create processors
@@ -376,8 +369,6 @@ def test_pi0_processor_bfloat16_device_float32_normalizer():
preprocessor, _ = make_pi0_pre_post_processors(
config,
stats,
preprocessor_kwargs={"to_transition": identity_transition, "to_output": identity_transition},
postprocessor_kwargs={"to_transition": identity_transition, "to_output": identity_transition},
)
# Modify the pipeline to use bfloat16 device processor with float32 normalizer
@@ -388,11 +379,12 @@ def test_pi0_processor_bfloat16_device_float32_normalizer():
modified_steps.append(DeviceProcessorStep(device=config.device, float_dtype="bfloat16"))
elif isinstance(step, NormalizerProcessorStep):
# Normalizer stays configured as float32 (will auto-adapt to bfloat16)
norm_step = step # Now type checker knows this is NormalizerProcessorStep
modified_steps.append(
NormalizerProcessorStep(
features=step.features,
norm_map=step.norm_map,
stats=step.stats,
features=norm_step.features,
norm_map=norm_step.norm_map,
stats=norm_step.stats,
device=config.device,
dtype=torch.float32, # Deliberately configured as float32
)
@@ -414,16 +406,15 @@ def test_pi0_processor_bfloat16_device_float32_normalizer():
transition = create_transition(
observation, action, complementary_data={"task": "test bfloat16 adaptation"}
)
batch = transition_to_batch(transition)
# Process through full pipeline
processed = preprocessor(transition)
processed = preprocessor(batch)
# Verify: DeviceProcessor → bfloat16, NormalizerProcessor adapts → final output is bfloat16
assert processed[TransitionKey.OBSERVATION][OBS_STATE].dtype == torch.bfloat16
assert (
processed[TransitionKey.OBSERVATION][OBS_IMAGE].dtype == torch.bfloat16
) # IDENTITY normalization still gets dtype conversion
assert processed[TransitionKey.ACTION].dtype == torch.bfloat16
assert processed[OBS_STATE].dtype == torch.bfloat16
assert processed[OBS_IMAGE].dtype == torch.bfloat16 # IDENTITY normalization still gets dtype conversion
assert processed[TransitionKey.ACTION.value].dtype == torch.bfloat16
# Verify normalizer automatically adapted its internal state
assert normalizer_step.dtype == torch.bfloat16
+61 -85
View File
@@ -33,7 +33,7 @@ from lerobot.processor import (
TransitionKey,
UnnormalizerProcessorStep,
)
from lerobot.processor.converters import create_transition, identity_transition
from lerobot.processor.converters import create_transition, transition_to_batch
def create_default_config():
@@ -69,8 +69,6 @@ def test_make_sac_processor_basic():
preprocessor, postprocessor = make_sac_pre_post_processors(
config,
stats,
preprocessor_kwargs={"to_transition": identity_transition, "to_output": identity_transition},
postprocessor_kwargs={"to_transition": identity_transition, "to_output": identity_transition},
)
# Check processor names
@@ -98,30 +96,28 @@ def test_sac_processor_normalization_modes():
preprocessor, postprocessor = make_sac_pre_post_processors(
config,
stats,
preprocessor_kwargs={"to_transition": identity_transition, "to_output": identity_transition},
postprocessor_kwargs={"to_transition": identity_transition, "to_output": identity_transition},
)
# Create test data
observation = {OBS_STATE: torch.randn(10) * 2} # Larger values to test normalization
action = torch.rand(5) * 2 - 1 # Range [-1, 1]
transition = create_transition(observation, action)
batch = transition_to_batch(transition)
# Process through preprocessor
processed = preprocessor(transition)
processed = preprocessor(batch)
# Check that data is normalized and batched
# State should be mean-std normalized
# Action should be min-max normalized to [-1, 1]
assert processed[TransitionKey.OBSERVATION][OBS_STATE].shape == (1, 10)
assert processed[TransitionKey.ACTION].shape == (1, 5)
assert processed[OBS_STATE].shape == (1, 10)
assert processed[TransitionKey.ACTION.value].shape == (1, 5)
# Process action through postprocessor
action_transition = create_transition(action=processed[TransitionKey.ACTION])
postprocessed = postprocessor(action_transition)
postprocessed = postprocessor(processed[TransitionKey.ACTION.value])
# Check that action is unnormalized (but still batched)
assert postprocessed[TransitionKey.ACTION].shape == (1, 5)
assert postprocessed.shape == (1, 5)
@pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA not available")
@@ -134,28 +130,26 @@ def test_sac_processor_cuda():
preprocessor, postprocessor = make_sac_pre_post_processors(
config,
stats,
preprocessor_kwargs={"to_transition": identity_transition, "to_output": identity_transition},
postprocessor_kwargs={"to_transition": identity_transition, "to_output": identity_transition},
)
# Create CPU data
observation = {OBS_STATE: torch.randn(10)}
action = torch.randn(5)
transition = create_transition(observation, action)
batch = transition_to_batch(transition)
# Process through preprocessor
processed = preprocessor(transition)
processed = preprocessor(batch)
# Check that data is on CUDA
assert processed[TransitionKey.OBSERVATION][OBS_STATE].device.type == "cuda"
assert processed[TransitionKey.ACTION].device.type == "cuda"
assert processed[OBS_STATE].device.type == "cuda"
assert processed[TransitionKey.ACTION.value].device.type == "cuda"
# Process through postprocessor
action_transition = create_transition(action=processed[TransitionKey.ACTION])
postprocessed = postprocessor(action_transition)
postprocessed = postprocessor(processed[TransitionKey.ACTION.value])
# Check that action is back on CPU
assert postprocessed[TransitionKey.ACTION].device.type == "cpu"
assert postprocessed.device.type == "cpu"
@pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA not available")
@@ -168,8 +162,6 @@ def test_sac_processor_accelerate_scenario():
preprocessor, postprocessor = make_sac_pre_post_processors(
config,
stats,
preprocessor_kwargs={"to_transition": identity_transition, "to_output": identity_transition},
postprocessor_kwargs={"to_transition": identity_transition, "to_output": identity_transition},
)
# Simulate Accelerate: data already on GPU
@@ -177,13 +169,14 @@ def test_sac_processor_accelerate_scenario():
observation = {OBS_STATE: torch.randn(10).to(device)}
action = torch.randn(5).to(device)
transition = create_transition(observation, action)
batch = transition_to_batch(transition)
# Process through preprocessor
processed = preprocessor(transition)
processed = preprocessor(batch)
# Check that data stays on same GPU
assert processed[TransitionKey.OBSERVATION][OBS_STATE].device == device
assert processed[TransitionKey.ACTION].device == device
assert processed[OBS_STATE].device == device
assert processed[TransitionKey.ACTION.value].device == device
@pytest.mark.skipif(torch.cuda.device_count() < 2, reason="Requires at least 2 GPUs")
@@ -196,8 +189,6 @@ def test_sac_processor_multi_gpu():
preprocessor, postprocessor = make_sac_pre_post_processors(
config,
stats,
preprocessor_kwargs={"to_transition": identity_transition, "to_output": identity_transition},
postprocessor_kwargs={"to_transition": identity_transition, "to_output": identity_transition},
)
# Simulate data on different GPU
@@ -205,35 +196,21 @@ def test_sac_processor_multi_gpu():
observation = {OBS_STATE: torch.randn(10).to(device)}
action = torch.randn(5).to(device)
transition = create_transition(observation, action)
batch = transition_to_batch(transition)
# Process through preprocessor
processed = preprocessor(transition)
processed = preprocessor(batch)
# Check that data stays on cuda:1
assert processed[TransitionKey.OBSERVATION][OBS_STATE].device == device
assert processed[TransitionKey.ACTION].device == device
assert processed[OBS_STATE].device == device
assert processed[TransitionKey.ACTION.value].device == device
def test_sac_processor_without_stats():
"""Test SAC processor creation without dataset statistics."""
config = create_default_config()
# Get the steps from the factory function
factory_preprocessor, factory_postprocessor = make_sac_pre_post_processors(config, dataset_stats=None)
# Create new processors with EnvTransition input/output
preprocessor = DataProcessorPipeline(
factory_preprocessor.steps,
name=factory_preprocessor.name,
to_transition=identity_transition,
to_output=identity_transition,
)
postprocessor = DataProcessorPipeline(
factory_postprocessor.steps,
name=factory_postprocessor.name,
to_transition=identity_transition,
to_output=identity_transition,
)
preprocessor, postprocessor = make_sac_pre_post_processors(config, dataset_stats=None)
# Should still create processors
assert preprocessor is not None
@@ -243,8 +220,9 @@ def test_sac_processor_without_stats():
observation = {OBS_STATE: torch.randn(10)}
action = torch.randn(5)
transition = create_transition(observation, action)
batch = transition_to_batch(transition)
processed = preprocessor(transition)
processed = preprocessor(batch)
assert processed is not None
@@ -256,8 +234,6 @@ def test_sac_processor_save_and_load():
preprocessor, postprocessor = make_sac_pre_post_processors(
config,
stats,
preprocessor_kwargs={"to_transition": identity_transition, "to_output": identity_transition},
postprocessor_kwargs={"to_transition": identity_transition, "to_output": identity_transition},
)
with tempfile.TemporaryDirectory() as tmpdir:
@@ -265,18 +241,17 @@ def test_sac_processor_save_and_load():
preprocessor.save_pretrained(tmpdir)
# Load preprocessor
loaded_preprocessor = DataProcessorPipeline.from_pretrained(
tmpdir, to_transition=identity_transition, to_output=identity_transition
)
loaded_preprocessor = DataProcessorPipeline.from_pretrained(tmpdir)
# Test that loaded processor works
observation = {OBS_STATE: torch.randn(10)}
action = torch.randn(5)
transition = create_transition(observation, action)
batch = transition_to_batch(transition)
processed = loaded_preprocessor(transition)
assert processed[TransitionKey.OBSERVATION][OBS_STATE].shape == (1, 10)
assert processed[TransitionKey.ACTION].shape == (1, 5)
processed = loaded_preprocessor(batch)
assert processed[OBS_STATE].shape == (1, 10)
assert processed[TransitionKey.ACTION.value].shape == (1, 5)
@pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA not available")
@@ -290,8 +265,6 @@ def test_sac_processor_mixed_precision():
preprocessor, postprocessor = make_sac_pre_post_processors(
config,
stats,
preprocessor_kwargs={"to_transition": identity_transition, "to_output": identity_transition},
postprocessor_kwargs={"to_transition": identity_transition, "to_output": identity_transition},
)
# Replace DeviceProcessorStep with one that uses float16
@@ -301,11 +274,12 @@ def test_sac_processor_mixed_precision():
modified_steps.append(DeviceProcessorStep(device=config.device, float_dtype="float16"))
elif isinstance(step, NormalizerProcessorStep):
# Update normalizer to use the same device as the device processor
norm_step = step # Now type checker knows this is NormalizerProcessorStep
modified_steps.append(
NormalizerProcessorStep(
features=step.features,
norm_map=step.norm_map,
stats=step.stats,
features=norm_step.features,
norm_map=norm_step.norm_map,
stats=norm_step.stats,
device=config.device,
dtype=torch.float16, # Match the float16 dtype
)
@@ -318,13 +292,14 @@ def test_sac_processor_mixed_precision():
observation = {OBS_STATE: torch.randn(10, dtype=torch.float32)}
action = torch.randn(5, dtype=torch.float32)
transition = create_transition(observation, action)
batch = transition_to_batch(transition)
# Process through preprocessor
processed = preprocessor(transition)
processed = preprocessor(batch)
# Check that data is converted to float16
assert processed[TransitionKey.OBSERVATION][OBS_STATE].dtype == torch.float16
assert processed[TransitionKey.ACTION].dtype == torch.float16
assert processed[OBS_STATE].dtype == torch.float16
assert processed[TransitionKey.ACTION.value].dtype == torch.float16
def test_sac_processor_batch_data():
@@ -335,8 +310,6 @@ def test_sac_processor_batch_data():
preprocessor, postprocessor = make_sac_pre_post_processors(
config,
stats,
preprocessor_kwargs={"to_transition": identity_transition, "to_output": identity_transition},
postprocessor_kwargs={"to_transition": identity_transition, "to_output": identity_transition},
)
# Test with batched data
@@ -344,13 +317,14 @@ def test_sac_processor_batch_data():
observation = {OBS_STATE: torch.randn(batch_size, 10)}
action = torch.randn(batch_size, 5)
transition = create_transition(observation, action)
batch = transition_to_batch(transition)
# Process through preprocessor
processed = preprocessor(transition)
processed = preprocessor(batch)
# Check that batch dimension is preserved
assert processed[TransitionKey.OBSERVATION][OBS_STATE].shape == (batch_size, 10)
assert processed[TransitionKey.ACTION].shape == (batch_size, 5)
assert processed[OBS_STATE].shape == (batch_size, 10)
assert processed[TransitionKey.ACTION.value].shape == (batch_size, 5)
def test_sac_processor_edge_cases():
@@ -361,22 +335,24 @@ def test_sac_processor_edge_cases():
preprocessor, postprocessor = make_sac_pre_post_processors(
config,
stats,
preprocessor_kwargs={"to_transition": identity_transition, "to_output": identity_transition},
postprocessor_kwargs={"to_transition": identity_transition, "to_output": identity_transition},
)
# Test with empty observation
transition = create_transition(observation={}, action=torch.randn(5))
processed = preprocessor(transition)
assert processed[TransitionKey.OBSERVATION] == {}
assert processed[TransitionKey.ACTION].shape == (1, 5)
# Test with observation that has no state key but still exists
observation = {"observation.dummy": torch.randn(1)} # Some dummy observation to pass validation
action = torch.randn(5)
batch = {TransitionKey.ACTION.value: action, **observation}
processed = preprocessor(batch)
# observation.state wasn't in original, so it won't be in processed
assert OBS_STATE not in processed
assert processed[TransitionKey.ACTION.value].shape == (1, 5)
# Test with zero action (representing "null" action)
transition = create_transition(observation={OBS_STATE: torch.randn(10)}, action=torch.zeros(5))
processed = preprocessor(transition)
assert processed[TransitionKey.OBSERVATION][OBS_STATE].shape == (1, 10)
batch = transition_to_batch(transition)
processed = preprocessor(batch)
assert processed[OBS_STATE].shape == (1, 10)
# Action should be present and batched, even if it's zeros
assert processed[TransitionKey.ACTION].shape == (1, 5)
assert processed[TransitionKey.ACTION.value].shape == (1, 5)
@pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA not available")
@@ -389,8 +365,6 @@ def test_sac_processor_bfloat16_device_float32_normalizer():
preprocessor, _ = make_sac_pre_post_processors(
config,
stats,
preprocessor_kwargs={"to_transition": identity_transition, "to_output": identity_transition},
postprocessor_kwargs={"to_transition": identity_transition, "to_output": identity_transition},
)
# Modify the pipeline to use bfloat16 device processor with float32 normalizer
@@ -401,11 +375,12 @@ def test_sac_processor_bfloat16_device_float32_normalizer():
modified_steps.append(DeviceProcessorStep(device=config.device, float_dtype="bfloat16"))
elif isinstance(step, NormalizerProcessorStep):
# Normalizer stays configured as float32 (will auto-adapt to bfloat16)
norm_step = step # Now type checker knows this is NormalizerProcessorStep
modified_steps.append(
NormalizerProcessorStep(
features=step.features,
norm_map=step.norm_map,
stats=step.stats,
features=norm_step.features,
norm_map=norm_step.norm_map,
stats=norm_step.stats,
device=config.device,
dtype=torch.float32, # Deliberately configured as float32
)
@@ -422,13 +397,14 @@ def test_sac_processor_bfloat16_device_float32_normalizer():
observation = {OBS_STATE: torch.randn(10, dtype=torch.float32)} # Start with float32
action = torch.randn(5, dtype=torch.float32)
transition = create_transition(observation, action)
batch = transition_to_batch(transition)
# Process through full pipeline
processed = preprocessor(transition)
processed = preprocessor(batch)
# Verify: DeviceProcessor → bfloat16, NormalizerProcessor adapts → final output is bfloat16
assert processed[TransitionKey.OBSERVATION][OBS_STATE].dtype == torch.bfloat16
assert processed[TransitionKey.ACTION].dtype == torch.bfloat16
assert processed[OBS_STATE].dtype == torch.bfloat16
assert processed[TransitionKey.ACTION.value].dtype == torch.bfloat16
# Verify normalizer automatically adapted its internal state
assert normalizer_step.dtype == torch.bfloat16
+28 -31
View File
@@ -37,7 +37,7 @@ from lerobot.processor import (
TransitionKey,
UnnormalizerProcessorStep,
)
from lerobot.processor.converters import create_transition, identity_transition
from lerobot.processor.converters import create_transition, transition_to_batch
class MockTokenizerProcessorStep(ProcessorStep):
@@ -98,8 +98,6 @@ def test_make_smolvla_processor_basic():
preprocessor, postprocessor = make_smolvla_pre_post_processors(
config,
stats,
preprocessor_kwargs={"to_transition": identity_transition, "to_output": identity_transition},
postprocessor_kwargs={"to_transition": identity_transition, "to_output": identity_transition},
)
# Check processor names
@@ -204,8 +202,6 @@ def test_smolvla_processor_cuda():
preprocessor, postprocessor = make_smolvla_pre_post_processors(
config,
stats,
preprocessor_kwargs={"to_transition": identity_transition, "to_output": identity_transition},
postprocessor_kwargs={"to_transition": identity_transition, "to_output": identity_transition},
)
# Create CPU data
@@ -216,13 +212,16 @@ def test_smolvla_processor_cuda():
action = torch.randn(7)
transition = create_transition(observation, action, complementary_data={"task": "test task"})
batch = transition_to_batch(transition)
# Process through preprocessor
processed = preprocessor(transition)
processed = preprocessor(batch)
# Check that data is on CUDA
assert processed[TransitionKey.OBSERVATION][OBS_STATE].device.type == "cuda"
assert processed[TransitionKey.OBSERVATION][OBS_IMAGE].device.type == "cuda"
assert processed[TransitionKey.ACTION].device.type == "cuda"
assert processed[OBS_STATE].device.type == "cuda"
assert processed[OBS_IMAGE].device.type == "cuda"
assert processed[TransitionKey.ACTION.value].device.type == "cuda"
@pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA not available")
@@ -261,8 +260,6 @@ def test_smolvla_processor_accelerate_scenario():
preprocessor, postprocessor = make_smolvla_pre_post_processors(
config,
stats,
preprocessor_kwargs={"to_transition": identity_transition, "to_output": identity_transition},
postprocessor_kwargs={"to_transition": identity_transition, "to_output": identity_transition},
)
# Simulate Accelerate: data already on GPU and batched
@@ -274,13 +271,16 @@ def test_smolvla_processor_accelerate_scenario():
action = torch.randn(1, 7).to(device)
transition = create_transition(observation, action, complementary_data={"task": ["test task"]})
batch = transition_to_batch(transition)
# Process through preprocessor
processed = preprocessor(transition)
processed = preprocessor(batch)
# Check that data stays on same GPU
assert processed[TransitionKey.OBSERVATION][OBS_STATE].device == device
assert processed[TransitionKey.OBSERVATION][OBS_IMAGE].device == device
assert processed[TransitionKey.ACTION].device == device
assert processed[OBS_STATE].device == device
assert processed[OBS_IMAGE].device == device
assert processed[TransitionKey.ACTION.value].device == device
@pytest.mark.skipif(torch.cuda.device_count() < 2, reason="Requires at least 2 GPUs")
@@ -319,8 +319,6 @@ def test_smolvla_processor_multi_gpu():
preprocessor, postprocessor = make_smolvla_pre_post_processors(
config,
stats,
preprocessor_kwargs={"to_transition": identity_transition, "to_output": identity_transition},
postprocessor_kwargs={"to_transition": identity_transition, "to_output": identity_transition},
)
# Simulate data on different GPU
@@ -332,13 +330,16 @@ def test_smolvla_processor_multi_gpu():
action = torch.randn(1, 7).to(device)
transition = create_transition(observation, action, complementary_data={"task": ["test task"]})
batch = transition_to_batch(transition)
# Process through preprocessor
processed = preprocessor(transition)
processed = preprocessor(batch)
# Check that data stays on cuda:1
assert processed[TransitionKey.OBSERVATION][OBS_STATE].device == device
assert processed[TransitionKey.OBSERVATION][OBS_IMAGE].device == device
assert processed[TransitionKey.ACTION].device == device
assert processed[OBS_STATE].device == device
assert processed[OBS_IMAGE].device == device
assert processed[TransitionKey.ACTION.value].device == device
def test_smolvla_processor_without_stats():
@@ -352,8 +353,6 @@ def test_smolvla_processor_without_stats():
preprocessor, postprocessor = make_smolvla_pre_post_processors(
config,
dataset_stats=None,
preprocessor_kwargs={"to_transition": identity_transition, "to_output": identity_transition},
postprocessor_kwargs={"to_transition": identity_transition, "to_output": identity_transition},
)
# Should still create processors
@@ -405,8 +404,6 @@ def test_smolvla_processor_bfloat16_device_float32_normalizer():
preprocessor, _ = make_smolvla_pre_post_processors(
config,
stats,
preprocessor_kwargs={"to_transition": identity_transition, "to_output": identity_transition},
postprocessor_kwargs={"to_transition": identity_transition, "to_output": identity_transition},
)
# Modify the pipeline to use bfloat16 device processor with float32 normalizer
@@ -444,15 +441,15 @@ def test_smolvla_processor_bfloat16_device_float32_normalizer():
observation, action, complementary_data={"task": "test bfloat16 adaptation"}
)
batch = transition_to_batch(transition)
# Process through full pipeline
processed = preprocessor(transition)
processed = preprocessor(batch)
# Verify: DeviceProcessor → bfloat16, NormalizerProcessor adapts → final output is bfloat16
assert processed[TransitionKey.OBSERVATION][OBS_STATE].dtype == torch.bfloat16
assert (
processed[TransitionKey.OBSERVATION][OBS_IMAGE].dtype == torch.bfloat16
) # IDENTITY normalization still gets dtype conversion
assert processed[TransitionKey.ACTION].dtype == torch.bfloat16
assert processed[OBS_STATE].dtype == torch.bfloat16
assert processed[OBS_IMAGE].dtype == torch.bfloat16 # IDENTITY normalization still gets dtype conversion
assert processed[TransitionKey.ACTION.value].dtype == torch.bfloat16
# Verify normalizer automatically adapted its internal state
assert normalizer_step.dtype == torch.bfloat16
+72 -86
View File
@@ -33,7 +33,7 @@ from lerobot.processor import (
TransitionKey,
UnnormalizerProcessorStep,
)
from lerobot.processor.converters import create_transition, identity_transition
from lerobot.processor.converters import create_transition, transition_to_batch
def create_default_config():
@@ -72,8 +72,6 @@ def test_make_tdmpc_processor_basic():
preprocessor, postprocessor = make_tdmpc_pre_post_processors(
config,
stats,
preprocessor_kwargs={"to_transition": identity_transition, "to_output": identity_transition},
postprocessor_kwargs={"to_transition": identity_transition, "to_output": identity_transition},
)
# Check processor names
@@ -101,8 +99,6 @@ def test_tdmpc_processor_normalization():
preprocessor, postprocessor = make_tdmpc_pre_post_processors(
config,
stats,
preprocessor_kwargs={"to_transition": identity_transition, "to_output": identity_transition},
postprocessor_kwargs={"to_transition": identity_transition, "to_output": identity_transition},
)
# Create test data
@@ -113,20 +109,22 @@ def test_tdmpc_processor_normalization():
action = torch.randn(6)
transition = create_transition(observation, action)
batch = transition_to_batch(transition)
# Process through preprocessor
processed = preprocessor(transition)
processed = preprocessor(batch)
# Check that data is processed and batched
assert processed[TransitionKey.OBSERVATION][OBS_STATE].shape == (1, 12)
assert processed[TransitionKey.OBSERVATION][OBS_IMAGE].shape == (1, 3, 224, 224)
assert processed[TransitionKey.ACTION].shape == (1, 6)
assert processed[OBS_STATE].shape == (1, 12)
assert processed[OBS_IMAGE].shape == (1, 3, 224, 224)
assert processed[TransitionKey.ACTION.value].shape == (1, 6)
# Process action through postprocessor
action_transition = create_transition(action=processed[TransitionKey.ACTION])
postprocessed = postprocessor(action_transition)
postprocessed = postprocessor(processed[TransitionKey.ACTION.value])
# Check that action is unnormalized (but still batched)
assert postprocessed[TransitionKey.ACTION].shape == (1, 6)
assert postprocessed.shape == (1, 6)
@pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA not available")
@@ -139,8 +137,6 @@ def test_tdmpc_processor_cuda():
preprocessor, postprocessor = make_tdmpc_pre_post_processors(
config,
stats,
preprocessor_kwargs={"to_transition": identity_transition, "to_output": identity_transition},
postprocessor_kwargs={"to_transition": identity_transition, "to_output": identity_transition},
)
# Create CPU data
@@ -151,20 +147,22 @@ def test_tdmpc_processor_cuda():
action = torch.randn(6)
transition = create_transition(observation, action)
batch = transition_to_batch(transition)
# Process through preprocessor
processed = preprocessor(transition)
processed = preprocessor(batch)
# Check that data is on CUDA
assert processed[TransitionKey.OBSERVATION][OBS_STATE].device.type == "cuda"
assert processed[TransitionKey.OBSERVATION][OBS_IMAGE].device.type == "cuda"
assert processed[TransitionKey.ACTION].device.type == "cuda"
assert processed[OBS_STATE].device.type == "cuda"
assert processed[OBS_IMAGE].device.type == "cuda"
assert processed[TransitionKey.ACTION.value].device.type == "cuda"
# Process through postprocessor
action_transition = create_transition(action=processed[TransitionKey.ACTION])
postprocessed = postprocessor(action_transition)
postprocessed = postprocessor(processed[TransitionKey.ACTION.value])
# Check that action is back on CPU
assert postprocessed[TransitionKey.ACTION].device.type == "cpu"
assert postprocessed.device.type == "cpu"
@pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA not available")
@@ -177,8 +175,6 @@ def test_tdmpc_processor_accelerate_scenario():
preprocessor, postprocessor = make_tdmpc_pre_post_processors(
config,
stats,
preprocessor_kwargs={"to_transition": identity_transition, "to_output": identity_transition},
postprocessor_kwargs={"to_transition": identity_transition, "to_output": identity_transition},
)
# Simulate Accelerate: data already on GPU
@@ -190,13 +186,16 @@ def test_tdmpc_processor_accelerate_scenario():
action = torch.randn(6).to(device)
transition = create_transition(observation, action)
batch = transition_to_batch(transition)
# Process through preprocessor
processed = preprocessor(transition)
processed = preprocessor(batch)
# Check that data stays on same GPU
assert processed[TransitionKey.OBSERVATION][OBS_STATE].device == device
assert processed[TransitionKey.OBSERVATION][OBS_IMAGE].device == device
assert processed[TransitionKey.ACTION].device == device
assert processed[OBS_STATE].device == device
assert processed[OBS_IMAGE].device == device
assert processed[TransitionKey.ACTION.value].device == device
@pytest.mark.skipif(torch.cuda.device_count() < 2, reason="Requires at least 2 GPUs")
@@ -209,8 +208,6 @@ def test_tdmpc_processor_multi_gpu():
preprocessor, postprocessor = make_tdmpc_pre_post_processors(
config,
stats,
preprocessor_kwargs={"to_transition": identity_transition, "to_output": identity_transition},
postprocessor_kwargs={"to_transition": identity_transition, "to_output": identity_transition},
)
# Simulate data on different GPU
@@ -222,35 +219,23 @@ def test_tdmpc_processor_multi_gpu():
action = torch.randn(6).to(device)
transition = create_transition(observation, action)
batch = transition_to_batch(transition)
# Process through preprocessor
processed = preprocessor(transition)
processed = preprocessor(batch)
# Check that data stays on cuda:1
assert processed[TransitionKey.OBSERVATION][OBS_STATE].device == device
assert processed[TransitionKey.OBSERVATION][OBS_IMAGE].device == device
assert processed[TransitionKey.ACTION].device == device
assert processed[OBS_STATE].device == device
assert processed[OBS_IMAGE].device == device
assert processed[TransitionKey.ACTION.value].device == device
def test_tdmpc_processor_without_stats():
"""Test TDMPC processor creation without dataset statistics."""
config = create_default_config()
# Get the steps from the factory function
factory_preprocessor, factory_postprocessor = make_tdmpc_pre_post_processors(config, dataset_stats=None)
# Create new processors with EnvTransition input/output
preprocessor = DataProcessorPipeline(
factory_preprocessor.steps,
name=factory_preprocessor.name,
to_transition=identity_transition,
to_output=identity_transition,
)
postprocessor = DataProcessorPipeline(
factory_postprocessor.steps,
name=factory_postprocessor.name,
to_transition=identity_transition,
to_output=identity_transition,
)
preprocessor, postprocessor = make_tdmpc_pre_post_processors(config, dataset_stats=None)
# Should still create processors
assert preprocessor is not None
@@ -263,8 +248,9 @@ def test_tdmpc_processor_without_stats():
}
action = torch.randn(6)
transition = create_transition(observation, action)
batch = transition_to_batch(transition)
processed = preprocessor(transition)
processed = preprocessor(batch)
assert processed is not None
@@ -276,8 +262,6 @@ def test_tdmpc_processor_save_and_load():
preprocessor, postprocessor = make_tdmpc_pre_post_processors(
config,
stats,
preprocessor_kwargs={"to_transition": identity_transition, "to_output": identity_transition},
postprocessor_kwargs={"to_transition": identity_transition, "to_output": identity_transition},
)
with tempfile.TemporaryDirectory() as tmpdir:
@@ -285,9 +269,7 @@ def test_tdmpc_processor_save_and_load():
preprocessor.save_pretrained(tmpdir)
# Load preprocessor
loaded_preprocessor = DataProcessorPipeline.from_pretrained(
tmpdir, to_transition=identity_transition, to_output=identity_transition
)
loaded_preprocessor = DataProcessorPipeline.from_pretrained(tmpdir)
# Test that loaded processor works
observation = {
@@ -297,10 +279,11 @@ def test_tdmpc_processor_save_and_load():
action = torch.randn(6)
transition = create_transition(observation, action)
processed = loaded_preprocessor(transition)
assert processed[TransitionKey.OBSERVATION][OBS_STATE].shape == (1, 12)
assert processed[TransitionKey.OBSERVATION][OBS_IMAGE].shape == (1, 3, 224, 224)
assert processed[TransitionKey.ACTION].shape == (1, 6)
batch = transition_to_batch(transition)
processed = loaded_preprocessor(batch)
assert processed[OBS_STATE].shape == (1, 12)
assert processed[OBS_IMAGE].shape == (1, 3, 224, 224)
assert processed[TransitionKey.ACTION.value].shape == (1, 6)
@pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA not available")
@@ -314,8 +297,6 @@ def test_tdmpc_processor_mixed_precision():
preprocessor, postprocessor = make_tdmpc_pre_post_processors(
config,
stats,
preprocessor_kwargs={"to_transition": identity_transition, "to_output": identity_transition},
postprocessor_kwargs={"to_transition": identity_transition, "to_output": identity_transition},
)
# Replace DeviceProcessorStep with one that uses float16
@@ -346,13 +327,16 @@ def test_tdmpc_processor_mixed_precision():
action = torch.randn(6, dtype=torch.float32)
transition = create_transition(observation, action)
batch = transition_to_batch(transition)
# Process through preprocessor
processed = preprocessor(transition)
processed = preprocessor(batch)
# Check that data is converted to float16
assert processed[TransitionKey.OBSERVATION][OBS_STATE].dtype == torch.float16
assert processed[TransitionKey.OBSERVATION][OBS_IMAGE].dtype == torch.float16
assert processed[TransitionKey.ACTION].dtype == torch.float16
assert processed[OBS_STATE].dtype == torch.float16
assert processed[OBS_IMAGE].dtype == torch.float16
assert processed[TransitionKey.ACTION.value].dtype == torch.float16
def test_tdmpc_processor_batch_data():
@@ -363,8 +347,6 @@ def test_tdmpc_processor_batch_data():
preprocessor, postprocessor = make_tdmpc_pre_post_processors(
config,
stats,
preprocessor_kwargs={"to_transition": identity_transition, "to_output": identity_transition},
postprocessor_kwargs={"to_transition": identity_transition, "to_output": identity_transition},
)
# Test with batched data
@@ -376,13 +358,16 @@ def test_tdmpc_processor_batch_data():
action = torch.randn(batch_size, 6)
transition = create_transition(observation, action)
batch = transition_to_batch(transition)
# Process through preprocessor
processed = preprocessor(transition)
processed = preprocessor(batch)
# Check that batch dimension is preserved
assert processed[TransitionKey.OBSERVATION][OBS_STATE].shape == (batch_size, 12)
assert processed[TransitionKey.OBSERVATION][OBS_IMAGE].shape == (batch_size, 3, 224, 224)
assert processed[TransitionKey.ACTION].shape == (batch_size, 6)
assert processed[OBS_STATE].shape == (batch_size, 12)
assert processed[OBS_IMAGE].shape == (batch_size, 3, 224, 224)
assert processed[TransitionKey.ACTION.value].shape == (batch_size, 6)
def test_tdmpc_processor_edge_cases():
@@ -393,8 +378,6 @@ def test_tdmpc_processor_edge_cases():
preprocessor, postprocessor = make_tdmpc_pre_post_processors(
config,
stats,
preprocessor_kwargs={"to_transition": identity_transition, "to_output": identity_transition},
postprocessor_kwargs={"to_transition": identity_transition, "to_output": identity_transition},
)
# Test with only state observation (no image)
@@ -402,17 +385,21 @@ def test_tdmpc_processor_edge_cases():
action = torch.randn(6)
transition = create_transition(observation, action)
processed = preprocessor(transition)
assert processed[TransitionKey.OBSERVATION][OBS_STATE].shape == (1, 12)
assert OBS_IMAGE not in processed[TransitionKey.OBSERVATION]
batch = transition_to_batch(transition)
processed = preprocessor(batch)
assert processed[OBS_STATE].shape == (1, 12)
assert OBS_IMAGE not in processed
# Test with only image observation (no state)
observation = {OBS_IMAGE: torch.randn(3, 224, 224)}
transition = create_transition(observation, action)
processed = preprocessor(transition)
assert processed[TransitionKey.OBSERVATION][OBS_IMAGE].shape == (1, 3, 224, 224)
assert OBS_STATE not in processed[TransitionKey.OBSERVATION]
batch = transition_to_batch(transition)
processed = preprocessor(batch)
assert processed[OBS_IMAGE].shape == (1, 3, 224, 224)
assert OBS_STATE not in processed
@pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA not available")
@@ -425,7 +412,6 @@ def test_tdmpc_processor_bfloat16_device_float32_normalizer():
preprocessor, _ = make_tdmpc_pre_post_processors(
config,
stats,
preprocessor_kwargs={"to_transition": identity_transition, "to_output": identity_transition},
)
# Modify the pipeline to use bfloat16 device processor with float32 normalizer
@@ -461,15 +447,15 @@ def test_tdmpc_processor_bfloat16_device_float32_normalizer():
action = torch.randn(6, dtype=torch.float32)
transition = create_transition(observation, action)
batch = transition_to_batch(transition)
# Process through full pipeline
processed = preprocessor(transition)
processed = preprocessor(batch)
# Verify: DeviceProcessor → bfloat16, NormalizerProcessor adapts → final output is bfloat16
assert processed[TransitionKey.OBSERVATION][OBS_STATE].dtype == torch.bfloat16
assert (
processed[TransitionKey.OBSERVATION][OBS_IMAGE].dtype == torch.bfloat16
) # IDENTITY normalization still gets dtype conversion
assert processed[TransitionKey.ACTION].dtype == torch.bfloat16
assert processed[OBS_STATE].dtype == torch.bfloat16
assert processed[OBS_IMAGE].dtype == torch.bfloat16 # IDENTITY normalization still gets dtype conversion
assert processed[TransitionKey.ACTION.value].dtype == torch.bfloat16
# Verify normalizer automatically adapted its internal state
assert normalizer_step.dtype == torch.bfloat16
+67 -82
View File
@@ -33,7 +33,7 @@ from lerobot.processor import (
TransitionKey,
UnnormalizerProcessorStep,
)
from lerobot.processor.converters import create_transition, identity_transition
from lerobot.processor.converters import create_transition, transition_to_batch
def create_default_config():
@@ -72,8 +72,6 @@ def test_make_vqbet_processor_basic():
preprocessor, postprocessor = make_vqbet_pre_post_processors(
config,
stats,
preprocessor_kwargs={"to_transition": identity_transition, "to_output": identity_transition},
postprocessor_kwargs={"to_transition": identity_transition, "to_output": identity_transition},
)
# Check processor names
@@ -101,8 +99,6 @@ def test_vqbet_processor_with_images():
preprocessor, postprocessor = make_vqbet_pre_post_processors(
config,
stats,
preprocessor_kwargs={"to_transition": identity_transition, "to_output": identity_transition},
postprocessor_kwargs={"to_transition": identity_transition, "to_output": identity_transition},
)
# Create test data with images and states
@@ -113,13 +109,16 @@ def test_vqbet_processor_with_images():
action = torch.randn(7)
transition = create_transition(observation, action)
batch = transition_to_batch(transition)
# Process through preprocessor
processed = preprocessor(transition)
processed = preprocessor(batch)
# Check that data is batched
assert processed[TransitionKey.OBSERVATION][OBS_STATE].shape == (1, 8)
assert processed[TransitionKey.OBSERVATION][OBS_IMAGE].shape == (1, 3, 224, 224)
assert processed[TransitionKey.ACTION].shape == (1, 7)
assert processed[OBS_STATE].shape == (1, 8)
assert processed[OBS_IMAGE].shape == (1, 3, 224, 224)
assert processed[TransitionKey.ACTION.value].shape == (1, 7)
@pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA not available")
@@ -132,8 +131,6 @@ def test_vqbet_processor_cuda():
preprocessor, postprocessor = make_vqbet_pre_post_processors(
config,
stats,
preprocessor_kwargs={"to_transition": identity_transition, "to_output": identity_transition},
postprocessor_kwargs={"to_transition": identity_transition, "to_output": identity_transition},
)
# Create CPU data
@@ -144,20 +141,22 @@ def test_vqbet_processor_cuda():
action = torch.randn(7)
transition = create_transition(observation, action)
batch = transition_to_batch(transition)
# Process through preprocessor
processed = preprocessor(transition)
processed = preprocessor(batch)
# Check that data is on CUDA
assert processed[TransitionKey.OBSERVATION][OBS_STATE].device.type == "cuda"
assert processed[TransitionKey.OBSERVATION][OBS_IMAGE].device.type == "cuda"
assert processed[TransitionKey.ACTION].device.type == "cuda"
assert processed[OBS_STATE].device.type == "cuda"
assert processed[OBS_IMAGE].device.type == "cuda"
assert processed[TransitionKey.ACTION.value].device.type == "cuda"
# Process through postprocessor
action_transition = create_transition(action=processed[TransitionKey.ACTION])
postprocessed = postprocessor(action_transition)
postprocessed = postprocessor(processed[TransitionKey.ACTION.value])
# Check that action is back on CPU
assert postprocessed[TransitionKey.ACTION].device.type == "cpu"
assert postprocessed.device.type == "cpu"
@pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA not available")
@@ -170,8 +169,6 @@ def test_vqbet_processor_accelerate_scenario():
preprocessor, postprocessor = make_vqbet_pre_post_processors(
config,
stats,
preprocessor_kwargs={"to_transition": identity_transition, "to_output": identity_transition},
postprocessor_kwargs={"to_transition": identity_transition, "to_output": identity_transition},
)
# Simulate Accelerate: data already on GPU and batched
@@ -183,13 +180,16 @@ def test_vqbet_processor_accelerate_scenario():
action = torch.randn(1, 7).to(device)
transition = create_transition(observation, action)
batch = transition_to_batch(transition)
# Process through preprocessor
processed = preprocessor(transition)
processed = preprocessor(batch)
# Check that data stays on same GPU
assert processed[TransitionKey.OBSERVATION][OBS_STATE].device == device
assert processed[TransitionKey.OBSERVATION][OBS_IMAGE].device == device
assert processed[TransitionKey.ACTION].device == device
assert processed[OBS_STATE].device == device
assert processed[OBS_IMAGE].device == device
assert processed[TransitionKey.ACTION.value].device == device
@pytest.mark.skipif(torch.cuda.device_count() < 2, reason="Requires at least 2 GPUs")
@@ -202,8 +202,6 @@ def test_vqbet_processor_multi_gpu():
preprocessor, postprocessor = make_vqbet_pre_post_processors(
config,
stats,
preprocessor_kwargs={"to_transition": identity_transition, "to_output": identity_transition},
postprocessor_kwargs={"to_transition": identity_transition, "to_output": identity_transition},
)
# Simulate data on different GPU
@@ -215,35 +213,23 @@ def test_vqbet_processor_multi_gpu():
action = torch.randn(1, 7).to(device)
transition = create_transition(observation, action)
batch = transition_to_batch(transition)
# Process through preprocessor
processed = preprocessor(transition)
processed = preprocessor(batch)
# Check that data stays on cuda:1
assert processed[TransitionKey.OBSERVATION][OBS_STATE].device == device
assert processed[TransitionKey.OBSERVATION][OBS_IMAGE].device == device
assert processed[TransitionKey.ACTION].device == device
assert processed[OBS_STATE].device == device
assert processed[OBS_IMAGE].device == device
assert processed[TransitionKey.ACTION.value].device == device
def test_vqbet_processor_without_stats():
"""Test VQBeT processor creation without dataset statistics."""
config = create_default_config()
# Get the steps from the factory function
factory_preprocessor, factory_postprocessor = make_vqbet_pre_post_processors(config, dataset_stats=None)
# Create new processors with EnvTransition input/output
preprocessor = DataProcessorPipeline(
factory_preprocessor.steps,
name=factory_preprocessor.name,
to_transition=identity_transition,
to_output=identity_transition,
)
postprocessor = DataProcessorPipeline(
factory_postprocessor.steps,
name=factory_postprocessor.name,
to_transition=identity_transition,
to_output=identity_transition,
)
preprocessor, postprocessor = make_vqbet_pre_post_processors(config, dataset_stats=None)
# Should still create processors
assert preprocessor is not None
@@ -257,7 +243,9 @@ def test_vqbet_processor_without_stats():
action = torch.randn(7)
transition = create_transition(observation, action)
processed = preprocessor(transition)
batch = transition_to_batch(transition)
processed = preprocessor(batch)
assert processed is not None
@@ -269,8 +257,6 @@ def test_vqbet_processor_save_and_load():
preprocessor, postprocessor = make_vqbet_pre_post_processors(
config,
stats,
preprocessor_kwargs={"to_transition": identity_transition, "to_output": identity_transition},
postprocessor_kwargs={"to_transition": identity_transition, "to_output": identity_transition},
)
with tempfile.TemporaryDirectory() as tmpdir:
@@ -278,9 +264,7 @@ def test_vqbet_processor_save_and_load():
preprocessor.save_pretrained(tmpdir)
# Load preprocessor
loaded_preprocessor = DataProcessorPipeline.from_pretrained(
tmpdir, to_transition=identity_transition, to_output=identity_transition
)
loaded_preprocessor = DataProcessorPipeline.from_pretrained(tmpdir)
# Test that loaded processor works
observation = {
@@ -290,10 +274,11 @@ def test_vqbet_processor_save_and_load():
action = torch.randn(7)
transition = create_transition(observation, action)
processed = loaded_preprocessor(transition)
assert processed[TransitionKey.OBSERVATION][OBS_STATE].shape == (1, 8)
assert processed[TransitionKey.OBSERVATION][OBS_IMAGE].shape == (1, 3, 224, 224)
assert processed[TransitionKey.ACTION].shape == (1, 7)
batch = transition_to_batch(transition)
processed = loaded_preprocessor(batch)
assert processed[OBS_STATE].shape == (1, 8)
assert processed[OBS_IMAGE].shape == (1, 3, 224, 224)
assert processed[TransitionKey.ACTION.value].shape == (1, 7)
@pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA not available")
@@ -307,8 +292,6 @@ def test_vqbet_processor_mixed_precision():
preprocessor, postprocessor = make_vqbet_pre_post_processors(
config,
stats,
preprocessor_kwargs={"to_transition": identity_transition, "to_output": identity_transition},
postprocessor_kwargs={"to_transition": identity_transition, "to_output": identity_transition},
)
# Replace DeviceProcessorStep with one that uses float16
@@ -339,13 +322,16 @@ def test_vqbet_processor_mixed_precision():
action = torch.randn(7, dtype=torch.float32)
transition = create_transition(observation, action)
batch = transition_to_batch(transition)
# Process through preprocessor
processed = preprocessor(transition)
processed = preprocessor(batch)
# Check that data is converted to float16
assert processed[TransitionKey.OBSERVATION][OBS_STATE].dtype == torch.float16
assert processed[TransitionKey.OBSERVATION][OBS_IMAGE].dtype == torch.float16
assert processed[TransitionKey.ACTION].dtype == torch.float16
assert processed[OBS_STATE].dtype == torch.float16
assert processed[OBS_IMAGE].dtype == torch.float16
assert processed[TransitionKey.ACTION.value].dtype == torch.float16
def test_vqbet_processor_large_batch():
@@ -356,8 +342,6 @@ def test_vqbet_processor_large_batch():
preprocessor, postprocessor = make_vqbet_pre_post_processors(
config,
stats,
preprocessor_kwargs={"to_transition": identity_transition, "to_output": identity_transition},
postprocessor_kwargs={"to_transition": identity_transition, "to_output": identity_transition},
)
# Test with large batch
@@ -369,13 +353,16 @@ def test_vqbet_processor_large_batch():
action = torch.randn(batch_size, 7)
transition = create_transition(observation, action)
batch = transition_to_batch(transition)
# Process through preprocessor
processed = preprocessor(transition)
processed = preprocessor(batch)
# Check that batch dimension is preserved
assert processed[TransitionKey.OBSERVATION][OBS_STATE].shape == (batch_size, 8)
assert processed[TransitionKey.OBSERVATION][OBS_IMAGE].shape == (batch_size, 3, 224, 224)
assert processed[TransitionKey.ACTION].shape == (batch_size, 7)
assert processed[OBS_STATE].shape == (batch_size, 8)
assert processed[OBS_IMAGE].shape == (batch_size, 3, 224, 224)
assert processed[TransitionKey.ACTION.value].shape == (batch_size, 7)
def test_vqbet_processor_sequential_processing():
@@ -386,8 +373,6 @@ def test_vqbet_processor_sequential_processing():
preprocessor, postprocessor = make_vqbet_pre_post_processors(
config,
stats,
preprocessor_kwargs={"to_transition": identity_transition, "to_output": identity_transition},
postprocessor_kwargs={"to_transition": identity_transition, "to_output": identity_transition},
)
# Process multiple samples sequentially
@@ -400,14 +385,16 @@ def test_vqbet_processor_sequential_processing():
action = torch.randn(7)
transition = create_transition(observation, action)
processed = preprocessor(transition)
batch = transition_to_batch(transition)
processed = preprocessor(batch)
results.append(processed)
# Check that all results are consistent
for result in results:
assert result[TransitionKey.OBSERVATION][OBS_STATE].shape == (1, 8)
assert result[TransitionKey.OBSERVATION][OBS_IMAGE].shape == (1, 3, 224, 224)
assert result[TransitionKey.ACTION].shape == (1, 7)
assert result[OBS_STATE].shape == (1, 8)
assert result[OBS_IMAGE].shape == (1, 3, 224, 224)
assert result[TransitionKey.ACTION.value].shape == (1, 7)
@pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA not available")
@@ -420,8 +407,6 @@ def test_vqbet_processor_bfloat16_device_float32_normalizer():
preprocessor, _ = make_vqbet_pre_post_processors(
config,
stats,
preprocessor_kwargs={"to_transition": identity_transition, "to_output": identity_transition},
postprocessor_kwargs={"to_transition": identity_transition, "to_output": identity_transition},
)
# Modify the pipeline to use bfloat16 device processor with float32 normalizer
@@ -457,15 +442,15 @@ def test_vqbet_processor_bfloat16_device_float32_normalizer():
action = torch.randn(7, dtype=torch.float32)
transition = create_transition(observation, action)
batch = transition_to_batch(transition)
# Process through full pipeline
processed = preprocessor(transition)
processed = preprocessor(batch)
# Verify: DeviceProcessor → bfloat16, NormalizerProcessor adapts → final output is bfloat16
assert processed[TransitionKey.OBSERVATION][OBS_STATE].dtype == torch.bfloat16
assert (
processed[TransitionKey.OBSERVATION][OBS_IMAGE].dtype == torch.bfloat16
) # IDENTITY normalization still gets dtype conversion
assert processed[TransitionKey.ACTION].dtype == torch.bfloat16
assert processed[OBS_STATE].dtype == torch.bfloat16
assert processed[OBS_IMAGE].dtype == torch.bfloat16 # IDENTITY normalization still gets dtype conversion
assert processed[TransitionKey.ACTION.value].dtype == torch.bfloat16
# Verify normalizer automatically adapted its internal state
assert normalizer_step.dtype == torch.bfloat16