refactor(processor): improve processor pipeline typing with generic type (#1810)

* refactor(processor): introduce generic type for to_output

- Always return `TOutput`
- Remove `_prepare_transition`, so `__call__` now always returns `TOutput`
- Update tests accordingly
- This refactor paves the way for adding settings for `to_transition` and `to_output` in `make_processor` and the post-processor

* refactor(processor): consolidate ProcessorKwargs usage across policies

- Remove the `ProcessorTypes` module and integrate `ProcessorKwargs` directly into the processor pipeline.
- Update multiple policy files to use the new `ProcessorKwargs` structure for preprocessor and postprocessor arguments.
- Simplify the handling of processor kwargs by initializing them to empty dictionaries when not provided.
This commit is contained in:
Adil Zouitine
2025-09-02 12:57:14 +02:00
committed by GitHub
parent 08fb310eaa
commit d32b76cc66
26 changed files with 847 additions and 220 deletions
+13 -6
View File
@@ -389,7 +389,7 @@ def test_integration_with_robot_processor(mock_auto_tokenizer):
mock_auto_tokenizer.from_pretrained.return_value = mock_tokenizer
tokenizer_processor = TokenizerProcessor(tokenizer_name="test-tokenizer", max_length=6)
robot_processor = RobotProcessor([tokenizer_processor])
robot_processor = RobotProcessor([tokenizer_processor], to_transition=lambda x: x, to_output=lambda x: x)
transition = create_transition(
observation={"state": torch.tensor([1.0, 2.0])},
@@ -427,14 +427,16 @@ def test_save_and_load_pretrained_with_tokenizer_name(mock_auto_tokenizer):
tokenizer_name="test-tokenizer", max_length=32, task_key="instruction"
)
robot_processor = RobotProcessor([original_processor])
robot_processor = RobotProcessor([original_processor], to_transition=lambda x: x, to_output=lambda x: x)
with tempfile.TemporaryDirectory() as temp_dir:
# Save processor
robot_processor.save_pretrained(temp_dir)
# Load processor - tokenizer will be recreated from saved config
loaded_processor = RobotProcessor.from_pretrained(temp_dir)
loaded_processor = RobotProcessor.from_pretrained(
temp_dir, to_transition=lambda x: x, to_output=lambda x: x
)
# Test that loaded processor works
transition = create_transition(
@@ -456,7 +458,7 @@ def test_save_and_load_pretrained_with_tokenizer_object():
original_processor = TokenizerProcessor(tokenizer=mock_tokenizer, max_length=32, task_key="instruction")
robot_processor = RobotProcessor([original_processor])
robot_processor = RobotProcessor([original_processor], to_transition=lambda x: x, to_output=lambda x: x)
with tempfile.TemporaryDirectory() as temp_dir:
# Save processor
@@ -464,7 +466,10 @@ def test_save_and_load_pretrained_with_tokenizer_object():
# Load processor with tokenizer override (since tokenizer object wasn't saved)
loaded_processor = RobotProcessor.from_pretrained(
temp_dir, overrides={"tokenizer_processor": {"tokenizer": mock_tokenizer}}
temp_dir,
overrides={"tokenizer_processor": {"tokenizer": mock_tokenizer}},
to_transition=lambda x: x,
to_output=lambda x: x,
)
# Test that loaded processor works
@@ -952,7 +957,9 @@ def test_integration_with_device_processor(mock_auto_tokenizer):
# Create pipeline with TokenizerProcessor then DeviceProcessor
tokenizer_processor = TokenizerProcessor(tokenizer_name="test-tokenizer", max_length=6)
device_processor = DeviceProcessor(device="cuda:0")
robot_processor = RobotProcessor([tokenizer_processor, device_processor])
robot_processor = RobotProcessor(
[tokenizer_processor, device_processor], to_transition=lambda x: x, to_output=lambda x: x
)
# Start with CPU tensors
transition = create_transition(