refactor(processor): improve processor pipeline typing with generic type (#1810)

* refactor(processor): introduce generic type for to_output

- Always return `TOutput`
- Remove `_prepare_transition`, so `__call__` now always returns `TOutput`
- Update tests accordingly
- This refactor paves the way for adding settings for `to_transition` and `to_output` in `make_processor` and the post-processor

* refactor(processor): consolidate ProcessorKwargs usage across policies

- Remove the `ProcessorTypes` module and integrate `ProcessorKwargs` directly into the processor pipeline.
- Update multiple policy files to use the new `ProcessorKwargs` structure for preprocessor and postprocessor arguments.
- Simplify the handling of processor kwargs by initializing them to empty dictionaries when not provided.
This commit is contained in:
Adil Zouitine
2025-09-02 12:57:14 +02:00
committed by GitHub
parent 08fb310eaa
commit d32b76cc66
26 changed files with 847 additions and 220 deletions
+13 -6
View File
@@ -389,7 +389,7 @@ def test_integration_with_robot_processor(mock_auto_tokenizer):
mock_auto_tokenizer.from_pretrained.return_value = mock_tokenizer
tokenizer_processor = TokenizerProcessor(tokenizer_name="test-tokenizer", max_length=6)
robot_processor = RobotProcessor([tokenizer_processor])
robot_processor = RobotProcessor([tokenizer_processor], to_transition=lambda x: x, to_output=lambda x: x)
transition = create_transition(
observation={"state": torch.tensor([1.0, 2.0])},
@@ -427,14 +427,16 @@ def test_save_and_load_pretrained_with_tokenizer_name(mock_auto_tokenizer):
tokenizer_name="test-tokenizer", max_length=32, task_key="instruction"
)
robot_processor = RobotProcessor([original_processor])
robot_processor = RobotProcessor([original_processor], to_transition=lambda x: x, to_output=lambda x: x)
with tempfile.TemporaryDirectory() as temp_dir:
# Save processor
robot_processor.save_pretrained(temp_dir)
# Load processor - tokenizer will be recreated from saved config
loaded_processor = RobotProcessor.from_pretrained(temp_dir)
loaded_processor = RobotProcessor.from_pretrained(
temp_dir, to_transition=lambda x: x, to_output=lambda x: x
)
# Test that loaded processor works
transition = create_transition(
@@ -456,7 +458,7 @@ def test_save_and_load_pretrained_with_tokenizer_object():
original_processor = TokenizerProcessor(tokenizer=mock_tokenizer, max_length=32, task_key="instruction")
robot_processor = RobotProcessor([original_processor])
robot_processor = RobotProcessor([original_processor], to_transition=lambda x: x, to_output=lambda x: x)
with tempfile.TemporaryDirectory() as temp_dir:
# Save processor
@@ -464,7 +466,10 @@ def test_save_and_load_pretrained_with_tokenizer_object():
# Load processor with tokenizer override (since tokenizer object wasn't saved)
loaded_processor = RobotProcessor.from_pretrained(
temp_dir, overrides={"tokenizer_processor": {"tokenizer": mock_tokenizer}}
temp_dir,
overrides={"tokenizer_processor": {"tokenizer": mock_tokenizer}},
to_transition=lambda x: x,
to_output=lambda x: x,
)
# Test that loaded processor works
@@ -952,7 +957,9 @@ def test_integration_with_device_processor(mock_auto_tokenizer):
# Create pipeline with TokenizerProcessor then DeviceProcessor
tokenizer_processor = TokenizerProcessor(tokenizer_name="test-tokenizer", max_length=6)
device_processor = DeviceProcessor(device="cuda:0")
robot_processor = RobotProcessor([tokenizer_processor, device_processor])
robot_processor = RobotProcessor(
[tokenizer_processor, device_processor], to_transition=lambda x: x, to_output=lambda x: x
)
# Start with CPU tensors
transition = create_transition(