Mirror of https://github.com/huggingface/lerobot.git (synced 2026-05-16 09:09:48 +00:00)
refactor(processor): improve processor pipeline typing with generic type (#1810)

* refactor(processor): introduce generic type for to_output
  - Always return `TOutput`
  - Remove `_prepare_transition`, so `__call__` now always returns `TOutput`
  - Update tests accordingly
  - This refactor paves the way for adding settings for `to_transition` and `to_output` in `make_processor` and the post-processor

* refactor(processor): consolidate ProcessorKwargs usage across policies
  - Removed the ProcessorTypes module and integrated ProcessorKwargs directly into the processor pipeline.
  - Updated multiple policy files to use the new ProcessorKwargs structure for preprocessor and postprocessor arguments.
  - Simplified the handling of processor kwargs by initializing them to empty dicts when not provided.
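The first commit describes the generic-typing pattern only in prose, so a minimal self-contained sketch of that shape follows. The `Pipeline` class and the `EnvTransition` alias are illustrative stand-ins, not lerobot's actual definitions; only `to_transition`, `to_output`, and the `TOutput`-returning `__call__` come from the commit message.

from dataclasses import dataclass
from typing import Any, Callable, Generic, TypeVar

# Hypothetical stand-in for lerobot's transition type, used only to keep
# this sketch self-contained.
EnvTransition = dict[str, Any]
TOutput = TypeVar("TOutput")


@dataclass
class Pipeline(Generic[TOutput]):
    # to_transition converts raw caller input into the internal transition
    # format; each step maps transition -> transition; to_output converts
    # the final transition into TOutput, so __call__ always returns TOutput.
    steps: list[Callable[[EnvTransition], EnvTransition]]
    to_transition: Callable[[Any], EnvTransition]
    to_output: Callable[[EnvTransition], TOutput]

    def __call__(self, data: Any) -> TOutput:
        transition = self.to_transition(data)
        for step in self.steps:
            transition = step(transition)
        return self.to_output(transition)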
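The second commit's ProcessorKwargs consolidation can be sketched the same way. The `TypedDict` fields and the `make_processor` signature below are assumptions for illustration; only the `make_processor` name and the default-to-empty-dict behavior come from the commit message.

from typing import Any, TypedDict


class ProcessorKwargs(TypedDict, total=False):
    # Illustrative keys only; the real fields are defined by lerobot's
    # processor pipeline after the consolidation.
    to_transition: Any
    to_output: Any


def make_processor(
    preprocessor_kwargs: ProcessorKwargs | None = None,
    postprocessor_kwargs: ProcessorKwargs | None = None,
) -> tuple[ProcessorKwargs, ProcessorKwargs]:
    # Per the commit message, kwargs left unspecified are initialized to
    # empty dicts so downstream code never has to branch on None.
    return preprocessor_kwargs or {}, postprocessor_kwargs or {}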
@@ -389,7 +389,7 @@ def test_integration_with_robot_processor(mock_auto_tokenizer):
     mock_auto_tokenizer.from_pretrained.return_value = mock_tokenizer
 
     tokenizer_processor = TokenizerProcessor(tokenizer_name="test-tokenizer", max_length=6)
-    robot_processor = RobotProcessor([tokenizer_processor])
+    robot_processor = RobotProcessor([tokenizer_processor], to_transition=lambda x: x, to_output=lambda x: x)
 
     transition = create_transition(
         observation={"state": torch.tensor([1.0, 2.0])},
@@ -427,14 +427,16 @@ def test_save_and_load_pretrained_with_tokenizer_name(mock_auto_tokenizer):
         tokenizer_name="test-tokenizer", max_length=32, task_key="instruction"
     )
 
-    robot_processor = RobotProcessor([original_processor])
+    robot_processor = RobotProcessor([original_processor], to_transition=lambda x: x, to_output=lambda x: x)
 
     with tempfile.TemporaryDirectory() as temp_dir:
         # Save processor
         robot_processor.save_pretrained(temp_dir)
 
         # Load processor - tokenizer will be recreated from saved config
-        loaded_processor = RobotProcessor.from_pretrained(temp_dir)
+        loaded_processor = RobotProcessor.from_pretrained(
+            temp_dir, to_transition=lambda x: x, to_output=lambda x: x
+        )
 
         # Test that loaded processor works
         transition = create_transition(
@@ -456,7 +458,7 @@ def test_save_and_load_pretrained_with_tokenizer_object():
 
     original_processor = TokenizerProcessor(tokenizer=mock_tokenizer, max_length=32, task_key="instruction")
 
-    robot_processor = RobotProcessor([original_processor])
+    robot_processor = RobotProcessor([original_processor], to_transition=lambda x: x, to_output=lambda x: x)
 
     with tempfile.TemporaryDirectory() as temp_dir:
         # Save processor
@@ -464,7 +466,10 @@ def test_save_and_load_pretrained_with_tokenizer_object():
 
         # Load processor with tokenizer override (since tokenizer object wasn't saved)
         loaded_processor = RobotProcessor.from_pretrained(
-            temp_dir, overrides={"tokenizer_processor": {"tokenizer": mock_tokenizer}}
+            temp_dir,
+            overrides={"tokenizer_processor": {"tokenizer": mock_tokenizer}},
+            to_transition=lambda x: x,
+            to_output=lambda x: x,
         )
 
         # Test that loaded processor works
@@ -952,7 +957,9 @@ def test_integration_with_device_processor(mock_auto_tokenizer):
     # Create pipeline with TokenizerProcessor then DeviceProcessor
     tokenizer_processor = TokenizerProcessor(tokenizer_name="test-tokenizer", max_length=6)
     device_processor = DeviceProcessor(device="cuda:0")
-    robot_processor = RobotProcessor([tokenizer_processor, device_processor])
+    robot_processor = RobotProcessor(
+        [tokenizer_processor, device_processor], to_transition=lambda x: x, to_output=lambda x: x
+    )
 
     # Start with CPU tensors
     transition = create_transition(
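One note on why every updated call site passes identity lambdas: with `to_transition=lambda x: x` and `to_output=lambda x: x`, the pipeline's input and output are the raw transition itself, so the tests can assert on it directly. A usage sketch, reusing the hypothetical `Pipeline` stand-in from above:

# Identity boundaries make TOutput the transition dict itself.
pipeline = Pipeline(
    steps=[lambda t: {**t, "seen": True}],  # toy step that tags the transition
    to_transition=lambda x: x,
    to_output=lambda x: x,
)
out = pipeline({"observation": {"state": [1.0, 2.0]}})
assert out["seen"] is True
assert out["observation"]["state"] == [1.0, 2.0]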