test(tokenizer_processor): Add require_package decorator for transformers

- Introduced the @require_package("transformers") decorator on multiple test functions to ensure the transformers package is available before running tests.
- This change enhances test reliability by preventing failures due to missing dependencies.
This commit is contained in:
Adil Zouitine
2025-08-06 19:22:23 +02:00
parent db3cf0158c
commit d38792d6e5
@@ -12,6 +12,7 @@ from lerobot.configs.types import FeatureType, PolicyFeature
from lerobot.constants import OBS_LANGUAGE
from lerobot.processor.pipeline import RobotProcessor, TransitionKey
from lerobot.processor.tokenizer_processor import TokenizerProcessor
from tests.utils import require_package
def create_transition(
@@ -87,6 +88,7 @@ def mock_tokenizer():
return MockTokenizer(vocab_size=100)
@require_package("transformers")
@patch("lerobot.processor.tokenizer_processor.AutoTokenizer")
def test_basic_tokenization(mock_auto_tokenizer):
"""Test basic string tokenization functionality."""
@@ -117,6 +119,7 @@ def test_basic_tokenization(mock_auto_tokenizer):
assert attention_mask.shape == (10,)
@require_package("transformers")
def test_basic_tokenization_with_tokenizer_object():
"""Test basic string tokenization functionality using tokenizer object directly."""
mock_tokenizer = MockTokenizer(vocab_size=100)
@@ -144,6 +147,7 @@ def test_basic_tokenization_with_tokenizer_object():
assert attention_mask.shape == (10,)
@require_package("transformers")
@patch("lerobot.processor.tokenizer_processor.AutoTokenizer")
def test_list_of_strings_tokenization(mock_auto_tokenizer):
"""Test tokenization of a list of strings."""
@@ -167,6 +171,7 @@ def test_list_of_strings_tokenization(mock_auto_tokenizer):
assert attention_mask.shape == (2, 8)
@require_package("transformers")
@patch("lerobot.processor.tokenizer_processor.AutoTokenizer")
def test_custom_keys(mock_auto_tokenizer):
"""Test using custom task_key."""
@@ -188,6 +193,7 @@ def test_custom_keys(mock_auto_tokenizer):
assert tokens.shape == (5,)
@require_package("transformers")
@patch("lerobot.processor.tokenizer_processor.AutoTokenizer")
def test_none_complementary_data(mock_auto_tokenizer):
"""Test handling of None complementary_data."""
@@ -202,6 +208,7 @@ def test_none_complementary_data(mock_auto_tokenizer):
assert result == transition # Should return unchanged
@require_package("transformers")
@patch("lerobot.processor.tokenizer_processor.AutoTokenizer")
def test_missing_task_key(mock_auto_tokenizer):
"""Test handling when task key is missing."""
@@ -216,6 +223,7 @@ def test_missing_task_key(mock_auto_tokenizer):
assert result == transition # Should return unchanged
@require_package("transformers")
@patch("lerobot.processor.tokenizer_processor.AutoTokenizer")
def test_none_task_value(mock_auto_tokenizer):
"""Test handling when task value is None."""
@@ -230,6 +238,7 @@ def test_none_task_value(mock_auto_tokenizer):
assert result == transition # Should return unchanged
@require_package("transformers")
@patch("lerobot.processor.tokenizer_processor.AutoTokenizer")
def test_unsupported_task_type(mock_auto_tokenizer):
"""Test handling of unsupported task types."""
@@ -251,12 +260,14 @@ def test_unsupported_task_type(mock_auto_tokenizer):
assert result == transition # Should return unchanged
@require_package("transformers")
def test_no_tokenizer_error():
"""Test that ValueError is raised when neither tokenizer nor tokenizer_name is provided."""
with pytest.raises(ValueError, match="Either 'tokenizer' or 'tokenizer_name' must be provided"):
TokenizerProcessor()
@require_package("transformers")
def test_invalid_tokenizer_name_error():
"""Test that error is raised when invalid tokenizer_name is provided."""
with patch("lerobot.processor.tokenizer_processor.AutoTokenizer") as mock_auto_tokenizer:
@@ -267,6 +278,7 @@ def test_invalid_tokenizer_name_error():
TokenizerProcessor(tokenizer_name="invalid-tokenizer")
@require_package("transformers")
@patch("lerobot.processor.tokenizer_processor.AutoTokenizer")
def test_get_config_with_tokenizer_name(mock_auto_tokenizer):
"""Test configuration serialization when using tokenizer_name."""
@@ -295,6 +307,7 @@ def test_get_config_with_tokenizer_name(mock_auto_tokenizer):
assert config == expected
@require_package("transformers")
def test_get_config_with_tokenizer_object():
"""Test configuration serialization when using tokenizer object."""
mock_tokenizer = MockTokenizer(vocab_size=100)
@@ -322,6 +335,7 @@ def test_get_config_with_tokenizer_object():
assert "tokenizer_name" not in config
@require_package("transformers")
@patch("lerobot.processor.tokenizer_processor.AutoTokenizer")
def test_state_dict_methods(mock_auto_tokenizer):
"""Test state_dict and load_state_dict methods."""
@@ -338,6 +352,7 @@ def test_state_dict_methods(mock_auto_tokenizer):
processor.load_state_dict({})
@require_package("transformers")
@patch("lerobot.processor.tokenizer_processor.AutoTokenizer")
def test_reset_method(mock_auto_tokenizer):
"""Test reset method."""
@@ -350,6 +365,7 @@ def test_reset_method(mock_auto_tokenizer):
processor.reset()
@require_package("transformers")
@patch("lerobot.processor.tokenizer_processor.AutoTokenizer")
def test_integration_with_robot_processor(mock_auto_tokenizer):
"""Test integration with RobotProcessor."""
@@ -384,6 +400,7 @@ def test_integration_with_robot_processor(mock_auto_tokenizer):
assert torch.equal(result[TransitionKey.ACTION], transition[TransitionKey.ACTION])
@require_package("transformers")
@patch("lerobot.processor.tokenizer_processor.AutoTokenizer")
def test_save_and_load_pretrained_with_tokenizer_name(mock_auto_tokenizer):
"""Test saving and loading processor with tokenizer_name."""
@@ -412,6 +429,7 @@ def test_save_and_load_pretrained_with_tokenizer_name(mock_auto_tokenizer):
assert f"{OBS_LANGUAGE}.attention_mask" in result[TransitionKey.OBSERVATION]
@require_package("transformers")
def test_save_and_load_pretrained_with_tokenizer_object():
"""Test saving and loading processor with tokenizer object using overrides."""
mock_tokenizer = MockTokenizer(vocab_size=100)
@@ -438,6 +456,7 @@ def test_save_and_load_pretrained_with_tokenizer_object():
assert f"{OBS_LANGUAGE}.attention_mask" in result[TransitionKey.OBSERVATION]
@require_package("transformers")
def test_registry_functionality():
"""Test that the processor is properly registered."""
from lerobot.processor.pipeline import ProcessorStepRegistry
@@ -450,6 +469,7 @@ def test_registry_functionality():
assert retrieved_class is TokenizerProcessor
@require_package("transformers")
def test_feature_contract_basic():
"""Test basic feature contract functionality."""
mock_tokenizer = MockTokenizer(vocab_size=100)
@@ -480,6 +500,7 @@ def test_feature_contract_basic():
assert attention_mask_feature.shape == (128,)
@require_package("transformers")
def test_feature_contract_with_custom_max_length():
"""Test feature contract with custom max_length."""
mock_tokenizer = MockTokenizer(vocab_size=100)
@@ -499,6 +520,7 @@ def test_feature_contract_with_custom_max_length():
assert attention_mask_feature.shape == (64,)
@require_package("transformers")
def test_feature_contract_existing_features():
"""Test feature contract when tokenized features already exist."""
mock_tokenizer = MockTokenizer(vocab_size=100)
@@ -516,6 +538,7 @@ def test_feature_contract_existing_features():
assert output_features[f"{OBS_LANGUAGE}.attention_mask"].shape == (100,)
@require_package("transformers")
@patch("lerobot.processor.tokenizer_processor.AutoTokenizer")
def test_tokenization_parameters(mock_auto_tokenizer):
"""Test that tokenization parameters are correctly passed to tokenizer."""
@@ -559,6 +582,7 @@ def test_tokenization_parameters(mock_auto_tokenizer):
assert tracking_tokenizer.last_call_kwargs["return_tensors"] == "pt"
@require_package("transformers")
@patch("lerobot.processor.tokenizer_processor.AutoTokenizer")
def test_preserves_other_complementary_data(mock_auto_tokenizer):
"""Test that other complementary data fields are preserved."""
@@ -591,6 +615,7 @@ def test_preserves_other_complementary_data(mock_auto_tokenizer):
assert f"{OBS_LANGUAGE}.attention_mask" in observation
@require_package("transformers")
@patch("lerobot.processor.tokenizer_processor.AutoTokenizer")
def test_deterministic_tokenization(mock_auto_tokenizer):
"""Test that tokenization is deterministic for the same input."""
@@ -614,6 +639,7 @@ def test_deterministic_tokenization(mock_auto_tokenizer):
assert torch.equal(attention_mask1, attention_mask2)
@require_package("transformers")
@patch("lerobot.processor.tokenizer_processor.AutoTokenizer")
def test_empty_string_task(mock_auto_tokenizer):
"""Test handling of empty string task."""
@@ -633,6 +659,7 @@ def test_empty_string_task(mock_auto_tokenizer):
assert tokens.shape == (8,)
@require_package("transformers")
@patch("lerobot.processor.tokenizer_processor.AutoTokenizer")
def test_very_long_task(mock_auto_tokenizer):
"""Test handling of very long task strings."""
@@ -654,6 +681,7 @@ def test_very_long_task(mock_auto_tokenizer):
assert attention_mask.shape == (5,)
@require_package("transformers")
@patch("lerobot.processor.tokenizer_processor.AutoTokenizer")
def test_custom_padding_side(mock_auto_tokenizer):
"""Test using custom padding_side parameter."""