mirror of
https://github.com/huggingface/lerobot.git
synced 2026-05-15 16:49:55 +00:00
test(tokenizer_processor): Add require_package decorator for transformers
- Add the `@require_package("transformers")` decorator to multiple test functions to ensure that the `transformers` package is available before the tests run.
- This makes the tests more reliable by preventing failures caused by missing dependencies.
This commit is contained in:
@@ -12,6 +12,7 @@ from lerobot.configs.types import FeatureType, PolicyFeature
|
||||
from lerobot.constants import OBS_LANGUAGE
|
||||
from lerobot.processor.pipeline import RobotProcessor, TransitionKey
|
||||
from lerobot.processor.tokenizer_processor import TokenizerProcessor
|
||||
from tests.utils import require_package
|
||||
|
||||
|
||||
def create_transition(
|
||||
@@ -87,6 +88,7 @@ def mock_tokenizer():
|
||||
return MockTokenizer(vocab_size=100)
|
||||
|
||||
|
||||
@require_package("transformers")
|
||||
@patch("lerobot.processor.tokenizer_processor.AutoTokenizer")
|
||||
def test_basic_tokenization(mock_auto_tokenizer):
|
||||
"""Test basic string tokenization functionality."""
|
||||
@@ -117,6 +119,7 @@ def test_basic_tokenization(mock_auto_tokenizer):
|
||||
assert attention_mask.shape == (10,)
|
||||
|
||||
|
||||
@require_package("transformers")
|
||||
def test_basic_tokenization_with_tokenizer_object():
|
||||
"""Test basic string tokenization functionality using tokenizer object directly."""
|
||||
mock_tokenizer = MockTokenizer(vocab_size=100)
|
||||
@@ -144,6 +147,7 @@ def test_basic_tokenization_with_tokenizer_object():
|
||||
assert attention_mask.shape == (10,)
|
||||
|
||||
|
||||
@require_package("transformers")
|
||||
@patch("lerobot.processor.tokenizer_processor.AutoTokenizer")
|
||||
def test_list_of_strings_tokenization(mock_auto_tokenizer):
|
||||
"""Test tokenization of a list of strings."""
|
||||
@@ -167,6 +171,7 @@ def test_list_of_strings_tokenization(mock_auto_tokenizer):
|
||||
assert attention_mask.shape == (2, 8)
|
||||
|
||||
|
||||
@require_package("transformers")
|
||||
@patch("lerobot.processor.tokenizer_processor.AutoTokenizer")
|
||||
def test_custom_keys(mock_auto_tokenizer):
|
||||
"""Test using custom task_key."""
|
||||
@@ -188,6 +193,7 @@ def test_custom_keys(mock_auto_tokenizer):
|
||||
assert tokens.shape == (5,)
|
||||
|
||||
|
||||
@require_package("transformers")
|
||||
@patch("lerobot.processor.tokenizer_processor.AutoTokenizer")
|
||||
def test_none_complementary_data(mock_auto_tokenizer):
|
||||
"""Test handling of None complementary_data."""
|
||||
@@ -202,6 +208,7 @@ def test_none_complementary_data(mock_auto_tokenizer):
|
||||
assert result == transition # Should return unchanged
|
||||
|
||||
|
||||
@require_package("transformers")
|
||||
@patch("lerobot.processor.tokenizer_processor.AutoTokenizer")
|
||||
def test_missing_task_key(mock_auto_tokenizer):
|
||||
"""Test handling when task key is missing."""
|
||||
@@ -216,6 +223,7 @@ def test_missing_task_key(mock_auto_tokenizer):
|
||||
assert result == transition # Should return unchanged
|
||||
|
||||
|
||||
@require_package("transformers")
|
||||
@patch("lerobot.processor.tokenizer_processor.AutoTokenizer")
|
||||
def test_none_task_value(mock_auto_tokenizer):
|
||||
"""Test handling when task value is None."""
|
||||
@@ -230,6 +238,7 @@ def test_none_task_value(mock_auto_tokenizer):
|
||||
assert result == transition # Should return unchanged
|
||||
|
||||
|
||||
@require_package("transformers")
|
||||
@patch("lerobot.processor.tokenizer_processor.AutoTokenizer")
|
||||
def test_unsupported_task_type(mock_auto_tokenizer):
|
||||
"""Test handling of unsupported task types."""
|
||||
@@ -251,12 +260,14 @@ def test_unsupported_task_type(mock_auto_tokenizer):
|
||||
assert result == transition # Should return unchanged
|
||||
|
||||
|
||||
@require_package("transformers")
|
||||
def test_no_tokenizer_error():
|
||||
"""Test that ValueError is raised when neither tokenizer nor tokenizer_name is provided."""
|
||||
with pytest.raises(ValueError, match="Either 'tokenizer' or 'tokenizer_name' must be provided"):
|
||||
TokenizerProcessor()
|
||||
|
||||
|
||||
@require_package("transformers")
|
||||
def test_invalid_tokenizer_name_error():
|
||||
"""Test that error is raised when invalid tokenizer_name is provided."""
|
||||
with patch("lerobot.processor.tokenizer_processor.AutoTokenizer") as mock_auto_tokenizer:
|
||||
@@ -267,6 +278,7 @@ def test_invalid_tokenizer_name_error():
|
||||
TokenizerProcessor(tokenizer_name="invalid-tokenizer")
|
||||
|
||||
|
||||
@require_package("transformers")
|
||||
@patch("lerobot.processor.tokenizer_processor.AutoTokenizer")
|
||||
def test_get_config_with_tokenizer_name(mock_auto_tokenizer):
|
||||
"""Test configuration serialization when using tokenizer_name."""
|
||||
@@ -295,6 +307,7 @@ def test_get_config_with_tokenizer_name(mock_auto_tokenizer):
|
||||
assert config == expected
|
||||
|
||||
|
||||
@require_package("transformers")
|
||||
def test_get_config_with_tokenizer_object():
|
||||
"""Test configuration serialization when using tokenizer object."""
|
||||
mock_tokenizer = MockTokenizer(vocab_size=100)
|
||||
@@ -322,6 +335,7 @@ def test_get_config_with_tokenizer_object():
|
||||
assert "tokenizer_name" not in config
|
||||
|
||||
|
||||
@require_package("transformers")
|
||||
@patch("lerobot.processor.tokenizer_processor.AutoTokenizer")
|
||||
def test_state_dict_methods(mock_auto_tokenizer):
|
||||
"""Test state_dict and load_state_dict methods."""
|
||||
@@ -338,6 +352,7 @@ def test_state_dict_methods(mock_auto_tokenizer):
|
||||
processor.load_state_dict({})
|
||||
|
||||
|
||||
@require_package("transformers")
|
||||
@patch("lerobot.processor.tokenizer_processor.AutoTokenizer")
|
||||
def test_reset_method(mock_auto_tokenizer):
|
||||
"""Test reset method."""
|
||||
@@ -350,6 +365,7 @@ def test_reset_method(mock_auto_tokenizer):
|
||||
processor.reset()
|
||||
|
||||
|
||||
@require_package("transformers")
|
||||
@patch("lerobot.processor.tokenizer_processor.AutoTokenizer")
|
||||
def test_integration_with_robot_processor(mock_auto_tokenizer):
|
||||
"""Test integration with RobotProcessor."""
|
||||
@@ -384,6 +400,7 @@ def test_integration_with_robot_processor(mock_auto_tokenizer):
|
||||
assert torch.equal(result[TransitionKey.ACTION], transition[TransitionKey.ACTION])
|
||||
|
||||
|
||||
@require_package("transformers")
|
||||
@patch("lerobot.processor.tokenizer_processor.AutoTokenizer")
|
||||
def test_save_and_load_pretrained_with_tokenizer_name(mock_auto_tokenizer):
|
||||
"""Test saving and loading processor with tokenizer_name."""
|
||||
@@ -412,6 +429,7 @@ def test_save_and_load_pretrained_with_tokenizer_name(mock_auto_tokenizer):
|
||||
assert f"{OBS_LANGUAGE}.attention_mask" in result[TransitionKey.OBSERVATION]
|
||||
|
||||
|
||||
@require_package("transformers")
|
||||
def test_save_and_load_pretrained_with_tokenizer_object():
|
||||
"""Test saving and loading processor with tokenizer object using overrides."""
|
||||
mock_tokenizer = MockTokenizer(vocab_size=100)
|
||||
@@ -438,6 +456,7 @@ def test_save_and_load_pretrained_with_tokenizer_object():
|
||||
assert f"{OBS_LANGUAGE}.attention_mask" in result[TransitionKey.OBSERVATION]
|
||||
|
||||
|
||||
@require_package("transformers")
|
||||
def test_registry_functionality():
|
||||
"""Test that the processor is properly registered."""
|
||||
from lerobot.processor.pipeline import ProcessorStepRegistry
|
||||
@@ -450,6 +469,7 @@ def test_registry_functionality():
|
||||
assert retrieved_class is TokenizerProcessor
|
||||
|
||||
|
||||
@require_package("transformers")
|
||||
def test_feature_contract_basic():
|
||||
"""Test basic feature contract functionality."""
|
||||
mock_tokenizer = MockTokenizer(vocab_size=100)
|
||||
@@ -480,6 +500,7 @@ def test_feature_contract_basic():
|
||||
assert attention_mask_feature.shape == (128,)
|
||||
|
||||
|
||||
@require_package("transformers")
|
||||
def test_feature_contract_with_custom_max_length():
|
||||
"""Test feature contract with custom max_length."""
|
||||
mock_tokenizer = MockTokenizer(vocab_size=100)
|
||||
@@ -499,6 +520,7 @@ def test_feature_contract_with_custom_max_length():
|
||||
assert attention_mask_feature.shape == (64,)
|
||||
|
||||
|
||||
@require_package("transformers")
|
||||
def test_feature_contract_existing_features():
|
||||
"""Test feature contract when tokenized features already exist."""
|
||||
mock_tokenizer = MockTokenizer(vocab_size=100)
|
||||
@@ -516,6 +538,7 @@ def test_feature_contract_existing_features():
|
||||
assert output_features[f"{OBS_LANGUAGE}.attention_mask"].shape == (100,)
|
||||
|
||||
|
||||
@require_package("transformers")
|
||||
@patch("lerobot.processor.tokenizer_processor.AutoTokenizer")
|
||||
def test_tokenization_parameters(mock_auto_tokenizer):
|
||||
"""Test that tokenization parameters are correctly passed to tokenizer."""
|
||||
@@ -559,6 +582,7 @@ def test_tokenization_parameters(mock_auto_tokenizer):
|
||||
assert tracking_tokenizer.last_call_kwargs["return_tensors"] == "pt"
|
||||
|
||||
|
||||
@require_package("transformers")
|
||||
@patch("lerobot.processor.tokenizer_processor.AutoTokenizer")
|
||||
def test_preserves_other_complementary_data(mock_auto_tokenizer):
|
||||
"""Test that other complementary data fields are preserved."""
|
||||
@@ -591,6 +615,7 @@ def test_preserves_other_complementary_data(mock_auto_tokenizer):
|
||||
assert f"{OBS_LANGUAGE}.attention_mask" in observation
|
||||
|
||||
|
||||
@require_package("transformers")
|
||||
@patch("lerobot.processor.tokenizer_processor.AutoTokenizer")
|
||||
def test_deterministic_tokenization(mock_auto_tokenizer):
|
||||
"""Test that tokenization is deterministic for the same input."""
|
||||
@@ -614,6 +639,7 @@ def test_deterministic_tokenization(mock_auto_tokenizer):
|
||||
assert torch.equal(attention_mask1, attention_mask2)
|
||||
|
||||
|
||||
@require_package("transformers")
|
||||
@patch("lerobot.processor.tokenizer_processor.AutoTokenizer")
|
||||
def test_empty_string_task(mock_auto_tokenizer):
|
||||
"""Test handling of empty string task."""
|
||||
@@ -633,6 +659,7 @@ def test_empty_string_task(mock_auto_tokenizer):
|
||||
assert tokens.shape == (8,)
|
||||
|
||||
|
||||
@require_package("transformers")
|
||||
@patch("lerobot.processor.tokenizer_processor.AutoTokenizer")
|
||||
def test_very_long_task(mock_auto_tokenizer):
|
||||
"""Test handling of very long task strings."""
|
||||
@@ -654,6 +681,7 @@ def test_very_long_task(mock_auto_tokenizer):
|
||||
assert attention_mask.shape == (5,)
|
||||
|
||||
|
||||
@require_package("transformers")
|
||||
@patch("lerobot.processor.tokenizer_processor.AutoTokenizer")
|
||||
def test_custom_padding_side(mock_auto_tokenizer):
|
||||
"""Test using custom padding_side parameter."""
|
||||
|
||||
Reference in New Issue
Block a user