diff --git a/tests/processor/test_tokenizer_processor.py b/tests/processor/test_tokenizer_processor.py
index b0c235b68..452c36da9 100644
--- a/tests/processor/test_tokenizer_processor.py
+++ b/tests/processor/test_tokenizer_processor.py
@@ -12,6 +12,7 @@ from lerobot.configs.types import FeatureType, PolicyFeature
 from lerobot.constants import OBS_LANGUAGE
 from lerobot.processor.pipeline import RobotProcessor, TransitionKey
 from lerobot.processor.tokenizer_processor import TokenizerProcessor
+from tests.utils import require_package
 
 
 def create_transition(
@@ -87,6 +88,7 @@ def mock_tokenizer():
     return MockTokenizer(vocab_size=100)
 
 
+@require_package("transformers")
 @patch("lerobot.processor.tokenizer_processor.AutoTokenizer")
 def test_basic_tokenization(mock_auto_tokenizer):
     """Test basic string tokenization functionality."""
@@ -117,6 +119,7 @@ def test_basic_tokenization(mock_auto_tokenizer):
     assert attention_mask.shape == (10,)
 
 
+@require_package("transformers")
 def test_basic_tokenization_with_tokenizer_object():
     """Test basic string tokenization functionality using tokenizer object directly."""
     mock_tokenizer = MockTokenizer(vocab_size=100)
@@ -144,6 +147,7 @@ def test_basic_tokenization_with_tokenizer_object():
     assert attention_mask.shape == (10,)
 
 
+@require_package("transformers")
 @patch("lerobot.processor.tokenizer_processor.AutoTokenizer")
 def test_list_of_strings_tokenization(mock_auto_tokenizer):
     """Test tokenization of a list of strings."""
@@ -167,6 +171,7 @@ def test_list_of_strings_tokenization(mock_auto_tokenizer):
     assert attention_mask.shape == (2, 8)
 
 
+@require_package("transformers")
 @patch("lerobot.processor.tokenizer_processor.AutoTokenizer")
 def test_custom_keys(mock_auto_tokenizer):
     """Test using custom task_key."""
@@ -188,6 +193,7 @@ def test_custom_keys(mock_auto_tokenizer):
     assert tokens.shape == (5,)
 
 
+@require_package("transformers")
 @patch("lerobot.processor.tokenizer_processor.AutoTokenizer")
 def test_none_complementary_data(mock_auto_tokenizer):
     """Test handling of None complementary_data."""
@@ -202,6 +208,7 @@ def test_none_complementary_data(mock_auto_tokenizer):
     assert result == transition  # Should return unchanged
 
 
+@require_package("transformers")
 @patch("lerobot.processor.tokenizer_processor.AutoTokenizer")
 def test_missing_task_key(mock_auto_tokenizer):
     """Test handling when task key is missing."""
@@ -216,6 +223,7 @@ def test_missing_task_key(mock_auto_tokenizer):
     assert result == transition  # Should return unchanged
 
 
+@require_package("transformers")
 @patch("lerobot.processor.tokenizer_processor.AutoTokenizer")
 def test_none_task_value(mock_auto_tokenizer):
     """Test handling when task value is None."""
@@ -230,6 +238,7 @@ def test_none_task_value(mock_auto_tokenizer):
     assert result == transition  # Should return unchanged
 
 
+@require_package("transformers")
 @patch("lerobot.processor.tokenizer_processor.AutoTokenizer")
 def test_unsupported_task_type(mock_auto_tokenizer):
     """Test handling of unsupported task types."""
@@ -251,12 +260,14 @@ def test_unsupported_task_type(mock_auto_tokenizer):
     assert result == transition  # Should return unchanged
 
 
+@require_package("transformers")
 def test_no_tokenizer_error():
     """Test that ValueError is raised when neither tokenizer nor tokenizer_name is provided."""
     with pytest.raises(ValueError, match="Either 'tokenizer' or 'tokenizer_name' must be provided"):
         TokenizerProcessor()
 
 
+@require_package("transformers")
 def test_invalid_tokenizer_name_error():
     """Test that error is raised when invalid tokenizer_name is provided."""
     with patch("lerobot.processor.tokenizer_processor.AutoTokenizer") as mock_auto_tokenizer:
@@ -267,6 +278,7 @@ def test_invalid_tokenizer_name_error():
             TokenizerProcessor(tokenizer_name="invalid-tokenizer")
 
 
+@require_package("transformers")
 @patch("lerobot.processor.tokenizer_processor.AutoTokenizer")
 def test_get_config_with_tokenizer_name(mock_auto_tokenizer):
     """Test configuration serialization when using tokenizer_name."""
@@ -295,6 +307,7 @@ def test_get_config_with_tokenizer_name(mock_auto_tokenizer):
     assert config == expected
 
 
+@require_package("transformers")
def test_get_config_with_tokenizer_object():
     """Test configuration serialization when using tokenizer object."""
     mock_tokenizer = MockTokenizer(vocab_size=100)
@@ -322,6 +335,7 @@ def test_get_config_with_tokenizer_object():
     assert "tokenizer_name" not in config
 
 
+@require_package("transformers")
 @patch("lerobot.processor.tokenizer_processor.AutoTokenizer")
 def test_state_dict_methods(mock_auto_tokenizer):
     """Test state_dict and load_state_dict methods."""
@@ -338,6 +352,7 @@ def test_state_dict_methods(mock_auto_tokenizer):
     processor.load_state_dict({})
 
 
+@require_package("transformers")
 @patch("lerobot.processor.tokenizer_processor.AutoTokenizer")
 def test_reset_method(mock_auto_tokenizer):
     """Test reset method."""
@@ -350,6 +365,7 @@ def test_reset_method(mock_auto_tokenizer):
     processor.reset()
 
 
+@require_package("transformers")
 @patch("lerobot.processor.tokenizer_processor.AutoTokenizer")
 def test_integration_with_robot_processor(mock_auto_tokenizer):
     """Test integration with RobotProcessor."""
@@ -384,6 +400,7 @@ def test_integration_with_robot_processor(mock_auto_tokenizer):
     assert torch.equal(result[TransitionKey.ACTION], transition[TransitionKey.ACTION])
 
 
+@require_package("transformers")
 @patch("lerobot.processor.tokenizer_processor.AutoTokenizer")
 def test_save_and_load_pretrained_with_tokenizer_name(mock_auto_tokenizer):
     """Test saving and loading processor with tokenizer_name."""
@@ -412,6 +429,7 @@ def test_save_and_load_pretrained_with_tokenizer_name(mock_auto_tokenizer):
     assert f"{OBS_LANGUAGE}.attention_mask" in result[TransitionKey.OBSERVATION]
 
 
+@require_package("transformers")
 def test_save_and_load_pretrained_with_tokenizer_object():
     """Test saving and loading processor with tokenizer object using overrides."""
     mock_tokenizer = MockTokenizer(vocab_size=100)
@@ -438,6 +456,7 @@ def test_save_and_load_pretrained_with_tokenizer_object():
     assert f"{OBS_LANGUAGE}.attention_mask" in result[TransitionKey.OBSERVATION]
 
 
+@require_package("transformers")
 def test_registry_functionality():
     """Test that the processor is properly registered."""
     from lerobot.processor.pipeline import ProcessorStepRegistry
@@ -450,6 +469,7 @@ def test_registry_functionality():
     assert retrieved_class is TokenizerProcessor
 
 
+@require_package("transformers")
 def test_feature_contract_basic():
     """Test basic feature contract functionality."""
     mock_tokenizer = MockTokenizer(vocab_size=100)
@@ -480,6 +500,7 @@ def test_feature_contract_basic():
     assert attention_mask_feature.shape == (128,)
 
 
+@require_package("transformers")
 def test_feature_contract_with_custom_max_length():
     """Test feature contract with custom max_length."""
     mock_tokenizer = MockTokenizer(vocab_size=100)
@@ -499,6 +520,7 @@ def test_feature_contract_with_custom_max_length():
     assert attention_mask_feature.shape == (64,)
 
 
+@require_package("transformers")
 def test_feature_contract_existing_features():
     """Test feature contract when tokenized features already exist."""
     mock_tokenizer = MockTokenizer(vocab_size=100)
@@ -516,6 +538,7 @@ def test_feature_contract_existing_features():
     assert output_features[f"{OBS_LANGUAGE}.attention_mask"].shape == (100,)
 
 
+@require_package("transformers")
 @patch("lerobot.processor.tokenizer_processor.AutoTokenizer")
 def test_tokenization_parameters(mock_auto_tokenizer):
     """Test that tokenization parameters are correctly passed to tokenizer."""
@@ -559,6 +582,7 @@ def test_tokenization_parameters(mock_auto_tokenizer):
     assert tracking_tokenizer.last_call_kwargs["return_tensors"] == "pt"
 
 
+@require_package("transformers")
 @patch("lerobot.processor.tokenizer_processor.AutoTokenizer")
 def test_preserves_other_complementary_data(mock_auto_tokenizer):
     """Test that other complementary data fields are preserved."""
@@ -591,6 +615,7 @@ def test_preserves_other_complementary_data(mock_auto_tokenizer):
     assert f"{OBS_LANGUAGE}.attention_mask" in observation
 
 
+@require_package("transformers")
 @patch("lerobot.processor.tokenizer_processor.AutoTokenizer")
 def test_deterministic_tokenization(mock_auto_tokenizer):
     """Test that tokenization is deterministic for the same input."""
@@ -614,6 +639,7 @@ def test_deterministic_tokenization(mock_auto_tokenizer):
     assert torch.equal(attention_mask1, attention_mask2)
 
 
+@require_package("transformers")
 @patch("lerobot.processor.tokenizer_processor.AutoTokenizer")
 def test_empty_string_task(mock_auto_tokenizer):
     """Test handling of empty string task."""
@@ -633,6 +659,7 @@ def test_empty_string_task(mock_auto_tokenizer):
     assert tokens.shape == (8,)
 
 
+@require_package("transformers")
 @patch("lerobot.processor.tokenizer_processor.AutoTokenizer")
 def test_very_long_task(mock_auto_tokenizer):
     """Test handling of very long task strings."""
@@ -654,6 +681,7 @@ def test_very_long_task(mock_auto_tokenizer):
     assert attention_mask.shape == (5,)
 
 
+@require_package("transformers")
 @patch("lerobot.processor.tokenizer_processor.AutoTokenizer")
 def test_custom_padding_side(mock_auto_tokenizer):
     """Test using custom padding_side parameter."""
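
Note: every test in this module is now gated behind `@require_package("transformers")`, imported from `tests/utils.py`, so the suite skips cleanly when the optional `transformers` dependency is absent instead of failing at collection time. As a rough sketch of how such a guard can be built on `pytest.mark.skipif` (the actual helper in `tests/utils.py` may be implemented differently):

```python
# Hypothetical sketch of a require_package-style guard -- not the verbatim
# implementation from tests/utils.py.
import importlib.util

import pytest


def require_package(package_name: str):
    """Skip the decorated test when `package_name` cannot be imported."""
    return pytest.mark.skipif(
        importlib.util.find_spec(package_name) is None,
        reason=f"requires the '{package_name}' package",
    )
```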