Mirror of https://github.com/huggingface/lerobot.git (synced 2026-05-16 00:59:46 +00:00)
feat(processor): multiple improvements to the pipeline porting (#1749)
* [Port codebase pipeline] General fixes for RL and scripts (#1748)
* Refactor dataset configuration in documentation and codebase
  - Updated dataset configuration keys from `dataset_root` to `root` and `num_episodes` to `num_episodes_to_record` for consistency.
  - Adjusted replay episode handling by renaming `episode` to `replay_episode`.
  - Enhanced documentation
  - Added a specific processor to transform from policy actions to delta actions
* Added Robot action to tensor processor: new processor script for dealing with gym-specific action processing
* Removed RobotAction2Tensor processor; improved choosing observations in actor
* Nit in delta action
* Added missing reset functions to kinematics
* Adapt teleoperate and replay to pipeline, similar to record
* refactor(processors): move to inheritance (#1750)
* fix(teleoperator): improvements to phone implementation (#1752)
* fix(teleoperator): protect shared state in phone implementation
* refactor(teleop): separate classes in phone
* fix: solve breaking changes (#1753)
* refactor(policies): multiple improvements (#1754)
* refactor(processor): simpler logic in device processor (#1755)
* refactor(processor): euclidean distance in delta action processor (#1757)
* refactor(processor): improvements to joint observations processor migration (#1758)
* refactor(processor): improvements to tokenizer migration (#1759)
* refactor(processor): improvements to tokenizer migration
* fix(tests): tokenizer tests regression from #1750
* fix(processors): fix float comparison and config in hil processors (#1760)
* chore(teleop): remove unnecessary callbacks in KeyboardEndEffectorTeleop (#1761)
* refactor(processor): improvements to normalize pipeline migration (#1756)
* refactor(processor): several improvements to normalize processor step
* refactor(processor): more improvements to normalize processor
* refactor(processor): more changes to normalizer
* refactor(processor): take a different approach to DRY
* refactor(processor): final design
* chore(record): revert comment and continue deleted (#1764)
* refactor(examples): pipeline phone examples (#1769)
* refactor(examples): phone teleop + teleop script
* refactor(examples): phone replay + replay
* chore(examples): rename phone example files & folders
* feat(processor): fix improvements to the pipeline porting (#1796)
* refactor(processor): enhance tensor device handling in normalization process (#1795)
* refactor(tests): remove unsupported device detection test for complementary data (#1797)
* chore(tests): update ToBatchProcessor test (#1798)
* refactor(tests): remove in-place mutation tests for actions and complementary data in batch processor
* test(tests): add tests for action and task processing in batch processor
* add names for android and ios phone (#1799)
* use _tensor_stats in normalize processor (#1800)
* fix(normalize_processor): correct device reference for tensor epsilon handling (#1801)
* add point 5: add missing feature contracts (#1806)
* Fix PR comments 1452 (#1807)
* use key to determine image
* Address rest of PR comments
* use PolicyFeatures in transform_features

---------

Co-authored-by: Pepijn <138571049+pkooij@users.noreply.github.com>

---------

Co-authored-by: Michel Aractingi <michel.aractingi@huggingface.co>
Co-authored-by: Adil Zouitine <adilzouitinegm@gmail.com>
Co-authored-by: Pepijn <138571049+pkooij@users.noreply.github.com>
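To make the dataset-configuration rename above concrete, a minimal sketch follows. Only the key names (`root`, `num_episodes_to_record`, `replay_episode`) come from the commit message; the flat dict layout and the example values are assumptions for illustration, not lerobot's actual config schema.

# Minimal sketch of the rename described above (illustrative dict only;
# the real lerobot config objects and their nesting may differ).
old_record_cfg = {
    "dataset_root": "data/example_task",  # now "root"
    "num_episodes": 50,                   # now "num_episodes_to_record"
    "episode": 3,                         # replay option, now "replay_episode"
}

new_record_cfg = {
    "root": "data/example_task",
    "num_episodes_to_record": 50,
    "replay_episode": 3,
}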
@@ -98,7 +98,11 @@ def test_basic_tokenization(mock_auto_tokenizer):

     processor = TokenizerProcessor(tokenizer_name="test-tokenizer", max_length=10)

-    transition = create_transition(complementary_data={"task": "pick up the red cube"})
+    transition = create_transition(
+        observation={"state": torch.tensor([1.0, 2.0])},
+        action=torch.tensor([0.1, 0.2]),
+        complementary_data={"task": "pick up the red cube"},
+    )

     result = processor(transition)

@@ -126,7 +130,11 @@ def test_basic_tokenization_with_tokenizer_object():

     processor = TokenizerProcessor(tokenizer=mock_tokenizer, max_length=10)

-    transition = create_transition(complementary_data={"task": "pick up the red cube"})
+    transition = create_transition(
+        observation={"state": torch.tensor([1.0, 2.0])},
+        action=torch.tensor([0.1, 0.2]),
+        complementary_data={"task": "pick up the red cube"},
+    )

     result = processor(transition)

@@ -156,7 +164,11 @@ def test_list_of_strings_tokenization(mock_auto_tokenizer):

     processor = TokenizerProcessor(tokenizer_name="test-tokenizer", max_length=8)

-    transition = create_transition(complementary_data={"task": ["pick up cube", "place on table"]})
+    transition = create_transition(
+        observation={"state": torch.tensor([1.0, 2.0])},
+        action=torch.tensor([0.1, 0.2]),
+        complementary_data={"task": ["pick up cube", "place on table"]},
+    )

     result = processor(transition)

@@ -180,7 +192,11 @@ def test_custom_keys(mock_auto_tokenizer):

     processor = TokenizerProcessor(tokenizer_name="test-tokenizer", task_key="instruction", max_length=5)

-    transition = create_transition(complementary_data={"instruction": "move forward"})
+    transition = create_transition(
+        observation={"state": torch.tensor([1.0, 2.0])},
+        action=torch.tensor([0.1, 0.2]),
+        complementary_data={"instruction": "move forward"},
+    )

     result = processor(transition)

@@ -421,7 +437,11 @@ def test_save_and_load_pretrained_with_tokenizer_name(mock_auto_tokenizer):
         loaded_processor = RobotProcessor.from_pretrained(temp_dir)

         # Test that loaded processor works
-        transition = create_transition(complementary_data={"instruction": "test instruction"})
+        transition = create_transition(
+            observation={"state": torch.tensor([1.0, 2.0])},
+            action=torch.tensor([0.1, 0.2]),
+            complementary_data={"instruction": "test instruction"},
+        )

         result = loaded_processor(transition)
         assert TransitionKey.OBSERVATION in result
@@ -448,7 +468,11 @@ def test_save_and_load_pretrained_with_tokenizer_object():
         )

         # Test that loaded processor works
-        transition = create_transition(complementary_data={"instruction": "test instruction"})
+        transition = create_transition(
+            observation={"state": torch.tensor([1.0, 2.0])},
+            action=torch.tensor([0.1, 0.2]),
+            complementary_data={"instruction": "test instruction"},
+        )

         result = loaded_processor(transition)
         assert TransitionKey.OBSERVATION in result
@@ -569,7 +593,11 @@ def test_tokenization_parameters(mock_auto_tokenizer):
         padding_side="left",
     )

-    transition = create_transition(complementary_data={"task": "test task"})
+    transition = create_transition(
+        observation={"state": torch.tensor([1.0, 2.0])},
+        action=torch.tensor([0.1, 0.2]),
+        complementary_data={"task": "test task"},
+    )

     processor(transition)

@@ -592,12 +620,14 @@ def test_preserves_other_complementary_data(mock_auto_tokenizer):
     processor = TokenizerProcessor(tokenizer_name="test-tokenizer")

     transition = create_transition(
+        observation={"state": torch.tensor([1.0, 2.0])},
+        action=torch.tensor([0.1, 0.2]),
         complementary_data={
             "task": "test task",
             "episode_id": 123,
             "timestamp": 456.789,
             "other_field": {"nested": "data"},
-        }
+        },
     )

     result = processor(transition)
@@ -624,7 +654,11 @@ def test_deterministic_tokenization(mock_auto_tokenizer):

     processor = TokenizerProcessor(tokenizer_name="test-tokenizer", max_length=10)

-    transition = create_transition(complementary_data={"task": "consistent test"})
+    transition = create_transition(
+        observation={"state": torch.tensor([1.0, 2.0])},
+        action=torch.tensor([0.1, 0.2]),
+        complementary_data={"task": "consistent test"},
+    )

     result1 = processor(transition)
     result2 = processor(transition)
@@ -648,7 +682,11 @@ def test_empty_string_task(mock_auto_tokenizer):

     processor = TokenizerProcessor(tokenizer_name="test-tokenizer", max_length=8)

-    transition = create_transition(complementary_data={"task": ""})
+    transition = create_transition(
+        observation={"state": torch.tensor([1.0, 2.0])},
+        action=torch.tensor([0.1, 0.2]),
+        complementary_data={"task": ""},
+    )

     result = processor(transition)

@@ -669,7 +707,11 @@ def test_very_long_task(mock_auto_tokenizer):
     processor = TokenizerProcessor(tokenizer_name="test-tokenizer", max_length=5, truncation=True)

     long_task = " ".join(["word"] * 100)  # Very long task
-    transition = create_transition(complementary_data={"task": long_task})
+    transition = create_transition(
+        observation={"state": torch.tensor([1.0, 2.0])},
+        action=torch.tensor([0.1, 0.2]),
+        complementary_data={"task": long_task},
+    )

     result = processor(transition)

@@ -714,7 +756,11 @@ def test_custom_padding_side(mock_auto_tokenizer):
     # Test left padding
     processor_left = TokenizerProcessor(tokenizer_name="test-tokenizer", max_length=10, padding_side="left")

-    transition = create_transition(complementary_data={"task": "test task"})
+    transition = create_transition(
+        observation={"state": torch.tensor([1.0, 2.0])},
+        action=torch.tensor([0.1, 0.2]),
+        complementary_data={"task": "test task"},
+    )
     processor_left(transition)

     assert tracking_tokenizer.padding_side_calls[-1] == "left"
@@ -873,32 +919,6 @@ def test_device_detection_from_action():
     assert attention_mask.device.type == "cuda"


-@pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA not available")
-@require_package("transformers")
-def test_device_detection_from_complementary_data():
-    """Test that device is detected from tensors in complementary_data."""
-    mock_tokenizer = MockTokenizer(vocab_size=100)
-    processor = TokenizerProcessor(tokenizer=mock_tokenizer, max_length=10)
-
-    # Create transition with tensor in complementary_data
-    transition = create_transition(
-        observation={"metadata": {"key": "value"}},  # No tensors
-        complementary_data={
-            "task": "comp data test",
-            "index": torch.tensor([42]).cuda(),  # Tensor in complementary_data
-        },
-    )
-
-    result = processor(transition)
-
-    # Check that tokenized tensors match complementary_data tensor's device
-    tokens = result[TransitionKey.OBSERVATION][f"{OBS_LANGUAGE}.tokens"]
-    attention_mask = result[TransitionKey.OBSERVATION][f"{OBS_LANGUAGE}.attention_mask"]
-
-    assert tokens.device.type == "cuda"
-    assert attention_mask.device.type == "cuda"
-
-
 @require_package("transformers")
 def test_device_detection_preserves_dtype():
     """Test that device detection doesn't affect dtype of tokenized tensors."""
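Taken together, the hunks above migrate every tokenizer-processor test to the same pattern: instead of passing only `complementary_data`, each transition now also carries explicit observation and action tensors, and the test that detected the device from tensors inside `complementary_data` is removed because that behavior is no longer supported (#1797). A minimal sketch of the updated pattern follows; the import paths are assumptions inferred from the identifiers in the diff, not verified module locations.

import torch

# Assumed import paths -- the diff shows only the identifiers, not their modules.
from lerobot.processor import TokenizerProcessor, TransitionKey  # hypothetical path
from lerobot.constants import OBS_LANGUAGE                       # hypothetical path
from tests.processor.test_tokenizer_processor import create_transition  # hypothetical path

# Old pattern (removed in this commit): only the language task was provided.
# transition = create_transition(complementary_data={"task": "pick up the red cube"})

# New pattern: the transition also carries explicit state and action tensors.
transition = create_transition(
    observation={"state": torch.tensor([1.0, 2.0])},
    action=torch.tensor([0.1, 0.2]),
    complementary_data={"task": "pick up the red cube"},
)

processor = TokenizerProcessor(tokenizer_name="test-tokenizer", max_length=10)
result = processor(transition)

# The tokenized task lands under the language observation keys, as the assertions above check.
tokens = result[TransitionKey.OBSERVATION][f"{OBS_LANGUAGE}.tokens"]
attention_mask = result[TransitionKey.OBSERVATION][f"{OBS_LANGUAGE}.attention_mask"]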