fix(language): keep base install green — drop processor re-export, gate dataset-extra tests

`lerobot.processor` re-exported `RenderMessagesStep` at the package
level, so importing anything from `lerobot.processor` pulled in
`lerobot.datasets.language` → `lerobot/datasets/__init__.py` →
`require_package("datasets")`, which fails in the Tier 1 base install
that intentionally omits the `[dataset]` extra. That import chain broke
test collection for unrelated suites (`tests/policies/pi0_pi05/...`,
`tests/envs/...`, etc.).

* Stop re-exporting `RenderMessagesStep` from `lerobot.processor`. The
  only consumer (the test) already imports from the submodule.
  Document the deliberate omission in a comment next to the package
  imports in `lerobot/processor/__init__.py`.
* Add `pytest.importorskip("datasets", ...)` (and `pandas` where
  needed) at the top of the four test modules added in this PR that
  exercise the language stack:
  - tests/datasets/test_language.py
  - tests/datasets/test_language_render.py
  - tests/processor/test_render_messages_processor.py
  - tests/utils/test_collate.py

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Pepijn
2026-05-06 14:12:54 +02:00
parent 789f29aa56
commit 24d2ffe3c6
5 changed files with 43 additions and 18 deletions
+7 -2
View File
@@ -93,9 +93,15 @@ from .relative_action_processor import (
to_relative_actions, to_relative_actions,
) )
from .rename_processor import RenameObservationsProcessorStep, rename_stats from .rename_processor import RenameObservationsProcessorStep, rename_stats
from .render_messages_processor import RenderMessagesStep
from .tokenizer_processor import ActionTokenizerProcessorStep, TokenizerProcessorStep from .tokenizer_processor import ActionTokenizerProcessorStep, TokenizerProcessorStep
# RenderMessagesStep is intentionally NOT re-exported here: it pulls in
# `lerobot.datasets.language`, which requires the `[dataset]` extra
# (`datasets`, `pyarrow`). Importing it from the processor package would
# break every base-install consumer of `lerobot.processor`. Users that
# need it import directly:
# from lerobot.processor.render_messages_processor import RenderMessagesStep
__all__ = [ __all__ = [
"ActionProcessorStep", "ActionProcessorStep",
"AddTeleopActionAsComplimentaryDataStep", "AddTeleopActionAsComplimentaryDataStep",
@@ -129,7 +135,6 @@ __all__ = [
"make_default_robot_observation_processor", "make_default_robot_observation_processor",
"AbsoluteActionsProcessorStep", "AbsoluteActionsProcessorStep",
"RelativeActionsProcessorStep", "RelativeActionsProcessorStep",
"RenderMessagesStep",
"MapDeltaActionToRobotActionStep", "MapDeltaActionToRobotActionStep",
"MapTensorToDeltaActionDictStep", "MapTensorToDeltaActionDictStep",
"NewLineTaskProcessorStep", "NewLineTaskProcessorStep",
+11 -7
View File
@@ -1,13 +1,17 @@
#!/usr/bin/env python #!/usr/bin/env python
import numpy as np
import pandas as pd
import pyarrow as pa
import pytest import pytest
from lerobot.datasets import LeRobotDataset pytest.importorskip("datasets", reason="datasets is required (install lerobot[dataset])")
from lerobot.datasets.io_utils import write_info pytest.importorskip("pandas", reason="pandas is required (install lerobot[dataset])")
from lerobot.datasets.language import (
import numpy as np # noqa: E402
import pandas as pd # noqa: E402
import pyarrow as pa # noqa: E402
from lerobot.datasets import LeRobotDataset # noqa: E402
from lerobot.datasets.io_utils import write_info # noqa: E402
from lerobot.datasets.language import ( # noqa: E402
EVENT_ONLY_STYLES, EVENT_ONLY_STYLES,
LANGUAGE_EVENTS, LANGUAGE_EVENTS,
LANGUAGE_PERSISTENT, LANGUAGE_PERSISTENT,
@@ -21,7 +25,7 @@ from lerobot.datasets.language import (
language_persistent_arrow_type, language_persistent_arrow_type,
validate_camera_field, validate_camera_field,
) )
from lerobot.datasets.utils import DEFAULT_DATA_PATH from lerobot.datasets.utils import DEFAULT_DATA_PATH # noqa: E402
def test_language_arrow_schema_has_expected_fields(): def test_language_arrow_schema_has_expected_fields():
+10 -2
View File
@@ -2,8 +2,16 @@
import pytest import pytest
from lerobot.configs.recipe import MessageTurn, TrainingRecipe pytest.importorskip("datasets", reason="datasets is required (install lerobot[dataset])")
from lerobot.datasets.language_render import active_at, emitted_at, nth_next, nth_prev, render_sample
from lerobot.configs.recipe import MessageTurn, TrainingRecipe # noqa: E402
from lerobot.datasets.language_render import ( # noqa: E402
active_at,
emitted_at,
nth_next,
nth_prev,
render_sample,
)
def persistent_row(role, content, style, timestamp, tool_calls=None, camera=None): def persistent_row(role, content, style, timestamp, tool_calls=None, camera=None):
@@ -1,11 +1,15 @@
#!/usr/bin/env python #!/usr/bin/env python
import torch import pytest
from lerobot.configs.recipe import MessageTurn, TrainingRecipe pytest.importorskip("datasets", reason="datasets is required (install lerobot[dataset])")
from lerobot.processor.converters import create_transition
from lerobot.processor.render_messages_processor import RenderMessagesStep import torch # noqa: E402
from lerobot.types import TransitionKey
from lerobot.configs.recipe import MessageTurn, TrainingRecipe # noqa: E402
from lerobot.processor.converters import create_transition # noqa: E402
from lerobot.processor.render_messages_processor import RenderMessagesStep # noqa: E402
from lerobot.types import TransitionKey # noqa: E402
def test_render_messages_step_noops_without_language_columns(): def test_render_messages_step_noops_without_language_columns():
+6 -2
View File
@@ -1,8 +1,12 @@
#!/usr/bin/env python #!/usr/bin/env python
import torch import pytest
from lerobot.utils.collate import lerobot_collate_fn pytest.importorskip("datasets", reason="datasets is required (install lerobot[dataset])")
import torch # noqa: E402
from lerobot.utils.collate import lerobot_collate_fn # noqa: E402
def test_lerobot_collate_preserves_messages_and_drops_raw_language(): def test_lerobot_collate_preserves_messages_and_drops_raw_language():