tests(annotations): guard on the 'dataset' extra so base fast-test tier skips cleanly

Fast Pytest Tests failed at COLLECTION in the base '--extra test' tier
with 'ModuleNotFoundError: No module named datasets': tests/annotations/
conftest.py imported the fixture dataset builder (-> lerobot.datasets ->
the HF 'datasets' lib + pandas/pyarrow), which only ship under the
'dataset' extra, so the whole annotations package crashed.

Fix uses the repo's proven module-level guard pattern (see
tests/datasets/test_language.py), NOT a conftest-level importorskip —
verified empirically that a skip raised by pytest.importorskip while the
conftest is being *imported* is treated as a collection ERROR (exit 1),
while a module-level importorskip in a test module is a clean SKIP.

  * conftest.py: import build_annotation_dataset LAZILY inside the
    fixtures so the conftest itself imports cleanly in every tier.
  * test_modules / test_validator / test_writer / test_pipeline_recipe_
    render: add module-level pytest.importorskip('datasets') and
    pytest.importorskip('pandas') ahead of the lerobot.* imports, which
    now carry # noqa: E402 to match the existing convention. In the two
    modules that import pyarrow, that import is moved below the guards
    as well.
  * tests/scripts/test_lerobot_annotate.py: same module-level guard, on
    'datasets' only (its _push_to_hub path imports lerobot.datasets).

Result:
  - base / hardware / viz tiers (no dataset extra): annotation tests
    skip cleanly; the rest of the suite runs -> exit 0.
  - dataset tier: datasets present -> guards pass through -> annotation
    tests run with the stub VLM. The pipeline modules import only
    stdlib + relative + lerobot.datasets (no module-level datatrove /
    vllm / openai), so they import fine there.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Pepijn
2026-06-03 15:56:53 +02:00
parent 870980efd6
commit b9246ef61b
6 changed files with 72 additions and 24 deletions
+11 -1
View File
@@ -26,12 +26,20 @@ from pathlib import Path
import pytest
from tests.fixtures.dataset_factories import build_annotation_dataset
# NOTE: ``build_annotation_dataset`` pulls in ``lerobot.datasets`` (-> the HF
# ``datasets`` library + ``pandas``), which only ship under the ``dataset``
# extra. It is imported LAZILY inside the fixtures below so this conftest
# imports cleanly in dependency tiers without that extra (e.g. the base
# ``--extra test`` fast-test tier). The annotation test modules guard
# themselves with a module-level ``pytest.importorskip("datasets")`` so
# their collection is skipped — never erroring — when the extra is absent.
@pytest.fixture
def fixture_dataset_root(tmp_path: Path) -> Path:
"""A tiny dataset with two episodes, 12 frames each at 10 fps."""
from tests.fixtures.dataset_factories import build_annotation_dataset
return build_annotation_dataset(
tmp_path / "ds",
episode_specs=[
@@ -44,6 +52,8 @@ def fixture_dataset_root(tmp_path: Path) -> Path:
@pytest.fixture
def single_episode_root(tmp_path: Path) -> Path:
from tests.fixtures.dataset_factories import build_annotation_dataset
return build_annotation_dataset(
tmp_path / "ds_one",
episode_specs=[(0, 30, "Pour water from the bottle into the cup.")],
+14 -6
View File
@@ -22,21 +22,29 @@ from dataclasses import dataclass, field
from pathlib import Path
from typing import Any
from lerobot.annotations.steerable_pipeline.config import (
import pytest
# ``lerobot.annotations`` imports pull in ``lerobot.datasets`` (-> the HF
# ``datasets`` library), which only ships under the ``dataset`` extra. Skip
# this module in tiers without it instead of erroring at import.
pytest.importorskip("datasets", reason="datasets is required (install lerobot[dataset])")
pytest.importorskip("pandas", reason="pandas is required (install lerobot[dataset])")
from lerobot.annotations.steerable_pipeline.config import ( # noqa: E402
InterjectionsConfig,
PlanConfig,
VqaConfig,
)
from lerobot.annotations.steerable_pipeline.modules import (
from lerobot.annotations.steerable_pipeline.modules import ( # noqa: E402
GeneralVqaModule,
InterjectionsAndSpeechModule,
PlanSubtasksMemoryModule,
)
from lerobot.annotations.steerable_pipeline.reader import iter_episodes
from lerobot.annotations.steerable_pipeline.staging import EpisodeStaging
from lerobot.annotations.steerable_pipeline.vlm_client import StubVlmClient
from lerobot.annotations.steerable_pipeline.reader import iter_episodes # noqa: E402
from lerobot.annotations.steerable_pipeline.staging import EpisodeStaging # noqa: E402
from lerobot.annotations.steerable_pipeline.vlm_client import StubVlmClient # noqa: E402
from ._helpers import make_canned_responder
from ._helpers import make_canned_responder # noqa: E402
@dataclass
@@ -19,26 +19,34 @@ from __future__ import annotations
from pathlib import Path
import pyarrow.parquet as pq
import pytest
from lerobot.annotations.steerable_pipeline.config import (
# ``pyarrow`` and the ``lerobot.datasets`` chain (-> the HF ``datasets``
# library) only ship under the ``dataset`` extra. Skip this module in
# tiers without it instead of erroring at import.
pytest.importorskip("datasets", reason="datasets is required (install lerobot[dataset])")
pytest.importorskip("pandas", reason="pandas is required (install lerobot[dataset])")
import pyarrow.parquet as pq # noqa: E402
from lerobot.annotations.steerable_pipeline.config import ( # noqa: E402
AnnotationPipelineConfig,
InterjectionsConfig,
PlanConfig,
VqaConfig,
)
from lerobot.annotations.steerable_pipeline.executor import Executor
from lerobot.annotations.steerable_pipeline.modules import (
from lerobot.annotations.steerable_pipeline.executor import Executor # noqa: E402
from lerobot.annotations.steerable_pipeline.modules import ( # noqa: E402
GeneralVqaModule,
InterjectionsAndSpeechModule,
PlanSubtasksMemoryModule,
)
from lerobot.annotations.steerable_pipeline.validator import StagingValidator
from lerobot.annotations.steerable_pipeline.writer import LanguageColumnsWriter
from lerobot.configs.recipe import MessageTurn, TrainingRecipe
from lerobot.datasets.language_render import render_sample
from lerobot.annotations.steerable_pipeline.validator import StagingValidator # noqa: E402
from lerobot.annotations.steerable_pipeline.writer import LanguageColumnsWriter # noqa: E402
from lerobot.configs.recipe import MessageTurn, TrainingRecipe # noqa: E402
from lerobot.datasets.language_render import render_sample # noqa: E402
from ._helpers import make_canned_responder
from ._helpers import make_canned_responder # noqa: E402
def _build_pr1_style_blend_recipe() -> TrainingRecipe:
+12 -4
View File
@@ -20,10 +20,18 @@ from __future__ import annotations
import json
from pathlib import Path
from lerobot.annotations.steerable_pipeline.reader import iter_episodes
from lerobot.annotations.steerable_pipeline.staging import EpisodeStaging
from lerobot.annotations.steerable_pipeline.validator import StagingValidator
from lerobot.annotations.steerable_pipeline.writer import speech_atom
import pytest
# ``lerobot.annotations`` imports pull in ``lerobot.datasets`` (-> the HF
# ``datasets`` library), which only ships under the ``dataset`` extra. Skip
# this module in tiers without it instead of erroring at import.
pytest.importorskip("datasets", reason="datasets is required (install lerobot[dataset])")
pytest.importorskip("pandas", reason="pandas is required (install lerobot[dataset])")
from lerobot.annotations.steerable_pipeline.reader import iter_episodes # noqa: E402
from lerobot.annotations.steerable_pipeline.staging import EpisodeStaging # noqa: E402
from lerobot.annotations.steerable_pipeline.validator import StagingValidator # noqa: E402
from lerobot.annotations.steerable_pipeline.writer import speech_atom # noqa: E402
def _validate(root: Path, staging_dir: Path):
+11 -4
View File
@@ -20,12 +20,19 @@ from __future__ import annotations
import json
from pathlib import Path
import pyarrow.parquet as pq
import pytest
from lerobot.annotations.steerable_pipeline.reader import iter_episodes
from lerobot.annotations.steerable_pipeline.staging import EpisodeStaging
from lerobot.annotations.steerable_pipeline.writer import (
# ``pyarrow`` and the ``lerobot.annotations`` -> ``lerobot.datasets`` chain
# (-> the HF ``datasets`` library) only ship under the ``dataset`` extra.
# Skip this module in tiers without it instead of erroring at import.
pytest.importorskip("datasets", reason="datasets is required (install lerobot[dataset])")
pytest.importorskip("pandas", reason="pandas is required (install lerobot[dataset])")
import pyarrow.parquet as pq # noqa: E402
from lerobot.annotations.steerable_pipeline.reader import iter_episodes # noqa: E402
from lerobot.annotations.steerable_pipeline.staging import EpisodeStaging # noqa: E402
from lerobot.annotations.steerable_pipeline.writer import ( # noqa: E402
LanguageColumnsWriter,
speech_atom,
)
+7
View File
@@ -3,6 +3,13 @@
import json
from types import SimpleNamespace
import pytest
# ``lerobot.scripts.lerobot_annotate`` (and the ``_push_to_hub`` path it
# exercises) imports ``lerobot.datasets``, which only ships under the
# ``dataset`` extra. Skip in tiers without it instead of erroring.
pytest.importorskip("datasets", reason="datasets is required (install lerobot[dataset])")
def test_push_to_hub_tags_uploaded_dataset_revision(tmp_path, monkeypatch):
from lerobot.scripts.lerobot_annotate import _push_to_hub