From bd6e27f9a1f4454a7e885fcbb8ef1ffa7cc5c0ea Mon Sep 17 00:00:00 2001 From: Pepijn Date: Wed, 8 Apr 2026 14:39:39 +0200 Subject: [PATCH] chore: restore adding_benchmarks + test_dispatch, drop env_processor changes - Restore docs/source/adding_benchmarks.mdx (belongs in this PR) - Restore tests/envs/test_dispatch.py (belongs in this PR) - Revert docs/source/env_processor.mdx to main (out of scope for this PR) Co-Authored-By: Claude Sonnet 4.6 --- docs/source/adding_benchmarks.mdx | 397 ++++++++++++++++++++++++++++++ docs/source/env_processor.mdx | 89 ++++--- tests/envs/test_dispatch.py | 143 +++++++++++ 3 files changed, 592 insertions(+), 37 deletions(-) create mode 100644 docs/source/adding_benchmarks.mdx create mode 100644 tests/envs/test_dispatch.py diff --git a/docs/source/adding_benchmarks.mdx b/docs/source/adding_benchmarks.mdx new file mode 100644 index 000000000..1b1df41b7 --- /dev/null +++ b/docs/source/adding_benchmarks.mdx @@ -0,0 +1,397 @@ +# Adding a New Benchmark + +This guide walks you through adding a new simulation benchmark to LeRobot. Follow the steps in order and use the existing benchmarks as templates. + +A benchmark in LeRobot is a set of [Gymnasium](https://gymnasium.farama.org/) environments that wrap a third-party simulator (like LIBERO or Meta-World) behind a standard `gym.Env` interface. The `lerobot-eval` CLI then runs evaluation uniformly across all benchmarks. + +## Existing benchmarks at a glance + +Before diving in, here is what is already integrated: + +| Benchmark | Env file | Config class | Tasks | Action dim | Processor | +| -------------- | ------------------- | ------------------ | ------------------- | ------------ | ---------------------------- | +| LIBERO | `envs/libero.py` | `LiberoEnv` | 130 across 5 suites | 7 | `LiberoProcessorStep` | +| Meta-World | `envs/metaworld.py` | `MetaworldEnv` | 50 (MT50) | 4 | None | +| IsaacLab Arena | Hub-hosted | `IsaaclabArenaEnv` | Configurable | Configurable | `IsaaclabArenaProcessorStep` | + +Use `src/lerobot/envs/libero.py` and `src/lerobot/envs/metaworld.py` as reference implementations. + +## How it all fits together + +### Data flow + +During evaluation, data moves through four stages: + +``` +1. gym.Env ──→ raw observations (numpy dicts) + +2. Preprocessing ──→ standard LeRobot keys + task description + (preprocess_observation in envs/utils.py, env.call("task_description")) + +3. Processors ──→ env-specific then policy-specific transforms + (env_preprocessor, policy_preprocessor) + +4. Policy ──→ select_action() ──→ action tensor + then reverse: policy_postprocessor → env_postprocessor → numpy action → env.step() +``` + +Most benchmarks only need to care about stage 1 (producing observations in the right format) and optionally stage 3 (if env-specific transforms are needed). + +### Environment structure + +`make_env()` returns a nested dict of vectorized environments: + +```python +dict[str, dict[int, gym.vector.VectorEnv]] +# ^suite ^task_id +``` + +A single-task env (e.g. PushT) looks like `{"pusht": {0: vec_env}}`. +A multi-task benchmark (e.g. LIBERO) looks like `{"libero_spatial": {0: vec0, 1: vec1, ...}, ...}`. + +### How evaluation runs + +All benchmarks are evaluated the same way by `lerobot-eval`: + +1. `make_env()` builds the nested `{suite: {task_id: VectorEnv}}` dict. +2. `eval_policy_all()` iterates over every suite and task. +3. For each task, it runs `n_episodes` rollouts via `rollout()`. +4. Results are aggregated hierarchically: episode, task, suite, overall. +5. 
Metrics include `pc_success` (success rate), `avg_sum_reward`, and `avg_max_reward`. + +The critical piece: your env must return `info["is_success"]` on every `step()` call. This is how the eval loop knows whether a task was completed. + +## What your environment must provide + +LeRobot does not enforce a strict observation schema. Instead it relies on a set of conventions that all benchmarks follow. + +### Env attributes + +Your `gym.Env` must set these attributes: + +| Attribute | Type | Why | +| -------------------- | ----- | ---------------------------------------------------- | +| `_max_episode_steps` | `int` | `rollout()` uses this to cap episode length | +| `task_description` | `str` | Passed to VLA policies as a language instruction | +| `task` | `str` | Fallback identifier if `task_description` is not set | + +### Success reporting + +Your `step()` and `reset()` must include `"is_success"` in the `info` dict: + +```python +info = {"is_success": True} # or False +return observation, reward, terminated, truncated, info +``` + +### Observations + +The simplest approach is to map your simulator's outputs to the standard keys that `preprocess_observation()` already understands. Do this inside your `gym.Env` (e.g. in a `_format_raw_obs()` helper): + +| Your env should output | LeRobot maps it to | What it is | +| ------------------------- | -------------------------- | ------------------------------------- | +| `"pixels"` (single array) | `observation.image` | Single camera image, HWC uint8 | +| `"pixels"` (dict) | `observation.images.` | Multiple cameras, each HWC uint8 | +| `"agent_pos"` | `observation.state` | Proprioceptive state vector | +| `"environment_state"` | `observation.env_state` | Full environment state (e.g. PushT) | +| `"robot_state"` | `observation.robot_state` | Nested robot state dict (e.g. LIBERO) | + +If your simulator uses different key names, you have two options: + +1. **Recommended:** Rename them to the standard keys inside your `gym.Env` wrapper. +2. **Alternative:** Write an env processor to transform observations after `preprocess_observation()` runs (see step 4 below). + +### Actions + +Actions are continuous numpy arrays in a `gym.spaces.Box`. The dimensionality depends on your benchmark (7 for LIBERO, 4 for Meta-World, etc.). Policies adapt to different action dimensions through their `input_features` / `output_features` config. + +### Feature declaration + +Each `EnvConfig` subclass declares two dicts that tell the policy what to expect: + +- `features` — maps feature names to `PolicyFeature(type, shape)` (e.g. action dim, image shape). +- `features_map` — maps raw observation keys to LeRobot convention keys (e.g. `"agent_pos"` to `"observation.state"`). + +## Step by step + + + At minimum, you need two files: a **gym.Env wrapper** and an **EnvConfig + subclass** with a `create_envs()` override. Everything else is optional or + documentation. No changes to `factory.py` are needed. 
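+
+As a concrete reference for the observation conventions described above, here is a minimal sketch of the `_format_raw_obs()` helper mentioned in the Observations section. The simulator-side key names (`"rgb"`, `"joint_positions"`) are purely illustrative; substitute whatever your simulator actually returns, and assume `numpy` is imported as `np`:
+
+```python
+def _format_raw_obs(self, raw_obs: dict) -> dict:
+    """Map simulator-specific keys to the standard keys preprocess_observation() understands."""
+    return {
+        # Single camera: one HWC uint8 image under "pixels".
+        # For multiple cameras, return a dict of images under "pixels" instead.
+        "pixels": raw_obs["rgb"].astype(np.uint8),
+        # Proprioceptive state vector under "agent_pos".
+        "agent_pos": raw_obs["joint_positions"].astype(np.float32),
+    }
+```
+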
+ + +### Checklist + +| File | Required | Why | +| ----------------------------------------- | -------- | ------------------------------------------------------------ | +| `src/lerobot/envs/.py` | Yes | Wraps the simulator as a standard gym.Env | +| `src/lerobot/envs/configs.py` | Yes | Registers your benchmark and its `create_envs()` for the CLI | +| `src/lerobot/processor/env_processor.py` | Optional | Custom observation/action transforms | +| `src/lerobot/envs/utils.py` | Optional | Only if you need new raw observation keys | +| `pyproject.toml` | Yes | Declares benchmark-specific dependencies | +| `docs/source/.mdx` | Yes | User-facing documentation page | +| `docs/source/_toctree.yml` | Yes | Adds your page to the docs sidebar | +| `docker/Dockerfile.benchmark.` | Yes | Isolated Docker image for CI smoke tests | +| `.github/workflows/benchmark_tests.yml` | Yes | CI job that builds the image and runs a 1-episode smoke eval | + +### 1. The gym.Env wrapper (`src/lerobot/envs/.py`) + +Create a `gym.Env` subclass that wraps the third-party simulator: + +```python +class MyBenchmarkEnv(gym.Env): + metadata = {"render_modes": ["rgb_array"], "render_fps": } + + def __init__(self, task_suite, task_id, ...): + super().__init__() + self.task = + self.task_description = + self._max_episode_steps = + self.observation_space = spaces.Dict({...}) + self.action_space = spaces.Box(low=..., high=..., shape=(...,), dtype=np.float32) + + def reset(self, seed=None, **kwargs): + ... # return (observation, info) — info must contain {"is_success": False} + + def step(self, action: np.ndarray): + ... # return (obs, reward, terminated, truncated, info) — info must contain {"is_success": } + + def render(self): + ... # return RGB image as numpy array + + def close(self): + ... +``` + +**GPU-based simulators (e.g. MuJoCo with EGL rendering):** If your simulator allocates GPU/EGL contexts during `__init__`, defer that allocation to a `_ensure_env()` helper called on first `reset()`/`step()`. This avoids inheriting stale GPU handles when `AsyncVectorEnv` spawns worker processes. See `LiberoEnv._ensure_env()` for the pattern. + +Also provide a factory function that returns the nested dict structure: + +```python +def create_mybenchmark_envs( + task: str, + n_envs: int, + gym_kwargs: dict | None = None, + env_cls: type | None = None, +) -> dict[str, dict[int, Any]]: + """Create {suite_name: {task_id: VectorEnv}} for MyBenchmark.""" + ... +``` + +See `create_libero_envs()` (multi-suite, multi-task) and `create_metaworld_envs()` (difficulty-grouped tasks) for reference. + +### 2. The config (`src/lerobot/envs/configs.py`) + +Register a config dataclass so users can select your benchmark with `--env.type=`. Each config owns its environment creation and processor logic via two methods: + +- **`create_envs(n_envs, use_async_envs)`** — Returns `{suite: {task_id: VectorEnv}}`. The base class default uses `gym.make()` for single-task envs. Multi-task benchmarks override this. +- **`get_env_processors()`** — Returns `(preprocessor, postprocessor)`. The base class default returns identity (no-op) pipelines. Override if your benchmark needs observation/action transforms. 
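+
+The `create_envs()` override usually just delegates to the `create_*_envs()` factory from step 1. As a rough sketch of what that factory might do internally (assuming `import gymnasium as gym` and `from typing import Any` are in scope; the `task_ids_in_suite()` helper and the keyword arguments passed to the env class are hypothetical, and real benchmarks typically switch to `gym.vector.AsyncVectorEnv` when async envs are requested):
+
+```python
+def create_mybenchmark_envs(
+    task: str,
+    n_envs: int,
+    gym_kwargs: dict | None = None,
+    env_cls: type | None = None,
+) -> dict[str, dict[int, Any]]:
+    """Sketch: build {suite_name: {task_id: VectorEnv}} with n_envs copies of each task."""
+    gym_kwargs = gym_kwargs or {}
+    env_cls = env_cls or MyBenchmarkEnv
+    envs: dict[str, dict[int, Any]] = {}
+    for task_id in task_ids_in_suite(task):  # hypothetical helper enumerating the suite's tasks
+        envs.setdefault(task, {})[task_id] = gym.vector.SyncVectorEnv(
+            # Bind task_id through a default argument so each closure captures its own value.
+            [lambda tid=task_id: env_cls(task_suite=task, task_id=tid, **gym_kwargs) for _ in range(n_envs)]
+        )
+    return envs
+```
+
+The config subclass that wires these two methods up looks like this:
+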
+ +```python +@EnvConfig.register_subclass("") +@dataclass +class MyBenchmarkEnvConfig(EnvConfig): + task: str = "" + fps: int = + obs_type: str = "pixels_agent_pos" + + features: dict[str, PolicyFeature] = field(default_factory=lambda: { + ACTION: PolicyFeature(type=FeatureType.ACTION, shape=(,)), + }) + features_map: dict[str, str] = field(default_factory=lambda: { + ACTION: ACTION, + "agent_pos": OBS_STATE, + "pixels": OBS_IMAGE, + }) + + def __post_init__(self): + ... # populate features based on obs_type + + @property + def gym_kwargs(self) -> dict: + return {"obs_type": self.obs_type, "render_mode": self.render_mode} + + def create_envs(self, n_envs: int, use_async_envs: bool = True): + """Override for multi-task benchmarks or custom env creation.""" + from lerobot.envs. import create__envs + return create__envs(task=self.task, n_envs=n_envs, ...) + + def get_env_processors(self): + """Override if your benchmark needs observation/action transforms.""" + from lerobot.processor.pipeline import PolicyProcessorPipeline + from lerobot.processor.env_processor import MyBenchmarkProcessorStep + return ( + PolicyProcessorPipeline(steps=[MyBenchmarkProcessorStep()]), + PolicyProcessorPipeline(steps=[]), + ) +``` + +Key points: + +- The `register_subclass` name is what users pass on the CLI (`--env.type=`). +- `features` tells the policy what the environment produces. +- `features_map` maps raw observation keys to LeRobot convention keys. +- **No changes to `factory.py` needed** — the factory delegates to `cfg.create_envs()` and `cfg.get_env_processors()` automatically. + +### 3. Env processor (optional — `src/lerobot/processor/env_processor.py`) + +Only needed if your benchmark requires observation transforms beyond what `preprocess_observation()` handles (e.g. image flipping, coordinate conversion). Define the processor step here and return it from `get_env_processors()` in your config (see step 2): + +```python +@dataclass +@ProcessorStepRegistry.register(name="_processor") +class MyBenchmarkProcessorStep(ObservationProcessorStep): + def _process_observation(self, observation): + processed = observation.copy() + # your transforms here + return processed + + def transform_features(self, features): + return features # update if shapes change + + def observation(self, observation): + return self._process_observation(observation) +``` + +See `LiberoProcessorStep` for a full example (image rotation, quaternion-to-axis-angle conversion). + +### 4. Dependencies (`pyproject.toml`) + +Add a new optional-dependency group: + +```toml +mybenchmark = ["my-benchmark-pkg==1.2.3", "lerobot[scipy-dep]"] +``` + +Pinning rules: + +- **Always pin** benchmark packages to exact versions for reproducibility (e.g. `metaworld==3.0.0`). +- **Add platform markers** when needed (e.g. `; sys_platform == 'linux'`). +- **Pin fragile transitive deps** if known (e.g. `gymnasium==1.1.0` for Meta-World). +- **Document constraints** in your benchmark doc page. + +Users install with: + +```bash +pip install -e ".[mybenchmark]" +``` + +### 5. Documentation (`docs/source/.mdx`) + +Write a user-facing page following the template in the next section. See `docs/source/libero.mdx` and `docs/source/metaworld.mdx` for full examples. + +### 6. 
Table of contents (`docs/source/_toctree.yml`) + +Add your benchmark to the "Benchmarks" section: + +```yaml +- sections: + - local: libero + title: LIBERO + - local: metaworld + title: Meta-World + - local: envhub_isaaclab_arena + title: NVIDIA IsaacLab Arena Environments + - local: + title: + title: "Benchmarks" +``` + +### 7. CI smoke test (`docker/` + `.github/workflows/benchmark_tests.yml`) + +Each benchmark must have an isolated Docker image and a CI job that runs a 1-episode eval. This catches install-time regressions (broken transitive deps, import errors, interactive prompts) before they reach users. + +**Create `docker/Dockerfile.benchmark.`** — copy an existing one and change only the extra name: + +```dockerfile +# Isolated benchmark image — installs lerobot[] only. +# Build: docker build -f docker/Dockerfile.benchmark. -t lerobot-benchmark- . +ARG CUDA_VERSION=12.4.1 +ARG OS_VERSION=22.04 +FROM nvidia/cuda:${CUDA_VERSION}-base-ubuntu${OS_VERSION} +ARG PYTHON_VERSION=3.12 +# ... (same system deps as Dockerfile.benchmark.libero) ... +RUN uv sync --locked --extra --no-cache +``` + +Each benchmark gets its own image so its dependency tree (pinned simulator packages, specific mujoco/scipy versions) cannot conflict with other benchmarks. + +**Add a job to `.github/workflows/benchmark_tests.yml`** — copy an existing job block and adjust: + +```yaml +-integration-test: + name: — build image + 1-episode eval + runs-on: + group: aws-g6-4xlarge-plus + env: + HF_USER_TOKEN: ${{ secrets.LEROBOT_HF_USER }} + steps: + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + persist-credentials: false + lfs: true + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 # zizmor: ignore[unpinned-uses] + with: + cache-binary: false + - name: Build image + uses: docker/build-push-action@v6 # zizmor: ignore[unpinned-uses] + with: + context: . + file: docker/Dockerfile.benchmark. + push: false + load: true + tags: lerobot-benchmark-:ci + cache-from: type=local,src=/tmp/.buildx-cache- + cache-to: type=local,dest=/tmp/.buildx-cache-,mode=max + - name: Run smoke eval (1 episode) + run: | + docker run --rm --gpus all \ + --shm-size=4g \ + -e HF_HOME=/tmp/hf \ + -e HF_USER_TOKEN="${HF_USER_TOKEN}" \ + lerobot-benchmark-:ci \ + bash -c " + hf auth login --token \"\$HF_USER_TOKEN\" --add-to-git-credential 2>/dev/null || true + lerobot-eval \ + --policy.path= \ + --env.type= \ + --env.task= \ + --eval.batch_size=1 \ + --eval.n_episodes=1 \ + --eval.use_async_envs=false \ + --policy.device=cuda + " +``` + +**Tips:** + +- If the benchmark library prompts for user input on import (like LIBERO asking for a dataset folder), pass the relevant env var in the `docker run` command (e.g. `-e LIBERO_DATA_FOLDER=/tmp/libero_data`). +- The job is scoped to only trigger on changes to `src/lerobot/envs/**`, `src/lerobot/scripts/lerobot_eval.py`, and the Dockerfiles — it won't run on unrelated PRs. + +## Verifying your integration + +After completing the steps above, confirm that everything works: + +1. **Install** — `pip install -e ".[mybenchmark]"` and verify the dependency group installs cleanly. +2. **Smoke test env creation** — call `make_env()` with your config in Python, check that the returned dict has the expected `{suite: {task_id: VectorEnv}}` shape, and that `reset()` returns observations with the right keys. +3. **Run a full eval** — `lerobot-eval --env.type= --env.task= --eval.n_episodes=1 --policy.path=` to exercise the full pipeline end-to-end. 
(`batch_size` defaults to auto-tuning based on CPU cores; pass `--eval.batch_size=1` to force a single environment.) +4. **Check success detection** — verify that `info["is_success"]` flips to `True` when the task is actually completed. This is what the eval loop uses to compute success rates. +5. **Add CI smoke test** — follow step 7 above to add a Dockerfile and CI job. This ensures the install stays green as dependencies evolve. + +## Writing a benchmark doc page + +Each benchmark `.mdx` page should include: + +- **Title and description** — 1-2 paragraphs on what the benchmark tests and why it matters. +- **Links** — paper, GitHub repo, project website (if available). +- **Overview image or GIF.** +- **Available tasks** — table of task suites with counts and brief descriptions. +- **Installation** — `pip install -e ".[]"` plus any extra steps (env vars, system packages). +- **Evaluation** — recommended `lerobot-eval` command with `n_episodes` for reproducible results. `batch_size` defaults to auto; only specify it if needed. Include single-task and multi-task examples if applicable. See the [Evaluation guide](evaluation) for details. +- **Policy inputs and outputs** — observation keys with shapes, action space description. +- **Recommended evaluation episodes** — how many episodes per task is standard. +- **Training** — example `lerobot-train` command. +- **Reproducing published results** — link to pretrained model, eval command, results table (if available). + +See `docs/source/libero.mdx` and `docs/source/metaworld.mdx` for complete examples. diff --git a/docs/source/env_processor.mdx b/docs/source/env_processor.mdx index c273591e2..8dbf315c7 100644 --- a/docs/source/env_processor.mdx +++ b/docs/source/env_processor.mdx @@ -25,28 +25,31 @@ raw_observation = env.step(action) # 2. Convert numpy to torch, normalize images [0,1] observation = preprocess_observation(raw_observation) -# 3. ENVIRONMENT-SPECIFIC preprocessing (NEW!) +# 3. Add task metadata (for multi-task environments) +observation = add_envs_task(env, observation) + +# 4. ENVIRONMENT-SPECIFIC preprocessing (NEW!) # - Flatten robot states # - Rotate images to match dataset conventions # - Handle environment-specific coordinate systems observation = env_preprocessor(observation) -# 4. POLICY-SPECIFIC preprocessing +# 5. POLICY-SPECIFIC preprocessing # - Normalize with dataset statistics # - Add batch dimensions # - Move to GPU # - Tokenize language instructions observation = preprocessor(observation) -# 5. Policy inference +# 6. Policy inference action = policy.select_action(observation) -# 6. POLICY-SPECIFIC postprocessing +# 7. POLICY-SPECIFIC postprocessing # - Unnormalize actions # - Remove batch dimensions action = postprocessor(action) -# 7. ENVIRONMENT-SPECIFIC postprocessing (NEW!) +# 8. ENVIRONMENT-SPECIFIC postprocessing (NEW!) 
# - Convert action formats if needed # - Apply environment-specific constraints action_transition = {"action": action} @@ -148,7 +151,7 @@ observation = { ### Factory Function -The `make_env_pre_post_processors` function delegates to `env_cfg.get_env_processors()`: +The `make_env_pre_post_processors` function follows the same pattern as `make_pre_post_processors` for policies: ```python from lerobot.envs.factory import make_env_pre_post_processors @@ -156,30 +159,46 @@ from lerobot.envs.configs import LiberoEnv, PushtEnv # For LIBERO: Returns LiberoProcessorStep in preprocessor libero_cfg = LiberoEnv(task="libero_spatial", camera_name=["agentview"]) -env_preprocessor, env_postprocessor = make_env_pre_post_processors(libero_cfg, policy_cfg) +env_preprocessor, env_postprocessor = make_env_pre_post_processors(libero_cfg) # For other environments: Returns identity processors (no-op) pusht_cfg = PushtEnv() -env_preprocessor, env_postprocessor = make_env_pre_post_processors(pusht_cfg, policy_cfg) +env_preprocessor, env_postprocessor = make_env_pre_post_processors(pusht_cfg) ``` -### How It Works - -Each `EnvConfig` subclass can override `get_env_processors()` to return benchmark-specific -processor pipelines. The base class returns identity (no-op) processors by default. +### Implementation in `envs/factory.py` ```python -# In your EnvConfig subclass: -def get_env_processors(self): - from lerobot.processor.pipeline import PolicyProcessorPipeline - return ( - PolicyProcessorPipeline(steps=[MyProcessorStep()]), - PolicyProcessorPipeline(steps=[]), - ) -``` +def make_env_pre_post_processors( + env_cfg: EnvConfig, +) -> tuple[ + PolicyProcessorPipeline[dict[str, Any], dict[str, Any]], + PolicyProcessorPipeline[dict[str, Any], dict[str, Any]], +]: + """ + Create preprocessor and postprocessor pipelines for environment observations. -The factory function `make_env_pre_post_processors` simply delegates to this method, -with a special case for `XVLAConfig` policies which override the env processors entirely. + Args: + env_cfg: The configuration of the environment. + + Returns: + A tuple containing: + - preprocessor: Pipeline that processes environment observations + - postprocessor: Pipeline that processes environment outputs + """ + # For LIBERO environments, add the LiberoProcessorStep to preprocessor + if isinstance(env_cfg, LiberoEnv) or "libero" in env_cfg.type: + preprocessor = PolicyProcessorPipeline(steps=[LiberoProcessorStep()]) + else: + # For all other environments, return an identity preprocessor + preprocessor = PolicyProcessorPipeline(steps=[]) + + # Postprocessor is currently identity for all environments + # Future: Could add environment-specific action transformations + postprocessor = PolicyProcessorPipeline(steps=[]) + + return preprocessor, postprocessor +``` ### Integration in Evaluation @@ -200,10 +219,7 @@ def eval_main(cfg: EvalPipelineConfig): ) # Create environment processors (NEW!) - env_preprocessor, env_postprocessor = make_env_pre_post_processors( - env_cfg=cfg.env, - policy_cfg=cfg.policy, - ) + env_preprocessor, env_postprocessor = make_env_pre_post_processors(env_cfg=cfg.env) # Run evaluation with both processor types eval_policy_all( @@ -310,19 +326,18 @@ class MyEnvProcessorStep(ObservationProcessorStep): ### 2. Update the Factory ```python -# In src/lerobot/envs/configs.py -@EnvConfig.register_subclass("myenv") -@dataclass -class MyEnvConfig(EnvConfig): - # ... task/features/gym kwargs ... 
+# In src/lerobot/envs/factory.py - def get_env_processors(self): - from lerobot.processor.pipeline import PolicyProcessorPipeline +def make_env_pre_post_processors(env_cfg: EnvConfig): + if isinstance(env_cfg, LiberoEnv) or "libero" in env_cfg.type: + preprocessor = PolicyProcessorPipeline(steps=[LiberoProcessorStep()]) + elif isinstance(env_cfg, MyEnvConfig) or "myenv" in env_cfg.type: + preprocessor = PolicyProcessorPipeline(steps=[MyEnvProcessorStep()]) + else: + preprocessor = PolicyProcessorPipeline(steps=[]) - return ( - PolicyProcessorPipeline(steps=[MyEnvProcessorStep()]), - PolicyProcessorPipeline(steps=[]), - ) + postprocessor = PolicyProcessorPipeline(steps=[]) + return preprocessor, postprocessor ``` ### 3. Use in Evaluation diff --git a/tests/envs/test_dispatch.py b/tests/envs/test_dispatch.py new file mode 100644 index 000000000..5bd2827f3 --- /dev/null +++ b/tests/envs/test_dispatch.py @@ -0,0 +1,143 @@ +"""Tests for the benchmark dispatch refactor (create_envs / get_env_processors on EnvConfig).""" + +from __future__ import annotations + +import logging +from dataclasses import dataclass, field + +import gymnasium as gym +import pytest +from gymnasium.envs.registration import register, registry as gym_registry + +from lerobot.configs.types import PolicyFeature +from lerobot.envs.configs import EnvConfig +from lerobot.envs.factory import make_env, make_env_config, make_env_pre_post_processors + +logger = logging.getLogger(__name__) + + +def test_registry_all_types(): + """make_env_config should resolve every registered EnvConfig subclass via the registry.""" + known = list(EnvConfig.get_known_choices().keys()) + assert len(known) >= 6 + for t in known: + cfg = make_env_config(t) + if not isinstance(cfg, EnvConfig): + continue + assert cfg.type == t + + +def test_unknown_type(): + with pytest.raises(ValueError, match="not registered"): + make_env_config("nonexistent") + + +def test_identity_processors(): + """Base class get_env_processors() returns identity pipelines.""" + cfg = make_env_config("aloha") + pre, post = cfg.get_env_processors() + assert len(pre.steps) == 0 and len(post.steps) == 0 + + +def test_delegation(): + """make_env() should call cfg.create_envs(), not use if/elif dispatch.""" + sentinel = {"delegated": {0: "marker"}} + fake = type( + "Fake", + (), + { + "hub_path": None, + "create_envs": lambda self, n_envs, use_async_envs=False: sentinel, + }, + )() + result = make_env(fake, n_envs=1) + assert result is sentinel + + +def test_processors_delegation(): + """make_env_pre_post_processors delegates to cfg.get_env_processors().""" + cfg = make_env_config("aloha") + pre, post = make_env_pre_post_processors(cfg, policy_cfg=None) + assert len(pre.steps) == 0 + + +def test_base_create_envs(): + """Base class create_envs() should build a single-task VectorEnv via gym.make().""" + gym_id = "_dispatch_test/CartPole-v99" + if gym_id not in gym_registry: + register(id=gym_id, entry_point="gymnasium.envs.classic_control:CartPoleEnv") + + @EnvConfig.register_subclass("_dispatch_base_test") + @dataclass + class _Env(EnvConfig): + task: str = "CartPole-v99" + fps: int = 10 + features: dict[str, PolicyFeature] = field(default_factory=dict) + + @property + def package_name(self): + return "_dispatch_test" + + @property + def gym_id(self): + return gym_id + + @property + def gym_kwargs(self): + return {} + + try: + envs = _Env().create_envs(n_envs=2) + assert "_dispatch_base_test" in envs + env = envs["_dispatch_base_test"][0] + assert isinstance(env, gym.vector.VectorEnv) + 
assert env.num_envs == 2 + env.close() + finally: + if gym_id in gym_registry: + del gym_registry[gym_id] + + +def test_custom_create_envs_override(): + """A custom EnvConfig subclass can override create_envs().""" + mock_vec = gym.vector.SyncVectorEnv([lambda: gym.make("CartPole-v1")]) + + @EnvConfig.register_subclass("_dispatch_custom_test") + @dataclass + class _Env(EnvConfig): + task: str = "x" + features: dict[str, PolicyFeature] = field(default_factory=dict) + + @property + def gym_kwargs(self): + return {} + + def create_envs(self, n_envs, use_async_envs=False): + return {"custom_suite": {0: mock_vec}} + + try: + result = make_env(_Env(), n_envs=1) + assert "custom_suite" in result + finally: + mock_vec.close() + + +def test_custom_get_env_processors_override(): + """A custom EnvConfig subclass can override get_env_processors().""" + from lerobot.processor.pipeline import DataProcessorPipeline + + @EnvConfig.register_subclass("_dispatch_proc_test") + @dataclass + class _Env(EnvConfig): + task: str = "x" + features: dict[str, PolicyFeature] = field(default_factory=dict) + + @property + def gym_kwargs(self): + return {} + + def get_env_processors(self): + return DataProcessorPipeline(steps=[]), DataProcessorPipeline(steps=[]) + + pre, post = _Env().get_env_processors() + assert isinstance(pre, DataProcessorPipeline)