From 8a778c04ff8c33c74f5605479010872319e9a030 Mon Sep 17 00:00:00 2001
From: Pepijn Kooijmans
Date: Tue, 7 Apr 2026 13:38:37 +0200
Subject: [PATCH] docs: update adding_benchmarks for async env changes

- Replace add_envs_task reference with env.call("task_description")
- Update use_async_envs default to True
- Add note about lazy GPU init for AsyncVectorEnv compatibility

Made-with: Cursor
---
 docs/source/adding_benchmarks.mdx | 96 ++++++++++++++++---------------
 1 file changed, 49 insertions(+), 47 deletions(-)

diff --git a/docs/source/adding_benchmarks.mdx b/docs/source/adding_benchmarks.mdx
index db599bb3c..77ccd3d4a 100644
--- a/docs/source/adding_benchmarks.mdx
+++ b/docs/source/adding_benchmarks.mdx
@@ -26,7 +26,7 @@ During evaluation, data moves through four stages:
 1. gym.Env ──→ raw observations (numpy dicts)
 
 2. Preprocessing ──→ standard LeRobot keys + task description
-   (preprocess_observation, add_envs_task in envs/utils.py)
+   (preprocess_observation in envs/utils.py, env.call("task_description"))
 
 3. Processors ──→ env-specific then policy-specific transforms
    (env_preprocessor, policy_preprocessor)
@@ -115,23 +115,22 @@ Each `EnvConfig` subclass declares two dicts that tell the policy what to expect
 
 ## Step by step
 
-At minimum, you need three files: a **gym.Env wrapper**, an **EnvConfig
-subclass**, and a **factory dispatch branch**. Everything else is optional or
-documentation.
+At minimum, you need two files: a **gym.Env wrapper** and an **EnvConfig
+subclass** with a `create_envs()` override. Everything else is optional or
+documentation. No changes to `factory.py` are needed.
 
 ### Checklist
 
-| File                                      | Required | Why                                        |
-| ----------------------------------------- | -------- | ------------------------------------------ |
-| `src/lerobot/envs/<benchmark>.py`         | Yes      | Wraps the simulator as a standard gym.Env  |
-| `src/lerobot/envs/configs.py`             | Yes      | Registers your benchmark for the CLI       |
-| `src/lerobot/envs/factory.py`             | Yes      | Tells `make_env()` how to build your envs  |
-| `src/lerobot/processor/env_processor.py`  | Optional | Custom observation/action transforms       |
-| `src/lerobot/envs/utils.py`               | Optional | Only if you need new raw observation keys  |
-| `pyproject.toml`                          | Yes      | Declares benchmark-specific dependencies   |
-| `docs/source/<benchmark>.mdx`             | Yes      | User-facing documentation page             |
-| `docs/source/_toctree.yml`                | Yes      | Adds your page to the docs sidebar         |
+| File                                      | Required | Why                                                           |
+| ----------------------------------------- | -------- | ------------------------------------------------------------- |
+| `src/lerobot/envs/<benchmark>.py`         | Yes      | Wraps the simulator as a standard gym.Env                     |
+| `src/lerobot/envs/configs.py`             | Yes      | Registers your benchmark and its `create_envs()` for the CLI  |
+| `src/lerobot/processor/env_processor.py`  | Optional | Custom observation/action transforms                          |
+| `src/lerobot/envs/utils.py`               | Optional | Only if you need new raw observation keys                     |
+| `pyproject.toml`                          | Yes      | Declares benchmark-specific dependencies                      |
+| `docs/source/<benchmark>.mdx`             | Yes      | User-facing documentation page                                |
+| `docs/source/_toctree.yml`                | Yes      | Adds your page to the docs sidebar                            |
 
 ### 1. The gym.Env wrapper (`src/lerobot/envs/<benchmark>.py`)
 
@@ -162,6 +161,8 @@ class MyBenchmarkEnv(gym.Env):
     ...
 ```
 
+**GPU-based simulators (e.g. MuJoCo with EGL rendering):** If your simulator allocates GPU/EGL contexts during `__init__`, defer that allocation to a `_ensure_env()` helper called on first `reset()`/`step()`. This avoids inheriting stale GPU handles when `AsyncVectorEnv` spawns worker processes. See `LiberoEnv._ensure_env()` for the pattern.
+
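+In sketch form (a hypothetical `make_simulator()` stands in for whatever actually builds your simulator):
+
+```python
+class MyBenchmarkEnv(gym.Env):
+    def __init__(self, task: str, **kwargs):
+        super().__init__()
+        self._task = task
+        self._env = None  # do NOT create the simulator (or its GPU/EGL context) here
+
+    def _ensure_env(self):
+        # Created lazily inside the worker process, so AsyncVectorEnv workers
+        # never inherit a GPU handle from the parent process.
+        if self._env is None:
+            self._env = make_simulator(self._task)
+
+    def reset(self, seed=None, options=None):
+        self._ensure_env()
+        ...
+
+    def step(self, action):
+        self._ensure_env()
+        ...
+```
+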
 Also provide a factory function that returns the nested dict structure:
 
 ```python
@@ -179,12 +180,15 @@ See `create_libero_envs()` (multi-suite, multi-task) and `create_metaworld_envs(
 
 ### 2. The config (`src/lerobot/envs/configs.py`)
 
-Register a config dataclass so users can select your benchmark with `--env.type=<benchmark>`:
+Register a config dataclass so users can select your benchmark with `--env.type=<benchmark>`. Each config owns its environment creation and processor logic via two methods:
+
+- **`create_envs(n_envs, use_async_envs)`** — Returns `{suite: {task_id: VectorEnv}}`. The base class default uses `gym.make()` for single-task envs. Multi-task benchmarks override this.
+- **`get_env_processors()`** — Returns `(preprocessor, postprocessor)`. The base class default returns identity (no-op) pipelines. Override if your benchmark needs observation/action transforms.
 
 ```python
 @EnvConfig.register_subclass("<benchmark>")
 @dataclass
-class MyBenchmarkEnv(EnvConfig):
+class MyBenchmarkEnvConfig(EnvConfig):
     task: str = "<task>"
     fps: int = <fps>
     obs_type: str = "pixels_agent_pos"
@@ -204,6 +208,20 @@ class MyBenchmarkEnv(EnvConfig):
     @property
     def gym_kwargs(self) -> dict:
         return {"obs_type": self.obs_type, "render_mode": self.render_mode}
+
+    def create_envs(self, n_envs: int, use_async_envs: bool = True):
+        """Override for multi-task benchmarks or custom env creation."""
+        from lerobot.envs.<benchmark> import create_<benchmark>_envs
+        return create_<benchmark>_envs(task=self.task, n_envs=n_envs, ...)
+
+    def get_env_processors(self):
+        """Override if your benchmark needs observation/action transforms."""
+        from lerobot.processor.pipeline import PolicyProcessorPipeline
+        from lerobot.processor.env_processor import MyBenchmarkProcessorStep
+        return (
+            PolicyProcessorPipeline(steps=[MyBenchmarkProcessorStep()]),
+            PolicyProcessorPipeline(steps=[]),
+        )
 ```
 
@@ -211,36 +229,11 @@ Key points:
 - The `register_subclass` name is what users pass on the CLI (`--env.type=<benchmark>`).
 - `features` tells the policy what the environment produces.
 - `features_map` maps raw observation keys to LeRobot convention keys.
+- **No changes to `factory.py` needed** — the factory delegates to `cfg.create_envs()` and `cfg.get_env_processors()` automatically, as sketched below.
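+
+For intuition, `make_env()` now reduces to roughly the following (a sketch, not the exact implementation; the real factory also wires in `cfg.get_env_processors()`):
+
+```python
+def make_env(cfg: EnvConfig, n_envs: int = 1, use_async_envs: bool = True):
+    # No per-benchmark branches: each config builds its own vectorized envs.
+    return cfg.create_envs(n_envs=n_envs, use_async_envs=use_async_envs)
+```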
 
-### 3. The factory dispatch (`src/lerobot/envs/factory.py`)
-
-Add a branch in `make_env()` to call your factory function:
-
-```python
-elif "<benchmark>" in cfg.type:
-    from lerobot.envs.<benchmark> import create_<benchmark>_envs
-
-    if cfg.task is None:
-        raise ValueError("<benchmark> requires a task to be specified")
-
-    return create_<benchmark>_envs(
-        task=cfg.task,
-        n_envs=n_envs,
-        gym_kwargs=cfg.gym_kwargs,
-        env_cls=env_cls,
-    )
-```
-
-If your benchmark needs an env processor, add it in `make_env_pre_post_processors()`:
-
-```python
-if isinstance(env_cfg, MyBenchmarkEnv) or "<benchmark>" in env_cfg.type:
-    preprocessor_steps.append(MyBenchmarkProcessorStep())
-```
-
-### 4. Env processor (optional — `src/lerobot/processor/env_processor.py`)
+### 3. Env processor (optional — `src/lerobot/processor/env_processor.py`)
 
-Only needed if your benchmark requires observation transforms beyond what `preprocess_observation()` handles (e.g. image flipping, coordinate conversion):
+Only needed if your benchmark requires observation transforms beyond what `preprocess_observation()` handles (e.g. image flipping, coordinate conversion). Define the processor step here and return it from `get_env_processors()` in your config (see step 2):
 
 ```python
 @dataclass
@@ -260,7 +253,7 @@ class MyBenchmarkProcessorStep(ObservationProcessorStep):
 
 See `LiberoProcessorStep` for a full example (image rotation, quaternion-to-axis-angle conversion).
 
-### 5. Dependencies (`pyproject.toml`)
+### 4. Dependencies (`pyproject.toml`)
 
 Add a new optional-dependency group:
 
@@ -281,11 +274,11 @@ Users install with:
 pip install -e ".[mybenchmark]"
 ```
 
-### 6. Documentation (`docs/source/<benchmark>.mdx`)
+### 5. Documentation (`docs/source/<benchmark>.mdx`)
 
 Write a user-facing page following the template in the next section. See `docs/source/libero.mdx` and `docs/source/metaworld.mdx` for full examples.
 
-### 7. Table of contents (`docs/source/_toctree.yml`)
+### 6. Table of contents (`docs/source/_toctree.yml`)
 
 Add your benchmark to the "Benchmarks" section:
 
@@ -302,6 +295,15 @@ Add your benchmark to the "Benchmarks" section:
   title: "Benchmarks"
 ```
 
+## Verifying your integration
+
+After completing the steps above, confirm that everything works:
+
+1. **Install** — `pip install -e ".[mybenchmark]"` and verify the dependency group installs cleanly.
+2. **Smoke test env creation** — call `make_env()` with your config in Python, check that the returned dict has the expected `{suite: {task_id: VectorEnv}}` shape, and that `reset()` returns observations with the right keys (see the sketch after this list).
+3. **Run a full eval** — `lerobot-eval --env.type=<benchmark> --env.task=<task> --eval.n_episodes=1 --eval.batch_size=1 --policy.path=<path>` to exercise the full pipeline end-to-end.
+4. **Check success detection** — verify that `info["is_success"]` flips to `True` when the task is actually completed. This is what the eval loop uses to compute success rates.
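+
+A minimal sketch of that smoke test (it assumes the `MyBenchmarkEnvConfig` from step 2; per the note in step 2, `make_env()` ends up in the same `cfg.create_envs()` call):
+
+```python
+from lerobot.envs.configs import MyBenchmarkEnvConfig
+
+cfg = MyBenchmarkEnvConfig(task="<task>")
+envs = cfg.create_envs(n_envs=2, use_async_envs=True)
+
+# Expected shape: {suite: {task_id: VectorEnv}}
+for suite, tasks in envs.items():
+    for task_id, vec_env in tasks.items():
+        obs, info = vec_env.reset(seed=0)
+        print(suite, task_id, sorted(obs))  # raw observation keys
+```
 
 ## Writing a benchmark doc page
 
 Each benchmark `.mdx` page should include: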