From d11ec6b5ef15d937d8281f7a72ebb4652d05216e Mon Sep 17 00:00:00 2001 From: Steven Palma Date: Fri, 24 Oct 2025 17:31:37 +0200 Subject: [PATCH 1/4] docs(readme): update installation instructions for 0.4.0 (#2310) --- README.md | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 56d82c0c7..58a8ccc1b 100644 --- a/README.md +++ b/README.md @@ -185,6 +185,11 @@ _Replace `[...]` with your desired features._ For a full list of optional dependencies, see: https://pypi.org/project/lerobot/ +> [!NOTE] +> For lerobot 0.4.0, if you want to install libero or pi tags, you will have to do: `pip install "lerobot[pi,libero]@git+https://github.com/huggingface/lerobot.git"`. +> +> This will be solved in the next patch release + ### Weights & Biases To use [Weights and Biases](https://docs.wandb.ai/quickstart) for experiment tracking, log in with @@ -337,7 +342,3 @@ If you want, you can cite this work with: ## Star History [![Star History Chart](https://api.star-history.com/svg?repos=huggingface/lerobot&type=Timeline)](https://star-history.com/#huggingface/lerobot&Timeline) - -``` - -``` From d1548e1d1341564be7941396f6e71b7dc763825a Mon Sep 17 00:00:00 2001 From: Steven Palma Date: Sun, 26 Oct 2025 15:37:41 +0800 Subject: [PATCH 2/4] docs(install): improve groot and libero installation instructions (#2314) --- docs/source/groot.mdx | 5 ++++- docs/source/libero.mdx | 5 +++++ docs/source/pi0.mdx | 5 +++++ docs/source/pi05.mdx | 5 +++++ 4 files changed, 19 insertions(+), 1 deletion(-) diff --git a/docs/source/groot.mdx b/docs/source/groot.mdx index 02d701576..729a64656 100644 --- a/docs/source/groot.mdx +++ b/docs/source/groot.mdx @@ -40,7 +40,7 @@ python -c "import flash_attn; print(f'Flash Attention {flash_attn.__version__} i 3. 
Install LeRobot by running: ```bash -pip install lerobot[groot] # consider also installing libero,dev and test tags +pip install lerobot[groot] ``` ## Usage @@ -83,6 +83,9 @@ accelerate launch \ ### Libero Benchmark Results +> [!NOTE] +> Follow our instructions for Libero usage: [Libero](./libero) + GR00T has demonstrated strong performance on the Libero benchmark suite. To compare and test its LeRobot implementation, we finetuned the GR00T N1.5 model for 30k steps on the Libero dataset and compared the results to the GR00T reference results. | Benchmark | LeRobot Implementation | GR00T Reference | diff --git a/docs/source/libero.mdx b/docs/source/libero.mdx index 14f51ef3b..f0448c3f4 100644 --- a/docs/source/libero.mdx +++ b/docs/source/libero.mdx @@ -28,6 +28,11 @@ LIBERO is now part of our **multi-eval supported simulation**, meaning you can b To install LIBERO, after following the LeRobot official instructions, just do: `pip install -e ".[libero]"` +> [!NOTE] +> For lerobot 0.4.0, if you want to install libero tag, you will have to do: `pip install "lerobot[libero]@git+https://github.com/huggingface/lerobot.git"`. +> +> This will be solved in the next patch release + ### Single-suite evaluation Evaluate a policy on one LIBERO suite: diff --git a/docs/source/pi0.mdx b/docs/source/pi0.mdx index d36fe0ce4..d15f7e91f 100644 --- a/docs/source/pi0.mdx +++ b/docs/source/pi0.mdx @@ -28,6 +28,11 @@ As described by Physical Intelligence, while AI has achieved remarkable success pip install -e ".[pi]" ``` + > [!NOTE] + > For lerobot 0.4.0, if you want to install pi tag, you will have to do: `pip install "lerobot[pi]@git+https://github.com/huggingface/lerobot.git"`. 
+ > + > This will be solved in the next patch release + ## Training Data and Capabilities π₀ is trained on the largest robot interaction dataset to date, combining three key data sources: diff --git a/docs/source/pi05.mdx b/docs/source/pi05.mdx index b6267fc5e..29b797935 100644 --- a/docs/source/pi05.mdx +++ b/docs/source/pi05.mdx @@ -36,6 +36,11 @@ This diverse training mixture creates a "curriculum" that enables generalization pip install -e ".[pi]" ``` + > [!NOTE] + > For lerobot 0.4.0, if you want to install pi tag, you will have to do: `pip install "lerobot[pi]@git+https://github.com/huggingface/lerobot.git"`. + > + > This will be solved in the next patch release + ## Usage To use π₀.₅ in your LeRobot configuration, specify the policy type as: From 3f8c5d9809d07e7767f2a1366fee86e6e27640a2 Mon Sep 17 00:00:00 2001 From: Caroline Pascal Date: Tue, 28 Oct 2025 09:41:33 +0100 Subject: [PATCH 3/4] fix(video_key typo): fixing video_key typo in update_video_info (#2323) --- src/lerobot/datasets/lerobot_dataset.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/lerobot/datasets/lerobot_dataset.py b/src/lerobot/datasets/lerobot_dataset.py index a6840891f..c8bc5049e 100644 --- a/src/lerobot/datasets/lerobot_dataset.py +++ b/src/lerobot/datasets/lerobot_dataset.py @@ -430,9 +430,7 @@ class LeRobotDatasetMetadata: video_keys = [video_key] if video_key is not None else self.video_keys for key in video_keys: if not self.features[key].get("info", None): - video_path = self.root / self.video_path.format( - video_key=video_key, chunk_index=0, file_index=0 - ) + video_path = self.root / self.video_path.format(video_key=key, chunk_index=0, file_index=0) self.info["features"][key]["info"] = get_video_info(video_path) def update_chunk_settings( From d57d1aa1970eec7b102a9527262ede4a3dd7a726 Mon Sep 17 00:00:00 2001 From: Jade Choghari Date: Fri, 31 Oct 2025 13:08:42 +0100 Subject: [PATCH 4/4] fix(make_policy): rename mapping edge cases in training (#2332) * 
fix bug * update fixes * add hf license * more fixes * add transformers * iterate on review * more fixes * more fixes * add a False test * reduce img size * reduce img size * skip the test * add * add style --- src/lerobot/policies/factory.py | 20 +-- src/lerobot/policies/utils.py | 41 ++++++ tests/training/test_visual_validation.py | 157 +++++++++++++++++++++++ 3 files changed, 202 insertions(+), 16 deletions(-) create mode 100644 tests/training/test_visual_validation.py diff --git a/src/lerobot/policies/factory.py b/src/lerobot/policies/factory.py index bdad5cbb3..eb6266757 100644 --- a/src/lerobot/policies/factory.py +++ b/src/lerobot/policies/factory.py @@ -38,6 +38,7 @@ from lerobot.policies.sac.configuration_sac import SACConfig from lerobot.policies.sac.reward_model.configuration_classifier import RewardClassifierConfig from lerobot.policies.smolvla.configuration_smolvla import SmolVLAConfig from lerobot.policies.tdmpc.configuration_tdmpc import TDMPCConfig +from lerobot.policies.utils import validate_visual_features_consistency from lerobot.policies.vqbet.configuration_vqbet import VQBeTConfig from lerobot.processor import PolicyAction, PolicyProcessorPipeline from lerobot.processor.converters import ( @@ -420,20 +421,7 @@ def make_policy( # policy = torch.compile(policy, mode="reduce-overhead") if not rename_map: - expected_features = set(cfg.input_features.keys()) | set(cfg.output_features.keys()) - provided_features = set(features.keys()) - if expected_features and provided_features != expected_features: - missing = expected_features - provided_features - extra = provided_features - expected_features - # TODO (jadechoghari): provide a dynamic rename map suggestion to the user. 
- raise ValueError( - f"Feature mismatch between dataset/environment and policy config.\n" - f"- Missing features: {sorted(missing) if missing else 'None'}\n" - f"- Extra features: {sorted(extra) if extra else 'None'}\n\n" - f"Please ensure your dataset and policy use consistent feature names.\n" - f"If your dataset uses different observation keys (e.g., cameras named differently), " - f"use the `--rename_map` argument, for example:\n" - f' --rename_map=\'{{"observation.images.left": "observation.images.camera1", ' - f'"observation.images.top": "observation.images.camera2"}}\'' - ) + validate_visual_features_consistency(cfg, features) + # TODO: (jadechoghari) - add a check_state(cfg, features) and check_action(cfg, features) + return policy diff --git a/src/lerobot/policies/utils.py b/src/lerobot/policies/utils.py index 21b39a80e..c4ca35b72 100644 --- a/src/lerobot/policies/utils.py +++ b/src/lerobot/policies/utils.py @@ -22,6 +22,8 @@ import numpy as np import torch from torch import nn +from lerobot.configs.policies import PreTrainedConfig +from lerobot.configs.types import FeatureType, PolicyFeature from lerobot.datasets.utils import build_dataset_frame from lerobot.processor import PolicyAction, RobotAction, RobotObservation from lerobot.utils.constants import ACTION, OBS_STR @@ -198,3 +200,42 @@ def make_robot_action(action_tensor: PolicyAction, ds_features: dict[str, dict]) f"{name}": float(action_tensor[i]) for i, name in enumerate(action_names) } return act_processed_policy + + +def raise_feature_mismatch_error( + provided_features: set[str], + expected_features: set[str], +) -> None: + """ + Raises a standardized ValueError for feature mismatches between dataset/environment and policy config. + """ + missing = expected_features - provided_features + extra = provided_features - expected_features + # TODO (jadechoghari): provide a dynamic rename map suggestion to the user. 
+ raise ValueError( + f"Feature mismatch between dataset/environment and policy config.\n" + f"- Missing features: {sorted(missing) if missing else 'None'}\n" + f"- Extra features: {sorted(extra) if extra else 'None'}\n\n" + f"Please ensure your dataset and policy use consistent feature names.\n" + f"If your dataset uses different observation keys (e.g., cameras named differently), " + f"use the `--rename_map` argument, for example:\n" + f' --rename_map=\'{{"observation.images.left": "observation.images.camera1", ' + f'"observation.images.top": "observation.images.camera2"}}\'' + ) + + +def validate_visual_features_consistency( + cfg: PreTrainedConfig, + features: dict[str, PolicyFeature], +) -> None: + """ + Validates visual feature consistency between a policy config and provided dataset/environment features. + + Args: + cfg (PreTrainedConfig): The model or policy configuration containing input_features and type. + features (Dict[str, PolicyFeature]): A mapping of feature names to PolicyFeature objects. + """ + expected_visuals = {k for k, v in cfg.input_features.items() if v.type == FeatureType.VISUAL} + provided_visuals = {k for k, v in features.items() if v.type == FeatureType.VISUAL} + if not provided_visuals.issubset(expected_visuals): + raise_feature_mismatch_error(provided_visuals, expected_visuals) diff --git a/tests/training/test_visual_validation.py b/tests/training/test_visual_validation.py new file mode 100644 index 000000000..af693fe5e --- /dev/null +++ b/tests/training/test_visual_validation.py @@ -0,0 +1,157 @@ +#!/usr/bin/env python + +# Copyright 2025 The HuggingFace Inc. team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +Visual Feature Consistency Tests + +This module tests the `validate_visual_features_consistency` function, +which ensures that visual features (camera observations) in a dataset/env +match the expectations defined in a policy configuration. + +The purpose of this check is to prevent mismatches between what a policy expects +(e.g., `observation.images.camera1`, `camera2`, `camera3`) and what a dataset or +environment actually provides (e.g., `observation.images.top`, `side`, or fewer cameras). +""" + +from pathlib import Path + +import numpy as np +import pytest + +from lerobot.configs.default import DatasetConfig +from lerobot.configs.policies import PreTrainedConfig +from lerobot.configs.train import TrainPipelineConfig +from lerobot.datasets.lerobot_dataset import LeRobotDataset +from lerobot.policies.factory import make_policy_config +from lerobot.scripts.lerobot_train import train +from lerobot.utils.utils import auto_select_torch_device + +pytest.importorskip("transformers") + +DUMMY_REPO_ID = "dummy/repo" + + +@pytest.fixture +def temp_dir(tmp_path): + return tmp_path + + +DUMMY_STATE_DIM = 6 +DUMMY_ACTION_DIM = 6 +IMAGE_SIZE = 8 +DEVICE = auto_select_torch_device() + + +def make_dummy_dataset(camera_keys, tmp_path): + """Creates a minimal dummy dataset for testing rename_mapping logic.""" + features = { + "action": {"dtype": "float32", "shape": (DUMMY_ACTION_DIM,), "names": None}, + "observation.state": {"dtype": "float32", "shape": (DUMMY_STATE_DIM,), "names": None}, + } + for cam in camera_keys: + features[f"observation.images.{cam}"] = { + 
"dtype": "image", + "shape": (IMAGE_SIZE, IMAGE_SIZE, 3), + "names": ["height", "width", "channel"], + } + dataset = LeRobotDataset.create( + repo_id=DUMMY_REPO_ID, + fps=30, + features=features, + root=tmp_path / "_dataset", + ) + root = tmp_path / "_dataset" + for ep_idx in range(2): + for _ in range(3): + frame = { + "action": np.random.randn(DUMMY_ACTION_DIM).astype(np.float32), + "observation.state": np.random.randn(DUMMY_STATE_DIM).astype(np.float32), + } + for cam in camera_keys: + frame[f"observation.images.{cam}"] = np.random.randint( + 0, 255, size=(IMAGE_SIZE, IMAGE_SIZE, 3), dtype=np.uint8 + ) + frame["task"] = f"task_{ep_idx}" + dataset.add_frame(frame) + dataset.save_episode() + + dataset.finalize() + return dataset, root + + +def custom_validate(train_config: TrainPipelineConfig, policy_path: str, empty_cameras: int): + train_config.policy = PreTrainedConfig.from_pretrained(policy_path) + train_config.policy.pretrained_path = Path(policy_path) + # override empty_cameras and push_to_hub for testing + train_config.policy.empty_cameras = empty_cameras + train_config.policy.push_to_hub = False + if train_config.use_policy_training_preset: + train_config.optimizer = train_config.policy.get_optimizer_preset() + train_config.scheduler = train_config.policy.get_scheduler_preset() + return train_config + + +@pytest.mark.skip(reason="Skipping this test as it results OOM") +@pytest.mark.parametrize( + "camera_keys, empty_cameras, rename_map, expect_success", + [ + # case 1: dataset has fewer cameras than policy (3 instead of 4), but we specify empty_cameras=1 for smolvla, pi0, pi05 + (["camera1", "camera2", "camera3"], 1, {}, True), + # case 2: dataset has 2 cameras with different names, rename_mapping provided + ( + ["top", "side"], + 0, + { + "observation.images.top": "observation.images.camera1", + "observation.images.side": "observation.images.camera2", + }, + True, + ), + # case 3: dataset has 2 cameras, policy expects 3, names do not match, no 
empty_cameras + (["top", "side"], 0, {}, False), + # TODO: case 4: dataset has 2 cameras, policy expects 3, no rename_map, no empty_cameras, should raise for smolvla + # (["camera1", "camera2"], 0, {}, False), + ], +) +def test_train_with_camera_mismatch(camera_keys, empty_cameras, rename_map, expect_success, tmp_path): + """Tests that training works or fails depending on camera/feature alignment.""" + + _dataset, root = make_dummy_dataset(camera_keys, tmp_path) + pretrained_path = "lerobot/smolvla_base" + dataset_config = DatasetConfig(repo_id=DUMMY_REPO_ID, root=root) + policy_config = make_policy_config( + "smolvla", + optimizer_lr=0.01, + push_to_hub=False, + pretrained_path=pretrained_path, + device=DEVICE, + ) + policy_config.empty_cameras = empty_cameras + train_config = TrainPipelineConfig( + dataset=dataset_config, + policy=policy_config, + rename_map=rename_map, + output_dir=tmp_path / "_output", + steps=1, + ) + train_config = custom_validate(train_config, policy_path=pretrained_path, empty_cameras=empty_cameras) + # HACK: disable the internal CLI validation step for tests, we did it with custom_validate + train_config.validate = lambda: None + if expect_success: + train(train_config) + else: + with pytest.raises(ValueError): + train(train_config)