diff --git a/.github/workflows/benchmark_tests.yml b/.github/workflows/benchmark_tests.yml
index b07c8f8da..b82c59a8b 100644
--- a/.github/workflows/benchmark_tests.yml
+++ b/.github/workflows/benchmark_tests.yml
@@ -382,6 +382,7 @@ jobs:
                 --policy.path=\"\$ROBOTWIN_POLICY\" \
                 --env.type=robotwin \
                 --env.task=\"\$ROBOTWIN_TASKS\" \
+                --env.max_parallel_tasks=5 \
                 --eval.batch_size=1 \
                 --eval.n_episodes=1 \
                 --eval.use_async_envs=false \
@@ -482,6 +483,7 @@ jobs:
                 --policy.path=lerobot/smolvla_robocasa \
                 --env.type=robocasa \
                 --env.task=CloseFridge,OpenCabinet,OpenDrawer,TurnOnMicrowave,TurnOffStove,CloseToasterOvenDoor,SlideDishwasherRack,TurnOnSinkFaucet,NavigateKitchen,TurnOnElectricKettle \
+                --env.max_parallel_tasks=5 \
                 --eval.batch_size=1 \
                 --eval.n_episodes=1 \
                 --eval.use_async_envs=false \
@@ -693,6 +695,7 @@ jobs:
                 --env.task=\"\$ROBOMME_TASKS\" \
                 --env.dataset_split=test \
                 --env.task_ids=[0] \
+                --env.max_parallel_tasks=5 \
                 --eval.batch_size=1 \
                 --eval.n_episodes=1 \
                 --eval.use_async_envs=false \
@@ -800,6 +803,7 @@ jobs:
                 --env.type=libero_plus \
                 --env.task=\"\$LIBERO_PLUS_SUITE\" \
                 --env.task_ids=\"\$LIBERO_PLUS_TASK_IDS\" \
+                --env.max_parallel_tasks=5 \
                 --eval.batch_size=1 \
                 --eval.n_episodes=1 \
                 --eval.use_async_envs=false \
@@ -900,6 +904,8 @@ jobs:
                 --policy.path=lerobot/smolvla_vlabench \
                 --env.type=vlabench \
                 --env.task=select_fruit,select_toy,select_book,select_painting,select_drink,select_ingredient,select_billiards,select_poker,add_condiment,insert_flower \
+                --env.episode_length=50 \
+                --env.max_parallel_tasks=5 \
                 --eval.batch_size=1 \
                 --eval.n_episodes=1 \
                 --eval.use_async_envs=false \
diff --git a/.github/workflows/stale.yml b/.github/workflows/stale.yml
index 4dc119b5e..e55410cdf 100644
--- a/.github/workflows/stale.yml
+++ b/.github/workflows/stale.yml
@@ -24,14 +24,14 @@ on:
 
 env:
   CLOSE_ISSUE_MESSAGE: >
-    This issue was closed because it has been stalled for 14 days with no activity.
+    This issue was closed because it has been stalled for 30 days with no activity.
     Feel free to reopen if is still relevant, or to ping a collaborator if you have any questions.
   CLOSE_PR_MESSAGE: >
-    This PR was closed because it has been stalled for 21 days with no activity.
+    This PR was closed because it has been stalled for 30 days with no activity.
     Feel free to reopen if is still relevant, or to ping a collaborator if you have any questions.
   WARN_ISSUE_MESSAGE: >
     This issue has been automatically marked as stale because it has not had
-    recent activity (6 months). It will be closed if no further activity occurs.
+    recent activity (1 year). It will be closed if no further activity occurs.
     Any change, comment or update to this issue will reset this count.
     Thank you for your contributions.
   WARN_PR_MESSAGE: >
@@ -59,10 +59,10 @@ jobs:
           stale-pr-label: stale
           exempt-issue-labels: never-stale
           exempt-pr-labels: never-stale
-          days-before-issue-stale: 180
-          days-before-issue-close: 14
+          days-before-issue-stale: 365
+          days-before-issue-close: 30
           days-before-pr-stale: 365
-          days-before-pr-close: 21
+          days-before-pr-close: 30
           delete-branch: true
           close-issue-message: ${{ env.CLOSE_ISSUE_MESSAGE }}
           close-pr-message: ${{ env.CLOSE_PR_MESSAGE }}
diff --git a/docker/Dockerfile.benchmark.robotwin b/docker/Dockerfile.benchmark.robotwin
index 57ee21f4b..8f27c26dd 100644
--- a/docker/Dockerfile.benchmark.robotwin
+++ b/docker/Dockerfile.benchmark.robotwin
@@ -35,7 +35,7 @@ USER root
 ARG ROBOTWIN_SHA=0aeea2d669c0f8516f4d5785f0aa33ba812c14b4
 RUN apt-get update \
     && apt-get install -y --no-install-recommends \
-         cuda-nvcc-12-4 cuda-cudart-dev-12-4 \
+         cuda-nvcc-12-6 cuda-cudart-dev-12-6 \
          libvulkan1 vulkan-tools \
     && mkdir -p /usr/share/vulkan/icd.d \
     && echo '{"file_format_version":"1.0.0","ICD":{"library_path":"libGLX_nvidia.so.0","api_version":"1.3.0"}}' \
diff --git a/docker/Dockerfile.internal b/docker/Dockerfile.internal
index 6e4550933..c48e61bbb 100644
--- a/docker/Dockerfile.internal
+++ b/docker/Dockerfile.internal
@@ -18,9 +18,8 @@
 # docker build -f docker/Dockerfile.internal -t lerobot-internal .
 
 # Configure the base image for CI with GPU access
-# TODO(Steven): Bump these versions
-ARG CUDA_VERSION=12.4.1
-ARG OS_VERSION=22.04
+ARG CUDA_VERSION=12.6.3
+ARG OS_VERSION=24.04
 FROM nvidia/cuda:${CUDA_VERSION}-base-ubuntu${OS_VERSION}
 
 # Define Python version argument
@@ -36,16 +35,13 @@ ENV DEBIAN_FRONTEND=noninteractive \
 
 # Install Python, system dependencies, and uv (as root)
 RUN apt-get update && apt-get install -y --no-install-recommends \
-    software-properties-common build-essential git curl \
-    libglib2.0-0 libgl1-mesa-glx libegl1-mesa ffmpeg \
+    build-essential git curl \
+    libglib2.0-0 libgl1 libegl1 ffmpeg \
     libusb-1.0-0-dev speech-dispatcher libgeos-dev portaudio19-dev \
     cmake pkg-config ninja-build \
-    && add-apt-repository -y ppa:deadsnakes/ppa \
-    && apt-get update \
-    && apt-get install -y --no-install-recommends \
-       python${PYTHON_VERSION} \
-       python${PYTHON_VERSION}-venv \
-       python${PYTHON_VERSION}-dev \
+    python${PYTHON_VERSION} \
+    python${PYTHON_VERSION}-venv \
+    python${PYTHON_VERSION}-dev \
     && curl -LsSf https://astral.sh/uv/install.sh | sh \
     && mv /root/.local/bin/uv /usr/local/bin/uv \
     && useradd --create-home --shell /bin/bash user_lerobot \
diff --git a/docs/source/_toctree.yml b/docs/source/_toctree.yml
index 01e8bfb76..40cec863f 100644
--- a/docs/source/_toctree.yml
+++ b/docs/source/_toctree.yml
@@ -47,6 +47,8 @@
     title: π₀-FAST (Pi0Fast)
   - local: pi05
     title: π₀.₅ (Pi05)
+  - local: eo1
+    title: EO-1
   - local: groot
     title: NVIDIA GR00T N1.5
   - local: xvla
diff --git a/docs/source/eo1.mdx b/docs/source/eo1.mdx
new file mode 100644
index 000000000..4865483f6
--- /dev/null
+++ b/docs/source/eo1.mdx
@@ -0,0 +1,168 @@
+# EO-1
+
+EO-1 is a **Vision-Language-Action policy for robot control**. The LeRobot implementation integrates EO-1 with the standard LeRobot training, evaluation, processor interface.
+
+## Model Overview
+
+EO-1 uses a Qwen2.5-VL backbone for vision-language understanding and adds a continuous flow-matching action head for robot control. The policy formats each robot-control sample as a multimodal conversation: camera images are passed to Qwen2.5-VL, the robot state is represented with EO-1 state tokens, and the future action chunk is represented with EO-1 action tokens.
+
+<img
+  src="https://huggingface.co/datasets/HaomingSong/lerobot-documentation-images/resolve/main/lerobot/eo_pipeline.png"
+  alt="An overview of EO-1"
+  width="85%"
+/>
+
+During training, EO-1 learns to denoise continuous action chunks at the action-token positions. During inference, it samples an action chunk, returns continuous actions, and executes `n_action_steps` from the chunk before sampling again.
+
+### What the LeRobot Integration Covers
+
+- Standard `policy.type=eo1` configuration through LeRobot
+- Qwen2.5-VL image and text preprocessing through policy processors
+- Continuous flow-matching action prediction
+- Checkpoint save/load through LeRobot policy APIs
+- Training with `lerobot-train` and evaluation with `lerobot-eval`
+
+The broader EO-1 project also includes interleaved vision-text-action pretraining and multimodal reasoning workflows. This page focuses on the LeRobot robot-control policy path.
+
+## Installation Requirements
+
+1. Install LeRobot by following the [Installation Guide](./installation).
+2. Install EO-1 dependencies by running:
+
+   ```bash
+   pip install -e ".[eo1]"
+   ```
+
+3. If you want to train or evaluate on LIBERO, install the LIBERO dependencies too:
+
+   ```bash
+   pip install -e ".[eo1,libero]"
+   ```
+
+EO-1 can use the standard PyTorch scaled-dot-product attention backend through `policy.attn_implementation=sdpa`. If your environment has a compatible `flash_attn` installation, you can request `policy.attn_implementation=flash_attention_2`.
+
+## Data Requirements
+
+EO-1 expects a LeRobot dataset with:
+
+- At least one visual observation, for example `observation.images.image`
+- `observation.state`
+- `action`
+- A language task instruction through the dataset `task` field
+
+If your dataset uses different observation names, use `rename_map` to align them with the names expected by your training or evaluation setup.
+
+## Usage
+
+To use EO-1 in a LeRobot configuration, specify the policy type as:
+
+```python
+policy.type=eo1
+```
+
+By default, a new EO-1 policy initializes its backbone from:
+
+```python
+policy.vlm_base=Qwen/Qwen2.5-VL-3B-Instruct
+```
+
+Once a LeRobot-format EO-1 checkpoint is available, load it with:
+
+```python
+policy.path=your-org/your-eo1-checkpoint
+```
+
+## Training
+
+### Training Command Example
+
+```bash
+lerobot-train \
+  --dataset.repo_id=your_org/your_dataset \
+  --policy.type=eo1 \
+  --policy.vlm_base=Qwen/Qwen2.5-VL-3B-Instruct \
+  --policy.dtype=bfloat16 \
+  --policy.attn_implementation=sdpa \
+  --policy.gradient_checkpointing=false \
+  --output_dir=./outputs/eo1_training \
+  --job_name=eo1_training \
+  --steps=300000 \
+  --batch_size=16 \
+  --policy.device=cuda
+```
+
+### Key Training Parameters
+
+| Parameter                              | Default                       | Description                                                             |
+| -------------------------------------- | ----------------------------- | ----------------------------------------------------------------------- |
+| `policy.vlm_base`                      | `Qwen/Qwen2.5-VL-3B-Instruct` | Qwen2.5-VL checkpoint used to initialize a new policy                   |
+| `policy.dtype`                         | `auto`                        | Backbone dtype request: `auto`, `bfloat16`, or `float32`                |
+| `policy.attn_implementation`           | `None`                        | Optional Qwen attention backend, such as `sdpa`                         |
+| `policy.gradient_checkpointing`        | `false`                       | Reduces memory usage during training                                    |
+| `policy.chunk_size`                    | `8`                           | Number of future actions predicted per chunk                            |
+| `policy.n_action_steps`                | `8`                           | Number of actions consumed from a sampled chunk                         |
+| `policy.num_denoise_steps`             | `10`                          | Number of flow-matching denoising steps used during sampling            |
+| `policy.max_state_dim`                 | `32`                          | State padding dimension                                                 |
+| `policy.max_action_dim`                | `32`                          | Action padding dimension                                                |
+| `policy.force_fp32_autocast`           | `true`                        | Keeps the flow head in fp32 even when the backbone uses mixed precision |
+| `policy.supervise_padding_action_dims` | `true`                        | Controls whether padded action dimensions are supervised                |
+| `policy.supervise_padding_actions`     | `true`                        | Controls whether padded future action rows are supervised               |
+
+## Evaluation
+
+EO-1 can be evaluated through `lerobot-eval` once you have a LeRobot-format checkpoint:
+
+```bash
+lerobot-eval \
+  --policy.path=your-org/your-eo1-checkpoint \
+  --env.type=libero \
+  --env.task=libero_object \
+  --eval.batch_size=1 \
+  --eval.n_episodes=20
+```
+
+For datasets or environments whose camera names differ from the checkpoint configuration, pass a `rename_map`:
+
+```bash
+lerobot-eval \
+  --policy.path=your-org/your-eo1-checkpoint \
+  --env.type=libero \
+  --env.task=libero_object \
+  --rename_map='{"observation.images.image2":"observation.images.wrist_image"}'
+```
+
+## Configuration Notes
+
+### Image Processing
+
+EO-1 uses the Qwen2.5-VL processor. The `policy.image_min_pixels` and `policy.image_max_pixels` settings control the image resizing bounds before the visual tokens are passed into the backbone.
+
+### State and Action Dimensions
+
+The policy pads state and action vectors to `policy.max_state_dim` and `policy.max_action_dim` before the EO-1 flow head. Predictions are cropped back to the original action dimension before being returned by the policy.
+
+### Attention Backend
+
+Use `policy.attn_implementation=sdpa` for a portable setup. Use `flash_attention_2` only when `flash_attn` is installed and compatible with your environment.
+
+## References
+
+- [EO-1 project](https://github.com/EO-Robotics/EO1)
+- [EO-1 paper](https://arxiv.org/abs/2508.21112)
+- [Qwen2.5-VL-3B-Instruct](https://huggingface.co/Qwen/Qwen2.5-VL-3B-Instruct)
+
+## Citation
+
+```bibtex
+@article{eo1,
+  title={EO-1: Interleaved Vision-Text-Action Pretraining for General Robot Control},
+  author={Delin Qu and Haoming Song and Qizhi Chen and Zhaoqing Chen and Xianqiang Gao and Xinyi Ye and Qi Lv and Modi Shi and Guanghui Ren and Cheng Ruan and Maoqing Yao and Haoran Yang and Jiacheng Bao and Bin Zhao and Dong Wang},
+  journal={arXiv preprint},
+  year={2025},
+  url={https://arxiv.org/abs/2508.21112}
+}
+```
+
+## License
+
+This LeRobot integration follows the **Apache 2.0 License** used by LeRobot. Check the upstream EO-1 model and dataset pages for the licenses of released EO-1 checkpoints and data.
diff --git a/pyproject.toml b/pyproject.toml
index f38236426..6f1e2558c 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -128,7 +128,7 @@ dataset_viz = ["lerobot[dataset]", "lerobot[viz]"]
 av-dep = ["av>=15.0.0,<16.0.0"]
 pygame-dep = ["pygame>=2.5.1,<2.7.0"]
 placo-dep = ["placo>=0.9.6,<0.9.17"]
-transformers-dep = ["transformers==5.3.0"] # TODO(Steven): https://github.com/huggingface/lerobot/pull/3249
+transformers-dep = ["transformers>=5.4.0,<5.6.0"]
 grpcio-dep = ["grpcio==1.73.1", "protobuf>=6.31.1,<6.32.0"]
 can-dep = ["python-can>=4.2.0,<5.0.0"]
 peft-dep = ["peft>=0.18.0,<1.0.0"]
@@ -194,6 +194,7 @@ groot = [
 ]
 sarm = ["lerobot[transformers-dep]", "pydantic>=2.0.0,<3.0.0", "faker>=33.0.0,<35.0.0", "lerobot[matplotlib-dep]", "lerobot[qwen-vl-utils-dep]"]
 xvla = ["lerobot[transformers-dep]"]
+eo1 = ["lerobot[transformers-dep]", "lerobot[qwen-vl-utils-dep]"]
 hilserl = ["lerobot[transformers-dep]", "gym-hil>=0.1.13,<0.2.0", "lerobot[grpcio-dep]", "lerobot[placo-dep]"]
 
 # Features
diff --git a/src/lerobot/policies/__init__.py b/src/lerobot/policies/__init__.py
index bbddd855f..3a6b8e5d2 100644
--- a/src/lerobot/policies/__init__.py
+++ b/src/lerobot/policies/__init__.py
@@ -16,6 +16,7 @@ from lerobot.utils.action_interpolator import ActionInterpolator as ActionInterp
 
 from .act.configuration_act import ACTConfig as ACTConfig
 from .diffusion.configuration_diffusion import DiffusionConfig as DiffusionConfig
+from .eo1.configuration_eo1 import EO1Config as EO1Config
 from .factory import get_policy_class, make_policy, make_policy_config, make_pre_post_processors
 from .gaussian_actor.configuration_gaussian_actor import GaussianActorConfig as GaussianActorConfig
 from .groot.configuration_groot import GrootConfig as GrootConfig
@@ -39,6 +40,7 @@ __all__ = [
     # Configuration classes
     "ACTConfig",
     "DiffusionConfig",
+    "EO1Config",
     "GaussianActorConfig",
     "GrootConfig",
     "MultiTaskDiTConfig",
diff --git a/src/lerobot/policies/diffusion/configuration_diffusion.py b/src/lerobot/policies/diffusion/configuration_diffusion.py
index 8e3d4bf19..ed04ab54d 100644
--- a/src/lerobot/policies/diffusion/configuration_diffusion.py
+++ b/src/lerobot/policies/diffusion/configuration_diffusion.py
@@ -100,8 +100,8 @@ class DiffusionConfig(PreTrainedConfig):
 
     # Inputs / output structure.
     n_obs_steps: int = 2
-    horizon: int = 16
-    n_action_steps: int = 8
+    horizon: int = 64
+    n_action_steps: int = 32
 
     normalization_mapping: dict[str, NormalizationMode] = field(
         default_factory=lambda: {
@@ -122,10 +122,10 @@ class DiffusionConfig(PreTrainedConfig):
     crop_ratio: float = 1.0
     crop_shape: tuple[int, int] | None = None
     crop_is_random: bool = True
-    pretrained_backbone_weights: str | None = None
-    use_group_norm: bool = True
+    pretrained_backbone_weights: str | None = "ResNet18_Weights.IMAGENET1K_V1"
+    use_group_norm: bool = False
     spatial_softmax_num_keypoints: int = 32
-    use_separate_rgb_encoder_per_camera: bool = False
+    use_separate_rgb_encoder_per_camera: bool = True
     # Unet.
     down_dims: tuple[int, ...] = (512, 1024, 2048)
     kernel_size: int = 5
diff --git a/src/lerobot/policies/eo1/README.md b/src/lerobot/policies/eo1/README.md
new file mode 120000
index 000000000..3f36fd805
--- /dev/null
+++ b/src/lerobot/policies/eo1/README.md
@@ -0,0 +1 @@
+../../../../docs/source/eo1.mdx
\ No newline at end of file
diff --git a/src/lerobot/policies/eo1/__init__.py b/src/lerobot/policies/eo1/__init__.py
new file mode 100644
index 000000000..8298affcd
--- /dev/null
+++ b/src/lerobot/policies/eo1/__init__.py
@@ -0,0 +1,7 @@
+#!/usr/bin/env python
+
+from .configuration_eo1 import EO1Config
+from .modeling_eo1 import EO1Policy
+from .processor_eo1 import make_eo1_pre_post_processors
+
+__all__ = ["EO1Config", "EO1Policy", "make_eo1_pre_post_processors"]
diff --git a/src/lerobot/policies/eo1/configuration_eo1.py b/src/lerobot/policies/eo1/configuration_eo1.py
new file mode 100644
index 000000000..aa0861b48
--- /dev/null
+++ b/src/lerobot/policies/eo1/configuration_eo1.py
@@ -0,0 +1,193 @@
+#!/usr/bin/env python
+
+# Copyright 2026 The HuggingFace Inc. team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import annotations
+
+from copy import deepcopy
+from dataclasses import dataclass, field
+from typing import TYPE_CHECKING
+
+from lerobot.configs.policies import PreTrainedConfig
+from lerobot.configs.types import FeatureType, NormalizationMode, PolicyFeature
+from lerobot.optim.optimizers import AdamWConfig
+from lerobot.optim.schedulers import CosineDecayWithWarmupSchedulerConfig
+from lerobot.utils.constants import ACTION, OBS_STATE
+from lerobot.utils.import_utils import _transformers_available, require_package
+
+if TYPE_CHECKING or _transformers_available:
+    from transformers.models.qwen2_5_vl.configuration_qwen2_5_vl import (
+        Qwen2_5_VLConfig,
+        Qwen2_5_VLTextConfig,
+        Qwen2_5_VLVisionConfig,
+    )
+else:
+    Qwen2_5_VLConfig = None
+    Qwen2_5_VLTextConfig = None
+    Qwen2_5_VLVisionConfig = None
+
+
+@PreTrainedConfig.register_subclass("eo1")
+@dataclass
+class EO1Config(PreTrainedConfig):
+    """Configuration for native EO1 policy integration in LeRobot."""
+
+    vlm_base: str = "Qwen/Qwen2.5-VL-3B-Instruct"
+    vlm_config: dict | None = None
+
+    # Vision processor settings.
+    image_min_pixels: int | None = 64 * 28 * 28
+    image_max_pixels: int | None = 128 * 28 * 28
+    use_fast_processor: bool = False
+
+    # Execution and action horizon.
+    n_obs_steps: int = 1
+    chunk_size: int = 8
+    n_action_steps: int = 8
+
+    # State/action padding to match EO1 flow head dimensionality.
+    max_state_dim: int = 32
+    max_action_dim: int = 32
+
+    # Flow matching sampling.
+    num_denoise_steps: int = 10
+    num_action_layers: int = 2
+    action_act: str = "linear"
+    time_sampling_beta_alpha: float = 1.5
+    time_sampling_beta_beta: float = 1.0
+    time_sampling_scale: float = 0.999
+    time_sampling_offset: float = 0.001
+    min_period: float = 4e-3
+    max_period: float = 4.0
+    supervise_padding_action_dims: bool = True
+    supervise_padding_actions: bool = True
+
+    # Policy-level dtype request for the Qwen backbone.
+    # - "auto": follow the backbone config/checkpoint default dtype. For Qwen2.5-VL this resolves to bf16.
+    #           The EO1 flow-matching head still keeps its own parameters in fp32.
+    # - "bfloat16": force the backbone to initialize/load in bf16 regardless of the saved config default.
+    # - "float32": force the backbone to initialize/load in fp32 for maximum numerical conservatism.
+    dtype: str = "auto"  # Options: "auto", "bfloat16", "float32"
+    force_fp32_autocast: bool = True
+
+    # Optional attention backend request passed through to the Qwen backbone.
+    # Common values: None, "eager", "sdpa", "flash_attention_2".
+    attn_implementation: str | None = None
+
+    # Training settings.
+    gradient_checkpointing: bool = False  # Enable gradient checkpointing for memory optimization
+
+    normalization_mapping: dict[str, NormalizationMode] = field(
+        default_factory=lambda: {
+            "VISUAL": NormalizationMode.IDENTITY,
+            "STATE": NormalizationMode.MEAN_STD,
+            "ACTION": NormalizationMode.MEAN_STD,
+        }
+    )
+
+    # Optimizer settings aligned with EO1/experiments/2_libero/train.sh and EO1 TrainPipelineConfig defaults.
+    optimizer_lr: float = 1e-4
+    optimizer_betas: tuple[float, float] = (0.9, 0.999)
+    optimizer_eps: float = 1e-8
+    optimizer_weight_decay: float = 0.1
+    optimizer_grad_clip_norm: float = 1.0
+
+    # Scheduler settings aligned with EO1 train.sh: cosine schedule with warmup_ratio=0.03.
+    # Note: These will auto-scale if --steps < scheduler_decay_steps
+    # For example, --steps=3000 will scale warmup to 100 and decay to 3000
+    scheduler_warmup_steps: int = 900  # 0.03 * 30_000 long-run steps
+    scheduler_decay_steps: int = 30_000
+    scheduler_decay_lr: float = 0.0
+
+    def __post_init__(self):
+        super().__post_init__()
+
+        if self.n_action_steps > self.chunk_size:
+            raise ValueError(
+                f"n_action_steps ({self.n_action_steps}) cannot be greater than chunk_size ({self.chunk_size})"
+            )
+
+        # Populate the serialized backbone config only when the caller did not provide one.
+        if self.vlm_config is None:
+            require_package("transformers", extra="eo1")
+            self.vlm_config = Qwen2_5_VLConfig.from_pretrained(self.vlm_base).to_dict()
+
+    @property
+    def vlm_backbone_config(self) -> Qwen2_5_VLConfig:
+        require_package("transformers", extra="eo1")
+        config_dict = deepcopy(self.vlm_config)
+        if self.attn_implementation is not None:
+            config_dict["attn_implementation"] = self.attn_implementation
+        return Qwen2_5_VLConfig(**config_dict)
+
+    @property
+    def text_config(self) -> Qwen2_5_VLTextConfig:
+        return self.vlm_backbone_config.text_config
+
+    @property
+    def vision_config(self) -> Qwen2_5_VLVisionConfig:
+        return self.vlm_backbone_config.vision_config
+
+    def validate_features(self) -> None:
+        """Validate and set up EO1 input and output features."""
+        image_features = [key for key, feat in self.input_features.items() if feat.type == FeatureType.VISUAL]
+        if not image_features:
+            raise ValueError(
+                "EO1 policy requires at least one visual input feature. "
+                "No features of type FeatureType.VISUAL found in input_features."
+            )
+
+        if OBS_STATE not in self.input_features:
+            state_feature = PolicyFeature(
+                type=FeatureType.STATE,
+                shape=(self.max_state_dim,),
+            )
+            self.input_features[OBS_STATE] = state_feature
+
+        if ACTION not in self.output_features:
+            action_feature = PolicyFeature(
+                type=FeatureType.ACTION,
+                shape=(self.max_action_dim,),
+            )
+            self.output_features[ACTION] = action_feature
+
+    def get_optimizer_preset(self) -> AdamWConfig:
+        return AdamWConfig(
+            lr=self.optimizer_lr,
+            betas=self.optimizer_betas,
+            eps=self.optimizer_eps,
+            weight_decay=self.optimizer_weight_decay,
+            grad_clip_norm=self.optimizer_grad_clip_norm,
+        )
+
+    def get_scheduler_preset(self):
+        return CosineDecayWithWarmupSchedulerConfig(
+            peak_lr=self.optimizer_lr,
+            decay_lr=self.scheduler_decay_lr,
+            num_warmup_steps=self.scheduler_warmup_steps,
+            num_decay_steps=self.scheduler_decay_steps,
+        )
+
+    @property
+    def observation_delta_indices(self) -> None:
+        return None
+
+    @property
+    def action_delta_indices(self) -> list[int]:
+        return list(range(self.chunk_size))
+
+    @property
+    def reward_delta_indices(self) -> None:
+        return None
diff --git a/src/lerobot/policies/eo1/modeling_eo1.py b/src/lerobot/policies/eo1/modeling_eo1.py
new file mode 100644
index 000000000..27d609ec1
--- /dev/null
+++ b/src/lerobot/policies/eo1/modeling_eo1.py
@@ -0,0 +1,620 @@
+#!/usr/bin/env python
+
+# Copyright 2026 The HuggingFace Inc. team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import annotations
+
+import contextlib
+import logging
+import math
+from collections import deque
+from typing import TYPE_CHECKING, Any
+
+import torch
+import torch.nn as nn
+import torch.nn.functional as F  # noqa: N812
+import torch.utils.checkpoint
+from torch import Tensor
+
+from lerobot.policies.eo1.configuration_eo1 import EO1Config
+from lerobot.policies.pretrained import PreTrainedPolicy
+from lerobot.utils.constants import ACTION, OBS_STATE
+from lerobot.utils.import_utils import _transformers_available, require_package
+
+if TYPE_CHECKING or _transformers_available:
+    from transformers.activations import ACT2FN
+    from transformers.models.qwen2_5_vl import Qwen2_5_VLForConditionalGeneration
+    from transformers.utils import torch_compilable_check
+else:
+    ACT2FN = None
+    Qwen2_5_VLForConditionalGeneration = None
+    torch_compilable_check = None
+
+logger = logging.getLogger(__name__)
+
+
+def pad_vector(vector, new_dim):
+    """Pad the last dimension of a vector to new_dim with zeros.
+
+    Can be (batch_size x sequence_length x features_dimension)
+    or (batch_size x features_dimension)
+    """
+    if vector.shape[-1] >= new_dim:
+        return vector
+    return F.pad(vector, (0, new_dim - vector.shape[-1]))
+
+
+class EO1Policy(PreTrainedPolicy):
+    """EO1 policy wrapper for LeRobot robot-only training/evaluation."""
+
+    config_class = EO1Config
+    name = "eo1"
+
+    def __init__(self, config: EO1Config, **kwargs):
+        require_package("transformers", extra="eo1")
+        super().__init__(config)
+        config.validate_features()
+        self.config = config
+
+        if config.pretrained_path is None:
+            # Initialize from pretrained VLM
+            vlm_backbone = Qwen2_5_VLForConditionalGeneration.from_pretrained(
+                config.vlm_base,
+                dtype=config.dtype,
+                attn_implementation=config.attn_implementation,
+            )
+        else:
+            vlm_backbone = Qwen2_5_VLForConditionalGeneration._from_config(
+                config.vlm_backbone_config,
+                dtype=config.vlm_backbone_config.dtype if config.dtype == "auto" else config.dtype,
+            )
+
+        self.model = EO1VisionFlowMatchingModel(config, vlm_backbone)
+        if config.gradient_checkpointing:
+            self.model.gradient_checkpointing_enable()
+
+        self.model.to(config.device)
+        self.reset()
+
+    def reset(self):
+        self._action_queue = deque(maxlen=self.config.n_action_steps)
+
+    @staticmethod
+    def _get_model_inputs(batch: dict[str, Tensor], excluded_keys: set[str]) -> dict[str, Tensor]:
+        return {key: value for key, value in batch.items() if key not in excluded_keys}
+
+    def forward(self, batch: dict[str, Tensor]) -> tuple[Tensor, dict]:
+        state = self.prepare_state(batch[OBS_STATE])
+        actions = self.prepare_action(batch[ACTION])
+        model_inputs = self._get_model_inputs(batch, {OBS_STATE, ACTION})
+        loss = self.model(states=state, action=actions, **model_inputs)
+
+        loss_dict = {"loss": loss.item()}
+        return loss, loss_dict
+
+    @torch.no_grad()
+    def predict_action_chunk(self, batch: dict[str, Tensor], **kwargs) -> Tensor:
+        self.eval()
+
+        states = self.prepare_state(batch[OBS_STATE])
+        model_inputs = self._get_model_inputs(batch, {OBS_STATE})
+        actions = self.model.sample_actions(states=states, **model_inputs).to(torch.float32)
+
+        original_action_dim = self.config.output_features[ACTION].shape[0]
+        return actions[:, :, :original_action_dim]
+
+    def prepare_state(self, state: Tensor) -> Tensor:
+        return pad_vector(state, self.config.max_state_dim)
+
+    def prepare_action(self, action: Tensor) -> Tensor:
+        return pad_vector(action, self.config.max_action_dim)
+
+    @torch.no_grad()
+    def select_action(self, batch: dict[str, Tensor]) -> Tensor:
+        self.eval()
+
+        if len(self._action_queue) == 0:
+            actions = self.predict_action_chunk(batch)[:, : self.config.n_action_steps]
+            self._action_queue.extend(actions.transpose(0, 1))
+
+        return self._action_queue.popleft()
+
+    def get_optim_params(self) -> dict:
+        return self.parameters()
+
+
+def get_safe_dtype(target_dtype, device_type):
+    """Get a safe dtype for the given device type."""
+    if device_type == "mps" and target_dtype == torch.float64:
+        return torch.float32
+    if device_type == "cpu":
+        # CPU doesn't support bfloat16, use float32 instead
+        if target_dtype == torch.bfloat16:
+            return torch.float32
+        if target_dtype == torch.float64:
+            return torch.float64
+    return target_dtype
+
+
+def create_sinusoidal_pos_embedding(  # see openpi `create_sinusoidal_pos_embedding` (exact copy)
+    time: torch.Tensor, dimension: int, min_period: float, max_period: float, device="cpu"
+) -> Tensor:
+    """Computes sine-cosine positional embedding vectors for scalar positions."""
+    if dimension % 2 != 0:
+        raise ValueError(f"dimension ({dimension}) must be divisible by 2")
+
+    if time.ndim != 1:
+        raise ValueError("The time tensor is expected to be of shape `(batch_size, )`.")
+
+    dtype = get_safe_dtype(torch.float64, device.type)
+    fraction = torch.linspace(0.0, 1.0, dimension // 2, dtype=dtype, device=device)
+    period = min_period * (max_period / min_period) ** fraction
+
+    # Compute the outer product
+    scaling_factor = 1.0 / period * 2 * math.pi
+    sin_input = scaling_factor[None, :] * time[:, None]
+    return torch.cat([torch.sin(sin_input), torch.cos(sin_input)], dim=1)
+
+
+def sample_beta(alpha, beta, bsize, device):  # see openpi `sample_beta` (exact copy)
+    # Beta sampling uses _sample_dirichlet which isn't implemented for MPS, so sample on CPU
+    alpha_t = torch.tensor(alpha, dtype=torch.float32)
+    beta_t = torch.tensor(beta, dtype=torch.float32)
+    dist = torch.distributions.Beta(alpha_t, beta_t)
+    return dist.sample((bsize,)).to(device)
+
+
+class EO1VisionActionProjector(torch.nn.Sequential):
+    """This block implements the multi-layer perceptron (MLP) module."""
+
+    def __init__(
+        self,
+        in_channels: int,
+        out_channels: int,
+        num_layers: int = 2,
+        activation_layer: str = "linear",
+        bias: bool = True,
+        device: Any = None,
+        dtype: torch.dtype = torch.float32,
+    ):
+        layers = []
+        in_dim = in_channels
+        hidden_channels = [in_dim] * (num_layers - 1) + [out_channels]
+        for hidden_dim in hidden_channels[:-1]:
+            layers.append(torch.nn.Linear(in_dim, hidden_dim, bias=bias, dtype=dtype, device=device))
+            layers.append(ACT2FN[activation_layer])
+            in_dim = hidden_dim
+        layers.append(torch.nn.Linear(in_dim, hidden_channels[-1], bias=bias, dtype=dtype, device=device))
+        super().__init__(*layers)
+
+    @property
+    def dtype(self):
+        return self[0].weight.dtype
+
+
+class EO1VisionFlowMatchingModel(nn.Module):
+    def __init__(
+        self,
+        config: EO1Config,
+        vlm_backbone: Qwen2_5_VLForConditionalGeneration | None = None,
+    ):
+        require_package("transformers", extra="eo1")
+        super().__init__()
+
+        self.config = config
+        # Preserve the backbone dtype selected at construction time so Qwen's fp32 rotary buffers stay intact.
+        self.vlm_backbone = vlm_backbone
+        self.hidden_size = self.vlm_backbone.config.text_config.hidden_size
+        max_state_dim = config.max_state_dim
+        max_action_dim = config.max_action_dim
+        self.state_proj = nn.Linear(max_state_dim, self.hidden_size, dtype=torch.float32)
+        self.action_in_proj = nn.Linear(max_action_dim, self.hidden_size, dtype=torch.float32)
+        self.action_out_proj = EO1VisionActionProjector(
+            self.hidden_size,
+            max_action_dim,
+            config.num_action_layers,
+            config.action_act,
+            dtype=torch.float32,
+        )
+        self.action_time_mlp_in = nn.Linear(self.hidden_size * 2, self.hidden_size, dtype=torch.float32)
+        self.action_time_mlp_out = nn.Linear(self.hidden_size, self.hidden_size, dtype=torch.float32)
+        self.gradient_checkpointing_enabled = False
+
+    def get_input_embeddings(self):
+        return self.vlm_backbone.get_input_embeddings()
+
+    def flow_head_autocast_context(self):
+        if self.config.force_fp32_autocast:
+            return torch.autocast(
+                device_type=self.state_proj.weight.device.type,
+                enabled=False,
+            )
+        return contextlib.nullcontext()
+
+    def gradient_checkpointing_enable(self):
+        """Enable gradient checkpointing for the Qwen2.5-VL backbone."""
+        self.gradient_checkpointing_enabled = True
+        self.vlm_backbone.gradient_checkpointing_enable(
+            gradient_checkpointing_kwargs={"use_reentrant": False}
+        )
+        logger.info("Enabled gradient checkpointing for EO1VisionFlowMatchingModel")
+
+    def gradient_checkpointing_disable(self):
+        """Disable gradient checkpointing for the Qwen2.5-VL backbone."""
+        self.gradient_checkpointing_enabled = False
+        self.vlm_backbone.gradient_checkpointing_disable()
+        logger.info("Disabled gradient checkpointing for EO1VisionFlowMatchingModel")
+
+    def _apply_checkpoint(self, func, *args, **kwargs):
+        """Apply manual gradient checkpointing to EO1 flow-head computations when training."""
+        if self.gradient_checkpointing_enabled and self.training and torch.is_grad_enabled():
+            return torch.utils.checkpoint.checkpoint(
+                func, *args, use_reentrant=False, preserve_rng_state=False, **kwargs
+            )
+        return func(*args, **kwargs)
+
+    def sample_noise(self, shape, device):
+        noise = torch.normal(
+            mean=0.0,
+            std=1.0,
+            size=shape,
+            dtype=torch.float32,
+            device=device,
+        )
+        return noise
+
+    def sample_time(self, bsize, device):
+        time_beta = sample_beta(
+            self.config.time_sampling_beta_alpha, self.config.time_sampling_beta_beta, bsize, device
+        )
+        time = time_beta * self.config.time_sampling_scale + self.config.time_sampling_offset
+        return time.to(dtype=torch.float32, device=device)
+
+    def get_placeholder_mask(
+        self,
+        input_ids: torch.LongTensor | None,
+        inputs_embeds: torch.FloatTensor | None,
+        state_features: torch.FloatTensor | None = None,
+        action_features: torch.FloatTensor | None = None,
+        *,
+        state_token_id: int,
+        action_token_id: int,
+    ) -> tuple[torch.BoolTensor, torch.BoolTensor]:
+        """Return EO1 state/action placeholder masks, following Qwen's multimodal mask style."""
+        if input_ids is None:
+            special_state_mask = inputs_embeds == self.get_input_embeddings()(
+                torch.tensor(state_token_id, dtype=torch.long, device=inputs_embeds.device)
+            )
+            special_state_mask = special_state_mask.all(-1)
+            special_action_mask = inputs_embeds == self.get_input_embeddings()(
+                torch.tensor(action_token_id, dtype=torch.long, device=inputs_embeds.device)
+            )
+            special_action_mask = special_action_mask.all(-1)
+        else:
+            special_state_mask = input_ids == state_token_id
+            special_action_mask = input_ids == action_token_id
+
+        n_state_tokens = special_state_mask.sum()
+        special_state_mask = (
+            special_state_mask.unsqueeze(-1).expand_as(inputs_embeds).to(inputs_embeds.device)
+        )
+        if state_features is not None:
+            torch_compilable_check(
+                inputs_embeds[special_state_mask].numel() == state_features.numel(),
+                f"State features and state tokens do not match, tokens: {n_state_tokens}, features: {state_features.shape[0]}",
+            )
+
+        n_action_tokens = special_action_mask.sum()
+        special_action_mask = (
+            special_action_mask.unsqueeze(-1).expand_as(inputs_embeds).to(inputs_embeds.device)
+        )
+        if action_features is not None:
+            torch_compilable_check(
+                inputs_embeds[special_action_mask].numel() == action_features.numel(),
+                f"Action features and action tokens do not match, tokens: {n_action_tokens}, features: {action_features.shape[0]}",
+            )
+
+        return special_state_mask, special_action_mask
+
+    def embed_prefix(
+        self,
+        input_ids: torch.LongTensor,
+        states: torch.Tensor,
+        *,
+        state_token_id: int,
+        action_token_id: int,
+    ) -> torch.FloatTensor:
+        """Embed the EO1 prefix tokens before native Qwen injects multimodal features."""
+
+        # Get the input embeddings for the input IDs
+        def input_embed_func(input_ids: torch.LongTensor) -> torch.FloatTensor:
+            return self.get_input_embeddings()(input_ids)
+
+        inputs_embeds = self._apply_checkpoint(input_embed_func, input_ids)
+
+        # Project the states to the hidden size
+        def state_proj_func(states: torch.Tensor) -> torch.FloatTensor:
+            with self.flow_head_autocast_context():
+                states = states.to(dtype=self.state_proj.weight.dtype)
+                return self.state_proj(states)
+
+        state_embs = self._apply_checkpoint(state_proj_func, states)
+        state_mask, _ = self.get_placeholder_mask(
+            input_ids,
+            inputs_embeds,
+            state_features=state_embs,
+            state_token_id=state_token_id,
+            action_token_id=action_token_id,
+        )
+        state_embs = state_embs.to(inputs_embeds.device, inputs_embeds.dtype)
+        inputs_embeds = inputs_embeds.masked_scatter(state_mask, state_embs)
+        return inputs_embeds
+
+    def embed_suffix(
+        self,
+        timestep: torch.Tensor,
+        noisy_actions: torch.Tensor,
+    ) -> torch.FloatTensor:
+        """Embed the suffix"""
+
+        def action_proj_func(noisy_actions: torch.Tensor) -> torch.FloatTensor:
+            with self.flow_head_autocast_context():
+                noisy_actions = noisy_actions.to(dtype=self.action_in_proj.weight.dtype)
+                return self.action_in_proj(noisy_actions)
+
+        action_embs = self._apply_checkpoint(action_proj_func, noisy_actions)
+        time_embs = create_sinusoidal_pos_embedding(
+            timestep,
+            self.hidden_size,
+            min_period=self.config.min_period,
+            max_period=self.config.max_period,
+            device=action_embs.device,
+        )
+        time_embs = time_embs.to(dtype=action_embs.dtype)
+        time_embs = time_embs[:, None, :].expand_as(action_embs)
+        action_time_embs = torch.cat([action_embs, time_embs], dim=2)
+
+        def mlp_func(action_time_embs: torch.Tensor) -> torch.FloatTensor:
+            with self.flow_head_autocast_context():
+                action_time_embs = action_time_embs.to(dtype=self.action_time_mlp_in.weight.dtype)
+                action_time_embs = self.action_time_mlp_in(action_time_embs)
+                action_time_embs = F.silu(action_time_embs)
+                return self.action_time_mlp_out(action_time_embs)
+
+        action_time_embs = self._apply_checkpoint(mlp_func, action_time_embs)
+        return action_time_embs
+
+    def forward(
+        self,
+        input_ids: torch.LongTensor | None = None,
+        attention_mask: torch.LongTensor | None = None,
+        pixel_values: torch.FloatTensor | None = None,
+        image_grid_thw: torch.LongTensor | None = None,
+        mm_token_type_ids: torch.IntTensor | None = None,
+        states: torch.FloatTensor | None = None,
+        action: torch.FloatTensor | None = None,
+        action_is_pad: torch.BoolTensor | None = None,
+        *,
+        state_token_id: int,
+        action_token_id: int,
+        **kwargs,
+    ) -> Tensor:
+        """Run the EO1 training forward pass and compute the flow-matching loss."""
+
+        # 1. Build the EO1 prefix with state placeholders resolved.
+        inputs_embeds = self.embed_prefix(
+            input_ids,
+            states=states,
+            state_token_id=state_token_id,
+            action_token_id=action_token_id,
+        )
+
+        # 2. Sample the diffusion target and replace the action placeholders.
+        time = self.sample_time(action.shape[0], inputs_embeds.device)
+        noise = self.sample_noise(action.shape, inputs_embeds.device)
+
+        time_expanded = time[:, None, None]
+        x_t = time_expanded * noise + (1 - time_expanded) * action
+        u_t = noise - action
+        action_time_embs = self.embed_suffix(time, x_t)
+        _, action_mask = self.get_placeholder_mask(
+            input_ids,
+            inputs_embeds,
+            action_features=action_time_embs,
+            state_token_id=state_token_id,
+            action_token_id=action_token_id,
+        )
+        action_time_embs = action_time_embs.to(inputs_embeds.device, inputs_embeds.dtype)
+        inputs_embeds = inputs_embeds.masked_scatter(action_mask, action_time_embs)
+
+        # 3. Optionally drop padded action tokens from backbone attention.
+        if attention_mask is not None:
+            attention_mask = attention_mask.to(inputs_embeds.device)
+
+        if not self.config.supervise_padding_actions:
+            action_is_pad = action_is_pad.to(device=inputs_embeds.device, dtype=torch.bool)
+            action_token_mask = action_mask[..., 0]
+            action_padding_mask = torch.zeros_like(action_token_mask)
+            action_padding_mask = action_padding_mask.masked_scatter(
+                action_token_mask,
+                action_is_pad.reshape(-1),
+            )
+            attention_mask = attention_mask.masked_fill(action_padding_mask, 0)
+
+        # 4. Run the Qwen backbone on the fused EO1 sequence.
+        def vlm_forward_func(
+            input_ids: torch.LongTensor,
+            attention_mask: torch.Tensor | None,
+            inputs_embeds: torch.FloatTensor,
+            pixel_values: torch.Tensor | None,
+            image_grid_thw: torch.LongTensor | None,
+            mm_token_type_ids: torch.IntTensor | None,
+        ) -> torch.FloatTensor:
+            outputs = self.vlm_backbone.model(
+                input_ids=input_ids,
+                attention_mask=attention_mask,
+                inputs_embeds=inputs_embeds,
+                pixel_values=pixel_values,
+                image_grid_thw=image_grid_thw,
+                mm_token_type_ids=mm_token_type_ids,
+                use_cache=False,
+                output_hidden_states=False,
+                return_dict=True,
+            )
+            return outputs.last_hidden_state
+
+        hidden_states = self._apply_checkpoint(
+            vlm_forward_func,
+            input_ids,
+            attention_mask,
+            inputs_embeds,
+            pixel_values,
+            image_grid_thw,
+            mm_token_type_ids,
+        )
+        action_hidden_states = hidden_states[action_mask[..., 0]]
+
+        # 5. Project the action-token hidden states back to the flow target space.
+        def action_out_proj_func(action_hidden_states: torch.FloatTensor) -> torch.FloatTensor:
+            with self.flow_head_autocast_context():
+                action_hidden_states = action_hidden_states.to(dtype=self.action_out_proj.dtype)
+                return self.action_out_proj(action_hidden_states)
+
+        v_t = self._apply_checkpoint(action_out_proj_func, action_hidden_states)
+        v_t = v_t.reshape(u_t.shape).to(dtype=u_t.dtype)
+        losses = F.mse_loss(u_t, v_t, reduction="none")
+
+        # 6. Apply the configured supervision mask and reduce the loss.
+        if not self.config.supervise_padding_action_dims:
+            original_action_dim = self.config.output_features[ACTION].shape[0]
+            losses = losses[..., :original_action_dim]
+
+        if not self.config.supervise_padding_actions:
+            losses = losses[~action_is_pad]
+
+        return losses.mean()
+
+    @torch.no_grad()
+    def sample_actions(
+        self,
+        input_ids: torch.LongTensor | None = None,
+        attention_mask: torch.Tensor | None = None,
+        pixel_values: torch.Tensor | None = None,
+        image_grid_thw: torch.LongTensor | None = None,
+        mm_token_type_ids: torch.IntTensor | None = None,
+        states: torch.Tensor | None = None,
+        *,
+        state_token_id: int,
+        action_token_id: int,
+        **kwargs,
+    ) -> Tensor:
+        """Sample actions from the model."""
+        if states is None:
+            raise ValueError("states are required for EO1 action sampling.")
+        if mm_token_type_ids is None:
+            raise ValueError("mm_token_type_ids are required for EO1 action sampling.")
+
+        # 1. Resolve the left-padded rollout prompt and locate the action span.
+        chunk_size = self.config.chunk_size
+
+        inputs_embeds = self.embed_prefix(
+            input_ids,
+            states=states,
+            state_token_id=state_token_id,
+            action_token_id=action_token_id,
+        ).clone()
+        _, action_placeholder_mask = self.get_placeholder_mask(
+            input_ids,
+            inputs_embeds,
+            state_token_id=state_token_id,
+            action_token_id=action_token_id,
+        )
+        action_mask = action_placeholder_mask[..., 0]
+        token_counts = action_mask.sum(dim=1)
+        if not torch.all(token_counts == chunk_size):
+            raise ValueError(
+                f"Each sample must contain exactly {chunk_size} action tokens, got {token_counts.tolist()}."
+            )
+        if action_mask.ne(action_mask[:1]).any():
+            raise ValueError(
+                "Batch inference expects all samples to share the same action token mask after left padding."
+            )
+        act_start = int(action_mask[0].to(torch.int64).argmax().item())
+        act_end = act_start + self.config.chunk_size
+        if not torch.all(action_mask[:, act_start:act_end]):
+            raise ValueError("Action tokens must form a contiguous chunk of length chunk_size.")
+        act_slice = slice(act_start, act_end)
+
+        # 2. Encode the fixed prefix once and cache its KV state.
+        batch_size = input_ids.shape[0]
+        device = inputs_embeds.device
+        attention_mask = attention_mask.to(device)
+        mm_token_type_ids = mm_token_type_ids.to(device)
+        position_ids, _ = self.vlm_backbone.model.get_rope_index(
+            input_ids,
+            image_grid_thw=image_grid_thw,
+            attention_mask=attention_mask,
+            mm_token_type_ids=mm_token_type_ids,
+        )
+        position_ids = position_ids.to(device)
+
+        outputs = self.vlm_backbone.model(
+            input_ids=input_ids[:, :act_start],
+            attention_mask=attention_mask[:, :act_start],
+            position_ids=position_ids[..., :act_start],
+            inputs_embeds=inputs_embeds[:, :act_start],
+            pixel_values=pixel_values,
+            image_grid_thw=image_grid_thw,
+            mm_token_type_ids=mm_token_type_ids[:, :act_start],
+            use_cache=True,
+            return_dict=True,
+        )
+
+        x_t = self.sample_noise(
+            (batch_size, chunk_size, self.config.max_action_dim),
+            device,
+        ).to(dtype=self.action_in_proj.weight.dtype)
+        dt = -1.0 / self.config.num_denoise_steps
+        past_key_values = outputs.past_key_values
+
+        # 3. Denoise only the action chunk while keeping the prefix cache invariant.
+        for step in range(self.config.num_denoise_steps):
+            time = torch.full(
+                (batch_size,),
+                1.0 + step * dt,
+                device=device,
+                dtype=torch.float32,
+            )
+            action_time_embs = self.embed_suffix(time, x_t)
+            inputs_embeds[:, act_slice] = action_time_embs.to(inputs_embeds.dtype)
+
+            # Keep the prefix KV cache invariant across denoising steps.
+            past_key_values.crop(act_start)
+            outputs = self.vlm_backbone.model(
+                attention_mask=attention_mask[:, :act_end],
+                past_key_values=past_key_values,
+                inputs_embeds=inputs_embeds[:, act_slice],
+                position_ids=position_ids[..., act_slice],
+                use_cache=True,
+                return_dict=True,
+            )
+            with self.flow_head_autocast_context():
+                hidden_states = outputs.last_hidden_state[:, :chunk_size]
+                hidden_states = hidden_states.to(dtype=self.action_out_proj.dtype)
+                v_t = self.action_out_proj(hidden_states)
+
+            x_t += dt * v_t.reshape(x_t.shape)
+
+        return x_t
diff --git a/src/lerobot/policies/eo1/processor_eo1.py b/src/lerobot/policies/eo1/processor_eo1.py
new file mode 100644
index 000000000..2d7bb48ae
--- /dev/null
+++ b/src/lerobot/policies/eo1/processor_eo1.py
@@ -0,0 +1,282 @@
+#!/usr/bin/env python
+
+# Copyright 2026 The HuggingFace Inc. team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import annotations
+
+from dataclasses import dataclass, field
+from typing import TYPE_CHECKING, Any
+
+import torch
+
+from lerobot.configs.types import FeatureType, PipelineFeatureType, PolicyFeature
+from lerobot.policies.eo1.configuration_eo1 import EO1Config
+from lerobot.processor import (
+    AddBatchDimensionProcessorStep,
+    ComplementaryDataProcessorStep,
+    DeviceProcessorStep,
+    NormalizerProcessorStep,
+    PolicyAction,
+    PolicyProcessorPipeline,
+    ProcessorStep,
+    ProcessorStepRegistry,
+    RenameObservationsProcessorStep,
+    UnnormalizerProcessorStep,
+)
+from lerobot.processor.converters import policy_action_to_transition, transition_to_policy_action
+from lerobot.types import TransitionKey
+from lerobot.utils.constants import (
+    OBS_STATE,
+    POLICY_POSTPROCESSOR_DEFAULT_NAME,
+    POLICY_PREPROCESSOR_DEFAULT_NAME,
+)
+from lerobot.utils.import_utils import _transformers_available, require_package
+
+if TYPE_CHECKING or _transformers_available:
+    from transformers.models.qwen2_5_vl import Qwen2_5_VLProcessor
+else:
+    Qwen2_5_VLProcessor = None
+
+SYSTEM_MESSAGE = "You are a helpful physical assistant."
+
+# EO-1 special tokens
+ACTION_START_TOKEN = "<|action_start|>"  # nosec B105
+DEFAULT_ACTION_TOKEN = "<|action_pad|>"  # nosec B105
+ACTION_END_TOKEN = "<|action_end|>"  # nosec B105
+STATE_START_TOKEN = "<|state_start|>"  # nosec B105
+DEFAULT_STATE_TOKEN = "<|state_pad|>"  # nosec B105
+STATE_END_TOKEN = "<|state_end|>"  # nosec B105
+TASK_VLA_TOKEN = "<|vla|>"  # nosec B105
+
+EO1_SPECIAL_TOKENS = [
+    ACTION_START_TOKEN,
+    DEFAULT_ACTION_TOKEN,
+    ACTION_END_TOKEN,
+    STATE_START_TOKEN,
+    DEFAULT_STATE_TOKEN,
+    STATE_END_TOKEN,
+    TASK_VLA_TOKEN,
+]
+
+
+@dataclass
+@ProcessorStepRegistry.register(name="eo1_conversation_template_processor")
+class EO1ConversationTemplateStep(ComplementaryDataProcessorStep):
+    input_features: dict[str, PolicyFeature] | dict[str, dict[str, Any]]
+    chunk_size: int
+
+    _image_keys: list[str] = field(default_factory=list, init=False, repr=False)
+
+    def __post_init__(self):
+        # Robust JSON deserialization handling (guard empty maps).
+        if self.input_features:
+            first_val = next(iter(self.input_features.values()))
+            if isinstance(first_val, dict):
+                reconstructed = {}
+                for key, ft_dict in self.input_features.items():
+                    reconstructed[key] = PolicyFeature(
+                        type=FeatureType(ft_dict["type"]), shape=tuple(ft_dict["shape"])
+                    )
+                self.input_features = reconstructed
+
+        self._image_keys = [
+            key for key, value in self.input_features.items() if value.type == FeatureType.VISUAL
+        ]
+
+    def complementary_data(self, complementary_data):
+        tasks = complementary_data.get("task")
+        if tasks is None:
+            raise ValueError("Task is required for EO1ConversationTemplateStep.")
+
+        observation = self.transition.get(TransitionKey.OBSERVATION)
+        if observation is None:
+            raise ValueError("Observation is required for EO1ConversationTemplateStep.")
+
+        if OBS_STATE in observation and observation[OBS_STATE].shape[0] != len(tasks):
+            raise ValueError("Batch size mismatch between observation.state and task list.")
+
+        # LeRobot visual observations reach in processor as float32 tensors in [0, 1].
+        # Convert to uint8 in [0, 255] to meet the input requirement of Qwen2.5-VL-3B-Instruct.
+        images = {
+            key: observation[key].clamp(0, 1).mul(255.0).round().to(torch.uint8) for key in self._image_keys
+        }
+        messages = []
+        for i in range(len(tasks)):
+            content = [
+                *[{"type": "image", "image": images[key][i]} for key in self._image_keys],
+                {
+                    "type": "text",
+                    "text": (
+                        f"{STATE_START_TOKEN}{DEFAULT_STATE_TOKEN}{STATE_END_TOKEN}{tasks[i]}{TASK_VLA_TOKEN}"
+                    ),
+                },
+            ]
+            messages.append(
+                [
+                    {"role": "system", "content": [{"type": "text", "text": SYSTEM_MESSAGE}]},
+                    {"role": "user", "content": content},
+                    {
+                        "role": "assistant",
+                        "content": [
+                            {
+                                "type": "text",
+                                "text": f"{ACTION_START_TOKEN}{DEFAULT_ACTION_TOKEN * self.chunk_size}{ACTION_END_TOKEN}",
+                            }
+                        ],
+                    },
+                ]
+            )
+
+        complementary_data["messages"] = messages
+
+        return complementary_data
+
+    def transform_features(
+        self, features: dict[PipelineFeatureType, dict[str, PolicyFeature]]
+    ) -> dict[PipelineFeatureType, dict[str, PolicyFeature]]:
+        """
+        This step only materializes EO1-specific message objects in complementary_data.
+        PipelineFeatureType tracks only ACTION and OBSERVATION, so there is no static
+        feature contract change to record here.
+        """
+        return features
+
+    def get_config(self) -> dict[str, Any]:
+        return {
+            "input_features": {
+                key: {"type": ft.type.value, "shape": ft.shape} for key, ft in self.input_features.items()
+            },
+            "chunk_size": self.chunk_size,
+        }
+
+
+@dataclass
+@ProcessorStepRegistry.register(name="eo1_qwen_processor")
+class EO1QwenProcessorStep(ComplementaryDataProcessorStep):
+    processor_name: str = "Qwen/Qwen2.5-VL-3B-Instruct"
+    image_min_pixels: int | None = 64 * 28 * 28
+    image_max_pixels: int | None = 128 * 28 * 28
+    use_fast_processor: bool = False
+
+    _processor: Qwen2_5_VLProcessor | None = field(default=None, init=False, repr=False)
+    _state_token_id: int | None = field(default=None, init=False, repr=False)
+    _action_token_id: int | None = field(default=None, init=False, repr=False)
+
+    def __post_init__(self):
+        require_package("transformers", extra="eo1")
+        self._processor = Qwen2_5_VLProcessor.from_pretrained(
+            self.processor_name,
+            use_fast=self.use_fast_processor,
+        )
+        self._processor.tokenizer.add_tokens(EO1_SPECIAL_TOKENS, special_tokens=True)
+        self._state_token_id = self._processor.tokenizer.convert_tokens_to_ids(DEFAULT_STATE_TOKEN)
+        self._action_token_id = self._processor.tokenizer.convert_tokens_to_ids(DEFAULT_ACTION_TOKEN)
+
+    def complementary_data(self, complementary_data):
+        messages = complementary_data.pop("messages", None)
+        if messages is None:
+            raise ValueError("Messages are required for EO1QwenProcessorStep.")
+
+        # Rollout batches use left padding so action spans stay aligned across samples.
+        # Supervised batches use right padding to match standard training collation.
+        padding_side = "right" if self.transition.get(TransitionKey.ACTION) is not None else "left"
+
+        inputs = self._processor.apply_chat_template(
+            messages,
+            tokenize=True,
+            padding=True,
+            padding_side=padding_side,
+            min_pixels=self.image_min_pixels,
+            max_pixels=self.image_max_pixels,
+            add_generation_prompt=False,
+            return_dict=True,
+            return_tensors="pt",
+        )
+
+        complementary_data["input_ids"] = inputs["input_ids"]
+        complementary_data["pixel_values"] = inputs["pixel_values"]
+        complementary_data["image_grid_thw"] = inputs["image_grid_thw"]
+        complementary_data["attention_mask"] = inputs["attention_mask"]
+        complementary_data["mm_token_type_ids"] = inputs["mm_token_type_ids"]
+        complementary_data["state_token_id"] = self._state_token_id
+        complementary_data["action_token_id"] = self._action_token_id
+
+        return complementary_data
+
+    def get_config(self) -> dict[str, Any]:
+        return {
+            "processor_name": self.processor_name,
+            "image_min_pixels": self.image_min_pixels,
+            "image_max_pixels": self.image_max_pixels,
+            "use_fast_processor": self.use_fast_processor,
+        }
+
+    def transform_features(
+        self, features: dict[PipelineFeatureType, dict[str, PolicyFeature]]
+    ) -> dict[PipelineFeatureType, dict[str, PolicyFeature]]:
+        """
+        This step only converts the messages to the model input format.
+        """
+        return features
+
+
+def make_eo1_pre_post_processors(
+    config: EO1Config,
+    dataset_stats: dict[str, dict[str, torch.Tensor]] | None = None,
+) -> tuple[
+    PolicyProcessorPipeline[dict[str, Any], dict[str, Any]],
+    PolicyProcessorPipeline[PolicyAction, PolicyAction],
+]:
+    """Build pre/post processor pipelines for EO1."""
+
+    input_steps: list[ProcessorStep] = [
+        RenameObservationsProcessorStep(rename_map={}),
+        AddBatchDimensionProcessorStep(),
+        NormalizerProcessorStep(
+            features={**config.input_features, **config.output_features},
+            norm_map=config.normalization_mapping,
+            stats=dataset_stats,
+        ),
+        EO1ConversationTemplateStep(input_features=config.input_features, chunk_size=config.chunk_size),
+        EO1QwenProcessorStep(
+            processor_name=config.vlm_base,
+            image_min_pixels=config.image_min_pixels,
+            image_max_pixels=config.image_max_pixels,
+            use_fast_processor=config.use_fast_processor,
+        ),
+        DeviceProcessorStep(device=config.device),
+    ]
+
+    output_steps: list[ProcessorStep] = [
+        UnnormalizerProcessorStep(
+            features=config.output_features,
+            norm_map=config.normalization_mapping,
+            stats=dataset_stats,
+        ),
+        DeviceProcessorStep(device="cpu"),
+    ]
+
+    return (
+        PolicyProcessorPipeline[dict[str, Any], dict[str, Any]](
+            steps=input_steps,
+            name=POLICY_PREPROCESSOR_DEFAULT_NAME,
+        ),
+        PolicyProcessorPipeline[PolicyAction, PolicyAction](
+            steps=output_steps,
+            name=POLICY_POSTPROCESSOR_DEFAULT_NAME,
+            to_transition=policy_action_to_transition,
+            to_output=transition_to_policy_action,
+        ),
+    )
diff --git a/src/lerobot/policies/factory.py b/src/lerobot/policies/factory.py
index 9c64ffaaf..8937bc6ae 100644
--- a/src/lerobot/policies/factory.py
+++ b/src/lerobot/policies/factory.py
@@ -46,6 +46,7 @@ from lerobot.utils.feature_utils import dataset_to_policy_features
 
 from .act.configuration_act import ACTConfig
 from .diffusion.configuration_diffusion import DiffusionConfig
+from .eo1.configuration_eo1 import EO1Config
 from .gaussian_actor.configuration_gaussian_actor import GaussianActorConfig
 from .groot.configuration_groot import GrootConfig
 from .multi_task_dit.configuration_multi_task_dit import MultiTaskDiTConfig
@@ -146,6 +147,10 @@ def get_policy_class(name: str) -> type[PreTrainedPolicy]:
         from .wall_x.modeling_wall_x import WallXPolicy
 
         return WallXPolicy
+    elif name == "eo1":
+        from .eo1.modeling_eo1 import EO1Policy
+
+        return EO1Policy
     else:
         try:
             return _get_policy_cls_from_policy_name(name=name)
@@ -196,6 +201,8 @@ def make_policy_config(policy_type: str, **kwargs) -> PreTrainedConfig:
         return XVLAConfig(**kwargs)
     elif policy_type == "wall_x":
         return WallXConfig(**kwargs)
+    elif policy_type == "eo1":
+        return EO1Config(**kwargs)
     else:
         try:
             config_cls = PreTrainedConfig.get_choice_class(policy_type)
@@ -399,6 +406,13 @@ def make_pre_post_processors(
             config=policy_cfg,
             dataset_stats=kwargs.get("dataset_stats"),
         )
+    elif isinstance(policy_cfg, EO1Config):
+        from .eo1.processor_eo1 import make_eo1_pre_post_processors
+
+        processors = make_eo1_pre_post_processors(
+            config=policy_cfg,
+            dataset_stats=kwargs.get("dataset_stats"),
+        )
 
     else:
         try:
diff --git a/src/lerobot/policies/groot/action_head/flow_matching_action_head.py b/src/lerobot/policies/groot/action_head/flow_matching_action_head.py
index 2c1ca6014..986820670 100644
--- a/src/lerobot/policies/groot/action_head/flow_matching_action_head.py
+++ b/src/lerobot/policies/groot/action_head/flow_matching_action_head.py
@@ -13,7 +13,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from dataclasses import dataclass, field
+from dataclasses import field
 from typing import TYPE_CHECKING
 
 import torch
@@ -109,7 +109,6 @@ class MultiEmbodimentActionEncoder(nn.Module):
         return x
 
 
-@dataclass
 class FlowmatchingActionHeadConfig(PretrainedConfig):
     """NOTE: N1.5 uses XEmbFlowmatchingPolicyHeadConfig as action head"""
 
diff --git a/src/lerobot/policies/pi0/modeling_pi0.py b/src/lerobot/policies/pi0/modeling_pi0.py
index b3a877b7b..510af0796 100644
--- a/src/lerobot/policies/pi0/modeling_pi0.py
+++ b/src/lerobot/policies/pi0/modeling_pi0.py
@@ -444,13 +444,13 @@ class PaliGemmaWithExpertModel(
         if image.dtype != torch.float32:
             image = image.to(torch.float32)
         image_outputs = self.paligemma.model.get_image_features(image)
-        features = image_outputs.pooler_output * self.paligemma.config.text_config.hidden_size**0.5
+        features = image_outputs.pooler_output
         if features.dtype != out_dtype:
             features = features.to(out_dtype)
         return features
 
     def embed_language_tokens(self, tokens: torch.Tensor):
-        return self.paligemma.model.language_model.embed_tokens(tokens)
+        return self.paligemma.model.language_model.get_input_embeddings()(tokens)
 
     def forward(
         self,
@@ -666,8 +666,7 @@ class PI0Pytorch(nn.Module):  # see openpi `PI0Pytorch`
         # Process language tokens
         def lang_embed_func(lang_tokens):
             lang_emb = self.paligemma_with_expert.embed_language_tokens(lang_tokens)
-            lang_emb_dim = lang_emb.shape[-1]
-            return lang_emb * math.sqrt(lang_emb_dim)
+            return lang_emb
 
         lang_emb = self._apply_checkpoint(lang_embed_func, lang_tokens)
         embs.append(lang_emb)
diff --git a/src/lerobot/policies/pi0_fast/modeling_pi0_fast.py b/src/lerobot/policies/pi0_fast/modeling_pi0_fast.py
index 0bc301609..d9342eb24 100644
--- a/src/lerobot/policies/pi0_fast/modeling_pi0_fast.py
+++ b/src/lerobot/policies/pi0_fast/modeling_pi0_fast.py
@@ -16,7 +16,6 @@
 
 import builtins
 import logging
-import math
 from collections import deque
 from pathlib import Path
 from typing import TYPE_CHECKING, Literal, TypedDict, Unpack
@@ -261,13 +260,15 @@ class PI0FastPaliGemma(nn.Module):
         if image.dtype != torch.float32:
             image = image.to(torch.float32)
         image_outputs = self.paligemma.model.get_image_features(image)
-        features = image_outputs.pooler_output * self.paligemma.config.text_config.hidden_size**0.5
+        features = image_outputs.pooler_output
+        norm = 2048**0.5
+        features = features / norm * norm
         if features.dtype != out_dtype:
             features = features.to(out_dtype)
         return features
 
     def embed_language_tokens(self, tokens: torch.Tensor):
-        return self.paligemma.model.language_model.embed_tokens(tokens)
+        return self.paligemma.model.language_model.get_input_embeddings()(tokens)
 
     def forward(
         self,
@@ -417,8 +418,7 @@ class PI0FastPytorch(nn.Module):  # see openpi `PI0Pytorch`
         # Process language instruction tokens
         def lang_embed_func(tokens):
             lang_emb = self.paligemma_with_expert.embed_language_tokens(tokens)
-            lang_emb_dim = lang_emb.shape[-1]
-            return lang_emb * math.sqrt(lang_emb_dim)
+            return lang_emb
 
         lang_emb = self._apply_checkpoint(lang_embed_func, tokens)
         embs.append(lang_emb)
@@ -432,8 +432,7 @@ class PI0FastPytorch(nn.Module):  # see openpi `PI0Pytorch`
 
             def fast_action_embed_func(fast_action_tokens):
                 fast_emb = self.paligemma_with_expert.embed_language_tokens(fast_action_tokens)
-                fast_emb_dim = fast_emb.shape[-1]
-                return fast_emb * math.sqrt(fast_emb_dim)
+                return fast_emb
 
             fast_action_emb = self._apply_checkpoint(fast_action_embed_func, fast_action_tokens)
             embs.append(fast_action_emb)
@@ -666,7 +665,6 @@ class PI0FastPytorch(nn.Module):  # see openpi `PI0Pytorch`
             if t < max_decoding_steps - 1:
                 # embed the newly generated token
                 next_token_emb = self.paligemma_with_expert.embed_language_tokens(next_token)
-                next_token_emb = next_token_emb * math.sqrt(next_token_emb.shape[-1])
                 if prefix_embs.dtype == torch.bfloat16:
                     next_token_emb = next_token_emb.to(dtype=torch.bfloat16)
 
@@ -771,7 +769,6 @@ class PI0FastPytorch(nn.Module):  # see openpi `PI0Pytorch`
             # Embed the single previous token
             # We use embed_language_tokens directly to avoid overhead of full prefix embedding
             next_token_emb = self.paligemma_with_expert.embed_language_tokens(next_token)
-            next_token_emb = next_token_emb * math.sqrt(next_token_emb.shape[-1])
             if prefix_embs.dtype == torch.bfloat16:
                 next_token_emb = next_token_emb.to(dtype=torch.bfloat16)
 
diff --git a/src/lerobot/policies/vqbet/configuration_vqbet.py b/src/lerobot/policies/vqbet/configuration_vqbet.py
index d02745321..e5c1754e8 100644
--- a/src/lerobot/policies/vqbet/configuration_vqbet.py
+++ b/src/lerobot/policies/vqbet/configuration_vqbet.py
@@ -97,8 +97,8 @@ class VQBeTConfig(PreTrainedConfig):
     vision_backbone: str = "resnet18"
     crop_shape: tuple[int, int] | None = (84, 84)
     crop_is_random: bool = True
-    pretrained_backbone_weights: str | None = None
-    use_group_norm: bool = True
+    pretrained_backbone_weights: str | None = "ResNet18_Weights.IMAGENET1K_V1"
+    use_group_norm: bool = False
     spatial_softmax_num_keypoints: int = 32
     # VQ-VAE
     n_vqvae_training_steps: int = 20000
diff --git a/src/lerobot/policies/wall_x/qwen_model/qwen2_5_vl_moe.py b/src/lerobot/policies/wall_x/qwen_model/qwen2_5_vl_moe.py
index ecf3eb371..a80096514 100644
--- a/src/lerobot/policies/wall_x/qwen_model/qwen2_5_vl_moe.py
+++ b/src/lerobot/policies/wall_x/qwen_model/qwen2_5_vl_moe.py
@@ -22,7 +22,7 @@ from transformers.utils import (
     add_start_docstrings,
     add_start_docstrings_to_model_forward,
     is_flash_attn_2_available,
-    is_flash_attn_greater_or_equal_2_10,
+    is_flash_attn_greater_or_equal,
     is_torchdynamo_compiling,
     logging,
     replace_return_docstrings,
@@ -890,7 +890,7 @@ class Qwen2_5_VLFlashAttention2(Qwen2_5_VLAttention):
         # TODO: Should be removed once Flash Attention for RoCm is bumped to 2.1.
         # flash_attn<2.1 generates top-left aligned causal mask, while what is needed here is bottom-right alignment, that was made default for flash_attn>=2.1. This attribute is used to handle this difference. Reference: https://github.com/Dao-AILab/flash-attention/releases/tag/v2.1.0.
         # Beware that with flash_attn<2.1, using q_seqlen != k_seqlen (except for the case q_seqlen == 1) produces a wrong mask (top-left).
-        self._flash_attn_uses_top_left_mask = not is_flash_attn_greater_or_equal_2_10()
+        self._flash_attn_uses_top_left_mask = not is_flash_attn_greater_or_equal("2.1.0")
 
     def forward(
         self,
diff --git a/src/lerobot/policies/xvla/modeling_florence2.py b/src/lerobot/policies/xvla/modeling_florence2.py
index e33efe5c3..81f9c8234 100644
--- a/src/lerobot/policies/xvla/modeling_florence2.py
+++ b/src/lerobot/policies/xvla/modeling_florence2.py
@@ -45,7 +45,7 @@ from transformers.utils import (
     add_start_docstrings,
     add_start_docstrings_to_model_forward,
     is_flash_attn_2_available,
-    is_flash_attn_greater_or_equal_2_10,
+    is_flash_attn_greater_or_equal,
     logging,
     replace_return_docstrings,
 )
@@ -909,7 +909,7 @@ class Florence2FlashAttention2(Florence2Attention):
         # TODO: Should be removed once Flash Attention for RoCm is bumped to 2.1.
         # flash_attn<2.1 generates top-left aligned causal mask, while what is needed here is bottom-right alignment, that was made default for flash_attn>=2.1. This attribute is used to handle this difference. Reference: https://github.com/Dao-AILab/flash-attention/releases/tag/v2.1.0.
         # Beware that with flash_attn<2.1, using q_seqlen != k_seqlen (except for the case q_seqlen == 1) produces a wrong mask (top-left).
-        self._flash_attn_uses_top_left_mask = not is_flash_attn_greater_or_equal_2_10()
+        self._flash_attn_uses_top_left_mask = not is_flash_attn_greater_or_equal("2.1.0")
 
     def _reshape(self, tensor: torch.Tensor, seq_len: int, bsz: int):
         return tensor.view(bsz, seq_len, self.num_heads, self.head_dim)
diff --git a/src/lerobot/robots/bi_openarm_follower/bi_openarm_follower.py b/src/lerobot/robots/bi_openarm_follower/bi_openarm_follower.py
index c27398278..b6f446d9c 100644
--- a/src/lerobot/robots/bi_openarm_follower/bi_openarm_follower.py
+++ b/src/lerobot/robots/bi_openarm_follower/bi_openarm_follower.py
@@ -54,6 +54,7 @@ class BiOpenArmFollower(Robot):
             calibration_dir=config.calibration_dir,
             port=config.left_arm_config.port,
             disable_torque_on_disconnect=config.left_arm_config.disable_torque_on_disconnect,
+            use_velocity_and_torque=config.left_arm_config.use_velocity_and_torque,
             max_relative_target=config.left_arm_config.max_relative_target,
             cameras=left_cameras,
             side=config.left_arm_config.side,
@@ -72,6 +73,7 @@ class BiOpenArmFollower(Robot):
             calibration_dir=config.calibration_dir,
             port=config.right_arm_config.port,
             disable_torque_on_disconnect=config.right_arm_config.disable_torque_on_disconnect,
+            use_velocity_and_torque=config.right_arm_config.use_velocity_and_torque,
             max_relative_target=config.right_arm_config.max_relative_target,
             cameras=right_cameras,
             side=config.right_arm_config.side,
diff --git a/src/lerobot/robots/openarm_follower/config_openarm_follower.py b/src/lerobot/robots/openarm_follower/config_openarm_follower.py
index 88d81fd50..096ec320f 100644
--- a/src/lerobot/robots/openarm_follower/config_openarm_follower.py
+++ b/src/lerobot/robots/openarm_follower/config_openarm_follower.py
@@ -66,6 +66,10 @@ class OpenArmFollowerConfigBase:
     # Whether to disable torque when disconnecting
     disable_torque_on_disconnect: bool = True
 
+    # When True, expose `.vel` and `.torque` per motor in observation features.
+    # Default False for compatibility with the position-only openarm_mini teleoperator.
+    use_velocity_and_torque: bool = False
+
     # Safety limit for relative target positions
     # Set to a positive scalar for all motors, or a dict mapping motor names to limits
     max_relative_target: float | dict[str, float] | None = None
diff --git a/src/lerobot/robots/openarm_follower/openarm_follower.py b/src/lerobot/robots/openarm_follower/openarm_follower.py
index 4d1765f07..020f24052 100644
--- a/src/lerobot/robots/openarm_follower/openarm_follower.py
+++ b/src/lerobot/robots/openarm_follower/openarm_follower.py
@@ -93,8 +93,9 @@ class OpenArmFollower(Robot):
         features: dict[str, type] = {}
         for motor in self.bus.motors:
             features[f"{motor}.pos"] = float
-            features[f"{motor}.vel"] = float  # Add this
-            features[f"{motor}.torque"] = float  # Add this
+            if self.config.use_velocity_and_torque:
+                features[f"{motor}.vel"] = float
+                features[f"{motor}.torque"] = float
         return features
 
     @property
@@ -235,8 +236,9 @@ class OpenArmFollower(Robot):
         for motor in self.bus.motors:
             state = states.get(motor, {})
             obs_dict[f"{motor}.pos"] = state.get("position", 0.0)
-            obs_dict[f"{motor}.vel"] = state.get("velocity", 0.0)
-            obs_dict[f"{motor}.torque"] = state.get("torque", 0.0)
+            if self.config.use_velocity_and_torque:
+                obs_dict[f"{motor}.vel"] = state.get("velocity", 0.0)
+                obs_dict[f"{motor}.torque"] = state.get("torque", 0.0)
 
         # Capture images from cameras
         for cam_key, cam in self.cameras.items():
diff --git a/src/lerobot/rollout/strategies/dagger.py b/src/lerobot/rollout/strategies/dagger.py
index da4b463fc..1bd6d9bb0 100644
--- a/src/lerobot/rollout/strategies/dagger.py
+++ b/src/lerobot/rollout/strategies/dagger.py
@@ -33,12 +33,13 @@ Recording modes:
     ``record_autonomous=False``: Only correction windows are recorded.
         Each correction (start to stop) becomes one episode.
 
-Teleoperator expectations:
-    The user is responsible for keeping the leader arm aligned with the
-    follower arm at the moment a correction begins.  Programmatic motor
-    handover (``enable_torque`` / ``disable_torque`` / ``write_goal_positions``)
-    is intentionally not invoked here — see the TODO in
-    :func:`DAggerStrategy._apply_transition` for the open design decision.
+Teleoperator handover:
+    On AUTONOMOUS → PAUSED, actuated teleops (those with non-empty
+    ``feedback_features``, e.g. SO-101, OpenArmMini) are smoothly driven to
+    the follower's last position via ``send_feedback`` so the operator takes
+    over without a jerk.  Non-actuated teleops cannot be driven,
+    so on PAUSED → CORRECTING the follower is instead slid to the teleop's
+    current pose before the correction begins.
 """
 
 from __future__ import annotations
@@ -175,17 +176,27 @@ class DAggerEvents:
 # ---------------------------------------------------------------------------
 
 
-# TODO(Steven): re-enable programmatic teleop alignment once we decide whether
-# to enforce motor-control methods on every Teleoperator.  Until then the user
-# is responsible for moving the leader arm to the follower's pose at the moment
-# a correction begins.
-def _teleop_smooth_move_to(
-    teleop: Teleoperator, target_pos: dict, duration_s: float = 2.0, fps: int = 50
-) -> None:
-    """Smoothly move teleop to target position via linear interpolation.
+def _teleop_supports_feedback(teleop: Teleoperator) -> bool:
+    """Return True when the teleop can receive position feedback (is actuated).
+    TODO(Maxime): See if it is possible to unify this interface across teleops instead of duck-typing.
+    """
+    return (
+        bool(teleop.feedback_features)
+        and hasattr(teleop, "disable_torque")
+        and hasattr(teleop, "enable_torque")
+    )
 
-    Requires the teleoperator to support motor control methods
-    (``enable_torque``, ``write_goal_positions``, ``get_action``).
+
+def _teleop_smooth_move_to(
+    teleop: Teleoperator, target_pos: dict, duration_s: float = 2.0, fps: int = 30
+) -> None:
+    """Smoothly move an actuated teleop to ``target_pos`` via linear interpolation.
+
+    Requires the teleoperator to support feedback
+    (i.e. have non-empty ``feedback_features`` and implement ``disable_torque`` / ``enable_torque``).
+
+    TODO(Maxime): This blocks up to ``duration_s`` seconds, during this time
+    the follower robot doesn't receive new actions, this could be an issue on LeKiwi.
     """
     teleop.enable_torque()
     current = teleop.get_action()
@@ -193,13 +204,28 @@ def _teleop_smooth_move_to(
 
     for step in range(steps + 1):
         t = step / steps
-        interp = {}
-        for k in current:
-            if k in target_pos:
-                interp[k] = current[k] * (1 - t) + target_pos[k] * t
-            else:
-                interp[k] = current[k]
-        teleop.write_goal_positions(interp)
+        interp = {
+            k: current[k] * (1 - t) + target_pos[k] * t if k in target_pos else current[k] for k in current
+        }
+        teleop.send_feedback(interp)
+        time.sleep(1 / fps)
+
+
+def _follower_smooth_move_to(
+    robot: ThreadSafeRobot, current: dict, target: dict, duration_s: float = 1.0, fps: int = 30
+) -> None:
+    """Smoothly move the follower robot from ``current`` to ``target`` action.
+
+    Used when the teleop is non-actuated: instead of driving the leader arm
+    to the follower, we bring the follower to the teleop's current pose.
+    Both ``current`` and ``target`` must be in robot-action key space.
+    """
+    steps = max(int(duration_s * fps), 1)
+
+    for step in range(steps + 1):
+        t = step / steps
+        interp = {k: current[k] * (1 - t) + target[k] * t if k in target else current[k] for k in current}
+        robot.send_action(interp)
         time.sleep(1 / fps)
 
 
@@ -415,9 +441,6 @@ class DAggerStrategy(RolloutStrategy):
         engine.reset()
         interpolator.reset()
         events.reset()
-        # TODO(Steven): re-enable once Teleoperator motor-control methods are
-        # standardised; until then the user pre-aligns the leader by hand.
-        # teleop.disable_torque()
         engine.resume()
 
         last_action: dict[str, Any] | None = None
@@ -441,8 +464,16 @@ class DAggerStrategy(RolloutStrategy):
                     transition = events.consume_transition()
                     if transition is not None:
                         old_phase, new_phase = transition
-                        self._apply_transition(old_phase, new_phase, engine, interpolator, robot, teleop)
-                        last_action = None
+                        self._apply_transition(
+                            old_phase,
+                            new_phase,
+                            engine,
+                            interpolator,
+                            ctx,
+                            last_action,
+                        )
+                        if new_phase == DAggerPhase.AUTONOMOUS:
+                            last_action = None
 
                     phase = events.phase
                     obs = robot.get_observation()
@@ -532,9 +563,6 @@ class DAggerStrategy(RolloutStrategy):
             finally:
                 logger.info("DAgger continuous control loop ended — pausing engine")
                 engine.pause()
-                # TODO(Steven): re-enable once Teleoperator motor-control methods
-                # are standardised across all teleop implementations.
-                # teleop.disable_torque()
                 with contextlib.suppress(Exception):
                     with self._episode_lock:
                         dataset.save_episode()
@@ -570,9 +598,6 @@ class DAggerStrategy(RolloutStrategy):
         engine.reset()
         interpolator.reset()
         events.reset()
-        # TODO(Steven): re-enable once Teleoperator motor-control methods are
-        # standardised; until then the user pre-aligns the leader by hand.
-        # teleop.disable_torque()
         engine.resume()
 
         last_action: dict[str, Any] | None = None
@@ -600,8 +625,16 @@ class DAggerStrategy(RolloutStrategy):
                     transition = events.consume_transition()
                     if transition is not None:
                         old_phase, new_phase = transition
-                        self._apply_transition(old_phase, new_phase, engine, interpolator, robot, teleop)
-                        last_action = None
+                        self._apply_transition(
+                            old_phase,
+                            new_phase,
+                            engine,
+                            interpolator,
+                            ctx,
+                            last_action,
+                        )
+                        if new_phase == DAggerPhase.AUTONOMOUS:
+                            last_action = None
 
                         # Correction ended -> save episode (blocking if not streaming)
                         if old_phase == DAggerPhase.CORRECTING and new_phase == DAggerPhase.PAUSED:
@@ -679,9 +712,6 @@ class DAggerStrategy(RolloutStrategy):
             finally:
                 logger.info("DAgger corrections-only loop ended — pausing engine")
                 engine.pause()
-                # TODO(Steven): re-enable once Teleoperator motor-control methods
-                # are standardised across all teleop implementations.
-                # teleop.disable_torque()
                 with contextlib.suppress(Exception):
                     with self._episode_lock:
                         dataset.save_episode()
@@ -698,36 +728,71 @@ class DAggerStrategy(RolloutStrategy):
         new_phase: DAggerPhase,
         engine,
         interpolator,
-        robot: ThreadSafeRobot,
-        teleop: Teleoperator,
+        ctx: RolloutContext,
+        prev_action: dict | None,
     ) -> None:
-        """Execute side-effects for a validated phase transition."""
+        """Execute side-effects for a validated phase transition, including smooth handovers.
+
+        AUTONOMOUS -> PAUSED (actuated teleop):
+            Pause the engine, then drive the leader arm to the follower's last
+            commanded position so the operator takes over without a jerk.
+
+        PAUSED -> CORRECTING (non-actuated teleop):
+            Slide the follower to the teleop's current pose so the robot meets
+            the operator's hand rather than jumping to it on the first frame.
+
+        CORRECTING -> PAUSED (actuated teleop):
+            Re-enable torque to hold position after correction.
+            This will be potentially useful if cancelling the correction recording
+
+        PAUSED -> AUTONOMOUS:
+            Reset and resume the inference engine.
+        """
+        teleop = ctx.hardware.teleop
+        robot = ctx.hardware.robot_wrapper
+
         logger.info("Phase transition: %s -> %s", old_phase.value, new_phase.value)
         if old_phase == DAggerPhase.AUTONOMOUS and new_phase == DAggerPhase.PAUSED:
-            logger.info("Pausing engine — robot holds position")
+            logger.info("Pausing engine - robot holds position")
             engine.pause()
-            obs = robot.get_observation()
-            _robot_pos = {
-                k: v for k, v in obs.items() if k.endswith(".pos") and k in robot.observation_features
-            }
-            # TODO(Steven): once Teleoperator motor-control methods are
-            # standardised, drive the leader to the follower's pose here so the
-            # operator does not need to pre-align the arm by hand.  Until then
-            # the user is responsible for the alignment.
-            # _teleop_smooth_move_to(teleop, _robot_pos, duration_s=2.0, fps=50)
 
-        elif new_phase == DAggerPhase.CORRECTING:
-            logger.info("Entering correction mode — human teleop control")
-            # TODO(Steven): re-enable once Teleoperator motor-control methods
-            # are standardised across all teleop implementations.
-            # teleop.disable_torque()
+            if _teleop_supports_feedback(teleop) and prev_action is not None:
+                # TODO(Maxime): prev_action is in robot action key space (output of robot_action_processor).
+                # send_feedback expects teleop feedback key space. For homogeneous setups (e.g. SO-101
+                # leader + SO-101 follower) the keys are identical so this works. If the processor pipeline
+                # does non-trivial key renaming (e.g. a rename_map on action keys), the interpolation in
+                # _teleop_smooth_move_to silently no-ops and the arm doesn't move.
+                logger.info("Smooth handover: moving leader arm to follower position")
+                _teleop_smooth_move_to(teleop, prev_action)
+
+        elif old_phase == DAggerPhase.PAUSED and new_phase == DAggerPhase.CORRECTING:
+            logger.info("Entering correction mode - human teleop control")
+            if not _teleop_supports_feedback(teleop) and prev_action is not None:
+                logger.info("Smooth handover: sliding follower to teleop position")
+                obs = robot.get_observation()
+                teleop_action = teleop.get_action()
+                processed = ctx.processors.teleop_action_processor((teleop_action, obs))
+                target = ctx.processors.robot_action_processor((processed, obs))
+                _follower_smooth_move_to(robot, prev_action, target)
+
+            # unlock the teleop for human control
+            if _teleop_supports_feedback(teleop):
+                teleop.disable_torque()
+
+        elif old_phase == DAggerPhase.CORRECTING and new_phase == DAggerPhase.PAUSED:
+            if _teleop_supports_feedback(teleop):
+                teleop.enable_torque()
 
         elif new_phase == DAggerPhase.AUTONOMOUS:
-            logger.info("Resuming autonomous mode — resetting engine and interpolator")
+            logger.info("Resuming autonomous mode - resetting engine and interpolator")
             interpolator.reset()
             engine.reset()
             engine.resume()
 
+            # release teleop before resuming the policy
+            if _teleop_supports_feedback(teleop):
+                teleop.disable_torque()
+
     # ------------------------------------------------------------------
     # Background push (shared by both modes)
     # ------------------------------------------------------------------
diff --git a/src/lerobot/teleoperators/bi_openarm_leader/bi_openarm_leader.py b/src/lerobot/teleoperators/bi_openarm_leader/bi_openarm_leader.py
index 624729c02..2d2c23f9c 100644
--- a/src/lerobot/teleoperators/bi_openarm_leader/bi_openarm_leader.py
+++ b/src/lerobot/teleoperators/bi_openarm_leader/bi_openarm_leader.py
@@ -49,6 +49,7 @@ class BiOpenArmLeader(Teleoperator):
             can_data_bitrate=config.left_arm_config.can_data_bitrate,
             motor_config=config.left_arm_config.motor_config,
             manual_control=config.left_arm_config.manual_control,
+            use_velocity_and_torque=config.left_arm_config.use_velocity_and_torque,
             position_kd=config.left_arm_config.position_kd,
             position_kp=config.left_arm_config.position_kp,
         )
@@ -63,6 +64,7 @@ class BiOpenArmLeader(Teleoperator):
             can_data_bitrate=config.right_arm_config.can_data_bitrate,
             motor_config=config.right_arm_config.motor_config,
             manual_control=config.right_arm_config.manual_control,
+            use_velocity_and_torque=config.right_arm_config.use_velocity_and_torque,
             position_kd=config.right_arm_config.position_kd,
             position_kp=config.right_arm_config.position_kp,
         )
diff --git a/src/lerobot/teleoperators/openarm_leader/config_openarm_leader.py b/src/lerobot/teleoperators/openarm_leader/config_openarm_leader.py
index 4b12fe730..8e3d480f6 100644
--- a/src/lerobot/teleoperators/openarm_leader/config_openarm_leader.py
+++ b/src/lerobot/teleoperators/openarm_leader/config_openarm_leader.py
@@ -60,6 +60,10 @@ class OpenArmLeaderConfigBase:
     # When enabled, motors have torque disabled for manual movement
     manual_control: bool = True
 
+    # When True, expose `.vel` and `.torque` per motor in action features.
+    # Default False for compatibility with the position-only openarm_mini teleoperator.
+    use_velocity_and_torque: bool = False
+
     # TODO(Steven, Pepijn): Not used ... ?
     # MIT control parameters (used when manual_control=False for torque control)
     # List of 8 values: [joint_1, joint_2, joint_3, joint_4, joint_5, joint_6, joint_7, gripper]
diff --git a/src/lerobot/teleoperators/openarm_leader/openarm_leader.py b/src/lerobot/teleoperators/openarm_leader/openarm_leader.py
index 65da7416a..a82ad26af 100644
--- a/src/lerobot/teleoperators/openarm_leader/openarm_leader.py
+++ b/src/lerobot/teleoperators/openarm_leader/openarm_leader.py
@@ -70,8 +70,9 @@ class OpenArmLeader(Teleoperator):
         features: dict[str, type] = {}
         for motor in self.bus.motors:
             features[f"{motor}.pos"] = float
-            features[f"{motor}.vel"] = float
-            features[f"{motor}.torque"] = float
+            if self.config.use_velocity_and_torque:
+                features[f"{motor}.vel"] = float
+                features[f"{motor}.torque"] = float
         return features
 
     @property
@@ -201,8 +202,9 @@ class OpenArmLeader(Teleoperator):
         for motor in self.bus.motors:
             state = states.get(motor, {})
             action_dict[f"{motor}.pos"] = state.get("position")
-            action_dict[f"{motor}.vel"] = state.get("velocity")
-            action_dict[f"{motor}.torque"] = state.get("torque")
+            if self.config.use_velocity_and_torque:
+                action_dict[f"{motor}.vel"] = state.get("velocity")
+                action_dict[f"{motor}.torque"] = state.get("torque")
 
         dt_ms = (time.perf_counter() - start) * 1e3
         logger.debug(f"{self} read state: {dt_ms:.1f}ms")
diff --git a/src/lerobot/teleoperators/openarm_mini/openarm_mini.py b/src/lerobot/teleoperators/openarm_mini/openarm_mini.py
index edc28e7a6..2df1dfcfe 100644
--- a/src/lerobot/teleoperators/openarm_mini/openarm_mini.py
+++ b/src/lerobot/teleoperators/openarm_mini/openarm_mini.py
@@ -112,7 +112,7 @@ class OpenArmMini(Teleoperator):
 
     @property
     def feedback_features(self) -> dict[str, type]:
-        return {}
+        return self.action_features
 
     @property
     def is_connected(self) -> bool:
@@ -348,8 +348,9 @@ class OpenArmMini(Teleoperator):
         if left_goals:
             self.bus_left.sync_write("Goal_Position", left_goals)
 
+    @check_if_not_connected
     def send_feedback(self, feedback: dict[str, float]) -> None:
-        raise NotImplementedError("Feedback is not yet implemented for OpenArm Mini.")
+        self.write_goal_positions(feedback)
 
     @check_if_not_connected
     def disconnect(self) -> None:
diff --git a/src/lerobot/teleoperators/so_leader/so_leader.py b/src/lerobot/teleoperators/so_leader/so_leader.py
index 04ce0f21f..7e731d5ed 100644
--- a/src/lerobot/teleoperators/so_leader/so_leader.py
+++ b/src/lerobot/teleoperators/so_leader/so_leader.py
@@ -59,7 +59,7 @@ class SOLeader(Teleoperator):
 
     @property
     def feedback_features(self) -> dict[str, type]:
-        return {}
+        return self.action_features
 
     @property
     def is_connected(self) -> bool:
@@ -130,6 +130,12 @@ class SOLeader(Teleoperator):
         for motor in self.bus.motors:
             self.bus.write("Operating_Mode", motor, OperatingMode.POSITION.value)
 
+    def enable_torque(self) -> None:
+        self.bus.enable_torque()
+
+    def disable_torque(self) -> None:
+        self.bus.disable_torque()
+
     def setup_motors(self) -> None:
         for motor in reversed(self.bus.motors):
             input(f"Connect the controller board to the '{motor}' motor only and press enter.")
@@ -145,9 +151,11 @@ class SOLeader(Teleoperator):
         logger.debug(f"{self} read action: {dt_ms:.1f}ms")
         return action
 
+    @check_if_not_connected
     def send_feedback(self, feedback: dict[str, float]) -> None:
-        # TODO: Implement force feedback
-        raise NotImplementedError
+        goals = {k.removesuffix(".pos"): v for k, v in feedback.items() if k.endswith(".pos")}
+        if goals:
+            self.bus.sync_write("Goal_Position", goals)
 
     @check_if_not_connected
     def disconnect(self) -> None:
diff --git a/tests/artifacts/policies/pusht_diffusion_/actions.safetensors b/tests/artifacts/policies/pusht_diffusion_/actions.safetensors
index 70b1411ab..65b8d5ca5 100644
--- a/tests/artifacts/policies/pusht_diffusion_/actions.safetensors
+++ b/tests/artifacts/policies/pusht_diffusion_/actions.safetensors
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:54aecbc1af72a4cd5e9261492f5e7601890517516257aacdf2a0ffb3ce281f1b
+oid sha256:51effd76b73e972f10d31f5084ab906386134b600c87b2668767d30232a902bd
 size 992
diff --git a/tests/artifacts/policies/pusht_diffusion_/grad_stats.safetensors b/tests/artifacts/policies/pusht_diffusion_/grad_stats.safetensors
index bea7d4f19..4a5593a46 100644
--- a/tests/artifacts/policies/pusht_diffusion_/grad_stats.safetensors
+++ b/tests/artifacts/policies/pusht_diffusion_/grad_stats.safetensors
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:88a9c3775a2aa1e90a08850521970070a4fcf0f6b82aab43cd8ccc5cf77e0013
-size 47424
+oid sha256:d4d7a16ca67f9adefac0e0620a7b2e9c822f2db42faaaced7a89fbad60e5ead4
+size 47680
diff --git a/tests/artifacts/policies/pusht_diffusion_/output_dict.safetensors b/tests/artifacts/policies/pusht_diffusion_/output_dict.safetensors
index 20cc4f547..f47997b8f 100644
--- a/tests/artifacts/policies/pusht_diffusion_/output_dict.safetensors
+++ b/tests/artifacts/policies/pusht_diffusion_/output_dict.safetensors
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:91a2635e05a75fe187a5081504c5f35ce3417378813fa2deaf9ca4e8200e1819
+oid sha256:796c439ee8a64bf9901ff8325e7419bda8bd316360ee95e6304e8e1ae0f4c36c
 size 68
diff --git a/tests/artifacts/policies/pusht_diffusion_/param_stats.safetensors b/tests/artifacts/policies/pusht_diffusion_/param_stats.safetensors
index 365a453dd..104a05f96 100644
--- a/tests/artifacts/policies/pusht_diffusion_/param_stats.safetensors
+++ b/tests/artifacts/policies/pusht_diffusion_/param_stats.safetensors
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:645bff922ac7bea63ad018ebf77c303c0e4cd2c1c0dc5ef3192865281bef3dc6
-size 47424
+oid sha256:ad33a8b47c39c2e1374567ff9da43cdb95e2dbe904c1b02a35051346d3043095
+size 47680
diff --git a/tests/policies/eo1/test_eo1.py b/tests/policies/eo1/test_eo1.py
new file mode 100644
index 000000000..72c78430c
--- /dev/null
+++ b/tests/policies/eo1/test_eo1.py
@@ -0,0 +1,186 @@
+#!/usr/bin/env python
+
+# Copyright 2026 The HuggingFace Inc. team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Smoke tests for EO1's public LeRobot policy interface."""
+
+from __future__ import annotations
+
+from types import SimpleNamespace
+
+import pytest
+import torch
+from torch import nn
+
+pytest.importorskip("transformers")
+
+from lerobot.configs.types import FeatureType, PolicyFeature
+from lerobot.policies.eo1.modeling_eo1 import EO1Policy
+from lerobot.utils.constants import ACTION, OBS_STATE
+
+HIDDEN_SIZE = 8
+STATE_DIM = 4
+ACTION_DIM = 3
+CHUNK_SIZE = 3
+N_ACTION_STEPS = 2
+MAX_ACTION_DIM = 6
+STATE_TOKEN_ID = 5
+ACTION_TOKEN_ID = 6
+
+
+class DummyVLMBackbone(nn.Module):
+    def __init__(self, hidden_size: int, vocab_size: int = 64):
+        super().__init__()
+        self.embedding = nn.Embedding(vocab_size, hidden_size)
+        self.config = SimpleNamespace(text_config=SimpleNamespace(hidden_size=hidden_size))
+
+    @property
+    def model(self):
+        return self
+
+    def get_input_embeddings(self):
+        return self.embedding
+
+    def get_rope_index(
+        self,
+        input_ids: torch.Tensor,
+        image_grid_thw: torch.Tensor | None = None,
+        attention_mask: torch.Tensor | None = None,
+        mm_token_type_ids: torch.Tensor | None = None,
+    ):
+        batch_size, seq_len = input_ids.shape
+        if attention_mask is None:
+            text_positions = torch.arange(seq_len, device=input_ids.device).expand(batch_size, -1)
+        else:
+            text_positions = attention_mask.long().cumsum(-1) - 1
+            text_positions = text_positions.masked_fill(attention_mask == 0, 0)
+        position_ids = text_positions.view(1, batch_size, seq_len).expand(3, batch_size, seq_len)
+        rope_deltas = torch.zeros(batch_size, 1, dtype=torch.long, device=input_ids.device)
+        return position_ids, rope_deltas
+
+    def gradient_checkpointing_enable(self, gradient_checkpointing_kwargs=None):
+        return gradient_checkpointing_kwargs
+
+    def gradient_checkpointing_disable(self):
+        return None
+
+    def forward(
+        self,
+        *,
+        input_ids: torch.Tensor | None = None,
+        inputs_embeds: torch.Tensor | None = None,
+        **kwargs,
+    ):
+        if inputs_embeds is None:
+            inputs_embeds = self.embedding(input_ids)
+        return SimpleNamespace(
+            last_hidden_state=inputs_embeds,
+            past_key_values=SimpleNamespace(crop=lambda prefix_len: None),
+        )
+
+
+def make_eo1_config():
+    from lerobot.policies.eo1.configuration_eo1 import EO1Config
+
+    return EO1Config(
+        device="cpu",
+        dtype="float32",
+        vlm_base="dummy-qwen",
+        vlm_config={},
+        chunk_size=CHUNK_SIZE,
+        n_action_steps=N_ACTION_STEPS,
+        max_state_dim=STATE_DIM,
+        max_action_dim=MAX_ACTION_DIM,
+        num_denoise_steps=2,
+        input_features={
+            OBS_STATE: PolicyFeature(type=FeatureType.STATE, shape=(STATE_DIM,)),
+            "observation.images.image": PolicyFeature(type=FeatureType.VISUAL, shape=(3, 16, 16)),
+        },
+        output_features={
+            ACTION: PolicyFeature(type=FeatureType.ACTION, shape=(ACTION_DIM,)),
+        },
+    )
+
+
+def make_policy_batch(include_action: bool) -> dict[str, torch.Tensor | int]:
+    batch_size = 1
+    seq_len = CHUNK_SIZE + 4
+    input_ids = torch.tensor(
+        [[11, STATE_TOKEN_ID, 12, ACTION_TOKEN_ID, ACTION_TOKEN_ID, ACTION_TOKEN_ID, 13]],
+        dtype=torch.long,
+    )
+    assert input_ids.shape == (batch_size, seq_len)
+
+    batch: dict[str, torch.Tensor | int] = {
+        OBS_STATE: torch.randn(batch_size, STATE_DIM, dtype=torch.float32),
+        "input_ids": input_ids,
+        "attention_mask": torch.ones(batch_size, seq_len, dtype=torch.long),
+        "pixel_values": torch.zeros(batch_size, 3, 4, 4, dtype=torch.float32),
+        "image_grid_thw": torch.tensor([[1, 2, 2]], dtype=torch.long),
+        "mm_token_type_ids": torch.zeros(batch_size, seq_len, dtype=torch.int32),
+        "state_token_id": STATE_TOKEN_ID,
+        "action_token_id": ACTION_TOKEN_ID,
+    }
+    if include_action:
+        batch[ACTION] = torch.randn(batch_size, CHUNK_SIZE, ACTION_DIM, dtype=torch.float32)
+    return batch
+
+
+def test_lerobot_eo1_forward_pass(monkeypatch):
+    monkeypatch.setattr(
+        "lerobot.policies.eo1.modeling_eo1.Qwen2_5_VLForConditionalGeneration.from_pretrained",
+        lambda *args, **kwargs: DummyVLMBackbone(HIDDEN_SIZE),
+    )
+    policy = EO1Policy(make_eo1_config())
+
+    loss, metrics = policy.forward(make_policy_batch(include_action=True))
+
+    assert loss.ndim == 0
+    assert torch.isfinite(loss)
+    assert metrics["loss"] == pytest.approx(loss.item())
+
+
+def test_lerobot_eo1_inference(monkeypatch):
+    monkeypatch.setattr(
+        "lerobot.policies.eo1.modeling_eo1.Qwen2_5_VLForConditionalGeneration.from_pretrained",
+        lambda *args, **kwargs: DummyVLMBackbone(HIDDEN_SIZE),
+    )
+    policy = EO1Policy(make_eo1_config())
+
+    sample_calls = {"count": 0}
+    fixed_chunk = torch.tensor(
+        [
+            [
+                [0.1, 0.2, 0.3, 9.0, 9.0, 9.0],
+                [1.1, 1.2, 1.3, 9.0, 9.0, 9.0],
+                [2.1, 2.2, 2.3, 9.0, 9.0, 9.0],
+            ]
+        ],
+        dtype=torch.float32,
+    )
+
+    def fake_sample_actions(**kwargs):
+        sample_calls["count"] += 1
+        return fixed_chunk
+
+    monkeypatch.setattr(policy.model, "sample_actions", fake_sample_actions)
+
+    batch = make_policy_batch(include_action=False)
+    action_0 = policy.select_action(batch)
+    action_1 = policy.select_action(batch)
+
+    torch.testing.assert_close(action_0, fixed_chunk[:, 0, :ACTION_DIM])
+    torch.testing.assert_close(action_1, fixed_chunk[:, 1, :ACTION_DIM])
+    assert sample_calls["count"] == 1
diff --git a/uv.lock b/uv.lock
index 323b9bc9d..951163f55 100644
--- a/uv.lock
+++ b/uv.lock
@@ -1,5 +1,5 @@
 version = 1
-revision = 2
+revision = 3
 requires-python = ">=3.12"
 resolution-markers = [
     "python_full_version >= '3.15' and platform_machine != 'aarch64' and platform_machine != 'arm64' and platform_machine != 'armv7l' and platform_machine != 's390x' and sys_platform == 'linux'",
@@ -972,10 +972,10 @@ wheels = [
 
 [[package]]
 name = "cuda-pathfinder"
-version = "1.5.3"
+version = "1.5.4"
 source = { registry = "https://pypi.org/simple" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/d3/d6/ac63065d33dd700fee7ebd7d287332401b54e31b9346e142f871e1f0b116/cuda_pathfinder-1.5.3-py3-none-any.whl", hash = "sha256:dff021123aedbb4117cc7ec81717bbfe198fb4e8b5f1ee57e0e084fec5c8577d", size = 49991, upload-time = "2026-04-14T20:09:27.037Z" },
+    { url = "https://files.pythonhosted.org/packages/11/d0/c177e29701cf1d3008d7d2b16b5fc626592ce13bd535f8795c5f57187e0e/cuda_pathfinder-1.5.4-py3-none-any.whl", hash = "sha256:9563d3175ce1828531acf4b94e1c1c7d67208c347ca002493e2654878b26f4b7", size = 51657, upload-time = "2026-04-27T22:42:07.712Z" },
 ]
 
 [[package]]
@@ -989,7 +989,7 @@ wheels = [
 
 [[package]]
 name = "datasets"
-version = "4.8.4"
+version = "4.8.5"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "dill" },
@@ -1007,9 +1007,9 @@ dependencies = [
     { name = "tqdm" },
     { name = "xxhash" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/22/22/73e46ac7a8c25e7ef0b3bd6f10da3465021d90219a32eb0b4d2afea4c56e/datasets-4.8.4.tar.gz", hash = "sha256:a1429ed853275ce7943a01c6d2e25475b4501eb758934362106a280470df3a52", size = 604382, upload-time = "2026-03-23T14:21:17.987Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/66/34/14cd8e76f907f7d4dca2334cfeec9f81d30fd15c25a015f99aaea694eaed/datasets-4.8.5.tar.gz", hash = "sha256:0f0c1c3d56ffff2c93b2f4c63c95bac94f3d7e8621aea2a2a576275233bba772", size = 605649, upload-time = "2026-04-27T15:43:57.384Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/b0/e5/247d094108e42ac26363ab8dc57f168840cf7c05774b40ffeb0d78868fcc/datasets-4.8.4-py3-none-any.whl", hash = "sha256:cdc8bee4698e549d78bf1fed6aea2eebc760b22b084f07e6fc020c6577a6ce6d", size = 526991, upload-time = "2026-03-23T14:21:15.89Z" },
+    { url = "https://files.pythonhosted.org/packages/65/99/00f3196036501b53032c4b1ab8337a0b978dee832ed276dae3815df4e8b5/datasets-4.8.5-py3-none-any.whl", hash = "sha256:5079900781719c0e063a8efdd2cd95a31ad0c63209178669cd23cf1b926149ff", size = 528973, upload-time = "2026-04-27T15:43:53.702Z" },
 ]
 
 [[package]]
@@ -1563,14 +1563,14 @@ wheels = [
 
 [[package]]
 name = "gitpython"
-version = "3.1.47"
+version = "3.1.49"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "gitdb" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/c1/bd/50db468e9b1310529a19fce651b3b0e753b5c07954d486cba31bbee9a5d5/gitpython-3.1.47.tar.gz", hash = "sha256:dba27f922bd2b42cb54c87a8ab3cb6beb6bf07f3d564e21ac848913a05a8a3cd", size = 216978, upload-time = "2026-04-22T02:44:44.059Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/e1/63/210aaa302d6a0a78daa67c5c15bbac2cad361722841278b0209b6da20855/gitpython-3.1.49.tar.gz", hash = "sha256:42f9399c9eb33fc581014bedd76049dfbaf6375aa2a5754575966387280315e1", size = 219367, upload-time = "2026-04-29T00:31:20.478Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/f2/c5/a1bc0996af85757903cf2bf444a7824e68e0035ce63fb41d6f76f9def68b/gitpython-3.1.47-py3-none-any.whl", hash = "sha256:489f590edfd6d20571b2c0e72c6a6ac6915ee8b8cd04572330e3842207a78905", size = 209547, upload-time = "2026-04-22T02:44:41.271Z" },
+    { url = "https://files.pythonhosted.org/packages/fd/6f/b842bfa6f21d6f87c57f9abf7194225e55279d96d869775e19e9f7236fc5/gitpython-3.1.49-py3-none-any.whl", hash = "sha256:024b0422d7f84d15cd794844e029ffebd4c5d42a7eb9b936b458697ef550a02c", size = 212190, upload-time = "2026-04-29T00:31:18.412Z" },
 ]
 
 [[package]]
@@ -1930,7 +1930,7 @@ wheels = [
 
 [[package]]
 name = "huggingface-hub"
-version = "1.12.0"
+version = "1.13.0"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "filelock" },
@@ -1943,9 +1943,9 @@ dependencies = [
     { name = "typer" },
     { name = "typing-extensions" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/56/52/1b54cb569509c725a32c1315261ac9fd0e6b91bbbf74d86fca10d3376164/huggingface_hub-1.12.0.tar.gz", hash = "sha256:7c3fe85e24b652334e5d456d7a812cd9a071e75630fac4365d9165ab5e4a34b6", size = 763091, upload-time = "2026-04-24T13:32:08.674Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/89/ff/ec7ed2eb43bd7ce8bb2233d109cc235c3e807ffe5e469dc09db261fac05e/huggingface_hub-1.13.0.tar.gz", hash = "sha256:f6df2dac5abe82ce2fe05873d10d5ff47bc677d616a2f521f4ee26db9415d9d0", size = 781788, upload-time = "2026-04-30T11:57:33.858Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/7e/2b/ef03ddb96bd1123503c2bd6932001020292deea649e9bf4caa2cb65a85bf/huggingface_hub-1.12.0-py3-none-any.whl", hash = "sha256:d74939969585ee35748bd66de09baf84099d461bda7287cd9043bfb99b0e424d", size = 646806, upload-time = "2026-04-24T13:32:06.717Z" },
+    { url = "https://files.pythonhosted.org/packages/93/db/4b1cdae9460ae1f3ca020cd767f013430ce23eb1d9c890ae3a0609b38d26/huggingface_hub-1.13.0-py3-none-any.whl", hash = "sha256:e942cb50d6a08dd5306688b1ac05bda157fd2fcc88b63dae405f7bd0d3234005", size = 660643, upload-time = "2026-04-30T11:57:31.802Z" },
 ]
 
 [[package]]
@@ -2131,14 +2131,14 @@ wheels = [
 
 [[package]]
 name = "jedi"
-version = "0.19.2"
+version = "0.20.0"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "parso" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/72/3a/79a912fbd4d8dd6fbb02bf69afd3bb72cf0c729bb3063c6f4498603db17a/jedi-0.19.2.tar.gz", hash = "sha256:4770dc3de41bde3966b02eb84fbcf557fb33cce26ad23da12c742fb50ecb11f0", size = 1231287, upload-time = "2024-11-11T01:41:42.873Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/46/b7/a3635f6a2d7cf5b5dd98064fc1d5fbbafcb25477bcea204a3a92145d158b/jedi-0.20.0.tar.gz", hash = "sha256:c3f4ccbd276696f4b19c54618d4fb18f9fc24b0aef02acf704b23f487daa1011", size = 3119416, upload-time = "2026-05-01T23:38:47.814Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/c0/5a/9cac0c82afec3d09ccd97c8b6502d48f165f9124db81b4bcb90b4af974ee/jedi-0.19.2-py2.py3-none-any.whl", hash = "sha256:a8ef22bde8490f57fe5c7681a3c83cb58874daf72b4784de3cce5b6ef6edb5b9", size = 1572278, upload-time = "2024-11-11T01:41:40.175Z" },
+    { url = "https://files.pythonhosted.org/packages/9a/93/242e2eab5fe682ffcb8b0084bde703a41d51e17ee0f3a31ff0d9d813620a/jedi-0.20.0-py2.py3-none-any.whl", hash = "sha256:7bdd9c2634f56713299976f4cbd59cb3fa92165cc5e05ea811fb253480728b67", size = 4884812, upload-time = "2026-05-01T23:38:43.919Z" },
 ]
 
 [[package]]
@@ -2321,7 +2321,7 @@ wheels = [
 
 [[package]]
 name = "jupyter-server"
-version = "2.17.0"
+version = "2.18.1"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "anyio" },
@@ -2343,9 +2343,9 @@ dependencies = [
     { name = "traitlets" },
     { name = "websocket-client" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/5b/ac/e040ec363d7b6b1f11304cc9f209dac4517ece5d5e01821366b924a64a50/jupyter_server-2.17.0.tar.gz", hash = "sha256:c38ea898566964c888b4772ae1ed58eca84592e88251d2cfc4d171f81f7e99d5", size = 731949, upload-time = "2025-08-21T14:42:54.042Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/33/b0/666586d557a71a58cd9960b154fb9aee0ed81dd62a50371195ab95731909/jupyter_server-2.18.1.tar.gz", hash = "sha256:f62be526369b791625e03bd658070563c1a4e9a0a2f439ea1f9dbacea5f7191a", size = 752024, upload-time = "2026-05-05T09:17:51.101Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/92/80/a24767e6ca280f5a49525d987bf3e4d7552bf67c8be07e8ccf20271f8568/jupyter_server-2.17.0-py3-none-any.whl", hash = "sha256:e8cb9c7db4251f51ed307e329b81b72ccf2056ff82d50524debde1ee1870e13f", size = 388221, upload-time = "2025-08-21T14:42:52.034Z" },
+    { url = "https://files.pythonhosted.org/packages/a4/45/bfe3779fd06714a379128f2c4eaf7c99414f0eb081f9f34c135f6b3d511c/jupyter_server-2.18.1-py3-none-any.whl", hash = "sha256:db0374d52a975f88a92a7f20de44e08ef5be9763ba7e99630baf16c46ac8dbf0", size = 391844, upload-time = "2026-05-05T09:17:48.521Z" },
 ]
 
 [[package]]
@@ -2363,7 +2363,7 @@ wheels = [
 
 [[package]]
 name = "jupyterlab"
-version = "4.5.6"
+version = "4.5.7"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "async-lru" },
@@ -2380,9 +2380,9 @@ dependencies = [
     { name = "tornado" },
     { name = "traitlets" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/ac/d5/730628e03fff2e8a8e8ccdaedde1489ab1309f9a4fa2536248884e30b7c7/jupyterlab-4.5.6.tar.gz", hash = "sha256:642fe2cfe7f0f5922a8a558ba7a0d246c7bc133b708dfe43f7b3a826d163cf42", size = 23970670, upload-time = "2026-03-11T14:17:04.531Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/2b/22/8440ec827762146e7cdecf04335bd348795899d29dc6ae82238707353a2c/jupyterlab-4.5.7.tar.gz", hash = "sha256:55a9822c4754da305f41e113452c68383e214dcf96de760146af89ce5d5117b0", size = 23992763, upload-time = "2026-04-29T16:43:51.328Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/e1/1b/dad6fdcc658ed7af26fdf3841e7394072c9549a8b896c381ab49dd11e2d9/jupyterlab-4.5.6-py3-none-any.whl", hash = "sha256:d6b3dac883aa4d9993348e0f8e95b24624f75099aed64eab6a4351a9cdd1e580", size = 12447124, upload-time = "2026-03-11T14:17:00.229Z" },
+    { url = "https://files.pythonhosted.org/packages/3d/aa/537b8f7d80e799af19af35fb3ddfc970b951088a13c57dd9387dcfbb7f61/jupyterlab-4.5.7-py3-none-any.whl", hash = "sha256:fba4cb0e2c44a52859669d8c98b45de029d5e515f8407bf8534d2a8fc5f0964d", size = 12450123, upload-time = "2026-04-29T16:43:46.639Z" },
 ]
 
 [[package]]
@@ -2723,6 +2723,10 @@ dynamixel = [
     { name = "dynamixel-sdk" },
     { name = "pyserial" },
 ]
+eo1 = [
+    { name = "qwen-vl-utils" },
+    { name = "transformers" },
+]
 evaluation = [
     { name = "av" },
 ]
@@ -3029,6 +3033,7 @@ requires-dist = [
     { name = "lerobot", extras = ["pyserial-dep"], marker = "extra == 'unitree-g1'" },
     { name = "lerobot", extras = ["pyzmq-dep"], marker = "extra == 'lekiwi'" },
     { name = "lerobot", extras = ["pyzmq-dep"], marker = "extra == 'unitree-g1'" },
+    { name = "lerobot", extras = ["qwen-vl-utils-dep"], marker = "extra == 'eo1'" },
     { name = "lerobot", extras = ["qwen-vl-utils-dep"], marker = "extra == 'sarm'" },
     { name = "lerobot", extras = ["qwen-vl-utils-dep"], marker = "extra == 'wallx'" },
     { name = "lerobot", extras = ["reachy2"], marker = "extra == 'all'" },
@@ -3043,6 +3048,7 @@ requires-dist = [
     { name = "lerobot", extras = ["smolvla"], marker = "extra == 'all'" },
     { name = "lerobot", extras = ["test"], marker = "extra == 'all'" },
     { name = "lerobot", extras = ["training"], marker = "extra == 'all'" },
+    { name = "lerobot", extras = ["transformers-dep"], marker = "extra == 'eo1'" },
     { name = "lerobot", extras = ["transformers-dep"], marker = "extra == 'groot'" },
     { name = "lerobot", extras = ["transformers-dep"], marker = "extra == 'hilserl'" },
     { name = "lerobot", extras = ["transformers-dep"], marker = "extra == 'libero'" },
@@ -3109,10 +3115,10 @@ requires-dist = [
     { name = "torchdiffeq", marker = "extra == 'wallx'", specifier = ">=0.2.4,<0.3.0" },
     { name = "torchvision", specifier = ">=0.22.0,<0.26.0" },
     { name = "tqdm", specifier = ">=4.66.0,<5.0.0" },
-    { name = "transformers", marker = "extra == 'transformers-dep'", specifier = "==5.3.0" },
+    { name = "transformers", marker = "extra == 'transformers-dep'", specifier = ">=5.4.0,<5.6.0" },
     { name = "wandb", marker = "extra == 'training'", specifier = ">=0.24.0,<0.25.0" },
 ]
-provides-extras = ["dataset", "training", "hardware", "viz", "core-scripts", "evaluation", "dataset-viz", "av-dep", "pygame-dep", "placo-dep", "transformers-dep", "grpcio-dep", "can-dep", "peft-dep", "scipy-dep", "diffusers-dep", "qwen-vl-utils-dep", "matplotlib-dep", "pyserial-dep", "deepdiff-dep", "pynput-dep", "pyzmq-dep", "feetech", "dynamixel", "damiao", "robstride", "openarms", "gamepad", "hopejr", "lekiwi", "unitree-g1", "reachy2", "kinematics", "intelrealsense", "phone", "diffusion", "wallx", "pi", "smolvla", "multi-task-dit", "groot", "sarm", "xvla", "hilserl", "async", "peft", "dev", "notebook", "test", "video-benchmark", "aloha", "pusht", "libero", "metaworld", "all"]
+provides-extras = ["dataset", "training", "hardware", "viz", "core-scripts", "evaluation", "dataset-viz", "av-dep", "pygame-dep", "placo-dep", "transformers-dep", "grpcio-dep", "can-dep", "peft-dep", "scipy-dep", "diffusers-dep", "qwen-vl-utils-dep", "matplotlib-dep", "pyserial-dep", "deepdiff-dep", "pynput-dep", "pyzmq-dep", "feetech", "dynamixel", "damiao", "robstride", "openarms", "gamepad", "hopejr", "lekiwi", "unitree-g1", "reachy2", "kinematics", "intelrealsense", "phone", "diffusion", "wallx", "pi", "smolvla", "multi-task-dit", "groot", "sarm", "xvla", "eo1", "hilserl", "async", "peft", "dev", "notebook", "test", "video-benchmark", "aloha", "pusht", "libero", "metaworld", "all"]
 
 [[package]]
 name = "librt"
@@ -3486,11 +3492,11 @@ wheels = [
 
 [[package]]
 name = "mistune"
-version = "3.2.0"
+version = "3.2.1"
 source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/9d/55/d01f0c4b45ade6536c51170b9043db8b2ec6ddf4a35c7ea3f5f559ac935b/mistune-3.2.0.tar.gz", hash = "sha256:708487c8a8cdd99c9d90eb3ed4c3ed961246ff78ac82f03418f5183ab70e398a", size = 95467, upload-time = "2025-12-23T11:36:34.994Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/ca/84/620cc3f7e3adf6f5067e10f4dbae71295d8f9e16d5d3f9ef97c40f2f592c/mistune-3.2.1.tar.gz", hash = "sha256:7c8e5501d38bac1582e067e46c8343f17d57ea1aaa735823f3aba1fd59c88a28", size = 98003, upload-time = "2026-05-03T14:33:22.312Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/9b/f7/4a5e785ec9fbd65146a27b6b70b6cdc161a66f2024e4b04ac06a67f5578b/mistune-3.2.0-py3-none-any.whl", hash = "sha256:febdc629a3c78616b94393c6580551e0e34cc289987ec6c35ed3f4be42d0eee1", size = 53598, upload-time = "2025-12-23T11:36:33.211Z" },
+    { url = "https://files.pythonhosted.org/packages/2a/7f/a946aa4f8752b37102b41e64dca18a1976ac705c3a0d1dfe74d820a02552/mistune-3.2.1-py3-none-any.whl", hash = "sha256:78cdb0ba5e938053ccf63651b352508d2efa9411dc8810bfb05f2dc5140c0048", size = 53749, upload-time = "2026-05-03T14:33:20.551Z" },
 ]
 
 [[package]]
@@ -3855,7 +3861,7 @@ wheels = [
 
 [[package]]
 name = "notebook"
-version = "7.5.5"
+version = "7.5.6"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "jupyter-server" },
@@ -3864,9 +3870,9 @@ dependencies = [
     { name = "notebook-shim" },
     { name = "tornado" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/1f/6d/41052c48d6f6349ca0a7c4d1f6a78464de135e6d18f5829ba2510e62184c/notebook-7.5.5.tar.gz", hash = "sha256:dc0bfab0f2372c8278c457423d3256c34154ac2cc76bf20e9925260c461013c3", size = 14169167, upload-time = "2026-03-11T16:32:51.922Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/2a/c2/cf59bd2e6f2c8b976b52477e3e53bf6f97bc714ed046a51821afb428eaee/notebook-7.5.6.tar.gz", hash = "sha256:621174aade80108f0020b0f00738000b215f75fa3cd90771ad7aa0f24536a4e1", size = 14170814, upload-time = "2026-04-30T11:46:26.613Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/f8/aa/cbd1deb9f07446241e88f8d5fecccd95b249bca0b4e5482214a4d1714c49/notebook-7.5.5-py3-none-any.whl", hash = "sha256:a7c14dbeefa6592e87f72290ca982e0c10f5bbf3786be2a600fda9da2764a2b8", size = 14578929, upload-time = "2026-03-11T16:32:48.021Z" },
+    { url = "https://files.pythonhosted.org/packages/e9/d6/1fd0646b9bbd9efbb0b8ae21b2325fbef515769a5621c03e31d8eb8da587/notebook-7.5.6-py3-none-any.whl", hash = "sha256:4dde3f8fb55fa8fb7946d58c6e869ce9baf46d00fc070664f62604569d0faca0", size = 14581730, upload-time = "2026-04-30T11:46:22.342Z" },
 ]
 
 [[package]]
@@ -4130,7 +4136,7 @@ wheels = [
 
 [[package]]
 name = "onnxruntime"
-version = "1.25.0"
+version = "1.25.1"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "flatbuffers" },
@@ -4139,25 +4145,25 @@ dependencies = [
     { name = "protobuf" },
 ]
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/7a/69/f98c6bda4c34ac382b70c36033a989ceffd1caf5afba47bd2ef26535850f/onnxruntime-1.25.0-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:8ecd3362de3fb496fb3e2d055a95d5acab611cf759a27609c6d99704c9d8f184", size = 17742518, upload-time = "2026-04-22T17:20:34.444Z" },
-    { url = "https://files.pythonhosted.org/packages/5a/c6/19c5bfbc60396791e975652f982bcff9ff4b27947c8e2bf0064ac5d5727b/onnxruntime-1.25.0-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9c99238d20bfa80ac68c7b03c2c936d389189ae40997f78a30d151570d7e18bf", size = 15841110, upload-time = "2026-04-22T17:19:31.284Z" },
-    { url = "https://files.pythonhosted.org/packages/a9/1b/d681878f227513917d8620e4ea504af5eb3313fc01f8aea7b19a976c65db/onnxruntime-1.25.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:be93baa694ef8e5831fcb7b542da21f502b122918b5b9612d9f02972e043ee01", size = 17996146, upload-time = "2026-04-22T17:19:53.792Z" },
-    { url = "https://files.pythonhosted.org/packages/55/fe/ec98e416bd75063dea1e493661c7c939e18660ee41d6a63d7221e5657f48/onnxruntime-1.25.0-cp312-cp312-win_amd64.whl", hash = "sha256:9596040c1f7d247bbfab5d4db1e7651c790235e48e460c7d445ec81687d5a182", size = 12872370, upload-time = "2026-04-22T17:20:22.856Z" },
-    { url = "https://files.pythonhosted.org/packages/f7/86/9a1ac7c8a8eba7967935d4c109fc956d8f9ba61cba61d9368315bb27d0bc/onnxruntime-1.25.0-cp312-cp312-win_arm64.whl", hash = "sha256:463aed7f5e4a3ca5a476db7e9bba9164fa26921ef34c37e59b28c4c61e55f266", size = 12600072, upload-time = "2026-04-22T17:20:11.523Z" },
-    { url = "https://files.pythonhosted.org/packages/c1/5f/3b916a303f43e9c7eed3a705ea69f6867233c161dede30f4df21538c6693/onnxruntime-1.25.0-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:1b3d76cf770afba76859f270679c9ad0b017b9357eb5892e91926943e05ca82c", size = 17743247, upload-time = "2026-04-22T17:19:45.206Z" },
-    { url = "https://files.pythonhosted.org/packages/d5/b3/9e45ba86ed39ab688578f21dd39ed4b575726205596891870a1a8b4d5ca9/onnxruntime-1.25.0-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:cddb565dfd630550a8817b3d5493ffcfa0fec273b545b2816f2fce53384e1151", size = 15841442, upload-time = "2026-04-22T17:19:34.209Z" },
-    { url = "https://files.pythonhosted.org/packages/d2/c4/810809e3b411fd66958bdd7285a63acf948988ab4189e1fd860a2f999db3/onnxruntime-1.25.0-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ade74e651e28b39e6bfd6f576cb9b8a4edfa0916234145154dc891bd55331c22", size = 17993660, upload-time = "2026-04-22T17:19:56.719Z" },
-    { url = "https://files.pythonhosted.org/packages/42/3d/b736cda9c71b3df022ca6bbcb991d14b7723c068dbebe826af9102e79777/onnxruntime-1.25.0-cp313-cp313-win_amd64.whl", hash = "sha256:9196c32c039c37ce8362cbee0aa3a704679be5f2b6fb3e849fea927c98fe1e5b", size = 12871906, upload-time = "2026-04-22T17:20:25.705Z" },
-    { url = "https://files.pythonhosted.org/packages/0d/1f/d7bb87cdbb839b356133e9f9e3851fc0c3130dd1c360640c9ce948e3e083/onnxruntime-1.25.0-cp313-cp313-win_arm64.whl", hash = "sha256:b3e52dc2208dec6f61ef118dff04610927e9a18d99e019a828799b23cc9cdea4", size = 12599753, upload-time = "2026-04-22T17:20:14.661Z" },
-    { url = "https://files.pythonhosted.org/packages/04/3c/edb0d825a65beed40a3de8a51521d49d433aa767f8d00e633cd2602024c0/onnxruntime-1.25.0-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:de8548d8fe8fd58ca841178051d535d6f378efae14a4b4eb336617d80540fb41", size = 15852628, upload-time = "2026-04-22T17:19:36.886Z" },
-    { url = "https://files.pythonhosted.org/packages/55/51/7a660b4d087f27b273ff725f744880e7664f64a9331bfb1eae91ed2a9f0a/onnxruntime-1.25.0-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4edec672d09e34b9e83ad09c44454ce97627388f32858b1d59fe01d091ff54b5", size = 17997241, upload-time = "2026-04-22T17:19:59.653Z" },
-    { url = "https://files.pythonhosted.org/packages/78/be/5254acb849f414c8fb2643fe21f2c2ef8089fab18569f24775ccb8ee182d/onnxruntime-1.25.0-cp314-cp314-macosx_14_0_arm64.whl", hash = "sha256:38f27febd2ff034a600a8bdbea34b1f7c961a2dab6bcb5351e70548fea456161", size = 17744932, upload-time = "2026-04-22T17:19:48.097Z" },
-    { url = "https://files.pythonhosted.org/packages/49/98/c2593aaa392e278a41bec35a00298aa5f22bb382483ad02ca451a556b2a2/onnxruntime-1.25.0-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6e0ae389ed1647f11c1b501ba1cef1e2c7453002f626136ace214c9c46153ee4", size = 15842603, upload-time = "2026-04-22T17:19:39.879Z" },
-    { url = "https://files.pythonhosted.org/packages/08/b6/07e924b8a47adc9ce2f92a7ef71a6fb709981b1ebd08179f61cbce6ff9b3/onnxruntime-1.25.0-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7ca32d38173c0f58699ca9dc9e867de74d2c2ab7d1c2d969f862ee8633370b77", size = 17994808, upload-time = "2026-04-22T17:20:02.462Z" },
-    { url = "https://files.pythonhosted.org/packages/56/31/e0147d87acfd06992a9bf45ffc070fd3ab49ff9a1f12de9fb403f2fc0b97/onnxruntime-1.25.0-cp314-cp314-win_amd64.whl", hash = "sha256:a2829e29621db7a4bcd457e6d0f3e4f541fb274c7127e7d2e1a5b46c70572672", size = 13183697, upload-time = "2026-04-22T17:20:28.658Z" },
-    { url = "https://files.pythonhosted.org/packages/18/29/b1d5b91d04ae80768ed8e38639ab2fcc92750a67fddc30ad6b700f244113/onnxruntime-1.25.0-cp314-cp314-win_arm64.whl", hash = "sha256:2bed9b35568b3ecf8ab34dc832d37216e47947e86508a0fd6b75e4c19d7ba907", size = 12933438, upload-time = "2026-04-22T17:20:17.223Z" },
-    { url = "https://files.pythonhosted.org/packages/56/f4/cfd47f88da545ea57c1f2a4b5886d455ec64f53b723b1a448fc44ed757e9/onnxruntime-1.25.0-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:00548a16e8f0d52cb1c67ef50177e5e2be848ccffc6db60010ee37faaccbbb6f", size = 15853591, upload-time = "2026-04-22T17:19:42.325Z" },
-    { url = "https://files.pythonhosted.org/packages/89/de/8b406be6ea4f2c254f9bc850cbe8038064c7768a94cdf7785420b3652ea7/onnxruntime-1.25.0-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a071a0740388e0ffad081c583761f37837b113bde3d03dc70790ed6cf4f4de0b", size = 17996166, upload-time = "2026-04-22T17:20:05.873Z" },
+    { url = "https://files.pythonhosted.org/packages/c0/52/8b2a10e8dedf5d486332bc2b3bca0b1ed8049c0b9e4a5cced95413aadfdd/onnxruntime-1.25.1-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:66e52f7a30d1f780a34aa84d68a0a04d382d9f5b141884ecbf45b7566b9fbde9", size = 17770987, upload-time = "2026-04-27T22:00:47.985Z" },
+    { url = "https://files.pythonhosted.org/packages/3f/87/a424d2867477c42ef8c60172709281120797f7b0f1fd33cc36b24329c825/onnxruntime-1.25.1-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a5f41779f044d1ff75593df5c10a4d311bc82563687796d5218e2685b8f9da25", size = 15871829, upload-time = "2026-04-27T21:59:39.088Z" },
+    { url = "https://files.pythonhosted.org/packages/d4/55/7819e64c515f17c86005447ede8122b974ca851255a94125e2119376f0f8/onnxruntime-1.25.1-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:905409e9eb2ef87f8226e073f56e71faf731c3e480ebd34952cf953730e4a4ff", size = 18024586, upload-time = "2026-04-27T22:00:05.359Z" },
+    { url = "https://files.pythonhosted.org/packages/89/36/b4f3eb5e95c66389aafd490950b5255e87c9333742cf90516eb50898e1dc/onnxruntime-1.25.1-cp312-cp312-win_amd64.whl", hash = "sha256:d4097b75b77486bb45835a8ed25b9a67976040ec6c258aeabae6aadfbdd1201c", size = 12905112, upload-time = "2026-04-27T22:00:36.478Z" },
+    { url = "https://files.pythonhosted.org/packages/38/fa/e5c43397632a399f542663ed3e3e37763ee203ba845b10b266cd2ede8925/onnxruntime-1.25.1-cp312-cp312-win_arm64.whl", hash = "sha256:b6c7aa5cae606d5c90a392679fac074b60f80025a2e83e1e90fdf882bd2a97f0", size = 12634433, upload-time = "2026-04-27T22:00:25.918Z" },
+    { url = "https://files.pythonhosted.org/packages/d2/ee/db3ac55ef770347a926ac0f1317df0ab42c8bc604350833b30c7356bf936/onnxruntime-1.25.1-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:e9d9b3b1694196bc3c5bc66f760a237a5e27d7688aaa2e2c9c0f66abd0486699", size = 17770761, upload-time = "2026-04-27T21:59:54.853Z" },
+    { url = "https://files.pythonhosted.org/packages/dc/9a/33225481a94a59906fce44e27ab12fc3bddd2aaecdc6160bd73341ca1aba/onnxruntime-1.25.1-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:311d29b943e46a55ca72ca1ea48d7815c993122bfc359f68215fddeb9583fff4", size = 15871542, upload-time = "2026-04-27T21:59:41.881Z" },
+    { url = "https://files.pythonhosted.org/packages/8b/09/f20aac60f6fcf840543be54d4e9252cfeb7e8c2bb6d22477aaeb180e763e/onnxruntime-1.25.1-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:98016a038b31160db23208706139fa3b99cd60bc1c5ffdade77aafd6a37a92ad", size = 18036960, upload-time = "2026-04-27T22:00:10.739Z" },
+    { url = "https://files.pythonhosted.org/packages/50/83/47964ac7e2f7e2f9e83c69ec466642c6835466252cc2ef0561eafeb56b66/onnxruntime-1.25.1-cp313-cp313-win_amd64.whl", hash = "sha256:08717d6eee2820807ba60b1b17032af207bd7aaca5b6c4abaee71f83feae877b", size = 12904886, upload-time = "2026-04-27T22:00:39.878Z" },
+    { url = "https://files.pythonhosted.org/packages/d4/6c/a6c5aea47dc95fca7728f8a5af67c184ec9e7d4e7882125c7062e4bba8dd/onnxruntime-1.25.1-cp313-cp313-win_arm64.whl", hash = "sha256:84f8963d70e00167bae273ab7e80e9795bfc5eb94f6b23236a99c5c11af00844", size = 12634117, upload-time = "2026-04-27T22:00:29.15Z" },
+    { url = "https://files.pythonhosted.org/packages/a8/8a/3b65e7911eec86c125e3d6f43d690a6f68671500543c0390ecd6eb59b771/onnxruntime-1.25.1-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:03e800b3a4b48d9f3a2d23aacc4fa95486a3b406b14e51d1a9b8b6981d9adf9c", size = 15882935, upload-time = "2026-04-27T21:59:44.912Z" },
+    { url = "https://files.pythonhosted.org/packages/3c/bb/410a760694f8ae7bbfc5fa81ccbeb7da241e6d520ee02a333a439cf462a2/onnxruntime-1.25.1-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:fd83ef5c10cfc051a1cb465db692d57b996a1bc75a2a97b161398e29cdbc47ff", size = 18021727, upload-time = "2026-04-27T22:00:13.846Z" },
+    { url = "https://files.pythonhosted.org/packages/fb/aa/04530bd38e31e26970fa1212346d76cf81705dc16a8ee5e6f4fb24634c11/onnxruntime-1.25.1-cp314-cp314-macosx_14_0_arm64.whl", hash = "sha256:395eb662c437fa2407f44266e4778b75bff261b17c2a6fef042421f9069f871d", size = 17773721, upload-time = "2026-04-27T21:59:59.24Z" },
+    { url = "https://files.pythonhosted.org/packages/ef/7f/ec79ab5cece6a688c944a7fa214a8511d548b9d5142a15d1a3d730b705f1/onnxruntime-1.25.1-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9ae85395f41b291ae3e61780ec5092640181d369ef6c268aa8141c478b509e69", size = 15875954, upload-time = "2026-04-27T21:59:49.394Z" },
+    { url = "https://files.pythonhosted.org/packages/67/fe/20428215d822099ea2c1e3cf35c295cf1a58f467bf18b6c607597a39c18a/onnxruntime-1.25.1-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:828e1b12710fbedb6dfab5e7bae6f11563617cddf3c2e7e8d84c64de566a4a3a", size = 18038703, upload-time = "2026-04-27T22:00:16.199Z" },
+    { url = "https://files.pythonhosted.org/packages/5a/b1/b15db965e6a68bc47ca7eb584de4e6b3d2d2f484d46cc57f715b596f6528/onnxruntime-1.25.1-cp314-cp314-win_amd64.whl", hash = "sha256:2affc9d2fd9ab013b9c9637464e649a0cca870d57ae18bfef74180eee65c3369", size = 13218513, upload-time = "2026-04-27T22:00:42.506Z" },
+    { url = "https://files.pythonhosted.org/packages/5a/f9/25cd2d1b29cdc8140eee4afbb6fb930b69125526632b1d579bc747975306/onnxruntime-1.25.1-cp314-cp314-win_arm64.whl", hash = "sha256:3387d75d1a815b4b2495b4e47a05ef1b3bcb64a817ddc68587e0bfcb9702bcf6", size = 12969835, upload-time = "2026-04-27T22:00:31.504Z" },
+    { url = "https://files.pythonhosted.org/packages/8d/0e/6c507d1e65b2421fb44e241cbba577c7276792279485024fb1752b43f5c5/onnxruntime-1.25.1-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:06280b06604660595037f783c6d24bc70cbe5c6093975f194cd1482e77d450de", size = 15883298, upload-time = "2026-04-27T21:59:51.991Z" },
+    { url = "https://files.pythonhosted.org/packages/df/4e/1c9df57496409dc86b320bd38f29ad7a34b7115e4f35b8fca44a827568a7/onnxruntime-1.25.1-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7e79fd5ce7db10ebcc24e020e2ed0159476e69e2326b9b7828e5aadcf6184212", size = 18021249, upload-time = "2026-04-27T22:00:18.954Z" },
 ]
 
 [[package]]
@@ -4272,11 +4278,11 @@ wheels = [
 
 [[package]]
 name = "parso"
-version = "0.8.6"
+version = "0.8.7"
 source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/81/76/a1e769043c0c0c9fe391b702539d594731a4362334cdf4dc25d0c09761e7/parso-0.8.6.tar.gz", hash = "sha256:2b9a0332696df97d454fa67b81618fd69c35a7b90327cbe6ba5c92d2c68a7bfd", size = 401621, upload-time = "2026-02-09T15:45:24.425Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/30/4b/90c937815137d43ce71ba043cd3566221e9df6b9c805f24b5d138c9d40a7/parso-0.8.7.tar.gz", hash = "sha256:eaaac4c9fdd5e9e8852dc778d2d7405897ec510f2a298071453e5e3a07914bb1", size = 401824, upload-time = "2026-05-01T23:13:02.138Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/b6/61/fae042894f4296ec49e3f193aff5d7c18440da9e48102c3315e1bc4519a7/parso-0.8.6-py2.py3-none-any.whl", hash = "sha256:2c549f800b70a5c4952197248825584cb00f033b29c692671d3bf08bf380baff", size = 106894, upload-time = "2026-02-09T15:45:21.391Z" },
+    { url = "https://files.pythonhosted.org/packages/99/5d/8268b644392ee874ee82a635cd0df1773de230bde356c38de28e298392cc/parso-0.8.7-py2.py3-none-any.whl", hash = "sha256:a8926eb2a1b915486941fdbd31e86a4baf88fe8c210f25f2f35ecec5b574ca1c", size = 107025, upload-time = "2026-05-01T23:12:58.867Z" },
 ]
 
 [[package]]
@@ -4837,14 +4843,14 @@ wheels = [
 
 [[package]]
 name = "pyngrok"
-version = "8.1.0"
+version = "8.1.2"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "pyyaml" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/6b/4b/f1372b66985d76177ea492a5f82643b628a958ebdabc99350bb24c643d5b/pyngrok-8.1.0.tar.gz", hash = "sha256:18ea24460f5d74bf5c80feabd55ccf40e9552235ed103f967ec2ef99b57940c6", size = 45000, upload-time = "2026-04-27T02:54:44.771Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/8d/ae/6664934258773db4666e65730c43b4b06730f78d49861a9a04ebcf4742ff/pyngrok-8.1.2.tar.gz", hash = "sha256:3b5383ec7dc4646ac0d046435eb58c6cd1cbc9acad70e6dee012b05dc25b070a", size = 45078, upload-time = "2026-04-29T15:16:53.969Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/2f/50/d6f73708d6d358184c1227bbd5cf12dfa78af229fe81553bbd0e9a1ad073/pyngrok-8.1.0-py3-none-any.whl", hash = "sha256:8ba8497a2c7ac6b2f41a66f8102b815da30bfbb63321a70a0a4fa3a51b03e79b", size = 25882, upload-time = "2026-04-27T02:54:43.298Z" },
+    { url = "https://files.pythonhosted.org/packages/07/5c/7733776a6a9704bffee19d203f9be80f25f78a5011a9863971be60fe2763/pyngrok-8.1.2-py3-none-any.whl", hash = "sha256:849e9f55706288b00eb28f4ae8ea16b05e52c609f80ef4a88ca23b385f2f9178", size = 25935, upload-time = "2026-04-29T15:16:52.088Z" },
 ]
 
 [[package]]
@@ -5121,11 +5127,11 @@ wheels = [
 
 [[package]]
 name = "pytz"
-version = "2026.1.post1"
+version = "2026.2"
 source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/56/db/b8721d71d945e6a8ac63c0fc900b2067181dbb50805958d4d4661cf7d277/pytz-2026.1.post1.tar.gz", hash = "sha256:3378dde6a0c3d26719182142c56e60c7f9af7e968076f31aae569d72a0358ee1", size = 321088, upload-time = "2026-03-03T07:47:50.683Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/ff/46/dd499ec9038423421951e4fad73051febaa13d2df82b4064f87af8b8c0c3/pytz-2026.2.tar.gz", hash = "sha256:0e60b47b29f21574376f218fe21abc009894a2321ea16c6754f3cad6eb7cdd6a", size = 320861, upload-time = "2026-05-04T01:35:29.667Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/10/99/781fe0c827be2742bcc775efefccb3b048a3a9c6ce9aec0cbf4a101677e5/pytz-2026.1.post1-py2.py3-none-any.whl", hash = "sha256:f2fd16142fda348286a75e1a524be810bb05d444e5a081f37f7affc635035f7a", size = 510489, upload-time = "2026-03-03T07:47:49.167Z" },
+    { url = "https://files.pythonhosted.org/packages/ec/dd/96da98f892250475bdf2328112d7468abdd4acc7b902b6af23f4ed958ea0/pytz-2026.2-py2.py3-none-any.whl", hash = "sha256:04156e608bee23d3792fd45c94ae47fae1036688e75032eea2e3bf0323d1f126", size = 510141, upload-time = "2026-05-04T01:35:27.408Z" },
 ]
 
 [[package]]
@@ -5769,15 +5775,15 @@ wheels = [
 
 [[package]]
 name = "sentry-sdk"
-version = "2.58.0"
+version = "2.59.0"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "certifi" },
     { name = "urllib3" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/26/b3/fb8291170d0e844173164709fc0fa0c221ed75a5da740c8746f2a83b4eb1/sentry_sdk-2.58.0.tar.gz", hash = "sha256:c1144d947352d54e5b7daa63596d9f848adf684989c06c4f5a659f0c85a18f6f", size = 438764, upload-time = "2026-04-13T17:23:26.265Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/65/e0/9bf5e5fc7442b10880f3ec0eff0ef4208b84a099606f343ec4f5445227fb/sentry_sdk-2.59.0.tar.gz", hash = "sha256:cd265808ef8bf3f3edf69b527c0a0b2b6b1322762679e55b8987db2e9584aec1", size = 447331, upload-time = "2026-05-04T12:19:06.538Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/fa/eb/d875669993b762556ae8b2efd86219943b4c0864d22204d622a9aee3052b/sentry_sdk-2.58.0-py2.py3-none-any.whl", hash = "sha256:688d1c704ddecf382ea3326f21a67453d4caa95592d722b7c780a36a9d23109e", size = 460919, upload-time = "2026-04-13T17:23:24.675Z" },
+    { url = "https://files.pythonhosted.org/packages/bf/00/b8cc413748fb6383d1582e7cda51314f99743351c462a92dc690d5b5853b/sentry_sdk-2.59.0-py2.py3-none-any.whl", hash = "sha256:abcf65ee9a9d9cdebf9ad369782408ecca9c1c792686ef06ba34f5ab233527fe", size = 468432, upload-time = "2026-05-04T12:19:04.741Z" },
 ]
 
 [[package]]
@@ -6012,14 +6018,14 @@ wheels = [
 
 [[package]]
 name = "tifffile"
-version = "2026.4.11"
+version = "2026.5.2"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "numpy" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/d7/4a/e687f5957fead200faad58dbf9c9431a2bbb118040e96f5fb8a55f7ebc50/tifffile-2026.4.11.tar.gz", hash = "sha256:17758ff0c0d4db385792a083ad3ca51fcb0f4d942642f4d8f8bc1287fdcf17bc", size = 394956, upload-time = "2026-04-12T01:57:28.793Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/6c/3e/695c7ab56be57814e369c1f38bc3f64b9dea0a83e867d00c0c9d613a9929/tifffile-2026.5.2.tar.gz", hash = "sha256:21b10227ede8493814a34676774797f721f487e36cb0530e7c3bd882caa87f5a", size = 429140, upload-time = "2026-05-02T20:19:31.497Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/3f/9f/74f110b4271ded519c7add4341cbabc824de26817ff1c345b3109df9e99c/tifffile-2026.4.11-py3-none-any.whl", hash = "sha256:9b94ffeddb39e97601af646345e8808f885773de01b299e480ed6d3a41509ec9", size = 248227, upload-time = "2026-04-12T01:57:26.969Z" },
+    { url = "https://files.pythonhosted.org/packages/b4/af/ce4df3ca29122d219c45d3e86e5ff9a9df03b8cf31afd76817b662c803a3/tifffile-2026.5.2-py3-none-any.whl", hash = "sha256:5129b53b826e768a5b1af26b765eeea75c2d0a227d2d12849617e0737588e105", size = 266420, upload-time = "2026-05-02T20:19:29.814Z" },
 ]
 
 [[package]]
@@ -6243,7 +6249,7 @@ wheels = [
 
 [[package]]
 name = "transformers"
-version = "5.3.0"
+version = "5.5.4"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "huggingface-hub" },
@@ -6256,9 +6262,9 @@ dependencies = [
     { name = "tqdm" },
     { name = "typer" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/fc/1a/70e830d53ecc96ce69cfa8de38f163712d2b43ac52fbd743f39f56025c31/transformers-5.3.0.tar.gz", hash = "sha256:009555b364029da9e2946d41f1c5de9f15e6b1df46b189b7293f33a161b9c557", size = 8830831, upload-time = "2026-03-04T17:41:46.119Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/a5/1e/1e244ab2ab50a863e6b52cc55761910567fa532b69a6740f6e99c5fdbd98/transformers-5.5.4.tar.gz", hash = "sha256:2e67cadba81fc7608cc07c4dd54f524820bc3d95b1cabd0ef3db7733c4f8b82e", size = 8227649, upload-time = "2026-04-13T16:55:55.181Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/b8/88/ae8320064e32679a5429a2c9ebbc05c2bf32cefb6e076f9b07f6d685a9b4/transformers-5.3.0-py3-none-any.whl", hash = "sha256:50ac8c89c3c7033444fb3f9f53138096b997ebb70d4b5e50a2e810bf12d3d29a", size = 10661827, upload-time = "2026-03-04T17:41:42.722Z" },
+    { url = "https://files.pythonhosted.org/packages/29/fb/162a66789c65e5afa3b051309240c26bf37fbc8fea285b4546ae747995a2/transformers-5.5.4-py3-none-any.whl", hash = "sha256:0bd6281b82966fe5a7a16f553ea517a9db1dee6284d7cb224dfd88fc0dd1c167", size = 10236696, upload-time = "2026-04-13T16:55:51.497Z" },
 ]
 
 [[package]]
@@ -6287,7 +6293,7 @@ wheels = [
 
 [[package]]
 name = "typer"
-version = "0.25.0"
+version = "0.25.1"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "annotated-doc" },
@@ -6295,9 +6301,9 @@ dependencies = [
     { name = "rich" },
     { name = "shellingham" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/7b/27/ede8cec7596e0041ba7e7b80b47d132562f56ff454313a16f6084e555c9f/typer-0.25.0.tar.gz", hash = "sha256:123eaf9f19bb40fd268310e12a542c0c6b4fab9c98d9d23342a01ff95e3ce930", size = 120150, upload-time = "2026-04-26T08:46:14.767Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/e4/51/9aed62104cea109b820bbd6c14245af756112017d309da813ef107d42e7e/typer-0.25.1.tar.gz", hash = "sha256:9616eb8853a09ffeabab1698952f33c6f29ffdbceb4eaeecf571880e8d7664cc", size = 122276, upload-time = "2026-04-30T19:32:16.964Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/9a/72/193d4e586ec5a4db834a36bbeb47641a62f951f114ffd0fe5b1b46e8d56f/typer-0.25.0-py3-none-any.whl", hash = "sha256:ac01b48823d3db9a83c9e164338057eadbb1c9957a2a6b4eeb486669c560b5dc", size = 55993, upload-time = "2026-04-26T08:46:15.889Z" },
+    { url = "https://files.pythonhosted.org/packages/3f/f9/2b3ff4e56e5fa7debfaf9eb135d0da96f3e9a1d5b27222223c7296336e5f/typer-0.25.1-py3-none-any.whl", hash = "sha256:75caa44ed46a03fb2dab8808753ffacdbfea88495e74c85a28c5eefcf5f39c89", size = 58409, upload-time = "2026-04-30T19:32:18.271Z" },
 ]
 
 [[package]]
@@ -6428,7 +6434,7 @@ wheels = [
 
 [[package]]
 name = "virtualenv"
-version = "21.2.4"
+version = "21.3.1"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "distlib" },
@@ -6436,9 +6442,9 @@ dependencies = [
     { name = "platformdirs" },
     { name = "python-discovery" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/0c/98/3a7e644e19cb26133488caff231be390579860bbbb3da35913c49a1d0a46/virtualenv-21.2.4.tar.gz", hash = "sha256:b294ef68192638004d72524ce7ef303e9d0cf5a44c95ce2e54a7500a6381cada", size = 5850742, upload-time = "2026-04-14T22:15:31.438Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/ec/0d/915c02c94d207b85580eb09bffab54438a709e7288524094fe781da526c2/virtualenv-21.3.1.tar.gz", hash = "sha256:c2305bc1fddeec40699b8370d13f8d431b0701f00ce895061ce493aeded4426b", size = 7613791, upload-time = "2026-05-05T01:34:31.402Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/27/8d/edd0bd910ff803c308ee9a6b7778621af0d10252219ad9f19ef4d4982a61/virtualenv-21.2.4-py3-none-any.whl", hash = "sha256:29d21e941795206138d0f22f4e45ff7050e5da6c6472299fb7103318763861ac", size = 5831232, upload-time = "2026-04-14T22:15:29.342Z" },
+    { url = "https://files.pythonhosted.org/packages/b1/4f/f71e641e504111a5a74e3a20bc52d01bd86788b22699dd3fee1c63253cf6/virtualenv-21.3.1-py3-none-any.whl", hash = "sha256:d1a71cf58f2f9228fff23a1f6ec15d39785c6b32e03658d104974247145edd35", size = 7594539, upload-time = "2026-05-05T01:34:28.98Z" },
 ]
 
 [[package]]
@@ -6542,11 +6548,11 @@ wheels = [
 
 [[package]]
 name = "wcwidth"
-version = "0.6.0"
+version = "0.7.0"
 source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/35/a2/8e3becb46433538a38726c948d3399905a4c7cabd0df578ede5dc51f0ec2/wcwidth-0.6.0.tar.gz", hash = "sha256:cdc4e4262d6ef9a1a57e018384cbeb1208d8abbc64176027e2c2455c81313159", size = 159684, upload-time = "2026-02-06T19:19:40.919Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/2c/ee/afaf0f85a9a18fe47a67f1e4422ed6cf1fe642f0ae0a2f81166231303c52/wcwidth-0.7.0.tar.gz", hash = "sha256:90e3a7ea092341c44b99562e75d09e4d5160fe7a3974c6fb842a101a95e7eed0", size = 182132, upload-time = "2026-05-02T16:04:12.653Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/68/5a/199c59e0a824a3db2b89c5d2dade7ab5f9624dbf6448dc291b46d5ec94d3/wcwidth-0.6.0-py3-none-any.whl", hash = "sha256:1a3a1e510b553315f8e146c54764f4fb6264ffad731b3d78088cdb1478ffbdad", size = 94189, upload-time = "2026-02-06T19:19:39.646Z" },
+    { url = "https://files.pythonhosted.org/packages/41/52/e465037f5375f43533d1a80b6923955201596a99142ed524d77b571a1418/wcwidth-0.7.0-py3-none-any.whl", hash = "sha256:5d69154c429a82910e241c738cd0e2976fac8a2dd47a1a805f4afed1c0f136f2", size = 110825, upload-time = "2026-05-02T16:04:11.033Z" },
 ]
 
 [[package]]