feat(so_follower): synchronize goal position with present position to prevent positional error during torque re-enablement

feat(teleop): enhance leader-follower behavior and torque management in SO101 teleoperation
feat(teleop): add SO100/SO101 leader-follower teleoperation example
2026-06-17 16:27:04 +00:00 · 2026-04-28 18:40:48 +02:00 · 2026-04-28 17:46:06 +02:00 · 2026-04-28 17:28:15 +02:00 · 2026-04-28 16:53:36 +02:00 · 2026-04-28 12:04:13 +02:00
162 changed files with 3742 additions and 10732 deletions
@@ -382,7 +382,6 @@ jobs:
                --policy.path=\"\$ROBOTWIN_POLICY\" \
                --env.type=robotwin \
                --env.task=\"\$ROBOTWIN_TASKS\" \
-                --env.max_parallel_tasks=5 \
                --eval.batch_size=1 \
                --eval.n_episodes=1 \
                --eval.use_async_envs=false \
@@ -483,7 +482,6 @@ jobs:
                --policy.path=lerobot/smolvla_robocasa \
                --env.type=robocasa \
                --env.task=CloseFridge,OpenCabinet,OpenDrawer,TurnOnMicrowave,TurnOffStove,CloseToasterOvenDoor,SlideDishwasherRack,TurnOnSinkFaucet,NavigateKitchen,TurnOnElectricKettle \
-                --env.max_parallel_tasks=5 \
                --eval.batch_size=1 \
                --eval.n_episodes=1 \
                --eval.use_async_envs=false \
@@ -695,7 +693,6 @@ jobs:
                --env.task=\"\$ROBOMME_TASKS\" \
                --env.dataset_split=test \
                --env.task_ids=[0] \
-                --env.max_parallel_tasks=5 \
                --eval.batch_size=1 \
                --eval.n_episodes=1 \
                --eval.use_async_envs=false \
@@ -803,7 +800,6 @@ jobs:
                --env.type=libero_plus \
                --env.task=\"\$LIBERO_PLUS_SUITE\" \
                --env.task_ids=\"\$LIBERO_PLUS_TASK_IDS\" \
-                --env.max_parallel_tasks=5 \
                --eval.batch_size=1 \
                --eval.n_episodes=1 \
                --eval.use_async_envs=false \
@@ -904,8 +900,6 @@ jobs:
                --policy.path=lerobot/smolvla_vlabench \
                --env.type=vlabench \
                --env.task=select_fruit,select_toy,select_book,select_painting,select_drink,select_ingredient,select_billiards,select_poker,add_condiment,insert_flower \
-                --env.episode_length=50 \
-                --env.max_parallel_tasks=5 \
                --eval.batch_size=1 \
                --eval.n_episodes=1 \
                --eval.use_async_envs=false \
@@ -33,7 +33,7 @@ jobs:
      github.event.workflow_run.event == 'pull_request' &&
      github.event.workflow_run.conclusion == 'success' &&
      github.repository == 'huggingface/lerobot'
-    uses: huggingface/doc-builder/.github/workflows/upload_pr_documentation.yml@2430c1ec91d04667414e2fa31ecfc36c153ea391  # main
+    uses: huggingface/doc-builder/.github/workflows/upload_pr_documentation.yml@9ad2de8582b56c017cb530c1165116d40433f1c6  # main
    with:
      package_name: lerobot
    secrets:
@@ -55,7 +55,7 @@ jobs:
      github.repository == 'huggingface/lerobot'
    permissions:
      contents: read
-    uses: huggingface/doc-builder/.github/workflows/build_main_documentation.yml@2430c1ec91d04667414e2fa31ecfc36c153ea391  # main
+    uses: huggingface/doc-builder/.github/workflows/build_main_documentation.yml@90b4ee2c10b81b5c1a6367c4e6fc9e2fb510a7e3  # main
    with:
      commit_sha: ${{ github.sha }}
      package: lerobot
@@ -78,7 +78,7 @@ jobs:
    permissions:
      contents: read
      pull-requests: write
-    uses: huggingface/doc-builder/.github/workflows/build_pr_documentation.yml@2430c1ec91d04667414e2fa31ecfc36c153ea391  # main
+    uses: huggingface/doc-builder/.github/workflows/build_pr_documentation.yml@90b4ee2c10b81b5c1a6367c4e6fc9e2fb510a7e3  # main
    with:
      commit_sha: ${{ github.event.pull_request.head.sha }}
      pr_number: ${{ github.event.number }}
@@ -152,14 +152,13 @@ jobs:
            BASE_VERSION="${VERSION%%-*}"
            echo "Installing pre-release version $BASE_VERSION from TestPyPI..."
            uv pip install \
-              --torch-backend cpu \
              --index-url https://test.pypi.org/simple/ \
              --extra-index-url https://pypi.org/simple \
              --index-strategy unsafe-best-match \
               "lerobot[all]==$BASE_VERSION"
          else
            echo "Installing release version $VERSION from PyPI..."
-            uv pip install --torch-backend cpu "lerobot[all]==$VERSION"
+            uv pip install "lerobot[all]==$VERSION"
          fi
      - name: Check lerobot version
        run: uv run python -c "import lerobot; print(lerobot.__version__)"
@@ -19,19 +19,19 @@ on:
  workflow_dispatch:

  # Runs at 02:00
-  # schedule:
-  #   - cron: "0 2 * * *"
+  schedule:
+    - cron: "0 2 * * *"

 env:
  CLOSE_ISSUE_MESSAGE: >
-    This issue was closed because it has been stalled for 30 days with no activity.
+    This issue was closed because it has been stalled for 14 days with no activity.
    Feel free to reopen if is still relevant, or to ping a collaborator if you have any questions.
  CLOSE_PR_MESSAGE: >
-    This PR was closed because it has been stalled for 30 days with no activity.
+    This PR was closed because it has been stalled for 21 days with no activity.
    Feel free to reopen if is still relevant, or to ping a collaborator if you have any questions.
  WARN_ISSUE_MESSAGE: >
    This issue has been automatically marked as stale because it has not had
-    recent activity (1 year). It will be closed if no further activity occurs.
+    recent activity (6 months). It will be closed if no further activity occurs.
    Any change, comment or update to this issue will reset this count.
    Thank you for your contributions.
  WARN_PR_MESSAGE: >
@@ -59,10 +59,10 @@ jobs:
          stale-pr-label: stale
          exempt-issue-labels: never-stale
          exempt-pr-labels: never-stale
-          days-before-issue-stale: 365
-          days-before-issue-close: 30
+          days-before-issue-stale: 180
+          days-before-issue-close: 14
          days-before-pr-stale: 365
-          days-before-pr-close: 30
+          days-before-pr-close: 21
          delete-branch: true
          close-issue-message: ${{ env.CLOSE_ISSUE_MESSAGE }}
          close-pr-message: ${{ env.CLOSE_PR_MESSAGE }}
@@ -232,8 +232,6 @@ Match the policy to the user's **GPU memory** and **time budget**. Numbers below

 All policies typically train for **5–10 epochs** (see §7).

-> **Human-facing version:** the [Compute Hardware Guide](./docs/source/hardware_guide.mdx) reuses the table below and adds a cloud-GPU tier guide and a Hugging Face Jobs pointer.
-
 | Policy      | Batch | Update (ms) | Peak GPU mem (GB) | Best for                                                                                         |
 | ----------- | ----: | ----------: | ----------------: | ------------------------------------------------------------------------------------------------ |
 | `act`       |     4 |    **83.9** |          **0.94** | First-time users, laptops, single-task. Fast and reliable.                                       |
@@ -1,4 +1,3 @@
 include src/lerobot/templates/lerobot_modelcard_template.md
-include src/lerobot/templates/lerobot_rewardmodel_modelcard_template.md
 include src/lerobot/datasets/card_template.md
 include src/lerobot/envs/metaworld_config.json
@@ -109,7 +109,7 @@ lerobot-train \

 Similarly to the hardware, you can easily implement your own policy & leverage LeRobot's data collection, training, and visualization tools, and share your model to the HF Hub

-For detailed policy setup guides, see the [Policy Documentation](https://huggingface.co/docs/lerobot/bring_your_own_policies). For GPU/RAM requirements and expected training time per policy, see the [Compute Hardware Guide](https://huggingface.co/docs/lerobot/hardware_guide).
+For detailed policy setup guides, see the [Policy Documentation](https://huggingface.co/docs/lerobot/bring_your_own_policies).

 ## Inference & Evaluation

@@ -0,0 +1,288 @@
+# Video benchmark
+
+## Questions
+
+What is the optimal trade-off between:
+
+- minimizing loading time with random access,
+- minimizing memory space on disk,
+- maximizing success rate of policies,
+- compatibility across devices/platforms for decoding videos (e.g. video players, web browsers).
+
+How to encode videos?
+
+- Which video codec (`-vcodec`) to use? h264, h265, AV1?
+- What pixel format to use (`-pix_fmt`)? `yuv444p` or `yuv420p`?
+- How much compression (`-crf`)? No compression with `0`, intermediate compression with `25` or extreme with `50+`?
+- Which frequency to choose for key frames (`-g`)? A key frame every `10` frames?
+
+How to decode videos?
+
+- Which `decoder`? `torchvision`, `torchaudio`, `ffmpegio`, `decord`, or `nvc`?
+- Which scenarios to use for requesting timestamps during the benchmark? (`timestamps_mode`)
+
+## Variables
+
+**Image content & size**
+We don't expect the same optimal settings for a dataset of images from a simulation, or from real-world in an apartment, or in a factory, or outdoor, or with lots of moving objects in the scene, etc. Similarly, loading times might not vary linearly with the image size (resolution).
+For these reasons, we run this benchmark on four representative datasets:
+
+- `lerobot/pusht_image`: (96 x 96 pixels) simulation with simple geometric shapes, fixed camera.
+- `lerobot/aloha_mobile_shrimp_image`: (480 x 640 pixels) real-world indoor, moving camera.
+- `lerobot/paris_street`: (720 x 1280 pixels) real-world outdoor, moving camera.
+- `lerobot/kitchen`: (1080 x 1920 pixels) real-world indoor, fixed camera.
+
+Note: The datasets used for this benchmark need to be image datasets, not video datasets.
+
+**Data augmentations**
+We might revisit this benchmark and find better settings if we train our policies with various data augmentations to make them more robust (e.g. robust to color changes, compression, etc.).
+
+### Encoding parameters
+
+| parameter   | values                                                       |
+| ----------- | ------------------------------------------------------------ |
+| **vcodec**  | `libx264`, `libx265`, `libsvtav1`                            |
+| **pix_fmt** | `yuv444p`, `yuv420p`                                         |
+| **g**       | `1`, `2`, `3`, `4`, `5`, `6`, `10`, `15`, `20`, `40`, `None` |
+| **crf**     | `0`, `5`, `10`, `15`, `20`, `25`, `30`, `40`, `50`, `None`   |
+
+Note that `crf` value might be interpreted differently by various video codecs. In other words, the same value used with one codec doesn't necessarily translate into the same compression level with another codec. In fact, the default value (`None`) isn't the same amongst the different video codecs. Importantly, it is also the case for many other ffmpeg arguments like `g` which specifies the frequency of the key frames.
+
+For a comprehensive list and documentation of these parameters, see the ffmpeg documentation depending on the video codec used:
+
+- h264: https://trac.ffmpeg.org/wiki/Encode/H.264
+- h265: https://trac.ffmpeg.org/wiki/Encode/H.265
+- AV1: https://trac.ffmpeg.org/wiki/Encode/AV1
+
+### Decoding parameters
+
+**Decoder**
+We tested two video decoding backends from torchvision:
+
+- `pyav`
+- `video_reader` (requires building torchvision from source)
+
+**Requested timestamps**
+Given the way video decoding works, once a keyframe has been loaded, the decoding of subsequent frames is fast.
+This of course is affected by the `-g` parameter during encoding, which specifies the frequency of the keyframes. Given our typical use cases in robotics policies which might request a few timestamps in different random places, we want to replicate these use cases with the following scenarios:
+
+- `1_frame`: 1 frame,
+- `2_frames`: 2 consecutive frames (e.g. `[t, t + 1 / fps]`),
+- `6_frames`: 6 consecutive frames (e.g. `[t + i / fps for i in range(6)]`)
+
+Note that this differs significantly from a typical use case like watching a movie, in which every frame is loaded sequentially from the beginning to the end and it's acceptable to have big values for `-g`.
+
+Additionally, because some policies might request single timestamps that are a few frames apart, we also have the following scenario:
+
+- `2_frames_4_space`: 2 frames with 4 consecutive frames of spacing in between (e.g. `[t, t + 5 / fps]`).
+
+However, due to how video decoding is implemented with `pyav`, we don't have access to an accurate seek so in practice this scenario is essentially the same as `6_frames` since all 6 frames between `t` and `t + 5 / fps` will be decoded.
+
+## Metrics
+
+**Data compression ratio (lower is better)**
+`video_images_size_ratio` is the ratio of the memory space on disk taken by the encoded video over the memory space taken by the original images. For instance, `video_images_size_ratio=25%` means that the video takes 4 times less memory space on disk compared to the original images.
+
+**Loading time ratio (lower is better)**
+`video_images_load_time_ratio` is the ratio of the time it takes to decode frames from the video at the given timestamps over the time it takes to load the exact same original images. For instance, `video_images_load_time_ratio=200%` means that decoding from video is 2 times slower than loading the original images.
+
+**Average Mean Square Error (lower is better)**
+`avg_mse` is the average mean square error between each decoded frame and its corresponding original image over all requested timestamps, and also divided by the number of pixels in the image to be comparable when switching to different image sizes.
+
+**Average Peak Signal to Noise Ratio (higher is better)**
+`avg_psnr` measures the ratio between the maximum possible power of a signal and the power of corrupting noise that affects the fidelity of its representation. Higher PSNR indicates better quality.
+
+**Average Structural Similarity Index Measure (higher is better)**
+`avg_ssim` evaluates the perceived quality of images by comparing luminance, contrast, and structure. SSIM values range from -1 to 1, where 1 indicates perfect similarity.
+
+One aspect that can't be measured here with those metrics is the compatibility of the encoding across platforms, in particular on web browser, for visualization purposes.
+h264, h265 and AV1 are all commonly used codecs and should not pose an issue. However, the chroma subsampling (`pix_fmt`) format might affect compatibility:
+
+- `yuv420p` is more widely supported across various platforms, including web browsers.
+- `yuv444p` offers higher color fidelity but might not be supported as broadly.
+
+<!-- **Loss of a pretrained policy (higher is better)** (not available)
+`loss_pretrained` is the result of evaluating with the selected encoding/decoding settings a policy pretrained on original images. It is easier to understand than `avg_l2_error`.
+
+**Success rate after retraining (higher is better)** (not available)
+`success_rate` is the result of training and evaluating a policy with the selected encoding/decoding settings. It is the most difficult metric to get but also the very best. -->
+
+## How the benchmark works
+
+The benchmark evaluates both encoding and decoding of video frames on the first episode of each dataset.
+
+**Encoding:** for each `vcodec` and `pix_fmt` pair, we use a default value for `g` and `crf` upon which we change a single value (either `g` or `crf`) to one of the specified values (we don't test every combination of those as this would be computationally too heavy).
+This gives a unique set of encoding parameters which is used to encode the episode.
+
+**Decoding:** Then, for each of those unique encodings, we iterate through every combination of the decoding parameters `backend` and `timestamps_mode`. For each of them, we record the metrics of a number of samples (given by `--num-samples`). This is parallelized for efficiency and the number of processes can be controlled with `--num-workers`. Ideally, it's best to have a `--num-samples` that is divisible by `--num-workers`.
+
+Intermediate results are saved for each `vcodec` and `pix_fmt` combination in CSV tables.
+These are then all concatenated to a single table ready for analysis.
+
+## Caveats
+
+We tried to measure the most impactful parameters for both encoding and decoding. However, for computational reasons we can't test out every combination.
+
+Additional encoding parameters exist that are not included in this benchmark. In particular:
+
+- `-preset` which allows for selecting encoding presets. This represents a collection of options that will provide a certain encoding speed to compression ratio. By leaving this parameter unspecified, it is considered to be `medium` for libx264 and libx265 and `8` for libsvtav1.
+- `-tune` which allows optimizing the encoding for certain aspects (e.g. film quality, fast decoding, etc.).
+
+See the documentation mentioned above for more detailed info on these settings and for a more comprehensive list of other parameters.
+
+Similarly on the decoding side, other decoders exist but are not implemented in our current benchmark. To name a few:
+
+- `torchaudio`
+- `ffmpegio`
+- `decord`
+- `nvc`
+
+Note as well that since we are mostly interested in the performance at decoding time (also because encoding is done only once before uploading a dataset), we did not measure encoding times nor have any metrics regarding encoding.
+However, besides the necessity to build ffmpeg from source, encoding did not pose any issue and it didn't take a significant amount of time during this benchmark.
+
+## Install
+
+Building ffmpeg from source is required to include libx265 and libaom/libsvtav1 (av1) video codecs ([compilation guide](https://trac.ffmpeg.org/wiki/CompilationGuide/Ubuntu)).
+
+**Note:** While you still need to build torchvision with a conda-installed `ffmpeg<4.3` to use the `video_reader` decoder (as described in [#220](https://github.com/huggingface/lerobot/pull/220)), you also need another version which is custom-built with all the video codecs for encoding. For the script to then use that version, you can prepend the benchmark command (see the Example section below) with `PATH="$HOME/bin:$PATH"`, where `$HOME/bin` is where ffmpeg should be built.
+
+## Adding a video decoder
+
+Right now, we're only benchmarking the two video decoders available with torchvision: `pyav` and `video_reader`.
+You can easily add a new decoder to benchmark by adding it to this function in the script:
+
+```diff
+def decode_video_frames(
+    video_path: str,
+    timestamps: list[float],
+    tolerance_s: float,
+    backend: str,
+) -> torch.Tensor:
+    if backend in ["pyav", "video_reader"]:
+        return decode_video_frames_torchvision(
+            video_path, timestamps, tolerance_s, backend
+        )
+    elif backend in ["your_decoder"]:
+        return your_decoder_function(
+            video_path, timestamps, tolerance_s, backend
+        )
+    else:
+        raise NotImplementedError(backend)
+```
+
+## Example
+
+For a quick run, you can try these parameters:
+
+```bash
+python benchmark/video/run_video_benchmark.py \
+    --output-dir outputs/video_benchmark \
+    --repo-ids \
+        lerobot/pusht_image \
+        lerobot/aloha_mobile_shrimp_image \
+    --vcodec libx264 libx265 \
+    --pix-fmt yuv444p yuv420p \
+    --g 2 20 None \
+    --crf 10 40 None \
+    --timestamps-modes 1_frame 2_frames \
+    --backends pyav video_reader \
+    --num-samples 5 \
+    --num-workers 5 \
+    --save-frames 0
+```
+
+## Results
+
+### Reproduce
+
+We ran the benchmark with the following parameters:
+
+```bash
+# h264 and h265 encodings
+python benchmark/video/run_video_benchmark.py \
+    --output-dir outputs/video_benchmark \
+    --repo-ids \
+        lerobot/pusht_image \
+        lerobot/aloha_mobile_shrimp_image \
+        lerobot/paris_street \
+        lerobot/kitchen \
+    --vcodec libx264 libx265 \
+    --pix-fmt yuv444p yuv420p \
+    --g 1 2 3 4 5 6 10 15 20 40 None \
+    --crf 0 5 10 15 20 25 30 40 50 None \
+    --timestamps-modes 1_frame 2_frames 6_frames \
+    --backends pyav video_reader \
+    --num-samples 50 \
+    --num-workers 5 \
+    --save-frames 1
+
+# av1 encoding (only compatible with yuv420p and pyav decoder)
+python benchmark/video/run_video_benchmark.py \
+    --output-dir outputs/video_benchmark \
+    --repo-ids \
+        lerobot/pusht_image \
+        lerobot/aloha_mobile_shrimp_image \
+        lerobot/paris_street \
+        lerobot/kitchen \
+    --vcodec libsvtav1 \
+    --pix-fmt yuv420p \
+    --g 1 2 3 4 5 6 10 15 20 40 None \
+    --crf 0 5 10 15 20 25 30 40 50 None \
+    --timestamps-modes 1_frame 2_frames 6_frames \
+    --backends pyav \
+    --num-samples 50 \
+    --num-workers 5 \
+    --save-frames 1
+```
+
+The full results are available [here](https://docs.google.com/spreadsheets/d/1OYJB43Qu8fC26k_OyoMFgGBBKfQRCi4BIuYitQnq3sw/edit?usp=sharing)
+
+### Parameters selected for LeRobotDataset
+
+Considering these results, we chose what we think is the best set of encoding parameters:
+
+- vcodec: `libsvtav1`
+- pix-fmt: `yuv420p`
+- g: `2`
+- crf: `30`
+
+Since we're using av1 encoding, we're choosing the `pyav` decoder as `video_reader` does not support it (and `pyav` doesn't require a custom build of `torchvision`).
+
+### Summary
+
+These tables show the results for `g=2` and `crf=30`, using `timestamps-modes=6_frames` and `backend=pyav`
+
+| video_images_size_ratio           | vcodec     | pix_fmt |           |           |           |
+| --------------------------------- | ---------- | ------- | --------- | --------- | --------- |
+|                                   | libx264    |         | libx265   |           | libsvtav1 |
+| repo_id                           | yuv420p    | yuv444p | yuv420p   | yuv444p   | yuv420p   |
+| lerobot/pusht_image               | **16.97%** | 17.58%  | 18.57%    | 18.86%    | 22.06%    |
+| lerobot/aloha_mobile_shrimp_image | 2.14%      | 2.11%   | 1.38%     | **1.37%** | 5.59%     |
+| lerobot/paris_street              | 2.12%      | 2.13%   | **1.54%** | **1.54%** | 4.43%     |
+| lerobot/kitchen                   | 1.40%      | 1.39%   | **1.00%** | **1.00%** | 2.52%     |
+
+| video_images_load_time_ratio      | vcodec  | pix_fmt |          |         |           |
+| --------------------------------- | ------- | ------- | -------- | ------- | --------- |
+|                                   | libx264 |         | libx265  |         | libsvtav1 |
+| repo_id                           | yuv420p | yuv444p | yuv420p  | yuv444p | yuv420p   |
+| lerobot/pusht_image               | 6.45    | 5.19    | **1.90** | 2.12    | 2.47      |
+| lerobot/aloha_mobile_shrimp_image | 11.80   | 7.92    | 0.71     | 0.85    | **0.48**  |
+| lerobot/paris_street              | 2.21    | 2.05    | 0.36     | 0.49    | **0.30**  |
+| lerobot/kitchen                   | 1.46    | 1.46    | 0.28     | 0.51    | **0.26**  |
+
+|                                   |          | vcodec   | pix_fmt      |          |           |              |
+| --------------------------------- | -------- | -------- | ------------ | -------- | --------- | ------------ |
+|                                   |          | libx264  |              | libx265  |           | libsvtav1    |
+| repo_id                           | metric   | yuv420p  | yuv444p      | yuv420p  | yuv444p   | yuv420p      |
+| lerobot/pusht_image               | avg_mse  | 2.90E-04 | **2.03E-04** | 3.13E-04 | 2.29E-04  | 2.19E-04     |
+|                                   | avg_psnr | 35.44    | 37.07        | 35.49    | **37.30** | 37.20        |
+|                                   | avg_ssim | 98.28%   | **98.85%**   | 98.31%   | 98.84%    | 98.72%       |
+| lerobot/aloha_mobile_shrimp_image | avg_mse  | 2.76E-04 | 2.59E-04     | 3.17E-04 | 3.06E-04  | **1.30E-04** |
+|                                   | avg_psnr | 35.91    | 36.21        | 35.88    | 36.09     | **40.17**    |
+|                                   | avg_ssim | 95.19%   | 95.18%       | 95.00%   | 95.05%    | **97.73%**   |
+| lerobot/paris_street              | avg_mse  | 6.89E-04 | 6.70E-04     | 4.03E-03 | 4.02E-03  | **3.09E-04** |
+|                                   | avg_psnr | 33.48    | 33.68        | 32.05    | 32.15     | **35.40**    |
+|                                   | avg_ssim | 93.76%   | 93.75%       | 89.46%   | 89.46%    | **95.46%**   |
+| lerobot/kitchen                   | avg_mse  | 2.50E-04 | 2.24E-04     | 4.28E-04 | 4.18E-04  | **1.53E-04** |
+|                                   | avg_psnr | 36.73    | 37.33        | 36.56    | 36.75     | **39.12**    |
+|                                   | avg_ssim | 95.47%   | 95.58%       | 95.52%   | 95.53%    | **96.82%**   |
@@ -0,0 +1,488 @@
+#!/usr/bin/env python
+
+# Copyright 2024 The HuggingFace Inc. team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Assess the performance of video decoding in various configurations.
+
+This script will benchmark different video encoding and decoding parameters.
+See the provided README.md or run `python benchmark/video/run_video_benchmark.py --help` for usage info.
+"""
+
+import argparse
+import datetime as dt
+import itertools
+import random
+import shutil
+from collections import OrderedDict
+from concurrent.futures import ThreadPoolExecutor, as_completed
+from pathlib import Path
+from threading import Lock
+
+import einops
+import numpy as np
+import pandas as pd
+import PIL
+import torch
+from skimage.metrics import mean_squared_error, peak_signal_noise_ratio, structural_similarity
+from tqdm import tqdm
+
+from lerobot.datasets.lerobot_dataset import LeRobotDataset
+from lerobot.datasets.video_utils import (
+    decode_video_frames,
+    encode_video_frames,
+)
+from lerobot.utils.constants import OBS_IMAGE
+from lerobot.utils.utils import TimerManager
+
+BASE_ENCODING = OrderedDict(
+    [
+        ("vcodec", "libx264"),
+        ("pix_fmt", "yuv444p"),
+        ("g", 2),
+        ("crf", None),
+        # TODO(aliberts): Add fastdecode
+        # ("fastdecode", 0),
+    ]
+)
+
+
+# TODO(rcadene, aliberts): move to `utils.py` folder when we want to refactor
+def parse_int_or_none(value) -> int | None:
+    """Convert a command-line token into an ``int``, or ``None`` for the literal "none".
+
+    Args:
+        value: Raw string token; the comparison with "none" is case-insensitive.
+
+    Returns:
+        ``None`` when the token spells "none", otherwise the token parsed with ``int``.
+
+    Raises:
+        argparse.ArgumentTypeError: If the token is neither "none" nor a valid integer.
+    """
+    wants_none = value.lower() == "none"
+    if not wants_none:
+        try:
+            parsed = int(value)
+        except ValueError as err:
+            raise argparse.ArgumentTypeError(f"Invalid int or None: {value}") from err
+        return parsed
+    return None
+
+
+def check_datasets_formats(repo_ids: list) -> None:
+    """Reject any dataset that declares video keys; the benchmark needs image datasets.
+
+    Each repo id is loaded as a ``LeRobotDataset`` and its ``meta.video_keys`` is inspected.
+
+    Raises:
+        ValueError: On the first dataset whose ``meta.video_keys`` is not empty.
+    """
+    for repo_id in repo_ids:
+        video_keys = LeRobotDataset(repo_id).meta.video_keys
+        if len(video_keys) <= 0:
+            continue
+        raise ValueError(
+            f"Use only image dataset for running this benchmark. Video dataset provided: {repo_id}"
+        )
+
+
+def get_directory_size(directory: Path) -> int:
+    """Return the combined size in bytes of every file found recursively under ``directory``."""
+    # `rglob("*")` also yields sub-directories, so keep only the entries that are files.
+    return sum(entry.stat().st_size for entry in directory.rglob("*") if entry.is_file())
+
+
+def load_original_frames(imgs_dir: Path, timestamps: list[float], fps: int) -> torch.Tensor:
+    """Load the reference PNG frames matching ``timestamps`` as a single float tensor.
+
+    Args:
+        imgs_dir: Directory holding images named ``frame-{index:06d}.png``.
+        timestamps: Timestamps in seconds; each one is mapped to the nearest frame index.
+        fps: Frame rate used to convert a timestamp into a frame index.
+
+    Returns:
+        The frames stacked along a new first dimension, each one rearranged from
+        "h w c" to "c h w" and converted to float32 divided by 255.
+    """
+    frames = []
+    for ts in timestamps:
+        # A timestamp built as `index / fps` can multiply back to just below the integer
+        # (e.g. 1 / 49 * 49 == 0.9999999999999999); truncating would then select the
+        # previous frame, so pick the nearest index instead.
+        idx = int(round(ts * fps))
+        # Release the file handle as soon as the pixels have been copied into the array.
+        with PIL.Image.open(imgs_dir / f"frame-{idx:06d}.png") as img:
+            frame = torch.from_numpy(np.array(img))
+        frame = frame.type(torch.float32) / 255
+        frame = einops.rearrange(frame, "h w c -> c h w")
+        frames.append(frame)
+    return torch.stack(frames)
+
+
+def save_decoded_frames(
+    imgs_dir: Path, save_dir: Path, frames: torch.Tensor, timestamps: list[float], fps: int
+) -> None:
+    """Write each decoded frame next to a copy of its original image for visual comparison.
+
+    For every timestamp, two files are created in ``save_dir``:
+    ``frame-{index:06d}_decoded.png`` (from ``frames``) and ``frame-{index:06d}_original.png``
+    (copied from ``imgs_dir``). Nothing is written when ``save_dir`` already holds one decoded
+    frame per timestamp.
+
+    Args:
+        imgs_dir: Directory holding the original images named ``frame-{index:06d}.png``.
+        save_dir: Output directory, created if missing.
+        frames: Decoded frames, indexed in the same order as ``timestamps``; each frame is
+            permuted with (1, 2, 0) and multiplied by 255 before being saved as uint8.
+        timestamps: Timestamps in seconds of the decoded frames.
+        fps: Frame rate used to convert a timestamp into a frame index.
+    """
+    # Two files are written per timestamp, so counting every "frame-*.png" could never equal
+    # len(timestamps) for a populated directory; count only the decoded ones.
+    if save_dir.exists() and len(list(save_dir.glob("frame-*_decoded.png"))) == len(timestamps):
+        return
+
+    save_dir.mkdir(parents=True, exist_ok=True)
+    for i, ts in enumerate(timestamps):
+        # Nearest index rather than truncation: `index / fps * fps` can fall just below `index`.
+        idx = int(round(ts * fps))
+        frame_hwc = (frames[i].permute((1, 2, 0)) * 255).type(torch.uint8).cpu().numpy()
+        PIL.Image.fromarray(frame_hwc).save(save_dir / f"frame-{idx:06d}_decoded.png")
+        shutil.copyfile(imgs_dir / f"frame-{idx:06d}.png", save_dir / f"frame-{idx:06d}_original.png")
+
+
+def save_first_episode(imgs_dir: Path, dataset: LeRobotDataset) -> None:
+    """Export the images of episode 0 (first camera only) to ``imgs_dir`` as PNG files.
+
+    Files are named ``frame-{index:06d}.png``. The export is skipped when ``imgs_dir`` already
+    contains as many ``frame-*.png`` files as the episode length reported by the dataset metadata.
+    """
+    first_episode = 0
+    num_frames = dataset.meta.episodes["length"][first_episode]
+    already_saved = imgs_dir.exists() and len(list(imgs_dir.glob("frame-*.png"))) == num_frames
+    if already_saved:
+        return
+
+    imgs_dir.mkdir(parents=True, exist_ok=True)
+    raw_dataset = dataset.hf_dataset.with_format(None)
+
+    # Only the first column whose name starts with OBS_IMAGE (the first camera) is exported.
+    camera_key = [name for name in raw_dataset.features if name.startswith(OBS_IMAGE)][0]
+    frames_only = raw_dataset.select_columns(camera_key)
+
+    progress = tqdm(frames_only, desc=f"saving {dataset.repo_id} first episode images", leave=False)
+    last_index = num_frames - 1
+    for frame_index, row in enumerate(progress):
+        row[camera_key].save(str(imgs_dir / f"frame-{frame_index:06d}.png"), quality=100)
+
+        # Stop once the last frame of the first episode has been written.
+        if frame_index >= last_index:
+            break
+
+
+def sample_timestamps(timestamps_mode: str, ep_num_images: int, fps: int) -> list[float]:
+    """Draw a random anchor frame and return the timestamps requested by ``timestamps_mode``.
+
+    Args:
+        timestamps_mode: One of "1_frame", "2_frames", "2_frames_4_space" or "6_frames".
+        ep_num_images: Number of images in the episode; the anchor index is drawn from
+            ``[5, ep_num_images - 1]``.
+        fps: Frame rate used to turn frame indices into timestamps.
+
+    Returns:
+        Timestamps (frame index divided by ``fps``) in increasing order, ending at the anchor.
+
+    Raises:
+        ValueError: If ``timestamps_mode`` is not one of the supported modes.
+    """
+    # The anchor is at least 5 so that the widest look-back (5 frames) never goes below frame 0.
+    anchor = random.randint(5, ep_num_images - 1)
+
+    # Each mode is described by how many frames before the anchor every requested frame lies.
+    if timestamps_mode == "1_frame":
+        look_back = (0,)
+    elif timestamps_mode == "2_frames":
+        look_back = (1, 0)
+    elif timestamps_mode == "2_frames_4_space":
+        look_back = (5, 0)
+    elif timestamps_mode == "6_frames":
+        look_back = tuple(range(5, -1, -1))
+    else:
+        raise ValueError(timestamps_mode)
+
+    return [(anchor - steps) / fps for steps in look_back]
+
+
+def benchmark_decoding(
+    imgs_dir: Path,
+    video_path: Path,
+    timestamps_mode: str,
+    backend: str,
+    ep_num_images: int,
+    fps: int,
+    num_samples: int = 50,
+    num_workers: int = 4,
+    save_frames: bool = False,
+) -> dict:
+    """Measure decoding time and image quality of ``video_path`` against the original images.
+
+    ``num_samples`` random samples are processed in a thread pool. For each sample the decoded
+    frames are compared with the original images using MSE, PSNR and SSIM, and the per-frame
+    loading times of both sources are recorded.
+
+    Args:
+        imgs_dir: Directory holding the original images of the episode.
+        video_path: Encoded video to decode from.
+        timestamps_mode: Sampling scenario forwarded to ``sample_timestamps``.
+        backend: Decoder backend forwarded to ``decode_video_frames``.
+        ep_num_images: Number of images in the episode.
+        fps: Frame rate of the episode.
+        num_samples: Number of samples to draw.
+        num_workers: Maximum number of worker threads.
+        save_frames: When true, the frames of sample 0 are saved next to the video.
+
+    Returns:
+        A dict with the keys ``avg_load_time_video_ms``, ``avg_load_time_images_ms``,
+        ``video_images_load_time_ratio``, ``avg_mse``, ``avg_psnr`` and ``avg_ssim``.
+    """
+
+    def process_sample(sample: int, lock: Lock):
+        """Decode one random sample and return its timings and per-frame quality metrics."""
+        time_benchmark = TimerManager(log=False)
+        timestamps = sample_timestamps(timestamps_mode, ep_num_images, fps)
+        num_frames = len(timestamps)
+        result = {
+            "psnr_values": [],
+            "ssim_values": [],
+            "mse_values": [],
+        }
+
+        # Acquire the lock BEFORE starting the timer: context managers are entered left to
+        # right, so the previous order (timer, then lock) also measured the time spent waiting
+        # for other threads to release the lock, inflating the reported video loading time.
+        # NOTE(review): assumes TimerManager starts timing on __enter__ — confirm.
+        with lock, time_benchmark:
+            frames = decode_video_frames(video_path, timestamps=timestamps, tolerance_s=5e-1, backend=backend)
+        result["load_time_video_ms"] = (time_benchmark.last * 1000) / num_frames
+
+        with time_benchmark:
+            original_frames = load_original_frames(imgs_dir, timestamps, fps)
+        result["load_time_images_ms"] = (time_benchmark.last * 1000) / num_frames
+
+        frames_np, original_frames_np = frames.numpy(), original_frames.numpy()
+        for i in range(num_frames):
+            result["mse_values"].append(mean_squared_error(original_frames_np[i], frames_np[i]))
+            result["psnr_values"].append(
+                peak_signal_noise_ratio(original_frames_np[i], frames_np[i], data_range=1.0)
+            )
+            result["ssim_values"].append(
+                structural_similarity(original_frames_np[i], frames_np[i], data_range=1.0, channel_axis=0)
+            )
+
+        # Only the first sample is dumped to disk, to keep the number of saved frames small.
+        if save_frames and sample == 0:
+            save_dir = video_path.with_suffix("") / f"{timestamps_mode}_{backend}"
+            save_decoded_frames(imgs_dir, save_dir, frames, timestamps, fps)
+
+        return result
+
+    load_times_video_ms = []
+    load_times_images_ms = []
+    mse_values = []
+    psnr_values = []
+    ssim_values = []
+
+    # A sample is a single set of decoded frames specified by timestamps_mode (e.g. a single frame, 2 frames, etc.).
+    # For each sample, we record metrics (loading time and quality metrics) which are then averaged over all samples.
+    # As these samples are independent, we run them in parallel threads to speed up the benchmark.
+    # Use a single shared lock for all worker threads
+    shared_lock = Lock()
+    with ThreadPoolExecutor(max_workers=num_workers) as executor:
+        futures = [executor.submit(process_sample, i, shared_lock) for i in range(num_samples)]
+        for future in tqdm(as_completed(futures), total=num_samples, desc="samples", leave=False):
+            result = future.result()
+            load_times_video_ms.append(result["load_time_video_ms"])
+            load_times_images_ms.append(result["load_time_images_ms"])
+            psnr_values.extend(result["psnr_values"])
+            ssim_values.extend(result["ssim_values"])
+            mse_values.extend(result["mse_values"])
+
+    avg_load_time_video_ms = float(np.array(load_times_video_ms).mean())
+    avg_load_time_images_ms = float(np.array(load_times_images_ms).mean())
+    video_images_load_time_ratio = avg_load_time_video_ms / avg_load_time_images_ms
+
+    return {
+        "avg_load_time_video_ms": avg_load_time_video_ms,
+        "avg_load_time_images_ms": avg_load_time_images_ms,
+        "video_images_load_time_ratio": video_images_load_time_ratio,
+        "avg_mse": float(np.mean(mse_values)),
+        "avg_psnr": float(np.mean(psnr_values)),
+        "avg_ssim": float(np.mean(ssim_values)),
+    }
+
+
+def benchmark_encoding_decoding(
+    dataset: LeRobotDataset,
+    video_path: Path,
+    imgs_dir: Path,
+    encoding_cfg: dict,
+    decoding_cfg: dict,
+    num_samples: int,
+    num_workers: int,
+    save_frames: bool,
+    overwrite: bool = False,
+    seed: int = 1337,
+) -> list[dict]:
+    fps = dataset.fps
+
+    if overwrite or not video_path.is_file():
+        tqdm.write(f"encoding {video_path}")
+        encode_video_frames(
+            imgs_dir=imgs_dir,
+            video_path=video_path,
+            fps=fps,
+            vcodec=encoding_cfg["vcodec"],
+            pix_fmt=encoding_cfg["pix_fmt"],
+            g=encoding_cfg.get("g"),
+            crf=encoding_cfg.get("crf"),
+            # fast_decode=encoding_cfg.get("fastdecode"),
+            overwrite=True,
+        )
+
+    episode_index = 0
+    ep_num_images = dataset.meta.episodes["length"][episode_index]
+    width, height = tuple(dataset[0][dataset.meta.camera_keys[0]].shape[-2:])
+    num_pixels = width * height
+    video_size_bytes = video_path.stat().st_size
+    images_size_bytes = get_directory_size(imgs_dir)
+    video_images_size_ratio = video_size_bytes / images_size_bytes
+
+    random.seed(seed)
+    benchmark_table = []
+    for timestamps_mode in tqdm(
+        decoding_cfg["timestamps_modes"], desc="decodings (timestamps_modes)", leave=False
+    ):
+        for backend in tqdm(decoding_cfg["backends"], desc="decodings (backends)", leave=False):
+            benchmark_row = benchmark_decoding(
+                imgs_dir,
+                video_path,
+                timestamps_mode,
+                backend,
+                ep_num_images,
+                fps,
+                num_samples,
+                num_workers,
+                save_frames,
+            )
+            benchmark_row.update(
+                **{
+                    "repo_id": dataset.repo_id,
+                    "resolution": f"{width} x {height}",
+                    "num_pixels": num_pixels,
+                    "video_size_bytes": video_size_bytes,
+                    "images_size_bytes": images_size_bytes,
+                    "video_images_size_ratio": video_images_size_ratio,
+                    "timestamps_mode": timestamps_mode,
+                    "backend": backend,
+                },
+                **encoding_cfg,
+            )
+            benchmark_table.append(benchmark_row)
+
+    return benchmark_table
+
+
+def main(
+    output_dir: Path,
+    repo_ids: list[str],
+    vcodec: list[str],
+    pix_fmt: list[str],
+    g: list[int],
+    crf: list[int],
+    # fastdecode: list[int],
+    timestamps_modes: list[str],
+    backends: list[str],
+    num_samples: int,
+    num_workers: int,
+    save_frames: bool,
+):
+    check_datasets_formats(repo_ids)
+    encoding_benchmarks = {
+        "g": g,
+        "crf": crf,
+        # "fastdecode": fastdecode,
+    }
+    decoding_benchmarks = {
+        "timestamps_modes": timestamps_modes,
+        "backends": backends,
+    }
+    headers = ["repo_id", "resolution", "num_pixels"]
+    headers += list(BASE_ENCODING.keys())
+    headers += [
+        "timestamps_mode",
+        "backend",
+        "video_size_bytes",
+        "images_size_bytes",
+        "video_images_size_ratio",
+        "avg_load_time_video_ms",
+        "avg_load_time_images_ms",
+        "video_images_load_time_ratio",
+        "avg_mse",
+        "avg_psnr",
+        "avg_ssim",
+    ]
+    file_paths = []
+    for video_codec in tqdm(vcodec, desc="encodings (vcodec)"):
+        for pixel_format in tqdm(pix_fmt, desc="encodings (pix_fmt)", leave=False):
+            benchmark_table = []
+            for repo_id in tqdm(repo_ids, desc="encodings (datasets)", leave=False):
+                dataset = LeRobotDataset(repo_id)
+                imgs_dir = output_dir / "images" / dataset.repo_id.replace("/", "_")
+                # We only use the first episode
+                save_first_episode(imgs_dir, dataset)
+                for duet in [
+                    dict(zip(encoding_benchmarks.keys(), unique_combination, strict=False))
+                    for unique_combination in itertools.product(*encoding_benchmarks.values())
+                ]:
+                    encoding_cfg = BASE_ENCODING.copy()
+                    encoding_cfg["vcodec"] = video_codec
+                    encoding_cfg["pix_fmt"] = pixel_format
+                    for key, value in duet.items():
+                        encoding_cfg[key] = value
+                    args_path = Path("_".join(str(value) for value in encoding_cfg.values()))
+                    video_path = output_dir / "videos" / args_path / f"{repo_id.replace('/', '_')}.mp4"
+                    benchmark_table += benchmark_encoding_decoding(
+                        dataset,
+                        video_path,
+                        imgs_dir,
+                        encoding_cfg,
+                        decoding_benchmarks,
+                        num_samples,
+                        num_workers,
+                        save_frames,
+                    )
+
+            # Save intermediate results
+            benchmark_df = pd.DataFrame(benchmark_table, columns=headers)
+            now = dt.datetime.now()
+            csv_path = (
+                output_dir
+                / f"{now:%Y-%m-%d}_{now:%H-%M-%S}_{video_codec}_{pixel_format}_{num_samples}-samples.csv"
+            )
+            benchmark_df.to_csv(csv_path, header=True, index=False)
+            file_paths.append(csv_path)
+            del benchmark_df
+
+    # Concatenate all results
+    df_list = [pd.read_csv(csv_path) for csv_path in file_paths]
+    concatenated_df = pd.concat(df_list, ignore_index=True)
+    concatenated_path = output_dir / f"{now:%Y-%m-%d}_{now:%H-%M-%S}_all_{num_samples}-samples.csv"
+    concatenated_df.to_csv(concatenated_path, header=True, index=False)
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--output-dir",
+        type=Path,
+        default=Path("outputs/video_benchmark"),
+        help="Directory where the video benchmark outputs are written.",
+    )
+    parser.add_argument(
+        "--repo-ids",
+        type=str,
+        nargs="*",
+        default=[
+            "lerobot/pusht_image",
+            "lerobot/aloha_mobile_shrimp_image",
+            "lerobot/paris_street",
+            "lerobot/kitchen",
+        ],
+        help="Datasets repo-ids to test against. First episodes only are used. Must be images.",
+    )
+    parser.add_argument(
+        "--vcodec",
+        type=str,
+        nargs="*",
+        default=["h264", "hevc", "libsvtav1"],
+        help="Video codecs to be tested",
+    )
+    parser.add_argument(
+        "--pix-fmt",
+        type=str,
+        nargs="*",
+        default=["yuv444p", "yuv420p"],
+        help="Pixel formats (chroma subsampling) to be tested",
+    )
+    parser.add_argument(
+        "--g",
+        type=parse_int_or_none,
+        nargs="*",
+        default=[1, 2, 3, 4, 5, 6, 10, 15, 20, 40, 100, None],
+        help="Group of pictures sizes to be tested.",
+    )
+    parser.add_argument(
+        "--crf",
+        type=parse_int_or_none,
+        nargs="*",
+        default=[0, 5, 10, 15, 20, 25, 30, 40, 50, None],
+        help="Constant rate factors to be tested.",
+    )
+    # parser.add_argument(
+    #     "--fastdecode",
+    #     type=int,
+    #     nargs="*",
+    #     default=[0, 1],
+    #     help="Use the fastdecode tuning option. 0 disables it. "
+    #         "For libx264 and libx265/hevc, only 1 is possible. "
+    #         "For libsvtav1, 1, 2 or 3 are possible values with a higher number meaning a faster decoding optimization",
+    # )
+    parser.add_argument(
+        "--timestamps-modes",
+        type=str,
+        nargs="*",
+        default=[
+            "1_frame",
+            "2_frames",
+            "2_frames_4_space",
+            "6_frames",
+        ],
+        help="Timestamps scenarios to be tested.",
+    )
+    parser.add_argument(
+        "--backends",
+        type=str,
+        nargs="*",
+        default=["torchcodec", "pyav"],
+        help="Torchvision decoding backend to be tested.",
+    )
+    parser.add_argument(
+        "--num-samples",
+        type=int,
+        default=50,
+        help="Number of samples for each encoding x decoding config.",
+    )
+    parser.add_argument(
+        "--num-workers",
+        type=int,
+        default=10,
+        help="Number of processes for parallelized sample processing.",
+    )
+    parser.add_argument(
+        "--save-frames",
+        type=int,
+        default=0,
+        help="Whether to save decoded frames or not. Enter a non-zero number for true.",
+    )
+    args = parser.parse_args()
+    main(**vars(args))
@@ -35,7 +35,7 @@ USER root
 ARG ROBOTWIN_SHA=0aeea2d669c0f8516f4d5785f0aa33ba812c14b4
 RUN apt-get update \
    && apt-get install -y --no-install-recommends \
-         cuda-nvcc-12-8 cuda-cudart-dev-12-8 \
+         cuda-nvcc-12-4 cuda-cudart-dev-12-4 \
         libvulkan1 vulkan-tools \
    && mkdir -p /usr/share/vulkan/icd.d \
    && echo '{"file_format_version":"1.0.0","ICD":{"library_path":"libGLX_nvidia.so.0","api_version":"1.3.0"}}' \
@@ -18,8 +18,9 @@
 # docker build -f docker/Dockerfile.internal -t lerobot-internal .

 # Configure the base image for CI with GPU access
-ARG CUDA_VERSION=12.8.1
-ARG OS_VERSION=24.04
+# TODO(Steven): Bump these versions
+ARG CUDA_VERSION=12.4.1
+ARG OS_VERSION=22.04
 FROM nvidia/cuda:${CUDA_VERSION}-base-ubuntu${OS_VERSION}

 # Define Python version argument
@@ -35,13 +36,16 @@ ENV DEBIAN_FRONTEND=noninteractive \

 # Install Python, system dependencies, and uv (as root)
 RUN apt-get update && apt-get install -y --no-install-recommends \
-    build-essential git curl \
-    libglib2.0-0 libgl1 libegl1 ffmpeg \
+    software-properties-common build-essential git curl \
+    libglib2.0-0 libgl1-mesa-glx libegl1-mesa ffmpeg \
    libusb-1.0-0-dev speech-dispatcher libgeos-dev portaudio19-dev \
    cmake pkg-config ninja-build \
-    python${PYTHON_VERSION} \
-    python${PYTHON_VERSION}-venv \
-    python${PYTHON_VERSION}-dev \
+    && add-apt-repository -y ppa:deadsnakes/ppa \
+    && apt-get update \
+    && apt-get install -y --no-install-recommends \
+       python${PYTHON_VERSION} \
+       python${PYTHON_VERSION}-venv \
+       python${PYTHON_VERSION}-dev \
    && curl -LsSf https://astral.sh/uv/install.sh | sh \
    && mv /root/.local/bin/uv /usr/local/bin/uv \
    && useradd --create-home --shell /bin/bash user_lerobot \
@@ -8,7 +8,7 @@
  - local: il_robots
    title: Imitation Learning for Robots
  - local: bring_your_own_policies
-    title: Adding a Policy
+    title: Bring Your Own Policies
  - local: integrate_hardware
    title: Bring Your Own Hardware
  - local: hilserl
@@ -24,12 +24,6 @@
  - local: rename_map
    title: Using Rename Map and Empty Cameras
  title: "Tutorials"
- sections:
-  - local: hardware_guide
-    title: Compute Hardware Guide
-  - local: torch_accelerators
-    title: PyTorch accelerators
-  title: "Compute & Hardware"
 - sections:
  - local: lerobot-dataset-v3
    title: Using LeRobotDataset
@@ -53,8 +47,6 @@
    title: π₀-FAST (Pi0Fast)
  - local: pi05
    title: π₀.₅ (Pi05)
-  - local: eo1
-    title: EO-1
  - local: groot
    title: NVIDIA GR00T N1.5
  - local: xvla
@@ -148,6 +140,10 @@
  - local: cameras
    title: Cameras
  title: "Sensors"
+- sections:
+  - local: torch_accelerators
+    title: PyTorch accelerators
+  title: "Supported Hardware"
 - sections:
  - local: notebooks
    title: Notebooks
@@ -1,37 +1,60 @@
-# Adding a Policy
+# Bring Your Own Policies

-This guide walks you through implementing a custom policy and getting it to work with LeRobot's training, evaluation, and deployment tools. There are two paths:
+This tutorial explains how to integrate your own custom policy implementations into the LeRobot ecosystem, allowing you to leverage all LeRobot tools for training, evaluation, and deployment while using your own algorithms.

- **Plugin (out-of-tree)** — ship your policy as a standalone `lerobot_policy_*` package. Faster, no PR required, easy to iterate. Right for experimentation, internal use, or when you want to publish independently.
- **In-tree (contributed to LeRobot)** — land your policy directly in `src/lerobot/policies/`. Requires a PR, but makes your policy a first-class citizen of the library.
+## Step 1: Create a Policy Package

-The plugin route is usually the right starting point — promote to in-tree once the policy has stabilized and there's clear value in shipping it with the library.
+Your custom policy should be organized as an installable Python package following LeRobot's plugin conventions.

-Either way, the building blocks are the same: a configuration class, a policy class, and a processor factory. The first half of this guide covers those shared pieces; the second half covers the path-specific scaffolding ([Path A](#path-a-out-of-tree-plugin), [Path B](#path-b-contributing-in-tree)).
+### Package Structure

-A note on tone: robot-learning is an actively evolving field, and "what a policy looks like" can shift with each new architecture. The conventions described here exist because they let `lerobot-train` and `lerobot-eval` work uniformly across very different models. When a new policy genuinely doesn't fit them, raise it (in your PR, or an issue) — the conventions are not sacred.
+Create a package with the prefix `lerobot_policy_` (IMPORTANT!) followed by your policy name:

---
+```bash
+lerobot_policy_my_custom_policy/
+├── pyproject.toml
+└── src/
+    └── lerobot_policy_my_custom_policy/
+        ├── __init__.py
+        ├── configuration_my_custom_policy.py
+        ├── modeling_my_custom_policy.py
+        └── processor_my_custom_policy.py
+```

-## Anatomy of a policy
+### Package Configuration

-Three building blocks make up every policy. The names below use `my_policy` as a placeholder — replace with your policy's name. That name is load-bearing: it must match the string you pass to `@PreTrainedConfig.register_subclass`, the `MyPolicy.name` class attribute, and the `make_<name>_pre_post_processors` factory function (more on each below).
+Set up your `pyproject.toml`:

-### Configuration class
+```toml
+[project]
+name = "lerobot_policy_my_custom_policy"
+version = "0.1.0"
+dependencies = [
+    # your policy-specific dependencies
+]
+requires-python = ">= 3.12"

-Inherit from [`PreTrainedConfig`](https://github.com/huggingface/lerobot/blob/main/src/lerobot/configs/policies.py) and register your policy type. Here is a template — customize the parameters and methods as needed for your policy's architecture and training requirements.
+[build-system]
+build-backend = # your-build-backend
+requires = # your-build-system
+```
+
+## Step 2: Define the Policy Configuration
+
+Create a configuration class that inherits from [`PreTrainedConfig`](https://github.com/huggingface/lerobot/blob/main/src/lerobot/configs/policies.py) and registers your policy type.
+Here is a template to get you started; customize the parameters and methods as needed for your policy's architecture and training requirements.

 ```python
-# configuration_my_policy.py
+# configuration_my_custom_policy.py
 from dataclasses import dataclass, field
 from lerobot.configs import PreTrainedConfig
 from lerobot.optim import AdamWConfig
 from lerobot.optim import CosineDecayWithWarmupSchedulerConfig

-@PreTrainedConfig.register_subclass("my_policy")
+@PreTrainedConfig.register_subclass("my_custom_policy")
@dataclass
-class MyPolicyConfig(PreTrainedConfig):
-    """Configuration class for MyPolicy.
+class MyCustomPolicyConfig(PreTrainedConfig):
+    """Configuration class for MyCustomPolicy.

    Args:
        n_obs_steps: Number of observation steps to use as input
@@ -54,20 +77,16 @@ class MyPolicyConfig(PreTrainedConfig):
            raise ValueError("n_action_steps cannot exceed horizon")

    def validate_features(self) -> None:
-        """Validate input/output feature compatibility.
-
-        Call this explicitly from your policy's __init__ — the base class does not.
-        """
+        """Validate input/output feature compatibility."""
        if not self.image_features:
-            raise ValueError("MyPolicy requires at least one image feature.")
+            raise ValueError("MyCustomPolicy requires at least one image feature.")
        if self.action_feature is None:
-            raise ValueError("MyPolicy requires 'action' in output_features.")
+            raise ValueError("MyCustomPolicy requires 'action' in output_features.")

    def get_optimizer_preset(self) -> AdamWConfig:
        return AdamWConfig(lr=self.optimizer_lr, weight_decay=self.optimizer_weight_decay)

    def get_scheduler_preset(self):
-        """Return a LRSchedulerConfig from lerobot.optim, or None."""
        return None

    @property
@@ -82,7 +101,8 @@ class MyPolicyConfig(PreTrainedConfig):

    @property
    def action_delta_indices(self) -> list[int]:
-        """Relative timestep offsets for the action chunk the dataset loader returns."""
+        """Relative timestep offsets for the action chunk the dataset loader returns.
+        """
        return list(range(self.horizon))

    @property
@@ -90,34 +110,32 @@ class MyPolicyConfig(PreTrainedConfig):
        return None
 ```

-The string you pass to `@register_subclass` must match `MyPolicy.name` (next section) and is what users supply as `--policy.type` on the CLI. Default to `AdamW` from `lerobot.optim` for `get_optimizer_preset` unless you genuinely need otherwise.
+## Step 3: Implement the Policy Class

-### Policy class
-
-Inherit from [`PreTrainedPolicy`](https://github.com/huggingface/lerobot/blob/main/src/lerobot/policies/pretrained.py) and set two class attributes — both are checked by `__init_subclass__`:
+Create your policy implementation by inheriting from [`PreTrainedPolicy`](https://github.com/huggingface/lerobot/blob/main/src/lerobot/policies/pretrained.py):

 ```python
-# modeling_my_policy.py
+# modeling_my_custom_policy.py
 import torch
 import torch.nn as nn
 from typing import Any

 from lerobot.policies import PreTrainedPolicy
 from lerobot.utils.constants import ACTION
-from .configuration_my_policy import MyPolicyConfig
+from .configuration_my_custom_policy import MyCustomPolicyConfig

-class MyPolicy(PreTrainedPolicy):
-    config_class = MyPolicyConfig  # must match the string in @register_subclass
-    name = "my_policy"
+class MyCustomPolicy(PreTrainedPolicy):
+    config_class = MyCustomPolicyConfig
+    name = "my_custom_policy"  # must match the string in @register_subclass

-    def __init__(self, config: MyPolicyConfig, dataset_stats: dict[str, Any] = None):
+    def __init__(self, config: MyCustomPolicyConfig, dataset_stats: dict[str, Any] = None):
        super().__init__(config, dataset_stats)
        config.validate_features()  # not called automatically by the base class
        self.config = config
        self.model = ...  # your nn.Module here

    def reset(self):
-        """Reset per-episode state. Called by lerobot-eval at the start of each episode."""
+        """Reset episode state."""
        ...

    def get_optim_params(self) -> dict:
@@ -129,51 +147,35 @@ class MyPolicy(PreTrainedPolicy):
        ...

    def select_action(self, batch: dict[str, torch.Tensor], **kwargs) -> torch.Tensor:
-        """Return a single action for the current timestep (called every step at inference)."""
+        """Return a single action for the current timestep (called at inference)."""
        ...

-    def forward(self, batch: dict[str, torch.Tensor]) -> tuple[torch.Tensor, dict | None]:
+    def forward(self, batch: dict[str, torch.Tensor]) -> dict[str, torch.Tensor]:
        """Compute the training loss.

-        Returns `(loss, output_dict)`. `output_dict` may be `None`; everything in it must be
-        logging-friendly Python natives (no tensors with gradients).
-
        `batch["action_is_pad"]` is a bool mask of shape (B, horizon) that marks
-        timesteps padded because the episode ended before `horizon` steps; you
+        timesteps padded because the episode ended before `horizon` steps; you
        can exclude those from your loss.
        """
        actions = batch[ACTION]
        action_is_pad = batch.get("action_is_pad")
        ...
-        return loss, {"some_loss_component": some_loss_component.item()}
+        return {"loss": ...}
 ```

-The methods called by the train/eval loops:
+## Step 4: Add Data Processors

-| Method                                                            | Used by           | What it does                                                                                                                                                                                                                                         |
-| ----------------------------------------------------------------- | ----------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| `reset() -> None`                                                 | `lerobot-eval`    | Clear per-episode state at the start of each episode.                                                                                                                                                                                                |
-| `select_action(batch, **kwargs) -> Tensor`                        | `lerobot-eval`    | Return the next action `(B, action_dim)`. Called every step.                                                                                                                                                                                         |
-| `predict_action_chunk(batch, **kwargs) -> Tensor`                 | the policy itself | Return an action chunk `(B, chunk_size, action_dim)`. Currently abstract on the base class — raise `NotImplementedError` if your policy doesn't chunk.                                                                                               |
-| `forward(batch, reduction="mean") -> tuple[Tensor, dict \| None]` | `lerobot-train`   | Return `(loss, output_dict)`. Accept `reduction="none"` if you want to support per-sample weighting.                                                                                                                                                 |
-| `get_optim_params() -> dict`                                      | the optimizer     | Return `self.parameters()` for simple policies; return a named parameter dict for [multi-optimizer policies](https://github.com/huggingface/lerobot/blob/ecd38c50d7d15b4184cf42649ff1185ee2e11eeb/src/lerobot/policies/sac/modeling_sac.py#L61-L73). |
-| `update() -> None` _(optional)_                                   | `lerobot-train`   | Called after each optimizer step _if defined_. Use for EMA, target nets, replay buffers (TDMPC uses this).                                                                                                                                           |
-
-Batches are flat dictionaries keyed by the constants in [`lerobot.utils.constants`](https://github.com/huggingface/lerobot/blob/main/src/lerobot/utils/constants.py): `OBS_STATE` (`observation.state.<motor>`), `OBS_IMAGES` (`observation.images.<camera>`), `OBS_LANGUAGE`, `ACTION`, etc. Reuse the constants — don't invent new prefixes.
-
-### Processor functions
-
-LeRobot uses `PolicyProcessorPipeline`s to normalize inputs and de-normalize outputs around your policy. For a concrete reference, see [`processor_act.py`](https://github.com/huggingface/lerobot/blob/main/src/lerobot/policies/act/processor_act.py) or [`processor_diffusion.py`](https://github.com/huggingface/lerobot/blob/main/src/lerobot/policies/diffusion/processor_diffusion.py).
+Create processor functions. For a concrete reference, see [processor_act.py](https://github.com/huggingface/lerobot/blob/main/src/lerobot/policies/act/processor_act.py) or [processor_diffusion.py](https://github.com/huggingface/lerobot/blob/main/src/lerobot/policies/diffusion/processor_diffusion.py).

 ```python
-# processor_my_policy.py
+# processor_my_custom_policy.py
 from typing import Any
 import torch

 from lerobot.processor import PolicyAction, PolicyProcessorPipeline


-def make_my_policy_pre_post_processors(
+def make_my_custom_policy_pre_post_processors(
    config,
    dataset_stats: dict[str, dict[str, torch.Tensor]] | None = None,
 ) -> tuple[
@@ -185,48 +187,11 @@ def make_my_policy_pre_post_processors(
    return preprocessor, postprocessor
 ```

-**Important — function naming:** LeRobot discovers your processor by name. The function **must** be called `make_{policy_name}_pre_post_processors` (matching the string you passed to `@PreTrainedConfig.register_subclass`).
+**Important - function naming:** LeRobot discovers your processor by name. The function **must** be called `make_{policy_name}_pre_post_processors` (matching the string you passed to `@PreTrainedConfig.register_subclass`).

---
+## Step 5: Package Initialization

-## Path A: Out-of-tree plugin
-
-The fastest way to ship a policy: package it as a standalone Python distribution and install it alongside LeRobot. No PR required, you own the release cycle, and you can publish to PyPI under your own namespace.
-
-### Package structure
-
-Create a package with the prefix `lerobot_policy_` (IMPORTANT!) followed by your policy name:
-
-```bash
-lerobot_policy_my_policy/
-├── pyproject.toml
-└── src/
-    └── lerobot_policy_my_policy/
-        ├── __init__.py
-        ├── configuration_my_policy.py
-        ├── modeling_my_policy.py
-        └── processor_my_policy.py
-```
-
-### `pyproject.toml`
-
-```toml
-[project]
-name = "lerobot_policy_my_policy"
-version = "0.1.0"
-dependencies = [
-    # your policy-specific dependencies
-]
-requires-python = ">= 3.12"
-
-[build-system]
-build-backend = # your-build-backend
-requires = # your-build-system
-```
-
-### Package `__init__.py`
-
-Expose your classes in the package's `__init__.py` and guard against missing `lerobot`:
+Expose your classes in the package's `__init__.py`:

 ```python
 # __init__.py
@@ -239,148 +204,44 @@ except ImportError:
        "lerobot is not installed. Please install lerobot to use this policy package."
    )

-from .configuration_my_policy import MyPolicyConfig
-from .modeling_my_policy import MyPolicy
-from .processor_my_policy import make_my_policy_pre_post_processors
+from .configuration_my_custom_policy import MyCustomPolicyConfig
+from .modeling_my_custom_policy import MyCustomPolicy
+from .processor_my_custom_policy import make_my_custom_policy_pre_post_processors

 __all__ = [
-    "MyPolicyConfig",
-    "MyPolicy",
-    "make_my_policy_pre_post_processors",
+    "MyCustomPolicyConfig",
+    "MyCustomPolicy",
+    "make_my_custom_policy_pre_post_processors",
 ]
 ```

-### Install and use
+## Step 6: Installation and Usage
+
+### Install Your Policy Package

 ```bash
-cd lerobot_policy_my_policy
+cd lerobot_policy_my_custom_policy
 pip install -e .

 # Or install from PyPI if published
-pip install lerobot_policy_my_policy
+pip install lerobot_policy_my_custom_policy
 ```

+### Use Your Policy
+
 Once installed, your policy automatically integrates with LeRobot's training and evaluation tools:

 ```bash
 lerobot-train \
-    --policy.type my_policy \
+    --policy.type my_custom_policy \
    --env.type pusht \
    --steps 200000
 ```

---
-
-## Path B: Contributing in-tree
-
-When your policy has stabilized and there's clear value in shipping it with the library, you can land it directly in LeRobot. Read the general [contribution guide](./contributing) and the [PR template](https://github.com/huggingface/lerobot/blob/main/.github/PULL_REQUEST_TEMPLATE.md) first — that's where you'll find the testing/quality expectations every PR has to meet (`pre-commit run -a`, `pytest`, the community-review rule, etc.). What's below is the policy-specific layer on top of that.
-
-### In-tree layout
-
-```
-src/lerobot/policies/my_policy/
-├── __init__.py                    # re-exports config + modeling + processor factory
-├── configuration_my_policy.py     # MyPolicyConfig + @register_subclass
-├── modeling_my_policy.py          # MyPolicy(PreTrainedPolicy)
-├── processor_my_policy.py         # make_my_policy_pre_post_processors
-└── README.md                      # symlink → ../../../../docs/source/policy_my_policy_README.md
-```
-
-Two notes:
-
- The `README.md` next to the source is a **symlink** into `docs/source/policy_<name>_README.md` — the actual file lives under `docs/`. Existing policies (act, smolvla, diffusion, …) all do this; copy one of those symlinks. The policy README is conventionally minimal: paper link + BibTeX citation.
- The user-facing tutorial — what to install, how to train, hyperparameters, benchmark numbers — lives separately at `docs/source/<my_policy>.mdx` and is registered in `_toctree.yml` under "Policies".
-
-The file names are load-bearing: the factory does lazy imports by name, and the processor is discovered by the `make_<policy_name>_pre_post_processors` convention.
-
-### Wiring
-
-Three places need to know about your policy. All by name.
-
-1. **`policies/__init__.py`** — re-export `MyPolicyConfig` and add it to `__all__`. **Don't** re-export the modeling class; it loads lazily through the factory (so `import lerobot` stays fast).
-2. **`factory.py:get_policy_class`** — add a branch returning `MyPolicy` from a lazy import.
-3. **`factory.py:make_policy_config`** and **`factory.py:make_pre_post_processors`** — same idea, two more branches.
-
-Mirror an existing policy that's structurally similar to yours; the diff is small.
-
-### Heavy / optional dependencies
-
-Most policies need a heavy backbone (transformers, diffusers, a specific VLM SDK). The convention is **two-step gating**: a `TYPE_CHECKING`-guarded import at module top, and a `require_package` runtime check in the constructor. [`modeling_diffusion.py`](https://github.com/huggingface/lerobot/blob/main/src/lerobot/policies/diffusion/modeling_diffusion.py) is the canonical reference:
-
-```python
-from typing import TYPE_CHECKING
-from lerobot.utils.import_utils import _diffusers_available, require_package
-
-if TYPE_CHECKING or _diffusers_available:
-    from diffusers.schedulers.scheduling_ddim import DDIMScheduler
-else:
-    DDIMScheduler = None  # keeps the symbol bindable at import time
-
-class DiffusionPolicy(PreTrainedPolicy):
-    def __init__(self, config):
-        require_package("diffusers", extra="diffusion")
-        super().__init__(config)
-        ...
-```
-
-This way:
-
- `import lerobot.policies` keeps working without the extra installed (the symbol is just bound to `None`).
- Type checkers see the real symbol.
- Instantiating the policy without the extra raises a clear `ImportError` pointing at `pip install 'lerobot[diffusion]'`.
-
-Add a matching extra to [`pyproject.toml`](https://github.com/huggingface/lerobot/blob/main/pyproject.toml) `[project.optional-dependencies]` and include it in the `all` extra so `pip install 'lerobot[all]'` keeps installing everything.
-
-### Benchmarks and a published checkpoint
-
-A new policy is much easier to review — and far more useful — when it ships with a working checkpoint and at least one number you can reproduce.
-
-**Pick at least one in-tree benchmark.** LeRobot ships sim benchmarks with per-benchmark Docker images (LIBERO, LIBERO-plus, Meta-World, RoboTwin 2.0, RoboCasa365, RoboCerebra, RoboMME, VLABench and more). Pick the one that matches your policy's modality — VLAs usually go to LIBERO or VLABench; image-only BC to LIBERO or Meta-World. The full list lives under [Benchmarks](./libero) in the docs sidebar.
-
-**Push the checkpoint & processors** to the Hub under `lerobot/<policy>_<benchmark>` (or your namespace if you don't have write access; a maintainer can mirror it). Use `PreTrainedPolicy.push_model_to_hub` so the repo gets `config.json`, `model.safetensors`, and a model card.
-
-**Report results in your policy's MDX**, with the exact `lerobot-eval` command and hardware so anyone can re-run:
-
-```markdown
-## Results
-
-Evaluated on LIBERO with `lerobot/<policy>_libero`:
-
-| Suite          | Success rate | n_episodes |
-| -------------- | -----------: | ---------: |
-| libero_spatial |        87.5% |         50 |
-| libero_object  |        93.0% |         50 |
-| libero_goal    |        81.5% |         50 |
-| libero_10      |        62.0% |         50 |
-| **average**    |    **81.0%** |        200 |
-
-Reproduce: `lerobot-eval --policy.path=lerobot/<policy>_libero --env.type=libero --env.task=libero_spatial --eval.n_episodes=50` (1× A100 40 GB).
-```
-
-Use `n_episodes ≥ 50` per suite for stable success-rate estimates.
-
-If your policy is real-robot-only and no sim benchmark applies, swap the sim eval for: a public training dataset on the Hub, the `lerobot-train` command, the checkpoint, and a real-robot success rate over ≥10 episodes via `lerobot-rollout --policy.path=...`.
-
-### PR checklist
-
-The general expectations are in [`CONTRIBUTING.md`](https://github.com/huggingface/lerobot/blob/main/CONTRIBUTING.md) and the [PR template](https://github.com/huggingface/lerobot/blob/main/.github/PULL_REQUEST_TEMPLATE.md). On top of those, reviewers will look for:
-
- [ ] `MyPolicy` and `MyPolicyConfig` cover the surface above; `__init_subclass__` accepts the class.
- [ ] `factory.py` and `policies/__init__.py` are wired (lazy imports for modeling).
- [ ] `make_my_policy_pre_post_processors` follows the naming convention.
- [ ] Optional deps live behind a `[project.optional-dependencies]` extra and the `TYPE_CHECKING + require_package` guard.
- [ ] `tests/policies/` updated with policy-specific tests, and the backward-compat artifact committed.
- [ ] `src/lerobot/policies/<name>/README.md` symlinked into `docs/source/policy_<name>_README.md`; user-facing `docs/source/<name>.mdx` written and added to `_toctree.yml`.
- [ ] At least one reproducible benchmark eval in the policy MDX with a published checkpoint (sim benchmark, or real-robot dataset + checkpoint).
-
-The fastest way to get a clean PR is to copy the directory of the existing policy closest to yours, rename, and replace contents method by method. Don't wait until everything is polished — open a draft PR early and iterate with us; reviewers would much rather give feedback on a half-finished branch than a fully-merged one.
-
---
-
-## Examples and community contributions
+## Examples and Community Contributions

 Check out these example policy implementations:

- [DiTFlow Policy](https://github.com/danielsanjosepro/lerobot_policy_ditflow) — Diffusion Transformer policy with flow-matching objective. Try it out in this example: [DiTFlow Example](https://github.com/danielsanjosepro/test_lerobot_policy_ditflow)
+- [DiTFlow Policy](https://github.com/danielsanjosepro/lerobot_policy_ditflow) - Diffusion Transformer policy with flow-matching objective. Try it out in this example: [DiTFlow Example](https://github.com/danielsanjosepro/test_lerobot_policy_ditflow)

-Thanks for taking the time to bring a new policy into LeRobot. Every architecture that lands in `main` — and every plugin published by the community — makes the library a little more useful for the next person, and a little more representative of where robot learning is going. We're looking forward to seeing what you ship. 🤗
+Share your policy implementations with the community! 🤗
@@ -1,168 +0,0 @@
-# EO-1
-
-EO-1 is a **Vision-Language-Action policy for robot control**. The LeRobot implementation integrates EO-1 with the standard LeRobot training, evaluation, and processor interfaces.
-
-## Model Overview
-
-EO-1 uses a Qwen2.5-VL backbone for vision-language understanding and adds a continuous flow-matching action head for robot control. The policy formats each robot-control sample as a multimodal conversation: camera images are passed to Qwen2.5-VL, the robot state is represented with EO-1 state tokens, and the future action chunk is represented with EO-1 action tokens.
-
-<img
-  src="https://huggingface.co/datasets/HaomingSong/lerobot-documentation-images/resolve/main/lerobot/eo_pipeline.png"
-  alt="An overview of EO-1"
-  width="85%"
-/>
-
-During training, EO-1 learns to denoise continuous action chunks at the action-token positions. During inference, it samples an action chunk, returns continuous actions, and executes `n_action_steps` from the chunk before sampling again.
-
-### What the LeRobot Integration Covers
-
- Standard `policy.type=eo1` configuration through LeRobot
- Qwen2.5-VL image and text preprocessing through policy processors
- Continuous flow-matching action prediction
- Checkpoint save/load through LeRobot policy APIs
- Training with `lerobot-train` and evaluation with `lerobot-eval`
-
-The broader EO-1 project also includes interleaved vision-text-action pretraining and multimodal reasoning workflows. This page focuses on the LeRobot robot-control policy path.
-
-## Installation Requirements
-
-1. Install LeRobot by following the [Installation Guide](./installation).
-2. Install EO-1 dependencies by running:
-
-   ```bash
-   pip install -e ".[eo1]"
-   ```
-
-3. If you want to train or evaluate on LIBERO, install the LIBERO dependencies too:
-
-   ```bash
-   pip install -e ".[eo1,libero]"
-   ```
-
-EO-1 can use the standard PyTorch scaled-dot-product attention backend through `policy.attn_implementation=sdpa`. If your environment has a compatible `flash_attn` installation, you can request `policy.attn_implementation=flash_attention_2`.
-
-## Data Requirements
-
-EO-1 expects a LeRobot dataset with:
-
- At least one visual observation, for example `observation.images.image`
- `observation.state`
- `action`
- A language task instruction through the dataset `task` field
-
-If your dataset uses different observation names, use `rename_map` to align them with the names expected by your training or evaluation setup.
-
-## Usage
-
-To use EO-1 in a LeRobot configuration, specify the policy type as:
-
-```python
-policy.type=eo1
-```
-
-By default, a new EO-1 policy initializes its backbone from:
-
-```python
-policy.vlm_base=Qwen/Qwen2.5-VL-3B-Instruct
-```
-
-Once a LeRobot-format EO-1 checkpoint is available, load it with:
-
-```python
-policy.path=your-org/your-eo1-checkpoint
-```
-
-## Training
-
-### Training Command Example
-
-```bash
-lerobot-train \
-  --dataset.repo_id=your_org/your_dataset \
-  --policy.type=eo1 \
-  --policy.vlm_base=Qwen/Qwen2.5-VL-3B-Instruct \
-  --policy.dtype=bfloat16 \
-  --policy.attn_implementation=sdpa \
-  --policy.gradient_checkpointing=false \
-  --output_dir=./outputs/eo1_training \
-  --job_name=eo1_training \
-  --steps=300000 \
-  --batch_size=16 \
-  --policy.device=cuda
-```
-
-### Key Training Parameters
-
-| Parameter                              | Default                       | Description                                                             |
-| -------------------------------------- | ----------------------------- | ----------------------------------------------------------------------- |
-| `policy.vlm_base`                      | `Qwen/Qwen2.5-VL-3B-Instruct` | Qwen2.5-VL checkpoint used to initialize a new policy                   |
-| `policy.dtype`                         | `auto`                        | Backbone dtype request: `auto`, `bfloat16`, or `float32`                |
-| `policy.attn_implementation`           | `None`                        | Optional Qwen attention backend, such as `sdpa`                         |
-| `policy.gradient_checkpointing`        | `false`                       | Reduces memory usage during training                                    |
-| `policy.chunk_size`                    | `8`                           | Number of future actions predicted per chunk                            |
-| `policy.n_action_steps`                | `8`                           | Number of actions consumed from a sampled chunk                         |
-| `policy.num_denoise_steps`             | `10`                          | Number of flow-matching denoising steps used during sampling            |
-| `policy.max_state_dim`                 | `32`                          | State padding dimension                                                 |
-| `policy.max_action_dim`                | `32`                          | Action padding dimension                                                |
-| `policy.force_fp32_autocast`           | `true`                        | Keeps the flow head in fp32 even when the backbone uses mixed precision |
-| `policy.supervise_padding_action_dims` | `true`                        | Controls whether padded action dimensions are supervised                |
-| `policy.supervise_padding_actions`     | `true`                        | Controls whether padded future action rows are supervised               |
-
-## Evaluation
-
-EO-1 can be evaluated through `lerobot-eval` once you have a LeRobot-format checkpoint:
-
-```bash
-lerobot-eval \
-  --policy.path=your-org/your-eo1-checkpoint \
-  --env.type=libero \
-  --env.task=libero_object \
-  --eval.batch_size=1 \
-  --eval.n_episodes=20
-```
-
-For datasets or environments whose camera names differ from the checkpoint configuration, pass a `rename_map`:
-
-```bash
-lerobot-eval \
-  --policy.path=your-org/your-eo1-checkpoint \
-  --env.type=libero \
-  --env.task=libero_object \
-  --rename_map='{"observation.images.image2":"observation.images.wrist_image"}'
-```
-
-## Configuration Notes
-
-### Image Processing
-
-EO-1 uses the Qwen2.5-VL processor. The `policy.image_min_pixels` and `policy.image_max_pixels` settings control the image resizing bounds before the visual tokens are passed into the backbone.
-
-### State and Action Dimensions
-
-The policy pads state and action vectors to `policy.max_state_dim` and `policy.max_action_dim` before the EO-1 flow head. Predictions are cropped back to the original action dimension before being returned by the policy.
-
-### Attention Backend
-
-Use `policy.attn_implementation=sdpa` for a portable setup. Use `flash_attention_2` only when `flash_attn` is installed and compatible with your environment.
-
-## References
-
- [EO-1 project](https://github.com/EO-Robotics/EO1)
- [EO-1 paper](https://arxiv.org/abs/2508.21112)
- [Qwen2.5-VL-3B-Instruct](https://huggingface.co/Qwen/Qwen2.5-VL-3B-Instruct)
-
-## Citation
-
-```bibtex
-@article{eo1,
-  title={EO-1: Interleaved Vision-Text-Action Pretraining for General Robot Control},
-  author={Delin Qu and Haoming Song and Qizhi Chen and Zhaoqing Chen and Xianqiang Gao and Xinyi Ye and Qi Lv and Modi Shi and Guanghui Ren and Cheng Ruan and Maoqing Yao and Haoran Yang and Jiacheng Bao and Bin Zhao and Dong Wang},
-  journal={arXiv preprint},
-  year={2025},
-  url={https://arxiv.org/abs/2508.21112}
-}
-```
-
-## License
-
-This LeRobot integration follows the **Apache 2.0 License** used by LeRobot. Check the upstream EO-1 model and dataset pages for the licenses of released EO-1 checkpoints and data.
@@ -1,98 +0,0 @@
-# Compute HW Guide for LeRobot Training
-
-Rough sizing for training a LeRobot policy: how much VRAM each policy needs, what training time looks like, and where to run when local hardware isn't enough.
-
-The numbers below are **indicative** — order-of-magnitude figures for picking hardware, not exact predictions. Throughput depends heavily on dataset I/O, image resolution, batch size, and number of GPUs.
-
-## Memory by policy group
-
-Policies cluster by backbone size; the groupings below give a single VRAM envelope per group instead of repeating numbers per policy. Memory scales roughly linearly with batch size; AdamW (the LeRobot default) carries optimizer state that adds ~30–100% over a forward+backward pass alone.
-
-| Group      | Policies                                    | Peak VRAM (BS 8, AdamW) | Suitable starter GPUs             |
-| ---------- | ------------------------------------------- | ----------------------: | --------------------------------- |
-| Light BC   | `act`, `vqbet`, `tdmpc`                     |                  ~2–6GB | Laptop GPU (RTX 3060), L4, A10G   |
-| Diffusion  | `diffusion`, `multi_task_dit`               |                 ~8–14GB | RTX 4070+ / L4 / A10G             |
-| Small VLA  | `smolvla`                                   |                ~10–16GB | RTX 4080+ / L4 / A10G             |
-| Large VLA  | `pi0`, `pi0_fast`, `pi05`, `xvla`, `wall_x` |                ~24–40GB | A100 40 GB+ (24 GB tight at BS 1) |
-| Multimodal | `groot`, `eo1`                              |                ~24–40GB | A100 40 GB+                       |
-| RL         | `sac`                                       |             config-dep. | See [HIL-SERL guide](./hilserl)   |
-
-Memory-bound? Drop the batch size (~linear), use gradient accumulation to recover effective batch, or for SmolVLA leave `freeze_vision_encoder=True`.
-
-## Training time
-
-Robotics imitation learning typically converges in **5–10 epochs over the dataset**, not hundreds of thousands of raw steps. Once you know your epoch count, wall-clock is essentially:
-
-```text
-total_frames    = sum of frames over all episodes      # 50 ep × 30 fps × 30 s ≈ 45,000
-steps_per_epoch = ceil(total_frames / (num_gpus × batch_size))
-total_steps     = epochs × steps_per_epoch
-wall_clock      ≈ total_steps × per_step_time
-```
-
-Per-step time depends on the policy and the GPU. The numbers in the table below are anchors — pick the row closest to your setup and scale linearly with `total_steps` if you train longer or shorter.
-
-### Common scenarios
-
-Indicative wall-clock for **5 epochs on a ~50-episode dataset (~45k frames at 30 fps × 30 s)**, default optimizer (AdamW), 640×480 images:
-
-| Setup                                | Policy         | Batch | Wall-clock |
-| ------------------------------------ | -------------- | ----- | ---------: |
-| Single RTX 4090 / RTX 3090 (24 GB)   | `act`          | 8     |  ~30–60min |
-| Single RTX 4090 / RTX 3090 (24 GB)   | `diffusion`    | 8     |      ~2–4h |
-| Single L4 / A10G (24 GB)             | `act`          | 8     |      ~1–2h |
-| Single L4 / A10G (24 GB)             | `smolvla`      | 4     |      ~3–6h |
-| Single A100 40 GB                    | `smolvla`      | 16    |      ~1–2h |
-| Single A100 40 GB                    | `pi0` / `pi05` | 4     |      ~4–8h |
-| 4× H100 80 GB cluster (`accelerate`) | `diffusion`    | 32    |  ~30–60min |
-| 4× H100 80 GB cluster (`accelerate`) | `smolvla`      | 32    |      ~1–2h |
-| Apple Silicon M1/M2/M3 Max (MPS)     | `act`          | 4     |     ~6–14h |
-
-These are order-of-magnitude figures. Real runs deviate by ±50% depending on image resolution, dataset I/O, dataloader threading, and exact GPU SKU. They are useful as "is this run going to take an hour or a day?" intuition, not as SLAs.
-
-### Multi-GPU matters a lot
-
-`accelerate launch --num_processes=N` is the easiest way to cut training time. Each optimizer step processes `N × batch_size` samples in roughly the same wall-clock as a single-GPU step, so 4 GPUs ≈ 4× speedup for compute-bound runs. See the [Multi GPU training](./multi_gpu_training) guide for the full setup.
-
-Reference data points on a 4×H100 80 GB cluster (`accelerate launch --num_processes=4`), 5000 steps, batch 32, AdamW, dataset [`imstevenpmwork/super_poulain_draft`](https://huggingface.co/datasets/imstevenpmwork/super_poulain_draft) (~50 episodes, ~640×480 images):
-
-| Policy      | Wall-clock | `update_s` | `dataloading_s` | GPU util | Notable flags                                                                                                                  |
-| ----------- | ---------- | ---------: | --------------: | -------- | ------------------------------------------------------------------------------------------------------------------------------ |
-| `diffusion` | 16m 17s    |      0.167 |           0.015 | ~90%     | defaults (training from scratch)                                                                                               |
-| `smolvla`   | 27m 49s    |      0.312 |           0.011 | ~80%     | `--policy.path=lerobot/smolvla_base`, `freeze_vision_encoder=false`, `train_expert_only=false`                                 |
-| `pi05`      | 3h 41m     |      2.548 |           0.014 | ~95%     | `--policy.pretrained_path=lerobot/pi05_base`, `gradient_checkpointing=true`, `dtype=bfloat16`, vision encoder + expert trained |
-
-The `dataloading_s` vs. `update_s` ratio is the diagnostic that matters: when `dataloading_s` approaches `update_s`, more GPUs stop helping — your dataloader is the bottleneck and you should look at `--num_workers`, image resolution, and disk speed before adding compute.
-
-### Schedule and checkpoints
-
-If you shorten training (e.g. 5k–10k steps on a small dataset), also shorten the LR schedule by setting `--policy.scheduler_decay_steps` to roughly the value of `--steps`; otherwise the LR stays near its peak and never decays. Likewise, scale `--save_freq` down so that checkpoints are still saved during the shorter run.
-
-## Where to run
-
-VRAM is the first filter. Within a tier, pick by budget and availability — the `$`–`$$$$` columns are relative; check current pricing on the provider you actually use.
-
-| Class                      | VRAM  | Tier   | Comfortable for                                             |
-| -------------------------- | ----- | ------ | ----------------------------------------------------------- |
-| RTX 3090 / 4090 (consumer) | 24 GB | `$`    | Light BC, Diffusion, SmolVLA. Tight for VLAs at batch 1.    |
-| L4 / A10G (cloud)          | 24 GB | `$–$$` | Same envelope; common on Google Cloud, RunPod, AWS `g5/g6`. |
-| A100 40 GB                 | 40 GB | `$$$`  | Any policy at reasonable batch sizes.                       |
-| A100 80 GB / H100 80 GB    | 80 GB | `$$$$` | Multi-GPU clusters; large batches for VLAs.                 |
-| **CPU only**               | —     | —      | Don't train. Use Colab or rent a GPU.                       |
-
-### Hugging Face Jobs
-
-[Hugging Face Jobs](https://huggingface.co/docs/hub/jobs) lets you run training on managed HF infrastructure, billed by the second. The repo publishes a ready-to-use image: **`huggingface/lerobot-gpu:latest`**, rebuilt **every night at 02:00 UTC from `main`** ([`docker_publish.yml`](https://github.com/huggingface/lerobot/blob/main/.github/workflows/docker_publish.yml)) — so it tracks the current state of the repo, not a tagged release.
-
-```bash
-hf jobs run --flavor a10g-large huggingface/lerobot-gpu:latest \
-  bash -c "nvidia-smi && lerobot-train \
-    --policy.type=act --dataset.repo_id=<USER>/<DATASET> \
-    --policy.repo_id=<USER>/act_<task> --batch_size=8 --steps=50000"
-```
-
-Notes:
-
- The leading `nvidia-smi` is a quick sanity check that CUDA is visible inside the container — useful to fail fast if the flavor or driver is mismatched.
- The default Job timeout is 30 minutes; pass `--timeout 4h` (or longer) for real training.
- `--flavor` maps onto the table above: `t4-small`/`t4-medium` (T4, ACT only), `l4x1`/`l4x4` (L4 24 GB), `a10g-small/large/largex2/largex4` (A10G 24 GB scaled out), `a100-large` (A100). For the current full catalogue + pricing see [https://huggingface.co/docs/hub/jobs](https://huggingface.co/docs/hub/jobs).
@@ -62,7 +62,7 @@ pip install -e ".[hilserl]"

 ### Understanding Configuration

-The training process begins with proper configuration for the HILSERl environment. The main configuration class is `GymManipulatorConfig` in `lerobot/rl/gym_manipulator.py`, which contains nested `HILSerlRobotEnvConfig` (defined in `lerobot/envs/configs.py`) and `DatasetConfig`. The configuration is organized into focused, nested sub-configs:
+The training process begins with proper configuration for the HIL-SERL environment. The main configuration class is `GymManipulatorConfig` in `lerobot/rl/gym_manipulator.py`, which contains nested `HILSerlRobotEnvConfig` and `DatasetConfig`. The configuration is organized into focused, nested sub-configs:

 <!-- prettier-ignore-start -->
 ```python
@@ -95,7 +95,6 @@ class HILSerlProcessorConfig:
 class ObservationConfig:
    add_joint_velocity_to_observation: bool = False    # Add joint velocities to state
    add_current_to_observation: bool = False    # Add motor currents to state
-    add_ee_pose_to_observation: bool = False    # Add end-effector pose to state
    display_cameras: bool = False    # Display camera feeds during execution

 class ImagePreprocessingConfig:
@@ -327,22 +326,14 @@ lerobot-find-joint-limits \
   Max joint positions [-20.0, -20.0, -20.0, -20.0, -20.0, -20.0]
   Min joint positions [50.0, 50.0, 50.0, 50.0, 50.0, 50.0]
   ```
-3. Use these values in your environment configuration under `env.processor.inverse_kinematics.end_effector_bounds` (see `InverseKinematicsConfig` in `lerobot/envs/configs.py`)
+3. Use these values in the configuration of your teleoperation device (`TeleoperatorConfig`) under the `end_effector_bounds` field.

 **Example Configuration**

 ```json
-{
-  "env": {
-    "processor": {
-      "inverse_kinematics": {
-        "end_effector_bounds": {
-          "max": [0.24, 0.2, 0.1],
-          "min": [0.16, -0.08, 0.03]
-        }
-      }
-    }
-  }
+"end_effector_bounds": {
+    "max": [0.24, 0.20, 0.10],
+    "min": [0.16, -0.08, 0.03]
 }
 ```

@@ -413,24 +404,30 @@ We support using a gamepad or a keyboard or the leader arm of the robot.

 HIL-SERL learns actions in the end-effector space of the robot. Therefore, the teleoperation will control the end-effector's x, y, z displacements.

-The end-effector transformation is applied by the processor pipeline (`InverseKinematicsRLStep`, `EEBoundsAndSafety`, `EEReferenceAndDelta`, `GripperVelocityToJoint`) configured under `env.processor.inverse_kinematics` (`InverseKinematicsConfig`) and `env.processor.gripper` / `env.processor.max_gripper_pos`. The defaults related to the end-effector space are:
+For that we need to define a version of the robot that takes actions in the end-effector space. Check the robot class `SO100FollowerEndEffector` and its configuration `SO100FollowerEndEffectorConfig` for the default parameters related to the end-effector space.

 <!-- prettier-ignore-start -->
 ```python
-class InverseKinematicsConfig:
-    """Configuration for inverse kinematics processing."""
+class SO100FollowerEndEffectorConfig(SO100FollowerConfig):
+    """Configuration for the SO100FollowerEndEffector robot."""

-    urdf_path: str | None = None
-    target_frame_name: str | None = None
-    # bounds for the end-effector in x,y,z direction
-    end_effector_bounds: dict[str, list[float]] | None = None
-    # maximum step size for the end-effector in x,y,z direction
-    end_effector_step_sizes: dict[str, float] | None = None
+    # Default bounds for the end-effector position (in meters)
+    end_effector_bounds: dict[str, list[float]] = field( # bounds for the end-effector in x,y,z direction
+        default_factory=lambda: {
+            "min": [-1.0, -1.0, -1.0],  # min x, y, z
+            "max": [1.0, 1.0, 1.0],  # max x, y, z
+        }
+    )

-class HILSerlProcessorConfig:
-    ...
-    # maximum gripper position that the gripper will be open at
-    max_gripper_pos: float | None = 100.0
+    max_gripper_pos: float = 50 # maximum gripper position that the gripper will be open at
+
+    end_effector_step_sizes: dict[str, float] = field( # maximum step size for the end-effector in x,y,z direction
+        default_factory=lambda: {
+            "x": 0.02,
+            "y": 0.02,
+            "z": 0.02,
+        }
+    )
 ```
 <!-- prettier-ignore-end -->

@@ -609,11 +606,11 @@ This guide explains how to train a reward classifier for human-in-the-loop reinf

 **Note**: Training a reward classifier is optional. You can start the first round of RL experiments by annotating the success manually with your gamepad or keyboard device.

-The reward classifier implementation in `lerobot/rewards/classifier/modeling_classifier.py` uses a pretrained vision model to process the images. It can output either a single value for binary rewards to predict success/fail cases or multiple values for multi-class settings.
+The reward classifier implementation in `modeling_classifier.py` uses a pretrained vision model to process the images. It can output either a single value for binary rewards to predict success/fail cases or multiple values for multi-class settings.

 **Collecting a Dataset for the reward classifier**

-Before training, you need to collect a dataset with labeled examples. Setting `mode: "record"` in your config and running `gym_manipulator.py` enables the process of collecting a dataset of observations, actions, and rewards.
+Before training, you need to collect a dataset with labeled examples. The `record_dataset` function in `gym_manipulator.py` enables the process of collecting a dataset of observations, actions, and rewards.

 To collect a dataset, you need to modify some parameters in the environment configuration based on HILSerlRobotEnvConfig.

@@ -661,7 +658,7 @@ Example configuration section for data collection:
  },
  "dataset": {
    "repo_id": "hf_username/dataset_name",
-    "root": "data/your_dataset",
+    "dataset_root": "data/your_dataset",
    "task": "reward_classifier_task",
    "num_episodes_to_record": 20,
    "replay_episode": null,
@@ -674,7 +671,7 @@ Example configuration section for data collection:

 **Reward Classifier Configuration**

-The reward classifier is configured using `lerobot/rewards/classifier/configuration_classifier.py`. Here are the key parameters:
+The reward classifier is configured using `configuration_classifier.py`. Here are the key parameters:

 - **model_name**: Base model architecture (e.g., we mainly use `"helper2424/resnet10"`)
 - **model_type**: `"cnn"` or `"transformer"`
@@ -692,7 +689,7 @@ Example configuration for training the [reward classifier](https://huggingface.c
    "repo_id": "hf_username/dataset_name",
    "root": null
  },
-  "reward_model": {
+  "policy": {
    "type": "reward_classifier",
    "model_name": "helper2424/resnet10",
    "model_type": "cnn",
@@ -702,6 +699,7 @@ Example configuration for training the [reward classifier](https://huggingface.c
    "dropout_rate": 0.1,
    "learning_rate": 1e-4,
    "device": "cuda",
+    "use_amp": true,
    "input_features": {
      "observation.images.front": {
        "type": "VISUAL",
@@ -820,14 +818,13 @@ The LeRobot system uses a distributed actor-learner architecture for training. T

 **Configuration Setup**

-Create a training configuration file (example available [here](https://huggingface.co/datasets/lerobot/config_examples/resolve/main/rl/train_config.json)). The training config is based on the main `TrainRLServerPipelineConfig` class in `lerobot/rl/train_rl.py`.
+Create a training configuration file (example available [here](https://huggingface.co/datasets/lerobot/config_examples/resolve/main/rl/train_config.json)). The training config is based on the main `TrainRLServerPipelineConfig` class in `lerobot/configs/train.py`.

 1. Configure the policy settings (`type="gaussian_actor"`, `device`, etc.)
-2. Configure the algorithm settings under the top-level `algorithm` block (`type="sac"`, learning rates, discount, etc., defined in `lerobot/rl/algorithms/sac/configuration_sac.py`).
-3. Set `dataset` to your cropped dataset
-4. Configure environment settings with crop parameters
-5. Check the other parameters related to the Gaussian Actor in [configuration_gaussian_actor.py](https://github.com/huggingface/lerobot/blob/main/src/lerobot/policies/gaussian_actor/configuration_gaussian_actor.py#L79).
-6. Verify that the `policy` config is correct with the right `input_features` and `output_features` for your task.
+2. Set `dataset` to your cropped dataset
+3. Configure environment settings with crop parameters
+4. Check the other parameters related to the Gaussian Actor in [configuration_gaussian_actor.py](https://github.com/huggingface/lerobot/blob/main/src/lerobot/policies/gaussian_actor/configuration_gaussian_actor.py#L79).
+5. Verify that the `policy` config is correct with the right `input_features` and `output_features` for your task.

 **Starting the Learner**

@@ -207,56 +207,6 @@ pip install 'lerobot[feetech]'        # Feetech motor support

 _Multiple extras can be combined (e.g., `.[core_scripts,pi,pusht]`). For a full list of available extras, refer to `pyproject.toml`._

-### PyTorch CUDA variant (Linux only)
-
-On Linux, the install path determines which CUDA wheel you get. macOS and Windows installs use the PyPI default (MPS / CPU / CUDA-Windows wheel respectively) and can skip this section.
-
-<!-- prettier-ignore-start -->
-
-<hfoptions id="cuda_variant">
-<hfoption id="uv-source">
-
-**Source install via `uv` (`uv sync` or `uv pip install -e .`)**
-
-`torch` and `torchvision` are pinned by the project to the **CUDA 12.8** PyTorch index (`https://download.pytorch.org/whl/cu128`, driver floor **570.86**) — covers Ampere/Ada/Hopper/Blackwell GPUs. No action needed for typical NVIDIA setups.
-
-To override for a different CUDA variant:
-
-```bash
-uv pip install --force-reinstall torch torchvision \
-    --index-url https://download.pytorch.org/whl/cu126   # older drivers; or cu130 for Blackwell on driver ≥ 580
-```
-
-</hfoption>
-<hfoption id="pip-conda">
-
-**Source install via `pip`/`conda`, or `pip install lerobot` from PyPI**
-
-The default PyPI torch wheel is currently a cu130-bundled Linux wheel, driver floor **580.65**.
-
-To pick a specific CUDA variant:
-
-**Using `pip` or `conda`** — install torch first with an explicit index, then lerobot:
-
-```bash
-pip install --index-url https://download.pytorch.org/whl/cu128 torch torchvision
-pip install -e ".[all]"          # source
-# — or —
-pip install lerobot              # from PyPI
-```
-
-**Using `uv` to install from PyPI** — one-liner via `--torch-backend` (uv ≥ 0.6):
-
-```bash
-uv pip install --torch-backend cu128 lerobot
-```
-
-Supported values include `auto`, `cpu`, `cu126`, `cu128`, `cu129`, `cu130`, plus various `rocm*` and `xpu`. Swap as needed for your driver.
-
-</hfoption>
-</hfoptions>
-<!-- prettier-ignore-end -->
-
 ### Troubleshooting

 If you encounter build errors, you may need to install additional system dependencies: `cmake`, `build-essential`, and `ffmpeg libs`.
@@ -28,15 +28,13 @@ lerobot-train \
 --steps=100000 \
 --batch_size=32 \
 --peft.method_type=LORA \
- --peft.r=64 \
- --peft.lora_alpha=64
+ --peft.r=64
 ```

 Note the `--peft.method_type` parameter that lets you select which PEFT method to use. Here we use
 [LoRA](https://huggingface.co/docs/peft/main/en/package_reference/lora) (Low-Rank Adapter) which is probably the most
 popular fine-tuning method to date. Low-rank adaptation means that we only fine-tune a matrix with comparatively low rank
-instead of the full weight matrix. This rank can be specified using the `--peft.r` parameter, and the LoRA scaling factor with
-`--peft.lora_alpha` (where `scaling = lora_alpha / r`). The higher the rank
+instead of the full weight matrix. This rank can be specified using the `--peft.r` parameter. The higher the rank
 the closer you get to full fine-tuning.

 There are more complex methods that have more parameters. These are not yet supported, feel free to raise an issue
@@ -46,7 +46,7 @@ This ensures identical task states map to consistent progress values, even acros

 ## Inputs and Targets (What the new code expects)

-SARM is trained through its processor (`src/lerobot/rewards/sarm/processor_sarm.py`), which:
+SARM is trained through its processor (`src/lerobot/policies/sarm/processor_sarm.py`), which:

 - **Encodes** images and task text with CLIP (ViT-B/32) into `video_features` and `text_features`
 - **Pads/truncates** robot state into `state_features` (up to `max_state_dim`)
@@ -347,7 +347,7 @@ Use `compute_rabc_weights.py` with `--visualize-only` to visualize model predict
 <hfoption id="single_stage">

 ```bash
-python -m lerobot.rewards.sarm.compute_rabc_weights \
+python src/lerobot/policies/sarm/compute_rabc_weights.py \
  --dataset-repo-id your-username/your-dataset \
  --reward-model-path your-username/sarm-model \
  --visualize-only \
@@ -360,7 +360,7 @@ python -m lerobot.rewards.sarm.compute_rabc_weights \
 <hfoption id="dense_only">

 ```bash
-python -m lerobot.rewards.sarm.compute_rabc_weights \
+python src/lerobot/policies/sarm/compute_rabc_weights.py \
  --dataset-repo-id your-username/your-dataset \
  --reward-model-path your-username/sarm-model \
  --visualize-only \
@@ -373,7 +373,7 @@ python -m lerobot.rewards.sarm.compute_rabc_weights \
 <hfoption id="dual">

 ```bash
-python -m lerobot.rewards.sarm.compute_rabc_weights \
+python src/lerobot/policies/sarm/compute_rabc_weights.py \
  --dataset-repo-id your-username/your-dataset \
  --reward-model-path your-username/sarm-model \
  --visualize-only \
@@ -429,7 +429,7 @@ The weighting follows **Equations 8-9** from the paper:
 First, run the SARM model on all frames in your dataset to compute progress values:

 ```bash
-python -m lerobot.rewards.sarm.compute_rabc_weights \
+python src/lerobot/policies/sarm/compute_rabc_weights.py \
  --dataset-repo-id your-username/your-dataset \
  --reward-model-path your-username/sarm-model \
  --head-mode sparse \
@@ -465,15 +465,15 @@ This script:

 ### Step 5b: Train Policy with RA-BC

-Once you have the progress file, train your policy with RA-BC weighting. The progress file is auto-detected from the dataset path (`sarm_progress.parquet`) if not explicitly provided. Currently PI0, PI0.5 and SmolVLA are supported with RA-BC:
+Once you have the progress file, train your policy with RA-BC weighting. The progress file is auto-detected from the dataset path (`sarm_progress.parquet`). Currently PI0, PI0.5 and SmolVLA are supported with RA-BC:

 ```bash
 lerobot-train \
  --dataset.repo_id=your-username/your-dataset \
  --policy.type=pi0 \
-  --sample_weighting.type=rabc \
-  --sample_weighting.head_mode=sparse \
-  --sample_weighting.kappa=0.01 \
+  --use_rabc=true \
+  --rabc_head_mode=sparse \
+  --rabc_kappa=0.01 \
  --output_dir=outputs/train/policy_rabc \
  --batch_size=32 \
  --steps=40000
@@ -488,13 +488,12 @@ The training script automatically:

 **RA-BC Arguments:**

-| Argument                           | Description                                            | Default                 |
-| ---------------------------------- | ------------------------------------------------------ | ----------------------- |
-| `--sample_weighting.type`          | Weighting strategy type (`rabc` or `uniform`)          | `rabc`                  |
-| `--sample_weighting.progress_path` | Path to progress parquet file                          | `sarm_progress.parquet` |
-| `--sample_weighting.head_mode`     | Which SARM head's progress to use: `sparse` or `dense` | `sparse`                |
-| `--sample_weighting.kappa`         | Threshold κ for high-quality samples                   | `0.01`                  |
-| `--sample_weighting.epsilon`       | Small constant for numerical stability                 | `1e-6`                  |
+| Argument               | Description                                                | Default                            |
+| ---------------------- | ---------------------------------------------------------- | ---------------------------------- |
+| `--use_rabc`           | Enable RA-BC sample weighting                              | `false`                            |
+| `--rabc_progress_path` | Path to progress parquet file (auto-detected from dataset) | `sarm_progress.parquet` in dataset |
+| `--rabc_head_mode`     | Which SARM head's progress to use: `sparse` or `dense`     | `sparse`                           |
+| `--rabc_kappa`         | Threshold κ for high-quality samples                       | `0.01`                             |

 ### Tuning RA-BC Kappa

@@ -512,30 +511,30 @@ The `kappa` parameter is the threshold that determines which samples get full we

 Monitor these WandB metrics during training:

-| Metric                        | Healthy Range | Problem Indicator         |
-| ----------------------------- | ------------- | ------------------------- |
-| `sample_weight_mean_weight`   | 0.3 - 0.8     | ≈ 1.0 means kappa too low |
-| `sample_weighting/delta_mean` | > 0           | Should be positive        |
-| `sample_weighting/delta_std`  | > 0           | Variance in data quality  |
+| Metric             | Healthy Range | Problem Indicator         |
+| ------------------ | ------------- | ------------------------- |
+| `rabc_mean_weight` | 0.3 - 0.8     | ≈ 1.0 means kappa too low |
+| `rabc_delta_mean`  | > 0           | Should be positive        |
+| `rabc_delta_std`   | > 0           | Variance in data quality  |

-**If `sample_weight_mean_weight ≈ 1.0`:** Your kappa is too low. Most samples have `delta > kappa` and bypass the soft-weighting entirely. RA-BC becomes equivalent to vanilla BC.
+**If `rabc_mean_weight ≈ 1.0`:** Your kappa is too low. Most samples have `delta > kappa` and bypass the soft-weighting entirely. RA-BC becomes equivalent to vanilla BC.

 **Setting kappa based on your data:**

-The default `kappa=0.01` was tuned for the paper's T-shirt folding task (~90s episodes at 30fps). For your dataset, check the logged `sample_weighting/delta_mean` and `sample_weighting/delta_std`:
+The default `kappa=0.01` was tuned for the paper's T-shirt folding task (~90s episodes at 30fps). For your dataset, check the logged `rabc_delta_mean` and `rabc_delta_std`:

 ```
 # If delta_mean ≈ 0.03 and delta_std ≈ 0.02:
 # Most deltas fall in range [0.01, 0.05]

 # Option 1: Set kappa = delta_mean (medium selectivity)
--sample_weighting.kappa=0.03
+--rabc_kappa=0.03

 # Option 2: Set kappa = delta_mean + delta_std (high selectivity)
--sample_weighting.kappa=0.05
+--rabc_kappa=0.05

 # Option 3: Set kappa = delta_mean + 2*delta_std (very selective)
--sample_weighting.kappa=0.07
+--rabc_kappa=0.07
 ```

 **When RA-BC may not help:**
@@ -551,8 +550,8 @@ accelerate launch \
  src/lerobot/scripts/lerobot_train.py \
  --dataset.repo_id=your-username/your-dataset \
  --policy.type=pi0 \
-  --sample_weighting.type=rabc \
-  --sample_weighting.kappa=0.01 \
+  --use_rabc=true \
+  --rabc_kappa=0.01 \
  --output_dir=outputs/train/policy_rabc \
  --batch_size=32 \
  --steps=40000
@@ -577,7 +576,7 @@ accelerate launch \
 ### RA-BC

 1. **Train SARM first**: RA-BC quality depends entirely on SARM quality
-2. **Monitor `sample_weight_mean_weight`**: If it's ≈ 1.0, increase kappa (see [Tuning RA-BC Kappa](#tuning-ra-bc-kappa))
+2. **Monitor `rabc_mean_weight`**: If it's ≈ 1.0, increase kappa (see [Tuning RA-BC Kappa](#tuning-ra-bc-kappa))

 ---

@@ -1,244 +0,0 @@
-#!/usr/bin/env python
-
-# Copyright 2026 The HuggingFace Inc. team. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-"""Create videos with a Robometer progress overlay for one LeRobot dataset episode.
-
-This is a lightweight smoke-test utility for Robometer checkpoints. It downloads
-one episode video, samples a small number of frames, runs Robometer on those
-frames, and reuses the progress overlay renderer from
-``examples/dataset/create_progress_videos.py``.
-
-Example:
-
-    uv run python examples/dataset/create_robometer_progress_videos.py \\
-        --repo-id lerobot/aloha_mobile_cabinet \\
-        --episode 0 \\
-        --reward-model-path lilkm/robometer-4b \\
-        --device cuda
-"""
-
-from __future__ import annotations
-
-import argparse
-import logging
-from pathlib import Path
-
-import cv2
-import numpy as np
-import torch
-
-from examples.dataset.create_progress_videos import (
-    composite_progress_video,
-    convert_mp4_to_gif,
-    download_episode_metadata,
-    download_video_file,
-    load_episode_meta,
-)
-from lerobot.rewards.robometer import RobometerConfig, RobometerRewardModel
-from lerobot.rewards.robometer.modeling_robometer import decode_progress_outputs
-from lerobot.rewards.robometer.processor_robometer import RobometerEncoderProcessorStep
-from lerobot.utils.utils import init_logging
-
-
-def _default_device() -> str:
-    """Return ``"cuda"`` when ``torch.cuda.is_available()`` is true, otherwise ``"cpu"``."""
-    return "cuda" if torch.cuda.is_available() else "cpu"
-
-
-def sample_episode_frames(
-    video_path: Path,
-    *,
-    from_timestamp: float,
-    to_timestamp: float,
-    fps: float,
-    num_frames: int,
-) -> tuple[np.ndarray, np.ndarray]:
-    """Sample RGB frames uniformly from an episode video segment.
-
-    Args:
-        video_path: Video file opened with ``cv2.VideoCapture``.
-        from_timestamp: Start of the episode segment inside the video, in seconds.
-        to_timestamp: End of the episode segment inside the video, in seconds.
-        fps: Frame rate used to convert between frame indices and timestamps.
-        num_frames: Upper bound on the number of frames to sample; must be positive.
-
-    Returns:
-        ``(frames, frame_indices)`` where ``frames`` is ``(T,H,W,C)`` uint8 RGB
-        and ``frame_indices`` are local episode frame indices used for overlay.
-
-    Raises:
-        ValueError: If ``num_frames`` is not positive.
-        RuntimeError: If no frame at all could be read from the video.
-    """
-    if num_frames <= 0:
-        raise ValueError(f"num_frames must be positive, got {num_frames}")
-
-    duration_seconds = to_timestamp - from_timestamp
-    # Clamp to at least one frame so a zero-length segment still yields a sample.
-    total_frames = max(int(round(duration_seconds * fps)), 1)
-    # Evenly spaced local indices; never more samples than frames in the segment.
-    frame_indices = np.linspace(0, total_frames - 1, num=min(num_frames, total_frames), dtype=int)
-
-    capture = cv2.VideoCapture(str(video_path))
-    frames: list[np.ndarray] = []
-    try:
-        for frame_idx in frame_indices:
-            # Seek by absolute video time (milliseconds), offset by the segment start.
-            timestamp = from_timestamp + frame_idx / fps
-            capture.set(cv2.CAP_PROP_POS_MSEC, timestamp * 1000)
-            ret, frame_bgr = capture.read()
-            if not ret:
-                # A failed read is skipped, so fewer frames than requested may be returned.
-                logging.warning("Could not read frame %d at %.3fs", frame_idx, timestamp)
-                continue
-            # OpenCV decodes to BGR; convert so callers receive RGB.
-            frames.append(cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2RGB))
-    finally:
-        # Always release the capture handle, even if a read or conversion raises.
-        capture.release()
-
-    if not frames:
-        raise RuntimeError(f"No frames could be sampled from {video_path}")
-
-    # NOTE(review): the slice keeps the *first* len(frames) indices. If a read failed
-    # before the last sample, these are not the indices of the frames actually read.
-    return np.stack(frames), frame_indices[: len(frames)]
-
-
-def predict_robometer_progress(
-    frames: np.ndarray,
-    *,
-    task: str,
-    reward_model_path: str,
-    device: str,
-) -> list[float]:
-    """Run Robometer and return per-sampled-frame progress predictions.
-
-    Args:
-        frames: Sampled episode frames, encoded together with ``task`` as one sample.
-        task: Task description paired with the frames.
-        reward_model_path: Path or repo id passed to ``RobometerRewardModel.from_pretrained``.
-        device: Device string stored in the ``RobometerConfig``.
-
-    Returns:
-        The ``"progress_pred"`` entry of the decoded outputs for the single encoded sample.
-    """
-    # NOTE(review): max_frames=None presumably disables frame capping — confirm in RobometerConfig.
-    config = RobometerConfig(pretrained_path=reward_model_path, device=device, max_frames=None)
-    model = RobometerRewardModel.from_pretrained(reward_model_path, config=config)
-
-    # Build the encoder from the loaded model's config so both agree on the input format.
-    encoder = RobometerEncoderProcessorStep(
-        base_model_id=model.config.base_model_id,
-        use_multi_image=model.config.use_multi_image,
-        use_per_frame_progress_token=model.config.use_per_frame_progress_token,
-        max_frames=None,
-    )
-    # A batch of exactly one (frames, task) sample.
-    batch = encoder.encode_samples([(frames, task)])
-
-    # Move every batch entry that exposes ``.to`` onto the device the model weights live on.
-    model_device = next(model.model.parameters()).device
-    inputs = {key: value.to(model_device) if hasattr(value, "to") else value for key, value in batch.items()}
-
-    # Inference only: eval mode and no gradient tracking.
-    model.eval()
-    with torch.no_grad():
-        progress_logits, success_logits = model._compute_rbm_logits(inputs)
-
-    decoded = decode_progress_outputs(
-        progress_logits,
-        success_logits,
-        is_discrete_mode=model.config.use_discrete_progress,
-    )
-    # Index 0 selects the only sample in the batch.
-    return decoded["progress_pred"][0]
-
-
-def process_dataset(
-    repo_id: str,
-    episode: int,
-    reward_model_path: str,
-    device: str,
-    camera_key: str | None,
-    output_dir: Path,
-    num_frames: int,
-    task: str | None = None,
-    create_gif: bool = False,
-) -> Path:
-    """Render one dataset episode as a video with a Robometer progress overlay.
-
-    Downloads the episode metadata and video, samples frames, runs Robometer on
-    them, and composites the predictions onto the episode video.
-
-    Args:
-        repo_id: Dataset repo id; ``/`` is replaced by ``_`` in the output file name.
-        episode: Episode index to process.
-        reward_model_path: Robometer checkpoint path or repo id.
-        device: Device string forwarded to Robometer inference.
-        camera_key: Camera key forwarded to ``load_episode_meta``; may be ``None``.
-        output_dir: Directory in which the output MP4 path is built.
-        num_frames: Number of frames to sample for inference.
-        task: Task description; when falsy, the metadata ``task_name`` is used.
-        create_gif: When true, the MP4 is converted with ``convert_mp4_to_gif``.
-
-    Returns:
-        The path returned by ``composite_progress_video``, or the path returned
-        by ``convert_mp4_to_gif`` when ``create_gif`` is set.
-
-    Raises:
-        ValueError: If neither ``task`` nor the episode metadata provides a task.
-    """
-    safe_name = repo_id.replace("/", "_")
-    logging.info("Processing %s episode %d with Robometer %s", repo_id, episode, reward_model_path)
-
-    local_path = download_episode_metadata(repo_id, episode)
-    episode_meta = load_episode_meta(local_path, episode, camera_key)
-    video_path = download_video_file(repo_id, local_path, episode_meta["video_rel"])
-
-    # An explicit --task wins over the task stored in the dataset metadata.
-    task_name = task or episode_meta.get("task_name", "")
-    if not task_name:
-        raise ValueError("No task found in dataset metadata. Pass --task explicitly.")
-
-    frames, frame_indices = sample_episode_frames(
-        video_path,
-        from_timestamp=episode_meta["from_ts"],
-        to_timestamp=episode_meta["to_ts"],
-        fps=episode_meta["fps"],
-        num_frames=num_frames,
-    )
-    logging.info("Sampled %d frames for Robometer inference", len(frames))
-
-    progress = predict_robometer_progress(
-        frames,
-        task=task_name,
-        reward_model_path=reward_model_path,
-        device=device,
-    )
-    # Two columns: frame index and predicted progress. np.stack requires both
-    # sequences to have the same length.
-    progress_data = np.stack([frame_indices, np.asarray(progress, dtype=np.float32)], axis=1)
-    logging.info("Progress predictions: %s", [round(float(value), 3) for value in progress])
-
-    output_path = output_dir / f"{safe_name}_ep{episode}_robometer_progress.mp4"
-    final_path = composite_progress_video(
-        video_path=video_path,
-        from_timestamp=episode_meta["from_ts"],
-        to_timestamp=episode_meta["to_ts"],
-        progress_data=progress_data,
-        output_path=output_path,
-        fps=episode_meta["fps"],
-        task_name=task_name,
-    )
-
-    if create_gif:
-        # The returned path is replaced by whatever convert_mp4_to_gif returns.
-        final_path = convert_mp4_to_gif(final_path)
-    return final_path
-
-
-def main() -> None:
-    """Parse command-line arguments and run ``process_dataset`` for one episode.
-
-    Creates the output directory if needed and logs the resulting path.
-    """
-    parser = argparse.ArgumentParser(
-        description="Create MP4/GIF videos with Robometer progress overlay for dataset episodes."
-    )
-    parser.add_argument("--repo-id", required=True, help="Hugging Face LeRobot dataset repo id.")
-    parser.add_argument("--episode", type=int, required=True, help="Episode index to visualize.")
-    parser.add_argument(
-        "--reward-model-path",
-        default="lilkm/robometer-4b",
-        help="Robometer checkpoint path or Hub repo id (e.g. lilkm/robometer-4b).",
-    )
-    # The default device is resolved once, when the parser is built.
-    parser.add_argument("--device", default=_default_device(), help="Torch device for Robometer inference.")
-    parser.add_argument(
-        "--camera-key",
-        default=None,
-        help="Camera observation key (e.g. observation.images.top). Auto-selects first camera if omitted.",
-    )
-    parser.add_argument(
-        "--task", default=None, help="Task description override if dataset metadata lacks one."
-    )
-    parser.add_argument(
-        "--num-frames",
-        type=int,
-        default=8,
-        help="Number of episode frames to sample for Robometer inference.",
-    )
-    parser.add_argument(
-        "--output-dir",
-        type=Path,
-        default=Path("progress_videos"),
-        help="Directory to write output files.",
-    )
-    parser.add_argument("--gif", action="store_true", help="Also generate a GIF from the MP4 output.")
-    args = parser.parse_args()
-
-    init_logging()
-    # Create the output directory (and any missing parents) before writing into it.
-    args.output_dir.mkdir(parents=True, exist_ok=True)
-
-    result = process_dataset(
-        repo_id=args.repo_id,
-        episode=args.episode,
-        reward_model_path=args.reward_model_path,
-        device=args.device,
-        camera_key=args.camera_key,
-        output_dir=args.output_dir,
-        num_frames=args.num_frames,
-        task=args.task,
-        create_gif=args.gif,
-    )
-    logging.info("Output: %s", result)
-
-
-if __name__ == "__main__":
-    main()
@@ -69,7 +69,7 @@ class ComputeProgressShards(PipelineStep):
        import torch
        from tqdm import tqdm

-        from lerobot.rewards.sarm.compute_rabc_weights import (
+        from lerobot.policies.sarm.compute_rabc_weights import (
            generate_all_frame_indices,
            interpolate_progress,
            load_sarm_resources,
@@ -1,136 +0,0 @@
-# OMX Follower — Cube Pick And Place Example
-
-This is an example of what is possible to do with LeRobot on a physical setup.
-It is a WIP and being used internally at LeRobot and specific to our setup, but we hope it can be a useful reference for how to use LeRobot APIs and CLIs.
-
-It includes an end-to-end example for the **OMX Follower** robot arm: collect a cube pick-and-place dataset, train a policy, and deploy it autonomously.
-
-## Hardware
-
-| Component | Value                                |
-| --------- | ------------------------------------ |
-| Robot     | OMX Follower                         |
-| Cameras   | 2× OpenCV cameras (wrist + top-down) |
-
-## Scripts
-
-| Script                 | Purpose                                                         |
-| ---------------------- | --------------------------------------------------------------- |
-| `reset_environment.py` | Standalone utility: sweep workspace, grab cube, place cube      |
-| `record_grab.py`       | Automated data collection: reset → place → record grab episodes |
-
-## Setup
-
-Make sure you have LeRobot installed in your env. (See [the installation guide](https://huggingface.co/docs/lerobot/installation))
-
-Next, we will declare some environment variables for convenience. Adjust the camera indices and robot port to match your system configuration.
-
-```bash
-export ROBOT_PORT=/dev/ttyACM0
-export TELEOP_PORT=/dev/ttyACM1
-export HF_USERNAME=<your_hf_username>
-export ROBOT_CAMERAS="{ wrist: {type: opencv, index_or_path: 0, width: 640, height: 480, fps: 30, fourcc: MJPG}, top: {type: opencv, index_or_path: 2, width: 640, height: 480, fps: 30, fourcc: MJPG} }"
-```
-
-## Step 1 — Collect Data
-
-```bash
-lerobot-record \
-    --robot.type=omx_follower \
-    --robot.port=$ROBOT_PORT \
-    --robot.id=omx_follower \
-    --robot.cameras="$ROBOT_CAMERAS" \
-    --teleop.type=omx_leader \
-    --teleop.port=$TELEOP_PORT \
-    --teleop.id=omx_leader \
-    --dataset.repo_id=$HF_USERNAME/omx_pickandplace \
-    --dataset.root=data/omx_pickandplace \
-    --dataset.num_episodes=50 \
-    --dataset.single_task="Pick the cube and place it in the blue square" \
-    --dataset.streaming_encoding=true \
-    --dataset.push_to_hub=true
-```
-
-### Bonus Auto-Collect script
-
-/!\ This is specific to our setup and the task of picking and placing a cube. It is not a general-purpose data collection script. As you may notice, it doesn't require a teleop.
-
-```bash
-python -m examples.omx.record_grab \
-    --robot.type=omx_follower \
-    --robot.port=$ROBOT_PORT \
-    --robot.id=omx_follower \
-    --robot.cameras="$ROBOT_CAMERAS" \
-    --dataset.repo_id=$HF_USERNAME/omx_pickandplace \
-    --dataset.root=data/omx_pickandplace \
-    --dataset.num_episodes=50 \
-    --dataset.single_task="Pick the cube and place it in the blue square" \
-    --dataset.streaming_encoding=true \
-    --dataset.push_to_hub=true
-```
-
-Each episode:
-
-1. The arm grabs the cube from the center of the workspace and places it at a random position.
-2. The arm returns to HOME.
-3. A targeted grab is recorded: HOME → approach raised → lower onto cube → grasp → lift → carry → drop → HOME.
-
-A dataset is already available here [`maximellerbach/omx_pickandplace`](https://huggingface.co/datasets/maximellerbach/omx_pickandplace), so you can skip directly to training if you want.
-
-## Step 2 — Train
-
-To train a simple `ACT` policy on the collected dataset, you can use the `lerobot-train` CLI:
-
-```bash
-lerobot-train \
-    --dataset.repo_id=$HF_USERNAME/omx_pickandplace \
-    --policy.type=act \
-    --output_dir=outputs/train/omx_pickandplace_act \
-    --policy.device=cuda \
-    --policy.repo_id=$HF_USERNAME/omx_pickandplace_act \
-    --steps=20000 \
-    --wandb.enable=true
-```
-
-A pretrained `ACT` policy is already available here [`maximellerbach/omx_pickandplace_act`](https://huggingface.co/maximellerbach/omx_pickandplace_act).
-
-## Step 3 — Rollout
-
-Use the `lerobot-rollout` CLI with base strategy:
-
-```bash
-lerobot-rollout \
-    --strategy.type=base \
-    --robot.type=omx_follower \
-    --robot.port=$ROBOT_PORT \
-    --robot.id=omx_follower \
-    --robot.cameras="$ROBOT_CAMERAS" \
-    --policy.path=$HF_USERNAME/omx_pickandplace_act \
-```
-
-For continuous recording with automatic upload (sentry mode):
-
-```bash
-lerobot-rollout \
-    --strategy.type=sentry \
-    --strategy.upload_every_n_episodes=10 \
-    --robot.type=omx_follower \
-    --robot.port=$ROBOT_PORT \
-    --robot.id=omx_follower \
-    --robot.cameras="$ROBOT_CAMERAS" \
-    --policy.path=$HF_USERNAME/omx_pickandplace_act \
-    --dataset.repo_id=$HF_USERNAME/rollout_omx_pickandplace_act
-```
-
-## Environment Reset Utility
-
-These utilities are specific to this particular physical setup: they are scripts that execute hardcoded sequences of actions on the robot to reset the environment, which is useful for data collection and evaluation. They are not general-purpose scripts.
-
-`reset_environment.py` can be run standalone to prepare the workspace:
-
-```bash
-# Grab cube + place it at a random position on the left side
-python -m examples.omx.reset_environment --port $ROBOT_PORT --mode grab_and_place
-```
-
-It also exposes `grab_cube(robot)` and `place_cube(robot)` for use in custom scripts.
@@ -1,422 +0,0 @@
-#!/usr/bin/env python3
-"""
-Auto-record grab episodes for the OMX robot arm.
-
-Each episode cycle:
-  1. grab_and_place  — grab cube from workspace center and place at a random (pan, reach) position
-  2. HOME            — return arm to home with gripper open
-  3. record_grab     — execute a targeted grab to the stored position while recording
-                       observations + actions to a LeRobotDataset
-
-Usage (run from repo root):
-    python -m examples.omx.record_grab \\
-        --robot.type=omx_follower \\
-        --robot.port=/dev/ttyACM0 \\
-        --robot.id=omx_follower \\
-        --robot.cameras="{ wrist: {type: opencv, index_or_path: 6, width: 640, height: 480, fps: 30, fourcc: MJPG}, top: {type: opencv, index_or_path: 4, width: 640, height: 480, fps: 30, fourcc: MJPG} }" \\
-        --dataset.repo_id=<hf_username>/<dataset_name> \\
-        --dataset.root=data/omx_grab \\
-        --dataset.num_episodes=50 \\
-        --dataset.single_task="Grab the cube" \\
-        --dataset.streaming_encoding=true
-"""
-
-import logging
-from dataclasses import dataclass
-from pprint import pformat
-
-import numpy as np
-
-from lerobot.cameras import CameraConfig  # noqa: F401
-from lerobot.cameras.opencv import OpenCVCameraConfig  # noqa: F401
-from lerobot.configs import parser
-from lerobot.configs.dataset import DatasetRecordConfig
-from lerobot.datasets import (
-    LeRobotDataset,
-    VideoEncodingManager,
-    aggregate_pipeline_dataset_features,
-    create_initial_features,
-)
-from lerobot.processor import make_default_processors
-from lerobot.robots import RobotConfig, make_robot_from_config
-from lerobot.robots.omx_follower import OmxFollower
-from lerobot.utils.constants import ACTION, OBS_STR
-from lerobot.utils.feature_utils import build_dataset_frame, combine_feature_dicts
-from lerobot.utils.robot_utils import precise_sleep
-
-from .reset_environment import (
-    APPROACH_SPEED,
-    GRIPPER_CLOSE_POS,
-    HOME_POSE,
-    PUSH_END_ELBOW_FLEX,
-    PUSH_END_SHOULDER_LIFT,
-    PUSH_START_ELBOW_FLEX,
-    PUSH_START_SHOULDER_LIFT,
-    array_to_pose,
-    grab_cube,
-    horizontal_wrist_flex,
-    move_to_pose,
-    place_cube,
-    pose_to_array,
-)
-
-# ── Grab-episode motion parameters ────────────────────────────────────────────
-
-# Shoulder-lift offset for the raised approach phase (subtracted from the target sl, arm is higher).
-GRAB_RAISE_SL_OFFSET = 20.0
-# Speeds for the lowering move and for recorded segments.
-# NOTE(review): presumably the same unit as the per-segment speeds consumed by
-# record_episode_spline (joint units per second) — confirm at the call sites.
-GRAB_LOWER_SPEED = 20.0
-RECORD_SPEED = 30.0
-
-# Pose the arm travels to after closing the gripper (cube held).
-GRAB_CARRY_POSE = {
-    "shoulder_pan.pos": -23.0,
-    "shoulder_lift.pos": 5.0,
-    "elbow_flex.pos": 18.0,
-    "wrist_flex.pos": -14.0,
-    "wrist_roll.pos": 0.0,
-    "gripper.pos": GRIPPER_CLOSE_POS,
-}
-
-# Per-joint jitter limits (degrees) applied to transit waypoints for human-like variation.
-# Cube-approach and carry poses are never jittered to preserve precision.
-# A limit of 0.0 (gripper) means that joint is never perturbed.
-_JITTER_LIMITS: dict[str, float] = {
-    "shoulder_pan.pos": 5.0,
-    "shoulder_lift.pos": 4.0,
-    "elbow_flex.pos": 4.0,
-    "wrist_flex.pos": 3.0,
-    "wrist_roll.pos": 2.0,
-    "gripper.pos": 0.0,
-}
-
-
-def _jitter_pose(pose: dict, rng: np.random.Generator) -> dict:
-    """Return a copy of pose with independent per-joint random perturbations.
-
-    Each value gets a uniform offset within ``±_JITTER_LIMITS[key]``; keys that are
-    missing from ``_JITTER_LIMITS`` use a limit of 0.0. The input dict is not modified.
-    """
-    return {
-        k: v + rng.uniform(-_JITTER_LIMITS.get(k, 0.0), _JITTER_LIMITS.get(k, 0.0)) for k, v in pose.items()
-    }
-
-
-def _random_stuck_pose(rng: np.random.Generator) -> dict:
-    """Return a physically plausible stuck pose (failed grasp), gripper closed.
-
-    ef bounds are piecewise-linear in sl so the arm stays in a reachable,
-    table-safe envelope across the full sl range:
-      sl=-50 → ef ∈ [  0,  50]   (arm raised, can be bent forward)
-      sl=  0 → ef ∈ [-25,  25]   (mid reach)
-      sl= 30 → ef ∈ [-20,   0]   (arm extended, little room to flex)
-    wrist_flex is randomly offset from the horizontal value.
-
-    Args:
-        rng: Random generator used for every draw.
-
-    Returns:
-        A pose dict with the six ``*.pos`` joint keys; ``gripper.pos`` is always
-        ``GRIPPER_CLOSE_POS``.
-    """
-    # Pan and shoulder lift are drawn independently from fixed ranges.
-    pan = float(rng.uniform(-5.0, 35.0))
-    sl = float(rng.uniform(-50.0, 30.0))
-
-    # Interpolate the elbow-flex bounds between the anchor points listed in the docstring.
-    if sl <= 0.0:
-        alpha = (sl + 50.0) / 50.0  # 0 at sl=-50, 1 at sl=0
-        ef_lo = alpha * -25.0  # 0 → -25
-        ef_hi = 50.0 + alpha * -25.0  # 50 → 25
-    else:
-        alpha = sl / 30.0  # 0 at sl=0, 1 at sl=30
-        ef_lo = -25.0 + alpha * 5.0  # -25 → -20
-        ef_hi = 25.0 + alpha * -25.0  # 25 → 0
-
-    ef = float(rng.uniform(ef_lo, ef_hi))
-    # Start from the wrist angle computed by horizontal_wrist_flex, then add up to ±15 of offset.
-    wf = horizontal_wrist_flex(sl, ef) + float(rng.uniform(-15.0, 15.0))
-    return {
-        "shoulder_pan.pos": pan,
-        "shoulder_lift.pos": sl,
-        "elbow_flex.pos": ef,
-        "wrist_flex.pos": wf,
-        "wrist_roll.pos": float(rng.uniform(-15.0, 15.0)),
-        "gripper.pos": GRIPPER_CLOSE_POS,
-    }
-
-
-logger = logging.getLogger(__name__)
-
-
-@dataclass
-class OmxRecordGrabConfig:
-    """Configuration for the OMX grab-recording script.
-
-    Groups the robot config, the dataset recording config, the resume flag and
-    the probability of starting an episode from a recovery (stuck) pose.
-    """
-
-    # Robot configuration.
-    robot: RobotConfig
-    # Dataset recording configuration.
-    dataset: DatasetRecordConfig
-    # Resume recording on an existing dataset.
-    resume: bool = False
-    # Fraction of episodes that start from a random stuck pose (gripper closed) to
-    # generate recovery data.  0.0 = disabled, 1.0 = all episodes are recovery starts.
-    recovery_prob: float = 0.5
-
-
-def record_episode_spline(
-    robot: OmxFollower,
-    waypoints: list[dict],
-    speeds: list[float],
-    dataset: LeRobotDataset,
-    task: str,
-) -> None:
-    """Execute a Catmull-Rom-style spline through waypoints, recording each frame.
-
-    Segment durations are parameterized from the maximum absolute joint delta
-    between consecutive waypoints divided by the requested segment speed,
-    producing non-uniform timing in joint space. Interior tangents are derived
-    from the adjacent per-segment velocities, with clamped (zero-velocity)
-    endpoints so the arm starts and stops smoothly. Each segment is cubic
-    Hermite, giving C1 continuity at every waypoint.
-
-    Args:
-        robot: Robot that receives each action and provides each observation.
-        waypoints: Poses to pass through, in order; each is converted with ``pose_to_array``.
-        speeds: One speed per segment; ``speeds[i]`` applies between waypoint ``i``
-            and ``i + 1``, so at least ``len(waypoints) - 1`` entries are read.
-        dataset: Dataset that receives one frame per step; its ``fps`` sets the step rate.
-        task: Task string stored with every recorded frame.
-    """
-    pts = [pose_to_array(w) for w in waypoints]
-    n = len(pts)
-
-    # Steps and duration per segment
-    n_steps_list = []
-    timestamps = []
-    for i in range(n - 1):
-        max_dist = float(np.max(np.abs(pts[i + 1] - pts[i])))
-        # Segments whose largest joint delta is below 0.5 get zero steps and are skipped;
-        # any other segment gets at least one step.
-        ns = max(1, int(max_dist / speeds[i] * dataset.fps)) if max_dist >= 0.5 else 0
-        n_steps_list.append(ns)
-        # Despite the name, this holds the segment's duration in seconds.
-        timestamps.append(ns / dataset.fps)
-
-    # Velocity tangents (deg/sec) — clamped at endpoints, Catmull-Rom for interior
-    vels = [np.zeros_like(pts[0])]
-    for i in range(1, n - 1):
-        # A zero-duration neighbouring segment contributes a zero velocity.
-        v_prev = (pts[i] - pts[i - 1]) / timestamps[i - 1] if timestamps[i - 1] > 0 else np.zeros_like(pts[0])
-        v_next = (pts[i + 1] - pts[i]) / timestamps[i] if timestamps[i] > 0 else np.zeros_like(pts[0])
-        vels.append(0.5 * (v_prev + v_next))
-    vels.append(np.zeros_like(pts[0]))
-
-    dt = 1.0 / dataset.fps
-    for seg in range(n - 1):
-        ns = n_steps_list[seg]
-        if ns == 0:
-            continue
-        p0, p1 = pts[seg], pts[seg + 1]
-        # Scale velocity (deg/sec) to t-space tangent (deg/t-unit, where t: 0→1 over ns steps)
-        m0 = vels[seg] * timestamps[seg]
-        m1 = vels[seg + 1] * timestamps[seg]
-
-        # Steps start at 1, so the segment's start point is not re-sent; t reaches exactly 1.
-        for step in range(1, ns + 1):
-            t = step / ns
-            # Cubic Hermite basis functions evaluated at t.
-            h00 = 2 * t**3 - 3 * t**2 + 1
-            h10 = t**3 - 2 * t**2 + t
-            h01 = -2 * t**3 + 3 * t**2
-            h11 = t**3 - t**2
-            commanded = h00 * p0 + h10 * m0 + h01 * p1 + h11 * m1
-
-            action = array_to_pose(commanded)
-            robot.send_action(action)
-            obs = robot.get_observation()
-            # The recorded action is the commanded pose; the observation is read right after sending it.
-            obs_frame = build_dataset_frame(dataset.features, obs, prefix=OBS_STR)
-            action_frame = build_dataset_frame(dataset.features, action, prefix=ACTION)
-            dataset.add_frame({**obs_frame, **action_frame, "task": task})
-            # NOTE(review): the full dt is slept on top of the send/observe/add_frame time, so the
-            # real loop rate is presumably below dataset.fps — confirm against precise_sleep.
-            precise_sleep(dt)
-
-
-def record_grab_episode(
-    robot: OmxFollower,
-    dataset: LeRobotDataset,
-    pan: float,
-    t: float,
-    task: str,
-    recovery_start: bool = False,
-) -> None:
-    """Execute a targeted grab to the stored (pan, t) position, recording every frame.
-
-    Normal sequence (initial HOME move is NOT recorded):
-      HOME → raised approach above cube → lower → close gripper
-           → raise [jittered] → retract [jittered] → GRAB_CARRY_POSE → drop → HOME
-
-    Recovery sequence (recovery_start=True): arm is moved to a random stuck pose
-    (gripper closed) without recording, then recording begins from there:
-      stuck_pose → raised approach above cube → [normal grab sequence from there]
-
-    All segments are joined by a Catmull-Rom spline (C1-continuous velocities).
-    """
-    sl = PUSH_START_SHOULDER_LIFT + t * (PUSH_END_SHOULDER_LIFT - PUSH_START_SHOULDER_LIFT)
-    ef = PUSH_START_ELBOW_FLEX + t * (PUSH_END_ELBOW_FLEX - PUSH_START_ELBOW_FLEX)
-    sl_raised = sl - GRAB_RAISE_SL_OFFSET
-    wf_horizontal = horizontal_wrist_flex(sl, ef)
-
-    rng = np.random.default_rng()
-
-    if recovery_start:
-        stuck_pose = _random_stuck_pose(rng)
-        logger.info(f"Recovery start: {stuck_pose}")
-        move_to_pose(robot, stuck_pose, APPROACH_SPEED)
-        first_waypoints = [stuck_pose]
-        first_speeds = []
-    else:
-        jittery_start = _jitter_pose(HOME_POSE, rng)
-        move_to_pose(robot, jittery_start, APPROACH_SPEED)
-        first_waypoints = [jittery_start]
-        first_speeds = []
-
-    waypoints = first_waypoints + [
-        {  # raised approach: arm above cube
-            "shoulder_pan.pos": pan,
-            "shoulder_lift.pos": sl_raised,
-            "elbow_flex.pos": ef,
-            "wrist_flex.pos": horizontal_wrist_flex(sl_raised, ef),
-            "wrist_roll.pos": 0.0,
-            "gripper.pos": 60.0,
-        },
-        {  # lower onto cube — no jitter: precision needed
-            "shoulder_pan.pos": pan,
-            "shoulder_lift.pos": sl,
-            "elbow_flex.pos": ef,
-            "wrist_flex.pos": wf_horizontal,
-            "wrist_roll.pos": 0.0,
-            "gripper.pos": 60.0,
-        },
-        {  # close gripper — no jitter: precision needed
-            "shoulder_pan.pos": pan,
-            "shoulder_lift.pos": sl,
-            "elbow_flex.pos": ef,
-            "wrist_flex.pos": wf_horizontal,
-            "wrist_roll.pos": 0.0,
-            "gripper.pos": GRIPPER_CLOSE_POS,
-        },
-        _jitter_pose(
-            {  # raise with cube
-                "shoulder_pan.pos": pan,
-                "shoulder_lift.pos": sl_raised,
-                "elbow_flex.pos": ef,
-                "wrist_flex.pos": horizontal_wrist_flex(sl_raised, ef),
-                "wrist_roll.pos": 0.0,
-                "gripper.pos": GRIPPER_CLOSE_POS,
-            },
-            rng,
-        ),
-        _jitter_pose(
-            {  # retract: fold arm toward HOME before sweeping to carry zone
-                "shoulder_pan.pos": pan * 0.25,
-                "shoulder_lift.pos": HOME_POSE["shoulder_lift.pos"] + 5.0,
-                "elbow_flex.pos": HOME_POSE["elbow_flex.pos"] - 5.0,
-                "wrist_flex.pos": 0.0,
-                "wrist_roll.pos": 0.0,
-                "gripper.pos": GRIPPER_CLOSE_POS,
-            },
-            rng,
-        ),
-        GRAB_CARRY_POSE,  # no jitter: target drop zone
-        {**GRAB_CARRY_POSE, "gripper.pos": 60.0},  # drop cube
-        HOME_POSE,
-    ]
-    speeds = first_speeds + [
-        RECORD_SPEED,  # (HOME →) raised approach
-        GRAB_LOWER_SPEED,  # raised approach → lower
-        GRAB_LOWER_SPEED,  # lower → close gripper
-        RECORD_SPEED,  # close gripper → raise
-        RECORD_SPEED,  # raise → retract
-        RECORD_SPEED,  # retract → carry pose
-        RECORD_SPEED,  # carry pose → drop
-        RECORD_SPEED,  # drop → HOME
-    ]
-
-    record_episode_spline(robot, waypoints, speeds, dataset, task)
-
-    # Dwell at HOME for ~0.5 s before next episode
-    home_action = build_dataset_frame(dataset.features, HOME_POSE, prefix=ACTION)
-    dt = 1.0 / dataset.fps
-    for _ in range(int(dataset.fps * 0.5)):
-        robot.send_action(HOME_POSE)
-        obs = robot.get_observation()
-        obs_frame = build_dataset_frame(dataset.features, obs, prefix=OBS_STR)
-        dataset.add_frame({**obs_frame, **home_action, "task": task})
-        precise_sleep(dt)
-
-
-@parser.wrap()
-def record_grab(cfg: OmxRecordGrabConfig) -> LeRobotDataset:
-    logging.basicConfig(level=logging.INFO, format="%(levelname)s: %(message)s")
-    logger.info(pformat(cfg))
-
-    robot = make_robot_from_config(cfg.robot)
-    use_videos = cfg.dataset.video
-
-    teleop_action_processor, _, robot_obs_processor = make_default_processors()
-
-    dataset_features = combine_feature_dicts(
-        aggregate_pipeline_dataset_features(
-            pipeline=teleop_action_processor,
-            initial_features=create_initial_features(action=robot.action_features),
-            use_videos=use_videos,
-        ),
-        aggregate_pipeline_dataset_features(
-            pipeline=robot_obs_processor,
-            initial_features=create_initial_features(observation=robot.observation_features),
-            use_videos=use_videos,
-        ),
-    )
-
-    num_cameras = len(robot.cameras) if hasattr(robot, "cameras") else 0
-    dataset = None
-
-    try:
-        if cfg.resume:
-            dataset = LeRobotDataset.resume(
-                cfg.dataset.repo_id,
-                root=cfg.dataset.root,
-                streaming_encoding=cfg.dataset.streaming_encoding,
-                batch_encoding_size=cfg.dataset.video_encoding_batch_size,
-                vcodec=cfg.dataset.vcodec,
-                encoder_threads=cfg.dataset.encoder_threads,
-                image_writer_processes=cfg.dataset.num_image_writer_processes if num_cameras > 0 else 0,
-                image_writer_threads=cfg.dataset.num_image_writer_threads_per_camera * num_cameras
-                if num_cameras > 0
-                else 0,
-            )
-        else:
-            cfg.dataset.stamp_repo_id()
-            dataset = LeRobotDataset.create(
-                cfg.dataset.repo_id,
-                cfg.dataset.fps,
-                root=cfg.dataset.root,
-                robot_type=robot.name,
-                features=dataset_features,
-                use_videos=use_videos,
-                streaming_encoding=cfg.dataset.streaming_encoding,
-                batch_encoding_size=cfg.dataset.video_encoding_batch_size,
-                vcodec=cfg.dataset.vcodec,
-                encoder_threads=cfg.dataset.encoder_threads,
-                image_writer_processes=cfg.dataset.num_image_writer_processes if num_cameras > 0 else 0,
-                image_writer_threads=cfg.dataset.num_image_writer_threads_per_camera * num_cameras
-                if num_cameras > 0
-                else 0,
-            )
-
-        robot.connect(calibrate=True)
-
-        rng = np.random.default_rng()
-        with VideoEncodingManager(dataset):
-            for episode_idx in range(cfg.dataset.num_episodes):
-                logger.info(f"=== Episode {episode_idx + 1}/{cfg.dataset.num_episodes} ===")
-
-                logger.info("Step 1: grabbing and placing cube...")
-                grab_cube(robot)
-                pan, t = place_cube(robot)
-                logger.info(f"Cube placed at pan={pan:.1f}, reach={t:.2f}")
-
-                recovery_start = cfg.recovery_prob > 0 and float(rng.random()) < cfg.recovery_prob
-                logger.info(f"Step 2: recording {'recovery ' if recovery_start else ''}grab episode...")
-                record_grab_episode(
-                    robot,
-                    dataset,
-                    pan,
-                    t,
-                    cfg.dataset.single_task,
-                    recovery_start=recovery_start,
-                )
-
-                dataset.save_episode()
-                logger.info(f"Episode {episode_idx + 1} saved.")
-
-    finally:
-        if dataset:
-            dataset.finalize()
-        if robot.is_connected:
-            robot.disconnect()
-
-    if cfg.dataset.push_to_hub and dataset and dataset.num_episodes > 0:
-        dataset.push_to_hub(tags=cfg.dataset.tags, private=cfg.dataset.private)
-
-    return dataset
-
-
-if __name__ == "__main__":
-    record_grab()
@@ -1,267 +0,0 @@
-#!/usr/bin/env python3
-"""
-Auto-reset and cube-grab utility for the OMX robot arm.
-
-Provides:
-  - grab_cube(robot): sweep workspace, center cube, close gripper
-  - place_cube(robot): carry cube to a random position, release
-
-Standalone usage (run from repo root):
-    python -m examples.omx.reset_environment --port /dev/ttyACM1 --mode grab
-    python -m examples.omx.reset_environment --port /dev/ttyACM1 --mode grab_and_place
-
-Joint range: -100 to 100 for arm joints; gripper: 50 = closed, 80 = open.
-
-To read current joint values for calibration, add after robot.connect():
-    obs = robot.get_observation()
-    print({k: round(obs[k], 1) for k in JOINT_NAMES})
-    robot.disconnect(); raise SystemExit
-
-Parallel-to-ground IK: wrist_flex = WRIST_HORIZONTAL_OFFSET - shoulder_lift - elbow_flex.
-Linear interpolation preserves this constraint between any two poses that satisfy it.
-"""
-
-import argparse
-import logging
-
-import numpy as np
-
-from lerobot.robots.omx_follower import OmxFollower, OmxFollowerConfig
-from lerobot.robots.robot import Robot
-from lerobot.utils.robot_utils import precise_sleep
-
-logger = logging.getLogger(__name__)
-
-# ── Poses ─────────────────────────────────────────────────────────────────────
-
-HOME_POSE = {
-    "shoulder_pan.pos": 0.0,
-    "shoulder_lift.pos": -50.0,
-    "elbow_flex.pos": 50.0,
-    "wrist_flex.pos": 0.0,
-    "wrist_roll.pos": 0.0,
-    "gripper.pos": 60.0,
-}
-
-SWEEP_WAYPOINTS = [
-    {
-        "shoulder_pan.pos": -60.0,
-        "shoulder_lift.pos": 50.0,
-        "elbow_flex.pos": -60.0,
-        "wrist_flex.pos": -20.0,
-        "wrist_roll.pos": 0.0,
-        "gripper.pos": 60.0,
-    },
-    {
-        "shoulder_pan.pos": -30.0,
-        "shoulder_lift.pos": 50.0,
-        "elbow_flex.pos": -60.0,
-        "wrist_flex.pos": -5.0,
-        "wrist_roll.pos": 0.0,
-        "gripper.pos": 60.0,
-    },
-    {
-        "shoulder_pan.pos": 20.0,
-        "shoulder_lift.pos": 50.0,
-        "elbow_flex.pos": -55.0,
-        "wrist_flex.pos": -5.0,
-        "wrist_roll.pos": 0.0,
-        "gripper.pos": 60.0,
-    },
-]
-
-# ── Motion parameters ─────────────────────────────────────────────────────────
-
-CONTROL_HZ = 30
-APPROACH_SPEED = 50.0
-SWEEP_SPEED = 40.0
-
-# ── Grab-sequence parameters ──────────────────────────────────────────────────
-
-GRAB_PAN = 0.0
-SWEEP_LEFT_PAN = -60.0
-SWEEP_RIGHT_PAN = 60.0
-SWEEP_END_OFFSET = 5.0  # stop before center so the cube isn't pushed past GRAB_PAN
-SWEEP_END_PAN_RANGE = (15.0, 20.0)
-
-SWEEP_LOW_SHOULDER_LIFT = 50.0
-SWEEP_LOW_ELBOW_FLEX_START = -60.0
-SWEEP_LOW_ELBOW_FLEX_END = -55.0
-
-SWEEP_HIGH_WRIST_FLEX = -20.0  # wrist tilted up during high approach to clear obstacles
-
-PUSH_START_SHOULDER_LIFT = 0.0
-PUSH_START_ELBOW_FLEX = 45.0
-PUSH_END_SHOULDER_LIFT = 50.0
-PUSH_END_ELBOW_FLEX = -50.0
-# Subtracted from shoulder_lift during the push sweep to clear the platform surface.
-# Does not affect the grab-target interpolation in record_grab.py.
-PUSH_RAISE_OFFSET = 5.0
-
-WRIST_HORIZONTAL_OFFSET = 0.0  # tune if gripper tilts during push: + tilts nose up, - down
-GRIPPER_CLOSE_POS = 50.0
-
-PLACE_LEFT_PAN_RANGE = (5.0, 30.0)  # random pan range for cube placement on the left side
-PLACE_REACH_RANGE = (0.1, 0.7)  # 0 = arm retracted (PUSH_START), 1 = fully extended (PUSH_END)
-
-JOINT_NAMES = [
-    "shoulder_pan.pos",
-    "shoulder_lift.pos",
-    "elbow_flex.pos",
-    "wrist_flex.pos",
-    "wrist_roll.pos",
-    "gripper.pos",
-]
-
-# ── Helpers ───────────────────────────────────────────────────────────────────
-
-
-def pose_to_array(pose: dict) -> np.ndarray:
-    return np.array([pose[k] for k in JOINT_NAMES])
-
-
-def array_to_pose(arr: np.ndarray) -> dict:
-    return {k: float(arr[i]) for i, k in enumerate(JOINT_NAMES)}
-
-
-def horizontal_wrist_flex(shoulder_lift: float, elbow_flex: float) -> float:
-    return WRIST_HORIZONTAL_OFFSET - shoulder_lift - elbow_flex
-
-
-def _low_sweep_pose(pan: float, elbow_flex: float, wrist_flex: float | None = None) -> dict:
-    sl = SWEEP_LOW_SHOULDER_LIFT
-    return {
-        "shoulder_pan.pos": pan,
-        "shoulder_lift.pos": sl,
-        "elbow_flex.pos": elbow_flex,
-        "wrist_flex.pos": horizontal_wrist_flex(sl, elbow_flex) if wrist_flex is None else wrist_flex,
-        "wrist_roll.pos": 0.0,
-        "gripper.pos": 60.0,
-    }
-
-
-def _high_sweep_pose(pan: float) -> dict:
-    return {**HOME_POSE, "shoulder_pan.pos": pan, "wrist_flex.pos": SWEEP_HIGH_WRIST_FLEX}
-
-
-def _push_pose(shoulder_lift: float, elbow_flex: float, pan: float = GRAB_PAN, gripper: float = 70.0) -> dict:
-    return {
-        "shoulder_pan.pos": pan,
-        "shoulder_lift.pos": shoulder_lift,
-        "elbow_flex.pos": elbow_flex,
-        "wrist_flex.pos": horizontal_wrist_flex(shoulder_lift, elbow_flex),
-        "wrist_roll.pos": 0.0,
-        "gripper.pos": gripper,
-    }
-
-
-def move_to_pose(robot: Robot, target: dict, speed: float) -> None:
-    """Interpolate from current position to target at the given speed (units/s)."""
-    obs = robot.get_observation()
-    current = np.array([obs[k] for k in JOINT_NAMES])
-    goal = pose_to_array(target)
-
-    max_distance = float(np.max(np.abs(goal - current)))
-    if max_distance < 0.5:
-        return
-
-    n_steps = max(1, int(max_distance / speed * CONTROL_HZ))
-    dt = 1.0 / CONTROL_HZ
-    for step in range(1, n_steps + 1):
-        t = step / n_steps
-        robot.send_action(array_to_pose(current + t * (goal - current)))
-        precise_sleep(dt)
-
-
-# ── Sequences ─────────────────────────────────────────────────────────────────
-
-
-def grab_cube(robot: Robot) -> None:
-    """Left sweep → right sweep → extend arm parallel to ground → close gripper."""
-    move_to_pose(robot, HOME_POSE, APPROACH_SPEED)
-
-    for pan, end_pan in [
-        (SWEEP_LEFT_PAN, GRAB_PAN - SWEEP_END_OFFSET),
-        (SWEEP_RIGHT_PAN, GRAB_PAN + SWEEP_END_OFFSET),
-    ]:
-        logger.info(f"Sweeping {'left' if pan < 0 else 'right'} → center...")
-        move_to_pose(robot, _high_sweep_pose(pan), APPROACH_SPEED)
-        move_to_pose(
-            robot, _low_sweep_pose(pan, SWEEP_LOW_ELBOW_FLEX_START, wrist_flex=-20.0), APPROACH_SPEED
-        )
-        move_to_pose(robot, _low_sweep_pose(end_pan, SWEEP_LOW_ELBOW_FLEX_END, wrist_flex=0.0), SWEEP_SPEED)
-        move_to_pose(robot, HOME_POSE, APPROACH_SPEED)
-
-    logger.info("Extending to push cube into gripper...")
-    move_to_pose(
-        robot,
-        _push_pose(PUSH_START_SHOULDER_LIFT - PUSH_RAISE_OFFSET, PUSH_START_ELBOW_FLEX),
-        APPROACH_SPEED,
-    )
-    move_to_pose(
-        robot,
-        _push_pose(PUSH_END_SHOULDER_LIFT - PUSH_RAISE_OFFSET, PUSH_END_ELBOW_FLEX),
-        SWEEP_SPEED,
-    )
-
-    logger.info("Closing gripper...")
-    move_to_pose(
-        robot,
-        _push_pose(PUSH_END_SHOULDER_LIFT, PUSH_END_ELBOW_FLEX, gripper=GRIPPER_CLOSE_POS),
-        APPROACH_SPEED,
-    )
-
-    logger.info("Grab complete.")
-
-
-def place_cube(robot: Robot) -> tuple[float, float]:
-    """Carry the cube (gripper closed) to a random position on the left side, then release.
-
-    Returns:
-        (pan, t): pan angle and reach scalar [0, 1] of the placement position.
-    """
-    pan = float(np.random.uniform(*PLACE_LEFT_PAN_RANGE))
-    t = float(np.random.uniform(*PLACE_REACH_RANGE))
-    sl = PUSH_START_SHOULDER_LIFT + t * (PUSH_END_SHOULDER_LIFT - PUSH_START_SHOULDER_LIFT)
-    ef = PUSH_START_ELBOW_FLEX + t * (PUSH_END_ELBOW_FLEX - PUSH_START_ELBOW_FLEX)
-    logger.info(f"Placing cube at pan={pan:.1f}, reach={t:.2f}...")
-
-    move_to_pose(robot, {**HOME_POSE, "gripper.pos": GRIPPER_CLOSE_POS}, APPROACH_SPEED)
-    move_to_pose(
-        robot, {**HOME_POSE, "shoulder_pan.pos": pan, "gripper.pos": GRIPPER_CLOSE_POS}, APPROACH_SPEED
-    )
-    move_to_pose(robot, _push_pose(sl, ef, pan=pan, gripper=GRIPPER_CLOSE_POS), APPROACH_SPEED)
-    move_to_pose(robot, _push_pose(sl, ef, pan=pan, gripper=80.0), APPROACH_SPEED)
-    move_to_pose(robot, HOME_POSE, APPROACH_SPEED)
-    logger.info("Place complete.")
-    return pan, t
-
-
-# ── Entry point ───────────────────────────────────────────────────────────────
-
-
-def main():
-    parser = argparse.ArgumentParser(description="OMX arm reset / grab script")
-    parser.add_argument("--port", default="/dev/ttyACM1")
-    parser.add_argument("--robot_id", default="omx_follower")
-    parser.add_argument("--mode", choices=["grab", "grab_and_place"], default="grab_and_place")
-    args = parser.parse_args()
-
-    logging.basicConfig(level=logging.INFO, format="%(levelname)s: %(message)s")
-
-    robot = OmxFollower(OmxFollowerConfig(port=args.port, id=args.robot_id))
-    robot.connect(calibrate=True)
-
-    try:
-        if args.mode == "grab":
-            grab_cube(robot)
-        elif args.mode == "grab_and_place":
-            grab_cube(robot)
-            place_cube(robot)
-
-    finally:
-        robot.disconnect()
-
-
-if __name__ == "__main__":
-    main()
@@ -0,0 +1,175 @@
+#!/usr/bin/env python
+
+# Copyright 2025 The HuggingFace Inc. team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+Simple SO100/SO101 leader-follower teleoperation with spacebar intervention toggle.
+
+Modes:
+  - Default (not intervening): follower holds its current position.
+    The leader arm has torque ENABLED and mirrors the follower so there is no
+    large position jump when intervention starts.
+  - Intervention (SPACE pressed): leader torque DISABLED, human moves the leader
+    freely, and the follower mirrors the leader joint-by-joint.
+
+Usage:
+    uv run python examples/so100_teleop/teleop.py
+
+Controls:
+    SPACE  — toggle intervention on/off
+    Ctrl+C — exit
+"""
+
+import logging
+import os
+import sys
+import time
+from threading import Event, Thread
+
+from lerobot.robots.so_follower import SO101Follower, SO101FollowerConfig
+from lerobot.teleoperators.so_leader import SO101Leader
+from lerobot.teleoperators.so_leader.config_so_leader import SOLeaderTeleopConfig
+from lerobot.utils.robot_utils import precise_sleep
+
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
+# ── Optional pynput import (backend for the SPACE toggle) ────────────────────
+PYNPUT_AVAILABLE = True
+try:
+    if "DISPLAY" not in os.environ and "linux" in sys.platform:
+        raise ImportError("No DISPLAY set, pynput skipped.")
+    from pynput import keyboard as pynput_keyboard
+except Exception:  # any failure here only disables the SPACE toggle, the script still runs
+    pynput_keyboard = None
+    PYNPUT_AVAILABLE = False
+
+# ── Configure ports ──────────────────────────────────────────────────────────
+FOLLOWER_PORT = "/dev/ttyUSB0"  # ← change to your follower port
+LEADER_PORT = "/dev/ttyUSB1"  # ← change to your leader port
+FPS = 30  # target loop rate: the main loop sleeps off the remainder of each 1/FPS period
+
+
+def hold_position(robot) -> dict:
+    """Read current joint positions and write them back as the goal.
+
+    This prevents the motors from snapping to a stale Goal_Position register
+    value (which can happen when torque is re-enabled after calibration).
+    Returns the current position dict for reuse.
+    """
+    current = robot.bus.sync_read("Present_Position")
+    robot.bus.sync_write("Goal_Position", current)
+    return {f"{motor}.pos": val for motor, val in current.items()}
+
+
+# ── Connect ───────────────────────────────────────────────────────────────────
+follower_config = SO101FollowerConfig(
+    port=FOLLOWER_PORT,
+    id="follower_arm",
+    use_degrees=True,
+)
+leader_config = SOLeaderTeleopConfig(
+    port=LEADER_PORT,
+    id="leader_arm",
+    use_degrees=True,
+)
+
+follower = SO101Follower(follower_config)
+leader = SO101Leader(leader_config)
+
+follower.connect()
+leader.connect()
+
+# ── CRITICAL: hold both arms at their current position before doing anything ─
+# configure() enables follower torque, and the Goal_Position register may contain
+# a stale value from a previous session. Writing current→goal prevents sudden motion.
+follower_current = hold_position(follower)
+leader_current = hold_position(leader)  # leader torque is still off here, but sets the register
+
+# ── Intervention state + keyboard listener ───────────────────────────────────
+is_intervening = False
+stop_event = Event()
+
+
+def _start_keyboard_listener():
+    if not PYNPUT_AVAILABLE:
+        logger.warning("pynput not available — spacebar toggle disabled.")
+        return None
+
+    def on_press(key):
+        global is_intervening
+        if key == pynput_keyboard.Key.space:
+            is_intervening = not is_intervening
+            state = "INTERVENTION  (leader → follower)" if is_intervening else "IDLE  (follower holds)"
+            print(f"\n[SPACE] {state}\n")
+
+    def listen():
+        with pynput_keyboard.Listener(on_press=on_press) as listener:
+            while not stop_event.is_set():
+                time.sleep(0.05)
+            listener.stop()
+
+    t = Thread(target=listen, daemon=True)
+    t.start()
+    return t
+
+
+kbd_thread = _start_keyboard_listener()
+
+# Enable leader torque AFTER writing its goal to current position, so it holds in place.
+leader.bus.sync_write("Torque_Enable", 1)
+leader_torque_on = True
+
+print("\nTeleoperation ready.")
+print("  SPACE  → toggle intervention (leader controls follower)")
+print("  Ctrl+C → exit\n")
+
+try:
+    while True:
+        t0 = time.perf_counter()
+
+        if is_intervening:
+            # ── Intervention: leader torque OFF, follower mirrors leader ──────
+            if leader_torque_on:
+                leader.bus.sync_write("Torque_Enable", 0)
+                leader_torque_on = False
+
+            leader_action = leader.get_action()  # reads present leader joints
+            follower.send_action(leader_action)  # follower tracks leader
+
+        else:
+            # ── Idle: leader torque ON, leader mirrors follower, follower holds
+            if not leader_torque_on:
+                # Before re-enabling torque, set the leader's goal to its current
+                # position so it doesn't snap to the follower position suddenly.
+                hold_position(leader)
+                leader.bus.sync_write("Torque_Enable", 1)
+                leader_torque_on = True
+
+            follower_obs = follower.get_observation()
+            # Command leader to match follower (so next intervention has no jump)
+            goal_pos = {motor: follower_obs[f"{motor}.pos"] for motor in leader.bus.motors}
+            leader.bus.sync_write("Goal_Position", goal_pos)
+            # Follower holds — no send_action call
+
+        precise_sleep(max(1.0 / FPS - (time.perf_counter() - t0), 0.0))
+
+except KeyboardInterrupt:
+    print("\nExiting...")
+finally:
+    stop_event.set()
+    leader.bus.sync_write("Torque_Enable", 0)
+    follower.disconnect()
+    leader.disconnect()
@@ -0,0 +1,365 @@
+#!/usr/bin/env python
+
+# Copyright 2025 The HuggingFace Inc. team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import time
+from dataclasses import dataclass
+
+import numpy as np
+import torch
+
+from lerobot.configs.types import PipelineFeatureType, PolicyFeature
+from lerobot.model.kinematics import RobotKinematics
+from lerobot.processor import (
+    ProcessorStepRegistry,
+    RobotAction,
+    RobotActionProcessorStep,
+    RobotObservation,
+    RobotProcessorPipeline,
+    TransitionKey,
+)
+from lerobot.processor.converters import (
+    create_transition,
+    identity_transition,
+)
+from lerobot.robots.robot import Robot
+from lerobot.robots.so100_follower.robot_kinematic_processor import (
+    EEBoundsAndSafety,
+    EEReferenceAndDelta,
+    GripperVelocityToJoint,
+    InverseKinematicsRLStep,
+)
+from lerobot.robots.so101_follower.config_so101_follower import SO101FollowerConfig
+from lerobot.robots.so101_follower.so101_follower import SO101Follower
+from lerobot.teleoperators.so101_leader.config_so101_leader import SO101LeaderConfig
+from lerobot.teleoperators.so101_leader.so101_leader import SO101Leader
+from lerobot.utils.robot_utils import precise_sleep
+from lerobot.utils.rotation import Rotation
+
+
+def reset_follower_position(robot_arm: Robot, target_position: np.ndarray) -> None:
+    """Reset robot arm to target position using smooth trajectory."""
+    current_position_dict = robot_arm.bus.sync_read("Present_Position")
+    current_position = np.array(
+        [current_position_dict[name] for name in current_position_dict],
+        dtype=np.float32,
+    )
+    trajectory = torch.from_numpy(
+        np.linspace(current_position, target_position, 50)
+    )  # NOTE: 30 is just an arbitrary number
+    for pose in trajectory:
+        action_dict = dict(zip(current_position_dict, pose, strict=False))
+        robot_arm.bus.sync_write("Goal_Position", action_dict)
+        precise_sleep(0.015)
+
+
+@dataclass
+class LogRobotAction(RobotActionProcessorStep):
+    def action(self, action: RobotAction) -> RobotAction:
+        print(f"Robot action: {action}")
+        return action
+
+    def transform_features(self, features):
+        # features[PipelineFeatureType.ACTION][ACTION] = PolicyFeature(
+        #     type=FeatureType.ACTION, shape=(len(self.motor_names),)
+        # )
+        return features
+
+
+@ProcessorStepRegistry.register("forward_kinematics_joints_to_ee_target_action")
+@dataclass
+class ForwardKinematicsJointsToEETargetAction(RobotActionProcessorStep):
+    """
+    Computes the end-effector pose from joint positions using forward kinematics (FK).
+
+    This step is typically used to add the robot's Cartesian pose to the observation space,
+    which can be useful for visualization or as an input to a policy.
+
+    Attributes:
+        kinematics: The robot's kinematic model.
+    """
+
+    kinematics: RobotKinematics
+    motor_names: list[str]
+    end_effector_step_sizes: dict
+    max_gripper_pos: float
+    use_ik_solution: bool = False
+
+    def action(self, action: RobotAction) -> RobotAction:
+        # return compute_forward_kinematics_joints_to_ee(action, self.kinematics, self.motor_names)
+        teleop_action = action
+        raw_joint_pos = self.transition.get(TransitionKey.OBSERVATION)
+
+        leader_pos = np.array([teleop_action[f"{motor}.pos"] for motor in self.motor_names])
+
+        leader_ee = self.kinematics.forward_kinematics(leader_pos)
+
+        if self.use_ik_solution and "IK_solution" in self.transition.get(TransitionKey.COMPLEMENTARY_DATA):
+            follower_pos = transition.get(TransitionKey.COMPLEMENTARY_DATA)["IK_solution"]
+        else:
+            follower_pos = np.array([raw_joint_pos[f"{motor}.pos"] for motor in self.motor_names])
+
+        follower_ee = self.kinematics.forward_kinematics(follower_pos)
+
+        follower_ee_pos = follower_ee[:3, 3]
+        follower_ee_rvec = Rotation.from_matrix(follower_ee[:3, :3]).as_rotvec()
+        # follower_gripper_pos = raw_joint_pos["gripper.pos"]
+        follower_gripper_pos = follower_pos[-1]  # assuming gripper is the last motor
+
+        leader_ee_pos = leader_ee[:3, 3]
+        leader_ee_rvec = Rotation.from_matrix(leader_ee[:3, :3]).as_rotvec()
+        leader_gripper_pos = np.clip(
+            teleop_action["gripper.pos"], -self.max_gripper_pos, self.max_gripper_pos
+        )
+
+        print("f pos:", follower_ee_pos)
+        print("l pos:", leader_ee_pos)
+
+        print("f rvec:", follower_ee_rvec)
+        print("l rvec:", leader_ee_rvec)
+
+        # follower_ee_pos = follower_ee[:3, 3]
+        # follower_ee_rvec = Rotation.from_matrix(follower_ee[:3, :3]).as_rotvec()
+
+        delta_pos = leader_ee_pos - follower_ee_pos
+
+        # For rotation: compute relative rotation from follower to leader
+        # R_leader = R_follower * R_delta  =>  R_delta = R_follower^T * R_leader
+        r_delta = follower_ee[:3, :3].T @ leader_ee[:3, :3]
+        delta_rvec = Rotation.from_matrix(r_delta).as_rotvec()
+        delta_gripper = leader_gripper_pos - follower_gripper_pos
+
+        desired = np.eye(4, dtype=float)
+        desired[:3, :3] = follower_ee[:3, :3] @ r_delta
+        desired[:3, 3] = follower_ee[:3, 3] + delta_pos
+
+        pos = desired[:3, 3]
+        tw = Rotation.from_matrix(desired[:3, :3]).as_rotvec()
+
+        assert np.allclose(pos, leader_ee_pos), "Position delta computation error"
+        assert np.allclose(tw, leader_ee_rvec), "Orientation delta computation error"
+        assert np.isclose(follower_gripper_pos + delta_gripper, leader_gripper_pos), (
+            "Gripper delta computation error"
+        )
+
+        # Normalize the action to the range [-1, 1]
+        delta_pos = delta_pos / np.array(
+            [
+                self.end_effector_step_sizes["x"],
+                self.end_effector_step_sizes["y"],
+                self.end_effector_step_sizes["z"],
+            ]
+        )
+        delta_rvec = delta_rvec / np.array(
+            [
+                self.end_effector_step_sizes["wx"],
+                self.end_effector_step_sizes["wy"],
+                self.end_effector_step_sizes["wz"],
+            ]
+        )
+
+        # Check if any of the normalized deltas exceed 1.0
+
+        max_normalized_pos = max(
+            abs(delta_pos[0]),
+            abs(delta_pos[1]),
+            abs(delta_pos[2]),
+        )
+
+        max_normalized_rot = max(
+            abs(delta_rvec[0]),
+            abs(delta_rvec[1]),
+            abs(delta_rvec[2]),
+        )
+
+        # Use the same scaling factor for both position and rotation
+        max_normalized = max(max_normalized_pos, max_normalized_rot)
+        if max_normalized > 1.0:
+            print(f"Warning: EE delta too large, scaling. Max normalized delta: {max_normalized_pos}")
+            print(f"Original delta_pos: {delta_pos}, delta_rvec: {delta_rvec}")
+            # Scale proportionally
+            delta_pos = delta_pos / max_normalized
+            delta_rvec = delta_rvec / max_normalized
+
+        new_action = {}
+        new_action["enabled"] = True
+        new_action["target_x"] = float(delta_pos[0])
+        new_action["target_y"] = float(delta_pos[1])
+        new_action["target_z"] = float(delta_pos[2])
+        new_action["target_wx"] = float(delta_rvec[0])
+        new_action["target_wy"] = float(delta_rvec[1])
+        new_action["target_wz"] = float(delta_rvec[2])
+        new_action["gripper_vel"] = float(
+            np.clip(delta_gripper, -self.max_gripper_pos, self.max_gripper_pos) / self.max_gripper_pos
+        )
+        return new_action
+
+    def transform_features(
+        self, features: dict[PipelineFeatureType, dict[str, PolicyFeature]]
+    ) -> dict[PipelineFeatureType, dict[str, PolicyFeature]]:
+        # TODO: implement feature transformation
+        return features
+
+
+# Target rate of the teleop loop, in iterations per second.
+FPS = 20
+
+# Device configs: one follower arm and one leader arm, each addressed by its own device path.
+follower_config = SO101FollowerConfig(
+    port="/dev/usb_follower_arm_a",
+    id="follower_arm_a",
+    use_degrees=True,
+)
+leader_config = SO101LeaderConfig(
+    port="/dev/usb_leader_arm_a",
+    id="leader_arm_a",
+    use_degrees=True,
+)
+
+# Device handles built from the configs above.
+follower = SO101Follower(follower_config)
+leader = SO101Leader(leader_config)
+
+# NOTE: It is highly recommended to use the urdf in the SO-ARM100 repo: https://github.com/TheRobotStudio/SO-ARM100/blob/main/Simulation/SO101/so101_new_calib.urdf
+so101_urdf_path = "../SO-ARM100/Simulation/SO101/so101_new_calib.urdf"
+
+# One kinematics solver per arm; both use the same URDF and target frame and differ
+# only in the joint names taken from their own motor bus.
+follower_kinematics_solver = RobotKinematics(
+    urdf_path=so101_urdf_path,
+    target_frame_name="gripper_frame_link",
+    joint_names=list(follower.bus.motors.keys()),
+)
+leader_kinematics_solver = RobotKinematics(
+    urdf_path=so101_urdf_path,
+    target_frame_name="gripper_frame_link",
+    joint_names=list(leader.bus.motors.keys()),
+)
+
+# Step size per end-effector axis: one value shared by the translation axes and
+# 5 degrees (expressed in radians) shared by the rotation axes.
+linear_step = 0.004
+angular_step = 5 * np.pi / 180
+end_effector_step_sizes = {
+    **dict.fromkeys(("x", "y", "z"), linear_step),
+    **dict.fromkeys(("wx", "wy", "wz"), angular_step),
+}
+
+
+# Build pipeline to convert teleop joints to EE action.
+# identity_transition is used on both ends, so the pipeline is called with a transition
+# and hands a transition back (that is how the teleop loop uses it).
+leader_to_ee = RobotProcessorPipeline[RobotAction, RobotAction](
+    steps=[
+        # LogRobotAction entries bracket each stage; presumably they log the action for debugging.
+        LogRobotAction(),
+        ForwardKinematicsJointsToEETargetAction(
+            kinematics=leader_kinematics_solver,
+            motor_names=list(leader.bus.motors.keys()),
+            end_effector_step_sizes=end_effector_step_sizes,
+            # NOTE(review): same value as clip_max in GripperVelocityToJoint below — confirm they must stay in sync.
+            max_gripper_pos=30.0,
+            use_ik_solution=True,
+        ),
+        LogRobotAction(),
+    ],
+    to_transition=identity_transition,
+    to_output=identity_transition,
+)
+
+# Build pipeline to convert EE action to robot joints.
+# NOTE(review): the type parameters name a (RobotAction, RobotObservation) tuple as input, but with
+# identity_transition the teleop loop passes a transition directly — confirm the annotation is intended.
+ee_to_follower_joints = RobotProcessorPipeline[tuple[RobotAction, RobotObservation], RobotAction](
+    [
+        LogRobotAction(),
+        # Uses the follower's kinematics with the same step sizes as the leader-side pipeline.
+        EEReferenceAndDelta(
+            kinematics=follower_kinematics_solver,
+            # end_effector_step_sizes={"x": 0.006, "y": 0.01, "z": 0.005},
+            end_effector_step_sizes=end_effector_step_sizes,
+            motor_names=list(follower.bus.motors.keys()),
+            use_latched_reference=False,
+            use_ik_solution=True,
+        ),
+        LogRobotAction(),
+        # Workspace limits given as [x, y, z] minimum and maximum, plus a per-step cap
+        # (max_ee_step_m; presumably metres, going by the parameter name).
+        EEBoundsAndSafety(
+            end_effector_bounds={
+                "min": [-0.05, -0.55, -0.0075],
+                "max": [0.55, 0.55, 0.55],
+            },
+            # end_effector_bounds={"min": [-1.0, -1.0, -1.0], "max": [1.0, 1.0, 1.0]},
+            max_ee_step_m=0.05,
+        ),
+        LogRobotAction(),
+        GripperVelocityToJoint(
+            clip_max=30.0,
+            speed_factor=0.2,
+            discrete_gripper=False,
+            scale_velocity=True,
+            use_ik_solution=True,
+        ),
+        LogRobotAction(),
+        # Last stage before the follower: uses the follower's kinematics solver and motor names.
+        InverseKinematicsRLStep(
+            kinematics=follower_kinematics_solver,
+            motor_names=list(follower.bus.motors.keys()),
+            initial_guess_current_joints=False,
+        ),
+        LogRobotAction(),
+    ],
+    to_transition=identity_transition,
+    to_output=identity_transition,
+)
+
+# Connect to the robot and teleoperator
+follower.connect()
+leader.connect()
+
+# Joint targets for the shared start pose (presumably one value per motor, in degrees
+# since both configs set use_degrees=True — confirm against the motor order).
+reset_pose = [0.0, 10, 20, 60.00, 90.00, 10.00]
+
+# Drive both arms to the same start pose so leader and follower begin aligned.
+start_time = time.perf_counter()
+reset_follower_position(follower, np.array(reset_pose))
+reset_follower_position(leader, np.array(reset_pose))
+# Wait out the remainder of a 5 s settling window. Clamp at zero so that resets which
+# already took longer than 5 s never hand a negative duration to the sleep helper
+# (same guard as the per-iteration sleep in the teleop loop).
+precise_sleep(max(5.0 - (time.perf_counter() - start_time), 0.0))
+# time.sleep(10)
+# Write 0 to Torque_Enable on the leader bus (presumably so the leader can be moved by hand).
+leader.bus.sync_write("Torque_Enable", 0)
+
+# Init rerun viewer
+# init_rerun(session_name="so100_so100_EE_teleop")
+
+# Holds the previous iteration's transition so its complementary data can be carried forward.
+transition = None
+
+print("Starting teleop loop...")
+try:
+    while True:
+        print("New loop iteration")
+        t0 = time.perf_counter()
+
+        # Get robot observation
+        robot_obs = follower.get_observation()
+
+        # Get teleop observation
+        leader_joints_obs = leader.get_action()
+
+        # teleop joints -> teleop EE action
+        if transition is None:
+            # First iteration: there is no earlier transition to inherit from.
+            transition = create_transition(action=leader_joints_obs, observation=robot_obs)
+        else:
+            transition = create_transition(
+                action=leader_joints_obs,
+                observation=robot_obs,
+                complementary_data=transition.get(TransitionKey.COMPLEMENTARY_DATA),
+            )
+
+        transition = leader_to_ee(transition)
+        leader_ee_act = transition[TransitionKey.ACTION]
+
+        # teleop EE -> robot joints
+        transition = create_transition(
+            action=leader_ee_act,
+            observation=robot_obs,
+            complementary_data=transition.get(TransitionKey.COMPLEMENTARY_DATA),
+        )
+        transition = ee_to_follower_joints(transition)
+        follower_joints_act = transition[TransitionKey.ACTION]
+
+        # Send action to robot
+        _ = follower.send_action(follower_joints_act)
+
+        # Visualize
+        # log_rerun_data(observation=leader_ee_act, action=follower_joints_act)
+
+        # Sleep for whatever is left of this iteration's 1/FPS budget (never negative).
+        precise_sleep(max(1.0 / FPS - (time.perf_counter() - t0), 0.0))
+except KeyboardInterrupt:
+    # Ctrl-C is the expected way to leave the endless loop; fall through to cleanup.
+    pass
+finally:
+    # Always release both devices, even when an iteration raised. The nested
+    # try/finally makes sure the leader is disconnected if the follower's
+    # disconnect itself fails.
+    try:
+        follower.disconnect()
+    finally:
+        leader.disconnect()
@@ -9,7 +9,7 @@ from lerobot.datasets import LeRobotDataset
 from lerobot.envs.configs import HILSerlProcessorConfig, HILSerlRobotEnvConfig
 from lerobot.policies import GaussianActorConfig
 from lerobot.policies.gaussian_actor.modeling_gaussian_actor import GaussianActorPolicy
-from lerobot.rewards.classifier.modeling_classifier import Classifier
+from lerobot.policies.gaussian_actor.reward_model.modeling_classifier import Classifier
 from lerobot.rl.algorithms.sac import SACAlgorithm, SACAlgorithmConfig
 from lerobot.rl.buffer import ReplayBuffer
 from lerobot.rl.gym_manipulator import make_robot_env
@@ -1,7 +1,7 @@
 import torch

 from lerobot.datasets import LeRobotDataset
-from lerobot.rewards import RewardClassifierConfig, make_reward_model, make_reward_pre_post_processors
+from lerobot.policies import RewardClassifierConfig, make_policy, make_pre_post_processors


 def main():
@@ -22,10 +22,10 @@ def main():
        model_name="microsoft/resnet-18",
    )

-    # Make reward model, preprocessor, and optimizer
-    reward_model = make_reward_model(config, dataset_stats=dataset.meta.stats)
-    optimizer = config.get_optimizer_preset().build(reward_model.parameters())
-    preprocessor, _ = make_reward_pre_post_processors(config, dataset_stats=dataset.meta.stats)
+    # Make policy, preprocessor, and optimizer
+    policy = make_policy(config, ds_meta=dataset.meta)
+    optimizer = config.get_optimizer_preset().build(policy.parameters())
+    preprocessor, _ = make_pre_post_processors(policy_cfg=config, dataset_stats=dataset.meta.stats)

    classifier_id = "<user>/reward_classifier_hil_serl_example"

@@ -42,7 +42,7 @@ def main():
            batch = preprocessor(batch)

            # Forward pass
-            loss, output_dict = reward_model.forward(batch)
+            loss, output_dict = policy.forward(batch)

            # Backward pass and optimization
            optimizer.zero_grad()
@@ -58,8 +58,8 @@ def main():

    print("Training finished!")

-    # You can now save the trained reward model.
-    reward_model.push_to_hub(classifier_id)
+    # You can now save the trained policy.
+    policy.push_to_hub(classifier_id)


 if __name__ == "__main__":
@@ -59,8 +59,8 @@ keywords = ["lerobot", "huggingface", "robotics",  "machine learning", "artifici

 dependencies = [
    # Core ML
-    "torch>=2.7,<2.12.0",
-    "torchvision>=0.22.0,<0.27.0",
+    "torch>=2.7,<2.11.0",
+    "torchvision>=0.22.0,<0.26.0",
    "numpy>=2.0.0,<2.3.0", # NOTE: Explicitly listing numpy helps the resolver converge faster. Upper bound imposed by opencv-python-headless.
    "opencv-python-headless>=4.9.0,<4.14.0",
    "Pillow>=10.0.0,<13.0.0",
@@ -99,18 +99,7 @@ dataset = [
    "pandas>=2.0.0,<3.0.0", # NOTE: Transitive dependency of datasets
    "pyarrow>=21.0.0,<30.0.0", # NOTE: Transitive dependency of datasets
    "lerobot[av-dep]",
-
-    # NOTE: torchcodec wheel availability matrix (PyPI):
-    #   - linux x86_64/amd64 + macOS arm64 : wheels since 0.3.0 (the historic supported set).
-    #   - win32 x86_64                     : wheels since 0.7.0  (needs torch>=2.8).
-    #   - linux aarch64/arm64              : wheels since 0.11.0 (needs torch>=2.11).
-    #   - macOS x86_64 (Intel) and linux armv7l: no wheels in any released version -> fall through to the PyAV decoder.
-    # Each platform gets its own line so the resolver picks the minimum version that has a wheel for it.
-
-    # Other torch/torchcodec pairings (informational): 0.8.1 = ffmpeg>=8 support, 0.10 = system-wide ffmpeg support, 0.12 needs torch==2.12.
-    "torchcodec>=0.3.0,<0.12.0; (sys_platform == 'linux' and (platform_machine == 'x86_64' or platform_machine == 'AMD64')) or (sys_platform == 'darwin' and platform_machine == 'arm64')",
-    "torchcodec>=0.7.0,<0.12.0; sys_platform == 'win32'",
-    "torchcodec>=0.11.0,<0.12.0; sys_platform == 'linux' and (platform_machine == 'aarch64' or platform_machine == 'arm64')",
+    "torchcodec>=0.3.0,<0.11.0; sys_platform != 'win32' and (sys_platform != 'linux' or (platform_machine != 'aarch64' and platform_machine != 'arm64' and platform_machine != 'armv7l')) and (sys_platform != 'darwin' or platform_machine != 'x86_64')", # NOTE: Windows support starts at version 0.7 (needs torch==2.8), ffmpeg>=8 support starts at version 0.8.1 (needs torch==2.9), system-wide ffmpeg support starts at version 0.10 (needs torch==2.10).
    "jsonlines>=4.0.0,<5.0.0",
 ]
 training = [
@@ -139,7 +128,7 @@ dataset_viz = ["lerobot[dataset]", "lerobot[viz]"]
 av-dep = ["av>=15.0.0,<16.0.0"]
 pygame-dep = ["pygame>=2.5.1,<2.7.0"]
 placo-dep = ["placo>=0.9.6,<0.9.17"]
-transformers-dep = ["transformers>=5.4.0,<5.6.0"]
+transformers-dep = ["transformers==5.3.0"] # TODO(Steven): https://github.com/huggingface/lerobot/pull/3249
 grpcio-dep = ["grpcio==1.73.1", "protobuf>=6.31.1,<6.32.0"]
 can-dep = ["python-can>=4.2.0,<5.0.0"]
 peft-dep = ["peft>=0.18.0,<1.0.0"]
@@ -204,10 +193,8 @@ groot = [
    "flash-attn>=2.5.9,<3.0.0 ; sys_platform != 'darwin'"
 ]
 sarm = ["lerobot[transformers-dep]", "pydantic>=2.0.0,<3.0.0", "faker>=33.0.0,<35.0.0", "lerobot[matplotlib-dep]", "lerobot[qwen-vl-utils-dep]"]
-robometer = ["lerobot[transformers-dep]", "lerobot[qwen-vl-utils-dep]", "lerobot[peft-dep]"]
 xvla = ["lerobot[transformers-dep]"]
-eo1 = ["lerobot[transformers-dep]", "lerobot[qwen-vl-utils-dep]"]
-hilserl = ["lerobot[transformers-dep]", "lerobot[dataset]", "gym-hil>=0.1.13,<0.2.0", "lerobot[grpcio-dep]", "lerobot[placo-dep]"]
+hilserl = ["lerobot[transformers-dep]", "gym-hil>=0.1.13,<0.2.0", "lerobot[grpcio-dep]", "lerobot[placo-dep]"]

 # Features
 async = ["lerobot[grpcio-dep]", "lerobot[matplotlib-dep]"]
@@ -303,23 +290,8 @@ lerobot-imgtransform-viz="lerobot.scripts.lerobot_imgtransform_viz:main"
 lerobot-edit-dataset="lerobot.scripts.lerobot_edit_dataset:main"
 lerobot-setup-can="lerobot.scripts.lerobot_setup_can:main"
 lerobot-rollout="lerobot.scripts.lerobot_rollout:main"
-lerobot-export-robometer="lerobot.scripts.lerobot_export_robometer:main"

 # ---------------- Tool Configurations ----------------
-
-# cu128 wheels keep broad hardware reach; the driver floor is 570.86.
-# To use a different CUDA variant, reinstall torch with an explicit index, e.g.:
-#   uv pip install --force-reinstall torch torchvision \
-#       --index-url https://download.pytorch.org/whl/cu130
-[[tool.uv.index]]
-name = "pytorch-cu128"
-url = "https://download.pytorch.org/whl/cu128"
-explicit = true
-
-[tool.uv.sources]
-torch = [{ index = "pytorch-cu128", marker = "sys_platform == 'linux'" }]
-torchvision = [{ index = "pytorch-cu128", marker = "sys_platform == 'linux'" }]
-
 [tool.setuptools.package-data]
 lerobot = ["envs/*.json"]

@@ -1,164 +0,0 @@
-#!/usr/bin/env python
-# Copyright 2026 The HuggingFace Inc. team. All rights reserved.
-"""Pinpoint exactly which rows of ``embed_tokens`` / ``lm_head`` differ.
-
-Useful follow-up to ``scripts/verify_robometer_export.py`` when the verifier
-reports a small tail of differing keys but you want to know whether the
-diff is:
-
-1. Concentrated in the 5 special-token rows added by ``resize_token_embeddings``
-   (expected non-determinism: mean-resize sampling differs between runs).
-2. Spread across the full vocabulary (would point to a real loading bug).
-
-Also confirms whether ``apply_upstream_checkpoint`` actually overwrites the
-embed/lm-head tensors when loading the upstream state dict (vs. silently
-skipping them due to a key mismatch).
-"""
-
-from __future__ import annotations
-
-import argparse
-import sys
-
-import torch
-from safetensors.torch import load_file
-
-from lerobot.configs.rewards import RewardModelConfig
-from lerobot.rewards.robometer import RobometerConfig, RobometerRewardModel
-from lerobot.rewards.robometer._upstream_loader import (
-    _download_robometer_snapshot,
-    _remap_state_dict_keys,
-    _resolve_checkpoint_safetensors_files,
-    apply_upstream_checkpoint,
-)
-
-EMBED_KEY = "model.model.language_model.embed_tokens.weight"
-LMHEAD_KEY = "model.lm_head.weight"
-
-
-def _load_upstream(path: str) -> RobometerRewardModel:
-    cfg = RobometerConfig(pretrained_path=path, device="cpu")
-    model = RobometerRewardModel(cfg)
-    apply_upstream_checkpoint(model, path)
-    model.eval()
-    return model
-
-
-def _load_lerobot(path: str) -> RobometerRewardModel:
-    cfg = RewardModelConfig.from_pretrained(path)
-    if not isinstance(cfg, RobometerConfig):
-        raise TypeError(f"Expected RobometerConfig, got {type(cfg)}")
-    cfg.pretrained_path = path
-    cfg.device = "cpu"
-    return RobometerRewardModel.from_pretrained(path, config=cfg)
-
-
-def _inspect_upstream_state_dict(upstream_path: str, model: RobometerRewardModel) -> None:
-    """Dump the upstream state-dict view of the embed/lm-head tensors.
-
-    Loads the raw upstream safetensors (pre-remap), runs the remapper, and
-    reports whether the embed/lm-head keys survive into the merged dict that
-    eventually hits ``model.load_state_dict``.
-    """
-    snapshot_dir = _download_robometer_snapshot(upstream_path)
-    files = _resolve_checkpoint_safetensors_files(snapshot_dir)
-    merged: dict[str, torch.Tensor] = {}
-    for path in files:
-        merged.update(load_file(str(path)))
-    remapped = _remap_state_dict_keys(merged, model)
-
-    print(f"\n=== Upstream state-dict inspection (snapshot at {snapshot_dir}) ===")
-    print(f"raw keys (before remap)  : {len(merged)}")
-    print(f"keys after remap         : {len(remapped)}")
-    print(f"model expects (state_dict): {len(model.state_dict())}")
-
-    expected = set(model.state_dict())
-    present_after_remap = set(remapped) & expected
-    print(f"keys present after remap : {len(present_after_remap)}")
-
-    missing_keys = expected - set(remapped)
-    print(f"keys missing from remap  : {len(missing_keys)}")
-    if missing_keys:
-        sample = list(missing_keys)[:10]
-        print(f"  sample missing keys    : {sample}")
-
-    unexpected_keys = set(remapped) - expected
-    print(f"keys unexpected by model : {len(unexpected_keys)}")
-    if unexpected_keys:
-        sample = list(unexpected_keys)[:10]
-        print(f"  sample unexpected keys : {sample}")
-
-    for key in (EMBED_KEY, LMHEAD_KEY):
-        present = key in remapped
-        shape = tuple(remapped[key].shape) if present else None
-        print(f"  {key:60s}  present={present}, shape={shape}")
-
-
-def _diff_embed(name: str, a: torch.Tensor, b: torch.Tensor, special_token_count: int) -> None:
-    a = a.float()
-    b = b.float()
-    if a.shape != b.shape:
-        print(f"❌ {name} shape mismatch: {tuple(a.shape)} vs {tuple(b.shape)}")
-        return
-
-    abs_diff = (a - b).abs()
-    per_row_max = abs_diff.max(dim=1).values
-    nz_rows = (per_row_max > 0).nonzero(as_tuple=True)[0].tolist()
-    print(f"\n=== {name} (shape {tuple(a.shape)}) ===")
-    print(f"global max|Δ|         = {abs_diff.max().item():.3e}")
-    print(f"rows with any diff    = {len(nz_rows)}")
-    if nz_rows:
-        first = nz_rows[:10]
-        last = nz_rows[-10:]
-        print(f"  first nonzero rows  = {first}")
-        print(f"  last nonzero rows   = {last}")
-        vocab_size = a.shape[0]
-        base_vocab = vocab_size - special_token_count
-        special_rows = list(range(base_vocab, vocab_size))
-        in_special = [r for r in nz_rows if r in special_rows]
-        out_special = [r for r in nz_rows if r not in special_rows]
-        print(
-            f"  diffs in special-token rows ({base_vocab}..{vocab_size - 1}): {len(in_special)}/{special_token_count}"
-        )
-        print(f"  diffs in base-vocab rows  (0..{base_vocab - 1})           : {len(out_special)}")
-        for r in special_rows:
-            print(
-                f"    row {r}: max|Δ|={per_row_max[r].item():.3e}, "
-                f"upstream_norm={a[r].norm().item():.3e}, lerobot_norm={b[r].norm().item():.3e}"
-            )
-
-
-def main() -> int:
-    parser = argparse.ArgumentParser(
-        description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter
-    )
-    parser.add_argument("--upstream", required=True)
-    parser.add_argument("--lerobot", required=True)
-    parser.add_argument(
-        "--special-token-count",
-        type=int,
-        default=5,
-        help="Number of special tokens Robometer adds. Defaults to len(ROBOMETER_SPECIAL_TOKENS)=5.",
-    )
-    args = parser.parse_args()
-
-    print(f"Loading upstream:        {args.upstream}")
-    upstream = _load_upstream(args.upstream)
-    print(f"Loading LeRobot-format:  {args.lerobot}")
-    lerobot = _load_lerobot(args.lerobot)
-
-    _inspect_upstream_state_dict(args.upstream, upstream)
-
-    sd_u, sd_l = upstream.state_dict(), lerobot.state_dict()
-
-    for key in (EMBED_KEY, LMHEAD_KEY):
-        if key not in sd_u or key not in sd_l:
-            print(f"❌ key missing: {key} (upstream={key in sd_u}, lerobot={key in sd_l})")
-            continue
-        _diff_embed(key, sd_u[key], sd_l[key], args.special_token_count)
-
-    return 0
-
-
-if __name__ == "__main__":
-    sys.exit(main())
@@ -1,168 +0,0 @@
-#!/usr/bin/env python
-# Copyright 2026 The HuggingFace Inc. team. All rights reserved.
-"""Extract one LIBERO episode for Robometer parity testing.
-
-Loads a LeRobot LIBERO (or any video-bearing LeRobot) dataset, picks one
-episode, samples ``--num-frames`` frames uniformly across its duration
-(matching upstream Robometer's default of 8 frames), and saves them to
-``.npz`` plus a sidecar ``.txt`` task file.
-
-The ``.npz`` layout (``frames`` key, ``(T, H, W, C) uint8``) is what upstream
-``example_inference_local.py`` consumes, so the same file feeds both pipelines
-and frame sampling cannot drift.
-
-Workflow:
-
-1. Run this script (LeRobot env) to produce ``frames.npz`` + ``task.txt``.
-2. Pass them to upstream ``scripts/example_inference_local.py``
-   (upstream env) to produce reference progress / success outputs.
-3. Pass the same ``frames.npz`` to ``scripts/parity_robometer.py``
-   (LeRobot env) to compare both sides.
-
-Example:
-
-    uv run python scripts/extract_libero_episode_for_parity.py \\
-        --repo-id lerobot/libero_10_image \\
-        --episode 0 \\
-        --num-frames 8 \\
-        --out-dir /tmp/libero_ep0
-"""
-
-from __future__ import annotations
-
-import argparse
-import sys
-from pathlib import Path
-
-import numpy as np
-import torch
-
-from lerobot.configs.types import FeatureType
-from lerobot.datasets.lerobot_dataset import LeRobotDataset
-
-
-def _pick_visual_feature(features: dict, requested: str | None) -> str:
-    """Return a visual feature key, preferring ``requested`` when given."""
-    visual_keys = [
-        key
-        for key, ft in features.items()
-        if getattr(ft, "type", None) == FeatureType.VISUAL or ft.get("dtype", "") == "video"
-    ]
-    if not visual_keys:
-        raise ValueError(f"Dataset has no visual feature; available: {list(features)}")
-    if requested is not None:
-        if requested not in visual_keys:
-            raise ValueError(f"Camera key {requested!r} not in dataset visual features {visual_keys}")
-        return requested
-    return visual_keys[0]
-
-
-def _frame_uint8_hwc(tensor: torch.Tensor) -> np.ndarray:
-    """Convert a LeRobotDataset video frame to ``uint8`` ``(H, W, C)`` RGB."""
-    arr = tensor.detach().cpu().numpy()
-    if arr.ndim == 3 and arr.shape[0] in (1, 3):
-        arr = arr.transpose(1, 2, 0)
-    if arr.dtype != np.uint8:
-        arr = np.clip(arr * 255.0 if arr.max() <= 1.0 + 1e-3 else arr, 0, 255).astype(np.uint8)
-    if arr.shape[-1] == 1:
-        arr = np.repeat(arr, 3, axis=-1)
-    return arr
-
-
-def main() -> int:
-    parser = argparse.ArgumentParser(
-        description=__doc__,
-        formatter_class=argparse.RawDescriptionHelpFormatter,
-    )
-    parser.add_argument(
-        "--repo-id",
-        default="lerobot/libero_10_image",
-        help="LeRobot LIBERO (or other) dataset repo id (default: lerobot/libero_10_image).",
-    )
-    parser.add_argument("--episode", type=int, default=0, help="Episode index.")
-    parser.add_argument(
-        "--camera-key",
-        default=None,
-        help="Visual feature key (e.g. observation.images.image). Auto-selects first if omitted.",
-    )
-    parser.add_argument(
-        "--num-frames",
-        type=int,
-        default=8,
-        help="Number of frames to sample uniformly (default: 8 — Robometer's training-time default).",
-    )
-    parser.add_argument(
-        "--out-dir",
-        type=Path,
-        default=Path("outputs/robometer_parity/libero"),
-        help="Directory to write frames.npz / task.txt / frame_indices.npy.",
-    )
-    args = parser.parse_args()
-
-    print(f"Loading {args.repo_id} (episode {args.episode})...")
-    dataset = LeRobotDataset(args.repo_id, episodes=[args.episode])
-
-    camera_key = _pick_visual_feature(dataset.features, args.camera_key)
-    print(f"Using camera key: {camera_key}")
-
-    ep_from = int(dataset.episode_data_index["from"][0].item())
-    ep_to = int(dataset.episode_data_index["to"][0].item())
-    total_frames = ep_to - ep_from
-    if total_frames <= 0:
-        print(f"ERROR: episode {args.episode} has no frames.", file=sys.stderr)
-        return 1
-    print(f"Episode has {total_frames} frames; sampling {args.num_frames} uniformly.")
-
-    indices = np.linspace(0, total_frames - 1, num=min(args.num_frames, total_frames), dtype=int)
-    frames: list[np.ndarray] = []
-    task: str = ""
-    for offset in indices:
-        sample = dataset[ep_from + int(offset)]
-        frame_tensor = sample[camera_key]
-        frames.append(_frame_uint8_hwc(frame_tensor))
-        if not task:
-            task = sample.get("task", "") or ""
-
-    if not task:
-        print("ERROR: episode has no task description in metadata.", file=sys.stderr)
-        return 1
-
-    frames_array = np.stack(frames)
-
-    args.out_dir.mkdir(parents=True, exist_ok=True)
-    frames_path = args.out_dir / "frames.npz"
-    task_path = args.out_dir / "task.txt"
-    indices_path = args.out_dir / "frame_indices.npy"
-
-    np.savez(frames_path, frames=frames_array)
-    task_path.write_text(task + "\n", encoding="utf-8")
-    np.save(indices_path, indices)
-
-    print()
-    print(f"Wrote {frames_path} (shape={frames_array.shape}, dtype={frames_array.dtype})")
-    print(f"Wrote {task_path}   (task={task!r})")
-    print(f"Wrote {indices_path} (frame_indices={indices.tolist()})")
-    print()
-    print("Next steps:")
-    print("  # in upstream env (where `robometer` is importable):")
-    print(
-        f"  python third_party/robometer/scripts/example_inference_local.py \\\n"
-        f"      --model-path robometer/Robometer-4B \\\n"
-        f"      --video {frames_path} \\\n"
-        f'      --task "{task}" \\\n'
-        f"      --out {args.out_dir / 'upstream.npy'}"
-    )
-    print()
-    print("  # back in LeRobot env:")
-    print(
-        f"  uv run python scripts/parity_robometer.py \\\n"
-        f"      --frames {frames_path} \\\n"
-        f'      --task "{task}" \\\n'
-        f"      --upstream-progress {args.out_dir / 'upstream.npy'} \\\n"
-        f"      --upstream-success  {args.out_dir / 'upstream_success_probs.npy'}"
-    )
-    return 0
-
-
-if __name__ == "__main__":
-    sys.exit(main())
@@ -1,232 +0,0 @@
-#!/usr/bin/env python
-# Copyright 2026 The HuggingFace Inc. team. All rights reserved.
-"""Functional parity check: LeRobot Robometer vs. upstream Robometer.
-
-Runs the in-tree :class:`RobometerRewardModel` on the same frames + task that
-upstream Robometer was run on, and compares per-frame progress / success
-predictions against reference outputs saved by upstream's
-``scripts/example_inference_local.py``.
-
-Workflow:
-
-1. In the upstream Robometer environment (where ``robometer`` is importable),
-   run::
-
-       python third_party/robometer/scripts/example_inference_local.py \\
-           --model-path robometer/Robometer-4B \\
-           --video /path/to/episode.mp4 \\
-           --task "Open the drawer" \\
-           --fps 1.0 \\
-           --out /tmp/robometer_upstream.npy
-
-   This produces:
-   - ``/tmp/robometer_upstream.npy``               (progress predictions)
-   - ``/tmp/robometer_upstream_success_probs.npy`` (success probabilities)
-
-2. Extract the exact same frames the upstream script used, save as ``.npz``::
-
-       # quick helper: extract frames at the same fps and save as .npz
-       python -c "
-       from third_party.robometer.scripts.example_inference_local import load_frames_input
-       import numpy as np
-       frames = load_frames_input('/path/to/episode.mp4', fps=1.0, max_frames=512)
-       np.savez('/tmp/robometer_frames.npz', frames=frames)
-       "
-
-3. In this LeRobot env, run this script::
-
-       uv run python scripts/parity_robometer.py \\
-           --frames /tmp/robometer_frames.npz \\
-           --task "Open the drawer" \\
-           --upstream-progress /tmp/robometer_upstream.npy \\
-           --upstream-success  /tmp/robometer_upstream_success_probs.npy \\
-           --lerobot-model     lilkm/robometer-4b
-"""
-
-from __future__ import annotations
-
-import argparse
-import sys
-
-import numpy as np
-import torch
-
-from lerobot.configs.rewards import RewardModelConfig
-from lerobot.rewards.robometer import RobometerConfig, RobometerRewardModel
-from lerobot.rewards.robometer.modeling_robometer import decode_progress_outputs
-from lerobot.rewards.robometer.processor_robometer import RobometerEncoderProcessorStep
-
-
-def _load_frames(path: str) -> np.ndarray:
-    """Load frames from .npy/.npz. Expects (T, H, W, C) uint8."""
-    if path.endswith(".npy"):
-        frames = np.load(path)
-    elif path.endswith(".npz"):
-        with np.load(path, allow_pickle=False) as npz:
-            frames = npz["frames"].copy() if "frames" in npz else next(iter(npz.values())).copy()
-    else:
-        raise ValueError(f"Frames must be .npy or .npz (got {path!r}).")
-
-    if frames.dtype != np.uint8:
-        frames = np.clip(frames, 0, 255).astype(np.uint8)
-    if frames.ndim != 4:
-        raise ValueError(f"Frames must be 4D (T,H,W,C); got shape {frames.shape}.")
-    if frames.shape[-1] not in (1, 3):
-        # Probably (T,C,H,W) — transpose
-        if frames.shape[1] in (1, 3):
-            frames = frames.transpose(0, 2, 3, 1)
-        else:
-            raise ValueError(f"Cannot interpret frame channel layout: {frames.shape}.")
-    return frames
-
-
-def _run_lerobot(
-    frames: np.ndarray,
-    task: str,
-    model_path: str,
-    device: str,
-) -> tuple[np.ndarray, np.ndarray]:
-    """Run LeRobot's Robometer on the given frames; return (progress, success)."""
-    cfg = RobometerConfig(pretrained_path=model_path, device=device, max_frames=None)
-    model = RobometerRewardModel.from_pretrained(model_path, config=cfg)
-
-    encoder = RobometerEncoderProcessorStep(
-        base_model_id=model.config.base_model_id,
-        use_multi_image=model.config.use_multi_image,
-        use_per_frame_progress_token=model.config.use_per_frame_progress_token,
-        max_frames=None,
-    )
-    batch = encoder.encode_samples([(frames, task)])
-
-    model_device = next(model.model.parameters()).device
-    inputs = {key: value.to(model_device) if hasattr(value, "to") else value for key, value in batch.items()}
-
-    model.eval()
-    with torch.no_grad():
-        progress_logits, success_logits = model._compute_rbm_logits(inputs)
-
-    decoded = decode_progress_outputs(
-        progress_logits,
-        success_logits,
-        is_discrete_mode=model.config.use_discrete_progress,
-    )
-    progress = np.asarray(decoded["progress_pred"][0], dtype=np.float32)
-    success = (
-        np.asarray(decoded["success_probs"][0], dtype=np.float32)
-        if decoded["success_probs"]
-        else np.array([], dtype=np.float32)
-    )
-    return progress, success
-
-
-def _compare(name: str, lerobot: np.ndarray, upstream: np.ndarray, atol: float, rtol: float) -> bool:
-    print(f"\n=== {name} ===")
-    if lerobot.shape != upstream.shape:
-        print(f"shape mismatch: lerobot={lerobot.shape}  upstream={upstream.shape}")
-        return False
-
-    abs_diff = np.abs(lerobot - upstream)
-    rel_diff = abs_diff / (np.abs(upstream) + 1e-12)
-    print(f"shape        : {lerobot.shape}")
-    print(f"max |Δ|      : {abs_diff.max():.3e}")
-    print(f"mean |Δ|     : {abs_diff.mean():.3e}")
-    print(f"max rel |Δ|  : {rel_diff.max():.3e}")
-    print(f"lerobot[:5]  : {lerobot[:5]}")
-    print(f"upstream[:5] : {upstream[:5]}")
-
-    within_tol = bool(np.allclose(lerobot, upstream, atol=atol, rtol=rtol))
-    print(f"allclose(atol={atol}, rtol={rtol}) -> {within_tol}")
-    return within_tol
-
-
-def main() -> int:
-    parser = argparse.ArgumentParser(
-        description=__doc__,
-        formatter_class=argparse.RawDescriptionHelpFormatter,
-    )
-    parser.add_argument(
-        "--frames",
-        required=True,
-        help=".npy / .npz file with the exact frames upstream was run on (T,H,W,C uint8).",
-    )
-    parser.add_argument("--task", required=True, help="Task instruction string.")
-    parser.add_argument(
-        "--upstream-progress",
-        required=True,
-        help="Reference progress .npy saved by upstream example_inference_local.py.",
-    )
-    parser.add_argument(
-        "--upstream-success",
-        default=None,
-        help="Optional reference success_probs .npy. If omitted, success comparison is skipped.",
-    )
-    parser.add_argument(
-        "--lerobot-model",
-        default="lilkm/robometer-4b",
-        help="LeRobot-format Robometer Hub repo id or local path.",
-    )
-    parser.add_argument(
-        "--device",
-        default="cuda" if torch.cuda.is_available() else "cpu",
-        help="Device for the LeRobot model (default: cuda if available).",
-    )
-    parser.add_argument(
-        "--atol",
-        type=float,
-        default=1e-3,
-        help="Absolute tolerance for allclose (default: 1e-3; bf16 round-trip headroom).",
-    )
-    parser.add_argument(
-        "--rtol",
-        type=float,
-        default=1e-2,
-        help="Relative tolerance for allclose (default: 1e-2).",
-    )
-    parser.add_argument(
-        "--out-prefix",
-        default="lerobot_robometer_outputs",
-        help="Save the LeRobot outputs as <prefix>_progress.npy / <prefix>_success.npy.",
-    )
-    args = parser.parse_args()
-
-    # 0. Sanity: confirm the LeRobot config is a RobometerConfig.
-    cfg = RewardModelConfig.from_pretrained(args.lerobot_model)
-    if not isinstance(cfg, RobometerConfig):
-        print(f"ERROR: {args.lerobot_model!r} does not resolve to a RobometerConfig.", file=sys.stderr)
-        return 2
-
-    # 1. Load frames + task + upstream reference outputs.
-    frames = _load_frames(args.frames)
-    upstream_progress = np.load(args.upstream_progress).astype(np.float32)
-    upstream_success = (
-        np.load(args.upstream_success).astype(np.float32) if args.upstream_success is not None else None
-    )
-
-    print(f"Loaded {frames.shape[0]} frames at {frames.shape[1:]}, task={args.task!r}")
-    print(f"LeRobot model: {args.lerobot_model}  device: {args.device}")
-
-    # 2. Run LeRobot pipeline.
-    progress, success = _run_lerobot(frames, args.task, args.lerobot_model, args.device)
-    np.save(f"{args.out_prefix}_progress.npy", progress)
-    if success.size > 0:
-        np.save(f"{args.out_prefix}_success.npy", success)
-    print(f"Saved LeRobot outputs to {args.out_prefix}_progress.npy / _success.npy")
-
-    # 3. Compare to upstream references.
-    progress_ok = _compare("progress", progress, upstream_progress, args.atol, args.rtol)
-    if upstream_success is not None and success.size > 0:
-        success_ok = _compare("success_probs", success, upstream_success, args.atol, args.rtol)
-    else:
-        success_ok = True
-        print("\n(skipping success comparison — upstream success file not provided)")
-
-    print()
-    if progress_ok and success_ok:
-        print("Parity check passed.")
-        return 0
-    print("Parity check FAILED.")
-    return 1
-
-
-if __name__ == "__main__":
-    sys.exit(main())
@@ -1,362 +0,0 @@
-#!/usr/bin/env python
-# Copyright 2026 The HuggingFace Inc. team. All rights reserved.
-"""Run LeRobot Robometer parity against upstream Robometer's bundled examples.
-
-Upstream Robometer ships three reference videos with their pre-computed
-progress / success outputs at
-``third_party/robometer/scripts/example_videos/``::
-
-    soar_put_green_stick_in_brown_bowl.mp4
-        + soar_put_green_stick_in_brown_bowl_rewards.npy            (progress)
-        + soar_put_green_stick_in_brown_bowl_rewards_success_probs.npy (success)
-    berkeley_rpt_stack_cup.mp4
-        + berkeley_rpt_stack_cup_rewards.npy
-        + berkeley_rpt_stack_cup_rewards_success_probs.npy
-    jaco_play_pick_up_green_cup.mp4
-        + pick_up_green_cup_rewards.npy
-        + pick_up_green_cup_rewards_success_probs.npy
-
-This script:
-1. Decodes each video at upstream's sampling fps using ``av`` (PyAV), with the
-   same linspace-over-total-frames logic as upstream's ``extract_frames``.
-2. Runs the LeRobot ``RobometerRewardModel`` on those frames + the task from
-   upstream's README.
-3. Compares per-frame progress / success to the pre-saved upstream outputs.
-
-This means you do **not** need to install upstream Robometer to confirm parity.
-
-Run::
-
-    uv run python scripts/parity_robometer_upstream_examples.py \\
-        --lerobot-model lilkm/robometer-4b \\
-        --device cuda \\
-        --decoder decord
-
-The number of frames sampled per video is derived from the length of each
-upstream ``.npy`` reference, so the script does not need a ``--fps`` argument
-(the README documents ``fps=3`` for SOAR / Berkeley, but the Jaco Play
-reference was generated with a different fps).
-"""
-
-from __future__ import annotations
-
-import argparse
-import sys
-from pathlib import Path
-
-import numpy as np
-import torch
-
-from lerobot.configs.rewards import RewardModelConfig
-from lerobot.rewards.robometer import RobometerConfig, RobometerRewardModel
-from lerobot.rewards.robometer.modeling_robometer import decode_progress_outputs
-from lerobot.rewards.robometer.processor_robometer import RobometerEncoderProcessorStep
-
-try:
-    import decord  # type: ignore
-
-    _HAS_DECORD = True
-except ImportError:
-    decord = None  # type: ignore
-    _HAS_DECORD = False
-
-try:
-    import av
-
-    _HAS_AV = True
-except ImportError:
-    av = None  # type: ignore
-    _HAS_AV = False
-
-EXAMPLES = [
-    {
-        "name": "soar_put_green_stick_in_brown_bowl",
-        "video": "soar_put_green_stick_in_brown_bowl.mp4",
-        "task": "Put green stick in brown bowl",
-        "progress_npy": "soar_put_green_stick_in_brown_bowl_rewards.npy",
-        "success_npy": "soar_put_green_stick_in_brown_bowl_rewards_success_probs.npy",
-    },
-    {
-        "name": "berkeley_rpt_stack_cup",
-        "video": "berkeley_rpt_stack_cup.mp4",
-        "task": "Pick up the yellow cup and stack it on the other cup",
-        "progress_npy": "berkeley_rpt_stack_cup_rewards.npy",
-        "success_npy": "berkeley_rpt_stack_cup_rewards_success_probs.npy",
-    },
-    {
-        "name": "jaco_play_pick_up_green_cup",
-        "video": "jaco_play_pick_up_green_cup.mp4",
-        "task": "Pick up the green cup",
-        "progress_npy": "pick_up_green_cup_rewards.npy",
-        "success_npy": "pick_up_green_cup_rewards_success_probs.npy",
-    },
-]
-
-
-def _extract_frames_decord(video_path: Path, num_frames: int) -> tuple[np.ndarray, str]:
-    """Sample ``num_frames`` indices uniformly from the video using decord.
-
-    Mirrors upstream's ``extract_frames`` indexing
-    (``third_party/robometer/scripts/example_inference.py``): a
-    ``np.linspace(0, total_frames-1, num_frames)`` lookup over decord's
-    ``VideoReader``. We pass ``num_frames`` explicitly (derived from the
-    upstream reference output length) so we don't have to guess what ``fps``
-    upstream actually used when generating each saved ``.npy`` — the file
-    length is the ground truth.
-    """
-    vr = decord.VideoReader(str(video_path), num_threads=1)
-    total_frames = len(vr)
-    if total_frames == 0:
-        raise RuntimeError(f"No decodable frames in {video_path}.")
-    desired_frames = max(1, min(int(num_frames), total_frames))
-    indices = np.linspace(0, total_frames - 1, desired_frames, dtype=int).tolist()
-    frames = vr.get_batch(indices).asnumpy()
-    native_fps = float(vr.get_avg_fps()) or 1.0
-    return frames, f"decord total={total_frames} native_fps={native_fps:.3f}"
-
-
-def _extract_frames_av(video_path: Path, num_frames: int) -> tuple[np.ndarray, str]:
-    """PyAV fallback for environments without decord.
-
-    PyAV and decord can disagree on ``total_frames`` for the same container,
-    so the sampled frame indices can drift. Install ``decord`` for a real
-    parity check; this fallback is for smoke tests only.
-    """
-    container = av.open(str(video_path))
-    stream = container.streams.video[0]
-    native_fps = float(stream.average_rate) if stream.average_rate else float(stream.guessed_rate or 30.0)
-    rgb_frames: list[np.ndarray] = []
-    for frame in container.decode(stream):
-        rgb_frames.append(frame.to_ndarray(format="rgb24"))
-    container.close()
-    total_frames = len(rgb_frames)
-    if total_frames == 0:
-        raise RuntimeError(f"No decodable frames in {video_path}.")
-    desired_frames = max(1, min(int(num_frames), total_frames))
-    indices = np.linspace(0, total_frames - 1, desired_frames, dtype=int)
-    frames = np.stack([rgb_frames[i] for i in indices])
-    return frames, f"av total={total_frames} native_fps={native_fps:.3f}"
-
-
-def _extract_frames(video_path: Path, num_frames: int, prefer: str) -> tuple[np.ndarray, str]:
-    """Decoder dispatch. ``prefer`` is ``"decord"`` | ``"av"`` | ``"auto"``."""
-    if prefer == "decord":
-        if not _HAS_DECORD:
-            raise RuntimeError("decord requested but not installed (`uv pip install decord`).")
-        return _extract_frames_decord(video_path, num_frames)
-    if prefer == "av":
-        if not _HAS_AV:
-            raise RuntimeError("av requested but not installed.")
-        return _extract_frames_av(video_path, num_frames)
-    # auto
-    if _HAS_DECORD:
-        return _extract_frames_decord(video_path, num_frames)
-    if _HAS_AV:
-        return _extract_frames_av(video_path, num_frames)
-    raise RuntimeError("No video decoder available (install `decord` or `av`).")
-
-
-def _pearson(a: np.ndarray, b: np.ndarray) -> float:
-    """Pearson correlation; returns 1.0 for constant inputs (no signal to align)."""
-    a = a.astype(np.float64)
-    b = b.astype(np.float64)
-    if a.size < 2:
-        return 1.0
-    da = a - a.mean()
-    db = b - b.mean()
-    denom = float(np.sqrt((da * da).sum()) * np.sqrt((db * db).sum()))
-    if denom == 0:
-        return 1.0
-    return float((da * db).sum() / denom)
-
-
-def _run_lerobot(
-    model: RobometerRewardModel,
-    encoder: RobometerEncoderProcessorStep,
-    frames: np.ndarray,
-    task: str,
-) -> tuple[np.ndarray, np.ndarray]:
-    batch = encoder.encode_samples([(frames, task)])
-    device = next(model.model.parameters()).device
-    inputs = {key: value.to(device) if hasattr(value, "to") else value for key, value in batch.items()}
-    model.eval()
-    with torch.no_grad():
-        progress_logits, success_logits = model._compute_rbm_logits(inputs)
-    decoded = decode_progress_outputs(
-        progress_logits, success_logits, is_discrete_mode=model.config.use_discrete_progress
-    )
-    progress = np.asarray(decoded["progress_pred"][0], dtype=np.float32)
-    success = (
-        np.asarray(decoded["success_probs"][0], dtype=np.float32)
-        if decoded["success_probs"]
-        else np.array([], dtype=np.float32)
-    )
-    return progress, success
-
-
-def _compare(
-    name: str,
-    lerobot: np.ndarray,
-    upstream: np.ndarray,
-    *,
-    atol: float,
-    pearson_min: float,
-) -> bool:
-    if lerobot.shape != upstream.shape:
-        print(f"  {name:8s}  SHAPE MISMATCH lerobot={lerobot.shape} upstream={upstream.shape}")
-        return False
-    abs_diff = np.abs(lerobot - upstream)
-    pearson = _pearson(lerobot, upstream)
-    abs_ok = bool(abs_diff.max() <= atol)
-    pearson_ok = bool(pearson >= pearson_min)
-    verdict = "PASS" if (abs_ok or pearson_ok) else "FAIL"
-    print(
-        f"  {name:8s}  shape={lerobot.shape}  max|Δ|={abs_diff.max():.3e}  "
-        f"mean|Δ|={abs_diff.mean():.3e}  pearson={pearson:.4f}  "
-        f"(atol={atol:.0e} pearson_min={pearson_min:.3f}) -> {verdict}"
-    )
-    return abs_ok or pearson_ok
-
-
-def main() -> int:
-    parser = argparse.ArgumentParser(
-        description=__doc__,
-        formatter_class=argparse.RawDescriptionHelpFormatter,
-    )
-    parser.add_argument(
-        "--examples-dir",
-        type=Path,
-        default=Path("third_party/robometer/scripts/example_videos"),
-        help="Directory containing the upstream Robometer example mp4s + .npy outputs.",
-    )
-    parser.add_argument(
-        "--lerobot-model",
-        default="lilkm/robometer-4b",
-        help="LeRobot-format Robometer Hub repo id or local path.",
-    )
-    parser.add_argument(
-        "--device",
-        default="cuda" if torch.cuda.is_available() else "cpu",
-        help="Device for the LeRobot model.",
-    )
-    parser.add_argument(
-        "--decoder",
-        choices=("auto", "decord", "av"),
-        default="auto",
-        help=(
-            "Video decoder. ``auto`` prefers decord (matches upstream) and falls back to av. "
-            "Force ``decord`` for a clean parity check."
-        ),
-    )
-    parser.add_argument(
-        "--progress-atol",
-        type=float,
-        default=1e-2,
-        help="Absolute tolerance for the progress array. Default 1e-2 covers CUDA bf16 noise.",
-    )
-    parser.add_argument(
-        "--success-atol",
-        type=float,
-        default=1e-1,
-        help=(
-            "Absolute tolerance for the success array. Looser than progress because "
-            "``sigmoid`` amplifies logit-space noise near 0.5."
-        ),
-    )
-    parser.add_argument(
-        "--pearson-min",
-        type=float,
-        default=0.99,
-        help="Minimum Pearson correlation for a PASS verdict (per array).",
-    )
-    args = parser.parse_args()
-
-    if args.decoder == "av" or (args.decoder == "auto" and not _HAS_DECORD):
-        print(
-            "WARNING: using PyAV decoder. PyAV's total-frame count can differ from decord's, "
-            "which propagates into different sampled-frame indices. Install `decord` and "
-            "re-run for a clean parity check.",
-            file=sys.stderr,
-        )
-
-    examples_dir = args.examples_dir.resolve()
-    if not examples_dir.is_dir():
-        print(f"ERROR: examples dir {examples_dir} does not exist.", file=sys.stderr)
-        return 2
-
-    # Sanity-check the LeRobot config is a RobometerConfig before loading weights.
-    cfg = RewardModelConfig.from_pretrained(args.lerobot_model)
-    if not isinstance(cfg, RobometerConfig):
-        print(f"ERROR: {args.lerobot_model!r} did not resolve to a RobometerConfig.", file=sys.stderr)
-        return 2
-
-    print(f"Loading LeRobot Robometer from {args.lerobot_model} on {args.device}...")
-    cfg.pretrained_path = args.lerobot_model
-    cfg.device = args.device
-    model = RobometerRewardModel.from_pretrained(args.lerobot_model, config=cfg)
-    encoder = RobometerEncoderProcessorStep(
-        base_model_id=model.config.base_model_id,
-        use_multi_image=model.config.use_multi_image,
-        use_per_frame_progress_token=model.config.use_per_frame_progress_token,
-        max_frames=None,
-    )
-
-    all_ok = True
-    for ex in EXAMPLES:
-        video_path = examples_dir / ex["video"]
-        upstream_progress_path = examples_dir / ex["progress_npy"]
-        upstream_success_path = examples_dir / ex["success_npy"]
-
-        missing = [p for p in (video_path, upstream_progress_path, upstream_success_path) if not p.exists()]
-        if missing:
-            print(f"[skip] {ex['name']}: missing {[str(m) for m in missing]}")
-            all_ok = False
-            continue
-
-        print(f"\n=== {ex['name']} ===")
-        print(f"  task: {ex['task']!r}")
-
-        # Trust the upstream reference array as the source of truth for how
-        # many frames to sample. The README documents fps=3 for SOAR/Berkeley
-        # but Jaco Play was generated with a different fps, so any hardcoded
-        # ``--fps`` mismatches at least one example. The npy length always
-        # tells us what upstream actually used.
-        upstream_progress = np.load(upstream_progress_path).astype(np.float32)
-        upstream_success = np.load(upstream_success_path).astype(np.float32)
-        target_num_frames = int(upstream_progress.shape[0])
-        frames, decoder_info = _extract_frames(video_path, target_num_frames, prefer=args.decoder)
-        print(
-            f"  decoded {frames.shape[0]} frames (matches upstream npy length); "
-            f"shape={frames.shape}  [{decoder_info}]"
-        )
-
-        progress, success = _run_lerobot(model, encoder, frames, ex["task"])
-
-        progress_ok = _compare(
-            "progress",
-            progress,
-            upstream_progress,
-            atol=args.progress_atol,
-            pearson_min=args.pearson_min,
-        )
-        success_ok = _compare(
-            "success",
-            success,
-            upstream_success,
-            atol=args.success_atol,
-            pearson_min=args.pearson_min,
-        )
-        verdict = "PASS" if (progress_ok and success_ok) else "FAIL"
-        print(f"  -> {verdict}")
-        all_ok = all_ok and progress_ok and success_ok
-
-    print()
-    if all_ok:
-        print("All upstream example parity checks passed.")
-        return 0
-    print("Some upstream example parity checks FAILED.")
-    return 1
-
-
-if __name__ == "__main__":
-    sys.exit(main())
@@ -1,149 +0,0 @@
-#!/usr/bin/env python
-# Copyright 2026 The HuggingFace Inc. team. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-
-"""Verify that a LeRobot-format Robometer is byte-equivalent to its upstream source.
-
-Run this once after publishing a LeRobot-format Robometer to the Hub, before
-flipping the default `RobometerConfig.pretrained_path` to it. It loads both
-the upstream snapshot and the re-exported copy, compares state dicts, and
-prints a clear pass/fail summary.
-
-Example:
-
-    python scripts/verify_robometer_export.py \\
-        --upstream robometer/Robometer-4B \\
-        --lerobot  lerobot/robometer-4b
-
-    python scripts/verify_robometer_export.py \\
-        --upstream robometer/Robometer-4B \\
-        --lerobot  ./robometer-4b-lerobot   # local folder also works
-"""
-
-from __future__ import annotations
-
-import argparse
-import sys
-
-from lerobot.configs.rewards import RewardModelConfig
-from lerobot.rewards.robometer import RobometerConfig, RobometerRewardModel
-from lerobot.rewards.robometer._upstream_loader import apply_upstream_checkpoint
-
-
-def _load_upstream(path: str) -> RobometerRewardModel:
-    # Fresh ``RobometerConfig`` (``vlm_config=None``) triggers
-    # ``RobometerRewardModel.__init__``'s upstream-matching path: download
-    # base Qwen, resize for ROBOMETER_SPECIAL_TOKENS. The subsequent
-    # ``apply_upstream_checkpoint`` call resizes again if the checkpoint's
-    # vocab differs (e.g. upstream was trained against an older Qwen).
-    cfg = RobometerConfig(pretrained_path=path, device="cpu")
-    model = RobometerRewardModel(cfg)
-    apply_upstream_checkpoint(model, path)
-    model.eval()
-    return model
-
-
-def _load_lerobot(path: str) -> RobometerRewardModel:
-    cfg = RewardModelConfig.from_pretrained(path)
-    if not isinstance(cfg, RobometerConfig):
-        raise TypeError(f"Expected RobometerConfig in LeRobot export, got {type(cfg)}")
-    cfg.pretrained_path = path
-    cfg.device = "cpu"
-    return RobometerRewardModel.from_pretrained(path, config=cfg)
-
-
-def compare_state_dicts(a: RobometerRewardModel, b: RobometerRewardModel) -> bool:
-    sd_a, sd_b = a.state_dict(), b.state_dict()
-    keys_a, keys_b = set(sd_a), set(sd_b)
-
-    missing = keys_a - keys_b
-    extra = keys_b - keys_a
-    if missing:
-        print(f"❌ {len(missing)} keys missing in LeRobot-format model (sample: {list(missing)[:5]})")
-    if extra:
-        print(f"❌ {len(extra)} extra keys in LeRobot-format model (sample: {list(extra)[:5]})")
-    if missing or extra:
-        return False
-
-    diff_summary: list[tuple[str, float]] = []
-    for key in sorted(keys_a):
-        ta, tb = sd_a[key], sd_b[key]
-        if ta.shape != tb.shape:
-            print(f"❌ shape mismatch at {key}: {tuple(ta.shape)} vs {tuple(tb.shape)}")
-            return False
-        # Compare in float to avoid bfloat16 equality quirks.
-        max_abs = (ta.float() - tb.float()).abs().max().item()
-        if max_abs > 0:
-            diff_summary.append((key, max_abs))
-
-    if not diff_summary:
-        print(f"✅ All {len(keys_a)} parameters identical")
-        return True
-
-    # Some keys differ; show worst offenders.
-    diff_summary.sort(key=lambda kv: kv[1], reverse=True)
-    print(f"⚠️  {len(diff_summary)} keys differ. Top 10 by max abs diff:")
-    for key, value in diff_summary[:10]:
-        print(f"    {key:60s}  max|Δ| = {value:.3e}")
-
-    # Tolerance: bf16 round-trips can introduce ULP-level noise but no real
-    # change. Allow up to 1e-3 absolute difference; anything larger is a real
-    # divergence.
-    worst = diff_summary[0][1]
-    if worst < 1e-3:
-        print(f"✅ Worst diff {worst:.3e} is within bf16 round-trip tolerance")
-        return True
-    print(f"❌ Worst diff {worst:.3e} exceeds tolerance (1e-3)")
-    return False
-
-
-def main() -> int:
-    parser = argparse.ArgumentParser(
-        description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter
-    )
-    parser.add_argument("--upstream", required=True, help="Upstream Robometer repo id or local path.")
-    parser.add_argument("--lerobot", required=True, help="LeRobot-format Robometer repo id or local path.")
-    args = parser.parse_args()
-
-    print(f"Loading upstream:        {args.upstream}")
-    upstream = _load_upstream(args.upstream)
-    print(f"Loading LeRobot-format:  {args.lerobot}")
-    lerobot = _load_lerobot(args.lerobot)
-
-    print("\n=== Config comparison ===")
-    config_ok = True
-    for field in [
-        "base_model_id",
-        "torch_dtype",
-        "use_multi_image",
-        "use_per_frame_progress_token",
-        "average_temporal_patches",
-        "frame_pooling",
-        "frame_pooling_attn_temperature",
-        "progress_loss_type",
-        "progress_discrete_bins",
-    ]:
-        a, b = getattr(upstream.config, field), getattr(lerobot.config, field)
-        field_ok = a == b
-        config_ok = config_ok and field_ok
-        ok = "✅" if field_ok else "❌"
-        print(f"  {ok} {field}: upstream={a!r}, lerobot={b!r}")
-
-    print("\n=== State-dict comparison ===")
-    state_dict_ok = compare_state_dicts(upstream, lerobot)
-
-    print()
-    if config_ok and state_dict_ok:
-        print("🎉 Verification passed — safe to flip the default.")
-        return 0
-    print("⛔ Verification failed — DO NOT flip the default.")
-    return 1
-
-
-if __name__ == "__main__":
-    sys.exit(main())
@@ -41,12 +41,8 @@ def cfg_to_group(
            return tag
        return tag[:max_tag_length]

-    if cfg.is_reward_model_training:
-        trainable_tag = f"reward_model:{cfg.reward_model.type}"
-    else:
-        trainable_tag = f"policy:{cfg.policy.type}"
    lst = [
-        trainable_tag,
+        f"policy:{cfg.policy.type}",
        f"seed:{cfg.seed}",
    ]
    if cfg.dataset is not None:
@@ -117,9 +117,3 @@ class PeftConfig:
    # the rank used for the adapter. In general a higher rank means more trainable parameters and closer to full
    # fine-tuning.
    r: int = 16
-
-    # Alpha parameter for LoRA scaling (scaling = lora_alpha / r).
-    # In general, a higher alpha means stronger adaptation signal.
-    # If None, the PEFT library defaults to alpha=8, which may dampen high-rank adapters.
-    # Common values are r (alpha == rank) or 2*r.
-    lora_alpha: int | None = None
@@ -46,11 +46,8 @@ class EvalPipelineConfig:
        # HACK: We parse again the cli args here to get the pretrained path if there was one.
        policy_path = parser.get_path_arg("policy")
        if policy_path:
-            yaml_overrides = parser.get_yaml_overrides("policy")
-            cli_overrides = parser.get_cli_overrides("policy") or []
-            self.policy = PreTrainedConfig.from_pretrained(
-                policy_path, cli_overrides=yaml_overrides + cli_overrides
-            )
+            cli_overrides = parser.get_cli_overrides("policy")
+            self.policy = PreTrainedConfig.from_pretrained(policy_path, cli_overrides=cli_overrides)
            self.policy.pretrained_path = Path(policy_path)

        else:
@@ -13,10 +13,8 @@
 # limitations under the License.
 import importlib
 import inspect
-import json
 import pkgutil
 import sys
-import tempfile
 from argparse import ArgumentError
 from collections.abc import Callable, Iterable, Sequence
 from functools import wraps
@@ -26,7 +24,6 @@ from types import ModuleType
 from typing import Any, TypeVar, cast

 import draccus
-import yaml  # type: ignore[import-untyped]

 from lerobot.utils.utils import has_method

@@ -35,29 +32,6 @@ F = TypeVar("F", bound=Callable[..., object])
 PATH_KEY = "path"
 PLUGIN_DISCOVERY_SUFFIX = "discover_packages_path"

-# Storage for path args extracted from YAML/JSON config files, so that
-# get_path_arg() can find them even when they weren't passed via CLI.
-_config_path_args: dict[str, str] = {}
-
-# Storage for non-path YAML overrides so validate() can pass them to from_pretrained.
-_config_yaml_overrides: dict[str, list[str]] = {}
-
-
-def _flatten_to_cli_args(d: dict, prefix: str = "") -> list[str]:
-    """Recursively flatten a nested dict to CLI-style args (e.g. {"lr": 1e-4} -> ["--lr=0.0001"])."""
-    args = []
-    for key, value in d.items():
-        if key in (PATH_KEY, draccus.CHOICE_TYPE_KEY):
-            continue
-        full_key = f"{prefix}.{key}" if prefix else key
-        if isinstance(value, bool):
-            value = str(value).lower()
-        if isinstance(value, dict):
-            args.extend(_flatten_to_cli_args(value, full_key))
-        elif value is not None and not isinstance(value, list):
-            args.append(f"--{full_key}={value}")
-    return args
-

 def get_cli_overrides(field_name: str, args: Sequence[str] | None = None) -> list[str] | None:
    """Parses arguments from cli at a given nested attribute level.
@@ -171,14 +145,7 @@ def load_plugin(plugin_path: str) -> None:


 def get_path_arg(field_name: str, args: Sequence[str] | None = None) -> str | None:
-    result = parse_arg(f"{field_name}.{PATH_KEY}", args)
-    if result is None:
-        result = _config_path_args.get(field_name)
-    return result
-
-
-def get_yaml_overrides(field_name: str) -> list[str]:
-    return _config_yaml_overrides.get(field_name, [])
+    return parse_arg(f"{field_name}.{PATH_KEY}", args)


 def get_type_arg(field_name: str, args: Sequence[str] | None = None) -> str | None:
@@ -225,52 +192,6 @@ def filter_path_args(fields_to_filter: str | list[str], args: Sequence[str] | No
    return filtered_args


-def extract_path_fields_from_config(config_path: str, path_fields: list[str]) -> str:
-    """Extract `path` fields from a YAML/JSON config before draccus processes it.
-
-    When a user specifies e.g. ``policy.path: lerobot/smolvla_base`` in a YAML config,
-    draccus will fail because ``path`` is not a valid field on policy config classes.
-    This function extracts those path values, stores them in ``_config_path_args`` for
-    later retrieval by ``get_path_arg()``, and returns a cleaned temp config file path.
-    """
-    config_file = Path(config_path)
-    suffix = config_file.suffix.lower()
-
-    if suffix in (".yaml", ".yml"):
-        with open(config_file) as f:
-            config_data = yaml.safe_load(f)
-    elif suffix == ".json":
-        with open(config_file) as f:
-            config_data = json.load(f)
-    else:
-        return config_path
-
-    if not isinstance(config_data, dict):
-        return config_path
-
-    modified = False
-    for field in path_fields:
-        if field in config_data and isinstance(config_data[field], dict) and PATH_KEY in config_data[field]:
-            _config_path_args[field] = str(config_data[field].pop(PATH_KEY))
-            remaining = config_data[field]
-            if remaining:
-                _config_yaml_overrides[field] = _flatten_to_cli_args(remaining)
-            else:
-                del config_data[field]
-            modified = True
-
-    if not modified:
-        return config_path
-
-    # Write cleaned config to a temp file
-    with tempfile.NamedTemporaryFile(mode="w", suffix=suffix, delete=False) as tmp:
-        if suffix in (".yaml", ".yml"):
-            yaml.dump(config_data, tmp, default_flow_style=False)
-        else:
-            json.dump(config_data, tmp, indent=2)
-    return tmp.name
-
-
 def wrap(config_path: Path | None = None) -> Callable[[F], F]:
    """
    HACK: Similar to draccus.wrap but does three additional things:
@@ -304,9 +225,6 @@ def wrap(config_path: Path | None = None) -> Callable[[F], F]:
                if has_method(argtype, "__get_path_fields__"):
                    path_fields = argtype.__get_path_fields__()
                    cli_args = filter_path_args(path_fields, cli_args)
-                    # Also extract path fields from the YAML/JSON config file
-                    if config_path_cli:
-                        config_path_cli = extract_path_fields_from_config(config_path_cli, path_fields)
                if has_method(argtype, "from_pretrained") and config_path_cli:
                    cli_args = filter_arg("config_path", cli_args)
                    cfg = argtype.from_pretrained(config_path_cli, cli_args=cli_args)
@@ -1,170 +0,0 @@
-# Copyright 2026 The HuggingFace Inc. team. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import abc
-import builtins
-import json
-import logging
-import os
-import tempfile
-from dataclasses import dataclass, field
-from pathlib import Path
-from typing import Any, TypeVar
-
-import draccus
-from huggingface_hub import hf_hub_download
-from huggingface_hub.constants import CONFIG_NAME
-from huggingface_hub.errors import HfHubHTTPError
-
-from lerobot.configs.types import PolicyFeature
-from lerobot.optim.optimizers import OptimizerConfig
-from lerobot.optim.schedulers import LRSchedulerConfig
-from lerobot.utils.device_utils import auto_select_torch_device, is_torch_device_available
-from lerobot.utils.hub import HubMixin
-
-T = TypeVar("T", bound="RewardModelConfig")
-logger = logging.getLogger(__name__)
-
-
-@dataclass
-class RewardModelConfig(draccus.ChoiceRegistry, HubMixin, abc.ABC):
-    """Base configuration for reward models.
-
-    Args:
-    input_features: A dictionary defining the PolicyFeature of the input data for the reward. The key represents
-        the input data name, and the value is PolicyFeature, which consists of FeatureType and shape attributes.
-    output_features: A dictionary defining the PolicyFeature of the output data for the reward. The key represents
-        the output data name, and the value is PolicyFeature, which consists of FeatureType and shape attributes.
-    """
-
-    # Reuses PolicyFeature
-    input_features: dict[str, PolicyFeature] = field(default_factory=dict)
-    output_features: dict[str, PolicyFeature] = field(default_factory=dict)
-
-    device: str | None = None
-
-    pretrained_path: str | None = None
-
-    push_to_hub: bool = False
-    repo_id: str | None = None
-
-    # Hub metadata
-    license: str | None = None
-    tags: list[str] | None = None
-    private: bool | None = None
-
-    def __post_init__(self) -> None:
-        if not self.device or not is_torch_device_available(self.device):
-            auto_device = auto_select_torch_device()
-            logger.warning(f"Device '{self.device}' is not available. Switching to '{auto_device}'.")
-            self.device = auto_device.type
-
-    @property
-    def type(self) -> str:
-        choice_name = self.get_choice_name(self.__class__)
-        if not isinstance(choice_name, str):
-            raise TypeError(f"Expected string from get_choice_name, got {type(choice_name)}")
-        return choice_name
-
-    @property
-    def observation_delta_indices(self) -> list | None:  # type: ignore[type-arg]
-        return None
-
-    @property
-    def action_delta_indices(self) -> list | None:  # type: ignore[type-arg]
-        return None
-
-    @property
-    def reward_delta_indices(self) -> list | None:  # type: ignore[type-arg]
-        return None
-
-    def get_optimizer_preset(self) -> OptimizerConfig | None:
-        """Default optimizer for this reward model, or ``None`` for zero-shot models.
-
-        Trainable reward models (e.g. SARM, Classifier) must override this with a
-        concrete optimizer config. Zero-shot reward models (e.g. Robometer) leave
-        the default ``None`` — they error out earlier via the
-        :attr:`~lerobot.rewards.pretrained.PreTrainedRewardModel.is_trainable`
-        check in ``lerobot-train``.
-        """
-        return None
-
-    def get_scheduler_preset(self) -> LRSchedulerConfig | None:
-        return None
-
-    def validate_features(self) -> None:
-        pass
-
-    def _save_pretrained(self, save_directory: Path) -> None:
-        with open(save_directory / CONFIG_NAME, "w") as f, draccus.config_type("json"):
-            draccus.dump(self, f, indent=4)
-
-    @classmethod
-    def from_pretrained(
-        cls: builtins.type[T],
-        pretrained_name_or_path: str | Path,
-        *,
-        force_download: bool = False,
-        resume_download: bool | None = None,
-        proxies: dict[Any, Any] | None = None,
-        token: str | bool | None = None,
-        cache_dir: str | Path | None = None,
-        local_files_only: bool = False,
-        revision: str | None = None,
-        **reward_kwargs: Any,
-    ) -> T:
-        model_id = str(pretrained_name_or_path)
-        config_file: str | None = None
-        if Path(model_id).is_dir():
-            if CONFIG_NAME in os.listdir(model_id):
-                config_file = os.path.join(model_id, CONFIG_NAME)
-            else:
-                logger.error(f"{CONFIG_NAME} not found in {Path(model_id).resolve()}")
-        else:
-            try:
-                config_file = hf_hub_download(
-                    repo_id=model_id,
-                    filename=CONFIG_NAME,
-                    revision=revision,
-                    cache_dir=cache_dir,
-                    force_download=force_download,
-                    proxies=proxies,
-                    resume_download=resume_download,
-                    token=token,
-                    local_files_only=local_files_only,
-                )
-            except HfHubHTTPError as e:
-                raise FileNotFoundError(
-                    f"{CONFIG_NAME} not found on the HuggingFace Hub in {model_id}"
-                ) from e
-
-        if config_file is None:
-            raise FileNotFoundError(f"{CONFIG_NAME} not found in {model_id}")
-
-        # HACK: Parse the original config to get the config subclass, so that we can
-        # apply cli overrides.
-        with draccus.config_type("json"):
-            orig_config = draccus.parse(cls, config_file, args=[])
-
-        with open(config_file) as f:
-            config = json.load(f)
-
-        config.pop("type", None)
-        with tempfile.NamedTemporaryFile("w+", delete=False, suffix=".json") as f:
-            json.dump(config, f)
-            config_file = f.name
-
-        cli_overrides = reward_kwargs.pop("cli_overrides", [])
-        with draccus.config_type("json"):
-            return draccus.parse(orig_config.__class__, config_file, args=cli_overrides)
@@ -13,9 +13,7 @@
 # limitations under the License.
 import builtins
 import datetime as dt
-import json
 import os
-import tempfile
 from dataclasses import dataclass, field
 from pathlib import Path
 from typing import Any
@@ -28,57 +26,18 @@ from lerobot import envs
 from lerobot.configs import parser
 from lerobot.optim import LRSchedulerConfig, OptimizerConfig
 from lerobot.utils.hub import HubMixin
-from lerobot.utils.sample_weighting import SampleWeightingConfig

 from .default import DatasetConfig, EvalConfig, PeftConfig, WandBConfig
 from .policies import PreTrainedConfig
-from .rewards import RewardModelConfig

 TRAIN_CONFIG_NAME = "train_config.json"


-def _migrate_legacy_rabc_fields(config: dict[str, Any]) -> dict[str, Any] | None:
-    """Return migrated payload for legacy RA-BC fields, or None when no migration is needed."""
-    legacy_fields = (
-        "use_rabc",
-        "rabc_progress_path",
-        "rabc_kappa",
-        "rabc_epsilon",
-        "rabc_head_mode",
-    )
-    if not any(key in config for key in legacy_fields):
-        return None
-
-    migrated_config = dict(config)
-    use_rabc = bool(migrated_config.pop("use_rabc", False))
-    rabc_progress_path = migrated_config.pop("rabc_progress_path", None)
-    rabc_kappa = migrated_config.pop("rabc_kappa", None)
-    rabc_epsilon = migrated_config.pop("rabc_epsilon", None)
-    rabc_head_mode = migrated_config.pop("rabc_head_mode", None)
-
-    # New configs may already define sample_weighting explicitly. In that case,
-    # legacy fields are ignored after being stripped from the payload.
-    if migrated_config.get("sample_weighting") is None and use_rabc:
-        sample_weighting: dict[str, Any] = {"type": "rabc"}
-        if rabc_progress_path is not None:
-            sample_weighting["progress_path"] = rabc_progress_path
-        if rabc_kappa is not None:
-            sample_weighting["kappa"] = rabc_kappa
-        if rabc_epsilon is not None:
-            sample_weighting["epsilon"] = rabc_epsilon
-        if rabc_head_mode is not None:
-            sample_weighting["head_mode"] = rabc_head_mode
-        migrated_config["sample_weighting"] = sample_weighting
-
-    return migrated_config
-
-
@dataclass
 class TrainPipelineConfig(HubMixin):
    dataset: DatasetConfig
    env: envs.EnvConfig | None = None
    policy: PreTrainedConfig | None = None
-    reward_model: RewardModelConfig | None = None
    # Set `dir` to where you would like to save all of the run outputs. If you run another training session
    # with the same value for `dir` its contents will be overwritten unless you set `resume` to true.
    output_dir: Path | None = None
@@ -113,44 +72,27 @@ class TrainPipelineConfig(HubMixin):
    wandb: WandBConfig = field(default_factory=WandBConfig)
    peft: PeftConfig | None = None

-    # Sample weighting configuration (e.g., for RA-BC training)
-    sample_weighting: SampleWeightingConfig | None = None
+    # RA-BC (Reward-Aligned Behavior Cloning) parameters
+    use_rabc: bool = False  # Enable reward-weighted training
+    rabc_progress_path: str | None = None  # Path to precomputed SARM progress parquet file
+    rabc_kappa: float = 0.01  # Hard threshold for high-quality samples
+    rabc_epsilon: float = 1e-6  # Small constant for numerical stability
+    rabc_head_mode: str | None = "sparse"  # For dual-head models: "sparse" or "dense"

    # Rename map for the observation to override the image and state keys
    rename_map: dict[str, str] = field(default_factory=dict)
    checkpoint_path: Path | None = field(init=False, default=None)

-    @property
-    def is_reward_model_training(self) -> bool:
-        """True when the config targets a reward model rather than a policy."""
-        return self.reward_model is not None
-
-    @property
-    def trainable_config(self) -> PreTrainedConfig | RewardModelConfig:
-        """Return whichever config (policy or reward_model) is active."""
-        if self.is_reward_model_training:
-            return self.reward_model  # type: ignore[return-value]
-        return self.policy  # type: ignore[return-value]
-
    def validate(self) -> None:
        # HACK: We parse again the cli args here to get the pretrained paths if there was some.
        policy_path = parser.get_path_arg("policy")
-        reward_model_path = parser.get_path_arg("reward_model")
-
-        if reward_model_path:
-            cli_overrides = parser.get_cli_overrides("reward_model")
-            self.reward_model = RewardModelConfig.from_pretrained(
-                reward_model_path, cli_overrides=cli_overrides
-            )
-            self.reward_model.pretrained_path = str(Path(reward_model_path))
-        elif policy_path:
-            yaml_overrides = parser.get_yaml_overrides("policy")
-            cli_overrides = parser.get_cli_overrides("policy") or []
-            self.policy = PreTrainedConfig.from_pretrained(
-                policy_path, cli_overrides=yaml_overrides + cli_overrides
-            )
+        if policy_path:
+            # Only load the policy config
+            cli_overrides = parser.get_cli_overrides("policy")
+            self.policy = PreTrainedConfig.from_pretrained(policy_path, cli_overrides=cli_overrides)
            self.policy.pretrained_path = Path(policy_path)
        elif self.resume:
+            # The entire train config is already loaded, we just need to get the checkpoint dir
            config_path = parser.parse_arg("config_path")
            if not config_path:
                raise ValueError(
@@ -166,22 +108,18 @@ class TrainPipelineConfig(HubMixin):
            policy_dir = Path(config_path).parent
            if self.policy is not None:
                self.policy.pretrained_path = policy_dir
-            if self.reward_model is not None:
-                self.reward_model.pretrained_path = str(policy_dir)
            self.checkpoint_path = policy_dir.parent

-        if self.policy is None and self.reward_model is None:
+        if self.policy is None:
            raise ValueError(
-                "Neither policy nor reward_model is configured. "
-                "Please specify one with `--policy.path` or `--reward_model.path`."
+                "Policy is not configured. Please specify a pretrained policy with `--policy.path`."
            )

-        active_cfg = self.trainable_config
        if not self.job_name:
            if self.env is None:
-                self.job_name = f"{active_cfg.type}"
+                self.job_name = f"{self.policy.type}"
            else:
-                self.job_name = f"{self.env.type}_{active_cfg.type}"
+                self.job_name = f"{self.env.type}_{self.policy.type}"

        if not self.resume and isinstance(self.output_dir, Path) and self.output_dir.is_dir():
            raise FileExistsError(
@@ -199,16 +137,26 @@ class TrainPipelineConfig(HubMixin):
        if not self.use_policy_training_preset and (self.optimizer is None or self.scheduler is None):
            raise ValueError("Optimizer and Scheduler must be set when the policy presets are not used.")
        elif self.use_policy_training_preset and not self.resume:
-            self.optimizer = active_cfg.get_optimizer_preset()
-            self.scheduler = active_cfg.get_scheduler_preset()
+            self.optimizer = self.policy.get_optimizer_preset()
+            self.scheduler = self.policy.get_scheduler_preset()

-        if hasattr(active_cfg, "push_to_hub") and active_cfg.push_to_hub and not active_cfg.repo_id:
-            raise ValueError("'repo_id' argument missing. Please specify it to push the model to the hub.")
+        if self.policy.push_to_hub and not self.policy.repo_id:
+            raise ValueError(
+                "'policy.repo_id' argument missing. Please specify it to push the model to the hub."
+            )
+
+        if self.use_rabc and not self.rabc_progress_path:
+            # Auto-detect from dataset path
+            repo_id = self.dataset.repo_id
+            if self.dataset.root:
+                self.rabc_progress_path = str(Path(self.dataset.root) / "sarm_progress.parquet")
+            else:
+                self.rabc_progress_path = f"hf://datasets/{repo_id}/sarm_progress.parquet"

    @classmethod
    def __get_path_fields__(cls) -> list[str]:
-        """Keys for draccus pretrained-path loading."""
-        return ["policy", "reward_model"]
+        """This enables the parser to load config from the policy using `--policy.path=local/dir`"""
+        return ["policy"]

    def to_dict(self) -> dict[str, Any]:
        return draccus.encode(self)  # type: ignore[no-any-return]  # because of the third-party library draccus uses Any as the return type
@@ -259,16 +207,5 @@ class TrainPipelineConfig(HubMixin):
                ) from e

        cli_args = kwargs.pop("cli_args", [])
-        # Legacy RA-BC migration only applies to framework-saved checkpoints (always JSON).
-        # Hand-written YAML/TOML configs are expected to use the current sample_weighting schema.
-        if config_file is not None and config_file.endswith(".json"):
-            with open(config_file) as f:
-                config = json.load(f)
-            migrated_config = _migrate_legacy_rabc_fields(config)
-            if migrated_config is not None:
-                with tempfile.NamedTemporaryFile("w+", delete=False, suffix=".json") as f:
-                    json.dump(migrated_config, f)
-                    config_file = f.name
-
        with draccus.config_type("json"):
            return draccus.parse(cls, config_file, args=cli_args)
@@ -97,8 +97,8 @@ def update_data_df(df, src_meta, dst_meta):
        pd.DataFrame: Updated DataFrame with adjusted indices.
    """

-    df["episode_index"] = df["episode_index"] + dst_meta.info.total_episodes
-    df["index"] = df["index"] + dst_meta.info.total_frames
+    df["episode_index"] = df["episode_index"] + dst_meta.info["total_episodes"]
+    df["index"] = df["index"] + dst_meta.info["total_frames"]

    src_task_names = src_meta.tasks.index.take(df["task_index"].to_numpy())
    df["task_index"] = dst_meta.tasks.loc[src_task_names, "task_index"].to_numpy()
@@ -225,9 +225,9 @@ def update_meta_data(
        # Clean up temporary columns
        df = df.drop(columns=["_orig_chunk", "_orig_file"])

-    df["dataset_from_index"] = df["dataset_from_index"] + dst_meta.info.total_frames
-    df["dataset_to_index"] = df["dataset_to_index"] + dst_meta.info.total_frames
-    df["episode_index"] = df["episode_index"] + dst_meta.info.total_episodes
+    df["dataset_from_index"] = df["dataset_from_index"] + dst_meta.info["total_frames"]
+    df["dataset_to_index"] = df["dataset_to_index"] + dst_meta.info["total_frames"]
+    df["episode_index"] = df["episode_index"] + dst_meta.info["total_episodes"]

    return df

@@ -237,8 +237,8 @@ def aggregate_datasets(
    aggr_repo_id: str,
    roots: list[Path] | None = None,
    aggr_root: Path | None = None,
-    data_files_size_in_mb: int | None = None,
-    video_files_size_in_mb: int | None = None,
+    data_files_size_in_mb: float | None = None,
+    video_files_size_in_mb: float | None = None,
    chunk_size: int | None = None,
 ):
    """Aggregates multiple LeRobot datasets into a single unified dataset.
@@ -313,8 +313,8 @@ def aggregate_datasets(
        # to avoid interference between different source datasets
        data_idx.pop("src_to_dst", None)

-        dst_meta.info.total_episodes += src_meta.total_episodes
-        dst_meta.info.total_frames += src_meta.total_frames
+        dst_meta.info["total_episodes"] += src_meta.total_episodes
+        dst_meta.info["total_frames"] += src_meta.total_frames

    finalize_aggregation(dst_meta, all_metadata)
    logging.info("Aggregation complete.")
@@ -640,10 +640,14 @@ def finalize_aggregation(aggr_meta, all_metadata):
    write_tasks(aggr_meta.tasks, aggr_meta.root)

    logging.info("write info")
-    aggr_meta.info.total_tasks = len(aggr_meta.tasks)
-    aggr_meta.info.total_episodes = sum(m.total_episodes for m in all_metadata)
-    aggr_meta.info.total_frames = sum(m.total_frames for m in all_metadata)
-    aggr_meta.info.splits = {"train": f"0:{sum(m.total_episodes for m in all_metadata)}"}
+    aggr_meta.info.update(
+        {
+            "total_tasks": len(aggr_meta.tasks),
+            "total_episodes": sum(m.total_episodes for m in all_metadata),
+            "total_frames": sum(m.total_frames for m in all_metadata),
+            "splits": {"train": f"0:{sum(m.total_episodes for m in all_metadata)}"},
+        }
+    )
    write_info(aggr_meta.info, aggr_meta.root)

    logging.info("write stats")
@@ -14,7 +14,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import contextlib
-from collections.abc import Callable
 from pathlib import Path

 import numpy as np
@@ -38,11 +37,13 @@ from .io_utils import (
    load_subtasks,
    load_tasks,
    write_info,
+    write_json,
    write_stats,
    write_tasks,
 )
 from .utils import (
    DEFAULT_EPISODES_PATH,
+    INFO_PATH,
    check_version_compatibility,
    get_safe_version,
    has_legacy_hub_download_metadata,
@@ -190,29 +191,6 @@ class LeRobotDatasetMetadata:
        if self.episodes is None:
            self._load_metadata()

-    def filter_episodes(
-        self,
-        predicate: Callable[[dict], bool],
-        candidates: list[int] | None = None,
-    ) -> list[int]:
-        """Filter episodes whose metadata satisfies a given predicate.
-
-        Args:
-            predicate: Predicate over per-episode metadata rows used to select episodes.
-            candidates: Optional list of episode indices to restrict evaluation to.
-
-        Returns:
-            List of sorted episode indices that satisfy the predicate.
-        """
-        self.ensure_readable()
-        if candidates is not None:
-            candidate_set = set(candidates)
-            combined = lambda ep: ep["episode_index"] in candidate_set and predicate(ep)  # noqa: E731
-        else:
-            combined = predicate
-        filtered = self.episodes.filter(combined, keep_in_memory=True, load_from_cache_file=False)
-        return sorted(int(idx) for idx in filtered["episode_index"])
-
    def _pull_from_repo(
        self,
        allow_patterns: list[str] | str | None = None,
@@ -250,7 +228,7 @@ class LeRobotDatasetMetadata:
    @property
    def _version(self) -> packaging.version.Version:
        """Codebase version used to create this dataset."""
-        return packaging.version.parse(self.info.codebase_version)
+        return packaging.version.parse(self.info["codebase_version"])

    def get_data_file_path(self, ep_index: int) -> Path:
        """Return the relative parquet file path for the given episode index.
@@ -305,27 +283,27 @@ class LeRobotDatasetMetadata:
    @property
    def data_path(self) -> str:
        """Formattable string for the parquet files."""
-        return self.info.data_path
+        return self.info["data_path"]

    @property
    def video_path(self) -> str | None:
        """Formattable string for the video files."""
-        return self.info.video_path
+        return self.info["video_path"]

    @property
    def robot_type(self) -> str | None:
        """Robot type used in recording this dataset."""
-        return self.info.robot_type
+        return self.info["robot_type"]

    @property
    def fps(self) -> int:
        """Frames per second used during data collection."""
-        return self.info.fps
+        return self.info["fps"]

    @property
    def features(self) -> dict[str, dict]:
        """All features contained in the dataset."""
-        return self.info.features
+        return self.info["features"]

    @property
    def image_keys(self) -> list[str]:
@@ -355,32 +333,32 @@ class LeRobotDatasetMetadata:
    @property
    def total_episodes(self) -> int:
        """Total number of episodes available."""
-        return self.info.total_episodes
+        return self.info["total_episodes"]

    @property
    def total_frames(self) -> int:
        """Total number of frames saved in this dataset."""
-        return self.info.total_frames
+        return self.info["total_frames"]

    @property
    def total_tasks(self) -> int:
        """Total number of different tasks performed in this dataset."""
-        return self.info.total_tasks
+        return self.info["total_tasks"]

    @property
    def chunks_size(self) -> int:
        """Max number of files per chunk."""
-        return self.info.chunks_size
+        return self.info["chunks_size"]

    @property
    def data_files_size_in_mb(self) -> int:
        """Max size of data file in mega bytes."""
-        return self.info.data_files_size_in_mb
+        return self.info["data_files_size_in_mb"]

    @property
    def video_files_size_in_mb(self) -> int:
        """Max size of video file in mega bytes."""
-        return self.info.video_files_size_in_mb
+        return self.info["video_files_size_in_mb"]

    def get_task_index(self, task: str) -> int | None:
        """
@@ -524,10 +502,10 @@ class LeRobotDatasetMetadata:
        self._save_episode_metadata(episode_dict)

        # Update info
-        self.info.total_episodes += 1
-        self.info.total_frames += episode_length
-        self.info.total_tasks = len(self.tasks)
-        self.info.splits = {"train": f"0:{self.info.total_episodes}"}
+        self.info["total_episodes"] += 1
+        self.info["total_frames"] += episode_length
+        self.info["total_tasks"] = len(self.tasks)
+        self.info["splits"] = {"train": f"0:{self.info['total_episodes']}"}

        write_info(self.info, self.root)

@@ -546,7 +524,7 @@ class LeRobotDatasetMetadata:
        for key in video_keys:
            if not self.features[key].get("info", None):
                video_path = self.root / self.video_path.format(video_key=key, chunk_index=0, file_index=0)
-                self.info.features[key]["info"] = get_video_info(video_path)
+                self.info["features"][key]["info"] = get_video_info(video_path)

    def update_chunk_settings(
        self,
@@ -568,17 +546,17 @@ class LeRobotDatasetMetadata:
        if chunks_size is not None:
            if chunks_size <= 0:
                raise ValueError(f"chunks_size must be positive, got {chunks_size}")
-            self.info.chunks_size = chunks_size
+            self.info["chunks_size"] = chunks_size

        if data_files_size_in_mb is not None:
            if data_files_size_in_mb <= 0:
                raise ValueError(f"data_files_size_in_mb must be positive, got {data_files_size_in_mb}")
-            self.info.data_files_size_in_mb = data_files_size_in_mb
+            self.info["data_files_size_in_mb"] = data_files_size_in_mb

        if video_files_size_in_mb is not None:
            if video_files_size_in_mb <= 0:
                raise ValueError(f"video_files_size_in_mb must be positive, got {video_files_size_in_mb}")
-            self.info.video_files_size_in_mb = video_files_size_in_mb
+            self.info["video_files_size_in_mb"] = video_files_size_in_mb

        # Update the info file on disk
        write_info(self.info, self.root)
@@ -675,7 +653,7 @@ class LeRobotDatasetMetadata:
                f"Features contain video keys {obj.video_keys}, but 'use_videos' is set to False. "
                "Either remove video features from the features dict, or set 'use_videos=True'."
            )
-        write_info(obj.info, obj.root)
+        write_json(obj.info, obj.root / INFO_PATH)
        obj.revision = None
        obj._pq_writer = None
        obj.latest_episode = None
@@ -897,10 +897,14 @@ def _copy_and_reindex_episodes_metadata(

    dst_meta.finalize()

-    dst_meta.info.total_episodes = len(episode_mapping)
-    dst_meta.info.total_frames = total_frames
-    dst_meta.info.total_tasks = len(dst_meta.tasks) if dst_meta.tasks is not None else 0
-    dst_meta.info.splits = {"train": f"0:{len(episode_mapping)}"}
+    dst_meta.info.update(
+        {
+            "total_episodes": len(episode_mapping),
+            "total_frames": total_frames,
+            "total_tasks": len(dst_meta.tasks) if dst_meta.tasks is not None else 0,
+            "splits": {"train": f"0:{len(episode_mapping)}"},
+        }
+    )
    write_info(dst_meta.info, dst_meta.root)

    if not all_stats:
@@ -1065,20 +1069,21 @@ def _copy_episodes_metadata_and_stats(
    if episodes_dir.exists():
        shutil.copytree(episodes_dir, dst_episodes_dir, dirs_exist_ok=True)

-    dst_meta.info.total_episodes = src_dataset.meta.total_episodes
-    dst_meta.info.total_frames = src_dataset.meta.total_frames
-    dst_meta.info.total_tasks = src_dataset.meta.total_tasks
-    # Preserve original splits if available, otherwise create default
-    dst_meta.info.splits = (
-        src_dataset.meta.info.splits
-        if src_dataset.meta.info.splits
-        else {"train": f"0:{src_dataset.meta.total_episodes}"}
+    dst_meta.info.update(
+        total_episodes=src_dataset.meta.total_episodes,
+        total_frames=src_dataset.meta.total_frames,
+        total_tasks=src_dataset.meta.total_tasks,
+        # Preserve original splits if available; `or` also covers an empty mapping, unlike a
+        # `.get()` default, so a default train split is created in that case.
+        splits=src_dataset.meta.info.get("splits") or {"train": f"0:{src_dataset.meta.total_episodes}"},
    )

    if dst_meta.video_keys and src_dataset.meta.video_keys:
        for key in dst_meta.video_keys:
            if key in src_dataset.meta.features:
-                dst_meta.info.features[key]["info"] = src_dataset.meta.info.features[key].get("info", {})
+                dst_meta.info["features"][key]["info"] = src_dataset.meta.info["features"][key].get(
+                    "info", {}
+                )

    write_info(dst_meta.info, dst_meta.root)

@@ -1520,7 +1525,7 @@ def modify_tasks(
    write_tasks(new_task_df, root)

    # Update info.json
-    dataset.meta.info.total_tasks = len(unique_tasks)
+    dataset.meta.info["total_tasks"] = len(unique_tasks)
    write_info(dataset.meta.info, root)

    # Reload metadata to reflect changes
@@ -1853,10 +1858,10 @@ def convert_image_to_video_dataset(
        episodes_df.to_parquet(episodes_path, index=False)

        # Update metadata info
-        new_meta.info.total_episodes = len(episode_indices)
-        new_meta.info.total_frames = sum(ep["length"] for ep in all_episode_metadata.values())
-        new_meta.info.total_tasks = dataset.meta.total_tasks
-        new_meta.info.splits = {"train": f"0:{len(episode_indices)}"}
+        new_meta.info["total_episodes"] = len(episode_indices)
+        new_meta.info["total_frames"] = sum(ep["length"] for ep in all_episode_metadata.values())
+        new_meta.info["total_tasks"] = dataset.meta.total_tasks
+        new_meta.info["splits"] = {"train": f"0:{len(episode_indices)}"}

        # Update video info for all image keys (now videos)
        # We need to manually set video info since update_video_info() checks video_keys first
@@ -1865,7 +1870,7 @@ def convert_image_to_video_dataset(
                video_path = new_meta.root / new_meta.video_path.format(
                    video_key=img_key, chunk_index=0, file_index=0
                )
-                new_meta.info.features[img_key]["info"] = get_video_info(video_path)
+                new_meta.info["features"][img_key]["info"] = get_video_info(video_path)

        write_info(new_meta.info, new_meta.root)

@@ -19,7 +19,6 @@ from pprint import pformat
 import torch

 from lerobot.configs import PreTrainedConfig
-from lerobot.configs.rewards import RewardModelConfig
 from lerobot.configs.train import TrainPipelineConfig
 from lerobot.transforms import ImageTransforms
 from lerobot.utils.constants import ACTION, IMAGENET_STATS, OBS_PREFIX, REWARD
@@ -31,14 +30,12 @@ from .streaming_dataset import StreamingLeRobotDataset


 def resolve_delta_timestamps(
-    cfg: PreTrainedConfig | RewardModelConfig, ds_meta: LeRobotDatasetMetadata
+    cfg: PreTrainedConfig, ds_meta: LeRobotDatasetMetadata
 ) -> dict[str, list] | None:
-    """Resolves delta_timestamps by reading from the 'delta_indices' properties of the config.
+    """Resolves delta_timestamps by reading from the 'delta_indices' properties of the PreTrainedConfig.

    Args:
-        cfg (PreTrainedConfig | RewardModelConfig): The config to read delta_indices from. Both
-            ``PreTrainedConfig`` and concrete ``RewardModelConfig`` subclasses expose the
-            ``{observation,action,reward}_delta_indices`` properties used below.
+        cfg (PreTrainedConfig): The PreTrainedConfig to read delta_indices from.
        ds_meta (LeRobotDatasetMetadata): The dataset from which features and fps are used to build
            delta_timestamps against.

@@ -85,7 +82,7 @@ def make_dataset(cfg: TrainPipelineConfig) -> LeRobotDataset | MultiLeRobotDatas
        ds_meta = LeRobotDatasetMetadata(
            cfg.dataset.repo_id, root=cfg.dataset.root, revision=cfg.dataset.revision
        )
-        delta_timestamps = resolve_delta_timestamps(cfg.trainable_config, ds_meta)
+        delta_timestamps = resolve_delta_timestamps(cfg.policy, ds_meta)
        if not cfg.dataset.streaming:
            dataset = LeRobotDataset(
                cfg.dataset.repo_id,
@@ -28,7 +28,6 @@ from .utils import (
    DEFAULT_DATA_PATH,
    DEFAULT_VIDEO_FILE_SIZE_IN_MB,
    DEFAULT_VIDEO_PATH,
-    DatasetInfo,
 )


@@ -79,8 +78,8 @@ def create_empty_dataset_info(
    chunks_size: int | None = None,
    data_files_size_in_mb: int | None = None,
    video_files_size_in_mb: int | None = None,
-) -> DatasetInfo:
-    """Create a template ``DatasetInfo`` object for a new dataset's ``meta/info.json``.
+) -> dict:
+    """Create a template dictionary for a new dataset's `info.json`.

    Args:
        codebase_version (str): The version of the LeRobot codebase.
@@ -88,24 +87,25 @@ def create_empty_dataset_info(
        features (dict): The LeRobot features dictionary for the dataset.
        use_videos (bool): Whether the dataset will store videos.
        robot_type (str | None): The type of robot used, if any.
-        chunks_size (int | None): Max files per chunk directory. Defaults to ``DEFAULT_CHUNK_SIZE``.
-        data_files_size_in_mb (int | None): Max parquet file size in MB. Defaults to ``DEFAULT_DATA_FILE_SIZE_IN_MB``.
-        video_files_size_in_mb (int | None): Max video file size in MB. Defaults to ``DEFAULT_VIDEO_FILE_SIZE_IN_MB``.

    Returns:
-        DatasetInfo: A typed dataset information object with initial metadata.
+        dict: A dictionary with the initial dataset metadata.
    """
-    return DatasetInfo(
-        codebase_version=codebase_version,
-        fps=fps,
-        features=features,
-        robot_type=robot_type,
-        chunks_size=chunks_size or DEFAULT_CHUNK_SIZE,
-        data_files_size_in_mb=data_files_size_in_mb or DEFAULT_DATA_FILE_SIZE_IN_MB,
-        video_files_size_in_mb=video_files_size_in_mb or DEFAULT_VIDEO_FILE_SIZE_IN_MB,
-        data_path=DEFAULT_DATA_PATH,
-        video_path=DEFAULT_VIDEO_PATH if use_videos else None,
-    )
+    return {
+        "codebase_version": codebase_version,
+        "robot_type": robot_type,
+        "total_episodes": 0,
+        "total_frames": 0,
+        "total_tasks": 0,
+        "chunks_size": chunks_size or DEFAULT_CHUNK_SIZE,
+        "data_files_size_in_mb": data_files_size_in_mb or DEFAULT_DATA_FILE_SIZE_IN_MB,
+        "video_files_size_in_mb": video_files_size_in_mb or DEFAULT_VIDEO_FILE_SIZE_IN_MB,
+        "fps": fps,
+        "splits": {},
+        "data_path": DEFAULT_DATA_PATH,
+        "video_path": DEFAULT_VIDEO_PATH if use_videos else None,
+        "features": features,
+    }


 def check_delta_timestamps(
@@ -39,7 +39,6 @@ from .utils import (
    EPISODES_DIR,
    INFO_PATH,
    STATS_PATH,
-    DatasetInfo,
    serialize_dict,
 )

@@ -116,21 +115,25 @@ def embed_images(dataset: datasets.Dataset) -> datasets.Dataset:
    return dataset


-def write_info(info: DatasetInfo, local_dir: Path) -> None:
-    write_json(info.to_dict(), local_dir / INFO_PATH)
+def write_info(info: dict, local_dir: Path) -> None:
+    write_json(info, local_dir / INFO_PATH)


-def load_info(local_dir: Path) -> DatasetInfo:
+def load_info(local_dir: Path) -> dict:
    """Load dataset info metadata from its standard file path.

+    Also converts shape lists to tuples for consistency.
+
    Args:
        local_dir (Path): The root directory of the dataset.

    Returns:
-        DatasetInfo: The typed dataset information object.
+        dict: The dataset information dictionary.
    """
-    raw = load_json(local_dir / INFO_PATH)
-    return DatasetInfo.from_dict(raw)
+    info = load_json(local_dir / INFO_PATH)
+    for ft in info["features"].values():
+        ft["shape"] = tuple(ft["shape"])
+    return info


 def write_stats(stats: dict, local_dir: Path) -> None:
@@ -49,7 +49,6 @@ class LeRobotDataset(torch.utils.data.Dataset):
        repo_id: str,
        root: str | Path | None = None,
        episodes: list[int] | None = None,
-        episode_filter: Callable[[dict], bool] | None = None,
        image_transforms: Callable | None = None,
        delta_timestamps: dict[str, list[float]] | None = None,
        tolerance_s: float = 1e-4,
@@ -154,11 +153,6 @@ class LeRobotDataset(torch.utils.data.Dataset):
                ``$HF_LEROBOT_HOME/hub``.
            episodes (list[int] | None, optional): If specified, this will only load episodes specified by
                their episode_index in this list. Defaults to None.
-            episode_filter (Callable[[dict], bool] | None, optional): Predicate over per-episode
-                metadata rows used to select episodes. Evaluated against ``meta/`` without ``stats`` keys
-                (e.g.``task_index``, ``episode_index``, ``length``, ``from_timestamp``, ``to_timestamp``).
-                Intersected with ``episodes`` when both are set. Example: ``lambda ep: ep["length"] >= 100``.
-                Defaults to None.
            image_transforms (Callable | None, optional):
                Transform applied to visual modalities inside `__getitem__` after image decoding / tensor
                conversion. This works for both image-backed and video-backed observations and can later be
@@ -205,6 +199,7 @@ class LeRobotDataset(torch.utils.data.Dataset):
        self.reader = None
        self.set_image_transforms(image_transforms)
        self.delta_timestamps = delta_timestamps
+        self.episodes = episodes
        self.tolerance_s = tolerance_s
        self.revision = revision if revision else CODEBASE_VERSION
        self._video_backend = video_backend if video_backend else get_safe_default_codec()
@@ -223,23 +218,6 @@ class LeRobotDataset(torch.utils.data.Dataset):
        self.root = self.meta.root
        self.revision = self.meta.revision

-        if episodes is not None and any(
-            episode >= self.meta.total_episodes or episode < 0 for episode in episodes
-        ):
-            logger.warning(
-                f"Some episodes in the provided episodes list are out of range for this dataset ({self.meta.total_episodes})."
-            )
-
-        if episode_filter is not None:
-            resolved = self.meta.filter_episodes(episode_filter, candidates=episodes)
-            if not resolved:
-                raise ValueError(
-                    "The episode filter did not match any episode. Make sure the filter and episodes list are valid and compatible."
-                )
-            logger.info(f"The episode filter matched {len(resolved)} episode(s).")
-            episodes = resolved
-        self.episodes = episodes
-
        # Create reader (hf_dataset loaded below)
        self.reader = DatasetReader(
            meta=self.meta,
@@ -123,7 +123,7 @@ class MultiLeRobotDataset(torch.utils.data.Dataset):

        NOTE: For now, this relies on a check in __init__ to make sure all sub-datasets have the same info.
        """
-        return self._datasets[0].meta.info.fps
+        return self._datasets[0].meta.info["fps"]

    @property
    def video(self) -> bool:
@@ -133,7 +133,7 @@ class MultiLeRobotDataset(torch.utils.data.Dataset):

        NOTE: For now, this relies on a check in __init__ to make sure all sub-datasets have the same info.
        """
-        return len(self._datasets[0].meta.video_keys) > 0
+        return self._datasets[0].meta.info.get("video", False)

    @property
    def features(self) -> datasets.Features:
@@ -434,7 +434,7 @@ class StreamingLeRobotDataset(torch.utils.data.IterableDataset):

    def _make_padding_camera_frame(self, camera_key: str):
        """Variable-shape padding frame for given camera keys, given in (H, W, C)"""
-        return torch.zeros(self.meta.info.features[camera_key]["shape"]).permute(-1, 0, 1)
+        return torch.zeros(self.meta.info["features"][camera_key]["shape"]).permute(-1, 0, 1)

    def _get_video_frame_padding_mask(
        self,
@@ -14,11 +14,9 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import contextlib
-import dataclasses
 import importlib.resources
 import json
 import logging
-from dataclasses import dataclass, field
 from pathlib import Path

 import datasets
@@ -72,9 +70,6 @@ class ForwardCompatibilityError(CompatibilityError):
        super().__init__(message)


-logger = logging.getLogger(__name__)
-
-
 DEFAULT_CHUNK_SIZE = 1000  # Max number of files per chunk
 DEFAULT_DATA_FILE_SIZE_IN_MB = 100  # Max size per file
 DEFAULT_VIDEO_FILE_SIZE_IN_MB = 200  # Max size per file
@@ -99,123 +94,6 @@ LEGACY_EPISODES_STATS_PATH = "meta/episodes_stats.jsonl"
 LEGACY_TASKS_PATH = "meta/tasks.jsonl"


-@dataclass
-class DatasetInfo:
-    """Typed representation of the ``meta/info.json`` file for a LeRobot dataset.
-
-    Replaces the previously untyped ``dict`` returned by ``load_info()`` and
-    created by ``create_empty_dataset_info()``.  Using a dataclass provides
-    explicit field definitions, IDE auto-completion, and validation at
-    construction time.
-    """
-
-    codebase_version: str
-    fps: int
-    features: dict[str, dict]
-
-    # Episode / frame counters — start at zero for new datasets
-    total_episodes: int = 0
-    total_frames: int = 0
-    total_tasks: int = 0
-
-    # Storage settings
-    chunks_size: int = field(default=DEFAULT_CHUNK_SIZE)
-    data_files_size_in_mb: int = field(default=DEFAULT_DATA_FILE_SIZE_IN_MB)
-    video_files_size_in_mb: int = field(default=DEFAULT_VIDEO_FILE_SIZE_IN_MB)
-
-    # File path templates
-    data_path: str = field(default=DEFAULT_DATA_PATH)
-    video_path: str | None = field(default=DEFAULT_VIDEO_PATH)
-
-    # Optional metadata
-    robot_type: str | None = None
-    splits: dict[str, str] = field(default_factory=dict)
-
-    def __post_init__(self) -> None:
-        # Coerce feature shapes from list to tuple — JSON deserialisation
-        # returns lists, but the rest of the codebase expects tuples.
-        for ft in self.features.values():
-            if isinstance(ft.get("shape"), list):
-                ft["shape"] = tuple(ft["shape"])
-
-        if self.fps <= 0:
-            raise ValueError(f"fps must be positive, got {self.fps}")
-        if self.chunks_size <= 0:
-            raise ValueError(f"chunks_size must be positive, got {self.chunks_size}")
-        if self.data_files_size_in_mb <= 0:
-            raise ValueError(f"data_files_size_in_mb must be positive, got {self.data_files_size_in_mb}")
-        if self.video_files_size_in_mb <= 0:
-            raise ValueError(f"video_files_size_in_mb must be positive, got {self.video_files_size_in_mb}")
-
-    def to_dict(self) -> dict:
-        """Return a JSON-serialisable dict.
-
-        Converts tuple shapes back to lists so ``json.dump`` can handle them.
-        """
-        d = dataclasses.asdict(self)
-        for ft in d["features"].values():
-            if isinstance(ft.get("shape"), tuple):
-                ft["shape"] = list(ft["shape"])
-        return d
-
-    @classmethod
-    def from_dict(cls, data: dict) -> "DatasetInfo":
-        """Construct from a raw dict (e.g. loaded directly from JSON).
-
-        Unknown keys are ignored for forward compatibility with datasets that
-        carry additional fields (e.g. ``total_videos`` from v2.x). A warning is
-        logged when such fields are present.
-        """
-        known = {f.name for f in dataclasses.fields(cls)}
-        unknown = sorted(k for k in data if k not in known)
-        if unknown:
-            logger.warning(f"Unknown fields in DatasetInfo: {unknown}. These will be ignored.")
-        return cls(**{k: v for k, v in data.items() if k in known})
-
-    # ---------------------------------------------------------------------------
-    # Temporary dict-style compatibility layer
-    # Allows existing ``info["key"]`` call-sites to keep working without changes.
-    # Once all callers have been migrated to attribute access, remove these.
-    # ---------------------------------------------------------------------------
-    def __getitem__(self, key: str):
-        import warnings
-
-        warnings.warn(
-            f"Accessing DatasetInfo with dict-style syntax info['{key}'] is deprecated. "
-            f"Use attribute access info.{key} instead.",
-            DeprecationWarning,
-            stacklevel=2,
-        )
-        try:
-            return getattr(self, key)
-        except AttributeError as err:
-            raise KeyError(key) from err
-
-    def __setitem__(self, key: str, value) -> None:
-        import warnings
-
-        warnings.warn(
-            f"Setting DatasetInfo with dict-style syntax info['{key}'] = ... is deprecated. "
-            f"Use attribute assignment info.{key} = ... instead.",
-            DeprecationWarning,
-            stacklevel=2,
-        )
-        if not hasattr(self, key):
-            raise KeyError(f"DatasetInfo has no field '{key}'")
-        setattr(self, key, value)
-
-    def __contains__(self, key: str) -> bool:
-        """Check if a field exists (dict-like interface)."""
-        return hasattr(self, key)
-
-    def get(self, key: str, default=None):
-        """Get attribute value with default fallback (dict-like interface)."""
-        try:
-            return getattr(self, key)
-        except AttributeError:
-            return default
-
-
 def has_legacy_hub_download_metadata(root: Path) -> bool:
    """Return ``True`` when *root* looks like a legacy Hub ``local_dir`` mirror.

@@ -416,7 +294,7 @@ def create_branch(repo_id: str, *, branch: str, repo_type: str | None = None) ->

 def create_lerobot_dataset_card(
    tags: list | None = None,
-    dataset_info: DatasetInfo | None = None,
+    dataset_info: dict | None = None,
    **kwargs,
 ) -> DatasetCard:
    """Create a `DatasetCard` for a LeRobot dataset.
@@ -427,7 +305,7 @@ def create_lerobot_dataset_card(

    Args:
        tags (list | None): A list of tags to add to the dataset card.
-        dataset_info (DatasetInfo | None): The dataset's info object, which will
+        dataset_info (dict | None): The dataset's info dictionary, which will
            be displayed on the card.
        **kwargs: Additional keyword arguments to populate the card template.

@@ -440,7 +318,7 @@ def create_lerobot_dataset_card(
        card_tags += tags
    if dataset_info:
        dataset_structure = "[meta/info.json](meta/info.json):\n"
-        dataset_structure += f"```json\n{json.dumps(dataset_info.to_dict(), indent=4)}\n```\n"
+        dataset_structure += f"```json\n{json.dumps(dataset_info, indent=4)}\n```\n"
        kwargs = {**kwargs, "dataset_structure": dataset_structure}
    card_data = DatasetCardData(
        license=kwargs.get("license"),
@@ -33,6 +33,7 @@ import fsspec
 import numpy as np
 import pyarrow as pa
 import torch
+import torchvision
 from datasets.features.features import register_feature
 from PIL import Image

@@ -131,9 +132,7 @@ def decode_video_frames(
        video_path (Path): Path to the video file.
        timestamps (list[float]): List of timestamps to extract frames.
        tolerance_s (float): Allowed deviation in seconds for frame retrieval.
-        backend (str, optional): Backend to use for decoding. Defaults to "torchcodec" when available
-            in the platform; otherwise, defaults to "pyav". The legacy value "video_reader" is
-            accepted for one release as an alias for "pyav" and will be removed in a future version.
+        backend (str, optional): Backend to use for decoding. Defaults to "torchcodec" when available in the platform; otherwise, defaults to "pyav".
        return_uint8 (bool): If True, return raw uint8 frames without float32 normalization.
            This reduces memory for DataLoader IPC; normalization can be done on GPU afterward.

@@ -146,87 +145,85 @@ def decode_video_frames(
        backend = get_safe_default_codec()
    if backend == "torchcodec":
        return decode_video_frames_torchcodec(video_path, timestamps, tolerance_s, return_uint8=return_uint8)
-    elif backend == "pyav":
-        return decode_video_frames_pyav(video_path, timestamps, tolerance_s, return_uint8=return_uint8)
-    elif backend == "video_reader":
-        logger.warning("backend='video_reader' is deprecated and now aliases to 'pyav'.")
-        return decode_video_frames_pyav(video_path, timestamps, tolerance_s, return_uint8=return_uint8)
+    elif backend in ["pyav", "video_reader"]:
+        return decode_video_frames_torchvision(
+            video_path, timestamps, tolerance_s, backend, return_uint8=return_uint8
+        )
    else:
        raise ValueError(f"Unsupported video backend: {backend}")


-def decode_video_frames_pyav(
+def decode_video_frames_torchvision(
    video_path: Path | str,
    timestamps: list[float],
    tolerance_s: float,
+    backend: str = "pyav",
    log_loaded_timestamps: bool = False,
    return_uint8: bool = False,
 ) -> torch.Tensor:
-    """Loads frames associated to the requested timestamps of a video using PyAV.
+    """Loads frames associated to the requested timestamps of a video

-    This is the fallback decoder for platforms where torchcodec has no wheel (currently macOS
-    x86_64 and linux armv7l — see the torchcodec block in pyproject.toml for the full matrix).
-    On supported platforms, prefer `decode_video_frames_torchcodec`, which is faster and supports
-    accurate seek.
+    The backend can be either "pyav" (default) or "video_reader".
+    "video_reader" requires installing torchvision from source, see:
+    https://github.com/pytorch/vision/blob/main/torchvision/csrc/io/decoder/gpu/README.rst
+    (note that you need to compile against ffmpeg<4.3)

-    PyAV doesn't support accurate seek: we seek to the nearest preceding keyframe and decode
-    forward until we have covered the requested timestamp range. The number of key frames in a
-    video can be adjusted at encoding time to trade off decoding speed against file size.
+    While both run on the CPU, "video_reader" is supposedly faster than "pyav" but requires additional setup.
+    For more info on video decoding, see `benchmark/video/README.md`.

-    Args:
-        video_path: Path to the video file.
-        timestamps: List of timestamps (in seconds) to extract frames for.
-        tolerance_s: Allowed deviation in seconds between a queried timestamp and the closest
-            decoded frame.
-        log_loaded_timestamps: When True, log every decoded frame's timestamp at INFO level.
-        return_uint8: When True, return raw uint8 frames (C, H, W). Otherwise, return float32 in
-            [0, 1] range.
+    See torchvision doc for more info on these two backends:
+    https://pytorch.org/vision/0.18/index.html?highlight=backend#torchvision.set_video_backend

-    Returns:
-        torch.Tensor of shape (len(timestamps), C, H, W).
+    Note: Video benefits from inter-frame compression. Instead of storing every frame individually,
+    the encoder stores a reference frame (or a key frame) and subsequent frames as differences relative to
+    that key frame. As a consequence, to access a requested frame, we need to load the preceding key frame,
+    and all subsequent frames until reaching the requested frame. The number of key frames in a video
+    can be adjusted during encoding to take into account decoding time and video size in bytes.
    """
-    # TODO(rcadene): also load audio stream at the same time
    video_path = str(video_path)

+    # set backend
+    keyframes_only = False
+    torchvision.set_video_backend(backend)
+    if backend == "pyav":
+        keyframes_only = True  # pyav doesn't support accurate seek
+
+    # set a video stream reader
+    # TODO(rcadene): also load audio stream at the same time
+    reader = torchvision.io.VideoReader(video_path, "video")
+
    # set the first and last requested timestamps
    # Note: previous timestamps are usually loaded, since we need to access the previous key frame
    first_ts = min(timestamps)
    last_ts = max(timestamps)

-    loaded_frames: list[torch.Tensor] = []
-    loaded_ts: list[float] = []
+    # access closest key frame of the first requested frame
+    # Note: closest key frame timestamp is usually smaller than `first_ts` (e.g. key frame can be the first frame of the video)
+    # for details on what `seek` is doing see: https://pyav.basswood-io.com/docs/stable/api/container.html?highlight=inputcontainer#av.container.InputContainer.seek
+    reader.seek(first_ts, keyframes_only=keyframes_only)

-    # Seek + decode. `container.seek(offset)` with no `stream` argument expects the offset in
-    # av.time_base units (microseconds). `backward=True` lands us on the nearest keyframe at or
-    # before `first_ts`, so we can then decode forward until we cover `last_ts`. See:
-    # https://pyav.basswood-io.com/docs/stable/api/container.html#av.container.InputContainer.seek
-    with av.open(video_path) as container:
-        stream = container.streams.video[0]
-        container.seek(int(first_ts * av.time_base), backward=True)
+    # load all frames until last requested frame
+    loaded_frames = []
+    loaded_ts = []
+    for frame in reader:
+        current_ts = frame["pts"]
+        if log_loaded_timestamps:
+            logger.info(f"frame loaded at timestamp={current_ts:.4f}")
+        loaded_frames.append(frame["data"])
+        loaded_ts.append(current_ts)
+        if current_ts >= last_ts:
+            break

-        for frame in container.decode(stream):
-            if frame.pts is None:
-                continue
-            current_ts = float(frame.pts * stream.time_base)
-            if log_loaded_timestamps:
-                logger.info(f"frame loaded at timestamp={current_ts:.4f}")
-            # Convert to CHW uint8 to match torchcodec's output layout.
-            arr = frame.to_ndarray(format="rgb24")  # H, W, 3
-            loaded_frames.append(torch.from_numpy(arr).permute(2, 0, 1).contiguous())
-            loaded_ts.append(current_ts)
-            if current_ts >= last_ts:
-                break
+    if backend == "pyav":
+        reader.container.close()

-    if not loaded_frames:
-        raise FrameTimestampError(
-            f"No frames could be decoded from {video_path} in the timestamp range [{first_ts}, {last_ts}]."
-        )
+    reader = None

    query_ts = torch.tensor(timestamps)
-    loaded_ts_t = torch.tensor(loaded_ts)
+    loaded_ts = torch.tensor(loaded_ts)

    # compute distances between each query timestamp and timestamps of all loaded frames
-    dist = torch.cdist(query_ts[:, None], loaded_ts_t[:, None], p=1)
+    dist = torch.cdist(query_ts[:, None], loaded_ts[:, None], p=1)
    min_, argmin_ = dist.min(1)

    is_within_tol = min_ < tolerance_s
@@ -237,14 +234,14 @@ def decode_video_frames_pyav(
            " This might be due to synchronization issues with timestamps during data collection."
            " To be safe, we advise to ignore this item during training."
            f"\nqueried timestamps: {query_ts}"
-            f"\nloaded timestamps: {loaded_ts_t}"
+            f"\nloaded timestamps: {loaded_ts}"
            f"\nvideo: {video_path}"
-            f"\nbackend: pyav"
+            f"\nbackend: {backend}"
        )

    # get closest frames to the query timestamps
    closest_frames = torch.stack([loaded_frames[idx] for idx in argmin_])
-    closest_ts = loaded_ts_t[argmin_]
+    closest_ts = loaded_ts[argmin_]

    if log_loaded_timestamps:
        logger.info(f"{closest_ts=}")
@@ -285,11 +282,7 @@ class VideoDecoderCache:
        with self._lock:
            if video_path not in self._cache:
                file_handle = fsspec.open(video_path).__enter__()
-                try:
-                    decoder = VideoDecoder(file_handle, seek_mode="approximate")
-                except Exception:
-                    file_handle.close()
-                    raise
+                decoder = VideoDecoder(file_handle, seek_mode="approximate")
                self._cache[video_path] = (decoder, file_handle)

            return self._cache[video_path][0]
@@ -299,6 +299,7 @@ class HILSerlProcessorConfig:
    inverse_kinematics: InverseKinematicsConfig | None = None
    reward_classifier: RewardClassifierConfig | None = None
    max_gripper_pos: float | None = 100.0
+    gripper_speed_factor: float | None = None


@EnvConfig.register_subclass(name="gym_manipulator")
@@ -16,15 +16,18 @@ from lerobot.utils.action_interpolator import ActionInterpolator as ActionInterp

 from .act.configuration_act import ACTConfig as ACTConfig
 from .diffusion.configuration_diffusion import DiffusionConfig as DiffusionConfig
-from .eo1.configuration_eo1 import EO1Config as EO1Config
 from .factory import get_policy_class, make_policy, make_policy_config, make_pre_post_processors
 from .gaussian_actor.configuration_gaussian_actor import GaussianActorConfig as GaussianActorConfig
+from .gaussian_actor.reward_model.configuration_classifier import (
+    RewardClassifierConfig as RewardClassifierConfig,
+)
 from .groot.configuration_groot import GrootConfig as GrootConfig
 from .multi_task_dit.configuration_multi_task_dit import MultiTaskDiTConfig as MultiTaskDiTConfig
 from .pi0.configuration_pi0 import PI0Config as PI0Config
 from .pi0_fast.configuration_pi0_fast import PI0FastConfig as PI0FastConfig
 from .pi05.configuration_pi05 import PI05Config as PI05Config
 from .pretrained import PreTrainedPolicy as PreTrainedPolicy
+from .sarm.configuration_sarm import SARMConfig as SARMConfig
 from .smolvla.configuration_smolvla import SmolVLAConfig as SmolVLAConfig
 from .tdmpc.configuration_tdmpc import TDMPCConfig as TDMPCConfig
 from .utils import make_robot_action, prepare_observation_for_inference
@@ -40,13 +43,14 @@ __all__ = [
    # Configuration classes
    "ACTConfig",
    "DiffusionConfig",
-    "EO1Config",
    "GaussianActorConfig",
    "GrootConfig",
    "MultiTaskDiTConfig",
    "PI0Config",
    "PI0FastConfig",
    "PI05Config",
+    "RewardClassifierConfig",
+    "SARMConfig",
    "SmolVLAConfig",
    "TDMPCConfig",
    "VQBeTConfig",
@@ -100,8 +100,8 @@ class DiffusionConfig(PreTrainedConfig):

    # Inputs / output structure.
    n_obs_steps: int = 2
-    horizon: int = 64
-    n_action_steps: int = 32
+    horizon: int = 16
+    n_action_steps: int = 8

    normalization_mapping: dict[str, NormalizationMode] = field(
        default_factory=lambda: {
@@ -122,10 +122,10 @@ class DiffusionConfig(PreTrainedConfig):
    crop_ratio: float = 1.0
    crop_shape: tuple[int, int] | None = None
    crop_is_random: bool = True
-    pretrained_backbone_weights: str | None = "ResNet18_Weights.IMAGENET1K_V1"
-    use_group_norm: bool = False
+    pretrained_backbone_weights: str | None = None
+    use_group_norm: bool = True
    spatial_softmax_num_keypoints: int = 32
-    use_separate_rgb_encoder_per_camera: bool = True
+    use_separate_rgb_encoder_per_camera: bool = False
    # Unet.
    down_dims: tuple[int, ...] = (512, 1024, 2048)
    kernel_size: int = 5
@@ -1 +0,0 @@
-../../../../docs/source/eo1.mdx
@@ -1,7 +0,0 @@
-#!/usr/bin/env python
-
-from .configuration_eo1 import EO1Config
-from .modeling_eo1 import EO1Policy
-from .processor_eo1 import make_eo1_pre_post_processors
-
-__all__ = ["EO1Config", "EO1Policy", "make_eo1_pre_post_processors"]
@@ -1,193 +0,0 @@
-#!/usr/bin/env python
-
-# Copyright 2026 The HuggingFace Inc. team. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from __future__ import annotations
-
-from copy import deepcopy
-from dataclasses import dataclass, field
-from typing import TYPE_CHECKING
-
-from lerobot.configs.policies import PreTrainedConfig
-from lerobot.configs.types import FeatureType, NormalizationMode, PolicyFeature
-from lerobot.optim.optimizers import AdamWConfig
-from lerobot.optim.schedulers import CosineDecayWithWarmupSchedulerConfig
-from lerobot.utils.constants import ACTION, OBS_STATE
-from lerobot.utils.import_utils import _transformers_available, require_package
-
-if TYPE_CHECKING or _transformers_available:
-    from transformers.models.qwen2_5_vl.configuration_qwen2_5_vl import (
-        Qwen2_5_VLConfig,
-        Qwen2_5_VLTextConfig,
-        Qwen2_5_VLVisionConfig,
-    )
-else:
-    Qwen2_5_VLConfig = None
-    Qwen2_5_VLTextConfig = None
-    Qwen2_5_VLVisionConfig = None
-
-
-@PreTrainedConfig.register_subclass("eo1")
-@dataclass
-class EO1Config(PreTrainedConfig):
-    """Configuration for native EO1 policy integration in LeRobot."""
-
-    vlm_base: str = "Qwen/Qwen2.5-VL-3B-Instruct"
-    vlm_config: dict | None = None
-
-    # Vision processor settings.
-    image_min_pixels: int | None = 64 * 28 * 28
-    image_max_pixels: int | None = 128 * 28 * 28
-    use_fast_processor: bool = False
-
-    # Execution and action horizon.
-    n_obs_steps: int = 1
-    chunk_size: int = 8
-    n_action_steps: int = 8
-
-    # State/action padding to match EO1 flow head dimensionality.
-    max_state_dim: int = 32
-    max_action_dim: int = 32
-
-    # Flow matching sampling.
-    num_denoise_steps: int = 10
-    num_action_layers: int = 2
-    action_act: str = "linear"
-    time_sampling_beta_alpha: float = 1.5
-    time_sampling_beta_beta: float = 1.0
-    time_sampling_scale: float = 0.999
-    time_sampling_offset: float = 0.001
-    min_period: float = 4e-3
-    max_period: float = 4.0
-    supervise_padding_action_dims: bool = True
-    supervise_padding_actions: bool = True
-
-    # Policy-level dtype request for the Qwen backbone.
-    # - "auto": follow the backbone config/checkpoint default dtype. For Qwen2.5-VL this resolves to bf16.
-    #           The EO1 flow-matching head still keeps its own parameters in fp32.
-    # - "bfloat16": force the backbone to initialize/load in bf16 regardless of the saved config default.
-    # - "float32": force the backbone to initialize/load in fp32 for maximum numerical conservatism.
-    dtype: str = "auto"  # Options: "auto", "bfloat16", "float32"
-    force_fp32_autocast: bool = True
-
-    # Optional attention backend request passed through to the Qwen backbone.
-    # Common values: None, "eager", "sdpa", "flash_attention_2".
-    attn_implementation: str | None = None
-
-    # Training settings.
-    gradient_checkpointing: bool = False  # Enable gradient checkpointing for memory optimization
-
-    normalization_mapping: dict[str, NormalizationMode] = field(
-        default_factory=lambda: {
-            "VISUAL": NormalizationMode.IDENTITY,
-            "STATE": NormalizationMode.MEAN_STD,
-            "ACTION": NormalizationMode.MEAN_STD,
-        }
-    )
-
-    # Optimizer settings aligned with EO1/experiments/2_libero/train.sh and EO1 TrainPipelineConfig defaults.
-    optimizer_lr: float = 1e-4
-    optimizer_betas: tuple[float, float] = (0.9, 0.999)
-    optimizer_eps: float = 1e-8
-    optimizer_weight_decay: float = 0.1
-    optimizer_grad_clip_norm: float = 1.0
-
-    # Scheduler settings aligned with EO1 train.sh: cosine schedule with warmup_ratio=0.03.
-    # Note: These will auto-scale if --steps < scheduler_decay_steps
-    # For example, --steps=3000 will scale warmup to 100 and decay to 3000
-    scheduler_warmup_steps: int = 900  # 0.03 * 30_000 long-run steps
-    scheduler_decay_steps: int = 30_000
-    scheduler_decay_lr: float = 0.0
-
-    def __post_init__(self):
-        super().__post_init__()
-
-        if self.n_action_steps > self.chunk_size:
-            raise ValueError(
-                f"n_action_steps ({self.n_action_steps}) cannot be greater than chunk_size ({self.chunk_size})"
-            )
-
-        # Populate the serialized backbone config only when the caller did not provide one.
-        if self.vlm_config is None:
-            require_package("transformers", extra="eo1")
-            self.vlm_config = Qwen2_5_VLConfig.from_pretrained(self.vlm_base).to_dict()
-
-    @property
-    def vlm_backbone_config(self) -> Qwen2_5_VLConfig:
-        require_package("transformers", extra="eo1")
-        config_dict = deepcopy(self.vlm_config)
-        if self.attn_implementation is not None:
-            config_dict["attn_implementation"] = self.attn_implementation
-        return Qwen2_5_VLConfig(**config_dict)
-
-    @property
-    def text_config(self) -> Qwen2_5_VLTextConfig:
-        return self.vlm_backbone_config.text_config
-
-    @property
-    def vision_config(self) -> Qwen2_5_VLVisionConfig:
-        return self.vlm_backbone_config.vision_config
-
-    def validate_features(self) -> None:
-        """Validate and set up EO1 input and output features."""
-        image_features = [key for key, feat in self.input_features.items() if feat.type == FeatureType.VISUAL]
-        if not image_features:
-            raise ValueError(
-                "EO1 policy requires at least one visual input feature. "
-                "No features of type FeatureType.VISUAL found in input_features."
-            )
-
-        if OBS_STATE not in self.input_features:
-            state_feature = PolicyFeature(
-                type=FeatureType.STATE,
-                shape=(self.max_state_dim,),
-            )
-            self.input_features[OBS_STATE] = state_feature
-
-        if ACTION not in self.output_features:
-            action_feature = PolicyFeature(
-                type=FeatureType.ACTION,
-                shape=(self.max_action_dim,),
-            )
-            self.output_features[ACTION] = action_feature
-
-    def get_optimizer_preset(self) -> AdamWConfig:
-        return AdamWConfig(
-            lr=self.optimizer_lr,
-            betas=self.optimizer_betas,
-            eps=self.optimizer_eps,
-            weight_decay=self.optimizer_weight_decay,
-            grad_clip_norm=self.optimizer_grad_clip_norm,
-        )
-
-    def get_scheduler_preset(self):
-        return CosineDecayWithWarmupSchedulerConfig(
-            peak_lr=self.optimizer_lr,
-            decay_lr=self.scheduler_decay_lr,
-            num_warmup_steps=self.scheduler_warmup_steps,
-            num_decay_steps=self.scheduler_decay_steps,
-        )
-
-    @property
-    def observation_delta_indices(self) -> None:
-        return None
-
-    @property
-    def action_delta_indices(self) -> list[int]:
-        return list(range(self.chunk_size))
-
-    @property
-    def reward_delta_indices(self) -> None:
-        return None
@@ -1,620 +0,0 @@
-#!/usr/bin/env python
-
-# Copyright 2026 The HuggingFace Inc. team. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from __future__ import annotations
-
-import contextlib
-import logging
-import math
-from collections import deque
-from typing import TYPE_CHECKING, Any
-
-import torch
-import torch.nn as nn
-import torch.nn.functional as F  # noqa: N812
-import torch.utils.checkpoint
-from torch import Tensor
-
-from lerobot.policies.eo1.configuration_eo1 import EO1Config
-from lerobot.policies.pretrained import PreTrainedPolicy
-from lerobot.utils.constants import ACTION, OBS_STATE
-from lerobot.utils.import_utils import _transformers_available, require_package
-
-if TYPE_CHECKING or _transformers_available:
-    from transformers.activations import ACT2FN
-    from transformers.models.qwen2_5_vl import Qwen2_5_VLForConditionalGeneration
-    from transformers.utils import torch_compilable_check
-else:
-    ACT2FN = None
-    Qwen2_5_VLForConditionalGeneration = None
-    torch_compilable_check = None
-
-logger = logging.getLogger(__name__)
-
-
-def pad_vector(vector, new_dim):
-    """Zero-pad the last dimension of `vector` on the right up to `new_dim`.
-
-    Accepts (batch_size x sequence_length x features_dimension)
-    or (batch_size x features_dimension) inputs.
-
-    The input is returned unchanged (same object) when its last dimension is
-    already at least `new_dim`; it is never truncated.
-    """
-    missing = new_dim - vector.shape[-1]
-    if missing <= 0:
-        return vector
-    return F.pad(vector, (0, missing))
-
-
-class EO1Policy(PreTrainedPolicy):
-    """EO1 policy wrapper for LeRobot robot-only training/evaluation.
-
-    Wraps an `EO1VisionFlowMatchingModel` and adds state/action padding plus an
-    action queue so that `select_action` can hand out one action per call.
-    """
-
-    config_class = EO1Config
-    name = "eo1"
-
-    def __init__(self, config: EO1Config, **kwargs):
-        """Build the Qwen2.5-VL backbone and the flow-matching model around it.
-
-        Args:
-            config: EO1 configuration; `validate_features()` is called on it here.
-            **kwargs: Accepted but not used in this constructor.
-        """
-        require_package("transformers", extra="eo1")
-        super().__init__(config)
-        config.validate_features()
-        self.config = config
-
-        if config.pretrained_path is None:
-            # Initialize from pretrained VLM
-            vlm_backbone = Qwen2_5_VLForConditionalGeneration.from_pretrained(
-                config.vlm_base,
-                dtype=config.dtype,
-                attn_implementation=config.attn_implementation,
-            )
-        else:
-            # Only the architecture is instantiated from the stored backbone config here.
-            # NOTE(review): presumably the weights are filled in afterwards by the checkpoint
-            # loading path of the caller — confirm.
-            vlm_backbone = Qwen2_5_VLForConditionalGeneration._from_config(
-                config.vlm_backbone_config,
-                dtype=config.vlm_backbone_config.dtype if config.dtype == "auto" else config.dtype,
-            )
-
-        self.model = EO1VisionFlowMatchingModel(config, vlm_backbone)
-        if config.gradient_checkpointing:
-            self.model.gradient_checkpointing_enable()
-
-        self.model.to(config.device)
-        self.reset()
-
-    def reset(self):
-        """Drop any queued actions by replacing the queue with an empty one."""
-        self._action_queue = deque(maxlen=self.config.n_action_steps)
-
-    @staticmethod
-    def _get_model_inputs(batch: dict[str, Tensor], excluded_keys: set[str]) -> dict[str, Tensor]:
-        """Return a copy of `batch` without the entries whose key is in `excluded_keys`."""
-        return {key: value for key, value in batch.items() if key not in excluded_keys}
-
-    def forward(self, batch: dict[str, Tensor]) -> tuple[Tensor, dict]:
-        """Compute the training loss for one batch.
-
-        State and action are padded to the configured maximum dimensions; every other
-        batch entry is forwarded to the model as a keyword argument.
-
-        Returns:
-            The loss tensor and a dict holding the same loss as a Python float.
-        """
-        state = self.prepare_state(batch[OBS_STATE])
-        actions = self.prepare_action(batch[ACTION])
-        model_inputs = self._get_model_inputs(batch, {OBS_STATE, ACTION})
-        loss = self.model(states=state, action=actions, **model_inputs)
-
-        loss_dict = {"loss": loss.item()}
-        return loss, loss_dict
-
-    @torch.no_grad()
-    def predict_action_chunk(self, batch: dict[str, Tensor], **kwargs) -> Tensor:
-        """Sample a full action chunk and cut it back to the real action dimension.
-
-        Switches the module to eval mode, samples float32 actions from the model, and
-        slices the last axis to `output_features[ACTION].shape[0]`.
-        """
-        self.eval()
-
-        states = self.prepare_state(batch[OBS_STATE])
-        model_inputs = self._get_model_inputs(batch, {OBS_STATE})
-        actions = self.model.sample_actions(states=states, **model_inputs).to(torch.float32)
-
-        # Drop the padded action dimensions added to reach `max_action_dim`.
-        original_action_dim = self.config.output_features[ACTION].shape[0]
-        return actions[:, :, :original_action_dim]
-
-    def prepare_state(self, state: Tensor) -> Tensor:
-        """Zero-pad the last dimension of `state` up to `config.max_state_dim`."""
-        return pad_vector(state, self.config.max_state_dim)
-
-    def prepare_action(self, action: Tensor) -> Tensor:
-        """Zero-pad the last dimension of `action` up to `config.max_action_dim`."""
-        return pad_vector(action, self.config.max_action_dim)
-
-    @torch.no_grad()
-    def select_action(self, batch: dict[str, Tensor]) -> Tensor:
-        """Return the next queued action, predicting a new chunk when the queue is empty."""
-        self.eval()
-
-        if len(self._action_queue) == 0:
-            actions = self.predict_action_chunk(batch)[:, : self.config.n_action_steps]
-            # Swap the first two axes so that each queue entry holds one step for the whole batch.
-            self._action_queue.extend(actions.transpose(0, 1))
-
-        return self._action_queue.popleft()
-
-    def get_optim_params(self) -> dict:
-        """Return the parameters to optimize.
-
-        NOTE(review): the value is whatever `self.parameters()` returns, not a dict as
-        the return annotation says — confirm what callers expect.
-        """
-        return self.parameters()
-
-
-def get_safe_dtype(target_dtype, device_type):
-    """Return `target_dtype`, downgraded to float32 for the device/dtype pairs listed below."""
-    # float64 is replaced on "mps"; bfloat16 is replaced on "cpu" (treated here as unsupported on CPU).
-    downcast_to_float32 = (
-        ("mps", torch.float64),
-        ("cpu", torch.bfloat16),
-    )
-    if (device_type, target_dtype) in downcast_to_float32:
-        return torch.float32
-    return target_dtype
-
-
-def create_sinusoidal_pos_embedding(  # adapted from openpi `create_sinusoidal_pos_embedding`
-    time: torch.Tensor, dimension: int, min_period: float, max_period: float, device="cpu"
-) -> Tensor:
-    """Computes sine-cosine positional embedding vectors for scalar positions.
-
-    Args:
-        time: 1-D tensor of shape `(batch_size,)` holding the scalar positions.
-        dimension: Width of the returned embedding; must be even.
-        min_period: Period of the first sine/cosine pair.
-        max_period: Period of the last sine/cosine pair.
-        device: Device for the frequency table, given as a `torch.device` or as anything
-            `torch.device()` accepts (for example the string `"cpu"`).
-
-    Returns:
-        Tensor of shape `(batch_size, dimension)` with the sines in the first half of the
-        last axis and the cosines in the second half.
-
-    Raises:
-        ValueError: If `dimension` is odd or `time` is not 1-D.
-    """
-    if dimension % 2 != 0:
-        raise ValueError(f"dimension ({dimension}) must be divisible by 2")
-
-    if time.ndim != 1:
-        raise ValueError("The time tensor is expected to be of shape `(batch_size, )`.")
-
-    # Normalize first: the string default "cpu" (or any string device) has no `.type`
-    # attribute, so reading `device.type` directly raised AttributeError.
-    device = torch.device(device)
-    dtype = get_safe_dtype(torch.float64, device.type)
-    fraction = torch.linspace(0.0, 1.0, dimension // 2, dtype=dtype, device=device)
-    # Periods are spaced geometrically between `min_period` and `max_period`.
-    period = min_period * (max_period / min_period) ** fraction
-
-    # Compute the outer product
-    scaling_factor = 1.0 / period * 2 * math.pi
-    sin_input = scaling_factor[None, :] * time[:, None]
-    return torch.cat([torch.sin(sin_input), torch.cos(sin_input)], dim=1)
-
-
-def sample_beta(alpha, beta, bsize, device):  # see openpi `sample_beta` (exact copy)
-    """Draw `bsize` samples from Beta(alpha, beta) and move them to `device`."""
-    # Beta sampling uses _sample_dirichlet which isn't implemented for MPS, so the
-    # distribution parameters stay on CPU and only the drawn samples are moved.
-    distribution = torch.distributions.Beta(
-        torch.tensor(alpha, dtype=torch.float32),
-        torch.tensor(beta, dtype=torch.float32),
-    )
-    samples = distribution.sample((bsize,))
-    return samples.to(device)
-
-
-class EO1VisionActionProjector(torch.nn.Sequential):
-    """Multi-layer perceptron (MLP) assembled as a `torch.nn.Sequential`.
-
-    The stack is `num_layers - 1` pairs of (`in_channels -> in_channels` linear layer,
-    activation) followed by a single `in_channels -> out_channels` linear layer.
-    """
-
-    def __init__(
-        self,
-        in_channels: int,
-        out_channels: int,
-        num_layers: int = 2,
-        activation_layer: str = "linear",
-        bias: bool = True,
-        device: Any = None,
-        dtype: torch.dtype = torch.float32,
-    ):
-        linear_kwargs = {"bias": bias, "dtype": dtype, "device": device}
-        modules = []
-        # Hidden layers keep the input width; only the final linear layer changes it.
-        for _ in range(num_layers - 1):
-            modules.append(torch.nn.Linear(in_channels, in_channels, **linear_kwargs))
-            modules.append(ACT2FN[activation_layer])
-        modules.append(torch.nn.Linear(in_channels, out_channels, **linear_kwargs))
-        super().__init__(*modules)
-
-    @property
-    def dtype(self):
-        """Dtype of the first linear layer's weight."""
-        first_layer = self[0]
-        return first_layer.weight.dtype
-
-
-class EO1VisionFlowMatchingModel(nn.Module):
-    def __init__(
-        self,
-        config: EO1Config,
-        vlm_backbone: Qwen2_5_VLForConditionalGeneration | None = None,
-    ):
-        """Attach the VLM backbone and create the float32 state/action projection heads.
-
-        Args:
-            config: EO1 configuration providing the state/action dimensions and head settings.
-            vlm_backbone: Qwen2.5-VL model used as backbone.
-                NOTE(review): the default is None, but `vlm_backbone.config` is read
-                unconditionally below, so a backbone is effectively required.
-        """
-        require_package("transformers", extra="eo1")
-        super().__init__()
-
-        self.config = config
-        # Preserve the backbone dtype selected at construction time so Qwen's fp32 rotary buffers stay intact.
-        self.vlm_backbone = vlm_backbone
-        self.hidden_size = self.vlm_backbone.config.text_config.hidden_size
-        max_state_dim = config.max_state_dim
-        max_action_dim = config.max_action_dim
-        # All flow-head layers are created in float32, independent of the backbone dtype.
-        self.state_proj = nn.Linear(max_state_dim, self.hidden_size, dtype=torch.float32)
-        self.action_in_proj = nn.Linear(max_action_dim, self.hidden_size, dtype=torch.float32)
-        self.action_out_proj = EO1VisionActionProjector(
-            self.hidden_size,
-            max_action_dim,
-            config.num_action_layers,
-            config.action_act,
-            dtype=torch.float32,
-        )
-        # Input width is doubled because action and time embeddings are concatenated in `embed_suffix`.
-        self.action_time_mlp_in = nn.Linear(self.hidden_size * 2, self.hidden_size, dtype=torch.float32)
-        self.action_time_mlp_out = nn.Linear(self.hidden_size, self.hidden_size, dtype=torch.float32)
-        self.gradient_checkpointing_enabled = False
-
-    def get_input_embeddings(self):
-        """Return the input embedding module of the VLM backbone."""
-        backbone = self.vlm_backbone
-        return backbone.get_input_embeddings()
-
-    def flow_head_autocast_context(self):
-        """Return the context manager under which the flow-head layers are run.
-
-        With `config.force_fp32_autocast` set, autocast is disabled on the device type
-        that holds `state_proj`; otherwise a no-op context is returned.
-        """
-        if not self.config.force_fp32_autocast:
-            return contextlib.nullcontext()
-
-        head_device_type = self.state_proj.weight.device.type
-        return torch.autocast(device_type=head_device_type, enabled=False)
-
-    def gradient_checkpointing_enable(self):
-        """Turn on gradient checkpointing for the Qwen2.5-VL backbone and the flow head.
-
-        Sets the flag consulted by `_apply_checkpoint` and enables the backbone's own
-        checkpointing in non-reentrant mode.
-        """
-        backbone_checkpoint_kwargs = {"use_reentrant": False}
-        self.gradient_checkpointing_enabled = True
-        self.vlm_backbone.gradient_checkpointing_enable(
-            gradient_checkpointing_kwargs=backbone_checkpoint_kwargs
-        )
-        logger.info("Enabled gradient checkpointing for EO1VisionFlowMatchingModel")
-
-    def gradient_checkpointing_disable(self):
-        """Disable gradient checkpointing for the Qwen2.5-VL backbone.
-
-        Also clears the flag consulted by `_apply_checkpoint`.
-        """
-        self.gradient_checkpointing_enabled = False
-        self.vlm_backbone.gradient_checkpointing_disable()
-        logger.info("Disabled gradient checkpointing for EO1VisionFlowMatchingModel")
-
-    def _apply_checkpoint(self, func, *args, **kwargs):
-        """Call `func(*args, **kwargs)`, through gradient checkpointing when it applies.
-
-        Checkpointing is used only if it was enabled on this model, the module is in
-        training mode, and gradients are currently being recorded.
-        """
-        use_checkpoint = self.gradient_checkpointing_enabled and self.training and torch.is_grad_enabled()
-        if not use_checkpoint:
-            return func(*args, **kwargs)
-
-        return torch.utils.checkpoint.checkpoint(
-            func, *args, use_reentrant=False, preserve_rng_state=False, **kwargs
-        )
-
-    def sample_noise(self, shape, device):
-        """Draw standard-normal float32 noise of the given `shape` on `device`."""
-        return torch.normal(mean=0.0, std=1.0, size=shape, dtype=torch.float32, device=device)
-
-    def sample_time(self, bsize, device):
-        """Sample `bsize` flow-matching times as scaled and shifted Beta draws (float32 on `device`)."""
-        cfg = self.config
-        beta_draws = sample_beta(cfg.time_sampling_beta_alpha, cfg.time_sampling_beta_beta, bsize, device)
-        shifted = beta_draws * cfg.time_sampling_scale + cfg.time_sampling_offset
-        return shifted.to(dtype=torch.float32, device=device)
-
-    def get_placeholder_mask(
-        self,
-        input_ids: torch.LongTensor | None,
-        inputs_embeds: torch.FloatTensor | None,
-        state_features: torch.FloatTensor | None = None,
-        action_features: torch.FloatTensor | None = None,
-        *,
-        state_token_id: int,
-        action_token_id: int,
-    ) -> tuple[torch.BoolTensor, torch.BoolTensor]:
-        """Return EO1 state/action placeholder masks, following Qwen's multimodal mask style.
-
-        Args:
-            input_ids: Token ids; when None the placeholders are located from `inputs_embeds`.
-            inputs_embeds: Embedded sequence; the returned masks are expanded to its shape.
-            state_features: If given, its element count is checked against the masked state slots.
-            action_features: If given, its element count is checked against the masked action slots.
-            state_token_id: Id of the state placeholder token.
-            action_token_id: Id of the action placeholder token.
-
-        Returns:
-            `(state_mask, action_mask)`, both expanded to the shape of `inputs_embeds`.
-        """
-        if input_ids is None:
-            # Without token ids, a position counts as a placeholder when its whole embedding
-            # vector equals the embedding of the placeholder token.
-            special_state_mask = inputs_embeds == self.get_input_embeddings()(
-                torch.tensor(state_token_id, dtype=torch.long, device=inputs_embeds.device)
-            )
-            special_state_mask = special_state_mask.all(-1)
-            special_action_mask = inputs_embeds == self.get_input_embeddings()(
-                torch.tensor(action_token_id, dtype=torch.long, device=inputs_embeds.device)
-            )
-            special_action_mask = special_action_mask.all(-1)
-        else:
-            special_state_mask = input_ids == state_token_id
-            special_action_mask = input_ids == action_token_id
-
-        # Token counts are taken before expansion; they are only used in the error messages.
-        n_state_tokens = special_state_mask.sum()
-        special_state_mask = (
-            special_state_mask.unsqueeze(-1).expand_as(inputs_embeds).to(inputs_embeds.device)
-        )
-        if state_features is not None:
-            torch_compilable_check(
-                inputs_embeds[special_state_mask].numel() == state_features.numel(),
-                f"State features and state tokens do not match, tokens: {n_state_tokens}, features: {state_features.shape[0]}",
-            )
-
-        n_action_tokens = special_action_mask.sum()
-        special_action_mask = (
-            special_action_mask.unsqueeze(-1).expand_as(inputs_embeds).to(inputs_embeds.device)
-        )
-        if action_features is not None:
-            torch_compilable_check(
-                inputs_embeds[special_action_mask].numel() == action_features.numel(),
-                f"Action features and action tokens do not match, tokens: {n_action_tokens}, features: {action_features.shape[0]}",
-            )
-
-        return special_state_mask, special_action_mask
-
-    def embed_prefix(
-        self,
-        input_ids: torch.LongTensor,
-        states: torch.Tensor,
-        *,
-        state_token_id: int,
-        action_token_id: int,
-    ) -> torch.FloatTensor:
-        """Embed the EO1 prefix tokens before native Qwen injects multimodal features.
-
-        Token ids are embedded with the backbone's input embeddings, then the positions of
-        the state placeholder token are overwritten with the projected `states`.
-
-        Returns:
-            The input embeddings with the state placeholders filled in.
-        """
-
-        # Get the input embeddings for the input IDs
-        def input_embed_func(input_ids: torch.LongTensor) -> torch.FloatTensor:
-            return self.get_input_embeddings()(input_ids)
-
-        inputs_embeds = self._apply_checkpoint(input_embed_func, input_ids)
-
-        # Project the states to the hidden size
-        def state_proj_func(states: torch.Tensor) -> torch.FloatTensor:
-            with self.flow_head_autocast_context():
-                # Cast to the projection's own dtype before applying it.
-                states = states.to(dtype=self.state_proj.weight.dtype)
-                return self.state_proj(states)
-
-        state_embs = self._apply_checkpoint(state_proj_func, states)
-        state_mask, _ = self.get_placeholder_mask(
-            input_ids,
-            inputs_embeds,
-            state_features=state_embs,
-            state_token_id=state_token_id,
-            action_token_id=action_token_id,
-        )
-        # Match the embedding device/dtype before scattering into the placeholder slots.
-        state_embs = state_embs.to(inputs_embeds.device, inputs_embeds.dtype)
-        inputs_embeds = inputs_embeds.masked_scatter(state_mask, state_embs)
-        return inputs_embeds
-
-    def embed_suffix(
-        self,
-        timestep: torch.Tensor,
-        noisy_actions: torch.Tensor,
-    ) -> torch.FloatTensor:
-        """Embed the suffix: noisy actions fused with a sinusoidal embedding of the timestep.
-
-        Args:
-            timestep: 1-D tensor with one flow-matching time per sample.
-            noisy_actions: Noisy action chunk whose last axis is fed to `action_in_proj`.
-
-        Returns:
-            Action/time embeddings with the model's hidden size as last axis.
-        """
-
-        def action_proj_func(noisy_actions: torch.Tensor) -> torch.FloatTensor:
-            with self.flow_head_autocast_context():
-                noisy_actions = noisy_actions.to(dtype=self.action_in_proj.weight.dtype)
-                return self.action_in_proj(noisy_actions)
-
-        action_embs = self._apply_checkpoint(action_proj_func, noisy_actions)
-        time_embs = create_sinusoidal_pos_embedding(
-            timestep,
-            self.hidden_size,
-            min_period=self.config.min_period,
-            max_period=self.config.max_period,
-            device=action_embs.device,
-        )
-        time_embs = time_embs.to(dtype=action_embs.dtype)
-        # Repeat the per-sample time embedding for every action position, then concatenate
-        # it with the action embedding along the feature axis.
-        time_embs = time_embs[:, None, :].expand_as(action_embs)
-        action_time_embs = torch.cat([action_embs, time_embs], dim=2)
-
-        def mlp_func(action_time_embs: torch.Tensor) -> torch.FloatTensor:
-            with self.flow_head_autocast_context():
-                action_time_embs = action_time_embs.to(dtype=self.action_time_mlp_in.weight.dtype)
-                action_time_embs = self.action_time_mlp_in(action_time_embs)
-                action_time_embs = F.silu(action_time_embs)
-                return self.action_time_mlp_out(action_time_embs)
-
-        action_time_embs = self._apply_checkpoint(mlp_func, action_time_embs)
-        return action_time_embs
-
-    def forward(
-        self,
-        input_ids: torch.LongTensor | None = None,
-        attention_mask: torch.LongTensor | None = None,
-        pixel_values: torch.FloatTensor | None = None,
-        image_grid_thw: torch.LongTensor | None = None,
-        mm_token_type_ids: torch.IntTensor | None = None,
-        states: torch.FloatTensor | None = None,
-        action: torch.FloatTensor | None = None,
-        action_is_pad: torch.BoolTensor | None = None,
-        *,
-        state_token_id: int,
-        action_token_id: int,
-        **kwargs,
-    ) -> Tensor:
-        """Run the EO1 training forward pass and compute the flow-matching loss.
-
-        The clean `action` chunk is mixed with Gaussian noise at a sampled time, embedded
-        into the action placeholder positions of the prompt, passed through the backbone,
-        and the projected action-token hidden states are regressed onto `noise - action`.
-
-        Args:
-            input_ids, attention_mask, pixel_values, image_grid_thw, mm_token_type_ids:
-                Inputs forwarded to the Qwen backbone.
-            states: States projected into the state placeholder positions.
-            action: Target action chunk.
-            action_is_pad: Boolean mask marking padded action steps.
-            state_token_id: Id of the state placeholder token.
-            action_token_id: Id of the action placeholder token.
-            **kwargs: Extra batch entries; not used here.
-
-        Returns:
-            Scalar mean of the element-wise squared error that remains after masking.
-        """
-
-        # 1. Build the EO1 prefix with state placeholders resolved.
-        inputs_embeds = self.embed_prefix(
-            input_ids,
-            states=states,
-            state_token_id=state_token_id,
-            action_token_id=action_token_id,
-        )
-
-        # 2. Sample the diffusion target and replace the action placeholders.
-        time = self.sample_time(action.shape[0], inputs_embeds.device)
-        noise = self.sample_noise(action.shape, inputs_embeds.device)
-
-        # x_t interpolates between the clean action (time 0) and pure noise (time 1);
-        # u_t is the velocity target the model is trained to predict.
-        time_expanded = time[:, None, None]
-        x_t = time_expanded * noise + (1 - time_expanded) * action
-        u_t = noise - action
-        action_time_embs = self.embed_suffix(time, x_t)
-        _, action_mask = self.get_placeholder_mask(
-            input_ids,
-            inputs_embeds,
-            action_features=action_time_embs,
-            state_token_id=state_token_id,
-            action_token_id=action_token_id,
-        )
-        action_time_embs = action_time_embs.to(inputs_embeds.device, inputs_embeds.dtype)
-        inputs_embeds = inputs_embeds.masked_scatter(action_mask, action_time_embs)
-
-        # 3. Optionally drop padded action tokens from backbone attention.
-        if attention_mask is not None:
-            attention_mask = attention_mask.to(inputs_embeds.device)
-
-        # NOTE(review): `action_is_pad` and `attention_mask` both default to None but are
-        # dereferenced unconditionally in this branch; callers presumably always provide
-        # them when `supervise_padding_actions` is False — confirm.
-        if not self.config.supervise_padding_actions:
-            action_is_pad = action_is_pad.to(device=inputs_embeds.device, dtype=torch.bool)
-            # One boolean per token position (the mask is constant along the hidden axis).
-            action_token_mask = action_mask[..., 0]
-            action_padding_mask = torch.zeros_like(action_token_mask)
-            # Write the per-step padding flags into the action-token positions of the sequence.
-            action_padding_mask = action_padding_mask.masked_scatter(
-                action_token_mask,
-                action_is_pad.reshape(-1),
-            )
-            attention_mask = attention_mask.masked_fill(action_padding_mask, 0)
-
-        # 4. Run the Qwen backbone on the fused EO1 sequence.
-        def vlm_forward_func(
-            input_ids: torch.LongTensor,
-            attention_mask: torch.Tensor | None,
-            inputs_embeds: torch.FloatTensor,
-            pixel_values: torch.Tensor | None,
-            image_grid_thw: torch.LongTensor | None,
-            mm_token_type_ids: torch.IntTensor | None,
-        ) -> torch.FloatTensor:
-            outputs = self.vlm_backbone.model(
-                input_ids=input_ids,
-                attention_mask=attention_mask,
-                inputs_embeds=inputs_embeds,
-                pixel_values=pixel_values,
-                image_grid_thw=image_grid_thw,
-                mm_token_type_ids=mm_token_type_ids,
-                use_cache=False,
-                output_hidden_states=False,
-                return_dict=True,
-            )
-            return outputs.last_hidden_state
-
-        hidden_states = self._apply_checkpoint(
-            vlm_forward_func,
-            input_ids,
-            attention_mask,
-            inputs_embeds,
-            pixel_values,
-            image_grid_thw,
-            mm_token_type_ids,
-        )
-        # Keep only the hidden states at the action-token positions.
-        action_hidden_states = hidden_states[action_mask[..., 0]]
-
-        # 5. Project the action-token hidden states back to the flow target space.
-        def action_out_proj_func(action_hidden_states: torch.FloatTensor) -> torch.FloatTensor:
-            with self.flow_head_autocast_context():
-                action_hidden_states = action_hidden_states.to(dtype=self.action_out_proj.dtype)
-                return self.action_out_proj(action_hidden_states)
-
-        v_t = self._apply_checkpoint(action_out_proj_func, action_hidden_states)
-        v_t = v_t.reshape(u_t.shape).to(dtype=u_t.dtype)
-        losses = F.mse_loss(u_t, v_t, reduction="none")
-
-        # 6. Apply the configured supervision mask and reduce the loss.
-        if not self.config.supervise_padding_action_dims:
-            # Ignore the action dimensions that exist only because of padding to `max_action_dim`.
-            original_action_dim = self.config.output_features[ACTION].shape[0]
-            losses = losses[..., :original_action_dim]
-
-        if not self.config.supervise_padding_actions:
-            losses = losses[~action_is_pad]
-
-        return losses.mean()
-
-    @torch.no_grad()
-    def sample_actions(
-        self,
-        input_ids: torch.LongTensor | None = None,
-        attention_mask: torch.Tensor | None = None,
-        pixel_values: torch.Tensor | None = None,
-        image_grid_thw: torch.LongTensor | None = None,
-        mm_token_type_ids: torch.IntTensor | None = None,
-        states: torch.Tensor | None = None,
-        *,
-        state_token_id: int,
-        action_token_id: int,
-        **kwargs,
-    ) -> Tensor:
-        """Sample actions from the model.
-
-        The prompt up to the first action token is encoded once with KV caching; the action
-        chunk is then denoised from Gaussian noise with `config.num_denoise_steps` Euler steps.
-
-        Returns:
-            Tensor of shape `(batch_size, chunk_size, max_action_dim)`.
-
-        Raises:
-            ValueError: If `states` or `mm_token_type_ids` is missing, or if the action tokens
-                are not one contiguous run of `chunk_size` tokens at the same positions in
-                every sample.
-        """
-        if states is None:
-            raise ValueError("states are required for EO1 action sampling.")
-        if mm_token_type_ids is None:
-            raise ValueError("mm_token_type_ids are required for EO1 action sampling.")
-
-        # 1. Resolve the left-padded rollout prompt and locate the action span.
-        chunk_size = self.config.chunk_size
-
-        # Cloned because the action span of `inputs_embeds` is overwritten in place below.
-        inputs_embeds = self.embed_prefix(
-            input_ids,
-            states=states,
-            state_token_id=state_token_id,
-            action_token_id=action_token_id,
-        ).clone()
-        _, action_placeholder_mask = self.get_placeholder_mask(
-            input_ids,
-            inputs_embeds,
-            state_token_id=state_token_id,
-            action_token_id=action_token_id,
-        )
-        action_mask = action_placeholder_mask[..., 0]
-        token_counts = action_mask.sum(dim=1)
-        if not torch.all(token_counts == chunk_size):
-            raise ValueError(
-                f"Each sample must contain exactly {chunk_size} action tokens, got {token_counts.tolist()}."
-            )
-        if action_mask.ne(action_mask[:1]).any():
-            raise ValueError(
-                "Batch inference expects all samples to share the same action token mask after left padding."
-            )
-        # argmax over the 0/1 mask yields the index of the first action token.
-        act_start = int(action_mask[0].to(torch.int64).argmax().item())
-        act_end = act_start + self.config.chunk_size
-        if not torch.all(action_mask[:, act_start:act_end]):
-            raise ValueError("Action tokens must form a contiguous chunk of length chunk_size.")
-        act_slice = slice(act_start, act_end)
-
-        # 2. Encode the fixed prefix once and cache its KV state.
-        # NOTE(review): `input_ids` and `attention_mask` default to None but are dereferenced
-        # unconditionally from here on; callers presumably always pass them — confirm.
-        batch_size = input_ids.shape[0]
-        device = inputs_embeds.device
-        attention_mask = attention_mask.to(device)
-        mm_token_type_ids = mm_token_type_ids.to(device)
-        position_ids, _ = self.vlm_backbone.model.get_rope_index(
-            input_ids,
-            image_grid_thw=image_grid_thw,
-            attention_mask=attention_mask,
-            mm_token_type_ids=mm_token_type_ids,
-        )
-        position_ids = position_ids.to(device)
-
-        outputs = self.vlm_backbone.model(
-            input_ids=input_ids[:, :act_start],
-            attention_mask=attention_mask[:, :act_start],
-            position_ids=position_ids[..., :act_start],
-            inputs_embeds=inputs_embeds[:, :act_start],
-            pixel_values=pixel_values,
-            image_grid_thw=image_grid_thw,
-            mm_token_type_ids=mm_token_type_ids[:, :act_start],
-            use_cache=True,
-            return_dict=True,
-        )
-
-        x_t = self.sample_noise(
-            (batch_size, chunk_size, self.config.max_action_dim),
-            device,
-        ).to(dtype=self.action_in_proj.weight.dtype)
-        # Negative step: integration runs from time 1 (pure noise) down towards time 0.
-        dt = -1.0 / self.config.num_denoise_steps
-        past_key_values = outputs.past_key_values
-
-        # 3. Denoise only the action chunk while keeping the prefix cache invariant.
-        for step in range(self.config.num_denoise_steps):
-            time = torch.full(
-                (batch_size,),
-                1.0 + step * dt,
-                device=device,
-                dtype=torch.float32,
-            )
-            action_time_embs = self.embed_suffix(time, x_t)
-            inputs_embeds[:, act_slice] = action_time_embs.to(inputs_embeds.dtype)
-
-            # Keep the prefix KV cache invariant across denoising steps.
-            past_key_values.crop(act_start)
-            outputs = self.vlm_backbone.model(
-                attention_mask=attention_mask[:, :act_end],
-                past_key_values=past_key_values,
-                inputs_embeds=inputs_embeds[:, act_slice],
-                position_ids=position_ids[..., act_slice],
-                use_cache=True,
-                return_dict=True,
-            )
-            with self.flow_head_autocast_context():
-                hidden_states = outputs.last_hidden_state[:, :chunk_size]
-                hidden_states = hidden_states.to(dtype=self.action_out_proj.dtype)
-                v_t = self.action_out_proj(hidden_states)
-
-            # Euler update along the predicted velocity.
-            x_t += dt * v_t.reshape(x_t.shape)
-
-        return x_t
@@ -1,282 +0,0 @@
-#!/usr/bin/env python
-
-# Copyright 2026 The HuggingFace Inc. team. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from __future__ import annotations
-
-from dataclasses import dataclass, field
-from typing import TYPE_CHECKING, Any
-
-import torch
-
-from lerobot.configs.types import FeatureType, PipelineFeatureType, PolicyFeature
-from lerobot.policies.eo1.configuration_eo1 import EO1Config
-from lerobot.processor import (
-    AddBatchDimensionProcessorStep,
-    ComplementaryDataProcessorStep,
-    DeviceProcessorStep,
-    NormalizerProcessorStep,
-    PolicyAction,
-    PolicyProcessorPipeline,
-    ProcessorStep,
-    ProcessorStepRegistry,
-    RenameObservationsProcessorStep,
-    UnnormalizerProcessorStep,
-)
-from lerobot.processor.converters import policy_action_to_transition, transition_to_policy_action
-from lerobot.types import TransitionKey
-from lerobot.utils.constants import (
-    OBS_STATE,
-    POLICY_POSTPROCESSOR_DEFAULT_NAME,
-    POLICY_PREPROCESSOR_DEFAULT_NAME,
-)
-from lerobot.utils.import_utils import _transformers_available, require_package
-
-if TYPE_CHECKING or _transformers_available:
-    from transformers.models.qwen2_5_vl import Qwen2_5_VLProcessor
-else:
-    Qwen2_5_VLProcessor = None
-
-# System prompt placed first in every conversation built by EO1ConversationTemplateStep.
-SYSTEM_MESSAGE = "You are a helpful physical assistant."
-
-# EO-1 special tokens
-# The *_pad tokens are the placeholders for state/action embeddings; the start/end tokens
-# bracket them in the prompt text. (`nosec B105`: these are token strings, not passwords.)
-ACTION_START_TOKEN = "<|action_start|>"  # nosec B105
-DEFAULT_ACTION_TOKEN = "<|action_pad|>"  # nosec B105
-ACTION_END_TOKEN = "<|action_end|>"  # nosec B105
-STATE_START_TOKEN = "<|state_start|>"  # nosec B105
-DEFAULT_STATE_TOKEN = "<|state_pad|>"  # nosec B105
-STATE_END_TOKEN = "<|state_end|>"  # nosec B105
-TASK_VLA_TOKEN = "<|vla|>"  # nosec B105
-
-# Full list added to the tokenizer as special tokens in EO1QwenProcessorStep.__post_init__.
-EO1_SPECIAL_TOKENS = [
-    ACTION_START_TOKEN,
-    DEFAULT_ACTION_TOKEN,
-    ACTION_END_TOKEN,
-    STATE_START_TOKEN,
-    DEFAULT_STATE_TOKEN,
-    STATE_END_TOKEN,
-    TASK_VLA_TOKEN,
-]
-
-
-@dataclass
-@ProcessorStepRegistry.register(name="eo1_conversation_template_processor")
-class EO1ConversationTemplateStep(ComplementaryDataProcessorStep):
-    """Build one EO1 chat conversation per task and store the list under `messages`.
-
-    Each conversation has a system message, a user message (camera images plus the state
-    placeholder and the task text) and an assistant message holding `chunk_size` action
-    placeholder tokens.
-    """
-
-    # Feature map; plain dict entries (as read back from a saved config) are converted to
-    # PolicyFeature objects in __post_init__.
-    input_features: dict[str, PolicyFeature] | dict[str, dict[str, Any]]
-    # Number of action placeholder tokens written into the assistant message.
-    chunk_size: int
-
-    # Keys of `input_features` whose type is FeatureType.VISUAL; filled in __post_init__.
-    _image_keys: list[str] = field(default_factory=list, init=False, repr=False)
-
-    def __post_init__(self):
-        """Normalize `input_features` to PolicyFeature objects and collect the image keys."""
-        # Robust JSON deserialization handling (guard empty maps).
-        if self.input_features:
-            # Only the first value is inspected to decide whether the map needs conversion.
-            first_val = next(iter(self.input_features.values()))
-            if isinstance(first_val, dict):
-                reconstructed = {}
-                for key, ft_dict in self.input_features.items():
-                    reconstructed[key] = PolicyFeature(
-                        type=FeatureType(ft_dict["type"]), shape=tuple(ft_dict["shape"])
-                    )
-                self.input_features = reconstructed
-
-        self._image_keys = [
-            key for key, value in self.input_features.items() if value.type == FeatureType.VISUAL
-        ]
-
-    def complementary_data(self, complementary_data):
-        """Add a `messages` entry with one conversation per task.
-
-        Raises:
-            ValueError: If the task list or the observation is missing, or if the state batch
-                size differs from the number of tasks.
-        """
-        tasks = complementary_data.get("task")
-        if tasks is None:
-            raise ValueError("Task is required for EO1ConversationTemplateStep.")
-
-        # NOTE(review): `self.transition` is not set in this class; presumably the base step
-        # exposes the transition currently being processed — confirm.
-        observation = self.transition.get(TransitionKey.OBSERVATION)
-        if observation is None:
-            raise ValueError("Observation is required for EO1ConversationTemplateStep.")
-
-        if OBS_STATE in observation and observation[OBS_STATE].shape[0] != len(tasks):
-            raise ValueError("Batch size mismatch between observation.state and task list.")
-
-        # LeRobot visual observations reach this processor as float32 tensors in [0, 1].
-        # Convert to uint8 in [0, 255] to meet the input requirement of Qwen2.5-VL-3B-Instruct.
-        images = {
-            key: observation[key].clamp(0, 1).mul(255.0).round().to(torch.uint8) for key in self._image_keys
-        }
-        messages = []
-        for i in range(len(tasks)):
-            # User turn: every camera image of sample i, then the state placeholder, the task
-            # text and the VLA task token.
-            content = [
-                *[{"type": "image", "image": images[key][i]} for key in self._image_keys],
-                {
-                    "type": "text",
-                    "text": (
-                        f"{STATE_START_TOKEN}{DEFAULT_STATE_TOKEN}{STATE_END_TOKEN}{tasks[i]}{TASK_VLA_TOKEN}"
-                    ),
-                },
-            ]
-            messages.append(
-                [
-                    {"role": "system", "content": [{"type": "text", "text": SYSTEM_MESSAGE}]},
-                    {"role": "user", "content": content},
-                    {
-                        "role": "assistant",
-                        "content": [
-                            {
-                                "type": "text",
-                                "text": f"{ACTION_START_TOKEN}{DEFAULT_ACTION_TOKEN * self.chunk_size}{ACTION_END_TOKEN}",
-                            }
-                        ],
-                    },
-                ]
-            )
-
-        complementary_data["messages"] = messages
-
-        return complementary_data
-
-    def transform_features(
-        self, features: dict[PipelineFeatureType, dict[str, PolicyFeature]]
-    ) -> dict[PipelineFeatureType, dict[str, PolicyFeature]]:
-        """
-        This step only materializes EO1-specific message objects in complementary_data.
-        PipelineFeatureType tracks only ACTION and OBSERVATION, so there is no static
-        feature contract change to record here.
-        """
-        return features
-
-    def get_config(self) -> dict[str, Any]:
-        """Return the step configuration with features flattened to plain type/shape dicts."""
-        return {
-            "input_features": {
-                key: {"type": ft.type.value, "shape": ft.shape} for key, ft in self.input_features.items()
-            },
-            "chunk_size": self.chunk_size,
-        }
-
-
-@dataclass
-@ProcessorStepRegistry.register(name="eo1_qwen_processor")
-class EO1QwenProcessorStep(ComplementaryDataProcessorStep):
-    processor_name: str = "Qwen/Qwen2.5-VL-3B-Instruct"
-    image_min_pixels: int | None = 64 * 28 * 28
-    image_max_pixels: int | None = 128 * 28 * 28
-    use_fast_processor: bool = False
-
-    _processor: Qwen2_5_VLProcessor | None = field(default=None, init=False, repr=False)
-    _state_token_id: int | None = field(default=None, init=False, repr=False)
-    _action_token_id: int | None = field(default=None, init=False, repr=False)
-
-    def __post_init__(self):
-        require_package("transformers", extra="eo1")
-        self._processor = Qwen2_5_VLProcessor.from_pretrained(
-            self.processor_name,
-            use_fast=self.use_fast_processor,
-        )
-        self._processor.tokenizer.add_tokens(EO1_SPECIAL_TOKENS, special_tokens=True)
-        self._state_token_id = self._processor.tokenizer.convert_tokens_to_ids(DEFAULT_STATE_TOKEN)
-        self._action_token_id = self._processor.tokenizer.convert_tokens_to_ids(DEFAULT_ACTION_TOKEN)
-
-    def complementary_data(self, complementary_data):
-        messages = complementary_data.pop("messages", None)
-        if messages is None:
-            raise ValueError("Messages are required for EO1QwenProcessorStep.")
-
-        # Rollout batches use left padding so action spans stay aligned across samples.
-        # Supervised batches use right padding to match standard training collation.
-        padding_side = "right" if self.transition.get(TransitionKey.ACTION) is not None else "left"
-
-        inputs = self._processor.apply_chat_template(
-            messages,
-            tokenize=True,
-            padding=True,
-            padding_side=padding_side,
-            min_pixels=self.image_min_pixels,
-            max_pixels=self.image_max_pixels,
-            add_generation_prompt=False,
-            return_dict=True,
-            return_tensors="pt",
-        )
-
-        complementary_data["input_ids"] = inputs["input_ids"]
-        complementary_data["pixel_values"] = inputs["pixel_values"]
-        complementary_data["image_grid_thw"] = inputs["image_grid_thw"]
-        complementary_data["attention_mask"] = inputs["attention_mask"]
-        complementary_data["mm_token_type_ids"] = inputs["mm_token_type_ids"]
-        complementary_data["state_token_id"] = self._state_token_id
-        complementary_data["action_token_id"] = self._action_token_id
-
-        return complementary_data
-
-    def get_config(self) -> dict[str, Any]:
-        return {
-            "processor_name": self.processor_name,
-            "image_min_pixels": self.image_min_pixels,
-            "image_max_pixels": self.image_max_pixels,
-            "use_fast_processor": self.use_fast_processor,
-        }
-
-    def transform_features(
-        self, features: dict[PipelineFeatureType, dict[str, PolicyFeature]]
-    ) -> dict[PipelineFeatureType, dict[str, PolicyFeature]]:
-        """
-        This step only converts the messages to the model input format.
-        """
-        return features
-
-
-def make_eo1_pre_post_processors(
-    config: EO1Config,
-    dataset_stats: dict[str, dict[str, torch.Tensor]] | None = None,
-) -> tuple[
-    PolicyProcessorPipeline[dict[str, Any], dict[str, Any]],
-    PolicyProcessorPipeline[PolicyAction, PolicyAction],
-]:
-    """Build pre/post processor pipelines for EO1."""
-
-    input_steps: list[ProcessorStep] = [
-        RenameObservationsProcessorStep(rename_map={}),
-        AddBatchDimensionProcessorStep(),
-        NormalizerProcessorStep(
-            features={**config.input_features, **config.output_features},
-            norm_map=config.normalization_mapping,
-            stats=dataset_stats,
-        ),
-        EO1ConversationTemplateStep(input_features=config.input_features, chunk_size=config.chunk_size),
-        EO1QwenProcessorStep(
-            processor_name=config.vlm_base,
-            image_min_pixels=config.image_min_pixels,
-            image_max_pixels=config.image_max_pixels,
-            use_fast_processor=config.use_fast_processor,
-        ),
-        DeviceProcessorStep(device=config.device),
-    ]
-
-    output_steps: list[ProcessorStep] = [
-        UnnormalizerProcessorStep(
-            features=config.output_features,
-            norm_map=config.normalization_mapping,
-            stats=dataset_stats,
-        ),
-        DeviceProcessorStep(device="cpu"),
-    ]
-
-    return (
-        PolicyProcessorPipeline[dict[str, Any], dict[str, Any]](
-            steps=input_steps,
-            name=POLICY_PREPROCESSOR_DEFAULT_NAME,
-        ),
-        PolicyProcessorPipeline[PolicyAction, PolicyAction](
-            steps=output_steps,
-            name=POLICY_POSTPROCESSOR_DEFAULT_NAME,
-            to_transition=policy_action_to_transition,
-            to_output=transition_to_policy_action,
-        ),
-    )
@@ -46,13 +46,14 @@ from lerobot.utils.feature_utils import dataset_to_policy_features

 from .act.configuration_act import ACTConfig
 from .diffusion.configuration_diffusion import DiffusionConfig
-from .eo1.configuration_eo1 import EO1Config
 from .gaussian_actor.configuration_gaussian_actor import GaussianActorConfig
+from .gaussian_actor.reward_model.configuration_classifier import RewardClassifierConfig
 from .groot.configuration_groot import GrootConfig
 from .multi_task_dit.configuration_multi_task_dit import MultiTaskDiTConfig
 from .pi0.configuration_pi0 import PI0Config
 from .pi05.configuration_pi05 import PI05Config
 from .pretrained import PreTrainedPolicy
+from .sarm.configuration_sarm import SARMConfig
 from .smolvla.configuration_smolvla import SmolVLAConfig
 from .tdmpc.configuration_tdmpc import TDMPCConfig
 from .utils import validate_visual_features_consistency
@@ -88,7 +89,7 @@ def get_policy_class(name: str) -> type[PreTrainedPolicy]:

    Args:
        name: The name of the policy. Supported names are "tdmpc", "diffusion", "act",
-            "multi_task_dit", "vqbet", "pi0", "pi05", "gaussian_actor", "smolvla", "wall_x".
+            "multi_task_dit", "vqbet", "pi0", "pi05", "gaussian_actor", "reward_classifier", "smolvla", "wall_x".
    Returns:
        The policy class corresponding to the given name.

@@ -131,10 +132,18 @@ def get_policy_class(name: str) -> type[PreTrainedPolicy]:
        from .gaussian_actor.modeling_gaussian_actor import GaussianActorPolicy

        return GaussianActorPolicy
+    elif name == "reward_classifier":
+        from .gaussian_actor.reward_model.modeling_classifier import Classifier
+
+        return Classifier
    elif name == "smolvla":
        from .smolvla.modeling_smolvla import SmolVLAPolicy

        return SmolVLAPolicy
+    elif name == "sarm":
+        from .sarm.modeling_sarm import SARMRewardModel
+
+        return SARMRewardModel
    elif name == "groot":
        from .groot.modeling_groot import GrootPolicy

@@ -147,10 +156,6 @@ def get_policy_class(name: str) -> type[PreTrainedPolicy]:
        from .wall_x.modeling_wall_x import WallXPolicy

        return WallXPolicy
-    elif name == "eo1":
-        from .eo1.modeling_eo1 import EO1Policy
-
-        return EO1Policy
    else:
        try:
            return _get_policy_cls_from_policy_name(name=name)
@@ -168,7 +173,7 @@ def make_policy_config(policy_type: str, **kwargs) -> PreTrainedConfig:
    Args:
        policy_type: The type of the policy. Supported types include "tdmpc",
                     "multi_task_dit", "diffusion", "act", "vqbet", "pi0", "pi05", "gaussian_actor",
-                     "smolvla", "wall_x".
+                     "smolvla", "reward_classifier", "wall_x".
        **kwargs: Keyword arguments to be passed to the configuration class constructor.

    Returns:
@@ -195,14 +200,14 @@ def make_policy_config(policy_type: str, **kwargs) -> PreTrainedConfig:
        return GaussianActorConfig(**kwargs)
    elif policy_type == "smolvla":
        return SmolVLAConfig(**kwargs)
+    elif policy_type == "reward_classifier":
+        return RewardClassifierConfig(**kwargs)
    elif policy_type == "groot":
        return GrootConfig(**kwargs)
    elif policy_type == "xvla":
        return XVLAConfig(**kwargs)
    elif policy_type == "wall_x":
        return WallXConfig(**kwargs)
-    elif policy_type == "eo1":
-        return EO1Config(**kwargs)
    else:
        try:
            config_cls = PreTrainedConfig.get_choice_class(policy_type)
@@ -373,6 +378,14 @@ def make_pre_post_processors(
            dataset_stats=kwargs.get("dataset_stats"),
        )

+    elif isinstance(policy_cfg, RewardClassifierConfig):
+        from .gaussian_actor.reward_model.processor_classifier import make_classifier_processor
+
+        processors = make_classifier_processor(
+            config=policy_cfg,
+            dataset_stats=kwargs.get("dataset_stats"),
+        )
+
    elif isinstance(policy_cfg, SmolVLAConfig):
        from .smolvla.processor_smolvla import make_smolvla_pre_post_processors

@@ -381,6 +394,14 @@ def make_pre_post_processors(
            dataset_stats=kwargs.get("dataset_stats"),
        )

+    elif isinstance(policy_cfg, SARMConfig):
+        from .sarm.processor_sarm import make_sarm_pre_post_processors
+
+        processors = make_sarm_pre_post_processors(
+            config=policy_cfg,
+            dataset_stats=kwargs.get("dataset_stats"),
+            dataset_meta=kwargs.get("dataset_meta"),
+        )
    elif isinstance(policy_cfg, GrootConfig):
        from .groot.processor_groot import make_groot_pre_post_processors

@@ -406,13 +427,6 @@ def make_pre_post_processors(
            config=policy_cfg,
            dataset_stats=kwargs.get("dataset_stats"),
        )
-    elif isinstance(policy_cfg, EO1Config):
-        from .eo1.processor_eo1 import make_eo1_pre_post_processors
-
-        processors = make_eo1_pre_post_processors(
-            config=policy_cfg,
-            dataset_stats=kwargs.get("dataset_stats"),
-        )

    else:
        try:
@@ -528,7 +542,7 @@ def make_policy(

        logging.info("Loading policy's PEFT adapter.")

-        peft_pretrained_path = str(cfg.pretrained_path)
+        peft_pretrained_path = cfg.pretrained_path
        peft_config = PeftConfig.from_pretrained(peft_pretrained_path)

        kwargs["pretrained_name_or_path"] = peft_config.base_model_name_or_path
@@ -541,9 +555,7 @@ def make_policy(
            )

        policy = policy_cls.from_pretrained(**kwargs)
-        policy = PeftModel.from_pretrained(
-            policy, peft_pretrained_path, config=peft_config, is_trainable=True
-        )
+        policy = PeftModel.from_pretrained(policy, peft_pretrained_path, config=peft_config)

    else:
        # Make a fresh policy.
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+# !/usr/bin/env python

 # Copyright 2025 The HuggingFace Inc. team.
 # All rights reserved.
@@ -143,48 +143,34 @@ class GaussianActorConfig(PreTrainedConfig):
    latent_dim: int = 256

    # Online training (TODO(Khalil): relocate to TrainRLServerPipelineConfig)
-    # Number of steps for online training
    online_steps: int = 1000000
-    # Capacity of the online replay buffer
    online_buffer_capacity: int = 100000
-    # Capacity of the offline replay buffer
    offline_buffer_capacity: int = 100000
-    # Whether to use asynchronous prefetching for the buffers
    async_prefetch: bool = False
-    # Number of steps before learning starts
    online_step_before_learning: int = 100

    # Actor-learner transport (TODO(Khalil): relocate to TrainRLServerPipelineConfig).
-    # Configuration for actor-learner architecture
    actor_learner_config: ActorLearnerConfig = field(default_factory=ActorLearnerConfig)
-    # Configuration for concurrency settings (you can use threads or processes for the actor and learner)
    concurrency: ConcurrencyConfig = field(default_factory=ConcurrencyConfig)

    # Network architecture
-    # Configuration for the actor network architecture
+    # Actor network
    actor_network_kwargs: ActorNetworkConfig = field(default_factory=ActorNetworkConfig)
-    # Configuration for the policy parameters (Gaussian head)
+    # Gaussian head parameters
    policy_kwargs: PolicyConfig = field(default_factory=PolicyConfig)
-    # Configuration for the discrete critic network
+    # Discrete critic
    discrete_critic_network_kwargs: CriticNetworkConfig = field(default_factory=CriticNetworkConfig)

    def __post_init__(self):
        super().__post_init__()
-        # Any validation specific to GaussianActor configuration

    def get_optimizer_preset(self) -> MultiAdamConfig:
-        # Default learning rate used to satisfy the abstract ``get_optimizer_preset()``
-        # contract from ``PreTrainedConfig``. The actual optimizers used during RL
-        # training are built by ``SACAlgorithm.make_optimizers_and_scheduler()`` from
-        # ``SACAlgorithmConfig.{actor_lr,critic_lr,temperature_lr}`` and fully bypass
-        # this preset.
-        default_lr = 3e-4
        return MultiAdamConfig(
            weight_decay=0.0,
            optimizer_groups={
-                "actor": {"lr": default_lr},
-                "critic": {"lr": default_lr},
-                "temperature": {"lr": default_lr},
+                "actor": {"lr": 3e-4},
+                "critic": {"lr": 3e-4},
+                "temperature": {"lr": 3e-4},
            },
        )

@@ -17,6 +17,7 @@

 from collections.abc import Callable
 from dataclasses import asdict
+from typing import Any

 import torch
 import torch.nn as nn
@@ -111,6 +112,18 @@ class GaussianActorPolicy(
        actions, log_probs, means = self.actor(observations, observation_features)
        return {"action": actions, "log_prob": log_probs, "action_mean": means}

+    def load_actor_weights(self, state_dicts: dict[str, Any], device: str | torch.device = "cpu") -> None:
+        from lerobot.utils.transition import move_state_dict_to_device
+
+        actor_state_dict = move_state_dict_to_device(state_dicts["policy"], device=device)
+        self.actor.load_state_dict(actor_state_dict)
+
+        if "discrete_critic" in state_dicts and self.discrete_critic is not None:
+            discrete_critic_state_dict = move_state_dict_to_device(
+                state_dicts["discrete_critic"], device=device
+            )
+            self.discrete_critic.load_state_dict(discrete_critic_state_dict)
+
    def _init_encoders(self):
        """Initialize shared or separate encoders for actor and critic."""
        self.shared_encoder = self.config.shared_encoder
@@ -1,3 +1,5 @@
+#!/usr/bin/env python
+
 # Copyright 2025 The HuggingFace Inc. team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -13,15 +15,14 @@
 # limitations under the License.
 from dataclasses import dataclass, field

-from lerobot.configs import NormalizationMode
-from lerobot.configs.rewards import RewardModelConfig
+from lerobot.configs import NormalizationMode, PreTrainedConfig
 from lerobot.optim import AdamWConfig, LRSchedulerConfig, OptimizerConfig
 from lerobot.utils.constants import OBS_IMAGE


-@RewardModelConfig.register_subclass(name="reward_classifier")
+@PreTrainedConfig.register_subclass(name="reward_classifier")
@dataclass
-class RewardClassifierConfig(RewardModelConfig):
+class RewardClassifierConfig(PreTrainedConfig):
    """Configuration for the Reward Classifier model."""

    name: str = "reward_classifier"
@@ -1,3 +1,5 @@
+#!/usr/bin/env python
+
 # Copyright 2025 The HuggingFace Inc. team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -17,10 +19,11 @@ import logging
 import torch
 from torch import Tensor, nn

-from lerobot.rewards.classifier.configuration_classifier import RewardClassifierConfig
-from lerobot.rewards.pretrained import PreTrainedRewardModel
 from lerobot.utils.constants import OBS_IMAGE, REWARD

+from ...pretrained import PreTrainedPolicy
+from .configuration_classifier import RewardClassifierConfig
+

 class ClassifierOutput:
    """Wrapper for classifier outputs with additional metadata."""
@@ -96,7 +99,7 @@ class SpatialLearnedEmbeddings(nn.Module):
        return output


-class Classifier(PreTrainedRewardModel):
+class Classifier(PreTrainedPolicy):
    """Image classifier built on top of a pre-trained encoder."""

    name = "reward_classifier"
@@ -233,16 +236,6 @@ class Classifier(PreTrainedRewardModel):

        return ClassifierOutput(logits=logits, probabilities=probabilities, hidden_states=encoder_outputs)

-    def compute_reward(self, batch: dict[str, Tensor]) -> Tensor:
-        """Returns 1.0 for success, 0.0 for failure based on image observations."""
-        images = [batch[key] for key in self.config.input_features if key.startswith(OBS_IMAGE)]
-        output = self.predict(images)
-
-        if self.config.num_classes == 2:
-            return (output.probabilities > 0.5).float()
-        else:
-            return torch.argmax(output.probabilities, dim=1).float()
-
    def forward(self, batch: dict[str, Tensor]) -> tuple[Tensor, dict[str, Tensor]]:
        """Standard forward pass for training compatible with train.py."""
        # Extract images and labels
@@ -286,3 +279,28 @@ class Classifier(PreTrainedRewardModel):
            return (probs > threshold).float()
        else:
            return torch.argmax(self.predict(images).probabilities, dim=1)
+
+    def get_optim_params(self):
+        """Return optimizer parameters for the policy."""
+        return self.parameters()
+
+    def select_action(self, batch: dict[str, Tensor]) -> Tensor:
+        """
+        This method is required by PreTrainedPolicy but not used for reward classifiers.
+        The reward classifier is not an actor and does not select actions.
+        """
+        raise NotImplementedError("Reward classifiers do not select actions")
+
+    def predict_action_chunk(self, batch: dict[str, Tensor]) -> Tensor:
+        """
+        This method is required by PreTrainedPolicy but not used for reward classifiers.
+        The reward classifier is not an actor and does not produce action chunks.
+        """
+        raise NotImplementedError("Reward classifiers do not predict action chunks")
+
+    def reset(self):
+        """
+        This method is required by PreTrainedPolicy but not used for reward classifiers.
+        The reward classifier is not an actor and does not select actions.
+        """
+        pass
@@ -1,3 +1,5 @@
+#!/usr/bin/env python
+
 # Copyright 2025 The HuggingFace Inc. team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -25,7 +27,8 @@ from lerobot.processor import (
    policy_action_to_transition,
    transition_to_policy_action,
 )
-from lerobot.rewards.classifier.configuration_classifier import RewardClassifierConfig
+
+from .configuration_classifier import RewardClassifierConfig


 def make_classifier_processor(
@@ -49,6 +52,8 @@ def make_classifier_processor(
    Args:
        config: The configuration object for the RewardClassifier.
        dataset_stats: A dictionary of statistics for normalization.
+        preprocessor_kwargs: Additional arguments for the pre-processor pipeline.
+        postprocessor_kwargs: Additional arguments for the post-processor pipeline.

    Returns:
        A tuple containing the configured pre-processor and post-processor pipelines.
@@ -13,7 +13,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-from dataclasses import field
+from dataclasses import dataclass, field
 from typing import TYPE_CHECKING

 import torch
@@ -109,6 +109,7 @@ class MultiEmbodimentActionEncoder(nn.Module):
        return x


+@dataclass
 class FlowmatchingActionHeadConfig(PretrainedConfig):
    """NOTE: N1.5 uses XEmbFlowmatchingPolicyHeadConfig as action head"""

@@ -444,13 +444,13 @@ class PaliGemmaWithExpertModel(
        if image.dtype != torch.float32:
            image = image.to(torch.float32)
        image_outputs = self.paligemma.model.get_image_features(image)
-        features = image_outputs.pooler_output
+        features = image_outputs.pooler_output * self.paligemma.config.text_config.hidden_size**0.5
        if features.dtype != out_dtype:
            features = features.to(out_dtype)
        return features

    def embed_language_tokens(self, tokens: torch.Tensor):
-        return self.paligemma.model.language_model.get_input_embeddings()(tokens)
+        return self.paligemma.model.language_model.embed_tokens(tokens)

    def forward(
        self,
@@ -666,7 +666,8 @@ class PI0Pytorch(nn.Module):  # see openpi `PI0Pytorch`
        # Process language tokens
        def lang_embed_func(lang_tokens):
            lang_emb = self.paligemma_with_expert.embed_language_tokens(lang_tokens)
-            return lang_emb
+            lang_emb_dim = lang_emb.shape[-1]
+            return lang_emb * math.sqrt(lang_emb_dim)

        lang_emb = self._apply_checkpoint(lang_embed_func, lang_tokens)
        embs.append(lang_emb)
@@ -747,8 +748,16 @@ class PI0Pytorch(nn.Module):  # see openpi `PI0Pytorch`

        return embs, pad_masks, att_masks, adarms_cond

-    def forward(self, images, img_masks, lang_tokens, lang_masks, state, actions, noise, time) -> Tensor:
+    def forward(
+        self, images, img_masks, lang_tokens, lang_masks, state, actions, noise=None, time=None
+    ) -> Tensor:
        """Do a full training forward pass and compute the loss."""
+        if noise is None:
+            noise = self.sample_noise(actions.shape, actions.device)
+
+        if time is None:
+            time = self.sample_time(actions.shape[0], actions.device)
+
        time_expanded = time[:, None, None]
        x_t = time_expanded * noise + (1 - time_expanded) * actions
        u_t = noise - actions
@@ -1283,11 +1292,8 @@ class PI0Policy(PreTrainedPolicy):
        state = self.prepare_state(batch)
        actions = self.prepare_action(batch)

-        noise = self.model.sample_noise(actions.shape, actions.device)
-        time = self.model.sample_time(actions.shape[0], actions.device)
-
        # Compute loss
-        losses = self.model.forward(images, img_masks, lang_tokens, lang_masks, state, actions, noise, time)
+        losses = self.model.forward(images, img_masks, lang_tokens, lang_masks, state, actions)

        # Truncate losses to actual action dimensions
        original_action_dim = self.config.output_features[ACTION].shape[0]
@@ -728,8 +728,14 @@ class PI05Pytorch(nn.Module):  # see openpi `PI0Pytorch`

        return embs, pad_masks, att_masks, adarms_cond

-    def forward(self, images, img_masks, tokens, masks, actions, noise, time) -> Tensor:
+    def forward(self, images, img_masks, tokens, masks, actions, noise=None, time=None) -> Tensor:
        """Do a full training forward pass and compute the loss."""
+        if noise is None:
+            noise = self.sample_noise(actions.shape, actions.device)
+
+        if time is None:
+            time = self.sample_time(actions.shape[0], actions.device)
+
        time_expanded = time[:, None, None]
        x_t = time_expanded * noise + (1 - time_expanded) * actions
        u_t = noise - actions
@@ -1256,11 +1262,8 @@ class PI05Policy(PreTrainedPolicy):

        actions = self.prepare_action(batch)

-        noise = self.model.sample_noise(actions.shape, actions.device)
-        time = self.model.sample_time(actions.shape[0], actions.device)
-
        # Compute loss (no separate state needed for PI05)
-        losses = self.model.forward(images, img_masks, tokens, masks, actions, noise, time)
+        losses = self.model.forward(images, img_masks, tokens, masks, actions)

        # Truncate losses to actual action dimensions
        original_action_dim = self.config.output_features[ACTION].shape[0]
@@ -16,6 +16,7 @@

 import builtins
 import logging
+import math
 from collections import deque
 from pathlib import Path
 from typing import TYPE_CHECKING, Literal, TypedDict, Unpack
@@ -260,15 +261,13 @@ class PI0FastPaliGemma(nn.Module):
        if image.dtype != torch.float32:
            image = image.to(torch.float32)
        image_outputs = self.paligemma.model.get_image_features(image)
-        features = image_outputs.pooler_output
-        norm = 2048**0.5
-        features = features / norm * norm
+        features = image_outputs.pooler_output * self.paligemma.config.text_config.hidden_size**0.5
        if features.dtype != out_dtype:
            features = features.to(out_dtype)
        return features

    def embed_language_tokens(self, tokens: torch.Tensor):
-        return self.paligemma.model.language_model.get_input_embeddings()(tokens)
+        return self.paligemma.model.language_model.embed_tokens(tokens)

    def forward(
        self,
@@ -418,7 +417,8 @@ class PI0FastPytorch(nn.Module):  # see openpi `PI0Pytorch`
        # Process language instruction tokens
        def lang_embed_func(tokens):
            lang_emb = self.paligemma_with_expert.embed_language_tokens(tokens)
-            return lang_emb
+            lang_emb_dim = lang_emb.shape[-1]
+            return lang_emb * math.sqrt(lang_emb_dim)

        lang_emb = self._apply_checkpoint(lang_embed_func, tokens)
        embs.append(lang_emb)
@@ -432,7 +432,8 @@ class PI0FastPytorch(nn.Module):  # see openpi `PI0Pytorch`

            def fast_action_embed_func(fast_action_tokens):
                fast_emb = self.paligemma_with_expert.embed_language_tokens(fast_action_tokens)
-                return fast_emb
+                fast_emb_dim = fast_emb.shape[-1]
+                return fast_emb * math.sqrt(fast_emb_dim)

            fast_action_emb = self._apply_checkpoint(fast_action_embed_func, fast_action_tokens)
            embs.append(fast_action_emb)
@@ -665,6 +666,7 @@ class PI0FastPytorch(nn.Module):  # see openpi `PI0Pytorch`
            if t < max_decoding_steps - 1:
                # embed the newly generated token
                next_token_emb = self.paligemma_with_expert.embed_language_tokens(next_token)
+                next_token_emb = next_token_emb * math.sqrt(next_token_emb.shape[-1])
                if prefix_embs.dtype == torch.bfloat16:
                    next_token_emb = next_token_emb.to(dtype=torch.bfloat16)

@@ -769,6 +771,7 @@ class PI0FastPytorch(nn.Module):  # see openpi `PI0Pytorch`
            # Embed the single previous token
            # We use embed_language_tokens directly to avoid overhead of full prefix embedding
            next_token_emb = self.paligemma_with_expert.embed_language_tokens(next_token)
+            next_token_emb = next_token_emb * math.sqrt(next_token_emb.shape[-1])
            if prefix_embs.dtype == torch.bfloat16:
                next_token_emb = next_token_emb.to(dtype=torch.bfloat16)

@@ -0,0 +1 @@
+../../../../docs/source/policy_sarm_README.md
@@ -1,4 +1,4 @@
-# Copyright 2026 The HuggingFace Inc. team. All rights reserved.
+# Copyright 2024 The HuggingFace Inc. team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -14,6 +14,5 @@

 from .configuration_sarm import SARMConfig
 from .modeling_sarm import SARMRewardModel
-from .processor_sarm import make_sarm_pre_post_processors

-__all__ = ["SARMConfig", "SARMRewardModel", "make_sarm_pre_post_processors"]
+__all__ = ["SARMConfig", "SARMRewardModel"]
@@ -25,18 +25,18 @@ need ~num_frames/30 queries instead of one per frame (~30x speedup).

 Usage:
    # Full RA-BC computation with visualizations
-    python src/lerobot/rewards/sarm/compute_rabc_weights.py \\
+    python src/lerobot/policies/sarm/compute_rabc_weights.py \\
        --dataset-repo-id lerobot/aloha_sim_insertion_human \\
        --reward-model-path <USER>/sarm_single_uni4

    # Faster computation with stride (compute every 5 frames, interpolate the rest)
-    python src/lerobot/rewards/sarm/compute_rabc_weights.py \\
+    python src/lerobot/policies/sarm/compute_rabc_weights.py \\
        --dataset-repo-id lerobot/aloha_sim_insertion_human \\
        --reward-model-path <USER>/sarm_single_uni4 \\
        --stride 5

    # Visualize predictions only (no RA-BC computation)
-    python src/lerobot/rewards/sarm/compute_rabc_weights.py \\
+    python src/lerobot/policies/sarm/compute_rabc_weights.py \\
        --dataset-repo-id lerobot/aloha_sim_insertion_human \\
        --reward-model-path <USER>/sarm_single_uni4 \\
        --visualize-only \\
@@ -58,9 +58,10 @@ import torch
 from tqdm import tqdm

 from lerobot.datasets import LeRobotDataset
-from lerobot.rewards.sarm.modeling_sarm import SARMRewardModel
-from lerobot.rewards.sarm.processor_sarm import make_sarm_pre_post_processors
-from lerobot.rewards.sarm.sarm_utils import normalize_stage_tau
+
+from .modeling_sarm import SARMRewardModel
+from .processor_sarm import make_sarm_pre_post_processors
+from .sarm_utils import normalize_stage_tau


 def get_reward_model_path_from_parquet(parquet_path: Path) -> str | None:
@@ -712,12 +713,12 @@ def main():
        epilog="""
 Examples:
    # Full RA-BC computation with visualizations
-    python src/lerobot/rewards/sarm/compute_rabc_weights.py \\
+    python src/lerobot/policies/sarm/compute_rabc_weights.py \\
        --dataset-repo-id lerobot/aloha_sim_insertion_human \\
        --reward-model-path <USER>/sarm_single_uni4

    # Visualize predictions only (no RA-BC computation)
-    python src/lerobot/rewards/sarm/compute_rabc_weights.py \\
+    python src/lerobot/policies/sarm/compute_rabc_weights.py \\
        --dataset-repo-id lerobot/aloha_sim_insertion_human \\
        --reward-model-path <USER>/sarm_single_uni4 \\
        --visualize-only \\
@@ -1,3 +1,5 @@
+#!/usr/bin/env python
+
 # Copyright 2025 Qianzhong Chen, Justin Yu, Mac Schwager, Pieter Abbeel, Yide Shentu, Philipp Wu
 # and The HuggingFace Inc. team. All rights reserved.
 #
@@ -20,15 +22,14 @@ Paper: https://arxiv.org/abs/2509.25358

 from dataclasses import dataclass, field

-from lerobot.configs import FeatureType, NormalizationMode, PolicyFeature
-from lerobot.configs.rewards import RewardModelConfig
+from lerobot.configs import FeatureType, NormalizationMode, PolicyFeature, PreTrainedConfig
 from lerobot.optim import AdamWConfig, CosineDecayWithWarmupSchedulerConfig
 from lerobot.utils.constants import OBS_IMAGES, OBS_STATE


-@RewardModelConfig.register_subclass("sarm")
+@PreTrainedConfig.register_subclass("sarm")
@dataclass
-class SARMConfig(RewardModelConfig):
+class SARMConfig(PreTrainedConfig):
    """Configuration class for SARM (Stage-Aware Reward Modeling).

    Supports three annotation modes:
@@ -109,6 +110,7 @@ class SARMConfig(RewardModelConfig):

    def __post_init__(self):
        super().__post_init__()
+
        if self.annotation_mode not in ["single_stage", "dense_only", "dual"]:
            raise ValueError(
                f"annotation_mode must be 'single_stage', 'dense_only', or 'dual', got {self.annotation_mode}"
@@ -1,3 +1,5 @@
+#!/usr/bin/env python
+
 # Copyright 2025 Qianzhong Chen, Justin Yu, Mac Schwager, Pieter Abbeel, Yide Shentu, Philipp Wu
 # and The HuggingFace Inc. team. All rights reserved.
 #
@@ -32,13 +34,14 @@ import torch.nn as nn
 import torch.nn.functional as F  # noqa: N812
 from torch import Tensor

-from lerobot.rewards.pretrained import PreTrainedRewardModel
-from lerobot.rewards.sarm.configuration_sarm import SARMConfig
-from lerobot.rewards.sarm.sarm_utils import (
+from lerobot.utils.constants import OBS_STR
+
+from ..pretrained import PreTrainedPolicy
+from .configuration_sarm import SARMConfig
+from .sarm_utils import (
    normalize_stage_tau,
    pad_state_to_max_dim,
 )
-from lerobot.utils.constants import OBS_STR


 class StageTransformer(nn.Module):
@@ -350,7 +353,7 @@ def gen_stage_emb(num_classes: int, targets: torch.Tensor) -> torch.Tensor:
    return stage_onehot


-class SARMRewardModel(PreTrainedRewardModel):
+class SARMRewardModel(PreTrainedPolicy):
    """
    SARM Reward Model for stage-aware task completion rewards.

@@ -468,23 +471,6 @@ class SARMRewardModel(PreTrainedRewardModel):
        self.subtask_model.to(device)
        return self

-    def compute_reward(self, batch: dict[str, Tensor]) -> Tensor:
-        """Compute dense progress reward in [0, 1] from batch.
-
-        Expects batch to contain:
-        - "observation_features" or video embeddings: (B, T, 512)
-        - "language_embedding" or text embeddings: (B, 512)
-        - optionally "observation.state": (B, T, state_dim)
-        """
-        text_emb = batch.get("language_embedding", batch.get("text_features"))
-        video_emb = batch.get("observation_features", batch.get("video_features"))
-        state = batch.get("observation.state", batch.get("state_features"))
-
-        rewards = self.calculate_rewards(text_emb, video_emb, state)
-        if isinstance(rewards, np.ndarray):
-            rewards = torch.from_numpy(rewards).float()
-        return rewards
-
    @torch.no_grad()
    def calculate_rewards(
        self,
@@ -645,9 +631,17 @@ class SARMRewardModel(PreTrainedRewardModel):
        return self.parameters()

    def reset(self):
-        """SARM has no episode-level state to reset."""
+        """No-op required by PreTrainedPolicy; SARM has no episode-level state to reset."""
        pass

+    def predict_action_chunk(self, batch: dict[str, Tensor]) -> Tensor:
+        """Required by PreTrainedPolicy but unsupported by SARM; always raises NotImplementedError."""
+        raise NotImplementedError("SARM model does not predict action chunks")
+
+    def select_action(self, batch: dict[str, Tensor]) -> Tensor:
+        """Required by PreTrainedPolicy but unsupported by SARM; always raises NotImplementedError."""
+        raise NotImplementedError("SARM model does not select actions")
+
    def _train_step(
        self,
        img_emb: torch.Tensor,  # (B, N, T, D)
@@ -1,3 +1,5 @@
+#!/usr/bin/env python
+
 # Copyright 2025 The HuggingFace Inc. team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -58,15 +60,16 @@ from lerobot.processor import (
    policy_action_to_transition,
    transition_to_policy_action,
 )
-from lerobot.rewards.sarm.configuration_sarm import SARMConfig
-from lerobot.rewards.sarm.sarm_utils import (
+from lerobot.types import EnvTransition, PolicyAction, TransitionKey
+from lerobot.utils.constants import POLICY_POSTPROCESSOR_DEFAULT_NAME, POLICY_PREPROCESSOR_DEFAULT_NAME
+
+from .configuration_sarm import SARMConfig
+from .sarm_utils import (
    apply_rewind_augmentation,
    compute_absolute_indices,
    find_stage_and_tau,
    pad_state_to_max_dim,
 )
-from lerobot.types import EnvTransition, PolicyAction, TransitionKey
-from lerobot.utils.constants import POLICY_POSTPROCESSOR_DEFAULT_NAME, POLICY_PREPROCESSOR_DEFAULT_NAME


 class SARMEncodingProcessorStep(ProcessorStep):
@@ -1,3 +1,5 @@
+#!/usr/bin/env python
+
 # Copyright 2025 The HuggingFace Inc. team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -97,8 +97,8 @@ class VQBeTConfig(PreTrainedConfig):
    vision_backbone: str = "resnet18"
    crop_shape: tuple[int, int] | None = (84, 84)
    crop_is_random: bool = True
-    pretrained_backbone_weights: str | None = "ResNet18_Weights.IMAGENET1K_V1"
-    use_group_norm: bool = False
+    pretrained_backbone_weights: str | None = None
+    use_group_norm: bool = True
    spatial_softmax_num_keypoints: int = 32
    # VQ-VAE
    n_vqvae_training_steps: int = 20000
@@ -22,7 +22,7 @@ from transformers.utils import (
    add_start_docstrings,
    add_start_docstrings_to_model_forward,
    is_flash_attn_2_available,
-    is_flash_attn_greater_or_equal,
+    is_flash_attn_greater_or_equal_2_10,
    is_torchdynamo_compiling,
    logging,
    replace_return_docstrings,
@@ -890,7 +890,7 @@ class Qwen2_5_VLFlashAttention2(Qwen2_5_VLAttention):
        # TODO: Should be removed once Flash Attention for RoCm is bumped to 2.1.
        # flash_attn<2.1 generates top-left aligned causal mask, while what is needed here is bottom-right alignment, that was made default for flash_attn>=2.1. This attribute is used to handle this difference. Reference: https://github.com/Dao-AILab/flash-attention/releases/tag/v2.1.0.
        # Beware that with flash_attn<2.1, using q_seqlen != k_seqlen (except for the case q_seqlen == 1) produces a wrong mask (top-left).
-        self._flash_attn_uses_top_left_mask = not is_flash_attn_greater_or_equal("2.1.0")
+        self._flash_attn_uses_top_left_mask = not is_flash_attn_greater_or_equal_2_10()

    def forward(
        self,
@@ -939,7 +939,7 @@ class Qwen2_5_VLFlashAttention2(Qwen2_5_VLAttention):
        input_dtype = query_states.dtype
        if input_dtype == torch.float32:
            if torch.is_autocast_enabled():
-                target_dtype = torch.get_autocast_dtype(query_states.device.type)
+                target_dtype = torch.get_autocast_gpu_dtype()
            # Handle the case where the model is quantized
            elif hasattr(self.config, "_pre_quantization_dtype"):
                target_dtype = self.config._pre_quantization_dtype
@@ -45,7 +45,7 @@ from transformers.utils import (
    add_start_docstrings,
    add_start_docstrings_to_model_forward,
    is_flash_attn_2_available,
-    is_flash_attn_greater_or_equal,
+    is_flash_attn_greater_or_equal_2_10,
    logging,
    replace_return_docstrings,
 )
@@ -909,7 +909,7 @@ class Florence2FlashAttention2(Florence2Attention):
        # TODO: Should be removed once Flash Attention for RoCm is bumped to 2.1.
        # flash_attn<2.1 generates top-left aligned causal mask, while what is needed here is bottom-right alignment, that was made default for flash_attn>=2.1. This attribute is used to handle this difference. Reference: https://github.com/Dao-AILab/flash-attention/releases/tag/v2.1.0.
        # Beware that with flash_attn<2.1, using q_seqlen != k_seqlen (except for the case q_seqlen == 1) produces a wrong mask (top-left).
-        self._flash_attn_uses_top_left_mask = not is_flash_attn_greater_or_equal("2.1.0")
+        self._flash_attn_uses_top_left_mask = not is_flash_attn_greater_or_equal_2_10()

    def _reshape(self, tensor: torch.Tensor, seq_len: int, bsz: int):
        return tensor.view(bsz, seq_len, self.num_heads, self.head_dim)
@@ -985,7 +985,7 @@ class Florence2FlashAttention2(Florence2Attention):
        input_dtype = query_states.dtype
        if input_dtype == torch.float32:
            if torch.is_autocast_enabled():
-                target_dtype = torch.get_autocast_dtype(query_states.device.type)
+                target_dtype = torch.get_autocast_gpu_dtype()
            # Handle the case where the model is quantized
            elif hasattr(self.config, "_pre_quantization_dtype"):
                target_dtype = self.config._pre_quantization_dtype
@@ -61,6 +61,7 @@ from .hil_processor import (
    RewardClassifierProcessorStep,
    TimeLimitProcessorStep,
 )
+from .leader_follower_processor import LeaderFollowerProcessor
 from .newline_task_processor import NewLineTaskProcessorStep
 from .normalize_processor import NormalizerProcessorStep, UnnormalizerProcessorStep, hotswap_stats
 from .observation_processor import VanillaObservationProcessorStep
@@ -122,6 +123,7 @@ __all__ = [
    "ImageCropResizeProcessorStep",
    "InfoProcessorStep",
    "InterventionActionProcessorStep",
+    "LeaderFollowerProcessor",
    "make_default_processors",
    "make_default_teleop_action_processor",
    "make_default_robot_action_processor",
@@ -38,6 +38,7 @@ class MapTensorToDeltaActionDictStep(ActionProcessorStep):
    """

    use_gripper: bool = True
+    use_rotation: bool = False

    def action(self, action: PolicyAction) -> RobotAction:
        if not isinstance(action, PolicyAction):
@@ -52,7 +53,13 @@ class MapTensorToDeltaActionDictStep(ActionProcessorStep):
            "delta_y": action[1].item(),
            "delta_z": action[2].item(),
        }
-        if self.use_gripper:
+        if self.use_rotation:
+            delta_action["delta_wx"] = action[3].item()
+            delta_action["delta_wy"] = action[4].item()
+            delta_action["delta_wz"] = action[5].item()
+            if self.use_gripper:
+                delta_action["gripper"] = action[6].item()
+        elif self.use_gripper:
            delta_action["gripper"] = action[3].item()
        return delta_action

@@ -64,6 +71,12 @@ class MapTensorToDeltaActionDictStep(ActionProcessorStep):
                type=FeatureType.ACTION, shape=(1,)
            )

+        if self.use_rotation:
+            for axis in ["wx", "wy", "wz"]:
+                features[PipelineFeatureType.ACTION][f"delta_{axis}"] = PolicyFeature(
+                    type=FeatureType.ACTION, shape=(1,)
+                )
+
        if self.use_gripper:
            features[PipelineFeatureType.ACTION]["gripper"] = PolicyFeature(
                type=FeatureType.ACTION, shape=(1,)
@@ -90,6 +103,8 @@ class MapDeltaActionToRobotActionStep(RobotActionProcessorStep):
    # Scale factors for delta movements
    position_scale: float = 1.0
    noise_threshold: float = 1e-3  # 1 mm threshold to filter out noise
+    use_rotation: bool = False
+    rotation_scale: float = 1.0

    def action(self, action: RobotAction) -> RobotAction:
        # NOTE (maractingi): Action can be a dict from the teleop_devices or a tensor from the policy
@@ -97,23 +112,34 @@ class MapDeltaActionToRobotActionStep(RobotActionProcessorStep):
        delta_x = action.pop("delta_x")
        delta_y = action.pop("delta_y")
        delta_z = action.pop("delta_z")
+        if self.use_rotation:
+            delta_wx = action.pop("delta_wx")
+            delta_wy = action.pop("delta_wy")
+            delta_wz = action.pop("delta_wz")
+        else:
+            delta_wx = 0.0
+            delta_wy = 0.0
+            delta_wz = 0.0
        gripper = action.pop("gripper")

        # Determine if the teleoperator is actively providing input
        # Consider enabled if any significant movement delta is detected
        position_magnitude = (delta_x**2 + delta_y**2 + delta_z**2) ** 0.5  # Use Euclidean norm for position
-        enabled = position_magnitude > self.noise_threshold  # Small threshold to avoid noise
+        rotation_magnitude = (
+            delta_wx**2 + delta_wy**2 + delta_wz**2
+        ) ** 0.5  # TODO: use a proper magnitude for rotation
+        enabled = (
+            position_magnitude > self.noise_threshold or rotation_magnitude > self.noise_threshold
+        )  # Small threshold to avoid noise

        # Scale the deltas appropriately
        scaled_delta_x = delta_x * self.position_scale
        scaled_delta_y = delta_y * self.position_scale
        scaled_delta_z = delta_z * self.position_scale

-        # For gamepad/keyboard, we don't have rotation input, so set to 0
-        # These could be extended in the future for more sophisticated teleoperators
-        target_wx = 0.0
-        target_wy = 0.0
-        target_wz = 0.0
+        target_wx = delta_wx * self.rotation_scale
+        target_wy = delta_wy * self.rotation_scale
+        target_wz = delta_wz * self.rotation_scale

        # Update action with robot target format
        action = {
@@ -132,9 +158,15 @@ class MapDeltaActionToRobotActionStep(RobotActionProcessorStep):
    def transform_features(
        self, features: dict[PipelineFeatureType, dict[str, PolicyFeature]]
    ) -> dict[PipelineFeatureType, dict[str, PolicyFeature]]:
-        for axis in ["x", "y", "z", "gripper"]:
+        for axis in ["x", "y", "z"]:
            features[PipelineFeatureType.ACTION].pop(f"delta_{axis}", None)

+        if self.use_rotation:
+            for axis in ["wx", "wy", "wz"]:
+                features[PipelineFeatureType.ACTION].pop(f"delta_{axis}", None)
+
+        features[PipelineFeatureType.ACTION].pop("delta_gripper", None)
+
        for feat in ["enabled", "target_x", "target_y", "target_z", "target_wx", "target_wy", "target_wz"]:
            features[PipelineFeatureType.ACTION][f"{feat}"] = PolicyFeature(
                type=FeatureType.ACTION, shape=(1,)
@@ -4,6 +4,7 @@
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
+# You may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
@@ -388,15 +389,11 @@ class GripperPenaltyProcessorStep(ProcessorStep):
        if raw_joint_positions is None:
            return new_transition

-        current_gripper_pos = raw_joint_positions.get(f"{GRIPPER_KEY}.pos", None)
+        current_gripper_pos = raw_joint_positions.get(GRIPPER_KEY, None)
        if current_gripper_pos is None:
            return new_transition

-        # During reset, the transition may not carry any action yet.
-        if action is None:
-            return new_transition
-
-        # Gripper action is expected as the last action dimension.
+        # Gripper action is a PolicyAction at this stage
        gripper_action = action[-1].item()
        gripper_action_normalized = gripper_action / self.max_gripper_pos

@@ -464,6 +461,7 @@ class InterventionActionProcessorStep(ProcessorStep):

    use_gripper: bool = False
    terminate_on_success: bool = True
+    use_rotation: bool = False

    def __call__(self, transition: EnvTransition) -> EnvTransition:
        """
@@ -500,6 +498,14 @@ class InterventionActionProcessorStep(ProcessorStep):
                    teleop_action.get("delta_y", 0.0),
                    teleop_action.get("delta_z", 0.0),
                ]
+                if self.use_rotation:
+                    action_list.extend(
+                        [
+                            teleop_action.get("delta_wx", 0.0),
+                            teleop_action.get("delta_wy", 0.0),
+                            teleop_action.get("delta_wz", 0.0),
+                        ]
+                    )
                if self.use_gripper:
                    action_list.append(teleop_action.get(GRIPPER_KEY, 1.0))
            elif isinstance(teleop_action, np.ndarray):
@@ -577,7 +583,7 @@ class RewardClassifierProcessorStep(ProcessorStep):
    def __post_init__(self):
        """Initializes the reward classifier model after the dataclass is created."""
        if self.pretrained_path is not None:
-            from lerobot.rewards.classifier.modeling_classifier import Classifier
+            from lerobot.policies.gaussian_actor.reward_model.modeling_classifier import Classifier

            self.reward_classifier = Classifier.from_pretrained(self.pretrained_path)
            self.reward_classifier.to(self.device)
@@ -0,0 +1,170 @@
+#!/usr/bin/env python
+
+# Copyright 2025 The HuggingFace Inc. team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from dataclasses import dataclass
+
+import numpy as np
+import torch
+
+from lerobot.configs.types import PipelineFeatureType, PolicyFeature
+from lerobot.model.kinematics import RobotKinematics
+from lerobot.processor.pipeline import EnvTransition, ProcessorStepRegistry, TransitionKey
+from lerobot.robots import Robot
+from lerobot.teleoperators import Teleoperator
+from lerobot.teleoperators.utils import TeleopEvents
+from lerobot.utils.rotation import Rotation
+
+from .pipeline import ProcessorStep
+
+
+@ProcessorStepRegistry.register("leader_follower_processor")
+@dataclass
+class LeaderFollowerProcessor(ProcessorStep):
+    """
+    Processor for leader-follower teleoperation mode.
+
+    This processor:
+    1. Forwards the transition's observation (when present) to the leader arm on every call
+    2. Computes EE delta actions from the leader when intervening
+    3. Replaces ``teleop_action`` in the complementary data with that delta action
+
+    Attributes:
+        leader_device: Teleoperator that receives the observation via ``send_action``.
+        motor_names: Motor names; joint arrays are built in this order and the last
+            entry is treated as the gripper.
+        robot: Follower robot. Not read by this step.
+        kinematics: Forward-kinematics model applied to leader and follower joints.
+        end_effector_step_sizes: Mapping with keys ``x``, ``y``, ``z``, ``wx``, ``wy``
+            and ``wz`` used to normalize the deltas. When ``None``, the raw deltas
+            are emitted without normalization.
+        use_gripper: Not read by this step; the gripper delta is always emitted.
+        max_gripper_pos: Clipping bound and divisor for the gripper values.
+        use_ik_solution: Read the follower joints from ``IK_solution`` in the
+            complementary data (when present) instead of from the observation.
+    """
+
+    leader_device: Teleoperator
+    motor_names: list[str]
+    robot: Robot
+    kinematics: RobotKinematics
+    end_effector_step_sizes: dict[str, float] | None = None
+    use_gripper: bool = True
+    max_gripper_pos: float = 100.0
+    use_ik_solution: bool = False
+
+    def __call__(self, transition: EnvTransition) -> EnvTransition:
+        """Process transition with leader-follower logic.
+
+        Args:
+            transition: Transition whose observation holds ``"{motor}.pos"`` entries
+                and whose complementary data may hold a ``teleop_action`` dict.
+
+        Returns:
+            The same transition; during an intervention its ``teleop_action`` is
+            replaced by a float tensor ``[dx, dy, dz, dwx, dwy, dwz, dgripper]``.
+
+        Raises:
+            KeyError: If a required ``"{motor}.pos"`` or ``"gripper.pos"`` entry is missing.
+            ValueError: If the computed delta action contains NaN or infinite values.
+        """
+        raw_joint_pos = transition.get(TransitionKey.OBSERVATION)
+        if raw_joint_pos is not None:
+            # Send follower position to leader (for follow mode)
+            self.leader_device.send_action(raw_joint_pos)
+
+        # Only compute EE action if intervention is active
+        # (AddTeleopEventsAsInfo already added IS_INTERVENTION to info)
+        info = transition.get(TransitionKey.INFO) or {}
+        if not info.get(TeleopEvents.IS_INTERVENTION, False):
+            return transition
+
+        # Get leader joint positions from teleop_action
+        # (AddTeleopActionAsComplimentaryData already got the action)
+        complementary = transition.get(TransitionKey.COMPLEMENTARY_DATA) or {}
+        teleop_action = complementary.get("teleop_action")
+
+        # Nothing to convert without leader joints or a follower observation.
+        if not isinstance(teleop_action, dict) or not teleop_action or raw_joint_pos is None:
+            return transition
+
+        intervention_action = self._compute_intervention_action(
+            teleop_action, raw_joint_pos, complementary
+        )
+
+        # Override teleop_action with computed EE action
+        complementary["teleop_action"] = torch.from_numpy(intervention_action).float()
+        transition[TransitionKey.COMPLEMENTARY_DATA] = complementary  # type: ignore[misc]
+        return transition
+
+    def _compute_intervention_action(
+        self, teleop_action: dict, raw_joint_pos: dict, complementary: dict
+    ) -> np.ndarray:
+        """Build the 7-element delta action that moves the follower EE onto the leader EE."""
+        leader_pos = np.array([teleop_action[f"{motor}.pos"] for motor in self.motor_names])
+        leader_ee = self.kinematics.forward_kinematics(leader_pos)
+
+        if self.use_ik_solution and "IK_solution" in complementary:
+            follower_pos = complementary["IK_solution"]
+        else:
+            follower_pos = np.array([raw_joint_pos[f"{motor}.pos"] for motor in self.motor_names])
+        follower_ee = self.kinematics.forward_kinematics(follower_pos)
+
+        delta_pos = leader_ee[:3, 3] - follower_ee[:3, 3]
+
+        # For rotation: compute relative rotation from follower to leader
+        # R_leader = R_follower * R_delta  =>  R_delta = R_follower^T * R_leader
+        r_delta = follower_ee[:3, :3].T @ leader_ee[:3, :3]
+        delta_rvec = Rotation.from_matrix(r_delta).as_rotvec()
+
+        follower_gripper_pos = follower_pos[-1]  # assuming gripper is the last motor
+        leader_gripper_pos = np.clip(
+            teleop_action["gripper.pos"], -self.max_gripper_pos, self.max_gripper_pos
+        )
+        delta_gripper = leader_gripper_pos - follower_gripper_pos
+
+        if self.end_effector_step_sizes is not None:
+            # Normalize the action to the range [-1, 1]
+            step_sizes = self.end_effector_step_sizes
+            delta_pos = delta_pos / np.array([step_sizes[axis] for axis in ("x", "y", "z")])
+            delta_rvec = delta_rvec / np.array([step_sizes[axis] for axis in ("wx", "wy", "wz")])
+
+            # Scale proportionally so the largest component has magnitude 1
+            max_normalized = np.max(np.abs(np.concatenate([delta_pos, delta_rvec])))
+            if max_normalized > 1.0:
+                delta_pos = delta_pos / max_normalized
+                delta_rvec = delta_rvec / max_normalized
+
+        gripper_action = (
+            np.clip(delta_gripper, -self.max_gripper_pos, self.max_gripper_pos)
+            / self.max_gripper_pos
+        )
+        intervention_action = np.array([*delta_pos, *delta_rvec, gripper_action], dtype=float)
+
+        # The control loop must never receive NaN/inf targets (e.g. from a zero step size).
+        if not np.all(np.isfinite(intervention_action)):
+            raise ValueError(f"Non-finite leader-follower delta action: {intervention_action}")
+        return intervention_action
+
+    def reset(self) -> None:
+        """Reset the leader device when it exposes a ``reset`` method."""
+        if hasattr(self.leader_device, "reset"):
+            self.leader_device.reset()
+
+    def transform_features(
+        self, features: dict[PipelineFeatureType, dict[str, PolicyFeature]]
+    ) -> dict[PipelineFeatureType, dict[str, PolicyFeature]]:
+        """Return ``features`` unchanged; this step declares no feature changes."""
+        return features
@@ -137,21 +137,12 @@ class _NormalizationMixin:
        self._reshape_visual_stats()

    def _reshape_visual_stats(self) -> None:
-        """Reshape flat ``(C,)`` visual stats to ``(C, 1, 1)`` for image broadcasting.
-
-        No-op for stats from :func:`~lerobot.datasets.compute_stats.compute_stats`
-        (already ``(C, 1, 1)``). Needed by RL training, which can start without
-        a dataset and supplies stats manually via JSON config.
-        """
+        """Reshape 1-D visual stats ``[C]`` to ``[C, 1, 1]`` for image broadcasting; others are kept."""
        for key, feature in self.features.items():
-            if feature.type != FeatureType.VISUAL:
-                continue
-            if key not in self._tensor_stats:
-                continue
-            for stat_name, stat_tensor in self._tensor_stats[key].items():
-                if not isinstance(stat_tensor, Tensor) or stat_tensor.ndim != 1:
-                    continue
-                self._tensor_stats[key][stat_name] = stat_tensor.reshape(-1, 1, 1)
+            if feature.type == FeatureType.VISUAL and key in self._tensor_stats:
+                for stat_name, stat_tensor in self._tensor_stats[key].items():
+                    if isinstance(stat_tensor, Tensor) and stat_tensor.ndim == 1:
+                        self._tensor_stats[key][stat_name] = stat_tensor.reshape(-1, 1, 1)

    def to(
        self, device: torch.device | str | None = None, dtype: torch.dtype | None = None
@@ -1,38 +0,0 @@
-# Copyright 2026 The HuggingFace Inc. team. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from .classifier.configuration_classifier import RewardClassifierConfig as RewardClassifierConfig
-from .factory import (
-    get_reward_model_class as get_reward_model_class,
-    make_reward_model as make_reward_model,
-    make_reward_model_config as make_reward_model_config,
-    make_reward_pre_post_processors as make_reward_pre_post_processors,
-)
-from .pretrained import PreTrainedRewardModel as PreTrainedRewardModel
-from .robometer.configuration_robometer import RobometerConfig as RobometerConfig
-from .sarm.configuration_sarm import SARMConfig as SARMConfig
-
-__all__ = [
-    # Configuration classes
-    "RewardClassifierConfig",
-    "RobometerConfig",
-    "SARMConfig",
-    # Base class
-    "PreTrainedRewardModel",
-    # Factory functions
-    "get_reward_model_class",
-    "make_reward_model",
-    "make_reward_model_config",
-    "make_reward_pre_post_processors",
-]
@@ -1,252 +0,0 @@
-#!/usr/bin/env python
-
-# Copyright 2026 The HuggingFace Inc. team. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import importlib
-import logging
-from typing import Any
-
-import torch
-
-from lerobot.configs.rewards import RewardModelConfig
-from lerobot.processor import PolicyAction, PolicyProcessorPipeline
-from lerobot.rewards.classifier.configuration_classifier import RewardClassifierConfig
-from lerobot.rewards.pretrained import PreTrainedRewardModel
-from lerobot.rewards.robometer.configuration_robometer import RobometerConfig
-from lerobot.rewards.sarm.configuration_sarm import SARMConfig
-
-
-def get_reward_model_class(name: str) -> type[PreTrainedRewardModel]:
-    """
-    Retrieves a reward model class by its registered name.
-
-    This function uses dynamic imports to avoid loading all reward model classes into
-    memory at once, improving startup time and reducing dependencies.
-
-    Args:
-        name: The name of the reward model. Supported names are "reward_classifier",
-              "sarm", "robometer".
-
-    Returns:
-        The reward model class corresponding to the given name.
-
-    Raises:
-        ValueError: If the reward model name is not recognized.
-    """
-    if name == "reward_classifier":
-        from lerobot.rewards.classifier.modeling_classifier import Classifier
-
-        return Classifier
-    elif name == "sarm":
-        from lerobot.rewards.sarm.modeling_sarm import SARMRewardModel
-
-        return SARMRewardModel
-    elif name == "robometer":
-        from lerobot.rewards.robometer.modeling_robometer import RobometerRewardModel
-
-        return RobometerRewardModel
-    else:
-        try:
-            return _get_reward_model_cls_from_name(name=name)
-        except Exception as e:
-            raise ValueError(f"Reward model type '{name}' is not available.") from e
-
-
-def make_reward_model_config(reward_type: str, **kwargs) -> RewardModelConfig:
-    """
-    Instantiates a reward model configuration object based on the reward type.
-
-    This factory function simplifies the creation of reward model configuration objects
-    by mapping a string identifier to the corresponding config class.
-
-    Args:
-        reward_type: The type of the reward model. Supported types include
-                     "reward_classifier", "sarm", "robometer".
-        **kwargs: Keyword arguments to be passed to the configuration class constructor.
-
-    Returns:
-        An instance of a `RewardModelConfig` subclass.
-
-    Raises:
-        ValueError: If the `reward_type` is not recognized.
-    """
-    if reward_type == "reward_classifier":
-        return RewardClassifierConfig(**kwargs)
-    elif reward_type == "sarm":
-        return SARMConfig(**kwargs)
-    elif reward_type == "robometer":
-        return RobometerConfig(**kwargs)
-    else:
-        try:
-            config_cls = RewardModelConfig.get_choice_class(reward_type)
-            return config_cls(**kwargs)
-        except Exception as e:
-            raise ValueError(f"Reward model type '{reward_type}' is not available.") from e
-
-
-def make_reward_model(cfg: RewardModelConfig, **kwargs) -> PreTrainedRewardModel:
-    """
-    Instantiate a reward model from its configuration.
-
-    Args:
-        cfg: The configuration for the reward model to be created. If
-             `cfg.pretrained_path` is set, the model will be loaded with weights
-             from that path.
-        **kwargs: Additional keyword arguments forwarded to the model constructor
-            (e.g., ``dataset_stats``, ``dataset_meta``).
-
-    Returns:
-        An instantiated and device-placed reward model.
-    """
-    reward_cls = get_reward_model_class(cfg.type)
-
-    kwargs["config"] = cfg
-
-    if cfg.pretrained_path:
-        kwargs["pretrained_name_or_path"] = cfg.pretrained_path
-        reward_model = reward_cls.from_pretrained(**kwargs)
-    else:
-        reward_model = reward_cls(**kwargs)
-
-    reward_model.to(cfg.device)
-    assert isinstance(reward_model, torch.nn.Module)
-
-    return reward_model
-
-
-def make_reward_pre_post_processors(
-    reward_cfg: RewardModelConfig,
-    **kwargs,
-) -> tuple[
-    PolicyProcessorPipeline[dict[str, Any], dict[str, Any]],
-    PolicyProcessorPipeline[PolicyAction, PolicyAction],
-]:
-    """
-    Create pre- and post-processor pipelines for a given reward model.
-
-    Each reward model type has a dedicated factory function for its processors.
-
-    Args:
-        reward_cfg: The configuration of the reward model for which to create processors.
-        **kwargs: Additional keyword arguments passed to the processor factory
-            (e.g., ``dataset_stats``, ``dataset_meta``).
-
-    Returns:
-        A tuple containing the input (pre-processor) and output (post-processor) pipelines.
-
-    Raises:
-        ValueError: If a processor factory is not implemented for the given reward
-            model configuration type.
-    """
-    # Create a new processor based on reward model type
-    if isinstance(reward_cfg, RewardClassifierConfig):
-        from lerobot.rewards.classifier.processor_classifier import make_classifier_processor
-
-        return make_classifier_processor(
-            config=reward_cfg,
-            dataset_stats=kwargs.get("dataset_stats"),
-        )
-
-    elif isinstance(reward_cfg, SARMConfig):
-        from lerobot.rewards.sarm.processor_sarm import make_sarm_pre_post_processors
-
-        return make_sarm_pre_post_processors(
-            config=reward_cfg,
-            dataset_stats=kwargs.get("dataset_stats"),
-            dataset_meta=kwargs.get("dataset_meta"),
-        )
-    elif isinstance(reward_cfg, RobometerConfig):
-        from lerobot.rewards.robometer.processor_robometer import make_robometer_pre_post_processors
-
-        return make_robometer_pre_post_processors(
-            config=reward_cfg,
-            dataset_stats=kwargs.get("dataset_stats"),
-        )
-
-    else:
-        try:
-            processors = _make_processors_from_reward_model_config(
-                config=reward_cfg,
-                dataset_stats=kwargs.get("dataset_stats"),
-            )
-        except Exception as e:
-            raise ValueError(
-                f"Processor for reward model type '{reward_cfg.type}' is not implemented."
-            ) from e
-        return processors
-
-
-def _get_reward_model_cls_from_name(name: str) -> type[PreTrainedRewardModel]:
-    """Get reward model class from its registered name using dynamic imports.
-
-    This is used as a helper function to import reward models from 3rd party lerobot
-    plugins.
-
-    Args:
-        name: The name of the reward model.
-
-    Returns:
-        The reward model class corresponding to the given name.
-    """
-    if name not in RewardModelConfig.get_known_choices():
-        raise ValueError(
-            f"Unknown reward model name '{name}'. "
-            f"Available reward models: {RewardModelConfig.get_known_choices()}"
-        )
-
-    config_cls = RewardModelConfig.get_choice_class(name)
-    config_cls_name = config_cls.__name__
-
-    model_name = config_cls_name.removesuffix("Config")
-    if model_name == config_cls_name:
-        raise ValueError(
-            f"The config class name '{config_cls_name}' does not follow the expected naming convention. "
-            f"Make sure it ends with 'Config'!"
-        )
-
-    cls_name = model_name + "RewardModel"
-    module_path = config_cls.__module__.replace("configuration_", "modeling_")
-
-    module = importlib.import_module(module_path)
-    reward_cls = getattr(module, cls_name)
-    return reward_cls
-
-
-def _make_processors_from_reward_model_config(
-    config: RewardModelConfig,
-    dataset_stats: dict[str, dict[str, torch.Tensor]] | None = None,
-) -> tuple[Any, Any]:
-    """Create pre- and post-processors from a reward model configuration using dynamic imports.
-
-    This is used as a helper function to import processor factories from 3rd party
-    lerobot reward model plugins.
-
-    Args:
-        config: The reward model configuration object.
-        dataset_stats: Dataset statistics for normalization.
-
-    Returns:
-        A tuple containing the input (pre-processor) and output (post-processor) pipelines.
-    """
-    reward_type = config.type
-    function_name = f"make_{reward_type}_pre_post_processors"
-    module_path = config.__class__.__module__.replace("configuration_", "processor_")
-    logging.debug(
-        f"Instantiating reward pre/post processors using function '{function_name}' "
-        f"from module '{module_path}'"
-    )
-    module = importlib.import_module(module_path)
-    function = getattr(module, function_name)
-    return function(config, dataset_stats=dataset_stats)
@@ -1,244 +0,0 @@
-# Copyright 2026 The HuggingFace Inc. team. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import abc
-import builtins
-import logging
-import os
-from importlib.resources import files
-from pathlib import Path
-from tempfile import TemporaryDirectory
-from typing import TYPE_CHECKING, Any, TypeVar
-
-import packaging
-import safetensors
-from huggingface_hub import HfApi, ModelCard, ModelCardData, hf_hub_download
-from huggingface_hub.constants import SAFETENSORS_SINGLE_FILE
-from huggingface_hub.errors import HfHubHTTPError
-from safetensors.torch import load_model as load_model_as_safetensor, save_model as save_model_as_safetensor
-from torch import Tensor, nn
-
-from lerobot.configs.rewards import RewardModelConfig
-from lerobot.utils.hub import HubMixin
-
-if TYPE_CHECKING:
-    from lerobot.configs.train import TrainPipelineConfig
-
-T = TypeVar("T", bound="PreTrainedRewardModel")
-
-
-class PreTrainedRewardModel(nn.Module, HubMixin, abc.ABC):
-    """Base class for reward models."""
-
-    config_class: None
-    name: None
-
-    def __init__(self, config: RewardModelConfig, *inputs, **kwargs):
-        super().__init__()
-        if not isinstance(config, RewardModelConfig):
-            raise ValueError(
-                f"Parameter config in `{self.__class__.__name__}(config)` should be an instance of class "
-                "`RewardModelConfig`. To create a model from a pretrained model use "
-                f"`model = {self.__class__.__name__}.from_pretrained(PRETRAINED_MODEL_NAME)`"
-            )
-        self.config = config
-
-    def __init_subclass__(cls, **kwargs):
-        super().__init_subclass__(**kwargs)
-        if not getattr(cls, "config_class", None):
-            raise TypeError(f"Class {cls.__name__} must define 'config_class'")
-        if not getattr(cls, "name", None):
-            raise TypeError(f"Class {cls.__name__} must define 'name'")
-
-    def _save_pretrained(self, save_directory: Path) -> None:
-        self.config._save_pretrained(save_directory)
-        model_to_save = self.module if hasattr(self, "module") else self
-        save_model_as_safetensor(model_to_save, str(save_directory / SAFETENSORS_SINGLE_FILE))
-
-    @classmethod
-    def from_pretrained(
-        cls: builtins.type[T],
-        pretrained_name_or_path: str | Path,
-        *,
-        config: RewardModelConfig | None = None,
-        force_download: bool = False,
-        resume_download: bool | None = None,
-        proxies: dict | None = None,
-        token: str | bool | None = None,
-        cache_dir: str | Path | None = None,
-        local_files_only: bool = False,
-        revision: str | None = None,
-        strict: bool = False,
-        **kwargs,
-    ) -> T:
-        """
-        The reward model is set in evaluation mode by default using `reward.eval()` (dropout modules are
-        deactivated). To train it, you should first set it back in training mode with `reward.train()`.
-        """
-        if config is None:
-            config = RewardModelConfig.from_pretrained(
-                pretrained_name_or_path=pretrained_name_or_path,
-                force_download=force_download,
-                resume_download=resume_download,
-                proxies=proxies,
-                token=token,
-                cache_dir=cache_dir,
-                local_files_only=local_files_only,
-                revision=revision,
-                **kwargs,
-            )
-        model_id = str(pretrained_name_or_path)
-        instance = cls(config, **kwargs)
-        if os.path.isdir(model_id):
-            print("Loading weights from local directory")
-            model_file = os.path.join(model_id, SAFETENSORS_SINGLE_FILE)
-            reward = cls._load_as_safetensor(instance, model_file, config.device or "cpu", strict)
-        else:
-            try:
-                model_file = hf_hub_download(
-                    repo_id=model_id,
-                    filename=SAFETENSORS_SINGLE_FILE,
-                    revision=revision,
-                    cache_dir=cache_dir,
-                    force_download=force_download,
-                    proxies=proxies,
-                    resume_download=resume_download,
-                    token=token,
-                    local_files_only=local_files_only,
-                )
-                reward = cls._load_as_safetensor(instance, model_file, config.device or "cpu", strict)
-            except HfHubHTTPError as e:
-                raise FileNotFoundError(
-                    f"{SAFETENSORS_SINGLE_FILE} not found on the HuggingFace Hub in {model_id}"
-                ) from e
-
-        reward.to(config.device)
-        reward.eval()
-        return reward
-
-    @classmethod
-    def _load_as_safetensor(cls, model: T, model_file: str, map_location: str, strict: bool) -> T:
-        # Create base kwargs
-        kwargs = {"strict": strict}
-
-        # Add device parameter for newer versions that support it
-        if packaging.version.parse(safetensors.__version__) >= packaging.version.parse("0.4.3"):
-            kwargs["device"] = map_location
-
-        # Load the model with appropriate kwargs
-        missing_keys, unexpected_keys = load_model_as_safetensor(model, model_file, **kwargs)
-        if missing_keys:
-            logging.warning(f"Missing key(s) when loading model: {missing_keys}")
-        if unexpected_keys:
-            logging.warning(f"Unexpected key(s) when loading model: {unexpected_keys}")
-
-        # For older versions, manually move to device if needed
-        if "device" not in kwargs and map_location != "cpu":
-            logging.warning(
-                "Loading model weights on other devices than 'cpu' is not supported natively in your version of safetensors."
-                " This means that the model is loaded on 'cpu' first and then copied to the device."
-                " This leads to a slower loading time."
-                " Please update safetensors to version 0.4.3 or above for improved performance."
-            )
-            model.to(map_location)
-        return model
-
-    def get_optim_params(self):
-        """
-        Returns the reward-model-specific parameters dict to be passed on to the optimizer.
-        """
-        return self.parameters()
-
-    def reset(self) -> None:
-        """Reset any internal state."""
-        pass
-
-    @abc.abstractmethod
-    def compute_reward(self, batch: dict[str, Tensor]) -> Tensor:
-        """Compute a scalar reward signal for a batch of observations.
-
-        Args:
-            batch: Dictionary containing at minimum observation tensors.
-                   May also contain "action", "next_observation.*", etc.
-
-        Returns:
-            Tensor of shape ``(batch_size,)`` with reward values.
-        """
-        ...
-
-    def forward(self, batch: dict[str, Tensor]) -> tuple[Tensor, dict[str, Any]]:
-        """Training forward pass — override for trainable reward models."""
-        raise NotImplementedError(
-            f"{self.__class__.__name__} is not trainable. Only use compute_reward() for inference."
-        )
-
-    @property
-    def is_trainable(self) -> bool:
-        """Whether this reward model can be trained via ``lerobot-train``.
-
-        Trainable reward models override :meth:`forward`; zero-shot models
-        inherit the base implementation that raises ``NotImplementedError``.
-        """
-        return type(self).forward is not PreTrainedRewardModel.forward
-
-    def push_model_to_hub(self, cfg: "TrainPipelineConfig"):
-        api = HfApi()
-        repo_id = api.create_repo(
-            repo_id=self.config.repo_id, private=self.config.private, exist_ok=True
-        ).repo_id
-
-        # Push the files to the repo in a single commit
-        with TemporaryDirectory(ignore_cleanup_errors=True) as tmp:
-            saved_path = Path(tmp) / repo_id
-
-            self.save_pretrained(saved_path)  # Calls _save_pretrained and stores model tensors
-
-            card = self.generate_model_card(
-                cfg.dataset.repo_id, self.config.type, self.config.license, self.config.tags
-            )
-            card.save(str(saved_path / "README.md"))
-
-            cfg.save_pretrained(saved_path)  # Calls _save_pretrained and stores train config
-
-            commit_info = api.upload_folder(
-                repo_id=repo_id,
-                repo_type="model",
-                folder_path=saved_path,
-                commit_message="Upload reward model weights, train config and readme",
-                allow_patterns=["*.safetensors", "*.json", "*.yaml", "*.md"],
-                ignore_patterns=["*.tmp", "*.log"],
-            )
-
-            logging.info(f"Model pushed to {commit_info.repo_url.url}")
-
-    def generate_model_card(
-        self, dataset_repo_id: str, model_type: str, license: str | None, tags: list[str] | None
-    ) -> ModelCard:
-        card_data = ModelCardData(
-            license=license or "apache-2.0",
-            library_name="lerobot",
-            pipeline_tag="robotics",
-            tags=list(set(tags or []).union({"robotics", "lerobot", "reward-model", model_type})),
-            model_name=model_type,
-            datasets=dataset_repo_id,
-        )
-
-        template_card = (
-            files("lerobot.templates")
-            .joinpath("lerobot_rewardmodel_modelcard_template.md")
-            .read_text(encoding="utf-8")
-        )
-        card = ModelCard.from_template(card_data, template_str=template_card)
-        card.validate()
-        return card
@@ -1,19 +0,0 @@
-# Copyright 2026 The HuggingFace Inc. team. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from .configuration_robometer import RobometerConfig
-from .modeling_robometer import RobometerRewardModel
-from .processor_robometer import make_robometer_pre_post_processors
-
-__all__ = ["RobometerConfig", "RobometerRewardModel", "make_robometer_pre_post_processors"]
@@ -1,229 +0,0 @@
-# Copyright 2026 The HuggingFace Inc. team. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-"""Upstream/legacy Robometer checkpoint loader.
-
-This module is **only** used by the one-time conversion tooling
-(:mod:`lerobot.scripts.lerobot_export_robometer` and
-``scripts/verify_robometer_export.py``). It supports:
-
- Sharded upstream checkpoints (``model-0000X-of-Y.safetensors`` + index).
- PEFT/LoRA adapter checkpoints (``adapter_config.json`` + adapter weights).
- Local snapshot directories or Hugging Face Hub repo ids.
-
-Once :class:`~lerobot.rewards.robometer.RobometerRewardModel` is loaded
-through this module, calling ``save_pretrained`` writes the canonical
-LeRobot-native layout (single ``model.safetensors`` + ``config.json``) that
-the base loader understands.
-
-The runtime path
-(:meth:`~lerobot.rewards.pretrained.PreTrainedRewardModel.from_pretrained`)
-does **not** import this file. It is safe to delete once you no longer need
-the conversion tooling.
-"""
-
-from __future__ import annotations
-
-import json
-import logging
-from pathlib import Path
-from typing import Any
-
-from huggingface_hub import snapshot_download
-from safetensors.torch import load_file
-from torch import Tensor, nn
-
-from lerobot.utils.import_utils import require_package
-
-logger = logging.getLogger(__name__)
-
-
-def _download_robometer_snapshot(
-    pretrained_path: str,
-    *,
-    hub_token: str | None = None,
-) -> Path:
-    """Resolve a Robometer snapshot directory.
-
-    - If ``pretrained_path`` is an existing local directory, return it directly.
-    - Otherwise treat ``pretrained_path`` as a Hugging Face repo id (optionally
-      with ``@revision``) and download it via ``snapshot_download``.
-    """
-    local_candidate = Path(pretrained_path)
-    if local_candidate.is_dir():
-        return local_candidate
-
-    if "@" in pretrained_path:
-        repo_id, revision = pretrained_path.split("@", 1)
-    else:
-        repo_id, revision = pretrained_path, None
-
-    return Path(
-        snapshot_download(
-            repo_id=repo_id,
-            revision=revision,
-            token=hub_token,
-            allow_patterns=[
-                "*.json",
-                "*.safetensors",
-                "*.bin",
-                "*.txt",
-                "*.model",
-                "tokenizer*",
-                "special_tokens_map.json",
-            ],
-        )
-    )
-
-
-def _maybe_apply_peft(base_model: Any, snapshot_dir: Path) -> Any:
-    adapter_config = snapshot_dir / "adapter_config.json"
-    if not adapter_config.exists():
-        return base_model
-
-    require_package("peft", extra="peft-dep")
-    from peft import PeftModel
-
-    return PeftModel.from_pretrained(base_model, str(snapshot_dir))
-
-
-def _remap_state_dict_keys(state_dict: dict[str, Tensor], model: nn.Module) -> dict[str, Tensor]:
-    """Try a few common prefix swaps so PEFT-wrapped checkpoints load cleanly."""
-    model_keys = set(model.state_dict().keys())
-    remapped: dict[str, Tensor] = {}
-
-    for key, value in state_dict.items():
-        if key in model_keys:
-            remapped[key] = value
-            continue
-
-        candidates: list[str] = []
-        if key.startswith("model.model."):
-            candidates.append(key.replace("model.model.", "model.base_model.model.model.", 1))
-            candidates.append(key.replace("model.model.", "model.", 1))
-        if key.startswith("model."):
-            candidates.append(f"model.{key}")
-            candidates.append(key.replace("model.", "", 1))
-        else:
-            candidates.append(f"model.{key}")
-        if key.startswith("model.") and not key.startswith("model.base_model."):
-            parts = key.split(".", 1)
-            if len(parts) == 2:
-                candidates.append(f"model.base_model.{parts[1]}")
-
-        for candidate in candidates:
-            if candidate in model_keys:
-                remapped[candidate] = value
-                break
-        else:
-            remapped[key] = value
-
-    return remapped
-
-
-def _resolve_checkpoint_safetensors_files(snapshot_dir: Path) -> list[Path]:
-    """Pick the safetensors files that hold the full model weights.
-
-    When ``model.safetensors.index.json`` is present, only the files it lists are
-    loaded. Otherwise any ``model*.safetensors`` shards are preferred over
-    sidecar files. Falls back to every ``*.safetensors`` in the snapshot.
-    """
-    index_path = snapshot_dir / "model.safetensors.index.json"
-    if index_path.exists():
-        with index_path.open() as f:
-            weight_map = json.load(f).get("weight_map", {})
-        indexed = sorted(
-            {snapshot_dir / name for name in weight_map.values() if (snapshot_dir / name).exists()}
-        )
-        if indexed:
-            return indexed
-
-    model_shards = sorted(snapshot_dir.glob("model*.safetensors"))
-    if model_shards:
-        return model_shards
-
-    return sorted(snapshot_dir.glob("*.safetensors"))
-
-
-def apply_upstream_checkpoint(
-    model: nn.Module,
-    pretrained_path: str,
-    *,
-    hub_token: str | None = None,
-) -> None:
-    """Load an upstream (sharded / PEFT) Robometer checkpoint into ``model``.
-
-    Downloads the snapshot, optionally applies PEFT wrapping, merges sharded
-    ``.safetensors`` files in memory, remaps PEFT-prefixed keys, and loads them
-    into ``model`` non-strictly. ``model`` must already be constructed with the
-    matching Robometer architecture (e.g. via
-    :class:`~lerobot.rewards.robometer.RobometerRewardModel` ``__init__``).
-    """
-    snapshot_dir = _download_robometer_snapshot(pretrained_path, hub_token=hub_token)
-
-    # PEFT adapter checkpoints wrap the base model before weight loading so the
-    # remapper can place adapter tensors at the right prefix.
-    base_model = getattr(model, "model", None)
-    if base_model is not None:
-        wrapped = _maybe_apply_peft(base_model, snapshot_dir)
-        if wrapped is not base_model:
-            model.model = wrapped
-
-    files = _resolve_checkpoint_safetensors_files(snapshot_dir)
-    if not files:
-        logger.warning("No *.safetensors files in %s; using freshly initialised heads", snapshot_dir)
-        return
-
-    merged: dict[str, Tensor] = {}
-    for path in files:
-        merged.update(load_file(str(path)))
-
-    remapped = _remap_state_dict_keys(merged, model)
-
-    # Defensive vocab-match. With the corrected resize logic
-    # (``_resize_embeddings_for_robometer`` uses ``len(tokenizer) + 5``),
-    # a freshly built ``RobometerRewardModel`` should already share the same
-    # vocabulary as the upstream checkpoint (e.g. 151,674 for
-    # ``robometer/Robometer-4B``). This block stays in place as a safety net
-    # in case a future upstream variant uses a different vocab — we never
-    # want ``load_state_dict`` to trip on a silent shape mismatch.
-    base_model = getattr(model, "model", None)
-    if base_model is not None and hasattr(base_model, "get_input_embeddings"):
-        for key in (
-            "model.model.language_model.embed_tokens.weight",
-            "model.language_model.embed_tokens.weight",
-            "model.embed_tokens.weight",
-        ):
-            tensor = remapped.get(key)
-            if tensor is None:
-                continue
-            ckpt_vocab = int(tensor.shape[0])
-            current_vocab = int(base_model.get_input_embeddings().num_embeddings)
-            if ckpt_vocab != current_vocab:
-                logger.info(
-                    "Resizing model embed table %d -> %d to match upstream checkpoint vocab "
-                    "(upstream was trained against a different Qwen revision).",
-                    current_vocab,
-                    ckpt_vocab,
-                )
-                base_model.resize_token_embeddings(ckpt_vocab)
-            break
-
-    missing, unexpected = model.load_state_dict(remapped, strict=False)
-    if missing:
-        logger.debug("Robometer checkpoint missing %d keys (sample: %s)", len(missing), missing[:5])
-    if unexpected:
-        logger.debug(
-            "Robometer checkpoint had %d unexpected keys (sample: %s)", len(unexpected), unexpected[:5]
-        )
@@ -1,162 +0,0 @@
-# Copyright 2026 The HuggingFace Inc. team. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from __future__ import annotations
-
-from copy import deepcopy
-from dataclasses import dataclass, field
-from typing import TYPE_CHECKING, Any
-
-from lerobot.configs import FeatureType, NormalizationMode, PolicyFeature
-from lerobot.configs.rewards import RewardModelConfig
-from lerobot.utils.constants import OBS_IMAGES
-from lerobot.utils.import_utils import _transformers_available, require_package
-
-if TYPE_CHECKING or _transformers_available:
-    from transformers import AutoConfig, AutoTokenizer
-else:
-    AutoConfig = None  # type: ignore[assignment]
-    AutoTokenizer = None  # type: ignore[assignment]
-
-
-@RewardModelConfig.register_subclass("robometer")
-@dataclass
-class RobometerConfig(RewardModelConfig):
-    """Configuration for the Robometer reward model.
-
-    ``__post_init__`` validates the enum-like fields, registers default
-    input/output features, and (when ``vlm_config`` is not supplied) snapshots
-    the backbone config of ``base_model_id`` with its text vocab size overridden
-    to ``len(tokenizer) + len(ROBOMETER_SPECIAL_TOKENS)``.
-
-    Raises:
-        ValueError: from ``__post_init__`` when a field holds an unsupported
-            value (see the individual checks there).
-    """
-
-    # Checkpoint to load; ``None`` selects the fresh-training path in the model.
-    pretrained_path: str | None = "lilkm/Robometer-4B"
-    # Batch key of the camera stream registered as the visual input feature.
-    image_key: str = OBS_IMAGES + ".top"
-    task_key: str = "task"
-    default_task: str | None = None
-
-    max_frames: int | None = 8
-    reward_output: str = "progress"  # "progress" or "success"
-    # Compared against the sigmoid-ed success probability when reward_output == "success".
-    success_threshold: float = 0.5
-
-    license: str | None = "apache-2.0"
-    tags: list[str] | None = field(
-        default_factory=lambda: ["reward-model", "vision-language", "qwen3-vl", "zero-shot"]
-    )
-
-    base_model_id: str = "Qwen/Qwen3-VL-4B-Instruct"
-    torch_dtype: str = "bfloat16"
-    use_multi_image: bool = True
-    use_per_frame_progress_token: bool = True
-    average_temporal_patches: bool = True
-    frame_pooling: str = "mean"  # "mean" | "boundary" | "attention"
-    frame_pooling_attn_temperature: float = 1.0
-    progress_loss_type: str = "discrete"  # "l1" | "l2" | "discrete"
-    # Output width of the progress head when progress_loss_type == "discrete".
-    progress_discrete_bins: int = 10
-
-    # Serialised Qwen backbone config (post-resize). Always populated by
-    # ``__post_init__`` from ``base_model_id`` + ``len(tokenizer) + 5``, so it
-    # is never ``None`` after construction (EO-1 style). Saved into
-    # ``config.json`` automatically by the base ``_save_pretrained``.
-    vlm_config: dict[str, Any] | None = None
-
-    input_features: dict[str, PolicyFeature] = field(default_factory=dict)
-    output_features: dict[str, PolicyFeature] = field(default_factory=dict)
-    normalization_mapping: dict[str, NormalizationMode] = field(
-        default_factory=lambda: {
-            "VISUAL": NormalizationMode.IDENTITY,
-            "REWARD": NormalizationMode.IDENTITY,
-        }
-    )
-
-    def __post_init__(self) -> None:
-        """Validate fields, register default features and populate ``vlm_config``.
-
-        Raises:
-            ValueError: if ``reward_output``, ``max_frames``, ``frame_pooling``,
-                ``frame_pooling_attn_temperature``, ``progress_loss_type`` or
-                ``progress_discrete_bins`` is out of range, if per-frame progress
-                tokens are requested without multi-image mode, or if the backbone
-                config has no nested ``text_config``.
-        """
-        super().__post_init__()
-        if self.reward_output not in {"progress", "success"}:
-            raise ValueError(f"reward_output must be 'progress' or 'success', got {self.reward_output!r}")
-        if self.max_frames is not None and self.max_frames < 1:
-            raise ValueError(f"max_frames must be >= 1, got {self.max_frames}")
-        if self.frame_pooling not in {"mean", "boundary", "attention"}:
-            raise ValueError(f"frame_pooling must be mean/boundary/attention; got {self.frame_pooling!r}")
-        if self.frame_pooling_attn_temperature <= 0:
-            raise ValueError("frame_pooling_attn_temperature must be > 0")
-        if self.progress_loss_type not in {"l1", "l2", "discrete"}:
-            raise ValueError(f"progress_loss_type must be l1/l2/discrete; got {self.progress_loss_type!r}")
-        # The bin count sizes the discrete progress head; a value below 1 would
-        # build a head with no outputs and silently yield a constant prediction.
-        # Only enforced in discrete mode, where the field is actually consumed.
-        if self.progress_loss_type == "discrete" and self.progress_discrete_bins < 1:
-            raise ValueError(
-                "progress_discrete_bins must be >= 1 when progress_loss_type='discrete'; "
-                f"got {self.progress_discrete_bins}"
-            )
-        if self.use_per_frame_progress_token and not self.use_multi_image:
-            raise ValueError("use_per_frame_progress_token=True requires use_multi_image=True")
-
-        # Register defaults without clobbering features the caller already set.
-        if self.image_key not in self.input_features:
-            self.input_features[self.image_key] = PolicyFeature(shape=(3, 224, 224), type=FeatureType.VISUAL)
-        self.output_features.setdefault("progress", PolicyFeature(shape=(1,), type=FeatureType.REWARD))
-        self.output_features.setdefault("success", PolicyFeature(shape=(1,), type=FeatureType.REWARD))
-
-        # Deterministically populate ``vlm_config`` so it is never ``None``
-        # after construction (mirrors EO-1's ``__post_init__`` snapshot).
-        # The target vocab matches upstream Robometer's runtime resize
-        # ``base_model.resize_token_embeddings(len(processor.tokenizer))`` —
-        # see ``third_party/robometer/.../setup_utils.py`` —
-        # i.e. ``len(tokenizer) + len(ROBOMETER_SPECIAL_TOKENS)``.
-        #
-        # For ``Qwen/Qwen3-VL-4B-Instruct`` this gives 151,669 + 5 = 151,674,
-        # which is exactly the published ``robometer/Robometer-4B`` checkpoint
-        # vocab. NB: ``text_config.vocab_size`` in the raw Qwen config is the
-        # padded embedding-table size (151,936), not the tokenizer length —
-        # we override it with the tokenizer-driven value to stay consistent
-        # with upstream.
-        if self.vlm_config is None:
-            require_package("transformers", extra="robometer")
-            # Local import avoids a top-level cycle (modeling_robometer imports
-            # this module). ``ROBOMETER_SPECIAL_TOKENS`` is the single source
-            # of truth for the resize delta.
-            from lerobot.rewards.robometer.modeling_robometer import ROBOMETER_SPECIAL_TOKENS
-
-            vlm = AutoConfig.from_pretrained(self.base_model_id).to_dict()
-            tokenizer = AutoTokenizer.from_pretrained(self.base_model_id)
-            text_config = vlm.get("text_config")
-            if not isinstance(text_config, dict):
-                raise ValueError(
-                    f"Backbone config for {self.base_model_id!r} has no nested `text_config`; "
-                    "Robometer expects a Qwen-VL-style config."
-                )
-            # ``text_config`` is the dict nested inside ``vlm``, so this
-            # assignment updates the snapshot stored below.
-            text_config["vocab_size"] = len(tokenizer) + len(ROBOMETER_SPECIAL_TOKENS)
-            self.vlm_config = vlm
-
-    @property
-    def use_discrete_progress(self) -> bool:
-        """Whether the progress head outputs distribution logits over bins."""
-        return self.progress_loss_type.lower() == "discrete"
-
-    @property
-    def vlm_backbone_config(self):
-        """Reconstruct the Qwen backbone config from :attr:`vlm_config`.
-
-        ``vlm_config`` is always populated after :meth:`__post_init__`
-        (either fresh, computed from the tokenizer, or loaded from a saved
-        ``config.json`` via draccus).
-
-        Raises:
-            ValueError: if the stored dict carries no ``model_type`` entry.
-        """
-        require_package("transformers", extra="robometer")
-        # Work on a copy so popping ``model_type`` leaves ``vlm_config`` intact.
-        config_dict = deepcopy(self.vlm_config)
-        model_type = config_dict.pop("model_type", None)
-        if model_type is None:
-            raise ValueError("vlm_config must include `model_type` to reconstruct the backbone config")
-        return AutoConfig.for_model(model_type, **config_dict)
-
-    @property
-    def observation_delta_indices(self) -> list[int] | None:
-        """Always ``None`` for this config."""
-        return None
-
-    @property
-    def action_delta_indices(self) -> None:
-        """Always ``None`` for this config."""
-        return None
-
-    @property
-    def reward_delta_indices(self) -> None:
-        """Always ``None`` for this config."""
-        return None
-
-    def validate_features(self) -> None:
-        """Ensure the configured image stream is registered as an input feature.
-
-        Raises:
-            ValueError: if ``image_key`` is absent from ``input_features``.
-        """
-        if self.image_key not in self.input_features:
-            raise ValueError(f"Robometer requires image input feature {self.image_key!r}")
@@ -1,493 +0,0 @@
-# Copyright 2026 The HuggingFace Inc. team. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-"""Robometer reward model.
-
- Qwen3-VL backbone (default: ``Qwen/Qwen3-VL-4B-Instruct``).
- Progress + success heads at inference; the preference head is preserved in the
-  state dict but not queried.
-"""
-
-from __future__ import annotations
-
-import logging
-from typing import TYPE_CHECKING, Any
-
-import torch
-from torch import Tensor, nn
-
-from lerobot.rewards.pretrained import PreTrainedRewardModel
-from lerobot.rewards.robometer.configuration_robometer import RobometerConfig
-from lerobot.utils.constants import OBS_PREFIX
-from lerobot.utils.import_utils import _transformers_available, require_package
-
-if TYPE_CHECKING or _transformers_available:
-    from transformers import AutoModelForImageTextToText
-else:
-    AutoModelForImageTextToText = None  # type: ignore[assignment]
-
-logger = logging.getLogger(__name__)
-
-# Namespace for Robometer's pre-encoded Qwen-VL observation tensors. The
-# processor writes both Qwen-VL tensors and Robometer-specific token ids /
-# metadata here; the model reads them at inference (no tokenizer needed in
-# the model — EO1-style separation).
-ROBOMETER_FEATURE_PREFIX = f"{OBS_PREFIX}robometer."
-# Entries that are forwarded as keyword arguments to the Qwen backbone call.
-ROBOMETER_QWEN_INPUT_KEYS = (
-    "input_ids",
-    "attention_mask",
-    "pixel_values",
-    "pixel_values_videos",
-    "image_grid_thw",
-    "video_grid_thw",
-    "second_per_grid_ts",
-)
-# Robometer-only entries: the model pops these from the encoded batch before
-# calling the backbone and uses them to locate per-frame hidden states.
-ROBOMETER_METADATA_KEYS = (
-    "prog_token_id",
-    "vision_start_token_id",
-    "vision_end_token_id",
-    "video_merge_size",
-)
-# Every un-prefixed key the model looks up under ROBOMETER_FEATURE_PREFIX.
-ROBOMETER_INPUT_KEYS = ROBOMETER_QWEN_INPUT_KEYS + ROBOMETER_METADATA_KEYS
-
-# Order matters: the released checkpoint resized `embed_tokens` after adding
-# these tokens in this order, so changing the set or order would silently
-# misalign the saved embedding rows with their token ids. `<|reward_token|>`
-# and `<|sim_token|>` are vestigial (never read by any head) but still occupy
-# rows the checkpoint expects.
-ROBOMETER_SPECIAL_TOKENS = (
-    "<|split_token|>",
-    "<|reward_token|>",
-    "<|pref_token|>",
-    "<|sim_token|>",
-    "<|prog_token|>",
-)
-
-
-def convert_bins_to_continuous(bin_logits: Tensor) -> Tensor:
-    """Turn per-bin logits into one scalar in ``[0, 1]`` per leading index.
-
-    The last dimension of ``bin_logits`` enumerates the bins. Bin centers are
-    spaced evenly from 0 to 1 (inclusive), and the result is the expectation of
-    those centers under the softmax distribution of the logits.
-
-    Args:
-        bin_logits: tensor whose last dimension has size ``num_bins``.
-
-    Returns:
-        Tensor with the last dimension reduced away.
-    """
-    # Centers live on the same device/dtype as the logits so the product
-    # below needs no implicit transfer or promotion.
-    centers = torch.linspace(
-        0.0,
-        1.0,
-        bin_logits.shape[-1],
-        device=bin_logits.device,
-        dtype=bin_logits.dtype,
-    )
-    weights = torch.softmax(bin_logits, dim=-1)
-    expectation = weights * centers
-    return expectation.sum(dim=-1)
-
-
-def squeeze_last_safe(x: Tensor) -> Tensor:
-    """Remove the final dimension when it is a singleton on a 2D+ tensor.
-
-    Tensors with fewer than two dimensions, or whose last dimension is not of
-    size 1, are returned unchanged. The name and module-level placement follow
-    the upstream helper in ``robometer.models.rbm``.
-    """
-    if x.ndim <= 1:
-        return x
-    if x.shape[-1] != 1:
-        return x
-    return x.squeeze(-1)
-
-
-def _torch_dtype(name: str) -> torch.dtype:
-    """Resolve a dtype name such as ``"bfloat16"`` to the ``torch.dtype`` object.
-
-    Raises:
-        ValueError: if ``torch`` has no attribute called ``name`` or that
-            attribute is not a ``torch.dtype``.
-    """
-    resolved = getattr(torch, name, None)
-    if not isinstance(resolved, torch.dtype):
-        raise ValueError(f"Unknown torch dtype: {name!r}")
-    return resolved
-
-
-class RobometerPredictionHead(nn.Sequential):
-    """Two-layer MLP head for Robometer's progress / success / preference outputs.
-
-    The head is an ``nn.Sequential`` rather than a plain ``nn.Module`` so its
-    ``state_dict`` keys are positional (``progress_head.0.weight``,
-    ``progress_head.1.weight``, ...), which keeps them compatible with the
-    published ``lilkm/robometer-4b`` checkpoint.
-
-    Layer order: Linear -> LayerNorm -> GELU -> Dropout -> Linear, with an
-    optional trailing Sigmoid.
-    """
-
-    def __init__(self, hidden_dim: int, output_size: int, *, dropout: float, with_sigmoid: bool) -> None:
-        # The hidden width of the head is half the backbone width.
-        bottleneck = hidden_dim // 2
-        stack: tuple[nn.Module, ...] = (
-            nn.Linear(hidden_dim, bottleneck),
-            nn.LayerNorm(bottleneck),
-            nn.GELU(),
-            nn.Dropout(dropout),
-            nn.Linear(bottleneck, output_size),
-        )
-        if with_sigmoid:
-            stack = (*stack, nn.Sigmoid())
-        super().__init__(*stack)
-
-
-def decode_progress_outputs(
-    progress_logits: Tensor | None,
-    success_logits: Tensor | None,
-    *,
-    is_discrete_mode: bool,
-) -> dict[str, list[list[float]]]:
-    """Convert raw RBM head outputs into plain per-frame Python floats.
-
-    Args:
-        progress_logits: ``(B, T)`` in continuous mode or ``(B, T, num_bins)``
-            in discrete mode; ``None`` yields an empty ``progress_pred``.
-        success_logits: ``(B, T)`` raw logits that are passed through a
-            sigmoid; ``None`` yields an empty ``success_probs``.
-        is_discrete_mode: when True, each sample's bin logits are reduced to a
-            scalar per frame with :func:`convert_bins_to_continuous`.
-
-    Returns:
-        ``{"progress_pred": ..., "success_probs": ...}`` where each value is a
-        list with one flat list of floats per batch sample.
-    """
-
-    def _as_cpu_float(sample: Tensor) -> Tensor:
-        # Detach, upcast and move to CPU before any decoding math.
-        return sample.detach().float().cpu()
-
-    progress_pred: list[list[float]] = []
-    if progress_logits is not None:
-        for sample in progress_logits:
-            decoded = _as_cpu_float(sample)
-            if is_discrete_mode:
-                decoded = convert_bins_to_continuous(decoded)
-            progress_pred.append(decoded.flatten().tolist())
-
-    success_probs: list[list[float]] = []
-    if success_logits is not None:
-        success_probs = [
-            torch.sigmoid(_as_cpu_float(sample)).flatten().tolist() for sample in success_logits
-        ]
-
-    return {"progress_pred": progress_pred, "success_probs": success_probs}
-
-
-class RobometerRewardModel(PreTrainedRewardModel):
-    """Robometer reward model: Qwen3-VL backbone + progress/success heads.
-
-    The model consumes batches that were already encoded by the Robometer
-    processor (entries under ``ROBOMETER_FEATURE_PREFIX``), runs the backbone,
-    gathers one hidden state per frame and applies small MLP heads to it.
-    """
-
-    name = "robometer"
-    config_class = RobometerConfig
-
-    def __init__(self, config: RobometerConfig, *, dropout: float = 0.1) -> None:
-        """Build the backbone and the prediction heads.
-
-        Args:
-            config: model configuration; ``config.pretrained_path`` selects the
-                backbone-build path described below.
-            dropout: dropout probability used inside every prediction head.
-
-        Raises:
-            ValueError: if ``config.torch_dtype`` does not name a torch dtype.
-            AttributeError: if no ``hidden_size`` can be found on the backbone config.
-        """
-        require_package("transformers", extra="robometer")
-        super().__init__(config)
-        self.config = config
-
-        # Two backbone-build paths (EO-1 style, branched on ``pretrained_path``):
-        #
-        #   - Fresh training (``pretrained_path is None``): download the base
-        #     Qwen weights and resize the embed table to match
-        #     ``vlm_config.text_config.vocab_size`` — populated deterministically
-        #     in ``RobometerConfig.__post_init__`` as
-        #     ``len(tokenizer) + len(ROBOMETER_SPECIAL_TOKENS)``, mirroring
-        #     upstream Robometer's ``_add_special_tokens_and_resize`` in
-        #     ``third_party/robometer/.../setup_utils.py``.
-        #
-        #   - Loading a saved checkpoint (``pretrained_path`` is set): rebuild
-        #     the empty architecture from ``vlm_config`` via
-        #     ``AutoModelForImageTextToText.from_config`` so the subsequent
-        #     ``model.safetensors`` load is a direct fill of the right shape —
-        #     no redundant Qwen weight download.
-        torch_dtype = _torch_dtype(config.torch_dtype)
-        if config.pretrained_path is None:
-            self.model = AutoModelForImageTextToText.from_pretrained(
-                config.base_model_id,
-                dtype=torch_dtype,
-                trust_remote_code=True,
-            )
-            target_vocab = config.vlm_config["text_config"]["vocab_size"]
-            self.model.resize_token_embeddings(target_vocab)
-        else:
-            self.model = AutoModelForImageTextToText.from_config(
-                config.vlm_backbone_config,
-                dtype=torch_dtype,
-                trust_remote_code=True,
-            )
-
-        # All Qwen-VL backbones Robometer supports expose `text_config.hidden_size`.
-        # Falls back to the top-level `hidden_size` so future non-multimodal
-        # variants would still resolve.
-        backbone_config = self.model.config
-        text_config = getattr(backbone_config, "text_config", None)
-        hidden_size = getattr(text_config, "hidden_size", None) if text_config is not None else None
-        if hidden_size is None:
-            hidden_size = getattr(backbone_config, "hidden_size", None)
-        if hidden_size is None:
-            raise AttributeError(
-                f"Could not infer hidden_size from backbone config of {config.base_model_id}"
-            )
-        hidden_dim = int(hidden_size)
-
-        # Robometer's three prediction heads + frame-pool attention. The
-        # preference head is preserved to match the published state-dict layout
-        # even though only progress + success are consumed at inference, and
-        # `frame_pool_attn` is always allocated so checkpoints trained with
-        # `frame_pooling="attention"` load without remapping.
-        progress_output = config.progress_discrete_bins if config.use_discrete_progress else 1
-        self.progress_head = RobometerPredictionHead(
-            hidden_dim,
-            progress_output,
-            dropout=dropout,
-            # Discrete mode emits raw bin logits; continuous mode squashes to (0, 1).
-            with_sigmoid=not config.use_discrete_progress,
-        )
-        self.preference_head = RobometerPredictionHead(hidden_dim, 1, dropout=dropout, with_sigmoid=False)
-        self.success_head = RobometerPredictionHead(hidden_dim, 1, dropout=dropout, with_sigmoid=False)
-        self.frame_pool_attn = nn.Linear(hidden_dim, 1, bias=False)
-
-        # Match the dtype of the loaded base model so weight loading is a no-op cast.
-        model_dtype = next(self.model.parameters()).dtype
-        self.progress_head.to(dtype=model_dtype)
-        self.preference_head.to(dtype=model_dtype)
-        self.success_head.to(dtype=model_dtype)
-        self.frame_pool_attn.to(dtype=model_dtype)
-
-    def compute_reward(self, batch: dict[str, Tensor]) -> Tensor:
-        """Score a pre-encoded batch and return one reward per sample.
-
-        The reward is taken from the last frame of each sample: the progress
-        prediction when ``config.reward_output == "progress"``, otherwise the
-        success probability thresholded at ``config.success_threshold``
-        (1.0 above the threshold, 0.0 otherwise).
-
-        Args:
-            batch: mapping that holds the encoder output under keys prefixed
-                with ``ROBOMETER_FEATURE_PREFIX``.
-
-        Returns:
-            ``float32`` tensor with one entry per sample, moved to
-            ``config.device`` (CPU when that is unset).
-
-        Raises:
-            KeyError: if the batch carries no pre-encoded ``input_ids``.
-        """
-        inputs = {
-            key: batch[f"{ROBOMETER_FEATURE_PREFIX}{key}"]
-            for key in ROBOMETER_INPUT_KEYS
-            if f"{ROBOMETER_FEATURE_PREFIX}{key}" in batch
-        }
-        if "input_ids" not in inputs:
-            raise KeyError(
-                f"Robometer batch missing pre-encoded inputs (expected "
-                f"`{ROBOMETER_FEATURE_PREFIX}input_ids`). Make sure the "
-                "RobometerEncoderProcessorStep ran before `compute_reward`."
-            )
-
-        # Non-tensor metadata (e.g. plain ints) has no `.to` and is passed through.
-        device = next(self.model.parameters()).device
-        inputs = {key: value.to(device) if hasattr(value, "to") else value for key, value in inputs.items()}
-
-        # Inference runs in eval mode (dropout disabled), but the caller's mode
-        # is restored afterwards so that querying a reward in the middle of a
-        # training loop does not silently leave the module in eval mode.
-        was_training = self.training
-        self.eval()
-        try:
-            with torch.no_grad():
-                progress_logits, success_logits = self._compute_rbm_logits(inputs)
-        finally:
-            self.train(was_training)
-
-        decoded = decode_progress_outputs(
-            progress_logits,
-            success_logits,
-            is_discrete_mode=self.config.use_discrete_progress,
-        )
-        values = (
-            decoded["success_probs"] if self.config.reward_output == "success" else decoded["progress_pred"]
-        )
-
-        # Keep only the final frame's value of every sample.
-        rewards = torch.stack([torch.as_tensor(seq, dtype=torch.float32)[-1] for seq in values])
-        if self.config.reward_output == "success":
-            rewards = (rewards > self.config.success_threshold).float()
-        return rewards.to(self.config.device or "cpu")
-
-    def _compute_rbm_logits(
-        self,
-        inputs: dict[str, Any],
-    ) -> tuple[Tensor, Tensor]:
-        """Run the Qwen3-VL backbone and apply Robometer's heads.
-
-        ``inputs`` is the encoded batch produced by
-        :class:`RobometerEncoderProcessorStep`. It carries Qwen tensors as well
-        as Robometer-specific metadata (``prog_token_id``,
-        ``vision_start_token_id``, ``vision_end_token_id``, ``video_merge_size``)
-        — the metadata is popped here (mutating ``inputs``) so the rest can be
-        forwarded straight to the Qwen model.
-
-        Frame extraction is chosen from the config, in priority order:
-        per-frame progress tokens, then multi-image vision spans, then video mode.
-
-        Returns ``(progress_logits, success_logits)``. Shapes:
-
-        - ``progress_logits``: ``(B, T)`` (continuous) or ``(B, T, num_bins)`` (discrete).
-        - ``success_logits``: ``(B, T)`` raw logits (sigmoid happens at decode time).
-
-        Raises:
-            KeyError: if the token-id metadata needed by the selected mode is missing.
-            ValueError: if video mode is selected without ``video_grid_thw``.
-        """
-        prog_token_id = inputs.pop("prog_token_id", None)
-        vision_start_token_id = inputs.pop("vision_start_token_id", None)
-        vision_end_token_id = inputs.pop("vision_end_token_id", None)
-        # NOTE(review): the fallback of 14 is only used in video mode, where it
-        # is squared to derive tokens-per-frame — confirm it is the intended
-        # spatial merge size for the supported backbones.
-        video_merge_size = inputs.pop("video_merge_size", 14)
-
-        # Qwen3-VL doesn't reliably populate `last_hidden_state`; ask for the
-        # full hidden-state tuple and take the last layer. This matches the
-        # `is_qwen3` path in upstream Robometer's `RBM.forward_qwen` (main).
-        outputs = self.model(**inputs, output_hidden_states=True, return_dict=True)
-        hidden_state = (
-            outputs.hidden_states[-1]
-            if getattr(outputs, "hidden_states", None)
-            else outputs.last_hidden_state
-        )
-
-        input_ids = inputs["input_ids"]
-        if self.config.use_per_frame_progress_token:
-            if prog_token_id is None:
-                raise KeyError("`prog_token_id` missing in batch (run RobometerEncoderProcessorStep first)")
-            return self._process_token_extraction(hidden_state, input_ids, prog_token_id=prog_token_id)
-        if self.config.use_multi_image:
-            if vision_start_token_id is None or vision_end_token_id is None:
-                raise KeyError(
-                    "`vision_start_token_id` / `vision_end_token_id` missing in batch "
-                    "(run RobometerEncoderProcessorStep first)"
-                )
-            return self._process_multi_image_frames(
-                hidden_state,
-                input_ids,
-                start_id=vision_start_token_id,
-                end_id=vision_end_token_id,
-            )
-        video_grid_thw = inputs.get("video_grid_thw")
-        if video_grid_thw is None:
-            raise ValueError("video_grid_thw is required for video-mode Robometer inference")
-        if vision_start_token_id is None:
-            raise KeyError("`vision_start_token_id` missing in batch")
-        return self._process_video_frames(
-            hidden_state,
-            input_ids,
-            video_grid_thw,
-            start_id=vision_start_token_id,
-            merge_size=video_merge_size,
-        )
-
-    def _apply_heads_to_hidden_states(self, frame_embeddings: Tensor) -> tuple[Tensor, Tensor]:
-        """Apply progress + success heads to a tensor of frame embeddings.
-
-        Mirrors upstream ``RBM._apply_heads_to_hidden_states``. In discrete mode
-        the progress output keeps its trailing bin dimension; otherwise a
-        trailing singleton dimension is squeezed away, as it is for success.
-
-        Returns:
-            ``(progress, success)`` head outputs for the given embeddings.
-        """
-        progress_out = self.progress_head(frame_embeddings)
-        progress = progress_out if self.config.use_discrete_progress else squeeze_last_safe(progress_out)
-        success = squeeze_last_safe(self.success_head(frame_embeddings))
-        return progress, success
-
-    def _process_token_extraction(
-        self,
-        hidden_state: Tensor,
-        input_ids: Tensor,
-        *,
-        prog_token_id: int,
-    ) -> tuple[Tensor, Tensor]:
-        """Per-frame progress/success from ``<|prog_token|>`` positions.
-
-        Mirrors the progress-sample branch of upstream
-        ``RBM._process_token_extraction``. The per-sample results are stacked,
-        so every sequence must contain the same number of progress tokens.
-
-        Raises:
-            ValueError: if any sequence contains no ``<|prog_token|>``.
-        """
-        token_mask = input_ids == prog_token_id
-        batch_indices, positions = token_mask.nonzero(as_tuple=True)
-        if positions.numel() == 0:
-            raise ValueError("`<|prog_token|>` not found in any sequence")
-
-        # Hidden states at the progress-token positions, grouped per sample.
-        per_sample_hidden = [
-            hidden_state[i, positions[batch_indices == i]] for i in range(input_ids.shape[0])
-        ]
-        progress_list, success_list = [], []
-        for embeddings in per_sample_hidden:
-            if embeddings.shape[0] == 0:
-                raise ValueError("`<|prog_token|>` missing in a sequence")
-            progress, success = self._apply_heads_to_hidden_states(embeddings)
-            progress_list.append(progress)
-            success_list.append(success)
-
-        return torch.stack(progress_list), torch.stack(success_list)
-
-    def _process_multi_image_frames(
-        self,
-        hidden_state: Tensor,
-        input_ids: Tensor,
-        *,
-        start_id: int,
-        end_id: int,
-    ) -> tuple[Tensor, Tensor]:
-        """Per-frame progress/success in multi-image mode (Qwen-VL).
-
-        Mirrors upstream ``RBM._process_multi_image_frames`` (progress-sample
-        branch only — we don't run preference at inference). Each sample's
-        frame embeddings come from its vision start/end token pairs; results
-        are stacked across the batch.
-        """
-        progress_list, success_list = [], []
-        for batch_idx in range(input_ids.shape[0]):
-            seq_ids = input_ids[batch_idx]
-            seq_hidden = hidden_state[batch_idx]
-            frame_embeddings = self._extract_hidden_states_from_token_pairs(
-                seq_hidden, seq_ids, start_id, end_id
-            )
-            progress, success = self._apply_heads_to_hidden_states(frame_embeddings)
-            progress_list.append(progress)
-            success_list.append(success)
-
-        return torch.stack(progress_list), torch.stack(success_list)
-
-    def _extract_hidden_states_from_token_pairs(
-        self,
-        hidden_state: Tensor,
-        input_ids: Tensor,
-        start_id: int,
-        end_id: int,
-    ) -> Tensor:
-        """Pool one embedding per ``start_id`` / ``end_id`` token pair of a sequence.
-
-        Start and end positions are paired in order of occurrence. The tokens
-        strictly between a pair are pooled according to
-        ``config.frame_pooling``: ``"mean"`` averages them, ``"boundary"`` takes
-        the last one, and ``"attention"`` takes a softmax-weighted sum scored by
-        ``frame_pool_attn`` and scaled by the configured temperature. A pair
-        with nothing in between falls back to the mean of its two marker states.
-
-        Args:
-            hidden_state: hidden states of a single sequence.
-            input_ids: token ids of the same sequence.
-            start_id: token id that opens a frame span.
-            end_id: token id that closes a frame span.
-
-        Returns:
-            Stacked per-frame embeddings, one row per token pair.
-
-        Raises:
-            ValueError: if no start token exists, the start/end counts differ,
-                or a start position is not before its paired end position.
-        """
-        start_positions = (input_ids == start_id).nonzero(as_tuple=True)[0]
-        end_positions = (input_ids == end_id).nonzero(as_tuple=True)[0]
-        if start_positions.numel() == 0:
-            raise ValueError("`<|vision_start|>` not found in sequence")
-        if start_positions.numel() != end_positions.numel():
-            raise ValueError(
-                f"Mismatched vision token counts: {start_positions.numel()} start vs "
-                f"{end_positions.numel()} end"
-            )
-
-        frames: list[Tensor] = []
-        for start, end in zip(start_positions.tolist(), end_positions.tolist(), strict=True):
-            if start >= end:
-                raise ValueError(f"Invalid vision token pair: start={start} end={end}")
-            patch_tokens = hidden_state[start + 1 : end]
-            if patch_tokens.shape[0] == 0:
-                # Adjacent markers: no patch tokens to pool, average the markers.
-                frames.append((hidden_state[start] + hidden_state[end]) / 2.0)
-                continue
-
-            pooling = self.config.frame_pooling
-            if pooling == "mean":
-                frames.append(patch_tokens.mean(dim=0))
-            elif pooling == "boundary":
-                frames.append(patch_tokens[-1])
-            else:  # attention
-                scores = (
-                    self.frame_pool_attn(patch_tokens).squeeze(-1)
-                    / self.config.frame_pooling_attn_temperature
-                )
-                weights = torch.softmax(scores, dim=0).unsqueeze(-1)
-                frames.append((weights * patch_tokens).sum(dim=0))
-
-        return torch.stack(frames)
-
-    def _process_video_frames(
-        self,
-        hidden_state: Tensor,
-        input_ids: Tensor,
-        video_grid_thw: Tensor,
-        *,
-        start_id: int,
-        merge_size: int,
-    ) -> tuple[Tensor, Tensor]:
-        """Per-frame progress/success in video mode (Qwen-VL).
-
-        Mirrors upstream ``RBM._process_video_frames`` /
-        ``RBM._extract_progress_from_trajectory`` (progress-sample branch
-        only — preference is not run at inference). In particular,
-        ``average_temporal_patches=False`` reads the *boundary* token at
-        ``cursor + tokens_per_frame`` to match upstream byte-for-byte.
-
-        Each sample's ``video_grid_thw`` row is read as ``(t, h, w)``; the
-        sequence is walked in ``t`` consecutive spans of
-        ``(h * w) // merge_size**2`` tokens starting at the first ``start_id``.
-
-        Raises:
-            ValueError: if a sequence contains no ``start_id`` token.
-        """
-        progress_list, success_list = [], []
-        for batch_idx in range(input_ids.shape[0]):
-            seq_ids = input_ids[batch_idx]
-            seq_hidden = hidden_state[batch_idx]
-            start_positions = (seq_ids == start_id).nonzero(as_tuple=True)[0]
-            if start_positions.numel() == 0:
-                raise ValueError("`<|vision_start|>` not found in sequence")
-            t_dim, h_dim, w_dim = (int(x) for x in video_grid_thw[batch_idx].tolist())
-            tokens_per_frame = (h_dim * w_dim) // (merge_size**2)
-
-            # NOTE(review): the cursor begins on the start marker itself rather
-            # than one past it, so the first span includes that marker —
-            # presumably intentional to mirror upstream; confirm.
-            cursor = start_positions[0].item()
-            frame_embeddings: list[Tensor] = []
-            for _ in range(t_dim):
-                if self.config.average_temporal_patches:
-                    patch = seq_hidden[cursor : cursor + tokens_per_frame]
-                    frame_embeddings.append(patch.mean(dim=0))
-                else:
-                    # Upstream takes the position *one past* the patch span as
-                    # the per-frame boundary; see
-                    # `RBM._extract_progress_from_trajectory`.
-                    frame_embeddings.append(seq_hidden[cursor + tokens_per_frame])
-                cursor += tokens_per_frame
-
-            stacked = torch.stack(frame_embeddings)
-            progress, success = self._apply_heads_to_hidden_states(stacked)
-            progress_list.append(progress)
-            success_list.append(success)
-
-        return torch.stack(progress_list), torch.stack(success_list)
@@ -1,348 +0,0 @@
-# Copyright 2026 The HuggingFace Inc. team. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-"""Robometer pre/post processing pipelines."""
-
-from __future__ import annotations
-
-from dataclasses import dataclass, field
-from typing import TYPE_CHECKING, Any
-
-import numpy as np
-import torch
-from PIL import Image
-from torch import Tensor
-
-from lerobot.configs import PipelineFeatureType, PolicyFeature
-from lerobot.processor import (
-    AddBatchDimensionProcessorStep,
-    DeviceProcessorStep,
-    PolicyAction,
-    PolicyProcessorPipeline,
-    ProcessorStep,
-    ProcessorStepRegistry,
-    policy_action_to_transition,
-)
-from lerobot.rewards.robometer.configuration_robometer import RobometerConfig
-from lerobot.rewards.robometer.modeling_robometer import (
-    ROBOMETER_FEATURE_PREFIX,
-    ROBOMETER_SPECIAL_TOKENS,
-)
-from lerobot.types import EnvTransition, TransitionKey
-from lerobot.utils.constants import (
-    OBS_IMAGES,
-    POLICY_POSTPROCESSOR_DEFAULT_NAME,
-    POLICY_PREPROCESSOR_DEFAULT_NAME,
-)
-from lerobot.utils.import_utils import _transformers_available, require_package
-
-if TYPE_CHECKING or _transformers_available:
-    from transformers import AutoProcessor
-else:
-    AutoProcessor = None
-
-PROGRESS_PROMPT = (
-    "The task for the robot is '{task}'. Given the trajectory video, predict "
-    "the task progress at each frame, how far along the robot is towards "
-    "completing the task, a float between 0 and 1, where 0 is the starting "
-    "state and 1 is when the task is completed. If the robot is not "
-    "performing the same task, predict 0 progress."
-)
-
-
-def _frames_to_pil(frames: np.ndarray) -> list[Image.Image]:
-    """Convert ``(T, H, W, C)`` uint8 frames to a list of PIL images."""
-    if frames.ndim != 4:
-        raise ValueError(f"Expected (T,H,W,C) frames; got shape {frames.shape}")
-    if frames.dtype != np.uint8:
-        frames = np.clip(frames, 0, 255).astype(np.uint8)
-    return [Image.fromarray(frames[i]) for i in range(frames.shape[0])]
-
-
-def _video_to_numpy(video: Tensor, *, max_frames: int | None) -> np.ndarray:
-    """Convert one trajectory tensor to a ``(T, H, W, C) uint8`` numpy array."""
-    if max_frames is not None:
-        video = video[-max_frames:]
-    if video.shape[1] in (1, 3):
-        video = video.permute(0, 2, 3, 1)
-    elif video.shape[-1] not in (1, 3):
-        raise ValueError(f"Expected channel dim of size 1 or 3, got shape {tuple(video.shape)}")
-
-    array = video.detach().cpu().numpy()
-    if np.issubdtype(array.dtype, np.floating) and array.size > 0 and array.max() <= 1.0:
-        array = array * 255.0
-    return np.clip(array, 0, 255).astype(np.uint8)
-
-
-def _expand_tasks(task: Any, *, batch_size: int, default: str | None) -> list[str]:
-    if task is None:
-        task = default
-    if task is None:
-        raise KeyError("Robometer expected a task description in complementary data")
-    if isinstance(task, str):
-        return [task] * batch_size
-    if isinstance(task, tuple):
-        task = list(task)
-    if not (isinstance(task, list) and all(isinstance(item, str) for item in task)):
-        raise TypeError(f"Robometer task must be a string or list of strings, got {type(task)}")
-    if len(task) == 1 and batch_size > 1:
-        return task * batch_size
-    if len(task) != batch_size:
-        raise ValueError(f"Expected {batch_size} tasks, got {len(task)}")
-    return task
-
-
-@dataclass
-@ProcessorStepRegistry.register(name="robometer_encoder")
-class RobometerEncoderProcessorStep(ProcessorStep):
-    """Encode raw frames + task into Qwen-VL tensors for the Robometer model.
-
-    Loads a :class:`~transformers.AutoProcessor` matching ``base_model_id`` and
-    registers Robometer's special tokens on the tokenizer. The matching
-    embedding resize happens model-side in
-    :meth:`RobometerRewardModel.__init__`. This step owns the tokenizer — the
-    model itself never needs one — and is the EO1-style boundary between
-    pre-processing and modeling.
-
-    At call time the step reads:
-
-    - ``observation[image_key]``: ``(B, T, C, H, W)`` or ``(B, C, H, W)`` frames.
-    - ``complementary_data[task_key]``: a string or list of strings.
-
-    and writes ``observation[f"{ROBOMETER_FEATURE_PREFIX}<name>"]`` for:
-
-    - the Qwen-VL processor outputs: ``input_ids``, ``attention_mask``,
-      ``pixel_values``, ``image_grid_thw``, ``video_grid_thw``, ...
-    - Robometer-specific token ids consumed by the model heads:
-      ``prog_token_id``, ``vision_start_token_id``, ``vision_end_token_id``,
-      ``video_merge_size``.
-    """
-
-    base_model_id: str = "Qwen/Qwen3-VL-4B-Instruct"
-    image_key: str = OBS_IMAGES + ".top"
-    task_key: str = "task"
-    default_task: str | None = None
-    max_frames: int | None = 8
-    use_multi_image: bool = True
-    use_per_frame_progress_token: bool = True
-    max_length: int = 1024
-
-    _processor: Any = field(default=None, init=False, repr=False)
-
-    def __post_init__(self) -> None:
-        require_package("transformers", extra="robometer")
-        require_package("qwen-vl-utils", extra="robometer", import_name="qwen_vl_utils")
-
-        self._processor = AutoProcessor.from_pretrained(
-            self.base_model_id,
-            trust_remote_code=True,
-            do_sample_frames=False,
-            padding_side="right",
-        )
-
-        # Register Robometer's special tokens on the tokenizer. The matching
-        # embedding resize happens model-side in `RobometerRewardModel.__init__`.
-        tokenizer = self._processor.tokenizer
-        # Qwen tokenizers may not define a pad token, but batched prompts/videos
-        # require padding, so reuse EOS as the padding token.
-        if tokenizer.pad_token is None:
-            tokenizer.pad_token = tokenizer.eos_token
-        for token in ROBOMETER_SPECIAL_TOKENS:
-            if token not in tokenizer.get_vocab():
-                tokenizer.add_special_tokens({"additional_special_tokens": [token]})
-
-    def __call__(self, transition: EnvTransition) -> EnvTransition:
-        observation = transition.get(TransitionKey.OBSERVATION)
-        complementary = transition.get(TransitionKey.COMPLEMENTARY_DATA) or {}
-        if not isinstance(observation, dict):
-            raise ValueError("RobometerEncoderProcessorStep requires an observation dict")
-
-        if self.image_key not in observation:
-            raise KeyError(f"Robometer expected image key {self.image_key!r} in observation")
-
-        frames = observation[self.image_key]
-        tensor = frames.detach().cpu() if isinstance(frames, Tensor) else torch.as_tensor(frames)
-        if tensor.ndim == 4:
-            tensor = tensor.unsqueeze(1)
-        elif tensor.ndim != 5:
-            raise ValueError(
-                f"Expected Robometer frames with shape (B,C,H,W) or (B,T,C,H,W); got {tuple(tensor.shape)}"
-            )
-
-        batch_size = tensor.shape[0]
-        tasks = _expand_tasks(
-            complementary.get(self.task_key, self.default_task),
-            batch_size=batch_size,
-            default=self.default_task,
-        )
-
-        samples = [
-            (_video_to_numpy(tensor[i], max_frames=self.max_frames), tasks[i]) for i in range(batch_size)
-        ]
-        encoded = self.encode_samples(samples)
-
-        new_observation = dict(observation)
-        for key, value in encoded.items():
-            new_observation[f"{ROBOMETER_FEATURE_PREFIX}{key}"] = value
-
-        new_transition = transition.copy()
-        new_transition[TransitionKey.OBSERVATION] = new_observation
-        return new_transition
-
-    def encode_samples(self, samples: list[tuple[np.ndarray, str]]) -> dict[str, Tensor]:
-        """Run the Qwen-VL processor on a list of ``(frames, task)`` samples.
-
-        Used internally by ``__call__`` and exposed for callers that want to
-        run the encoder on a single trajectory without building an
-        :class:`EnvTransition` (see ``examples/dataset/create_robometer_progress_videos.py``).
-        """
-        from qwen_vl_utils import process_vision_info
-
-        conversations = [self._build_conversation(frames, task) for frames, task in samples]
-
-        texts = [
-            self._processor.apply_chat_template(
-                msg,
-                tokenize=False,
-                add_generation_prompt=False,
-                add_vision_id=True,
-                enable_thinking=False,
-                fps=1,
-            )
-            for msg in conversations
-        ]
-
-        process_kwargs: dict[str, Any] = {
-            "return_video_kwargs": True,
-            "return_video_metadata": True,
-        }
-        image_processor = getattr(self._processor, "image_processor", None)
-        if image_processor is not None and hasattr(image_processor, "patch_size"):
-            process_kwargs["image_patch_size"] = image_processor.patch_size
-
-        image_inputs, video_inputs, video_kwargs = process_vision_info(conversations, **process_kwargs)
-
-        videos: list[Any] | None = None
-        video_metadatas: list[Any] | None = None
-        if video_inputs:
-            if isinstance(video_inputs[0], tuple) and len(video_inputs[0]) == 2:
-                videos_seq, metadatas_seq = zip(*video_inputs, strict=False)
-                videos = list(videos_seq)
-                video_metadatas = list(metadatas_seq)
-            else:
-                videos = list(video_inputs)
-
-        processor_kwargs: dict[str, Any] = {
-            "text": texts,
-            "images": image_inputs,
-            "padding": True,
-            "truncation": False,
-            "max_length": self.max_length,
-            "return_tensors": "pt",
-            "do_resize": False,
-        }
-        if videos is not None:
-            processor_kwargs["videos"] = videos
-        if video_metadatas is not None:
-            processor_kwargs["video_metadata"] = video_metadatas
-        if video_kwargs:
-            processor_kwargs.update(video_kwargs)
-
-        encoded = self._processor(**processor_kwargs)
-
-        # Write Robometer-specific token ids and the video patch merge size into
-        # the encoded batch so `RobometerRewardModel` doesn't need its own
-        # tokenizer at inference (EO1-style separation: the processor owns the
-        # tokenizer, the model owns the backbone and heads).
-        tokenizer = self._processor.tokenizer
-        encoded["prog_token_id"] = tokenizer.convert_tokens_to_ids("<|prog_token|>")
-        encoded["vision_start_token_id"] = tokenizer.convert_tokens_to_ids("<|vision_start|>")
-        encoded["vision_end_token_id"] = tokenizer.convert_tokens_to_ids("<|vision_end|>")
-        video_processor = getattr(self._processor, "video_processor", None)
-        encoded["video_merge_size"] = int(getattr(video_processor, "merge_size", 14))
-        return encoded
-
-    def _build_conversation(self, frames: np.ndarray, task: str) -> list[dict[str, Any]]:
-        pil_frames = _frames_to_pil(frames)
-        prompt = PROGRESS_PROMPT.format(task=task)
-        content: list[dict[str, Any]] = [{"type": "text", "text": prompt}]
-
-        if self.use_multi_image:
-            for image in pil_frames:
-                content.append({"type": "image", "image": image})
-                if self.use_per_frame_progress_token:
-                    content.append({"type": "text", "text": "<|prog_token|>"})
-        else:
-            content.append({"type": "video", "video": pil_frames, "sample_fps": 1.0})
-
-        return [{"role": "user", "content": content}]
-
-    def transform_features(
-        self, features: dict[PipelineFeatureType, dict[str, PolicyFeature]]
-    ) -> dict[PipelineFeatureType, dict[str, PolicyFeature]]:
-        # The Qwen-VL processor produces variable-length sequence tensors that
-        # don't fit the static `PolicyFeature(shape=...)` mould; we deliberately
-        # do not advertise the new observation keys here.
-        return features
-
-    def get_config(self) -> dict[str, Any]:
-        return {
-            "base_model_id": self.base_model_id,
-            "image_key": self.image_key,
-            "task_key": self.task_key,
-            "default_task": self.default_task,
-            "max_frames": self.max_frames,
-            "use_multi_image": self.use_multi_image,
-            "use_per_frame_progress_token": self.use_per_frame_progress_token,
-            "max_length": self.max_length,
-        }
-
-
-def make_robometer_pre_post_processors(
-    config: RobometerConfig,
-    dataset_stats: dict[str, dict[str, Any]] | None = None,
-) -> tuple[
-    PolicyProcessorPipeline[dict[str, Any], dict[str, Any]],
-    PolicyProcessorPipeline[PolicyAction, PolicyAction],
-]:
-    """Pipeline that pre-encodes frames + task into Qwen-VL tensors.
-
-    The preprocessor adds a batch dimension if needed, runs Robometer's
-    encoder, and moves everything to the configured device. The
-    postprocessor is the identity since Robometer outputs a single reward
-    tensor (no action to un-normalise).
-    """
-    del dataset_stats  # Robometer has its own normalisation inside the Qwen-VL processor.
-
-    preprocessor = PolicyProcessorPipeline[dict[str, Any], dict[str, Any]](
-        steps=[
-            AddBatchDimensionProcessorStep(),
-            RobometerEncoderProcessorStep(
-                base_model_id=config.base_model_id,
-                image_key=config.image_key,
-                task_key=config.task_key,
-                default_task=config.default_task,
-                max_frames=config.max_frames,
-                use_multi_image=config.use_multi_image,
-                use_per_frame_progress_token=config.use_per_frame_progress_token,
-            ),
-            DeviceProcessorStep(device=config.device or "cpu"),
-        ],
-        name=POLICY_PREPROCESSOR_DEFAULT_NAME,
-    )
-    postprocessor = PolicyProcessorPipeline(
-        name=POLICY_POSTPROCESSOR_DEFAULT_NAME,
-        to_transition=policy_action_to_transition,
-    )
-    return preprocessor, postprocessor
@@ -49,47 +49,33 @@ https://github.com/michel-aractingi/lerobot-hilserl-guide
 import logging
 import os
 import time
-from collections.abc import Generator
 from functools import lru_cache
 from queue import Empty
-from typing import TYPE_CHECKING, Any
-
-from lerobot.utils.import_utils import _grpc_available, require_package
-
-if TYPE_CHECKING or _grpc_available:
-    import grpc
-
-    from lerobot.transport import services_pb2, services_pb2_grpc
-    from lerobot.transport.utils import (
-        bytes_to_state_dict,
-        grpc_channel_options,
-        python_object_to_bytes,
-        receive_bytes_in_chunks,
-        send_bytes_in_chunks,
-        transitions_to_bytes,
-    )
-else:
-    grpc = None
-    services_pb2 = None
-    services_pb2_grpc = None
-    bytes_to_state_dict = None
-    grpc_channel_options = None
-    python_object_to_bytes = None
-    receive_bytes_in_chunks = None
-    send_bytes_in_chunks = None
-    transitions_to_bytes = None
+from typing import Any

+import grpc
 import torch
 from torch import nn
 from torch.multiprocessing import Queue

 from lerobot.cameras import opencv  # noqa: F401
 from lerobot.configs import parser
-from lerobot.policies import make_policy, make_pre_post_processors
+from lerobot.policies import PreTrainedPolicy, make_policy, make_pre_post_processors
 from lerobot.processor import TransitionKey
+from lerobot.rl.queue import get_last_item_from_queue
+from lerobot.rl.train_rl import TrainRLServerPipelineConfig
 from lerobot.robots import so_follower  # noqa: F401
 from lerobot.teleoperators import gamepad, so_leader  # noqa: F401
 from lerobot.teleoperators.utils import TeleopEvents
+from lerobot.transport import services_pb2, services_pb2_grpc
+from lerobot.transport.utils import (
+    bytes_to_state_dict,
+    grpc_channel_options,
+    python_object_to_bytes,
+    receive_bytes_in_chunks,
+    send_bytes_in_chunks,
+    transitions_to_bytes,
+)
 from lerobot.utils.device_utils import get_safe_torch_device
 from lerobot.utils.process import ProcessSignalHandler
 from lerobot.utils.random_utils import set_seed
@@ -103,24 +89,18 @@ from lerobot.utils.utils import (
    init_logging,
 )

-from .algorithms.base import RLAlgorithm
-from .algorithms.factory import make_algorithm
 from .gym_manipulator import (
    make_processors,
    make_robot_env,
    reset_and_build_transition,
    step_env_and_process_transition,
 )
-from .queue import get_last_item_from_queue
-from .train_rl import TrainRLServerPipelineConfig

 # Main entry point


@parser.wrap()
 def actor_cli(cfg: TrainRLServerPipelineConfig):
-    # Fail fast with a friendly error if the optional ``hilserl`` extra is missing.
-    require_package("grpcio", extra="hilserl", import_name="grpc")
    cfg.validate()
    display_pid = False
    if not use_threads(cfg):
@@ -280,9 +260,6 @@ def act_with_policy(
    policy = policy.to(device).eval()
    assert isinstance(policy, nn.Module)

-    # Build the algorithm
-    algorithm = make_algorithm(cfg=cfg.algorithm, policy=policy)
-
    preprocessor, postprocessor = make_pre_post_processors(
        policy_cfg=cfg.policy,
        dataset_stats=cfg.policy.dataset_stats,
@@ -386,7 +363,7 @@ def act_with_policy(
        if done or truncated:
            logging.info(f"[ACTOR] Global step {interaction_step}: Episode reward: {sum_reward_episode}")

-            update_policy_parameters(algorithm=algorithm, parameters_queue=parameters_queue, device=device)
+            update_policy_parameters(policy=policy, parameters_queue=parameters_queue, device=device)

            if len(list_transition_to_send_to_learner) > 0:
                push_transitions_to_transport_queue(
@@ -433,10 +410,10 @@ def act_with_policy(


 def establish_learner_connection(
-    stub: "services_pb2_grpc.LearnerServiceStub",
+    stub: services_pb2_grpc.LearnerServiceStub,
    shutdown_event: Any,  # Event
    attempts: int = 30,
-) -> bool:
+):
    """Establish a connection with the learner.

    Args:
@@ -466,14 +443,12 @@ def establish_learner_connection(
 def learner_service_client(
    host: str = "127.0.0.1",
    port: int = 50051,
-) -> "tuple[services_pb2_grpc.LearnerServiceStub, grpc.Channel]":
-    """Return a client for the learner service.
+) -> tuple[services_pb2_grpc.LearnerServiceStub, grpc.Channel]:
+    """
+    Returns a client for the learner service.

    GRPC uses HTTP/2, which is a binary protocol and multiplexes requests over a single connection.
    So we need to create only one client and reuse it.
-
-    Returns:
-        tuple[services_pb2_grpc.LearnerServiceStub, grpc.Channel]: The stub and the channel.
    """

    channel = grpc.insecure_channel(
@@ -489,17 +464,15 @@ def receive_policy(
    cfg: TrainRLServerPipelineConfig,
    parameters_queue: Queue,
    shutdown_event: Any,  # Event
-    learner_client: "services_pb2_grpc.LearnerServiceStub | None" = None,
-    grpc_channel: "grpc.Channel | None" = None,
-) -> None:
+    learner_client: services_pb2_grpc.LearnerServiceStub | None = None,
+    grpc_channel: grpc.Channel | None = None,
+):
    """Receive parameters from the learner.

    Args:
        cfg (TrainRLServerPipelineConfig): The configuration for the actor.
        parameters_queue (Queue): The queue to receive the parameters.
        shutdown_event (Event): The event to check if the process should shutdown.
-        learner_client (services_pb2_grpc.LearnerServiceStub | None): Optional pre-created stub.
-        grpc_channel (grpc.Channel | None): Optional pre-created channel.
    """
    logging.info("[ACTOR] Start receiving parameters from the Learner")
    if not use_threads(cfg):
@@ -543,10 +516,11 @@ def send_transitions(
    cfg: TrainRLServerPipelineConfig,
    transitions_queue: Queue,
    shutdown_event: Any,  # Event
-    learner_client: "services_pb2_grpc.LearnerServiceStub | None" = None,
-    grpc_channel: "grpc.Channel | None" = None,
-) -> None:
-    """Send transitions to the learner.
+    learner_client: services_pb2_grpc.LearnerServiceStub | None = None,
+    grpc_channel: grpc.Channel | None = None,
+) -> services_pb2.Empty:
+    """
+    Sends transitions to the learner.

    This function continuously retrieves messages from the queue and processes:

@@ -554,13 +528,6 @@ def send_transitions(
        - A batch of transitions (observation, action, reward, next observation) is collected.
        - Transitions are moved to the CPU and serialized using PyTorch.
        - The serialized data is wrapped in a `services_pb2.Transition` message and sent to the learner.
-
-    Args:
-        cfg (TrainRLServerPipelineConfig): The configuration for the actor.
-        transitions_queue (Queue): The queue to receive the transitions.
-        shutdown_event (Event): The event to check if the process should shutdown.
-        learner_client (services_pb2_grpc.LearnerServiceStub | None): Optional pre-created stub.
-        grpc_channel (grpc.Channel | None): Optional pre-created channel.
    """

    if not use_threads(cfg):
@@ -599,23 +566,17 @@ def send_interactions(
    cfg: TrainRLServerPipelineConfig,
    interactions_queue: Queue,
    shutdown_event: Any,  # Event
-    learner_client: "services_pb2_grpc.LearnerServiceStub | None" = None,
-    grpc_channel: "grpc.Channel | None" = None,
-) -> None:
-    """Send interactions to the learner.
+    learner_client: services_pb2_grpc.LearnerServiceStub | None = None,
+    grpc_channel: grpc.Channel | None = None,
+) -> services_pb2.Empty:
+    """
+    Sends interactions to the learner.

    This function continuously retrieves messages from the queue and processes:

    - Interaction Messages:
        - Contains useful statistics about episodic rewards and policy timings.
        - The message is serialized using `pickle` and sent to the learner.
-
-    Args:
-        cfg (TrainRLServerPipelineConfig): The configuration for the actor.
-        interactions_queue (Queue): The queue to receive the interactions.
-        shutdown_event (Event): The event to check if the process should shutdown.
-        learner_client (services_pb2_grpc.LearnerServiceStub | None): Optional pre-created stub.
-        grpc_channel (grpc.Channel | None): Optional pre-created channel.
    """

    if not use_threads(cfg):
@@ -658,7 +619,7 @@ def transitions_stream(
    shutdown_event: Any,  # Event
    transitions_queue: Queue,
    timeout: float,
-) -> "Generator[Any, None, services_pb2.Empty]":
+) -> services_pb2.Empty:
    while not shutdown_event.is_set():
        try:
            message = transitions_queue.get(block=True, timeout=timeout)
@@ -677,7 +638,7 @@ def interactions_stream(
    shutdown_event: Any,  # Event
    interactions_queue: Queue,
    timeout: float,
-) -> "Generator[Any, None, services_pb2.Empty]":
+) -> services_pb2.Empty:
    while not shutdown_event.is_set():
        try:
            message = interactions_queue.get(block=True, timeout=timeout)
@@ -697,8 +658,7 @@ def interactions_stream(
 #  Policy functions


-def update_policy_parameters(algorithm: RLAlgorithm, parameters_queue: Queue, device):
-    """Drain the latest learner-pushed weights into ``algorithm.policy``."""
+def update_policy_parameters(policy: PreTrainedPolicy, parameters_queue: Queue, device):
    bytes_state_dict = get_last_item_from_queue(parameters_queue, block=False)
    if bytes_state_dict is not None:
        logging.info("[ACTOR] Load new parameters from Learner.")
@@ -713,7 +673,7 @@ def update_policy_parameters(algorithm: RLAlgorithm, parameters_queue: Queue, de
        # - Send critic's encoder state when shared_encoder=True
        # - Skip encoder params entirely when freeze_vision_encoder=True
        # - Ensure discrete_critic gets correct encoder state (currently uses encoder_critic)
-        algorithm.load_weights(state_dicts, device=device)
+        policy.load_actor_weights(state_dicts, device=device)


 #  Utilities functions
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-from .sac import SACAlgorithm, SACAlgorithmConfig
+from .sac import SACAlgorithm as SACAlgorithm, SACAlgorithmConfig as SACAlgorithmConfig

 __all__ = [
    "SACAlgorithm",
@@ -15,38 +15,32 @@
 from __future__ import annotations

 import abc
-import builtins
-import os
 from collections.abc import Iterator
-from pathlib import Path
-from typing import TYPE_CHECKING, Any, TypeVar
+from typing import TYPE_CHECKING, Any

 import torch
-from huggingface_hub import hf_hub_download
-from huggingface_hub.constants import SAFETENSORS_SINGLE_FILE
-from huggingface_hub.errors import HfHubHTTPError
-from safetensors.torch import load_file as load_safetensors, save_file as save_safetensors
 from torch.optim import Optimizer

-from lerobot.types import BatchType
-from lerobot.utils.hub import HubMixin
-
-from .configs import RLAlgorithmConfig, TrainingStats
+from lerobot.rl.algorithms.configs import RLAlgorithmConfig, TrainingStats

 if TYPE_CHECKING:
-    from torch import nn
+    from lerobot.rl.data_sources.data_mixer import DataMixer

-    from ..data_sources.data_mixer import DataMixer
-
-T = TypeVar("T", bound="RLAlgorithm")
+BatchType = dict[str, Any]


-class RLAlgorithm(HubMixin, abc.ABC):
+class RLAlgorithm(abc.ABC):
    """Base for all RL algorithms."""

-    config_class: type[RLAlgorithmConfig]
-    name: str
-    config: RLAlgorithmConfig
+    config_class: type[RLAlgorithmConfig] | None = None
+    name: str | None = None
+
+    def __init_subclass__(cls, **kwargs):
+        super().__init_subclass__(**kwargs)
+        if not getattr(cls, "config_class", None):
+            raise TypeError(f"Class {cls.__name__} must define 'config_class'")
+        if not getattr(cls, "name", None):
+            raise TypeError(f"Class {cls.__name__} must define 'name'")

    @abc.abstractmethod
    def update(self, batch_iterator: Iterator[BatchType]) -> TrainingStats:
@@ -57,7 +51,7 @@ class RLAlgorithm(HubMixin, abc.ABC):
        The iterator is owned by the trainer; the algorithm just consumes
        from it.
        """
-        raise NotImplementedError
+        ...

    def configure_data_iterator(
        self,
@@ -78,13 +72,13 @@ class RLAlgorithm(HubMixin, abc.ABC):
            queue_size=queue_size,
        )

-    @abc.abstractmethod
    def make_optimizers_and_scheduler(self) -> dict[str, Optimizer]:
-        """Build and return the optimizers used during training.
+        """Create, store, and return the optimizers needed for training.

-        Called once on the learner side after construction.
+        Called on the **learner** side after construction.  Subclasses must
+        override this with algorithm-specific optimizer setup.
        """
-        raise NotImplementedError
+        return {}

    def get_optimizers(self) -> dict[str, Optimizer]:
        """Return optimizers for checkpointing / external scheduling."""
@@ -110,98 +104,3 @@ class RLAlgorithm(HubMixin, abc.ABC):
    @abc.abstractmethod
    def load_weights(self, weights: dict[str, Any], device: str | torch.device = "cpu") -> None:
        """Load policy state-dict received from the learner."""
-        raise NotImplementedError
-
-    @abc.abstractmethod
-    def state_dict(self) -> dict[str, torch.Tensor]:
-        """Algorithm-owned trainable tensors.
-
-        Must return a flat tensor mapping for everything the algorithm owns
-        that is not part of the policy (e.g. critic ensembles, target networks,
-        temperature parameters). Algorithms with no training-only tensors
-        should explicitly return an empty dict.
-        """
-        raise NotImplementedError
-
-    @abc.abstractmethod
-    def load_state_dict(
-        self,
-        state_dict: dict[str, torch.Tensor],
-        device: str | torch.device = "cpu",
-    ) -> None:
-        """In-place load of algorithm-owned tensors.
-
-        Implementations MUST keep the identity of any ``nn.Parameter`` that an
-        optimizer references (e.g. SAC's ``log_alpha``) by using ``.copy_()``
-        rather than rebinding the attribute.
-        """
-        raise NotImplementedError
-
-    def _save_pretrained(self, save_directory: Path) -> None:
-        """Persist the algorithm's tensors and config to ``save_directory``.
-
-        Writes ``model.safetensors`` (algorithm tensors via :meth:`state_dict`)
-        and ``config.json`` (via :meth:`RLAlgorithmConfig.save_pretrained`).
-        """
-        tensors = {k: v.detach().cpu().contiguous() for k, v in self.state_dict().items()}
-        save_safetensors(tensors, str(save_directory / SAFETENSORS_SINGLE_FILE))
-        self.config._save_pretrained(save_directory)
-
-    @classmethod
-    def from_pretrained(
-        cls: builtins.type[T],
-        pretrained_name_or_path: str | Path,
-        *,
-        policy: nn.Module,
-        config: RLAlgorithmConfig | None = None,
-        force_download: bool = False,
-        resume_download: bool | None = None,
-        proxies: dict | None = None,
-        token: str | bool | None = None,
-        cache_dir: str | Path | None = None,
-        local_files_only: bool = False,
-        revision: str | None = None,
-        device: str | torch.device = "cpu",
-        **algo_kwargs: Any,
-    ) -> T:
-        """Build an algorithm and load its weights from ``pretrained_name_or_path``."""
-        if config is None:
-            config = cls.config_class.from_pretrained(
-                pretrained_name_or_path,
-                force_download=force_download,
-                resume_download=resume_download,
-                proxies=proxies,
-                token=token,
-                cache_dir=cache_dir,
-                local_files_only=local_files_only,
-                revision=revision,
-            )
-        if hasattr(config, "policy_config"):
-            config.policy_config = policy.config
-
-        instance = cls(policy=policy, config=config, **algo_kwargs)
-
-        model_id = str(pretrained_name_or_path)
-        if os.path.isdir(model_id):
-            model_file = os.path.join(model_id, SAFETENSORS_SINGLE_FILE)
-        else:
-            try:
-                model_file = hf_hub_download(
-                    repo_id=model_id,
-                    filename=SAFETENSORS_SINGLE_FILE,
-                    revision=revision,
-                    cache_dir=cache_dir,
-                    force_download=force_download,
-                    proxies=proxies,
-                    resume_download=resume_download,
-                    token=token,
-                    local_files_only=local_files_only,
-                )
-            except HfHubHTTPError as e:
-                raise FileNotFoundError(
-                    f"{SAFETENSORS_SINGLE_FILE} not found on the HuggingFace Hub in {model_id}"
-                ) from e
-
-        tensors = load_safetensors(model_file)
-        instance.load_state_dict(tensors, device=device)
-        return instance
@@ -15,23 +15,14 @@
 from __future__ import annotations

 import abc
-import builtins
-import logging
-import os
 from dataclasses import dataclass, field
-from pathlib import Path
-from typing import Any, TypeVar
+from typing import TYPE_CHECKING, Any

 import draccus
-from huggingface_hub import hf_hub_download
-from huggingface_hub.constants import CONFIG_NAME
-from huggingface_hub.errors import HfHubHTTPError
+import torch

-from lerobot.utils.hub import HubMixin
-
-T = TypeVar("T", bound="RLAlgorithmConfig")
-
-logger = logging.getLogger(__name__)
+if TYPE_CHECKING:
+    from lerobot.rl.algorithms.base import RLAlgorithm


@dataclass
@@ -56,7 +47,7 @@ class TrainingStats:


@dataclass
-class RLAlgorithmConfig(draccus.ChoiceRegistry, HubMixin, abc.ABC):
+class RLAlgorithmConfig(draccus.ChoiceRegistry, abc.ABC):
    """Registry for algorithm configs."""

    @property
@@ -67,6 +58,14 @@ class RLAlgorithmConfig(draccus.ChoiceRegistry, HubMixin, abc.ABC):
            raise TypeError(f"Expected string from get_choice_name, got {type(choice_name)}")
        return choice_name

+    @abc.abstractmethod
+    def build_algorithm(self, policy: torch.nn.Module) -> RLAlgorithm:
+        """Construct the :class:`RLAlgorithm` for this config.
+
+        Must be overridden by every registered config subclass.
+        """
+        raise NotImplementedError(f"{type(self).__name__} must implement build_algorithm()")
+
    @classmethod
    @abc.abstractmethod
    def from_policy_config(cls, policy_cfg: Any) -> RLAlgorithmConfig:
@@ -75,64 +74,3 @@ class RLAlgorithmConfig(draccus.ChoiceRegistry, HubMixin, abc.ABC):
        Must be overridden by every registered config subclass.
        """
        raise NotImplementedError(f"{cls.__name__} must implement from_policy_config()")
-
-    def _save_pretrained(self, save_directory: Path) -> None:
-        """Serialize this config as ``config.json`` inside ``save_directory``."""
-        with open(save_directory / CONFIG_NAME, "w") as f, draccus.config_type("json"):
-            draccus.dump(self, f, indent=4)
-
-    @classmethod
-    def from_pretrained(
-        cls: builtins.type[T],
-        pretrained_name_or_path: str | Path,
-        *,
-        force_download: bool = False,
-        resume_download: bool | None = None,
-        proxies: dict[Any, Any] | None = None,
-        token: str | bool | None = None,
-        cache_dir: str | Path | None = None,
-        local_files_only: bool = False,
-        revision: str | None = None,
-        **algo_kwargs: Any,
-    ) -> T:
-        model_id = str(pretrained_name_or_path)
-        config_file: str | None = None
-        if Path(model_id).is_dir():
-            if CONFIG_NAME in os.listdir(model_id):
-                config_file = os.path.join(model_id, CONFIG_NAME)
-            else:
-                logger.error(f"{CONFIG_NAME} not found in {Path(model_id).resolve()}")
-        else:
-            try:
-                config_file = hf_hub_download(
-                    repo_id=model_id,
-                    filename=CONFIG_NAME,
-                    revision=revision,
-                    cache_dir=cache_dir,
-                    force_download=force_download,
-                    proxies=proxies,
-                    resume_download=resume_download,
-                    token=token,
-                    local_files_only=local_files_only,
-                )
-            except HfHubHTTPError as e:
-                raise FileNotFoundError(
-                    f"{CONFIG_NAME} not found on the HuggingFace Hub in {model_id}"
-                ) from e
-
-        if config_file is None:
-            raise FileNotFoundError(f"{CONFIG_NAME} not found in {model_id}")
-
-        with draccus.config_type("json"):
-            instance = draccus.parse(RLAlgorithmConfig, config_file, args=[])
-
-        if cls is not RLAlgorithmConfig and not isinstance(instance, cls):
-            raise TypeError(
-                f"Config at {model_id} has type '{instance.type}' but was loaded via "
-                f"{cls.__name__}; use the matching subclass or RLAlgorithmConfig.from_pretrained()."
-            )
-
-        for key, value in algo_kwargs.items():
-            if hasattr(instance, key):
-                setattr(instance, key, value)
-        return instance
@@ -16,8 +16,8 @@ from __future__ import annotations

 import torch

-from .base import RLAlgorithm
-from .configs import RLAlgorithmConfig
+from lerobot.rl.algorithms.base import RLAlgorithm
+from lerobot.rl.algorithms.configs import RLAlgorithmConfig


 def make_algorithm_config(algorithm_type: str, **kwargs) -> RLAlgorithmConfig:
@@ -43,57 +43,5 @@ def make_algorithm_config(algorithm_type: str, **kwargs) -> RLAlgorithmConfig:
    return cls(**kwargs)


-def get_algorithm_class(name: str) -> type[RLAlgorithm]:
-    """
-    Retrieves an RL algorithm class by its registered name.
-
-    This function uses dynamic imports to avoid loading all algorithm classes into
-    memory at once, improving startup time and reducing dependencies.
-
-    Args:
-        name: The name of the algorithm. Supported names are "sac".
-
-    Returns:
-        The algorithm class corresponding to the given name.
-
-    Raises:
-        ValueError: If the algorithm name is not recognized.
-    """
-    if name == "sac":
-        from .sac.sac_algorithm import SACAlgorithm
-
-        return SACAlgorithm
-    raise ValueError(
-        f"Algorithm type '{name}' is not available. "
-        f"Known: {list(RLAlgorithmConfig.get_known_choices().keys())}"
-    )
-
-
 def make_algorithm(cfg: RLAlgorithmConfig, policy: torch.nn.Module) -> RLAlgorithm:
-    """
-    Instantiate an RL algorithm.
-
-    This factory function looks up the :class:`RLAlgorithm` subclass that matches
-    ``cfg.type`` and instantiates it with the provided policy. It also enforces
-    that ``cfg.policy_config`` has been populated before construction (this is
-    normally handled by :meth:`TrainRLServerPipelineConfig.validate`).
-
-    Args:
-        cfg: The algorithm configuration. Must have ``policy_config`` set.
-        policy: The policy module the algorithm will train.
-
-    Returns:
-        An instantiated :class:`RLAlgorithm`.
-
-    Raises:
-        ValueError: If ``cfg.policy_config`` is ``None`` or ``cfg.type`` is not
-            registered.
-    """
-    if getattr(cfg, "policy_config", None) is None:
-        raise ValueError(
-            f"{type(cfg).__name__}.policy_config is None. "
-            "It must be populated (typically by TrainRLServerPipelineConfig.validate) "
-            "before calling make_algorithm()."
-        )
-    cls = get_algorithm_class(cfg.type)
-    return cls(policy=policy, config=cfg)
+    return cfg.build_algorithm(policy)
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-from .configuration_sac import SACAlgorithmConfig
-from .sac_algorithm import SACAlgorithm
+from lerobot.rl.algorithms.sac.configuration_sac import SACAlgorithmConfig
+from lerobot.rl.algorithms.sac.sac_algorithm import SACAlgorithm

 __all__ = ["SACAlgorithm", "SACAlgorithmConfig"]
--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
Khalil Meftah	5c444302c1	feat(so_follower): synchronize goal position with present position to prevent positional error during torque re-enablement	2026-04-28 18:40:48 +02:00
Khalil Meftah	c868f874f1	feat(teleop): enhance leader-follower behavior and torque management in SO101 teleoperation	2026-04-28 17:46:06 +02:00
Khalil Meftah	e228f0880f	feat(teleop): add SO100/SO101 leader-follower teleoperation example fix: update import for SO101Leader in so101_leader_follower.py chore: include SO101LeaderFollower in exports	2026-04-28 17:28:15 +02:00
Khalil Meftah	fe2c32d9e7	add so leader arm	2026-04-28 16:53:36 +02:00
Khalil Meftah	6ed80f5a59	Merge remote-tracking branch 'origin/main' into user/khalil-meftah/2026-02-16-rl-stack-refactor # Conflicts: # src/lerobot/policies/__init__.py # src/lerobot/rl/actor.py	2026-04-28 12:04:13 +02:00
Khalil Meftah	ef6b3b5b0f	refactor: simplify docstrings for clarity and conciseness across multiple files	2026-04-28 11:11:02 +02:00
Khalil Meftah	e298474bf3	fix(tests): gate RL tests on the `datasets` extra	2026-04-27 16:53:34 +02:00
Khalil Meftah	577f14337a	refactor(tests): remove grpc import checks from test files for cleaner code	2026-04-27 16:20:13 +02:00
Khalil Meftah	47be90f040	refactor(rl): make RLAlgorithmConfig an abstract base class for better extensibility	2026-04-27 15:59:59 +02:00
Khalil Meftah	47dd65347e	refactor(rl): add type property to RLAlgorithmConfig for better clarity	2026-04-27 15:57:24 +02:00
Khalil Meftah	fd5a788120	refactor(rl): add make_algorithm_config function for RLAlgorithmConfig instantiation	2026-04-27 15:55:16 +02:00
Khalil Meftah	9ce9e01469	refactor(rl): make algorithm a nested config so all SAC hyperparameters are JSON-addressable	2026-04-27 13:39:03 +02:00
Khalil Meftah	21c16a27f0	Revert "perf(observation_processor): add CUDA support for image processing" This reverts commit `38b88c414c`.	2026-04-27 11:52:19 +02:00
Khalil Meftah	b3164543f4	fix(rl): enhance intervention handling in actor and learner (cherry picked from commit `ef8bfffbd7`)	2026-04-27 11:35:21 +02:00
Khalil Meftah	f3993cbbb1	fix(rl): improve action processing for discrete and continuous actions (cherry picked from commit `f887ab3f6a`)	2026-04-27 11:35:20 +02:00
Khalil Meftah	c278cfa026	fix(rl): postprocess action in actor (cherry picked from commit `c2556439e5`)	2026-04-27 11:35:20 +02:00
Khalil Meftah	77d18659b1	fix(rl): mirror gym_manipulator in actor (cherry picked from commit `d2a046dfc5`)	2026-04-27 11:35:19 +02:00
Khalil Meftah	6347edefb1	fix(rl): merge environment and action-processor info in transition processing (cherry picked from commit `30e1886b64`)	2026-04-27 11:35:18 +02:00
Khalil Meftah	eda47eca18	fix(rl): update neutral gripper action (cherry picked from commit `9c9064e5be`)	2026-04-27 11:35:18 +02:00
Khalil Meftah	a64e6f5070	fix(rl): clarify discrete gripper action mapping in GripperVelocityToJoint for SO100 (cherry picked from commit `494f469a2b`)	2026-04-27 11:35:17 +02:00
Khalil Meftah	3def86c2c3	fix(rl): add time limit processor to environment pipeline (cherry picked from commit `cd105f65cb`)	2026-04-27 11:35:17 +02:00
Khalil Meftah	356a64d8c4	fix(rl): correctly wire HIL-SERL gripper penalty through processor pipeline (cherry picked from commit `9c2af818ff`)	2026-04-27 11:35:16 +02:00
Khalil Meftah	38b88c414c	perf(observation_processor): add CUDA support for image processing	2026-04-24 13:36:26 +02:00
Khalil Meftah	1ed32210c7	refactor(rl/sac): consolidate hyperparameter ownership and clean up discrete critic	2026-04-24 13:18:33 +02:00
Khalil Meftah	06255996ea	refactor(policies): rename policies/sac → policies/gaussian_actor	2026-04-23 19:13:18 +02:00
Khalil Meftah	8065bf15c7	fix test for flat dict structure	2026-04-21 12:06:25 +02:00
Khalil Meftah	8191d2d87f	remove unused type alias	2026-04-21 11:56:27 +02:00
Khalil Meftah	6b93f31238	fix docstring	2026-04-21 11:55:17 +02:00
Khalil Meftah	a4c0c9e358	update losses names in tests	2026-04-21 11:53:32 +02:00
Khalil Meftah	a84b0e8132	refactor(sac): decouple algorithm hyperparameters from policy config	2026-04-18 16:40:56 +02:00
Khalil Meftah	2487a6ee6d	perf(rl): use async iterators in OnlineOfflineMixer.get_iterator	2026-04-18 16:02:28 +02:00
Khalil Meftah	72fb0faf62	refactor(sac): simplify optimizer return structure	2026-04-18 15:45:22 +02:00
Khalil Meftah	2c97cb23c8	refactor(rl): update shutdown_event type hints from 'any' to 'Any' for consistency and clarity	2026-04-18 15:39:32 +02:00
Khalil Meftah	87d4c9879c	fix(sac): clarify torch.compile status	2026-04-18 15:19:35 +02:00
Khalil Meftah	e4c1a8472d	fix(config): update vision encoder model name to lerobot/resnet10	2026-04-18 15:15:59 +02:00
Khalil Meftah	d7e25c8326	refactor(rl): expose public API in rl/__init__ and use relative imports in sub-packages	2026-04-16 15:46:34 +02:00
Khalil Meftah	a5ad273b62	fix(tests): skip tests that require grpc if not available	2026-04-15 16:30:20 +02:00
Khalil Meftah	23bece96a4	fix(tests): ensure tensor stats comparison accounts for reshaping in normalization tests	2026-04-15 16:12:08 +02:00
Khalil Meftah	7a1c9e74c3	fix: skip tests that require grpc if not available	2026-04-15 15:18:04 +02:00
Khalil Meftah	c88cf979f1	fix: use string key for IS_INTERVENTION in complementary_info to avoid torch.load serialization error	2026-04-15 11:49:38 +02:00
Khalil Meftah	79a9ebdaa6	fix: add try/finally to control_loop to ensure image writer cleanup on exit	2026-04-14 17:54:35 +02:00
Khalil Meftah	da6e36fd03	Merge remote-tracking branch 'origin/main' into user/khalil-meftah/2026-02-16-rl-stack-refactor	2026-04-14 17:14:56 +02:00
Khalil Meftah	64dc08cb7b	fix: include IS_INTERVENTION in complementary_info sent to learner for offline replay buffer	2026-04-14 16:35:08 +02:00
Khalil Meftah	e6d282108d	Fix: add kwargs in reward classifier __init__()	2026-04-14 11:13:43 +02:00
Khalil Meftah	a8838c081b	perf: remove redundant CPU→GPU→CPU transition move in learner	2026-04-13 19:06:28 +02:00
Khalil Meftah	ee0814ef60	refactor: update SACAlgorithm to pass action_dim to _init_critics and fix encoder reference	2026-04-13 18:31:17 +02:00
Khalil Meftah	7b0bdf2a98	fix: add thread synchronization to ReplayBuffer to prevent race condition between add() and sample()	2026-04-13 18:27:24 +02:00
Khalil Meftah	9422dc98c2	fix: remove leftover normalization calls from reward classifier predict_reward Fixes #2355	2026-04-13 13:30:50 +02:00
Khalil Meftah	11a0b0174f	fix(teleop): keyboard EE teleop not registering special keys and losing intervention state Fixes #2345 Co-authored-by: jpizarrom <jpizarrom@gmail.com>	2026-04-13 12:31:00 +02:00
Khalil Meftah	036b310a97	chore: clarify torch.compile disabled note in SACAlgorithm	2026-04-13 11:49:27 +02:00
Khalil Meftah	e022207c75	refactor: RL stack refactoring — RLAlgorithm, RLTrainer, DataMixer, and SAC restructuring	2026-04-13 11:39:48 +02:00
				`@@ -0,0 +1 @@`
				`../../../../docs/source/policy_sarm_README.md`