fix(robotwin): pin compatible curobo in benchmark image

Merge remote-tracking branch 'origin/feat/robotwin-benchmark' into feat/robotwin-benchmark
Merge branch 'main' into feat/robotwin-benchmark
2026-06-18 16:57:12 +00:00 · 2026-04-21 18:41:16 +02:00 · 2026-04-20 17:31:28 +02:00 · 2026-04-20 17:17:00 +02:00 · 2026-04-20 17:10:53 +02:00 · 2026-04-20 15:33:13 +02:00
122 changed files with 5593 additions and 4421 deletions
@@ -2,11 +2,6 @@

 Short, imperative summary (e.g., "fix(robots): handle None in sensor parser"). See [CONTRIBUTING.md](../CONTRIBUTING.md) for PR conventions.

-## Type / Scope
-
- **Type**: (Bug | Feature | Docs | Performance | Test | CI | Chore)
- **Scope**: (optional — name of module or package affected)
-
 ## Summary / Motivation

 - One-paragraph description of what changes and why.
@@ -19,28 +14,14 @@ Short, imperative summary (e.g., "fix(robots): handle None in sensor parser"). S

 ## What changed

- Short, concrete bullets of the modifications (files/behaviour).
+- Short, concrete bullets explaining the functional changes (how the behavior or output differs now).
 - Short note if this introduces breaking changes and migration steps.

 ## How was this tested (or how to run locally)

- Tests added: list new tests or test files.
+- Tests added: list new tests or test files, and the command to run them (e.g., `pytest -q tests/ -k <keyword>`).
 - Manual checks / dataset runs performed.
- Instructions for the reviewer
-
-Example:
-
- Ran the relevant tests:
-
-  ```bash
-  pytest -q tests/ -k <keyword>
-  ```
-
- Reproduce with a quick example or CLI (if applicable):
-
-  ```bash
-  lerobot-train --some.option=true
-  ```
+- Instructions for the reviewer to reproduce the change with a quick example or CLI command (if applicable).

 ## Checklist (required before merge)

@@ -48,6 +29,7 @@ Example:
 - [ ] All tests pass locally (`pytest`)
 - [ ] Documentation updated
 - [ ] CI is green
+- [ ] Community Review: I have reviewed another contributor's open PR and linked it here: # (insert PR number/link)

 ## Reviewer notes

@@ -83,10 +83,13 @@ jobs:
          cache-binary: false

      - name: Login to Docker Hub
+        if: ${{ env.DOCKERHUB_USERNAME != '' }}
        uses: docker/login-action@v3 # zizmor: ignore[unpinned-uses]
        with:
          username: ${{ secrets.DOCKERHUB_LEROBOT_USERNAME }}
          password: ${{ secrets.DOCKERHUB_LEROBOT_PASSWORD }}
+        env:
+          DOCKERHUB_USERNAME: ${{ secrets.DOCKERHUB_LEROBOT_USERNAME }}

      # Build the benchmark-specific image. The Dockerfile separates dep-install
      # from source-copy, so code-only changes skip the slow uv-sync layer
@@ -115,7 +118,7 @@ jobs:
            bash -c "
              hf auth login --token \"\$HF_USER_TOKEN\" --add-to-git-credential 2>/dev/null || true
              lerobot-eval \
-                --policy.path=pepijn223/smolvla_libero \
+                --policy.path=lerobot/smolvla_libero \
                --env.type=libero \
                --env.task=libero_spatial \
                --eval.batch_size=1 \
@@ -144,7 +147,7 @@ jobs:
            --artifacts-dir /tmp/libero-artifacts \
            --env libero \
            --task libero_spatial \
-            --policy pepijn223/smolvla_libero
+            --policy lerobot/smolvla_libero

      - name: Upload Libero rollout video
        if: always()
@@ -238,10 +241,13 @@ jobs:
          cache-binary: false

      - name: Login to Docker Hub
+        if: ${{ env.DOCKERHUB_USERNAME != '' }}
        uses: docker/login-action@v3 # zizmor: ignore[unpinned-uses]
        with:
          username: ${{ secrets.DOCKERHUB_LEROBOT_USERNAME }}
          password: ${{ secrets.DOCKERHUB_LEROBOT_PASSWORD }}
+        env:
+          DOCKERHUB_USERNAME: ${{ secrets.DOCKERHUB_LEROBOT_USERNAME }}

      - name: Build MetaWorld benchmark image
        uses: docker/build-push-action@v6 # zizmor: ignore[unpinned-uses]
@@ -264,7 +270,7 @@ jobs:
            bash -c "
              hf auth login --token \"\$HF_USER_TOKEN\" --add-to-git-credential 2>/dev/null || true
              lerobot-eval \
-                --policy.path=pepijn223/smolvla_metaworld \
+                --policy.path=lerobot/smolvla_metaworld \
                --env.type=metaworld \
                --env.task=metaworld-push-v3 \
                --eval.batch_size=1 \
@@ -293,7 +299,7 @@ jobs:
            --artifacts-dir /tmp/metaworld-artifacts \
            --env metaworld \
            --task metaworld-push-v3 \
-            --policy pepijn223/smolvla_metaworld
+            --policy lerobot/smolvla_metaworld

      - name: Upload MetaWorld rollout video
        if: always()
@@ -310,3 +316,212 @@ jobs:
          name: metaworld-metrics
          path: /tmp/metaworld-artifacts/metrics.json
          if-no-files-found: warn
+
+  # ── ROBOTWIN 2.0 ──────────────────────────────────────────────────────────
+  # Isolated image: full RoboTwin 2.0 stack — SAPIEN, mplib, CuRobo,
+  # pytorch3d, + simulation assets (~4 GB).
+  # Build takes ~20 min on first run; subsequent runs hit the layer cache.
+  # Requires an NVIDIA GPU runner with CUDA 12.1 drivers.
+  robotwin-integration-test:
+    name: RoboTwin 2.0 — build image + 1-episode eval
+    runs-on:
+      group: aws-g6-4xlarge-plus
+    env:
+      HF_USER_TOKEN: ${{ secrets.LEROBOT_HF_USER }}
+      ROBOTWIN_POLICY: lerobot/smolvla_robotwin
+      ROBOTWIN_TASKS: beat_block_hammer,click_bell,handover_block,stack_blocks_two,click_alarmclock,open_microwave,adjust_bottle,lift_pot,stamp_seal,turn_switch
+
+    steps:
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
+        with:
+          persist-credentials: false
+          lfs: true
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3 # zizmor: ignore[unpinned-uses]
+        with:
+          cache-binary: false
+
+      - name: Login to Docker Hub
+        if: ${{ env.DOCKERHUB_USERNAME != '' }}
+        uses: docker/login-action@v3 # zizmor: ignore[unpinned-uses]
+        with:
+          username: ${{ secrets.DOCKERHUB_LEROBOT_USERNAME }}
+          password: ${{ secrets.DOCKERHUB_LEROBOT_PASSWORD }}
+        env:
+          DOCKERHUB_USERNAME: ${{ secrets.DOCKERHUB_LEROBOT_USERNAME }}
+
+      # Build the full-install image: SAPIEN, mplib, CuRobo, pytorch3d +
+      # simulation assets (~4 GB). Layer cache is stored in a local buildx
+      # cache directory on the runner — reused across re-runs on the same machine.
+      - name: Build RoboTwin 2.0 benchmark image
+        uses: docker/build-push-action@v6 # zizmor: ignore[unpinned-uses]
+        with:
+          context: .
+          file: docker/Dockerfile.benchmark.robotwin
+          push: false
+          load: true
+          tags: lerobot-benchmark-robotwin:ci
+          cache-from: type=local,src=/tmp/.buildx-cache-robotwin
+          cache-to: type=local,dest=/tmp/.buildx-cache-robotwin,mode=max
+
+      - name: Run RoboTwin 2.0 smoke eval (10 tasks, 1 episode each)
+        if: env.HF_USER_TOKEN != ''
+        run: |
+          # Named container (no --rm) so we can docker cp artifacts out.
+          docker run --name robotwin-eval --gpus all \
+            --shm-size=4g \
+            -e HF_HOME=/tmp/hf \
+            -e HF_USER_TOKEN="${HF_USER_TOKEN}" \
+            -e ROBOTWIN_POLICY="${ROBOTWIN_POLICY}" \
+            -e ROBOTWIN_TASKS="${ROBOTWIN_TASKS}" \
+            lerobot-benchmark-robotwin:ci \
+            bash -c "
+              hf auth login --token \"\$HF_USER_TOKEN\" --add-to-git-credential 2>/dev/null || true
+              cd /opt/robotwin && lerobot-eval \
+                --policy.path=\"\$ROBOTWIN_POLICY\" \
+                --env.type=robotwin \
+                --env.task=\"\$ROBOTWIN_TASKS\" \
+                --eval.batch_size=1 \
+                --eval.n_episodes=1 \
+                --eval.use_async_envs=false \
+                --policy.device=cuda \
+                '--rename_map={\"observation.images.head_camera\": \"observation.images.camera1\", \"observation.images.left_camera\": \"observation.images.camera2\", \"observation.images.right_camera\": \"observation.images.camera3\"}' \
+                --output_dir=/tmp/eval-artifacts
+              python /lerobot/scripts/ci/extract_task_descriptions.py \
+                --env robotwin \
+                --task \"\$ROBOTWIN_TASKS\" \
+                --output /tmp/eval-artifacts/task_descriptions.json
+            "
+
+      - name: Copy RoboTwin artifacts from container
+        if: always()
+        run: |
+          mkdir -p /tmp/robotwin-artifacts
+          docker cp robotwin-eval:/tmp/eval-artifacts/. /tmp/robotwin-artifacts/ 2>/dev/null || true
+          docker rm -f robotwin-eval || true
+
+      - name: Parse RoboTwin eval metrics
+        if: always()
+        run: |
+          python3 scripts/ci/parse_eval_metrics.py \
+            --artifacts-dir /tmp/robotwin-artifacts \
+            --env robotwin \
+            --task "${ROBOTWIN_TASKS}" \
+            --policy "${ROBOTWIN_POLICY}"
+
+      - name: Upload RoboTwin rollout video
+        if: always()
+        uses: actions/upload-artifact@v4 # zizmor: ignore[unpinned-uses]
+        with:
+          name: robotwin-rollout-video
+          path: /tmp/robotwin-artifacts/videos/
+          if-no-files-found: warn
+
+      - name: Upload RoboTwin eval metrics
+        if: always()
+        uses: actions/upload-artifact@v4 # zizmor: ignore[unpinned-uses]
+        with:
+          name: robotwin-metrics
+          path: /tmp/robotwin-artifacts/metrics.json
+          if-no-files-found: warn
+
+  # ── ROBOCASA365 ──────────────────────────────────────────────────────────
+  # Isolated image: robocasa + robosuite installed manually as editable
+  # clones (no `lerobot[robocasa]` extra — robocasa's setup.py pins
+  # `lerobot==0.3.3`, which would shadow this repo's lerobot).
+  robocasa-integration-test:
+    name: RoboCasa365 — build image + 1-episode eval
+    runs-on:
+      group: aws-g6-4xlarge-plus
+    env:
+      HF_USER_TOKEN: ${{ secrets.LEROBOT_HF_USER }}
+
+    steps:
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
+        with:
+          persist-credentials: false
+          lfs: true
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3 # zizmor: ignore[unpinned-uses]
+        with:
+          cache-binary: false
+
+      - name: Login to Docker Hub
+        if: ${{ env.DOCKERHUB_USERNAME != '' }}
+        uses: docker/login-action@v3 # zizmor: ignore[unpinned-uses]
+        with:
+          username: ${{ secrets.DOCKERHUB_LEROBOT_USERNAME }}
+          password: ${{ secrets.DOCKERHUB_LEROBOT_PASSWORD }}
+        env:
+          DOCKERHUB_USERNAME: ${{ secrets.DOCKERHUB_LEROBOT_USERNAME }}
+
+      - name: Build RoboCasa365 benchmark image
+        uses: docker/build-push-action@v6 # zizmor: ignore[unpinned-uses]
+        with:
+          context: .
+          file: docker/Dockerfile.benchmark.robocasa
+          push: false
+          load: true
+          tags: lerobot-benchmark-robocasa:ci
+
+      - name: Run RoboCasa365 smoke eval (10 atomic tasks, 1 episode each)
+        if: env.HF_USER_TOKEN != ''
+        run: |
+          docker run --name robocasa-eval --gpus all \
+            --shm-size=4g \
+            -e HF_HOME=/tmp/hf \
+            -e HF_USER_TOKEN="${HF_USER_TOKEN}" \
+            -e HF_HUB_DOWNLOAD_TIMEOUT=300 \
+            -e MUJOCO_GL=egl \
+            lerobot-benchmark-robocasa:ci \
+            bash -c "
+              hf auth login --token \"\$HF_USER_TOKEN\" --add-to-git-credential 2>/dev/null || true
+              lerobot-eval \
+                --policy.path=lerobot/smolvla_robocasa \
+                --env.type=robocasa \
+                --env.task=CloseFridge,OpenCabinet,OpenDrawer,TurnOnMicrowave,TurnOffStove,CloseToasterOvenDoor,SlideDishwasherRack,TurnOnSinkFaucet,NavigateKitchen,TurnOnElectricKettle \
+                --eval.batch_size=1 \
+                --eval.n_episodes=1 \
+                --eval.use_async_envs=false \
+                --policy.device=cuda \
+                '--rename_map={\"observation.images.robot0_agentview_left\": \"observation.images.camera1\", \"observation.images.robot0_eye_in_hand\": \"observation.images.camera2\", \"observation.images.robot0_agentview_right\": \"observation.images.camera3\"}' \
+                --output_dir=/tmp/eval-artifacts
+              python scripts/ci/extract_task_descriptions.py \
+                --env robocasa \
+                --task CloseFridge,OpenCabinet,OpenDrawer,TurnOnMicrowave,TurnOffStove,CloseToasterOvenDoor,SlideDishwasherRack,TurnOnSinkFaucet,NavigateKitchen,TurnOnElectricKettle \
+                --output /tmp/eval-artifacts/task_descriptions.json
+            "
+
+      - name: Copy RoboCasa365 artifacts from container
+        if: always()
+        run: |
+          mkdir -p /tmp/robocasa-artifacts
+          docker cp robocasa-eval:/tmp/eval-artifacts/. /tmp/robocasa-artifacts/ 2>/dev/null || true
+          docker rm -f robocasa-eval || true
+
+      - name: Parse RoboCasa365 eval metrics
+        if: always()
+        run: |
+          python3 scripts/ci/parse_eval_metrics.py \
+            --artifacts-dir /tmp/robocasa-artifacts \
+            --env robocasa \
+            --task atomic_smoke_10 \
+            --policy lerobot/smolvla_robocasa
+
+      - name: Upload RoboCasa365 rollout video
+        if: always()
+        uses: actions/upload-artifact@v4 # zizmor: ignore[unpinned-uses]
+        with:
+          name: robocasa-rollout-video
+          path: /tmp/robocasa-artifacts/videos/
+          if-no-files-found: warn
+
+      - name: Upload RoboCasa365 eval metrics
+        if: always()
+        uses: actions/upload-artifact@v4 # zizmor: ignore[unpinned-uses]
+        with:
+          name: robocasa-metrics
+          path: /tmp/robocasa-artifacts/metrics.json
+          if-no-files-found: warn
@@ -33,7 +33,7 @@ jobs:
      github.event.workflow_run.event == 'pull_request' &&
      github.event.workflow_run.conclusion == 'success' &&
      github.repository == 'huggingface/lerobot'
-    uses: huggingface/doc-builder/.github/workflows/upload_pr_documentation.yml@90b4ee2c10b81b5c1a6367c4e6fc9e2fb510a7e3  # main
+    uses: huggingface/doc-builder/.github/workflows/upload_pr_documentation.yml@9ad2de8582b56c017cb530c1165116d40433f1c6  # main
    with:
      package_name: lerobot
    secrets:
@@ -217,6 +217,24 @@ jobs:
      - name: Run end-to-end tests
        run: make test-end-to-end

+  slack-notification:
+    name: Slack Notification
+    needs: [cpu-tests, gpu-tests, upgrade-lock]
+    if: always() && needs.upgrade-lock.outputs.changed == 'true'
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+    env:
+      CI_SLACK_CHANNEL: ${{ secrets.CI_SLACK_CHANNEL }}
+    steps:
+      - name: Post to a Slack channel
+        uses: huggingface/hf-workflows/.github/actions/post-slack@a88e7fa2eaee28de5a4d6142381b1fb792349b67  # main
+        with:
+          slack_channel: ${{ env.CI_SLACK_CHANNEL }}
+          title: "Results of the latest dependency tests (CPU + GPU)"
+          status: ${{ (needs.cpu-tests.result == 'success' && needs.gpu-tests.result == 'success') && 'success' || 'failure' }}
+          slack_token: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}
+
  # This job creates or updates a PR with the upgraded lockfile
  open-pr:
    name: Open PR
@@ -78,6 +78,9 @@ Use the templates for required fields and examples.
 - **Issues:** Follow the [ticket template](https://github.com/huggingface/lerobot/blob/main/.github/ISSUE_TEMPLATE/bug-report.yml).
 - **Pull requests:** Rebase on `upstream/main`, use a descriptive branch (don't work on `main`), run `pre-commit` and tests locally, and follow the [PR template](https://github.com/huggingface/lerobot/blob/main/.github/PULL_REQUEST_TEMPLATE.md).

-One member of the LeRobot team will then review your contribution.
+> [!IMPORTANT]
+> Community Review Policy: To help scale our efforts and foster a collaborative environment, we ask contributors to review at least one other person's open PR before their own receives attention. This shared responsibility multiplies our review capacity and helps everyone's code get merged faster!
+
+Once you have submitted your PR and completed a peer review, a member of the LeRobot team will review your contribution.

 Thank you for contributing to LeRobot!
@@ -0,0 +1,71 @@
+# Copyright 2025 The HuggingFace Inc. team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Benchmark image for RoboCasa365 integration tests.
+# Extends the nightly GPU image (which already has all extras installed) with
+# pinned robocasa/robosuite clones, kitchen assets, and the PR's source code.
+#
+# Build:  docker build -f docker/Dockerfile.benchmark.robocasa -t lerobot-benchmark-robocasa .
+# Run:    docker run --gpus all --rm lerobot-benchmark-robocasa lerobot-eval ...
+
+FROM huggingface/lerobot-gpu:latest
+
+# Install robocasa + robosuite as editable clones. pip-installing from git
+# omits data files like robocasa/models/assets/box_links/box_links_assets.json
+# (not declared in package_data), which download_kitchen_assets needs at import.
+#
+# `--no-deps` on robocasa is deliberate: its setup.py pins `lerobot==0.3.3`
+# in install_requires, which would shadow the editable lerobot baked into
+# this image. We install robocasa's actual runtime deps explicitly instead.
+# Pinned SHAs for reproducible benchmark runs. Bump when you need an
+# upstream fix; don't rely on `main`/`master` drift.
+ARG ROBOCASA_SHA=56e355ccc64389dfc1b8a61a33b9127b975ba681
+ARG ROBOSUITE_SHA=aaa8b9b214ce8e77e82926d677b4d61d55e577ab
+RUN git clone https://github.com/robocasa/robocasa.git ~/robocasa && \
+    git -C ~/robocasa checkout ${ROBOCASA_SHA} && \
+    git clone https://github.com/ARISE-Initiative/robosuite.git ~/robosuite && \
+    git -C ~/robosuite checkout ${ROBOSUITE_SHA} && \
+    uv pip install --no-cache -e ~/robocasa --no-deps && \
+    uv pip install --no-cache -e ~/robosuite && \
+    uv pip install --no-cache \
+      "numpy==2.2.5" "numba==0.61.2" "scipy==1.15.3" "mujoco==3.3.1" \
+      "pygame==2.6.1" "Pillow==12.2.0" "opencv-python==4.13.0.92" \
+      "pyyaml==6.0.3" "pynput==1.8.1" "tqdm==4.67.3" "termcolor==3.3.0" \
+      "imageio==2.37.3" "h5py==3.16.0" "lxml==6.0.4" "hidapi==0.14.0.post4" \
+      "tianshou==0.4.10" "gymnasium==1.2.3"
+
+# Set up robocasa macros and download kitchen assets. We need:
+#   - tex              : base environment textures
+#   - tex_generative   : AI-generated textures; kitchen fixture XMLs embed
+#                        refs to generative_textures/wall/tex*.png
+#                        unconditionally, so MjModel.from_xml_string fails
+#                        at reset time without them (even if the env is
+#                        constructed with generative_textures=None).
+#   - fixtures_lw      : lightwheel kitchen fixtures (fridge, counters...)
+#   - objs_lw          : lightwheel object meshes (stools, misc props)
+# We skip the objaverse/aigen object packs (~30GB combined) by pairing
+# this with --env.obj_registries=["lightwheel"] on the lerobot side.
+# The download script prompts interactively, so pipe 'y' to auto-accept.
+RUN python -m robocasa.scripts.setup_macros && \
+    yes y | python -m robocasa.scripts.download_kitchen_assets \
+      --type tex tex_generative fixtures_lw objs_lw
+
+# Overlay the PR's source code on top of the nightly image.
+COPY --chown=user_lerobot:user_lerobot . .
+
+# Re-install lerobot editably so the new source (with RoboCasaEnv registration)
+# replaces the stale package baked into the nightly image.
+RUN uv pip install --no-cache --no-deps -e .
+
+CMD ["/bin/bash"]
@@ -0,0 +1,131 @@
+# Copyright 2025 The HuggingFace Inc. team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Benchmark image for RoboTwin 2.0 integration tests.
+# Extends the nightly GPU image with the RoboTwin simulator stack:
+#   sapien/mplib/pytorch3d + NVlabs CuRobo + embodiments.zip + objects.zip
+# (~3.96 GB of assets; background_texture.zip ~11 GB skipped for smoke eval).
+#
+# Build: docker build -f docker/Dockerfile.benchmark.robotwin -t lerobot-benchmark-robotwin .
+# Run:   docker run --gpus all --rm lerobot-benchmark-robotwin \
+#            lerobot-eval --env.type=robotwin --env.task=beat_block_hammer ...
+
+FROM huggingface/lerobot-gpu:latest
+
+ENV NVIDIA_DRIVER_CAPABILITIES=all \
+    VK_ICD_FILENAMES=/usr/share/vulkan/icd.d/nvidia_icd.json \
+    ROBOTWIN_ROOT=/opt/robotwin
+
+# The nightly base is CUDA -base (no compiler, no Vulkan loader). CuRobo's
+# `pip install -e .` runs nvcc, and SAPIEN renders via Vulkan — add both.
+USER root
+# Pinned upstream SHA for reproducible benchmark runs. Bump when we need
+# an upstream fix; don't rely on `main` drift.
+ARG ROBOTWIN_SHA=0aeea2d669c0f8516f4d5785f0aa33ba812c14b4
+RUN apt-get update \
+    && apt-get install -y --no-install-recommends \
+         cuda-nvcc-12-4 cuda-cudart-dev-12-4 \
+         libvulkan1 vulkan-tools \
+    && mkdir -p /usr/share/vulkan/icd.d \
+    && echo '{"file_format_version":"1.0.0","ICD":{"library_path":"libGLX_nvidia.so.0","api_version":"1.3.0"}}' \
+       > /usr/share/vulkan/icd.d/nvidia_icd.json \
+    && git clone https://github.com/RoboTwin-Platform/RoboTwin.git ${ROBOTWIN_ROOT} \
+    && git -C ${ROBOTWIN_ROOT} checkout ${ROBOTWIN_SHA} \
+    && chown -R user_lerobot:user_lerobot ${ROBOTWIN_ROOT} \
+    && apt-get clean && rm -rf /var/lib/apt/lists/*
+USER user_lerobot
+
+# RoboTwin runtime deps (av is already in the base via [av-dep]).
+RUN uv pip install --no-cache \
+        "sapien==3.0.0b1" "mplib==0.2.1" "transforms3d==0.4.2" "trimesh==4.4.3" \
+        "open3d==0.19.0" "imageio==2.34.2" termcolor zarr pydantic h5py
+
+# pytorch3d has no universal wheel; must be built from source (~10 min, cached).
+RUN uv pip install --no-cache --no-build-isolation \
+        "git+https://github.com/facebookresearch/pytorch3d.git@stable"
+
+# CuRobo — NVlabs motion generator; TORCH_CUDA_ARCH_LIST must be set or the
+# build aborts on an empty arch list. RoboTwin's own installer pins v0.7.8,
+# which still exposes the v1 API (`curobo.types.math`) that RoboTwin imports.
+ARG CUROBO_REF=v0.7.8
+RUN cd ${ROBOTWIN_ROOT}/envs \
+    && git clone --branch ${CUROBO_REF} --depth 1 https://github.com/NVlabs/curobo.git \
+    && cd curobo \
+    && TORCH_CUDA_ARCH_LIST="7.0;7.5;8.0;8.6;8.9;9.0" \
+       uv pip install -e . --no-build-isolation --no-cache
+
+# Upstream patches (mirror RoboTwin's script/_install.sh).
+# These patches target the exact versions pinned above; re-check when upgrading.
+# mplib==0.2.1: drop a broken `or collide` clause in planner.py.
+#   Safe to remove once mplib > 0.2.1 ships with the fix upstream.
+# sapien==3.0.0b1: fix URDF loader encoding + .srdf extension check.
+#   Safe to remove once sapien > 3.0.0b1 ships with the fix upstream.
+RUN python - <<'EOF'
+# Patch the installed mplib and sapien sources in place. Each patch logs
+# whether it actually changed the file, so a pattern that no longer matches
+# (e.g. after a version bump) shows up in the build log instead of being
+# reported as applied.
+import pathlib, re, site
+
+MISSED = "NOT applied (pattern not found)"
+for d in site.getsitepackages():
+    p = pathlib.Path(d) / "mplib" / "planner.py"
+    if p.exists():
+        # Remove only the first `or collide` clause.
+        src, n = re.subn(r"\bor collide\b", "", p.read_text(), count=1)
+        p.write_text(src)
+        print(f"mplib patch {'applied' if n else MISSED}: {p}")
+    p = pathlib.Path(d) / "sapien" / "wrapper" / "urdf_loader.py"
+    if p.exists():
+        old = p.read_text()
+        # Read the SRDF as UTF-8 and compare against the full ".srdf" extension.
+        src = old.replace(
+            "with open(srdf_path) as f:", 'with open(srdf_path, encoding="utf-8") as f:'
+        ).replace('"srdf"', '".srdf"')
+        p.write_text(src)
+        print(f"sapien patch {'applied' if src != old else MISSED}: {p}")
+EOF
+
+# Simulation assets from TianxingChen/RoboTwin2.0: embodiments (~220 MB) +
+# objects (~3.74 GB). background_texture (~11 GB) is intentionally skipped.
+# The dataset is public — no auth token needed.
+RUN python - <<'EOF'
+import os, pathlib, zipfile
+from huggingface_hub import hf_hub_download
+
+assets_dir = pathlib.Path(os.environ["ROBOTWIN_ROOT"]) / "assets"
+assets_dir.mkdir(parents=True, exist_ok=True)
+for fname in ("embodiments.zip", "objects.zip"):
+    local = hf_hub_download(
+        repo_id="TianxingChen/RoboTwin2.0",
+        repo_type="dataset",
+        filename=fname,
+        local_dir=str(assets_dir),
+    )
+    with zipfile.ZipFile(local, "r") as z:
+        z.extractall(str(assets_dir))
+    pathlib.Path(local).unlink()
+EOF
+
+WORKDIR ${ROBOTWIN_ROOT}
+RUN python script/update_embodiment_config_path.py
+
+# Prepend RoboTwin to PYTHONPATH. The `:+` form only appends the separator when
+# PYTHONPATH is already set, avoiding a trailing ':' (an empty entry that
+# Python resolves to the current working directory).
+ENV PYTHONPATH="${ROBOTWIN_ROOT}${PYTHONPATH:+:${PYTHONPATH}}"
+
+# Fail the image build early if the CuRobo/RoboTwin import chain regresses.
+RUN python - <<'EOF'
+from curobo.types.math import Pose
+from envs.robot.planner import CuroboPlanner
+
+print("CuRobo import OK:", Pose.__name__)
+print("RoboTwin planner import OK:", CuroboPlanner.__name__)
+EOF
+
+# Return to the lerobot source directory (set by base image) before overlaying.
+WORKDIR /lerobot
+
+# Overlay the PR's source code on top of the nightly image.
+COPY --chown=user_lerobot:user_lerobot . .
+
+CMD ["/bin/bash"]
@@ -79,6 +79,10 @@
    title: LIBERO
  - local: metaworld
    title: Meta-World
+  - local: robotwin
+    title: RoboTwin 2.0
+  - local: robocasa
+    title: RoboCasa365
  - local: envhub_isaaclab_arena
    title: NVIDIA IsaacLab Arena Environments
  title: "Benchmarks"
@@ -820,10 +820,10 @@ The LeRobot system uses a distributed actor-learner architecture for training. T

 Create a training configuration file (example available [here](https://huggingface.co/datasets/lerobot/config_examples/resolve/main/rl/train_config.json)). The training config is based on the main `TrainRLServerPipelineConfig` class in `lerobot/configs/train.py`.

-1. Configure the policy settings (`type="gaussian_actor"`, `device`, etc.)
+1. Configure the policy settings (`type="sac"`, `device`, etc.)
 2. Set `dataset` to your cropped dataset
 3. Configure environment settings with crop parameters
-4. Check the other parameters related to the Gaussian Actor in [configuration_gaussian_actor.py](https://github.com/huggingface/lerobot/blob/main/src/lerobot/policies/gaussian_actor/configuration_gaussian_actor.py#L79).
+4. Check the other parameters related to SAC in [configuration_sac.py](https://github.com/huggingface/lerobot/blob/main/src/lerobot/policies/sac/configuration_sac.py#L79).
 5. Verify that the `policy` config is correct with the right `input_features` and `output_features` for your task.

 **Starting the Learner**
@@ -926,7 +926,7 @@ The ideal behaviour is that your intervention rate should drop gradually during

 Some configuration values have a disproportionate impact on training stability and speed:

- **`temperature_init`** (`algorithm.temperature_init`) – initial entropy temperature in SAC. Higher values encourage more exploration; lower values make the policy more deterministic early on. A good starting point is `1e-2`. We observed that setting it too high can make human interventions ineffective and slow down learning.
+- **`temperature_init`** (`policy.temperature_init`) – initial entropy temperature in SAC. Higher values encourage more exploration; lower values make the policy more deterministic early on. A good starting point is `1e-2`. We observed that setting it too high can make human interventions ineffective and slow down learning.
 - **`policy_parameters_push_frequency`** (`policy.actor_learner_config.policy_parameters_push_frequency`) – interval in _seconds_ between two weight pushes from the learner to the actor. The default is `4 s`. Decrease to **1-2 s** to provide fresher weights (at the cost of more network traffic); increase only if your connection is slow, as this will reduce sample efficiency.
 - **`storage_device`** (`policy.storage_device`) – device on which the learner keeps the policy parameters. If you have spare GPU memory, set this to `"cuda"` (instead of the default `"cpu"`). Keeping the weights on-GPU removes CPU→GPU transfer overhead and can significantly increase the number of learner updates per second.

@@ -32,6 +32,12 @@ Once you’ve gathered enough trajectories, you’ll train a neural network to i

 If you run into any issues at any point, jump into our [Discord community](https://discord.com/invite/s3KuuzsPFb) for support.

+<Tip>
+
+Want to quickly get the right commands for your setup? The [quickstart notebook](https://github.com/huggingface/lerobot/blob/main/examples/notebooks/quickstart.ipynb) [![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/huggingface/lerobot/blob/main/examples/notebooks/quickstart.ipynb) lets you configure your robot once and generates all the commands below ready to paste.
+
+</Tip>
+
 ## Set up and Calibrate

 If you haven't yet set up and calibrated your robot and teleop device, please do so by following the robot-specific tutorial.
@@ -0,0 +1,188 @@
+# RoboCasa365
+
+[RoboCasa365](https://robocasa.ai) is a large-scale simulation framework for training and benchmarking **generalist robots** in everyday kitchen tasks. It ships 365 diverse manipulation tasks across 2,500 kitchen environments, 3,200+ object assets and 600+ hours of human demonstration data, on a PandaOmron 12-DOF mobile manipulator (Franka arm on a holonomic base).
+
+- Paper: [RoboCasa: Large-Scale Simulation of Everyday Tasks for Generalist Robots](https://arxiv.org/abs/2406.02523)
+- GitHub: [robocasa/robocasa](https://github.com/robocasa/robocasa)
+- Project website: [robocasa.ai](https://robocasa.ai)
+- Pretrained policy: [`lerobot/smolvla_robocasa`](https://huggingface.co/lerobot/smolvla_robocasa)
+- Single-task dataset (CloseFridge): [`pepijn223/robocasa_CloseFridge`](https://huggingface.co/datasets/pepijn223/robocasa_CloseFridge)
+
+<img
+  src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/lerobot/robocasa-banner.webp"
+  alt="RoboCasa365 benchmark overview"
+  width="85%"
+/>
+
+## Available tasks
+
+RoboCasa365 organizes its 365 tasks into two families and three upstream benchmark groups that LeRobot exposes as first-class `--env.task` shortcuts:
+
+| Family    | Tasks | Description                                                                     |
+| --------- | ----- | ------------------------------------------------------------------------------- |
+| Atomic    | ~65   | Single-skill tasks: pick-and-place, door/drawer manipulation, appliance control |
+| Composite | ~300  | Multi-step tasks across 60+ categories: cooking, cleaning, organizing, etc.     |
+
+**Atomic task examples:** `CloseFridge`, `OpenDrawer`, `OpenCabinet`, `TurnOnMicrowave`, `TurnOffStove`, `NavigateKitchen`, `PickPlaceCounterToStove`.
+
+**Composite task categories:** baking, boiling, brewing, chopping, clearing table, defrosting food, loading dishwasher, making tea, microwaving food, washing dishes, and more.
+
+`--env.task` accepts three forms:
+
+- a single task name (`CloseFridge`)
+- a comma-separated list (`CloseFridge,OpenBlenderLid,PickPlaceCoffee`)
+- a benchmark-group shortcut — `atomic_seen`, `composite_seen`, `composite_unseen`, `pretrain50`, `pretrain100`, `pretrain200`, `pretrain300` — which auto-expands to the upstream task list and auto-sets the dataset `split` (`target` or `pretrain`).
+
+## Installation
+
+RoboCasa and its dependency `robosuite` are not published on PyPI, and RoboCasa's own `setup.py` hardcodes `lerobot==0.3.3`, which conflicts with this repo's `lerobot`. LeRobot therefore does **not** expose a `robocasa` extra — install the two packages manually as editable clones (using `--no-deps` on `robocasa` to skip its conflicting `lerobot` pin):
+
+```bash
+# After following the standard LeRobot installation instructions.
+
+git clone https://github.com/robocasa/robocasa.git ~/robocasa
+git clone https://github.com/ARISE-Initiative/robosuite.git ~/robosuite
+pip install -e ~/robocasa --no-deps
+pip install -e ~/robosuite
+
+# Robocasa's runtime deps (the ones its setup.py would have pulled, minus
+# the bad lerobot pin).
+pip install numpy numba scipy mujoco pygame Pillow opencv-python \
+            pyyaml pynput tqdm termcolor imageio h5py lxml hidapi \
+            tianshou gymnasium
+
+python -m robocasa.scripts.setup_macros
+# Lightweight assets (lightwheel object meshes + textures). Enough for
+# the default env out of the box.
+python -m robocasa.scripts.download_kitchen_assets \
+  --type tex tex_generative fixtures_lw objs_lw
+# Optional: full objaverse/aigen registries (~30GB) for richer object
+# variety. Enable at eval time via --env.obj_registries (see below).
+# python -m robocasa.scripts.download_kitchen_assets --type objs_objaverse
+```
+
+<Tip>
+RoboCasa requires MuJoCo. Set the rendering backend before training or evaluation:
+
+```bash
+export MUJOCO_GL=egl  # for headless servers (HPC, cloud)
+```
+
+</Tip>
+
+### Object registries
+
+By default the env samples objects only from the `lightwheel` registry (what `--type objs_lw` ships), which avoids a `Probabilities contain NaN` crash when the objaverse / aigen packs aren't on disk. If you've downloaded the full asset set, enable the full registry at runtime:
+
+```bash
+--env.obj_registries='[objaverse,lightwheel]'
+```
+
+## Evaluation
+
+All eval snippets below mirror the CI command (see `.github/workflows/benchmark_tests.yml`). The `--rename_map` argument maps RoboCasa's native camera keys (`robot0_agentview_left` / `robot0_eye_in_hand` / `robot0_agentview_right`) onto the three-camera (`camera1` / `camera2` / `camera3`) input layout the released `smolvla_robocasa` policy was trained on.
+
+### Single-task evaluation (recommended for quick iteration)
+
+```bash
+lerobot-eval \
+  --policy.path=lerobot/smolvla_robocasa \
+  --env.type=robocasa \
+  --env.task=CloseFridge \
+  --eval.batch_size=1 \
+  --eval.n_episodes=20 \
+  --eval.use_async_envs=false \
+  --policy.device=cuda \
+  '--rename_map={"observation.images.robot0_agentview_left": "observation.images.camera1", "observation.images.robot0_eye_in_hand": "observation.images.camera2", "observation.images.robot0_agentview_right": "observation.images.camera3"}'
+```
+
+### Multi-task evaluation
+
+Pass a comma-separated list of tasks:
+
+```bash
+lerobot-eval \
+  --policy.path=lerobot/smolvla_robocasa \
+  --env.type=robocasa \
+  --env.task=CloseFridge,OpenCabinet,OpenDrawer,TurnOnMicrowave,TurnOffStove \
+  --eval.batch_size=1 \
+  --eval.n_episodes=20 \
+  --eval.use_async_envs=false \
+  --policy.device=cuda \
+  '--rename_map={"observation.images.robot0_agentview_left": "observation.images.camera1", "observation.images.robot0_eye_in_hand": "observation.images.camera2", "observation.images.robot0_agentview_right": "observation.images.camera3"}'
+```
+
+### Benchmark-group evaluation
+
+Run an entire upstream group (e.g. all 18 `atomic_seen` tasks with `split=target`):
+
+```bash
+lerobot-eval \
+  --policy.path=lerobot/smolvla_robocasa \
+  --env.type=robocasa \
+  --env.task=atomic_seen \
+  --eval.batch_size=1 \
+  --eval.n_episodes=20 \
+  --eval.use_async_envs=false \
+  --policy.device=cuda \
+  '--rename_map={"observation.images.robot0_agentview_left": "observation.images.camera1", "observation.images.robot0_eye_in_hand": "observation.images.camera2", "observation.images.robot0_agentview_right": "observation.images.camera3"}'
+```
+
+### Recommended evaluation episodes
+
+Use **20 episodes per task** for reproducible benchmarking; this matches the protocol used in published results.
+
+## Policy inputs and outputs
+
+**Observations** (raw RoboCasa camera names are preserved verbatim):
+
+- `observation.state` — 16-dim proprioceptive state (base position, base quaternion, relative end-effector position, relative end-effector quaternion, gripper qpos)
+- `observation.images.robot0_agentview_left` — left agent view, 256×256 HWC uint8
+- `observation.images.robot0_eye_in_hand` — wrist camera view, 256×256 HWC uint8
+- `observation.images.robot0_agentview_right` — right agent view, 256×256 HWC uint8
+
+**Actions:**
+
+- Continuous control in `Box(-1, 1, shape=(12,))` — base motion (4D) + control mode (1D) + end-effector position (3D) + end-effector rotation (3D) + gripper (1D).
+
+## Training
+
+### Single-task example
+
+A ready-to-use single-task dataset is on the Hub:
+[`pepijn223/robocasa_CloseFridge`](https://huggingface.co/datasets/pepijn223/robocasa_CloseFridge).
+
+Fine-tune a SmolVLA base on `CloseFridge`:
+
+```bash
+lerobot-train \
+  --policy.type=smolvla \
+  --policy.repo_id=${HF_USER}/smolvla_robocasa_CloseFridge \
+  --policy.load_vlm_weights=true \
+  --policy.push_to_hub=true \
+  --dataset.repo_id=pepijn223/robocasa_CloseFridge \
+  --env.type=robocasa \
+  --env.task=CloseFridge \
+  --output_dir=./outputs/smolvla_robocasa_CloseFridge \
+  --steps=100000 \
+  --batch_size=4 \
+  --eval_freq=5000 \
+  --eval.batch_size=1 \
+  --eval.n_episodes=5 \
+  --save_freq=10000
+```
+
+Evaluate the resulting checkpoint:
+
+```bash
+lerobot-eval \
+  --policy.path=${HF_USER}/smolvla_robocasa_CloseFridge \
+  --env.type=robocasa \
+  --env.task=CloseFridge \
+  --eval.batch_size=1 \
+  --eval.n_episodes=20
+```
+
+## Reproducing published results
+
+The released checkpoint [`lerobot/smolvla_robocasa`](https://huggingface.co/lerobot/smolvla_robocasa) is evaluated with the commands in the [Evaluation](#evaluation) section. CI runs a 10-atomic-task smoke eval (one episode each) on every PR touching the benchmark, picking fixture-centric tasks that don't require the objaverse asset pack.
@@ -0,0 +1,223 @@
+# RoboTwin 2.0
+
+RoboTwin 2.0 is a **large-scale dual-arm manipulation benchmark** built on the SAPIEN physics engine. It provides a standardized evaluation protocol for bimanual robotic policies across 50 tasks (as of upstream `main`) with strong domain randomization (clutter, lighting, background, tabletop height, and language instructions).
+
+- Paper: [RoboTwin 2.0: A Scalable Data Generator and Benchmark with Strong Domain Randomization for Robust Bimanual Robotic Manipulation](https://arxiv.org/abs/2506.18088)
+- GitHub: [RoboTwin-Platform/RoboTwin](https://github.com/RoboTwin-Platform/RoboTwin)
+- Leaderboard: [robotwin-platform.github.io/leaderboard](https://robotwin-platform.github.io/leaderboard)
+- Dataset: [lerobot/robotwin_unified](https://huggingface.co/datasets/lerobot/robotwin_unified)
+
+![RoboTwin 2.0 benchmark overview](https://www.aitntnews.com/pictures/2025/7/8/9a7f79cb-5ba9-11f0-8581-fa163e47d677.png)
+
+## Overview
+
+| Property      | Value                                                    |
+| ------------- | -------------------------------------------------------- |
+| Tasks         | 50 dual-arm manipulation tasks                           |
+| Robot         | Aloha-AgileX bimanual (14 DOF, 7 per arm)                |
+| Action space  | 14-dim joint-space, continuous in `[-1, 1]`              |
+| Cameras       | `head_camera`, `left_camera`, `right_camera`             |
+| Simulator     | SAPIEN (not MuJoCo)                                      |
+| Eval protocol | 100 episodes/task, 50 demo_clean demonstrations          |
+| Eval settings | **Easy** (`demo_clean`) and **Hard** (`demo_randomized`) |
+
+## Available tasks
+
+RoboTwin 2.0 ships 50 dual-arm manipulation tasks in its upstream `envs/` directory. The canonical list is the `ROBOTWIN_TASKS` tuple in `src/lerobot/envs/robotwin.py`, mirrored verbatim from the upstream repo. Example tasks:
+
+| Task                     | CLI name                 | Category          |
+| ------------------------ | ------------------------ | ----------------- |
+| Beat block with hammer   | `beat_block_hammer`      | Tool use          |
+| Click bell / alarm clock | `click_bell`             | Precision press   |
+| Stack blocks (2 / 3)     | `stack_blocks_two/three` | Stacking          |
+| Stack bowls (2 / 3)      | `stack_bowls_two/three`  | Stacking          |
+| Handover block / mic     | `handover_block`         | Bimanual coord.   |
+| Lift pot                 | `lift_pot`               | Bimanual lift     |
+| Shake bottle             | `shake_bottle`           | Continuous motion |
+| Turn switch              | `turn_switch`            | Articulated obj   |
+| Stamp seal               | `stamp_seal`             | Precision place   |
+| Scan object              | `scan_object`            | Mobile manip.     |
+
+Pass a comma-separated list to `--env.task` to run multiple tasks in a single eval sweep.
+
+<Tip warning={true}>
+  `open_laptop` is currently broken upstream (its `check_success()` uses
+  `self.arm_tag`, which is only set inside the scripted-expert `play_once()`
+  path and therefore unavailable during normal policy eval). Avoid it until the
+  upstream bug is fixed, or patch the task to default `self.arm_tag = "left"` in
+  `load_actors()`.
+</Tip>
+
+## Dataset
+
+The RoboTwin 2.0 dataset is available in **LeRobot v3.0 format** on the Hugging Face Hub:
+
+```
+lerobot/robotwin_unified
+```
+
+It contains over 100,000 pre-collected trajectories across all 50 tasks (79.6 GB, Apache 2.0 license). No format conversion is needed — it is already in the correct LeRobot v3.0 schema with video observations and action labels.
+
+You can load it directly with the HF Datasets library:
+
+```python
+from datasets import load_dataset
+
+ds = load_dataset("lerobot/robotwin_unified", split="train")
+```
+
+## Installation
+
+RoboTwin 2.0 requires **Linux** with an NVIDIA GPU (CUDA 12.1 recommended). Installation takes approximately 20 minutes.
+
+### 1. Create a conda environment
+
+```bash
+conda create -n robotwin python=3.10 -y
+conda activate robotwin
+```
+
+### 2. Install LeRobot
+
+```bash
+git clone https://github.com/huggingface/lerobot.git
+cd lerobot
+pip install -e "."
+```
+
+### 3. Install RoboTwin 2.0
+
+```bash
+git clone https://github.com/RoboTwin-Platform/RoboTwin.git
+cd RoboTwin
+bash script/_install.sh
+bash script/_download_assets.sh
+```
+
+The install script handles all Python dependencies including SAPIEN, CuRobo, mplib, and pytorch3d.
+
+<Tip warning={true}>
+If the automated install fails, install manually:
+
+```bash
+pip install -r requirements.txt
+pip install "git+https://github.com/facebookresearch/pytorch3d.git@stable"
+cd envs && git clone https://github.com/NVlabs/curobo.git && cd curobo
+pip install -e . --no-build-isolation
+```
+
+Then apply the required mplib fix: in `mplib/planner.py` line 807, remove `or collide` from the conditional.
+
+</Tip>
+
+### 4. Add RoboTwin to PYTHONPATH
+
+The RoboTwin task modules must be importable by LeRobot. From within the `RoboTwin/` directory:
+
+```bash
+export PYTHONPATH="${PYTHONPATH}:$(pwd)"
+```
+
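+To make this permanent, add the export to your shell profile, replacing `$(pwd)` with the absolute path to your `RoboTwin/` checkout (otherwise it expands to whatever directory the shell starts in).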
+
+## Evaluation
+
+### Standard evaluation (recommended)
+
+Evaluate a policy on a single task with the official protocol (100 episodes):
+
+```bash
+lerobot-eval \
+  --policy.path="your-hf-policy-id" \
+  --env.type=robotwin \
+  --env.task=beat_block_hammer \
+  --eval.batch_size=1 \
+  --eval.n_episodes=100
+```
+
+### Single-task quick check
+
+```bash
+lerobot-eval \
+  --policy.path="your-hf-policy-id" \
+  --env.type=robotwin \
+  --env.task=beat_block_hammer \
+  --eval.batch_size=1 \
+  --eval.n_episodes=5
+```
+
+### Multi-task sweep
+
+Evaluate on several tasks in one run:
+
+```bash
+lerobot-eval \
+  --policy.path="your-hf-policy-id" \
+  --env.type=robotwin \
+  --env.task=beat_block_hammer,click_bell,handover_block,stack_blocks_two \
+  --eval.batch_size=1 \
+  --eval.n_episodes=100
+```
+
+### Full benchmark (all 50 tasks)
+
+```bash
+lerobot-eval \
+  --policy.path="your-hf-policy-id" \
+  --env.type=robotwin \
+  --env.task=adjust_bottle,beat_block_hammer,blocks_ranking_rgb,blocks_ranking_size,click_alarmclock,click_bell,dump_bin_bigbin,grab_roller,handover_block,handover_mic,hanging_mug,lift_pot,move_can_pot,move_pillbottle_pad,move_playingcard_away,move_stapler_pad,open_microwave,pick_diverse_bottles,pick_dual_bottles,place_a2b_left,place_a2b_right,place_bread_basket,place_bread_skillet,place_burger_fries,place_can_basket,place_cans_plasticbox,place_container_plate,place_dual_shoes,place_empty_cup,place_fan,place_mouse_pad,place_object_basket,place_object_scale,place_object_stand,place_phone_stand,place_shoe,press_stapler,put_bottles_dustbin,put_object_cabinet,rotate_qrcode,scan_object,shake_bottle,shake_bottle_horizontally,stack_blocks_three,stack_blocks_two,stack_bowls_three,stack_bowls_two,stamp_seal,turn_switch \
+  --eval.batch_size=1 \
+  --eval.n_episodes=100
+```
+
+<Tip>
+  The command above lists 49 of the 50 tasks: `open_laptop` is intentionally
+  omitted because of the upstream `self.arm_tag` bug (see the
+  **Available tasks** section). Re-add it once the upstream fix lands.
+</Tip>
+
+## Camera configuration
+
+By default, all three cameras are included:
+
+| Camera key     | Description                    |
+| -------------- | ------------------------------ |
+| `head_camera`  | Torso-mounted overhead view    |
+| `left_camera`  | Left arm wrist-mounted camera  |
+| `right_camera` | Right arm wrist-mounted camera |
+
+To use a subset of cameras, override `--env.camera_names`:
+
+```bash
+lerobot-eval \
+  --policy.path="your-hf-policy-id" \
+  --env.type=robotwin \
+  --env.task=beat_block_hammer \
+  --env.camera_names="head_camera,left_camera" \
+  --eval.batch_size=1 \
+  --eval.n_episodes=10
+```
+
+## Environment config reference
+
+Key parameters for `RoboTwinEnvConfig`:
+
+| Parameter            | Default                                  | Description                        |
+| -------------------- | ---------------------------------------- | ---------------------------------- |
+| `task`               | `"beat_block_hammer"`                    | Comma-separated task name(s)       |
+| `fps`                | `25`                                     | Simulation FPS                     |
+| `episode_length`     | `300`                                    | Max steps per episode              |
+| `obs_type`           | `"pixels_agent_pos"`                     | `"pixels"` or `"pixels_agent_pos"` |
+| `camera_names`       | `"head_camera,left_camera,right_camera"` | Comma-separated active cameras     |
+| `observation_height` | `240`                                    | Camera pixel height                |
+| `observation_width`  | `320`                                    | Camera pixel width                 |
+
+## Leaderboard submission
+
+Results can be submitted to the [RoboTwin 2.0 leaderboard](https://robotwin-platform.github.io/leaderboard). The official protocol requires:
+
+- Training on 50 `demo_clean` demonstrations per task
+- Evaluating 100 episodes per task
+- Reporting success rate separately for **Easy** (`demo_clean`) and **Hard** (`demo_randomized`) settings
+
+For submission instructions, refer to the [RoboTwin 2.0 documentation](https://robotwin-platform.github.io/doc/).
@@ -0,0 +1,342 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# 🤗 LeRobot Quickstart\n",
+    "\n",
+    "Calibration → teleoperation → data collection → training → evaluation.\n",
+    "\n",
+    "Install the required dependencies: `pip install -e \".[notebook,dataset,training,viz,hardware]\"` (the quotes keep shells such as zsh from treating the brackets as a glob pattern).\n",
+    "\n",
+    "**How to use:**\n",
+    "1. Edit the **Configuration** cell with your settings.\n",
+    "2. Run all cells (`Run All`).\n",
+    "3. Each section prints a ready-to-paste terminal command - copy it and run it.\n",
+    "\n",
+    "Each setup is different; please refer to the [LeRobot documentation](https://huggingface.co/docs/lerobot/il_robots) for more details on each step and available options. <br>\n",
+    "Feel free to make this notebook your own and adapt it to your needs!"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "---\n",
+    "## Utils"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def _cameras_arg(cameras: dict) -> str:\n",
+    "    if not cameras:\n",
+    "        return \"\"\n",
+    "    entries = [f\"{n}: {{{', '.join(f'{k}: {v}' for k, v in cfg.items())}}}\" for n, cfg in cameras.items()]\n",
+    "    return \"{ \" + \", \".join(entries) + \" }\"\n",
+    "\n",
+    "\n",
+    "def print_cmd(*parts: str) -> None:\n",
+    "    \"\"\"Print a shell command with line continuations, skipping empty parts.\"\"\"\n",
+    "    non_empty = [p for p in parts if p]\n",
+    "    print(\" \\\\\\n    \".join(non_empty))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "---\n",
+    "## Configuration\n",
+    "\n",
+    "Edit this cell, then **Run All** to generate all commands below."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Robot (follower) - run `lerobot-find-port` to discover the port\n",
+    "ROBOT_TYPE = \"so101_follower\"\n",
+    "ROBOT_PORT = \"/dev/ttyACM0\"\n",
+    "ROBOT_ID = \"my_follower_arm\"\n",
+    "\n",
+    "# Teleop (leader) - run `lerobot-find-port` to discover the port\n",
+    "TELEOP_TYPE = \"so101_leader\"\n",
+    "TELEOP_PORT = \"/dev/ttyACM1\"\n",
+    "TELEOP_ID = \"my_leader_arm\"\n",
+    "\n",
+    "# Cameras - set to {} to disable\n",
+    "# Run `lerobot-find-cameras opencv` to list available cameras and their indices\n",
+    "CAMERAS = {\n",
+    "    \"top\": {\"type\": \"opencv\", \"index_or_path\": 2, \"width\": 640, \"height\": 480, \"fps\": 30},\n",
+    "    \"wrist\": {\"type\": \"opencv\", \"index_or_path\": 4, \"width\": 640, \"height\": 480, \"fps\": 30},\n",
+    "}\n",
+    "\n",
+    "# Dataset\n",
+    "HF_USER = \"your_hf_username\"  # `huggingface-cli whoami` to find your username\n",
+    "DATASET_NAME = \"my_so101_dataset\"\n",
+    "TASK_DESCRIPTION = \"pick and place the block\"\n",
+    "NUM_EPISODES = 10\n",
+    "\n",
+    "# Training\n",
+    "POLICY_TYPE = \"act\"  # act, diffusion, smolvla, ...\n",
+    "POLICY_DEVICE = \"cuda\"  # cuda / cpu / mps\n",
+    "TRAIN_STEPS = 10_000\n",
+    "SAVE_FREQ = 2_000\n",
+    "OUTPUT_DIR = f\"outputs/train/{DATASET_NAME}\"\n",
+    "\n",
+    "# Inference - Hub repo ID or local checkpoint path\n",
+    "# e.g. set to f\"{OUTPUT_DIR}/checkpoints/last\" to use a local checkpoint\n",
+    "POLICY_PATH = f\"{HF_USER}/{DATASET_NAME}_{POLICY_TYPE}\"\n",
+    "LAST_CHECKPOINT_PATH = f\"{OUTPUT_DIR}/checkpoints/last\"\n",
+    "\n",
+    "# Derived\n",
+    "DATASET_REPO_ID = f\"{HF_USER}/{DATASET_NAME}\"\n",
+    "DATASET_ROOT = f\"data/{DATASET_NAME}\"\n",
+    "POLICY_REPO_ID = f\"{HF_USER}/{DATASET_NAME}_{POLICY_TYPE}\"\n",
+    "EVAL_REPO_ID = f\"{HF_USER}/eval_{DATASET_NAME}\"\n",
+    "CAMERAS_ARG = _cameras_arg(CAMERAS)\n",
+    "CAMERAS_FLAG = f'--robot.cameras=\"{CAMERAS_ARG}\"' if CAMERAS_ARG else \"\"\n",
+    "\n",
+    "print(f\"Robot  : {ROBOT_TYPE} @ {ROBOT_PORT}\")\n",
+    "print(f\"Teleop : {TELEOP_TYPE} @ {TELEOP_PORT}\")\n",
+    "print(f\"Cameras: {list(CAMERAS) or 'none'}\")\n",
+    "print(f\"Dataset: {DATASET_REPO_ID} ({NUM_EPISODES} episodes) saved to {DATASET_ROOT}\")\n",
+    "print(f\"Policy : {POLICY_TYPE} -> {POLICY_REPO_ID}\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "---\n",
+    "## 1. Calibration\n",
+    "\n",
+    "Run once per arm before first use."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Follower\n",
+    "print_cmd(\n",
+    "    \"lerobot-calibrate\",\n",
+    "    f\"--robot.type={ROBOT_TYPE}\",\n",
+    "    f\"--robot.port={ROBOT_PORT}\",\n",
+    "    f\"--robot.id={ROBOT_ID}\",\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Leader\n",
+    "print_cmd(\n",
+    "    \"lerobot-calibrate\",\n",
+    "    f\"--teleop.type={TELEOP_TYPE}\",\n",
+    "    f\"--teleop.port={TELEOP_PORT}\",\n",
+    "    f\"--teleop.id={TELEOP_ID}\",\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "---\n",
+    "## 2. Teleoperation\n",
+    "\n",
+    "See the [teleoperation docs](https://huggingface.co/docs/lerobot/il_robots#teleoperate) and the [cameras guide](https://huggingface.co/docs/lerobot/cameras) for more options."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "print_cmd(\n",
+    "    \"lerobot-teleoperate\",\n",
+    "    f\"--robot.type={ROBOT_TYPE}\",\n",
+    "    f\"--robot.port={ROBOT_PORT}\",\n",
+    "    f\"--robot.id={ROBOT_ID}\",\n",
+    "    CAMERAS_FLAG,\n",
+    "    f\"--teleop.type={TELEOP_TYPE}\",\n",
+    "    f\"--teleop.port={TELEOP_PORT}\",\n",
+    "    f\"--teleop.id={TELEOP_ID}\",\n",
+    "    \"--display_data=true\",\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "---\n",
+    "## 3. Record Dataset\n",
+    "\n",
+    "See the [recording docs](https://huggingface.co/docs/lerobot/il_robots#record-a-dataset) for tips on gathering good data."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "print_cmd(\n",
+    "    \"lerobot-record\",\n",
+    "    f\"--robot.type={ROBOT_TYPE}\",\n",
+    "    f\"--robot.port={ROBOT_PORT}\",\n",
+    "    f\"--robot.id={ROBOT_ID}\",\n",
+    "    CAMERAS_FLAG,\n",
+    "    f\"--teleop.type={TELEOP_TYPE}\",\n",
+    "    f\"--teleop.port={TELEOP_PORT}\",\n",
+    "    f\"--teleop.id={TELEOP_ID}\",\n",
+    "    f\"--dataset.repo_id={DATASET_REPO_ID}\",\n",
+    "    f\"--dataset.num_episodes={NUM_EPISODES}\",\n",
+    "    f'--dataset.single_task=\"{TASK_DESCRIPTION}\"',\n",
+    "    \"--dataset.streaming_encoding=true\",\n",
+    "    \"--display_data=true\",\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Resume a previously interrupted recording session\n",
+    "print_cmd(\n",
+    "    \"lerobot-record\",\n",
+    "    f\"--robot.type={ROBOT_TYPE}\",\n",
+    "    f\"--robot.port={ROBOT_PORT}\",\n",
+    "    f\"--robot.id={ROBOT_ID}\",\n",
+    "    CAMERAS_FLAG,\n",
+    "    f\"--teleop.type={TELEOP_TYPE}\",\n",
+    "    f\"--teleop.port={TELEOP_PORT}\",\n",
+    "    f\"--teleop.id={TELEOP_ID}\",\n",
+    "    f\"--dataset.repo_id={DATASET_REPO_ID}\",\n",
+    "    f\"--dataset.root={DATASET_ROOT}\",\n",
+    "    f\"--dataset.num_episodes={NUM_EPISODES}\",\n",
+    "    f'--dataset.single_task=\"{TASK_DESCRIPTION}\"',\n",
+    "    \"--dataset.streaming_encoding=true\",\n",
+    "    \"--display_data=true\",\n",
+    "    \"--resume=true\",\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "---\n",
+    "## 4. Train Policy\n",
+    "\n",
+    "See the [training docs](https://huggingface.co/docs/lerobot/il_robots#train-a-policy) for configuration options and tips."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "print_cmd(\n",
+    "    \"lerobot-train\",\n",
+    "    f\"--dataset.repo_id={DATASET_REPO_ID}\",\n",
+    "    f\"--policy.type={POLICY_TYPE}\",\n",
+    "    f\"--policy.device={POLICY_DEVICE}\",\n",
+    "    f\"--policy.repo_id={POLICY_REPO_ID}\",\n",
+    "    f\"--output_dir={OUTPUT_DIR}\",\n",
+    "    f\"--steps={TRAIN_STEPS}\",\n",
+    "    f\"--save_freq={SAVE_FREQ}\",\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Resume a previously interrupted training session\n",
+    "print_cmd(\n",
+    "    \"lerobot-train\",\n",
+    "    f\"--config_path={LAST_CHECKPOINT_PATH}/pretrained_model/train_config.json\",\n",
+    "    \"--resume=true\",\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "---\n",
+    "## 5. Inference\n",
+    "\n",
+    "Uses `POLICY_PATH` from the Configuration cell (defaults to the Hub repo ID). You can also set `POLICY_PATH` to `LAST_CHECKPOINT_PATH` to run a local checkpoint instead.\n",
+    "\n",
+    "See the [inference docs](https://huggingface.co/docs/lerobot/il_robots#run-inference-and-evaluate-your-policy) for details."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "print_cmd(\n",
+    "    \"lerobot-record\",\n",
+    "    f\"--policy.path={POLICY_PATH}\",\n",
+    "    f\"--robot.type={ROBOT_TYPE}\",\n",
+    "    f\"--robot.port={ROBOT_PORT}\",\n",
+    "    f\"--robot.id={ROBOT_ID}\",\n",
+    "    CAMERAS_FLAG,\n",
+    "    f\"--teleop.type={TELEOP_TYPE}\",\n",
+    "    f\"--teleop.port={TELEOP_PORT}\",\n",
+    "    f\"--teleop.id={TELEOP_ID}\",\n",
+    "    f\"--dataset.repo_id={EVAL_REPO_ID}\",\n",
+    "    f\"--dataset.num_episodes={NUM_EPISODES}\",\n",
+    "    f'--dataset.single_task=\"{TASK_DESCRIPTION}\"',\n",
+    "    \"--dataset.streaming_encoding=true\",\n",
+    ")"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "lerobot (3.12.3)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
@@ -1,191 +0,0 @@
-#!/usr/bin/env python
-
-# Copyright 2026 The HuggingFace Inc. team. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-"""SO100 leader / follower teleop with HIL-SERL-style intervention toggle.
-
-Position-only standalone demo of the leader-arm intervention pattern used by
-PR #2596's HIL-SERL training stack (see
-``lerobot.processor.LeaderFollowerProcessor`` and
-``lerobot.teleoperators.so_leader.SO101LeaderFollower``). Compared to the
-verbatim PR #2596 example (which builds the full ``EEReferenceAndDelta`` ->
-``EEBoundsAndSafety`` -> ``GripperVelocityToJoint`` -> ``InverseKinematicsRLStep``
-pipeline), this version computes the EE delta and the IK target inline against
-the follower's *measured* pose every tick. That removes the latched-reference
-feedback loop and produces noticeably smoother haptic teleop.
-
-Behaviour:
-    * **Following mode** (default): the follower is idle, the leader is
-      torque-enabled with low PID gains and haptically tracks the follower.
-      The user can grab the leader at any time without fighting the position
-      loop.
-    * **Intervention mode** (toggled by pressing SPACE): the leader's torque
-      is released, the user moves the leader freely, and the follower mirrors
-      the leader's end-effector position via ``[delta_x, delta_y, delta_z]``
-      deltas, plus a direct gripper passthrough. This matches the action
-      space recorded by ``LeaderFollowerProcessor`` during HIL-SERL recording.
-
-Keyboard:
-    * ``SPACE`` -- toggle intervention on/off.
-    * ``ESC``   -- terminate (treated as failure event).
-    * ``s``     -- mark current intervention as success.
-    * ``r``     -- request re-record of current episode.
-"""
-
-from __future__ import annotations
-
-import time
-
-import numpy as np
-
-from lerobot.model.kinematics import RobotKinematics
-from lerobot.robots.so_follower import SO100Follower, SO100FollowerConfig
-from lerobot.teleoperators.so_leader import SO101LeaderConfig, SO101LeaderFollower
-from lerobot.teleoperators.utils import TeleopEvents
-from lerobot.utils.robot_utils import precise_sleep
-
-FPS = 30
-
-# Per-axis EE-delta normalisation (metres). The clamped delta is
-# ``clip((p_leader - p_follower) / step, -1, 1) * step``, so a single tick is
-# bounded by ``step`` in metres. Keep small for safe motion.
-EE_STEP_SIZES = {"x": 0.010, "y": 0.010, "z": 0.010}
-
-# Workspace bounds (metres) - tight box around the rest pose to keep the
-# follower from running into joint limits during the demo. Adjust to your
-# workspace.
-EE_BOUNDS = {
-    "min": np.array([-0.20, -0.30, 0.02]),
-    "max": np.array([0.30, 0.30, 0.40]),
-}
-
-# NOTE: It is highly recommended to use the urdf in the SO-ARM100 repo:
-# https://github.com/TheRobotStudio/SO-ARM100/blob/main/Simulation/SO101/so101_new_calib.urdf
-URDF_PATH = "./SO101/so101_new_calib.urdf"
-TARGET_FRAME = "gripper_frame_link"
-
-# Set these to the actual ports on your machine.
-FOLLOWER_PORT = "/dev/usb_follower_arm_a"
-LEADER_PORT = "/dev/usb_leader_arm_a"
-
-
-def _joints_dict_to_array(joints: dict[str, float], motor_names: list[str]) -> np.ndarray:
-    return np.array([joints[f"{m}.pos"] for m in motor_names], dtype=float)
-
-
-def _array_to_joints_dict(arr: np.ndarray, motor_names: list[str]) -> dict[str, float]:
-    return {f"{m}.pos": float(v) for m, v in zip(motor_names, arr, strict=True)}
-
-
-def main() -> None:
-    follower_config = SO100FollowerConfig(port=FOLLOWER_PORT, id="my_follower_arm", use_degrees=True)
-    leader_config = SO101LeaderConfig(
-        port=LEADER_PORT,
-        id="my_leader_arm",
-        use_degrees=True,
-        leader_follower_mode=True,
-        use_gripper=True,
-    )
-
-    follower = SO100Follower(follower_config)
-    leader = SO101LeaderFollower(leader_config)
-
-    follower_motor_names = list(follower.bus.motors.keys())
-    leader_motor_names = list(leader.bus.motors.keys())
-
-    follower_kinematics = RobotKinematics(
-        urdf_path=URDF_PATH, target_frame_name=TARGET_FRAME, joint_names=follower_motor_names
-    )
-    leader_kinematics = RobotKinematics(
-        urdf_path=URDF_PATH, target_frame_name=TARGET_FRAME, joint_names=leader_motor_names
-    )
-
-    follower.connect()
-    leader.connect()
-
-    print("Starting leader-follower intervention demo...")
-    print("  - Press SPACE to toggle intervention.")
-    print("  - Press ESC to terminate, 's' for success, 'r' to re-record.")
-
-    try:
-        while True:
-            t0 = time.perf_counter()
-
-            # 1. Read both arms.
-            follower_obs = follower.get_observation()
-            follower_joints_dict = {f"{m}.pos": float(follower_obs[f"{m}.pos"]) for m in follower_motor_names}
-            leader_joints_dict = leader.get_action()
-
-            # 2. Haptic follow: push follower joints back to the leader. The
-            # leader's ``send_action`` gates motor writes on its intervention
-            # state internally (torque on while following, off while
-            # intervening), so this call is safe in both modes.
-            leader.send_action(follower_joints_dict)
-
-            # 3. Pull teleop events from the leader's keyboard listener.
-            events = leader.get_teleop_events()
-            if events.get(TeleopEvents.TERMINATE_EPISODE):
-                print("Termination requested -- exiting.")
-                break
-
-            is_intervention = events.get(TeleopEvents.IS_INTERVENTION, False)
-
-            if is_intervention:
-                # 4a. INTERVENTION: take normalised position-only delta against
-                # the follower's *measured* pose every tick (no latched
-                # reference, no compounding lag), integrate onto the follower's
-                # current EE pose, clip to the workspace, then IK.
-                leader_arr = _joints_dict_to_array(leader_joints_dict, leader_motor_names)
-                follower_arr = _joints_dict_to_array(follower_joints_dict, follower_motor_names)
-
-                p_leader = leader_kinematics.forward_kinematics(leader_arr)[:3, 3]
-                p_follower_mat = follower_kinematics.forward_kinematics(follower_arr)
-                p_follower = p_follower_mat[:3, 3]
-
-                step_vec = np.array([EE_STEP_SIZES["x"], EE_STEP_SIZES["y"], EE_STEP_SIZES["z"]], dtype=float)
-                raw_delta = p_leader - p_follower
-                delta_norm = np.clip(raw_delta / step_vec, -1.0, 1.0)
-                delta_m = delta_norm * step_vec
-
-                target_pose = p_follower_mat.copy()
-                target_pose[:3, 3] = np.clip(p_follower + delta_m, EE_BOUNDS["min"], EE_BOUNDS["max"])
-
-                # IK -> joint-space goal for the follower's arm chain. Position
-                # only (orientation_weight=0.0) keeps it stable under the
-                # rotation-noise that would otherwise come from leader FK.
-                target_joints = follower_kinematics.inverse_kinematics(
-                    current_joint_pos=follower_arr,
-                    desired_ee_pose=target_pose,
-                    orientation_weight=0.0,
-                )
-                follower_action = _array_to_joints_dict(target_joints, follower_motor_names)
-                # Gripper passthrough: leader gripper position drives follower
-                # gripper directly (no IK).
-                follower_action["gripper.pos"] = float(leader_joints_dict.get("gripper.pos", 50.0))
-                follower.send_action(follower_action)
-
-            # 4b. FOLLOWING: leave the follower alone -- the leader haptically
-            # tracks it via the ``leader.send_action`` call above. In real
-            # HIL-SERL training this is where the policy would step the
-            # follower forward.
-
-            precise_sleep(max(1.0 / FPS - (time.perf_counter() - t0), 0.0))
-    finally:
-        leader.disconnect()
-        follower.disconnect()
-
-
-if __name__ == "__main__":
-    main()
@@ -4,13 +4,13 @@ from pathlib import Path
 from queue import Empty, Full

 import torch
+import torch.optim as optim

 from lerobot.datasets import LeRobotDataset
 from lerobot.envs.configs import HILSerlProcessorConfig, HILSerlRobotEnvConfig
-from lerobot.policies import GaussianActorConfig
-from lerobot.policies.gaussian_actor.modeling_gaussian_actor import GaussianActorPolicy
-from lerobot.policies.gaussian_actor.reward_model.modeling_classifier import Classifier
-from lerobot.rl.algorithms.sac import SACAlgorithm, SACAlgorithmConfig
+from lerobot.policies import SACConfig
+from lerobot.policies.sac.modeling_sac import SACPolicy
+from lerobot.policies.sac.reward_model.modeling_classifier import Classifier
 from lerobot.rl.buffer import ReplayBuffer
 from lerobot.rl.gym_manipulator import make_robot_env
 from lerobot.robots.so_follower import SO100FollowerConfig
@@ -28,7 +28,7 @@ def run_learner(
    transitions_queue: mp.Queue,
    parameters_queue: mp.Queue,
    shutdown_event: mp.Event,
-    policy_learner: GaussianActorPolicy,
+    policy_learner: SACPolicy,
    online_buffer: ReplayBuffer,
    offline_buffer: ReplayBuffer,
    lr: float = 3e-4,
@@ -40,9 +40,8 @@ def run_learner(
    policy_learner.train()
    policy_learner.to(device)

-    algo_config = SACAlgorithmConfig.from_policy_config(policy_learner.config)
-    algorithm = SACAlgorithm(policy=policy_learner, config=algo_config)
-    algorithm.make_optimizers_and_scheduler()
+    # Create Adam optimizer from scratch - simple and clean
+    optimizer = optim.Adam(policy_learner.parameters(), lr=lr)

    print(f"[LEARNER] Online buffer capacity: {online_buffer.capacity}")
    print(f"[LEARNER] Offline buffer capacity: {offline_buffer.capacity}")
@@ -84,26 +83,24 @@ def run_learner(
                else:
                    batch[key] = online_batch[key]

-            def batch_iter(b=batch):
-                while True:
-                    yield b
+            loss, _ = policy_learner.forward(batch)

-            stats = algorithm.update(batch_iter())
+            optimizer.zero_grad()
+            loss.backward()
+            optimizer.step()
            training_step += 1

            if training_step % LOG_EVERY == 0:
-                log_dict = stats.to_log_dict()
                print(
-                    f"[LEARNER] Training step {training_step}, "
-                    f"critic_loss: {log_dict.get('critic', 'N/A'):.4f}, "
+                    f"[LEARNER] Training step {training_step}, Loss: {loss.item():.4f}, "
                    f"Buffers: Online={len(online_buffer)}, Offline={len(offline_buffer)}"
                )

            # Send updated parameters to actor every 10 training steps
            if training_step % SEND_EVERY == 0:
                try:
-                    weights = algorithm.get_weights()
-                    parameters_queue.put_nowait(weights)
+                    state_dict = {k: v.cpu() for k, v in policy_learner.state_dict().items()}
+                    parameters_queue.put_nowait(state_dict)
                    print("[LEARNER] Sent updated parameters to actor")
                except Full:
                    # Missing write due to queue not being consumed (should happen rarely)
@@ -116,7 +113,7 @@ def run_actor(
    transitions_queue: mp.Queue,
    parameters_queue: mp.Queue,
    shutdown_event: mp.Event,
-    policy_actor: GaussianActorPolicy,
+    policy_actor: SACPolicy,
    reward_classifier: Classifier,
    env_cfg: HILSerlRobotEnvConfig,
    device: torch.device = "mps",
@@ -147,15 +144,15 @@ def run_actor(

            while step < MAX_STEPS_PER_EPISODE and not shutdown_event.is_set():
                try:
-                    new_weights = parameters_queue.get_nowait()
-                    policy_actor.load_state_dict(new_weights)
+                    new_params = parameters_queue.get_nowait()
+                    policy_actor.load_state_dict(new_params)
                    print("[ACTOR] Updated policy parameters from learner")
                except Empty:  # No new updated parameters available from learner, waiting
                    pass

-                # Get action from policy (returns full action: continuous + discrete)
+                # Get action from policy
                policy_obs = make_policy_obs(obs, device=device)
-                action_tensor = policy_actor.select_action(policy_obs)
+                action_tensor = policy_actor.select_action(policy_obs)  # predicts a single action
                action = action_tensor.squeeze(0).cpu().numpy()

                # Step environment
@@ -264,14 +261,14 @@ def main():
    action_features = hw_to_dataset_features(env.robot.action_features, "action")

    # Create SAC policy for action selection
-    policy_cfg = GaussianActorConfig(
+    policy_cfg = SACConfig(
        device=device,
        input_features=obs_features,
        output_features=action_features,
    )

-    policy_actor = GaussianActorPolicy(policy_cfg)
-    policy_learner = GaussianActorPolicy(policy_cfg)
+    policy_actor = SACPolicy(policy_cfg)
+    policy_learner = SACPolicy(policy_cfg)

    demonstrations_repo_id = "lerobot/example_hil_serl_dataset"
    offline_dataset = LeRobotDataset(repo_id=demonstrations_repo_id)
@@ -108,9 +108,9 @@ training = [
    "wandb>=0.24.0,<0.25.0",
 ]
 hardware = [
-    "pynput>=1.7.8,<1.9.0",
-    "pyserial>=3.5,<4.0",
-    "deepdiff>=7.0.1,<9.0.0",
+    "lerobot[pynput-dep]",
+    "lerobot[pyserial-dep]",
+    "lerobot[deepdiff-dep]",
 ]
 viz = [
    "rerun-sdk>=0.24.0,<0.27.0",
@@ -136,10 +136,14 @@ scipy-dep = ["scipy>=1.14.0,<2.0.0"]
 diffusers-dep = ["diffusers>=0.27.2,<0.36.0"]
 qwen-vl-utils-dep = ["qwen-vl-utils>=0.0.11,<0.1.0"]
 matplotlib-dep = ["matplotlib>=3.10.3,<4.0.0", "contourpy>=1.3.0,<2.0.0"] # NOTE: Explicitly listing contourpy helps the resolver converge faster.
+pyserial-dep = ["pyserial>=3.5,<4.0"]
+deepdiff-dep = ["deepdiff>=7.0.1,<9.0.0"]
+pynput-dep = ["pynput>=1.7.8,<1.9.0"]
+pyzmq-dep = ["pyzmq>=26.2.1,<28.0.0"]

 # Motors
-feetech = ["feetech-servo-sdk>=1.0.0,<2.0.0"]
-dynamixel = ["dynamixel-sdk>=3.7.31,<3.9.0"]
+feetech = ["feetech-servo-sdk>=1.0.0,<2.0.0", "lerobot[pyserial-dep]", "lerobot[deepdiff-dep]"]
+dynamixel = ["dynamixel-sdk>=3.7.31,<3.9.0", "lerobot[pyserial-dep]", "lerobot[deepdiff-dep]"]
 damiao = ["lerobot[can-dep]"]
 robstride = ["lerobot[can-dep]"]

@@ -147,10 +151,11 @@ robstride = ["lerobot[can-dep]"]
 openarms = ["lerobot[damiao]"]
 gamepad = ["lerobot[pygame-dep]", "hidapi>=0.14.0,<0.15.0"]
 hopejr = ["lerobot[feetech]", "lerobot[pygame-dep]"]
-lekiwi = ["lerobot[feetech]", "pyzmq>=26.2.1,<28.0.0"]
+lekiwi = ["lerobot[feetech]", "lerobot[pyzmq-dep]"]
 unitree_g1 = [
    # "unitree-sdk2==1.0.1",
-    "pyzmq>=26.2.1,<28.0.0",
+    "lerobot[pyzmq-dep]",
+    "lerobot[pyserial-dep]",
    "onnxruntime>=1.16.0,<2.0.0",
    "onnx>=1.16.0,<2.0.0",
    "meshcat>=0.3.0,<0.4.0",
@@ -196,7 +201,8 @@ async = ["lerobot[grpcio-dep]", "lerobot[matplotlib-dep]"]
 peft = ["lerobot[transformers-dep]", "lerobot[peft-dep]"]

 # Development
-dev = ["pre-commit>=3.7.0,<5.0.0", "debugpy>=1.8.1,<1.9.0", "lerobot[grpcio-dep]", "grpcio-tools==1.73.1", "mypy>=1.19.1", "ruff>=0.14.1"]
+dev = ["pre-commit>=3.7.0,<5.0.0", "debugpy>=1.8.1,<1.9.0", "lerobot[grpcio-dep]", "grpcio-tools==1.73.1", "mypy>=1.19.1", "ruff>=0.14.1", "lerobot[notebook]"]
+notebook = ["jupyter>=1.0.0,<2.0.0", "ipykernel>=6.0.0,<7.0.0"]
 test = ["pytest>=8.1.0,<9.0.0", "pytest-timeout>=2.4.0,<3.0.0", "pytest-cov>=5.0.0,<8.0.0", "mock-serial>=0.0.1,<0.1.0 ; sys_platform != 'win32'"]
 video_benchmark = ["scikit-image>=0.23.2,<0.26.0", "pandas>=2.2.2,<2.4.0"]

@@ -206,6 +212,11 @@ aloha = ["lerobot[dataset]", "gym-aloha>=0.1.2,<0.2.0", "lerobot[scipy-dep]"]
 pusht = ["lerobot[dataset]", "gym-pusht>=0.1.5,<0.2.0", "pymunk>=6.6.0,<7.0.0"] # TODO: Fix pymunk version in gym-pusht instead
 libero = ["lerobot[dataset]", "lerobot[transformers-dep]", "hf-libero>=0.1.3,<0.2.0; sys_platform == 'linux'", "lerobot[scipy-dep]"]
 metaworld = ["lerobot[dataset]", "metaworld==3.0.0", "lerobot[scipy-dep]"]
+# NOTE: robocasa is NOT exposed as a `lerobot` extra. Its setup.py pins
+# `lerobot==0.3.3` in install_requires, which cyclically shadows our own
+# workspace `lerobot` and makes the graph unsolvable under any resolver
+# (uv, pip). Install it manually alongside robosuite — see
+# docs/source/robocasa.mdx for the recipe.

 # All
 all = [
@@ -57,6 +57,41 @@ def _metaworld_descriptions(task_name: str) -> dict[str, str]:
    return {f"{task_name}_0": label}


+def _robotwin_descriptions(task_names: str) -> dict[str, str]:
+    """Return descriptions for each requested RoboTwin task. Reads
+    `description/task_instruction/<task>.json` from the RoboTwin clone
+    (cwd is /opt/robotwin in CI). Falls back to the task name if missing."""
+    out: dict[str, str] = {}
+    root = Path("description/task_instruction")
+    for name in (t.strip() for t in task_names.split(",") if t.strip()):
+        desc_file = root / f"{name}.json"
+        desc = name.replace("_", " ")
+        if desc_file.is_file():
+            data = json.loads(desc_file.read_text())
+            full = data.get("full_description") or desc
+            # Drop '<' and '>' so the sentence reads cleanly; `{A}`/`{a}` placeholders are left as-is.
+            desc = full.replace("<", "").replace(">", "")
+        out[f"{name}_0"] = desc
+    return out
+
+
+def _robocasa_descriptions(task_spec: str) -> dict[str, str]:
+    """For each task in the comma-separated list, emit a cleaned-name label.
+
+    RoboCasa episodes carry their language instruction in the env's
+    `ep_meta['lang']`, populated per reset. Pulling it requires spinning
+    up the full kitchen env per task (~seconds each); we use the task
+    name as the key here and let the eval's episode info carry the
+    actual instruction.
+    """
+    out: dict[str, str] = {}
+    for task in (t.strip() for t in task_spec.split(",") if t.strip()):
+        # Split CamelCase into words: "CloseFridge" → "close fridge".
+        label = "".join(f" {c.lower()}" if c.isupper() else c for c in task).strip()
+        out[f"{task}_0"] = label or task
+    return out
+
+
 def main() -> int:
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("--env", required=True, help="Environment family (libero, metaworld, ...)")
@@ -70,6 +105,10 @@ def main() -> int:
            descriptions = _libero_descriptions(args.task)
        elif args.env == "metaworld":
            descriptions = _metaworld_descriptions(args.task)
+        elif args.env == "robotwin":
+            descriptions = _robotwin_descriptions(args.task)
+        elif args.env == "robocasa":
+            descriptions = _robocasa_descriptions(args.task)
        else:
            print(
                f"[extract_task_descriptions] No description extractor for env '{args.env}'.",
@@ -33,7 +33,7 @@ import cv2  # type: ignore  # TODO: add type stubs for OpenCV
 import numpy as np  # type: ignore  # TODO: add type stubs for numpy

 from lerobot.utils.decorators import check_if_not_connected
-from lerobot.utils.import_utils import _reachy2_sdk_available
+from lerobot.utils.import_utils import _reachy2_sdk_available, require_package

 if TYPE_CHECKING or _reachy2_sdk_available:
    from reachy2_sdk.media.camera import CameraView
@@ -76,6 +76,7 @@ class Reachy2Camera(Camera):
        Args:
            config: The configuration settings for the camera.
        """
+        require_package("reachy2_sdk", extra="reachy2")
        super().__init__(config)

        self.config = config
@@ -19,16 +19,18 @@ Provides the RealSenseCamera class for capturing frames from Intel RealSense cam
 import logging
 import time
 from threading import Event, Lock, Thread
-from typing import Any
+from typing import TYPE_CHECKING, Any

 import cv2  # type: ignore  # TODO: add type stubs for OpenCV
 import numpy as np  # type: ignore  # TODO: add type stubs for numpy
 from numpy.typing import NDArray  # type: ignore  # TODO: add type stubs for numpy.typing

-try:
-    import pyrealsense2 as rs  # type: ignore  # TODO: add type stubs for pyrealsense2
-except Exception as e:
-    logging.info(f"Could not import realsense: {e}")
+from lerobot.utils.import_utils import _pyrealsense2_available, require_package
+
+if TYPE_CHECKING or _pyrealsense2_available:
+    import pyrealsense2 as rs
+else:
+    rs = None

 from lerobot.utils.decorators import check_if_already_connected, check_if_not_connected
 from lerobot.utils.errors import DeviceNotConnectedError
@@ -112,7 +114,7 @@ class RealSenseCamera(Camera):
        Args:
            config: The configuration settings for the camera.
        """
-
+        require_package("pyrealsense2", extra="intelrealsense")
        super().__init__(config)

        self.config = config
@@ -28,12 +28,19 @@ import json
 import logging
 import time
 from threading import Event, Lock, Thread
-from typing import Any
+from typing import TYPE_CHECKING, Any

 import cv2
 import numpy as np
 from numpy.typing import NDArray

+from lerobot.utils.import_utils import _zmq_available, require_package
+
+if TYPE_CHECKING or _zmq_available:
+    import zmq
+else:
+    zmq = None
+
 from lerobot.utils.decorators import check_if_already_connected, check_if_not_connected
 from lerobot.utils.errors import DeviceNotConnectedError

@@ -74,8 +81,8 @@ class ZMQCamera(Camera):
    """

    def __init__(self, config: ZMQCameraConfig):
+        require_package("pyzmq", extra="pyzmq-dep", import_name="zmq")
        super().__init__(config)
-        import zmq

        self.config = config
        self.server_address = config.server_address
@@ -117,8 +124,6 @@ class ZMQCamera(Camera):
        logger.info(f"Connecting to {self}...")

        try:
-            import zmq
-
            self.context = zmq.Context()
            self.socket = self.context.socket(zmq.SUB)
            self.socket.setsockopt_string(zmq.SUBSCRIBE, "")
@@ -180,11 +185,8 @@ class ZMQCamera(Camera):

        try:
            message = self.socket.recv_string()
-        except Exception as e:
-            # zmq is lazy-imported in connect(), so check by name to avoid a top-level import
-            if type(e).__name__ == "Again":
-                raise TimeoutError(f"{self} timeout after {self.timeout_ms}ms") from e
-            raise
+        except zmq.Again as e:
+            raise TimeoutError(f"{self} timeout after {self.timeout_ms}ms") from e

        # Decode JSON message
        data = json.loads(message)
@@ -28,6 +28,12 @@ import numpy as np
 import torch

 from lerobot.policies import PreTrainedPolicy, prepare_observation_for_inference
+from lerobot.utils.import_utils import _deepdiff_available, require_package
+
+if TYPE_CHECKING or _deepdiff_available:
+    from deepdiff import DeepDiff
+else:
+    DeepDiff = None

 if TYPE_CHECKING:
    from lerobot.datasets import LeRobotDataset
@@ -217,10 +223,7 @@ def sanity_check_dataset_robot_compatibility(
    Raises:
        ValueError: If any of the checked metadata fields do not match.
    """
-    from lerobot.utils.import_utils import require_package
-
-    require_package("deepdiff", extra="hardware")
-    from deepdiff import DeepDiff
+    require_package("deepdiff", extra="deepdiff-dep")

    from lerobot.utils.constants import DEFAULT_FEATURES

@@ -99,7 +99,6 @@ def save_checkpoint(
        optimizer (Optimizer | None, optional): The optimizer to save the state from. Defaults to None.
        scheduler (LRScheduler | None, optional): The scheduler to save the state from. Defaults to None.
        preprocessor: The preprocessor/pipeline to save. Defaults to None.
-        postprocessor: The postprocessor/pipeline to save. Defaults to None.
    """
    pretrained_dir = checkpoint_dir / PRETRAINED_MODEL_DIR
    policy.save_pretrained(pretrained_dir)
@@ -35,6 +35,9 @@ class DatasetConfig:
    revision: str | None = None
    use_imagenet_stats: bool = True
    video_backend: str = field(default_factory=get_safe_default_codec)
+    # When True, video frames are returned as uint8 tensors (0-255) instead of float32 (0.0-1.0).
+    # This reduces memory and speeds up DataLoader IPC. The training pipeline handles the conversion.
+    return_uint8: bool = False
    streaming: bool = False

    def __post_init__(self) -> None:
@@ -56,6 +56,8 @@ class TrainPipelineConfig(HubMixin):
    # Number of workers for the dataloader.
    num_workers: int = 4
    batch_size: int = 8
+    prefetch_factor: int = 4
+    persistent_workers: bool = True
    steps: int = 100_000
    eval_freq: int = 20_000
    log_freq: int = 200
@@ -207,3 +209,10 @@ class TrainPipelineConfig(HubMixin):
        cli_args = kwargs.pop("cli_args", [])
        with draccus.config_type("json"):
            return draccus.parse(cls, config_file, args=cli_args)
+
+
+@dataclass(kw_only=True)
+class TrainRLServerPipelineConfig(TrainPipelineConfig):
+    # NOTE: In RL, we don't need an offline dataset
+    # TODO: Make `TrainPipelineConfig.dataset` optional
+    dataset: DatasetConfig | None = None  # type: ignore[assignment] # because the parent class has made its type non-optional
@@ -16,6 +16,7 @@
 """Private reader component for LeRobotDataset. Handles random-access reading (HF dataset, delta indices, video decoding)."""

 from collections.abc import Callable
+from concurrent.futures import ThreadPoolExecutor
 from pathlib import Path

 import datasets
@@ -49,6 +50,7 @@ class DatasetReader:
        video_backend: str,
        delta_timestamps: dict[str, list[float]] | None,
        image_transforms: Callable | None,
+        return_uint8: bool = False,
    ):
        """Initialize the reader with metadata, filtering, and transform config.

@@ -73,6 +75,7 @@ class DatasetReader:
        self._tolerance_s = tolerance_s
        self._video_backend = video_backend
        self._image_transforms = image_transforms
+        self._return_uint8 = return_uint8

        self.hf_dataset: datasets.Dataset | None = None
        self._absolute_to_relative_idx: dict[int, int] | None = None
@@ -105,10 +108,8 @@ class DatasetReader:
        """Build absolute-to-relative index mapping from loaded hf_dataset."""
        self._absolute_to_relative_idx = None
        if self.episodes is not None and self.hf_dataset is not None:
-            self._absolute_to_relative_idx = {
-                abs_idx.item() if isinstance(abs_idx, torch.Tensor) else abs_idx: rel_idx
-                for rel_idx, abs_idx in enumerate(self.hf_dataset["index"])
-            }
+            indices = self.hf_dataset.data.column("index").to_numpy()
+            self._absolute_to_relative_idx = dict(zip(indices.tolist(), range(len(indices)), strict=True))

    @property
    def num_frames(self) -> int:
@@ -235,16 +236,30 @@ class DatasetReader:
        Segmentation Fault.
        """
        ep = self._meta.episodes[ep_idx]
-        item = {}
-        for vid_key, query_ts in query_timestamps.items():
+
+        def _decode_single(vid_key: str, query_ts: list[float]) -> tuple[str, torch.Tensor]:
            from_timestamp = ep[f"videos/{vid_key}/from_timestamp"]
            shifted_query_ts = [from_timestamp + ts for ts in query_ts]
-
            video_path = self.root / self._meta.get_video_file_path(ep_idx, vid_key)
-            frames = decode_video_frames(video_path, shifted_query_ts, self._tolerance_s, self._video_backend)
-            item[vid_key] = frames.squeeze(0)
+            frames = decode_video_frames(
+                video_path,
+                shifted_query_ts,
+                self._tolerance_s,
+                self._video_backend,
+                return_uint8=self._return_uint8,
+            )
+            return vid_key, frames.squeeze(0)

-        return item
+        items = list(query_timestamps.items())
+
+        # Single camera: no threading overhead
+        if len(items) <= 1:
+            return {vid_key: _decode_single(vid_key, query_ts)[1] for vid_key, query_ts in items}
+
+        # Multi-camera: decode in parallel (video decoding releases the GIL)
+        with ThreadPoolExecutor(max_workers=len(items)) as pool:
+            futures = [pool.submit(_decode_single, k, ts) for k, ts in items]
+            return dict(f.result() for f in futures)

    def get_item(self, idx) -> dict:
        """Core __getitem__ logic. Assumes hf_dataset is loaded.
@@ -597,7 +597,7 @@ class DatasetWriter:

    def cleanup_interrupted_episode(self, episode_index: int) -> None:
        """Remove temporary image directories for an interrupted episode."""
-        for key in self._meta.video_keys:
+        for key in self._meta.camera_keys:
            img_dir = self._get_image_file_path(
                episode_index=episode_index, image_key=key, frame_index=0
            ).parent
@@ -92,6 +92,7 @@ def make_dataset(cfg: TrainPipelineConfig) -> LeRobotDataset | MultiLeRobotDatas
                image_transforms=image_transforms,
                revision=cfg.dataset.revision,
                video_backend=cfg.dataset.video_backend,
+                return_uint8=True,
                tolerance_s=cfg.tolerance_s,
            )
        else:
@@ -104,6 +105,7 @@ def make_dataset(cfg: TrainPipelineConfig) -> LeRobotDataset | MultiLeRobotDatas
                revision=cfg.dataset.revision,
                max_num_shards=cfg.num_workers,
                tolerance_s=cfg.tolerance_s,
+                return_uint8=True,
            )
    else:
        raise NotImplementedError("The MultiLeRobotDataset isn't supported for now.")
@@ -30,13 +30,13 @@ def safe_stop_image_writer(func):
    def wrapper(*args, **kwargs):
        try:
            return func(*args, **kwargs)
-        except Exception as e:
+        except BaseException:
            dataset = kwargs.get("dataset")
            writer = getattr(dataset, "writer", None) if dataset else None
            if writer is not None and writer.image_writer is not None:
                logger.warning("Waiting for image writer to terminate...")
                writer.image_writer.stop()
-            raise e
+            raise

    return wrapper

@@ -56,6 +56,7 @@ class LeRobotDataset(torch.utils.data.Dataset):
        force_cache_sync: bool = False,
        download_videos: bool = True,
        video_backend: str | None = None,
+        return_uint8: bool = False,
        batch_encoding_size: int = 1,
        vcodec: str = "libsvtav1",
        streaming_encoding: bool = False,
@@ -202,6 +203,7 @@ class LeRobotDataset(torch.utils.data.Dataset):
        self.tolerance_s = tolerance_s
        self.revision = revision if revision else CODEBASE_VERSION
        self._video_backend = video_backend if video_backend else get_safe_default_codec()
+        self._return_uint8 = return_uint8
        self._batch_encoding_size = batch_encoding_size
        self._vcodec = resolve_vcodec(vcodec)
        self._encoder_threads = encoder_threads
@@ -225,6 +227,7 @@ class LeRobotDataset(torch.utils.data.Dataset):
            video_backend=self._video_backend,
            delta_timestamps=delta_timestamps,
            image_transforms=image_transforms,
+            return_uint8=self._return_uint8,
        )

        # Load actual data
@@ -288,6 +291,7 @@ class LeRobotDataset(torch.utils.data.Dataset):
                video_backend=self._video_backend,
                delta_timestamps=self.delta_timestamps,
                image_transforms=self.image_transforms,
+                return_uint8=self._return_uint8,
            )
        return self.reader

@@ -683,6 +687,7 @@ class LeRobotDataset(torch.utils.data.Dataset):
        obj.delta_timestamps = None
        obj.episodes = None
        obj._video_backend = video_backend if video_backend is not None else get_safe_default_codec()
+        obj._return_uint8 = False
        obj._batch_encoding_size = batch_encoding_size
        obj._vcodec = vcodec
        obj._encoder_threads = encoder_threads
@@ -775,6 +780,7 @@ class LeRobotDataset(torch.utils.data.Dataset):
        obj.delta_timestamps = None
        obj.episodes = None
        obj._video_backend = video_backend if video_backend else get_safe_default_codec()
+        obj._return_uint8 = False
        obj._batch_encoding_size = batch_encoding_size
        obj._vcodec = vcodec
        obj._encoder_threads = encoder_threads
@@ -251,6 +251,7 @@ class StreamingLeRobotDataset(torch.utils.data.IterableDataset):
        seed: int = 42,
        rng: np.random.Generator | None = None,
        shuffle: bool = True,
+        return_uint8: bool = False,
    ):
        """Initialize a StreamingLeRobotDataset.

@@ -288,6 +289,7 @@ class StreamingLeRobotDataset(torch.utils.data.IterableDataset):

        self.streaming = streaming
        self.buffer_size = buffer_size
+        self._return_uint8 = return_uint8

        # We cache the video decoders to avoid re-initializing them at each frame (avoiding a ~10x slowdown)
        self.video_decoder_cache = None
@@ -553,7 +555,11 @@ class StreamingLeRobotDataset(torch.utils.data.IterableDataset):
            root = self.meta.url_root if self.streaming and not self.streaming_from_local else self.root
            video_path = f"{root}/{self.meta.get_video_file_path(ep_idx, video_key)}"
            frames = decode_video_frames_torchcodec(
-                video_path, query_ts, self.tolerance_s, decoder_cache=self.video_decoder_cache
+                video_path,
+                query_ts,
+                self.tolerance_s,
+                decoder_cache=self.video_decoder_cache,
+                return_uint8=self._return_uint8,
            )

            item[video_key] = frames.squeeze(0) if len(query_ts) == 1 else frames
@@ -123,6 +123,7 @@ def decode_video_frames(
    timestamps: list[float],
    tolerance_s: float,
    backend: str | None = None,
+    return_uint8: bool = False,
 ) -> torch.Tensor:
    """
    Decodes video frames using the specified backend.
@@ -131,19 +132,23 @@ def decode_video_frames(
        video_path (Path): Path to the video file.
        timestamps (list[float]): List of timestamps to extract frames.
        tolerance_s (float): Allowed deviation in seconds for frame retrieval.
-        backend (str, optional): Backend to use for decoding. Defaults to "torchcodec" when available in the platform; otherwise, defaults to "pyav"..
+        backend (str, optional): Backend to use for decoding. Defaults to "torchcodec" when available in the platform; otherwise, defaults to "pyav".
+        return_uint8 (bool): If True, return raw uint8 frames without float32 normalization.
+            This reduces memory for DataLoader IPC; normalization can be done on GPU afterward.

    Returns:
-        torch.Tensor: Decoded frames.
+        torch.Tensor: Decoded frames (float32 in [0,1] by default, or uint8 if return_uint8=True).

    Currently supports torchcodec on cpu and pyav.
    """
    if backend is None:
        backend = get_safe_default_codec()
    if backend == "torchcodec":
-        return decode_video_frames_torchcodec(video_path, timestamps, tolerance_s)
+        return decode_video_frames_torchcodec(video_path, timestamps, tolerance_s, return_uint8=return_uint8)
    elif backend in ["pyav", "video_reader"]:
-        return decode_video_frames_torchvision(video_path, timestamps, tolerance_s, backend)
+        return decode_video_frames_torchvision(
+            video_path, timestamps, tolerance_s, backend, return_uint8=return_uint8
+        )
    else:
        raise ValueError(f"Unsupported video backend: {backend}")

@@ -154,6 +159,7 @@ def decode_video_frames_torchvision(
    tolerance_s: float,
    backend: str = "pyav",
    log_loaded_timestamps: bool = False,
+    return_uint8: bool = False,
 ) -> torch.Tensor:
    """Loads frames associated to the requested timestamps of a video

@@ -240,14 +246,17 @@ def decode_video_frames_torchvision(
    if log_loaded_timestamps:
        logger.info(f"{closest_ts=}")

-    # convert to the pytorch format which is float32 in [0,1] range (and channel first)
-    closest_frames = closest_frames.type(torch.float32) / 255
-
    if len(timestamps) != len(closest_frames):
        raise FrameTimestampError(
            f"Number of retrieved frames ({len(closest_frames)}) does not match "
            f"number of queried timestamps ({len(timestamps)})"
        )
+
+    if return_uint8:
+        return closest_frames
+
+    # convert to the pytorch format which is float32 in [0,1] range (and channel first)
+    closest_frames = closest_frames.type(torch.float32) / 255
    return closest_frames


@@ -306,6 +315,7 @@ def decode_video_frames_torchcodec(
    tolerance_s: float,
    log_loaded_timestamps: bool = False,
    decoder_cache: VideoDecoderCache | None = None,
+    return_uint8: bool = False,
 ) -> torch.Tensor:
    """Loads frames associated with the requested timestamps of a video using torchcodec.

@@ -373,14 +383,16 @@ def decode_video_frames_torchcodec(
    if log_loaded_timestamps:
        logger.info(f"{closest_ts=}")

-    # convert to float32 in [0,1] range
-    closest_frames = (closest_frames / 255.0).type(torch.float32)
-
    if not len(timestamps) == len(closest_frames):
        raise FrameTimestampError(
            f"Retrieved timestamps differ from queried {set(closest_frames) - set(timestamps)}"
        )

+    if return_uint8:
+        return closest_frames
+
+    # convert to float32 in [0,1] range
+    closest_frames = (closest_frames / 255.0).type(torch.float32)
    return closest_frames


@@ -299,14 +299,6 @@ class HILSerlProcessorConfig:
    inverse_kinematics: InverseKinematicsConfig | None = None
    reward_classifier: RewardClassifierConfig | None = None
    max_gripper_pos: float | None = 100.0
-    # Only used when ``control_mode == "leader"``. ``LeaderFollowerProcessor``
-    # always builds the PR #2596 **7-D** vector ``[dx,dy,dz,wx,wy,wz,gripper]``.
-    # When ``False`` (default), rotation is **disabled** (components 3–5 are
-    # zeroed); when ``True``, full rotation deltas are used (requires
-    # ``wx/wy/wz`` in ``inverse_kinematics.end_effector_step_sizes``). The
-    # intervention step then turns that into a 4-D policy tensor when rotation
-    # is disabled, matching the gamepad pipeline.
-    use_rotation: bool = False


@EnvConfig.register_subclass(name="gym_manipulator")
@@ -504,6 +496,81 @@ class MetaworldEnv(EnvConfig):
        )


+@EnvConfig.register_subclass("robocasa")
+@dataclass
+class RoboCasaEnv(EnvConfig):
+    """Configuration for RoboCasa kitchen environments (registered as ``robocasa``).
+
+    ``__post_init__`` registers one visual feature per requested camera and,
+    for ``obs_type="pixels_agent_pos"``, a 16-D state feature.
+    ``create_envs`` delegates the actual construction to
+    ``create_robocasa_envs``.
+    """
+
+    task: str = "CloseFridge"
+    fps: int = 20
+    episode_length: int = 1000
+    obs_type: str = "pixels_agent_pos"
+    render_mode: str = "rgb_array"
+    camera_name: str = "robot0_agentview_left,robot0_eye_in_hand,robot0_agentview_right"
+    observation_height: int = 256
+    observation_width: int = 256
+    visualization_height: int = 512
+    visualization_width: int = 512
+    split: str | None = None
+    # Object-mesh registries to sample from. Upstream defaults to
+    # ("objaverse", "lightwheel"); objaverse is ~30GB and the CI image only
+    # ships the lightwheel pack. Add objaverse here after running
+    # `python -m robocasa.scripts.download_kitchen_assets --type objaverse`
+    # locally.
+    obj_registries: list[str] = field(default_factory=lambda: ["lightwheel"])
+    features: dict[str, PolicyFeature] = field(
+        default_factory=lambda: {ACTION: PolicyFeature(type=FeatureType.ACTION, shape=(12,))}
+    )
+    features_map: dict[str, str] = field(default_factory=lambda: {ACTION: ACTION, "agent_pos": OBS_STATE})
+
+    def __post_init__(self):
+        """Validate ``obs_type`` and register per-camera (and state) features."""
+        supported_obs_types = ("pixels", "pixels_agent_pos")
+        if self.obs_type not in supported_obs_types:
+            raise ValueError(f"Unsupported obs_type: {self.obs_type}")
+
+        # Raw RoboCasa camera names are kept end-to-end (e.g.
+        # `observation.images.robot0_agentview_left`) so they line up with
+        # the RoboCasa datasets on the Hub and no `--rename_map` is needed
+        # at eval time.
+        frame_shape = (self.observation_height, self.observation_width, 3)
+        for raw_name in self.camera_name.split(","):
+            cam = raw_name.strip()
+            if not cam:
+                continue
+            pixel_key = f"pixels/{cam}"
+            self.features[pixel_key] = PolicyFeature(type=FeatureType.VISUAL, shape=frame_shape)
+            self.features_map[pixel_key] = f"{OBS_IMAGES}.{cam}"
+
+        if self.obs_type == "pixels_agent_pos":
+            self.features["agent_pos"] = PolicyFeature(type=FeatureType.STATE, shape=(16,))
+
+    @property
+    def gym_kwargs(self) -> dict:
+        """Keyword arguments forwarded to ``create_robocasa_envs``.
+
+        ``split`` is only included when it has been set explicitly.
+        """
+        forwarded_fields = (
+            "obs_type",
+            "render_mode",
+            "observation_height",
+            "observation_width",
+            "visualization_height",
+            "visualization_width",
+        )
+        kwargs: dict[str, Any] = {name: getattr(self, name) for name in forwarded_fields}
+        if self.split is not None:
+            kwargs["split"] = self.split
+        return kwargs
+
+    def create_envs(self, n_envs: int, use_async_envs: bool = False):
+        """Build the vectorized RoboCasa envs described by this config.
+
+        Raises:
+            ValueError: If ``task`` is ``None``.
+        """
+        # Imported lazily so the robocasa wrapper module is only loaded when
+        # this config is actually used to create environments.
+        from .robocasa import create_robocasa_envs
+
+        if self.task is None:
+            raise ValueError("RoboCasaEnv requires a task to be specified")
+        return create_robocasa_envs(
+            task=self.task,
+            n_envs=n_envs,
+            camera_name=self.camera_name,
+            gym_kwargs=self.gym_kwargs,
+            env_cls=_make_vec_env_cls(use_async_envs, n_envs),
+            episode_length=self.episode_length,
+            obj_registries=tuple(self.obj_registries),
+        )
+
+
@EnvConfig.register_subclass("isaaclab_arena")
@dataclass
 class IsaaclabArenaEnv(HubEnvConfig):
@@ -582,3 +649,90 @@ class IsaaclabArenaEnv(HubEnvConfig):
            ),
            PolicyProcessorPipeline(steps=[]),
        )
+
+
+@EnvConfig.register_subclass("robotwin")
+@dataclass
+class RoboTwinEnvConfig(EnvConfig):
+    """Configuration for RoboTwin 2.0 benchmark environments.
+
+    RoboTwin 2.0 is a dual-arm manipulation benchmark (50 tasks) built on the
+    SAPIEN simulator. The robot is an Aloha-AgileX bimanual platform with
+    14 DOF (7 per arm). All three cameras are enabled by default.
+
+    See: https://robotwin-platform.github.io
+    Dataset: https://huggingface.co/datasets/lerobot/robotwin_unified
+    """
+
+    task: str = "beat_block_hammer"  # single task or comma-separated list
+    fps: int = 25
+    episode_length: int = 300
+    obs_type: str = "pixels_agent_pos"
+    render_mode: str = "rgb_array"
+    # Cameras exposed by RoboTwin's aloha-agilex embodiment: head_camera
+    # (torso-mounted) plus left_camera / right_camera (wrists).
+    camera_names: str = "head_camera,left_camera,right_camera"
+    # Matches the D435 dims in task_config/demo_clean.yml (_camera_config.yml).
+    # Gym's vector-env concatenate pre-allocates buffers of this shape, so it
+    # has to equal what SAPIEN actually renders.
+    observation_height: int = 240
+    observation_width: int = 320
+    features: dict[str, PolicyFeature] = field(
+        default_factory=lambda: {
+            ACTION: PolicyFeature(type=FeatureType.ACTION, shape=(14,)),
+        }
+    )
+    features_map: dict[str, str] = field(
+        default_factory=lambda: {
+            ACTION: ACTION,
+            "pixels/head_camera": f"{OBS_IMAGES}.head_camera",
+            "pixels/left_camera": f"{OBS_IMAGES}.left_camera",
+            "pixels/right_camera": f"{OBS_IMAGES}.right_camera",
+            "agent_pos": OBS_STATE,
+        }
+    )
+
+    def __post_init__(self):
+        """Register per-camera visual features, then validate ``obs_type``."""
+        frame_shape = (self.observation_height, self.observation_width, 3)
+        for raw_name in self.camera_names.split(","):
+            cam = raw_name.strip()
+            if not cam:
+                continue
+            pixel_key = f"pixels/{cam}"
+            self.features[pixel_key] = PolicyFeature(type=FeatureType.VISUAL, shape=frame_shape)
+            # An entry already present (e.g. from the default_factory) wins;
+            # only cameras without a mapping get the default one.
+            self.features_map.setdefault(pixel_key, f"{OBS_IMAGES}.{cam}")
+
+        if self.obs_type == "pixels":
+            return
+        if self.obs_type != "pixels_agent_pos":
+            raise ValueError(
+                f"Unsupported obs_type '{self.obs_type}'. "
+                "RoboTwinEnvConfig supports 'pixels' and 'pixels_agent_pos'."
+            )
+        # 14 DOF: 7 per arm
+        self.features["agent_pos"] = PolicyFeature(type=FeatureType.STATE, shape=(14,))
+
+    @property
+    def gym_kwargs(self) -> dict:
+        """No extra gym kwargs; ``create_envs`` passes every setting explicitly."""
+        return {}
+
+    def create_envs(self, n_envs: int, use_async_envs: bool = True):
+        """Build the vectorized RoboTwin envs described by this config.
+
+        Raises:
+            ValueError: If ``task`` is empty.
+        """
+        from lerobot.envs.robotwin import create_robotwin_envs
+
+        if not self.task:
+            raise ValueError("RoboTwinEnvConfig requires `task` to be specified.")
+
+        vec_env_cls = _make_vec_env_cls(use_async_envs, n_envs)
+        requested_cams = [name.strip() for name in self.camera_names.split(",") if name.strip()]
+        return create_robotwin_envs(
+            task=self.task,
+            n_envs=n_envs,
+            env_cls=vec_env_cls,
+            camera_names=requested_cams,
+            observation_height=self.observation_height,
+            observation_width=self.observation_width,
+            episode_length=self.episode_length,
+        )
@@ -31,20 +31,7 @@ from libero.libero.envs import OffScreenRenderEnv

 from lerobot.types import RobotObservation

-from .utils import _LazyAsyncVectorEnv
-
-
-def _parse_camera_names(camera_name: str | Sequence[str]) -> list[str]:
-    """Normalize camera_name into a non-empty list of strings."""
-    if isinstance(camera_name, str):
-        cams = [c.strip() for c in camera_name.split(",") if c.strip()]
-    elif isinstance(camera_name, (list | tuple)):
-        cams = [str(c).strip() for c in camera_name if str(c).strip()]
-    else:
-        raise TypeError(f"camera_name must be str or sequence[str], got {type(camera_name).__name__}")
-    if not cams:
-        raise ValueError("camera_name resolved to an empty list.")
-    return cams
+from .utils import _LazyAsyncVectorEnv, parse_camera_names


 def _get_suite(name: str) -> benchmark.Benchmark:
@@ -128,7 +115,7 @@ class LiberoEnv(gym.Env):
        self.visualization_width = visualization_width
        self.visualization_height = visualization_height
        self.init_states = init_states
-        self.camera_name = _parse_camera_names(
+        self.camera_name = parse_camera_names(
            camera_name
        )  # agentview_image (main) or robot0_eye_in_hand_image (wrist)

@@ -437,7 +424,7 @@ def create_libero_envs(
    gym_kwargs = dict(gym_kwargs or {})
    task_ids_filter = gym_kwargs.pop("task_ids", None)  # optional: limit to specific tasks

-    camera_names = _parse_camera_names(camera_name)
+    camera_names = parse_camera_names(camera_name)
    suite_names = [s.strip() for s in str(task).split(",") if s.strip()]
    if not suite_names:
        raise ValueError("`task` must contain at least one LIBERO suite name.")
@@ -462,6 +449,7 @@ def create_libero_envs(
        # Probe once and reuse to avoid creating a temp env per task.
        cached_obs_space: spaces.Space | None = None
        cached_act_space: spaces.Space | None = None
+        cached_metadata: dict[str, Any] | None = None

        for tid in selected:
            fns = _make_env_fns(
@@ -477,10 +465,11 @@ def create_libero_envs(
                camera_name_mapping=camera_name_mapping,
            )
            if is_async:
-                lazy = _LazyAsyncVectorEnv(fns, cached_obs_space, cached_act_space)
+                lazy = _LazyAsyncVectorEnv(fns, cached_obs_space, cached_act_space, cached_metadata)
                if cached_obs_space is None:
                    cached_obs_space = lazy.observation_space
                    cached_act_space = lazy.action_space
+                    cached_metadata = lazy.metadata
                out[suite_name][tid] = lazy
            else:
                out[suite_name][tid] = env_cls(fns)
@@ -311,6 +311,7 @@ def create_metaworld_envs(
    is_async = env_cls is gym.vector.AsyncVectorEnv
    cached_obs_space = None
    cached_act_space = None
+    cached_metadata = None
    out: dict[str, dict[int, Any]] = defaultdict(dict)

    for group in task_groups:
@@ -324,10 +325,11 @@ def create_metaworld_envs(
            fns = [(lambda tn=task_name: MetaworldEnv(task=tn, **gym_kwargs)) for _ in range(n_envs)]

            if is_async:
-                lazy = _LazyAsyncVectorEnv(fns, cached_obs_space, cached_act_space)
+                lazy = _LazyAsyncVectorEnv(fns, cached_obs_space, cached_act_space, cached_metadata)
                if cached_obs_space is None:
                    cached_obs_space = lazy.observation_space
                    cached_act_space = lazy.action_space
+                    cached_metadata = lazy.metadata
                out[group][tid] = lazy
            else:
                out[group][tid] = env_cls(fns)
@@ -0,0 +1,425 @@
+#!/usr/bin/env python
+
+# Copyright 2025 The HuggingFace Inc. team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from __future__ import annotations
+
+import logging
+from collections import defaultdict
+from collections.abc import Callable, Sequence
+from functools import partial
+from typing import Any
+
+import gymnasium as gym
+import numpy as np
+from gymnasium import spaces
+
+from lerobot.types import RobotObservation
+
+from .utils import _LazyAsyncVectorEnv, parse_camera_names
+
+logger = logging.getLogger(__name__)
+
+# Dimensions for the flat action/state vectors used by the LeRobot wrapper.
+# These correspond to the PandaOmron robot in RoboCasa365.
+OBS_STATE_DIM = 16  # base_pos(3) + base_quat(4) + ee_pos_rel(3) + ee_quat_rel(4) + gripper_qpos(2)
+ACTION_DIM = 12  # base_motion(4) + control_mode(1) + ee_pos(3) + ee_rot(3) + gripper(1)
+# Lower/upper bounds of the `spaces.Box` action space declared by `RoboCasaEnv`.
+ACTION_LOW = -1.0
+ACTION_HIGH = 1.0
+
+# Default PandaOmron cameras. We surface these raw names directly as
+# `observation.images.<name>` so the LeRobot dataset/policy keys match
+# RoboCasa's native convention (no implicit renaming).
+DEFAULT_CAMERAS = [
+    "robot0_agentview_left",
+    "robot0_eye_in_hand",
+    "robot0_agentview_right",
+]
+
+# Object-mesh registries to sample from. RoboCasa's upstream default is
+# ("objaverse", "lightwheel"), but the objaverse pack is huge (~30GB) and
+# most users — including our CI image — only download the lightwheel pack
+# (`--type objs_lw` in `download_kitchen_assets`). When a sampled object
+# category has zero candidates in every registry, robocasa crashes with
+# `ValueError: Probabilities contain NaN` (0/0 divide in the probability
+# normalization). Restricting to registries that are actually on disk
+# avoids the NaN and matches what the asset download provides.
+DEFAULT_OBJ_REGISTRIES: tuple[str, ...] = ("lightwheel",)
+
+# Task-group shortcuts accepted as `--env.task`. When the user passes one of
+# these names, we expand it to the upstream RoboCasa task list and auto-set
+# the dataset split. Individual task names (optionally comma-separated) still
+# take precedence; this only triggers on an exact group-name match.
+# Maps group name -> split string returned by `_resolve_tasks` as the split
+# override for that group.
+_TASK_GROUP_SPLITS = {
+    "atomic_seen": "target",
+    "composite_seen": "target",
+    "composite_unseen": "target",
+    "pretrain50": "pretrain",
+    "pretrain100": "pretrain",
+    "pretrain200": "pretrain",
+    "pretrain300": "pretrain",
+}
+
+
+def _resolve_tasks(task: str) -> tuple[list[str], str | None]:
+    """Turn a `--env.task` value into ``(task_names, split_override)``.
+
+    A value that exactly matches a known task-group name (e.g. `atomic_seen`,
+    `pretrain100`) is expanded through
+    `robocasa.utils.dataset_registry.{TARGET,PRETRAINING}_TASKS` and paired
+    with that group's split. Anything else is read as a single task name or a
+    comma-separated list, and the split override is ``None``.
+
+    Raises:
+        ValueError: If a known group is missing from the installed robocasa
+            registry, or if `task` contains no task name at all.
+    """
+    group = task.strip()
+
+    if group not in _TASK_GROUP_SPLITS:
+        # Plain task name(s): split on commas and drop empty fragments.
+        task_names = [part.strip() for part in task.split(",") if part.strip()]
+        if not task_names:
+            raise ValueError("`task` must contain at least one RoboCasa task name.")
+        return task_names, None
+
+    # Imported lazily so robocasa is only required when a group is requested.
+    from robocasa.utils.dataset_registry import PRETRAINING_TASKS, TARGET_TASKS
+
+    registry = {**TARGET_TASKS, **PRETRAINING_TASKS}
+    if group not in registry:
+        raise ValueError(
+            f"Task group '{group}' is not available in this version of robocasa. "
+            f"Known groups: {sorted(registry)}."
+        )
+    return list(registry[group]), _TASK_GROUP_SPLITS[group]
+
+
+def convert_action(flat_action: np.ndarray) -> dict[str, Any]:
+    """Split a flat (12,) action vector into a RoboCasa action dict.
+
+    Layout: base_motion(4) + control_mode(1) + ee_pos(3) + ee_rot(3) + gripper(1)
+
+    Args:
+        flat_action: 1-D array holding exactly 12 values in the layout above.
+
+    Returns:
+        Dict mapping each ``action.*`` key to the matching slice of
+        ``flat_action`` (slices of the input, not copies).
+
+    Raises:
+        ValueError: If ``flat_action`` does not have shape ``(12,)``. Without
+            this check a short or batched input would be sliced silently into
+            truncated or empty components.
+    """
+    # (key, width) pairs in packing order; the widths sum to the action dim.
+    layout = (
+        ("action.base_motion", 4),
+        ("action.control_mode", 1),
+        ("action.end_effector_position", 3),
+        ("action.end_effector_rotation", 3),
+        ("action.gripper_close", 1),
+    )
+    expected_dim = sum(width for _, width in layout)
+
+    flat_action = np.asarray(flat_action)
+    if flat_action.shape != (expected_dim,):
+        raise ValueError(
+            f"Expected a flat action of shape ({expected_dim},), got shape {flat_action.shape}."
+        )
+
+    action_dict: dict[str, Any] = {}
+    start = 0
+    for key, width in layout:
+        action_dict[key] = flat_action[start : start + width]
+        start += width
+    return action_dict
+
+
+class RoboCasaEnv(gym.Env):
+    """LeRobot gym.Env wrapper for RoboCasa365 kitchen environments.
+
+    Wraps RoboCasaGymEnv from the robocasa package and converts its
+    dict-based observations and actions into the flat arrays LeRobot expects.
+    Raw RoboCasa camera names are preserved verbatim under `pixels/<cam>`.
+
+    The wrapped simulator is created lazily (see `_ensure_env`), so building
+    this object only declares the observation and action spaces.
+    """
+
+    metadata = {"render_modes": ["rgb_array"], "render_fps": 20}
+
+    def __init__(
+        self,
+        task: str,
+        camera_name: str | Sequence[str] = ",".join(DEFAULT_CAMERAS),
+        obs_type: str = "pixels_agent_pos",
+        render_mode: str = "rgb_array",
+        observation_width: int = 256,
+        observation_height: int = 256,
+        visualization_width: int = 512,
+        visualization_height: int = 512,
+        split: str | None = None,
+        episode_length: int | None = None,
+        obj_registries: Sequence[str] = DEFAULT_OBJ_REGISTRIES,
+        episode_index: int = 0,
+    ):
+        """Store the settings and declare the observation/action spaces.
+
+        Args:
+            task: RoboCasa env name passed to `RoboCasaGymEnv(env_name=...)`.
+            camera_name: Camera names, normalized by `parse_camera_names`.
+            obs_type: `"pixels"` or `"pixels_agent_pos"`.
+            render_mode: Stored on the instance; not otherwise used here.
+            observation_width: Camera frame width in pixels.
+            observation_height: Camera frame height in pixels.
+            visualization_width: Stored on the instance; not otherwise used here.
+            visualization_height: Stored on the instance; not otherwise used here.
+            split: Dataset split for the wrapped env; `None` becomes `"all"`.
+            episode_length: Value stored as `_max_episode_steps` (1000 if `None`).
+            obj_registries: Object-mesh registries forwarded to the wrapped env.
+            episode_index: Per-worker offset added to the reset seed.
+
+        Raises:
+            ValueError: If `obs_type` is not supported.
+        """
+        super().__init__()
+        self.task = task
+        self.obs_type = obs_type
+        self.render_mode = render_mode
+        self.observation_width = observation_width
+        self.observation_height = observation_height
+        self.visualization_width = visualization_width
+        self.visualization_height = visualization_height
+        self.split = split
+        self.obj_registries = tuple(obj_registries)
+        # Per-worker index (0..n_envs-1) used to spread the user-provided
+        # seed across factories so each sub-env explores a distinct layout
+        # even when the same seed is passed to `reset()`.
+        self.episode_index = int(episode_index)
+
+        self.camera_name = parse_camera_names(camera_name)
+
+        # Only stored here; this class does not count steps itself.
+        self._max_episode_steps = episode_length if episode_length is not None else 1000
+
+        # Deferred — created on first reset() inside the worker subprocess
+        # to avoid inheriting stale GPU/EGL contexts across fork().
+        self._env: Any = None
+        self.task_description = ""
+
+        images = {
+            cam: spaces.Box(
+                low=0,
+                high=255,
+                shape=(self.observation_height, self.observation_width, 3),
+                dtype=np.uint8,
+            )
+            for cam in self.camera_name
+        }
+
+        if self.obs_type == "pixels":
+            self.observation_space = spaces.Dict({"pixels": spaces.Dict(images)})
+        elif self.obs_type == "pixels_agent_pos":
+            self.observation_space = spaces.Dict(
+                {
+                    "pixels": spaces.Dict(images),
+                    "agent_pos": spaces.Box(
+                        low=-np.inf,
+                        high=np.inf,
+                        shape=(OBS_STATE_DIM,),
+                        dtype=np.float32,
+                    ),
+                }
+            )
+        else:
+            raise ValueError(f"Unsupported obs_type '{self.obs_type}'. Use 'pixels' or 'pixels_agent_pos'.")
+
+        self.action_space = spaces.Box(
+            low=ACTION_LOW,
+            high=ACTION_HIGH,
+            shape=(ACTION_DIM,),
+            dtype=np.float32,
+        )
+
+    def _ensure_env(self) -> None:
+        """Create the underlying RoboCasaGymEnv on first use.
+
+        Called inside the worker subprocess after fork(), so each worker gets
+        its own clean rendering context rather than inheriting a stale one from
+        the parent process (which causes crashes with AsyncVectorEnv).
+        Also runs again after `close()`, which drops the previous instance.
+        """
+        if self._env is not None:
+            return
+        from robocasa.wrappers.gym_wrapper import RoboCasaGymEnv
+
+        # RoboCasaGymEnv defaults split="test", which create_env rejects
+        # (only None/"all"/"pretrain"/"target" are valid). Always pass a
+        # valid value so we don't hit that default. Extra kwargs are
+        # forwarded to the underlying kitchen env via create_env/robosuite.make.
+        self._env = RoboCasaGymEnv(
+            env_name=self.task,
+            camera_widths=self.observation_width,
+            camera_heights=self.observation_height,
+            split=self.split if self.split is not None else "all",
+            obj_registries=self.obj_registries,
+        )
+
+        ep_meta = self._env.env.get_ep_meta()
+        self.task_description = ep_meta.get("lang", self.task)
+
+    def _format_raw_obs(self, raw_obs: dict) -> RobotObservation:
+        """Convert RoboCasaGymEnv observation dict to LeRobot format.
+
+        Returns `{"pixels": {...}}` for `obs_type="pixels"`, otherwise the
+        same dict plus a float32 `agent_pos` vector.
+        """
+        # RoboCasaGymEnv emits camera frames under "video.<cam>".
+        # NOTE(review): a requested camera that is absent from raw_obs is
+        # dropped silently, so the result may not match observation_space.
+        images = {cam: raw_obs[f"video.{cam}"] for cam in self.camera_name if f"video.{cam}" in raw_obs}
+
+        if self.obs_type == "pixels":
+            return {"pixels": images}
+
+        # `state.*` keys come from PandaOmronKeyConverter inside the wrapper.
+        # Missing keys fall back to zeros of the expected width so the
+        # concatenated vector keeps its 3+4+3+4+2 = 16 layout.
+        agent_pos = np.concatenate(
+            [
+                raw_obs.get("state.base_position", np.zeros(3)),
+                raw_obs.get("state.base_rotation", np.zeros(4)),
+                raw_obs.get("state.end_effector_position_relative", np.zeros(3)),
+                raw_obs.get("state.end_effector_rotation_relative", np.zeros(4)),
+                raw_obs.get("state.gripper_qpos", np.zeros(2)),
+            ],
+            axis=-1,
+        ).astype(np.float32)
+
+        return {"pixels": images, "agent_pos": agent_pos}
+
+    def render(self) -> np.ndarray:
+        """Return the wrapped env's rendered frame, creating the env if needed."""
+        self._ensure_env()
+        assert self._env is not None
+        return self._env.render()
+
+    def reset(self, seed=None, **kwargs):
+        """Reset the wrapped env and return `(observation, info)`.
+
+        Args:
+            seed: Optional base seed; `episode_index` is added to it.
+            **kwargs: Accepted for API compatibility and ignored.
+
+        Returns:
+            The formatted observation and a fresh `{"is_success": False}`
+            info dict. The wrapped env's own reset info is not forwarded.
+        """
+        self._ensure_env()
+        assert self._env is not None
+        super().reset(seed=seed)
+        # Spread the seed across workers so n_envs factories don't all
+        # roll the same scene. With an explicit user seed we shift it by
+        # episode_index; with no seed we fall back to episode_index so
+        # each worker is still distinct rather than inheriting the same
+        # global RNG state.
+        worker_seed = seed + self.episode_index if seed is not None else self.episode_index
+        raw_obs, _ = self._env.reset(seed=worker_seed)
+
+        # Refresh the task description from the wrapped env after the reset.
+        ep_meta = self._env.env.get_ep_meta()
+        self.task_description = ep_meta.get("lang", self.task)
+
+        return self._format_raw_obs(raw_obs), {"is_success": False}
+
+    def step(self, action: np.ndarray) -> tuple[RobotObservation, float, bool, bool, dict[str, Any]]:
+        """Apply one flat action and return the Gymnasium 5-tuple.
+
+        The episode is reported as terminated when the wrapped env is done or
+        reports success. In that case `info["final_info"]` is filled and the
+        env is reset immediately; the returned observation is still the one
+        produced by this step, not the post-reset one.
+
+        Raises:
+            ValueError: If `action` is not 1-D.
+        """
+        self._ensure_env()
+        assert self._env is not None
+        if action.ndim != 1:
+            raise ValueError(
+                f"Expected action to be 1-D (shape (action_dim,)), "
+                f"but got shape {action.shape} with ndim={action.ndim}"
+            )
+
+        action_dict = convert_action(action)
+        raw_obs, reward, done, truncated, info = self._env.step(action_dict)
+
+        is_success = bool(info.get("success", False))
+        terminated = done or is_success
+        info.update({"task": self.task, "done": done, "is_success": is_success})
+
+        observation = self._format_raw_obs(raw_obs)
+        if terminated:
+            info["final_info"] = {
+                "task": self.task,
+                "done": bool(done),
+                "is_success": bool(is_success),
+            }
+            self.reset()
+
+        return observation, reward, terminated, truncated, info
+
+    def close(self):
+        """Close the wrapped env, if any; safe to call more than once.
+
+        The handle is cleared before closing so a repeated `close()` is a
+        no-op and a later `reset()`/`step()`/`render()` builds a fresh env
+        instead of reusing a closed one.
+        """
+        env, self._env = self._env, None
+        if env is not None:
+            env.close()
+
+
+def _make_env_fns(
+    *,
+    task: str,
+    n_envs: int,
+    camera_names: list[str],
+    obs_type: str,
+    render_mode: str,
+    observation_width: int,
+    observation_height: int,
+    visualization_width: int,
+    visualization_height: int,
+    split: str | None,
+    episode_length: int | None,
+    obj_registries: Sequence[str],
+) -> list[Callable[[], RoboCasaEnv]]:
+    """Return ``n_envs`` zero-argument factories for one task.
+
+    The factories differ only in ``episode_index`` (``0..n_envs-1``), which
+    ``RoboCasaEnv.reset()`` adds to the user-provided seed so every worker
+    gets its own seed series.
+    """
+    # Settings shared by every worker of this task.
+    shared_kwargs = {
+        "task": task,
+        "camera_name": camera_names,
+        "obs_type": obs_type,
+        "render_mode": render_mode,
+        "observation_width": observation_width,
+        "observation_height": observation_height,
+        "visualization_width": visualization_width,
+        "visualization_height": visualization_height,
+        "split": split,
+        "episode_length": episode_length,
+        "obj_registries": obj_registries,
+    }
+    return [
+        partial(RoboCasaEnv, episode_index=worker_index, **shared_kwargs) for worker_index in range(n_envs)
+    ]
+
+
+def create_robocasa_envs(
+    task: str,
+    n_envs: int,
+    gym_kwargs: dict[str, Any] | None = None,
+    camera_name: str | Sequence[str] = ",".join(DEFAULT_CAMERAS),
+    env_cls: Callable[[Sequence[Callable[[], Any]]], Any] | None = None,
+    episode_length: int | None = None,
+    obj_registries: Sequence[str] = DEFAULT_OBJ_REGISTRIES,
+) -> dict[str, dict[int, Any]]:
+    """Build one vectorized RoboCasa365 environment per requested task.
+
+    Returns:
+        dict[task_name][task_id] -> vec_env, where task_id is always 0 and
+        each vec_env wraps exactly n_envs factories.
+
+    `task` can be:
+      - a single task name (e.g. `CloseFridge`)
+      - a comma-separated list of task names (e.g. `CloseFridge,PickPlaceCoffee`)
+      - a benchmark-group shortcut (`atomic_seen`, `composite_seen`,
+        `composite_unseen`, `pretrain50`, `pretrain100`, `pretrain200`,
+        `pretrain300`), which auto-expands to the upstream task list and
+        auto-sets the dataset `split` ("target" or "pretrain") unless
+        `gym_kwargs` already carries a `split`.
+
+    Raises:
+        ValueError: If `env_cls` is not callable or `n_envs` is not a
+            positive int.
+    """
+    # `None` is not callable, so this single check also covers a missing env_cls.
+    if not callable(env_cls):
+        raise ValueError("env_cls must be a callable that wraps a list of environment factory callables.")
+    if not (isinstance(n_envs, int) and n_envs > 0):
+        raise ValueError(f"n_envs must be a positive int; got {n_envs}.")
+
+    # Pull the recognized options out of a copy of gym_kwargs, falling back
+    # to the wrapper defaults; unrecognized keys are left behind and ignored.
+    options = dict(gym_kwargs or {})
+    option_defaults = {
+        "obs_type": "pixels_agent_pos",
+        "render_mode": "rgb_array",
+        "observation_width": 256,
+        "observation_height": 256,
+        "visualization_width": 512,
+        "visualization_height": 512,
+        "split": None,
+    }
+    env_kwargs = {name: options.pop(name, default) for name, default in option_defaults.items()}
+
+    env_kwargs["camera_names"] = parse_camera_names(camera_name)
+    task_names, group_split = _resolve_tasks(str(task))
+    # An explicit split wins over the one implied by a task group.
+    if env_kwargs["split"] is None and group_split is not None:
+        env_kwargs["split"] = group_split
+
+    logger.info(
+        "Creating RoboCasa envs | tasks=%s | split=%s | n_envs(per task)=%d",
+        task_names,
+        env_kwargs["split"],
+        n_envs,
+    )
+
+    use_lazy_async = env_cls is gym.vector.AsyncVectorEnv
+
+    # Spaces/metadata read from the first lazy env are handed to the later ones.
+    shared_obs_space: spaces.Space | None = None
+    shared_act_space: spaces.Space | None = None
+    shared_metadata: dict[str, Any] | None = None
+    envs_by_task: dict[str, dict[int, Any]] = {}
+
+    for task_name in task_names:
+        factories = _make_env_fns(
+            task=task_name,
+            n_envs=n_envs,
+            episode_length=episode_length,
+            obj_registries=obj_registries,
+            **env_kwargs,
+        )
+
+        if use_lazy_async:
+            vec_env = _LazyAsyncVectorEnv(factories, shared_obs_space, shared_act_space, shared_metadata)
+            if shared_obs_space is None:
+                shared_obs_space = vec_env.observation_space
+                shared_act_space = vec_env.action_space
+                shared_metadata = vec_env.metadata
+        else:
+            vec_env = env_cls(factories)
+
+        envs_by_task[task_name] = {0: vec_env}
+        logger.info("Built vec env | task=%s | n_envs=%d", task_name, n_envs)
+
+    return envs_by_task
@@ -0,0 +1,488 @@
+#!/usr/bin/env python
+
+# Copyright 2025 The HuggingFace Inc. team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from __future__ import annotations
+
+import importlib
+import logging
+from collections import defaultdict
+from collections.abc import Callable, Sequence
+from functools import partial
+from typing import Any
+
+import gymnasium as gym
+import numpy as np
+import torch
+from gymnasium import spaces
+
+from lerobot.types import RobotObservation
+
+from .utils import _LazyAsyncVectorEnv
+
+logger = logging.getLogger(__name__)
+
+# Camera names as used by RoboTwin 2.0. The wrapper looks each name up in the
+# "observation" section of get_obs() output and reads that camera's "rgb" entry
+# (e.g. raw["observation"]["head_camera"]["rgb"]).
+ROBOTWIN_CAMERA_NAMES: tuple[str, ...] = (
+    "head_camera",
+    "left_camera",
+    "right_camera",
+)
+
+ACTION_DIM = 14  # 7 DOF × 2 arms
+# Bounds of the Box action space exposed by RoboTwinEnv.
+ACTION_LOW = -1.0
+ACTION_HIGH = 1.0
+# Default number of steps after which RoboTwinEnv.step() reports truncation.
+DEFAULT_EPISODE_LENGTH = 300
+# D435 dims from task_config/_camera_config.yml (what demo_clean.yml selects).
+DEFAULT_CAMERA_H = 240
+DEFAULT_CAMERA_W = 320
+
+# Task list from RoboTwin 2.0's `envs/` directory — mirrors upstream exactly
+# (50 tasks as of main; earlier revisions had 60 with a different split).
+# create_robotwin_envs() rejects any requested task name that is not listed here.
+# Keep this in sync with:
+#   gh api /repos/RoboTwin-Platform/RoboTwin/contents/envs --paginate \
+#     | jq -r '.[].name' | grep -E '\.py$' | grep -v '^_' | sed 's/\.py$//'
+ROBOTWIN_TASKS: tuple[str, ...] = (
+    "adjust_bottle",
+    "beat_block_hammer",
+    "blocks_ranking_rgb",
+    "blocks_ranking_size",
+    "click_alarmclock",
+    "click_bell",
+    "dump_bin_bigbin",
+    "grab_roller",
+    "handover_block",
+    "handover_mic",
+    "hanging_mug",
+    "lift_pot",
+    "move_can_pot",
+    "move_pillbottle_pad",
+    "move_playingcard_away",
+    "move_stapler_pad",
+    "open_laptop",
+    "open_microwave",
+    "pick_diverse_bottles",
+    "pick_dual_bottles",
+    "place_a2b_left",
+    "place_a2b_right",
+    "place_bread_basket",
+    "place_bread_skillet",
+    "place_burger_fries",
+    "place_can_basket",
+    "place_cans_plasticbox",
+    "place_container_plate",
+    "place_dual_shoes",
+    "place_empty_cup",
+    "place_fan",
+    "place_mouse_pad",
+    "place_object_basket",
+    "place_object_scale",
+    "place_object_stand",
+    "place_phone_stand",
+    "place_shoe",
+    "press_stapler",
+    "put_bottles_dustbin",
+    "put_object_cabinet",
+    "rotate_qrcode",
+    "scan_object",
+    "shake_bottle",
+    "shake_bottle_horizontally",
+    "stack_blocks_three",
+    "stack_blocks_two",
+    "stack_bowls_three",
+    "stack_bowls_two",
+    "stamp_seal",
+    "turn_switch",
+)
+
+
+# Memo of the setup_demo kwargs assembled by _load_robotwin_setup_kwargs(),
+# keyed by task name, so the YAML config files are parsed only once per task.
+_ROBOTWIN_SETUP_CACHE: dict[str, dict[str, Any]] = {}
+
+
+def _load_robotwin_setup_kwargs(task_name: str) -> dict[str, Any]:
+    """Build the kwargs dict RoboTwin's setup_demo expects.
+
+    Mirrors the config loading done by RoboTwin's ``script/eval_policy.py``:
+    reads ``task_config/demo_clean.yml``, resolves the embodiment file from
+    ``_embodiment_config.yml``, loads the robot's own ``config.yml``, and
+    reads camera dimensions from ``_camera_config.yml``.
+
+    Uses ``aloha-agilex`` single-robot dual-arm by default (the only embodiment
+    used by beat_block_hammer and most smoke-test tasks).
+    """
+    if task_name in _ROBOTWIN_SETUP_CACHE:
+        return dict(_ROBOTWIN_SETUP_CACHE[task_name])
+
+    import os
+
+    import yaml  # type: ignore[import-untyped]
+    from envs import CONFIGS_PATH  # type: ignore[import-not-found]
+
+    task_config = "demo_clean"
+    with open(os.path.join(CONFIGS_PATH, f"{task_config}.yml"), encoding="utf-8") as f:
+        args = yaml.safe_load(f)
+
+    # Resolve embodiment — demo_clean.yml uses [aloha-agilex] (dual-arm single robot)
+    with open(os.path.join(CONFIGS_PATH, "_embodiment_config.yml"), encoding="utf-8") as f:
+        embodiment_types = yaml.safe_load(f)
+    embodiment = args.get("embodiment", ["aloha-agilex"])
+    if len(embodiment) == 1:
+        robot_file = embodiment_types[embodiment[0]]["file_path"]
+        args["left_robot_file"] = robot_file
+        args["right_robot_file"] = robot_file
+        args["dual_arm_embodied"] = True
+    elif len(embodiment) == 3:
+        args["left_robot_file"] = embodiment_types[embodiment[0]]["file_path"]
+        args["right_robot_file"] = embodiment_types[embodiment[1]]["file_path"]
+        args["embodiment_dis"] = embodiment[2]
+        args["dual_arm_embodied"] = False
+    else:
+        raise ValueError(f"embodiment must have 1 or 3 items, got {len(embodiment)}")
+
+    with open(os.path.join(args["left_robot_file"], "config.yml"), encoding="utf-8") as f:
+        args["left_embodiment_config"] = yaml.safe_load(f)
+    with open(os.path.join(args["right_robot_file"], "config.yml"), encoding="utf-8") as f:
+        args["right_embodiment_config"] = yaml.safe_load(f)
+
+    # Camera dimensions
+    with open(os.path.join(CONFIGS_PATH, "_camera_config.yml"), encoding="utf-8") as f:
+        camera_config = yaml.safe_load(f)
+    head_cam = args["camera"]["head_camera_type"]
+    args["head_camera_h"] = camera_config[head_cam]["h"]
+    args["head_camera_w"] = camera_config[head_cam]["w"]
+
+    # Headless overrides
+    args["render_freq"] = 0
+    args["task_name"] = task_name
+    args["task_config"] = task_config
+
+    _ROBOTWIN_SETUP_CACHE[task_name] = args
+    return dict(args)
+
+
+def _load_robotwin_task(task_name: str) -> type:
+    """Dynamically import and return a RoboTwin 2.0 task class.
+
+    RoboTwin tasks live in ``envs/<task_name>.py`` relative to the repository
+    root and are expected to be on ``sys.path`` after installation.
+    """
+    try:
+        module = importlib.import_module(f"envs.{task_name}")
+    except ModuleNotFoundError as e:
+        raise ModuleNotFoundError(
+            f"Could not import RoboTwin task '{task_name}'. "
+            "Ensure RoboTwin 2.0 is installed and its 'envs/' directory is on PYTHONPATH. "
+            "See the RoboTwin installation guide: https://robotwin-platform.github.io/doc/usage/robotwin-install.html"
+        ) from e
+    task_cls = getattr(module, task_name, None)
+    if task_cls is None:
+        raise AttributeError(f"Task class '{task_name}' not found in envs/{task_name}.py")
+    return task_cls
+
+
+class RoboTwinEnv(gym.Env):
+    """Gymnasium wrapper around a single RoboTwin 2.0 task.
+
+    RoboTwin uses a custom SAPIEN-based API (``setup_demo`` / ``get_obs`` /
+    ``take_action`` / ``check_success``) rather than the standard gym interface.
+    This class bridges that API to Gymnasium so that ``lerobot-eval`` can drive
+    RoboTwin exactly like LIBERO or Meta-World.
+
+    The underlying SAPIEN environment is created lazily on the first ``reset()``
+    call *inside the worker process*.  This is required for
+    ``gym.vector.AsyncVectorEnv`` compatibility: SAPIEN allocates EGL/GPU
+    contexts that must not be forked from the parent process.
+
+    Observations
+    ------------
+    The ``pixels`` dict uses the raw RoboTwin camera names as keys (e.g.
+    ``"head_camera"``, ``"left_camera"``). ``preprocess_observation`` in
+    ``envs/utils.py`` then converts these to ``observation.images.<cam>``.
+
+    Actions
+    -------
+    14-dim float32 array in ``[-1, 1]`` (joint-space, 7 DOF per arm).
+
+    Autograd
+    --------
+    ``setup_demo`` and ``take_action`` drive CuRobo's Newton trajectory
+    optimizer, which calls ``cost.backward()`` internally. lerobot_eval wraps
+    the rollout in ``torch.no_grad()``, so both call sites re-enable grad.
+    """
+
+    metadata = {"render_modes": ["rgb_array"], "render_fps": 25}
+
+    def __init__(
+        self,
+        task_name: str,
+        episode_index: int = 0,
+        n_envs: int = 1,
+        camera_names: Sequence[str] = ROBOTWIN_CAMERA_NAMES,
+        observation_height: int | None = None,
+        observation_width: int | None = None,
+        episode_length: int = DEFAULT_EPISODE_LENGTH,
+        render_mode: str = "rgb_array",
+    ):
+        """Record the configuration and declare the spaces; no simulator is built here.
+
+        Args:
+            task_name: RoboTwin task to run; also the source of ``task_description``.
+            episode_index: Seed used by the first ``reset()`` that gets no explicit seed.
+            n_envs: Amount added to ``episode_index`` after every ``reset()``.
+            camera_names: Cameras exposed under ``pixels``.
+            observation_height: Declared image height; falsy values select ``DEFAULT_CAMERA_H``.
+            observation_width: Declared image width; falsy values select ``DEFAULT_CAMERA_W``.
+            episode_length: Step count at which ``step()`` reports truncation.
+            render_mode: Stored on the instance as ``render_mode``.
+        """
+        super().__init__()
+        self.task_name = task_name
+        self.task = task_name  # used by add_envs_task() in utils.py
+        self.task_description = task_name.replace("_", " ")
+        self.episode_index = episode_index
+        self._reset_stride = n_envs
+        self.camera_names = list(camera_names)
+        # Default to D435 dims (the camera type baked into task_config/demo_clean.yml).
+        # The YAML-driven lookup is deferred to reset() so construction doesn't
+        # import RoboTwin's `envs` module — fast-tests run without RoboTwin installed.
+        # NOTE(review): reset() loads the YAML config but never updates these two
+        # attributes, and _get_obs() does not resize frames — confirm the simulator
+        # output always matches the declared observation space.
+        self.observation_height = observation_height or DEFAULT_CAMERA_H
+        self.observation_width = observation_width or DEFAULT_CAMERA_W
+        self.episode_length = episode_length
+        self._max_episode_steps = episode_length  # lerobot_eval.rollout reads this
+        self.render_mode = render_mode
+
+        self._env: Any | None = None  # deferred — created on first reset() inside worker
+        self._step_count: int = 0
+        # Placeholder image handed out by _get_obs() for cameras without RGB data.
+        # The same array object is reused for every such camera.
+        self._black_frame = np.zeros((self.observation_height, self.observation_width, 3), dtype=np.uint8)
+
+        image_spaces = {
+            cam: spaces.Box(
+                low=0,
+                high=255,
+                shape=(self.observation_height, self.observation_width, 3),
+                dtype=np.uint8,
+            )
+            for cam in self.camera_names
+        }
+        self.observation_space = spaces.Dict(
+            {
+                "pixels": spaces.Dict(image_spaces),
+                "agent_pos": spaces.Box(low=-np.inf, high=np.inf, shape=(ACTION_DIM,), dtype=np.float32),
+            }
+        )
+        self.action_space = spaces.Box(
+            low=ACTION_LOW, high=ACTION_HIGH, shape=(ACTION_DIM,), dtype=np.float32
+        )
+
+    def _ensure_env(self) -> None:
+        """Create the SAPIEN environment on first use.
+
+        Called inside the worker subprocess after fork(), so each worker gets
+        its own EGL/GPU context rather than inheriting a stale one from the
+        parent process (which causes crashes with AsyncVectorEnv).
+        """
+        if self._env is not None:
+            return
+        task_cls = _load_robotwin_task(self.task_name)
+        self._env = task_cls()
+
+    def _get_obs(self) -> RobotObservation:
+        """Convert the simulator's raw observation into the wrapper's format.
+
+        Returns:
+            A dict with ``"pixels"`` (one uint8 image per configured camera) and
+            ``"agent_pos"`` (float32 vector of length ``ACTION_DIM``).
+
+        A camera that is absent from the raw observation, or has no ``"rgb"``
+        entry, is filled with the black placeholder frame. ``agent_pos`` is the
+        first ``ACTION_DIM`` values of ``raw["joint_action"]["vector"]``; it is
+        all zeros when that vector is missing or shorter than ``ACTION_DIM``.
+        """
+        assert self._env is not None, "_get_obs called before _ensure_env()"
+        raw = self._env.get_obs()
+        cameras_raw = raw.get("observation", {})
+
+        images: dict[str, np.ndarray] = {}
+        for cam in self.camera_names:
+            cam_data = cameras_raw.get(cam)
+            img = cam_data.get("rgb") if cam_data else None
+            if img is None:
+                images[cam] = self._black_frame
+                continue
+            img = np.asarray(img, dtype=np.uint8)
+            if img.ndim == 2:
+                # Single-channel image: replicate it into three channels.
+                img = np.stack([img, img, img], axis=-1)
+            elif img.shape[-1] != 3:
+                # Keep only the first three channels (e.g. drop an alpha channel).
+                img = img[..., :3]
+            images[cam] = img
+
+        ja = raw.get("joint_action") or {}
+        vec = ja.get("vector")
+        if vec is not None:
+            arr = np.asarray(vec, dtype=np.float32).ravel()
+            joint_state = (
+                arr[:ACTION_DIM] if arr.size >= ACTION_DIM else np.zeros(ACTION_DIM, dtype=np.float32)
+            )
+        else:
+            joint_state = np.zeros(ACTION_DIM, dtype=np.float32)
+
+        return {"pixels": images, "agent_pos": joint_state}
+
+    def reset(self, seed: int | None = None, **kwargs) -> tuple[RobotObservation, dict]:
+        """Start a new episode and return its first observation.
+
+        Args:
+            seed: Seed passed to ``setup_demo``. When ``None``, the current
+                ``episode_index`` is used instead.
+            **kwargs: Accepted for interface compatibility; not used here.
+
+        Returns:
+            ``(observation, info)`` where ``info`` holds ``is_success=False`` and
+            the task name.
+        """
+        self._ensure_env()
+        super().reset(seed=seed)
+        assert self._env is not None  # set by _ensure_env() above
+
+        actual_seed = self.episode_index if seed is None else seed
+        setup_kwargs = _load_robotwin_setup_kwargs(self.task_name)
+        setup_kwargs.update(seed=actual_seed, is_test=True)
+        # Grad is re-enabled because the caller may be running under torch.no_grad()
+        # (see the "Autograd" section of the class docstring).
+        with torch.enable_grad():
+            self._env.setup_demo(**setup_kwargs)
+        # Advance by the stride given as n_envs so the next seedless reset uses a new index.
+        self.episode_index += self._reset_stride
+        self._step_count = 0
+
+        obs = self._get_obs()
+        return obs, {"is_success": False, "task": self.task_name}
+
+    def step(self, action: np.ndarray) -> tuple[RobotObservation, float, bool, bool, dict[str, Any]]:
+        """Apply one action and report the outcome.
+
+        Args:
+            action: 1-D array of length ``ACTION_DIM``.
+
+        Returns:
+            ``(observation, reward, terminated, truncated, info)``. ``reward`` is
+            ``1.0`` on success and ``0.0`` otherwise; ``terminated`` mirrors success;
+            ``truncated`` is set once ``episode_length`` steps have been taken.
+
+        Raises:
+            ValueError: If ``action`` is not 1-D with ``ACTION_DIM`` entries.
+
+        When the episode ends, ``info["final_info"]`` is filled in and the
+        environment is reset internally; the returned observation is the one
+        captured *before* that reset.
+        """
+        assert self._env is not None, "step() called before reset()"
+        if action.ndim != 1 or action.shape[0] != ACTION_DIM:
+            raise ValueError(f"Expected 1-D action of shape ({ACTION_DIM},), got {action.shape}")
+
+        with torch.enable_grad():
+            # Prefer RoboTwin's take_action(); fall back to step() when it is absent.
+            if hasattr(self._env, "take_action"):
+                self._env.take_action(action)
+            else:
+                self._env.step(action)
+
+        self._step_count += 1
+
+        # Use the eval_success attribute if it is set; otherwise ask check_success().
+        is_success = bool(getattr(self._env, "eval_success", False))
+        if not is_success and hasattr(self._env, "check_success"):
+            is_success = bool(self._env.check_success())
+
+        obs = self._get_obs()
+        reward = float(is_success)
+        terminated = is_success
+        truncated = self._step_count >= self.episode_length
+
+        info: dict[str, Any] = {
+            "task": self.task_name,
+            "is_success": is_success,
+            "step": self._step_count,
+        }
+        if terminated or truncated:
+            info["final_info"] = {
+                "task": self.task_name,
+                "is_success": is_success,
+            }
+            # Auto-reset; the observation produced by this reset is discarded.
+            self.reset()
+
+        return obs, reward, terminated, truncated, info
+
+    def render(self) -> np.ndarray:
+        """Return the current frame of ``head_camera``, or of the first configured camera."""
+        self._ensure_env()
+        obs = self._get_obs()
+        # Prefer head camera for rendering; fall back to first available.
+        if "head_camera" in obs["pixels"]:
+            return obs["pixels"]["head_camera"]
+        return next(iter(obs["pixels"].values()))
+
+    def close(self) -> None:
+        """Release the simulator, if one was created, and forget the reference.
+
+        ``close_env()`` is called when the task object provides it; a ``TypeError``
+        raised by that call is suppressed, any other exception propagates.
+        """
+        if self._env is not None:
+            if hasattr(self._env, "close_env"):
+                import contextlib
+
+                with contextlib.suppress(TypeError):
+                    self._env.close_env()
+            self._env = None
+
+
+# ---- Multi-task factory --------------------------------------------------------
+
+
+def _make_env_fns(
+    *,
+    task_name: str,
+    n_envs: int,
+    camera_names: list[str],
+    observation_height: int,
+    observation_width: int,
+    episode_length: int,
+) -> list[Callable[[], RoboTwinEnv]]:
+    """Return n_envs factory callables for a single task."""
+
+    def _make_one(episode_index: int) -> RoboTwinEnv:
+        return RoboTwinEnv(
+            task_name=task_name,
+            episode_index=episode_index,
+            n_envs=n_envs,
+            camera_names=camera_names,
+            observation_height=observation_height,
+            observation_width=observation_width,
+            episode_length=episode_length,
+        )
+
+    return [partial(_make_one, i) for i in range(n_envs)]
+
+
+def create_robotwin_envs(
+    task: str,
+    n_envs: int,
+    env_cls: Callable[[Sequence[Callable[[], Any]]], Any] | None = None,
+    camera_names: Sequence[str] = ROBOTWIN_CAMERA_NAMES,
+    observation_height: int = DEFAULT_CAMERA_H,
+    observation_width: int = DEFAULT_CAMERA_W,
+    episode_length: int = DEFAULT_EPISODE_LENGTH,
+) -> dict[str, dict[int, Any]]:
+    """Create vectorized RoboTwin 2.0 environments.
+
+    Returns:
+        ``dict[task_name][0] -> VectorEnv`` — one entry per task, each wrapping
+        ``n_envs`` parallel rollouts.
+
+    Args:
+        task: Comma-separated list of task names (e.g. ``"beat_block_hammer"``
+            or ``"beat_block_hammer,click_bell"``).
+        n_envs: Number of parallel rollouts per task.
+        env_cls: Vector env constructor (e.g. ``gym.vector.AsyncVectorEnv``).
+        camera_names: Cameras to include in observations.
+        observation_height: Pixel height for all cameras.
+        observation_width: Pixel width for all cameras.
+        episode_length: Max steps before truncation.
+    """
+    if env_cls is None or not callable(env_cls):
+        raise ValueError("env_cls must be callable (e.g. gym.vector.AsyncVectorEnv).")
+    if not isinstance(n_envs, int) or n_envs <= 0:
+        raise ValueError(f"n_envs must be a positive int; got {n_envs}.")
+
+    task_names = [t.strip() for t in str(task).split(",") if t.strip()]
+    if not task_names:
+        raise ValueError("`task` must contain at least one RoboTwin task name.")
+
+    unknown = [t for t in task_names if t not in ROBOTWIN_TASKS]
+    if unknown:
+        raise ValueError(f"Unknown RoboTwin tasks: {unknown}. Available tasks: {sorted(ROBOTWIN_TASKS)}")
+
+    logger.info(
+        "Creating RoboTwin envs | tasks=%s | n_envs(per task)=%d",
+        task_names,
+        n_envs,
+    )
+
+    is_async = env_cls is gym.vector.AsyncVectorEnv
+    cached_obs_space: spaces.Space | None = None
+    cached_act_space: spaces.Space | None = None
+    cached_metadata: dict[str, Any] | None = None
+
+    out: dict[str, dict[int, Any]] = defaultdict(dict)
+    for task_name in task_names:
+        fns = _make_env_fns(
+            task_name=task_name,
+            n_envs=n_envs,
+            camera_names=list(camera_names),
+            observation_height=observation_height,
+            observation_width=observation_width,
+            episode_length=episode_length,
+        )
+        if is_async:
+            lazy = _LazyAsyncVectorEnv(fns, cached_obs_space, cached_act_space, cached_metadata)
+            if cached_obs_space is None:
+                cached_obs_space = lazy.observation_space
+                cached_act_space = lazy.action_space
+                cached_metadata = lazy.metadata
+            out[task_name][0] = lazy
+        else:
+            out[task_name][0] = env_cls(fns)
+        logger.info("Built vec env | task=%s | n_envs=%d", task_name, n_envs)
+
+    return {k: dict(v) for k, v in out.items()}
@@ -34,6 +34,25 @@ from lerobot.utils.utils import get_channel_first_image_shape
 from .configs import EnvConfig


+def parse_camera_names(camera_name: str | Sequence[str]) -> list[str]:
+    """Normalize ``camera_name`` into a non-empty list of strings.
+
+    Accepts a comma-separated string (``"cam_a,cam_b"``) or a sequence of
+    strings (tuples/lists). Whitespace is stripped; empty entries are
+    dropped. Raises ``TypeError`` for unsupported input types and
+    ``ValueError`` when the normalized list is empty.
+    """
+    if isinstance(camera_name, str):
+        cams = [c.strip() for c in camera_name.split(",") if c.strip()]
+    elif isinstance(camera_name, (list | tuple)):
+        cams = [str(c).strip() for c in camera_name if str(c).strip()]
+    else:
+        raise TypeError(f"camera_name must be str or sequence[str], got {type(camera_name).__name__}")
+    if not cams:
+        raise ValueError("camera_name resolved to an empty list.")
+    return cams
+
+
 def _convert_nested_dict(d):
    result = {}
    for k, v in d.items():
@@ -153,17 +172,20 @@ class _LazyAsyncVectorEnv:
        env_fns: list[Callable],
        observation_space=None,
        action_space=None,
+        metadata=None,
    ):
        self._env_fns = env_fns
        self._env: gym.vector.AsyncVectorEnv | None = None
        self.num_envs = len(env_fns)
-        if observation_space is not None and action_space is not None:
+        if observation_space is not None and action_space is not None and metadata is not None:
            self.observation_space = observation_space
            self.action_space = action_space
+            self.metadata = metadata
        else:
            tmp = env_fns[0]()
            self.observation_space = tmp.observation_space
            self.action_space = tmp.action_space
+            self.metadata = tmp.metadata
            tmp.close()
        self.single_observation_space = self.observation_space
        self.single_action_space = self.action_space
@@ -172,6 +194,10 @@ class _LazyAsyncVectorEnv:
        if self._env is None:
            self._env = gym.vector.AsyncVectorEnv(self._env_fns, context="forkserver", shared_memory=True)

+    @property
+    def unwrapped(self):
+        """Return this lazy wrapper itself; no inner environment is exposed here."""
+        return self
+
    def reset(self, **kwargs):
        self._ensure()
        return self._env.reset(**kwargs)
@@ -12,8 +12,19 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
 import numpy as np

+from lerobot.utils.import_utils import _placo_available, require_package
+
+if TYPE_CHECKING or _placo_available:
+    import placo  # type: ignore[import-not-found]
+else:
+    placo = None
+

 class RobotKinematics:
    """Robot kinematics using placo library for forward and inverse kinematics."""
@@ -32,13 +43,7 @@ class RobotKinematics:
            target_frame_name (str): Name of the end-effector frame in the URDF
            joint_names (list[str] | None): List of joint names to use for the kinematics solver
        """
-        try:
-            import placo  # type: ignore[import-not-found] # C++ library with Python bindings, no type stubs available. TODO: Create stub file or request upstream typing support.
-        except ImportError as e:
-            raise ImportError(
-                "placo is required for RobotKinematics. "
-                "Please install the optional dependencies of `kinematics` in the package."
-            ) from e
+        require_package("placo", extra="placo-dep")

        self.robot = placo.RobotWrapper(urdf_path)
        self.solver = placo.KinematicsSolver(self.robot)
@@ -24,7 +24,7 @@ from functools import cached_property
 from typing import TYPE_CHECKING, Any, TypedDict

 from lerobot.utils.decorators import check_if_already_connected, check_if_not_connected
-from lerobot.utils.import_utils import _can_available
+from lerobot.utils.import_utils import _can_available, require_package

 if TYPE_CHECKING or _can_available:
    import can
@@ -111,6 +111,7 @@ class DamiaoMotorsBus(MotorsBusBase):
            bitrate: Nominal bitrate in bps (default: 1000000 = 1 Mbps)
            data_bitrate: Data bitrate for CAN FD in bps (default: 5000000 = 5 Mbps), ignored if use_can_fd is False
        """
+        require_package("python-can", extra="damiao", import_name="can")
        super().__init__(port, motors, calibration)
        self.port = port
        self.can_interface = can_interface
@@ -356,8 +356,8 @@ class SerialMotorsBus(MotorsBusBase):
        motors: dict[str, Motor],
        calibration: dict[str, MotorCalibration] | None = None,
    ):
-        require_package("pyserial", extra="hardware", import_name="serial")
-        require_package("deepdiff", extra="hardware")
+        require_package("pyserial", extra="pyserial-dep", import_name="serial")
+        require_package("deepdiff", extra="deepdiff-dep")
        super().__init__(port, motors, calibration)

        self.port_handler: PortHandler
@@ -23,12 +23,12 @@ from types import SimpleNamespace
 from typing import TYPE_CHECKING, Any, TypedDict

 from lerobot.utils.decorators import check_if_already_connected, check_if_not_connected
-from lerobot.utils.import_utils import _can_available
+from lerobot.utils.import_utils import _can_available, require_package

 if TYPE_CHECKING or _can_available:
    import can
 else:
-    can = SimpleNamespace(Message=object, interface=None)
+    can = SimpleNamespace(Message=object, interface=None, BusABC=object)
 import numpy as np

 from lerobot.utils.errors import DeviceNotConnectedError
@@ -106,6 +106,7 @@ class RobstrideMotorsBus(MotorsBusBase):
            bitrate: Nominal bitrate in bps (default: 1000000 = 1 Mbps)
            data_bitrate: Data bitrate for CAN FD in bps (default: 5000000 = 5 Mbps), ignored if use_can_fd is False
        """
+        require_package("python-can", extra="robstride", import_name="can")
        super().__init__(port, motors, calibration)
        self.port = port
        self.can_interface = can_interface
@@ -18,14 +18,21 @@ import logging
 import math
 from dataclasses import asdict, dataclass
 from pathlib import Path
+from typing import TYPE_CHECKING

 import draccus
 from torch.optim import Optimizer
 from torch.optim.lr_scheduler import LambdaLR, LRScheduler

 from lerobot.utils.constants import SCHEDULER_STATE
+from lerobot.utils.import_utils import _diffusers_available, require_package
 from lerobot.utils.io_utils import deserialize_json_into_object, write_json

+if TYPE_CHECKING or _diffusers_available:
+    from diffusers.optimization import get_scheduler
+else:
+    get_scheduler = None
+

@dataclass
 class LRSchedulerConfig(draccus.ChoiceRegistry, abc.ABC):
@@ -47,10 +54,7 @@ class DiffuserSchedulerConfig(LRSchedulerConfig):
    num_warmup_steps: int | None = None

    def build(self, optimizer: Optimizer, num_training_steps: int) -> LambdaLR:
-        from lerobot.utils.import_utils import require_package
-
        require_package("diffusers", extra="diffusion")
-        from diffusers.optimization import get_scheduler

        kwargs = {**asdict(self), "num_training_steps": num_training_steps, "optimizer": optimizer}
        return get_scheduler(**kwargs)
@@ -15,10 +15,6 @@
 from .act.configuration_act import ACTConfig as ACTConfig
 from .diffusion.configuration_diffusion import DiffusionConfig as DiffusionConfig
 from .factory import get_policy_class, make_policy, make_policy_config, make_pre_post_processors
-from .gaussian_actor.configuration_gaussian_actor import GaussianActorConfig as GaussianActorConfig
-from .gaussian_actor.reward_model.configuration_classifier import (
-    RewardClassifierConfig as RewardClassifierConfig,
-)
 from .groot.configuration_groot import GrootConfig as GrootConfig
 from .multi_task_dit.configuration_multi_task_dit import MultiTaskDiTConfig as MultiTaskDiTConfig
 from .pi0.configuration_pi0 import PI0Config as PI0Config
@@ -26,6 +22,8 @@ from .pi0_fast.configuration_pi0_fast import PI0FastConfig as PI0FastConfig
 from .pi05.configuration_pi05 import PI05Config as PI05Config
 from .pretrained import PreTrainedPolicy as PreTrainedPolicy
 from .rtc import ActionInterpolator as ActionInterpolator
+from .sac.configuration_sac import SACConfig as SACConfig
+from .sac.reward_model.configuration_classifier import RewardClassifierConfig as RewardClassifierConfig
 from .sarm.configuration_sarm import SARMConfig as SARMConfig
 from .smolvla.configuration_smolvla import SmolVLAConfig as SmolVLAConfig
 from .tdmpc.configuration_tdmpc import TDMPCConfig as TDMPCConfig
@@ -34,21 +32,21 @@ from .vqbet.configuration_vqbet import VQBeTConfig as VQBeTConfig
 from .wall_x.configuration_wall_x import WallXConfig as WallXConfig
 from .xvla.configuration_xvla import XVLAConfig as XVLAConfig

-# NOTE: Policy modeling classes (e.g., GaussianActorPolicy) are intentionally NOT re-exported here.
+# NOTE: Policy modeling classes (e.g., SACPolicy) are intentionally NOT re-exported here.
 # They have heavy optional dependencies and are loaded lazily via get_policy_class().
-# Import directly: ``from lerobot.policies.gaussian_actor.modeling_gaussian_actor import GaussianActorPolicy``
+# Import directly: ``from lerobot.policies.sac.modeling_sac import SACPolicy``

 __all__ = [
    # Configuration classes
    "ACTConfig",
    "DiffusionConfig",
-    "GaussianActorConfig",
    "GrootConfig",
    "MultiTaskDiTConfig",
    "PI0Config",
    "PI0FastConfig",
    "PI05Config",
    "RewardClassifierConfig",
+    "SACConfig",
    "SARMConfig",
    "SmolVLAConfig",
    "TDMPCConfig",
@@ -23,6 +23,7 @@ TODO(alexander-soare):
 import math
 from collections import deque
 from collections.abc import Callable
+from typing import TYPE_CHECKING

 import einops
 import numpy as np
@@ -32,6 +33,14 @@ import torchvision
 from torch import Tensor, nn

 from lerobot.utils.constants import ACTION, OBS_ENV_STATE, OBS_IMAGES, OBS_STATE
+from lerobot.utils.import_utils import _diffusers_available, require_package
+
+if TYPE_CHECKING or _diffusers_available:
+    from diffusers.schedulers.scheduling_ddim import DDIMScheduler
+    from diffusers.schedulers.scheduling_ddpm import DDPMScheduler
+else:
+    DDIMScheduler = None
+    DDPMScheduler = None

 from ..pretrained import PreTrainedPolicy
 from ..utils import (
@@ -64,6 +73,7 @@ class DiffusionPolicy(PreTrainedPolicy):
            dataset_stats: Dataset statistics to be used for normalization. If not passed here, it is expected
                that they will be passed with a call to `load_state_dict` before the policy is used.
        """
+        require_package("diffusers", extra="diffusion")
        super().__init__(config)
        config.validate_features()
        self.config = config
@@ -155,11 +165,7 @@ def _make_noise_scheduler(name: str, **kwargs: dict):
    Factory for noise scheduler instances of the requested type. All kwargs are passed
    to the scheduler.
    """
-    from lerobot.utils.import_utils import require_package
-
    require_package("diffusers", extra="diffusion")
-    from diffusers.schedulers.scheduling_ddim import DDIMScheduler
-    from diffusers.schedulers.scheduling_ddpm import DDPMScheduler

    if name == "DDPM":
        return DDPMScheduler(**kwargs)
@@ -46,13 +46,13 @@ from lerobot.utils.feature_utils import dataset_to_policy_features

 from .act.configuration_act import ACTConfig
 from .diffusion.configuration_diffusion import DiffusionConfig
-from .gaussian_actor.configuration_gaussian_actor import GaussianActorConfig
-from .gaussian_actor.reward_model.configuration_classifier import RewardClassifierConfig
 from .groot.configuration_groot import GrootConfig
 from .multi_task_dit.configuration_multi_task_dit import MultiTaskDiTConfig
 from .pi0.configuration_pi0 import PI0Config
 from .pi05.configuration_pi05 import PI05Config
 from .pretrained import PreTrainedPolicy
+from .sac.configuration_sac import SACConfig
+from .sac.reward_model.configuration_classifier import RewardClassifierConfig
 from .sarm.configuration_sarm import SARMConfig
 from .smolvla.configuration_smolvla import SmolVLAConfig
 from .tdmpc.configuration_tdmpc import TDMPCConfig
@@ -89,7 +89,7 @@ def get_policy_class(name: str) -> type[PreTrainedPolicy]:

    Args:
        name: The name of the policy. Supported names are "tdmpc", "diffusion", "act",
-            "multi_task_dit", "vqbet", "pi0", "pi05", "gaussian_actor", "reward_classifier", "smolvla", "wall_x".
+            "multi_task_dit", "vqbet", "pi0", "pi05", "sac", "reward_classifier", "smolvla", "wall_x".
    Returns:
        The policy class corresponding to the given name.

@@ -128,12 +128,12 @@ def get_policy_class(name: str) -> type[PreTrainedPolicy]:
        from .pi05.modeling_pi05 import PI05Policy

        return PI05Policy
-    elif name == "gaussian_actor":
-        from .gaussian_actor.modeling_gaussian_actor import GaussianActorPolicy
+    elif name == "sac":
+        from .sac.modeling_sac import SACPolicy

-        return GaussianActorPolicy
+        return SACPolicy
    elif name == "reward_classifier":
-        from .gaussian_actor.reward_model.modeling_classifier import Classifier
+        from .sac.reward_model.modeling_classifier import Classifier

        return Classifier
    elif name == "smolvla":
@@ -172,7 +172,7 @@ def make_policy_config(policy_type: str, **kwargs) -> PreTrainedConfig:

    Args:
        policy_type: The type of the policy. Supported types include "tdmpc",
-                     "multi_task_dit", "diffusion", "act", "vqbet", "pi0", "pi05", "gaussian_actor",
+                     "multi_task_dit", "diffusion", "act", "vqbet", "pi0", "pi05", "sac",
                     "smolvla", "reward_classifier", "wall_x".
        **kwargs: Keyword arguments to be passed to the configuration class constructor.

@@ -196,8 +196,8 @@ def make_policy_config(policy_type: str, **kwargs) -> PreTrainedConfig:
        return PI0Config(**kwargs)
    elif policy_type == "pi05":
        return PI05Config(**kwargs)
-    elif policy_type == "gaussian_actor":
-        return GaussianActorConfig(**kwargs)
+    elif policy_type == "sac":
+        return SACConfig(**kwargs)
    elif policy_type == "smolvla":
        return SmolVLAConfig(**kwargs)
    elif policy_type == "reward_classifier":
@@ -370,16 +370,16 @@ def make_pre_post_processors(
            dataset_stats=kwargs.get("dataset_stats"),
        )

-    elif isinstance(policy_cfg, GaussianActorConfig):
-        from .gaussian_actor.processor_gaussian_actor import make_gaussian_actor_pre_post_processors
+    elif isinstance(policy_cfg, SACConfig):
+        from .sac.processor_sac import make_sac_pre_post_processors

-        processors = make_gaussian_actor_pre_post_processors(
+        processors = make_sac_pre_post_processors(
            config=policy_cfg,
            dataset_stats=kwargs.get("dataset_stats"),
        )

    elif isinstance(policy_cfg, RewardClassifierConfig):
-        from .gaussian_actor.reward_model.processor_classifier import make_classifier_processor
+        from .sac.reward_model.processor_classifier import make_classifier_processor

        processors = make_classifier_processor(
            config=policy_cfg,
@@ -1,19 +0,0 @@
-# Copyright 2024 The HuggingFace Inc. team. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from .configuration_gaussian_actor import GaussianActorConfig
-from .modeling_gaussian_actor import GaussianActorPolicy
-from .processor_gaussian_actor import make_gaussian_actor_pre_post_processors
-
-__all__ = ["GaussianActorConfig", "GaussianActorPolicy", "make_gaussian_actor_pre_post_processors"]
@@ -43,6 +43,7 @@ from torch import Tensor

 from lerobot.configs import FeatureType, PolicyFeature
 from lerobot.utils.constants import ACTION, OBS_IMAGES
+from lerobot.utils.import_utils import require_package

 from ..pretrained import PreTrainedPolicy
 from .configuration_groot import GrootConfig
@@ -59,6 +60,7 @@ class GrootPolicy(PreTrainedPolicy):

    def __init__(self, config: GrootConfig, **kwargs):
        """Initialize Groot policy wrapper."""
+        require_package("transformers", extra="groot")
        super().__init__(config)
        config.validate_features()
        self.config = config
@@ -36,7 +36,7 @@ import torch.nn.functional as F  # noqa: N812
 import torchvision
 from torch import Tensor

-from lerobot.utils.import_utils import _transformers_available
+from lerobot.utils.import_utils import _diffusers_available, _transformers_available, require_package

 from .configuration_multi_task_dit import MultiTaskDiTConfig

@@ -46,6 +46,13 @@ if TYPE_CHECKING or _transformers_available:
 else:
    CLIPTextModel = None
    CLIPVisionModel = None
+
+if TYPE_CHECKING or _diffusers_available:
+    from diffusers.schedulers.scheduling_ddim import DDIMScheduler
+    from diffusers.schedulers.scheduling_ddpm import DDPMScheduler
+else:
+    DDIMScheduler = None
+    DDPMScheduler = None
 from lerobot.utils.constants import (
    ACTION,
    OBS_IMAGES,
@@ -65,6 +72,8 @@ class MultiTaskDiTPolicy(PreTrainedPolicy):
    name = "multi_task_dit"

    def __init__(self, config: MultiTaskDiTConfig, **kwargs):
+        require_package("transformers", extra="multi_task_dit")
+        require_package("diffusers", extra="multi_task_dit")
        super().__init__(config)
        config.validate_features()
        self.config = config
@@ -643,12 +652,6 @@ class DiffusionObjective(nn.Module):
            "prediction_type": config.prediction_type,
        }

-        from lerobot.utils.import_utils import require_package
-
-        require_package("diffusers", extra="multi_task_dit")
-        from diffusers.schedulers.scheduling_ddim import DDIMScheduler
-        from diffusers.schedulers.scheduling_ddpm import DDPMScheduler
-
        if config.noise_scheduler_type == "DDPM":
            self.noise_scheduler: DDPMScheduler | DDIMScheduler = DDPMScheduler(**scheduler_kwargs)
        elif config.noise_scheduler_type == "DDIM":
@@ -26,7 +26,7 @@ import torch
 import torch.nn.functional as F  # noqa: N812
 from torch import Tensor, nn

-from lerobot.utils.import_utils import _transformers_available
+from lerobot.utils.import_utils import _transformers_available, require_package

 # Conditional import for type checking and lazy loading
 if TYPE_CHECKING or _transformers_available:
@@ -947,6 +947,7 @@ class PI0Policy(PreTrainedPolicy):
        Args:
            config: Policy configuration class instance.
        """
+        require_package("transformers", extra="pi")
        super().__init__(config)
        config.validate_features()
        self.config = config
@@ -26,7 +26,7 @@ import torch
 import torch.nn.functional as F  # noqa: N812
 from torch import Tensor, nn

-from lerobot.utils.import_utils import _transformers_available
+from lerobot.utils.import_utils import _transformers_available, require_package

 # Conditional import for type checking and lazy loading
 if TYPE_CHECKING or _transformers_available:
@@ -918,6 +918,7 @@ class PI05Policy(PreTrainedPolicy):
        Args:
            config: Policy configuration class instance.
        """
+        require_package("transformers", extra="pi")
        super().__init__(config)
        config.validate_features()
        self.config = config
@@ -26,7 +26,7 @@ import torch
 import torch.nn.functional as F  # noqa: N812
 from torch import Tensor, nn

-from lerobot.utils.import_utils import _scipy_available, _transformers_available
+from lerobot.utils.import_utils import _scipy_available, _transformers_available, require_package

 # Conditional import for type checking and lazy loading
 if TYPE_CHECKING or _scipy_available:
@@ -35,7 +35,7 @@ else:
    idct = None

 if TYPE_CHECKING or _transformers_available:
-    from transformers import AutoTokenizer
+    from transformers import AutoProcessor, AutoTokenizer
    from transformers.models.auto import CONFIG_MAPPING

    from ..pi_gemma import (
@@ -44,6 +44,7 @@ if TYPE_CHECKING or _transformers_available:
    )
 else:
    CONFIG_MAPPING = None
+    AutoProcessor = None
    AutoTokenizer = None
    PiGemmaModel = None
    PaliGemmaForConditionalGenerationWithPiGemma = None
@@ -826,14 +827,14 @@ class PI0FastPolicy(PreTrainedPolicy):
        Args:
            config: Policy configuration class instance.
        """
+        require_package("transformers", extra="pi")
+        require_package("scipy", extra="pi")
        super().__init__(config)
        config.validate_features()
        self.config = config

        # Load tokenizers first
        try:
-            from transformers import AutoProcessor, AutoTokenizer
-
            # Load FAST tokenizer
            self.action_tokenizer = AutoProcessor.from_pretrained(
                config.action_tokenizer_name, trust_remote_code=True
@@ -1,4 +1,4 @@
-# Copyright 2026 The HuggingFace Inc. team. All rights reserved.
+# Copyright 2024 The HuggingFace Inc. team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -12,9 +12,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-from .sac import SACAlgorithm as SACAlgorithm, SACAlgorithmConfig as SACAlgorithmConfig
+from .configuration_sac import SACConfig
+from .modeling_sac import SACPolicy
+from .processor_sac import make_sac_pre_post_processors

-__all__ = [
-    "SACAlgorithm",
-    "SACAlgorithmConfig",
-]
+__all__ = ["SACConfig", "SACPolicy", "make_sac_pre_post_processors"]
@@ -75,19 +75,18 @@ class PolicyConfig:
    init_final: float = 0.05


-@PreTrainedConfig.register_subclass("gaussian_actor")
+@PreTrainedConfig.register_subclass("sac")
@dataclass
-class GaussianActorConfig(PreTrainedConfig):
-    """Gaussian actor configuration.
+class SACConfig(PreTrainedConfig):
+    """Soft Actor-Critic (SAC) configuration.

-    This configures the policy-side (actor + observation encoder) of a Gaussian
-    policy, as used by SAC and related maximum-entropy continuous-control algorithms.
-    By default the actor output is a tanh-squashed diagonal Gaussian
-    (``TanhMultivariateNormalDiag``); the tanh squashing can be disabled via
-    ``policy_kwargs.use_tanh_squash``. The critics, temperature, and Bellman-update
-    logic live on the algorithm side (see ``lerobot.rl.algorithms.sac``).
+    SAC is an off-policy actor-critic deep RL algorithm based on the maximum entropy
+    reinforcement learning framework. It learns a policy and a Q-function simultaneously
+    using experience collected from the environment.

-    CLI: ``--policy.type=gaussian_actor``.
+    This configuration class contains all the parameters needed to define a SAC agent,
+    including network architectures, optimization settings, and algorithm-specific
+    hyperparameters.
    """

    # Mapping of feature types to normalization modes
@@ -123,7 +122,7 @@ class GaussianActorConfig(PreTrainedConfig):
    device: str = "cpu"
    # Device to store the model on
    storage_device: str = "cpu"
-    # Name of the vision encoder model (Set to "lerobot/resnet10" for hil serl resnet10)
+    # Name of the vision encoder model (Set to "helper2424/resnet10" for hil serl resnet10)
    vision_encoder_name: str | None = None
    # Whether to freeze the vision encoder during training
    freeze_vision_encoder: bool = True
@@ -136,41 +135,78 @@ class GaussianActorConfig(PreTrainedConfig):
    # Dimension of the image embedding pooling
    image_embedding_pooling_dim: int = 8

-    # Encoder architecture
+    # Training parameter
+    # Number of steps for online training
+    online_steps: int = 1000000
+    # Capacity of the online replay buffer
+    online_buffer_capacity: int = 100000
+    # Capacity of the offline replay buffer
+    offline_buffer_capacity: int = 100000
+    # Whether to use asynchronous prefetching for the buffers
+    async_prefetch: bool = False
+    # Number of steps before learning starts
+    online_step_before_learning: int = 100
+    # Frequency of policy updates
+    policy_update_freq: int = 1
+
+    # SAC algorithm parameters
+    # Discount factor for the SAC algorithm
+    discount: float = 0.99
+    # Initial temperature value
+    temperature_init: float = 1.0
+    # Number of critics in the ensemble
+    num_critics: int = 2
+    # Number of subsampled critics for training
+    num_subsample_critics: int | None = None
+    # Learning rate for the critic network
+    critic_lr: float = 3e-4
+    # Learning rate for the actor network
+    actor_lr: float = 3e-4
+    # Learning rate for the temperature parameter
+    temperature_lr: float = 3e-4
+    # Weight for the critic target update
+    critic_target_update_weight: float = 0.005
+    # Update-to-data (UTD) ratio (to enable UTD, set it to a value >1)
+    utd_ratio: int = 1
    # Hidden dimension size for the state encoder
    state_encoder_hidden_dim: int = 256
    # Dimension of the latent space
    latent_dim: int = 256
+    # Target entropy for the SAC algorithm
+    target_entropy: float | None = None
+    # Whether to use backup entropy for the SAC algorithm
+    use_backup_entropy: bool = True
+    # Gradient clipping norm for the SAC algorithm
+    grad_clip_norm: float = 40.0

-    # Online training (TODO(Khalil): relocate to TrainRLServerPipelineConfig)
-    online_steps: int = 1000000
-    online_buffer_capacity: int = 100000
-    offline_buffer_capacity: int = 100000
-    async_prefetch: bool = False
-    online_step_before_learning: int = 100
-
-    # Actor-learner transport (TODO(Khalil): relocate to TrainRLServerPipelineConfig).
+    # Network configuration
+    # Configuration for the critic network architecture
+    critic_network_kwargs: CriticNetworkConfig = field(default_factory=CriticNetworkConfig)
+    # Configuration for the actor network architecture
+    actor_network_kwargs: ActorNetworkConfig = field(default_factory=ActorNetworkConfig)
+    # Configuration for the policy parameters
+    policy_kwargs: PolicyConfig = field(default_factory=PolicyConfig)
+    # Configuration for the discrete critic network
+    discrete_critic_network_kwargs: CriticNetworkConfig = field(default_factory=CriticNetworkConfig)
+    # Configuration for actor-learner architecture
    actor_learner_config: ActorLearnerConfig = field(default_factory=ActorLearnerConfig)
+    # Configuration for concurrency settings (you can use threads or processes for the actor and learner)
    concurrency: ConcurrencyConfig = field(default_factory=ConcurrencyConfig)

-    # Network architecture
-    # Actor network
-    actor_network_kwargs: ActorNetworkConfig = field(default_factory=ActorNetworkConfig)
-    # Gaussian head parameters
-    policy_kwargs: PolicyConfig = field(default_factory=PolicyConfig)
-    # Discrete critic
-    discrete_critic_network_kwargs: CriticNetworkConfig = field(default_factory=CriticNetworkConfig)
+    # Optimizations
+    use_torch_compile: bool = True

    def __post_init__(self):
        super().__post_init__()
+        # Any validation specific to SAC configuration

    def get_optimizer_preset(self) -> MultiAdamConfig:
        return MultiAdamConfig(
            weight_decay=0.0,
            optimizer_groups={
-                "actor": {"lr": 3e-4},
-                "critic": {"lr": 3e-4},
-                "temperature": {"lr": 3e-4},
+                "actor": {"lr": self.actor_lr},
+                "critic": {"lr": self.critic_lr},
+                "temperature": {"lr": self.temperature_lr},
            },
        )

@@ -15,12 +15,16 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

+import math
 from collections.abc import Callable
 from dataclasses import asdict
-from typing import Any
+from typing import Literal

+import einops
+import numpy as np
 import torch
 import torch.nn as nn
+import torch.nn.functional as F  # noqa: N812
 from torch import Tensor
 from torch.distributions import MultivariateNormal, TanhTransform, Transform, TransformedDistribution

@@ -28,20 +32,20 @@ from lerobot.utils.constants import ACTION, OBS_ENV_STATE, OBS_STATE

 from ..pretrained import PreTrainedPolicy
 from ..utils import get_device_from_parameters
-from .configuration_gaussian_actor import GaussianActorConfig, is_image_feature
+from .configuration_sac import SACConfig, is_image_feature

 DISCRETE_DIMENSION_INDEX = -1  # Gripper is always the last dimension


-class GaussianActorPolicy(
+class SACPolicy(
    PreTrainedPolicy,
 ):
-    config_class = GaussianActorConfig
-    name = "gaussian_actor"
+    config_class = SACConfig
+    name = "sac"

    def __init__(
        self,
-        config: GaussianActorConfig | None = None,
+        config: SACConfig | None = None,
    ):
        super().__init__(config)
        config.validate_features()
@@ -50,8 +54,9 @@ class GaussianActorPolicy(
        # Determine action dimension and initialize all components
        continuous_action_dim = config.output_features[ACTION].shape[0]
        self._init_encoders()
+        self._init_critics(continuous_action_dim)
        self._init_actor(continuous_action_dim)
-        self._init_discrete_critic()
+        self._init_temperature()

    def get_optim_params(self) -> dict:
        optim_params = {
@@ -60,7 +65,11 @@ class GaussianActorPolicy(
                for n, p in self.actor.named_parameters()
                if not n.startswith("encoder") or not self.shared_encoder
            ],
+            "critic": self.critic_ensemble.parameters(),
+            "temperature": self.log_alpha,
        }
+        if self.config.num_discrete_actions is not None:
+            optim_params["discrete_critic"] = self.discrete_critic.parameters()
        return optim_params

    def reset(self):
@@ -70,9 +79,7 @@ class GaussianActorPolicy(
    @torch.no_grad()
    def predict_action_chunk(self, batch: dict[str, Tensor]) -> Tensor:
        """Predict a chunk of actions given environment observations."""
-        raise NotImplementedError(
-            "GaussianActorPolicy does not support action chunking. It returns single actions!"
-        )
+        raise NotImplementedError("SACPolicy does not support action chunking. It returns single actions!")

    @torch.no_grad()
    def select_action(self, batch: dict[str, Tensor]) -> Tensor:
@@ -85,55 +92,360 @@ class GaussianActorPolicy(
        actions, _, _ = self.actor(batch, observations_features)

        if self.config.num_discrete_actions is not None:
-            if self.discrete_critic is not None:
-                discrete_action_value = self.discrete_critic(batch, observations_features)
-                discrete_action = torch.argmax(discrete_action_value, dim=-1, keepdim=True)
-            else:
-                discrete_action = torch.ones(
-                    (*actions.shape[:-1], 1), device=actions.device, dtype=actions.dtype
-                )
+            discrete_action_value = self.discrete_critic(batch, observations_features)
+            discrete_action = torch.argmax(discrete_action_value, dim=-1, keepdim=True)
            actions = torch.cat([actions, discrete_action], dim=-1)

        return actions

-    def forward(self, batch: dict[str, Tensor | dict[str, Tensor]]) -> dict[str, Tensor]:
-        """Actor forward pass: sample actions and return log-probabilities.
+    def critic_forward(
+        self,
+        observations: dict[str, Tensor],
+        actions: Tensor,
+        use_target: bool = False,
+        observation_features: Tensor | None = None,
+    ) -> Tensor:
+        """Forward pass through a critic network ensemble

        Args:
-            batch: A flat observation dict, or a training dict containing
-                ``"state"`` (observations) and optionally ``"observation_feature"``
-                (pre-computed encoder features).
+            observations: Dictionary of observations
+            actions: Action tensor
+            use_target: If True, use target critics, otherwise use ensemble critics

        Returns:
-            Dict with ``"action"``, ``"log_prob"``, and ``"action_mean"`` tensors.
+            Tensor of Q-values from all critics
        """
-        observations = batch.get("state", batch)
-        observation_features = batch.get("observation_feature") if isinstance(batch, dict) else None
-        actions, log_probs, means = self.actor(observations, observation_features)
-        return {"action": actions, "log_prob": log_probs, "action_mean": means}

-    def load_actor_weights(self, state_dicts: dict[str, Any], device: str | torch.device = "cpu") -> None:
-        from lerobot.utils.transition import move_state_dict_to_device
+        critics = self.critic_target if use_target else self.critic_ensemble
+        q_values = critics(observations, actions, observation_features)
+        return q_values

-        actor_state_dict = move_state_dict_to_device(state_dicts["policy"], device=device)
-        self.actor.load_state_dict(actor_state_dict)
+    def discrete_critic_forward(
+        self, observations, use_target=False, observation_features=None
+    ) -> torch.Tensor:
+        """Forward pass through a discrete critic network

-        if "discrete_critic" in state_dicts and self.discrete_critic is not None:
-            discrete_critic_state_dict = move_state_dict_to_device(
-                state_dicts["discrete_critic"], device=device
+        Args:
+            observations: Dictionary of observations
+            use_target: If True, use the target discrete critic, otherwise the online discrete critic
+            observation_features: Optional pre-computed observation features to avoid recomputing encoder output
+
+        Returns:
+            Tensor of Q-values from the discrete critic network
+        """
+        discrete_critic = self.discrete_critic_target if use_target else self.discrete_critic
+        q_values = discrete_critic(observations, observation_features)
+        return q_values
+
+    def forward(
+        self,
+        batch: dict[str, Tensor | dict[str, Tensor]],
+        model: Literal["actor", "critic", "temperature", "discrete_critic"] = "critic",
+    ) -> dict[str, Tensor]:
+        """Compute the loss for the given model
+
+        Args:
+            batch: Dictionary containing:
+                - action: Action tensor
+                - reward: Reward tensor
+                - state: Observations tensor dict
+                - next_state: Next observations tensor dict
+                - done: Done mask tensor
+                - observation_feature: Optional pre-computed observation features
+                - next_observation_feature: Optional pre-computed next observation features
+            model: Which model to compute the loss for ("actor", "critic", "discrete_critic", or "temperature")
+
+        Returns:
+            A dict mapping the loss name (e.g. "loss_critic") to the computed loss tensor
+        """
+        # Extract common components from batch
+        actions: Tensor = batch[ACTION]
+        observations: dict[str, Tensor] = batch["state"]
+        observation_features: Tensor = batch.get("observation_feature")
+
+        if model == "critic":
+            # Extract critic-specific components
+            rewards: Tensor = batch["reward"]
+            next_observations: dict[str, Tensor] = batch["next_state"]
+            done: Tensor = batch["done"]
+            next_observation_features: Tensor = batch.get("next_observation_feature")
+
+            loss_critic = self.compute_loss_critic(
+                observations=observations,
+                actions=actions,
+                rewards=rewards,
+                next_observations=next_observations,
+                done=done,
+                observation_features=observation_features,
+                next_observation_features=next_observation_features,
            )
-            self.discrete_critic.load_state_dict(discrete_critic_state_dict)
+
+            return {"loss_critic": loss_critic}
+
+        if model == "discrete_critic" and self.config.num_discrete_actions is not None:
+            # Extract critic-specific components
+            rewards: Tensor = batch["reward"]
+            next_observations: dict[str, Tensor] = batch["next_state"]
+            done: Tensor = batch["done"]
+            next_observation_features: Tensor = batch.get("next_observation_feature")
+            complementary_info = batch.get("complementary_info")
+            loss_discrete_critic = self.compute_loss_discrete_critic(
+                observations=observations,
+                actions=actions,
+                rewards=rewards,
+                next_observations=next_observations,
+                done=done,
+                observation_features=observation_features,
+                next_observation_features=next_observation_features,
+                complementary_info=complementary_info,
+            )
+            return {"loss_discrete_critic": loss_discrete_critic}
+        if model == "actor":
+            return {
+                "loss_actor": self.compute_loss_actor(
+                    observations=observations,
+                    observation_features=observation_features,
+                )
+            }
+
+        if model == "temperature":
+            return {
+                "loss_temperature": self.compute_loss_temperature(
+                    observations=observations,
+                    observation_features=observation_features,
+                )
+            }
+
+        raise ValueError(f"Unknown model type: {model}")
+
+    def update_target_networks(self):
+        """Update target networks with exponential moving average"""
+        for target_param, param in zip(
+            self.critic_target.parameters(),
+            self.critic_ensemble.parameters(),
+            strict=True,
+        ):
+            target_param.data.copy_(
+                param.data * self.config.critic_target_update_weight
+                + target_param.data * (1.0 - self.config.critic_target_update_weight)
+            )
+        if self.config.num_discrete_actions is not None:
+            for target_param, param in zip(
+                self.discrete_critic_target.parameters(),
+                self.discrete_critic.parameters(),
+                strict=True,
+            ):
+                target_param.data.copy_(
+                    param.data * self.config.critic_target_update_weight
+                    + target_param.data * (1.0 - self.config.critic_target_update_weight)
+                )
+
+    @property
+    def temperature(self) -> float:
+        """Return the current temperature value, always in sync with log_alpha."""
+        return self.log_alpha.exp().item()
+
+    def compute_loss_critic(
+        self,
+        observations,
+        actions,
+        rewards,
+        next_observations,
+        done,
+        observation_features: Tensor | None = None,
+        next_observation_features: Tensor | None = None,
+    ) -> Tensor:
+        with torch.no_grad():
+            next_action_preds, next_log_probs, _ = self.actor(next_observations, next_observation_features)
+
+            # 2- compute q targets
+            q_targets = self.critic_forward(
+                observations=next_observations,
+                actions=next_action_preds,
+                use_target=True,
+                observation_features=next_observation_features,
+            )
+
+            # subsample critics to prevent overfitting when using a high UTD (update-to-data) ratio
+            # TODO: Get indices before forward pass to avoid unnecessary computation
+            if self.config.num_subsample_critics is not None:
+                indices = torch.randperm(self.config.num_critics)
+                indices = indices[: self.config.num_subsample_critics]
+                q_targets = q_targets[indices]
+
+            # take the minimum Q-value across the (possibly subsampled) critics
+            min_q, _ = q_targets.min(dim=0)  # Get values from min operation
+            if self.config.use_backup_entropy:
+                min_q = min_q - (self.temperature * next_log_probs)
+
+            td_target = rewards + (1 - done) * self.config.discount * min_q
+
+        # 3- compute predicted qs
+        if self.config.num_discrete_actions is not None:
+            # NOTE: We only want to keep the continuous action part
+            # In the buffer we have the full action space (continuous + discrete)
+            # We need to split them before concatenating them in the critic forward
+            actions: Tensor = actions[:, :DISCRETE_DIMENSION_INDEX]
+        q_preds = self.critic_forward(
+            observations=observations,
+            actions=actions,
+            use_target=False,
+            observation_features=observation_features,
+        )
+
+        # 4- Calculate loss
+        # Compute state-action value loss (TD loss) for all of the Q functions in the ensemble.
+        td_target_duplicate = einops.repeat(td_target, "b -> e b", e=q_preds.shape[0])
+        # Average the loss over the batch for each critic, then sum across critics to get the final loss
+        critics_loss = (
+            F.mse_loss(
+                input=q_preds,
+                target=td_target_duplicate,
+                reduction="none",
+            ).mean(dim=1)
+        ).sum()
+        return critics_loss
+
+    def compute_loss_discrete_critic(
+        self,
+        observations,
+        actions,
+        rewards,
+        next_observations,
+        done,
+        observation_features=None,
+        next_observation_features=None,
+        complementary_info=None,
+    ):
+        # NOTE: We only want to keep the discrete action part
+        # In the buffer we have the full action space (continuous + discrete)
+        # We need to split them before concatenating them in the critic forward
+        actions_discrete: Tensor = actions[:, DISCRETE_DIMENSION_INDEX:].clone()
+        actions_discrete = torch.round(actions_discrete)
+        actions_discrete = actions_discrete.long()
+
+        discrete_penalties: Tensor | None = None
+        if complementary_info is not None:
+            discrete_penalties: Tensor | None = complementary_info.get("discrete_penalty")
+
+        with torch.no_grad():
+            # For DQN, select actions using online network, evaluate with target network
+            next_discrete_qs = self.discrete_critic_forward(
+                next_observations, use_target=False, observation_features=next_observation_features
+            )
+            best_next_discrete_action = torch.argmax(next_discrete_qs, dim=-1, keepdim=True)
+
+            # Get target Q-values from target network
+            target_next_discrete_qs = self.discrete_critic_forward(
+                observations=next_observations,
+                use_target=True,
+                observation_features=next_observation_features,
+            )
+
+            # Use gather to select Q-values for best actions
+            target_next_discrete_q = torch.gather(
+                target_next_discrete_qs, dim=1, index=best_next_discrete_action
+            ).squeeze(-1)
+
+            # Compute target Q-value with Bellman equation
+            rewards_discrete = rewards
+            if discrete_penalties is not None:
+                rewards_discrete = rewards + discrete_penalties
+            target_discrete_q = rewards_discrete + (1 - done) * self.config.discount * target_next_discrete_q
+
+        # Get predicted Q-values for current observations
+        predicted_discrete_qs = self.discrete_critic_forward(
+            observations=observations, use_target=False, observation_features=observation_features
+        )
+
+        # Use gather to select Q-values for taken actions
+        predicted_discrete_q = torch.gather(predicted_discrete_qs, dim=1, index=actions_discrete).squeeze(-1)
+
+        # Compute MSE loss between predicted and target Q-values
+        discrete_critic_loss = F.mse_loss(input=predicted_discrete_q, target=target_discrete_q)
+        return discrete_critic_loss
+
+    def compute_loss_temperature(self, observations, observation_features: Tensor | None = None) -> Tensor:
+        """Compute the temperature loss"""
+        # calculate temperature loss
+        with torch.no_grad():
+            _, log_probs, _ = self.actor(observations, observation_features)
+        temperature_loss = (-self.log_alpha.exp() * (log_probs + self.target_entropy)).mean()
+        return temperature_loss
+
+    def compute_loss_actor(
+        self,
+        observations,
+        observation_features: Tensor | None = None,
+    ) -> Tensor:
+        actions_pi, log_probs, _ = self.actor(observations, observation_features)
+
+        q_preds = self.critic_forward(
+            observations=observations,
+            actions=actions_pi,
+            use_target=False,
+            observation_features=observation_features,
+        )
+        min_q_preds = q_preds.min(dim=0)[0]
+
+        actor_loss = ((self.temperature * log_probs) - min_q_preds).mean()
+        return actor_loss

    def _init_encoders(self):
        """Initialize shared or separate encoders for actor and critic."""
        self.shared_encoder = self.config.shared_encoder
-        self.encoder_critic = GaussianActorObservationEncoder(self.config)
+        self.encoder_critic = SACObservationEncoder(self.config)
        self.encoder_actor = (
-            self.encoder_critic if self.shared_encoder else GaussianActorObservationEncoder(self.config)
+            self.encoder_critic if self.shared_encoder else SACObservationEncoder(self.config)
        )

+    def _init_critics(self, continuous_action_dim):
+        """Build critic ensemble, targets, and optional discrete critic."""
+        heads = [
+            CriticHead(
+                input_dim=self.encoder_critic.output_dim + continuous_action_dim,
+                **asdict(self.config.critic_network_kwargs),
+            )
+            for _ in range(self.config.num_critics)
+        ]
+        self.critic_ensemble = CriticEnsemble(encoder=self.encoder_critic, ensemble=heads)
+        target_heads = [
+            CriticHead(
+                input_dim=self.encoder_critic.output_dim + continuous_action_dim,
+                **asdict(self.config.critic_network_kwargs),
+            )
+            for _ in range(self.config.num_critics)
+        ]
+        self.critic_target = CriticEnsemble(encoder=self.encoder_critic, ensemble=target_heads)
+        self.critic_target.load_state_dict(self.critic_ensemble.state_dict())
+
+        if self.config.use_torch_compile:
+            self.critic_ensemble = torch.compile(self.critic_ensemble)
+            self.critic_target = torch.compile(self.critic_target)
+
+        if self.config.num_discrete_actions is not None:
+            self._init_discrete_critics()
+
+    def _init_discrete_critics(self):
+        """Build the discrete critic and its target network."""
+        self.discrete_critic = DiscreteCritic(
+            encoder=self.encoder_critic,
+            input_dim=self.encoder_critic.output_dim,
+            output_dim=self.config.num_discrete_actions,
+            **asdict(self.config.discrete_critic_network_kwargs),
+        )
+        self.discrete_critic_target = DiscreteCritic(
+            encoder=self.encoder_critic,
+            input_dim=self.encoder_critic.output_dim,
+            output_dim=self.config.num_discrete_actions,
+            **asdict(self.config.discrete_critic_network_kwargs),
+        )
+
+        # TODO: (maractingi, azouitine) Compile the discrete critic
+        self.discrete_critic_target.load_state_dict(self.discrete_critic.state_dict())
+
    def _init_actor(self, continuous_action_dim):
-        """Initialize policy actor network."""
+        """Initialize policy actor network and default target entropy."""
        # NOTE: The actor select only the continuous action part
        self.actor = Policy(
            encoder=self.encoder_actor,
@@ -143,25 +455,21 @@ class GaussianActorPolicy(
            **asdict(self.config.policy_kwargs),
        )

-    def _init_discrete_critic(self) -> None:
-        """Initialize discrete critic network."""
-        if self.config.num_discrete_actions is None:
-            self.discrete_critic = None
-            return
+        self.target_entropy = self.config.target_entropy
+        if self.target_entropy is None:
+            dim = continuous_action_dim + (1 if self.config.num_discrete_actions is not None else 0)
+            self.target_entropy = -np.prod(dim) / 2

-        # TODO(Khalil): Compile the discrete critic
-        self.discrete_critic = DiscreteCritic(
-            encoder=self.encoder_critic,
-            input_dim=self.encoder_critic.output_dim,
-            output_dim=self.config.num_discrete_actions,
-            **asdict(self.config.discrete_critic_network_kwargs),
-        )
+    def _init_temperature(self) -> None:
+        """Set up temperature parameter (log_alpha)."""
+        temp_init = self.config.temperature_init
+        self.log_alpha = nn.Parameter(torch.tensor([math.log(temp_init)]))


-class GaussianActorObservationEncoder(nn.Module):
+class SACObservationEncoder(nn.Module):
    """Encode image and/or state vector observations."""

-    def __init__(self, config: GaussianActorConfig) -> None:
+    def __init__(self, config: SACConfig) -> None:
        super().__init__()
        self.config = config
        self._init_image_layers()
@@ -369,6 +677,84 @@ class MLP(nn.Module):
        return self.net(x)


+class CriticHead(nn.Module):
+    def __init__(
+        self,
+        input_dim: int,
+        hidden_dims: list[int],
+        activations: Callable[[torch.Tensor], torch.Tensor] | str = nn.SiLU(),
+        activate_final: bool = False,
+        dropout_rate: float | None = None,
+        init_final: float | None = None,
+        final_activation: Callable[[torch.Tensor], torch.Tensor] | str | None = None,
+    ):
+        super().__init__()
+        self.net = MLP(
+            input_dim=input_dim,
+            hidden_dims=hidden_dims,
+            activations=activations,
+            activate_final=activate_final,
+            dropout_rate=dropout_rate,
+            final_activation=final_activation,
+        )
+        self.output_layer = nn.Linear(in_features=hidden_dims[-1], out_features=1)
+        if init_final is not None:
+            nn.init.uniform_(self.output_layer.weight, -init_final, init_final)
+            nn.init.uniform_(self.output_layer.bias, -init_final, init_final)
+        else:
+            orthogonal_init()(self.output_layer.weight)
+
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        return self.output_layer(self.net(x))
+
+
+class CriticEnsemble(nn.Module):
+    """
+    CriticEnsemble wraps multiple CriticHead modules into an ensemble.
+
+    Args:
+        encoder (SACObservationEncoder): encoder for observations.
+        ensemble (List[CriticHead]): list of critic heads.
+        init_final (float | None): optional initializer scale; only stored as ``self.init_final``, not applied to the heads by this class.
+
+    Forward returns a tensor of shape (num_critics, batch_size) containing Q-values.
+    """
+
+    def __init__(
+        self,
+        encoder: SACObservationEncoder,
+        ensemble: list[CriticHead],
+        init_final: float | None = None,
+    ):
+        super().__init__()
+        self.encoder = encoder
+        self.init_final = init_final
+        self.critics = nn.ModuleList(ensemble)
+
+    def forward(
+        self,
+        observations: dict[str, torch.Tensor],
+        actions: torch.Tensor,
+        observation_features: torch.Tensor | None = None,
+    ) -> torch.Tensor:
+        device = get_device_from_parameters(self)
+        # Move each tensor in observations to device
+        observations = {k: v.to(device) for k, v in observations.items()}
+
+        obs_enc = self.encoder(observations, cache=observation_features)
+
+        inputs = torch.cat([obs_enc, actions], dim=-1)
+
+        # Loop through critics and collect outputs
+        q_values = []
+        for critic in self.critics:
+            q_values.append(critic(inputs))
+
+        # Stack outputs to match expected shape [num_critics, batch_size]
+        q_values = torch.stack([q.squeeze(-1) for q in q_values], dim=0)
+        return q_values
+
+
 class DiscreteCritic(nn.Module):
    def __init__(
        self,
@@ -414,7 +800,7 @@ class DiscreteCritic(nn.Module):
 class Policy(nn.Module):
    def __init__(
        self,
-        encoder: GaussianActorObservationEncoder,
+        encoder: SACObservationEncoder,
        network: nn.Module,
        action_dim: int,
        std_min: float = -5,
@@ -425,7 +811,7 @@ class Policy(nn.Module):
        encoder_is_shared: bool = False,
    ):
        super().__init__()
-        self.encoder: GaussianActorObservationEncoder = encoder
+        self.encoder: SACObservationEncoder = encoder
        self.network = network
        self.action_dim = action_dim
        self.std_min = std_min
@@ -499,7 +885,7 @@ class Policy(nn.Module):


 class DefaultImageEncoder(nn.Module):
-    def __init__(self, config: GaussianActorConfig):
+    def __init__(self, config: SACConfig):
        super().__init__()
        image_key = next(key for key in config.input_features if is_image_feature(key))
        self.image_enc_layers = nn.Sequential(
@@ -545,12 +931,12 @@ def freeze_image_encoder(image_encoder: nn.Module):


 class PretrainedImageEncoder(nn.Module):
-    def __init__(self, config: GaussianActorConfig):
+    def __init__(self, config: SACConfig):
        super().__init__()

        self.image_enc_layers, self.image_enc_out_shape = self._load_pretrained_vision_encoder(config)

-    def _load_pretrained_vision_encoder(self, config: GaussianActorConfig):
+    def _load_pretrained_vision_encoder(self, config: SACConfig):
        """Set up CNN encoder"""
        from transformers import AutoModel

@@ -32,18 +32,18 @@ from lerobot.processor import (
 )
 from lerobot.utils.constants import POLICY_POSTPROCESSOR_DEFAULT_NAME, POLICY_PREPROCESSOR_DEFAULT_NAME

-from .configuration_gaussian_actor import GaussianActorConfig
+from .configuration_sac import SACConfig


-def make_gaussian_actor_pre_post_processors(
-    config: GaussianActorConfig,
+def make_sac_pre_post_processors(
+    config: SACConfig,
    dataset_stats: dict[str, dict[str, torch.Tensor]] | None = None,
 ) -> tuple[
    PolicyProcessorPipeline[dict[str, Any], dict[str, Any]],
    PolicyProcessorPipeline[PolicyAction, PolicyAction],
 ]:
    """
-    Constructs pre-processor and post-processor pipelines for the Gaussian actor policy.
+    Constructs pre-processor and post-processor pipelines for the SAC policy.

    The pre-processing pipeline prepares input data for the model by:
    1. Renaming features to match pretrained configurations.
@@ -56,7 +56,7 @@ def make_gaussian_actor_pre_post_processors(
    2. Unnormalizing the output features to their original scale.

    Args:
-        config: The configuration object for the tanh-Gaussian policy.
+        config: The configuration object for the SAC policy.
        dataset_stats: A dictionary of statistics for normalization.

    Returns:
@@ -31,7 +31,7 @@ class RewardClassifierConfig(PreTrainedConfig):
    latent_dim: int = 256
    image_embedding_pooling_dim: int = 8
    dropout_rate: float = 0.1
-    model_name: str = "lerobot/resnet10"
+    model_name: str = "helper2424/resnet10"  # TODO: This needs to be updated. The model on the Hub doesn't call self.post_init() in its __init__, which is required by transformers v5 to set all_tied_weights_keys. The from_pretrained call fails when it tries to access this attribute during _finalize_model_loading.
    device: str = "cpu"
    model_type: str = "cnn"  # "transformer" or "cnn"
    num_cameras: int = 2
@@ -108,7 +108,6 @@ class Classifier(PreTrainedPolicy):
    def __init__(
        self,
        config: RewardClassifierConfig,
-        **kwargs,
    ):
        from transformers import AutoModel

@@ -270,6 +269,10 @@ class Classifier(PreTrainedPolicy):

    def predict_reward(self, batch, threshold=0.5):
        """Eval method. Returns predicted reward with the decision threshold as argument."""
+        # Check for both OBS_IMAGE and OBS_IMAGES prefixes
+        batch = self.normalize_inputs(batch)
+        batch = self.normalize_targets(batch)
+
        # Extract images from batch dict
        images = [batch[key] for key in self.config.input_features if key.startswith(OBS_IMAGE)]

@@ -62,6 +62,7 @@ from torch import Tensor, nn

 from lerobot.utils.constants import ACTION, OBS_LANGUAGE_ATTENTION_MASK, OBS_LANGUAGE_TOKENS, OBS_STATE
 from lerobot.utils.device_utils import get_safe_dtype
+from lerobot.utils.import_utils import require_package

 from ..pretrained import PreTrainedPolicy
 from ..rtc.modeling_rtc import RTCProcessor
@@ -239,6 +240,7 @@ class SmolVLAPolicy(PreTrainedPolicy):
                    the configuration class is used.
        """

+        require_package("transformers", extra="smolvla")
        super().__init__(config)
        config.validate_features()
        self.config = config
@@ -27,7 +27,7 @@ import torch.distributed as distributed
 import torch.nn.functional as F  # noqa: N812
 from einops import pack, rearrange, reduce, repeat, unpack
 from torch import einsum, nn
-from torch.cuda.amp import autocast
+from torch.amp import autocast
 from torch.optim import Optimizer

 from .configuration_vqbet import VQBeTConfig
@@ -1370,7 +1370,7 @@ class EuclideanCodebook(nn.Module):
        batch_samples = rearrange(batch_samples, "h ... d -> h (...) d")
        self.replace(batch_samples, batch_mask=expired_codes)

-    @autocast(enabled=False)
+    @autocast("cuda", enabled=False)
    def forward(self, x, sample_codebook_temp=None, mask=None, freeze_codebook=False):
        needs_codebook_dim = x.ndim < 4
        sample_codebook_temp = (
@@ -61,7 +61,6 @@ from .hil_processor import (
    RewardClassifierProcessorStep,
    TimeLimitProcessorStep,
 )
-from .leader_follower_processor import LeaderFollowerProcessor
 from .newline_task_processor import NewLineTaskProcessorStep
 from .normalize_processor import NormalizerProcessorStep, UnnormalizerProcessorStep, hotswap_stats
 from .observation_processor import VanillaObservationProcessorStep
@@ -123,7 +122,6 @@ __all__ = [
    "ImageCropResizeProcessorStep",
    "InfoProcessorStep",
    "InterventionActionProcessorStep",
-    "LeaderFollowerProcessor",
    "make_default_processors",
    "make_default_teleop_action_processor",
    "make_default_robot_action_processor",
@@ -321,7 +321,6 @@ class GymHILAdapterProcessorStep(ProcessorStep):
    This step normalizes the `transition` object by:
    1. Copying `teleop_action` from `info` to `complementary_data`.
    2. Copying `is_intervention` from `info` (using the string key) to `info` (using the enum key).
-    3. Copying `discrete_penalty` from `info` to `complementary_data`.
    """

    def __call__(self, transition: EnvTransition) -> EnvTransition:
@@ -331,9 +330,6 @@ class GymHILAdapterProcessorStep(ProcessorStep):
        if TELEOP_ACTION_KEY in info:
            complementary_data[TELEOP_ACTION_KEY] = info[TELEOP_ACTION_KEY]

-        if DISCRETE_PENALTY_KEY in info:
-            complementary_data[DISCRETE_PENALTY_KEY] = info[DISCRETE_PENALTY_KEY]
-
        if "is_intervention" in info:
            info[TeleopEvents.IS_INTERVENTION] = info["is_intervention"]

@@ -352,24 +348,18 @@ class GymHILAdapterProcessorStep(ProcessorStep):
@ProcessorStepRegistry.register("gripper_penalty_processor")
 class GripperPenaltyProcessorStep(ProcessorStep):
    """
-    Applies a small per-transition cost on the discrete gripper action.
+    Applies a penalty for inefficient gripper usage.

-    Fires only when the commanded action would actually transition the gripper
-    from one extreme to the other (close-while-open or open-while-closed).
-    This discourages gripper oscillation while leaving "stay" and saturating-further
-    commands unpenalized.
+    This step penalizes actions that attempt to close an already closed gripper or
+    open an already open one, based on position thresholds.

    Attributes:
        penalty: The negative reward value to apply.
        max_gripper_pos: The maximum position value for the gripper, used for normalization.
-        open_threshold: Normalized state below which the gripper is considered "open".
-        closed_threshold: Normalized state above which the gripper is considered "closed".
    """

-    penalty: float = -0.02
+    penalty: float = -0.01
    max_gripper_pos: float = 30.0
-    open_threshold: float = 0.1
-    closed_threshold: float = 0.9

    def __call__(self, transition: EnvTransition) -> EnvTransition:
        """
@@ -401,13 +391,9 @@ class GripperPenaltyProcessorStep(ProcessorStep):
        gripper_state_normalized = current_gripper_pos / self.max_gripper_pos

        # Calculate penalty boolean as in original
-        #   - currently open  AND target is closed  -> close transition
-        #   - currently closed AND target is open   -> open transition
-        is_open = gripper_state_normalized < self.open_threshold
-        is_closed = gripper_state_normalized > self.closed_threshold
-        cmd_close = gripper_action_normalized > self.closed_threshold
-        cmd_open = gripper_action_normalized < self.open_threshold
-        gripper_penalty_bool = (is_open and cmd_close) or (is_closed and cmd_open)
+        gripper_penalty_bool = (gripper_state_normalized < 0.5 and gripper_action_normalized > 0.5) or (
+            gripper_state_normalized > 0.75 and gripper_action_normalized < 0.5
+        )

        gripper_penalty = self.penalty * int(gripper_penalty_bool)

@@ -423,14 +409,11 @@ class GripperPenaltyProcessorStep(ProcessorStep):
        Returns the configuration of the step for serialization.

        Returns:
-            A dictionary containing the penalty value, max gripper position,
-            and the open/closed thresholds.
+            A dictionary containing the penalty value and max gripper position.
        """
        return {
            "penalty": self.penalty,
            "max_gripper_pos": self.max_gripper_pos,
-            "open_threshold": self.open_threshold,
-            "closed_threshold": self.closed_threshold,
        }

    def reset(self) -> None:
@@ -443,39 +426,6 @@ class GripperPenaltyProcessorStep(ProcessorStep):
        return features


-def _ndarray_intervention_to_action_list(
-    flat: np.ndarray, use_rotation: bool, use_gripper: bool
-) -> list[float]:
-    """Flatten ``LeaderFollowerProcessor`` / policy outputs into a policy action list.
-
-    PR #2596 leader mode always produces 7 elements ``[dx,dy,dz,wx,wy,wz,g]``. When
-    ``use_rotation`` is False, rotation is disabled (zeroed in the 7-D vector) and
-    we still emit a 4-D tensor ``[dx,dy,dz,g]`` for the rest of the pipeline.
-    """
-    n = int(flat.size)
-    if not use_rotation and use_gripper and n == 7:
-        return [float(flat[0]), float(flat[1]), float(flat[2]), float(flat[6])]
-    if not use_rotation and not use_gripper and n == 6:
-        return [float(flat[0]), float(flat[1]), float(flat[2])]
-    return flat.tolist()
-
-
-def _tensor_intervention_to_action_list(
-    flat: torch.Tensor, use_rotation: bool, use_gripper: bool
-) -> list[float]:
-    n = int(flat.numel())
-    if not use_rotation and use_gripper and n == 7:
-        return [
-            float(flat[0].item()),
-            float(flat[1].item()),
-            float(flat[2].item()),
-            float(flat[6].item()),
-        ]
-    if not use_rotation and not use_gripper and n == 6:
-        return [float(flat[0].item()), float(flat[1].item()), float(flat[2].item())]
-    return [float(x.item()) for x in flat]
-
-
@dataclass
@ProcessorStepRegistry.register("intervention_action_processor")
 class InterventionActionProcessorStep(ProcessorStep):
@@ -488,16 +438,11 @@ class InterventionActionProcessorStep(ProcessorStep):

    Attributes:
        use_gripper: Whether to include the gripper in the teleoperated action.
-        use_rotation: For dict-based teleop actions, whether to include delta_wx/y/z.
-                      For 7-D ndarray/tensors from ``LeaderFollowerProcessor``, when
-                      ``False`` the policy action is sliced to ``[dx,dy,dz,gripper]``.
        terminate_on_success: If True, automatically sets the `done` flag when a
                              `success` event is received.
    """

    use_gripper: bool = False
-    use_rotation: bool = False
-    gripper_neutral_action: float = 1.0
    terminate_on_success: bool = True

    def __call__(self, transition: EnvTransition) -> EnvTransition:
@@ -535,22 +480,10 @@ class InterventionActionProcessorStep(ProcessorStep):
                    teleop_action.get("delta_y", 0.0),
                    teleop_action.get("delta_z", 0.0),
                ]
-                if self.use_rotation:
-                    action_list.extend(
-                        [
-                            teleop_action.get("delta_wx", 0.0),
-                            teleop_action.get("delta_wy", 0.0),
-                            teleop_action.get("delta_wz", 0.0),
-                        ]
-                    )
                if self.use_gripper:
-                    action_list.append(teleop_action.get(GRIPPER_KEY, self.gripper_neutral_action))
-            elif isinstance(teleop_action, torch.Tensor):
-                flat = teleop_action.detach().flatten()
-                action_list = _tensor_intervention_to_action_list(flat, self.use_rotation, self.use_gripper)
+                    action_list.append(teleop_action.get(GRIPPER_KEY, 1.0))
            elif isinstance(teleop_action, np.ndarray):
-                flat = np.asarray(teleop_action).reshape(-1)
-                action_list = _ndarray_intervention_to_action_list(flat, self.use_rotation, self.use_gripper)
+                action_list = teleop_action.tolist()
            else:
                action_list = teleop_action

@@ -586,8 +519,6 @@ class InterventionActionProcessorStep(ProcessorStep):
        """
        return {
            "use_gripper": self.use_gripper,
-            "use_rotation": self.use_rotation,
-            "gripper_neutral_action": self.gripper_neutral_action,
            "terminate_on_success": self.terminate_on_success,
        }

@@ -626,7 +557,7 @@ class RewardClassifierProcessorStep(ProcessorStep):
    def __post_init__(self):
        """Initializes the reward classifier model after the dataclass is created."""
        if self.pretrained_path is not None:
-            from lerobot.policies.gaussian_actor.reward_model.modeling_classifier import Classifier
+            from lerobot.policies.sac.reward_model.modeling_classifier import Classifier

            self.reward_classifier = Classifier.from_pretrained(self.pretrained_path)
            self.reward_classifier.to(self.device)
@@ -1,255 +0,0 @@
-#!/usr/bin/env python
-
-# Copyright 2025 The HuggingFace Inc. team. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from dataclasses import dataclass
-
-import numpy as np
-import torch
-
-from lerobot.configs.types import PipelineFeatureType, PolicyFeature
-from lerobot.model.kinematics import RobotKinematics
-from lerobot.processor.pipeline import EnvTransition, ProcessorStepRegistry, TransitionKey
-from lerobot.robots import Robot
-from lerobot.teleoperators import Teleoperator
-from lerobot.teleoperators.utils import TeleopEvents
-from lerobot.utils.rotation import Rotation
-
-from .pipeline import ProcessorStep
-
-
-@ProcessorStepRegistry.register("leader_follower_processor")
-@dataclass
-class LeaderFollowerProcessor(ProcessorStep):
-    """
-    Processor for leader-follower teleoperation mode.
-
-    This processor:
-    1. Sends follower positions to leader arm when not intervening
-    2. Computes EE delta actions from leader when intervening
-    3. Handles teleop events from the leader device
-    """
-
-    leader_device: Teleoperator
-    motor_names: list[str]
-    robot: Robot
-    kinematics: RobotKinematics
-    end_effector_step_sizes: np.ndarray | None = None
-    use_gripper: bool = True
-    # PR #2596 always produces a **7-D** intervention vector ``[dx, dy, dz, wx,
-    # wy, wz, gripper]`` (normalised to ~[-1, 1] per axis). When ``use_rotation``
-    # is False, the middle three components are **zeroed** (rotation disabled,
-    # not removed): the same tensor layout and code path as PR #2596, but the
-    # downstream policy / IK only sees position + gripper. ``wx/wy/wz`` step
-    # sizes are only read when ``use_rotation`` is True.
-    use_rotation: bool = True
-    # prev_leader_gripper: float | None = None
-    max_gripper_pos: float = 100.0
-    use_ik_solution: bool = False
-
-    def __call__(self, transition: EnvTransition) -> EnvTransition:
-        """Process transition with leader-follower logic."""
-        # Get current follower position from complementary data
-        # raw_joint_pos = transition.get(TransitionKey.COMPLEMENTARY_DATA, {}).get("raw_joint_positions")
-        raw_joint_pos = transition.get(TransitionKey.OBSERVATION)
-        if raw_joint_pos is not None:
-            # Send follower position to leader (for follow mode)
-            # follower_action = {
-            #     f"{motor}.pos": float(raw_joint_pos[motor])
-            #     for motor in self.motor_names
-            # }
-            self.leader_device.send_action(raw_joint_pos)
-
-        # Only compute EE action if intervention is active
-        # (AddTeleopEventsAsInfo already added IS_INTERVENTION to info)
-        info = transition.get(TransitionKey.INFO, {})
-        if info.get(TeleopEvents.IS_INTERVENTION, False):
-            # Get leader joint positions from teleop_action
-            # (AddTeleopActionAsComplimentaryData already got the action)
-            complementary = transition.get(TransitionKey.COMPLEMENTARY_DATA, {})
-            teleop_action = complementary.get("teleop_action", {})
-
-            if isinstance(teleop_action, dict) and raw_joint_pos is not None:
-                leader_pos = np.array([teleop_action[f"{motor}.pos"] for motor in self.motor_names])
-
-                leader_ee = self.kinematics.forward_kinematics(leader_pos)
-
-                if self.use_ik_solution and "IK_solution" in transition.get(TransitionKey.COMPLEMENTARY_DATA):
-                    follower_pos = transition.get(TransitionKey.COMPLEMENTARY_DATA)["IK_solution"]
-                else:
-                    follower_pos = np.array([raw_joint_pos[f"{motor}.pos"] for motor in self.motor_names])
-
-                follower_ee = self.kinematics.forward_kinematics(follower_pos)
-
-                # follower_gripper_pos = raw_joint_pos["gripper.pos"]
-                follower_gripper_pos = follower_pos[-1]  # assuming gripper is the last motor
-
-                leader_ee_pos = leader_ee[:3, 3]
-                leader_ee_rvec = Rotation.from_matrix(leader_ee[:3, :3]).as_rotvec()
-                leader_gripper_pos = np.clip(
-                    teleop_action["gripper.pos"], -self.max_gripper_pos, self.max_gripper_pos
-                )
-
-                follower_ee_pos = follower_ee[:3, 3]
-                # follower_ee_rvec = Rotation.from_matrix(follower_ee[:3, :3]).as_rotvec()
-
-                delta_pos = leader_ee_pos - follower_ee_pos
-                delta_gripper = leader_gripper_pos - follower_gripper_pos
-
-                # Normalise position to ~[-1, 1] per axis (PR #2596).
-                step_xyz = np.array(
-                    [
-                        self.end_effector_step_sizes["x"],
-                        self.end_effector_step_sizes["y"],
-                        self.end_effector_step_sizes["z"],
-                    ]
-                )
-                delta_pos = delta_pos / step_xyz
-                max_normalized_pos = max(
-                    abs(delta_pos[0]),
-                    abs(delta_pos[1]),
-                    abs(delta_pos[2]),
-                )
-
-                # Relative rotation follower -> leader (same construction as PR #2596).
-                r_delta = follower_ee[:3, :3].T @ leader_ee[:3, :3]
-                delta_rvec = Rotation.from_matrix(r_delta).as_rotvec()
-
-                if self.use_rotation:
-                    desired = np.eye(4, dtype=float)
-                    desired[:3, :3] = follower_ee[:3, :3] @ r_delta
-                    desired[:3, 3] = follower_ee[:3, 3] + delta_pos * step_xyz
-
-                    pos = desired[:3, 3]
-                    tw = Rotation.from_matrix(desired[:3, :3]).as_rotvec()
-
-                    assert np.allclose(pos, leader_ee_pos), "Position delta computation error"
-                    assert np.allclose(tw, leader_ee_rvec), "Orientation delta computation error"
-                    assert np.isclose(follower_gripper_pos + delta_gripper, leader_gripper_pos), (
-                        "Gripper delta computation error"
-                    )
-
-                    delta_rvec = delta_rvec / np.array(
-                        [
-                            self.end_effector_step_sizes["wx"],
-                            self.end_effector_step_sizes["wy"],
-                            self.end_effector_step_sizes["wz"],
-                        ]
-                    )
-                    normalized_rot = max(abs(delta_rvec[0]), abs(delta_rvec[1]), abs(delta_rvec[2]))
-                    max_normalized = max(max_normalized_pos, normalized_rot)
-                    if max_normalized > 1.0:
-                        delta_pos = delta_pos / max_normalized
-                        delta_rvec = delta_rvec / max_normalized
-                else:
-                    # Rotation **disabled**: keep PR #2596 joint scaling on position only.
-                    if max_normalized_pos > 1.0:
-                        delta_pos = delta_pos / max_normalized_pos
-                    delta_rvec = np.zeros(3, dtype=float)
-
-                grip_norm = (
-                    np.clip(delta_gripper, -self.max_gripper_pos, self.max_gripper_pos) / self.max_gripper_pos
-                )
-
-                intervention_action = np.array(
-                    [
-                        delta_pos[0],
-                        delta_pos[1],
-                        delta_pos[2],
-                        delta_rvec[0],
-                        delta_rvec[1],
-                        delta_rvec[2],
-                        grip_norm,
-                    ],
-                    dtype=float,
-                )
-
-                #         # Extract leader positions from teleop action dict
-                #         # leader_pos = np.array([teleop_action.get(f"{motor}.pos", 0) for motor in self.motor_names])
-                #         # follower_pos = np.array([raw_joint_pos[f"{motor}.pos"] for motor in self.motor_names])
-
-                #         teleop_action = self.leader_device.bus.sync_read("Present_Position")
-                #         raw_joint_pos = self.robot.bus.sync_read("Present_Position")
-                #         leader_pos = np.array([teleop_action.get(f"{motor}", 0) for motor in self.motor_names])
-                #         follower_pos = np.array([raw_joint_pos[f"{motor}"] for motor in self.motor_names])
-
-                #         # Compute EE positions
-                #         leader_ee_fi = self.kinematics.forward_kinematics(leader_pos)
-                #         leader_ee_pos = leader_ee_fi[:3, 3]
-                #         # leader_ee_rot = Rotation.from_matrix(leader_ee_fi[:3, :3]).as_rotvec()
-                #         leader_ee = np.concat([leader_ee_pos, [0,0,0]])
-
-                #         if "IK_solution" in transition.get(TransitionKey.COMPLEMENTARY_DATA):
-                #             follower_ee = transition.get(TransitionKey.COMPLEMENTARY_DATA)["IK_solution"]
-                #         else:
-                #             follower_pos = np.array([raw_joint_pos[f"{motor}.pos"] for motor in self.motor_names])
-                #             follower_ee_fi = self.kinematics.forward_kinematics(follower_pos)
-                #             follower_ee_pos = follower_ee_fi[:3, 3]
-                #             # follower_ee_rot = Rotation.from_matrix(follower_ee_fi[:3, :3]).as_rotvec()
-                #             follower_ee = np.concat([follower_ee_pos, [0,0,0]])
-
-                #         # Compute normalized EE delta
-                #         if self.end_effector_step_sizes is not None:
-                #             ee_delta = np.clip(
-                #                 leader_ee - follower_ee,
-                #                 -self.end_effector_step_sizes,
-                #                 self.end_effector_step_sizes
-                #             )
-                #             ee_delta_normalized = ee_delta / self.end_effector_step_sizes
-                #         else:
-                #             ee_delta_normalized = leader_ee - follower_ee
-
-                #         # Handle gripper
-                #         if self.use_gripper and len(leader_pos) > 3:
-                #             if self.prev_leader_gripper is None:
-                #                 self.prev_leader_gripper = np.clip(
-                #                     leader_pos[-1], 0, self.max_gripper_pos
-                #                 )
-
-                #             leader_gripper = leader_pos[-1]
-                #             gripper_delta = leader_gripper - self.prev_leader_gripper
-                #             normalized_delta = gripper_delta / self.max_gripper_pos
-
-                #             # Quantize gripper action
-                #             if normalized_delta >= 0.3:
-                #                 gripper_action = 2
-                #             elif normalized_delta <= -0.1:
-                #                 gripper_action = 0
-                #             else:
-                #                 gripper_action = 1
-
-                #             self.prev_leader_gripper = leader_gripper
-
-                #             # Create intervention action
-                #             intervention_action = np.append(ee_delta_normalized, gripper_action)
-                #         else:
-                #             intervention_action = ee_delta_normalized
-
-                #         # Override teleop_action with computed EE action
-                complementary["teleop_action"] = torch.from_numpy(intervention_action).float()
-                transition[TransitionKey.COMPLEMENTARY_DATA] = complementary  # type: ignore[misc]
-
-        return transition
-
-    def reset(self) -> None:
-        """Reset leader-follower state."""
-        # self.prev_leader_gripper = None
-        if hasattr(self.leader_device, "reset"):
-            self.leader_device.reset()
-
-    def transform_features(
-        self, features: dict[PipelineFeatureType, dict[str, PolicyFeature]]
-    ) -> dict[PipelineFeatureType, dict[str, PolicyFeature]]:
-        return features
@@ -134,15 +134,6 @@ class _NormalizationMixin:
        if self.dtype is None:
            self.dtype = torch.float32
        self._tensor_stats = to_tensor(self.stats, device=self.device, dtype=self.dtype)
-        self._reshape_visual_stats()
-
-    def _reshape_visual_stats(self) -> None:
-        """Reshape visual stats from ``[C]`` to ``[C, 1, 1]`` for image broadcasting."""
-        for key, feature in self.features.items():
-            if feature.type == FeatureType.VISUAL and key in self._tensor_stats:
-                for stat_name, stat_tensor in self._tensor_stats[key].items():
-                    if isinstance(stat_tensor, Tensor) and stat_tensor.ndim == 1:
-                        self._tensor_stats[key][stat_name] = stat_tensor.reshape(-1, 1, 1)

    def to(
        self, device: torch.device | str | None = None, dtype: torch.dtype | None = None
@@ -161,7 +152,6 @@ class _NormalizationMixin:
        if dtype is not None:
            self.dtype = dtype
        self._tensor_stats = to_tensor(self.stats, device=self.device, dtype=self.dtype)
-        self._reshape_visual_stats()
        return self

    def state_dict(self) -> dict[str, Tensor]:
@@ -211,7 +201,6 @@ class _NormalizationMixin:
            # Don't load from state_dict, keep the explicitly provided stats
            # But ensure _tensor_stats is properly initialized
            self._tensor_stats = to_tensor(self.stats, device=self.device, dtype=self.dtype)  # type: ignore[assignment]
-            self._reshape_visual_stats()
            return

        # Normal behavior: load stats from state_dict
@@ -222,7 +211,6 @@ class _NormalizationMixin:
            self._tensor_stats.setdefault(key, {})[stat_name] = tensor.to(
                dtype=torch.float32, device=self.device
            )
-        self._reshape_visual_stats()

        # Reconstruct the original stats dict from tensor stats for compatibility with to() method
        # and other functions that rely on self.stats
@@ -12,33 +12,23 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-"""Reinforcement learning modules.
+"""
+Reinforcement learning modules.

-Distributed actor / learner entry points (``actor``, ``learner``,
-``learner_service``) require ``pip install 'lerobot[hilserl]'``. Algorithms,
-buffer, data sources and trainer are gRPC-free and usable standalone.
+Requires: ``pip install 'lerobot[hilserl]'``
+
+Available modules (import directly)::
+
+    from lerobot.rl.actor import ...
+    from lerobot.rl.learner import ...
+    from lerobot.rl.learner_service import ...
+    from lerobot.rl.buffer import ...
+    from lerobot.rl.eval_policy import ...
+    from lerobot.rl.gym_manipulator import ...
 """

-from .algorithms.base import RLAlgorithm as RLAlgorithm
-from .algorithms.configs import RLAlgorithmConfig as RLAlgorithmConfig, TrainingStats as TrainingStats
-from .algorithms.factory import (
-    make_algorithm as make_algorithm,
-    make_algorithm_config as make_algorithm_config,
-)
-from .algorithms.sac.configuration_sac import SACAlgorithmConfig as SACAlgorithmConfig
-from .buffer import ReplayBuffer as ReplayBuffer
-from .data_sources import DataMixer as DataMixer, OnlineOfflineMixer as OnlineOfflineMixer
-from .trainer import RLTrainer as RLTrainer
+from lerobot.utils.import_utils import require_package

-__all__ = [
-    "RLAlgorithm",
-    "RLAlgorithmConfig",
-    "TrainingStats",
-    "make_algorithm",
-    "make_algorithm_config",
-    "SACAlgorithmConfig",
-    "RLTrainer",
-    "ReplayBuffer",
-    "DataMixer",
-    "OnlineOfflineMixer",
-]
+require_package("grpcio", extra="hilserl", import_name="grpc")
+
+__all__: list[str] = []
@@ -51,20 +51,17 @@ import os
 import time
 from functools import lru_cache
 from queue import Empty
-from typing import Any

 import grpc
 import torch
 from torch import nn
-from torch.multiprocessing import Queue
+from torch.multiprocessing import Event, Queue

 from lerobot.cameras import opencv  # noqa: F401
 from lerobot.configs import parser
-from lerobot.policies import PreTrainedPolicy, make_policy, make_pre_post_processors
-from lerobot.processor import TransitionKey
-from lerobot.rl.process import ProcessSignalHandler
-from lerobot.rl.queue import get_last_item_from_queue
-from lerobot.rl.train_rl import TrainRLServerPipelineConfig
+from lerobot.configs.train import TrainRLServerPipelineConfig
+from lerobot.policies import make_policy
+from lerobot.policies.sac.modeling_sac import SACPolicy
 from lerobot.robots import so_follower  # noqa: F401
 from lerobot.teleoperators import gamepad, so_leader  # noqa: F401
 from lerobot.teleoperators.utils import TeleopEvents
@@ -77,11 +74,13 @@ from lerobot.transport.utils import (
    send_bytes_in_chunks,
    transitions_to_bytes,
 )
+from lerobot.types import TransitionKey
 from lerobot.utils.device_utils import get_safe_torch_device
 from lerobot.utils.random_utils import set_seed
 from lerobot.utils.robot_utils import precise_sleep
 from lerobot.utils.transition import (
    Transition,
+    move_state_dict_to_device,
    move_transition_to_device,
 )
 from lerobot.utils.utils import (
@@ -90,11 +89,13 @@ from lerobot.utils.utils import (
 )

 from .gym_manipulator import (
+    create_transition,
    make_processors,
    make_robot_env,
-    reset_and_build_transition,
    step_env_and_process_transition,
 )
+from .process import ProcessSignalHandler
+from .queue import get_last_item_from_queue

 # Main entry point

@@ -211,7 +212,7 @@ def actor_cli(cfg: TrainRLServerPipelineConfig):

 def act_with_policy(
    cfg: TrainRLServerPipelineConfig,
-    shutdown_event: Any,  # Event
+    shutdown_event: any,  # TODO: annotate as Event; builtin `any` is a function, not a type
    parameters_queue: Queue,
    transitions_queue: Queue,
    interactions_queue: Queue,
@@ -251,21 +252,22 @@ def act_with_policy(
    logging.info("make_policy")

    ### Instantiate the policy in both the actor and learner processes
-    ### To avoid sending a policy object through the port, we create a policy instance
+    ### To avoid sending a SACPolicy object through the port, we create a policy instance
    ### on both sides, the learner sends the updated parameters every n steps to update the actor's parameters
-    policy = make_policy(
+    policy: SACPolicy = make_policy(
        cfg=cfg.policy,
        env_cfg=cfg.env,
    )
-    policy = policy.to(device).eval()
+    policy = policy.eval()
    assert isinstance(policy, nn.Module)

-    preprocessor, postprocessor = make_pre_post_processors(
-        policy_cfg=cfg.policy,
-        dataset_stats=cfg.policy.dataset_stats,
-    )
+    obs, info = online_env.reset()
+    env_processor.reset()
+    action_processor.reset()

-    transition = reset_and_build_transition(online_env, env_processor, action_processor)
+    # Process initial observation
+    transition = create_transition(observation=obs, info=info)
+    transition = env_processor(transition)

    # NOTE: For the moment we will solely handle the case of a single environment
    sum_reward_episode = 0
@@ -289,17 +291,8 @@ def act_with_policy(

        # Time policy inference and check if it meets FPS requirement
        with policy_timer:
-            normalized_observation = preprocessor.process_observation(observation)
-            action = policy.select_action(batch=normalized_observation)
-            # Unnormalize only the continuous part.
-            if cfg.policy.num_discrete_actions is not None:
-                continuous_action = postprocessor.process_action(action[..., :-1])
-                discrete_action = action[..., -1:].to(
-                    device=continuous_action.device, dtype=continuous_action.dtype
-                )
-                action = torch.cat([continuous_action, discrete_action], dim=-1)
-            else:
-                action = postprocessor.process_action(action)
+            # Run policy inference on the current observation
+            action = policy.select_action(batch=observation)
        policy_fps = policy_timer.fps_last

        log_policy_frequency_issue(policy_fps=policy_fps, cfg=cfg, interaction_step=interaction_step)
@@ -333,8 +326,7 @@ def act_with_policy(

        # Check for intervention from transition info
        intervention_info = new_transition[TransitionKey.INFO]
-        is_intervention = bool(intervention_info.get(TeleopEvents.IS_INTERVENTION, False))
-        if is_intervention:
+        if intervention_info.get(TeleopEvents.IS_INTERVENTION, False):
            episode_intervention = True
            episode_intervention_steps += 1

@@ -342,7 +334,6 @@ def act_with_policy(
            "discrete_penalty": torch.tensor(
                [new_transition[TransitionKey.COMPLEMENTARY_DATA].get("discrete_penalty", 0.0)]
            ),
-            TeleopEvents.IS_INTERVENTION.value: is_intervention,
        }
        # Create transition for learner (convert to old format)
        list_transition_to_send_to_learner.append(
@@ -399,7 +390,14 @@ def act_with_policy(
            episode_intervention_steps = 0
            episode_total_steps = 0

-            transition = reset_and_build_transition(online_env, env_processor, action_processor)
+            # Reset environment and processors
+            obs, info = online_env.reset()
+            env_processor.reset()
+            action_processor.reset()
+
+            # Process initial observation
+            transition = create_transition(observation=obs, info=info)
+            transition = env_processor(transition)

        if cfg.env.fps is not None:
            dt_time = time.perf_counter() - start_time
@@ -411,7 +409,7 @@ def act_with_policy(

 def establish_learner_connection(
    stub: services_pb2_grpc.LearnerServiceStub,
-    shutdown_event: Any,  # Event
+    shutdown_event: Event,  # type: ignore
    attempts: int = 30,
 ):
    """Establish a connection with the learner.
@@ -463,7 +461,7 @@ def learner_service_client(
 def receive_policy(
    cfg: TrainRLServerPipelineConfig,
    parameters_queue: Queue,
-    shutdown_event: Any,  # Event
+    shutdown_event: Event,  # type: ignore
    learner_client: services_pb2_grpc.LearnerServiceStub | None = None,
    grpc_channel: grpc.Channel | None = None,
 ):
@@ -515,7 +513,7 @@ def receive_policy(
 def send_transitions(
    cfg: TrainRLServerPipelineConfig,
    transitions_queue: Queue,
-    shutdown_event: Any,  # Event
+    shutdown_event: any,  # TODO: annotate as Event; builtin `any` is a function, not a type
    learner_client: services_pb2_grpc.LearnerServiceStub | None = None,
    grpc_channel: grpc.Channel | None = None,
 ) -> services_pb2.Empty:
@@ -565,7 +563,7 @@ def send_transitions(
 def send_interactions(
    cfg: TrainRLServerPipelineConfig,
    interactions_queue: Queue,
-    shutdown_event: Any,  # Event
+    shutdown_event: Event,  # type: ignore
    learner_client: services_pb2_grpc.LearnerServiceStub | None = None,
    grpc_channel: grpc.Channel | None = None,
 ) -> services_pb2.Empty:
@@ -615,11 +613,7 @@ def send_interactions(
    logging.info("[ACTOR] Interactions process stopped")


-def transitions_stream(
-    shutdown_event: Any,  # Event
-    transitions_queue: Queue,
-    timeout: float,
-) -> services_pb2.Empty:
+def transitions_stream(shutdown_event: Event, transitions_queue: Queue, timeout: float) -> services_pb2.Empty:  # type: ignore
    while not shutdown_event.is_set():
        try:
            message = transitions_queue.get(block=True, timeout=timeout)
@@ -635,9 +629,9 @@ def transitions_stream(


 def interactions_stream(
-    shutdown_event: Any,  # Event
+    shutdown_event: Event,
    interactions_queue: Queue,
-    timeout: float,
+    timeout: float,  # type: ignore
 ) -> services_pb2.Empty:
    while not shutdown_event.is_set():
        try:
@@ -658,7 +652,7 @@ def interactions_stream(
 #  Policy functions


-def update_policy_parameters(policy: PreTrainedPolicy, parameters_queue: Queue, device):
+def update_policy_parameters(policy: SACPolicy, parameters_queue: Queue, device):
    bytes_state_dict = get_last_item_from_queue(parameters_queue, block=False)
    if bytes_state_dict is not None:
        logging.info("[ACTOR] Load new parameters from Learner.")
@@ -673,7 +667,18 @@ def update_policy_parameters(policy: PreTrainedPolicy, parameters_queue: Queue,
        # - Send critic's encoder state when shared_encoder=True
        # - Skip encoder params entirely when freeze_vision_encoder=True
        # - Ensure discrete_critic gets correct encoder state (currently uses encoder_critic)
-        policy.load_actor_weights(state_dicts, device=device)
+
+        # Load actor state dict
+        actor_state_dict = move_state_dict_to_device(state_dicts["policy"], device=device)
+        policy.actor.load_state_dict(actor_state_dict)
+
+        # Load discrete critic if present
+        if hasattr(policy, "discrete_critic") and "discrete_critic" in state_dicts:
+            discrete_critic_state_dict = move_state_dict_to_device(
+                state_dicts["discrete_critic"], device=device
+            )
+            policy.discrete_critic.load_state_dict(discrete_critic_state_dict)
+            logging.info("[ACTOR] Loaded discrete critic parameters from Learner.")


 #  Utilities functions
@@ -1,106 +0,0 @@
-# Copyright 2026 The HuggingFace Inc. team. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from __future__ import annotations
-
-import abc
-from collections.abc import Iterator
-from typing import TYPE_CHECKING, Any
-
-import torch
-from torch.optim import Optimizer
-
-from lerobot.rl.algorithms.configs import RLAlgorithmConfig, TrainingStats
-
-if TYPE_CHECKING:
-    from lerobot.rl.data_sources.data_mixer import DataMixer
-
-BatchType = dict[str, Any]
-
-
-class RLAlgorithm(abc.ABC):
-    """Base for all RL algorithms."""
-
-    config_class: type[RLAlgorithmConfig] | None = None
-    name: str | None = None
-
-    def __init_subclass__(cls, **kwargs):
-        super().__init_subclass__(**kwargs)
-        if not getattr(cls, "config_class", None):
-            raise TypeError(f"Class {cls.__name__} must define 'config_class'")
-        if not getattr(cls, "name", None):
-            raise TypeError(f"Class {cls.__name__} must define 'name'")
-
-    @abc.abstractmethod
-    def update(self, batch_iterator: Iterator[BatchType]) -> TrainingStats:
-        """One complete training step.
-
-        The algorithm calls ``next(batch_iterator)`` as many times as it
-        needs (e.g. ``utd_ratio`` times for SAC) to obtain fresh batches.
-        The iterator is owned by the trainer; the algorithm just consumes
-        from it.
-        """
-        ...
-
-    def configure_data_iterator(
-        self,
-        data_mixer: DataMixer,
-        batch_size: int,
-        *,
-        async_prefetch: bool = True,
-        queue_size: int = 2,
-    ) -> Iterator[BatchType]:
-        """Create the data iterator this algorithm needs.
-
-        The default implementation uses the standard ``data_mixer.get_iterator()``.
-        Algorithms that need specialised sampling should override this method.
-        """
-        return data_mixer.get_iterator(
-            batch_size=batch_size,
-            async_prefetch=async_prefetch,
-            queue_size=queue_size,
-        )
-
-    def make_optimizers_and_scheduler(self) -> dict[str, Optimizer]:
-        """Create, store, and return the optimizers needed for training.
-
-        Called on the **learner** side after construction.  Subclasses must
-        override this with algorithm-specific optimizer setup.
-        """
-        return {}
-
-    def get_optimizers(self) -> dict[str, Optimizer]:
-        """Return optimizers for checkpointing / external scheduling."""
-        return {}
-
-    @property
-    def optimization_step(self) -> int:
-        """Current learner optimization step.
-
-        Part of the stable contract for checkpoint/resume. Algorithms can
-        either use this default storage or override for custom behavior.
-        """
-        return getattr(self, "_optimization_step", 0)
-
-    @optimization_step.setter
-    def optimization_step(self, value: int) -> None:
-        self._optimization_step = int(value)
-
-    def get_weights(self) -> dict[str, Any]:
-        """Policy state-dict to push to actors."""
-        return {}
-
-    @abc.abstractmethod
-    def load_weights(self, weights: dict[str, Any], device: str | torch.device = "cpu") -> None:
-        """Load policy state-dict received from the learner."""
@@ -1,76 +0,0 @@
-# Copyright 2026 The HuggingFace Inc. team. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from __future__ import annotations
-
-import abc
-from dataclasses import dataclass, field
-from typing import TYPE_CHECKING, Any
-
-import draccus
-import torch
-
-if TYPE_CHECKING:
-    from lerobot.rl.algorithms.base import RLAlgorithm
-
-
-@dataclass
-class TrainingStats:
-    """Returned by ``algorithm.update()`` for logging and checkpointing."""
-
-    losses: dict[str, float] = field(default_factory=dict)
-    grad_norms: dict[str, float] = field(default_factory=dict)
-    extra: dict[str, float] = field(default_factory=dict)
-
-    def to_log_dict(self) -> dict[str, float]:
-        """Flatten all stats into a single dict for logging."""
-
-        d: dict[str, float] = {}
-        for name, val in self.losses.items():
-            d[name] = val
-        for name, val in self.grad_norms.items():
-            d[f"{name}_grad_norm"] = val
-        for name, val in self.extra.items():
-            d[name] = val
-        return d
-
-
-@dataclass
-class RLAlgorithmConfig(draccus.ChoiceRegistry, abc.ABC):
-    """Registry for algorithm configs."""
-
-    @property
-    def type(self) -> str:
-        """Registered name of this algorithm config (e.g. ``"sac"``)."""
-        choice_name = self.get_choice_name(self.__class__)
-        if not isinstance(choice_name, str):
-            raise TypeError(f"Expected string from get_choice_name, got {type(choice_name)}")
-        return choice_name
-
-    @abc.abstractmethod
-    def build_algorithm(self, policy: torch.nn.Module) -> RLAlgorithm:
-        """Construct the :class:`RLAlgorithm` for this config.
-
-        Must be overridden by every registered config subclass.
-        """
-        raise NotImplementedError(f"{type(self).__name__} must implement build_algorithm()")
-
-    @classmethod
-    @abc.abstractmethod
-    def from_policy_config(cls, policy_cfg: Any) -> RLAlgorithmConfig:
-        """Build an algorithm config from a policy config.
-
-        Must be overridden by every registered config subclass.
-        """
-        raise NotImplementedError(f"{cls.__name__} must implement from_policy_config()")
@@ -1,47 +0,0 @@
-# Copyright 2026 The HuggingFace Inc. team. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from __future__ import annotations
-
-import torch
-
-from lerobot.rl.algorithms.base import RLAlgorithm
-from lerobot.rl.algorithms.configs import RLAlgorithmConfig
-
-
-def make_algorithm_config(algorithm_type: str, **kwargs) -> RLAlgorithmConfig:
-    """Instantiate an `RLAlgorithmConfig` from its registered type name.
-
-    Args:
-        algorithm_type: Registry key of the algorithm (e.g. ``"sac"``).
-        **kwargs: Keyword arguments forwarded to the config class constructor.
-
-    Returns:
-        An instance of the matching ``RLAlgorithmConfig`` subclass.
-
-    Raises:
-        ValueError: If ``algorithm_type`` is not registered.
-    """
-    try:
-        cls = RLAlgorithmConfig.get_choice_class(algorithm_type)
-    except KeyError as err:
-        raise ValueError(
-            f"Algorithm type '{algorithm_type}' is not registered. "
-            f"Available: {list(RLAlgorithmConfig.get_known_choices().keys())}"
-        ) from err
-    return cls(**kwargs)
-
-
-def make_algorithm(cfg: RLAlgorithmConfig, policy: torch.nn.Module) -> RLAlgorithm:
-    return cfg.build_algorithm(policy)
@@ -1,18 +0,0 @@
-# Copyright 2026 The HuggingFace Inc. team. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from lerobot.rl.algorithms.sac.configuration_sac import SACAlgorithmConfig
-from lerobot.rl.algorithms.sac.sac_algorithm import SACAlgorithm
-
-__all__ = ["SACAlgorithm", "SACAlgorithmConfig"]
@@ -1,90 +0,0 @@
-# Copyright 2026 The HuggingFace Inc. team. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from __future__ import annotations
-
-from dataclasses import dataclass, field
-from typing import TYPE_CHECKING
-
-import torch
-
-from lerobot.policies.gaussian_actor.configuration_gaussian_actor import (
-    CriticNetworkConfig,
-    GaussianActorConfig,
-)
-from lerobot.rl.algorithms.configs import RLAlgorithmConfig
-
-if TYPE_CHECKING:
-    from lerobot.rl.algorithms.sac.sac_algorithm import SACAlgorithm
-
-
-@RLAlgorithmConfig.register_subclass("sac")
-@dataclass
-class SACAlgorithmConfig(RLAlgorithmConfig):
-    """SAC algorithm hyperparameters."""
-
-    # Optimizer learning rates
-    actor_lr: float = 3e-4
-    critic_lr: float = 3e-4
-    temperature_lr: float = 3e-4
-
-    # Bellman update
-    discount: float = 0.99
-    use_backup_entropy: bool = True
-    critic_target_update_weight: float = 0.005
-
-    # Critic ensemble
-    num_critics: int = 2
-    num_subsample_critics: int | None = None
-    critic_network_kwargs: CriticNetworkConfig = field(default_factory=CriticNetworkConfig)
-    discrete_critic_network_kwargs: CriticNetworkConfig = field(default_factory=CriticNetworkConfig)
-
-    # Temperature / entropy
-    temperature_init: float = 1.0
-    # Target entropy for automatic temperature tuning. If ``None``, defaults to
-    # ``-|A|/2`` where ``|A|`` is the total action dimension (continuous + 1 if
-    # there is a discrete action head).
-    target_entropy: float | None = None
-
-    # Update loop
-    utd_ratio: int = 1
-    policy_update_freq: int = 1
-    grad_clip_norm: float = 40.0
-
-    # Optimizations
-    # torch.compile is currently disabled by default
-    use_torch_compile: bool = False
-
-    # Policy config
-    policy_config: GaussianActorConfig | None = None
-
-    @classmethod
-    def from_policy_config(cls, policy_cfg: GaussianActorConfig) -> SACAlgorithmConfig:
-        """Build an algorithm config with default hyperparameters for a given policy."""
-        return cls(
-            policy_config=policy_cfg,
-            discrete_critic_network_kwargs=policy_cfg.discrete_critic_network_kwargs,
-        )
-
-    def build_algorithm(self, policy: torch.nn.Module) -> SACAlgorithm:
-        if self.policy_config is None:
-            raise ValueError(
-                "SACAlgorithmConfig.policy_config is None. "
-                "It must be populated (typically by TrainRLServerPipelineConfig.validate) "
-                "before calling build_algorithm()."
-            )
-
-        from lerobot.rl.algorithms.sac.sac_algorithm import SACAlgorithm
-
-        return SACAlgorithm(policy=policy, config=self)
@@ -1,595 +0,0 @@
-# Copyright 2026 The HuggingFace Inc. team. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from __future__ import annotations
-
-import math
-from collections.abc import Callable, Iterator
-from dataclasses import asdict
-from typing import Any
-
-import einops
-import torch
-import torch.nn as nn
-import torch.nn.functional as F  # noqa: N812
-from torch import Tensor
-from torch.optim import Optimizer
-
-from lerobot.policies.gaussian_actor.modeling_gaussian_actor import (
-    DISCRETE_DIMENSION_INDEX,
-    MLP,
-    DiscreteCritic,
-    GaussianActorObservationEncoder,
-    GaussianActorPolicy,
-    orthogonal_init,
-)
-from lerobot.policies.utils import get_device_from_parameters
-from lerobot.rl.algorithms.base import BatchType, RLAlgorithm
-from lerobot.rl.algorithms.configs import TrainingStats
-from lerobot.rl.algorithms.sac.configuration_sac import SACAlgorithmConfig
-from lerobot.utils.constants import ACTION
-from lerobot.utils.transition import move_state_dict_to_device
-
-
-class SACAlgorithm(RLAlgorithm):
-    """Soft Actor-Critic. Owns critics, targets, temperature, and loss computation."""
-
-    config_class = SACAlgorithmConfig
-    name = "sac"
-
-    def __init__(
-        self,
-        policy: GaussianActorPolicy,
-        config: SACAlgorithmConfig,
-    ):
-        self.config = config
-        self.policy_config = config.policy_config
-        self.policy = policy
-        self.optimizers: dict[str, Optimizer] = {}
-        self._optimization_step: int = 0
-
-        action_dim = self.policy.config.output_features[ACTION].shape[0]
-        self._init_critics(action_dim)
-        self._init_temperature(action_dim)
-
-        self._device = torch.device(self.policy.config.device)
-        self._move_to_device()
-
-    def _init_critics(self, action_dim) -> None:
-        """Build critic ensemble, targets."""
-        encoder = self.policy.encoder_critic
-
-        heads = [
-            CriticHead(
-                input_dim=encoder.output_dim + action_dim,
-                **asdict(self.config.critic_network_kwargs),
-            )
-            for _ in range(self.config.num_critics)
-        ]
-        self.critic_ensemble = CriticEnsemble(encoder=encoder, ensemble=heads)
-        target_heads = [
-            CriticHead(
-                input_dim=encoder.output_dim + action_dim,
-                **asdict(self.config.critic_network_kwargs),
-            )
-            for _ in range(self.config.num_critics)
-        ]
-        self.critic_target = CriticEnsemble(encoder=encoder, ensemble=target_heads)
-        self.critic_target.load_state_dict(self.critic_ensemble.state_dict())
-
-        # TODO(Khalil): Investigate and fix torch.compile
-        # NOTE: torch.compile is disabled, policy does not converge when enabled.
-        if self.config.use_torch_compile:
-            self.critic_ensemble = torch.compile(self.critic_ensemble)
-            self.critic_target = torch.compile(self.critic_target)
-
-        self.discrete_critic_target = None
-        if self.policy_config.num_discrete_actions is not None:
-            self.discrete_critic_target = self._init_discrete_critic_target(encoder)
-
-    def _init_discrete_critic_target(self, encoder: GaussianActorObservationEncoder) -> DiscreteCritic:
-        """Build target discrete critic (main network is owned by the policy)."""
-        discrete_critic_target = DiscreteCritic(
-            encoder=encoder,
-            input_dim=encoder.output_dim,
-            output_dim=self.policy_config.num_discrete_actions,
-            **asdict(self.config.discrete_critic_network_kwargs),
-        )
-        # TODO(Khalil): Compile the discrete critic
-        discrete_critic_target.load_state_dict(self.policy.discrete_critic.state_dict())
-        return discrete_critic_target
-
-    def _init_temperature(self, continuous_action_dim: int) -> None:
-        """Set up temperature parameter (log_alpha) and target entropy."""
-        temp_init = self.config.temperature_init
-        self.log_alpha = nn.Parameter(torch.tensor([math.log(temp_init)]))
-
-        self.target_entropy = self.config.target_entropy
-        if self.target_entropy is None:
-            total_action_dim = continuous_action_dim + (
-                1 if self.policy_config.num_discrete_actions is not None else 0
-            )
-            self.target_entropy = -total_action_dim / 2
-
-    def _move_to_device(self) -> None:
-        self.policy.to(self._device)
-        self.critic_ensemble.to(self._device)
-        self.critic_target.to(self._device)
-        self.log_alpha = nn.Parameter(self.log_alpha.data.to(self._device))
-        if self.discrete_critic_target is not None:
-            self.discrete_critic_target.to(self._device)
-
-    @property
-    def temperature(self) -> float:
-        """Return the current temperature value, always in sync with log_alpha."""
-        return self.log_alpha.exp().item()
-
-    def _critic_forward(
-        self,
-        observations: dict[str, Tensor],
-        actions: Tensor,
-        use_target: bool = False,
-        observation_features: Tensor | None = None,
-    ) -> Tensor:
-        """Forward pass through a critic network ensemble
-
-        Args:
-            observations: Dictionary of observations
-            actions: Action tensor
-            use_target: If True, use target critics, otherwise use ensemble critics
-
-        Returns:
-            Tensor of Q-values from all critics
-        """
-
-        critics = self.critic_target if use_target else self.critic_ensemble
-        q_values = critics(observations, actions, observation_features)
-        return q_values
-
-    def _discrete_critic_forward(
-        self, observations, use_target=False, observation_features=None
-    ) -> torch.Tensor:
-        """Forward pass through a discrete critic network
-
-        Args:
-            observations: Dictionary of observations
-            use_target: If True, use target critics, otherwise use ensemble critics
-            observation_features: Optional pre-computed observation features to avoid recomputing encoder output
-
-        Returns:
-            Tensor of Q-values from the discrete critic network
-        """
-        discrete_critic = self.discrete_critic_target if use_target else self.policy.discrete_critic
-        q_values = discrete_critic(observations, observation_features)
-        return q_values
-
-    def update(self, batch_iterator: Iterator[BatchType]) -> TrainingStats:
-        clip = self.config.grad_clip_norm
-
-        for _ in range(self.config.utd_ratio - 1):
-            batch = next(batch_iterator)
-            fb = self._prepare_forward_batch(batch, include_complementary_info=True)
-
-            loss_critic = self._compute_loss_critic(fb)
-            self.optimizers["critic"].zero_grad()
-            loss_critic.backward()
-            torch.nn.utils.clip_grad_norm_(self.critic_ensemble.parameters(), max_norm=clip)
-            self.optimizers["critic"].step()
-
-            if self.policy_config.num_discrete_actions is not None:
-                loss_dc = self._compute_loss_discrete_critic(fb)
-                self.optimizers["discrete_critic"].zero_grad()
-                loss_dc.backward()
-                torch.nn.utils.clip_grad_norm_(self.policy.discrete_critic.parameters(), max_norm=clip)
-                self.optimizers["discrete_critic"].step()
-
-            self._update_target_networks()
-
-        batch = next(batch_iterator)
-        fb = self._prepare_forward_batch(batch, include_complementary_info=False)
-
-        loss_critic = self._compute_loss_critic(fb)
-        self.optimizers["critic"].zero_grad()
-        loss_critic.backward()
-        critic_grad = torch.nn.utils.clip_grad_norm_(self.critic_ensemble.parameters(), max_norm=clip).item()
-        self.optimizers["critic"].step()
-
-        stats = TrainingStats(
-            losses={"loss_critic": loss_critic.item()},
-            grad_norms={"critic": critic_grad},
-        )
-
-        if self.policy_config.num_discrete_actions is not None:
-            loss_dc = self._compute_loss_discrete_critic(fb)
-            self.optimizers["discrete_critic"].zero_grad()
-            loss_dc.backward()
-            dc_grad = torch.nn.utils.clip_grad_norm_(
-                self.policy.discrete_critic.parameters(), max_norm=clip
-            ).item()
-            self.optimizers["discrete_critic"].step()
-            stats.losses["loss_discrete_critic"] = loss_dc.item()
-            stats.grad_norms["discrete_critic"] = dc_grad
-
-        if self._optimization_step % self.config.policy_update_freq == 0:
-            for _ in range(self.config.policy_update_freq):
-                loss_actor = self._compute_loss_actor(fb)
-                self.optimizers["actor"].zero_grad()
-                loss_actor.backward()
-                actor_grad = torch.nn.utils.clip_grad_norm_(
-                    self.policy.actor.parameters(), max_norm=clip
-                ).item()
-                self.optimizers["actor"].step()
-
-                loss_temp = self._compute_loss_temperature(fb)
-                self.optimizers["temperature"].zero_grad()
-                loss_temp.backward()
-                temp_grad = torch.nn.utils.clip_grad_norm_([self.log_alpha], max_norm=clip).item()
-                self.optimizers["temperature"].step()
-
-            stats.losses["loss_actor"] = loss_actor.item()
-            stats.losses["loss_temperature"] = loss_temp.item()
-            stats.grad_norms["actor"] = actor_grad
-            stats.grad_norms["temperature"] = temp_grad
-            stats.extra["temperature"] = self.temperature
-
-        self._update_target_networks()
-        self._optimization_step += 1
-        return stats
-
-    def _compute_loss_critic(self, batch: dict[str, Any]) -> Tensor:
-        observations = batch["state"]
-        actions = batch[ACTION]
-        rewards = batch["reward"]
-        next_observations = batch["next_state"]
-        done = batch["done"]
-        observation_features = batch.get("observation_feature")
-        next_observation_features = batch.get("next_observation_feature")
-
-        with torch.no_grad():
-            next_action_preds, next_log_probs, _ = self.policy.actor(
-                next_observations, next_observation_features
-            )
-
-            # 2- compute q targets
-            q_targets = self._critic_forward(
-                observations=next_observations,
-                actions=next_action_preds,
-                use_target=True,
-                observation_features=next_observation_features,
-            )
-
-            # subsample critics to prevent overfitting if use high UTD (update to date)
-            # TODO: Get indices before forward pass to avoid unnecessary computation
-            if self.config.num_subsample_critics is not None:
-                indices = torch.randperm(self.config.num_critics)
-                indices = indices[: self.config.num_subsample_critics]
-                q_targets = q_targets[indices]
-
-            # critics subsample size
-            min_q, _ = q_targets.min(dim=0)  # Get values from min operation
-            if self.config.use_backup_entropy:
-                min_q = min_q - (self.temperature * next_log_probs)
-
-            td_target = rewards + (1 - done) * self.config.discount * min_q
-
-        # 3- compute predicted qs
-        if self.policy_config.num_discrete_actions is not None:
-            # NOTE: We only want to keep the continuous action part
-            # In the buffer we have the full action space (continuous + discrete)
-            # We need to split them before concatenating them in the critic forward
-            actions: Tensor = actions[:, :DISCRETE_DIMENSION_INDEX]
-        q_preds = self._critic_forward(
-            observations=observations,
-            actions=actions,
-            use_target=False,
-            observation_features=observation_features,
-        )
-
-        # 4- Calculate loss
-        # Compute state-action value loss (TD loss) for all of the Q functions in the ensemble.
-        td_target_duplicate = einops.repeat(td_target, "b -> e b", e=q_preds.shape[0])
-        # You compute the mean loss of the batch for each critic and then to compute the final loss you sum them up
-        critics_loss = (
-            F.mse_loss(
-                input=q_preds,
-                target=td_target_duplicate,
-                reduction="none",
-            ).mean(dim=1)
-        ).sum()
-        return critics_loss
-
-    def _compute_loss_discrete_critic(self, batch: dict[str, Any]) -> Tensor:
-        observations = batch["state"]
-        actions = batch[ACTION]
-        rewards = batch["reward"]
-        next_observations = batch["next_state"]
-        done = batch["done"]
-        observation_features = batch.get("observation_feature")
-        next_observation_features = batch.get("next_observation_feature")
-        complementary_info = batch.get("complementary_info")
-
-        # NOTE: We only want to keep the discrete action part
-        # In the buffer we have the full action space (continuous + discrete)
-        # We need to split them before concatenating them in the critic forward
-        actions_discrete: Tensor = actions[:, DISCRETE_DIMENSION_INDEX:].clone()
-        actions_discrete = torch.round(actions_discrete)
-        actions_discrete = actions_discrete.long()
-
-        discrete_penalties: Tensor | None = None
-        if complementary_info is not None:
-            discrete_penalties = complementary_info.get("discrete_penalty")
-
-        with torch.no_grad():
-            # For DQN, select actions using online network, evaluate with target network
-            next_discrete_qs = self._discrete_critic_forward(
-                next_observations, use_target=False, observation_features=next_observation_features
-            )
-            best_next_discrete_action = torch.argmax(next_discrete_qs, dim=-1, keepdim=True)
-
-            # Get target Q-values from target network
-            target_next_discrete_qs = self._discrete_critic_forward(
-                observations=next_observations,
-                use_target=True,
-                observation_features=next_observation_features,
-            )
-
-            # Use gather to select Q-values for best actions
-            target_next_discrete_q = torch.gather(
-                target_next_discrete_qs, dim=1, index=best_next_discrete_action
-            ).squeeze(-1)
-
-            # Compute target Q-value with Bellman equation
-            rewards_discrete = rewards
-            if discrete_penalties is not None:
-                rewards_discrete = rewards + discrete_penalties
-            target_discrete_q = rewards_discrete + (1 - done) * self.config.discount * target_next_discrete_q
-
-        # Get predicted Q-values for current observations
-        predicted_discrete_qs = self._discrete_critic_forward(
-            observations=observations, use_target=False, observation_features=observation_features
-        )
-
-        # Use gather to select Q-values for taken actions
-        predicted_discrete_q = torch.gather(predicted_discrete_qs, dim=1, index=actions_discrete).squeeze(-1)
-
-        # Compute MSE loss between predicted and target Q-values
-        discrete_critic_loss = F.mse_loss(input=predicted_discrete_q, target=target_discrete_q)
-        return discrete_critic_loss
-
-    def _compute_loss_actor(self, batch: dict[str, Any]) -> Tensor:
-        observations = batch["state"]
-        observation_features = batch.get("observation_feature")
-
-        actions_pi, log_probs, _ = self.policy.actor(observations, observation_features)
-
-        q_preds = self._critic_forward(
-            observations=observations,
-            actions=actions_pi,
-            use_target=False,
-            observation_features=observation_features,
-        )
-        min_q_preds = q_preds.min(dim=0)[0]
-
-        actor_loss = ((self.temperature * log_probs) - min_q_preds).mean()
-        return actor_loss
-
-    def _compute_loss_temperature(self, batch: dict[str, Any]) -> Tensor:
-        """Compute the temperature loss"""
-        observations = batch["state"]
-        observation_features = batch.get("observation_feature")
-
-        # calculate temperature loss
-        with torch.no_grad():
-            _, log_probs, _ = self.policy.actor(observations, observation_features)
-
-        temperature_loss = (-self.log_alpha.exp() * (log_probs + self.target_entropy)).mean()
-        return temperature_loss
-
-    def _update_target_networks(self) -> None:
-        """Update target networks with exponential moving average"""
-        for target_p, p in zip(
-            self.critic_target.parameters(), self.critic_ensemble.parameters(), strict=True
-        ):
-            target_p.data.copy_(
-                p.data * self.config.critic_target_update_weight
-                + target_p.data * (1.0 - self.config.critic_target_update_weight)
-            )
-        if self.policy_config.num_discrete_actions is not None:
-            for target_p, p in zip(
-                self.discrete_critic_target.parameters(),
-                self.policy.discrete_critic.parameters(),
-                strict=True,
-            ):
-                target_p.data.copy_(
-                    p.data * self.config.critic_target_update_weight
-                    + target_p.data * (1.0 - self.config.critic_target_update_weight)
-                )
-
-    def _prepare_forward_batch(
-        self, batch: BatchType, *, include_complementary_info: bool = True
-    ) -> dict[str, Any]:
-        observations = batch["state"]
-        next_observations = batch["next_state"]
-        observation_features, next_observation_features = self.get_observation_features(
-            observations, next_observations
-        )
-        forward_batch: dict[str, Any] = {
-            ACTION: batch[ACTION],
-            "reward": batch["reward"],
-            "state": observations,
-            "next_state": next_observations,
-            "done": batch["done"],
-            "observation_feature": observation_features,
-            "next_observation_feature": next_observation_features,
-        }
-        if include_complementary_info and "complementary_info" in batch:
-            forward_batch["complementary_info"] = batch["complementary_info"]
-        return forward_batch
-
-    def make_optimizers_and_scheduler(self) -> dict[str, Optimizer]:
-        """
-        Creates and returns optimizers for the actor, critic, and temperature components of a reinforcement learning policy.
-
-        This function sets up Adam optimizers for:
-        - The **actor network**, ensuring that only relevant parameters are optimized.
-        - The **critic ensemble**, which evaluates the value function.
-        - The **temperature parameter**, which controls the entropy in soft actor-critic (SAC)-like methods.
-
-        It also initializes a learning rate scheduler, though currently, it is set to `None`.
-
-        NOTE:
-        - If the encoder is shared, its parameters are excluded from the actor's optimization process.
-        - The policy's log temperature (`log_alpha`) is wrapped in a list to ensure proper optimization as a standalone tensor.
-
-        Args:
-            cfg: Configuration object containing hyperparameters.
-            policy (nn.Module): The policy model containing the actor, critic, and temperature components.
-
-        Returns:
-            A dictionary mapping component names ("actor", "critic", "temperature")
-            to their respective Adam optimizers.
-        """
-        actor_params = self.policy.get_optim_params()["actor"]
-        self.optimizers = {
-            "actor": torch.optim.Adam(actor_params, lr=self.config.actor_lr),
-            "critic": torch.optim.Adam(self.critic_ensemble.parameters(), lr=self.config.critic_lr),
-            "temperature": torch.optim.Adam([self.log_alpha], lr=self.config.temperature_lr),
-        }
-        if self.policy_config.num_discrete_actions is not None:
-            self.optimizers["discrete_critic"] = torch.optim.Adam(
-                self.policy.discrete_critic.parameters(), lr=self.config.critic_lr
-            )
-        return self.optimizers
-
-    def get_optimizers(self) -> dict[str, Optimizer]:
-        return self.optimizers
-
-    def get_weights(self) -> dict[str, Any]:
-        """Send actor + discrete-critic state dicts."""
-        state_dicts: dict[str, Any] = {
-            "policy": move_state_dict_to_device(self.policy.actor.state_dict(), device="cpu"),
-        }
-        if self.policy_config.num_discrete_actions is not None:
-            state_dicts["discrete_critic"] = move_state_dict_to_device(
-                self.policy.discrete_critic.state_dict(), device="cpu"
-            )
-        return state_dicts
-
-    def load_weights(self, weights: dict[str, Any], device: str | torch.device = "cpu") -> None:
-        """Load actor + discrete-critic weights into the policy."""
-        self.policy.load_actor_weights(weights, device=device)
-
-    def get_observation_features(
-        self, observations: Tensor, next_observations: Tensor
-    ) -> tuple[Tensor | None, Tensor | None]:
-        """
-        Get observation features from the policy encoder. It act as cache for the observation features.
-        when the encoder is frozen, the observation features are not updated.
-        We can save compute by caching the observation features.
-
-        Args:
-            policy: The policy model
-            observations: The current observations
-            next_observations: The next observations
-
-        Returns:
-            tuple: observation_features, next_observation_features
-        """
-
-        if self.policy.config.vision_encoder_name is None or not self.policy.config.freeze_vision_encoder:
-            return None, None
-
-        with torch.no_grad():
-            observation_features = self.policy.actor.encoder.get_cached_image_features(observations)
-            next_observation_features = self.policy.actor.encoder.get_cached_image_features(next_observations)
-
-        return observation_features, next_observation_features
-
-
-class CriticHead(nn.Module):
-    def __init__(
-        self,
-        input_dim: int,
-        hidden_dims: list[int],
-        activations: Callable[[torch.Tensor], torch.Tensor] | str = nn.SiLU(),
-        activate_final: bool = False,
-        dropout_rate: float | None = None,
-        init_final: float | None = None,
-        final_activation: Callable[[torch.Tensor], torch.Tensor] | str | None = None,
-    ):
-        super().__init__()
-        self.net = MLP(
-            input_dim=input_dim,
-            hidden_dims=hidden_dims,
-            activations=activations,
-            activate_final=activate_final,
-            dropout_rate=dropout_rate,
-            final_activation=final_activation,
-        )
-        self.output_layer = nn.Linear(in_features=hidden_dims[-1], out_features=1)
-        if init_final is not None:
-            nn.init.uniform_(self.output_layer.weight, -init_final, init_final)
-            nn.init.uniform_(self.output_layer.bias, -init_final, init_final)
-        else:
-            orthogonal_init()(self.output_layer.weight)
-
-    def forward(self, x: torch.Tensor) -> torch.Tensor:
-        return self.output_layer(self.net(x))
-
-
-class CriticEnsemble(nn.Module):
-    """
-    CriticEnsemble wraps multiple CriticHead modules into an ensemble.
-
-    Args:
-        encoder (GaussianActorObservationEncoder): encoder for observations.
-        ensemble (List[CriticHead]): list of critic heads.
-        init_final (float | None): optional initializer scale for final layers.
-
-    Forward returns a tensor of shape (num_critics, batch_size) containing Q-values.
-    """
-
-    def __init__(
-        self,
-        encoder: GaussianActorObservationEncoder,
-        ensemble: list[CriticHead],
-        init_final: float | None = None,
-    ):
-        super().__init__()
-        self.encoder = encoder
-        self.init_final = init_final
-        self.critics = nn.ModuleList(ensemble)
-
-    def forward(
-        self,
-        observations: dict[str, torch.Tensor],
-        actions: torch.Tensor,
-        observation_features: torch.Tensor | None = None,
-    ) -> torch.Tensor:
-        device = get_device_from_parameters(self)
-        # Move each tensor in observations to device
-        observations = {k: v.to(device) for k, v in observations.items()}
-
-        obs_enc = self.encoder(observations, cache=observation_features)
-
-        inputs = torch.cat([obs_enc, actions], dim=-1)
-
-        # Loop through critics and collect outputs
-        q_values = []
-        for critic in self.critics:
-            q_values.append(critic(inputs))
-
-        # Stack outputs to match expected shape [num_critics, batch_size]
-        q_values = torch.stack([q.squeeze(-1) for q in q_values], dim=0)
-        return q_values
@@ -97,8 +97,8 @@ class ReplayBuffer:
        Args:
            capacity (int): Maximum number of transitions to store in the buffer.
            device (str): The device where the tensors will be moved when sampling ("cuda:0" or "cpu").
-            state_keys (list[str]): The list of keys that appear in `state` and `next_state`.
-            image_augmentation_function (Callable | None): A function that takes a batch of images
+            state_keys (List[str]): The list of keys that appear in `state` and `next_state`.
+            image_augmentation_function (Optional[Callable]): A function that takes a batch of images
                and returns a batch of augmented images. If None, a default augmentation function is used.
            use_drq (bool): Whether to use the default DRQ image augmentation style, when sampling in the buffer.
            storage_device: The device (e.g. "cpu" or "cuda:0") where the data will be stored.
@@ -634,7 +634,7 @@ class ReplayBuffer:
                If None, you must handle or define default keys.

        Returns:
-            transitions (list[Transition]):
+            transitions (List[Transition]):
                A list of Transition dictionaries with the same length as `dataset`.
        """
        if state_keys is None:
@@ -176,11 +176,11 @@ def convert_lerobot_dataset_to_cropped_lerobot_dataset(

    Args:
        original_dataset (LeRobotDataset): The source dataset.
-        crop_params_dict (dict[str, Tuple[int, int, int, int]]):
+        crop_params_dict (Dict[str, Tuple[int, int, int, int]]):
            A dictionary mapping observation keys to crop parameters (top, left, height, width).
        new_repo_id (str): Repository id for the new dataset.
        new_dataset_root (str): The root directory where the new dataset will be written.
-        resize_size (tuple[int, int], optional): The target size (height, width) after cropping.
+        resize_size (Tuple[int, int], optional): The target size (height, width) after cropping.
            Defaults to (128, 128).

    Returns:
@@ -1,17 +0,0 @@
-# Copyright 2026 The HuggingFace Inc. team. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from .data_mixer import BatchType, DataMixer, OnlineOfflineMixer
-
-__all__ = ["BatchType", "DataMixer", "OnlineOfflineMixer"]
@@ -1,96 +0,0 @@
-# Copyright 2026 The HuggingFace Inc. team. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from __future__ import annotations
-
-import abc
-
-from lerobot.rl.algorithms.base import BatchType
-from lerobot.rl.buffer import ReplayBuffer, concatenate_batch_transitions
-
-
-class DataMixer(abc.ABC):
-    """Abstract interface for all data mixing strategies."""
-
-    @abc.abstractmethod
-    def sample(self, batch_size: int) -> BatchType:
-        """Draw one batch of ``batch_size`` transitions."""
-        ...
-
-    def get_iterator(
-        self,
-        batch_size: int,
-        async_prefetch: bool = True,
-        queue_size: int = 2,
-    ):
-        """Infinite iterator that yields batches."""
-        while True:
-            yield self.sample(batch_size)
-
-
-class OnlineOfflineMixer(DataMixer):
-    """Mixes transitions from an online and an offline replay buffer."""
-
-    def __init__(
-        self,
-        online_buffer: ReplayBuffer,
-        offline_buffer: ReplayBuffer | None = None,
-        online_ratio: float = 1.0,
-    ):
-        if not 0.0 <= online_ratio <= 1.0:
-            raise ValueError(f"online_ratio must be in [0, 1], got {online_ratio}")
-        self.online_buffer = online_buffer
-        self.offline_buffer = offline_buffer
-        self.online_ratio = online_ratio
-
-    def sample(self, batch_size: int) -> BatchType:
-        if self.offline_buffer is None:
-            return self.online_buffer.sample(batch_size)
-
-        n_online = max(1, int(batch_size * self.online_ratio))
-        n_offline = batch_size - n_online
-
-        online_batch = self.online_buffer.sample(n_online)
-        offline_batch = self.offline_buffer.sample(n_offline)
-        return concatenate_batch_transitions(online_batch, offline_batch)
-
-    def get_iterator(
-        self,
-        batch_size: int,
-        async_prefetch: bool = True,
-        queue_size: int = 2,
-    ):
-        """Yield batches by composing buffer async iterators."""
-
-        n_online = max(1, int(batch_size * self.online_ratio))
-
-        online_iter = self.online_buffer.get_iterator(
-            batch_size=n_online,
-            async_prefetch=async_prefetch,
-            queue_size=queue_size,
-        )
-
-        if self.offline_buffer is None:
-            yield from online_iter
-            return
-
-        n_offline = batch_size - n_online
-        offline_iter = self.offline_buffer.get_iterator(
-            batch_size=n_offline,
-            async_prefetch=async_prefetch,
-            queue_size=queue_size,
-        )
-
-        while True:
-            yield concatenate_batch_transitions(next(online_iter), next(offline_iter))
@@ -17,9 +17,9 @@ import logging

 from lerobot.cameras import opencv  # noqa: F401
 from lerobot.configs import parser
+from lerobot.configs.train import TrainRLServerPipelineConfig
 from lerobot.datasets import LeRobotDataset
 from lerobot.policies import make_policy
-from lerobot.rl.train_rl import TrainRLServerPipelineConfig
 from lerobot.robots import (  # noqa: F401
    RobotConfig,
    make_robot_from_config,
@@ -39,7 +39,6 @@ from lerobot.processor import (
    GymHILAdapterProcessorStep,
    ImageCropResizeProcessorStep,
    InterventionActionProcessorStep,
-    LeaderFollowerProcessor,
    MapDeltaActionToRobotActionStep,
    MapTensorToDeltaActionDictStep,
    Numpy2TorchActionProcessorStep,
@@ -72,7 +71,6 @@ from lerobot.teleoperators import (
    make_teleoperator_from_config,
    so_leader,  # noqa: F401
 )
-from lerobot.teleoperators.so_leader import SO101LeaderFollower
 from lerobot.teleoperators.teleoperator import Teleoperator
 from lerobot.teleoperators.utils import TeleopEvents
 from lerobot.utils.constants import ACTION, DONE, OBS_IMAGES, OBS_STATE, REWARD
@@ -385,21 +383,10 @@ def make_processors(
            GymHILAdapterProcessorStep(),
            Numpy2TorchActionProcessorStep(),
            VanillaObservationProcessorStep(),
+            AddBatchDimensionProcessorStep(),
+            DeviceProcessorStep(device=device),
        ]

-        # Add time limit processor if reset config exists
-        if cfg.processor.reset is not None:
-            env_pipeline_steps.append(
-                TimeLimitProcessorStep(max_episode_steps=int(cfg.processor.reset.control_time_s * cfg.fps))
-            )
-
-        env_pipeline_steps.extend(
-            [
-                AddBatchDimensionProcessorStep(),
-                DeviceProcessorStep(device=device),
-            ]
-        )
-
        return DataProcessorPipeline(
            steps=env_pipeline_steps, to_transition=identity_transition, to_output=identity_transition
        ), DataProcessorPipeline(
@@ -483,53 +470,14 @@ def make_processors(
    env_pipeline_steps.append(AddBatchDimensionProcessorStep())
    env_pipeline_steps.append(DeviceProcessorStep(device=device))

-    # Get control mode (gamepad / keyboard / leader -- see PR #2596)
-    control_mode = cfg.processor.control_mode if cfg.processor is not None else "gamepad"
-
-    action_pipeline_steps: list = [
+    action_pipeline_steps = [
        AddTeleopActionAsComplimentaryDataStep(teleop_device=teleop_device),
        AddTeleopEventsAsInfoStep(teleop_device=teleop_device),
-    ]
-
-    # Leader-follower control mode: leader haptically tracks follower until the
-    # human toggles intervention with SPACE, at which point ``LeaderFollowerProcessor``
-    # builds the PR #2596 **7-D** EE delta tensor. Rotation can be toggled via
-    # ``processor.use_rotation``: when False the ``wx/wy/wz`` channels are zeroed.
-    # ``InterventionActionProcessorStep`` then maps that to either a full 7-D
-    # or 4-D policy action tensor (see helpers in ``hil_processor``).
-    leader_use_rotation = bool(getattr(cfg.processor, "use_rotation", False))
-    if control_mode == "leader":
-        if not isinstance(teleop_device, SO101LeaderFollower):
-            raise ValueError(
-                "Leader control mode requires SO101LeaderFollower teleop device. "
-                "Set `--teleop.type=so101_leader --teleop.leader_follower_mode=true`."
-            )
-        if cfg.processor.inverse_kinematics is None or kinematics_solver is None:
-            raise ValueError(
-                "Leader control mode requires `cfg.processor.inverse_kinematics` and a kinematics solver."
-            )
-        action_pipeline_steps.append(
-            LeaderFollowerProcessor(
-                leader_device=teleop_device,
-                motor_names=motor_names,
-                robot=env.robot,
-                kinematics=kinematics_solver,
-                end_effector_step_sizes=cfg.processor.inverse_kinematics.end_effector_step_sizes,
-                use_gripper=cfg.processor.gripper.use_gripper if cfg.processor.gripper is not None else False,
-                use_rotation=leader_use_rotation,
-                max_gripper_pos=cfg.processor.max_gripper_pos
-                if cfg.processor.max_gripper_pos is not None
-                else 100.0,
-            )
-        )
-
-    action_pipeline_steps.append(
        InterventionActionProcessorStep(
            use_gripper=cfg.processor.gripper.use_gripper if cfg.processor.gripper is not None else False,
-            use_rotation=(control_mode == "leader" and leader_use_rotation),
            terminate_on_success=terminate_on_success,
-        )
-    )
+        ),
+    ]

    # Replace InverseKinematicsProcessor with new kinematic processors
    if cfg.processor.inverse_kinematics is not None and kinematics_solver is not None:
@@ -603,19 +551,8 @@ def step_env_and_process_transition(
    terminated = terminated or processed_action_transition[TransitionKey.DONE]
    truncated = truncated or processed_action_transition[TransitionKey.TRUNCATED]
    complementary_data = processed_action_transition[TransitionKey.COMPLEMENTARY_DATA].copy()
-
-    if hasattr(env, "get_raw_joint_positions"):
-        raw_joint_positions = env.get_raw_joint_positions()
-        if raw_joint_positions is not None:
-            complementary_data["raw_joint_positions"] = raw_joint_positions
-
-    # Merge env and action-processor info: env wins for str keys, action-processor
-    # wins for `TeleopEvents` enum keys
-    action_info = processed_action_transition[TransitionKey.INFO]
    new_info = info.copy()
-    for key, value in action_info.items():
-        if isinstance(key, TeleopEvents):
-            new_info[key] = value
+    new_info.update(processed_action_transition[TransitionKey.INFO])

    new_transition = create_transition(
        observation=obs,
@@ -631,24 +568,6 @@ def step_env_and_process_transition(
    return new_transition


-def reset_and_build_transition(
-    env: gym.Env,
-    env_processor: DataProcessorPipeline[EnvTransition, EnvTransition],
-    action_processor: DataProcessorPipeline[EnvTransition, EnvTransition],
-) -> EnvTransition:
-    """Reset env + processors and return the first env-processed transition."""
-    obs, info = env.reset()
-    env_processor.reset()
-    action_processor.reset()
-    complementary_data: dict[str, Any] = {}
-    if hasattr(env, "get_raw_joint_positions"):
-        raw_joint_positions = env.get_raw_joint_positions()
-        if raw_joint_positions is not None:
-            complementary_data["raw_joint_positions"] = raw_joint_positions
-    transition = create_transition(observation=obs, info=info, complementary_data=complementary_data)
-    return env_processor(data=transition)
-
-
 def control_loop(
    env: gym.Env,
    env_processor: DataProcessorPipeline[EnvTransition, EnvTransition],
@@ -674,7 +593,17 @@ def control_loop(
    print("- When not intervening, robot will stay still")
    print("- Press Ctrl+C to exit")

-    transition = reset_and_build_transition(env, env_processor, action_processor)
+    # Reset environment and processors
+    obs, info = env.reset()
+    complementary_data = (
+        {"raw_joint_positions": info.pop("raw_joint_positions")} if "raw_joint_positions" in info else {}
+    )
+    env_processor.reset()
+    action_processor.reset()
+
+    # Process initial observation
+    transition = create_transition(observation=obs, info=info, complementary_data=complementary_data)
+    transition = env_processor(data=transition)

    # Determine if gripper is used
    use_gripper = cfg.env.processor.gripper.use_gripper if cfg.env.processor.gripper is not None else True
@@ -730,81 +659,79 @@ def control_loop(
    episode_step = 0
    episode_start_time = time.perf_counter()

-    try:
-        while episode_idx < cfg.dataset.num_episodes_to_record:
-            step_start_time = time.perf_counter()
+    while episode_idx < cfg.dataset.num_episodes_to_record:
+        step_start_time = time.perf_counter()

-            # Create a neutral action (no movement)
-            neutral_action = torch.tensor([0.0, 0.0, 0.0], dtype=torch.float32)
-            if use_gripper:
-                neutral_action = torch.cat([neutral_action, torch.tensor([1.0])])  # Gripper stay
+        # Create a neutral action (no movement)
+        neutral_action = torch.tensor([0.0, 0.0, 0.0], dtype=torch.float32)
+        if use_gripper:
+            neutral_action = torch.cat([neutral_action, torch.tensor([0.0])])  # Gripper stay

-            transition = step_env_and_process_transition(
-                env=env,
-                transition=transition,
-                action=neutral_action,
-                env_processor=env_processor,
-                action_processor=action_processor,
+        # Step the environment with the neutral action and process the resulting transition
+        transition = step_env_and_process_transition(
+            env=env,
+            transition=transition,
+            action=neutral_action,
+            env_processor=env_processor,
+            action_processor=action_processor,
+        )
+        terminated = transition.get(TransitionKey.DONE, False)
+        truncated = transition.get(TransitionKey.TRUNCATED, False)
+
+        if cfg.mode == "record":
+            observations = {
+                k: v.squeeze(0).cpu()
+                for k, v in transition[TransitionKey.OBSERVATION].items()
+                if isinstance(v, torch.Tensor)
+            }
+            # Use teleop_action if available, otherwise use the action from the transition
+            action_to_record = transition[TransitionKey.COMPLEMENTARY_DATA].get(
+                "teleop_action", transition[TransitionKey.ACTION]
            )
-            terminated = transition.get(TransitionKey.DONE, False)
-            truncated = transition.get(TransitionKey.TRUNCATED, False)
+            frame = {
+                **observations,
+                ACTION: action_to_record.cpu(),
+                REWARD: np.array([transition[TransitionKey.REWARD]], dtype=np.float32),
+                DONE: np.array([terminated or truncated], dtype=bool),
+            }
+            if use_gripper:
+                discrete_penalty = transition[TransitionKey.COMPLEMENTARY_DATA].get("discrete_penalty", 0.0)
+                frame["complementary_info.discrete_penalty"] = np.array([discrete_penalty], dtype=np.float32)

-            if cfg.mode == "record":
-                observations = {
-                    k: v.squeeze(0).cpu()
-                    for k, v in transition[TransitionKey.OBSERVATION].items()
-                    if isinstance(v, torch.Tensor)
-                }
-                action_to_record = transition[TransitionKey.COMPLEMENTARY_DATA].get(
-                    "teleop_action", transition[TransitionKey.ACTION]
-                )
-                frame = {
-                    **observations,
-                    ACTION: action_to_record.cpu(),
-                    REWARD: np.array([transition[TransitionKey.REWARD]], dtype=np.float32),
-                    DONE: np.array([terminated or truncated], dtype=bool),
-                }
-                if use_gripper:
-                    discrete_penalty = transition[TransitionKey.COMPLEMENTARY_DATA].get(
-                        "discrete_penalty", 0.0
-                    )
-                    frame["complementary_info.discrete_penalty"] = np.array(
-                        [discrete_penalty], dtype=np.float32
-                    )
+            if dataset is not None:
+                frame["task"] = cfg.dataset.task
+                dataset.add_frame(frame)

-                if dataset is not None:
-                    frame["task"] = cfg.dataset.task
-                    dataset.add_frame(frame)
+        episode_step += 1

-            episode_step += 1
+        # Handle episode termination
+        if terminated or truncated:
+            episode_time = time.perf_counter() - episode_start_time
+            logging.info(
+                f"Episode ended after {episode_step} steps in {episode_time:.1f}s with reward {transition[TransitionKey.REWARD]}"
+            )
+            episode_step = 0
+            episode_idx += 1

-            # Handle episode termination
-            if terminated or truncated:
-                episode_time = time.perf_counter() - episode_start_time
-                logging.info(
-                    f"Episode ended after {episode_step} steps in {episode_time:.1f}s with reward {transition[TransitionKey.REWARD]}"
-                )
-                episode_step = 0
-                episode_idx += 1
+            if dataset is not None:
+                if transition[TransitionKey.INFO].get(TeleopEvents.RERECORD_EPISODE, False):
+                    logging.info(f"Re-recording episode {episode_idx}")
+                    dataset.clear_episode_buffer()
+                    episode_idx -= 1
+                else:
+                    logging.info(f"Saving episode {episode_idx}")
+                    dataset.save_episode()

-                if dataset is not None:
-                    if transition[TransitionKey.INFO].get(TeleopEvents.RERECORD_EPISODE, False):
-                        logging.info(f"Re-recording episode {episode_idx}")
-                        dataset.clear_episode_buffer()
-                        episode_idx -= 1
-                    else:
-                        logging.info(f"Saving episode {episode_idx}")
-                        dataset.save_episode()
+            # Reset for new episode
+            obs, info = env.reset()
+            env_processor.reset()
+            action_processor.reset()

-                # Reset for new episode
-                transition = reset_and_build_transition(env, env_processor, action_processor)
+            transition = create_transition(observation=obs, info=info)
+            transition = env_processor(transition)

-            # Maintain fps timing
-            precise_sleep(max(dt - (time.perf_counter() - step_start_time), 0.0))
-    finally:
-        if dataset is not None and dataset.writer is not None and dataset.writer.image_writer is not None:
-            logging.info("Waiting for image writer to finish...")
-            dataset.writer.image_writer.stop()
+        # Maintain fps timing
+        precise_sleep(max(dt - (time.perf_counter() - step_start_time), 0.0))

    if dataset is not None and cfg.dataset.push_to_hub:
        logging.info("Finalizing dataset before pushing to hub")
@@ -51,7 +51,6 @@ import time
 from concurrent.futures import ThreadPoolExecutor
 from pathlib import Path
 from pprint import pformat
-from typing import Any

 import grpc
 import torch
@@ -69,15 +68,10 @@ from lerobot.common.train_utils import (
 )
 from lerobot.common.wandb_utils import WandBLogger
 from lerobot.configs import parser
+from lerobot.configs.train import TrainRLServerPipelineConfig
 from lerobot.datasets import LeRobotDataset, make_dataset
-from lerobot.policies import make_policy, make_pre_post_processors
-from lerobot.rl.algorithms.base import RLAlgorithm
-from lerobot.rl.algorithms.factory import make_algorithm
-from lerobot.rl.buffer import ReplayBuffer
-from lerobot.rl.data_sources import OnlineOfflineMixer
-from lerobot.rl.process import ProcessSignalHandler
-from lerobot.rl.train_rl import TrainRLServerPipelineConfig
-from lerobot.rl.trainer import RLTrainer
+from lerobot.policies import make_policy
+from lerobot.policies.sac.modeling_sac import SACPolicy
 from lerobot.robots import so_follower  # noqa: F401
 from lerobot.teleoperators import gamepad, so_leader  # noqa: F401
 from lerobot.teleoperators.utils import TeleopEvents
@@ -97,12 +91,15 @@ from lerobot.utils.constants import (
 )
 from lerobot.utils.device_utils import get_safe_torch_device
 from lerobot.utils.random_utils import set_seed
+from lerobot.utils.transition import move_state_dict_to_device, move_transition_to_device
 from lerobot.utils.utils import (
    format_big_number,
    init_logging,
 )

+from .buffer import ReplayBuffer, concatenate_batch_transitions
 from .learner_service import MAX_WORKERS, SHUTDOWN_TIMEOUT, LearnerService
+from .process import ProcessSignalHandler


@parser.wrap()
@@ -182,7 +179,7 @@ def train(cfg: TrainRLServerPipelineConfig, job_name: str | None = None):
 def start_learner_threads(
    cfg: TrainRLServerPipelineConfig,
    wandb_logger: WandBLogger | None,
-    shutdown_event: Any,  # Event
+    shutdown_event: any,  # Event,
 ) -> None:
    """
    Start the learner threads for training.
@@ -256,7 +253,7 @@ def start_learner_threads(
 def add_actor_information_and_train(
    cfg: TrainRLServerPipelineConfig,
    wandb_logger: WandBLogger | None,
-    shutdown_event: Any,  # Event
+    shutdown_event: any,  # Event,
    transition_queue: Queue,
    interaction_message_queue: Queue,
    parameters_queue: Queue,
@@ -269,8 +266,8 @@ def add_actor_information_and_train(
    - Transfers transitions from the actor to the replay buffer.
    - Logs received interaction messages.
    - Ensures training begins only when the replay buffer has a sufficient number of transitions.
-    - Delegates training updates to an ``RLAlgorithm``.
-    - Periodically pushes updated weights to actors.
+    - Samples batches from the replay buffer and performs `utd_ratio` critic updates per optimization step.
+    - Updates the actor and temperature every `policy_update_freq` optimization steps.
    - Logs training statistics, including loss values and optimization frequency.

    NOTE: This function doesn't have a single responsibility, it should be split into multiple functions
@@ -289,13 +286,17 @@ def add_actor_information_and_train(
    # of 7%
    device = get_safe_torch_device(try_device=cfg.policy.device, log=True)
    storage_device = get_safe_torch_device(try_device=cfg.policy.storage_device)
+    clip_grad_norm_value = cfg.policy.grad_clip_norm
    online_step_before_learning = cfg.policy.online_step_before_learning
+    utd_ratio = cfg.policy.utd_ratio
    fps = cfg.env.fps
    log_freq = cfg.log_freq
    save_freq = cfg.save_freq
+    policy_update_freq = cfg.policy.policy_update_freq
    policy_parameters_push_frequency = cfg.policy.actor_learner_config.policy_parameters_push_frequency
    saving_checkpoint = cfg.save_checkpoint
    online_steps = cfg.policy.online_steps
+    async_prefetch = cfg.policy.async_prefetch

    # Initialize logging for multiprocessing
    if not use_threads(cfg):
@@ -307,7 +308,7 @@ def add_actor_information_and_train(

    logging.info("Initializing policy")

-    policy = make_policy(
+    policy: SACPolicy = make_policy(
        cfg=cfg.policy,
        env_cfg=cfg.env,
    )
@@ -316,17 +317,15 @@ def add_actor_information_and_train(

    policy.train()

-    algorithm = make_algorithm(cfg=cfg.algorithm, policy=policy)
+    push_actor_policy_to_queue(parameters_queue=parameters_queue, policy=policy)

-    preprocessor, postprocessor = make_pre_post_processors(
-        policy_cfg=cfg.policy,
-        dataset_stats=cfg.policy.dataset_stats,
-    )
-
-    # Push initial policy weights to actors
-    push_actor_policy_to_queue(parameters_queue=parameters_queue, algorithm=algorithm)
    last_time_policy_pushed = time.time()

+    optimizers, lr_scheduler = make_optimizers_and_scheduler(cfg=cfg, policy=policy)
+
+    # If we are resuming, we need to load the training state
+    resume_optimization_step, resume_interaction_step = load_training_state(cfg=cfg, optimizers=optimizers)
+
    log_training_info(cfg=cfg, policy=policy)

    replay_buffer = initialize_replay_buffer(cfg, device, storage_device)
@@ -339,35 +338,21 @@ def add_actor_information_and_train(
            device=device,
            storage_device=storage_device,
        )
-
-    # DataMixer: online-only or online/offline 50-50 mix
-    data_mixer = OnlineOfflineMixer(
-        online_buffer=replay_buffer,
-        offline_buffer=offline_replay_buffer,
-        online_ratio=cfg.online_ratio,
-    )
-    # RLTrainer owns the iterator, preprocessor, and creates optimizers.
-    trainer = RLTrainer(
-        algorithm=algorithm,
-        data_mixer=data_mixer,
-        batch_size=batch_size,
-        preprocessor=preprocessor,
-    )
-
-    # If we are resuming, we need to load the training state
-    optimizers = algorithm.get_optimizers()
-    resume_optimization_step, resume_interaction_step = load_training_state(cfg=cfg, optimizers=optimizers)
+        batch_size: int = batch_size // 2  # We will sample from both replay buffers

    logging.info("Starting learner thread")
    interaction_message = None
    optimization_step = resume_optimization_step if resume_optimization_step is not None else 0
-    algorithm.optimization_step = optimization_step
    interaction_step_shift = resume_interaction_step if resume_interaction_step is not None else 0

    dataset_repo_id = None
    if cfg.dataset is not None:
        dataset_repo_id = cfg.dataset.repo_id

+    # Initialize iterators
+    online_iterator = None
+    offline_iterator = None
+
    # NOTE: THIS IS THE MAIN LOOP OF THE LEARNER
    while True:
        # Exit the training loop if shutdown is requested
@@ -380,6 +365,7 @@ def add_actor_information_and_train(
            transition_queue=transition_queue,
            replay_buffer=replay_buffer,
            offline_replay_buffer=offline_replay_buffer,
+            device=device,
            dataset_repo_id=dataset_repo_id,
            shutdown_event=shutdown_event,
        )
@@ -396,20 +382,180 @@ def add_actor_information_and_train(
        if len(replay_buffer) < online_step_before_learning:
            continue

-        time_for_one_optimization_step = time.time()
+        if online_iterator is None:
+            online_iterator = replay_buffer.get_iterator(
+                batch_size=batch_size, async_prefetch=async_prefetch, queue_size=2
+            )

-        # One training step (trainer owns data_mixer iterator; algorithm owns UTD loop)
-        stats = trainer.training_step()
+        if offline_replay_buffer is not None and offline_iterator is None:
+            offline_iterator = offline_replay_buffer.get_iterator(
+                batch_size=batch_size, async_prefetch=async_prefetch, queue_size=2
+            )
+
+        time_for_one_optimization_step = time.time()
+        for _ in range(utd_ratio - 1):
+            # Sample from the iterators
+            batch = next(online_iterator)
+
+            if dataset_repo_id is not None:
+                batch_offline = next(offline_iterator)
+                batch = concatenate_batch_transitions(
+                    left_batch_transitions=batch, right_batch_transition=batch_offline
+                )
+
+            actions = batch[ACTION]
+            rewards = batch["reward"]
+            observations = batch["state"]
+            next_observations = batch["next_state"]
+            done = batch["done"]
+            check_nan_in_transition(observations=observations, actions=actions, next_state=next_observations)
+
+            observation_features, next_observation_features = get_observation_features(
+                policy=policy, observations=observations, next_observations=next_observations
+            )
+
+            # Create a batch dictionary with all required elements for the forward method
+            forward_batch = {
+                ACTION: actions,
+                "reward": rewards,
+                "state": observations,
+                "next_state": next_observations,
+                "done": done,
+                "observation_feature": observation_features,
+                "next_observation_feature": next_observation_features,
+                "complementary_info": batch["complementary_info"],
+            }
+
+            # Use the forward method for critic loss
+            critic_output = policy.forward(forward_batch, model="critic")
+
+            # Main critic optimization
+            loss_critic = critic_output["loss_critic"]
+            optimizers["critic"].zero_grad()
+            loss_critic.backward()
+            critic_grad_norm = torch.nn.utils.clip_grad_norm_(
+                parameters=policy.critic_ensemble.parameters(), max_norm=clip_grad_norm_value
+            )
+            optimizers["critic"].step()
+
+            # Discrete critic optimization (if available)
+            if policy.config.num_discrete_actions is not None:
+                discrete_critic_output = policy.forward(forward_batch, model="discrete_critic")
+                loss_discrete_critic = discrete_critic_output["loss_discrete_critic"]
+                optimizers["discrete_critic"].zero_grad()
+                loss_discrete_critic.backward()
+                discrete_critic_grad_norm = torch.nn.utils.clip_grad_norm_(
+                    parameters=policy.discrete_critic.parameters(), max_norm=clip_grad_norm_value
+                )
+                optimizers["discrete_critic"].step()
+
+            # Update target networks (main and discrete)
+            policy.update_target_networks()
+
+        # Sample for the last update in the UTD ratio
+        batch = next(online_iterator)
+
+        if dataset_repo_id is not None:
+            batch_offline = next(offline_iterator)
+            batch = concatenate_batch_transitions(
+                left_batch_transitions=batch, right_batch_transition=batch_offline
+            )
+
+        actions = batch[ACTION]
+        rewards = batch["reward"]
+        observations = batch["state"]
+        next_observations = batch["next_state"]
+        done = batch["done"]
+
+        check_nan_in_transition(observations=observations, actions=actions, next_state=next_observations)
+
+        observation_features, next_observation_features = get_observation_features(
+            policy=policy, observations=observations, next_observations=next_observations
+        )
+
+        # Create a batch dictionary with all required elements for the forward method
+        forward_batch = {
+            ACTION: actions,
+            "reward": rewards,
+            "state": observations,
+            "next_state": next_observations,
+            "done": done,
+            "observation_feature": observation_features,
+            "next_observation_feature": next_observation_features,
+        }
+
+        critic_output = policy.forward(forward_batch, model="critic")
+
+        loss_critic = critic_output["loss_critic"]
+        optimizers["critic"].zero_grad()
+        loss_critic.backward()
+        critic_grad_norm = torch.nn.utils.clip_grad_norm_(
+            parameters=policy.critic_ensemble.parameters(), max_norm=clip_grad_norm_value
+        ).item()
+        optimizers["critic"].step()
+
+        # Initialize training info dictionary
+        training_infos = {
+            "loss_critic": loss_critic.item(),
+            "critic_grad_norm": critic_grad_norm,
+        }
+
+        # Discrete critic optimization (if available)
+        if policy.config.num_discrete_actions is not None:
+            discrete_critic_output = policy.forward(forward_batch, model="discrete_critic")
+            loss_discrete_critic = discrete_critic_output["loss_discrete_critic"]
+            optimizers["discrete_critic"].zero_grad()
+            loss_discrete_critic.backward()
+            discrete_critic_grad_norm = torch.nn.utils.clip_grad_norm_(
+                parameters=policy.discrete_critic.parameters(), max_norm=clip_grad_norm_value
+            ).item()
+            optimizers["discrete_critic"].step()
+
+            # Add discrete critic info to training info
+            training_infos["loss_discrete_critic"] = loss_discrete_critic.item()
+            training_infos["discrete_critic_grad_norm"] = discrete_critic_grad_norm
+
+        # Actor and temperature optimization (at specified frequency)
+        if optimization_step % policy_update_freq == 0:
+            for _ in range(policy_update_freq):
+                # Actor optimization
+                actor_output = policy.forward(forward_batch, model="actor")
+                loss_actor = actor_output["loss_actor"]
+                optimizers["actor"].zero_grad()
+                loss_actor.backward()
+                actor_grad_norm = torch.nn.utils.clip_grad_norm_(
+                    parameters=policy.actor.parameters(), max_norm=clip_grad_norm_value
+                ).item()
+                optimizers["actor"].step()
+
+                # Add actor info to training info
+                training_infos["loss_actor"] = loss_actor.item()
+                training_infos["actor_grad_norm"] = actor_grad_norm
+
+                # Temperature optimization
+                temperature_output = policy.forward(forward_batch, model="temperature")
+                loss_temperature = temperature_output["loss_temperature"]
+                optimizers["temperature"].zero_grad()
+                loss_temperature.backward()
+                temp_grad_norm = torch.nn.utils.clip_grad_norm_(
+                    parameters=[policy.log_alpha], max_norm=clip_grad_norm_value
+                ).item()
+                optimizers["temperature"].step()
+
+                # Add temperature info to training info
+                training_infos["loss_temperature"] = loss_temperature.item()
+                training_infos["temperature_grad_norm"] = temp_grad_norm
+                training_infos["temperature"] = policy.temperature

        # Push policy to actors if needed
        if time.time() - last_time_policy_pushed > policy_parameters_push_frequency:
-            push_actor_policy_to_queue(parameters_queue=parameters_queue, algorithm=algorithm)
+            push_actor_policy_to_queue(parameters_queue=parameters_queue, policy=policy)
            last_time_policy_pushed = time.time()

-        training_infos = stats.to_log_dict()
+        # Update target networks (main and discrete)
+        policy.update_target_networks()

        # Log training metrics at specified intervals
-        optimization_step = algorithm.optimization_step
        if optimization_step % log_freq == 0:
            training_infos["replay_buffer_size"] = len(replay_buffer)
            if offline_replay_buffer is not None:
@@ -437,6 +583,7 @@ def add_actor_information_and_train(
                custom_step_key="Optimization step",
            )

+        optimization_step += 1
        if optimization_step % log_freq == 0:
            logging.info(f"[LEARNER] Number of optimization step: {optimization_step}")

@@ -453,8 +600,6 @@ def add_actor_information_and_train(
                offline_replay_buffer=offline_replay_buffer,
                dataset_repo_id=dataset_repo_id,
                fps=fps,
-                preprocessor=preprocessor,
-                postprocessor=postprocessor,
            )


@@ -462,7 +607,7 @@ def start_learner(
    parameters_queue: Queue,
    transition_queue: Queue,
    interaction_message_queue: Queue,
-    shutdown_event: Any,  # Event
+    shutdown_event: any,  # Event,
    cfg: TrainRLServerPipelineConfig,
 ):
    """
@@ -539,8 +684,6 @@ def save_training_checkpoint(
    offline_replay_buffer: ReplayBuffer | None = None,
    dataset_repo_id: str | None = None,
    fps: int = 30,
-    preprocessor=None,
-    postprocessor=None,
 ) -> None:
    """
    Save training checkpoint and associated data.
@@ -564,8 +707,6 @@ def save_training_checkpoint(
        offline_replay_buffer: Optional offline replay buffer to save
        dataset_repo_id: Repository ID for dataset
        fps: Frames per second for dataset
-        preprocessor: Optional preprocessor pipeline to save
-        postprocessor: Optional postprocessor pipeline to save
    """
    logging.info(f"Checkpoint policy after step {optimization_step}")
    _num_digits = max(6, len(str(online_steps)))
@@ -582,8 +723,6 @@ def save_training_checkpoint(
        policy=policy,
        optimizer=optimizers,
        scheduler=None,
-        preprocessor=preprocessor,
-        postprocessor=postprocessor,
    )

    # Save interaction step manually
@@ -621,6 +760,58 @@ def save_training_checkpoint(
    logging.info("Resume training")


+def make_optimizers_and_scheduler(cfg: TrainRLServerPipelineConfig, policy: nn.Module):
+    """
+    Creates and returns optimizers for the actor, critic, and temperature components of a reinforcement learning policy.
+
+    This function sets up Adam optimizers for:
+    - The **actor network**, ensuring that only relevant parameters are optimized.
+    - The **critic ensemble**, which evaluates the value function.
+    - The **temperature parameter**, which controls the entropy in soft actor-critic (SAC)-like methods.
+
+    It also returns a learning rate scheduler placeholder, which is currently always `None`.
+
+    NOTE:
+    - If the encoder is shared, its parameters are excluded from the actor's optimization process.
+    - The policy's log temperature (`log_alpha`) is wrapped in a list to ensure proper optimization as a standalone tensor.
+
+    Args:
+        cfg: Configuration object containing hyperparameters.
+        policy (nn.Module): The policy model containing the actor, critic, and temperature components.
+
+    Returns:
+        Tuple[Dict[str, torch.optim.Optimizer], Optional[torch.optim.lr_scheduler._LRScheduler]]:
+        A tuple containing:
+        - `optimizers`: A dictionary mapping component names ("actor", "critic", "temperature", plus "discrete_critic" when `cfg.policy.num_discrete_actions` is set) to their respective Adam optimizers.
+        - `lr_scheduler`: Currently set to `None` but can be extended to support learning rate scheduling.
+
+    """
+    optimizer_actor = torch.optim.Adam(
+        params=[
+            p
+            for n, p in policy.actor.named_parameters()
+            if not policy.config.shared_encoder or not n.startswith("encoder")
+        ],
+        lr=cfg.policy.actor_lr,
+    )
+    optimizer_critic = torch.optim.Adam(params=policy.critic_ensemble.parameters(), lr=cfg.policy.critic_lr)
+
+    if cfg.policy.num_discrete_actions is not None:
+        optimizer_discrete_critic = torch.optim.Adam(
+            params=policy.discrete_critic.parameters(), lr=cfg.policy.critic_lr
+        )
+    optimizer_temperature = torch.optim.Adam(params=[policy.log_alpha], lr=cfg.policy.critic_lr)
+    lr_scheduler = None
+    optimizers = {
+        "actor": optimizer_actor,
+        "critic": optimizer_critic,
+        "temperature": optimizer_temperature,
+    }
+    if cfg.policy.num_discrete_actions is not None:
+        optimizers["discrete_critic"] = optimizer_discrete_critic
+    return optimizers, lr_scheduler
+
+
 # Training setup functions


@@ -825,6 +1016,33 @@ def initialize_offline_replay_buffer(
 # Utilities/Helpers functions


+def get_observation_features(
+    policy: SACPolicy, observations: torch.Tensor, next_observations: torch.Tensor
+) -> tuple[torch.Tensor | None, torch.Tensor | None]:
+    """
+    Get observation features from the policy encoder. This acts as a cache for the observation features:
+    when the encoder is frozen, the observation features are not updated,
+    so we can save compute by caching them.
+
+    Args:
+        policy: The policy model
+        observations: The current observations
+        next_observations: The next observations
+
+    Returns:
+        tuple: observation_features, next_observation_features (both `None` when there is no frozen vision encoder)
+    """
+
+    if policy.config.vision_encoder_name is None or not policy.config.freeze_vision_encoder:
+        return None, None
+
+    with torch.no_grad():
+        observation_features = policy.actor.encoder.get_cached_image_features(observations)
+        next_observation_features = policy.actor.encoder.get_cached_image_features(next_observations)
+
+    return observation_features, next_observation_features
+
+
 def use_threads(cfg: TrainRLServerPipelineConfig) -> bool:
    return cfg.policy.concurrency.learner == "threads"

@@ -875,11 +1093,19 @@ def check_nan_in_transition(
    return nan_detected


-def push_actor_policy_to_queue(parameters_queue: Queue, algorithm: RLAlgorithm) -> None:
+def push_actor_policy_to_queue(parameters_queue: Queue, policy: nn.Module):
    logging.debug("[LEARNER] Pushing actor policy to the queue")

    # Create a dictionary to hold all the state dicts
-    state_dicts = algorithm.get_weights()
+    state_dicts = {"policy": move_state_dict_to_device(policy.actor.state_dict(), device="cpu")}
+
+    # Add discrete critic if it exists
+    if hasattr(policy, "discrete_critic") and policy.discrete_critic is not None:
+        state_dicts["discrete_critic"] = move_state_dict_to_device(
+            policy.discrete_critic.state_dict(), device="cpu"
+        )
+        logging.debug("[LEARNER] Including discrete critic in state dict push")
+
    state_bytes = state_to_bytes(state_dicts)
    parameters_queue.put(state_bytes)

@@ -903,8 +1129,9 @@ def process_transitions(
    transition_queue: Queue,
    replay_buffer: ReplayBuffer,
    offline_replay_buffer: ReplayBuffer,
+    device: str,
    dataset_repo_id: str | None,
-    shutdown_event: Any,  # Event
+    shutdown_event: any,
 ):
    """Process all available transitions from the queue.

@@ -912,6 +1139,7 @@ def process_transitions(
        transition_queue: Queue for receiving transitions from the actor
        replay_buffer: Replay buffer to add transitions to
        offline_replay_buffer: Offline replay buffer to add transitions to
+        device: Device to move transitions to
        dataset_repo_id: Repository ID for dataset
        shutdown_event: Event to signal shutdown
    """
@@ -920,6 +1148,8 @@ def process_transitions(
        transition_list = bytes_to_transitions(buffer=transition_list)

        for transition in transition_list:
+            transition = move_transition_to_device(transition=transition, device=device)
+
            # Skip transitions with NaN values
            if check_nan_in_transition(
                observations=transition["state"],
@@ -933,7 +1163,7 @@ def process_transitions(

            # Add to offline buffer if it's an intervention
            if dataset_repo_id is not None and transition.get("complementary_info", {}).get(
-                TeleopEvents.IS_INTERVENTION.value
+                TeleopEvents.IS_INTERVENTION
            ):
                offline_replay_buffer.add(**transition)

@@ -942,7 +1172,7 @@ def process_interaction_messages(
    interaction_message_queue: Queue,
    interaction_step_shift: int,
    wandb_logger: WandBLogger | None,
-    shutdown_event: Any,  # Event
+    shutdown_event: any,
 ) -> dict | None:
    """Process all available interaction messages from the queue.

@@ -1,49 +0,0 @@
-# Copyright 2026 The HuggingFace Inc. team. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-"""Top-level pipeline config for distributed RL training (actor / learner)."""
-
-from __future__ import annotations
-
-from dataclasses import dataclass
-
-from lerobot.configs.default import DatasetConfig
-from lerobot.configs.train import TrainPipelineConfig
-from lerobot.rl.algorithms.configs import RLAlgorithmConfig
-from lerobot.rl.algorithms.factory import make_algorithm_config
-from lerobot.rl.algorithms.sac import SACAlgorithmConfig  # noqa: F401
-
-
-@dataclass(kw_only=True)
-class TrainRLServerPipelineConfig(TrainPipelineConfig):
-    # NOTE: In RL, we don't need an offline dataset
-    # TODO: Make `TrainPipelineConfig.dataset` optional
-    dataset: DatasetConfig | None = None  # type: ignore[assignment] # because the parent class has made it's type non-optional
-
-    # Algorithm config.
-    algorithm: RLAlgorithmConfig | None = None
-
-    # Data mixer strategy name. Currently supports "online_offline".
-    mixer: str = "online_offline"
-    # Fraction sampled from online replay when using OnlineOfflineMixer.
-    online_ratio: float = 0.5
-
-    def validate(self) -> None:
-        super().validate()
-
-        if self.algorithm is None:
-            self.algorithm = make_algorithm_config("sac")
-
-        if getattr(self.algorithm, "policy_config", None) is None:
-            self.algorithm.policy_config = self.policy
@@ -1,99 +0,0 @@
-# Copyright 2026 The HuggingFace Inc. team. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from __future__ import annotations
-
-from collections.abc import Iterator
-from typing import Any
-
-from lerobot.rl.algorithms.base import BatchType, RLAlgorithm
-from lerobot.rl.algorithms.configs import TrainingStats
-from lerobot.rl.data_sources.data_mixer import DataMixer
-
-
-class RLTrainer:
-    """Unified training step orchestrator.
-
-    Holds the algorithm, a DataMixer, and an optional preprocessor.
-    """
-
-    def __init__(
-        self,
-        algorithm: RLAlgorithm,
-        data_mixer: DataMixer,
-        batch_size: int,
-        *,
-        preprocessor: Any | None = None,
-    ):
-        self.algorithm = algorithm
-        self.data_mixer = data_mixer
-        self.batch_size = batch_size
-        self._preprocessor = preprocessor
-
-        self._iterator: Iterator[BatchType] | None = None
-
-        self.algorithm.make_optimizers_and_scheduler()
-
-    def _build_data_iterator(self) -> Iterator[BatchType]:
-        """Create a fresh algorithm-configured iterator (optionally preprocessed)."""
-        raw = self.algorithm.configure_data_iterator(
-            data_mixer=self.data_mixer,
-            batch_size=self.batch_size,
-        )
-        if self._preprocessor is not None:
-            return _PreprocessedIterator(raw, self._preprocessor)
-        return raw
-
-    def reset_data_iterator(self) -> None:
-        """Discard the current iterator so it will be rebuilt lazily next step."""
-        self._iterator = None
-
-    def set_data_mixer(self, data_mixer: DataMixer, *, reset: bool = True) -> None:
-        """Swap the active data mixer, optionally resetting the iterator."""
-        self.data_mixer = data_mixer
-        if reset:
-            self.reset_data_iterator()
-
-    def training_step(self) -> TrainingStats:
-        """Run one training step (algorithm-agnostic)."""
-        if self._iterator is None:
-            self._iterator = self._build_data_iterator()
-        return self.algorithm.update(self._iterator)
-
-
-def preprocess_rl_batch(preprocessor: Any, batch: BatchType) -> BatchType:
-    """Apply policy preprocessing to RL observations only."""
-    observations = batch["state"]
-    next_observations = batch["next_state"]
-    batch["state"] = preprocessor.process_observation(observations)
-    batch["next_state"] = preprocessor.process_observation(next_observations)
-
-    return batch
-
-
-class _PreprocessedIterator:
-    """Iterator wrapper that preprocesses each sampled RL batch."""
-
-    __slots__ = ("_raw", "_preprocessor")
-
-    def __init__(self, raw_iterator: Iterator[BatchType], preprocessor: Any) -> None:
-        self._raw = raw_iterator
-        self._preprocessor = preprocessor
-
-    def __iter__(self) -> _PreprocessedIterator:
-        return self
-
-    def __next__(self) -> BatchType:
-        batch = next(self._raw)
-        return preprocess_rl_batch(self._preprocessor, batch)
@@ -20,7 +20,7 @@ from typing import TYPE_CHECKING, Any

 from lerobot.cameras import make_cameras_from_configs
 from lerobot.types import RobotAction, RobotObservation
-from lerobot.utils.import_utils import _reachy2_sdk_available
+from lerobot.utils.import_utils import _reachy2_sdk_available, require_package

 from ..robot import Robot
 from ..utils import ensure_safe_goal_position
@@ -81,6 +81,7 @@ class Reachy2Robot(Robot):
    name = "reachy2"

    def __init__(self, config: Reachy2RobotConfig):
+        require_package("reachy2_sdk", extra="reachy2")
        super().__init__(config)

        self.config = config
@@ -353,23 +353,13 @@ class GripperVelocityToJoint(RobotActionProcessorStep):
        speed_factor: A scaling factor to convert the normalized velocity command to a position change.
        clip_min: The minimum allowed gripper joint position.
        clip_max: The maximum allowed gripper joint position.
-        discrete_gripper: If True, interpret the input as a discrete class index
-            {0 = close, 1 = stay, 2 = open}, matching `GamepadTeleop.GripperAction`.
-        scale_velocity: If True, scale the continuous gripper velocity by ``clip_max``
-            so a normalized [-1, 1] command produces a meaningful position delta
-            (PR #2596).
-        use_ik_solution: If True, integrate the gripper position on top of the
-            previous IK solution stored in ``complementary_data['IK_solution']``
-            instead of the raw joint observation (PR #2596). Useful for
-            leader-follower haptic teleop where the IK solution is more stable.
+        discrete_gripper: If True, treat the input action as a discrete index in {0, 1, 2}, mapped to a velocity of (index - 1) * clip_max (so 1 means stay).
    """

    speed_factor: float = 20.0
    clip_min: float = 0.0
    clip_max: float = 100.0
    discrete_gripper: bool = False
-    scale_velocity: bool = False
-    use_ik_solution: bool = False

    def action(self, action: RobotAction) -> RobotAction:
        observation = self.transition.get(TransitionKey.OBSERVATION).copy()
@@ -379,26 +369,18 @@ class GripperVelocityToJoint(RobotActionProcessorStep):
        if observation is None:
            raise ValueError("Joints observation is require for computing robot kinematics")

-        if self.use_ik_solution and "IK_solution" in self.transition.get(
-            TransitionKey.COMPLEMENTARY_DATA, {}
-        ):
-            q_raw = self.transition.get(TransitionKey.COMPLEMENTARY_DATA)["IK_solution"]
-        else:
-            q_raw = np.array(
-                [float(v) for k, v in observation.items() if isinstance(k, str) and k.endswith(".pos")],
-                dtype=float,
-            )
+        q_raw = np.array(
+            [float(v) for k, v in observation.items() if isinstance(k, str) and k.endswith(".pos")],
+            dtype=float,
+        )
        if q_raw is None:
            raise ValueError("Joints observation is require for computing robot kinematics")

        if self.discrete_gripper:
-            # Map discrete command {0=close, 1=stay, 2=open} -> signed velocity.
-            # Negation accounts for SO100 sign (joint position increases on close).
-            #   0 -> +clip_max (close), 1 -> 0 (stay), 2 -> -clip_max (open)
-            gripper_vel = -(gripper_vel - 1) * self.clip_max
-        elif self.scale_velocity:
-            # Scale a continuous [-1, 1] velocity command into joint-position units.
-            gripper_vel = gripper_vel * self.clip_max
+            # Discrete gripper actions are class indices in [0, 1, 2].
+            # Shifting by 1 gives [-1, 0, 1], so index 1 is the zero-velocity "stay" command.
+            # The shifted value is then scaled by clip_max: 0 -> -clip_max, 2 -> +clip_max.
+            gripper_vel = (gripper_vel - 1) * self.clip_max

        # Compute desired gripper position
        delta = gripper_vel * float(self.speed_factor)
@@ -27,7 +27,7 @@ import numpy as np

 from lerobot.cameras import make_cameras_from_configs
 from lerobot.types import RobotAction, RobotObservation
-from lerobot.utils.import_utils import _unitree_sdk_available
+from lerobot.utils.import_utils import _unitree_sdk_available, require_package

 from ..robot import Robot
 from .config_unitree_g1 import UnitreeG1Config
@@ -111,6 +111,7 @@ class UnitreeG1(Robot):
    name = "unitree_g1"

    def __init__(self, config: UnitreeG1Config):
+        require_package("unitree-sdk2py", extra="unitree_g1", import_name="unitree_sdk2py")
        super().__init__(config)

        logger.info("Initialize UnitreeG1...")
@@ -286,7 +286,7 @@ def convert_videos(root: Path, new_root: Path, video_file_size_in_mb: int):
    if len(set(num_eps_per_cam)) != 1:
        raise ValueError(f"All cams dont have same number of episodes ({num_eps_per_cam}).")

-    episods_metadata = []
+    episodes_metadata = []
    num_cameras = len(video_keys)
    num_episodes = num_eps_per_cam[0]
    for ep_idx in tqdm.tqdm(range(num_episodes), desc="convert videos"):
@@ -299,9 +299,9 @@ def convert_videos(root: Path, new_root: Path, video_file_size_in_mb: int):
        ep_dict = {}
        for cam_idx in range(num_cameras):
            ep_dict.update(eps_metadata_per_cam[cam_idx][ep_idx])
-        episods_metadata.append(ep_dict)
+        episodes_metadata.append(ep_dict)

-    return episods_metadata
+    return episodes_metadata


 def convert_videos_of_camera(root: Path, new_root: Path, video_key: str, video_file_size_in_mb: int):
@@ -559,7 +559,11 @@ def record(cfg: RecordConfig) -> LeRobotDataset:
            )

        # Load pretrained policy
-        policy = None if cfg.policy is None else make_policy(cfg.policy, ds_meta=dataset.meta)
+        policy = (
+            None
+            if cfg.policy is None
+            else make_policy(cfg.policy, ds_meta=dataset.meta, rename_map=cfg.dataset.rename_map)
+        )
        preprocessor = None
        postprocessor = None
        interpolator = None
@@ -386,7 +386,8 @@ def train(cfg: TrainPipelineConfig, accelerator: "Accelerator | None" = None):
        sampler=sampler,
        pin_memory=device.type == "cuda",
        drop_last=False,
-        prefetch_factor=2 if cfg.num_workers > 0 else None,
+        prefetch_factor=cfg.prefetch_factor if cfg.num_workers > 0 else None,
+        persistent_workers=cfg.persistent_workers and cfg.num_workers > 0,
    )

    # Prepare everything with accelerator
@@ -433,6 +434,9 @@ def train(cfg: TrainPipelineConfig, accelerator: "Accelerator | None" = None):
    for _ in range(step, cfg.steps):
        start_time = time.perf_counter()
        batch = next(dl_iter)
+        for cam_key in dataset.meta.camera_keys:
+            if cam_key in batch and batch[cam_key].dtype == torch.uint8:
+                batch[cam_key] = batch[cam_key].to(dtype=torch.float32) / 255.0
        batch = preprocessor(batch)
        train_tracker.dataloading_s = time.perf_counter() - start_time

@@ -15,9 +15,22 @@
 # limitations under the License.

 import logging
+from typing import TYPE_CHECKING
+
+from lerobot.utils.import_utils import _hidapi_available, _pygame_available, require_package

 from ..utils import TeleopEvents

+if TYPE_CHECKING or _pygame_available:
+    import pygame
+else:
+    pygame = None  # type: ignore[assignment]
+
+if TYPE_CHECKING or _hidapi_available:
+    import hid
+else:
+    hid = None  # type: ignore[assignment]
+

 class InputController:
    """Base class for input controllers that generate motion deltas."""
@@ -199,6 +212,7 @@ class GamepadController(InputController):
    """Generate motion deltas from gamepad input."""

    def __init__(self, x_step_size=1.0, y_step_size=1.0, z_step_size=1.0, deadzone=0.1):
+        require_package("pygame", extra="gamepad")
        super().__init__(x_step_size, y_step_size, z_step_size)
        self.deadzone = deadzone
        self.joystick = None
@@ -206,8 +220,6 @@ class GamepadController(InputController):

    def start(self):
        """Initialize pygame and the gamepad."""
-        import pygame
-
        pygame.init()
        pygame.joystick.init()

@@ -230,8 +242,6 @@ class GamepadController(InputController):

    def stop(self):
        """Clean up pygame resources."""
-        import pygame
-
        if pygame.joystick.get_init():
            if self.joystick:
                self.joystick.quit()
@@ -240,8 +250,6 @@ class GamepadController(InputController):

    def update(self):
        """Process pygame events to get fresh gamepad readings."""
-        import pygame
-
        for event in pygame.event.get():
            if event.type == pygame.JOYBUTTONDOWN:
                if event.button == 3:
@@ -280,8 +288,6 @@ class GamepadController(InputController):

    def get_deltas(self):
        """Get the current movement deltas from gamepad state."""
-        import pygame
-
        try:
            # Read joystick axes
            # Left stick X and Y (typically axes 0 and 1)
@@ -326,6 +332,7 @@ class GamepadControllerHID(InputController):
            z_scale: Scaling factor for Z-axis movement
            deadzone: Joystick deadzone to prevent drift
        """
+        require_package("hidapi", extra="gamepad", import_name="hid")
        super().__init__(x_step_size, y_step_size, z_step_size)
        self.deadzone = deadzone
        self.device = None
@@ -342,8 +349,6 @@ class GamepadControllerHID(InputController):

    def find_device(self):
        """Look for the gamepad device by vendor and product ID."""
-        import hid
-
        devices = hid.enumerate()
        for device in devices:
            device_name = device["product_string"]
@@ -357,8 +362,6 @@ class GamepadControllerHID(InputController):

    def start(self):
        """Connect to the gamepad using HIDAPI."""
-        import hid
-
        self.device_info = self.find_device()
        if not self.device_info:
            self.running = False
@@ -45,7 +45,7 @@ class HomunculusArm(Teleoperator):
    name = "homunculus_arm"

    def __init__(self, config: HomunculusArmConfig):
-        require_package("pyserial", extra="hardware", import_name="serial")
+        require_package("pyserial", extra="pyserial-dep", import_name="serial")
        super().__init__(config)
        self.config = config
        self.serial = serial.Serial(config.port, config.baud_rate, timeout=1)
@@ -71,7 +71,7 @@ class HomunculusGlove(Teleoperator):
    name = "homunculus_glove"

    def __init__(self, config: HomunculusGloveConfig):
-        require_package("pyserial", extra="hardware", import_name="serial")
+        require_package("pyserial", extra="pyserial-dep", import_name="serial")
        super().__init__(config)
        self.config = config
        self.serial = serial.Serial(config.port, config.baud_rate, timeout=1)
@@ -23,7 +23,7 @@ from typing import Any

 from lerobot.types import RobotAction
 from lerobot.utils.decorators import check_if_already_connected, check_if_not_connected
-from lerobot.utils.import_utils import _pynput_available
+from lerobot.utils.import_utils import _pynput_available, require_package

 from ..teleoperator import Teleoperator
 from ..utils import TeleopEvents
@@ -56,6 +56,7 @@ class KeyboardTeleop(Teleoperator):
    name = "keyboard"

    def __init__(self, config: KeyboardTeleopConfig):
+        require_package("pynput", extra="pynput-dep")
        super().__init__(config)
        self.config = config
        self.robot_type = config.type
@@ -103,14 +104,11 @@ class KeyboardTeleop(Teleoperator):

    def _on_press(self, key):
        if hasattr(key, "char"):
-            key = key.char
-        self.event_queue.put((key, True))
+            self.event_queue.put((key.char, True))

    def _on_release(self, key):
        if hasattr(key, "char"):
-            key = key.char
-        self.event_queue.put((key, False))
-
+            self.event_queue.put((key.char, False))
        if key == keyboard.Key.esc:
            logging.info("ESC pressed, disconnecting.")
            self.disconnect()
@@ -206,6 +204,8 @@ class KeyboardEndEffectorTeleop(KeyboardTeleop):
                # this is useful for retrieving other events like interventions for RL, episode success, etc.
                self.misc_keys_queue.put(key)

+        self.current_pressed.clear()
+
        action_dict = {
            "delta_x": delta_x,
            "delta_y": delta_y,
@@ -256,8 +256,6 @@ class KeyboardEndEffectorTeleop(KeyboardTeleop):
        ]
        is_intervention = any(self.current_pressed.get(key, False) for key in movement_keys)

-        self.current_pressed.clear()
-
        # Check for episode control commands from misc_keys_queue
        terminate_episode = False
        success = False
@@ -21,14 +21,24 @@
 import logging
 import threading
 import time
+from typing import TYPE_CHECKING

-import hebi
 import numpy as np
-from teleop import Teleop

 from lerobot.utils.decorators import check_if_already_connected, check_if_not_connected
+from lerobot.utils.import_utils import _hebi_available, _teleop_available, require_package
 from lerobot.utils.rotation import Rotation

+if TYPE_CHECKING or _hebi_available:
+    import hebi
+else:
+    hebi = None
+
+if TYPE_CHECKING or _teleop_available:
+    from teleop import Teleop
+else:
+    Teleop = None
+
 from ..teleoperator import Teleoperator
 from .config_phone import PhoneConfig, PhoneOS

@@ -74,6 +84,8 @@ class IOSPhone(BasePhone, Teleoperator):
    name = "ios_phone"

    def __init__(self, config: PhoneConfig):
+        require_package("hebi-py", extra="phone", import_name="hebi")
+        require_package("teleop", extra="phone")
        super().__init__(config)
        self.config = config
        self._group = None
@@ -213,6 +225,8 @@ class AndroidPhone(BasePhone, Teleoperator):
    name = "android_phone"

    def __init__(self, config: PhoneConfig):
+        require_package("hebi-py", extra="phone", import_name="hebi")
+        require_package("teleop", extra="phone")
        super().__init__(config)
        self.config = config
        self._teleop = None
@@ -19,7 +19,7 @@ import logging
 import time
 from typing import TYPE_CHECKING

-from lerobot.utils.import_utils import _reachy2_sdk_available
+from lerobot.utils.import_utils import _reachy2_sdk_available, require_package

 if TYPE_CHECKING or _reachy2_sdk_available:
    from reachy2_sdk import ReachySDK
@@ -84,6 +84,7 @@ class Reachy2Teleoperator(Teleoperator):
    name = "reachy2_specific"

    def __init__(self, config: Reachy2TeleoperatorConfig):
+        require_package("reachy2_sdk", extra="reachy2")
        super().__init__(config)

        self.config = config
@@ -20,7 +20,6 @@ from .config_so_leader import (
    SOLeaderConfig,
    SOLeaderTeleopConfig,
 )
-from .so101_leader_follower import SO101LeaderFollower
 from .so_leader import SO100Leader, SO101Leader, SOLeader

 __all__ = [
@@ -28,7 +27,6 @@ __all__ = [
    "SO100LeaderConfig",
    "SO101Leader",
    "SO101LeaderConfig",
-    "SO101LeaderFollower",
    "SOLeader",
    "SOLeaderConfig",
    "SOLeaderTeleopConfig",
@@ -29,14 +29,6 @@ class SOLeaderConfig:
    # Whether to use degrees for angles
    use_degrees: bool = True

-    # Enable leader-follower mode where leader can both lead and follow.
-    # When True, ``make_teleoperator_from_config`` returns ``SO101LeaderFollower``
-    # instead of the bare ``SOLeader`` -- see PR #2596.
-    leader_follower_mode: bool = False
-
-    # Whether to include the gripper in the leader-follower action vector.
-    use_gripper: bool = True
-

@TeleoperatorConfig.register_subclass("so101_leader")
@TeleoperatorConfig.register_subclass("so100_leader")
@@ -1,259 +0,0 @@
-#!/usr/bin/env python
-
-# Copyright 2025 The HuggingFace Inc. team. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import logging
-import os
-import sys
-import time
-from collections import deque
-from threading import Event, Thread
-
-import numpy as np
-
-from lerobot.teleoperators.so_leader.so_leader import SO101Leader
-from lerobot.teleoperators.utils import TeleopEvents
-
-PYNPUT_AVAILABLE = True
-try:
-    if ("DISPLAY" not in os.environ) and ("linux" in sys.platform):
-        logging.info("No DISPLAY set. Skipping pynput import.")
-        raise ImportError("pynput blocked intentionally due to no display.")
-
-    from pynput import keyboard
-except ImportError:
-    keyboard = None
-    PYNPUT_AVAILABLE = False
-except Exception as e:
-    keyboard = None
-    PYNPUT_AVAILABLE = False
-    logging.info(f"Could not import pynput: {e}")
-
-logger = logging.getLogger(__name__)
-
-
-class SO101LeaderFollower(SO101Leader):
-    """
-    Extended SO101 Leader that can both lead (human control) and follow (mimic follower).
-
-    This class adds leader-follower functionality where:
-    - In follow mode: The leader arm mimics the follower's position (torque enabled)
-    - In lead mode: Human controls the leader (torque disabled) and provides actions
-    """
-
-    def __init__(self, config):
-        super().__init__(config)
-
-        # Leader-follower state
-        self.is_intervening = False
-        self.leader_torque_enabled = True
-
-        # Tracking error for automatic intervention detection
-        self.leader_tracking_error_queue = deque(maxlen=4)
-
-        # Keyboard event handling
-        self.keyboard_events = {
-            "intervention": False,
-            "success": False,
-            "failure": False,
-            "rerecord": False,
-        }
-        self.keyboard_thread = None
-        self.stop_event = Event()
-
-        # Store last follower position for action computation
-        self.last_follower_pos = None
-
-    @property
-    def action_features(self) -> dict:
-        if self.config.use_gripper:
-            return {
-                "dtype": "float32",
-                "shape": (7,),
-                "names": {
-                    "delta_x": 0,
-                    "delta_y": 1,
-                    "delta_z": 2,
-                    "delta_wx": 3,
-                    "delta_wy": 4,
-                    "delta_wz": 5,
-                    "gripper": 6,
-                },
-            }
-        else:
-            return {
-                "dtype": "float32",
-                "shape": (6,),
-                "names": {
-                    "delta_x": 0,
-                    "delta_y": 1,
-                    "delta_z": 2,
-                    "delta_wx": 3,
-                    "delta_wy": 4,
-                    "delta_wz": 5,
-                },
-            }
-
-    def connect(self, calibrate: bool = True) -> None:
-        """Connect and configure for leader-follower mode."""
-        super().connect(calibrate)
-
-        # Configure for leader-follower mode with lower gains
-        # Lower gains allow manual intervention without injury risk
-        # self.bus.sync_write("Torque_Enable", 1)
-        for motor in self.bus.motors:
-            self.bus.write("P_Coefficient", motor, 16)
-            self.bus.write("I_Coefficient", motor, 0)
-            self.bus.write("D_Coefficient", motor, 16)
-
-        # Start keyboard listener
-        self._start_keyboard_listener()
-
-        print("- Leader-Follower Mode:")
-        print("  - Press SPACE to toggle intervention (leader control)")
-        print("  - When not intervening, leader follows follower position")
-        print("  - When intervening, follower follows leader in end-effector space")
-        print("  - Press 's' to mark episode as success")
-        print("  - Press ESC to end episode as failure")
-        print("  - Press 'r' to re-record episode")
-
-    def _start_keyboard_listener(self):
-        """Start keyboard listener thread for intervention control."""
-
-        def on_press(key):
-            try:
-                if key == keyboard.Key.space:
-                    self.keyboard_events["intervention"] = not self.keyboard_events["intervention"]
-                    self.is_intervening = self.keyboard_events["intervention"]
-                    state = "INTERVENTION MODE" if self.is_intervening else "FOLLOWING MODE"
-                    logger.info(f"Toggled to {state}")
-                elif key == keyboard.Key.esc:
-                    self.keyboard_events["failure"] = True
-                elif hasattr(key, "char"):
-                    if key.char == "s":
-                        self.keyboard_events["success"] = True
-                    elif key.char == "r":
-                        self.keyboard_events["rerecord"] = True
-            except Exception as e:
-                logger.error(f"Error handling key press: {e}")
-
-        def listen():
-            with keyboard.Listener(on_press=on_press) as listener:
-                while not self.stop_event.is_set():
-                    time.sleep(0.1)
-                listener.stop()
-
-        self.keyboard_thread = Thread(target=listen, daemon=True)
-        self.keyboard_thread.start()
-
-    def send_action(self, action: dict[str, float]) -> None:
-        """
-        Send position commands to leader arm (follow mode).
-
-        Args:
-            action: Dictionary of motor positions to command
-        """
-        # Store follower position for later use
-        self.last_follower_pos = np.array([action.get(f"{motor}.pos", 0) for motor in self.bus.motors])
-
-        if not self.is_intervening:
-            # Follow mode: enable torque and track follower
-            if not self.leader_torque_enabled:
-                self.bus.sync_write("Torque_Enable", 1)
-                self.leader_torque_enabled = True
-
-            # Send follower positions to leader
-            goal_pos = {motor: action[f"{motor}.pos"] for motor in self.bus.motors}
-            self.bus.sync_write("Goal_Position", goal_pos)
-
-            # Track error for automatic intervention detection
-            current_pos = self.bus.sync_read("Present_Position")
-            current_array = np.array([current_pos[motor] for motor in self.bus.motors])
-            error = np.linalg.norm(self.last_follower_pos[:-1] - current_array[:-1])
-            self.leader_tracking_error_queue.append(error)
-
-    def get_action(self) -> dict[str, float]:
-        """
-        Get action from leader arm.
-
-        In follow mode: Returns neutral/current positions
-        In lead mode: Returns actual leader positions for follower to track
-        """
-        start = time.perf_counter()
-
-        if self.is_intervening:
-            # Lead mode: disable torque if needed and return leader positions
-            if self.leader_torque_enabled:
-                self.bus.sync_write("Torque_Enable", 0)
-                self.leader_torque_enabled = False
-
-            # Get current leader position
-            action = self.bus.sync_read("Present_Position")
-            action = {f"{motor}.pos": val for motor, val in action.items()}
-
-            # Track error
-            if self.last_follower_pos is not None:
-                current_array = np.array([action[f"{motor}.pos"] for motor in self.bus.motors])
-                error = np.linalg.norm(self.last_follower_pos[:-1] - current_array[:-1])
-                self.leader_tracking_error_queue.append(error)
-        else:
-            # Follow mode: return current/neutral positions
-            action = self.bus.sync_read("Present_Position")
-            action = {f"{motor}.pos": val for motor, val in action.items()}
-
-        dt_ms = (time.perf_counter() - start) * 1e3
-        logger.debug(f"{self} read action: {dt_ms:.1f}ms")
-        return action
-
-    def get_teleop_events(self) -> dict[TeleopEvents, bool]:
-        """Get current keyboard events."""
-        events = {}
-
-        # Map keyboard events to TeleopEvents
-        if self.keyboard_events["success"]:
-            events[TeleopEvents.SUCCESS] = True
-            self.keyboard_events["success"] = False
-        if self.keyboard_events["failure"]:
-            events[TeleopEvents.FAILURE] = True
-            events[TeleopEvents.TERMINATE_EPISODE] = True
-            self.keyboard_events["failure"] = False
-        if self.keyboard_events["rerecord"]:
-            events[TeleopEvents.RERECORD_EPISODE] = True
-            events[TeleopEvents.TERMINATE_EPISODE] = True
-            self.keyboard_events["rerecord"] = False
-
-        # Always report intervention state
-        events[TeleopEvents.IS_INTERVENTION] = self.is_intervening
-
-        return events
-
-    def disconnect(self) -> None:
-        """Disconnect and cleanup."""
-        self.stop_event.set()
-        if self.keyboard_thread:
-            self.keyboard_thread.join(timeout=1.0)
-        super().disconnect()
-
-    def reset(self) -> None:
-        """Reset leader-follower state."""
-        self.is_intervening = False
-        self.leader_torque_enabled = True
-        self.leader_tracking_error_queue.clear()
-        self.keyboard_events = {
-            "intervention": False,
-            "success": False,
-            "failure": False,
-            "rerecord": False,
-        }
@@ -34,7 +34,7 @@ from typing import TYPE_CHECKING

 import numpy as np

-from lerobot.utils.import_utils import _serial_available
+from lerobot.utils.import_utils import _serial_available, require_package

 if TYPE_CHECKING or _serial_available:
    import serial
@@ -156,6 +156,7 @@ def run_exo_calibration(
    """
    Run interactive calibration for an exoskeleton arm.
    """
+    require_package("pyserial", extra="unitree_g1", import_name="serial")
    try:
        import cv2
        import matplotlib.pyplot as plt
@@ -76,7 +76,7 @@ class ExoskeletonArm:
    calibration: ExoskeletonCalibration | None = None

    def __post_init__(self):
-        require_package("pyserial", extra="hardware", import_name="serial")
+        require_package("pyserial", extra="unitree_g1", import_name="serial")
        if self.calibration_fpath.is_file():
            self._load_calibration()

@@ -52,10 +52,7 @@ def make_teleoperator_from_config(config: TeleoperatorConfig) -> "Teleoperator":

        return SO100Leader(config)
    elif config.type == "so101_leader":
-        from .so_leader import SO101Leader, SO101LeaderFollower
-
-        if getattr(config, "leader_follower_mode", False):
-            return SO101LeaderFollower(config)
+        from .so_leader import SO101Leader

        return SO101Leader(config)
    elif config.type == "mock_teleop":
--- a/Show More
+++ b/Show More