Compare commits


1 Commit

| Author | SHA1 | Message | Date |
| ---------------- | ---------- | ------------------------------------------ | -------------------------- |
| Michel Aractingi | 74b7cd246e | add check for cfg.policy in force_cpu line | 2026-01-19 13:54:44 +01:00 |
340 changed files with 6584 additions and 21716 deletions
+1 -12
@@ -18,11 +18,6 @@ name: Documentation
on:
# Allows running this workflow manually from the Actions tab
workflow_dispatch:
inputs:
version:
description: 'Version tag (e.g. v0.1.2) - Leave empty for standard main build'
required: false
type: string
# Triggers the workflow on push events to main for the docs folder
push:
@@ -59,13 +54,7 @@ jobs:
with:
commit_sha: ${{ github.sha }}
package: lerobot
additional_args: >-
--not_python_module
${{
(github.event_name == 'release' && format('--version {0}', github.event.release.tag_name)) ||
(inputs.version != '' && format('--version {0}', inputs.version)) ||
''
}}
additional_args: --not_python_module ${{ github.event_name == 'release' && format('--version {0}', github.event.release.tag_name) || '' }}
secrets:
token: ${{ secrets.HUGGINGFACE_PUSH }}
hf_token: ${{ secrets.HF_DOC_BUILD_PUSH }}
+1 -8
@@ -44,7 +44,7 @@ permissions:
# Sets up the environment variables
env:
UV_VERSION: "0.8.0"
PYTHON_VERSION: "3.12"
PYTHON_VERSION: "3.10"
# Ensures that only the latest commit for a PR or branch is built, canceling older runs.
concurrency:
@@ -61,7 +61,6 @@ jobs:
MUJOCO_GL: egl
HF_HOME: /mnt/cache/.cache/huggingface
HF_LEROBOT_HOME: /mnt/cache/.cache/huggingface/lerobot
HF_USER_TOKEN: ${{ secrets.LEROBOT_HF_USER }}
steps:
- uses: actions/checkout@v6
with:
@@ -90,11 +89,5 @@ jobs:
- name: Install lerobot with test extras
run: uv sync --extra "test"
- name: Login to Hugging Face
if: env.HF_USER_TOKEN != ''
run: |
uv run hf auth login --token "$HF_USER_TOKEN" --add-to-git-credential
uv run hf auth whoami
- name: Run pytest
run: uv run pytest tests -vv --maxfail=10
+4 -21
@@ -37,7 +37,7 @@ permissions:
# Sets up the environment variables
env:
UV_VERSION: "0.8.0"
PYTHON_VERSION: "3.12"
PYTHON_VERSION: "3.10"
DOCKER_IMAGE_NAME: huggingface/lerobot-gpu
# Ensures that only the latest action is built, canceling older runs.
@@ -60,7 +60,6 @@ jobs:
MUJOCO_GL: egl
HF_HOME: /mnt/cache/.cache/huggingface
HF_LEROBOT_HOME: /mnt/cache/.cache/huggingface/lerobot
HF_USER_TOKEN: ${{ secrets.LEROBOT_HF_USER }}
steps:
- uses: actions/checkout@v6
with:
@@ -88,12 +87,6 @@ jobs:
- name: Install lerobot with all extras
run: uv sync --extra all # TODO(Steven): Make flash-attn optional
- name: Login to Hugging Face
if: env.HF_USER_TOKEN != ''
run: |
uv run hf auth login --token "$HF_USER_TOKEN" --add-to-git-credential
uv run hf auth whoami
- name: Run pytest (all extras)
run: uv run pytest tests -vv --maxfail=10
@@ -108,11 +101,9 @@ jobs:
runs-on:
group: aws-general-8-plus
if: |
github.repository == 'huggingface/lerobot' && (
(github.event_name == 'pull_request_review' && github.event.review.state == 'approved' && github.event.pull_request.head.repo.fork == false) ||
github.event_name == 'push' ||
github.event_name == 'workflow_dispatch'
)
(github.event_name == 'pull_request_review' && github.event.review.state == 'approved' && github.event.pull_request.head.repo.fork == false) ||
github.event_name == 'push' ||
github.event_name == 'workflow_dispatch'
outputs:
image_tag: ${{ steps.set_tag.outputs.image_tag }}
env:
@@ -169,7 +160,6 @@ jobs:
HF_LEROBOT_HOME: /home/user_lerobot/.cache/huggingface/lerobot
TORCH_HOME: /home/user_lerobot/.cache/torch
TRITON_CACHE_DIR: /home/user_lerobot/.cache/triton
HF_USER_TOKEN: ${{ secrets.LEROBOT_HF_USER }}
container:
image: ${{ needs.build-and-push-docker.outputs.image_tag }} # zizmor: ignore[unpinned-images]
options: --gpus all --shm-size "16gb"
@@ -181,13 +171,6 @@ jobs:
shell: bash
working-directory: /lerobot
steps:
- name: Login to Hugging Face
if: env.HF_USER_TOKEN != ''
run: |
hf auth login --token "$HF_USER_TOKEN" --add-to-git-credential
hf auth whoami
- name: Fix ptxas permissions
run: chmod +x /lerobot/.venv/lib/python3.12/site-packages/triton/backends/nvidia/bin/ptxas
- name: Run pytest on GPU
run: pytest tests -vv --maxfail=10
- name: Run end-to-end tests
+4 -20
@@ -28,7 +28,7 @@ on:
# Sets up the environment variables
env:
UV_VERSION: "0.8.0"
PYTHON_VERSION: "3.12"
PYTHON_VERSION: "3.10"
DOCKER_IMAGE_NAME_CPU: huggingface/lerobot-cpu:latest
DOCKER_IMAGE_NAME_GPU: huggingface/lerobot-gpu:latest
@@ -119,7 +119,6 @@ jobs:
HF_LEROBOT_HOME: /home/user_lerobot/.cache/huggingface/lerobot
TORCH_HOME: /home/user_lerobot/.cache/torch
TRITON_CACHE_DIR: /home/user_lerobot/.cache/triton
HF_USER_TOKEN: ${{ secrets.LEROBOT_HF_USER }}
container:
image: ${{ needs.build-docker-cpu-nightly.outputs.image_tag }} # zizmor: ignore[unpinned-images]
options: --shm-size "16gb"
@@ -131,11 +130,6 @@ jobs:
shell: bash
working-directory: /lerobot
steps:
- name: Login to Hugging Face
if: env.HF_USER_TOKEN != ''
run: |
hf auth login --token "$HF_USER_TOKEN" --add-to-git-credential
hf auth whoami
- name: Run pytest on CPU
run: pytest tests -vv --maxfail=10
- name: Run end-to-end tests
@@ -152,7 +146,6 @@ jobs:
HF_LEROBOT_HOME: /home/user_lerobot/.cache/huggingface/lerobot
TORCH_HOME: /home/user_lerobot/.cache/torch
TRITON_CACHE_DIR: /home/user_lerobot/.cache/triton
HF_USER_TOKEN: ${{ secrets.LEROBOT_HF_USER }}
container:
image: ${{ needs.build-docker-gpu-nightly.outputs.image_tag }} # zizmor: ignore[unpinned-images]
options: --gpus all --shm-size "16gb"
@@ -164,11 +157,6 @@ jobs:
shell: bash
working-directory: /lerobot
steps:
- name: Login to Hugging Face
if: env.HF_USER_TOKEN != ''
run: |
hf auth login --token "$HF_USER_TOKEN" --add-to-git-credential
hf auth whoami
- name: Run pytest on GPU
run: pytest tests -vv --maxfail=10
- name: Run end-to-end tests
@@ -186,7 +174,6 @@ jobs:
TORCH_HOME: /home/user_lerobot/.cache/torch
TRITON_CACHE_DIR: /home/user_lerobot/.cache/triton
CUDA_VISIBLE_DEVICES: "0,1,2,3"
HF_USER_TOKEN: ${{ secrets.LEROBOT_HF_USER }}
container:
image: ${{ needs.build-docker-gpu-nightly.outputs.image_tag }} # zizmor: ignore[unpinned-images]
options: --gpus all --shm-size "16gb"
@@ -198,15 +185,12 @@ jobs:
shell: bash
working-directory: /lerobot
steps:
- name: Login to Hugging Face
if: env.HF_USER_TOKEN != ''
run: |
hf auth login --token "$HF_USER_TOKEN" --add-to-git-credential
hf auth whoami
- name: Verify GPU availability
run: |
nvidia-smi
python -c "import torch; print(f'PyTorch CUDA available: {torch.cuda.is_available()}'); print(f'Number of GPUs: {torch.cuda.device_count()}')"
- name: Run multi-GPU training tests
run: pytest -vv tests/training/
# TODO(Steven): Investigate why motors tests are failing in multi-GPU setup
run: pytest tests -vv --maxfail=10 --ignore=tests/motors/
timeout-minutes: 10
+1 -1
@@ -50,7 +50,7 @@ jobs:
- name: Set up Python
uses: actions/setup-python@v6
with:
python-version: '3.12'
python-version: '3.10'
- name: Run pre-commit hooks
uses: pre-commit/action@v3.0.1 # zizmor: ignore[unpinned-uses]
+10 -2
@@ -22,7 +22,7 @@ on:
# Sets up the environment variables
env:
UV_VERSION: "0.8.0"
PYTHON_VERSION: "3.12"
PYTHON_VERSION: "3.10"
jobs:
# This job builds the Python package and publishes it to PyPI
@@ -45,7 +45,7 @@ jobs:
- name: Set up Python
uses: actions/setup-python@v6
with:
python-version: '3.12'
python-version: '3.10'
- name: Extract Version
id: extract_info
@@ -83,6 +83,14 @@ jobs:
exit 1
fi
- name: Remove Tags with Git dependencies
# TODO(Steven): Temporary patch to remove pi from PyPi 0.4.0 release due to its reliance on git dependencies.
run: |
echo "::info:: Checking for Git dependencies to remove from pyproject.toml..."
grep -E '@ git\+https|lerobot\[pi\]' pyproject.toml | sed 's/^/::warning:: Removing line: /' || true
sed -E -i '/@ git\+https|lerobot\[pi\]/d' pyproject.toml
echo "::info:: Git dependencies removed. Proceeding with build."
- name: Install build dependencies
run: python -m pip install build
+4 -16
@@ -20,8 +20,8 @@ on:
workflow_dispatch:
# Run on the 1st and 15th of every month at 09:00 UTC
# schedule:
# - cron: '0 2 1,15 * *'
schedule:
- cron: '0 2 1,15 * *'
permissions:
contents: read
@@ -29,7 +29,7 @@ permissions:
# Sets up the environment variables
env:
UV_VERSION: "0.8.0"
PYTHON_VERSION: "3.12"
PYTHON_VERSION: "3.10"
DOCKER_IMAGE_NAME: huggingface/lerobot-gpu:unbound
# Ensures that only the latest action is built, canceling older runs.
@@ -48,7 +48,6 @@ jobs:
MUJOCO_GL: egl
HF_HOME: /mnt/cache/.cache/huggingface
HF_LEROBOT_HOME: /mnt/cache/.cache/huggingface/lerobot
HF_USER_TOKEN: ${{ secrets.LEROBOT_HF_USER }}
steps:
- uses: actions/checkout@v6
with:
@@ -80,11 +79,7 @@ jobs:
- name: Install lerobot with all extras
run: uv sync --extra all # TODO(Steven): Make flash-attn optional
- name: Login to Hugging Face
if: env.HF_USER_TOKEN != ''
run: |
uv run hf auth login --token "$HF_USER_TOKEN" --add-to-git-credential
uv run hf auth whoami
- name: Run pytest (all extras)
run: uv run pytest tests -vv
@@ -96,7 +91,6 @@ jobs:
name: Build and Push Docker
runs-on:
group: aws-general-8-plus
if: github.repository == 'huggingface/lerobot'
outputs:
image_tag: ${{ env.DOCKER_IMAGE_NAME }}
env:
@@ -142,7 +136,6 @@ jobs:
HF_LEROBOT_HOME: /home/user_lerobot/.cache/huggingface/lerobot
TORCH_HOME: /home/user_lerobot/.cache/torch
TRITON_CACHE_DIR: /home/user_lerobot/.cache/triton
HF_USER_TOKEN: ${{ secrets.LEROBOT_HF_USER }}
container:
image: ${{ needs.build-and-push-docker.outputs.image_tag }} # zizmor: ignore[unpinned-images]
options: --gpus all --shm-size "16gb"
@@ -154,11 +147,6 @@ jobs:
shell: bash
working-directory: /lerobot
steps:
- name: Login to Hugging Face
if: env.HF_USER_TOKEN != ''
run: |
hf auth login --token "$HF_USER_TOKEN" --add-to-git-credential
hf auth whoami
- name: Run pytest on GPU
run: pytest tests -vv
- name: Run end-to-end tests
+2 -2
@@ -13,7 +13,7 @@
# limitations under the License.
default_language_version:
python: python3.12
python: python3.10
exclude: "tests/artifacts/.*\\.safetensors$"
@@ -55,7 +55,7 @@ repos:
rev: v3.21.0
hooks:
- id: pyupgrade
args: [--py312-plus]
args: [--py310-plus]
##### Markdown Quality #####
- repo: https://github.com/rbubley/mirrors-prettier
-25
@@ -1,25 +0,0 @@
# AI Usage Policy
The LeRobot project welcomes contributions from everyone, and we have a few guidelines regarding AI usage to ensure high code quality, clear communication, and a healthy open-source ecosystem:
- **Please disclose significant AI assistance.** If you used AI tools (e.g., Copilot, Claude, Cursor, ChatGPT) to generate a substantial portion of your code or text, let us know in your PR description. Transparency helps us review your changes more effectively.
- **Own your code (The Human-in-the-Loop).** You must fully understand all the changes you are proposing. If you cannot explain what your AI-assisted code does or how it interacts with LeRobot's broader architecture, please take the time to learn and test it before submitting.
- **Keep issues and discussions focused.** You are welcome to use AI to help draft issues or PR descriptions, but please review and edit them carefully before posting. AI can often be overly verbose; trimming the noise and getting straight to the point helps our maintainers address your needs faster.
Our core maintainers also use AI tools to aid their workflows, but they do so while bringing deep contextual knowledge of the LeRobot codebase to validate the output. We ask all contributors to apply that same level of rigor.
## Remember the Human Maintainers
Please remember that LeRobot is maintained by a dedicated team of humans.
Every discussion, issue, and pull request is read and reviewed by real people. While AI tools can generate thousands of lines of code in seconds, reviewing that code still takes human time and energy. Submitting unverified or low-effort AI output puts an unfair burden on our maintainers.
Today, the quality of the AI output still heavily depends on the developer driving the tool. We ask that you respect our maintainers' time by thoroughly vetting, testing, and refining your submissions.
## AI is Welcome Here
LeRobot operates at the cutting edge of AI and robotics, and many of our maintainers actively embrace AI coding assistants as valuable productivity tools. We are a pro-AI project!
Our reason for having an AI policy is not an anti-AI stance. Rather, it exists to ensure that AI is used to enhance human contributions, not replace them with unverified noise. It's about how the tools are used, not the tools themselves.
We value the unique human insight you bring to the LeRobot community. Let AI empower your workflow, but always let your own judgment take the wheel.
+5 -5
@@ -2,7 +2,7 @@
Everyone is welcome to contribute, and we value everybody's contribution. Code is not the only way to help the community. Answering questions, helping others, reaching out, and improving the documentation are immensely valuable.
Whichever way you choose to contribute, please be mindful to respect our [code of conduct](https://github.com/huggingface/lerobot/blob/main/CODE_OF_CONDUCT.md) and our [AI policy](https://github.com/huggingface/lerobot/blob/main/AI_POLICY.md).
Whichever way you choose to contribute, please be mindful to respect our [code of conduct](./CODE_OF_CONDUCT.md).
## Ways to Contribute
@@ -14,7 +14,7 @@ You can contribute in many ways:
- **Documentation:** Improve examples, guides, and docstrings.
- **Feedback:** Submit tickets related to bugs or desired new features.
If you are unsure where to start, join our [Discord Channel](https://discord.gg/q8Dzzpym3f).
If you are unsure where to start, join our [Discord Channel](https://discord.gg/JkrYNdmw).
## Development Setup
@@ -32,7 +32,7 @@ git remote add upstream https://github.com/huggingface/lerobot.git
### 2. Environment Installation
Please follow our [Installation Guide](https://huggingface.co/docs/lerobot/installation) for the environment setup & installation from source.
Please follow our [Installation Guide](./docs/source/installation.mdx) for the environment setup & installation from source.
## Running Tests & Quality Checks
@@ -75,8 +75,8 @@ pytest -sv tests/test_specific_feature.py
Use the templates for required fields and examples.
- **Issues:** Follow the [ticket template](https://github.com/huggingface/lerobot/blob/main/.github/ISSUE_TEMPLATE/bug-report.yml).
- **Pull requests:** Rebase on `upstream/main`, use a descriptive branch (don't work on `main`), run `pre-commit` and tests locally, and follow the [PR template](https://github.com/huggingface/lerobot/blob/main/.github/PULL_REQUEST_TEMPLATE.md).
- **Issues:** Follow the [ticket template](./.github/ISSUE_TEMPLATE/bug-report.yml).
- **Pull requests:** Rebase on `upstream/main`, use a descriptive branch (don't work on `main`), run `pre-commit` and tests locally, and follow the [PR template](./.github/PULL_REQUEST_TEMPLATE.md).
One member of the LeRobot team will then review your contribution.
-1
@@ -1,3 +1,2 @@
include src/lerobot/templates/lerobot_modelcard_template.md
include src/lerobot/datasets/card_template.md
include src/lerobot/envs/metaworld_config.json
+2 -20
@@ -128,14 +128,13 @@ Learn how to implement your own simulation environment or benchmark and distribu
## Resources
- **[Documentation](https://huggingface.co/docs/lerobot/index):** The complete guide to tutorials & API.
- **[Chinese Tutorials: LeRobot+SO-ARM101中文教程-同济子豪兄](https://zihao-ai.feishu.cn/wiki/space/7589642043471924447)** Detailed doc for assembling, teleoperate, dataset, train, deploy. Verified by Seed Studio and 5 global hackathon players.
- **[Discord](https://discord.gg/q8Dzzpym3f):** Join the `LeRobot` server to discuss with the community.
- **[X](https://x.com/LeRobotHF):** Follow us on X to stay up-to-date with the latest developments.
- **[Robot Learning Tutorial](https://huggingface.co/spaces/lerobot/robot-learning-tutorial):** A free, hands-on course to learn robot learning using LeRobot.
## Citation
If you use LeRobot in your project, please cite the GitHub repository to acknowledge the ongoing development and contributors:
If you use LeRobot in your research, please cite:
```bibtex
@misc{cadene2024lerobot,
@@ -146,26 +145,9 @@ If you use LeRobot in your project, please cite the GitHub repository to acknowl
}
```
If you are referencing our research or the academic paper, please also cite our ICLR publication:
<details>
<summary><b>ICLR 2026 Paper</b></summary>
```bibtex
@inproceedings{cadenelerobot,
title={LeRobot: An Open-Source Library for End-to-End Robot Learning},
author={Cadene, Remi and Alibert, Simon and Capuano, Francesco and Aractingi, Michel and Zouitine, Adil and Kooijmans, Pepijn and Choghari, Jade and Russi, Martino and Pascal, Caroline and Palma, Steven and Shukor, Mustafa and Moss, Jess and Soare, Alexander and Aubakirova, Dana and Lhoest, Quentin and Gallou\'edec, Quentin and Wolf, Thomas},
booktitle={The Fourteenth International Conference on Learning Representations},
year={2026},
url={https://arxiv.org/abs/2602.22818}
}
```
</details>
## Contribute
We welcome contributions from everyone in the community! To get started, please read our [CONTRIBUTING.md](https://github.com/huggingface/lerobot/blob/main/CONTRIBUTING.md) guide. Whether you're adding a new feature, improving documentation, or fixing a bug, your help and feedback are invaluable. We're incredibly excited about the future of open-source robotics and can't wait to work with you on what's next—thank you for your support!
We welcome contributions from everyone in the community! To get started, please read our [CONTRIBUTING.md](./CONTRIBUTING.md) guide. Whether you're adding a new feature, improving documentation, or fixing a bug, your help and feedback are invaluable. We're incredibly excited about the future of open-source robotics and can't wait to work with you on what's next—thank you for your support!
<p align="center">
<img alt="SO101 Video" src="./media/readme/so100_video.webp" width="640px">
-48
@@ -1,48 +0,0 @@
# Security Policy
## Project Status & Philosophy
`lerobot` has so far been primarily a research and prototyping tool, which is why deployment security hasn't been a strong focus until now. As `lerobot` continues to be adopted and deployed in production, we are paying much closer attention to these kinds of issues.
Fortunately, being an open-source project, the community can also help by reporting and fixing vulnerabilities. We appreciate your efforts to responsibly disclose your findings and will make every effort to acknowledge your contributions.
## Reporting a Vulnerability
To report a security issue, please use the GitHub Security Advisory ["Report a Vulnerability"](https://github.com/huggingface/lerobot/security/advisories/new) tab.
The `lerobot` team will send a response indicating the next steps in handling your report. After the initial reply to your report, the security team will keep you informed of the progress towards a fix and full announcement, and may ask for additional information or guidance.
#### Hugging Face Security Team
Since this project is part of the Hugging Face ecosystem, feel free to submit vulnerability reports directly to: **[security@huggingface.co](mailto:security@huggingface.co)**. Someone from the HF security team will review the report and recommend next steps.
#### Open Source Disclosures
If reporting a vulnerability specific to the open-source codebase (and not the underlying Hub infrastructure), you may also use [Huntr](https://huntr.com), a vulnerability disclosure program for open source software.
## Supported Versions
Currently, we treat `lerobot` as a rolling release. We prioritize security updates for the latest available version (`main` branch).
| Version | Supported |
| -------- | --------- |
| Latest | ✅ |
| < Latest | ❌ |
## Secure Usage Guidelines
`lerobot` is tightly coupled to the Hugging Face Hub for sharing data and pretrained policies. When downloading artifacts uploaded by others, you expose yourself to risks. Please read below for recommendations to keep your runtime and robot environment safe.
### Remote Artefacts (Weights & Policies)
Models and policies uploaded to the Hugging Face Hub come in different formats. We heavily recommend uploading and downloading models in the [`safetensors`](https://github.com/huggingface/safetensors) format.
`safetensors` was developed specifically to prevent arbitrary code execution on your system, which is critical when running software on physical hardware/robots.
To avoid loading models from unsafe formats (e.g., `pickle`), you should ensure you are prioritizing `safetensors` files.
### Remote Code
Some models or environments on the Hub may require `trust_remote_code=True` to run custom architecture code.
Please **always** verify the content of the modeling files when using this argument. We recommend setting a specific `revision` (commit hash) when loading remote code to ensure you protect yourself from unverified updates to the repository.
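As a minimal sketch of these two recommendations (pinning a revision and preferring `safetensors`), the snippet below uses `huggingface_hub`; the repository id and commit hash are placeholders, not real artifacts.

```python
from huggingface_hub import snapshot_download

# Placeholder repo id and commit hash, shown only to illustrate pinning a revision
# and restricting the download to safetensors/config files (no pickle-based weights).
local_dir = snapshot_download(
    repo_id="some-user/some-policy",
    revision="0123456789abcdef0123456789abcdef01234567",
    allow_patterns=["*.safetensors", "*.json"],
)
print(f"Pinned snapshot downloaded to: {local_dir}")
```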
+42 -42
@@ -28,9 +28,9 @@ We don't expect the same optimal settings for a dataset of images from a simulat
For these reasons, we run this benchmark on four representative datasets:
- `lerobot/pusht_image`: (96 x 96 pixels) simulation with simple geometric shapes, fixed camera.
- `lerobot/aloha_mobile_shrimp_image`: (480 x 640 pixels) real-world indoor, moving camera.
- `lerobot/paris_street`: (720 x 1280 pixels) real-world outdoor, moving camera.
- `lerobot/kitchen`: (1080 x 1920 pixels) real-world indoor, fixed camera.
- `aliberts/aloha_mobile_shrimp_image`: (480 x 640 pixels) real-world indoor, moving camera.
- `aliberts/paris_street`: (720 x 1280 pixels) real-world outdoor, moving camera.
- `aliberts/kitchen`: (1080 x 1920 pixels) real-world indoor, fixed camera.
Note: The datasets used for this benchmark need to be image datasets, not video datasets.
@@ -179,7 +179,7 @@ python benchmark/video/run_video_benchmark.py \
--output-dir outputs/video_benchmark \
--repo-ids \
lerobot/pusht_image \
lerobot/aloha_mobile_shrimp_image \
aliberts/aloha_mobile_shrimp_image \
--vcodec libx264 libx265 \
--pix-fmt yuv444p yuv420p \
--g 2 20 None \
@@ -203,9 +203,9 @@ python benchmark/video/run_video_benchmark.py \
--output-dir outputs/video_benchmark \
--repo-ids \
lerobot/pusht_image \
lerobot/aloha_mobile_shrimp_image \
lerobot/paris_street \
lerobot/kitchen \
aliberts/aloha_mobile_shrimp_image \
aliberts/paris_street \
aliberts/kitchen \
--vcodec libx264 libx265 \
--pix-fmt yuv444p yuv420p \
--g 1 2 3 4 5 6 10 15 20 40 None \
@@ -221,9 +221,9 @@ python benchmark/video/run_video_benchmark.py \
--output-dir outputs/video_benchmark \
--repo-ids \
lerobot/pusht_image \
lerobot/aloha_mobile_shrimp_image \
lerobot/paris_street \
lerobot/kitchen \
aliberts/aloha_mobile_shrimp_image \
aliberts/paris_street \
aliberts/kitchen \
--vcodec libsvtav1 \
--pix-fmt yuv420p \
--g 1 2 3 4 5 6 10 15 20 40 None \
@@ -252,37 +252,37 @@ Since we're using av1 encoding, we're choosing the `pyav` decoder as `video_read
These tables show the results for `g=2` and `crf=30`, using `timestamps-modes=6_frames` and `backend=pyav`
| video_images_size_ratio | vcodec | pix_fmt | | | |
| --------------------------------- | ---------- | ------- | --------- | --------- | --------- |
| | libx264 | | libx265 | | libsvtav1 |
| repo_id | yuv420p | yuv444p | yuv420p | yuv444p | yuv420p |
| lerobot/pusht_image | **16.97%** | 17.58% | 18.57% | 18.86% | 22.06% |
| lerobot/aloha_mobile_shrimp_image | 2.14% | 2.11% | 1.38% | **1.37%** | 5.59% |
| lerobot/paris_street | 2.12% | 2.13% | **1.54%** | **1.54%** | 4.43% |
| lerobot/kitchen | 1.40% | 1.39% | **1.00%** | **1.00%** | 2.52% |
| video_images_size_ratio | vcodec | pix_fmt | | | |
| ---------------------------------- | ---------- | ------- | --------- | --------- | --------- |
| | libx264 | | libx265 | | libsvtav1 |
| repo_id | yuv420p | yuv444p | yuv420p | yuv444p | yuv420p |
| lerobot/pusht_image | **16.97%** | 17.58% | 18.57% | 18.86% | 22.06% |
| aliberts/aloha_mobile_shrimp_image | 2.14% | 2.11% | 1.38% | **1.37%** | 5.59% |
| aliberts/paris_street | 2.12% | 2.13% | **1.54%** | **1.54%** | 4.43% |
| aliberts/kitchen | 1.40% | 1.39% | **1.00%** | **1.00%** | 2.52% |
| video_images_load_time_ratio | vcodec | pix_fmt | | | |
| --------------------------------- | ------- | ------- | -------- | ------- | --------- |
| | libx264 | | libx265 | | libsvtav1 |
| repo_id | yuv420p | yuv444p | yuv420p | yuv444p | yuv420p |
| lerobot/pusht_image | 6.45 | 5.19 | **1.90** | 2.12 | 2.47 |
| lerobot/aloha_mobile_shrimp_image | 11.80 | 7.92 | 0.71 | 0.85 | **0.48** |
| lerobot/paris_street | 2.21 | 2.05 | 0.36 | 0.49 | **0.30** |
| lerobot/kitchen | 1.46 | 1.46 | 0.28 | 0.51 | **0.26** |
| video_images_load_time_ratio | vcodec | pix_fmt | | | |
| ---------------------------------- | ------- | ------- | -------- | ------- | --------- |
| | libx264 | | libx265 | | libsvtav1 |
| repo_id | yuv420p | yuv444p | yuv420p | yuv444p | yuv420p |
| lerobot/pusht_image | 6.45 | 5.19 | **1.90** | 2.12 | 2.47 |
| aliberts/aloha_mobile_shrimp_image | 11.80 | 7.92 | 0.71 | 0.85 | **0.48** |
| aliberts/paris_street | 2.21 | 2.05 | 0.36 | 0.49 | **0.30** |
| aliberts/kitchen | 1.46 | 1.46 | 0.28 | 0.51 | **0.26** |
| | | vcodec | pix_fmt | | | |
| --------------------------------- | -------- | -------- | ------------ | -------- | --------- | ------------ |
| | | libx264 | | libx265 | | libsvtav1 |
| repo_id | metric | yuv420p | yuv444p | yuv420p | yuv444p | yuv420p |
| lerobot/pusht_image | avg_mse | 2.90E-04 | **2.03E-04** | 3.13E-04 | 2.29E-04 | 2.19E-04 |
| | avg_psnr | 35.44 | 37.07 | 35.49 | **37.30** | 37.20 |
| | avg_ssim | 98.28% | **98.85%** | 98.31% | 98.84% | 98.72% |
| lerobot/aloha_mobile_shrimp_image | avg_mse | 2.76E-04 | 2.59E-04 | 3.17E-04 | 3.06E-04 | **1.30E-04** |
| | avg_psnr | 35.91 | 36.21 | 35.88 | 36.09 | **40.17** |
| | avg_ssim | 95.19% | 95.18% | 95.00% | 95.05% | **97.73%** |
| lerobot/paris_street | avg_mse | 6.89E-04 | 6.70E-04 | 4.03E-03 | 4.02E-03 | **3.09E-04** |
| | avg_psnr | 33.48 | 33.68 | 32.05 | 32.15 | **35.40** |
| | avg_ssim | 93.76% | 93.75% | 89.46% | 89.46% | **95.46%** |
| lerobot/kitchen | avg_mse | 2.50E-04 | 2.24E-04 | 4.28E-04 | 4.18E-04 | **1.53E-04** |
| | avg_psnr | 36.73 | 37.33 | 36.56 | 36.75 | **39.12** |
| | avg_ssim | 95.47% | 95.58% | 95.52% | 95.53% | **96.82%** |
| | | vcodec | pix_fmt | | | |
| ---------------------------------- | -------- | -------- | ------------ | -------- | --------- | ------------ |
| | | libx264 | | libx265 | | libsvtav1 |
| repo_id | metric | yuv420p | yuv444p | yuv420p | yuv444p | yuv420p |
| lerobot/pusht_image | avg_mse | 2.90E-04 | **2.03E-04** | 3.13E-04 | 2.29E-04 | 2.19E-04 |
| | avg_psnr | 35.44 | 37.07 | 35.49 | **37.30** | 37.20 |
| | avg_ssim | 98.28% | **98.85%** | 98.31% | 98.84% | 98.72% |
| aliberts/aloha_mobile_shrimp_image | avg_mse | 2.76E-04 | 2.59E-04 | 3.17E-04 | 3.06E-04 | **1.30E-04** |
| | avg_psnr | 35.91 | 36.21 | 35.88 | 36.09 | **40.17** |
| | avg_ssim | 95.19% | 95.18% | 95.00% | 95.05% | **97.73%** |
| aliberts/paris_street | avg_mse | 6.89E-04 | 6.70E-04 | 4.03E-03 | 4.02E-03 | **3.09E-04** |
| | avg_psnr | 33.48 | 33.68 | 32.05 | 32.15 | **35.40** |
| | avg_ssim | 93.76% | 93.75% | 89.46% | 89.46% | **95.46%** |
| aliberts/kitchen | avg_mse | 2.50E-04 | 2.24E-04 | 4.28E-04 | 4.18E-04 | **1.53E-04** |
| | avg_psnr | 36.73 | 37.33 | 36.56 | 36.75 | **39.12** |
| | avg_ssim | 95.47% | 95.58% | 95.52% | 95.53% | **96.82%** |
+1 -3
@@ -24,7 +24,7 @@ ARG OS_VERSION=22.04
FROM nvidia/cuda:${CUDA_VERSION}-base-ubuntu${OS_VERSION}
# Define Python version argument
ARG PYTHON_VERSION=3.12
ARG PYTHON_VERSION=3.10
# Configure environment variables
ENV DEBIAN_FRONTEND=noninteractive \
@@ -85,8 +85,6 @@ RUN if [ "$UNBOUND_DEPS" = "true" ]; then \
RUN uv pip install --no-cache ".[all]"
RUN chmod +x /lerobot/.venv/lib/python${PYTHON_VERSION}/site-packages/triton/backends/nvidia/bin/ptxas
# Copy the rest of the application source code
# Make sure to have the git-LFS files for testing
COPY --chown=user_lerobot:user_lerobot . .
+1 -3
@@ -18,10 +18,8 @@
# docker build -f docker/Dockerfile.user -t lerobot-user .
# docker run -it --rm lerobot-user
# With USB physical access : docker run -it --device=/dev/ -v /dev/:/dev/ --rm lerobot-user
# Configure the base image
ARG PYTHON_VERSION=3.12
ARG PYTHON_VERSION=3.10
FROM python:${PYTHON_VERSION}-slim
# Configure environment variables
+2 -14
@@ -7,6 +7,8 @@
- sections:
- local: il_robots
title: Imitation Learning for Robots
- local: cameras
title: Cameras
- local: bring_your_own_policies
title: Bring Your Own Policies
- local: integrate_hardware
@@ -27,10 +29,6 @@
title: Porting Large Datasets
- local: using_dataset_tools
title: Using the Dataset Tools
- local: dataset_subtask
title: Using Subtasks in the Dataset
- local: streaming_video_encoding
title: Streaming Video Encoding
title: "Datasets"
- sections:
- local: act
@@ -101,19 +99,11 @@
title: Unitree G1
- local: earthrover_mini_plus
title: Earth Rover Mini
- local: omx
title: OMX
- local: openarm
title: OpenArm
title: "Robots"
- sections:
- local: phone_teleop
title: Phone
title: "Teleoperators"
- sections:
- local: cameras
title: Cameras
title: "Sensors"
- sections:
- local: torch_accelerators
title: PyTorch accelerators
@@ -123,8 +113,6 @@
title: Notebooks
- local: feetech
title: Updating Feetech Firmware
- local: damiao
title: Damiao Motors and CAN Bus
title: "Resources"
- sections:
- local: contributing
-3
@@ -88,8 +88,5 @@ lerobot-record \
--dataset.repo_id=${HF_USER}/eval_act_your_dataset \
--dataset.num_episodes=10 \
--dataset.single_task="Your task description" \
--dataset.streaming_encoding=true \
--dataset.encoder_threads=2 \
# --dataset.vcodec=auto \
--policy.path=${HF_USER}/act_policy
```
+1 -2
View File
@@ -48,7 +48,7 @@ python -m lerobot.async_inference.robot_client \
--task="dummy" \ # POLICY: The task to run the policy on (`Fold my t-shirt`). Not necessarily defined for all policies, such as `act`
--policy_type=your_policy_type \ # POLICY: the type of policy to run (smolvla, act, etc)
--pretrained_name_or_path=user/model \ # POLICY: the model name/path on server to the checkpoint to run (e.g., lerobot/smolvla_base)
--policy_device=mps \ # POLICY: the device to run the policy on, on the server (cuda, mps, xpu, cpu)
--policy_device=mps \ # POLICY: the device to run the policy on, on the server
--actions_per_chunk=50 \ # POLICY: the number of actions to output at once
--chunk_size_threshold=0.5 \ # CLIENT: the threshold for the chunk size before sending a new observation to the server
--aggregate_fn_name=weighted_average \ # CLIENT: the function to aggregate actions on overlapping portions
@@ -195,7 +195,6 @@ client_cfg = RobotClientConfig(
robot=robot_cfg,
server_address="localhost:8080",
policy_device="mps",
client_device="cpu",
policy_type="smolvla",
pretrained_name_or_path="<user>/smolvla_async",
chunk_size_threshold=0.5,
+4 -4
@@ -32,7 +32,7 @@ version = "0.1.0"
dependencies = [
# your policy-specific dependencies
]
requires-python = ">= 3.12"
requires-python = ">= 3.11"
[build-system]
build-backend = # your-build-backend
@@ -82,7 +82,7 @@ Create your policy implementation by inheriting from LeRobot's base `PreTrainedP
# modeling_my_custom_policy.py
import torch
import torch.nn as nn
from typing import Any
from typing import Dict, Any
from lerobot.policies.pretrained import PreTrainedPolicy
from .configuration_my_custom_policy import MyCustomPolicyConfig
@@ -91,7 +91,7 @@ class MyCustomPolicy(PreTrainedPolicy):
config_class = MyCustomPolicyConfig
name = "my_custom_policy"
def __init__(self, config: MyCustomPolicyConfig, dataset_stats: dict[str, Any] = None):
def __init__(self, config: MyCustomPolicyConfig, dataset_stats: Dict[str, Any] = None):
super().__init__(config, dataset_stats)
...
```
@@ -102,7 +102,7 @@ Create processor functions:
```python
# processor_my_custom_policy.py
from typing import Any
from typing import Dict, Any
import torch
+81 -95
@@ -1,22 +1,12 @@
# Cameras
LeRobot offers multiple options for video capture:
LeRobot offers multiple options for video capture, including phone cameras, built-in laptop cameras, external webcams, and Intel RealSense cameras. To efficiently record frames from most cameras, you can use either the `OpenCVCamera` or `RealSenseCamera` class. For additional compatibility details on the `OpenCVCamera` class, refer to the [Video I/O with OpenCV Overview](https://docs.opencv.org/4.x/d0/da7/videoio_overview.html).
| Class | Supported Cameras |
| ----------------- | ----------------------------------- |
| `OpenCVCamera` | Phone, built-in laptop, USB webcams |
| `ZMQCamera` | Network-connected cameras |
| `RealSenseCamera` | Intel RealSense (with depth) |
| `Reachy2Camera` | Reachy 2 robot cameras |
### Finding your camera
> [!TIP]
> For `OpenCVCamera` compatibility details, see the [Video I/O with OpenCV Overview](https://docs.opencv.org/4.x/d0/da7/videoio_overview.html).
To instantiate a camera, you need a camera identifier. This identifier might change if you reboot your computer or re-plug your camera, a behavior mostly dependent on your operating system.
### Find your camera
Every camera requires a unique identifier to be instantiated, allowing you to distinguish between multiple connected devices.
`OpenCVCamera` and `RealSenseCamera` support auto-discovery. Run the command below to list available devices and their identifiers. Note that these identifiers may change after rebooting your computer or re-plugging the camera, depending on your operating system.
To find the camera indices of the cameras plugged into your system, run the following script:
```bash
lerobot-find-cameras opencv # or realsense for Intel Realsense cameras
@@ -24,7 +14,7 @@ lerobot-find-cameras opencv # or realsense for Intel Realsense cameras
The output will look something like this if you have two cameras connected:
```bash
```
--- Detected Cameras ---
Camera #0:
Name: OpenCV Camera @ 0
@@ -43,37 +33,13 @@ Camera #0:
> [!WARNING]
> When using Intel RealSense cameras on `macOS`, you could get this [error](https://github.com/IntelRealSense/librealsense/issues/12307): `Error finding RealSense cameras: failed to set power state`. This can be solved by running the same command with `sudo` permissions. Note that using RealSense cameras on `macOS` is unstable.
`ZMQCamera` and `Reachy2Camera` do not support auto-discovery. They must be configured manually by providing their network address and port or robot SDK settings.
## Use Cameras
## Use cameras
Below are two examples, demonstrating how to work with the API.
### Frame access modes
All camera classes implement three access modes for capturing frames:
| Method | Behavior | Blocks? | Best For |
| ------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------- | -------------- | ---------------------------------------- |
| `read()` | Waits for the camera hardware to return a frame. May block for a long time depending on the camera and SDK. | Yes | Simple scripts, sequential capture |
| `async_read(timeout_ms)` | Returns the latest unconsumed frame from background thread. Blocks only if buffer is empty, up to `timeout_ms`. Raises `TimeoutError` if no frame arrives. | With a timeout | Control loops synchronized to camera FPS |
| `read_latest(max_age_ms)` | Peeks at the most recent frame in buffer (may be stale). Raises `TimeoutError` if frame is older than `max_age_ms`. | No | UI visualization, logging, monitoring |
### Usage examples
The following examples show how to use the camera API to configure and capture frames from different camera types.
- **Blocking and non-blocking frame capture** using an OpenCV-based camera
- **Asynchronous frame capture** using an OpenCV-based camera
- **Color and depth capture** using an Intel RealSense camera
> [!WARNING]
> Failing to cleanly disconnect cameras can cause resource leaks. Use the context manager protocol to ensure automatic cleanup:
>
> ```python
> with OpenCVCamera(config) as camera:
> ...
> ```
>
> You can also call `connect()` and `disconnect()` manually, but always use a `finally` block for the latter.
<hfoptions id="shell_restart">
<hfoption id="Open CV Camera">
@@ -94,30 +60,16 @@ config = OpenCVCameraConfig(
)
# Instantiate and connect an `OpenCVCamera`, performing a warm-up read (default).
with OpenCVCamera(config) as camera:
# Read a frame synchronously — blocks until hardware delivers a new frame
frame = camera.read()
print(f"read() call returned frame with shape:", frame.shape)
# Read a frame asynchronously with a timeout — returns the latest unconsumed frame or waits up to timeout_ms for a new one
try:
for i in range(10):
frame = camera.async_read(timeout_ms=200)
print(f"async_read call returned frame {i} with shape:", frame.shape)
except TimeoutError as e:
print(f"No frame received within timeout: {e}")
# Instantly return a frame - returns the most recent frame captured by the camera
try:
initial_frame = camera.read_latest(max_age_ms=1000)
for i in range(10):
frame = camera.read_latest(max_age_ms=1000)
print(f"read_latest call returned frame {i} with shape:", frame.shape)
print(f"Was a new frame received by the camera? {not (initial_frame == frame).any()}")
except TimeoutError as e:
print(f"Frame too old: {e}")
camera = OpenCVCamera(config)
camera.connect()
# Read frames asynchronously in a loop via `async_read(timeout_ms)`
try:
for i in range(10):
frame = camera.async_read(timeout_ms=200)
print(f"Async frame {i} shape:", frame.shape)
finally:
camera.disconnect()
```
<!-- prettier-ignore-end -->
@@ -159,10 +111,10 @@ finally:
</hfoption>
</hfoptions>
## Use your phone's camera
## Use your phone
<hfoptions id="use phone">
<hfoption id="iPhone & macOS">
<hfoption id="Mac">
To use your iPhone as a camera on macOS, enable the Continuity Camera feature:
@@ -172,49 +124,83 @@ To use your iPhone as a camera on macOS, enable the Continuity Camera feature:
For more details, visit [Apple support](https://support.apple.com/en-gb/guide/mac-help/mchl77879b8a/mac).
Your iPhone should be detected automatically when running the camera setup script in the next section.
</hfoption>
<hfoption id="OBS virtual camera">
<hfoption id="Linux">
If you want to use your phone as a camera using OBS, follow these steps to set up a virtual camera.
If you want to use your phone as a camera on Linux, follow these steps to set up a virtual camera.
1. _(Linux only) Install `v4l2loopback-dkms` and `v4l-utils`_. These packages create virtual camera devices and verify their settings. Install with:
1. _Install `v4l2loopback-dkms` and `v4l-utils`_. Those packages are required to create virtual camera devices (`v4l2loopback`) and verify their settings with the `v4l2-ctl` utility from `v4l-utils`. Install them using:
```bash
<!-- prettier-ignore-start -->
```python
sudo apt install v4l2loopback-dkms v4l-utils
```
<!-- prettier-ignore-end -->
2. _Install the [DroidCam app](https://droidcam.app) on your phone_. This app is available for both iOS and Android.
3. _Download and install [OBS Studio](https://obsproject.com)_.
4. _Download and install the [DroidCam OBS plugin](https://droidcam.app/obs)_.
5. _Start OBS Studio_.
2. _Install [DroidCam](https://droidcam.app) on your phone_. This app is available for both iOS and Android.
3. _Install [OBS Studio](https://obsproject.com)_. This software will help you manage the camera feed. Install it using [Flatpak](https://flatpak.org):
6. _Add your phone as a source_. Follow the instructions [here](https://droidcam.app/obs/usage). Be sure to set the resolution to `640x480` to avoid the watermarks.
7. _Adjust resolution settings_. In OBS Studio, go to `File > Settings > Video` or `OBS > Preferences... > Video`. Change the `Base(Canvas) Resolution` and the `Output(Scaled) Resolution` to `640x480` by manually typing it.
<!-- prettier-ignore-start -->
```python
flatpak install flathub com.obsproject.Studio
```
<!-- prettier-ignore-end -->
4. _Install the DroidCam OBS plugin_. This plugin integrates DroidCam with OBS Studio. Install it with:
<!-- prettier-ignore-start -->
```python
flatpak install flathub com.obsproject.Studio.Plugin.DroidCam
```
<!-- prettier-ignore-end -->
5. _Start OBS Studio_. Launch with:
<!-- prettier-ignore-start -->
```python
flatpak run com.obsproject.Studio
```
<!-- prettier-ignore-end -->
6. _Add your phone as a source_. Follow the instructions [here](https://droidcam.app/obs/usage). Be sure to set the resolution to `640x480`.
7. _Adjust resolution settings_. In OBS Studio, go to `File > Settings > Video`. Change the `Base(Canvas) Resolution` and the `Output(Scaled) Resolution` to `640x480` by manually typing it in.
8. _Start virtual camera_. In OBS Studio, follow the instructions [here](https://obsproject.com/kb/virtual-camera-guide).
9. _Verify the virtual camera setup and resolution_.
- **Linux**: Use `v4l2-ctl` to list devices and check resolution:
```bash
v4l2-ctl --list-devices # find VirtualCam and note its /dev/videoX path
v4l2-ctl -d /dev/videoX --get-fmt-video # replace with your VirtualCam path
```
You should see `VirtualCam` listed and resolution `640x480`.
- **macOS**: Open Photo Booth or FaceTime and select "OBS Virtual Camera" as the input.
- **Windows**: The native Camera app doesn't support virtual cameras. Use a video conferencing app (Zoom, Teams) or run `lerobot-find-cameras opencv` directly to verify.
9. _Verify the virtual camera setup_. Use `v4l2-ctl` to list the devices:
<details>
<summary><strong>Troubleshooting</strong></summary>
<!-- prettier-ignore-start -->
```python
v4l2-ctl --list-devices
```
<!-- prettier-ignore-end -->
> The virtual camera resolution is incorrect.
You should see an entry like:
Delete the virtual camera source and recreate it. The resolution cannot be changed after creation.
```
VirtualCam (platform:v4l2loopback-000):
/dev/video1
```
> Error reading frame in background thread for OpenCVCamera(X): OpenCVCamera(X) frame width=640 or height=480 do not match configured width=1920 or height=1080.
10. _Check the camera resolution_. Use `v4l2-ctl` to ensure that the virtual camera output resolution is `640x480`. Change `/dev/video1` to the port of your virtual camera from the output of `v4l2-ctl --list-devices`.
This error is caused by OBS Virtual Camera advertising a `1920x1080` resolution despite rescaling. The only fix for now is to comment out the width and height check in `_postprocess_image()`.
<!-- prettier-ignore-start -->
```python
v4l2-ctl -d /dev/video1 --get-fmt-video
```
<!-- prettier-ignore-end -->
</details>
You should see an entry like:
```
>>> Format Video Capture:
>>> Width/Height : 640/480
>>> Pixel Format : 'YUYV' (YUYV 4:2:2)
```
Troubleshooting: If the resolution is not correct, you will have to delete the Virtual Camera port and try again, as it cannot be changed.
If everything is set up correctly, you can proceed with the rest of the tutorial.
</hfoption>
</hfoptions>
If everything is set up correctly, your phone will appear as a standard OpenCV camera and can be used with `OpenCVCamera`.
-165
@@ -1,165 +0,0 @@
# Damiao Motors and CAN Bus
This guide covers setup and usage of Damiao motors with LeRobot via CAN bus communication.
Currently, only Linux is supported, as the OpenArms CAN adapter only has drivers for Linux.
## Linux CAN Setup
Before using Damiao motors, you need to set up the CAN interface on your Linux system.
### Install CAN Utilities
```bash
sudo apt-get install can-utils
```
### Configure CAN Interface (Manual)
For standard CAN FD (recommended for OpenArms):
```bash
sudo ip link set can0 down
sudo ip link set can0 type can bitrate 1000000 dbitrate 5000000 fd on
sudo ip link set can0 up
```
For standard CAN (without FD):
```bash
sudo ip link set can0 down
sudo ip link set can0 type can bitrate 1000000
sudo ip link set can0 up
```
### Configure CAN Interface (Using LeRobot)
LeRobot provides a utility script to setup and test CAN interfaces:
```bash
# Setup multiple interfaces (e.g., OpenArms Followers with 2 CAN buses)
lerobot-setup-can --mode=setup --interfaces=can0,can1
```
## Debugging CAN Communication
Use the built-in debug tools to test motor communication:
```bash
# Test motors on all interfaces
lerobot-setup-can --mode=test --interfaces=can0,can1
# Run speed/latency test
lerobot-setup-can --mode=speed --interfaces=can0
```
The test mode will scan for motors (IDs 0x01-0x08) and report which ones respond. Example output:
```
can0: UP (CAN FD)
Motor 0x01 (joint_1): ✓ FOUND
→ Response 0x11 [FD]: 00112233...
Motor 0x02 (joint_2): ✓ FOUND
Motor 0x03 (joint_3): ✗ No response
...
Summary: 2/8 motors found
```
## Usage
### Basic Setup
```python
from lerobot.motors import Motor
from lerobot.motors.damiao import DamiaoMotorsBus
# Define your motors with send/receive CAN IDs
motors = {
"joint_1": Motor(id=0x01, motor_type_str="dm8009", recv_id=0x11),
"joint_2": Motor(id=0x02, motor_type_str="dm4340", recv_id=0x12),
"joint_3": Motor(id=0x03, motor_type_str="dm4310", recv_id=0x13),
}
# Create the bus
bus = DamiaoMotorsBus(
port="can0", # Linux socketcan interface
motors=motors,
)
# Connect
bus.connect()
```
### Reading Motor States
```python
# Read single motor position (degrees)
position = bus.read("Present_Position", "joint_1")
# Read from multiple motors
positions = bus.sync_read("Present_Position") # All motors
positions = bus.sync_read("Present_Position", ["joint_1", "joint_2"])
# Read all states at once (position, velocity, torque)
states = bus.sync_read_all_states()
# Returns: {'joint_1': {'position': 45.2, 'velocity': 1.3, 'torque': 0.5}, ...}
```
### Writing Motor Commands
```python
# Enable torque
bus.enable_torque()
# Set goal position (degrees)
bus.write("Goal_Position", "joint_1", 45.0)
# Set positions for multiple motors
bus.sync_write("Goal_Position", {
"joint_1": 45.0,
"joint_2": -30.0,
"joint_3": 90.0,
})
# Disable torque
bus.disable_torque()
```
## Configuration Options
| Parameter | Default | Description |
| -------------- | --------- | ----------------------------------------------------------- |
| `port` | - | CAN interface (`can0`) or serial port (`/dev/cu.usbmodem*`) |
| `use_can_fd` | `True` | Enable CAN FD for higher data rates |
| `bitrate` | `1000000` | Nominal bitrate (1 Mbps) |
| `data_bitrate` | `5000000` | CAN FD data bitrate (5 Mbps) |
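Assuming these options map to `DamiaoMotorsBus` constructor keyword arguments (only `port` and `motors` appear in the example above, so the rest is an assumption), explicit configuration could look like this sketch:

```python
from lerobot.motors import Motor
from lerobot.motors.damiao import DamiaoMotorsBus

# Sketch: pass the documented options explicitly (values are the defaults from the table).
bus = DamiaoMotorsBus(
    port="can0",                                     # Linux socketcan interface
    motors={"joint_1": Motor(id=0x01, motor_type_str="dm4310", recv_id=0x11)},
    use_can_fd=True,                                 # CAN FD for higher data rates
    bitrate=1_000_000,                               # 1 Mbps nominal bitrate
    data_bitrate=5_000_000,                          # 5 Mbps CAN FD data bitrate
)
bus.connect()
```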
## Motor Configuration
Each motor requires:
- `id`: CAN ID for sending commands
- `motor_type`: One of the supported motor types (e.g., `"dm8009"`, `"dm4340"`)
- `recv_id`: CAN ID for receiving responses
OpenArms default IDs follow the pattern: send ID `0x0N`, receive ID `0x1N` where N is the joint number.
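Since the pattern is regular, a motor map for several joints can be built programmatically; this is a sketch reusing the `Motor` signature from the example above, with a single illustrative motor type:

```python
from lerobot.motors import Motor

# OpenArms convention: joint N uses send ID 0x0N and receive ID 0x1N.
# The motor type here is illustrative; mix types per joint as needed.
motors = {
    f"joint_{n}": Motor(id=n, motor_type_str="dm4310", recv_id=0x10 + n)
    for n in range(1, 8)
}
```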
## Troubleshooting
### No Response from Motors
1. **Check power**
2. **Verify CAN wiring**: Check CAN-H, CAN-L, and GND connections
3. **Check motor IDs**: Use Damiao Debugging Tools to verify/configure IDs
4. **Test CAN interface**: Run `candump can0` to see if messages are being received
5. **Run diagnostics**: `lerobot-setup-can --mode=test --interfaces=can0`
### Motor Timeout Parameter
If motors were configured with timeout=0, they won't respond to commands. Use Damiao Debugging Tools to set a non-zero timeout value.
### Verify CAN FD Status
```bash
ip -d link show can0 | grep fd
```
-278
@@ -1,278 +0,0 @@
# Using Subtasks in LeRobot Datasets
Subtask support in robotics datasets has proven effective in improving robot reasoning and understanding. Subtasks are particularly useful for:
- **Hierarchical policies**: Building policies that include subtask predictions to visualize robot reasoning in real time
- **Reward modeling**: Helping reward models understand task progression (e.g., SARM-style stage-aware reward models)
- **Task decomposition**: Breaking down complex manipulation tasks into atomic, interpretable steps
LeRobotDataset now supports subtasks as part of its dataset structure, alongside tasks.
## What are Subtasks?
While a **task** describes the overall goal (e.g., "Pick up the apple and place it in the basket"), **subtasks** break down the execution into finer-grained steps:
1. "Approach the apple"
2. "Grasp the apple"
3. "Lift the apple"
4. "Move to basket"
5. "Release the apple"
Each frame in the dataset can be annotated with its corresponding subtask, enabling models to learn and predict these intermediate stages.
<img
src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/lerobot/subtask-asset.png"
alt="An overview of subtask annotation showing how frames are labeled with intermediate subtask stages"
width="80%"
/>
<p>
<em>Figure: Overview of subtask annotation.</em>
</p>
**Reference:** _Subtask-learning based for robot self-assembly in flexible collaborative assembly in manufacturing_, Original Article, Published: 19 April 2022.
## Dataset Structure
Subtask information is stored in the dataset metadata:
```
my-dataset/
├── data/
│ └── ...
├── meta/
│ ├── info.json
│ ├── stats.json
│ ├── tasks.parquet
│ ├── subtasks.parquet # Subtask index → subtask string mapping
│ └── episodes/
│ └── ...
└── videos/
└── ...
```
### Subtasks Parquet File
The `meta/subtasks.parquet` file maps subtask indices to their natural language descriptions:
| subtask_index | subtask (index column) |
| ------------- | ---------------------- |
| 0 | "Approach the apple" |
| 1 | "Grasp the apple" |
| 2 | "Lift the apple" |
| ... | ... |
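For a quick look at this mapping outside of LeRobot, the parquet file can be read directly; the path below assumes the layout shown above:

```python
import pandas as pd

# Read the subtask mapping straight from the dataset's metadata folder.
subtasks = pd.read_parquet("my-dataset/meta/subtasks.parquet")
print(subtasks)  # indexed by the subtask string, with a `subtask_index` column
```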
### Frame-Level Annotations
Each frame in the dataset can include a `subtask_index` field that references the subtasks parquet file:
```python
# Example frame data in the parquet file
{
"index": 42,
"timestamp": 1.4,
"episode_index": 0,
"task_index": 0,
"subtask_index": 2, # References "Lift the apple"
"observation.state": [...],
"action": [...],
}
```
## Annotating Datasets with Subtasks
We provide a HuggingFace Space for easily annotating any LeRobotDataset with subtasks:
**[https://huggingface.co/spaces/lerobot/annotate](https://huggingface.co/spaces/lerobot/annotate)**
After completing your annotation:
1. Click "Push to Hub" to upload your annotated dataset
2. You can also run the annotation space locally by following the instructions at [github.com/huggingface/lerobot-annotate](https://github.com/huggingface/lerobot-annotate)
## Loading Datasets with Subtasks
When you load a dataset with subtask annotations, the subtask information is automatically available:
```python
from lerobot.datasets.lerobot_dataset import LeRobotDataset
# Load a dataset with subtask annotations
dataset = LeRobotDataset("jadechoghari/collect-fruit-annotated")
# Access a sample
sample = dataset[100]
# The sample includes both task and subtask information
print(sample["task"]) # "Collect the fruit"
print(sample["subtask"]) # "Grasp the apple"
print(sample["task_index"]) # tensor(0)
print(sample["subtask_index"]) # tensor(2)
```
### Checking for Subtask Support
You can check if a dataset has subtask annotations:
```python
# Check if subtasks are available
has_subtasks = (
"subtask_index" in dataset.features
and dataset.meta.subtasks is not None
)
if has_subtasks:
print(f"Dataset has {len(dataset.meta.subtasks)} unique subtasks")
print("Subtasks:", list(dataset.meta.subtasks.index))
```
## Using Subtasks for Training
### With the Tokenizer Processor
The `TokenizerProcessor` automatically handles subtask tokenization for Vision-Language Action (VLA) models:
```python
from lerobot.processor.tokenizer_processor import TokenizerProcessor
from lerobot.processor.pipeline import ProcessorPipeline
# Create a tokenizer processor
tokenizer_processor = TokenizerProcessor(
tokenizer_name_or_path="google/paligemma-3b-pt-224",
padding="max_length",
max_length=64,
)
# The processor will automatically tokenize subtasks if present in the batch
# and add them to the observation under:
# - "observation.subtask.tokens"
# - "observation.subtask.attention_mask"
```
When subtasks are available in the batch, the tokenizer processor adds:
- `observation.subtask.tokens`: Tokenized subtask text
- `observation.subtask.attention_mask`: Attention mask for the subtask tokens
### DataLoader with Subtasks
```python
import torch
from lerobot.datasets.lerobot_dataset import LeRobotDataset
dataset = LeRobotDataset("jadechoghari/collect-fruit-annotated")
dataloader = torch.utils.data.DataLoader(
dataset,
batch_size=16,
shuffle=True,
)
for batch in dataloader:
# Access subtask information in the batch
subtasks = batch["subtask"] # List of subtask strings
subtask_indices = batch["subtask_index"] # Tensor of subtask indices
# Use for training hierarchical policies or reward models
print(f"Batch subtasks: {set(subtasks)}")
```
## Example Datasets with Subtask Annotations
Try loading a dataset with subtask annotations:
```python
from lerobot.datasets.lerobot_dataset import LeRobotDataset
# Example dataset with subtask annotations
dataset = LeRobotDataset("jadechoghari/collect-fruit-annotated")
# Explore the subtasks
print("Available subtasks:")
for subtask_name in dataset.meta.subtasks.index:
print(f" - {subtask_name}")
# Get subtask distribution
subtask_counts = {}
for i in range(len(dataset)):
sample = dataset[i]
subtask = sample["subtask"]
subtask_counts[subtask] = subtask_counts.get(subtask, 0) + 1
print("\nSubtask distribution:")
for subtask, count in sorted(subtask_counts.items(), key=lambda x: -x[1]):
print(f" {subtask}: {count} frames")
```
## Use Cases
### 1. Hierarchical Policy Training
Train policies that predict both actions and current subtask:
```python
class HierarchicalPolicy(nn.Module):
def __init__(self, num_subtasks):
super().__init__()
self.action_head = nn.Linear(hidden_dim, action_dim)
self.subtask_head = nn.Linear(hidden_dim, num_subtasks)
def forward(self, observations):
features = self.encoder(observations)
actions = self.action_head(features)
subtask_logits = self.subtask_head(features)
return actions, subtask_logits
```
### 2. Stage-Aware Reward Modeling (SARM)
Build reward models that understand task progression:
```python
# SARM predicts:
# - Stage: Which subtask is being executed (discrete)
# - Progress: How far along the subtask (continuous 0-1)
class SARMRewardModel(nn.Module):
def forward(self, observations):
features = self.encoder(observations)
stage_logits = self.stage_classifier(features)
progress = self.progress_regressor(features)
return stage_logits, progress
```
### 3. Progress Visualization
Monitor robot execution by tracking subtask progression:
```python
def visualize_execution(model, observations):
for t, obs in enumerate(observations):
action, subtask_logits = model(obs)
predicted_subtask = subtask_names[subtask_logits.argmax()]
print(f"t={t}: Executing '{predicted_subtask}'")
```
## API Reference
### LeRobotDataset Properties
| Property | Type | Description |
| --------------------------- | ---------------------- | ------------------------------------------ |
| `meta.subtasks` | `pd.DataFrame \| None` | DataFrame mapping subtask names to indices |
| `features["subtask_index"]` | `dict` | Feature spec for subtask_index if present |
### Sample Keys
When subtasks are available, each sample includes:
| Key | Type | Description |
| --------------- | -------------- | ------------------------------------ |
| `subtask_index` | `torch.Tensor` | Integer index of the current subtask |
| `subtask` | `str` | Natural language subtask description |
## Related Resources
- [SARM Paper](https://arxiv.org/pdf/2509.25358) - Stage-Aware Reward Modeling for Long Horizon Robot Manipulation
- [LeRobot Annotate Space](https://huggingface.co/spaces/lerobot/annotate) - Interactive annotation tool
- [LeRobotDataset v3.0](./lerobot-dataset-v3) - Dataset format documentation
+11 -24
@@ -1,11 +1,5 @@
# EarthRover Mini Plus
<img
src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/lerobot/Earth_Rover_Mini_5_240c9adc-4f9e-44b7-982f-5d1dc24af1d8.png.webp"
alt="EarthRover Mini Plus"
width="70%"
/>
The EarthRover Mini Plus is a fully open source mobile robot that connects through the cloud using the Frodobots SDK. This lets you control the robot and record datasets for training AI models.
## What You Need
@@ -13,7 +7,7 @@ The EarthRover Mini Plus is a fully open source mobile robot that connects throu
### Hardware
- EarthRover Mini robot
- Computer with Python 3.12 or newer
- Computer with Python 3.10 or newer
- Internet connection
### Setting Up the Frodobots SDK
@@ -170,13 +164,13 @@ Once you can drive the robot well, you can start recording data to train AI mode
We use Hugging Face to store your data online. First, log in with your token from [Hugging Face settings](https://huggingface.co/settings/tokens):
```bash
hf auth login --token ${HUGGINGFACE_TOKEN} --add-to-git-credential
huggingface-cli login --token ${HUGGINGFACE_TOKEN} --add-to-git-credential
```
Store your Hugging Face username:
```bash
HF_USER=$(hf auth whoami | awk -F': *' 'NR==1 {print $2}')
HF_USER=$(huggingface-cli whoami | head -n 1)
echo $HF_USER
```
@@ -185,16 +179,13 @@ echo $HF_USER
Use the standard recording command:
```bash
lerobot-record \
python src/lerobot/scripts/lerobot_record.py \
--robot.type=earthrover_mini_plus \
--teleop.type=keyboard_rover \
--dataset.repo_id=your_username/dataset_name \
--dataset.num_episodes=2 \
--dataset.fps=10 \
--dataset.single_task="Navigate around obstacles" \
--dataset.streaming_encoding=true \
--dataset.encoder_threads=2 \
# --dataset.vcodec=auto \
--display_data=true
```
@@ -204,26 +195,22 @@ Replace `your_username/dataset_name` with your Hugging Face username and a name
Your dataset includes:
**Your Actions (2 features)**:
**Your Actions (2 things)**:
- `linear_velocity`: How much you moved forward/backward
- `angular_velocity`: How much you turned left/right
- How much you moved forward/backward
- How much you turned left/right
**Robot Observations (24 features)**:
**Robot Observations (12 things)**:
- Front camera video
- Rear camera video
- Current speed
- Battery level
- Orientation
- GPS (latitude, longitude, signal strength)
- Which way the robot is facing
- GPS location (latitude, longitude, signal strength)
- Network signal strength
- Vibration level
- Lamp state (on/off)
- Accelerometer (x, y, z)
- Gyroscope (x, y, z)
- Magnetometer (x, y, z)
- Wheel RPMs (4 wheels)
- Lamp status (on/off)
### Where Your Data Goes
+2 -2
View File
@@ -155,10 +155,10 @@ Upload your repository to Hugging Face:
pip install huggingface_hub
# Login to Hugging Face
hf auth login
huggingface-cli login
# Create a new repository
hf repo create my-org/my-custom-env
huggingface-cli repo create my-custom-env --type space --org my-org
# Initialize git and push
git init
+3 -6
View File
@@ -120,12 +120,9 @@ lerobot-record \
--display_data=true \
--dataset.repo_id=<user>/eval_groot-bimanual \
--dataset.num_episodes=10 \
--dataset.single_task="Grab and handover the red cube to the other arm" \
--dataset.streaming_encoding=true \
--dataset.encoder_threads=2 \
# --dataset.vcodec=auto \
--policy.path=<user>/groot-bimanual \ # your trained model
--dataset.episode_time_s=30 \
--dataset.single_task="Grab and handover the red cube to the other arm"
--policy.path=<user>/groot-bimanual # your trained model
--dataset.episode_time_s=30
--dataset.reset_time_s=10
```
+5 -11
View File
@@ -224,15 +224,12 @@ lerobot-record \
--teleop.port=/dev/tty.usbmodem1201 \
--teleop.id=right \
--teleop.side=right \
--dataset.repo_id=<USER>/hand_record_test_with_video_data \
--dataset.repo_id=nepyope/hand_record_test_with_video_data \
--dataset.single_task="Hand recording test with video data" \
--dataset.num_episodes=1 \
--dataset.episode_time_s=5 \
--dataset.push_to_hub=true \
--dataset.private=true \
--dataset.streaming_encoding=true \
--dataset.encoder_threads=2 \
# --dataset.vcodec=auto \
--display_data=true
```
@@ -244,7 +241,7 @@ lerobot-replay \
--robot.port=/dev/tty.usbmodem58760432281 \
--robot.id=right \
--robot.side=right \
--dataset.repo_id=<USER>/hand_record_test_with_camera \
--dataset.repo_id=nepyope/hand_record_test_with_camera \
--dataset.episode=0
```
@@ -252,13 +249,13 @@ lerobot-replay \
```bash
lerobot-train \
--dataset.repo_id=<USER>/hand_record_test_with_video_data \
--dataset.repo_id=nepyope/hand_record_test_with_video_data \
--policy.type=act \
--output_dir=outputs/train/hopejr_hand \
--job_name=hopejr \
--policy.device=mps \
--wandb.enable=true \
--policy.repo_id=<USER>/hand_test_policy
--policy.repo_id=nepyope/hand_test_policy
```
### Evaluate
@@ -273,11 +270,8 @@ lerobot-record \
--robot.side=right \
--robot.cameras='{"main": {"type": "opencv", "index_or_path": 0, "width": 640, "height": 480, "fps": 30}}' \
--display_data=false \
--dataset.repo_id=<USER>/eval_hopejr \
--dataset.repo_id=nepyope/eval_hopejr \
--dataset.single_task="Evaluate hopejr hand policy" \
--dataset.num_episodes=10 \
--dataset.streaming_encoding=true \
--dataset.encoder_threads=2 \
# --dataset.vcodec=auto \
--policy.path=outputs/train/hopejr_hand/checkpoints/last/pretrained_model
```
+6 -12
View File
@@ -159,13 +159,13 @@ We use the Hugging Face hub features for uploading your dataset. If you haven't
Add your token to the CLI by running this command:
```bash
hf auth login --token ${HUGGINGFACE_TOKEN} --add-to-git-credential
huggingface-cli login --token ${HUGGINGFACE_TOKEN} --add-to-git-credential
```
Then store your Hugging Face repository name in a variable:
```bash
HF_USER=$(NO_COLOR=1 hf auth whoami | awk -F': *' 'NR==1 {print $2}')
HF_USER=$(hf auth whoami | head -n 1)
echo $HF_USER
```
@@ -185,10 +185,7 @@ lerobot-record \
--display_data=true \
--dataset.repo_id=${HF_USER}/record-test \
--dataset.num_episodes=5 \
--dataset.single_task="Grab the black cube" \
--dataset.streaming_encoding=true \
# --dataset.vcodec=auto \
--dataset.encoder_threads=2
--dataset.single_task="Grab the black cube"
```
</hfoption>
<hfoption id="API example">
@@ -327,7 +324,7 @@ You can look for other LeRobot datasets on the hub by searching for `LeRobot` [t
You can also push your local dataset to the Hub manually, running:
```bash
hf upload ${HF_USER}/record-test ~/.cache/huggingface/lerobot/{repo-id} --repo-type dataset
huggingface-cli upload ${HF_USER}/record-test ~/.cache/huggingface/lerobot/{repo-id} --repo-type dataset
```
#### Record function
@@ -491,7 +488,7 @@ If your local computer doesn't have a powerful GPU you could utilize Google Cola
Once training is done, upload the latest checkpoint with:
```bash
hf upload ${HF_USER}/act_so101_test \
huggingface-cli upload ${HF_USER}/act_so101_test \
outputs/train/act_so101_test/checkpoints/last/pretrained_model
```
@@ -499,7 +496,7 @@ You can also upload intermediate checkpoints with:
```bash
CKPT=010000
hf upload ${HF_USER}/act_so101_test${CKPT} \
huggingface-cli upload ${HF_USER}/act_so101_test${CKPT} \
outputs/train/act_so101_test/checkpoints/${CKPT}/pretrained_model
```
@@ -518,9 +515,6 @@ lerobot-record \
--display_data=false \
--dataset.repo_id=${HF_USER}/eval_so100 \
--dataset.single_task="Put lego brick into the transparent box" \
--dataset.streaming_encoding=true \
--dataset.encoder_threads=2 \
# --dataset.vcodec=auto \
# <- Teleop optional if you want to teleoperate in between episodes \
# --teleop.type=so100_leader \
# --teleop.port=/dev/ttyACM0 \
+14 -70
View File
@@ -1,57 +1,30 @@
# Installation
This guide uses `conda` (via miniforge) to manage environments (recommended). If you prefer another environment manager (e.g. `uv`, `venv`), ensure you have Python >=3.12 and `ffmpeg` installed with the `libsvtav1` encoder, then skip ahead to [Environment Setup](#step-2-environment-setup).
## Step 1 (`conda` only): Install [`miniforge`](https://conda-forge.org/download/)
## Install [`miniforge`](https://conda-forge.org/download/)
```bash
wget "https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-$(uname)-$(uname -m).sh"
bash Miniforge3-$(uname)-$(uname -m).sh
```
## Step 2: Environment Setup
## Environment Setup
Create a virtual environment with Python 3.12:
Create a virtual environment with Python 3.10, using conda:
<!-- prettier-ignore-start -->
<hfoptions id="create_venv">
<hfoption id="conda">
```bash
conda create -y -n lerobot python=3.12
conda create -y -n lerobot python=3.10
```
</hfoption>
<hfoption id="uv">
Then activate your conda environment, you have to do this each time you open a shell to use lerobot:
```bash
uv python install 3.12
uv venv --python 3.12
```
</hfoption>
</hfoptions>
<!-- prettier-ignore-end -->
Then activate your virtual environment; you have to do this each time you open a shell to use lerobot:
<!-- prettier-ignore-start -->
<hfoptions id="activate_venv">
<hfoption id="conda">```bash
conda activate lerobot
```</hfoption>
<hfoption id="uv">
```bash
# Linux/macOS
source .venv/bin/activate
# Windows PowerShell
.venv\Scripts\Activate.ps1
```
</hfoption>
</hfoptions>
<!-- prettier-ignore-end -->
When using `conda`, install `ffmpeg` in your environment:
```bash
conda install ffmpeg -c conda-forge
ffmpeg -version # ffmpeg 8.X is not yet supported !
```
> [!TIP]
@@ -65,17 +38,7 @@ ffmpeg -version # ffmpeg 8.X is not yet supported !
>
> - _[On Linux only]_ If you want to bring your own ffmpeg: Install [ffmpeg build dependencies](https://trac.ffmpeg.org/wiki/CompilationGuide/Ubuntu#GettheDependencies) and [compile ffmpeg from source with libsvtav1](https://trac.ffmpeg.org/wiki/CompilationGuide/Ubuntu#libsvtav1), and make sure you use the corresponding ffmpeg binary to your install with `which ffmpeg`.
> [!NOTE]
> When installing LeRobot inside WSL (Windows Subsystem for Linux), make sure to install `evdev` with the following command:
>
> ```bash
> conda install evdev -c conda-forge
> ```
> [!IMPORTANT]
> If you are using `uv` you will have to install `ffmpeg` system-wide (outside of the virtual environment). You rely on `uv`'s and `torchcodec`'s ability to dynamically link to the system `ffmpeg`.
## Step 3: Install LeRobot 🤗
## Install LeRobot 🤗
### From Source
@@ -88,45 +51,23 @@ cd lerobot
Then, install the library in editable mode. This is useful if you plan to contribute to the code.
<!-- prettier-ignore-start -->
<hfoptions id="install_lerobot_src">
<hfoption id="conda">
```bash
pip install -e .
```
</hfoption>
<hfoption id="uv">
```bash
uv pip install -e .
```
</hfoption>
</hfoptions>
<!-- prettier-ignore-end -->
### Installation from PyPI
**Core Library:**
Install the base package with:
<!-- prettier-ignore-start -->
<hfoptions id="install_lerobot_pypi">
<hfoption id="conda">
```bash
pip install lerobot
```
</hfoption>
<hfoption id="uv">
```bash
uv pip install lerobot
```
</hfoption>
</hfoptions>
<!-- prettier-ignore-end -->
_This installs only the default dependencies._
**Extra Features:**
To install additional functionality, use one of the following (If you are using `uv`, replace `pip install` with `uv pip install` in the commands below.):
To install additional functionality, use one of the following:
```bash
pip install 'lerobot[all]' # All available features
@@ -140,10 +81,13 @@ _Replace `[...]` with your desired features._
For a full list of optional dependencies, see:
https://pypi.org/project/lerobot/
> [!NOTE]
> For lerobot 0.4.0, if you want to install pi, you will have to do: `pip install "lerobot[pi]@git+https://github.com/huggingface/lerobot.git"`
### Troubleshooting
If you encounter build errors, you may need to install additional dependencies: `cmake`, `build-essential`, and `ffmpeg libs`.
To install these for Linux run:
To install these for linux run:
```bash
sudo apt-get install cmake build-essential python3-dev pkg-config libavformat-dev libavcodec-dev libavdevice-dev libavutil-dev libswscale-dev libswresample-dev libavfilter-dev
@@ -153,7 +97,7 @@ For other systems, see: [Compiling PyAV](https://pyav.org/docs/develop/overview/
## Optional dependencies
LeRobot provides optional extras for specific functionalities. Multiple extras can be combined (e.g., `.[aloha,feetech]`). For all available extras, refer to `pyproject.toml`. If you are using `uv`, replace `pip install` with `uv pip install` in the commands below.
LeRobot provides optional extras for specific functionalities. Multiple extras can be combined (e.g., `.[aloha,feetech]`). For all available extras, refer to `pyproject.toml`.
### Simulations
+2 -8
View File
@@ -1,11 +1,5 @@
# LeKiwi
<img
src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/lerobot/1740517739083.jpeg"
alt="LeKiwi"
width="70%"
/>
In the steps below, we explain how to assemble the LeKiwi mobile robot.
## Source the parts
@@ -279,13 +273,13 @@ We use the Hugging Face hub features for uploading your dataset. If you haven't
Add your token to the CLI by running this command:
```bash
hf auth login --token ${HUGGINGFACE_TOKEN} --add-to-git-credential
huggingface-cli login --token ${HUGGINGFACE_TOKEN} --add-to-git-credential
```
Then store your Hugging Face repository name in a variable:
```bash
HF_USER=$(hf auth whoami | awk -F': *' 'NR==1 {print $2}')
HF_USER=$(huggingface-cli whoami | head -n 1)
echo $HF_USER
```
+1 -4
View File
@@ -41,10 +41,7 @@ lerobot-record \
--display_data=true \
--dataset.repo_id=${HF_USER}/record-test \
--dataset.num_episodes=5 \
--dataset.single_task="Grab the black cube" \
--dataset.streaming_encoding=true \
# --dataset.vcodec=auto \
--dataset.encoder_threads=2
--dataset.single_task="Grab the black cube"
```
See the [recording guide](./il_robots#record-a-dataset) for more details.
-1
View File
@@ -42,7 +42,6 @@ lerobot-eval \
```
- `--env.task` picks the suite (`libero_object`, `libero_spatial`, etc.).
- `--env.task_ids` picks task ids to run (`[0]`, `[1,2,3]`, etc.). Omit this flag (or set it to `null`) to run all tasks in the suite.
- `--eval.batch_size` controls how many environments run in parallel.
- `--eval.n_episodes` sets how many episodes to run in total.
-197
View File
@@ -1,197 +0,0 @@
## Order and Assemble the parts
First, assemble the OMX hardware following the official assembly guide.
OMX Assembly Guide: https://ai.robotis.com/omx/assembly_guide_omx.html
OMX robots are shipped preconfigured from the factory. Motor IDs, communication parameters, and joint offsets are already set, so no additional motor setup or calibration is required before using LeRobot.
## Install LeRobot 🤗
To install LeRobot, follow our [Installation Guide](./installation)
In addition to these instructions, you need to install the Dynamixel SDK:
```bash
pip install -e ".[dynamixel]"
```
## Connect the robot
To find the port for each bus servo adapter, run this script:
```bash
lerobot-find-port
```
Run this command and, when prompted, disconnect the USB cable from either the leader or follower arm and press Enter. The output will show 'The port of this MotorsBus is [port]'. This identifies the port for the disconnected arm. Repeat for the other arm to identify both ports.
<hfoptions id="find_port">
<hfoption id="Mac">
Example output on macOS:
```
Finding all available ports for the MotorBus.
['/dev/tty.usbmodem575E0032081', '/dev/tty.usbmodem575E0031751']
Remove the USB cable from your MotorsBus and press Enter when done.
[...Disconnect corresponding leader or follower arm and press Enter...]
The port of this MotorsBus is /dev/tty.usbmodem575E0032081
Reconnect the USB cable.
```
Where the found port is: `/dev/tty.usbmodem575E0032081` corresponding to your leader or follower arm.
</hfoption>
<hfoption id="Linux">
On Linux, we strongly recommend using udev rules to assign persistent and human-readable device names to the OMX leader and follower arms. This avoids issues where device names such as ttyACM0 and ttyACM1 change when the robot is unplugged, replugged, or when the system is rebooted.
#### 1. Find your device serial numbers
You should have obtained the port names (e.g. `/dev/ttyACM0`) for the leader and follower using `lerobot-find-port`. You can match those results with the serial numbers using the `ls -l /dev/serial/by-id/` command.
To create udev rules, you need the unique serial number for each OMX device. The easiest way is to list devices under:
```bash
ls -l /dev/serial/by-id/
```
You will see output similar to:
```bash
usb-ROBOTIS_OpenRB-150_228BDD7B503059384C2E3120FF0A2B19-if00 -> ../../ttyACM0
usb-ROBOTIS_OpenRB-150_67E1ED68503059384C2E3120FF092234-if00 -> ../../ttyACM1
```
In each line, the serial number is the long string after `usb-ROBOTIS_OpenRB-150_` and before `-if00`.
Follower serial: `228BDD7B503059384C2E3120FF0A2B19`
Leader serial: `67E1ED68503059384C2E3120FF092234`
#### 2. Create the udev rule
Create a new udev rule file:
```bash
sudo nano /etc/udev/rules.d/99-omx.rules
```
Paste the following lines, replacing the serial numbers with the values you found above:
```bash
SUBSYSTEM=="tty", ATTRS{idVendor}=="0403", ATTRS{serial}=="228BDD7B503059384C2E3120FF0A2B19", SYMLINK+="omx_follower"
SUBSYSTEM=="tty", ATTRS{idVendor}=="0403", ATTRS{serial}=="67E1ED68503059384C2E3120FF092234", SYMLINK+="omx_leader"
```
Save the file and reload udev rules:
```bash
sudo udevadm control --reload-rules
sudo udevadm trigger
```
Now unplug and replug both devices once.
#### 3. Verify the symlinks
Check that the persistent device names exist:
```bash
ls -l /dev/omx_follower /dev/omx_leader
```
You should see them pointing to ttyACM\* devices:
```bash
/dev/omx_follower -> ttyACM*
/dev/omx_leader -> ttyACM*
```
These names remain stable across reboots and reconnections.
</hfoption>
</hfoptions>
## Teleoperate
After identifying the correct ports, you can directly teleoperate the follower arm using the leader arm.
<hfoptions id="teleoperate">
<hfoption id="Mac">
### Teleoperate without camera
```bash
lerobot-teleoperate \
--robot.type=omx_follower \
--robot.port=<your_follower_port> \
--robot.id=omx_follower_arm \
--teleop.type=omx_leader \
--teleop.port=<your_leader_port> \
--teleop.id=omx_leader_arm
```
During teleoperation, motions of the leader arm are mirrored in real time by the follower arm. Since OMX is already preconfigured, teleoperation can begin immediately without any calibration steps.
### Teleoperate with camera
You can also enable camera input during teleoperation by providing a camera configuration for the follower arm.
```bash
lerobot-teleoperate \
--robot.type=omx_follower \
--robot.port=<your_follower_port> \
--robot.id=omx_follower_arm \
--robot.cameras="{front: {type: opencv, index_or_path: '/dev/video0', width: 640, height: 480, fps: 30}}" \
--teleop.type=omx_leader \
--teleop.port=<your_leader_port> \
--teleop.id=omx_leader_arm \
--display_data=true
```
When the camera is enabled, the camera stream is displayed in real time and synchronized with the robot state. This setup is useful for visual monitoring and can be reused later for demonstration recording and imitation learning.
</hfoption>
<hfoption id="Linux">
### Teleoperate without camera
```bash
lerobot-teleoperate \
--robot.type=omx_follower \
--robot.port=/dev/omx_follower \
--robot.id=omx_follower_arm \
--teleop.type=omx_leader \
--teleop.port=/dev/omx_leader \
--teleop.id=omx_leader_arm
```
During teleoperation, motions of the leader arm are mirrored in real time by the follower arm. Since OMX is already preconfigured, teleoperation can begin immediately without any calibration steps.
### Teleoperate with camera
You can also enable camera input during teleoperation by providing a camera configuration for the follower arm.
```bash
lerobot-teleoperate \
--robot.type=omx_follower \
--robot.port=/dev/omx_follower \
--robot.id=omx_follower_arm \
--robot.cameras="{front: {type: opencv, index_or_path: '/dev/video0', width: 640, height: 480, fps: 30}}" \
--teleop.type=omx_leader \
--teleop.port=/dev/omx_leader \
--teleop.id=omx_leader_arm \
--display_data=true
```
When the camera is enabled, the camera stream is displayed in real time and synchronized with the robot state. This setup is useful for visual monitoring and can be reused later for demonstration recording and imitation learning.
</hfoption>
</hfoptions>
Congrats 🎉, your robot is all set to learn a task on its own.
> If you have any questions or need help, please reach out on [Discord](https://discord.com/invite/robotis).
-276
View File
@@ -1,276 +0,0 @@
# OpenArm
[OpenArm](https://openarm.dev) is an open-source 7DOF humanoid arm designed for physical AI research and deployment.
To get your OpenArm, assembled or DIY, and join the global community, browse verified and certified manufacturers worldwide at [openarm.dev](https://openarm.dev).
## What's Unique?
- **Human-Scale Design**: OpenArm is designed with human-like proportions, scaled for a person around 160-165cm tall. This provides an optimal balance between practical reach and manageable inertia for safe, responsive operation.
- **Safety-First Architecture**: Built with QDD backdrivable motors and high compliance, OpenArm prioritizes safe human-robot interaction while maintaining practical payload capabilities (6.0kg peak / 4.1kg nominal) for real-world tasks.
- **Built for Durability**: Critical structural components use aluminum and stainless steel construction, ensuring robust performance for repetitive data collection and continuous research use.
- **Fully Accessible & Buildable**: Every component, from CNC parts and 3D-printed casings to electrical wiring is designed to be purchasable and buildable by individual researchers and labs, with complete fabrication data provided.
- **Practical & Affordable**: At $6,500 USD for a complete bimanual system, OpenArm delivers research-grade capabilities at a fraction of traditional humanoid robot costs.
## Platform Requirements
<Tip warning={true}>
**Linux Only**: OpenArm currently only works on Linux. The CAN bus USB adapter
does not have macOS drivers and has not been tested on Windows.
</Tip>
## Safety Guide
Before operating OpenArm, please read the [official safety guide](https://docs.openarm.dev/getting-started/safety-guide). Key points:
- **Secure installation**: Fasten the arm to a flat, stable surface with screws or clamps
- **Safe distance**: Keep body parts and objects outside the range of motion during operation
- **Protective equipment**: Always wear safety goggles; use additional PPE as needed
- **Payload limits**: Do not exceed specified payload limits (6.0kg peak / 4.1kg nominal per arm)
- **Emergency stop**: Know the location and operation of the emergency stop device
- **Regular inspection**: Check for loose screws, damaged mechanical limits, unusual noises, and wiring damage
## Hardware Setup
Follow the official [OpenArm hardware documentation](https://docs.openarm.dev) for:
- Bill of materials and sourcing
- 3D printing instructions
- Mechanical assembly
- Electrical wiring
The hardware repositories are available at [github.com/enactic/openarm](https://github.com/enactic/openarm).
## CAN Bus Setup
OpenArm uses CAN bus communication with Damiao motors. Once you have the CAN bus USB adapter plugged into your Linux PC, follow the [Damiao Motors and CAN Bus guide](./damiao) to configure the interface.
Quick setup:
```bash
# Setup CAN interfaces
lerobot-setup-can --mode=setup --interfaces=can0,can1
# Test motor communication
lerobot-setup-can --mode=test --interfaces=can0,can1
```
## Install LeRobot 🤗
Follow our [Installation Guide](./installation), then install the Damiao motor support:
```bash
pip install -e ".[damiao]"
```
## Usage
### Follower Arm (Robot)
<hfoptions id="follower">
<hfoption id="Command">
```bash
lerobot-calibrate \
--robot.type=openarm_follower \
--robot.port=can0 \
--robot.side=right \
--robot.id=my_openarm_follower
```
</hfoption>
<hfoption id="API example">
```python
from lerobot.robots.openarm_follower import OpenArmFollower, OpenArmFollowerConfig
config = OpenArmFollowerConfig(
port="can0",
side="right", # or "left" for left arm
id="my_openarm_follower",
)
follower = OpenArmFollower(config)
follower.connect()
# Read current state
obs = follower.get_observation()
print(obs)
# Send action (position in degrees)
action = {
"joint_1.pos": 0.0,
"joint_2.pos": 0.0,
"joint_3.pos": 0.0,
"joint_4.pos": 45.0,
"joint_5.pos": 0.0,
"joint_6.pos": 0.0,
"joint_7.pos": 0.0,
"gripper.pos": 0.0,
}
follower.send_action(action)
follower.disconnect()
```
</hfoption>
</hfoptions>
### Leader Arm (Teleoperator)
The leader arm is used for teleoperation - manually moving it to control the follower arm.
<hfoptions id="leader">
<hfoption id="Command">
```bash
lerobot-calibrate \
--teleop.type=openarm_leader \
--teleop.port=can1 \
--teleop.id=my_openarm_leader
```
</hfoption>
<hfoption id="API example">
```python
from lerobot.teleoperators.openarm_leader import OpenArmLeader, OpenArmLeaderConfig
config = OpenArmLeaderConfig(
port="can1",
id="my_openarm_leader",
manual_control=True, # Disable torque for manual movement
)
leader = OpenArmLeader(config)
leader.connect()
# Read current position (as action to send to follower)
action = leader.get_action()
print(action)
leader.disconnect()
```
</hfoption>
</hfoptions>
### Teleoperation
To teleoperate OpenArm with leader-follower control:
```bash
lerobot-teleoperate \
--robot.type=openarm_follower \
--robot.port=can0 \
--robot.side=right \
--robot.id=my_follower \
--teleop.type=openarm_leader \
--teleop.port=can1 \
--teleop.id=my_leader
```
### Bimanual Teleoperation
To teleoperate a bimanual OpenArm setup with two leader and two follower arms:
```bash
lerobot-teleoperate \
--robot.type=bi_openarm_follower \
--robot.left_arm_config.port=can0 \
--robot.left_arm_config.side=left \
--robot.right_arm_config.port=can1 \
--robot.right_arm_config.side=right \
--robot.id=my_bimanual_follower \
--teleop.type=bi_openarm_leader \
--teleop.left_arm_config.port=can2 \
--teleop.right_arm_config.port=can3 \
--teleop.id=my_bimanual_leader
```
### Recording Data
To record a dataset during teleoperation:
```bash
lerobot-record \
--robot.type=openarm_follower \
--robot.port=can0 \
--robot.side=right \
--robot.id=my_follower \
--teleop.type=openarm_leader \
--teleop.port=can1 \
--teleop.id=my_leader \
--repo-id=my_hf_username/my_openarm_dataset \
--fps=30 \
--num-episodes=10
```
## Configuration Options
### Follower Configuration
| Parameter | Default | Description |
| --------------------- | --------- | ---------------------------------------------------------- |
| `port` | - | CAN interface (e.g., `can0`) |
| `side` | `None` | Arm side: `"left"`, `"right"`, or `None` for custom limits |
| `use_can_fd` | `True` | Enable CAN FD for higher data rates |
| `can_bitrate` | `1000000` | Nominal bitrate (1 Mbps) |
| `can_data_bitrate` | `5000000` | CAN FD data bitrate (5 Mbps) |
| `max_relative_target` | `None` | Safety limit for relative target positions |
| `position_kp` | Per-joint | Position control proportional gains |
| `position_kd` | Per-joint | Position control derivative gains |
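For reference, a hedged sketch of how these options might be passed to `OpenArmFollowerConfig` (the class used in the API example above); the field names are assumed to match the table, and every value besides `port`, `side`, and `id` is purely illustrative:
```python
from lerobot.robots.openarm_follower import OpenArmFollowerConfig

# Field names assumed to match the table above; values besides port/side/id are illustrative.
config = OpenArmFollowerConfig(
    port="can0",
    side="right",
    id="my_openarm_follower",
    use_can_fd=True,             # documented default
    can_bitrate=1_000_000,       # 1 Mbps nominal bitrate
    can_data_bitrate=5_000_000,  # 5 Mbps CAN FD data bitrate
    max_relative_target=5.0,     # hypothetical safety cap on relative target positions
)
```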
### Leader Configuration
| Parameter | Default | Description |
| ------------------ | --------- | ----------------------------------- |
| `port` | - | CAN interface (e.g., `can1`) |
| `manual_control` | `True` | Disable torque for manual movement |
| `use_can_fd` | `True` | Enable CAN FD for higher data rates |
| `can_bitrate` | `1000000` | Nominal bitrate (1 Mbps) |
| `can_data_bitrate` | `5000000` | CAN FD data bitrate (5 Mbps) |
## Motor Configuration
OpenArm uses Damiao motors with the following default configuration:
| Joint | Motor Type | Send ID | Recv ID |
| --------------------------- | ---------- | ------- | ------- |
| joint_1 (Shoulder pan) | DM8009 | 0x01 | 0x11 |
| joint_2 (Shoulder lift) | DM8009 | 0x02 | 0x12 |
| joint_3 (Shoulder rotation) | DM4340 | 0x03 | 0x13 |
| joint_4 (Elbow flex) | DM4340 | 0x04 | 0x14 |
| joint_5 (Wrist roll) | DM4310 | 0x05 | 0x15 |
| joint_6 (Wrist pitch) | DM4310 | 0x06 | 0x16 |
| joint_7 (Wrist rotation) | DM4310 | 0x07 | 0x17 |
| gripper | DM4310 | 0x08 | 0x18 |
## Troubleshooting
### No Response from Motors
1. Check power supply connections
2. Verify CAN wiring (CAN-H, CAN-L, GND)
3. Run diagnostics: `lerobot-setup-can --mode=test --interfaces=can0`
4. See the [Damiao troubleshooting guide](./damiao#troubleshooting) for more details
### CAN Interface Not Found
Ensure the CAN interface is configured:
```bash
ip link show can0
```
## Resources
- [OpenArm Website](https://openarm.dev)
- [OpenArm Documentation](https://docs.openarm.dev)
- [OpenArm GitHub](https://github.com/enactic/openarm)
- [Safety Guide](https://docs.openarm.dev/getting-started/safety-guide)
- [Damiao Motors and CAN Bus](./damiao)
+5 -9
View File
@@ -66,13 +66,12 @@ Run on of the examples scripts to teleoperate, record a dataset, replay a datase
All scripts assume you configured your robot (e.g., SO-100 follower) and set the correct serial port.
Additionally you need to **copy the URDF of the robot into the examples folder**. For the examples in this tutorial (using SO100/SO101), copy the `SO101` folder from the [SO-ARM100 repo](https://github.com/TheRobotStudio/SO-ARM100/blob/main/Simulation/SO101) into the `examples/phone_to_so100/` directory, so that the URDF file path becomes `examples/phone_to_so100/SO101/so101_new_calib.urdf`.
Additionally you need to **copy the urdf of the robot to the examples folder**. For the examples in this tutorial (Using SO100/SO101) it is highly recommended to use the urdf in the [SO-ARM100 repo](https://github.com/TheRobotStudio/SO-ARM100/blob/main/Simulation/SO101/so101_new_calib.urdf)
- Run this example to teleoperate:
```bash
cd examples/phone_to_so100
python teleoperate.py
python examples/phone_to_so100/teleoperate.py
```
After running the example:
@@ -85,22 +84,19 @@ Additionally you can customize mapping or safety limits by editing the processor
- Run this example to record a dataset, which saves absolute end effector observations and actions:
```bash
cd examples/phone_to_so100
python record.py
python examples/phone_to_so100/record.py
```
- Run this example to replay recorded episodes:
```bash
cd examples/phone_to_so100
python replay.py
python examples/phone_to_so100/replay.py
```
- Run this example to evaluate a pretrained policy:
```bash
cd examples/phone_to_so100
python evaluate.py
python examples/phone_to_so100/evaluate.py
```
### Important pipeline steps and options
+6 -1
View File
@@ -34,6 +34,11 @@ As described by Physical Intelligence, while AI has achieved remarkable success
pip install -e ".[pi]"
```
> [!NOTE]
> For lerobot 0.4.0, if you want to install the pi tag, you will have to do: `pip install "lerobot[pi]@git+https://github.com/huggingface/lerobot.git"`.
>
> This will be solved in the next patch release
## Training Data and Capabilities
π₀ is trained on the largest robot interaction dataset to date, combining three key data sources:
@@ -55,7 +60,7 @@ policy.type=pi0
For training π₀, you can use the standard LeRobot training script with the appropriate configuration:
```bash
lerobot-train \
python src/lerobot/scripts/lerobot_train.py \
--dataset.repo_id=your_dataset \
--policy.type=pi0 \
--output_dir=./outputs/pi0_training \
+6 -1
View File
@@ -36,6 +36,11 @@ This diverse training mixture creates a "curriculum" that enables generalization
pip install -e ".[pi]"
```
> [!NOTE]
> For lerobot 0.4.0, if you want to install the pi tag, you will have to do: `pip install "lerobot[pi]@git+https://github.com/huggingface/lerobot.git"`.
>
> This will be solved in the next patch release
## Usage
To use π₀.₅ in your LeRobot configuration, specify the policy type as:
@@ -51,7 +56,7 @@ policy.type=pi05
Here's a complete training command for finetuning the base π₀.₅ model on your own dataset:
```bash
lerobot-train \
python src/lerobot/scripts/lerobot_train.py\
--dataset.repo_id=your_dataset \
--policy.type=pi05 \
--output_dir=./outputs/pi05_training \
+15 -10
View File
@@ -43,11 +43,16 @@ This approach can transform **any existing VLM** into a VLA by training it to pr
pip install -e ".[pi]"
```
> [!NOTE]
> For lerobot 0.4.0, if you want to install the pi tag, you will have to do: `pip install "lerobot[pi]@git+https://github.com/huggingface/lerobot.git"`.
>
> This will be solved in the next patch release
## Training a Custom FAST Tokenizer
You have two options for the FAST tokenizer:
1. **Use the pre-trained tokenizer**: The `lerobot/fast-action-tokenizer` tokenizer was trained on 1M+ real robot action sequences and works as a general-purpose tokenizer.
1. **Use the pre-trained tokenizer**: The `physical-intelligence/fast` tokenizer was trained on 1M+ real robot action sequences and works as a general-purpose tokenizer.
2. **Train your own tokenizer**: For maximum performance on your specific dataset, you can finetune the tokenizer on your own data.
@@ -109,15 +114,15 @@ lerobot-train \
### Key Training Parameters
| Parameter | Description | Default |
| -------------------------------------- | -------------------------------------------------- | ------------------------------- |
| `--policy.gradient_checkpointing=true` | Reduces memory usage significantly during training | `false` |
| `--policy.dtype=bfloat16` | Use mixed precision training for efficiency | `float32` |
| `--policy.chunk_size` | Number of action steps to predict (action horizon) | `50` |
| `--policy.n_action_steps` | Number of action steps to execute | `50` |
| `--policy.max_action_tokens` | Maximum number of FAST tokens per action chunk | `256` |
| `--policy.action_tokenizer_name` | FAST tokenizer to use | `lerobot/fast-action-tokenizer` |
| `--policy.compile_model=true` | Enable torch.compile for faster training | `false` |
| Parameter | Description | Default |
| -------------------------------------- | -------------------------------------------------- | ---------------------------- |
| `--policy.gradient_checkpointing=true` | Reduces memory usage significantly during training | `false` |
| `--policy.dtype=bfloat16` | Use mixed precision training for efficiency | `float32` |
| `--policy.chunk_size` | Number of action steps to predict (action horizon) | `50` |
| `--policy.n_action_steps` | Number of action steps to execute | `50` |
| `--policy.max_action_tokens` | Maximum number of FAST tokens per action chunk | `256` |
| `--policy.action_tokenizer_name` | FAST tokenizer to use | `physical-intelligence/fast` |
| `--policy.compile_model=true` | Enable torch.compile for faster training | `false` |
## Inference
-6
View File
@@ -159,9 +159,6 @@ lerobot-record \
--dataset.fps=15 \
--dataset.push_to_hub=true \
--dataset.private=true \
--dataset.streaming_encoding=true \
--dataset.encoder_threads=2 \
# --dataset.vcodec=auto \
--display_data=true
```
@@ -201,9 +198,6 @@ lerobot-record \
--dataset.fps=15 \
--dataset.push_to_hub=true \
--dataset.private=true \
--dataset.streaming_encoding=true \
--dataset.encoder_threads=2 \
# --dataset.vcodec=auto \
--display_data=true
```
+4 -4
View File
@@ -269,7 +269,7 @@ This generates visualizations showing video frames with subtask boundaries overl
Train with **no annotations** - uses linear progress from 0 to 1:
```bash
lerobot-train \
python src/lerobot/scripts/lerobot_train.py \
--dataset.repo_id=your-username/your-dataset \
--policy.type=sarm \
--policy.annotation_mode=single_stage \
@@ -288,7 +288,7 @@ lerobot-train \
Train with **dense annotations only** (sparse auto-generated):
```bash
lerobot-train \
python src/lerobot/scripts/lerobot_train.py \
--dataset.repo_id=your-username/your-dataset \
--policy.type=sarm \
--policy.annotation_mode=dense_only \
@@ -307,7 +307,7 @@ lerobot-train \
Train with **both sparse and dense annotations**:
```bash
lerobot-train \
python src/lerobot/scripts/lerobot_train.py \
--dataset.repo_id=your-username/your-dataset \
--policy.type=sarm \
--policy.annotation_mode=dual \
@@ -468,7 +468,7 @@ This script:
Once you have the progress file, train your policy with RA-BC weighting. The progress file is auto-detected from the dataset path (`sarm_progress.parquet`). Currently PI0, PI0.5 and SmolVLA are supported with RA-BC:
```bash
lerobot-train \
python src/lerobot/scripts/lerobot_train.py \
--dataset.repo_id=your-username/your-dataset \
--policy.type=pi0 \
--use_rabc=true \
-3
View File
@@ -106,9 +106,6 @@ lerobot-record \
--dataset.repo_id=${HF_USER}/eval_DATASET_NAME_test \ # <- This will be the dataset name on HF Hub
--dataset.episode_time_s=50 \
--dataset.num_episodes=10 \
--dataset.streaming_encoding=true \
--dataset.encoder_threads=2 \
# --dataset.vcodec=auto \
# <- Teleop optional if you want to teleoperate in between episodes \
# --teleop.type=so100_leader \
# --teleop.port=/dev/ttyACM0 \
-13
View File
@@ -1,18 +1,5 @@
# SO-101
<div style="display: flex; align-items: center; gap: 10px;">
<img
src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/lerobot/SO101_Follower.webp"
alt="SO-101"
width="60%"
/>
<img
src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/lerobot/SO101_Leader.webp"
alt="SO-101"
width="60%"
/>
</div>
In the steps below, we explain how to assemble our flagship robot, the SO-101.
## Source the parts
-155
View File
@@ -1,155 +0,0 @@
# Streaming Video Encoding Guide
## 1. Overview
Streaming video encoding eliminates the traditional PNG round-trip during video dataset recording. Instead of:
1. Capture frame -> write PNG to disk -> (at episode end) read PNGs -> encode to MP4 -> delete PNGs
Frames can be encoded in real-time during capture:
1. Capture frame -> queue to encoder thread -> encode to MP4 directly
This makes `save_episode()` near-instant (the video is already encoded by the time the episode ends) and removes the blocking wait that previously occurred between episodes, especially with multiple cameras in long episodes.
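Conceptually, this is a bounded producer/consumer queue per camera. The sketch below is illustrative only (not LeRobot's actual implementation): `camera.read()` and `encoder.write()` stand in for whatever capture and encoder APIs are in use, and the queue size mirrors the default `encoder_queue_maxsize` described below.
```python
import queue
import threading

# Minimal sketch of the streaming-encoding pattern: the capture loop enqueues frames
# without blocking, a worker thread feeds the encoder, and frames are dropped when
# the bounded queue is full. `camera` and `encoder` are placeholder objects.
frame_queue = queue.Queue(maxsize=60)  # mirrors the default encoder_queue_maxsize

def encoder_worker(encoder):
    while True:
        frame = frame_queue.get()
        if frame is None:  # sentinel: episode finished
            encoder.close()
            return
        encoder.write(frame)  # encode straight into the MP4 stream

def capture_loop(camera, fps, duration_s):
    dropped = 0
    for _ in range(int(fps * duration_s)):
        frame = camera.read()
        try:
            frame_queue.put_nowait(frame)  # never block the control loop
        except queue.Full:
            dropped += 1  # backpressure: drop the frame instead of stalling capture
    frame_queue.put(None)
    if dropped:
        print(f"Encoder queue full, dropped {dropped} frame(s)")

# Wiring sketch: start one worker per camera, then run the capture loop, e.g.
# threading.Thread(target=encoder_worker, args=(encoder,), daemon=True).start()
# capture_loop(camera, fps=30, duration_s=60)
```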
## 2. Tuning Parameters
| Parameter | CLI Flag | Type | Default | Description |
| ----------------------- | --------------------------------- | ------------- | ------------- | ----------------------------------------------------------------- |
| `streaming_encoding` | `--dataset.streaming_encoding` | `bool` | `True` | Enable real-time encoding during capture |
| `vcodec` | `--dataset.vcodec` | `str` | `"libsvtav1"` | Video codec. `"auto"` detects best HW encoder |
| `encoder_threads` | `--dataset.encoder_threads` | `int \| None` | `None` (auto) | Threads per encoder instance. `None` lets the codec decide |
| `encoder_queue_maxsize` | `--dataset.encoder_queue_maxsize` | `int` | `60` | Max buffered frames per camera (~2s at 30fps). Consumes RAM |
## 3. Performance Considerations
Streaming encoding means the CPU is encoding video **during** the capture loop, not after. This creates a CPU budget that must be shared between:
- **Control loop** (reading cameras, controlling the robot, writing non-video data)
- **Encoder threads** (one pool per camera)
- **Rerun visualization** (if enabled)
- **OS and other processes**
### Resolution & Number of Cameras Impact
| Setup | Throughput (px/sec) | CPU Encoding Load | Notes |
| ------------------------- | ------------------- | ----------------- | ------------------------------ |
| 2 cams × 640×480×3 @30fps | 55M | Low | Works on most systems |
| 2 cams × 1280×720×3 @30fps | 165M | Moderate | Comfortable on modern systems |
| 2 cams × 1920×1080×3 @30fps | 373M | High | Requires powerful high-end CPU |
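The throughput column is simply cameras × width × height × channels × fps, so you can estimate your own setup with a few lines of Python:
```python
def pixel_throughput(num_cams, width, height, fps, channels=3):
    """Raw pixels per second the encoders must absorb across all cameras."""
    return num_cams * width * height * channels * fps

print(f"{pixel_throughput(2, 640, 480, 30) / 1e6:.0f}M px/sec")    # ~55M
print(f"{pixel_throughput(2, 1280, 720, 30) / 1e6:.0f}M px/sec")   # ~166M
print(f"{pixel_throughput(2, 1920, 1080, 30) / 1e6:.0f}M px/sec")  # ~373M
```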
### `encoder_threads` Tuning
This parameter controls how many threads each encoder instance uses internally:
- **Higher values** (e.g., 4-5): Faster encoding, but uses more CPU cores per camera. Good for high-end systems with many cores.
- **Lower values** (e.g., 1-2): Less CPU per camera, freeing cores for capture and visualization. Good for low-res images and capable CPUs.
- **`None` (default)**: Lets the codec decide. Information available in the codec logs.
### Backpressure and Frame Dropping
Each camera has a bounded queue (`encoder_queue_maxsize`, default 60 frames). When the encoder can't keep up:
1. The queue fills up (consuming RAM)
2. New frames are **dropped** (not blocked) — the capture loop continues uninterrupted
3. A warning is logged: `"Encoder queue full for {camera}, dropped N frame(s)"`
4. At episode end, total dropped frames per camera are reported
### Symptoms of Encoder Falling Behind
- **System feels laggy and freezes**: all CPUs are at 100%
- **Dropped frame warnings** in the log, or fewer frames/lower FPS than expected in the recorded dataset
- **Choppy robot movement**: If the CPU is severely overloaded, even the capture loop may be affected
- **Accumulated rerun lag**: Visualization falls behind real-time
## 4. Hardware-Accelerated Encoding
### When to Use
Use HW encoding when:
- CPU is the bottleneck (dropped frames, choppy robot, rerun lag)
- You have compatible hardware (GPU or dedicated encoder)
- You're recording at high throughput (high resolution or with many cameras)
### Choosing a Codec
| Codec | CPU Usage | File Size | Quality | Notes |
| --------------------- | --------- | -------------- | ------- | ---------------------------------------------------------------- |
| `libsvtav1` (default) | High | Smallest | Best | Default. Best compression but most CPU-intensive |
| `h264` | Medium | ~30-50% larger | Good | Software H.264. Lower CPU |
| HW encoders | Very Low | Largest | Good | Offloads to dedicated hardware. Best for CPU-constrained systems |
### Available HW Encoders
| Encoder | Platform | Hardware | CLI Value |
| ------------------- | ------------- | ------------------------------------------------------------------------------------------------ | ------------------------------------ |
| `h264_videotoolbox` | macOS | Apple Silicon / Intel | `--dataset.vcodec=h264_videotoolbox` |
| `hevc_videotoolbox` | macOS | Apple Silicon / Intel | `--dataset.vcodec=hevc_videotoolbox` |
| `h264_nvenc` | Linux/Windows | NVIDIA GPU | `--dataset.vcodec=h264_nvenc` |
| `hevc_nvenc` | Linux/Windows | NVIDIA GPU | `--dataset.vcodec=hevc_nvenc` |
| `h264_vaapi` | Linux | Intel/AMD GPU | `--dataset.vcodec=h264_vaapi` |
| `h264_qsv` | Linux/Windows | Intel Quick Sync | `--dataset.vcodec=h264_qsv` |
| `auto` | Any | Probes the system for available HW encoders. Falls back to `libsvtav1` if no HW encoder is found | `--dataset.vcodec=auto` |
> [!NOTE]
> In order to use the HW accelerated encoders you might need to upgrade your GPU drivers.
> [!NOTE]
> `libsvtav1` is the default because it provides the best training performance; other vcodecs can reduce CPU usage and be faster, but they typically produce larger files and may affect training time.
## 5. Troubleshooting
| Symptom | Likely Cause | Fix |
| ------------------------------------------------------------------ | -------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
| System freezes or choppy robot movement or Rerun visualization lag | CPU starved (100% load usage) | Close other apps, reduce encoding throughput, lower `encoder_threads`, use `h264`, use `display_data=False`. If the CPU continues to be at 100% then it might be insufficient for your setup, consider `--dataset.streaming_encoding=false` or HW encoding (`--dataset.vcodec=auto`) |
| "Encoder queue full" warnings or dropped frames in dataset | Encoder can't keep up (Queue overflow) | If CPU is not at 100%: Increase `encoder_threads`, increase `encoder_queue_maxsize` or use HW encoding (`--dataset.vcodec=auto`). |
| High RAM usage | Queue filling faster than encoding | `encoder_threads` too low or CPU insufficient. Reduce `encoder_queue_maxsize` or use HW encoding |
| Large video files | Using HW encoder or H.264 | Expected trade-off. Switch to `libsvtav1` if CPU allows |
| `save_episode()` still slow | `streaming_encoding` is `False` | Set `--dataset.streaming_encoding=true` |
| Encoder thread crash | Codec not available or invalid settings | Check `vcodec` is installed, try `--dataset.vcodec=auto` |
| Recorded dataset is missing frames | CPU/GPU starvation or occasional load spikes | If ~5% of frames are missing, your system is likely overloaded — follow the recommendations above. If fewer frames are missing (~2%), they are probably due to occasional transient load spikes (often at startup) and can be considered expected. |
## 6. Recommended Configurations
These estimates are conservative; we recommend testing them on your setup—start with a low load and increase it gradually.
### High-End Systems: modern 12+ cores (24+ threads)
A throughput between ~250-500M px/sec should be comfortable on the CPU. For even better results, try HW encoding if available.
```bash
# 3camsx 1280x720x3 @30fps: Defaults work well. Optionally increase encoder parallelism.
# 2camsx 1920x1080x3 @30fps: Defaults work well. Optionally increase encoder parallelism.
lerobot-record --dataset.encoder_threads=5 ...
# 3camsx 1920x1080x3 @30fps: Might require some tuning.
```
### Mid-Range Systems: modern 8+ cores (16+ threads) or Apple Silicon
A throughput between ~80-300M px/sec should be possible on the CPU.
```bash
# 3camsx 640x480x3 @30fps: Defaults work well. Optionally decrease encoder parallelism.
# 2camsx 1280x720x3 @30fps: Defaults work well. Optionally decrease encoder parallelism.
lerobot-record --dataset.encoder_threads=2 ...
# 2camsx 1920x1080x3 @30fps: Might require some tuning.
```
### Low-Resource Systems: modern 4+ cores (8+ threads) or Raspberry Pi 5
On very constrained systems, streaming encoding may compete too heavily with the capture loop. Disabling it falls back to the PNG-based approach where encoding happens between episodes (blocking, but doesn't interfere with capture). Alternatively, record at a lower throughput to reduce both capture and encoding load. Consider also changing codec to `h264` and using batch encoding.
```bash
# 2camsx 640x480x3 @30fps: Requires some tuning.
# Use H.264, disable streaming, consider batching encoding
lerobot-record --dataset.vcodec=h264 --dataset.streaming_encoding=false ...
```
## 7. Closing note
Performance ultimately depends on your exact setup — frames-per-second, resolution, CPU cores and load, available memory, episode length, and the encoder you choose. Always test with your target workload, be mindful about your CPU & system capabilities and tune `encoder_threads`, `encoder_queue_maxsize`, and
`vcodec` reasonably. That said, a common practical configuration (for many applications) is three cameras at 640×480x3 @30fps; this usually runs fine with the default streaming video encoding settings in modern systems. Always verify your recorded dataset is healthy by comparing the video duration to the CLI episode duration and confirming the row count equals FPS × CLI duration.
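As a rough health check, assuming the dataset metadata exposes `fps` and `meta.total_frames` (attribute names may differ across versions), something like the following can compare the recorded frame count against the CLI settings:
```python
from lerobot.datasets.lerobot_dataset import LeRobotDataset

# Hypothetical values: substitute your repo id and the CLI settings you recorded with.
dataset = LeRobotDataset("your-username/record-test")
episode_time_s, num_episodes = 30, 5

expected_frames = int(dataset.fps * episode_time_s * num_episodes)
print(f"recorded {dataset.meta.total_frames} frames, expected roughly {expected_frames}")
```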
+103 -202
View File
@@ -1,72 +1,23 @@
# Unitree G1
<img
src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/lerobot/unitree_thumbnail.jpg"
alt="Unitree G1 locomanipulation demo"
style={{ width: "100%" }}
/>
This guide covers the complete setup process for the Unitree G1 humanoid, from initial connection to running gr00t_wbc locomotion.
The Unitree G1 humanoid is now supported in LeRobot! You can teleoperate, train locomanipulation policies, test in sim, and more. Both 29 and 23 DoF variants are supported.
## About
We support both the 29 and 23 DoF G1 EDU versions. We introduce:
- **`unitree_g1` robot class**, handling low-level read/write from/to the humanoid
- **ZMQ socket bridge** for remote communication and camera streaming, allowing for remote policy deployment over wlan, eth or directly on the robot
- **Locomotion policies** from NVIDIA gr00t and Amazon FAR Holosoma
- **Simulation mode** for testing policies in MuJoCo without the physical robot
---
## Part 1: Getting Started
## Connection guide
### Install the Unitree SDK
### Step 1: Configure Ethernet Interface
Follow the [unitree_sdk2_python installation guide](https://github.com/unitreerobotics/unitree_sdk2_python#installation). Tested with `unitree_sdk2py==1.0.1` and `cyclonedds==0.10.2`:
```bash
conda create -y -n lerobot python=3.12
conda activate lerobot
git clone https://github.com/unitreerobotics/unitree_sdk2_python.git
cd unitree_sdk2_python
pip install -e .
cd ..
```
### Install LeRobot
```bash
conda install ffmpeg -c conda-forge
conda install -c conda-forge "pinocchio>=3.0.0,<4.0.0"
git clone https://github.com/huggingface/lerobot.git
cd lerobot
pip install -e '.[unitree_g1]'
```
<Tip>
For now, pinocchio must be installed from conda-forge (not pip) to include the
CasADi bindings needed for arm IK.
</Tip>
### Test the Installation (Simulation)
The simulation environment has its own dependencies. Check the Simulation environment dependencies: [Unitree G1 Mujoco EnvHub](https://huggingface.co/lerobot/unitree-g1-mujoco/tree/main).
```bash
pip install mujoco loguru msgpack msgpack-numpy
```
```bash
lerobot-teleoperate \
--robot.type=unitree_g1 \
--robot.is_simulation=true \
--teleop.type=unitree_g1 \
--teleop.id=wbc_unitree \
--robot.cameras='{"global_view": {"type": "zmq", "server_address": "localhost", "port": 5555, "camera_name": "head_camera", "width": 640, "height": 480, "fps": 30, "warmup_s": 5}}' \
--display_data=true \
--robot.controller=GrootLocomotionController
```
This will launch a [MuJoCo sim instance](https://huggingface.co/lerobot/unitree-g1-mujoco/tree/main) for the G1. You can connect a gamepad to your machine before launching in order to control the robot's locomotion in sim. We support both [HolosomaLocomotionController](https://github.com/amazon-far/holosoma) and [GrootLocomotionController](https://github.com/NVlabs/GR00T-WholeBodyControl) via `--robot.controller`.
- Press `9` to release the robot
- Press `7` / `8` to increase / decrease waist height
### Connect to the Physical Robot
The G1's Ethernet IP is fixed at `192.168.123.164`. Your machine must have a static IP on the same subnet: `192.168.123.x` where `x ≠ 164`.
Set a static IP on the same subnet as the robot:
```bash
# Replace 'enp131s0' with your ethernet interface name (check with `ip a`)
@@ -75,23 +26,47 @@ sudo ip addr add 192.168.123.200/24 dev enp131s0
sudo ip link set enp131s0 up
```
### SSH into the Robot
**Note**: The G1's Ethernet IP is fixed at `192.168.123.164`. Your computer must use `192.168.123.x` with x ≠ 164.
### Step 2: SSH into the Robot
```bash
ssh unitree@192.168.123.164
# Password: 123
```
### Share Internet via Ethernet
You should now be connected to the G1's Orin.
The G1 needs internet access to clone repos and install packages. Share your laptop's connection over Ethernet:
---
## Part 2: Enable WiFi on the Robot
`wlan0` is disabled by default on the G1. To enable it:
### Step 1: Enable WiFi Hardware
```bash
sudo rfkill unblock wifi
sudo rfkill unblock all
# Bring up wlan0
sudo ip link set wlan0 up
# Enable NetworkManager control of wlan0
sudo nmcli radio wifi on
sudo nmcli device set wlan0 managed yes
sudo systemctl restart NetworkManager
```
### Step 2: Enable Internet Forwarding
**On your laptop:**
```bash
# Enable IP forwarding
sudo sysctl -w net.ipv4.ip_forward=1
# Replace wlp132s0f0 with your WiFi interface name
# Set up NAT (replace wlp132s0f0 with your WiFi interface)
sudo iptables -t nat -A POSTROUTING -o wlp132s0f0 -s 192.168.123.0/24 -j MASQUERADE
sudo iptables -A FORWARD -i wlp132s0f0 -o enp131s0 -m state --state RELATED,ESTABLISHED -j ACCEPT
sudo iptables -A FORWARD -i enp131s0 -o wlp132s0f0 -j ACCEPT
@@ -100,203 +75,129 @@ sudo iptables -A FORWARD -i enp131s0 -o wlp132s0f0 -j ACCEPT
**On the G1:**
```bash
# Add laptop as default gateway
sudo ip route del default 2>/dev/null || true
sudo ip route add default via 192.168.123.200 dev eth0
echo "nameserver 8.8.8.8" | sudo tee /etc/resolv.conf
# Verify
# Test connection
ping -c 3 8.8.8.8
```
### Install the Unitree SDK on the G1
Follow the [unitree_sdk2_python installation guide](https://github.com/unitreerobotics/unitree_sdk2_python#installation):
```bash
conda create -y -n lerobot python=3.12
conda activate lerobot
git clone https://github.com/unitreerobotics/unitree_sdk2_python.git
cd unitree_sdk2_python
python -m pip install -e .
cd ..
```
### Install LeRobot on the G1
```bash
git clone https://github.com/huggingface/lerobot.git
cd lerobot
conda install -c conda-forge "pinocchio>=3.0.0,<4.0.0"
python -m pip install -e '.[unitree_g1]'
```
<Tip>
For now, pinocchio must be installed from conda-forge (not pip) to include the
CasADi bindings needed for arm IK.
</Tip>
### (Optional) Enable WiFi on the Robot
For wireless SSH access, you can enable WiFi on the G1 (it's blocked by default):
```bash
sudo rfkill unblock all
sudo ip link set wlan0 up
sudo nmcli radio wifi on
sudo nmcli device set wlan0 managed yes
sudo systemctl restart NetworkManager
```
**Connect to a WiFi network:**
### Step 3: Connect to WiFi Network
```bash
# List available networks
nmcli device wifi list
# Connect to your WiFi (example)
sudo nmcli connection add type wifi ifname wlan0 con-name "YourNetwork" ssid "YourNetwork"
sudo nmcli connection modify "YourNetwork" wifi-sec.key-mgmt wpa-psk
sudo nmcli connection modify "YourNetwork" wifi-sec.psk "YourPassword"
sudo nmcli connection modify "YourNetwork" connection.autoconnect yes
sudo nmcli connection up "YourNetwork"
# Check WiFi IP address
ip a show wlan0
```
You can then SSH over WiFi instead of Ethernet:
### Step 4: SSH Over WiFi
Once connected to WiFi, note the robot's IP address and disconnect the Ethernet cable. You can now SSH over WiFi:
```bash
ssh unitree@<ROBOT_WIFI_IP>
ssh unitree@<YOUR_ROBOT_IP>
# Password: 123
```
---
## Part 2: Teleoperation & Locomotion
### Run the Robot Server
On the robot (from `~/lerobot`):
```bash
cd ~/lerobot
python src/lerobot/robots/unitree_g1/run_g1_server.py --camera
```
### Run the Locomotion Policy
You can run the teleoperation client from your laptop over Ethernet, over WiFi (experimental), or directly on the robot itself. Mind potential latency introduced by your network.
**From your laptop:**
```bash
lerobot-teleoperate \
--robot.type=unitree_g1 \
--robot.is_simulation=false \
--robot.robot_ip=<ROBOT_IP> \
--teleop.type=unitree_g1 \
--teleop.id=wbc_unitree \
--robot.cameras='{"global_view": {"type": "zmq", "server_address": "<ROBOT_IP>", "port": 5555, "camera_name": "head_camera", "width": 640, "height": 480, "fps": 30}}' \
--display_data=true \
--robot.controller=HolosomaLocomotionController
```
We support both [GrootLocomotionController](https://github.com/NVlabs/GR00T-WholeBodyControl) and [HolosomaLocomotionController](https://github.com/amazon-far/holosoma) via `--robot.controller`.
Replace `<YOUR_ROBOT_IP>` with your robot's actual WiFi IP address.
---
## Part 3: Loco-Manipulation with the Homunculus Exoskeleton
## Part 3: Robot Server Setup
We provide a loco-manipulation solution via the Homunculus Exoskeleton — an open-source 7 DoF exoskeleton for whole-body control. Check it out [here](https://github.com/nepyope/hmc_exo).
### Step 1: Install LeRobot on the Orin
### Calibrate
SSH into the robot and install LeRobot:
```bash
lerobot-calibrate \
--teleop.type=unitree_g1 \
--teleop.left_arm_config.port=/dev/ttyACM1 \
--teleop.right_arm_config.port=/dev/ttyACM0 \
--teleop.id=exo
ssh unitree@<YOUR_ROBOT_IP>
conda create -y -n lerobot python=3.10
conda activate lerobot
git clone https://github.com/huggingface/lerobot.git
cd lerobot
pip install -e '.[unitree_g1]'
git clone https://github.com/unitreerobotics/unitree_sdk2_python.git
cd unitree_sdk2_python && pip install -e .
```
During calibration, move each joint through its entire range. After fitting, move the joint to a neutral position and press `n` to advance.
**Note**: The Unitree SDK requires CycloneDDS v0.10.2 to be installed. See the [Unitree SDK documentation](https://github.com/unitreerobotics/unitree_sdk2_python) for details.
### Record a Dataset
### Step 2: Run the Robot Server
On the robot:
```bash
lerobot-record \
--robot.type=unitree_g1 \
--robot.is_simulation=true \
--robot.cameras='{"global_view": {"type": "zmq", "server_address": "localhost", "port": 5555, "camera_name": "head_camera", "width": 640, "height": 480, "fps": 30}}' \
--teleop.type=unitree_g1 \
--teleop.left_arm_config.port=/dev/ttyACM1 \
--teleop.right_arm_config.port=/dev/ttyACM0 \
--teleop.id=exo \
--dataset.repo_id=your-username/dataset-name \
--dataset.single_task="Test" \
--dataset.num_episodes=2 \
--dataset.episode_time_s=5 \
--dataset.reset_time_s=5 \
--dataset.push_to_hub=true \
--dataset.streaming_encoding=true \
--dataset.encoder_threads=2
python src/lerobot/robots/unitree_g1/run_g1_server.py
```
> **Note:** Omit `--teleop.left_arm_config.port` and `--teleop.right_arm_config.port` if you're only using the joystick.
Example dataset: [nepyope/unitree_box_move_blue_full](https://huggingface.co/datasets/nepyope/unitree_box_move_blue_full)
**Important**: Keep this terminal running. The server must be active for remote control.
---
## Part 4: Training & Inference
## Part 4: Controlling the robot
### Train
With the robot server running, you can now control the robot remotely. Let's launch a locomotion policy.
### Step 1: Install LeRobot on your machine
```bash
python src/lerobot/scripts/lerobot_train.py \
--dataset.repo_id=your-username/dataset-name \
--policy.type=pi05 \
--output_dir=./outputs/pi05_training \
--job_name=pi05_training \
--policy.repo_id=your-username/your-repo-id \
--policy.pretrained_path=lerobot/pi05_base \
--policy.compile_model=true \
--policy.gradient_checkpointing=true \
--wandb.enable=true \
--policy.dtype=bfloat16 \
--policy.freeze_vision_encoder=false \
--policy.train_expert_only=false \
--steps=3000 \
--policy.device=cuda \
--batch_size=32
conda create -y -n lerobot python=3.10
conda activate lerobot
git clone https://github.com/huggingface/lerobot.git
cd lerobot
pip install -e '.[unitree_g1]'
git clone https://github.com/unitreerobotics/unitree_sdk2_python.git
cd unitree_sdk2_python && pip install -e .
```
### Inference with RTC
### Step 2: Update Robot IP in Config
Once trained, we recommend deploying policies using inference-time RTC:
Edit the config file to match your robot's WiFi IP:
```python
# In src/lerobot/robots/unitree_g1/config_unitree_g1.py
robot_ip: str = "<YOUR_ROBOT_IP>" # Replace with your robot's WiFi IP.
```
### Step 3: Run the Locomotion Policy
```bash
python examples/rtc/eval_with_real_robot.py \
--policy.path=your-username/your-repo-id \
--policy.device=cuda \
--robot.type=unitree_g1 \
--robot.is_simulation=false \
--robot.controller=HolosomaLocomotionController \
--robot.cameras='{"global_view": {"type": "zmq", "server_address": "<ROBOT_IP>", "port": 5555, "camera_name": "head_camera", "width": 640, "height": 480, "fps": 30}}' \
--task="task_description" \
--duration=1000 \
--fps=30 \
--rtc.enabled=true
# Run GR00T locomotion controller
python examples/unitree_g1/gr00t_locomotion.py --repo-id "nepyope/GR00T-WholeBodyControl_g1"
# Run Holosoma locomotion controller
python examples/unitree_g1/holosoma_locomotion.py
```
Press `Ctrl+C` to stop the policy.
---
## Running in Simulation Mode (MuJoCo)
You can test policies in MuJoCo before running them on the physical robot. To do so, simply set `is_simulation=True` in the config.
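The same flag can also be passed on the command line, as in the earlier examples. For instance, the RTC evaluation from Part 4 can be pointed at the simulator by flipping it (a sketch; the remaining arguments stay as in the real-robot command above):
```bash
# Same evaluation entry point as in Part 4, but driving the MuJoCo simulation instead of hardware.
python examples/rtc/eval_with_real_robot.py \
    --policy.path=your-username/your-repo-id \
    --policy.device=cuda \
    --robot.type=unitree_g1 \
    --robot.is_simulation=true \
    --task="task_description" \
    --fps=30 \
    --rtc.enabled=true
```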
## Additional Resources
- [Unitree SDK Documentation](https://github.com/unitreerobotics/unitree_sdk2_python)
- [GR00T-WholeBodyControl](https://github.com/NVlabs/GR00T-WholeBodyControl)
- [Holosoma](https://github.com/amazon-far/holosoma)
- [LeRobot Documentation](https://github.com/huggingface/lerobot)
- [Unitree IL LeRobot](https://github.com/unitreerobotics/unitree_IL_lerobot)
- [Unitree_IL_Lerobot](https://github.com/unitreerobotics/unitree_IL_lerobot)
---
_Last updated: March 2026_
_Last updated: December 2025_
+6 -38
View File
@@ -12,7 +12,6 @@ LeRobot provides several utilities for manipulating datasets:
4. **Add Features** - Add new features to a dataset
5. **Remove Features** - Remove features from a dataset
6. **Convert to Video** - Convert image-based datasets to video format for efficient storage
7. **Show the Info of Datasets** - Show a summary of dataset information, such as the number of episodes
The core implementation is in `lerobot.datasets.dataset_tools`.
An example script detailing how to use the tools API is available in `examples/dataset/use_dataset_tools.py`.
@@ -96,26 +95,26 @@ Convert an image-based dataset to video format, creating a new LeRobotDataset wh
# Local-only: Save to a custom output directory (no hub push)
lerobot-edit-dataset \
--repo_id lerobot/pusht_image \
--operation.type convert_image_to_video \
--operation.type convert_to_video \
--operation.output_dir /path/to/output/pusht_video
# Save with new repo_id (local storage)
lerobot-edit-dataset \
--repo_id lerobot/pusht_image \
--new_repo_id lerobot/pusht_video \
--operation.type convert_image_to_video
--operation.type convert_to_video
# Convert and push to Hugging Face Hub
lerobot-edit-dataset \
--repo_id lerobot/pusht_image \
--new_repo_id lerobot/pusht_video \
--operation.type convert_image_to_video \
--operation.type convert_to_video \
--push_to_hub true
# Convert with custom video codec and quality settings
lerobot-edit-dataset \
--repo_id lerobot/pusht_image \
--operation.type convert_image_to_video \
--operation.type convert_to_video \
--operation.output_dir outputs/pusht_video \
--operation.vcodec libsvtav1 \
--operation.pix_fmt yuv420p \
@@ -125,23 +124,16 @@ lerobot-edit-dataset \
# Convert only specific episodes
lerobot-edit-dataset \
--repo_id lerobot/pusht_image \
--operation.type convert_image_to_video \
--operation.type convert_to_video \
--operation.output_dir outputs/pusht_video \
--operation.episode_indices "[0, 1, 2, 5, 10]"
# Convert with multiple workers for parallel processing
lerobot-edit-dataset \
--repo_id lerobot/pusht_image \
--operation.type convert_image_to_video \
--operation.type convert_to_video \
--operation.output_dir outputs/pusht_video \
--operation.num_workers 8
# For memory-constrained systems, users can now specify limits:
lerobot-edit-dataset \
--repo_id lerobot/pusht_image \
--operation.type convert_to_video \
--operation.max_episodes_per_batch 50 \
--operation.max_frames_per_batch 10000
```
**Parameters:**
@@ -157,30 +149,6 @@ lerobot-edit-dataset \
**Note:** The resulting dataset will be a proper LeRobotDataset with all cameras encoded as videos in the `videos/` directory, with parquet files containing only metadata (no raw image data). All episodes, stats, and tasks are preserved.
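As a quick sanity check, the converted dataset should load like any other LeRobotDataset. The snippet below is a minimal sketch; the repo id and root path are illustrative, and the attribute names assume the standard LeRobotDataset metadata API:
```python
# Minimal sketch: load the converted dataset and confirm the cameras are now video features.
from lerobot.datasets.lerobot_dataset import LeRobotDataset

ds = LeRobotDataset("lerobot/pusht_video", root="outputs/pusht_video")  # illustrative paths
print(ds.num_episodes, ds.num_frames)  # counts should match the source image dataset
print(ds.meta.video_keys)              # cameras previously stored as images should appear here
```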
### Show the information of a dataset
Show information about a dataset, such as the number of episodes, number of frames, file size, and so on.
No changes are made to the dataset.
```bash
# Show dataset information without feature details
lerobot-edit-dataset \
--repo_id lerobot/pusht_image \
--operation.type info
# Show dataset information with feature details
lerobot-edit-dataset \
--repo_id lerobot/pusht_image \
--operation.type info \
--operation.show_features true
```
**Parameters:**
- `show_features`: Whether to also display per-feature details in addition to the summary (default: `false`).
### Push to Hub
Add the `--push_to_hub true` flag to any command to automatically upload the resulting dataset to the Hugging Face Hub:
+1 -1
View File
@@ -45,7 +45,7 @@ policy.type=wall_x
For training WallX, you can use the standard LeRobot training script with the appropriate configuration:
```bash
lerobot-train \
python src/lerobot/scripts/lerobot_train.py \
--dataset.repo_id=your_dataset \
--policy.type=wall_x \
--output_dir=./outputs/wallx_training \
+1 -1
View File
@@ -154,7 +154,7 @@ lerobot-train \
```bash
lerobot-train \
--dataset.repo_id=<USER>/bimanual-so100-handover-cube \
--dataset.repo_id=pepijn223/bimanual-so100-handover-cube \
--output_dir=./outputs/xvla_bimanual \
--job_name=xvla_so101_training \
--policy.path="lerobot/xvla-base" \
+17 -18
View File
@@ -22,7 +22,7 @@ lerobot-replay \
--robot.type=so100_follower \
--robot.port=/dev/tty.usbmodem58760431541 \
--robot.id=black \
--dataset.repo_id=<USER>/record-test \
--dataset.repo_id=aliberts/record-test \
--dataset.episode=2
```
"""
@@ -57,7 +57,7 @@ class DatasetReplayConfig:
repo_id: str
# Episode to replay.
episode: int
# Root directory where the dataset will be stored (e.g. 'dataset/path'). If None, defaults to $HF_LEROBOT_HOME/repo_id.
# Root directory where the dataset will be stored (e.g. 'dataset/path').
root: str | Path | None = None
# Limit the frames per second. By default, uses the policy fps.
fps: int = 30
@@ -81,25 +81,24 @@ def replay(cfg: ReplayConfig):
actions = dataset.hf_dataset.select_columns(ACTION)
robot.connect()
try:
log_say("Replaying episode", cfg.play_sounds, blocking=True)
for idx in range(dataset.num_frames):
start_episode_t = time.perf_counter()
log_say("Replaying episode", cfg.play_sounds, blocking=True)
for idx in range(dataset.num_frames):
start_episode_t = time.perf_counter()
action_array = actions[idx][ACTION]
action = {}
for i, name in enumerate(dataset.features[ACTION]["names"]):
key = f"{name.removeprefix('main_')}.pos"
action[key] = action_array[i].item()
action_array = actions[idx][ACTION]
action = {}
for i, name in enumerate(dataset.features[ACTION]["names"]):
key = f"{name.removeprefix('main_')}.pos"
action[key] = action_array[i].item()
action["shoulder_lift.pos"] = -(action["shoulder_lift.pos"] - 90)
action["elbow_flex.pos"] -= 90
robot.send_action(action)
action["shoulder_lift.pos"] = -(action["shoulder_lift.pos"] - 90)
action["elbow_flex.pos"] -= 90
robot.send_action(action)
dt_s = time.perf_counter() - start_episode_t
precise_sleep(max(1 / dataset.fps - dt_s, 0.0))
finally:
robot.disconnect()
dt_s = time.perf_counter() - start_episode_t
precise_sleep(max(1 / dataset.fps - dt_s, 0.0))
robot.disconnect()
if __name__ == "__main__":
+1 -2
View File
@@ -32,8 +32,7 @@ import torch
from huggingface_hub import HfApi
import lerobot
from lerobot.datasets.dataset_metadata import LeRobotDatasetMetadata
from lerobot.datasets.lerobot_dataset import LeRobotDataset
from lerobot.datasets.lerobot_dataset import LeRobotDataset, LeRobotDatasetMetadata
def main():
-490
View File
@@ -1,490 +0,0 @@
#!/usr/bin/env python
# Copyright 2025 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
SLURM-distributed SARM RA-BC annotation pipeline.
Computes SARM progress values for all frames in a dataset, distributed across
SLURM workers, then merges the shards into a single sarm_progress.parquet.
Two subcommands, each a separate SLURM submission:
compute N workers, each computes progress for a subset of episodes
aggregate 1 worker, merges N shards into sarm_progress.parquet, pushes to hub
Usage:
python slurm_compute_rabc.py compute \\
--repo-id user/dataset --reward-model-path user/sarm_model \\
--stride 10 --device cpu --workers 50 --partition cpu
python slurm_compute_rabc.py aggregate \\
--repo-id user/dataset --reward-model-path user/sarm_model \\
--partition cpu --push-to-hub
"""
import argparse
from pathlib import Path
from datatrove.executor import LocalPipelineExecutor
from datatrove.executor.slurm import SlurmPipelineExecutor
from datatrove.pipeline.base import PipelineStep
class ComputeProgressShards(PipelineStep):
"""Each worker computes SARM progress for its assigned episodes."""
def __init__(
self, repo_id, reward_model_path, stride=1, head_mode="sparse", device="cpu", shard_dir="rabc_shards"
):
super().__init__()
if stride < 1:
raise ValueError(f"stride must be >= 1, got {stride}")
self.repo_id = repo_id
self.reward_model_path = reward_model_path
self.stride = stride
self.head_mode = head_mode
self.device = device
self.shard_dir = shard_dir
def run(self, data=None, rank: int = 0, world_size: int = 1):
import logging
from pathlib import Path
import numpy as np
import pyarrow as pa
import pyarrow.parquet as pq
import torch
from tqdm import tqdm
from lerobot.policies.sarm.compute_rabc_weights import (
generate_all_frame_indices,
interpolate_progress,
load_sarm_resources,
)
from lerobot.utils.utils import init_logging
init_logging()
dataset, reward_model, preprocess = load_sarm_resources(
self.repo_id,
self.reward_model_path,
self.device,
)
if hasattr(preprocess, "eval"):
preprocess.eval()
for step in preprocess.steps:
if hasattr(step, "eval"):
step.eval()
image_key = reward_model.config.image_key
state_key = reward_model.config.state_key
frame_gap = reward_model.config.frame_gap
center_idx = reward_model.config.n_obs_steps // 2
dual_mode = reward_model.config.uses_dual_heads
compute_sparse = self.head_mode in ("sparse", "both") or not dual_mode
compute_dense = self.head_mode in ("dense", "both") and dual_mode
my_episodes = list(range(dataset.num_episodes))[rank::world_size]
if not my_episodes:
logging.info(f"Rank {rank}: no episodes assigned")
return
logging.info(f"Rank {rank}: {len(my_episodes)} / {dataset.num_episodes} episodes")
all_rows = []
for ep_idx in tqdm(my_episodes, desc=f"Rank {rank}"):
ep = dataset.meta.episodes[ep_idx]
ep_start, ep_end = ep["dataset_from_index"], ep["dataset_to_index"]
task = dataset[ep_start].get("task", "perform the task")
all_ep_indices = generate_all_frame_indices(ep_start, ep_end, frame_gap)
if self.stride > 1:
compute_indices = [i for i in all_ep_indices if (i - ep_start) % self.stride == 0]
if (ep_end - 1) not in compute_indices:
compute_indices.append(ep_end - 1)
compute_indices = sorted(set(compute_indices))
else:
compute_indices = all_ep_indices
frame_results = {}
for qi in tqdm(compute_indices, desc=f" Ep {ep_idx}", leave=False):
try:
sample = dataset[qi]
batch = {
image_key: sample[image_key],
"task": task,
"index": qi,
"episode_index": ep_idx,
}
if state_key in sample:
batch[state_key] = sample[state_key]
with torch.no_grad():
processed = preprocess(batch)
vf = processed["video_features"].to(self.device)
tf = processed["text_features"].to(self.device)
sf = processed.get("state_features")
if sf is not None:
sf = sf.to(self.device)
lengths = processed.get("lengths")
sparse_val = dense_val = np.nan
if compute_sparse:
r = reward_model.calculate_rewards(
text_embeddings=tf,
video_embeddings=vf,
state_features=sf,
lengths=lengths,
return_all_frames=True,
head_mode="sparse",
)
sparse_val = float(r[0, center_idx] if r.ndim == 2 else r[center_idx])
if compute_dense:
r = reward_model.calculate_rewards(
text_embeddings=tf,
video_embeddings=vf,
state_features=sf,
lengths=lengths,
return_all_frames=True,
head_mode="dense",
)
dense_val = float(r[0, center_idx] if r.ndim == 2 else r[center_idx])
frame_results[qi] = (sparse_val, dense_val)
except Exception as e:
logging.warning(f"Failed frame {qi}: {e}")
if not frame_results:
logging.warning(f"Episode {ep_idx}: all frames failed, skipping")
continue
# Interpolate to all frames in this episode
computed_idx = np.array(sorted(frame_results.keys()))
all_frame_arr = np.arange(ep_start, ep_end)
sparse_vals = np.array([frame_results[i][0] for i in computed_idx]) if compute_sparse else None
dense_vals = np.array([frame_results[i][1] for i in computed_idx]) if compute_dense else None
if self.stride > 1 and len(computed_idx) > 1:
if compute_sparse:
sparse_vals = interpolate_progress(computed_idx, sparse_vals, all_frame_arr)
if compute_dense:
dense_vals = interpolate_progress(computed_idx, dense_vals, all_frame_arr)
output_frames = all_frame_arr
else:
# Use only successfully computed frames to avoid indexing mismatch on failures
output_frames = computed_idx
for i, fi in enumerate(output_frames):
row = {"index": int(fi), "episode_index": ep_idx, "frame_index": int(fi - ep_start)}
if compute_sparse:
row["progress_sparse"] = float(sparse_vals[i])
if compute_dense:
row["progress_dense"] = float(dense_vals[i])
all_rows.append(row)
if all_rows:
import pandas as pd
df = pd.DataFrame(all_rows).sort_values("index").reset_index(drop=True)
table = pa.Table.from_pandas(df, preserve_index=False)
table = table.replace_schema_metadata({b"reward_model_path": self.reward_model_path.encode()})
shard_dir = Path(self.shard_dir)
shard_dir.mkdir(parents=True, exist_ok=True)
out = shard_dir / f"shard_{rank:05d}.parquet"
pq.write_table(table, out)
logging.info(f"Rank {rank}: saved {len(df)} rows to {out}")
class AggregateProgress(PipelineStep):
"""Merge all shard parquets into final sarm_progress.parquet."""
def __init__(self, repo_id, reward_model_path, shard_dir="rabc_shards", push_to_hub=False):
super().__init__()
self.repo_id = repo_id
self.reward_model_path = reward_model_path
self.shard_dir = shard_dir
self.push_to_hub = push_to_hub
def run(self, data=None, rank: int = 0, world_size: int = 1):
import datetime
import logging
import os
from pathlib import Path
import pandas as pd
import pyarrow as pa
import pyarrow.parquet as pq
from lerobot.datasets.lerobot_dataset import LeRobotDataset
from lerobot.utils.utils import init_logging
init_logging()
if rank != 0:
return
shard_dir = Path(self.shard_dir)
shards = sorted(shard_dir.glob("shard_*.parquet"))
if not shards:
raise FileNotFoundError(f"No shards found in {shard_dir}")
# Log shard modification time range to help detect stale files
mtimes = [os.path.getmtime(s) for s in shards]
oldest = datetime.datetime.fromtimestamp(min(mtimes)).isoformat(timespec="seconds")
newest = datetime.datetime.fromtimestamp(max(mtimes)).isoformat(timespec="seconds")
logging.info(f"Aggregating {len(shards)} shards (oldest: {oldest}, newest: {newest})")
df = pd.concat([pd.read_parquet(s) for s in shards], ignore_index=True)
df = df.sort_values("index").reset_index(drop=True)
table = pa.Table.from_pandas(df, preserve_index=False)
table = table.replace_schema_metadata({b"reward_model_path": self.reward_model_path.encode()})
temp_ds = LeRobotDataset(self.repo_id, download_videos=False)
out_path = Path(temp_ds.root) / "sarm_progress.parquet"
out_path.parent.mkdir(parents=True, exist_ok=True)
pq.write_table(table, out_path)
logging.info(f"Saved {len(df)} rows to {out_path}")
for col in ["progress_sparse", "progress_dense"]:
if col in df.columns:
v = df[col].dropna()
logging.info(
f"{col}: mean={v.mean():.4f} std={v.std():.4f} min={v.min():.4f} max={v.max():.4f}"
)
if self.push_to_hub:
from huggingface_hub import HfApi
api = HfApi()
hub_path = "sarm_progress.parquet"
logging.info(f"Uploading to {self.repo_id}/{hub_path}")
api.upload_file(
path_or_fileobj=str(out_path),
path_in_repo=hub_path,
repo_id=self.repo_id,
repo_type="dataset",
)
logging.info(f"Uploaded: https://huggingface.co/datasets/{self.repo_id}/blob/main/{hub_path}")
def make_compute_executor(
repo_id,
reward_model_path,
stride,
head_mode,
device,
shard_dir,
logs_dir,
job_name,
slurm,
workers,
partition,
cpus_per_task,
mem_per_cpu,
):
kwargs = {
"pipeline": [
ComputeProgressShards(repo_id, reward_model_path, stride, head_mode, device, str(shard_dir)),
],
"logging_dir": str(logs_dir / job_name),
}
if slurm:
kwargs.update(
{
"job_name": job_name,
"tasks": workers,
"workers": workers,
"time": "24:00:00",
"partition": partition,
"cpus_per_task": cpus_per_task,
"sbatch_args": {"mem-per-cpu": mem_per_cpu},
}
)
return SlurmPipelineExecutor(**kwargs)
kwargs.update({"tasks": workers, "workers": 1})
return LocalPipelineExecutor(**kwargs)
def make_aggregate_executor(
repo_id,
reward_model_path,
shard_dir,
logs_dir,
job_name,
slurm,
partition,
cpus_per_task,
mem_per_cpu,
push_to_hub,
):
kwargs = {
"pipeline": [
AggregateProgress(repo_id, reward_model_path, str(shard_dir), push_to_hub),
],
"logging_dir": str(logs_dir / job_name),
}
if slurm:
kwargs.update(
{
"job_name": job_name,
"tasks": 1,
"workers": 1,
"time": "02:00:00",
"partition": partition,
"cpus_per_task": cpus_per_task,
"sbatch_args": {"mem-per-cpu": mem_per_cpu},
}
)
return SlurmPipelineExecutor(**kwargs)
kwargs.update({"tasks": 1, "workers": 1})
return LocalPipelineExecutor(**kwargs)
def _add_shared_args(p):
p.add_argument(
"--repo-id",
type=str,
required=True,
help="Hugging Face repository identifier, e.g. 'user/dataset'.",
)
p.add_argument(
"--shard-dir",
type=Path,
default=Path("rabc_shards"),
help="Directory to read/write per-rank parquet shards.",
)
p.add_argument(
"--logs-dir",
type=Path,
default=Path("logs"),
help="Directory for datatrove logs.",
)
p.add_argument(
"--job-name",
type=str,
default=None,
help="SLURM job name (defaults to rabc_<subcommand>).",
)
p.add_argument(
"--slurm",
type=int,
default=1,
help="1 = submit via SLURM; 0 = run locally (useful for debugging).",
)
p.add_argument(
"--partition",
type=str,
default=None,
help="SLURM partition to submit to.",
)
p.add_argument(
"--cpus-per-task",
type=int,
default=4,
help="Number of CPUs per SLURM task.",
)
p.add_argument(
"--mem-per-cpu",
type=str,
default="4G",
help="Memory per CPU, e.g. '4G' or '1950M'.",
)
def main():
parser = argparse.ArgumentParser(
description="SLURM-distributed SARM RA-BC annotation pipeline",
formatter_class=argparse.RawDescriptionHelpFormatter,
)
sub = parser.add_subparsers(dest="command", required=True)
# compute subcommand
cp = sub.add_parser(
"compute",
help="Distribute progress computation across SLURM workers.",
)
_add_shared_args(cp)
cp.add_argument(
"--reward-model-path",
type=str,
required=True,
help="Path or HF repo id of the SARM reward model.",
)
cp.add_argument(
"--stride",
type=int,
default=1,
help="Compute every Nth frame; intermediate frames are interpolated (must be >= 1).",
)
cp.add_argument(
"--head-mode",
type=str,
default="sparse",
choices=["sparse", "dense", "both"],
help="Which reward head(s) to compute.",
)
cp.add_argument(
"--device",
type=str,
default="cpu",
help="Device for reward model inference, e.g. 'cpu' or 'cuda'.",
)
cp.add_argument(
"--workers",
type=int,
default=50,
help="Number of parallel SLURM tasks (one shard per worker).",
)
# aggregate subcommand
ap = sub.add_parser(
"aggregate",
help="Merge per-rank shards into a single sarm_progress.parquet.",
)
_add_shared_args(ap)
ap.add_argument(
"--reward-model-path",
type=str,
required=True,
help="Path or HF repo id of the SARM reward model (stored in parquet metadata).",
)
ap.add_argument(
"--push-to-hub",
action="store_true",
help="Upload sarm_progress.parquet to the Hugging Face Hub after aggregation.",
)
args = parser.parse_args()
job_name = args.job_name or f"rabc_{args.command}"
kwargs = vars(args)
kwargs["slurm"] = kwargs.pop("slurm") == 1
kwargs["job_name"] = job_name
command = kwargs.pop("command")
executor = make_compute_executor(**kwargs) if command == "compute" else make_aggregate_executor(**kwargs)
executor.run()
if __name__ == "__main__":
main()
@@ -1,717 +0,0 @@
"""
Action consistency analysis for imitation learning datasets.
Two parallel analyses per dataset:
1. State-based: KNN in joint-state space action chunk variance
2. Image-based: KNN in SigLIP embedding space action chunk variance
Comparing them reveals whether visual similarity and proprioceptive similarity
agree on where the data is inconsistent and images are what the policy
primarily sees.
"""
import json
from pathlib import Path
import av
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
from huggingface_hub import snapshot_download
from matplotlib.colors import LinearSegmentedColormap
from PIL import Image
from scipy.spatial import cKDTree
from transformers import AutoImageProcessor, AutoModel
DATASETS = [
{"repo_id": "lerobot-data-collection/level2_final_quality3", "label": "HQ curated"},
{"repo_id": "lerobot-data-collection/level12_rac_2_2026-02-08_1", "label": "Full collection"},
]
OUTPUT_DIR = Path(__file__).resolve().parent / "outputs"
OUTPUT_DIR.mkdir(exist_ok=True)
MAX_FRAMES = 100_000
K_NEIGHBORS = 50
ACTION_CHUNK_SIZE = 30
CAMERA_KEY = "observation.images.base"
ENCODER_MODEL = "google/siglip-base-patch16-224"
ENCODE_BATCH_SIZE = 512
SEED = 42
DPI = 150
CONSISTENCY_CMAP = LinearSegmentedColormap.from_list(
"consistency", ["#0a2e0a", "#1a8e1a", "#88cc22", "#ffaa22", "#ff2222"]
)
# FK chains from OpenArm bimanual URDF (same as workspace_density.py).
LEFT_CHAIN = [
((-np.pi / 2, 0, 0), (0, 0.031, 0.698), None),
((0, 0, 0), (0, 0, 0.0625), (0, 0, 1)),
((-np.pi / 2, 0, 0), (-0.0301, 0, 0.06), (-1, 0, 0)),
((0, 0, 0), (0.0301, 0, 0.06625), (0, 0, 1)),
((0, 0, 0), (0, 0.0315, 0.15375), (0, 1, 0)),
((0, 0, 0), (0, -0.0315, 0.0955), (0, 0, 1)),
((0, 0, 0), (0.0375, 0, 0.1205), (1, 0, 0)),
((0, 0, 0), (-0.0375, 0, 0), (0, -1, 0)),
((0, 0, 0), (0, 0, 0.1001), None),
((0, 0, 0), (0, 0, 0.08), None),
]
RIGHT_CHAIN = [
((np.pi / 2, 0, 0), (0, -0.031, 0.698), None),
((0, 0, 0), (0, 0, 0.0625), (0, 0, 1)),
((np.pi / 2, 0, 0), (-0.0301, 0, 0.06), (-1, 0, 0)),
((0, 0, 0), (0.0301, 0, 0.06625), (0, 0, 1)),
((0, 0, 0), (0, 0.0315, 0.15375), (0, 1, 0)),
((0, 0, 0), (0, -0.0315, 0.0955), (0, 0, 1)),
((0, 0, 0), (0.0375, 0, 0.1205), (1, 0, 0)),
((0, 0, 0), (-0.0375, 0, 0), (0, 1, 0)),
((0, 0, 0), (0, 0, 0.1001), None),
((0, 0, 0), (0, 0, 0.08), None),
]
# ── FK math ─────────────────────────────────────────────
def _rot_x(a: float) -> np.ndarray:
c, s = np.cos(a), np.sin(a)
return np.array([[1, 0, 0], [0, c, -s], [0, s, c]])
def _rot_y(a: float) -> np.ndarray:
c, s = np.cos(a), np.sin(a)
return np.array([[c, 0, s], [0, 1, 0], [-s, 0, c]])
def _rot_z(a: float) -> np.ndarray:
c, s = np.cos(a), np.sin(a)
return np.array([[c, -s, 0], [s, c, 0], [0, 0, 1]])
def _tf(rpy: tuple, xyz: tuple) -> np.ndarray:
r, p, y = rpy
mat = np.eye(4)
mat[:3, :3] = _rot_z(y) @ _rot_y(p) @ _rot_x(r)
mat[:3, 3] = xyz
return mat
def _batch_axis_rot(axis: tuple, angles: np.ndarray) -> np.ndarray:
n = len(angles)
ax = np.asarray(axis, dtype=np.float64)
ax = ax / np.linalg.norm(ax)
x, y, z = ax
c = np.cos(angles)
s = np.sin(angles)
t = 1 - c
rot = np.zeros((n, 4, 4))
rot[:, 0, 0] = t * x * x + c
rot[:, 0, 1] = t * x * y - s * z
rot[:, 0, 2] = t * x * z + s * y
rot[:, 1, 0] = t * x * y + s * z
rot[:, 1, 1] = t * y * y + c
rot[:, 1, 2] = t * y * z - s * x
rot[:, 2, 0] = t * x * z - s * y
rot[:, 2, 1] = t * y * z + s * x
rot[:, 2, 2] = t * z * z + c
rot[:, 3, 3] = 1.0
return rot
def batch_fk(chain: list, joint_angles: np.ndarray) -> np.ndarray:
n = joint_angles.shape[0]
tf_batch = np.tile(np.eye(4), (n, 1, 1))
qi = 0
for rpy, xyz, axis in chain:
tf_batch = tf_batch @ _tf(rpy, xyz)
if axis is not None:
rot = _batch_axis_rot(axis, joint_angles[:, qi])
tf_batch = np.einsum("nij,njk->nik", tf_batch, rot)
qi += 1
return tf_batch[:, :3, 3]
# ── Data helpers ────────────────────────────────────────
def _flatten_names(obj: object) -> list[str]:
if isinstance(obj, dict):
out: list[str] = []
for v in obj.values():
out.extend(_flatten_names(v))
return out
if isinstance(obj, (list, tuple)):
out = []
for item in obj:
if isinstance(item, (list, tuple, dict)):
out.extend(_flatten_names(item))
else:
out.append(str(item))
return out
return [str(obj)]
def _detect_and_convert(vals: np.ndarray) -> np.ndarray:
mx = np.max(np.abs(vals))
if mx > 360:
print(f" Unit detection: servo ticks (max={mx:.0f})")
return (vals - 2048) / 2048 * np.pi
if mx > 6.3:
print(f" Unit detection: degrees (max={mx:.1f})")
return np.deg2rad(vals)
print(f" Unit detection: radians (max={mx:.3f})")
return vals.astype(np.float64)
def _find_joint_indices(features: dict, state_col: str, n_dim: int) -> tuple[list[int], list[int]]:
feat = features.get("observation.state", features.get(state_col, {}))
names = _flatten_names(feat.get("names", []))
left_idx: list[int] = []
right_idx: list[int] = []
if names and len(names) == n_dim:
names_l = [n.lower() for n in names]
print(f" Feature names: {names[:4]}{names[-4:]}")
for j in range(1, 8):
for i, nm in enumerate(names_l):
if f"left_joint_{j}" in nm and i not in left_idx:
left_idx.append(i)
break
for i, nm in enumerate(names_l):
if f"right_joint_{j}" in nm and i not in right_idx:
right_idx.append(i)
break
if len(left_idx) == 7 and len(right_idx) == 7:
print(f" Matched by name: left={left_idx} right={right_idx}")
return left_idx, right_idx
if n_dim >= 16:
print(" Falling back to positional: [0:7]=left, [8:15]=right")
return list(range(7)), list(range(8, 15))
if n_dim >= 14:
print(" Falling back to positional: [0:7]=left, [7:14]=right")
return list(range(7)), list(range(7, 14))
raise RuntimeError(f"State dim {n_dim} too small for bimanual 7-DOF robot")
def download_data(repo_id: str, camera_key: str) -> Path:
print(f" Downloading {repo_id} (parquet + {camera_key} videos) …")
return Path(
snapshot_download(
repo_id=repo_id,
repo_type="dataset",
allow_patterns=[
"meta/**",
"data/**",
f"videos/{camera_key}/**",
],
)
)
# ── Data loading ────────────────────────────────────────
def _build_action_chunks(
actions: np.ndarray, episode_ids: np.ndarray, chunk_size: int
) -> tuple[np.ndarray, np.ndarray]:
"""
For each frame, concatenate the next chunk_size actions from the same episode.
Returns (action_chunks, valid_mask).
"""
n = len(actions)
act_dim = actions.shape[1]
chunks = np.zeros((n, chunk_size * act_dim), dtype=np.float64)
valid = np.zeros(n, dtype=bool)
for i in range(n):
end = i + chunk_size
if end > n:
continue
if episode_ids[i] != episode_ids[end - 1]:
continue
chunks[i] = actions[i:end].ravel()
valid[i] = True
return chunks, valid
def load_state_action_data(local: Path, max_frames: int, chunk_size: int, rng: np.random.Generator) -> dict:
"""
Load observation.state and action, build action chunks, subsample, normalize.
Also returns the original row indices (`chosen_idx`) for video frame mapping.
"""
info = json.loads((local / "meta" / "info.json").read_text())
features = info.get("features", {})
dfs = [pd.read_parquet(pq) for pq in sorted((local / "data").glob("**/*.parquet"))]
df = pd.concat(dfs, ignore_index=True)
n_total = len(df)
print(f" Total frames: {n_total:,}")
state_col = next((c for c in df.columns if "observation.state" in c), None)
action_col = next((c for c in df.columns if c == "action"), None)
if state_col is None:
raise RuntimeError(f"No observation.state column. Available: {list(df.columns)}")
if action_col is None:
raise RuntimeError(f"No action column. Available: {list(df.columns)}")
ep_col = next((c for c in df.columns if c == "episode_index"), None)
if ep_col is None:
raise RuntimeError(f"No episode_index column. Available: {list(df.columns)}")
state_all = np.stack(df[state_col].values).astype(np.float64)
action_all = np.stack(df[action_col].values).astype(np.float64)
episode_all = df[ep_col].values.astype(np.int64)
n_dim = state_all.shape[1]
act_dim = action_all.shape[1]
print(f" State dim: {n_dim} Action dim: {act_dim} Chunk size: {chunk_size}")
print(f" Action chunk dim: {chunk_size * act_dim}")
left_idx, right_idx = _find_joint_indices(features, state_col, n_dim)
print(" Building action chunks …")
action_chunks, valid = _build_action_chunks(action_all, episode_all, chunk_size)
valid_idx = np.where(valid)[0]
print(f" Valid frames (with full action chunk): {len(valid_idx):,} / {n_total:,}")
if len(valid_idx) > max_frames:
chosen = np.sort(rng.choice(valid_idx, max_frames, replace=False))
else:
chosen = valid_idx
print(f" Using {len(chosen):,} frames")
state_raw = state_all[chosen]
action_raw = action_chunks[chosen]
episode_ids = episode_all[chosen]
state_mean = state_raw.mean(axis=0)
state_std = state_raw.std(axis=0)
state_std[state_std < 1e-8] = 1.0
state_norm = (state_raw - state_mean) / state_std
action_mean = action_raw.mean(axis=0)
action_std = action_raw.std(axis=0)
action_std[action_std < 1e-8] = 1.0
action_norm = (action_raw - action_mean) / action_std
return {
"state_raw": state_raw,
"state_norm": state_norm,
"action_raw": action_raw,
"action_norm": action_norm,
"episode_ids": episode_ids,
"episode_all": episode_all,
"left_joint_idx": left_idx,
"right_joint_idx": right_idx,
"n_total": n_total,
"chosen_idx": chosen,
"df": df,
}
# ── Video → frame extraction ──────────────────────────────
def build_video_lookup(local: Path, camera_key: str) -> dict:
"""
Build a mapping from episode_index → {video_path, fps, from_ts}.
"""
info = json.loads((local / "meta" / "info.json").read_text())
fps = info["fps"]
video_template = info.get(
"video_path",
"videos/{video_key}/chunk-{chunk_index:03d}/file-{file_index:03d}.mp4",
)
ep_rows = []
for pq in sorted((local / "meta" / "episodes").glob("**/*.parquet")):
ep_rows.append(pd.read_parquet(pq))
ep_df = pd.concat(ep_rows, ignore_index=True)
chunk_col = f"videos/{camera_key}/chunk_index"
file_col = f"videos/{camera_key}/file_index"
ts_from = f"videos/{camera_key}/from_timestamp"
if chunk_col not in ep_df.columns:
chunk_col = f"{camera_key}/chunk_index"
file_col = f"{camera_key}/file_index"
ts_from = f"{camera_key}/from_timestamp"
lookup: dict[int, dict] = {}
for _, row in ep_df.iterrows():
ci = int(row[chunk_col])
fi = int(row[file_col])
video_rel = video_template.format(video_key=camera_key, chunk_index=ci, file_index=fi)
lookup[int(row["episode_index"])] = {
"video_path": local / video_rel,
"from_ts": float(row[ts_from]),
"fps": fps,
}
return lookup
def _decode_video_frames(video_path: str) -> list[np.ndarray]:
"""Decode all frames from a video file using PyAV. Returns list of RGB arrays."""
container = av.open(video_path)
stream = container.streams.video[0]
stream.thread_type = "AUTO"
decoded = []
for frame in container.decode(stream):
decoded.append(frame.to_ndarray(format="rgb24"))
container.close()
return decoded
def extract_frames(
chosen_idx: np.ndarray,
episode_all: np.ndarray,
video_lookup: dict,
) -> list[np.ndarray | None]:
"""
Extract RGB frames for each chosen global index using PyAV.
Returns list of (H, W, 3) RGB arrays (or None on failure).
"""
unique_eps = np.unique(episode_all)
ep_start: dict[int, int] = {}
for ep in unique_eps:
ep_start[int(ep)] = int(np.where(episode_all == ep)[0][0])
# Build jobs: (output_index, video_path, local_frame_number)
jobs: list[tuple[int, str, int]] = []
for out_i, global_i in enumerate(chosen_idx):
ep = int(episode_all[global_i])
info = video_lookup.get(ep)
if info is None:
continue
local_frame = global_i - ep_start[ep]
jobs.append((out_i, str(info["video_path"]), local_frame))
# Group by video file, decode each video once
from collections import defaultdict
video_jobs: dict[str, list[tuple[int, int]]] = defaultdict(list)
for out_i, vpath, local_frame in jobs:
video_jobs[vpath].append((out_i, local_frame))
frames: list[np.ndarray | None] = [None] * len(chosen_idx)
extracted = 0
n_videos = len(video_jobs)
for vi, (vpath, frame_requests) in enumerate(video_jobs.items()):
if not Path(vpath).exists():
continue
try:
decoded = _decode_video_frames(vpath)
except Exception as exc:
print(f" Warning: failed to decode {Path(vpath).name}: {exc}")
continue
for out_i, local_frame in frame_requests:
if 0 <= local_frame < len(decoded):
frames[out_i] = decoded[local_frame]
extracted += 1
if (vi + 1) % 50 == 0 or (vi + 1) == n_videos:
print(f" Decoded {vi + 1}/{n_videos} videos ({extracted:,} frames so far)")
del decoded
print(f" Extracted {extracted:,} / {len(chosen_idx):,} frames from video")
return frames
# ── SigLIP encoding ─────────────────────────────────────
def encode_frames_siglip(
frames: list[np.ndarray | None],
model_name: str,
batch_size: int,
device: torch.device,
) -> np.ndarray:
"""
Encode RGB frames through SigLIP vision encoder.
Returns (N, embed_dim) float32 array. Frames that are None get a zero vector.
"""
print(f" Loading SigLIP model: {model_name}")
processor = AutoImageProcessor.from_pretrained(model_name)
model = AutoModel.from_pretrained(model_name).to(device).eval()
embed_dim = model.config.vision_config.hidden_size
n = len(frames)
embeddings = np.zeros((n, embed_dim), dtype=np.float32)
valid_indices = [i for i, f in enumerate(frames) if f is not None]
print(f" Encoding {len(valid_indices):,} valid frames in batches of {batch_size}")
for batch_start in range(0, len(valid_indices), batch_size):
batch_idx = valid_indices[batch_start : batch_start + batch_size]
pil_images = [Image.fromarray(frames[i]) for i in batch_idx]
inputs = processor(images=pil_images, return_tensors="pt").to(device)
with torch.no_grad():
image_features = model.get_image_features(**inputs)
image_features = torch.nn.functional.normalize(image_features, dim=-1)
embeddings[batch_idx] = image_features.cpu().numpy()
done = min(batch_start + batch_size, len(valid_indices))
if done % (batch_size * 10) == 0 or done == len(valid_indices):
print(f" {done:,} / {len(valid_indices):,} encoded")
del model, processor
torch.cuda.empty_cache()
return embeddings
# ── KNN consistency ─────────────────────────────────────
def compute_consistency(
features: np.ndarray,
action_norm: np.ndarray,
episode_ids: np.ndarray,
k: int,
label: str = "",
) -> np.ndarray:
"""
For each frame, find K nearest neighbors in feature space from other episodes.
Return per-frame action variance (mean across action dims).
"""
n = len(features)
print(f" Building KD-tree on {n:,} vectors ({label}) …")
tree = cKDTree(features)
k_query = min(k * 3, n - 1)
print(f" Querying {k_query} neighbors per frame …")
_dists, indices = tree.query(features, k=k_query + 1)
indices = indices[:, 1:]
print(f" Computing cross-episode action variance ({label}) …")
variance = np.zeros(n)
for i in range(n):
ep_i = episode_ids[i]
neighbors = indices[i]
cross_ep = neighbors[episode_ids[neighbors] != ep_i][:k]
if len(cross_ep) < 2:
variance[i] = 0.0
continue
neighbor_actions = action_norm[cross_ep]
variance[i] = np.mean(np.var(neighbor_actions, axis=0))
return variance
# ── Visualization ───────────────────────────────────────
def _style_ax(ax: plt.Axes) -> None:
ax.set_facecolor("#0d1117")
ax.tick_params(colors="#555", labelsize=8)
for spine in ax.spines.values():
spine.set_color("#333")
def _plot_histogram(ax: plt.Axes, variance: np.ndarray, title: str, color: str) -> None:
_style_ax(ax)
median_var = np.median(variance)
mean_var = np.mean(variance)
nonzero = variance[variance > 0]
if len(nonzero) > 0:
bins = np.logspace(np.log10(nonzero.min().clip(1e-6)), np.log10(nonzero.max()), 60)
ax.hist(nonzero, bins=bins, color=color, alpha=0.8, edgecolor="#222")
ax.set_xscale("log")
ax.axvline(median_var, color="#ff6600", linewidth=2, label=f"median={median_var:.3f}")
ax.axvline(mean_var, color="#ff2222", linewidth=2, linestyle="--", label=f"mean={mean_var:.3f}")
ax.set_xlabel("Action variance (log scale)", color="#888", fontsize=10)
ax.set_ylabel("Frame count", color="#888", fontsize=10)
ax.set_title(title, color="white", fontsize=11, pad=10)
ax.legend(fontsize=8, facecolor="#1a1a2e", edgecolor="#333", labelcolor="white")
def _plot_episode_curves(
ax: plt.Axes,
var_state: np.ndarray,
var_image: np.ndarray,
episode_ids: np.ndarray,
title: str,
) -> None:
_style_ax(ax)
unique_eps = np.unique(episode_ids)
ep_means_s = np.array([var_state[episode_ids == ep].mean() for ep in unique_eps])
ep_means_i = np.array([var_image[episode_ids == ep].mean() for ep in unique_eps])
sorted_s = np.sort(ep_means_s)[::-1]
sorted_i = np.sort(ep_means_i)[::-1]
ep_x = np.arange(len(unique_eps))
ax.fill_between(ep_x, sorted_s, alpha=0.2, color="#4363d8")
ax.plot(ep_x, sorted_s, color="#4363d8", linewidth=1.2, label=f"State (med={np.median(ep_means_s):.3f})")
ax.fill_between(ep_x, sorted_i, alpha=0.2, color="#e6194b")
ax.plot(ep_x, sorted_i, color="#e6194b", linewidth=1.2, label=f"Image (med={np.median(ep_means_i):.3f})")
ax.set_xlabel("Episode rank (worst → best)", color="#888", fontsize=10)
ax.set_ylabel("Mean action variance", color="#888", fontsize=10)
ax.set_title(title, color="white", fontsize=11, pad=10)
ax.legend(fontsize=8, facecolor="#1a1a2e", edgecolor="#333", labelcolor="white")
def _plot_heatmap(
ax: plt.Axes, fig: plt.Figure, tcp_xz: np.ndarray, variance: np.ndarray, title: str
) -> None:
_style_ax(ax)
order = np.argsort(variance)
pts = tcp_xz[order]
var_sorted = variance[order]
vmin = np.percentile(variance[variance > 0], 5) if np.any(variance > 0) else 0
vmax = np.percentile(variance[variance > 0], 95) if np.any(variance > 0) else 1
sc = ax.scatter(
pts[:, 0],
pts[:, 1],
c=var_sorted,
cmap=CONSISTENCY_CMAP,
s=0.5,
alpha=0.6,
vmin=vmin,
vmax=vmax,
rasterized=True,
)
ax.set_xlabel("X (m)", color="#888", fontsize=10)
ax.set_ylabel("Z (m)", color="#888", fontsize=10)
ax.set_title(title, color="white", fontsize=11, pad=10)
ax.set_aspect("equal")
cbar = fig.colorbar(sc, ax=ax, shrink=0.8, pad=0.02)
cbar.set_label("Action variance", color="white", fontsize=9)
cbar.ax.tick_params(colors="#aaa", labelsize=7)
def render(results: list[dict], out_path: Path) -> None:
"""
4-row x N-column figure:
Row 0: State-based variance histogram
Row 1: Image-based variance histogram
Row 2: Per-episode curves (both overlaid)
Row 3: Spatial heatmap (image-based variance)
"""
n_ds = len(results)
fig, axes = plt.subplots(4, n_ds, figsize=(9 * n_ds, 24), facecolor="#0d1117")
if n_ds == 1:
axes = axes[:, np.newaxis]
headline_parts = []
for col, r in enumerate(results):
label = r["label"]
var_s = r["var_state"]
var_i = r["var_image"]
tcp_xz = r["tcp_xz"]
episode_ids = r["episode_ids"]
med_s = np.median(var_s)
med_i = np.median(var_i)
headline_parts.append(f"{label}: state={med_s:.3f}, image={med_i:.3f}")
_plot_histogram(axes[0, col], var_s, f"{label}\nState-based variance (K={K_NEIGHBORS})", "#4363d8")
_plot_histogram(
axes[1, col], var_i, f"{label}\nImage-based variance (SigLIP, K={K_NEIGHBORS})", "#e6194b"
)
_plot_episode_curves(
axes[2, col],
var_s,
var_i,
episode_ids,
f"{label}\nPer-episode inconsistency ({len(np.unique(episode_ids)):,} episodes)",
)
_plot_heatmap(
axes[3, col],
fig,
tcp_xz,
var_i,
f"{label}\nImage-based variance by TCP position (XZ)",
)
fig.suptitle(
f"Action Consistency: State vs Image (chunk={ACTION_CHUNK_SIZE}, K={K_NEIGHBORS})\n"
+ " | ".join(headline_parts),
color="white",
fontsize=15,
y=0.99,
)
plt.tight_layout(rect=[0, 0, 1, 0.96])
plt.savefig(out_path, dpi=DPI, bbox_inches="tight", facecolor=fig.get_facecolor())
plt.close()
print(f"\n✓ Saved: {out_path}")
# ── Main ────────────────────────────────────────────────
def main() -> None:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Device: {device}")
rng = np.random.default_rng(SEED)
results = []
for ds in DATASETS:
repo_id, label = ds["repo_id"], ds["label"]
print(f"\n{'=' * 60}")
print(f" {label}: {repo_id}")
print(f"{'=' * 60}")
local = download_data(repo_id, CAMERA_KEY)
data = load_state_action_data(local, MAX_FRAMES, ACTION_CHUNK_SIZE, rng)
# --- State-based KNN ---
var_state = compute_consistency(
data["state_norm"], data["action_norm"], data["episode_ids"], K_NEIGHBORS, "state"
)
print(
f" State variance: median={np.median(var_state):.4f} "
f"mean={np.mean(var_state):.4f} p90={np.percentile(var_state, 90):.4f}"
)
# --- Image-based KNN ---
print("\n Preparing image embeddings …")
video_lookup = build_video_lookup(local, CAMERA_KEY)
frames = extract_frames(data["chosen_idx"], data["episode_all"], video_lookup)
embeddings = encode_frames_siglip(frames, ENCODER_MODEL, ENCODE_BATCH_SIZE, device)
del frames # free memory
var_image = compute_consistency(
embeddings, data["action_norm"], data["episode_ids"], K_NEIGHBORS, "image"
)
print(
f" Image variance: median={np.median(var_image):.4f} "
f"mean={np.mean(var_image):.4f} p90={np.percentile(var_image, 90):.4f}"
)
# FK for spatial heatmap
print(" Computing FK for spatial heatmap …")
left_raw = data["state_raw"][:, data["left_joint_idx"]]
left_rad = _detect_and_convert(left_raw)
left_tcp = batch_fk(LEFT_CHAIN, left_rad)
tcp_xz = left_tcp[:, [0, 2]]
results.append(
{
"label": label,
"var_state": var_state,
"var_image": var_image,
"episode_ids": data["episode_ids"],
"tcp_xz": tcp_xz,
"n_total": data["n_total"],
}
)
out = OUTPUT_DIR / "action_consistency_comparison.jpg"
render(results, out)
# Save worst-episodes summary (image-based, since that's the stronger signal)
worst_summary = {}
for r in results:
unique_eps = np.unique(r["episode_ids"])
ep_means = {int(ep): float(r["var_image"][r["episode_ids"] == ep].mean()) for ep in unique_eps}
ranked = sorted(ep_means.items(), key=lambda x: x[1], reverse=True)[:50]
worst_summary[r["label"]] = [{"episode": ep, "mean_variance": v} for ep, v in ranked]
worst_path = OUTPUT_DIR / "action_consistency_worst_episodes.json"
worst_path.write_text(json.dumps(worst_summary, indent=2))
print(f"✓ Saved worst episodes: {worst_path}")
if __name__ == "__main__":
main()
@@ -1,178 +0,0 @@
"""
Create a JPG grid of random frames sampled from a LeRobot video dataset.
Downloads metadata + video chunks from HuggingFace, picks random frames,
decodes them, and tiles into a single image.
"""
import json
import random
from pathlib import Path
import cv2
import numpy as np
import pandas as pd
from huggingface_hub import snapshot_download
REPO_ID = "lerobot-data-collection/level2_final_quality3"
CAMERA_KEY = "observation.images.base"
GRID_COLS = 15
GRID_ROWS = 10
THUMB_WIDTH = 160
OUTPUT_DIR = Path(__file__).resolve().parent / "outputs"
OUTPUT_DIR.mkdir(exist_ok=True)
SEED = 1
def download_metadata(repo_id: str) -> Path:
"""Download only metadata (no videos yet)."""
print(f"[1/3] Downloading metadata for {repo_id}")
return Path(
snapshot_download(
repo_id=repo_id,
repo_type="dataset",
allow_patterns=["meta/**"],
ignore_patterns=["*.mp4"],
)
)
def load_video_info(local: Path) -> tuple[str, list[dict], int]:
"""Parse info.json and episode parquets. Returns (camera_key, episode_rows, fps)."""
info = json.loads((local / "meta" / "info.json").read_text())
fps = info["fps"]
features = info["features"]
video_keys = [k for k, v in features.items() if v.get("dtype") == "video"]
if not video_keys:
raise RuntimeError("No video keys found in dataset features")
if CAMERA_KEY is not None:
if CAMERA_KEY not in video_keys:
raise RuntimeError(f"CAMERA_KEY='{CAMERA_KEY}' not found. Available: {video_keys}")
cam = CAMERA_KEY
else:
cam = video_keys[0]
print(f" camera='{cam}' all_cams={video_keys} fps={fps}")
ep_rows = []
for pq in sorted((local / "meta" / "episodes").glob("**/*.parquet")):
ep_rows.append(pd.read_parquet(pq))
ep_df = pd.concat(ep_rows, ignore_index=True)
video_template = info.get(
"video_path",
"videos/{video_key}/chunk-{chunk_index:03d}/file-{file_index:03d}.mp4",
)
chunk_col = f"videos/{cam}/chunk_index"
file_col = f"videos/{cam}/file_index"
ts_from = f"videos/{cam}/from_timestamp"
ts_to = f"videos/{cam}/to_timestamp"
if chunk_col not in ep_df.columns:
chunk_col = f"{cam}/chunk_index"
file_col = f"{cam}/file_index"
ts_from = f"{cam}/from_timestamp"
ts_to = f"{cam}/to_timestamp"
episodes = []
for _, row in ep_df.iterrows():
ci = int(row[chunk_col])
fi = int(row[file_col])
episodes.append(
{
"episode_index": int(row["episode_index"]),
"chunk_index": ci,
"file_index": fi,
"from_ts": float(row[ts_from]),
"to_ts": float(row[ts_to]),
"video_rel": video_template.format(video_key=cam, chunk_index=ci, file_index=fi),
}
)
return cam, episodes, fps
def pick_random_frames(episodes: list[dict], fps: int, n: int, rng: random.Random) -> list[dict]:
"""Pick n random (episode, timestamp) pairs, return sorted by video file for efficient access."""
picks = []
for _ in range(n):
ep = rng.choice(episodes)
duration = ep["to_ts"] - ep["from_ts"]
if duration <= 0:
continue
t = ep["from_ts"] + rng.random() * duration
picks.append({**ep, "seek_ts": t})
picks.sort(key=lambda p: (p["video_rel"], p["seek_ts"]))
return picks
def download_video_files(repo_id: str, local: Path, picks: list[dict]) -> None:
"""Download only the video files we need."""
needed = sorted({p["video_rel"] for p in picks})
print(f"[2/3] Downloading {len(needed)} video file(s) …")
snapshot_download(
repo_id=repo_id,
repo_type="dataset",
local_dir=str(local),
allow_patterns=needed,
)
def extract_frame(video_path: Path, seek_ts: float) -> np.ndarray | None:
"""Decode a single frame at the given timestamp."""
cap = cv2.VideoCapture(str(video_path))
cap.set(cv2.CAP_PROP_POS_MSEC, seek_ts * 1000.0)
ret, frame = cap.read()
cap.release()
return frame if ret else None
def build_grid(frames: list[np.ndarray], cols: int, thumb_w: int) -> np.ndarray:
"""Resize frames to uniform thumbnails and tile into a grid."""
if not frames:
raise RuntimeError("No frames decoded")
h0, w0 = frames[0].shape[:2]
thumb_h = int(thumb_w * h0 / w0)
thumbs = [cv2.resize(f, (thumb_w, thumb_h), interpolation=cv2.INTER_AREA) for f in frames]
rows = []
for i in range(0, len(thumbs), cols):
row_thumbs = thumbs[i : i + cols]
while len(row_thumbs) < cols:
row_thumbs.append(np.zeros_like(row_thumbs[0]))
rows.append(np.hstack(row_thumbs))
return np.vstack(rows)
def main() -> None:
rng = random.Random(SEED)
n_frames = GRID_COLS * GRID_ROWS
local = download_metadata(REPO_ID)
cam, episodes, fps = load_video_info(local)
picks = pick_random_frames(episodes, fps, n_frames, rng)
download_video_files(REPO_ID, local, picks)
print(f"[3/3] Decoding {n_frames} frames …")
frames: list[np.ndarray] = []
for p in picks:
vp = local / p["video_rel"]
if not vp.exists():
print(f" SKIP: {p['video_rel']} not found")
continue
frame = extract_frame(vp, p["seek_ts"])
if frame is not None:
frames.append(frame)
print(f" Decoded {len(frames)}/{n_frames} frames")
grid = build_grid(frames, GRID_COLS, THUMB_WIDTH)
safe_name = REPO_ID.replace("/", "_")
out_path = OUTPUT_DIR / f"{safe_name}_grid_{GRID_COLS}x{GRID_ROWS}.jpg"
cv2.imwrite(str(out_path), grid, [cv2.IMWRITE_JPEG_QUALITY, 92])
print(f"\n✓ Saved: {out_path} ({grid.shape[1]}×{grid.shape[0]})")
if __name__ == "__main__":
main()
@@ -1,526 +0,0 @@
"""
Create MP4 videos with sarm_progress overlay for specified episodes.
Downloads datasets from HuggingFace, extracts episode video + progress data,
and draws the progress line directly on each frame (no panel, no axes).
"""
import json
import subprocess
from pathlib import Path
import cv2
import numpy as np
import pandas as pd
from huggingface_hub import snapshot_download
DATASETS = [
{"repo_id": "lerobot-data-collection/level2_final_quality3", "episode": 250},
]
CAMERA_KEY = (
"observation.images.base" # None = auto-select first camera, or set e.g. "observation.images.top"
)
OUTPUT_DIR = Path(__file__).resolve().parent / "outputs"
OUTPUT_DIR.mkdir(exist_ok=True)
# Progress line spans the full video height
GRAPH_Y_TOP_FRAC = 0.01
GRAPH_Y_BOT_FRAC = 0.99
LINE_THICKNESS = 3
SHADOW_THICKNESS = 6 # white edge thickness
REF_ALPHA = 0.45 # opacity of the 1.0 reference line
FILL_ALPHA = 0.55 # opacity of the grey fill under the line
SCORE_FONT_SCALE = 0.8
TASK_FONT_SCALE = 0.55
def download_episode(repo_id: str, episode: int) -> Path:
"""Download only the files needed for this episode."""
# We need: meta/, sarm_progress.parquet, and the relevant video/data chunks.
# We'll download meta + sarm first, then figure out chunks.
print(f"\n[1/5] Downloading metadata for {repo_id}")
local = Path(
snapshot_download(
repo_id=repo_id,
repo_type="dataset",
allow_patterns=["meta/**", "sarm_progress.parquet"],
ignore_patterns=["*.mp4"],
)
)
return local
def load_episode_meta(local: Path, episode: int) -> dict:
"""Read info.json + episode-level parquet to get fps, video paths, timestamps."""
info = json.loads((local / "meta" / "info.json").read_text())
fps = info["fps"]
features = info["features"]
# Find video keys (keys whose dtype=="video")
video_keys = [k for k, v in features.items() if v.get("dtype") == "video"]
if not video_keys:
raise RuntimeError("No video keys found in dataset features")
if CAMERA_KEY is not None:
if CAMERA_KEY not in video_keys:
raise RuntimeError(f"CAMERA_KEY='{CAMERA_KEY}' not found. Available: {video_keys}")
first_cam = CAMERA_KEY
else:
first_cam = video_keys[0]
print(f" fps={fps} camera='{first_cam}' all_cams={video_keys}")
# Load all episode-meta parquet files and find our episode
ep_rows = []
for pq in sorted((local / "meta" / "episodes").glob("**/*.parquet")):
df = pd.read_parquet(pq)
ep_rows.append(df)
ep_df = pd.concat(ep_rows, ignore_index=True)
row = ep_df[ep_df["episode_index"] == episode]
if row.empty:
raise RuntimeError(f"Episode {episode} not found in episode metadata")
row = row.iloc[0]
# Extract video chunk/file index for first camera
# Try both dot and slash variants of the key
chunk_col = f"videos/{first_cam}/chunk_index"
file_col = f"videos/{first_cam}/file_index"
ts_col = f"videos/{first_cam}/from_timestamp"
to_col = f"videos/{first_cam}/to_timestamp"
# Some datasets use different column naming
if chunk_col not in row.index:
# Try without the 'videos/' prefix
chunk_col = f"{first_cam}/chunk_index"
file_col = f"{first_cam}/file_index"
ts_col = f"{first_cam}/from_timestamp"
to_col = f"{first_cam}/to_timestamp"
if chunk_col not in row.index:
raise RuntimeError(
f"Cannot find video metadata columns for {first_cam}.\nAvailable: {list(row.index)}"
)
chunk_idx = int(row[chunk_col])
file_idx = int(row[file_col])
from_ts = float(row[ts_col])
to_ts = float(row[to_col])
video_template = info.get(
"video_path", "videos/{video_key}/chunk-{chunk_index:03d}/file-{file_index:03d}.mp4"
)
video_rel = video_template.format(
video_key=first_cam,
chunk_index=chunk_idx,
file_index=file_idx,
)
# Load task name for this episode
# tasks.parquet uses the task string as the row index; task_index column holds the int id
task_name = ""
try:
# Prefer the 'tasks' list directly on the episode row
if "tasks" in row.index and row["tasks"] is not None:
tasks_val = row["tasks"]
if isinstance(tasks_val, (list, tuple, np.ndarray)) and len(tasks_val) > 0:
task_name = str(tasks_val[0])
else:
task_name = str(tasks_val).strip("[]'")
else:
tasks_pq = local / "meta" / "tasks.parquet"
if tasks_pq.exists():
tasks_df = pd.read_parquet(tasks_pq)
# Row index is the task string; task_index column is the int
task_idx = int(row.get("task_index", 0)) if "task_index" in row.index else 0
match = tasks_df[tasks_df["task_index"] == task_idx]
if not match.empty:
task_name = str(match.index[0])
print(f" Task name: '{task_name}'")
except Exception as e:
print(f" WARNING: could not load task name: {e}")
return {
"fps": fps,
"first_cam": first_cam,
"video_rel": video_rel,
"chunk_index": chunk_idx,
"file_index": file_idx,
"from_ts": from_ts,
"to_ts": to_ts,
"task_name": task_name,
}
def download_video(repo_id: str, local: Path, video_rel: str) -> Path:
"""Download the specific video file if not already present."""
video_path = local / video_rel
if video_path.exists():
print(f" Video already cached: {video_path}")
return video_path
print(f"[2/5] Downloading video file {video_rel}")
snapshot_download(
repo_id=repo_id,
repo_type="dataset",
local_dir=str(local),
allow_patterns=[video_rel],
)
if not video_path.exists():
raise RuntimeError(f"Video not found after download: {video_path}")
return video_path
def load_progress(local: Path, episode: int) -> np.ndarray | None:
"""Load sarm_progress values for this episode. Returns sorted array of (frame_index, progress)."""
pq_path = local / "sarm_progress.parquet"
if not pq_path.exists():
print(" WARNING: sarm_progress.parquet not found, trying data parquet …")
return None
df = pd.read_parquet(pq_path)
print(f" sarm_progress.parquet columns: {list(df.columns)}")
ep_df = df[df["episode_index"] == episode].copy()
if ep_df.empty:
print(f" WARNING: No sarm_progress rows for episode {episode}")
return None
ep_df = ep_df.sort_values("frame_index")
# Prefer dense, fall back to sparse
if "progress_dense" in ep_df.columns and ep_df["progress_dense"].notna().any():
prog_col = "progress_dense"
elif "progress_sparse" in ep_df.columns:
prog_col = "progress_sparse"
else:
# Last resort: any column with 'progress' in the name
prog_cols = [c for c in ep_df.columns if "progress" in c.lower()]
if not prog_cols:
return None
prog_col = prog_cols[0]
print(f" Using progress column: '{prog_col}'")
return ep_df[["frame_index", prog_col]].rename(columns={prog_col: "progress"}).values
def extract_episode_clip(video_path: Path, from_ts: float, to_ts: float, out_path: Path) -> Path:
"""Use ffmpeg to cut the episode segment from the combined video file."""
duration = to_ts - from_ts
print(f"[3/5] Extracting clip [{from_ts:.3f}s → {to_ts:.3f}s] ({duration:.2f}s) …")
cmd = [
"ffmpeg",
"-y",
"-ss",
str(from_ts),
"-i",
str(video_path),
"-t",
str(duration),
"-c:v",
"libx264",
"-preset",
"fast",
"-crf",
"18",
"-an",
str(out_path),
]
result = subprocess.run(cmd, capture_output=True, text=True)
if result.returncode != 0:
raise RuntimeError(f"ffmpeg clip extraction failed:\n{result.stderr}")
return out_path
def precompute_pixels(
progress_data: np.ndarray,
n_frames: int,
frame_w: int,
frame_h: int,
) -> np.ndarray:
"""
Map each progress sample to pixel coordinates.
Returns array of shape (N, 2) with (x, y) in pixel space.
x spans full video width; y maps progress [0,1] to graph band.
"""
frame_indices = progress_data[:, 0].astype(float)
progress_vals = np.clip(progress_data[:, 1].astype(float), 0.0, 1.0)
y_top = int(frame_h * GRAPH_Y_TOP_FRAC)
y_bot = int(frame_h * GRAPH_Y_BOT_FRAC)
graph_h = y_bot - y_top
xs = (frame_indices / (n_frames - 1) * (frame_w - 1)).astype(int)
# progress=1 → y_top, progress=0 → y_bot
ys = (y_bot - progress_vals * graph_h).astype(int)
return np.stack([xs, ys], axis=1) # (N, 2)
def progress_color(t: float) -> tuple[int, int, int]:
"""Interpolate BGR color red→green based on normalised position t in [0,1]."""
r = int(255 * (1.0 - t))
g = int(255 * t)
return (0, g, r) # BGR
def prerender_fill(
pixels: np.ndarray,
frame_w: int,
frame_h: int,
) -> np.ndarray:
"""Pre-render the full grey fill polygon under the curve as a BGRA image."""
y_bot = int(frame_h * GRAPH_Y_BOT_FRAC)
fill_img = np.zeros((frame_h, frame_w, 4), dtype=np.uint8)
poly = np.concatenate(
[
pixels,
[[pixels[-1][0], y_bot], [pixels[0][0], y_bot]],
],
axis=0,
).astype(np.int32)
cv2.fillPoly(fill_img, [poly], color=(128, 128, 128, int(255 * FILL_ALPHA)))
return fill_img
def alpha_composite(base: np.ndarray, overlay_bgra: np.ndarray, x_max: int) -> None:
"""Blend overlay onto base in-place, but only for x < x_max."""
if x_max <= 0:
return
roi_b = base[:, :x_max]
roi_o = overlay_bgra[:, :x_max]
alpha = roi_o[:, :, 3:4].astype(np.float32) / 255.0
roi_b[:] = np.clip(
roi_o[:, :, :3].astype(np.float32) * alpha + roi_b.astype(np.float32) * (1.0 - alpha),
0,
255,
).astype(np.uint8)
def draw_text_outlined(
frame: np.ndarray,
text: str,
pos: tuple[int, int],
font_scale: float,
thickness: int = 1,
) -> None:
"""Draw text with a dark outline for readability on any background."""
font = cv2.FONT_HERSHEY_SIMPLEX
cv2.putText(frame, text, pos, font, font_scale, (0, 0, 0), thickness + 2, cv2.LINE_AA)
cv2.putText(frame, text, pos, font, font_scale, (255, 255, 255), thickness, cv2.LINE_AA)
def composite_video(
clip_path: Path,
progress_data: np.ndarray,
out_path: Path,
fps: float,
frame_h: int,
frame_w: int,
task_name: str = "",
) -> Path:
"""Read clip frames, draw gradient progress line with fill + labels, export as GIF."""
    probe = cv2.VideoCapture(str(clip_path))
    n_total = int(probe.get(cv2.CAP_PROP_FRAME_COUNT))
    probe.release()
pixels = precompute_pixels(progress_data, n_total, frame_w, frame_h)
y_ref = int(frame_h * GRAPH_Y_TOP_FRAC)
# Pre-render fill polygon (line is drawn per-frame with live color)
fill_img = prerender_fill(pixels, frame_w, frame_h)
# 1.0 reference line overlay (full width, drawn once)
ref_img = np.zeros((frame_h, frame_w, 4), dtype=np.uint8)
cv2.line(ref_img, (0, y_ref), (frame_w - 1, y_ref), (200, 200, 200, int(255 * REF_ALPHA)), 1, cv2.LINE_AA)
frame_indices = progress_data[:, 0].astype(int)
progress_vals = progress_data[:, 1].astype(float)
print(f"[4/4] Compositing {n_total} frames …")
cap = cv2.VideoCapture(str(clip_path))
fourcc = cv2.VideoWriter_fourcc(*"mp4v")
tmp_path = out_path.parent / (out_path.stem + "_tmp.mp4")
writer = cv2.VideoWriter(str(tmp_path), fourcc, fps, (frame_w, frame_h))
fi = 0
while True:
ret, frame = cap.read()
if not ret:
break
n_drawn = int(np.searchsorted(frame_indices, fi, side="right"))
x_cur = int(pixels[min(n_drawn, len(pixels)) - 1][0]) + 1 if n_drawn > 0 else 0
# 1. reference line (full width, always)
alpha_composite(frame, ref_img, frame_w)
# 2. grey fill under curve up to current x
alpha_composite(frame, fill_img, x_cur)
# 3. progress line — single color that transitions red→green over time
if n_drawn >= 2:
t_cur = (n_drawn - 1) / max(len(progress_vals) - 1, 1)
line_col = progress_color(t_cur)
pts = pixels[:n_drawn].reshape(-1, 1, 2).astype(np.int32)
cv2.polylines(
frame,
[pts],
isClosed=False,
color=(255, 255, 255),
thickness=SHADOW_THICKNESS,
lineType=cv2.LINE_AA,
)
cv2.polylines(
frame, [pts], isClosed=False, color=line_col, thickness=LINE_THICKNESS, lineType=cv2.LINE_AA
)
# 4. score — bottom right
if n_drawn > 0:
score = float(progress_vals[min(n_drawn, len(progress_vals)) - 1])
score_text = f"{score:.2f}"
(tw, th), _ = cv2.getTextSize(score_text, cv2.FONT_HERSHEY_SIMPLEX, SCORE_FONT_SCALE, 2)
sx = frame_w - tw - 12
sy = frame_h - 12
# coloured score matching current gradient position
t_cur = (n_drawn - 1) / max(len(progress_vals) - 1, 1)
score_col = progress_color(t_cur)
cv2.putText(
frame,
score_text,
(sx, sy),
cv2.FONT_HERSHEY_SIMPLEX,
SCORE_FONT_SCALE,
(0, 0, 0),
4,
cv2.LINE_AA,
)
cv2.putText(
frame,
score_text,
(sx, sy),
cv2.FONT_HERSHEY_SIMPLEX,
SCORE_FONT_SCALE,
score_col,
2,
cv2.LINE_AA,
)
# 5. task name — top centre
if task_name:
(tw, _), _ = cv2.getTextSize(task_name, cv2.FONT_HERSHEY_SIMPLEX, TASK_FONT_SCALE, 1)
tx = max((frame_w - tw) // 2, 4)
draw_text_outlined(frame, task_name, (tx, 22), TASK_FONT_SCALE)
writer.write(frame)
fi += 1
if fi % 100 == 0:
print(f" Frame {fi}/{n_total}", end="\r")
cap.release()
writer.release()
print()
    # Convert to GIF: full resolution, 10 fps, 128-color diff palette (<40MB)
gif_path = out_path.with_suffix(".gif")
palette = out_path.parent / "_palette.png"
r1 = subprocess.run( # nosec B607
[
"ffmpeg",
"-y",
"-i",
str(tmp_path),
"-vf",
f"fps=10,scale={frame_w}:-1:flags=lanczos,palettegen=max_colors=128:stats_mode=diff",
"-update",
"1",
str(palette),
],
capture_output=True,
text=True,
)
if r1.returncode != 0:
print(f" WARNING: palettegen failed:\n{r1.stderr[-500:]}")
r2 = subprocess.run( # nosec B607
[
"ffmpeg",
"-y",
"-i",
str(tmp_path),
"-i",
str(palette),
"-filter_complex",
f"fps=10,scale={frame_w}:-1:flags=lanczos[v];[v][1:v]paletteuse=dither=bayer:bayer_scale=3",
str(gif_path),
],
capture_output=True,
text=True,
)
if r2.returncode != 0:
print(f" WARNING: gif encode failed:\n{r2.stderr[-500:]}")
tmp_path.unlink(missing_ok=True)
palette.unlink(missing_ok=True)
return gif_path
def process_dataset(repo_id: str, episode: int):
safe_name = repo_id.replace("/", "_")
print(f"\n{'=' * 60}")
print(f"Processing: {repo_id} | episode {episode}")
print(f"{'=' * 60}")
# 1. Download metadata
local = download_episode(repo_id, episode)
print(f" Local cache: {local}")
# 2. Read episode metadata
ep_meta = load_episode_meta(local, episode)
print(f" Episode meta: {ep_meta}")
# 3. Download video file
video_path = download_video(repo_id, local, ep_meta["video_rel"])
# 4. Extract clip
clip_path = OUTPUT_DIR / f"{safe_name}_ep{episode}_clip.mp4"
extract_episode_clip(video_path, ep_meta["from_ts"], ep_meta["to_ts"], clip_path)
# 5. Load progress data
progress_data = load_progress(local, episode)
if progress_data is None:
print(" ERROR: Could not load sarm_progress data. Skipping overlay.")
return
n_progress = len(progress_data)
print(f" Progress frames: {n_progress}")
# 6. Get clip dimensions
cap = cv2.VideoCapture(str(clip_path))
frame_w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
n_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
actual_fps = cap.get(cv2.CAP_PROP_FPS) or ep_meta["fps"]
cap.release()
print(f" Clip: {frame_w}×{frame_h} {n_frames} frames @ {actual_fps:.1f}fps")
# 7. Composite (draw line directly on frames)
out_path = OUTPUT_DIR / f"{safe_name}_ep{episode}_progress.mp4"
final = composite_video(
clip_path,
progress_data,
out_path,
actual_fps,
frame_h,
frame_w,
task_name=ep_meta.get("task_name", ""),
)
clip_path.unlink(missing_ok=True)
print(f"\n✓ Done: {final}")
return final
if __name__ == "__main__":
results = []
for cfg in DATASETS:
try:
out = process_dataset(cfg["repo_id"], cfg["episode"])
if out:
results.append(out)
except Exception as e:
print(f"\nERROR processing {cfg['repo_id']}: {e}")
import traceback
traceback.print_exc()
print("\n" + "=" * 60)
print("Output files:")
for r in results:
print(f" {r}")
@@ -1,496 +0,0 @@
"""
Visualize end-effector workspace density and trajectory clusters for OpenArm datasets.
Downloads joint position data (no videos) from HuggingFace, computes forward
kinematics per episode, clusters trajectories with K-means, and renders
2D projections comparing dataset coverage and multimodality.
"""
import json
from pathlib import Path
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from huggingface_hub import snapshot_download
from sklearn.cluster import KMeans
DATASETS = [
{"repo_id": "lerobot-data-collection/level2_final_quality3", "label": "HQ curated"},
{"repo_id": "lerobot-data-collection/level12_rac_2_2026-02-08_1", "label": "Full collection"},
]
OUTPUT_DIR = Path(__file__).resolve().parent / "outputs"
OUTPUT_DIR.mkdir(exist_ok=True)
N_CLUSTERS = 10
WAYPOINTS = 50
SEED = 42
DPI = 180
CLUSTER_COLORS = [
"#e6194b",
"#3cb44b",
"#4363d8",
"#f58231",
"#911eb4",
"#42d4f4",
"#f032e6",
"#bfef45",
"#fabed4",
"#dcbeff",
"#9a6324",
"#fffac8",
"#800000",
"#aaffc3",
"#808000",
"#ffd8b1",
"#000075",
"#a9a9a9",
]
# FK chains extracted from OpenArm bimanual URDF.
# Each entry: (rpy, xyz, revolute_axis_or_None).
LEFT_CHAIN = [
((-np.pi / 2, 0, 0), (0, 0.031, 0.698), None),
((0, 0, 0), (0, 0, 0.0625), (0, 0, 1)),
((-np.pi / 2, 0, 0), (-0.0301, 0, 0.06), (-1, 0, 0)),
((0, 0, 0), (0.0301, 0, 0.06625), (0, 0, 1)),
((0, 0, 0), (0, 0.0315, 0.15375), (0, 1, 0)),
((0, 0, 0), (0, -0.0315, 0.0955), (0, 0, 1)),
((0, 0, 0), (0.0375, 0, 0.1205), (1, 0, 0)),
((0, 0, 0), (-0.0375, 0, 0), (0, -1, 0)),
((0, 0, 0), (0, 0, 0.1001), None),
((0, 0, 0), (0, 0, 0.08), None),
]
RIGHT_CHAIN = [
((np.pi / 2, 0, 0), (0, -0.031, 0.698), None),
((0, 0, 0), (0, 0, 0.0625), (0, 0, 1)),
((np.pi / 2, 0, 0), (-0.0301, 0, 0.06), (-1, 0, 0)),
((0, 0, 0), (0.0301, 0, 0.06625), (0, 0, 1)),
((0, 0, 0), (0, 0.0315, 0.15375), (0, 1, 0)),
((0, 0, 0), (0, -0.0315, 0.0955), (0, 0, 1)),
((0, 0, 0), (0.0375, 0, 0.1205), (1, 0, 0)),
((0, 0, 0), (-0.0375, 0, 0), (0, 1, 0)),
((0, 0, 0), (0, 0, 0.1001), None),
((0, 0, 0), (0, 0, 0.08), None),
]
# ── FK math ─────────────────────────────────────────────
def _rot_x(a: float) -> np.ndarray:
c, s = np.cos(a), np.sin(a)
return np.array([[1, 0, 0], [0, c, -s], [0, s, c]])
def _rot_y(a: float) -> np.ndarray:
c, s = np.cos(a), np.sin(a)
return np.array([[c, 0, s], [0, 1, 0], [-s, 0, c]])
def _rot_z(a: float) -> np.ndarray:
c, s = np.cos(a), np.sin(a)
return np.array([[c, -s, 0], [s, c, 0], [0, 0, 1]])
def _tf(rpy: tuple, xyz: tuple) -> np.ndarray:
"""Build a 4x4 homogeneous transform from URDF rpy + xyz."""
r, p, y = rpy
mat = np.eye(4)
mat[:3, :3] = _rot_z(y) @ _rot_y(p) @ _rot_x(r)
mat[:3, 3] = xyz
return mat
def _batch_axis_rot(axis: tuple, angles: np.ndarray) -> np.ndarray:
"""Batched Rodrigues rotation: (n,) angles around a fixed axis → (n, 4, 4)."""
n = len(angles)
ax = np.asarray(axis, dtype=np.float64)
ax = ax / np.linalg.norm(ax)
x, y, z = ax
c = np.cos(angles)
s = np.sin(angles)
t = 1 - c
rot = np.zeros((n, 4, 4))
rot[:, 0, 0] = t * x * x + c
rot[:, 0, 1] = t * x * y - s * z
rot[:, 0, 2] = t * x * z + s * y
rot[:, 1, 0] = t * x * y + s * z
rot[:, 1, 1] = t * y * y + c
rot[:, 1, 2] = t * y * z - s * x
rot[:, 2, 0] = t * x * z - s * y
rot[:, 2, 1] = t * y * z + s * x
rot[:, 2, 2] = t * z * z + c
rot[:, 3, 3] = 1.0
return rot
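# Minimal sanity check for the Rodrigues construction above (hypothetical snippet):
#   >>> r = _batch_axis_rot((0, 0, 1), np.array([np.pi / 2]))
#   >>> np.allclose(r[0, :3, :3] @ np.array([1.0, 0.0, 0.0]), [0.0, 1.0, 0.0])
#   True
# i.e. a +90 degree rotation about z maps the x-axis onto the y-axis, as expected.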
def batch_fk(chain: list, joint_angles: np.ndarray) -> np.ndarray:
"""Vectorized FK: (n, 7) radians → (n, 3) TCP positions in world frame."""
n = joint_angles.shape[0]
tf_batch = np.tile(np.eye(4), (n, 1, 1))
qi = 0
for rpy, xyz, axis in chain:
tf_batch = tf_batch @ _tf(rpy, xyz)
if axis is not None:
rot = _batch_axis_rot(axis, joint_angles[:, qi])
tf_batch = np.einsum("nij,njk->nik", tf_batch, rot)
qi += 1
return tf_batch[:, :3, 3]
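# Usage sketch (shapes only; the zero joint vector is a placeholder, not real robot data):
#   >>> batch_fk(LEFT_CHAIN, np.zeros((4, 7))).shape
#   (4, 3)
# Each row is the left TCP position, in metres, expressed in the world frame.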
# ── Data loading ────────────────────────────────────────
def _flatten_names(obj: object) -> list[str]:
"""Recursively flatten a names structure (list, dict, or nested) into a flat string list."""
if isinstance(obj, dict):
out: list[str] = []
for v in obj.values():
out.extend(_flatten_names(v))
return out
if isinstance(obj, (list, tuple)):
out = []
for item in obj:
if isinstance(item, (list, tuple, dict)):
out.extend(_flatten_names(item))
else:
out.append(str(item))
return out
return [str(obj)]
def _detect_and_convert(vals: np.ndarray) -> np.ndarray:
"""Auto-detect servo ticks / degrees / radians and convert to radians."""
mx = np.max(np.abs(vals))
if mx > 360:
print(f" Unit detection: servo ticks (max={mx:.0f})")
return (vals - 2048) / 2048 * np.pi
if mx > 6.3:
print(f" Unit detection: degrees (max={mx:.1f})")
return np.deg2rad(vals)
print(f" Unit detection: radians (max={mx:.3f})")
return vals.astype(np.float64)
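# Example conversions (illustrative inputs, not taken from a dataset):
#   max |val| = 3000 -> treated as servo ticks; 2048 maps to 0 rad, 4096 to +pi.
#   max |val| = 90   -> treated as degrees;     90 maps to pi/2 rad.
#   max |val| = 1.2  -> already radians; values pass through unchanged.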
def _find_joint_indices(features: dict, state_col: str, n_dim: int) -> tuple[list[int], list[int]]:
"""Try to find left/right joint indices from info.json feature names."""
feat = features.get("observation.state", features.get(state_col, {}))
names = _flatten_names(feat.get("names", []))
left_idx: list[int] = []
right_idx: list[int] = []
if names and len(names) == n_dim:
names_l = [n.lower() for n in names]
print(f" Feature names: {names[:4]}{names[-4:]}")
for j in range(1, 8):
for i, nm in enumerate(names_l):
if f"left_joint_{j}" in nm and i not in left_idx:
left_idx.append(i)
break
for i, nm in enumerate(names_l):
if f"right_joint_{j}" in nm and i not in right_idx:
right_idx.append(i)
break
if len(left_idx) == 7 and len(right_idx) == 7:
print(f" Matched by name: left={left_idx} right={right_idx}")
return left_idx, right_idx
if n_dim >= 16:
print(" Falling back to positional: [0:7]=left, [8:15]=right")
return list(range(7)), list(range(8, 15))
if n_dim >= 14:
print(" Falling back to positional: [0:7]=left, [7:14]=right")
return list(range(7)), list(range(7, 14))
raise RuntimeError(f"State dim {n_dim} too small for bimanual 7-DOF robot")
def download_data(repo_id: str) -> Path:
print(f" Downloading {repo_id} (parquet only) …")
return Path(
snapshot_download(
repo_id=repo_id,
repo_type="dataset",
allow_patterns=["meta/**", "data/**"],
ignore_patterns=["*.mp4", "videos/**"],
)
)
def resample_trajectory(traj: np.ndarray, n_waypoints: int) -> np.ndarray:
"""Resample a (F, 3) trajectory to exactly n_waypoints via linear interpolation."""
f = traj.shape[0]
if f == n_waypoints:
return traj
old_t = np.linspace(0, 1, f)
new_t = np.linspace(0, 1, n_waypoints)
return np.column_stack([np.interp(new_t, old_t, traj[:, d]) for d in range(3)])
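# Shape sketch: a (120, 3) trajectory resampled with n_waypoints=50 yields a (50, 3)
# array whose first and last rows equal the original endpoints (np.interp preserves them).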
def load_episode_trajectories(local: Path) -> list[dict]:
"""
Load per-episode joint data, compute FK, return list of trajectory dicts.
Each dict: {"left_tcp": (F,3), "right_tcp": (F,3), "episode_index": int}.
Uses all episodes in the dataset for a fair comparison.
"""
info = json.loads((local / "meta" / "info.json").read_text())
features = info.get("features", {})
dfs = [pd.read_parquet(pq) for pq in sorted((local / "data").glob("**/*.parquet"))]
df = pd.concat(dfs, ignore_index=True)
print(f" Total frames: {len(df):,}")
state_col = next((c for c in df.columns if "observation.state" in c), None)
if state_col is None:
raise RuntimeError(f"No observation.state column. Available: {list(df.columns)}")
first = df[state_col].iloc[0]
if not hasattr(first, "__len__"):
raise RuntimeError(f"observation.state is scalar ({type(first)}), expected array")
state = np.stack(df[state_col].values).astype(np.float64)
n_dim = state.shape[1]
print(f" State dim: {n_dim} max|val|: {np.max(np.abs(state)):.1f}")
left_idx, right_idx = _find_joint_indices(features, state_col, n_dim)
ep_col = next((c for c in df.columns if c == "episode_index"), None)
if ep_col is None:
raise RuntimeError(f"No episode_index column. Available: {list(df.columns)}")
episode_ids = df[ep_col].values
unique_eps = np.unique(episode_ids)
print(f" Episodes: {len(unique_eps):,}")
left_raw = state[:, left_idx]
right_raw = state[:, right_idx]
left_all = _detect_and_convert(left_raw)
right_all = _detect_and_convert(right_raw)
print(" Computing FK per episode …")
trajectories = []
for ep_id in unique_eps:
mask = episode_ids == ep_id
left_tcp = batch_fk(LEFT_CHAIN, left_all[mask])
right_tcp = batch_fk(RIGHT_CHAIN, right_all[mask])
if len(left_tcp) < 3:
continue
trajectories.append({"left_tcp": left_tcp, "right_tcp": right_tcp, "episode_index": int(ep_id)})
print(f" Valid trajectories: {len(trajectories):,}")
return trajectories
# ── Clustering ──────────────────────────────────────────
def cluster_trajectories(
    trajectories: list[dict], n_clusters: int, n_waypoints: int
) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
"""
K-means on resampled trajectory features.
Combines left+right TCP into a single feature vector per episode.
Returns (labels, centroid_trajs (k, waypoints, 6), spread_per_cluster (k,) in metres).
Spread = mean per-waypoint Euclidean distance from each trajectory to its centroid.
"""
feat_vecs = []
for t in trajectories:
left_rs = resample_trajectory(t["left_tcp"], n_waypoints)
right_rs = resample_trajectory(t["right_tcp"], n_waypoints)
feat_vecs.append(np.concatenate([left_rs.ravel(), right_rs.ravel()]))
feat_matrix = np.array(feat_vecs)
k = min(n_clusters, len(feat_vecs))
km = KMeans(n_clusters=k, n_init=10, random_state=SEED)
labels = km.fit_predict(feat_matrix)
centroids_flat = km.cluster_centers_
centroid_trajs = np.zeros((k, n_waypoints, 6))
for ci in range(k):
left_flat = centroids_flat[ci, : n_waypoints * 3]
right_flat = centroids_flat[ci, n_waypoints * 3 :]
centroid_trajs[ci, :, :3] = left_flat.reshape(n_waypoints, 3)
centroid_trajs[ci, :, 3:] = right_flat.reshape(n_waypoints, 3)
# Mean per-waypoint distance to centroid (in metres) for each cluster
spread = np.zeros(k)
for ci in range(k):
members = np.where(labels == ci)[0]
if len(members) == 0:
continue
centroid_left = centroid_trajs[ci, :, :3]
centroid_right = centroid_trajs[ci, :, 3:]
dists = []
for mi in members:
t = trajectories[mi]
left_rs = resample_trajectory(t["left_tcp"], n_waypoints)
right_rs = resample_trajectory(t["right_tcp"], n_waypoints)
d_left = np.linalg.norm(left_rs - centroid_left, axis=1).mean()
d_right = np.linalg.norm(right_rs - centroid_right, axis=1).mean()
dists.append((d_left + d_right) / 2)
spread[ci] = np.mean(dists)
return labels, centroid_trajs, spread
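# Feature layout assumed by the clustering above: with WAYPOINTS=50 each episode becomes
# a 300-dim vector (left 50x3 flattened + right 50x3 flattened), so feat_matrix has shape
# (n_episodes, 300) and k is capped at the number of episodes.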
# ── Visualization ───────────────────────────────────────
PROJ_VIEWS = [
("XZ (side)", 0, 2, "X (m)", "Z (m)"),
("XY (top)", 0, 1, "X (m)", "Y (m)"),
("YZ (front)", 1, 2, "Y (m)", "Z (m)"),
]
def render(results: list[dict], out_path: Path) -> None:
"""
    One row per dataset × one column per projection (XZ side, XY top, YZ front).
Trajectory lines colored by cluster, centroid trajectories drawn thick.
"""
n_ds = len(results)
n_proj = len(PROJ_VIEWS)
fig, axes = plt.subplots(n_ds, n_proj, figsize=(7 * n_proj, 7 * n_ds), facecolor="#0d1117")
if n_ds == 1:
axes = axes[np.newaxis, :]
for row, r in enumerate(results):
trajectories = r["trajectories"]
labels = r["labels"]
centroids = r["centroids"]
k = centroids.shape[0]
cluster_sizes = np.bincount(labels, minlength=k)
size_order = np.argsort(-cluster_sizes)
pcts = cluster_sizes / len(labels) * 100
spread = r["spread"]
for col, (view_name, dim_a, dim_b, xlabel, ylabel) in enumerate(PROJ_VIEWS):
ax = axes[row, col]
ax.set_facecolor("#0d1117")
for ti, traj in enumerate(trajectories):
color = CLUSTER_COLORS[labels[ti] % len(CLUSTER_COLORS)]
for tcp_key in ("left_tcp", "right_tcp"):
pts = traj[tcp_key]
ax.plot(pts[:, dim_a], pts[:, dim_b], color=color, alpha=0.12, linewidth=0.4)
for ci in range(k):
color = CLUSTER_COLORS[ci % len(CLUSTER_COLORS)]
left_c = centroids[ci, :, :3]
right_c = centroids[ci, :, 3:]
lw = 1.5 + 2.0 * cluster_sizes[ci] / cluster_sizes.max()
for c_pts in (left_c, right_c):
ax.plot(
c_pts[:, dim_a],
c_pts[:, dim_b],
color=color,
linewidth=lw,
alpha=0.95,
zorder=10,
)
ax.plot(
c_pts[0, dim_a],
c_pts[0, dim_b],
"o",
color=color,
markersize=4,
zorder=11,
)
ax.plot(
c_pts[-1, dim_a],
c_pts[-1, dim_b],
"s",
color=color,
markersize=4,
zorder=11,
)
ax.set_xlabel(xlabel, color="#888", fontsize=9)
ax.set_ylabel(ylabel, color="#888", fontsize=9)
ax.tick_params(colors="#555", labelsize=7)
for spine in ax.spines.values():
spine.set_color("#333")
ax.set_aspect("equal")
mean_spread_cm = np.average(spread, weights=cluster_sizes) * 100
if col == 0:
ax.set_title(
f"{r['label']} ({r['n_episodes']:,} episodes, {k} clusters, "
f"avg spread {mean_spread_cm:.1f}cm)",
color="white",
fontsize=11,
pad=10,
)
else:
ax.set_title(view_name, color="#aaa", fontsize=10, pad=8)
# Cluster size + spread legend on the rightmost panel
legend_ax = axes[row, -1]
for ci in size_order:
color = CLUSTER_COLORS[ci % len(CLUSTER_COLORS)]
spread_cm = spread[ci] * 100
label = f"C{ci}: {cluster_sizes[ci]} eps ({pcts[ci]:.0f}%) ±{spread_cm:.1f}cm"
legend_ax.plot([], [], color=color, linewidth=3, label=label)
legend_ax.legend(
loc="upper right",
fontsize=7,
frameon=True,
facecolor="#1a1a2e",
edgecolor="#333",
labelcolor="white",
handlelength=1.5,
)
fig.suptitle(
"End-Effector Trajectory Clusters (FK · K-means)",
color="white",
fontsize=16,
y=0.98,
)
plt.tight_layout(rect=[0, 0, 1, 0.95])
plt.savefig(out_path, dpi=DPI, bbox_inches="tight", facecolor=fig.get_facecolor())
plt.close()
print(f"\n✓ Saved: {out_path}")
# ── Main ────────────────────────────────────────────────
def main() -> None:
results = []
for ds in DATASETS:
repo_id, label = ds["repo_id"], ds["label"]
print(f"\n{'=' * 60}")
print(f" {label}: {repo_id}")
print(f"{'=' * 60}")
local = download_data(repo_id)
trajectories = load_episode_trajectories(local)
labels, centroids, spread = cluster_trajectories(trajectories, N_CLUSTERS, WAYPOINTS)
cluster_sizes = np.bincount(labels, minlength=centroids.shape[0])
print(f" Cluster sizes: {sorted(cluster_sizes, reverse=True)}")
for ci in np.argsort(-cluster_sizes):
print(
f" C{ci}: {cluster_sizes[ci]} eps ({cluster_sizes[ci] / len(labels) * 100:.0f}%) "
f"spread ±{spread[ci] * 100:.1f}cm"
)
results.append(
{
"label": label,
"trajectories": trajectories,
"labels": labels,
"centroids": centroids,
"spread": spread,
"n_episodes": len(trajectories),
}
)
out = OUTPUT_DIR / "workspace_trajectory_clusters.jpg"
render(results, out)
if __name__ == "__main__":
main()
+44 -46
View File
@@ -14,8 +14,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from lerobot.datasets.feature_utils import hw_to_dataset_features
from lerobot.datasets.lerobot_dataset import LeRobotDataset
from lerobot.datasets.utils import hw_to_dataset_features
from lerobot.policies.act.modeling_act import ACTPolicy
from lerobot.policies.factory import make_pre_post_processors
from lerobot.processor import make_default_processors
@@ -78,24 +78,40 @@ def main():
listener, events = init_keyboard_listener()
init_rerun(session_name="lekiwi_evaluate")
try:
if not robot.is_connected:
raise ValueError("Robot is not connected!")
if not robot.is_connected:
raise ValueError("Robot is not connected!")
print("Starting evaluate loop...")
recorded_episodes = 0
while recorded_episodes < NUM_EPISODES and not events["stop_recording"]:
log_say(f"Running inference, recording eval episode {recorded_episodes} of {NUM_EPISODES}")
print("Starting evaluate loop...")
recorded_episodes = 0
while recorded_episodes < NUM_EPISODES and not events["stop_recording"]:
log_say(f"Running inference, recording eval episode {recorded_episodes} of {NUM_EPISODES}")
# Main record loop
# Main record loop
record_loop(
robot=robot,
events=events,
fps=FPS,
policy=policy,
preprocessor=preprocessor, # Pass the pre and post policy processors
postprocessor=postprocessor,
dataset=dataset,
control_time_s=EPISODE_TIME_SEC,
single_task=TASK_DESCRIPTION,
display_data=True,
teleop_action_processor=teleop_action_processor,
robot_action_processor=robot_action_processor,
robot_observation_processor=robot_observation_processor,
)
# Reset the environment if not stopping or re-recording
if not events["stop_recording"] and (
(recorded_episodes < NUM_EPISODES - 1) or events["rerecord_episode"]
):
log_say("Reset the environment")
record_loop(
robot=robot,
events=events,
fps=FPS,
policy=policy,
preprocessor=preprocessor, # Pass the pre and post policy processors
postprocessor=postprocessor,
dataset=dataset,
control_time_s=EPISODE_TIME_SEC,
single_task=TASK_DESCRIPTION,
display_data=True,
@@ -104,42 +120,24 @@ def main():
robot_observation_processor=robot_observation_processor,
)
# Reset the environment if not stopping or re-recording
if not events["stop_recording"] and (
(recorded_episodes < NUM_EPISODES - 1) or events["rerecord_episode"]
):
log_say("Reset the environment")
record_loop(
robot=robot,
events=events,
fps=FPS,
control_time_s=EPISODE_TIME_SEC,
single_task=TASK_DESCRIPTION,
display_data=True,
teleop_action_processor=teleop_action_processor,
robot_action_processor=robot_action_processor,
robot_observation_processor=robot_observation_processor,
)
if events["rerecord_episode"]:
log_say("Re-record episode")
events["rerecord_episode"] = False
events["exit_early"] = False
dataset.clear_episode_buffer()
continue
if events["rerecord_episode"]:
log_say("Re-record episode")
events["rerecord_episode"] = False
events["exit_early"] = False
dataset.clear_episode_buffer()
continue
# Save episode
dataset.save_episode()
recorded_episodes += 1
# Save episode
dataset.save_episode()
recorded_episodes += 1
# Clean up
log_say("Stop recording")
robot.disconnect()
listener.stop()
finally:
# Clean up
log_say("Stop recording")
robot.disconnect()
listener.stop()
dataset.finalize()
dataset.push_to_hub()
dataset.finalize()
dataset.push_to_hub()
if __name__ == "__main__":
+45 -46
View File
@@ -14,8 +14,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from lerobot.datasets.feature_utils import hw_to_dataset_features
from lerobot.datasets.lerobot_dataset import LeRobotDataset
from lerobot.datasets.utils import hw_to_dataset_features
from lerobot.processor import make_default_processors
from lerobot.robots.lekiwi.config_lekiwi import LeKiwiClientConfig
from lerobot.robots.lekiwi.lekiwi_client import LeKiwiClient
@@ -74,23 +74,40 @@ def main():
listener, events = init_keyboard_listener()
init_rerun(session_name="lekiwi_record")
try:
if not robot.is_connected or not leader_arm.is_connected or not keyboard.is_connected:
raise ValueError("Robot or teleop is not connected!")
if not robot.is_connected or not leader_arm.is_connected or not keyboard.is_connected:
raise ValueError("Robot or teleop is not connected!")
print("Starting record loop...")
recorded_episodes = 0
while recorded_episodes < NUM_EPISODES and not events["stop_recording"]:
log_say(f"Recording episode {recorded_episodes}")
print("Starting record loop...")
recorded_episodes = 0
while recorded_episodes < NUM_EPISODES and not events["stop_recording"]:
log_say(f"Recording episode {recorded_episodes}")
# Main record loop
# Main record loop
record_loop(
robot=robot,
events=events,
fps=FPS,
dataset=dataset,
teleop=[leader_arm, keyboard],
control_time_s=EPISODE_TIME_SEC,
single_task=TASK_DESCRIPTION,
display_data=True,
teleop_action_processor=teleop_action_processor,
robot_action_processor=robot_action_processor,
robot_observation_processor=robot_observation_processor,
)
# Reset the environment if not stopping or re-recording
if not events["stop_recording"] and (
(recorded_episodes < NUM_EPISODES - 1) or events["rerecord_episode"]
):
log_say("Reset the environment")
record_loop(
robot=robot,
events=events,
fps=FPS,
dataset=dataset,
teleop=[leader_arm, keyboard],
control_time_s=EPISODE_TIME_SEC,
control_time_s=RESET_TIME_SEC,
single_task=TASK_DESCRIPTION,
display_data=True,
teleop_action_processor=teleop_action_processor,
@@ -98,44 +115,26 @@ def main():
robot_observation_processor=robot_observation_processor,
)
# Reset the environment if not stopping or re-recording
if not events["stop_recording"] and (
(recorded_episodes < NUM_EPISODES - 1) or events["rerecord_episode"]
):
log_say("Reset the environment")
record_loop(
robot=robot,
events=events,
fps=FPS,
teleop=[leader_arm, keyboard],
control_time_s=RESET_TIME_SEC,
single_task=TASK_DESCRIPTION,
display_data=True,
teleop_action_processor=teleop_action_processor,
robot_action_processor=robot_action_processor,
robot_observation_processor=robot_observation_processor,
)
if events["rerecord_episode"]:
log_say("Re-record episode")
events["rerecord_episode"] = False
events["exit_early"] = False
dataset.clear_episode_buffer()
continue
if events["rerecord_episode"]:
log_say("Re-record episode")
events["rerecord_episode"] = False
events["exit_early"] = False
dataset.clear_episode_buffer()
continue
# Save episode
dataset.save_episode()
recorded_episodes += 1
# Save episode
dataset.save_episode()
recorded_episodes += 1
finally:
# Clean up
log_say("Stop recording")
robot.disconnect()
leader_arm.disconnect()
keyboard.disconnect()
listener.stop()
# Clean up
log_say("Stop recording")
robot.disconnect()
leader_arm.disconnect()
keyboard.disconnect()
listener.stop()
dataset.finalize()
dataset.push_to_hub()
dataset.finalize()
dataset.push_to_hub()
if __name__ == "__main__":
+15 -17
View File
@@ -42,27 +42,25 @@ def main():
# Connect to the robot
robot.connect()
try:
if not robot.is_connected:
raise ValueError("Robot is not connected!")
if not robot.is_connected:
raise ValueError("Robot is not connected!")
print("Starting replay loop...")
log_say(f"Replaying episode {EPISODE_IDX}")
for idx in range(len(episode_frames)):
t0 = time.perf_counter()
print("Starting replay loop...")
log_say(f"Replaying episode {EPISODE_IDX}")
for idx in range(len(episode_frames)):
t0 = time.perf_counter()
# Get recorded action from dataset
action = {
name: float(actions[idx][ACTION][i])
for i, name in enumerate(dataset.features[ACTION]["names"])
}
# Get recorded action from dataset
action = {
name: float(actions[idx][ACTION][i]) for i, name in enumerate(dataset.features[ACTION]["names"])
}
# Send action to robot
_ = robot.send_action(action)
# Send action to robot
_ = robot.send_action(action)
precise_sleep(max(1.0 / dataset.fps - (time.perf_counter() - t0), 0.0))
finally:
robot.disconnect()
precise_sleep(max(1.0 / dataset.fps - (time.perf_counter() - t0), 0.0))
robot.disconnect()
if __name__ == "__main__":
+44 -46
View File
@@ -16,13 +16,15 @@
from lerobot.cameras.opencv.configuration_opencv import OpenCVCameraConfig
from lerobot.configs.types import FeatureType, PolicyFeature
from lerobot.datasets.feature_utils import combine_feature_dicts
from lerobot.datasets.lerobot_dataset import LeRobotDataset
from lerobot.datasets.pipeline_features import aggregate_pipeline_dataset_features, create_initial_features
from lerobot.datasets.utils import combine_feature_dicts
from lerobot.model.kinematics import RobotKinematics
from lerobot.policies.act.modeling_act import ACTPolicy
from lerobot.policies.factory import make_pre_post_processors
from lerobot.processor import (
RobotAction,
RobotObservation,
RobotProcessorPipeline,
make_default_teleop_action_processor,
)
@@ -38,7 +40,6 @@ from lerobot.robots.so_follower.robot_kinematic_processor import (
InverseKinematicsEEToJoints,
)
from lerobot.scripts.lerobot_record import record_loop
from lerobot.types import RobotAction, RobotObservation
from lerobot.utils.control_utils import init_keyboard_listener
from lerobot.utils.utils import log_say
from lerobot.utils.visualization_utils import init_rerun
@@ -141,24 +142,38 @@ def main():
listener, events = init_keyboard_listener()
init_rerun(session_name="phone_so100_evaluate")
try:
if not robot.is_connected:
raise ValueError("Robot is not connected!")
if not robot.is_connected:
raise ValueError("Robot is not connected!")
print("Starting evaluate loop...")
episode_idx = 0
for episode_idx in range(NUM_EPISODES):
log_say(f"Running inference, recording eval episode {episode_idx + 1} of {NUM_EPISODES}")
print("Starting evaluate loop...")
episode_idx = 0
for episode_idx in range(NUM_EPISODES):
log_say(f"Running inference, recording eval episode {episode_idx + 1} of {NUM_EPISODES}")
# Main record loop
# Main record loop
record_loop(
robot=robot,
events=events,
fps=FPS,
policy=policy,
preprocessor=preprocessor, # Pass the pre and post policy processors
postprocessor=postprocessor,
dataset=dataset,
control_time_s=EPISODE_TIME_SEC,
single_task=TASK_DESCRIPTION,
display_data=True,
teleop_action_processor=make_default_teleop_action_processor(),
robot_action_processor=robot_ee_to_joints_processor,
robot_observation_processor=robot_joints_to_ee_pose_processor,
)
# Reset the environment if not stopping or re-recording
if not events["stop_recording"] and ((episode_idx < NUM_EPISODES - 1) or events["rerecord_episode"]):
log_say("Reset the environment")
record_loop(
robot=robot,
events=events,
fps=FPS,
policy=policy,
preprocessor=preprocessor, # Pass the pre and post policy processors
postprocessor=postprocessor,
dataset=dataset,
control_time_s=EPISODE_TIME_SEC,
single_task=TASK_DESCRIPTION,
display_data=True,
@@ -167,41 +182,24 @@ def main():
robot_observation_processor=robot_joints_to_ee_pose_processor,
)
# Reset the environment if not stopping or re-recording
if not events["stop_recording"] and (
(episode_idx < NUM_EPISODES - 1) or events["rerecord_episode"]
):
log_say("Reset the environment")
record_loop(
robot=robot,
events=events,
fps=FPS,
control_time_s=EPISODE_TIME_SEC,
single_task=TASK_DESCRIPTION,
display_data=True,
teleop_action_processor=make_default_teleop_action_processor(),
robot_action_processor=robot_ee_to_joints_processor,
robot_observation_processor=robot_joints_to_ee_pose_processor,
)
if events["rerecord_episode"]:
log_say("Re-record episode")
events["rerecord_episode"] = False
events["exit_early"] = False
dataset.clear_episode_buffer()
continue
if events["rerecord_episode"]:
log_say("Re-record episode")
events["rerecord_episode"] = False
events["exit_early"] = False
dataset.clear_episode_buffer()
continue
# Save episode
dataset.save_episode()
episode_idx += 1
# Save episode
dataset.save_episode()
episode_idx += 1
finally:
# Clean up
log_say("Stop recording")
robot.disconnect()
listener.stop()
# Clean up
log_say("Stop recording")
robot.disconnect()
listener.stop()
dataset.finalize()
dataset.push_to_hub()
dataset.finalize()
dataset.push_to_hub()
if __name__ == "__main__":
+43 -47
View File
@@ -15,11 +15,11 @@
# limitations under the License.
from lerobot.cameras.opencv.configuration_opencv import OpenCVCameraConfig
from lerobot.datasets.feature_utils import combine_feature_dicts
from lerobot.datasets.lerobot_dataset import LeRobotDataset
from lerobot.datasets.pipeline_features import aggregate_pipeline_dataset_features, create_initial_features
from lerobot.datasets.utils import combine_feature_dicts
from lerobot.model.kinematics import RobotKinematics
from lerobot.processor import RobotProcessorPipeline
from lerobot.processor import RobotAction, RobotObservation, RobotProcessorPipeline
from lerobot.processor.converters import (
observation_to_transition,
robot_action_observation_to_transition,
@@ -38,7 +38,6 @@ from lerobot.scripts.lerobot_record import record_loop
from lerobot.teleoperators.phone.config_phone import PhoneConfig, PhoneOS
from lerobot.teleoperators.phone.phone_processor import MapPhoneActionToRobotAction
from lerobot.teleoperators.phone.teleop_phone import Phone
from lerobot.types import RobotAction, RobotObservation
from lerobot.utils.control_utils import init_keyboard_listener
from lerobot.utils.utils import log_say
from lerobot.utils.visualization_utils import init_rerun
@@ -150,23 +149,38 @@ def main():
listener, events = init_keyboard_listener()
init_rerun(session_name="phone_so100_record")
try:
if not robot.is_connected or not phone.is_connected:
raise ValueError("Robot or teleop is not connected!")
if not robot.is_connected or not phone.is_connected:
raise ValueError("Robot or teleop is not connected!")
print("Starting record loop. Move your phone to teleoperate the robot...")
episode_idx = 0
while episode_idx < NUM_EPISODES and not events["stop_recording"]:
log_say(f"Recording episode {episode_idx + 1} of {NUM_EPISODES}")
print("Starting record loop. Move your phone to teleoperate the robot...")
episode_idx = 0
while episode_idx < NUM_EPISODES and not events["stop_recording"]:
log_say(f"Recording episode {episode_idx + 1} of {NUM_EPISODES}")
# Main record loop
# Main record loop
record_loop(
robot=robot,
events=events,
fps=FPS,
teleop=phone,
dataset=dataset,
control_time_s=EPISODE_TIME_SEC,
single_task=TASK_DESCRIPTION,
display_data=True,
teleop_action_processor=phone_to_robot_ee_pose_processor,
robot_action_processor=robot_ee_to_joints_processor,
robot_observation_processor=robot_joints_to_ee_pose,
)
# Reset the environment if not stopping or re-recording
if not events["stop_recording"] and (episode_idx < NUM_EPISODES - 1 or events["rerecord_episode"]):
log_say("Reset the environment")
record_loop(
robot=robot,
events=events,
fps=FPS,
teleop=phone,
dataset=dataset,
control_time_s=EPISODE_TIME_SEC,
control_time_s=RESET_TIME_SEC,
single_task=TASK_DESCRIPTION,
display_data=True,
teleop_action_processor=phone_to_robot_ee_pose_processor,
@@ -174,43 +188,25 @@ def main():
robot_observation_processor=robot_joints_to_ee_pose,
)
# Reset the environment if not stopping or re-recording
if not events["stop_recording"] and (
episode_idx < NUM_EPISODES - 1 or events["rerecord_episode"]
):
log_say("Reset the environment")
record_loop(
robot=robot,
events=events,
fps=FPS,
teleop=phone,
control_time_s=RESET_TIME_SEC,
single_task=TASK_DESCRIPTION,
display_data=True,
teleop_action_processor=phone_to_robot_ee_pose_processor,
robot_action_processor=robot_ee_to_joints_processor,
robot_observation_processor=robot_joints_to_ee_pose,
)
if events["rerecord_episode"]:
log_say("Re-recording episode")
events["rerecord_episode"] = False
events["exit_early"] = False
dataset.clear_episode_buffer()
continue
if events["rerecord_episode"]:
log_say("Re-recording episode")
events["rerecord_episode"] = False
events["exit_early"] = False
dataset.clear_episode_buffer()
continue
# Save episode
dataset.save_episode()
episode_idx += 1
# Save episode
dataset.save_episode()
episode_idx += 1
finally:
# Clean up
log_say("Stop recording")
robot.disconnect()
phone.disconnect()
listener.stop()
# Clean up
log_say("Stop recording")
robot.disconnect()
phone.disconnect()
listener.stop()
dataset.finalize()
dataset.push_to_hub()
dataset.finalize()
dataset.push_to_hub()
if __name__ == "__main__":
+21 -24
View File
@@ -18,7 +18,7 @@ import time
from lerobot.datasets.lerobot_dataset import LeRobotDataset
from lerobot.model.kinematics import RobotKinematics
from lerobot.processor import RobotProcessorPipeline
from lerobot.processor import RobotAction, RobotObservation, RobotProcessorPipeline
from lerobot.processor.converters import (
robot_action_observation_to_transition,
transition_to_robot_action,
@@ -27,7 +27,6 @@ from lerobot.robots.so_follower import SO100Follower, SO100FollowerConfig
from lerobot.robots.so_follower.robot_kinematic_processor import (
InverseKinematicsEEToJoints,
)
from lerobot.types import RobotAction, RobotObservation
from lerobot.utils.constants import ACTION
from lerobot.utils.robot_utils import precise_sleep
from lerobot.utils.utils import log_say
@@ -74,34 +73,32 @@ def main():
# Connect to the robot
robot.connect()
try:
if not robot.is_connected:
raise ValueError("Robot is not connected!")
if not robot.is_connected:
raise ValueError("Robot is not connected!")
print("Starting replay loop...")
log_say(f"Replaying episode {EPISODE_IDX}")
for idx in range(len(episode_frames)):
t0 = time.perf_counter()
print("Starting replay loop...")
log_say(f"Replaying episode {EPISODE_IDX}")
for idx in range(len(episode_frames)):
t0 = time.perf_counter()
# Get recorded action from dataset
ee_action = {
name: float(actions[idx][ACTION][i])
for i, name in enumerate(dataset.features[ACTION]["names"])
}
# Get recorded action from dataset
ee_action = {
name: float(actions[idx][ACTION][i]) for i, name in enumerate(dataset.features[ACTION]["names"])
}
# Get robot observation
robot_obs = robot.get_observation()
# Get robot observation
robot_obs = robot.get_observation()
# Dataset EE -> robot joints
joint_action = robot_ee_to_joints_processor((ee_action, robot_obs))
# Dataset EE -> robot joints
joint_action = robot_ee_to_joints_processor((ee_action, robot_obs))
# Send action to robot
_ = robot.send_action(joint_action)
# Send action to robot
_ = robot.send_action(joint_action)
precise_sleep(max(1.0 / dataset.fps - (time.perf_counter() - t0), 0.0))
finally:
# Clean up
robot.disconnect()
precise_sleep(max(1.0 / dataset.fps - (time.perf_counter() - t0), 0.0))
# Clean up
robot.disconnect()
if __name__ == "__main__":
+1 -2
View File
@@ -16,7 +16,7 @@
import time
from lerobot.model.kinematics import RobotKinematics
from lerobot.processor import RobotProcessorPipeline
from lerobot.processor import RobotAction, RobotObservation, RobotProcessorPipeline
from lerobot.processor.converters import (
robot_action_observation_to_transition,
transition_to_robot_action,
@@ -31,7 +31,6 @@ from lerobot.robots.so_follower.robot_kinematic_processor import (
from lerobot.teleoperators.phone.config_phone import PhoneConfig, PhoneOS
from lerobot.teleoperators.phone.phone_processor import MapPhoneActionToRobotAction
from lerobot.teleoperators.phone.teleop_phone import Phone
from lerobot.types import RobotAction, RobotObservation
from lerobot.utils.robot_utils import precise_sleep
from lerobot.utils.visualization_utils import init_rerun, log_rerun_data
+1 -2
View File
@@ -22,8 +22,7 @@ from pathlib import Path
import numpy as np
import tensorflow_datasets as tfds
from lerobot.datasets.dataset_metadata import LeRobotDatasetMetadata
from lerobot.datasets.lerobot_dataset import LeRobotDataset
from lerobot.datasets.lerobot_dataset import LeRobotDataset, LeRobotDatasetMetadata
from lerobot.utils.utils import get_elapsed_time_in_days_hours_minutes_seconds
DROID_SHARDS = 2048
+2 -2
View File
@@ -26,7 +26,7 @@ from huggingface_hub import HfApi
from huggingface_hub.constants import REPOCARD_NAME
from port_droid import DROID_SHARDS
from lerobot.datasets.dataset_metadata import CODEBASE_VERSION, LeRobotDatasetMetadata
from lerobot.datasets.lerobot_dataset import CODEBASE_VERSION, LeRobotDatasetMetadata
from lerobot.datasets.utils import create_lerobot_dataset_card
from lerobot.utils.utils import init_logging
@@ -155,7 +155,7 @@ class UploadDataset(PipelineStep):
from datasets.utils.tqdm import disable_progress_bars
from huggingface_hub import CommitOperationAdd, preupload_lfs_files
from lerobot.datasets.dataset_metadata import LeRobotDatasetMetadata
from lerobot.datasets.lerobot_dataset import LeRobotDatasetMetadata
from lerobot.utils.utils import init_logging
init_logging()
+11 -12
View File
@@ -27,8 +27,8 @@ measuring consistency and ground truth alignment.
Usage:
# Basic usage with smolvla policy
uv run python examples/rtc/eval_dataset.py \
--policy.path=<USER>/smolvla_check_rtc_last3 \
--dataset.repo_id=<USER>/check_rtc \
--policy.path=helper2424/smolvla_check_rtc_last3 \
--dataset.repo_id=helper2424/check_rtc \
--rtc.execution_horizon=8 \
--device=mps \
--rtc.max_guidance_weight=10.0 \
@@ -58,16 +58,16 @@ Usage:
--device=cuda
uv run python examples/rtc/eval_dataset.py \
--policy.path=<USER>/reuben_pi0 \
--dataset.repo_id=<USER>/so101_cube_in_cup \
--policy.path=lipsop/reuben_pi0 \
--dataset.repo_id=ReubenLim/so101_cube_in_cup \
--rtc.execution_horizon=8 \
--device=cuda
# With torch.compile for faster inference (PyTorch 2.0+)
# Note: CUDA graphs disabled by default due to in-place ops in denoising loop
uv run python examples/rtc/eval_dataset.py \
--policy.path=<USER>/smolvla_check_rtc_last3 \
--dataset.repo_id=<USER>/check_rtc \
--policy.path=helper2424/smolvla_check_rtc_last3 \
--dataset.repo_id=helper2424/check_rtc \
--rtc.execution_horizon=8 \
--device=mps \
--use_torch_compile=true \
@@ -75,8 +75,8 @@ Usage:
# With torch.compile on CUDA (CUDA graphs disabled by default)
uv run python examples/rtc/eval_dataset.py \
--policy.path=<USER>/smolvla_check_rtc_last3 \
--dataset.repo_id=<USER>/check_rtc \
--policy.path=helper2424/smolvla_check_rtc_last3 \
--dataset.repo_id=helper2424/check_rtc \
--rtc.execution_horizon=8 \
--device=cuda \
--use_torch_compile=true \
@@ -84,8 +84,8 @@ Usage:
# Enable CUDA graphs (advanced - may cause tensor aliasing errors)
uv run python examples/rtc/eval_dataset.py \
--policy.path=<USER>/smolvla_check_rtc_last3 \
--dataset.repo_id=<USER>/check_rtc \
--policy.path=helper2424/smolvla_check_rtc_last3 \
--dataset.repo_id=helper2424/check_rtc \
--use_torch_compile=true \
--torch_compile_backend=inductor \
--torch_compile_mode=max-autotune \
@@ -113,9 +113,8 @@ from lerobot.configs import parser
from lerobot.configs.default import DatasetConfig
from lerobot.configs.policies import PreTrainedConfig
from lerobot.configs.types import RTCAttentionSchedule
from lerobot.datasets.dataset_metadata import LeRobotDatasetMetadata
from lerobot.datasets.factory import resolve_delta_timestamps
from lerobot.datasets.lerobot_dataset import LeRobotDataset
from lerobot.datasets.lerobot_dataset import LeRobotDataset, LeRobotDatasetMetadata
from lerobot.policies.factory import get_policy_class, make_pre_post_processors
from lerobot.policies.rtc.configuration_rtc import RTCConfig
from lerobot.policies.rtc.debug_visualizer import RTCDebugVisualizer
+4 -6
View File
@@ -28,7 +28,7 @@ For simulation environments, see eval_with_simulation.py
Usage:
# Run RTC with Real robot with RTC
uv run examples/rtc/eval_with_real_robot.py \
--policy.path=<USER>/smolvla_check_rtc_last3 \
--policy.path=helper2424/smolvla_check_rtc_last3 \
--policy.device=mps \
--rtc.enabled=true \
--rtc.execution_horizon=20 \
@@ -41,7 +41,7 @@ Usage:
# Run RTC with Real robot without RTC
uv run examples/rtc/eval_with_real_robot.py \
--policy.path=<USER>/smolvla_check_rtc_last3 \
--policy.path=helper2424/smolvla_check_rtc_last3 \
--policy.device=mps \
--rtc.enabled=false \
--robot.type=so100_follower \
@@ -53,7 +53,7 @@ Usage:
# Run RTC with Real robot with pi0.5 policy
uv run examples/rtc/eval_with_real_robot.py \
--policy.path=<USER>/pi05_check_rtc \
--policy.path=helper2424/pi05_check_rtc \
--policy.device=mps \
--rtc.enabled=true \
--rtc.execution_horizon=20 \
@@ -78,11 +78,10 @@ from torch import Tensor
from lerobot.cameras.opencv.configuration_opencv import OpenCVCameraConfig # noqa: F401
from lerobot.cameras.realsense.configuration_realsense import RealSenseCameraConfig # noqa: F401
from lerobot.cameras.zmq.configuration_zmq import ZMQCameraConfig # noqa: F401
from lerobot.configs import parser
from lerobot.configs.policies import PreTrainedConfig
from lerobot.configs.types import RTCAttentionSchedule
from lerobot.datasets.feature_utils import build_dataset_frame, hw_to_dataset_features
from lerobot.datasets.utils import build_dataset_frame, hw_to_dataset_features
from lerobot.policies.factory import get_policy_class, make_pre_post_processors
from lerobot.policies.rtc.action_queue import ActionQueue
from lerobot.policies.rtc.configuration_rtc import RTCConfig
@@ -98,7 +97,6 @@ from lerobot.robots import ( # noqa: F401
bi_so_follower,
koch_follower,
so_follower,
unitree_g1,
)
from lerobot.robots.utils import make_robot_from_config
from lerobot.utils.constants import OBS_IMAGES
+44 -46
View File
@@ -16,13 +16,15 @@
from lerobot.cameras.opencv.configuration_opencv import OpenCVCameraConfig
from lerobot.configs.types import FeatureType, PolicyFeature
from lerobot.datasets.feature_utils import combine_feature_dicts
from lerobot.datasets.lerobot_dataset import LeRobotDataset
from lerobot.datasets.pipeline_features import aggregate_pipeline_dataset_features, create_initial_features
from lerobot.datasets.utils import combine_feature_dicts
from lerobot.model.kinematics import RobotKinematics
from lerobot.policies.act.modeling_act import ACTPolicy
from lerobot.policies.factory import make_pre_post_processors
from lerobot.processor import (
RobotAction,
RobotObservation,
RobotProcessorPipeline,
make_default_teleop_action_processor,
)
@@ -38,7 +40,6 @@ from lerobot.robots.so_follower.robot_kinematic_processor import (
InverseKinematicsEEToJoints,
)
from lerobot.scripts.lerobot_record import record_loop
from lerobot.types import RobotAction, RobotObservation
from lerobot.utils.control_utils import init_keyboard_listener
from lerobot.utils.utils import log_say
from lerobot.utils.visualization_utils import init_rerun
@@ -141,24 +142,38 @@ def main():
listener, events = init_keyboard_listener()
init_rerun(session_name="so100_so100_evaluate")
try:
if not robot.is_connected:
raise ValueError("Robot is not connected!")
if not robot.is_connected:
raise ValueError("Robot is not connected!")
print("Starting evaluate loop...")
episode_idx = 0
for episode_idx in range(NUM_EPISODES):
log_say(f"Running inference, recording eval episode {episode_idx + 1} of {NUM_EPISODES}")
print("Starting evaluate loop...")
episode_idx = 0
for episode_idx in range(NUM_EPISODES):
log_say(f"Running inference, recording eval episode {episode_idx + 1} of {NUM_EPISODES}")
# Main record loop
# Main record loop
record_loop(
robot=robot,
events=events,
fps=FPS,
policy=policy,
preprocessor=preprocessor, # Pass the pre and post policy processors
postprocessor=postprocessor,
dataset=dataset,
control_time_s=EPISODE_TIME_SEC,
single_task=TASK_DESCRIPTION,
display_data=True,
teleop_action_processor=make_default_teleop_action_processor(),
robot_action_processor=robot_ee_to_joints_processor,
robot_observation_processor=robot_joints_to_ee_pose_processor,
)
# Reset the environment if not stopping or re-recording
if not events["stop_recording"] and ((episode_idx < NUM_EPISODES - 1) or events["rerecord_episode"]):
log_say("Reset the environment")
record_loop(
robot=robot,
events=events,
fps=FPS,
policy=policy,
preprocessor=preprocessor, # Pass the pre and post policy processors
postprocessor=postprocessor,
dataset=dataset,
control_time_s=EPISODE_TIME_SEC,
single_task=TASK_DESCRIPTION,
display_data=True,
@@ -167,41 +182,24 @@ def main():
robot_observation_processor=robot_joints_to_ee_pose_processor,
)
# Reset the environment if not stopping or re-recording
if not events["stop_recording"] and (
(episode_idx < NUM_EPISODES - 1) or events["rerecord_episode"]
):
log_say("Reset the environment")
record_loop(
robot=robot,
events=events,
fps=FPS,
control_time_s=EPISODE_TIME_SEC,
single_task=TASK_DESCRIPTION,
display_data=True,
teleop_action_processor=make_default_teleop_action_processor(),
robot_action_processor=robot_ee_to_joints_processor,
robot_observation_processor=robot_joints_to_ee_pose_processor,
)
if events["rerecord_episode"]:
log_say("Re-record episode")
events["rerecord_episode"] = False
events["exit_early"] = False
dataset.clear_episode_buffer()
continue
if events["rerecord_episode"]:
log_say("Re-record episode")
events["rerecord_episode"] = False
events["exit_early"] = False
dataset.clear_episode_buffer()
continue
# Save episode
dataset.save_episode()
episode_idx += 1
# Save episode
dataset.save_episode()
episode_idx += 1
finally:
# Clean up
log_say("Stop recording")
robot.disconnect()
listener.stop()
# Clean up
log_say("Stop recording")
robot.disconnect()
listener.stop()
dataset.finalize()
dataset.push_to_hub()
dataset.finalize()
dataset.push_to_hub()
if __name__ == "__main__":
+43 -48
View File
@@ -16,11 +16,11 @@
from lerobot.cameras.opencv.configuration_opencv import OpenCVCameraConfig
from lerobot.datasets.feature_utils import combine_feature_dicts
from lerobot.datasets.lerobot_dataset import LeRobotDataset
from lerobot.datasets.pipeline_features import aggregate_pipeline_dataset_features, create_initial_features
from lerobot.datasets.utils import combine_feature_dicts
from lerobot.model.kinematics import RobotKinematics
from lerobot.processor import RobotProcessorPipeline
from lerobot.processor import RobotAction, RobotObservation, RobotProcessorPipeline
from lerobot.processor.converters import (
observation_to_transition,
robot_action_observation_to_transition,
@@ -35,7 +35,6 @@ from lerobot.robots.so_follower.robot_kinematic_processor import (
)
from lerobot.scripts.lerobot_record import record_loop
from lerobot.teleoperators.so_leader import SO100Leader, SO100LeaderConfig
from lerobot.types import RobotAction, RobotObservation
from lerobot.utils.control_utils import init_keyboard_listener
from lerobot.utils.utils import log_say
from lerobot.utils.visualization_utils import init_rerun
@@ -147,23 +146,38 @@ def main():
listener, events = init_keyboard_listener()
init_rerun(session_name="recording_phone")
try:
if not leader.is_connected or not follower.is_connected:
raise ValueError("Robot or teleop is not connected!")
if not leader.is_connected or not follower.is_connected:
raise ValueError("Robot or teleop is not connected!")
print("Starting record loop...")
episode_idx = 0
while episode_idx < NUM_EPISODES and not events["stop_recording"]:
log_say(f"Recording episode {episode_idx + 1} of {NUM_EPISODES}")
print("Starting record loop...")
episode_idx = 0
while episode_idx < NUM_EPISODES and not events["stop_recording"]:
log_say(f"Recording episode {episode_idx + 1} of {NUM_EPISODES}")
# Main record loop
# Main record loop
record_loop(
robot=follower,
events=events,
fps=FPS,
teleop=leader,
dataset=dataset,
control_time_s=EPISODE_TIME_SEC,
single_task=TASK_DESCRIPTION,
display_data=True,
teleop_action_processor=leader_joints_to_ee,
robot_action_processor=ee_to_follower_joints,
robot_observation_processor=follower_joints_to_ee,
)
# Reset the environment if not stopping or re-recording
if not events["stop_recording"] and (episode_idx < NUM_EPISODES - 1 or events["rerecord_episode"]):
log_say("Reset the environment")
record_loop(
robot=follower,
events=events,
fps=FPS,
teleop=leader,
dataset=dataset,
control_time_s=EPISODE_TIME_SEC,
control_time_s=RESET_TIME_SEC,
single_task=TASK_DESCRIPTION,
display_data=True,
teleop_action_processor=leader_joints_to_ee,
@@ -171,44 +185,25 @@ def main():
robot_observation_processor=follower_joints_to_ee,
)
# Reset the environment if not stopping or re-recording
if not events["stop_recording"] and (
episode_idx < NUM_EPISODES - 1 or events["rerecord_episode"]
):
log_say("Reset the environment")
record_loop(
robot=follower,
events=events,
fps=FPS,
teleop=leader,
control_time_s=RESET_TIME_SEC,
single_task=TASK_DESCRIPTION,
display_data=True,
teleop_action_processor=leader_joints_to_ee,
robot_action_processor=ee_to_follower_joints,
robot_observation_processor=follower_joints_to_ee,
)
if events["rerecord_episode"]:
log_say("Re-recording episode")
events["rerecord_episode"] = False
events["exit_early"] = False
dataset.clear_episode_buffer()
continue
if events["rerecord_episode"]:
log_say("Re-recording episode")
events["rerecord_episode"] = False
events["exit_early"] = False
dataset.clear_episode_buffer()
continue
# Save episode
dataset.save_episode()
episode_idx += 1
# Save episode
dataset.save_episode()
episode_idx += 1
# Clean up
log_say("Stop recording")
leader.disconnect()
follower.disconnect()
listener.stop()
finally:
# Clean up
log_say("Stop recording")
leader.disconnect()
follower.disconnect()
listener.stop()
dataset.finalize()
dataset.push_to_hub()
dataset.finalize()
dataset.push_to_hub()
if __name__ == "__main__":
+20 -24
View File
@@ -19,7 +19,7 @@ import time
from lerobot.datasets.lerobot_dataset import LeRobotDataset
from lerobot.model.kinematics import RobotKinematics
from lerobot.processor import RobotProcessorPipeline
from lerobot.processor import RobotAction, RobotObservation, RobotProcessorPipeline
from lerobot.processor.converters import (
robot_action_observation_to_transition,
transition_to_robot_action,
@@ -28,7 +28,6 @@ from lerobot.robots.so_follower import SO100Follower, SO100FollowerConfig
from lerobot.robots.so_follower.robot_kinematic_processor import (
InverseKinematicsEEToJoints,
)
from lerobot.types import RobotAction, RobotObservation
from lerobot.utils.constants import ACTION
from lerobot.utils.robot_utils import precise_sleep
from lerobot.utils.utils import log_say
@@ -75,35 +74,32 @@ def main():
# Connect to the robot
robot.connect()
try:
if not robot.is_connected:
raise ValueError("Robot is not connected!")
if not robot.is_connected:
raise ValueError("Robot is not connected!")
print("Starting replay loop...")
log_say(f"Replaying episode {EPISODE_IDX}")
for idx in range(len(episode_frames)):
t0 = time.perf_counter()
print("Starting replay loop...")
log_say(f"Replaying episode {EPISODE_IDX}")
for idx in range(len(episode_frames)):
t0 = time.perf_counter()
# Get recorded action from dataset
ee_action = {
name: float(actions[idx][ACTION][i])
for i, name in enumerate(dataset.features[ACTION]["names"])
}
# Get recorded action from dataset
ee_action = {
name: float(actions[idx][ACTION][i]) for i, name in enumerate(dataset.features[ACTION]["names"])
}
# Get robot observation
robot_obs = robot.get_observation()
# Get robot observation
robot_obs = robot.get_observation()
# Dataset EE -> robot joints
joint_action = robot_ee_to_joints_processor((ee_action, robot_obs))
# Dataset EE -> robot joints
joint_action = robot_ee_to_joints_processor((ee_action, robot_obs))
# Send action to robot
_ = robot.send_action(joint_action)
# Send action to robot
_ = robot.send_action(joint_action)
precise_sleep(max(1.0 / dataset.fps - (time.perf_counter() - t0), 0.0))
precise_sleep(max(1.0 / dataset.fps - (time.perf_counter() - t0), 0.0))
finally:
# Clean up
robot.disconnect()
# Clean up
robot.disconnect()
if __name__ == "__main__":
+1 -2
View File
@@ -17,7 +17,7 @@
import time
from lerobot.model.kinematics import RobotKinematics
from lerobot.processor import RobotProcessorPipeline
from lerobot.processor import RobotAction, RobotObservation, RobotProcessorPipeline
from lerobot.processor.converters import (
robot_action_observation_to_transition,
robot_action_to_transition,
@@ -30,7 +30,6 @@ from lerobot.robots.so_follower.robot_kinematic_processor import (
InverseKinematicsEEToJoints,
)
from lerobot.teleoperators.so_leader import SO100Leader, SO100LeaderConfig
from lerobot.types import RobotAction, RobotObservation
from lerobot.utils.robot_utils import precise_sleep
from lerobot.utils.visualization_utils import init_rerun, log_rerun_data
+2 -3
View File
@@ -19,9 +19,8 @@ from pathlib import Path
import torch
from lerobot.configs.types import FeatureType
from lerobot.datasets.dataset_metadata import LeRobotDatasetMetadata
from lerobot.datasets.feature_utils import dataset_to_policy_features
from lerobot.datasets.lerobot_dataset import LeRobotDataset
from lerobot.datasets.lerobot_dataset import LeRobotDataset, LeRobotDatasetMetadata
from lerobot.datasets.utils import dataset_to_policy_features
from lerobot.policies.diffusion.configuration_diffusion import DiffusionConfig
from lerobot.policies.diffusion.modeling_diffusion import DiffusionPolicy
from lerobot.policies.factory import make_pre_post_processors
+2 -2
View File
@@ -20,9 +20,9 @@ from pathlib import Path
import torch
from lerobot.configs.types import FeatureType
from lerobot.datasets.dataset_metadata import LeRobotDatasetMetadata
from lerobot.datasets.feature_utils import dataset_to_policy_features
from lerobot.datasets.lerobot_dataset import LeRobotDatasetMetadata
from lerobot.datasets.streaming_dataset import StreamingLeRobotDataset
from lerobot.datasets.utils import dataset_to_policy_features
from lerobot.policies.act.configuration_act import ACTConfig
from lerobot.policies.act.modeling_act import ACTPolicy
from lerobot.policies.factory import make_pre_post_processors
@@ -5,9 +5,8 @@ from pathlib import Path
import torch
from lerobot.configs.types import FeatureType
from lerobot.datasets.dataset_metadata import LeRobotDatasetMetadata
from lerobot.datasets.feature_utils import dataset_to_policy_features
from lerobot.datasets.lerobot_dataset import LeRobotDataset
from lerobot.datasets.lerobot_dataset import LeRobotDataset, LeRobotDatasetMetadata
from lerobot.datasets.utils import dataset_to_policy_features
from lerobot.policies.act.configuration_act import ACTConfig
from lerobot.policies.act.modeling_act import ACTPolicy
from lerobot.policies.factory import make_pre_post_processors
+1 -1
View File
@@ -1,7 +1,7 @@
import torch
from lerobot.cameras.opencv.configuration_opencv import OpenCVCameraConfig
from lerobot.datasets.dataset_metadata import LeRobotDatasetMetadata
from lerobot.datasets.lerobot_dataset import LeRobotDatasetMetadata
from lerobot.policies.act.modeling_act import ACTPolicy
from lerobot.policies.factory import make_pre_post_processors
from lerobot.policies.utils import build_inference_frame, make_robot_action
@@ -30,7 +30,6 @@ def main():
robot=robot_cfg,
server_address=server_address,
policy_device="mps",
client_device="cpu",
policy_type="act",
pretrained_name_or_path="<user>/robot_learning_tutorial_act",
chunk_size_threshold=0.5, # g
@@ -5,9 +5,8 @@ from pathlib import Path
import torch
from lerobot.configs.types import FeatureType
from lerobot.datasets.dataset_metadata import LeRobotDatasetMetadata
from lerobot.datasets.feature_utils import dataset_to_policy_features
from lerobot.datasets.lerobot_dataset import LeRobotDataset
from lerobot.datasets.lerobot_dataset import LeRobotDataset, LeRobotDatasetMetadata
from lerobot.datasets.utils import dataset_to_policy_features
from lerobot.policies.diffusion.configuration_diffusion import DiffusionConfig
from lerobot.policies.diffusion.modeling_diffusion import DiffusionPolicy
from lerobot.policies.factory import make_pre_post_processors
@@ -1,7 +1,7 @@
import torch
from lerobot.cameras.opencv.configuration_opencv import OpenCVCameraConfig
from lerobot.datasets.dataset_metadata import LeRobotDatasetMetadata
from lerobot.datasets.lerobot_dataset import LeRobotDatasetMetadata
from lerobot.policies.diffusion.modeling_diffusion import DiffusionPolicy
from lerobot.policies.factory import make_pre_post_processors
from lerobot.policies.utils import build_inference_frame, make_robot_action
+1 -1
View File
@@ -1,7 +1,7 @@
import torch
from lerobot.cameras.opencv.configuration_opencv import OpenCVCameraConfig
from lerobot.datasets.feature_utils import hw_to_dataset_features
from lerobot.datasets.utils import hw_to_dataset_features
from lerobot.policies.factory import make_pre_post_processors
from lerobot.policies.pi0.modeling_pi0 import PI0Policy
from lerobot.policies.utils import build_inference_frame, make_robot_action
+1 -1
View File
@@ -6,8 +6,8 @@ from queue import Empty, Full
import torch
import torch.optim as optim
from lerobot.datasets.feature_utils import hw_to_dataset_features
from lerobot.datasets.lerobot_dataset import LeRobotDataset
from lerobot.datasets.utils import hw_to_dataset_features
from lerobot.envs.configs import HILSerlProcessorConfig, HILSerlRobotEnvConfig
from lerobot.policies.sac.configuration_sac import SACConfig
from lerobot.policies.sac.modeling_sac import SACPolicy
@@ -1,7 +1,7 @@
import torch
from lerobot.cameras.opencv.configuration_opencv import OpenCVCameraConfig
from lerobot.datasets.feature_utils import hw_to_dataset_features
from lerobot.datasets.utils import hw_to_dataset_features
from lerobot.policies.factory import make_pre_post_processors
from lerobot.policies.smolvla.modeling_smolvla import SmolVLAPolicy
from lerobot.policies.utils import build_inference_frame, make_robot_action
@@ -14,20 +14,20 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import logging
import time
from collections import deque
import numpy as np
import onnxruntime as ort
from huggingface_hub import hf_hub_download
from lerobot.robots.unitree_g1.g1_utils import (
REMOTE_AXES,
REMOTE_BUTTONS,
G1_29_JointIndex,
get_gravity_orientation,
)
from lerobot.robots.unitree_g1.config_unitree_g1 import UnitreeG1Config
from lerobot.robots.unitree_g1.g1_utils import G1_29_JointIndex
from lerobot.robots.unitree_g1.unitree_g1 import UnitreeG1
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
@@ -36,13 +36,18 @@ GROOT_DEFAULT_ANGLES[[0, 6]] = -0.1 # Hip pitch
GROOT_DEFAULT_ANGLES[[3, 9]] = 0.3 # Knee
GROOT_DEFAULT_ANGLES[[4, 10]] = -0.2 # Ankle pitch
MISSING_JOINTS = []
G1_MODEL = "g1_23" # Or "g1_29"
if G1_MODEL == "g1_23":
MISSING_JOINTS = [12, 14, 20, 21, 27, 28] # Waist yaw/pitch, wrist pitch/yaw
# Control parameters
ACTION_SCALE = 0.25
CONTROL_DT = 0.02 # 50Hz
ANG_VEL_SCALE: float = 0.25
DOF_POS_SCALE: float = 1.0
DOF_VEL_SCALE: float = 0.05
CMD_SCALE: list[float] = [2.0, 2.0, 0.25]
CMD_SCALE: list = [2.0, 2.0, 0.25]
DEFAULT_GROOT_REPO_ID = "nepyope/GR00T-WholeBodyControl_g1"
@@ -80,11 +85,11 @@ def load_groot_policies(
class GrootLocomotionController:
"""GR00T lower-body locomotion controller for the Unitree G1."""
control_dt = CONTROL_DT # Expose for unitree_g1.py
def __init__(self):
# Load policies
self.policy_balance, self.policy_walk = load_groot_policies()
def __init__(self, policy_balance, policy_walk, robot, config):
self.policy_balance = policy_balance
self.policy_walk = policy_walk
self.robot = robot
self.config = config
self.cmd = np.array([0.0, 0.0, 0.0], dtype=np.float32) # vx, vy, theta_dot
@@ -104,60 +109,45 @@ class GrootLocomotionController:
logger.info("GrootLocomotionController initialized")
def reset(self) -> None:
"""Reset internal state for a new episode."""
self.cmd[:] = 0.0
self.groot_qj_all[:] = 0.0
self.groot_dqj_all[:] = 0.0
self.groot_action[:] = 0.0
self.groot_obs_single[:] = 0.0
self.groot_obs_stacked[:] = 0.0
self.groot_height_cmd = 0.74
self.groot_orientation_cmd[:] = 0.0
self.groot_obs_history.clear()
for _ in range(6):
self.groot_obs_history.append(np.zeros(86, dtype=np.float32))
def run_step(self):
# Get current observation
obs = self.robot.get_observation()
def run_step(self, action: dict, lowstate) -> dict:
"""Run one step of the locomotion controller.
if not obs:
return
Args:
action: Action dict containing remote.lx/ly/rx/ry and buttons
lowstate: Robot lowstate containing motor positions/velocities and IMU
Returns:
Action dict for lower body joints (0-14)
"""
if lowstate is None:
return {}
buttons = [int(action.get(k, 0)) for k in REMOTE_BUTTONS]
if buttons[0]: # R1 - raise waist
# Get command from remote controller
if obs["remote.buttons"][0]: # R1 - raise waist
self.groot_height_cmd += 0.001
self.groot_height_cmd = np.clip(self.groot_height_cmd, 0.50, 1.00)
if buttons[4]: # R2 - lower waist
if obs["remote.buttons"][4]: # R2 - lower waist
self.groot_height_cmd -= 0.001
self.groot_height_cmd = np.clip(self.groot_height_cmd, 0.50, 1.00)
lx, ly, rx, _ry = (action.get(k, 0.0) for k in REMOTE_AXES)
self.cmd[0] = ly # Forward/backward
self.cmd[1] = -lx # Left/right (negated)
self.cmd[2] = -rx # Rotation rate (negated)
self.cmd[0] = obs["remote.ly"] # Forward/backward
self.cmd[1] = obs["remote.lx"] * -1 # Left/right
self.cmd[2] = obs["remote.rx"] * -1 # Rotation rate
# Get joint positions and velocities from lowstate
# Get joint positions and velocities from flat dict
for motor in G1_29_JointIndex:
name = motor.name
idx = motor.value
self.groot_qj_all[idx] = lowstate.motor_state[idx].q
self.groot_dqj_all[idx] = lowstate.motor_state[idx].dq
self.groot_qj_all[idx] = obs[f"{name}.q"]
self.groot_dqj_all[idx] = obs[f"{name}.dq"]
# Adapt observation for g1_23dof
for idx in MISSING_JOINTS:
self.groot_qj_all[idx] = 0.0
self.groot_dqj_all[idx] = 0.0
# Scale joint positions and velocities
qj_obs = self.groot_qj_all.copy()
dqj_obs = self.groot_dqj_all.copy()
# Express IMU data in gravity frame of reference
quat = lowstate.imu_state.quaternion
ang_vel = np.array(lowstate.imu_state.gyroscope, dtype=np.float32)
gravity_orientation = get_gravity_orientation(quat)
quat = [obs["imu.quat.w"], obs["imu.quat.x"], obs["imu.quat.y"], obs["imu.quat.z"]]
ang_vel = np.array([obs["imu.gyro.x"], obs["imu.gyro.y"], obs["imu.gyro.z"]], dtype=np.float32)
gravity_orientation = self.robot.get_gravity_orientation(quat)
# Scale joint positions and velocities before policy inference
qj_obs = (qj_obs - GROOT_DEFAULT_ANGLES) * DOF_POS_SCALE
@@ -196,10 +186,73 @@ class GrootLocomotionController:
# Transform action back to target joint positions
target_dof_pos_15 = GROOT_DEFAULT_ANGLES[:15] + self.groot_action * ACTION_SCALE
# Build action dict
# Build action dict (only first 15 joints for GR00T)
action_dict = {}
for i in range(15):
motor_name = G1_29_JointIndex(i).name
action_dict[f"{motor_name}.q"] = float(target_dof_pos_15[i])
return action_dict
# Zero out missing joints for g1_23dof
for joint_idx in MISSING_JOINTS:
motor_name = G1_29_JointIndex(joint_idx).name
action_dict[f"{motor_name}.q"] = 0.0
# Send action to robot
self.robot.send_action(action_dict)
def run(repo_id: str = DEFAULT_GROOT_REPO_ID) -> None:
"""Main function to run the GR00T locomotion controller.
Args:
repo_id: Hugging Face Hub repository ID for GR00T policies.
"""
# Load policies
policy_balance, policy_walk = load_groot_policies(repo_id=repo_id)
# Initialize robot
config = UnitreeG1Config()
robot = UnitreeG1(config)
robot.connect()
# Initialize gr00T locomotion controller
groot_controller = GrootLocomotionController(
policy_balance=policy_balance,
policy_walk=policy_walk,
robot=robot,
config=config,
)
try:
robot.reset(CONTROL_DT, GROOT_DEFAULT_ANGLES)
logger.info("Use joystick: LY=fwd/back, LX=left/right, RX=rotate, R1=raise waist, R2=lower waist")
logger.info("Press Ctrl+C to stop")
# Run step
while not robot._shutdown_event.is_set():
start_time = time.time()
groot_controller.run_step()
elapsed = time.time() - start_time
sleep_time = max(0, CONTROL_DT - elapsed)
time.sleep(sleep_time)
except KeyboardInterrupt:
logger.info("Stopping locomotion...")
finally:
if robot.is_connected:
robot.disconnect()
logger.info("Done!")
if __name__ == "__main__":
parser = argparse.ArgumentParser(description="GR00T Locomotion Controller for Unitree G1")
parser.add_argument(
"--repo-id",
type=str,
default=DEFAULT_GROOT_REPO_ID,
help=f"Hugging Face Hub repo ID for GR00T policies (default: {DEFAULT_GROOT_REPO_ID})",
)
args = parser.parse_args()
run(repo_id=args.repo_id)
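The GR00T controller keeps a six-frame history of 86-dimensional observations (see reset(), which refills groot_obs_history with six zero frames before stacking). A minimal sketch of how such a deque-backed history can be flattened into a single policy input; the helper names and the concatenation order are assumptions for illustration, not the repository's exact code:

from collections import deque
import numpy as np

def make_history(frames: int = 6, obs_dim: int = 86) -> deque:
    # Pre-fill with zeros so the stacked input has a fixed size from the very first step.
    history = deque(maxlen=frames)
    for _ in range(frames):
        history.append(np.zeros(obs_dim, dtype=np.float32))
    return history

def stack_observation(history: deque, new_obs: np.ndarray) -> np.ndarray:
    # Append the newest frame (the oldest is dropped automatically) and flatten to one vector.
    history.append(new_obs.astype(np.float32))
    return np.concatenate(list(history), axis=0)  # shape: (frames * obs_dim,)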
@@ -14,21 +14,21 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import json
import logging
import time
import numpy as np
import onnx
import onnxruntime as ort
from huggingface_hub import hf_hub_download
from lerobot.robots.unitree_g1.g1_utils import (
REMOTE_AXES,
G1_29_JointArmIndex,
G1_29_JointIndex,
get_gravity_orientation,
)
from lerobot.robots.unitree_g1.config_unitree_g1 import UnitreeG1Config
from lerobot.robots.unitree_g1.g1_utils import G1_29_JointIndex
from lerobot.robots.unitree_g1.unitree_g1 import UnitreeG1
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
DEFAULT_ANGLES = np.zeros(29, dtype=np.float32)
@@ -40,13 +40,18 @@ DEFAULT_ANGLES[16] = 0.2 # Left shoulder roll
DEFAULT_ANGLES[23] = -0.2 # Right shoulder roll
DEFAULT_ANGLES[[18, 25]] = 0.6 # Elbow
MISSING_JOINTS = []
G1_MODEL = "g1_23" # Or "g1_29"
if G1_MODEL == "g1_23":
MISSING_JOINTS = [12, 14, 20, 21, 27, 28] # Waist yaw/pitch, wrist pitch/yaw
# Control parameters
ACTION_SCALE = 0.25
CONTROL_DT = 0.005 # 200Hz
CONTROL_DT = 0.02 # 50Hz
ANG_VEL_SCALE = 0.25
DOF_POS_SCALE = 1.0
DOF_VEL_SCALE = 0.05
GAIT_PERIOD = 0.5
GAIT_PERIOD = 1.0
DEFAULT_HOLOSOMA_REPO_ID = "nepyope/holosoma_locomotion"
@@ -82,7 +87,7 @@ def load_policy(
logger.info(f"Policy loaded: {policy.get_inputs()[0].shape}{policy.get_outputs()[0].shape}")
# Extract KP/KD from ONNX metadata
model = onnx.load(policy_path, load_external_data=False)
model = onnx.load(policy_path)
metadata = {prop.key: prop.value for prop in model.metadata_props}
if "kp" not in metadata or "kd" not in metadata:
@@ -96,13 +101,15 @@ def load_policy(
class HolosomaLocomotionController:
"""Holosoma lower-body locomotion controller for Unitree G1."""
"""Holosoma whole-body locomotion controller for Unitree G1."""
control_dt = CONTROL_DT # Expose for unitree_g1.py
def __init__(self, policy, robot, kp: np.ndarray, kd: np.ndarray):
self.policy = policy
self.robot = robot
def __init__(self):
# Load policy and gains
self.policy, self.kp, self.kd = load_policy()
# Override robot's PD gains with policy gains
self.robot.kp = kp
self.robot.kd = kd
self.cmd = np.zeros(3, dtype=np.float32)
@@ -117,55 +124,35 @@ class HolosomaLocomotionController:
self.phase_dt = 2 * np.pi / ((1.0 / CONTROL_DT) * GAIT_PERIOD)
self.is_standing = True
logger.info("HolosomaLocomotionController initialized")
def run_step(self):
# Get current observation
obs = self.robot.get_observation()
def reset(self) -> None:
"""Reset internal state for a new episode."""
self.cmd[:] = 0.0
self.qj[:] = 0.0
self.dqj[:] = 0.0
self.obs[:] = 0.0
self.last_action[:] = 0.0
self.phase = np.array([[0.0, np.pi]], dtype=np.float32)
self.is_standing = True
if not obs:
return
def run_step(self, action: dict, lowstate) -> dict:
"""Run one step of the locomotion controller.
Args:
action: Action dict containing remote.lx/ly/rx/ry
lowstate: Robot lowstate containing motor positions/velocities and IMU
Returns:
Action dict for lower body joints (0-14)
"""
if lowstate is None:
return {}
lx, ly, rx, _ry = (action.get(k, 0.0) for k in REMOTE_AXES)
ly = ly if abs(ly) > 0.1 else 0.0
lx = lx if abs(lx) > 0.1 else 0.0
rx = rx if abs(rx) > 0.1 else 0.0
ly = np.clip(ly, -0.3, 0.3)
lx = np.clip(lx, -0.3, 0.3)
# Get command from remote controller
ly = obs["remote.ly"] if abs(obs["remote.ly"]) > 0.1 else 0.0
lx = obs["remote.lx"] if abs(obs["remote.lx"]) > 0.1 else 0.0
rx = obs["remote.rx"] if abs(obs["remote.rx"]) > 0.1 else 0.0
self.cmd[:] = [ly, -lx, -rx]
# Get joint positions and velocities from lowstate
# Get joint positions and velocities
for motor in G1_29_JointIndex:
name = motor.name
idx = motor.value
self.qj[idx] = lowstate.motor_state[idx].q
self.dqj[idx] = lowstate.motor_state[idx].dq
self.qj[idx] = obs[f"{name}.q"]
self.dqj[idx] = obs[f"{name}.dq"]
# Hide arm positions from policy (show DEFAULT_ANGLES instead)
# This prevents policy from reacting to teleop arm movements
for arm_joint in G1_29_JointArmIndex:
self.qj[arm_joint.value] = DEFAULT_ANGLES[arm_joint.value]
self.dqj[arm_joint.value] = 0.0
# Adapt observation for g1_23dof
for idx in MISSING_JOINTS:
self.qj[idx] = 0.0
self.dqj[idx] = 0.0
# Express IMU data in gravity frame of reference
quat = lowstate.imu_state.quaternion
ang_vel = np.array(lowstate.imu_state.gyroscope, dtype=np.float32)
gravity = get_gravity_orientation(quat)
quat = [obs["imu.quat.w"], obs["imu.quat.x"], obs["imu.quat.y"], obs["imu.quat.z"]]
ang_vel = np.array([obs["imu.gyro.x"], obs["imu.gyro.y"], obs["imu.gyro.z"]], dtype=np.float32)
gravity = self.robot.get_gravity_orientation(quat)
# Scale joint positions and velocities before policy inference
qj_obs = (self.qj - DEFAULT_ANGLES) * DOF_POS_SCALE
@@ -199,16 +186,79 @@ class HolosomaLocomotionController:
# Run policy inference
ort_in = {self.policy.get_inputs()[0].name: self.obs.reshape(1, -1).astype(np.float32)}
raw_action = self.policy.run(None, ort_in)[0].squeeze()
policy_action = np.clip(raw_action, -100.0, 100.0)
self.last_action = policy_action.copy()
action = np.clip(raw_action, -100.0, 100.0)
self.last_action = action.copy()
# Transform action back to target joint positions
target = DEFAULT_ANGLES + policy_action * ACTION_SCALE
target = DEFAULT_ANGLES + action * ACTION_SCALE
# Build action dict (first 15 joints only)
# Build action dict
action_dict = {}
for i in range(15):
motor_name = G1_29_JointIndex(i).name
action_dict[f"{motor_name}.q"] = float(target[i])
for motor in G1_29_JointIndex:
action_dict[f"{motor.name}.q"] = float(target[motor.value])
return action_dict
# Zero out missing joints for g1_23dof
for joint_idx in MISSING_JOINTS:
motor_name = G1_29_JointIndex(joint_idx).name
action_dict[f"{motor_name}.q"] = 0.0
# Send action to robot
self.robot.send_action(action_dict)
def run(repo_id: str = DEFAULT_HOLOSOMA_REPO_ID, policy_type: str = "fastsac") -> None:
"""Main function to run the Holosoma locomotion controller.
Args:
repo_id: Hugging Face Hub repository ID for Holosoma policies.
policy_type: Policy type to use ('fastsac' or 'ppo').
"""
# Load policy and gains
policy, kp, kd = load_policy(repo_id=repo_id, policy_type=policy_type)
# Initialize robot
config = UnitreeG1Config()
robot = UnitreeG1(config)
robot.connect()
holosoma_controller = HolosomaLocomotionController(policy, robot, kp, kd)
try:
robot.reset(CONTROL_DT, DEFAULT_ANGLES)
logger.info("Use joystick: LY=fwd/back, LX=left/right, RX=rotate")
logger.info("Press Ctrl+C to stop")
# Run step
while not robot._shutdown_event.is_set():
start_time = time.time()
holosoma_controller.run_step()
elapsed = time.time() - start_time
sleep_time = max(0, CONTROL_DT - elapsed)
time.sleep(sleep_time)
except KeyboardInterrupt:
logger.info("Stopping locomotion...")
finally:
if robot.is_connected:
robot.disconnect()
logger.info("Done!")
if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Holosoma Locomotion Controller for Unitree G1")
parser.add_argument(
"--repo-id",
type=str,
default=DEFAULT_HOLOSOMA_REPO_ID,
help=f"Hugging Face Hub repo ID for Holosoma policies (default: {DEFAULT_HOLOSOMA_REPO_ID})",
)
parser.add_argument(
"--policy",
type=str,
choices=["fastsac", "ppo"],
default="fastsac",
help="Policy type to use: 'fastsac' (default) or 'ppo'",
)
args = parser.parse_args()
run(repo_id=args.repo_id, policy_type=args.policy)
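load_policy above reads the PD gains out of the ONNX file's metadata properties and raises if "kp"/"kd" are missing. A standalone sketch of that extraction; the JSON encoding of the metadata values and the helper name are assumptions, not confirmed by the diff:

import json
import numpy as np
import onnx

def load_gains(policy_path: str) -> tuple[np.ndarray, np.ndarray]:
    # PD gains are stored as metadata properties on the ONNX model.
    model = onnx.load(policy_path)
    metadata = {prop.key: prop.value for prop in model.metadata_props}
    if "kp" not in metadata or "kd" not in metadata:
        raise KeyError("policy ONNX file is missing 'kp'/'kd' metadata")
    # Assumes the values are JSON-encoded lists of per-joint gains.
    kp = np.asarray(json.loads(metadata["kp"]), dtype=np.float32)
    kd = np.asarray(json.loads(metadata["kd"]), dtype=np.float32)
    return kp, kd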
+122 -68
View File
@@ -25,11 +25,11 @@ discord = "https://discord.gg/s3KuuzsPFb"
[project]
name = "lerobot"
version = "0.5.1"
version = "0.4.3"
description = "🤗 LeRobot: State-of-the-art Machine Learning for Real-World Robotics in Pytorch"
dynamic = ["readme"]
license = { text = "Apache-2.0" }
requires-python = ">=3.12"
requires-python = ">=3.10"
authors = [
{ name = "Rémi Cadène", email = "re.cadene@gmail.com" },
{ name = "Simon Alibert", email = "alibert.sim@gmail.com" },
@@ -50,8 +50,7 @@ classifiers = [
"Intended Audience :: Education",
"Intended Audience :: Science/Research",
"License :: OSI Approved :: Apache Software License",
"Programming Language :: Python :: 3.12",
"Programming Language :: Python :: 3.13",
"Programming Language :: Python :: 3.10",
"Topic :: Software Development :: Build Tools",
"Topic :: Scientific/Engineering :: Artificial Intelligence",
]
@@ -60,30 +59,28 @@ keywords = ["lerobot", "huggingface", "robotics", "machine learning", "artifici
dependencies = [
# Hugging Face dependencies
"datasets>=4.0.0,<5.0.0",
"datasets>=4.0.0,<4.2.0",
"diffusers>=0.27.2,<0.36.0",
"huggingface-hub>=1.0.0,<2.0.0",
"huggingface-hub[hf-transfer,cli]>=0.34.2,<0.36.0",
"accelerate>=1.10.0,<2.0.0",
# Core dependencies
"numpy>=2.0.0,<2.3.0", # NOTE: Explicitly listing numpy helps the resolver converge faster. Upper bound imposed by opencv-python-headless.
"setuptools>=71.0.0,<81.0.0",
"cmake>=3.29.0.1,<4.2.0",
"packaging>=24.2,<26.0",
"torch>=2.2.1,<2.11.0",
"torchcodec>=0.2.1,<0.11.0; sys_platform != 'win32' and (sys_platform != 'linux' or (platform_machine != 'aarch64' and platform_machine != 'arm64' and platform_machine != 'armv7l')) and (sys_platform != 'darwin' or platform_machine != 'x86_64')",
"torchvision>=0.21.0,<0.26.0",
"einops>=0.8.0,<0.9.0",
"opencv-python-headless>=4.9.0,<4.14.0",
"opencv-python-headless>=4.9.0,<4.13.0",
"av>=15.0.0,<16.0.0",
"jsonlines>=4.0.0,<5.0.0",
"pynput>=1.7.8,<1.9.0",
"packaging>=24.2,<26.0",
"pynput>=1.7.7,<1.9.0",
"pyserial>=3.5,<4.0",
"wandb>=0.24.0,<0.25.0",
"draccus==0.10.0", # TODO: Relax version constraint
"torch>=2.2.1,<2.8.0", # TODO: Bumb dependency
"torchcodec>=0.2.1,<0.6.0; sys_platform != 'win32' and (sys_platform != 'linux' or (platform_machine != 'aarch64' and platform_machine != 'arm64' and platform_machine != 'armv7l')) and (sys_platform != 'darwin' or platform_machine != 'x86_64')", # TODO: Bumb dependency
"torchvision>=0.21.0,<0.23.0", # TODO: Bumb dependency
"draccus==0.10.0", # TODO: Remove ==
"gymnasium>=1.1.1,<2.0.0",
"rerun-sdk>=0.24.0,<0.27.0",
@@ -98,56 +95,43 @@ dependencies = [
# Common
pygame-dep = ["pygame>=2.5.1,<2.7.0"]
placo-dep = ["placo>=0.9.6,<0.9.17"]
transformers-dep = ["transformers>=5.3.0,<6.0.0"]
placo-dep = ["placo>=0.9.6,<0.10.0"]
transformers-dep = ["transformers>=4.57.1,<5.0.0"]
grpcio-dep = ["grpcio==1.73.1", "protobuf>=6.31.1,<6.32.0"]
can-dep = ["python-can>=4.2.0,<5.0.0"]
peft-dep = ["peft>=0.18.0,<1.0.0"]
scipy-dep = ["scipy>=1.14.0,<2.0.0"]
qwen-vl-utils-dep = ["qwen-vl-utils>=0.0.11,<0.1.0"]
matplotlib-dep = ["matplotlib>=3.10.3,<4.0.0", "contourpy>=1.3.0,<2.0.0"] # NOTE: Explicitly listing contourpy helps the resolver converge faster.
# Motors
feetech = ["feetech-servo-sdk>=1.0.0,<2.0.0"]
dynamixel = ["dynamixel-sdk>=3.7.31,<3.9.0"]
damiao = ["lerobot[can-dep]"]
robstride = ["lerobot[can-dep]"]
# Robots
openarms = ["lerobot[damiao]"]
gamepad = ["lerobot[pygame-dep]", "hidapi>=0.14.0,<0.15.0"]
hopejr = ["lerobot[feetech]", "lerobot[pygame-dep]"]
lekiwi = ["lerobot[feetech]", "pyzmq>=26.2.1,<28.0.0"]
unitree_g1 = [
# "unitree-sdk2==1.0.1",
"pyzmq>=26.2.1,<28.0.0",
"onnxruntime>=1.16.0,<2.0.0",
"onnx>=1.16.0,<2.0.0",
"meshcat>=0.3.0,<0.4.0",
"lerobot[matplotlib-dep]",
"lerobot[pygame-dep]",
"onnxruntime>=1.16.0,<2.0.0"
]
reachy2 = ["reachy2_sdk>=1.0.15,<1.1.0"]
kinematics = ["lerobot[placo-dep]"]
intelrealsense = [
"pyrealsense2>=2.55.1.6486,<2.57.0 ; sys_platform != 'darwin'",
"pyrealsense2-macosx>=2.54,<2.57.0 ; sys_platform == 'darwin'",
"pyrealsense2-macosx>=2.54,<2.55.0 ; sys_platform == 'darwin'",
]
phone = ["hebi-py>=2.8.0,<2.12.0", "teleop>=0.1.0,<0.2.0", "fastapi<1.0", "lerobot[scipy-dep]"]
phone = ["hebi-py>=2.8.0,<2.12.0", "teleop>=0.1.0,<0.2.0", "fastapi<1.0"]
# Policies
wallx = [
"lerobot[transformers-dep]",
"lerobot[peft]",
"lerobot[scipy-dep]",
"torchdiffeq>=0.2.4,<0.3.0",
"lerobot[qwen-vl-utils-dep]",
"transformers==4.49.0",
"peft==0.17.1",
"scipy==1.15.3",
"torchdiffeq==0.2.5",
"qwen_vl_utils==0.0.11"
]
pi = ["lerobot[transformers-dep]", "lerobot[scipy-dep]"]
pi = ["transformers @ git+https://github.com/huggingface/transformers.git@fix/lerobot_openpi", "scipy>=1.10.1,<1.15"]
smolvla = ["lerobot[transformers-dep]", "num2words>=0.5.14,<0.6.0", "accelerate>=1.7.0,<2.0.0", "safetensors>=0.4.3,<1.0.0"]
groot = [
"lerobot[transformers-dep]",
"lerobot[peft]",
"peft>=0.13.0,<1.0.0",
"dm-tree>=0.1.8,<1.0.0",
"timm>=1.0.0,<1.1.0",
"safetensors>=0.4.3,<1.0.0",
@@ -156,13 +140,13 @@ groot = [
"ninja>=1.11.1,<2.0.0",
"flash-attn>=2.5.9,<3.0.0 ; sys_platform != 'darwin'"
]
sarm = ["lerobot[transformers-dep]", "faker>=33.0.0,<35.0.0", "lerobot[matplotlib-dep]", "lerobot[qwen-vl-utils-dep]"]
sarm = ["lerobot[transformers-dep]", "faker>=33.0.0,<35.0.0", "matplotlib>=3.10.3,<4.0.0", "qwen-vl-utils>=0.0.14,<0.1.0"]
xvla = ["lerobot[transformers-dep]"]
hilserl = ["lerobot[transformers-dep]", "gym-hil>=0.1.13,<0.2.0", "lerobot[grpcio-dep]", "lerobot[placo-dep]"]
# Features
async = ["lerobot[grpcio-dep]", "lerobot[matplotlib-dep]"]
peft = ["lerobot[transformers-dep]", "lerobot[peft-dep]"]
async = ["lerobot[grpcio-dep]", "matplotlib>=3.10.3,<4.0.0"]
peft = ["lerobot[transformers-dep]", "peft>=0.18.0,<1.0.0"]
# Development
dev = ["pre-commit>=3.7.0,<5.0.0", "debugpy>=1.8.1,<1.9.0", "lerobot[grpcio-dep]", "grpcio-tools==1.73.1", "mypy>=1.19.1"]
@@ -170,19 +154,13 @@ test = ["pytest>=8.1.0,<9.0.0", "pytest-timeout>=2.4.0,<3.0.0", "pytest-cov>=5.0
video_benchmark = ["scikit-image>=0.23.2,<0.26.0", "pandas>=2.2.2,<2.4.0"]
# Simulation
# NOTE: Explicitly listing scipy helps flatten the dependency tree.
aloha = ["gym-aloha>=0.1.2,<0.2.0", "lerobot[scipy-dep]"]
aloha = ["gym-aloha>=0.1.2,<0.2.0"]
pusht = ["gym-pusht>=0.1.5,<0.2.0", "pymunk>=6.6.0,<7.0.0"] # TODO: Fix pymunk version in gym-pusht instead
libero = ["lerobot[transformers-dep]", "hf-libero>=0.1.3,<0.2.0; sys_platform == 'linux'", "lerobot[scipy-dep]"]
metaworld = ["metaworld==3.0.0", "lerobot[scipy-dep]"]
libero = ["lerobot[transformers-dep]", "hf-libero>=0.1.3,<0.2.0"]
metaworld = ["metaworld==3.0.0"]
# All
all = [
# NOTE(resolver hint): scipy is pulled in transitively via lerobot[scipy-dep] through
# multiple extras (aloha, metaworld, pi, wallx, phone). Listing it explicitly
# helps pip's resolver converge by constraining scipy early, before it encounters
# the loose scipy requirements from transitive deps like dm-control and metaworld.
"scipy>=1.14.0,<2.0.0",
"lerobot[dynamixel]",
"lerobot[gamepad]",
"lerobot[hopejr]",
@@ -190,8 +168,8 @@ all = [
"lerobot[reachy2]",
"lerobot[kinematics]",
"lerobot[intelrealsense]",
"lerobot[wallx]",
"lerobot[pi]",
# "lerobot[wallx]",
# "lerobot[pi]", TODO(Pepijn): Update pi to transformers v5
"lerobot[smolvla]",
# "lerobot[groot]", TODO(Steven): Gr00t requires specific installation instructions for flash-attn
"lerobot[xvla]",
@@ -203,11 +181,10 @@ all = [
"lerobot[aloha]",
"lerobot[pusht]",
"lerobot[phone]",
"lerobot[libero]; sys_platform == 'linux'",
"lerobot[libero]",
"lerobot[metaworld]",
"lerobot[sarm]",
"lerobot[peft]",
# "lerobot[unitree_g1]", TODO: Unitree requires specific installation instructions for unitree_sdk2
]
[project.scripts]
@@ -226,17 +203,13 @@ lerobot-info="lerobot.scripts.lerobot_info:main"
lerobot-find-joint-limits="lerobot.scripts.lerobot_find_joint_limits:main"
lerobot-imgtransform-viz="lerobot.scripts.lerobot_imgtransform_viz:main"
lerobot-edit-dataset="lerobot.scripts.lerobot_edit_dataset:main"
lerobot-setup-can="lerobot.scripts.lerobot_setup_can:main"
# ---------------- Tool Configurations ----------------
[tool.setuptools.package-data]
lerobot = ["envs/*.json"]
[tool.setuptools.packages.find]
where = ["src"]
[tool.ruff]
target-version = "py312"
target-version = "py310"
line-length = 110
exclude = ["tests/artifacts/**/*.safetensors", "*_pb2.py", "*_pb2_grpc.py"]
@@ -305,7 +278,6 @@ default.extend-ignore-identifiers-re = [
"thw",
"inpt",
"ROBOTIS",
"OT_VALUE"
]
# TODO: Uncomment when ready to use
@@ -328,7 +300,7 @@ default.extend-ignore-identifiers-re = [
# Uncomment [tool.mypy] first, then uncomment individual module overrides as they get proper type annotations
[tool.mypy]
python_version = "3.12"
python_version = "3.10"
ignore_missing_imports = true
follow_imports = "skip"
# warn_return_any = true
@@ -380,9 +352,9 @@ ignore_errors = false
module = "lerobot.cameras.*"
ignore_errors = false
[[tool.mypy.overrides]]
module = "lerobot.motors.*"
ignore_errors = false
# [[tool.mypy.overrides]]
# module = "lerobot.motors.*"
# ignore_errors = false
# [[tool.mypy.overrides]]
# module = "lerobot.robots.*"
@@ -412,3 +384,85 @@ ignore_errors = false
# [[tool.mypy.overrides]]
# module = "lerobot.scripts.*"
# ignore_errors = false
[tool.uv]
# wallx requires transformers==4.49.0 which conflicts with other extras that need >=4.53.0
conflicts = [
[
{ extra = "wallx" },
{ extra = "transformers-dep" },
],
[
{ extra = "wallx" },
{ extra = "pi" },
],
[
{ extra = "wallx" },
{ extra = "smolvla" },
],
[
{ extra = "wallx" },
{ extra = "groot" },
],
[
{ extra = "wallx" },
{ extra = "xvla" },
],
[
{ extra = "wallx" },
{ extra = "sarm" },
],
[
{ extra = "wallx" },
{ extra = "hilserl" },
],
[
{ extra = "wallx" },
{ extra = "libero" },
],
[
{ extra = "wallx" },
{ extra = "peft" },
],
[
{ extra = "wallx" },
{ extra = "all" },
],
# pi uses custom branch which conflicts with transformers-dep
[
{ extra = "pi" },
{ extra = "transformers-dep" },
],
[
{ extra = "pi" },
{ extra = "smolvla" },
],
[
{ extra = "pi" },
{ extra = "groot" },
],
[
{ extra = "pi" },
{ extra = "xvla" },
],
[
{ extra = "pi" },
{ extra = "sarm" },
],
[
{ extra = "pi" },
{ extra = "hilserl" },
],
[
{ extra = "pi" },
{ extra = "libero" },
],
[
{ extra = "pi" },
{ extra = "peft" },
],
[
{ extra = "pi" },
{ extra = "all" },
],
]
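The [tool.uv] conflicts table above declares groups of extras that uv must never resolve together (wallx and pi each pin incompatible transformers requirements). A quick way to list those pairs from a checkout; this snippet is illustrative, not part of the repository, and assumes Python 3.11+ for the standard-library tomllib:

import tomllib

with open("pyproject.toml", "rb") as f:
    pyproject = tomllib.load(f)

# Each conflict entry is a list of {"extra": ...} tables; print them as "a x b" pairs.
for group in pyproject.get("tool", {}).get("uv", {}).get("conflicts", []):
    print(" x ".join(item["extra"] for item in group))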
+276 -175
View File
@@ -1,73 +1,76 @@
#
# This file is autogenerated by pip-compile with Python 3.12
# This file is autogenerated by pip-compile with Python 3.10
# by the following command:
#
# pip-compile --output-file=requirements-macos.txt requirements.in
#
-e .[all]
# via -[all]
absl-py==2.4.0
absl-py==2.3.1
# via
# dm-control
# dm-env
# dm-tree
# labmaze
# mujoco
accelerate==1.13.0
# tensorboard
accelerate==1.11.0
# via
# lerobot
# peft
aiohappyeyeballs==2.6.1
# via aiohttp
aiohttp==3.13.3
aiohttp==3.13.1
# via fsspec
aiosignal==1.4.0
# via aiohttp
annotated-doc==0.0.4
# via
# fastapi
# typer
annotated-types==0.7.0
# via pydantic
anyio==4.12.1
antlr4-python3-runtime==4.9.3
# via
# hydra-core
# omegaconf
anyio==4.11.0
# via
# httpx
# starlette
# watchfiles
asttokens==3.0.1
asttokens==3.0.0
# via stack-data
async-timeout==5.0.1
# via aiohttp
attrs==25.4.0
# via
# aiohttp
# dm-tree
# jsonlines
# jsonschema
# referencing
# rerun-sdk
av==15.1.0
# via lerobot
bddl==1.0.1
# via libero
certifi==2025.10.5
# via
# lerobot
# qwen-vl-utils
certifi==2026.2.25
# via
# httpcore
# httpx
# requests
# sentry-sdk
cffi==2.0.0
# via pymunk
cfgv==3.5.0
cfgv==3.4.0
# via pre-commit
charset-normalizer==3.4.5
charset-normalizer==3.4.4
# via requests
click==8.3.1
click==8.3.0
# via
# typer
# uvicorn
# wandb
cloudpickle==3.1.2
# via gymnasium
cmake==4.1.3
cloudpickle==3.1.1
# via
# gymnasium
# libero
cmake==4.1.0
# via lerobot
cmeel==0.59.0
cmeel==0.57.3
# via
# cmeel-assimp
# cmeel-boost
@@ -105,17 +108,15 @@ cmeel-zlib==1.3.1
# via cmeel-assimp
coal-library==3.0.1
# via pin
contourpy==1.3.3
# via
# lerobot
# matplotlib
coverage[toml]==7.13.4
contourpy==1.3.2
# via matplotlib
coverage[toml]==7.11.0
# via pytest-cov
cycler==0.12.1
# via matplotlib
datasets==4.6.1
datasets==4.1.1
# via lerobot
debugpy==1.8.20
debugpy==1.8.17
# via lerobot
decorator==5.2.1
# via ipython
@@ -129,7 +130,7 @@ dill==0.4.0
# multiprocess
distlib==0.4.0
# via virtualenv
dm-control==1.0.37
dm-control==1.0.34
# via gym-aloha
dm-env==1.6
# via dm-control
@@ -137,55 +138,69 @@ dm-tree==0.1.9
# via
# dm-control
# dm-env
# lerobot
docopt==0.6.2
# via num2words
draccus==0.10.0
# via lerobot
dynamixel-sdk==3.8.4
# via lerobot
easydict==1.13
# via libero
egl-probe @ git+https://github.com/huggingface/egl_probe.git
# via
# libero
# robomimic
eigenpy==3.10.3
# via coal-library
einops==0.8.2
# via lerobot
eiquadprog==1.2.9
# via placo
etils[epath,epy]==1.14.0
# via mujoco
executing==2.2.1
# via stack-data
faker==34.0.2
# via lerobot
farama-notifications==0.0.4
# via gymnasium
fastapi==0.135.1
einops==0.8.1
# via
# lerobot
# teleop
# libero
eiquadprog==1.2.9
# via placo
etils[epath,epy]==1.13.0
# via mujoco
exceptiongroup==1.3.0
# via
# anyio
# ipython
# pytest
executing==2.2.1
# via stack-data
farama-notifications==0.0.4
# via gymnasium
fastapi==0.119.1
# via teleop
fastjsonschema==2.21.2
# via nbformat
feetech-servo-sdk==1.0.0
# via lerobot
filelock==3.25.0
filelock==3.20.0
# via
# datasets
# diffusers
# huggingface-hub
# python-discovery
# torch
# transformers
# virtualenv
fonttools==4.61.1
fonttools==4.60.1
# via matplotlib
frozenlist==1.8.0
# via
# aiohttp
# aiosignal
fsspec[http]==2026.2.0
fsspec[http]==2025.9.0
# via
# datasets
# etils
# huggingface-hub
# torch
future==1.0.0
# via libero
gitdb==4.0.12
# via gitpython
gitpython==3.1.46
gitpython==3.1.45
# via wandb
glfw==2.10.0
# via
@@ -197,6 +212,7 @@ grpcio==1.73.1
# lerobot
# reachy2-sdk
# reachy2-sdk-api
# tensorboard
grpcio-tools==1.73.1
# via
# lerobot
@@ -207,67 +223,71 @@ gym-hil==0.1.13
# via lerobot
gym-pusht==0.1.6
# via lerobot
gymnasium==1.2.3
gymnasium==1.2.1
# via
# gym-aloha
# gym-hil
# gym-pusht
# lerobot
# libero
# metaworld
h11==0.16.0
# via
# httpcore
# uvicorn
# via uvicorn
h5py==3.15.1
# via robomimic
hebi-py==2.11.0
# via lerobot
hf-xet==1.3.2
hf-transfer==0.1.9
# via huggingface-hub
hf-xet==1.1.10
# via huggingface-hub
hidapi==0.14.0.post4
# via
# gym-hil
# lerobot
httpcore==1.0.9
# via httpx
httptools==0.7.1
# via uvicorn
httpx==0.28.1
# via
# datasets
# huggingface-hub
huggingface-hub==1.6.0
huggingface-hub[cli,hf-transfer]==0.35.3
# via
# accelerate
# datasets
# diffusers
# lerobot
# peft
# timm
# tokenizers
# transformers
identify==2.6.17
hydra-core==1.3.2
# via libero
identify==2.6.15
# via pre-commit
idna==3.11
# via
# anyio
# httpx
# requests
# yarl
imageio[ffmpeg]==2.37.2
imageio[ffmpeg]==2.37.0
# via
# gym-aloha
# gym-hil
# lerobot
# metaworld
# robomimic
# scikit-image
imageio-ffmpeg==0.6.0
# via imageio
importlib-metadata==8.7.1
# via
# imageio
# robomimic
importlib-metadata==8.7.0
# via diffusers
importlib-resources==6.5.2
# via etils
iniconfig==2.3.0
# via pytest
ipython==9.11.0
inquirerpy==0.3.4
# via huggingface-hub
ipython==8.37.0
# via meshcat
ipython-pygments-lexers==1.1.1
# via ipython
ischedule==1.2.7
# via placo
jedi==0.19.2
@@ -276,24 +296,44 @@ jinja2==3.1.6
# via torch
jsonlines==4.0.0
# via lerobot
jsonschema==4.25.1
# via nbformat
jsonschema-specifications==2025.9.1
# via jsonschema
jupyter-core==5.9.1
# via nbformat
jupytext==1.18.1
# via bddl
kiwisolver==1.4.9
# via matplotlib
labmaze==1.0.6
# via dm-control
lazy-loader==0.5
lazy-loader==0.4
# via scikit-image
librt==0.8.1
# via mypy
libero @ git+https://github.com/huggingface/lerobot-libero.git@main
# via lerobot
llvmlite==0.45.1
# via numba
lxml==6.0.2
# via dm-control
markdown==3.9
# via tensorboard
markdown-it-py==4.0.0
# via rich
# via
# jupytext
# mdit-py-plugins
markupsafe==3.0.3
# via jinja2
matplotlib==3.10.8
# via lerobot
# via
# jinja2
# werkzeug
matplotlib==3.10.7
# via
# lerobot
# libero
matplotlib-inline==0.2.1
# via ipython
mdit-py-plugins==0.5.0
# via jupytext
mdurl==0.1.2
# via markdown-it-py
mergedeep==1.3.4
@@ -306,35 +346,41 @@ mock-serial==0.0.1
# via lerobot
mpmath==1.3.0
# via sympy
mujoco==3.5.0
mujoco==3.3.7
# via
# dm-control
# gym-aloha
# gym-hil
# libero
# metaworld
multidict==6.7.1
# robosuite
multidict==6.7.0
# via
# aiohttp
# yarl
multiprocess==0.70.18
multiprocess==0.70.16
# via datasets
mypy==1.19.1
# via lerobot
mypy-extensions==1.1.0
# via typing-inspect
nbformat==5.10.4
# via jupytext
networkx==3.4.2
# via
# mypy
# typing-inspect
networkx==3.6.1
# via
# bddl
# scikit-image
# torch
nodeenv==1.10.0
ninja==1.13.0
# via lerobot
nodeenv==1.9.1
# via pre-commit
num2words==0.5.14
# via lerobot
numba==0.62.1
# via robosuite
numpy==2.2.6
# via
# accelerate
# bddl
# cmeel-boost
# contourpy
# datasets
@@ -343,14 +389,16 @@ numpy==2.2.6
# dm-env
# dm-tree
# gymnasium
# h5py
# hebi-py
# imageio
# labmaze
# lerobot
# libero
# matplotlib
# meshcat
# metaworld
# mujoco
# numba
# opencv-python
# opencv-python-headless
# pandas
@@ -358,18 +406,26 @@ numpy==2.2.6
# pyquaternion
# reachy2-sdk
# rerun-sdk
# robomimic
# robosuite
# scikit-image
# scipy
# shapely
# teleop
# tensorboard
# tensorboardx
# tifffile
# torchvision
# transformers
# transforms3d
opencv-python==4.13.0.92
omegaconf==2.3.0
# via hydra-core
opencv-python==4.12.0.88
# via
# gym-pusht
# libero
# reachy2-sdk
# robosuite
opencv-python-headless==4.12.0.88
# via lerobot
orderly-set==5.5.0
@@ -379,87 +435,97 @@ packaging==25.0
# accelerate
# datasets
# huggingface-hub
# hydra-core
# jupytext
# lazy-loader
# lerobot
# matplotlib
# peft
# pytest
# qwen-vl-utils
# reachy2-sdk
# scikit-image
# tensorboard
# tensorboardx
# transformers
# wandb
pandas==2.3.3
# via
# datasets
# lerobot
parso==0.8.6
parso==0.8.5
# via jedi
pathspec==1.0.4
# via mypy
peft==0.18.1
peft==0.17.1
# via lerobot
pexpect==4.9.0
# via ipython
pillow==12.1.1
pfzy==0.3.4
# via inquirerpy
pillow==12.0.0
# via
# diffusers
# imageio
# lerobot
# matplotlib
# meshcat
# qwen-vl-utils
# rerun-sdk
# robosuite
# scikit-image
# tensorboard
# torchvision
pin==3.4.0
# via placo
placo==0.9.16
placo==0.9.14
# via lerobot
platformdirs==4.9.4
platformdirs==4.5.0
# via
# python-discovery
# jupyter-core
# virtualenv
# wandb
pluggy==1.6.0
# via
# pytest
# pytest-cov
pre-commit==4.5.1
pre-commit==4.3.0
# via lerobot
prompt-toolkit==3.0.52
# via ipython
# via
# inquirerpy
# ipython
propcache==0.4.1
# via
# aiohttp
# yarl
protobuf==6.31.1
protobuf==6.31.0
# via
# dm-control
# grpcio-tools
# lerobot
# reachy2-sdk
# reachy2-sdk-api
# tensorboard
# tensorboardx
# wandb
psutil==7.2.2
psutil==7.1.1
# via
# accelerate
# imageio
# peft
# robomimic
ptyprocess==0.7.0
# via pexpect
pure-eval==0.2.3
# via stack-data
pyarrow==23.0.1
pyarrow==21.0.0
# via
# datasets
# rerun-sdk
pycparser==3.0
pycparser==2.23
# via cffi
pydantic==2.12.5
pydantic==2.12.3
# via
# fastapi
# wandb
pydantic-core==2.41.5
pydantic-core==2.41.4
# via pydantic
pygame==2.6.1
# via
@@ -469,35 +535,33 @@ pygame==2.6.1
pygments==2.19.2
# via
# ipython
# ipython-pygments-lexers
# pytest
# rich
pymunk==6.11.1
# via
# gym-pusht
# lerobot
pyngrok==7.5.1
pyngrok==7.4.1
# via meshcat
pynput==1.8.1
# via
# gym-hil
# lerobot
pyobjc-core==12.1
pyobjc-core==12.0
# via
# pyobjc-framework-applicationservices
# pyobjc-framework-cocoa
# pyobjc-framework-coretext
# pyobjc-framework-quartz
pyobjc-framework-applicationservices==12.1
pyobjc-framework-applicationservices==12.0
# via pynput
pyobjc-framework-cocoa==12.1
pyobjc-framework-cocoa==12.0
# via
# pyobjc-framework-applicationservices
# pyobjc-framework-coretext
# pyobjc-framework-quartz
pyobjc-framework-coretext==12.1
pyobjc-framework-coretext==12.0
# via pyobjc-framework-applicationservices
pyobjc-framework-quartz==12.1
pyobjc-framework-quartz==12.0
# via
# pynput
# pyobjc-framework-applicationservices
@@ -506,13 +570,13 @@ pyopengl==3.1.10
# via
# dm-control
# mujoco
pyparsing==3.3.2
pyparsing==3.2.5
# via
# dm-control
# matplotlib
pyquaternion==0.9.9
# via reachy2-sdk
pyrealsense2-macosx==2.56.5
pyrealsense2-macosx==2.54.2
# via lerobot
pyserial==3.5
# via
@@ -521,6 +585,7 @@ pyserial==3.5
# lerobot
pytest==8.4.2
# via
# bddl
# lerobot
# pytest-cov
# pytest-timeout
@@ -531,14 +596,11 @@ pytest-timeout==2.4.0
# via lerobot
python-dateutil==2.9.0.post0
# via
# faker
# matplotlib
# pandas
python-discovery==1.1.1
# via virtualenv
python-dotenv==1.2.2
python-dotenv==1.1.1
# via uvicorn
pytz==2026.1.post1
pytz==2025.2
# via pandas
pyyaml==6.0.3
# via
@@ -547,10 +609,13 @@ pyyaml==6.0.3
# draccus
# hebi-py
# huggingface-hub
# jupytext
# omegaconf
# peft
# pre-commit
# pyngrok
# pyyaml-include
# timm
# transformers
# uvicorn
# wandb
@@ -560,13 +625,15 @@ pyzmq==27.1.0
# via
# lerobot
# meshcat
qwen-vl-utils==0.0.14
# via lerobot
reachy2-sdk==1.0.15
reachy2-sdk==1.0.14
# via lerobot
reachy2-sdk-api==1.0.21
# via reachy2-sdk
regex==2026.2.28
referencing==0.37.0
# via
# jsonschema
# jsonschema-specifications
regex==2025.10.23
# via
# diffusers
# transformers
@@ -575,150 +642,184 @@ requests==2.32.5
# datasets
# diffusers
# dm-control
# qwen-vl-utils
# huggingface-hub
# teleop
# transformers
# wandb
rerun-sdk==0.26.2
rerun-sdk==0.26.1
# via lerobot
rhoban-cmeel-jsoncpp==1.9.4.9
# via placo
rich==14.3.3
# via typer
safetensors==0.7.0
robomimic==0.2.0
# via libero
robosuite==1.4.0
# via libero
rpds-py==0.28.0
# via
# jsonschema
# referencing
safetensors==0.6.2
# via
# accelerate
# diffusers
# lerobot
# peft
# timm
# transformers
scikit-image==0.25.2
# via
# gym-pusht
# lerobot
scipy==1.17.1
scipy==1.15.3
# via
# dm-control
# lerobot
# metaworld
# robosuite
# scikit-image
# torchdiffeq
sentry-sdk==2.54.0
sentry-sdk==2.42.1
# via wandb
shapely==2.1.2
# via gym-pusht
shellingham==1.5.4
# via typer
six==1.17.0
# via
# pynput
# python-dateutil
smmap==5.0.3
smmap==5.0.2
# via gitdb
sniffio==1.3.1
# via anyio
stack-data==0.6.3
# via ipython
starlette==0.52.1
starlette==0.48.0
# via fastapi
sympy==1.14.0
# via torch
teleop==0.1.4
teleop==0.1.2
# via lerobot
termcolor==3.3.0
# via lerobot
tifffile==2026.3.3
tensorboard==2.20.0
# via robomimic
tensorboard-data-server==0.7.2
# via tensorboard
tensorboardx==2.6.4
# via robomimic
termcolor==3.1.0
# via
# lerobot
# robomimic
thop==0.1.1.post2209072238
# via libero
tifffile==2025.5.10
# via scikit-image
tokenizers==0.22.2
timm==1.0.20
# via lerobot
tokenizers==0.22.1
# via transformers
toml==0.10.2
# via draccus
torch==2.10.0
tomli==2.3.0
# via
# cmeel
# coverage
# jupytext
# pytest
torch==2.7.1
# via
# accelerate
# lerobot
# peft
# torchdiffeq
# robomimic
# thop
# timm
# torchvision
torchcodec==0.10.0
torchcodec==0.5
# via lerobot
torchdiffeq==0.2.5
# via lerobot
torchvision==0.25.0
# via lerobot
tornado==6.5.4
torchvision==0.22.1
# via
# lerobot
# robomimic
# timm
tornado==6.5.2
# via meshcat
tqdm==4.67.3
tqdm==4.67.1
# via
# datasets
# dm-control
# huggingface-hub
# peft
# robomimic
# transformers
traitlets==5.14.3
# via
# ipython
# jupyter-core
# matplotlib-inline
transformers==5.3.0
# nbformat
transformers==4.57.1
# via
# lerobot
# libero
# peft
transforms3d==0.4.2
# via teleop
typer==0.24.1
# via
# huggingface-hub
# transformers
typing-extensions==4.15.0
# via
# aiosignal
# anyio
# etils
# faker
# exceptiongroup
# fastapi
# gymnasium
# huggingface-hub
# mypy
# ipython
# multidict
# pydantic
# pydantic-core
# referencing
# rerun-sdk
# starlette
# torch
# typing-inspect
# typing-inspection
# uvicorn
# virtualenv
# wandb
typing-inspect==0.9.0
# via draccus
typing-inspection==0.4.2
# via
# fastapi
# pydantic
tzdata==2025.3
# via pydantic
tzdata==2025.2
# via pandas
u-msgpack-python==2.8.0
# via meshcat
urllib3==2.6.3
urllib3==2.5.0
# via
# requests
# sentry-sdk
uvicorn[standard]==0.41.0
uvicorn[standard]==0.38.0
# via teleop
uvloop==0.22.1
# via uvicorn
virtualenv==21.1.0
virtualenv==20.35.3
# via pre-commit
wandb==0.24.2
# via lerobot
wandb==0.21.4
# via
# lerobot
# libero
watchfiles==1.1.1
# via uvicorn
wcwidth==0.6.0
wcwidth==0.2.14
# via prompt-toolkit
websocket-client==1.9.0
# via teleop
websockets==16.0
websockets==15.0.1
# via uvicorn
wrapt==2.1.2
werkzeug==3.1.3
# via tensorboard
wrapt==2.0.0
# via dm-tree
xxhash==3.6.0
# via datasets
yarl==1.23.0
yarl==1.22.0
# via aiohttp
zipp==3.23.0
# via
+187 -208
View File
@@ -1,12 +1,12 @@
#
# This file is autogenerated by pip-compile with Python 3.12
# This file is autogenerated by pip-compile with Python 3.10
# by the following command:
#
# pip-compile --output-file=requirements-ubuntu.txt requirements.in
#
-e .[all]
# via -[all]
absl-py==2.4.0
absl-py==2.3.1
# via
# dm-control
# dm-env
@@ -14,33 +14,30 @@ absl-py==2.4.0
# labmaze
# mujoco
# tensorboard
accelerate==1.13.0
accelerate==1.11.0
# via
# lerobot
# peft
aiohappyeyeballs==2.6.1
# via aiohttp
aiohttp==3.13.3
aiohttp==3.13.1
# via fsspec
aiosignal==1.4.0
# via aiohttp
annotated-doc==0.0.4
# via
# fastapi
# typer
annotated-types==0.7.0
# via pydantic
antlr4-python3-runtime==4.9.3
# via
# hydra-core
# omegaconf
anyio==4.12.1
anyio==4.11.0
# via
# httpx
# starlette
# watchfiles
asttokens==3.0.1
asttokens==3.0.0
# via stack-data
async-timeout==5.0.1
# via aiohttp
attrs==25.4.0
# via
# aiohttp
@@ -50,35 +47,30 @@ attrs==25.4.0
# referencing
# rerun-sdk
av==15.1.0
# via
# lerobot
# qwen-vl-utils
# via lerobot
bddl==1.0.1
# via hf-libero
certifi==2026.2.25
# via libero
certifi==2025.10.5
# via
# httpcore
# httpx
# requests
# sentry-sdk
cffi==2.0.0
# via pymunk
cfgv==3.5.0
cfgv==3.4.0
# via pre-commit
charset-normalizer==3.4.5
charset-normalizer==3.4.4
# via requests
click==8.3.1
click==8.3.0
# via
# typer
# uvicorn
# wandb
cloudpickle==3.1.2
cloudpickle==3.1.1
# via
# gymnasium
# hf-libero
cmake==4.1.3
# libero
cmake==4.1.0
# via lerobot
cmeel==0.59.0
cmeel==0.57.3
# via
# cmeel-assimp
# cmeel-boost
@@ -116,24 +108,20 @@ cmeel-zlib==1.3.1
# via cmeel-assimp
coal-library==3.0.1
# via pin
contourpy==1.3.3
# via
# lerobot
# matplotlib
coverage[toml]==7.13.4
contourpy==1.3.2
# via matplotlib
coverage[toml]==7.11.0
# via pytest-cov
cuda-bindings==12.9.4
# via torch
cuda-pathfinder==1.4.1
# via cuda-bindings
cycler==0.12.1
# via matplotlib
datasets==4.6.1
datasets==4.1.1
# via lerobot
debugpy==1.8.20
debugpy==1.8.17
# via lerobot
decorator==5.2.1
# via ipython
decord==0.6.0
# via lerobot
deepdiff==8.6.1
# via lerobot
diffusers==0.35.2
@@ -144,7 +132,7 @@ dill==0.4.0
# multiprocess
distlib==0.4.0
# via virtualenv
dm-control==1.0.37
dm-control==1.0.34
# via gym-aloha
dm-env==1.6
# via dm-control
@@ -152,6 +140,7 @@ dm-tree==0.1.9
# via
# dm-control
# dm-env
# lerobot
docopt==0.6.2
# via num2words
draccus==0.10.0
@@ -159,60 +148,66 @@ draccus==0.10.0
dynamixel-sdk==3.8.4
# via lerobot
easydict==1.13
# via hf-libero
egl-probe==1.0.2
# via robomimic
# via libero
egl-probe @ git+https://github.com/huggingface/egl_probe.git
# via
# libero
# robomimic
eigenpy==3.10.3
# via coal-library
einops==0.8.2
einops==0.8.1
# via
# hf-libero
# flash-attn
# lerobot
# libero
eiquadprog==1.2.9
# via placo
etils[epath,epy]==1.14.0
etils[epath,epy]==1.13.0
# via mujoco
evdev==1.9.3
evdev==1.9.2
# via pynput
exceptiongroup==1.3.0
# via
# anyio
# ipython
# pytest
executing==2.2.1
# via stack-data
faker==34.0.2
# via lerobot
farama-notifications==0.0.4
# via gymnasium
fastapi==0.135.1
# via
# lerobot
# teleop
fastapi==0.119.1
# via teleop
fastjsonschema==2.21.2
# via nbformat
feetech-servo-sdk==1.0.0
# via lerobot
filelock==3.25.0
filelock==3.20.0
# via
# datasets
# diffusers
# huggingface-hub
# python-discovery
# torch
# transformers
# virtualenv
fonttools==4.61.1
flash-attn==2.8.3
# via lerobot
fonttools==4.60.1
# via matplotlib
frozenlist==1.8.0
# via
# aiohttp
# aiosignal
fsspec[http]==2026.2.0
fsspec[http]==2025.9.0
# via
# datasets
# etils
# huggingface-hub
# torch
future==1.0.0
# via hf-libero
# via libero
gitdb==4.0.12
# via gitpython
gitpython==3.1.46
gitpython==3.1.45
# via wandb
glfw==2.10.0
# via
@@ -235,60 +230,50 @@ gym-hil==0.1.13
# via lerobot
gym-pusht==0.1.6
# via lerobot
gymnasium==1.2.3
gymnasium==1.2.1
# via
# gym-aloha
# gym-hil
# gym-pusht
# hf-libero
# lerobot
# libero
# metaworld
h11==0.16.0
# via
# httpcore
# uvicorn
h5py==3.16.0
# via uvicorn
h5py==3.15.1
# via robomimic
hebi-py==2.11.0
# via lerobot
hf-egl-probe==1.0.2
# via hf-libero
hf-libero==0.1.3
# via lerobot
hf-xet==1.3.2
hf-transfer==0.1.9
# via huggingface-hub
hf-xet==1.1.10
# via huggingface-hub
hidapi==0.14.0.post4
# via
# gym-hil
# lerobot
httpcore==1.0.9
# via httpx
httptools==0.7.1
# via uvicorn
httpx==0.28.1
# via
# datasets
# huggingface-hub
huggingface-hub==1.6.0
huggingface-hub[cli,hf-transfer]==0.35.3
# via
# accelerate
# datasets
# diffusers
# lerobot
# peft
# timm
# tokenizers
# transformers
hydra-core==1.3.2
# via hf-libero
identify==2.6.17
# via libero
identify==2.6.15
# via pre-commit
idna==3.11
# via
# anyio
# httpx
# requests
# yarl
imageio[ffmpeg]==2.37.2
imageio[ffmpeg]==2.37.0
# via
# gym-aloha
# gym-hil
@@ -300,14 +285,16 @@ imageio-ffmpeg==0.6.0
# via
# imageio
# robomimic
importlib-metadata==8.7.1
importlib-metadata==8.7.0
# via diffusers
importlib-resources==6.5.2
# via etils
iniconfig==2.3.0
# via pytest
ipython==9.11.0
inquirerpy==0.3.4
# via huggingface-hub
ipython==8.37.0
# via meshcat
ipython-pygments-lexers==1.1.1
# via ipython
ischedule==1.2.7
# via placo
jedi==0.19.2
@@ -316,41 +303,40 @@ jinja2==3.1.6
# via torch
jsonlines==4.0.0
# via lerobot
jsonschema==4.26.0
jsonschema==4.25.1
# via nbformat
jsonschema-specifications==2025.9.1
# via jsonschema
jupyter-core==5.9.1
# via nbformat
jupytext==1.19.1
jupytext==1.18.1
# via bddl
kiwisolver==1.4.9
# via matplotlib
labmaze==1.0.6
# via dm-control
lazy-loader==0.5
lazy-loader==0.4
# via scikit-image
librt==0.8.1
# via mypy
llvmlite==0.46.0
libero @ git+https://github.com/huggingface/lerobot-libero.git@main
# via lerobot
llvmlite==0.45.1
# via numba
lxml==6.0.2
# via dm-control
markdown==3.10.2
markdown==3.9
# via tensorboard
markdown-it-py==4.0.0
# via
# jupytext
# mdit-py-plugins
# rich
markupsafe==3.0.3
# via
# jinja2
# werkzeug
matplotlib==3.10.8
matplotlib==3.10.7
# via
# hf-libero
# lerobot
# libero
matplotlib-inline==0.2.1
# via ipython
mdit-py-plugins==0.5.0
@@ -367,38 +353,36 @@ mock-serial==0.0.1
# via lerobot
mpmath==1.3.0
# via sympy
mujoco==3.5.0
mujoco==3.3.7
# via
# dm-control
# gym-aloha
# gym-hil
# hf-libero
# libero
# metaworld
# robosuite
multidict==6.7.1
multidict==6.7.0
# via
# aiohttp
# yarl
multiprocess==0.70.18
multiprocess==0.70.16
# via datasets
mypy==1.19.1
# via lerobot
mypy-extensions==1.1.0
# via
# mypy
# typing-inspect
# via typing-inspect
nbformat==5.10.4
# via jupytext
networkx==3.6.1
networkx==3.4.2
# via
# bddl
# scikit-image
# torch
nodeenv==1.10.0
ninja==1.13.0
# via lerobot
nodeenv==1.9.1
# via pre-commit
num2words==0.5.14
# via lerobot
numba==0.64.0
numba==0.62.1
# via robosuite
numpy==2.2.6
# via
@@ -407,6 +391,7 @@ numpy==2.2.6
# cmeel-boost
# contourpy
# datasets
# decord
# diffusers
# dm-control
# dm-env
@@ -414,10 +399,9 @@ numpy==2.2.6
# gymnasium
# h5py
# hebi-py
# hf-libero
# imageio
# labmaze
# lerobot
# libero
# matplotlib
# meshcat
# metaworld
@@ -442,51 +426,49 @@ numpy==2.2.6
# torchvision
# transformers
# transforms3d
nvidia-cublas-cu12==12.8.4.1
nvidia-cublas-cu12==12.6.4.1
# via
# nvidia-cudnn-cu12
# nvidia-cusolver-cu12
# torch
nvidia-cuda-cupti-cu12==12.8.90
nvidia-cuda-cupti-cu12==12.6.80
# via torch
nvidia-cuda-nvrtc-cu12==12.8.93
nvidia-cuda-nvrtc-cu12==12.6.77
# via torch
nvidia-cuda-runtime-cu12==12.8.90
nvidia-cuda-runtime-cu12==12.6.77
# via torch
nvidia-cudnn-cu12==9.10.2.21
nvidia-cudnn-cu12==9.5.1.17
# via torch
nvidia-cufft-cu12==11.3.3.83
nvidia-cufft-cu12==11.3.0.4
# via torch
nvidia-cufile-cu12==1.13.1.3
nvidia-cufile-cu12==1.11.1.6
# via torch
nvidia-curand-cu12==10.3.9.90
nvidia-curand-cu12==10.3.7.77
# via torch
nvidia-cusolver-cu12==11.7.3.90
nvidia-cusolver-cu12==11.7.1.2
# via torch
nvidia-cusparse-cu12==12.5.8.93
nvidia-cusparse-cu12==12.5.4.2
# via
# nvidia-cusolver-cu12
# torch
nvidia-cusparselt-cu12==0.7.1
nvidia-cusparselt-cu12==0.6.3
# via torch
nvidia-nccl-cu12==2.27.5
nvidia-nccl-cu12==2.26.2
# via torch
nvidia-nvjitlink-cu12==12.8.93
nvidia-nvjitlink-cu12==12.6.85
# via
# nvidia-cufft-cu12
# nvidia-cusolver-cu12
# nvidia-cusparse-cu12
# torch
nvidia-nvshmem-cu12==3.4.5
# via torch
nvidia-nvtx-cu12==12.8.90
nvidia-nvtx-cu12==12.6.77
# via torch
omegaconf==2.3.0
# via hydra-core
opencv-python==4.13.0.92
opencv-python==4.12.0.88
# via
# gym-pusht
# hf-libero
# libero
# reachy2-sdk
# robosuite
opencv-python-headless==4.12.0.88
@@ -505,7 +487,6 @@ packaging==25.0
# matplotlib
# peft
# pytest
# qwen-vl-utils
# reachy2-sdk
# scikit-image
# tensorboard
@@ -516,21 +497,21 @@ pandas==2.3.3
# via
# datasets
# lerobot
parso==0.8.6
parso==0.8.5
# via jedi
pathspec==1.0.4
# via mypy
peft==0.18.1
peft==0.17.1
# via lerobot
pexpect==4.9.0
# via ipython
pillow==12.1.1
pfzy==0.3.4
# via inquirerpy
pillow==12.0.0
# via
# diffusers
# imageio
# lerobot
# matplotlib
# meshcat
# qwen-vl-utils
# rerun-sdk
# robosuite
# scikit-image
@@ -538,27 +519,28 @@ pillow==12.1.1
# torchvision
pin==3.4.0
# via placo
placo==0.9.16
placo==0.9.14
# via lerobot
platformdirs==4.9.4
platformdirs==4.5.0
# via
# jupyter-core
# python-discovery
# virtualenv
# wandb
pluggy==1.6.0
# via
# pytest
# pytest-cov
pre-commit==4.5.1
pre-commit==4.3.0
# via lerobot
prompt-toolkit==3.0.52
# via ipython
# via
# inquirerpy
# ipython
propcache==0.4.1
# via
# aiohttp
# yarl
protobuf==6.31.1
protobuf==6.31.0
# via
# dm-control
# grpcio-tools
@@ -568,7 +550,7 @@ protobuf==6.31.1
# tensorboard
# tensorboardx
# wandb
psutil==7.2.2
psutil==7.1.1
# via
# accelerate
# imageio
@@ -578,17 +560,17 @@ ptyprocess==0.7.0
# via pexpect
pure-eval==0.2.3
# via stack-data
pyarrow==23.0.1
pyarrow==21.0.0
# via
# datasets
# rerun-sdk
pycparser==3.0
pycparser==2.23
# via cffi
pydantic==2.12.5
pydantic==2.12.3
# via
# fastapi
# wandb
pydantic-core==2.41.5
pydantic-core==2.41.4
# via pydantic
pygame==2.6.1
# via
@@ -598,14 +580,12 @@ pygame==2.6.1
pygments==2.19.2
# via
# ipython
# ipython-pygments-lexers
# pytest
# rich
pymunk==6.11.1
# via
# gym-pusht
# lerobot
pyngrok==7.5.1
pyngrok==7.4.1
# via meshcat
pynput==1.8.1
# via
@@ -615,7 +595,7 @@ pyopengl==3.1.10
# via
# dm-control
# mujoco
pyparsing==3.3.2
pyparsing==3.2.5
# via
# dm-control
# matplotlib
@@ -641,16 +621,13 @@ pytest-timeout==2.4.0
# via lerobot
python-dateutil==2.9.0.post0
# via
# faker
# matplotlib
# pandas
python-discovery==1.1.1
# via virtualenv
python-dotenv==1.2.2
python-dotenv==1.1.1
# via uvicorn
python-xlib==0.33
# via pynput
pytz==2026.1.post1
pytz==2025.2
# via pandas
pyyaml==6.0.3
# via
@@ -665,6 +642,7 @@ pyyaml==6.0.3
# pre-commit
# pyngrok
# pyyaml-include
# timm
# transformers
# uvicorn
# wandb
@@ -674,9 +652,7 @@ pyzmq==27.1.0
# via
# lerobot
# meshcat
qwen-vl-utils==0.0.14
# via lerobot
reachy2-sdk==1.0.15
reachy2-sdk==1.0.14
# via lerobot
reachy2-sdk-api==1.0.21
# via reachy2-sdk
@@ -684,7 +660,7 @@ referencing==0.37.0
# via
# jsonschema
# jsonschema-specifications
regex==2026.2.28
regex==2025.10.23
# via
# diffusers
# transformers
@@ -693,62 +669,60 @@ requests==2.32.5
# datasets
# diffusers
# dm-control
# qwen-vl-utils
# huggingface-hub
# teleop
# transformers
# wandb
rerun-sdk==0.26.2
rerun-sdk==0.26.1
# via lerobot
rhoban-cmeel-jsoncpp==1.9.4.9
# via placo
rich==14.3.3
# via typer
robomimic==0.2.0
# via hf-libero
# via libero
robosuite==1.4.0
# via hf-libero
rpds-py==0.30.0
# via libero
rpds-py==0.28.0
# via
# jsonschema
# referencing
safetensors==0.7.0
safetensors==0.6.2
# via
# accelerate
# diffusers
# lerobot
# peft
# timm
# transformers
scikit-image==0.25.2
# via
# gym-pusht
# lerobot
scipy==1.17.1
scipy==1.15.3
# via
# dm-control
# lerobot
# metaworld
# robosuite
# scikit-image
# torchdiffeq
sentry-sdk==2.54.0
sentry-sdk==2.42.1
# via wandb
shapely==2.1.2
# via gym-pusht
shellingham==1.5.4
# via typer
six==1.17.0
# via
# pynput
# python-dateutil
# python-xlib
smmap==5.0.3
smmap==5.0.2
# via gitdb
sniffio==1.3.1
# via anyio
stack-data==0.6.3
# via ipython
starlette==0.52.1
starlette==0.48.0
# via fastapi
sympy==1.14.0
# via torch
teleop==0.1.4
teleop==0.1.2
# via lerobot
tensorboard==2.20.0
# via robomimic
@@ -756,38 +730,46 @@ tensorboard-data-server==0.7.2
# via tensorboard
tensorboardx==2.6.4
# via robomimic
termcolor==3.3.0
termcolor==3.1.0
# via
# lerobot
# robomimic
thop==0.1.1.post2209072238
# via hf-libero
tifffile==2026.3.3
# via libero
tifffile==2025.5.10
# via scikit-image
tokenizers==0.22.2
timm==1.0.20
# via lerobot
tokenizers==0.22.1
# via transformers
toml==0.10.2
# via draccus
torch==2.10.0
tomli==2.3.0
# via
# cmeel
# coverage
# jupytext
# pytest
torch==2.7.1
# via
# accelerate
# flash-attn
# lerobot
# peft
# robomimic
# thop
# torchdiffeq
# timm
# torchvision
torchcodec==0.10.0
torchcodec==0.5
# via lerobot
torchdiffeq==0.2.5
# via lerobot
torchvision==0.25.0
torchvision==0.22.1
# via
# lerobot
# robomimic
tornado==6.5.4
# timm
tornado==6.5.2
# via meshcat
tqdm==4.67.3
tqdm==4.67.1
# via
# datasets
# dm-control
@@ -801,29 +783,26 @@ traitlets==5.14.3
# jupyter-core
# matplotlib-inline
# nbformat
transformers==5.3.0
transformers==4.57.1
# via
# hf-libero
# lerobot
# libero
# peft
transforms3d==0.4.2
# via teleop
triton==3.6.0
triton==3.3.1
# via torch
typer==0.24.1
# via
# huggingface-hub
# transformers
typing-extensions==4.15.0
# via
# aiosignal
# anyio
# etils
# faker
# exceptiongroup
# fastapi
# gymnasium
# huggingface-hub
# mypy
# ipython
# multidict
# pydantic
# pydantic-core
# referencing
@@ -832,46 +811,46 @@ typing-extensions==4.15.0
# torch
# typing-inspect
# typing-inspection
# uvicorn
# virtualenv
# wandb
typing-inspect==0.9.0
# via draccus
typing-inspection==0.4.2
# via
# fastapi
# pydantic
tzdata==2025.3
# via pydantic
tzdata==2025.2
# via pandas
u-msgpack-python==2.8.0
# via meshcat
urllib3==2.6.3
urllib3==2.5.0
# via
# requests
# sentry-sdk
uvicorn[standard]==0.41.0
uvicorn[standard]==0.38.0
# via teleop
uvloop==0.22.1
# via uvicorn
virtualenv==21.1.0
virtualenv==20.35.3
# via pre-commit
wandb==0.24.2
wandb==0.21.4
# via
# hf-libero
# lerobot
# libero
watchfiles==1.1.1
# via uvicorn
wcwidth==0.6.0
wcwidth==0.2.14
# via prompt-toolkit
websocket-client==1.9.0
# via teleop
websockets==16.0
websockets==15.0.1
# via uvicorn
werkzeug==3.1.6
werkzeug==3.1.3
# via tensorboard
wrapt==2.1.2
wrapt==2.0.0
# via dm-tree
xxhash==3.6.0
# via datasets
yarl==1.23.0
yarl==1.22.0
# via aiohttp
zipp==3.23.0
# via
+4 -4
View File
@@ -1,9 +1,9 @@
# requirements.in
# requirements-macos.txt was generated on macOS and is platform-specific (macOS 26.3.1 25D2128 arm64).
# Darwin MacBook-Pro.local 25.3.0 Darwin Kernel Version 25.3.0: Wed Jan 28 20:54:55 PST 2026; root:xnu-12377.91.3~2/RELEASE_ARM64_T8132 arm64
# requirements-macos.txt was generated on macOS and is platform-specific (macOS 26.0.1 25A362 arm64).
# Darwin MacBook-Pro.local 25.0.0 Darwin Kernel Version 25.0.0: Wed Sep 17 21:42:08 PDT 2025; root:xnu-12377.1.9~141/RELEASE_ARM64_T8132 arm64
# requirements-ubuntu.txt was generated on Linux and is platform-specific (Ubuntu 24.04.4 LTS x86_64).
# Linux lerobot-linux 6.17.0-14-generic #14~24.04.1-Ubuntu SMP PREEMPT_DYNAMIC Thu Jan 15 15:52:10 UTC 2 x86_64 x86_64 x86_64 GNU/Linux
# requirements-ubuntu.txt was generated on Linux and is platform-specific (Ubuntu 24.04.3 LTS x86_64).
# Linux mlerobot-linux 6.14.0-33-generic #33~24.04.1-Ubuntu SMP PREEMPT_DYNAMIC Fri Sep 19 17:02:30 UTC 2 x86_64 x86_64 x86_64 GNU/Linux
-e .[all]
-10
View File
@@ -126,12 +126,6 @@ class RobotClientConfig:
# Device configuration
policy_device: str = field(default="cpu", metadata={"help": "Device for policy inference"})
client_device: str = field(
default="cpu",
metadata={
"help": "Device to move actions to after receiving from server (e.g., for downstream planners)"
},
)
# Control behavior configuration
chunk_size_threshold: float = field(default=0.5, metadata={"help": "Threshold for chunk size control"})
@@ -167,9 +161,6 @@ class RobotClientConfig:
if not self.policy_device:
raise ValueError("policy_device cannot be empty")
if not self.client_device:
raise ValueError("client_device cannot be empty")
if self.chunk_size_threshold < 0 or self.chunk_size_threshold > 1:
raise ValueError(f"chunk_size_threshold must be between 0 and 1, got {self.chunk_size_threshold}")
@@ -193,7 +184,6 @@ class RobotClientConfig:
"policy_type": self.policy_type,
"pretrained_name_or_path": self.pretrained_name_or_path,
"policy_device": self.policy_device,
"client_device": self.client_device,
"chunk_size_threshold": self.chunk_size_threshold,
"fps": self.fps,
"actions_per_chunk": self.actions_per_chunk,
+1 -1
View File
@@ -23,7 +23,7 @@ DEFAULT_INFERENCE_LATENCY = 1 / DEFAULT_FPS
DEFAULT_OBS_QUEUE_TIMEOUT = 2
# All action chunking policies
SUPPORTED_POLICIES = ["act", "smolvla", "diffusion", "tdmpc", "vqbet", "pi0", "pi05", "groot"]
SUPPORTED_POLICIES = ["act", "smolvla", "diffusion", "tdmpc", "vqbet", "pi0", "pi05"]
# TODO: Add all other robots
SUPPORTED_ROBOTS = ["so100_follower", "so101_follower", "bi_so_follower", "omx_follower"]
+3 -4
View File
@@ -18,12 +18,11 @@ import os
import time
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any
import torch
from lerobot.configs.types import PolicyFeature
from lerobot.datasets.feature_utils import build_dataset_frame, hw_to_dataset_features
from lerobot.datasets.utils import build_dataset_frame, hw_to_dataset_features
# NOTE: Configs need to be loaded for the client to be able to instantiate the policy config
from lerobot.policies import ( # noqa: F401
@@ -40,8 +39,8 @@ from lerobot.utils.utils import init_logging
Action = torch.Tensor
# observation as received from the robot (can be numpy arrays, floats, etc.)
RawObservation = dict[str, Any]
# observation as received from the robot
RawObservation = dict[str, torch.Tensor]
# observation as those recorded in LeRobot dataset (keys are different)
LeRobotObservation = dict[str, torch.Tensor]
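For context on the alias change above, a small hedged sketch of the two observation shapes (names mirror the aliases in the hunk; the mixed-type raw observation corresponds to the `dict[str, Any]` variant):

```python
from typing import Any

import torch

# Raw observations straight from the robot may mix floats, strings, numpy arrays, etc.,
# while dataset-style observations are tensors keyed by dataset feature names.
RawObservation = dict[str, Any]
LeRobotObservation = dict[str, torch.Tensor]

raw: RawObservation = {"shoulder_pan.pos": 0.12, "task": "pick up the cube"}
processed: LeRobotObservation = {"observation.state": torch.tensor([0.12])}
print(type(raw["task"]).__name__, processed["observation.state"].dtype)
```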
+4 -4
View File
@@ -39,13 +39,15 @@ import grpc
import torch
from lerobot.policies.factory import get_policy_class, make_pre_post_processors
from lerobot.processor import PolicyProcessorPipeline
from lerobot.processor import (
PolicyAction,
PolicyProcessorPipeline,
)
from lerobot.transport import (
services_pb2, # type: ignore
services_pb2_grpc, # type: ignore
)
from lerobot.transport.utils import receive_bytes_in_chunks
from lerobot.types import PolicyAction
from .configs import PolicyServerConfig
from .constants import SUPPORTED_POLICIES
@@ -379,8 +381,6 @@ class PolicyServer(services_pb2_grpc.AsyncInferenceServicer):
action_tensor = torch.stack(processed_actions, dim=1).squeeze(0)
self.logger.debug(f"Postprocessed action shape: {action_tensor.shape}")
action_tensor = action_tensor.detach().cpu()
"""5. Convert to TimedAction list"""
action_chunk = self._time_action_chunk(
observation_t.get_timestamp(), list(action_tensor), observation_t.get_timestep()
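The hunk above stacks the post-processed per-step actions into one chunk tensor before timing them. A minimal, self-contained illustration of that stacking (shapes are hypothetical):

```python
import torch

# Hypothetical chunk: 3 action steps, action dim 6, batch size 1.
processed_actions = [torch.randn(1, 6) for _ in range(3)]

# Mirrors the server hunk above: stack along a new time dimension, drop the batch
# dimension, then detach and move to CPU before the chunk is serialized to the client.
action_tensor = torch.stack(processed_actions, dim=1).squeeze(0)
action_tensor = action_tensor.detach().cpu()
print(action_tensor.shape)  # torch.Size([3, 6])
```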
+5 -23
View File
@@ -25,7 +25,6 @@ python src/lerobot/async_inference/robot_client.py \
--policy_type=act \
--pretrained_name_or_path=user/model \
--policy_device=mps \
--client_device=cpu \
--actions_per_chunk=50 \
--chunk_size_threshold=0.5 \
--aggregate_fn_name=weighted_average \
@@ -41,7 +40,6 @@ from collections.abc import Callable
from dataclasses import asdict
from pprint import pformat
from queue import Queue
from typing import Any
import draccus
import grpc
@@ -49,6 +47,7 @@ import torch
from lerobot.cameras.opencv.configuration_opencv import OpenCVCameraConfig # noqa: F401
from lerobot.cameras.realsense.configuration_realsense import RealSenseCameraConfig # noqa: F401
from lerobot.processor import RobotAction
from lerobot.robots import ( # noqa: F401
Robot,
RobotConfig,
@@ -63,9 +62,9 @@ from lerobot.transport import (
services_pb2_grpc, # type: ignore
)
from lerobot.transport.utils import grpc_channel_options, send_bytes_in_chunks
from lerobot.utils.import_utils import register_third_party_plugins
from .configs import RobotClientConfig
from .constants import SUPPORTED_ROBOTS
from .helpers import (
Action,
FPSTracker,
@@ -286,21 +285,6 @@ class RobotClient:
timed_actions = pickle.loads(actions_chunk.data) # nosec
deserialize_time = time.perf_counter() - deserialize_start
# Log device type of received actions
if len(timed_actions) > 0:
received_device = timed_actions[0].get_action().device.type
self.logger.debug(f"Received actions on device: {received_device}")
# Move actions to client_device (e.g., for downstream planners that need GPU)
client_device = self.config.client_device
if client_device != "cpu":
for timed_action in timed_actions:
if timed_action.get_action().device.type != client_device:
timed_action.action = timed_action.get_action().to(client_device)
self.logger.debug(f"Converted actions to device: {client_device}")
else:
self.logger.debug(f"Actions kept on device: {client_device}")
self.action_chunk_size = max(self.action_chunk_size, len(timed_actions))
# Calculate network latency if we have matching observations
@@ -367,7 +351,7 @@ class RobotClient:
action = {key: action_tensor[i].item() for i, key in enumerate(self.robot.action_features)}
return action
def control_loop_action(self, verbose: bool = False) -> dict[str, Any]:
def control_loop_action(self, verbose: bool = False) -> RobotAction:
"""Reading and performing actions in local queue"""
# Lock only for queue operations
@@ -485,9 +469,8 @@ class RobotClient:
def async_client(cfg: RobotClientConfig):
logging.info(pformat(asdict(cfg)))
# TODO: Assert if checking robot support is still needed with the plugin system
# if cfg.robot.type not in SUPPORTED_ROBOTS:
# raise ValueError(f"Robot {cfg.robot.type} not yet supported!")
if cfg.robot.type not in SUPPORTED_ROBOTS:
raise ValueError(f"Robot {cfg.robot.type} not yet supported!")
client = RobotClient(cfg)
@@ -513,5 +496,4 @@ def async_client(cfg: RobotClientConfig):
if __name__ == "__main__":
register_third_party_plugins()
async_client() # run the client
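The `control_loop_action` hunk above converts one action tensor into a per-joint dict keyed by the robot's action features. A small sketch of that conversion (feature names are hypothetical; the real client takes them from `robot.action_features`):

```python
import torch

# Hypothetical joint names; in the client they come from robot.action_features.
action_features = ["shoulder_pan.pos", "shoulder_lift.pos", "elbow_flex.pos"]
action_tensor = torch.tensor([0.1, -0.2, 0.3])

# Mirrors the dict comprehension in control_loop_action: one scalar per named joint.
action = {key: action_tensor[i].item() for i, key in enumerate(action_features)}
print(action)
```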
+1 -1
View File
@@ -13,5 +13,5 @@
# limitations under the License.
from .camera import Camera
from .configs import CameraConfig, ColorMode, Cv2Backends, Cv2Rotation
from .configs import CameraConfig, ColorMode, Cv2Rotation
from .utils import make_cameras_from_configs
+18 -82
View File
@@ -15,12 +15,11 @@
# limitations under the License.
import abc
import warnings
from typing import Any
from numpy.typing import NDArray # type: ignore # TODO: add type stubs for numpy.typing
from .configs import CameraConfig
from .configs import CameraConfig, ColorMode
class Camera(abc.ABC):
@@ -31,12 +30,20 @@ class Camera(abc.ABC):
Manages basic camera properties (FPS, resolution) and core operations:
- Connection/disconnection
- Frame capture (sync/async/latest)
- Frame capture (sync/async)
Attributes:
fps (int | None): Configured frames per second
width (int | None): Frame width in pixels
height (int | None): Frame height in pixels
Example:
class MyCamera(Camera):
def __init__(self, config): ...
@property
def is_connected(self) -> bool: ...
def connect(self, warmup=True): ...
# Plus other required methods
"""
def __init__(self, config: CameraConfig):
@@ -49,32 +56,6 @@ class Camera(abc.ABC):
self.width: int | None = config.width
self.height: int | None = config.height
def __enter__(self):
"""
Context manager entry.
Automatically connects to the camera.
"""
self.connect()
return self
def __exit__(self, exc_type, exc_value, traceback) -> None:
"""
Context manager exit.
Automatically disconnects, ensuring resources are released even on error.
"""
self.disconnect()
def __del__(self) -> None:
"""
Destructor safety net.
Attempts to disconnect if the object is garbage collected without cleanup.
"""
try:
if self.is_connected:
self.disconnect()
except Exception: # nosec B110
pass
@property
@abc.abstractmethod
def is_connected(self) -> bool:
@@ -108,10 +89,12 @@ class Camera(abc.ABC):
pass
@abc.abstractmethod
def read(self) -> NDArray[Any]:
"""Capture and return a single frame from the camera synchronously.
def read(self, color_mode: ColorMode | None = None) -> NDArray[Any]:
"""Capture and return a single frame from the camera.
This is a blocking call that will wait for the hardware and its SDK.
Args:
color_mode: Desired color mode for the output frame. If None,
uses the camera's default color mode.
Returns:
np.ndarray: Captured frame as a numpy array.
@@ -120,64 +103,17 @@ class Camera(abc.ABC):
@abc.abstractmethod
def async_read(self, timeout_ms: float = ...) -> NDArray[Any]:
"""Return the most recent new frame.
This method retrieves the latest frame captured by the background thread.
If a new frame is already available in the buffer (captured since the last call),
it returns it immediately.
It blocks up to `timeout_ms` only if the buffer is empty or if the latest frame
was already consumed by a previous `async_read` call.
Essentially, this method returns the latest unconsumed frame, waiting if necessary
for a new one to arrive within the specified timeout.
Usage:
- Ideal for control loops where you want to ensure every processed frame
is fresh, effectively synchronizing your loop to the camera's FPS.
- Causes of a timeout usually include: very low camera FPS, heavy processing load,
or if the camera is disconnected.
"""Asynchronously capture and return a single frame from the camera.
Args:
timeout_ms: Maximum time to wait for a new frame in milliseconds.
Defaults to 200ms (0.2s).
timeout_ms: Maximum time to wait for a frame in milliseconds.
Defaults to implementation-specific timeout.
Returns:
np.ndarray: Captured frame as a numpy array.
Raises:
TimeoutError: If no new frame arrives within `timeout_ms`.
"""
pass
def read_latest(self, max_age_ms: int = 500) -> NDArray[Any]:
"""Return the most recent frame captured immediately (Peeking).
This method is non-blocking and returns whatever is currently in the
memory buffer. The frame may be stale,
meaning it could have been captured a while ago (e.g., in a hanging-camera scenario).
Usage:
Ideal for scenarios requiring zero latency or decoupled frequencies, and when
we want a guaranteed frame, such as UI visualization, logging, or
non-critical monitoring.
Returns:
NDArray[Any]: The frame image (numpy array).
Raises:
TimeoutError: If the latest frame is older than `max_age_ms`.
NotConnectedError: If the camera is not connected.
RuntimeError: If the camera is connected but has not captured any frames yet.
"""
warnings.warn(
f"{self.__class__.__name__}.read_latest() is not implemented. "
"Please override read_latest(); it will be required in future releases.",
FutureWarning,
stacklevel=2,
)
return self.async_read()
@abc.abstractmethod
def disconnect(self) -> None:
"""Disconnect from the camera and release resources."""
-23
View File
@@ -25,10 +25,6 @@ class ColorMode(str, Enum):
RGB = "rgb"
BGR = "bgr"
@classmethod
def _missing_(cls, value: object) -> None:
raise ValueError(f"`color_mode` is expected to be in {list(cls)}, but {value} is provided.")
class Cv2Rotation(int, Enum):
NO_ROTATION = 0
@@ -36,25 +32,6 @@ class Cv2Rotation(int, Enum):
ROTATE_180 = 180
ROTATE_270 = -90
@classmethod
def _missing_(cls, value: object) -> None:
raise ValueError(f"`rotation` is expected to be in {list(cls)}, but {value} is provided.")
# Subset from https://docs.opencv.org/3.4/d4/d15/group__videoio__flags__base.html
class Cv2Backends(int, Enum):
ANY = 0
V4L2 = 200
DSHOW = 700
PVAPI = 800
ANDROID = 1000
AVFOUNDATION = 1200
MSMF = 1400
@classmethod
def _missing_(cls, value: object) -> None:
raise ValueError(f"`backend` is expected to be in {list(cls)}, but {value} is provided.")
@dataclass(kw_only=True)
class CameraConfig(draccus.ChoiceRegistry, abc.ABC): # type: ignore # TODO: add type stubs for draccus
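The `_missing_` overrides in the hunks above turn an invalid enum value into a descriptive `ValueError` at construction time. A self-contained sketch of that behavior using the same `ColorMode` values:

```python
from enum import Enum

# Standalone sketch of the _missing_ pattern shown above (not the lerobot module itself).
class ColorMode(str, Enum):
    RGB = "rgb"
    BGR = "bgr"

    @classmethod
    def _missing_(cls, value: object) -> None:
        raise ValueError(f"`color_mode` is expected to be in {list(cls)}, but {value} is provided.")


print(ColorMode("rgb"))     # ColorMode.RGB
try:
    ColorMode("grayscale")  # triggers _missing_ and raises the custom ValueError
except ValueError as e:
    print(e)
```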
+65 -116
View File
@@ -32,11 +32,10 @@ if platform.system() == "Windows" and "OPENCV_VIDEOIO_MSMF_ENABLE_HW_TRANSFORMS"
os.environ["OPENCV_VIDEOIO_MSMF_ENABLE_HW_TRANSFORMS"] = "0"
import cv2 # type: ignore # TODO: add type stubs for OpenCV
from lerobot.utils.decorators import check_if_already_connected, check_if_not_connected
from lerobot.utils.errors import DeviceNotConnectedError
from lerobot.utils.errors import DeviceAlreadyConnectedError, DeviceNotConnectedError
from ..camera import Camera
from ..utils import get_cv2_rotation
from ..utils import get_cv2_backend, get_cv2_rotation
from .configuration_opencv import ColorMode, OpenCVCameraConfig
# NOTE(Steven): The maximum opencv device index depends on your operating system. For instance,
@@ -71,24 +70,34 @@ class OpenCVCamera(Camera):
Example:
```python
from lerobot.cameras.opencv import OpenCVCamera
from lerobot.cameras.configuration_opencv import OpenCVCameraConfig
from lerobot.cameras.configuration_opencv import OpenCVCameraConfig, ColorMode, Cv2Rotation
# Basic usage with camera index 0
config = OpenCVCameraConfig(index_or_path=0)
camera = OpenCVCamera(config)
camera.connect()
# Read 1 frame synchronously (blocking)
# Read 1 frame synchronously
color_image = camera.read()
print(color_image.shape)
# Read 1 frame asynchronously (waits for new frame with a timeout)
# Read 1 frame asynchronously
async_image = camera.async_read()
# Get the latest frame immediately (no wait, returns timestamp)
latest_image, timestamp = camera.read_latest()
# When done, properly disconnect the camera using
camera.disconnect()
# Example with custom settings
custom_config = OpenCVCameraConfig(
index_or_path='/dev/video0', # Or use an index
fps=30,
width=1280,
height=720,
color_mode=ColorMode.RGB,
rotation=Cv2Rotation.ROTATE_90
)
custom_camera = OpenCVCamera(custom_config)
# ... connect, read, disconnect ...
```
"""
@@ -114,11 +123,10 @@ class OpenCVCamera(Camera):
self.stop_event: Event | None = None
self.frame_lock: Lock = Lock()
self.latest_frame: NDArray[Any] | None = None
self.latest_timestamp: float | None = None
self.new_frame_event: Event = Event()
self.rotation: int | None = get_cv2_rotation(config.rotation)
self.backend: int = config.backend
self.backend: int = get_cv2_backend()
if self.height and self.width:
self.capture_width, self.capture_height = self.width, self.height
@@ -133,23 +141,20 @@ class OpenCVCamera(Camera):
"""Checks if the camera is currently connected and opened."""
return isinstance(self.videocapture, cv2.VideoCapture) and self.videocapture.isOpened()
@check_if_already_connected
def connect(self, warmup: bool = True) -> None:
"""
Connects to the OpenCV camera specified in the configuration.
Initializes the OpenCV VideoCapture object, sets desired camera properties
(FPS, width, height), starts the background reading thread and performs initial checks.
Args:
warmup (bool): If True, waits at connect() time until at least one valid frame
has been captured by the background thread. Defaults to True.
(FPS, width, height), and performs initial checks.
Raises:
DeviceAlreadyConnectedError: If the camera is already connected.
ConnectionError: If the specified camera index/path is not found or fails to open.
RuntimeError: If the camera opens but fails to apply requested settings.
ConnectionError: If the specified camera index/path is not found or the camera is found but fails to open.
RuntimeError: If the camera opens but fails to apply requested FPS/resolution settings.
"""
if self.is_connected:
raise DeviceAlreadyConnectedError(f"{self} is already connected.")
# Use 1 thread for OpenCV operations to avoid potential conflicts or
# blocking in multi-threaded applications, especially during data collection.
@@ -165,20 +170,15 @@ class OpenCVCamera(Camera):
)
self._configure_capture_settings()
self._start_read_thread()
if warmup and self.warmup_s > 0:
if warmup:
start_time = time.time()
while time.time() - start_time < self.warmup_s:
self.async_read(timeout_ms=self.warmup_s * 1000)
self.read()
time.sleep(0.1)
with self.frame_lock:
if self.latest_frame is None:
raise ConnectionError(f"{self} failed to capture frames during warmup.")
logger.info(f"{self} connected.")
@check_if_not_connected
def _configure_capture_settings(self) -> None:
"""
Applies the specified FOURCC, FPS, width, and height settings to the connected camera.
@@ -196,8 +196,11 @@ class OpenCVCamera(Camera):
Raises:
RuntimeError: If the camera fails to set any of the specified properties
to the requested value.
DeviceNotConnectedError: If the camera is not connected.
DeviceNotConnectedError: If the camera is not connected when attempting
to configure settings.
"""
if not self.is_connected:
raise DeviceNotConnectedError(f"Cannot configure settings for {self} as it is not connected.")
# Set FOURCC first (if specified) as it can affect available FPS/resolution options
if self.config.fourcc is not None:
@@ -336,18 +339,6 @@ class OpenCVCamera(Camera):
return found_cameras_info
def _read_from_hardware(self) -> NDArray[Any]:
if self.videocapture is None:
raise DeviceNotConnectedError(f"{self} videocapture is not initialized")
ret, frame = self.videocapture.read()
if not ret:
raise RuntimeError(f"{self} read failed (status={ret}).")
return frame
@check_if_not_connected
def read(self, color_mode: ColorMode | None = None) -> NDArray[Any]:
"""
Reads a single frame synchronously from the camera.
@@ -355,6 +346,11 @@ class OpenCVCamera(Camera):
This is a blocking call. It waits for the next available frame from the
camera hardware via OpenCV.
Args:
color_mode (Optional[ColorMode]): If specified, overrides the default
color mode (`self.color_mode`) for this read operation (e.g.,
request RGB even if default is BGR).
Returns:
np.ndarray: The captured frame as a NumPy array in the format
(height, width, channels), using the specified or default
@@ -366,31 +362,34 @@ class OpenCVCamera(Camera):
received frame dimensions don't match expectations before rotation.
ValueError: If an invalid `color_mode` is requested.
"""
if not self.is_connected:
raise DeviceNotConnectedError(f"{self} is not connected.")
start_time = time.perf_counter()
if color_mode is not None:
logger.warning(
f"{self} read() color_mode parameter is deprecated and will be removed in future versions."
)
if self.videocapture is None:
raise DeviceNotConnectedError(f"{self} videocapture is not initialized")
if self.thread is None or not self.thread.is_alive():
raise RuntimeError(f"{self} read thread is not running.")
ret, frame = self.videocapture.read()
self.new_frame_event.clear()
frame = self.async_read(timeout_ms=10000)
if not ret or frame is None:
raise RuntimeError(f"{self} read failed (status={ret}).")
processed_frame = self._postprocess_image(frame, color_mode)
read_duration_ms = (time.perf_counter() - start_time) * 1e3
logger.debug(f"{self} read took: {read_duration_ms:.1f}ms")
return frame
return processed_frame
def _postprocess_image(self, image: NDArray[Any]) -> NDArray[Any]:
def _postprocess_image(self, image: NDArray[Any], color_mode: ColorMode | None = None) -> NDArray[Any]:
"""
Applies color conversion, dimension validation, and rotation to a raw frame.
Args:
image (np.ndarray): The raw image frame (expected BGR format from OpenCV).
color_mode (Optional[ColorMode]): The target color mode (RGB or BGR). If None,
uses the instance's default `self.color_mode`.
Returns:
np.ndarray: The processed image frame.
@@ -400,10 +399,11 @@ class OpenCVCamera(Camera):
RuntimeError: If the raw frame dimensions do not match the configured
`width` and `height`.
"""
requested_color_mode = self.color_mode if color_mode is None else color_mode
if self.color_mode not in (ColorMode.RGB, ColorMode.BGR):
if requested_color_mode not in (ColorMode.RGB, ColorMode.BGR):
raise ValueError(
f"Invalid color mode '{self.color_mode}'. Expected {ColorMode.RGB} or {ColorMode.BGR}."
f"Invalid color mode '{requested_color_mode}'. Expected {ColorMode.RGB} or {ColorMode.BGR}."
)
h, w, c = image.shape
@@ -417,7 +417,7 @@ class OpenCVCamera(Camera):
raise RuntimeError(f"{self} frame channels={c} do not match expected 3 channels (RGB/BGR).")
processed_image = image
if self.color_mode == ColorMode.RGB:
if requested_color_mode == ColorMode.RGB:
processed_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
if self.rotation in [cv2.ROTATE_90_CLOCKWISE, cv2.ROTATE_90_COUNTERCLOCKWISE, cv2.ROTATE_180]:
@@ -431,7 +431,7 @@ class OpenCVCamera(Camera):
On each iteration:
1. Reads a color frame
2. Stores result in latest_frame and updates timestamp (thread-safe)
2. Stores result in latest_frame (thread-safe)
3. Sets new_frame_event to notify listeners
Stops on DeviceNotConnectedError, logs other errors and continues.
@@ -439,37 +439,30 @@ class OpenCVCamera(Camera):
if self.stop_event is None:
raise RuntimeError(f"{self}: stop_event is not initialized before starting read loop.")
failure_count = 0
while not self.stop_event.is_set():
try:
raw_frame = self._read_from_hardware()
processed_frame = self._postprocess_image(raw_frame)
capture_time = time.perf_counter()
color_image = self.read()
with self.frame_lock:
self.latest_frame = processed_frame
self.latest_timestamp = capture_time
self.latest_frame = color_image
self.new_frame_event.set()
failure_count = 0
except DeviceNotConnectedError:
break
except Exception as e:
if failure_count <= 10:
failure_count += 1
logger.warning(f"Error reading frame in background thread for {self}: {e}")
else:
raise RuntimeError(f"{self} exceeded maximum consecutive read failures.") from e
logger.warning(f"Error reading frame in background thread for {self}: {e}")
def _start_read_thread(self) -> None:
"""Starts or restarts the background read thread if it's not running."""
self._stop_read_thread()
if self.thread is not None and self.thread.is_alive():
self.thread.join(timeout=0.1)
if self.stop_event is not None:
self.stop_event.set()
self.stop_event = Event()
self.thread = Thread(target=self._read_loop, args=(), name=f"{self}_read_loop")
self.thread.daemon = True
self.thread.start()
time.sleep(0.1)
def _stop_read_thread(self) -> None:
"""Signals the background read thread to stop and waits for it to join."""
@@ -482,12 +475,6 @@ class OpenCVCamera(Camera):
self.thread = None
self.stop_event = None
with self.frame_lock:
self.latest_frame = None
self.latest_timestamp = None
self.new_frame_event.clear()
@check_if_not_connected
def async_read(self, timeout_ms: float = 200) -> NDArray[Any]:
"""
Reads the latest available frame asynchronously.
@@ -495,7 +482,6 @@ class OpenCVCamera(Camera):
This method retrieves the most recent frame captured by the background
read thread. It does not block waiting for the camera hardware directly,
but may wait up to timeout_ms for the background thread to provide a frame.
It is best effort under high FPS.
Args:
timeout_ms (float): Maximum time in milliseconds to wait for a frame
@@ -510,14 +496,17 @@ class OpenCVCamera(Camera):
TimeoutError: If no frame becomes available within the specified timeout.
RuntimeError: If an unexpected error occurs.
"""
if not self.is_connected:
raise DeviceNotConnectedError(f"{self} is not connected.")
if self.thread is None or not self.thread.is_alive():
raise RuntimeError(f"{self} read thread is not running.")
self._start_read_thread()
if not self.new_frame_event.wait(timeout=timeout_ms / 1000.0):
thread_alive = self.thread is not None and self.thread.is_alive()
raise TimeoutError(
f"Timed out waiting for frame from camera {self} after {timeout_ms} ms. "
f"Read thread alive: {self.thread.is_alive()}."
f"Read thread alive: {thread_alive}."
)
with self.frame_lock:
@@ -529,41 +518,6 @@ class OpenCVCamera(Camera):
return frame
@check_if_not_connected
def read_latest(self, max_age_ms: int = 500) -> NDArray[Any]:
"""Return the most recent frame captured immediately (Peeking).
This method is non-blocking and returns whatever is currently in the
memory buffer. The frame may be stale,
meaning it could have been captured a while ago (e.g., in a hanging-camera scenario).
Returns:
NDArray[Any]: The frame image (numpy array).
Raises:
TimeoutError: If the latest frame is older than `max_age_ms`.
DeviceNotConnectedError: If the camera is not connected.
RuntimeError: If the camera is connected but has not captured any frames yet.
"""
if self.thread is None or not self.thread.is_alive():
raise RuntimeError(f"{self} read thread is not running.")
with self.frame_lock:
frame = self.latest_frame
timestamp = self.latest_timestamp
if frame is None or timestamp is None:
raise RuntimeError(f"{self} has not captured any frames yet.")
age_ms = (time.perf_counter() - timestamp) * 1e3
if age_ms > max_age_ms:
raise TimeoutError(
f"{self} latest frame is too old: {age_ms:.1f} ms (max allowed: {max_age_ms} ms)."
)
return frame
def disconnect(self) -> None:
"""
Disconnects from the camera and cleans up resources.
@@ -584,9 +538,4 @@ class OpenCVCamera(Camera):
self.videocapture.release()
self.videocapture = None
with self.frame_lock:
self.latest_frame = None
self.latest_timestamp = None
self.new_frame_event.clear()
logger.info(f"{self} disconnected.")
@@ -15,9 +15,9 @@
from dataclasses import dataclass
from pathlib import Path
from ..configs import CameraConfig, ColorMode, Cv2Backends, Cv2Rotation
from ..configs import CameraConfig, ColorMode, Cv2Rotation
__all__ = ["OpenCVCameraConfig", "ColorMode", "Cv2Rotation", "Cv2Backends"]
__all__ = ["OpenCVCameraConfig", "ColorMode", "Cv2Rotation"]
@CameraConfig.register_subclass("opencv")
@@ -50,7 +50,6 @@ class OpenCVCameraConfig(CameraConfig):
rotation: Image rotation setting (0°, 90°, 180°, or 270°). Defaults to no rotation.
warmup_s: Time reading frames before returning from connect (in seconds)
fourcc: FOURCC code for video format (e.g., "MJPG", "YUYV", "I420"). Defaults to None (auto-detect).
backend: OpenCV backend identifier (https://docs.opencv.org/3.4/d4/d15/group__videoio__flags__base.html). Defaults to ANY.
Note:
- Only 3-channel color output (RGB/BGR) is currently supported.
@@ -63,12 +62,22 @@ class OpenCVCameraConfig(CameraConfig):
rotation: Cv2Rotation = Cv2Rotation.NO_ROTATION
warmup_s: int = 1
fourcc: str | None = None
backend: Cv2Backends = Cv2Backends.ANY
def __post_init__(self) -> None:
self.color_mode = ColorMode(self.color_mode)
self.rotation = Cv2Rotation(self.rotation)
self.backend = Cv2Backends(self.backend)
if self.color_mode not in (ColorMode.RGB, ColorMode.BGR):
raise ValueError(
f"`color_mode` is expected to be {ColorMode.RGB.value} or {ColorMode.BGR.value}, but {self.color_mode} is provided."
)
if self.rotation not in (
Cv2Rotation.NO_ROTATION,
Cv2Rotation.ROTATE_90,
Cv2Rotation.ROTATE_180,
Cv2Rotation.ROTATE_270,
):
raise ValueError(
f"`rotation` is expected to be in {(Cv2Rotation.NO_ROTATION, Cv2Rotation.ROTATE_90, Cv2Rotation.ROTATE_180, Cv2Rotation.ROTATE_270)}, but {self.rotation} is provided."
)
if self.fourcc is not None and (not isinstance(self.fourcc, str) or len(self.fourcc) != 4):
raise ValueError(
@@ -74,4 +74,7 @@ class Reachy2CameraConfig(CameraConfig):
f"`image_type` is expected to be 'left' or 'right' for teleop camera, and 'rgb' or 'depth' for depth camera, but {self.image_type} is provided."
)
self.color_mode = ColorMode(self.color_mode)
if self.color_mode not in ["rgb", "bgr"]:
raise ValueError(
f"`color_mode` is expected to be 'rgb' or 'bgr', but {self.color_mode} is provided."
)
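Both `__post_init__` variants above normalize raw `color_mode`/`rotation` values into their enums before validating them. A standalone sketch of that coercion (a dummy dataclass, not the lerobot config classes):

```python
from dataclasses import dataclass
from enum import Enum

# Standalone sketch of the __post_init__ coercion shown above.
class ColorMode(str, Enum):
    RGB = "rgb"
    BGR = "bgr"

class Cv2Rotation(int, Enum):
    NO_ROTATION = 0
    ROTATE_90 = 90
    ROTATE_180 = 180
    ROTATE_270 = -90

@dataclass
class CameraConfigSketch:
    color_mode: ColorMode = ColorMode.RGB
    rotation: Cv2Rotation = Cv2Rotation.NO_ROTATION

    def __post_init__(self) -> None:
        # Accept plain "rgb" / 90 style values and normalize them to enum members.
        self.color_mode = ColorMode(self.color_mode)
        self.rotation = Cv2Rotation(self.rotation)

cfg = CameraConfigSketch(color_mode="rgb", rotation=90)
print(cfg.color_mode is ColorMode.RGB, cfg.rotation is Cv2Rotation.ROTATE_90)  # True True
```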
@@ -32,7 +32,6 @@ if platform.system() == "Windows" and "OPENCV_VIDEOIO_MSMF_ENABLE_HW_TRANSFORMS"
import cv2 # type: ignore # TODO: add type stubs for OpenCV
import numpy as np # type: ignore # TODO: add type stubs for numpy
from lerobot.utils.decorators import check_if_not_connected
from lerobot.utils.import_utils import _reachy2_sdk_available
if TYPE_CHECKING or _reachy2_sdk_available:
@@ -81,8 +80,6 @@ class Reachy2Camera(Camera):
self.config = config
self.color_mode = config.color_mode
self.latest_frame: NDArray[Any] | None = None
self.latest_timestamp: float | None = None
self.cam_manager: CameraManager | None = None
@@ -124,12 +121,16 @@ class Reachy2Camera(Camera):
"""
raise NotImplementedError("Camera detection is not implemented for Reachy2 cameras.")
@check_if_not_connected
def read(self, color_mode: ColorMode | None = None) -> NDArray[Any]:
"""
Reads a single frame synchronously from the camera.
This method retrieves the most recent frame available in Reachy 2's low-level software.
This is a blocking call.
Args:
color_mode (Optional[ColorMode]): If specified, overrides the default
color mode (`self.color_mode`) for this read operation (e.g.,
request RGB even if default is BGR).
Returns:
np.ndarray: The captured frame as a NumPy array in the format
@@ -138,13 +139,11 @@ class Reachy2Camera(Camera):
"""
start_time = time.perf_counter()
if self.cam_manager is None:
if not self.is_connected:
raise DeviceNotConnectedError(f"{self} is not connected.")
if color_mode is not None:
logger.warning(
f"{self} read() color_mode parameter is deprecated and will be removed in future versions."
)
if self.cam_manager is None:
raise DeviceNotConnectedError(f"{self} is not connected.")
frame: NDArray[Any] = np.empty((0, 0, 3), dtype=np.uint8)
@@ -166,27 +165,25 @@ class Reachy2Camera(Camera):
raise ValueError(f"Invalid camera name '{self.config.name}'. Expected 'teleop' or 'depth'.")
if frame is None:
raise RuntimeError(f"Internal error: No frame available for {self}.")
return np.empty((0, 0, 3), dtype=np.uint8)
if self.color_mode not in (ColorMode.RGB, ColorMode.BGR):
raise ValueError(
f"Invalid color mode '{self.color_mode}'. Expected {ColorMode.RGB} or {ColorMode.BGR}."
)
if self.color_mode == ColorMode.RGB:
if self.config.color_mode == "rgb":
frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
self.latest_frame = frame
self.latest_timestamp = time.perf_counter()
read_duration_ms = (time.perf_counter() - start_time) * 1e3
logger.debug(f"{self} read took: {read_duration_ms:.1f}ms")
return frame
@check_if_not_connected
def async_read(self, timeout_ms: float = 200) -> NDArray[Any]:
"""
Same as read()
Reads the latest available frame.
This method retrieves the most recent frame available in Reachy 2's low-level software.
Args:
timeout_ms (float): Maximum time in milliseconds to wait for a frame
to become available. Defaults to 200ms (0.2 seconds).
Returns:
np.ndarray: The latest captured frame as a NumPy array in the format
@@ -197,40 +194,16 @@ class Reachy2Camera(Camera):
TimeoutError: If no frame becomes available within the specified timeout.
RuntimeError: If an unexpected error occurs.
"""
if not self.is_connected:
raise DeviceNotConnectedError(f"{self} is not connected.")
return self.read()
frame = self.read()
@check_if_not_connected
def read_latest(self, max_age_ms: int = 500) -> NDArray[Any]:
"""Return the most recent frame captured immediately (Peeking).
if frame is None:
raise RuntimeError(f"Internal error: No frame available for {self}.")
This method is non-blocking and returns whatever is currently in the
memory buffer. The frame may be stale,
meaning it could have been captured a while ago (e.g., in a hanging-camera scenario).
return frame
Returns:
tuple[NDArray, float]:
- The frame image (numpy array).
- The timestamp (time.perf_counter) when this frame was captured.
Raises:
TimeoutError: If the latest frame is older than `max_age_ms`.
DeviceNotConnectedError: If the camera is not connected.
RuntimeError: If the camera is connected but has not captured any frames yet.
"""
if self.latest_frame is None or self.latest_timestamp is None:
raise RuntimeError(f"{self} has not captured any frames yet.")
age_ms = (time.perf_counter() - self.latest_timestamp) * 1e3
if age_ms > max_age_ms:
raise TimeoutError(
f"{self} latest frame is too old: {age_ms:.1f} ms (max allowed: {max_age_ms} ms)."
)
return self.latest_frame
@check_if_not_connected
def disconnect(self) -> None:
"""
Stops the background read thread (if running).
@@ -238,6 +211,8 @@ class Reachy2Camera(Camera):
Raises:
DeviceNotConnectedError: If the camera is already disconnected.
"""
if not self.is_connected:
raise DeviceNotConnectedError(f"{self} not connected.")
if self.cam_manager is not None:
self.cam_manager.disconnect()
+73 -144
View File
@@ -30,8 +30,7 @@ try:
except Exception as e:
logging.info(f"Could not import realsense: {e}")
from lerobot.utils.decorators import check_if_already_connected, check_if_not_connected
from lerobot.utils.errors import DeviceNotConnectedError
from lerobot.utils.errors import DeviceAlreadyConnectedError, DeviceNotConnectedError
from ..camera import Camera
from ..configs import ColorMode
@@ -73,14 +72,15 @@ class RealSenseCamera(Camera):
camera = RealSenseCamera(config)
camera.connect()
# Read 1 frame synchronously (blocking)
# Read 1 frame synchronously
color_image = camera.read()
print(color_image.shape)
# Read 1 frame asynchronously (waits for new frame with a timeout)
# Read 1 frame asynchronously
async_image = camera.async_read()
# Get the latest frame immediately (no wait, returns timestamp)
latest_image, timestamp = camera.read_latest()
# When done, properly disconnect the camera using
camera.disconnect()
# Example with depth capture and custom settings
custom_config = RealSenseCameraConfig(
@@ -133,9 +133,7 @@ class RealSenseCamera(Camera):
self.thread: Thread | None = None
self.stop_event: Event | None = None
self.frame_lock: Lock = Lock()
self.latest_color_frame: NDArray[Any] | None = None
self.latest_depth_frame: NDArray[Any] | None = None
self.latest_timestamp: float | None = None
self.latest_frame: NDArray[Any] | None = None
self.new_frame_event: Event = Event()
self.rotation: int | None = get_cv2_rotation(config.rotation)
@@ -153,7 +151,6 @@ class RealSenseCamera(Camera):
"""Checks if the camera pipeline is started and streams are active."""
return self.rs_pipeline is not None and self.rs_profile is not None
@check_if_already_connected
def connect(self, warmup: bool = True) -> None:
"""
Connects to the RealSense camera specified in the configuration.
@@ -161,16 +158,14 @@ class RealSenseCamera(Camera):
Initializes the RealSense pipeline, configures the required streams (color
and optionally depth), starts the pipeline, and validates the actual stream settings.
Args:
warmup (bool): If True, waits at connect() time until at least one valid frame
has been captured by the background thread. Defaults to True.
Raises:
DeviceAlreadyConnectedError: If the camera is already connected.
ValueError: If the configuration is invalid (e.g., missing serial/name, name not unique).
ConnectionError: If the camera is found but fails to start the pipeline or no RealSense devices are detected at all.
RuntimeError: If the pipeline starts but fails to apply requested settings.
"""
if self.is_connected:
raise DeviceAlreadyConnectedError(f"{self} is already connected.")
self.rs_pipeline = rs.pipeline()
rs_config = rs.config()
@@ -186,18 +181,15 @@ class RealSenseCamera(Camera):
) from e
self._configure_capture_settings()
self._start_read_thread()
# NOTE(Steven/Caroline): Enforcing at least one second of warmup as RS cameras need a bit of time before the first read. If we don't wait, the first read from the warmup will raise.
self.warmup_s = max(self.warmup_s, 1)
start_time = time.time()
while time.time() - start_time < self.warmup_s:
self.async_read(timeout_ms=self.warmup_s * 1000)
time.sleep(0.1)
with self.frame_lock:
if self.latest_color_frame is None or self.use_depth and self.latest_depth_frame is None:
raise ConnectionError(f"{self} failed to capture frames during warmup.")
if warmup:
time.sleep(
1
) # NOTE(Steven): RS cameras need a bit of time to warm up before the first read. If we don't wait, the first read from the warmup will raise.
start_time = time.time()
while time.time() - start_time < self.warmup_s:
self.read()
time.sleep(0.1)
logger.info(f"{self} connected.")
@@ -290,7 +282,6 @@ class RealSenseCamera(Camera):
if self.use_depth:
rs_config.enable_stream(rs.stream.depth)
@check_if_not_connected
def _configure_capture_settings(self) -> None:
"""Sets fps, width, and height from device stream if not already configured.
@@ -300,6 +291,8 @@ class RealSenseCamera(Camera):
Raises:
DeviceNotConnectedError: If device is not connected.
"""
if not self.is_connected:
raise DeviceNotConnectedError(f"Cannot validate settings for {self} as it is not connected.")
if self.rs_profile is None:
raise RuntimeError(f"{self}: rs_profile must be initialized before use.")
@@ -319,7 +312,6 @@ class RealSenseCamera(Camera):
self.width, self.height = actual_width, actual_height
self.capture_width, self.capture_height = actual_width, actual_height
@check_if_not_connected
def read_depth(self, timeout_ms: int = 200) -> NDArray[Any]:
"""
Reads a single frame (depth) synchronously from the camera.
@@ -327,6 +319,9 @@ class RealSenseCamera(Camera):
This is a blocking call. It waits for a coherent set of frames (depth)
from the camera hardware via the RealSense pipeline.
Args:
timeout_ms (int): Maximum time in milliseconds to wait for a frame. Defaults to 200ms.
Returns:
np.ndarray: The depth map as a NumPy array (height, width)
of type `np.uint16` (raw depth values in millimeters) and rotation.
@@ -335,50 +330,44 @@ class RealSenseCamera(Camera):
DeviceNotConnectedError: If the camera is not connected.
RuntimeError: If reading frames from the pipeline fails or frames are invalid.
"""
if timeout_ms:
logger.warning(
f"{self} read() timeout_ms parameter is deprecated and will be removed in future versions."
)
if not self.is_connected:
raise DeviceNotConnectedError(f"{self} is not connected.")
if not self.use_depth:
raise RuntimeError(
f"Failed to capture depth frame '.read_depth()'. Depth stream is not enabled for {self}."
)
if self.thread is None or not self.thread.is_alive():
raise RuntimeError(f"{self} read thread is not running.")
start_time = time.perf_counter()
self.new_frame_event.clear()
_ = self.async_read(timeout_ms=10000)
with self.frame_lock:
depth_map = self.latest_depth_frame
if depth_map is None:
raise RuntimeError("No depth frame available. Ensure camera is streaming.")
return depth_map
def _read_from_hardware(self):
if self.rs_pipeline is None:
raise RuntimeError(f"{self}: rs_pipeline must be initialized before use.")
ret, frame = self.rs_pipeline.try_wait_for_frames(timeout_ms=10000)
ret, frame = self.rs_pipeline.try_wait_for_frames(timeout_ms=timeout_ms)
if not ret or frame is None:
raise RuntimeError(f"{self} read failed (status={ret}).")
raise RuntimeError(f"{self} read_depth failed (status={ret}).")
return frame
depth_frame = frame.get_depth_frame()
depth_map = np.asanyarray(depth_frame.get_data())
@check_if_not_connected
def read(self, color_mode: ColorMode | None = None, timeout_ms: int = 0) -> NDArray[Any]:
depth_map_processed = self._postprocess_image(depth_map, depth_frame=True)
read_duration_ms = (time.perf_counter() - start_time) * 1e3
logger.debug(f"{self} read took: {read_duration_ms:.1f}ms")
return depth_map_processed
def read(self, color_mode: ColorMode | None = None, timeout_ms: int = 200) -> NDArray[Any]:
"""
Reads a single frame (color) synchronously from the camera.
This is a blocking call. It waits for a coherent set of frames (color)
from the camera hardware via the RealSense pipeline.
Args:
timeout_ms (int): Maximum time in milliseconds to wait for a frame. Defaults to 200ms.
Returns:
np.ndarray: The captured color frame as a NumPy array
(height, width, channels), processed according to `color_mode` and rotation.
@@ -389,36 +378,39 @@ class RealSenseCamera(Camera):
ValueError: If an invalid `color_mode` is requested.
"""
if not self.is_connected:
raise DeviceNotConnectedError(f"{self} is not connected.")
start_time = time.perf_counter()
if color_mode is not None:
logger.warning(
f"{self} read() color_mode parameter is deprecated and will be removed in future versions."
)
if self.rs_pipeline is None:
raise RuntimeError(f"{self}: rs_pipeline must be initialized before use.")
if timeout_ms:
logger.warning(
f"{self} read() timeout_ms parameter is deprecated and will be removed in future versions."
)
ret, frame = self.rs_pipeline.try_wait_for_frames(timeout_ms=timeout_ms)
if self.thread is None or not self.thread.is_alive():
raise RuntimeError(f"{self} read thread is not running.")
if not ret or frame is None:
raise RuntimeError(f"{self} read failed (status={ret}).")
self.new_frame_event.clear()
color_frame = frame.get_color_frame()
color_image_raw = np.asanyarray(color_frame.get_data())
frame = self.async_read(timeout_ms=10000)
color_image_processed = self._postprocess_image(color_image_raw, color_mode)
read_duration_ms = (time.perf_counter() - start_time) * 1e3
logger.debug(f"{self} read took: {read_duration_ms:.1f}ms")
return frame
return color_image_processed
def _postprocess_image(self, image: NDArray[Any], depth_frame: bool = False) -> NDArray[Any]:
def _postprocess_image(
self, image: NDArray[Any], color_mode: ColorMode | None = None, depth_frame: bool = False
) -> NDArray[Any]:
"""
Applies color conversion, dimension validation, and rotation to a raw color frame.
Args:
image (np.ndarray): The raw image frame (expected RGB format from RealSense).
color_mode (Optional[ColorMode]): The target color mode (RGB or BGR). If None,
uses the instance's default `self.color_mode`.
Returns:
np.ndarray: The processed image frame according to `self.color_mode` and `self.rotation`.
@@ -429,9 +421,9 @@ class RealSenseCamera(Camera):
`width` and `height`.
"""
if self.color_mode and self.color_mode not in (ColorMode.RGB, ColorMode.BGR):
if color_mode and color_mode not in (ColorMode.RGB, ColorMode.BGR):
raise ValueError(
f"Invalid requested color mode '{self.color_mode}'. Expected {ColorMode.RGB} or {ColorMode.BGR}."
f"Invalid requested color mode '{color_mode}'. Expected {ColorMode.RGB} or {ColorMode.BGR}."
)
if depth_frame:
@@ -462,7 +454,7 @@ class RealSenseCamera(Camera):
On each iteration:
1. Reads a color frame with 500ms timeout
2. Stores result in latest_frame and updates timestamp (thread-safe)
2. Stores result in latest_frame (thread-safe)
3. Sets new_frame_event to notify listeners
Stops on DeviceNotConnectedError, logs other errors and continues.
@@ -470,41 +462,25 @@ class RealSenseCamera(Camera):
if self.stop_event is None:
raise RuntimeError(f"{self}: stop_event is not initialized before starting read loop.")
failure_count = 0
while not self.stop_event.is_set():
try:
frame = self._read_from_hardware()
color_frame_raw = frame.get_color_frame()
color_frame = np.asanyarray(color_frame_raw.get_data())
processed_color_frame = self._postprocess_image(color_frame)
if self.use_depth:
depth_frame_raw = frame.get_depth_frame()
depth_frame = np.asanyarray(depth_frame_raw.get_data())
processed_depth_frame = self._postprocess_image(depth_frame, depth_frame=True)
capture_time = time.perf_counter()
color_image = self.read(timeout_ms=500)
with self.frame_lock:
self.latest_color_frame = processed_color_frame
if self.use_depth:
self.latest_depth_frame = processed_depth_frame
self.latest_timestamp = capture_time
self.latest_frame = color_image
self.new_frame_event.set()
failure_count = 0
except DeviceNotConnectedError:
break
except Exception as e:
if failure_count <= 10:
failure_count += 1
logger.warning(f"Error reading frame in background thread for {self}: {e}")
else:
raise RuntimeError(f"{self} exceeded maximum consecutive read failures.") from e
logger.warning(f"Error reading frame in background thread for {self}: {e}")
def _start_read_thread(self) -> None:
"""Starts or restarts the background read thread if it's not running."""
self._stop_read_thread()
if self.thread is not None and self.thread.is_alive():
self.thread.join(timeout=0.1)
if self.stop_event is not None:
self.stop_event.set()
self.stop_event = Event()
self.thread = Thread(target=self._read_loop, args=(), name=f"{self}_read_loop")
@@ -522,14 +498,7 @@ class RealSenseCamera(Camera):
self.thread = None
self.stop_event = None
with self.frame_lock:
self.latest_color_frame = None
self.latest_depth_frame = None
self.latest_timestamp = None
self.new_frame_event.clear()
# NOTE(Steven): Missing implementation for depth for now
@check_if_not_connected
def async_read(self, timeout_ms: float = 200) -> NDArray[Any]:
"""
Reads the latest available frame data (color) asynchronously.
@@ -537,7 +506,6 @@ class RealSenseCamera(Camera):
This method retrieves the most recent color frame captured by the background
read thread. It does not block waiting for the camera hardware directly,
but may wait up to timeout_ms for the background thread to provide a frame.
It is best effort under high FPS.
Args:
timeout_ms (float): Maximum time in milliseconds to wait for a frame
@@ -552,18 +520,21 @@ class RealSenseCamera(Camera):
TimeoutError: If no frame data becomes available within the specified timeout.
RuntimeError: If the background thread died unexpectedly or another error occurs.
"""
if not self.is_connected:
raise DeviceNotConnectedError(f"{self} is not connected.")
if self.thread is None or not self.thread.is_alive():
raise RuntimeError(f"{self} read thread is not running.")
self._start_read_thread()
if not self.new_frame_event.wait(timeout=timeout_ms / 1000.0):
thread_alive = self.thread is not None and self.thread.is_alive()
raise TimeoutError(
f"Timed out waiting for frame from camera {self} after {timeout_ms} ms. "
f"Read thread alive: {self.thread.is_alive()}."
f"Read thread alive: {thread_alive}."
)
with self.frame_lock:
frame = self.latest_color_frame
frame = self.latest_frame
self.new_frame_event.clear()
if frame is None:
@@ -571,42 +542,6 @@ class RealSenseCamera(Camera):
return frame
# NOTE(Steven): Missing implementation for depth for now
@check_if_not_connected
def read_latest(self, max_age_ms: int = 500) -> NDArray[Any]:
"""Return the most recent (color) frame captured immediately (Peeking).
This method is non-blocking and returns whatever is currently in the
memory buffer. The frame may be stale,
meaning it could have been captured a while ago (e.g., in a hanging-camera scenario).
Returns:
NDArray[Any]: The frame image (numpy array).
Raises:
TimeoutError: If the latest frame is older than `max_age_ms`.
DeviceNotConnectedError: If the camera is not connected.
RuntimeError: If the camera is connected but has not captured any frames yet.
"""
if self.thread is None or not self.thread.is_alive():
raise RuntimeError(f"{self} read thread is not running.")
with self.frame_lock:
frame = self.latest_color_frame
timestamp = self.latest_timestamp
if frame is None or timestamp is None:
raise RuntimeError(f"{self} has not captured any frames yet.")
age_ms = (time.perf_counter() - timestamp) * 1e3
if age_ms > max_age_ms:
raise TimeoutError(
f"{self} latest frame is too old: {age_ms:.1f} ms (max allowed: {max_age_ms} ms)."
)
return frame
def disconnect(self) -> None:
"""
Disconnects from the camera, stops the pipeline, and cleans up resources.
@@ -630,10 +565,4 @@ class RealSenseCamera(Camera):
self.rs_pipeline = None
self.rs_profile = None
with self.frame_lock:
self.latest_color_frame = None
self.latest_depth_frame = None
self.latest_timestamp = None
self.new_frame_event.clear()
logger.info(f"{self} disconnected.")
@@ -60,8 +60,20 @@ class RealSenseCameraConfig(CameraConfig):
warmup_s: int = 1
def __post_init__(self) -> None:
self.color_mode = ColorMode(self.color_mode)
self.rotation = Cv2Rotation(self.rotation)
if self.color_mode not in (ColorMode.RGB, ColorMode.BGR):
raise ValueError(
f"`color_mode` is expected to be {ColorMode.RGB.value} or {ColorMode.BGR.value}, but {self.color_mode} is provided."
)
if self.rotation not in (
Cv2Rotation.NO_ROTATION,
Cv2Rotation.ROTATE_90,
Cv2Rotation.ROTATE_180,
Cv2Rotation.ROTATE_270,
):
raise ValueError(
f"`rotation` is expected to be in {(Cv2Rotation.NO_ROTATION, Cv2Rotation.ROTATE_90, Cv2Rotation.ROTATE_180, Cv2Rotation.ROTATE_270)}, but {self.rotation} is provided."
)
values = (self.fps, self.width, self.height)
if any(v is not None for v in values) and any(v is None for v in values):
+12
View File
@@ -14,6 +14,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import platform
from typing import cast
from lerobot.utils.import_utils import make_device_from_device_class
@@ -67,3 +68,14 @@ def get_cv2_rotation(rotation: Cv2Rotation) -> int | None:
return int(cv2.ROTATE_90_COUNTERCLOCKWISE)
else:
return None
def get_cv2_backend() -> int:
import cv2
if platform.system() == "Windows":
return int(cv2.CAP_MSMF) # Use MSMF for Windows instead of AVFOUNDATION
# elif platform.system() == "Darwin": # macOS
# return cv2.CAP_AVFOUNDATION
else: # Linux and others
return int(cv2.CAP_ANY)
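A hedged sketch of how a backend id like the one returned above is typically consumed: it is passed as the `apiPreference` argument of `cv2.VideoCapture`. The helper is re-declared locally so the snippet stands alone; whether a device actually opens depends on the machine.

```python
import platform

import cv2

# Local mirror of the helper above: MSMF on Windows, let OpenCV pick elsewhere.
def get_cv2_backend() -> int:
    if platform.system() == "Windows":
        return int(cv2.CAP_MSMF)
    return int(cv2.CAP_ANY)

# apiPreference is the second positional argument of cv2.VideoCapture.
cap = cv2.VideoCapture(0, get_cv2_backend())
print(cap.isOpened())  # False if no camera is available at index 0
cap.release()
```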

Some files were not shown because too many files have changed in this diff.