diff --git a/.github/workflows/documentation.yml b/.github/workflows/documentation.yml
index 48a10e4bc..c7926c542 100644
--- a/.github/workflows/documentation.yml
+++ b/.github/workflows/documentation.yml
@@ -18,6 +18,11 @@ name: Documentation
on:
# Allows running this workflow manually from the Actions tab
workflow_dispatch:
+ inputs:
+ version:
+ description: 'Version tag (e.g. v0.1.2) - Leave empty for standard main build'
+ required: false
+ type: string
# Triggers the workflow on push events to main for the docs folder
push:
@@ -54,7 +59,13 @@ jobs:
with:
commit_sha: ${{ github.sha }}
package: lerobot
- additional_args: --not_python_module ${{ github.event_name == 'release' && format('--version {0}', github.event.release.tag_name) || '' }}
+ additional_args: >-
+ --not_python_module
+ ${{
+ (github.event_name == 'release' && format('--version {0}', github.event.release.tag_name)) ||
+ (inputs.version != '' && format('--version {0}', inputs.version)) ||
+ ''
+ }}
secrets:
token: ${{ secrets.HUGGINGFACE_PUSH }}
hf_token: ${{ secrets.HF_DOC_BUILD_PUSH }}
diff --git a/.github/workflows/fast_tests.yml b/.github/workflows/fast_tests.yml
index 10ec91199..fc169e253 100644
--- a/.github/workflows/fast_tests.yml
+++ b/.github/workflows/fast_tests.yml
@@ -44,7 +44,7 @@ permissions:
# Sets up the environment variables
env:
UV_VERSION: "0.8.0"
- PYTHON_VERSION: "3.10"
+ PYTHON_VERSION: "3.12"
# Ensures that only the latest commit for a PR or branch is built, canceling older runs.
concurrency:
@@ -61,6 +61,7 @@ jobs:
MUJOCO_GL: egl
HF_HOME: /mnt/cache/.cache/huggingface
HF_LEROBOT_HOME: /mnt/cache/.cache/huggingface/lerobot
+ HF_USER_TOKEN: ${{ secrets.LEROBOT_HF_USER }}
steps:
- uses: actions/checkout@v6
with:
@@ -89,5 +90,11 @@ jobs:
- name: Install lerobot with test extras
run: uv sync --extra "test"
+ - name: Login to Hugging Face
+ if: env.HF_USER_TOKEN != ''
+ run: |
+ uv run hf auth login --token "$HF_USER_TOKEN" --add-to-git-credential
+ uv run hf auth whoami
+
- name: Run pytest
run: uv run pytest tests -vv --maxfail=10
diff --git a/.github/workflows/full_tests.yml b/.github/workflows/full_tests.yml
index 4dce3121a..8b7d28123 100644
--- a/.github/workflows/full_tests.yml
+++ b/.github/workflows/full_tests.yml
@@ -37,7 +37,7 @@ permissions:
# Sets up the environment variables
env:
UV_VERSION: "0.8.0"
- PYTHON_VERSION: "3.10"
+ PYTHON_VERSION: "3.12"
DOCKER_IMAGE_NAME: huggingface/lerobot-gpu
# Ensures that only the latest action is built, canceling older runs.
@@ -60,6 +60,7 @@ jobs:
MUJOCO_GL: egl
HF_HOME: /mnt/cache/.cache/huggingface
HF_LEROBOT_HOME: /mnt/cache/.cache/huggingface/lerobot
+ HF_USER_TOKEN: ${{ secrets.LEROBOT_HF_USER }}
steps:
- uses: actions/checkout@v6
with:
@@ -87,6 +88,12 @@ jobs:
- name: Install lerobot with all extras
run: uv sync --extra all # TODO(Steven): Make flash-attn optional
+ - name: Login to Hugging Face
+ if: env.HF_USER_TOKEN != ''
+ run: |
+ uv run hf auth login --token "$HF_USER_TOKEN" --add-to-git-credential
+ uv run hf auth whoami
+
- name: Run pytest (all extras)
run: uv run pytest tests -vv --maxfail=10
@@ -101,9 +108,11 @@ jobs:
runs-on:
group: aws-general-8-plus
if: |
- (github.event_name == 'pull_request_review' && github.event.review.state == 'approved' && github.event.pull_request.head.repo.fork == false) ||
- github.event_name == 'push' ||
- github.event_name == 'workflow_dispatch'
+ github.repository == 'huggingface/lerobot' && (
+ (github.event_name == 'pull_request_review' && github.event.review.state == 'approved' && github.event.pull_request.head.repo.fork == false) ||
+ github.event_name == 'push' ||
+ github.event_name == 'workflow_dispatch'
+ )
outputs:
image_tag: ${{ steps.set_tag.outputs.image_tag }}
env:
@@ -160,6 +169,7 @@ jobs:
HF_LEROBOT_HOME: /home/user_lerobot/.cache/huggingface/lerobot
TORCH_HOME: /home/user_lerobot/.cache/torch
TRITON_CACHE_DIR: /home/user_lerobot/.cache/triton
+ HF_USER_TOKEN: ${{ secrets.LEROBOT_HF_USER }}
container:
image: ${{ needs.build-and-push-docker.outputs.image_tag }} # zizmor: ignore[unpinned-images]
options: --gpus all --shm-size "16gb"
@@ -171,6 +181,13 @@ jobs:
shell: bash
working-directory: /lerobot
steps:
+ - name: Login to Hugging Face
+ if: env.HF_USER_TOKEN != ''
+ run: |
+ hf auth login --token "$HF_USER_TOKEN" --add-to-git-credential
+ hf auth whoami
+ - name: Fix ptxas permissions
+ run: chmod +x /lerobot/.venv/lib/python3.12/site-packages/triton/backends/nvidia/bin/ptxas
- name: Run pytest on GPU
run: pytest tests -vv --maxfail=10
- name: Run end-to-end tests
diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml
index 45bfb9bd5..5bc86857a 100644
--- a/.github/workflows/nightly.yml
+++ b/.github/workflows/nightly.yml
@@ -28,7 +28,7 @@ on:
# Sets up the environment variables
env:
UV_VERSION: "0.8.0"
- PYTHON_VERSION: "3.10"
+ PYTHON_VERSION: "3.12"
DOCKER_IMAGE_NAME_CPU: huggingface/lerobot-cpu:latest
DOCKER_IMAGE_NAME_GPU: huggingface/lerobot-gpu:latest
@@ -119,6 +119,7 @@ jobs:
HF_LEROBOT_HOME: /home/user_lerobot/.cache/huggingface/lerobot
TORCH_HOME: /home/user_lerobot/.cache/torch
TRITON_CACHE_DIR: /home/user_lerobot/.cache/triton
+ HF_USER_TOKEN: ${{ secrets.LEROBOT_HF_USER }}
container:
image: ${{ needs.build-docker-cpu-nightly.outputs.image_tag }} # zizmor: ignore[unpinned-images]
options: --shm-size "16gb"
@@ -130,6 +131,11 @@ jobs:
shell: bash
working-directory: /lerobot
steps:
+ - name: Login to Hugging Face
+ if: env.HF_USER_TOKEN != ''
+ run: |
+ hf auth login --token "$HF_USER_TOKEN" --add-to-git-credential
+ hf auth whoami
- name: Run pytest on CPU
run: pytest tests -vv --maxfail=10
- name: Run end-to-end tests
@@ -146,6 +152,7 @@ jobs:
HF_LEROBOT_HOME: /home/user_lerobot/.cache/huggingface/lerobot
TORCH_HOME: /home/user_lerobot/.cache/torch
TRITON_CACHE_DIR: /home/user_lerobot/.cache/triton
+ HF_USER_TOKEN: ${{ secrets.LEROBOT_HF_USER }}
container:
image: ${{ needs.build-docker-gpu-nightly.outputs.image_tag }} # zizmor: ignore[unpinned-images]
options: --gpus all --shm-size "16gb"
@@ -157,6 +164,11 @@ jobs:
shell: bash
working-directory: /lerobot
steps:
+ - name: Login to Hugging Face
+ if: env.HF_USER_TOKEN != ''
+ run: |
+ hf auth login --token "$HF_USER_TOKEN" --add-to-git-credential
+ hf auth whoami
- name: Run pytest on GPU
run: pytest tests -vv --maxfail=10
- name: Run end-to-end tests
@@ -174,6 +186,7 @@ jobs:
TORCH_HOME: /home/user_lerobot/.cache/torch
TRITON_CACHE_DIR: /home/user_lerobot/.cache/triton
CUDA_VISIBLE_DEVICES: "0,1,2,3"
+ HF_USER_TOKEN: ${{ secrets.LEROBOT_HF_USER }}
container:
image: ${{ needs.build-docker-gpu-nightly.outputs.image_tag }} # zizmor: ignore[unpinned-images]
options: --gpus all --shm-size "16gb"
@@ -185,12 +198,15 @@ jobs:
shell: bash
working-directory: /lerobot
steps:
+ - name: Login to Hugging Face
+ if: env.HF_USER_TOKEN != ''
+ run: |
+ hf auth login --token "$HF_USER_TOKEN" --add-to-git-credential
+ hf auth whoami
- name: Verify GPU availability
run: |
nvidia-smi
python -c "import torch; print(f'PyTorch CUDA available: {torch.cuda.is_available()}'); print(f'Number of GPUs: {torch.cuda.device_count()}')"
- name: Run multi-GPU training tests
- # TODO(Steven): Investigate why motors tests are failing in multi-GPU setup
- run: pytest tests -vv --maxfail=10 --ignore=tests/motors/
- timeout-minutes: 10
+ run: pytest -vv tests/training/
diff --git a/.github/workflows/quality.yml b/.github/workflows/quality.yml
index 0dc94cdd4..a84e9c17e 100644
--- a/.github/workflows/quality.yml
+++ b/.github/workflows/quality.yml
@@ -50,7 +50,7 @@ jobs:
- name: Set up Python
uses: actions/setup-python@v6
with:
- python-version: '3.10'
+ python-version: '3.12'
- name: Run pre-commit hooks
uses: pre-commit/action@v3.0.1 # zizmor: ignore[unpinned-uses]
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index bcab4c262..f7bd2be6c 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -22,7 +22,7 @@ on:
# Sets up the environment variables
env:
UV_VERSION: "0.8.0"
- PYTHON_VERSION: "3.10"
+ PYTHON_VERSION: "3.12"
jobs:
# This job builds the Python package and publishes it to PyPI
@@ -45,7 +45,7 @@ jobs:
- name: Set up Python
uses: actions/setup-python@v6
with:
- python-version: '3.10'
+ python-version: '3.12'
- name: Extract Version
id: extract_info
@@ -83,14 +83,6 @@ jobs:
exit 1
fi
- - name: Remove Tags with Git dependencies
- # TODO(Steven): Temporary patch to remove pi from PyPi 0.4.0 release due to its reliance on git dependencies.
- run: |
- echo "::info:: Checking for Git dependencies to remove from pyproject.toml..."
- grep -E '@ git\+https|lerobot\[pi\]' pyproject.toml | sed 's/^/::warning:: Removing line: /' || true
- sed -E -i '/@ git\+https|lerobot\[pi\]/d' pyproject.toml
- echo "::info:: Git dependencies removed. Proceeding with build."
-
- name: Install build dependencies
run: python -m pip install build
diff --git a/.github/workflows/unbound_deps_tests.yml b/.github/workflows/unbound_deps_tests.yml
index a75ecc121..404816c52 100644
--- a/.github/workflows/unbound_deps_tests.yml
+++ b/.github/workflows/unbound_deps_tests.yml
@@ -29,7 +29,7 @@ permissions:
# Sets up the environment variables
env:
UV_VERSION: "0.8.0"
- PYTHON_VERSION: "3.10"
+ PYTHON_VERSION: "3.12"
DOCKER_IMAGE_NAME: huggingface/lerobot-gpu:unbound
# Ensures that only the latest action is built, canceling older runs.
@@ -48,6 +48,7 @@ jobs:
MUJOCO_GL: egl
HF_HOME: /mnt/cache/.cache/huggingface
HF_LEROBOT_HOME: /mnt/cache/.cache/huggingface/lerobot
+ HF_USER_TOKEN: ${{ secrets.LEROBOT_HF_USER }}
steps:
- uses: actions/checkout@v6
with:
@@ -79,7 +80,11 @@ jobs:
- name: Install lerobot with all extras
run: uv sync --extra all # TODO(Steven): Make flash-attn optional
-
+ - name: Login to Hugging Face
+ if: env.HF_USER_TOKEN != ''
+ run: |
+ uv run hf auth login --token "$HF_USER_TOKEN" --add-to-git-credential
+ uv run hf auth whoami
- name: Run pytest (all extras)
run: uv run pytest tests -vv
@@ -91,6 +96,7 @@ jobs:
name: Build and Push Docker
runs-on:
group: aws-general-8-plus
+ if: github.repository == 'huggingface/lerobot'
outputs:
image_tag: ${{ env.DOCKER_IMAGE_NAME }}
env:
@@ -136,6 +142,7 @@ jobs:
HF_LEROBOT_HOME: /home/user_lerobot/.cache/huggingface/lerobot
TORCH_HOME: /home/user_lerobot/.cache/torch
TRITON_CACHE_DIR: /home/user_lerobot/.cache/triton
+ HF_USER_TOKEN: ${{ secrets.LEROBOT_HF_USER }}
container:
image: ${{ needs.build-and-push-docker.outputs.image_tag }} # zizmor: ignore[unpinned-images]
options: --gpus all --shm-size "16gb"
@@ -147,6 +154,11 @@ jobs:
shell: bash
working-directory: /lerobot
steps:
+ - name: Login to Hugging Face
+ if: env.HF_USER_TOKEN != ''
+ run: |
+ hf auth login --token "$HF_USER_TOKEN" --add-to-git-credential
+ hf auth whoami
- name: Run pytest on GPU
run: pytest tests -vv
- name: Run end-to-end tests
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index bfa3340d4..dff7416f4 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -13,7 +13,7 @@
# limitations under the License.
default_language_version:
- python: python3.10
+ python: python3.12
exclude: "tests/artifacts/.*\\.safetensors$"
@@ -55,7 +55,7 @@ repos:
rev: v3.21.0
hooks:
- id: pyupgrade
- args: [--py310-plus]
+ args: [--py312-plus]
##### Markdown Quality #####
- repo: https://github.com/rbubley/mirrors-prettier
diff --git a/AI_POLICY.md b/AI_POLICY.md
new file mode 100644
index 000000000..272ee8c12
--- /dev/null
+++ b/AI_POLICY.md
@@ -0,0 +1,25 @@
+# AI Usage Policy
+
+The LeRobot project welcomes contributions from everyone, and we have a few guidelines regarding AI usage to ensure high code quality, clear communication, and a healthy open-source ecosystem:
+
+- **Please disclose significant AI assistance.** If you used AI tools (e.g., Copilot, Claude, Cursor, ChatGPT) to generate a substantial portion of your code or text, let us know in your PR description. Transparency helps us review your changes more effectively.
+- **Own your code (The Human-in-the-Loop).** You must fully understand all the changes you are proposing. If you cannot explain what your AI-assisted code does or how it interacts with LeRobot's broader architecture, please take the time to learn and test it before submitting.
+- **Keep issues and discussions focused.** You are welcome to use AI to help draft issues or PR descriptions, but please review and edit them carefully before posting. AI can often be overly verbose; trimming the noise and getting straight to the point helps our maintainers address your needs faster.
+
+Our core maintainers also use AI tools to aid their workflows, but they do so while bringing deep contextual knowledge of the LeRobot codebase to validate the output. We ask all contributors to apply that same level of rigor.
+
+## Remember the Human Maintainers
+
+Please remember that LeRobot is maintained by a dedicated team of humans.
+
+Every discussion, issue, and pull request is read and reviewed by real people. While AI tools can generate thousands of lines of code in seconds, reviewing that code still takes human time and energy. Submitting unverified or low-effort AI output puts an unfair burden on our maintainers.
+
+Today, the quality of the AI output still heavily depends on the developer driving the tool. We ask that you respect our maintainers' time by thoroughly vetting, testing, and refining your submissions.
+
+## AI is Welcome Here
+
+LeRobot operates at the cutting edge of AI and robotics, and many of our maintainers actively embrace AI coding assistants as valuable productivity tools. We are a pro-AI project!
+
+Our reason for having an AI policy is not an anti-AI stance. Rather, it exists to ensure that AI is used to enhance human contributions, not replace them with unverified noise. It's about how the tools are used, not the tools themselves.
+
+We value the unique human insight you bring to the LeRobot community. Let AI empower your workflow, but always let your own judgment take the wheel.
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index abca0d821..60df93b27 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -2,7 +2,7 @@
Everyone is welcome to contribute, and we value everybody's contribution. Code is not the only way to help the community. Answering questions, helping others, reaching out, and improving the documentation are immensely valuable.
-Whichever way you choose to contribute, please be mindful to respect our [code of conduct](./CODE_OF_CONDUCT.md).
+Whichever way you choose to contribute, please be mindful to respect our [code of conduct](https://github.com/huggingface/lerobot/blob/main/CODE_OF_CONDUCT.md) and our [AI policy](https://github.com/huggingface/lerobot/blob/main/AI_POLICY.md).
## Ways to Contribute
@@ -14,7 +14,7 @@ You can contribute in many ways:
- **Documentation:** Improve examples, guides, and docstrings.
- **Feedback:** Submit tickets related to bugs or desired new features.
-If you are unsure where to start, join our [Discord Channel](https://discord.gg/JkrYNdmw).
+If you are unsure where to start, join our [Discord Channel](https://discord.gg/q8Dzzpym3f).
## Development Setup
@@ -32,7 +32,7 @@ git remote add upstream https://github.com/huggingface/lerobot.git
### 2. Environment Installation
-Please follow our [Installation Guide](./docs/source/installation.mdx) for the environment setup & installation from source.
+Please follow our [Installation Guide](https://huggingface.co/docs/lerobot/installation) for the environment setup & installation from source.
## Running Tests & Quality Checks
@@ -75,8 +75,8 @@ pytest -sv tests/test_specific_feature.py
Use the templates for required fields and examples.
-- **Issues:** Follow the [ticket template](./.github/ISSUE_TEMPLATE/bug-report.yml).
-- **Pull requests:** Rebase on `upstream/main`, use a descriptive branch (don't work on `main`), run `pre-commit` and tests locally, and follow the [PR template](./.github/PULL_REQUEST_TEMPLATE.md).
+- **Issues:** Follow the [ticket template](https://github.com/huggingface/lerobot/blob/main/.github/ISSUE_TEMPLATE/bug-report.yml).
+- **Pull requests:** Rebase on `upstream/main`, use a descriptive branch (don't work on `main`), run `pre-commit` and tests locally, and follow the [PR template](https://github.com/huggingface/lerobot/blob/main/.github/PULL_REQUEST_TEMPLATE.md).
One member of the LeRobot team will then review your contribution.
diff --git a/MANIFEST.in b/MANIFEST.in
index c1fb2ea75..c1fce3b5a 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -1,2 +1,3 @@
include src/lerobot/templates/lerobot_modelcard_template.md
include src/lerobot/datasets/card_template.md
+include src/lerobot/envs/metaworld_config.json
diff --git a/README.md b/README.md
index 57fec2e5f..f67d9103c 100644
--- a/README.md
+++ b/README.md
@@ -100,11 +100,11 @@ lerobot-train \
--dataset.repo_id=lerobot/aloha_mobile_cabinet
```
-| Category | Models |
-| -------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
-| **Imitation Learning** | [ACT](./docs/source/policy_act_README.md), [Diffusion](./docs/source/policy_diffusion_README.md), [VQ-BeT](./docs/source/policy_vqbet_README.md) |
-| **Reinforcement Learning** | [HIL-SERL](./docs/source/hilserl.mdx), [TDMPC](./docs/source/policy_tdmpc_README.md) & QC-FQL (coming soon) |
-| **VLAs Models** | [Pi0Fast](./docs/source/pi0fast.mdx), [Pi0.5](./docs/source/pi05.mdx), [GR00T N1.5](./docs/source/policy_groot_README.md), [SmolVLA](./docs/source/policy_smolvla_README.md), [XVLA](./docs/source/xvla.mdx) |
+| Category | Models |
+| -------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| **Imitation Learning** | [ACT](./docs/source/policy_act_README.md), [Diffusion](./docs/source/policy_diffusion_README.md), [VQ-BeT](./docs/source/policy_vqbet_README.md), [Multitask DiT Policy](./docs/source/policy_multi_task_dit_README.md) |
+| **Reinforcement Learning** | [HIL-SERL](./docs/source/hilserl.mdx), [TDMPC](./docs/source/policy_tdmpc_README.md) & QC-FQL (coming soon) |
+| **VLAs Models** | [Pi0Fast](./docs/source/pi0fast.mdx), [Pi0.5](./docs/source/pi05.mdx), [GR00T N1.5](./docs/source/policy_groot_README.md), [SmolVLA](./docs/source/policy_smolvla_README.md), [XVLA](./docs/source/xvla.mdx) |
Similarly to the hardware, you can easily implement your own policy & leverage LeRobot's data collection, training, and visualization tools, and share your model to the HF Hub
@@ -128,13 +128,14 @@ Learn how to implement your own simulation environment or benchmark and distribu
## Resources
- **[Documentation](https://huggingface.co/docs/lerobot/index):** The complete guide to tutorials & API.
+- **[Chinese Tutorials: LeRobot+SO-ARM101中文教程-同济子豪兄](https://zihao-ai.feishu.cn/wiki/space/7589642043471924447):** Detailed documentation covering assembly, teleoperation, datasets, training, and deployment. Verified by Seeed Studio and 5 global hackathon players.
- **[Discord](https://discord.gg/q8Dzzpym3f):** Join the `LeRobot` server to discuss with the community.
- **[X](https://x.com/LeRobotHF):** Follow us on X to stay up-to-date with the latest developments.
- **[Robot Learning Tutorial](https://huggingface.co/spaces/lerobot/robot-learning-tutorial):** A free, hands-on course to learn robot learning using LeRobot.
## Citation
-If you use LeRobot in your research, please cite:
+If you use LeRobot in your project, please cite the GitHub repository to acknowledge the ongoing development and contributors:
```bibtex
@misc{cadene2024lerobot,
@@ -145,9 +146,26 @@ If you use LeRobot in your research, please cite:
}
```
+If you are referencing our research or the academic paper, please also cite our ICLR publication:
+
+<details>
+<summary><b>ICLR 2026 Paper</b></summary>
+
+```bibtex
+@inproceedings{cadenelerobot,
+ title={LeRobot: An Open-Source Library for End-to-End Robot Learning},
+ author={Cadene, Remi and Alibert, Simon and Capuano, Francesco and Aractingi, Michel and Zouitine, Adil and Kooijmans, Pepijn and Choghari, Jade and Russi, Martino and Pascal, Caroline and Palma, Steven and Shukor, Mustafa and Moss, Jess and Soare, Alexander and Aubakirova, Dana and Lhoest, Quentin and Gallou\'edec, Quentin and Wolf, Thomas},
+ booktitle={The Fourteenth International Conference on Learning Representations},
+ year={2026},
+ url={https://arxiv.org/abs/2602.22818}
+}
+```
+
+</details>
+
## Contribute
-We welcome contributions from everyone in the community! To get started, please read our [CONTRIBUTING.md](./CONTRIBUTING.md) guide. Whether you're adding a new feature, improving documentation, or fixing a bug, your help and feedback are invaluable. We're incredibly excited about the future of open-source robotics and can't wait to work with you on what's next—thank you for your support!
+We welcome contributions from everyone in the community! To get started, please read our [CONTRIBUTING.md](https://github.com/huggingface/lerobot/blob/main/CONTRIBUTING.md) guide. Whether you're adding a new feature, improving documentation, or fixing a bug, your help and feedback are invaluable. We're incredibly excited about the future of open-source robotics and can't wait to work with you on what's next—thank you for your support!
diff --git a/benchmarks/video/README.md b/benchmarks/video/README.md
index 490a4b495..1feee69c4 100644
--- a/benchmarks/video/README.md
+++ b/benchmarks/video/README.md
@@ -28,9 +28,9 @@ We don't expect the same optimal settings for a dataset of images from a simulat
For these reasons, we run this benchmark on four representative datasets:
- `lerobot/pusht_image`: (96 x 96 pixels) simulation with simple geometric shapes, fixed camera.
-- `aliberts/aloha_mobile_shrimp_image`: (480 x 640 pixels) real-world indoor, moving camera.
-- `aliberts/paris_street`: (720 x 1280 pixels) real-world outdoor, moving camera.
-- `aliberts/kitchen`: (1080 x 1920 pixels) real-world indoor, fixed camera.
+- `lerobot/aloha_mobile_shrimp_image`: (480 x 640 pixels) real-world indoor, moving camera.
+- `lerobot/paris_street`: (720 x 1280 pixels) real-world outdoor, moving camera.
+- `lerobot/kitchen`: (1080 x 1920 pixels) real-world indoor, fixed camera.
Note: The datasets used for this benchmark need to be image datasets, not video datasets.
@@ -179,7 +179,7 @@ python benchmark/video/run_video_benchmark.py \
--output-dir outputs/video_benchmark \
--repo-ids \
lerobot/pusht_image \
- aliberts/aloha_mobile_shrimp_image \
+ lerobot/aloha_mobile_shrimp_image \
--vcodec libx264 libx265 \
--pix-fmt yuv444p yuv420p \
--g 2 20 None \
@@ -203,9 +203,9 @@ python benchmark/video/run_video_benchmark.py \
--output-dir outputs/video_benchmark \
--repo-ids \
lerobot/pusht_image \
- aliberts/aloha_mobile_shrimp_image \
- aliberts/paris_street \
- aliberts/kitchen \
+ lerobot/aloha_mobile_shrimp_image \
+ lerobot/paris_street \
+ lerobot/kitchen \
--vcodec libx264 libx265 \
--pix-fmt yuv444p yuv420p \
--g 1 2 3 4 5 6 10 15 20 40 None \
@@ -221,9 +221,9 @@ python benchmark/video/run_video_benchmark.py \
--output-dir outputs/video_benchmark \
--repo-ids \
lerobot/pusht_image \
- aliberts/aloha_mobile_shrimp_image \
- aliberts/paris_street \
- aliberts/kitchen \
+ lerobot/aloha_mobile_shrimp_image \
+ lerobot/paris_street \
+ lerobot/kitchen \
--vcodec libsvtav1 \
--pix-fmt yuv420p \
--g 1 2 3 4 5 6 10 15 20 40 None \
@@ -252,37 +252,37 @@ Since we're using av1 encoding, we're choosing the `pyav` decoder as `video_read
These tables show the results for `g=2` and `crf=30`, using `timestamps-modes=6_frames` and `backend=pyav`
-| video_images_size_ratio | vcodec | pix_fmt | | | |
-| ---------------------------------- | ---------- | ------- | --------- | --------- | --------- |
-| | libx264 | | libx265 | | libsvtav1 |
-| repo_id | yuv420p | yuv444p | yuv420p | yuv444p | yuv420p |
-| lerobot/pusht_image | **16.97%** | 17.58% | 18.57% | 18.86% | 22.06% |
-| aliberts/aloha_mobile_shrimp_image | 2.14% | 2.11% | 1.38% | **1.37%** | 5.59% |
-| aliberts/paris_street | 2.12% | 2.13% | **1.54%** | **1.54%** | 4.43% |
-| aliberts/kitchen | 1.40% | 1.39% | **1.00%** | **1.00%** | 2.52% |
+| video_images_size_ratio | vcodec | pix_fmt | | | |
+| --------------------------------- | ---------- | ------- | --------- | --------- | --------- |
+| | libx264 | | libx265 | | libsvtav1 |
+| repo_id | yuv420p | yuv444p | yuv420p | yuv444p | yuv420p |
+| lerobot/pusht_image | **16.97%** | 17.58% | 18.57% | 18.86% | 22.06% |
+| lerobot/aloha_mobile_shrimp_image | 2.14% | 2.11% | 1.38% | **1.37%** | 5.59% |
+| lerobot/paris_street | 2.12% | 2.13% | **1.54%** | **1.54%** | 4.43% |
+| lerobot/kitchen | 1.40% | 1.39% | **1.00%** | **1.00%** | 2.52% |
-| video_images_load_time_ratio | vcodec | pix_fmt | | | |
-| ---------------------------------- | ------- | ------- | -------- | ------- | --------- |
-| | libx264 | | libx265 | | libsvtav1 |
-| repo_id | yuv420p | yuv444p | yuv420p | yuv444p | yuv420p |
-| lerobot/pusht_image | 6.45 | 5.19 | **1.90** | 2.12 | 2.47 |
-| aliberts/aloha_mobile_shrimp_image | 11.80 | 7.92 | 0.71 | 0.85 | **0.48** |
-| aliberts/paris_street | 2.21 | 2.05 | 0.36 | 0.49 | **0.30** |
-| aliberts/kitchen | 1.46 | 1.46 | 0.28 | 0.51 | **0.26** |
+| video_images_load_time_ratio | vcodec | pix_fmt | | | |
+| --------------------------------- | ------- | ------- | -------- | ------- | --------- |
+| | libx264 | | libx265 | | libsvtav1 |
+| repo_id | yuv420p | yuv444p | yuv420p | yuv444p | yuv420p |
+| lerobot/pusht_image | 6.45 | 5.19 | **1.90** | 2.12 | 2.47 |
+| lerobot/aloha_mobile_shrimp_image | 11.80 | 7.92 | 0.71 | 0.85 | **0.48** |
+| lerobot/paris_street | 2.21 | 2.05 | 0.36 | 0.49 | **0.30** |
+| lerobot/kitchen | 1.46 | 1.46 | 0.28 | 0.51 | **0.26** |
-| | | vcodec | pix_fmt | | | |
-| ---------------------------------- | -------- | -------- | ------------ | -------- | --------- | ------------ |
-| | | libx264 | | libx265 | | libsvtav1 |
-| repo_id | metric | yuv420p | yuv444p | yuv420p | yuv444p | yuv420p |
-| lerobot/pusht_image | avg_mse | 2.90E-04 | **2.03E-04** | 3.13E-04 | 2.29E-04 | 2.19E-04 |
-| | avg_psnr | 35.44 | 37.07 | 35.49 | **37.30** | 37.20 |
-| | avg_ssim | 98.28% | **98.85%** | 98.31% | 98.84% | 98.72% |
-| aliberts/aloha_mobile_shrimp_image | avg_mse | 2.76E-04 | 2.59E-04 | 3.17E-04 | 3.06E-04 | **1.30E-04** |
-| | avg_psnr | 35.91 | 36.21 | 35.88 | 36.09 | **40.17** |
-| | avg_ssim | 95.19% | 95.18% | 95.00% | 95.05% | **97.73%** |
-| aliberts/paris_street | avg_mse | 6.89E-04 | 6.70E-04 | 4.03E-03 | 4.02E-03 | **3.09E-04** |
-| | avg_psnr | 33.48 | 33.68 | 32.05 | 32.15 | **35.40** |
-| | avg_ssim | 93.76% | 93.75% | 89.46% | 89.46% | **95.46%** |
-| aliberts/kitchen | avg_mse | 2.50E-04 | 2.24E-04 | 4.28E-04 | 4.18E-04 | **1.53E-04** |
-| | avg_psnr | 36.73 | 37.33 | 36.56 | 36.75 | **39.12** |
-| | avg_ssim | 95.47% | 95.58% | 95.52% | 95.53% | **96.82%** |
+| | | vcodec | pix_fmt | | | |
+| --------------------------------- | -------- | -------- | ------------ | -------- | --------- | ------------ |
+| | | libx264 | | libx265 | | libsvtav1 |
+| repo_id | metric | yuv420p | yuv444p | yuv420p | yuv444p | yuv420p |
+| lerobot/pusht_image | avg_mse | 2.90E-04 | **2.03E-04** | 3.13E-04 | 2.29E-04 | 2.19E-04 |
+| | avg_psnr | 35.44 | 37.07 | 35.49 | **37.30** | 37.20 |
+| | avg_ssim | 98.28% | **98.85%** | 98.31% | 98.84% | 98.72% |
+| lerobot/aloha_mobile_shrimp_image | avg_mse | 2.76E-04 | 2.59E-04 | 3.17E-04 | 3.06E-04 | **1.30E-04** |
+| | avg_psnr | 35.91 | 36.21 | 35.88 | 36.09 | **40.17** |
+| | avg_ssim | 95.19% | 95.18% | 95.00% | 95.05% | **97.73%** |
+| lerobot/paris_street | avg_mse | 6.89E-04 | 6.70E-04 | 4.03E-03 | 4.02E-03 | **3.09E-04** |
+| | avg_psnr | 33.48 | 33.68 | 32.05 | 32.15 | **35.40** |
+| | avg_ssim | 93.76% | 93.75% | 89.46% | 89.46% | **95.46%** |
+| lerobot/kitchen | avg_mse | 2.50E-04 | 2.24E-04 | 4.28E-04 | 4.18E-04 | **1.53E-04** |
+| | avg_psnr | 36.73 | 37.33 | 36.56 | 36.75 | **39.12** |
+| | avg_ssim | 95.47% | 95.58% | 95.52% | 95.53% | **96.82%** |
diff --git a/docker/Dockerfile.internal b/docker/Dockerfile.internal
index c1dfa1dae..b385fc51c 100644
--- a/docker/Dockerfile.internal
+++ b/docker/Dockerfile.internal
@@ -24,7 +24,7 @@ ARG OS_VERSION=22.04
FROM nvidia/cuda:${CUDA_VERSION}-base-ubuntu${OS_VERSION}
# Define Python version argument
-ARG PYTHON_VERSION=3.10
+ARG PYTHON_VERSION=3.12
# Configure environment variables
ENV DEBIAN_FRONTEND=noninteractive \
@@ -85,6 +85,8 @@ RUN if [ "$UNBOUND_DEPS" = "true" ]; then \
RUN uv pip install --no-cache ".[all]"
+RUN chmod +x /lerobot/.venv/lib/python${PYTHON_VERSION}/site-packages/triton/backends/nvidia/bin/ptxas
+
# Copy the rest of the application source code
# Make sure to have the git-LFS files for testing
COPY --chown=user_lerobot:user_lerobot . .
diff --git a/docker/Dockerfile.user b/docker/Dockerfile.user
index 031165930..f267be7f2 100644
--- a/docker/Dockerfile.user
+++ b/docker/Dockerfile.user
@@ -18,8 +18,10 @@
# docker build -f docker/Dockerfile.user -t lerobot-user .
# docker run -it --rm lerobot-user
+# With access to physical USB devices: docker run -it --device=/dev/ -v /dev/:/dev/ --rm lerobot-user
+
# Configure the base image
-ARG PYTHON_VERSION=3.10
+ARG PYTHON_VERSION=3.12
FROM python:${PYTHON_VERSION}-slim
# Configure environment variables
diff --git a/docs/source/_toctree.yml b/docs/source/_toctree.yml
index 2b8086cd7..cc41ddb51 100644
--- a/docs/source/_toctree.yml
+++ b/docs/source/_toctree.yml
@@ -7,8 +7,6 @@
- sections:
- local: il_robots
title: Imitation Learning for Robots
- - local: cameras
- title: Cameras
- local: bring_your_own_policies
title: Bring Your Own Policies
- local: integrate_hardware
@@ -21,6 +19,8 @@
title: Multi GPU training
- local: peft_training
title: Training with PEFT (e.g., LoRA)
+ - local: rename_map
+ title: Using Rename Map and Empty Cameras
title: "Tutorials"
- sections:
- local: lerobot-dataset-v3
@@ -29,6 +29,10 @@
title: Porting Large Datasets
- local: using_dataset_tools
title: Using the Dataset Tools
+ - local: dataset_subtask
+ title: Using Subtasks in the Dataset
+ - local: streaming_video_encoding
+ title: Streaming Video Encoding
title: "Datasets"
- sections:
- local: act
@@ -45,6 +49,8 @@
title: NVIDIA GR00T N1.5
- local: xvla
title: X-VLA
+ - local: multi_task_dit
+ title: Multitask DiT Policy
- local: walloss
title: WALL-OSS
title: "Policies"
@@ -81,6 +87,8 @@
title: Processors for Robots and Teleoperators
- local: env_processor
title: Environment Processors
+ - local: action_representations
+ title: Action Representations
title: "Robot Processors"
- sections:
- local: so101
@@ -99,11 +107,19 @@
title: Unitree G1
- local: earthrover_mini_plus
title: Earth Rover Mini
+ - local: omx
+ title: OMX
+ - local: openarm
+ title: OpenArm
title: "Robots"
- sections:
- local: phone_teleop
title: Phone
title: "Teleoperators"
+- sections:
+ - local: cameras
+ title: Cameras
+ title: "Sensors"
- sections:
- local: torch_accelerators
title: PyTorch accelerators
@@ -113,6 +129,8 @@
title: Notebooks
- local: feetech
title: Updating Feetech Firmware
+ - local: damiao
+ title: Damiao Motors and CAN Bus
title: "Resources"
- sections:
- local: contributing
diff --git a/docs/source/act.mdx b/docs/source/act.mdx
index e3294ca69..453bcbba8 100644
--- a/docs/source/act.mdx
+++ b/docs/source/act.mdx
@@ -88,5 +88,9 @@ lerobot-record \
--dataset.repo_id=${HF_USER}/eval_act_your_dataset \
--dataset.num_episodes=10 \
--dataset.single_task="Your task description" \
+ --dataset.streaming_encoding=true \
+ --dataset.encoder_threads=2 \
--policy.path=${HF_USER}/act_policy
```
+
+Optionally, add `--dataset.vcodec=auto` to the command above.
diff --git a/docs/source/action_representations.mdx b/docs/source/action_representations.mdx
new file mode 100644
index 000000000..1604ed467
--- /dev/null
+++ b/docs/source/action_representations.mdx
@@ -0,0 +1,223 @@
+# Action Representations
+
+This guide explains the different ways robot actions can be represented in LeRobot, how they relate to each other, and when to use each one.
+
+## Joint Space vs End-Effector Space
+
+Before discussing action representations, it helps to understand the two coordinate spaces actions can live in.
+
+### Joint Space
+
+Joint-space actions directly specify target positions for each motor. For a 6-DOF arm with a gripper, a joint-space action might look like:
+
+```
+action = [shoulder_pan: 45.0, shoulder_lift: -20.0, elbow: -30.0, wrist_pitch: 10.0, wrist_roll: 0.0, wrist_yaw: 5.0, gripper: 0.8]
+```
+
+Joint space is the default in LeRobot. It is simple, requires no kinematics model, and maps directly to motor commands. Most beginner setups (SO-100, Koch) use joint-space actions.
+
+### End-Effector (EE) Space
+
+End-effector-space actions specify the desired position and orientation of the robot's tool tip (gripper) in Cartesian coordinates:
+
+```
+action = [x: 0.25, y: -0.10, z: 0.15, wx: 0.0, wy: 0.0, wz: 0.1, gripper: 0.8]
+```
+
+EE space is more intuitive for tasks like pick-and-place because it directly describes where the gripper should go, but it requires a kinematics model (URDF) to convert between EE poses and joint angles.
+
+### Converting Between Spaces
+
+LeRobot provides processor steps for converting between joint and EE spaces using forward and inverse kinematics. These are built on top of `RobotKinematics`, which loads a URDF model of your robot.
+
+```python
+from lerobot.model.kinematics import RobotKinematics
+from lerobot.robots.so_follower.robot_kinematic_processor import (
+ ForwardKinematicsJointsToEE,
+ InverseKinematicsEEToJoints,
+)
+
+kinematics = RobotKinematics(
+ urdf_path="./SO101/so101_new_calib.urdf",
+ target_frame_name="gripper_frame_link",
+ joint_names=["shoulder", "elbow", "wrist_pitch", "wrist_roll", "wrist_yaw"],
+)
+
+# Joints → EE (for observations: "where is my gripper?")
+fk_step = ForwardKinematicsJointsToEE(kinematics=kinematics, motor_names=[...])
+
+# EE → Joints (for actions: "move my gripper here")
+ik_step = InverseKinematicsEEToJoints(kinematics=kinematics, motor_names=[...])
+```
+
+See [`examples/so100_to_so100_EE/`](https://github.com/huggingface/lerobot/tree/main/examples/so100_to_so100_EE) for a complete working example of recording, replaying, and evaluating with EE-space actions on an SO-100 arm.
+
+## Absolute, Relative, and Delta Actions
+
+Regardless of whether you work in joint space or EE space, the action values can be expressed in three different ways. The terminology follows [UMI (Chi et al., 2024)](https://arxiv.org/abs/2402.10329).
+
+### Absolute Actions (LeRobot default)
+
+Each action specifies the target position directly.
+
+**Example** (joint space, chunk of 4):
+
+```
+current_state = [45.0, -30.0, 10.0]
+
+action_chunk = [
+ [46.0, -29.0, 11.0], # go to 46, -29, 11
+ [47.5, -27.0, 12.0], # go to 47.5, -27, 12
+ [49.0, -25.0, 13.5], # go to 49, -25, 13.5
+ [50.0, -24.0, 15.0], # go to 50, -24, 15
+]
+```
+
+Each value is a target position in the robot's coordinate frame. Simple and direct, but requires a consistent global coordinate frame. This is the default in LeRobot.
+
+### Relative Actions (used by OpenPI / pi0)
+
+Each action in the chunk is an offset from the **current state at the moment of prediction**. All actions in the chunk share the same reference point:
+
+```
+current_state = [45.0, -30.0, 10.0]
+
+relative_chunk = [
+ [1.0, 1.0, 1.0], # +1 from current → target 46, -29, 11
+ [2.5, 3.0, 2.0], # +2.5 from current → target 47.5, -27, 12
+ [4.0, 5.0, 3.5], # +4 from current → target 49, -25, 13.5
+ [5.0, 6.0, 5.0], # +5 from current → target 50, -24, 15
+]
+```
+
+The conversion is straightforward: `relative = absolute - current_state`. To recover absolute: `absolute = relative + current_state`.
+
+**Why use relative actions?** The model learns to predict offsets centered around zero, which is easier to normalize and leads to more stable training. Because every chunk references the same current state, there is no error accumulation across chunks.
+
+### Delta Actions (sequential differences)
+
+Each action is an offset from the **previous action** (or from the current state for the first step):
+
+```
+current_state = [45.0, -30.0, 10.0]
+
+delta_chunk = [
+ [1.0, 1.0, 1.0], # current → 46, -29, 11
+ [1.5, 2.0, 1.0], # previous action → 47.5, -27, 12
+ [1.5, 2.0, 1.5], # previous action → 49, -25, 13.5
+ [1.0, 1.0, 1.5], # previous action → 50, -24, 15
+]
+```
+
+Here each step is relative to the one before it. To recover absolute positions you must sum all previous deltas, which means errors accumulate over time. UMI explicitly argues against this representation for this reason.
+
+### Visual Comparison
+
+The figure below (based on a figure from [UMI, Chi et al., 2024](https://arxiv.org/abs/2402.10329)) illustrates the key difference. With **relative trajectory**, every action in the chunk points back to the same origin (current state), so a new inference step cleanly resets the reference. With **delta**, each action depends on the previous one, so errors accumulate. **Absolute** actions require a consistent global coordinate frame.
+
+
+
+## Using Relative Actions in LeRobot
+
+LeRobot provides `RelativeActionsProcessorStep` to convert between absolute and relative actions inside the processor pipeline. This is how pi0, pi0.5, and pi0_fast support relative actions.
+
+> **Note:** All pi models (pi0, pi0.5, pi0_fast) apply relative conversion **before** normalization (`relative → normalize`), so the normalizer always sees relative values. This means **relative action stats are required** for all of them when training with `use_relative_actions=true`. In pi0_fast the `RelativeActionsProcessorStep` only modifies the action — the state observation is unchanged — so `NormalizerProcessorStep` still runs before the state tokenizer and the tokenizer continues to receive normalized state as expected.
+
+### How it works
+
+During **training** (preprocessing), actions are converted from absolute to relative before the model sees them:
+
+```
+raw absolute action → RelativeActionsProcessorStep → normalize → model
+```
+
+During **inference** (postprocessing), model predictions are converted back to absolute before being sent to the robot:
+
+```
+model output → unnormalize → AbsoluteActionsProcessorStep → robot
+```
+
+The `AbsoluteActionsProcessorStep` reads the cached current state from its paired `RelativeActionsProcessorStep`, so the two must be wired together (handled automatically by the policy factory).
+
+### Enabling relative actions for the pi family (pi0, pi0.5, pi0_fast)
+
+**Step 1**: Precompute relative action statistics for your dataset:
+
+```bash
+lerobot-edit-dataset \
+ --repo_id your_dataset \
+ --operation.type recompute_stats \
+ --operation.relative_action true \
+ --operation.chunk_size 50 \
+ --operation.relative_exclude_joints "['gripper']"
+```
+
+**Step 2**: Train with relative actions enabled:
+
+```bash
+lerobot-train \
+ --dataset.repo_id=your_dataset \
+ --policy.type=pi0 \
+ --policy.use_relative_actions=true \
+ --policy.relative_exclude_joints='["gripper"]'
+```
+
+The `relative_exclude_joints` parameter specifies joints that should remain in absolute space. For example, gripper commands are typically binary (open/close) and don't benefit from relative encoding.
+
+### Combining relative actions with RTC
+
+[RTC](https://arxiv.org/abs/2506.07339) runs policy inference at high frequency and sends actions to the robot as they are predicted rather than waiting for a full chunk. Relative actions and RTC are fully compatible: because every chunk in relative mode references the **same** current state (captured at the start of inference), each predicted action in the chunk remains a valid offset even if the robot has already moved. No special handling is needed — `RelativeActionsProcessorStep` caches the state once per inference call and `AbsoluteActionsProcessorStep` applies it to every action in the streamed output.
+
+### Combining relative actions with EE space
+
+Relative actions work in both joint space and EE space. For example, if your dataset stores EE actions, relative encoding converts them to offsets from the current EE pose:
+
+```
+current_ee_state = [x: 0.25, y: -0.10, z: 0.15, gripper: 0.8]
+
+absolute_ee_chunk = [
+ [0.26, -0.09, 0.16, 0.8],
+ [0.28, -0.07, 0.18, 0.8],
+]
+
+relative_ee_chunk = [
+ [0.01, 0.01, 0.01, 0.0], # offset from current EE pose
+ [0.03, 0.03, 0.03, 0.0], # offset from current EE pose
+]
+```
+
+## Processing Pipeline Summary
+
+Here is how the different processors compose. Each arrow is a processor step, and they can be chained in a `RobotProcessorPipeline` or `PolicyProcessorPipeline`:
+
+```
+ ┌─────────────────────────────────────────┐
+ Action Space │ Joint Space ←──IK──→ EE Space │
+ │ ForwardKinematicsJointsToEE │
+ │ InverseKinematicsEEToJoints │
+ └─────────────────────────────────────────┘
+
+ ┌─────────────────────────────────────────┐
+ Representation │ Absolute ←────→ Relative │
+ │ RelativeActionsProcessorStep (pre) │
+ │ AbsoluteActionsProcessorStep (post) │
+ └─────────────────────────────────────────┘
+
+ ┌─────────────────────────────────────────┐
+ Normalization │ Raw ←────→ Normalized │
+ │ NormalizerProcessorStep (pre) │
+ │ UnnormalizerProcessorStep (post) │
+ └─────────────────────────────────────────┘
+```
+
+A typical training preprocessor might chain: `raw absolute joint actions → relative → normalize`. A typical inference postprocessor: `unnormalize → absolute → (optionally IK to joints)`.
+
+## References
+
+- [Universal Manipulation Interface (UMI)](https://arxiv.org/abs/2402.10329) - Chi et al., 2024. Defines the relative trajectory action representation and compares it with absolute and delta actions.
+- [Introduction to Processors](./introduction_processors) - How processor pipelines work in LeRobot.
+- [`examples/so100_to_so100_EE/`](https://github.com/huggingface/lerobot/tree/main/examples/so100_to_so100_EE) - Complete example of recording and evaluating with EE-space actions.
diff --git a/docs/source/async.mdx b/docs/source/async.mdx
index 1d3e0edbf..a46408a0d 100644
--- a/docs/source/async.mdx
+++ b/docs/source/async.mdx
@@ -48,7 +48,7 @@ python -m lerobot.async_inference.robot_client \
--task="dummy" \ # POLICY: The task to run the policy on (`Fold my t-shirt`). Not necessarily defined for all policies, such as `act`
--policy_type=your_policy_type \ # POLICY: the type of policy to run (smolvla, act, etc)
--pretrained_name_or_path=user/model \ # POLICY: the model name/path on server to the checkpoint to run (e.g., lerobot/smolvla_base)
- --policy_device=mps \ # POLICY: the device to run the policy on, on the server
+ --policy_device=mps \ # POLICY: the device to run the policy on, on the server (cuda, mps, xpu, cpu)
--actions_per_chunk=50 \ # POLICY: the number of actions to output at once
--chunk_size_threshold=0.5 \ # CLIENT: the threshold for the chunk size before sending a new observation to the server
--aggregate_fn_name=weighted_average \ # CLIENT: the function to aggregate actions on overlapping portions
@@ -195,6 +195,7 @@ client_cfg = RobotClientConfig(
robot=robot_cfg,
server_address="localhost:8080",
policy_device="mps",
+ client_device="cpu",
policy_type="smolvla",
pretrained_name_or_path="/smolvla_async",
chunk_size_threshold=0.5,
@@ -309,4 +310,4 @@ Asynchronous inference represents a significant advancement in real-time robotic
- **Universal Compatibility**: Works with all LeRobot-supported policies, from lightweight ACT models to vision-language models like SmolVLA
Start experimenting with the default parameters, monitor your action queue sizes, and iteratively refine your setup to achieve optimal performance for your specific use case.
-If you want to discuss this further, hop into our [Discord community](https://discord.gg/s3KuuzsPFb), or open an issue on our [GitHub repository](https://github.com/lerobot/lerobot/issues).
+If you want to discuss this further, hop into our [Discord community](https://discord.gg/s3KuuzsPFb), or open an issue on our [GitHub repository](https://github.com/huggingface/lerobot/issues).
diff --git a/docs/source/bring_your_own_policies.mdx b/docs/source/bring_your_own_policies.mdx
index 0ff098708..38c32aa71 100644
--- a/docs/source/bring_your_own_policies.mdx
+++ b/docs/source/bring_your_own_policies.mdx
@@ -32,7 +32,7 @@ version = "0.1.0"
dependencies = [
# your policy-specific dependencies
]
-requires-python = ">= 3.11"
+requires-python = ">= 3.12"
[build-system]
build-backend = # your-build-backend
@@ -41,13 +41,15 @@ requires = # your-build-system
## Step 2: Define the Policy Configuration
-Create a configuration class that inherits from `PreTrainedConfig` and registers your policy type:
+Create a configuration class that inherits from [`PreTrainedConfig`](https://github.com/huggingface/lerobot/blob/main/src/lerobot/configs/policies.py) and registers your policy type:
+Here is a template to get you started. Customize the parameters and methods as needed for your policy's architecture and training requirements.
```python
# configuration_my_custom_policy.py
from dataclasses import dataclass, field
from lerobot.configs.policies import PreTrainedConfig
-from lerobot.configs.types import NormalizationMode
+from lerobot.optim.optimizers import AdamWConfig
+from lerobot.optim.schedulers import CosineDecayWithWarmupSchedulerConfig
@PreTrainedConfig.register_subclass("my_custom_policy")
@dataclass
@@ -61,62 +63,132 @@ class MyCustomPolicyConfig(PreTrainedConfig):
hidden_dim: Hidden dimension for the policy network
# Add your policy-specific parameters here
"""
- # ...PreTrainedConfig fields...
- pass
+
+ horizon: int = 50
+ n_action_steps: int = 50
+ hidden_dim: int = 256
+
+ optimizer_lr: float = 1e-4
+ optimizer_weight_decay: float = 1e-4
def __post_init__(self):
super().__post_init__()
- # Add any validation logic here
+ if self.n_action_steps > self.horizon:
+ raise ValueError("n_action_steps cannot exceed horizon")
def validate_features(self) -> None:
"""Validate input/output feature compatibility."""
- # Implement validation logic for your policy's requirements
- pass
+ if not self.image_features:
+ raise ValueError("MyCustomPolicy requires at least one image feature.")
+ if self.action_feature is None:
+ raise ValueError("MyCustomPolicy requires 'action' in output_features.")
+
+ def get_optimizer_preset(self) -> AdamWConfig:
+ return AdamWConfig(lr=self.optimizer_lr, weight_decay=self.optimizer_weight_decay)
+
+ def get_scheduler_preset(self):
+ return None
+
+ @property
+ def observation_delta_indices(self) -> list[int] | None:
+ """Relative timestep offsets the dataset loader provides per observation.
+
+ Return `None` for single-frame policies. For temporal policies that consume
+ multiple past or future frames, return a list of offsets, e.g. `[-20, -10, 0, 10]` for
+ 2 past frames, the current frame, and 1 future frame, all at stride 10.
+ """
+ return None
+
+ @property
+ def action_delta_indices(self) -> list[int]:
+ """Relative timestep offsets for the action chunk the dataset loader returns.
+ """
+ return list(range(self.horizon))
+
+ @property
+ def reward_delta_indices(self) -> None:
+ return None
```
## Step 3: Implement the Policy Class
-Create your policy implementation by inheriting from LeRobot's base `PreTrainedPolicy` class:
+Create your policy implementation by inheriting from [`PreTrainedPolicy`](https://github.com/huggingface/lerobot/blob/main/src/lerobot/policies/pretrained.py):
```python
# modeling_my_custom_policy.py
import torch
import torch.nn as nn
-from typing import Dict, Any
+from typing import Any
from lerobot.policies.pretrained import PreTrainedPolicy
+from lerobot.utils.constants import ACTION
from .configuration_my_custom_policy import MyCustomPolicyConfig
class MyCustomPolicy(PreTrainedPolicy):
- config_class = MyCustomPolicyConfig
+ config_class = MyCustomPolicyConfig # the config class registered above; `name` below must match the string passed to @PreTrainedConfig.register_subclass
name = "my_custom_policy"
- def __init__(self, config: MyCustomPolicyConfig, dataset_stats: Dict[str, Any] = None):
+ def __init__(self, config: MyCustomPolicyConfig, dataset_stats: dict[str, Any] | None = None):
super().__init__(config, dataset_stats)
+ config.validate_features() # not called automatically by the base class
+ self.config = config
+ self.model = ... # your nn.Module here
+
+ def reset(self):
+ """Reset episode state."""
...
+
+ def get_optim_params(self) -> dict:
+ """Return parameters to pass to the optimizer (e.g. with per-group lr/wd)."""
+ return {"params": self.parameters()}
+
+ def predict_action_chunk(self, batch: dict[str, torch.Tensor], **kwargs) -> torch.Tensor:
+ """Return the full action chunk (B, chunk_size, action_dim) for the current observation."""
+ ...
+
+ def select_action(self, batch: dict[str, torch.Tensor], **kwargs) -> torch.Tensor:
+ """Return a single action for the current timestep (called at inference)."""
+ ...
+
+ def forward(self, batch: dict[str, torch.Tensor]) -> dict[str, torch.Tensor]:
+ """Compute the training loss.
+
+ `batch["action_is_pad"]` is a bool mask of shape (B, horizon) that marks
+ timesteps padded because the episode ended before `horizon` steps; you
+ can exclude those from your loss.
+ """
+ actions = batch[ACTION]
+ action_is_pad = batch.get("action_is_pad")
+ ...
+ return {"loss": ...}
```
## Step 4: Add Data Processors
-Create processor functions:
+Create processor functions. For a concrete reference, see [processor_act.py](https://github.com/huggingface/lerobot/blob/main/src/lerobot/policies/act/processor_act.py) or [processor_diffusion.py](https://github.com/huggingface/lerobot/blob/main/src/lerobot/policies/diffusion/processor_diffusion.py).
```python
# processor_my_custom_policy.py
-from typing import Dict, Any
+from typing import Any
import torch
+from lerobot.processor import PolicyAction, PolicyProcessorPipeline
+
def make_my_custom_policy_pre_post_processors(
config,
+ dataset_stats: dict[str, dict[str, torch.Tensor]] | None = None,
) -> tuple[
PolicyProcessorPipeline[dict[str, Any], dict[str, Any]],
PolicyProcessorPipeline[PolicyAction, PolicyAction],
]:
- """Create preprocessing and postprocessing functions for your policy."""
- pass # Define your preprocessing and postprocessing logic here
-
+ preprocessor = ... # build your PolicyProcessorPipeline for inputs
+ postprocessor = ... # build your PolicyProcessorPipeline for outputs
+ return preprocessor, postprocessor
```
+**Important - function naming:** LeRobot discovers your processor by name. The function **must** be called `make_{policy_name}_pre_post_processors` (matching the string you passed to `@PreTrainedConfig.register_subclass`).
+
## Step 5: Package Initialization
Expose your classes in the package's `__init__.py`:
diff --git a/docs/source/cameras.mdx b/docs/source/cameras.mdx
index 5c35be0ba..8af0f5ae5 100644
--- a/docs/source/cameras.mdx
+++ b/docs/source/cameras.mdx
@@ -1,12 +1,22 @@
# Cameras
-LeRobot offers multiple options for video capture, including phone cameras, built-in laptop cameras, external webcams, and Intel RealSense cameras. To efficiently record frames from most cameras, you can use either the `OpenCVCamera` or `RealSenseCamera` class. For additional compatibility details on the `OpenCVCamera` class, refer to the [Video I/O with OpenCV Overview](https://docs.opencv.org/4.x/d0/da7/videoio_overview.html).
+LeRobot offers multiple options for video capture:
-### Finding your camera
+| Class | Supported Cameras |
+| ----------------- | ----------------------------------- |
+| `OpenCVCamera` | Phone, built-in laptop, USB webcams |
+| `ZMQCamera` | Network-connected cameras |
+| `RealSenseCamera` | Intel RealSense (with depth) |
+| `Reachy2Camera` | Reachy 2 robot cameras |
-To instantiate a camera, you need a camera identifier. This identifier might change if you reboot your computer or re-plug your camera, a behavior mostly dependant on your operating system.
+> [!TIP]
+> For `OpenCVCamera` compatibility details, see the [Video I/O with OpenCV Overview](https://docs.opencv.org/4.x/d0/da7/videoio_overview.html).
-To find the camera indices of the cameras plugged into your system, run the following script:
+### Find your camera
+
+Every camera requires a unique identifier to be instantiated, allowing you to distinguish between multiple connected devices.
+
+`OpenCVCamera` and `RealSenseCamera` support auto-discovery. Run the command below to list available devices and their identifiers. Note that these identifiers may change after rebooting your computer or re-plugging the camera, depending on your operating system.
```bash
lerobot-find-cameras opencv # or realsense for Intel Realsense cameras
@@ -14,7 +24,7 @@ lerobot-find-cameras opencv # or realsense for Intel Realsense cameras
The output will look something like this if you have two cameras connected:
-```
+```bash
--- Detected Cameras ---
Camera #0:
Name: OpenCV Camera @ 0
@@ -33,13 +43,37 @@ Camera #0:
> [!WARNING]
> When using Intel RealSense cameras in `macOS`, you could get this [error](https://github.com/IntelRealSense/librealsense/issues/12307): `Error finding RealSense cameras: failed to set power state`, this can be solved by running the same command with `sudo` permissions. Note that using RealSense cameras in `macOS` is unstable.
-## Use Cameras
+`ZMQCamera` and `Reachy2Camera` do not support auto-discovery. They must be configured manually by providing their network address and port or robot SDK settings.
-Below are two examples, demonstrating how to work with the API.
+## Use cameras
-- **Asynchronous frame capture** using an OpenCV-based camera
+### Frame access modes
+
+All camera classes implement three access modes for capturing frames:
+
+| Method | Behavior | Blocks? | Best For |
+| ------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------- | -------------- | ---------------------------------------- |
+| `read()` | Waits for the camera hardware to return a frame. May block for a long time depending on the camera and SDK. | Yes | Simple scripts, sequential capture |
+| `async_read(timeout_ms)` | Returns the latest unconsumed frame from background thread. Blocks only if buffer is empty, up to `timeout_ms`. Raises `TimeoutError` if no frame arrives. | With a timeout | Control loops synchronized to camera FPS |
+| `read_latest(max_age_ms)` | Peeks at the most recent frame in buffer (may be stale). Raises `TimeoutError` if frame is older than `max_age_ms`. | No | UI visualization, logging, monitoring |
+
+### Usage examples
+
+The following examples show how to use the camera API to configure and capture frames from different camera types.
+
+- **Blocking and non-blocking frame capture** using an OpenCV-based camera
- **Color and depth capture** using an Intel RealSense camera
+> [!WARNING]
+> Failing to cleanly disconnect cameras can cause resource leaks. Use the context manager protocol to ensure automatic cleanup:
+>
+> ```python
+> with OpenCVCamera(config) as camera:
+> ...
+> ```
+>
+> You can also call `connect()` and `disconnect()` manually, but always use a `finally` block for the latter.
+
@@ -60,16 +94,30 @@ config = OpenCVCameraConfig(
)
# Instantiate and connect an `OpenCVCamera`, performing a warm-up read (default).
-camera = OpenCVCamera(config)
-camera.connect()
+with OpenCVCamera(config) as camera:
+
+ # Read a frame synchronously — blocks until hardware delivers a new frame
+ frame = camera.read()
+ print(f"read() call returned frame with shape:", frame.shape)
+
+ # Read a frame asynchronously with a timeout — returns the latest unconsumed frame or waits up to timeout_ms for a new one
+ try:
+ for i in range(10):
+ frame = camera.async_read(timeout_ms=200)
+ print(f"async_read call returned frame {i} with shape:", frame.shape)
+ except TimeoutError as e:
+ print(f"No frame received within timeout: {e}")
+
+ # Instantly return a frame - returns the most recent frame captured by the camera
+ try:
+ initial_frame = camera.read_latest(max_age_ms=1000)
+ for i in range(10):
+ frame = camera.read_latest(max_age_ms=1000)
+ print(f"read_latest call returned frame {i} with shape:", frame.shape)
+ print(f"Was a new frame received by the camera? {(initial_frame != frame).any()}")
+ except TimeoutError as e:
+ print(f"Frame too old: {e}")
-# Read frames asynchronously in a loop via `async_read(timeout_ms)`
-try:
- for i in range(10):
- frame = camera.async_read(timeout_ms=200)
- print(f"Async frame {i} shape:", frame.shape)
-finally:
- camera.disconnect()
```
@@ -111,10 +159,10 @@ finally:
-## Use your phone
+## Use your phone's camera
-
+
To use your iPhone as a camera on macOS, enable the Continuity Camera feature:
@@ -124,83 +172,49 @@ To use your iPhone as a camera on macOS, enable the Continuity Camera feature:
For more details, visit [Apple support](https://support.apple.com/en-gb/guide/mac-help/mchl77879b8a/mac).
-Your iPhone should be detected automatically when running the camera setup script in the next section.
-
-
+
-If you want to use your phone as a camera on Linux, follow these steps to set up a virtual camera
+If you want to use your phone as a camera using OBS, follow these steps to set up a virtual camera.
-1. _Install `v4l2loopback-dkms` and `v4l-utils`_. Those packages are required to create virtual camera devices (`v4l2loopback`) and verify their settings with the `v4l2-ctl` utility from `v4l-utils`. Install them using:
+1. _(Linux only) Install `v4l2loopback-dkms` and `v4l-utils`_. These packages create virtual camera devices and verify their settings. Install with:
-
-```python
+```bash
sudo apt install v4l2loopback-dkms v4l-utils
```
-
-2. _Install [DroidCam](https://droidcam.app) on your phone_. This app is available for both iOS and Android.
-3. _Install [OBS Studio](https://obsproject.com)_. This software will help you manage the camera feed. Install it using [Flatpak](https://flatpak.org):
+2. _Install the [DroidCam app](https://droidcam.app) on your phone_. This app is available for both iOS and Android.
+3. _Download and install [OBS Studio](https://obsproject.com)_.
+4. _Download and install the [DroidCam OBS plugin](https://droidcam.app/obs)_.
+5. _Start OBS Studio_.
-
-```python
-flatpak install flathub com.obsproject.Studio
-```
-
-
-4. _Install the DroidCam OBS plugin_. This plugin integrates DroidCam with OBS Studio. Install it with:
-
-
-```python
-flatpak install flathub com.obsproject.Studio.Plugin.DroidCam
-```
-
-
-5. _Start OBS Studio_. Launch with:
-
-
-```python
-flatpak run com.obsproject.Studio
-```
-
-
-6. _Add your phone as a source_. Follow the instructions [here](https://droidcam.app/obs/usage). Be sure to set the resolution to `640x480`.
-7. _Adjust resolution settings_. In OBS Studio, go to `File > Settings > Video`. Change the `Base(Canvas) Resolution` and the `Output(Scaled) Resolution` to `640x480` by manually typing it in.
+6. _Add your phone as a source_. Follow the instructions [here](https://droidcam.app/obs/usage). Be sure to set the resolution to `640x480` to avoid the watermarks.
+7. _Adjust resolution settings_. In OBS Studio, go to `File > Settings > Video` or `OBS > Preferences... > Video`. Change the `Base(Canvas) Resolution` and the `Output(Scaled) Resolution` to `640x480` by manually typing it.
8. _Start virtual camera_. In OBS Studio, follow the instructions [here](https://obsproject.com/kb/virtual-camera-guide).
-9. _Verify the virtual camera setup_. Use `v4l2-ctl` to list the devices:
+9. _Verify the virtual camera setup and resolution_.
+ - **Linux**: Use `v4l2-ctl` to list devices and check resolution:
+ ```bash
+ v4l2-ctl --list-devices # find VirtualCam and note its /dev/videoX path
+ v4l2-ctl -d /dev/videoX --get-fmt-video # replace with your VirtualCam path
+ ```
+ You should see `VirtualCam` listed and resolution `640x480`.
+ - **macOS**: Open Photo Booth or FaceTime and select "OBS Virtual Camera" as the input.
+ - **Windows**: The native Camera app doesn't support virtual cameras. Use a video conferencing app (Zoom, Teams) or run `lerobot-find-cameras opencv` directly to verify.
-
-```python
-v4l2-ctl --list-devices
-```
-
+
+Troubleshooting
-You should see an entry like:
+> The virtual camera resolution is incorrect.
-```
-VirtualCam (platform:v4l2loopback-000):
-/dev/video1
-```
+Delete the virtual camera source and recreate it. The resolution cannot be changed after creation.
-10. _Check the camera resolution_. Use `v4l2-ctl` to ensure that the virtual camera output resolution is `640x480`. Change `/dev/video1` to the port of your virtual camera from the output of `v4l2-ctl --list-devices`.
+> Error reading frame in background thread for OpenCVCamera(X): OpenCVCamera(X) frame width=640 or height=480 do not match configured width=1920 or height=1080.
-
-```python
-v4l2-ctl -d /dev/video1 --get-fmt-video
-```
-
+This error is caused by OBS Virtual Camera advertising a `1920x1080` resolution despite rescaling. The only fix for now is to comment out the width and height check in `_postprocess_image()`.
-You should see an entry like:
-
-```
->>> Format Video Capture:
->>> Width/Height : 640/480
->>> Pixel Format : 'YUYV' (YUYV 4:2:2)
-```
-
-Troubleshooting: If the resolution is not correct you will have to delete the Virtual Camera port and try again as it cannot be changed.
-
-If everything is set up correctly, you can proceed with the rest of the tutorial.
+
+
+If everything is set up correctly, your phone will appear as a standard OpenCV camera and can be used with `OpenCVCamera`.
diff --git a/docs/source/damiao.mdx b/docs/source/damiao.mdx
new file mode 100644
index 000000000..45388ab9b
--- /dev/null
+++ b/docs/source/damiao.mdx
@@ -0,0 +1,165 @@
+# Damiao Motors and CAN Bus
+
+This guide covers setup and usage of Damiao motors with LeRobot via CAN bus communication.
+
+Currently, only Linux is supported, as the OpenArms CAN adapter only has drivers for Linux.
+
+## Linux CAN Setup
+
+Before using Damiao motors, you need to set up the CAN interface on your Linux system.
+
+### Install CAN Utilities
+
+```bash
+sudo apt-get install can-utils
+```
+
+### Configure CAN Interface (Manual)
+
+For standard CAN FD (recommended for OpenArms):
+
+```bash
+sudo ip link set can0 down
+sudo ip link set can0 type can bitrate 1000000 dbitrate 5000000 fd on
+sudo ip link set can0 up
+```
+
+For standard CAN (without FD):
+
+```bash
+sudo ip link set can0 down
+sudo ip link set can0 type can bitrate 1000000
+sudo ip link set can0 up
+```
+
+### Configure CAN Interface (Using LeRobot)
+
+LeRobot provides a utility script to set up and test CAN interfaces:
+
+```bash
+# Setup multiple interfaces (e.g., OpenArms Followers with 2 CAN buses)
+lerobot-setup-can --mode=setup --interfaces=can0,can1
+```
+
+## Debugging CAN Communication
+
+Use the built-in debug tools to test motor communication:
+
+```bash
+# Test motors on all interfaces
+lerobot-setup-can --mode=test --interfaces=can0,can1
+
+# Run speed/latency test
+lerobot-setup-can --mode=speed --interfaces=can0
+```
+
+The test mode will scan for motors (IDs 0x01-0x08) and report which ones respond. Example output:
+
+```
+can0: UP (CAN FD)
+ Motor 0x01 (joint_1): ✓ FOUND
+ → Response 0x11 [FD]: 00112233...
+ Motor 0x02 (joint_2): ✓ FOUND
+ Motor 0x03 (joint_3): ✗ No response
+ ...
+ Summary: 2/8 motors found
+```
+
+## Usage
+
+### Basic Setup
+
+```python
+from lerobot.motors import Motor
+from lerobot.motors.damiao import DamiaoMotorsBus
+
+# Define your motors with send/receive CAN IDs
+motors = {
+ "joint_1": Motor(id=0x01, motor_type_str="dm8009", recv_id=0x11),
+ "joint_2": Motor(id=0x02, motor_type_str="dm4340", recv_id=0x12),
+ "joint_3": Motor(id=0x03, motor_type_str="dm4310", recv_id=0x13),
+}
+
+# Create the bus
+bus = DamiaoMotorsBus(
+ port="can0", # Linux socketcan interface
+ motors=motors,
+)
+
+# Connect
+bus.connect()
+```
+
+### Reading Motor States
+
+```python
+# Read single motor position (degrees)
+position = bus.read("Present_Position", "joint_1")
+
+# Read from multiple motors
+positions = bus.sync_read("Present_Position") # All motors
+positions = bus.sync_read("Present_Position", ["joint_1", "joint_2"])
+
+# Read all states at once (position, velocity, torque)
+states = bus.sync_read_all_states()
+# Returns: {'joint_1': {'position': 45.2, 'velocity': 1.3, 'torque': 0.5}, ...}
+```
+
+### Writing Motor Commands
+
+```python
+# Enable torque
+bus.enable_torque()
+
+# Set goal position (degrees)
+bus.write("Goal_Position", "joint_1", 45.0)
+
+# Set positions for multiple motors
+bus.sync_write("Goal_Position", {
+ "joint_1": 45.0,
+ "joint_2": -30.0,
+ "joint_3": 90.0,
+})
+
+# Disable torque
+bus.disable_torque()
+```
+
+## Configuration Options
+
+| Parameter | Default | Description |
+| -------------- | --------- | ----------------------------------------------------------- |
+| `port` | - | CAN interface (`can0`) or serial port (`/dev/cu.usbmodem*`) |
+| `use_can_fd` | `True` | Enable CAN FD for higher data rates |
+| `bitrate` | `1000000` | Nominal bitrate (1 Mbps) |
+| `data_bitrate` | `5000000` | CAN FD data bitrate (5 Mbps) |
+
+## Motor Configuration
+
+Each motor requires:
+
+- `id`: CAN ID for sending commands
+- `motor_type_str`: One of the supported motor types (e.g., `"dm8009"`, `"dm4340"`)
+- `recv_id`: CAN ID for receiving responses
+
+OpenArms default IDs follow the pattern: send ID `0x0N`, receive ID `0x1N` where N is the joint number.
+
+## Troubleshooting
+
+### No Response from Motors
+
+1. **Check power**
+2. **Verify CAN wiring**: Check CAN-H, CAN-L, and GND connections
+3. **Check motor IDs**: Use Damiao Debugging Tools to verify/configure IDs
+4. **Test CAN interface**: Run `candump can0` to see if messages are being received
+5. **Run diagnostics**: `lerobot-setup-can --mode=test --interfaces=can0`
+
+### Motor Timeout Parameter
+
+If motors were configured with timeout=0, they won't respond to commands. Use Damiao Debugging Tools to set a non-zero timeout value.
+
+### Verify CAN FD Status
+
+```bash
+ip -d link show can0 | grep fd
+```
diff --git a/docs/source/dataset_subtask.mdx b/docs/source/dataset_subtask.mdx
new file mode 100644
index 000000000..beb5d80bd
--- /dev/null
+++ b/docs/source/dataset_subtask.mdx
@@ -0,0 +1,278 @@
+# Using Subtasks in LeRobot Datasets
+
+Subtask support in robotics datasets has proven effective in improving robot reasoning and understanding. Subtasks are particularly useful for:
+
+- **Hierarchical policies**: Building policies that include subtask predictions to visualize robot reasoning in real time
+- **Reward modeling**: Helping reward models understand task progression (e.g., SARM-style stage-aware reward models)
+- **Task decomposition**: Breaking down complex manipulation tasks into atomic, interpretable steps
+
+LeRobotDataset now supports subtasks as part of its dataset structure, alongside tasks.
+
+## What are Subtasks?
+
+While a **task** describes the overall goal (e.g., "Pick up the apple and place it in the basket"), **subtasks** break down the execution into finer-grained steps:
+
+1. "Approach the apple"
+2. "Grasp the apple"
+3. "Lift the apple"
+4. "Move to basket"
+5. "Release the apple"
+
+Each frame in the dataset can be annotated with its corresponding subtask, enabling models to learn and predict these intermediate stages.
+
+
+
+
+ Figure: Overview of subtask annotation.
+
+
+**Reference:** _Subtask-learning based for robot self-assembly in flexible collaborative assembly in manufacturing_, Original Article, Published: 19 April 2022.
+
+## Dataset Structure
+
+Subtask information is stored in the dataset metadata:
+
+```
+my-dataset/
+├── data/
+│ └── ...
+├── meta/
+│ ├── info.json
+│ ├── stats.json
+│ ├── tasks.parquet
+│ ├── subtasks.parquet # Subtask index → subtask string mapping
+│ └── episodes/
+│ └── ...
+└── videos/
+ └── ...
+```
+
+### Subtasks Parquet File
+
+The `meta/subtasks.parquet` file maps subtask indices to their natural language descriptions:
+
+| subtask_index | subtask (index column) |
+| ------------- | ---------------------- |
+| 0 | "Approach the apple" |
+| 1 | "Grasp the apple" |
+| 2 | "Lift the apple" |
+| ... | ... |
+
+### Frame-Level Annotations
+
+Each frame in the dataset can include a `subtask_index` field that references the subtasks parquet file:
+
+```python
+# Example frame data in the parquet file
+{
+ "index": 42,
+ "timestamp": 1.4,
+ "episode_index": 0,
+ "task_index": 0,
+ "subtask_index": 2, # References "Lift the apple"
+ "observation.state": [...],
+ "action": [...],
+}
+```
+
+## Annotating Datasets with Subtasks
+
+We provide a HuggingFace Space for easily annotating any LeRobotDataset with subtasks:
+
+**[https://huggingface.co/spaces/lerobot/annotate](https://huggingface.co/spaces/lerobot/annotate)**
+
+After completing your annotation:
+
+1. Click "Push to Hub" to upload your annotated dataset
+2. You can also run the annotation space locally by following the instructions at [github.com/huggingface/lerobot-annotate](https://github.com/huggingface/lerobot-annotate)
+
+## Loading Datasets with Subtasks
+
+When you load a dataset with subtask annotations, the subtask information is automatically available:
+
+```python
+from lerobot.datasets.lerobot_dataset import LeRobotDataset
+
+# Load a dataset with subtask annotations
+dataset = LeRobotDataset("jadechoghari/collect-fruit-annotated")
+
+# Access a sample
+sample = dataset[100]
+
+# The sample includes both task and subtask information
+print(sample["task"]) # "Collect the fruit"
+print(sample["subtask"]) # "Grasp the apple"
+print(sample["task_index"]) # tensor(0)
+print(sample["subtask_index"]) # tensor(2)
+```
+
+### Checking for Subtask Support
+
+You can check if a dataset has subtask annotations:
+
+```python
+# Check if subtasks are available
+has_subtasks = (
+ "subtask_index" in dataset.features
+ and dataset.meta.subtasks is not None
+)
+
+if has_subtasks:
+ print(f"Dataset has {len(dataset.meta.subtasks)} unique subtasks")
+ print("Subtasks:", list(dataset.meta.subtasks.index))
+```
+
+## Using Subtasks for Training
+
+### With the Tokenizer Processor
+
+The `TokenizerProcessor` automatically handles subtask tokenization for Vision-Language-Action (VLA) models:
+
+```python
+from lerobot.processor.tokenizer_processor import TokenizerProcessor
+from lerobot.processor.pipeline import ProcessorPipeline
+
+# Create a tokenizer processor
+tokenizer_processor = TokenizerProcessor(
+ tokenizer_name_or_path="google/paligemma-3b-pt-224",
+ padding="max_length",
+ max_length=64,
+)
+
+# The processor will automatically tokenize subtasks if present in the batch
+# and add them to the observation under:
+# - "observation.subtask.tokens"
+# - "observation.subtask.attention_mask"
+```
+
+When subtasks are available in the batch, the tokenizer processor adds:
+
+- `observation.subtask.tokens`: Tokenized subtask text
+- `observation.subtask.attention_mask`: Attention mask for the subtask tokens
+
+### DataLoader with Subtasks
+
+```python
+import torch
+from lerobot.datasets.lerobot_dataset import LeRobotDataset
+
+dataset = LeRobotDataset("jadechoghari/collect-fruit-annotated")
+
+dataloader = torch.utils.data.DataLoader(
+ dataset,
+ batch_size=16,
+ shuffle=True,
+)
+
+for batch in dataloader:
+ # Access subtask information in the batch
+ subtasks = batch["subtask"] # List of subtask strings
+ subtask_indices = batch["subtask_index"] # Tensor of subtask indices
+
+ # Use for training hierarchical policies or reward models
+ print(f"Batch subtasks: {set(subtasks)}")
+```
+
+## Example Datasets with Subtask Annotations
+
+Try loading a dataset with subtask annotations:
+
+```python
+from lerobot.datasets.lerobot_dataset import LeRobotDataset
+
+# Example dataset with subtask annotations
+dataset = LeRobotDataset("jadechoghari/collect-fruit-annotated")
+
+# Explore the subtasks
+print("Available subtasks:")
+for subtask_name in dataset.meta.subtasks.index:
+ print(f" - {subtask_name}")
+
+# Get subtask distribution
+subtask_counts = {}
+for i in range(len(dataset)):
+ sample = dataset[i]
+ subtask = sample["subtask"]
+ subtask_counts[subtask] = subtask_counts.get(subtask, 0) + 1
+
+print("\nSubtask distribution:")
+for subtask, count in sorted(subtask_counts.items(), key=lambda x: -x[1]):
+ print(f" {subtask}: {count} frames")
+```
+
+## Use Cases
+
+### 1. Hierarchical Policy Training
+
+Train policies that predict both actions and current subtask:
+
+```python
+class HierarchicalPolicy(nn.Module):
+ def __init__(self, num_subtasks):
+ super().__init__()
+ self.action_head = nn.Linear(hidden_dim, action_dim)
+ self.subtask_head = nn.Linear(hidden_dim, num_subtasks)
+
+ def forward(self, observations):
+ features = self.encoder(observations)
+ actions = self.action_head(features)
+ subtask_logits = self.subtask_head(features)
+ return actions, subtask_logits
+```
+
+### 2. Stage-Aware Reward Modeling (SARM)
+
+Build reward models that understand task progression:
+
+```python
+# SARM predicts:
+# - Stage: Which subtask is being executed (discrete)
+# - Progress: How far along the subtask (continuous 0-1)
+
+class SARMRewardModel(nn.Module):
+ def forward(self, observations):
+ features = self.encoder(observations)
+ stage_logits = self.stage_classifier(features)
+ progress = self.progress_regressor(features)
+ return stage_logits, progress
+```
+
+### 3. Progress Visualization
+
+Monitor robot execution by tracking subtask progression:
+
+```python
+def visualize_execution(model, observations):
+ for t, obs in enumerate(observations):
+ action, subtask_logits = model(obs)
+ predicted_subtask = subtask_names[subtask_logits.argmax()]
+ print(f"t={t}: Executing '{predicted_subtask}'")
+```
+
+## API Reference
+
+### LeRobotDataset Properties
+
+| Property | Type | Description |
+| --------------------------- | ---------------------- | ------------------------------------------ |
+| `meta.subtasks` | `pd.DataFrame \| None` | DataFrame mapping subtask names to indices |
+| `features["subtask_index"]` | `dict` | Feature spec for subtask_index if present |
+
+### Sample Keys
+
+When subtasks are available, each sample includes:
+
+| Key | Type | Description |
+| --------------- | -------------- | ------------------------------------ |
+| `subtask_index` | `torch.Tensor` | Integer index of the current subtask |
+| `subtask` | `str` | Natural language subtask description |
+
+## Related Resources
+
+- [SARM Paper](https://arxiv.org/pdf/2509.25358) - Stage-Aware Reward Modeling for Long Horizon Robot Manipulation
+- [LeRobot Annotate Space](https://huggingface.co/spaces/lerobot/annotate) - Interactive annotation tool
+- [LeRobotDataset v3.0](./lerobot-dataset-v3) - Dataset format documentation
diff --git a/docs/source/earthrover_mini_plus.mdx b/docs/source/earthrover_mini_plus.mdx
index e3ffa6b32..884e84d8c 100644
--- a/docs/source/earthrover_mini_plus.mdx
+++ b/docs/source/earthrover_mini_plus.mdx
@@ -1,5 +1,11 @@
# EarthRover Mini Plus
+
+
The EarthRover Mini Plus is a fully open source mobile robot that connects through the cloud using the Frodobots SDK. This lets you control the robot and record datasets for training AI models.
## What You Need
@@ -7,7 +13,7 @@ The EarthRover Mini Plus is a fully open source mobile robot that connects throu
### Hardware
- EarthRover Mini robot
-- Computer with Python 3.10 or newer
+- Computer with Python 3.12 or newer
- Internet connection
### Setting Up the Frodobots SDK
@@ -164,13 +170,13 @@ Once you can drive the robot well, you can start recording data to train AI mode
We use Hugging Face to store your data online. First, log in with your token from [Hugging Face settings](https://huggingface.co/settings/tokens):
```bash
-huggingface-cli login --token ${HUGGINGFACE_TOKEN} --add-to-git-credential
+hf auth login --token ${HUGGINGFACE_TOKEN} --add-to-git-credential
```
Store your Hugging Face username:
```bash
-HF_USER=$(huggingface-cli whoami | head -n 1)
+HF_USER=$(NO_COLOR=1 hf auth whoami | awk -F': *' 'NR==1 {print $2}')
echo $HF_USER
```
@@ -179,13 +185,16 @@ echo $HF_USER
Use the standard recording command:
```bash
-python src/lerobot/scripts/lerobot_record.py \
+lerobot-record \
--robot.type=earthrover_mini_plus \
--teleop.type=keyboard_rover \
--dataset.repo_id=your_username/dataset_name \
--dataset.num_episodes=2 \
--dataset.fps=10 \
--dataset.single_task="Navigate around obstacles" \
+ --dataset.streaming_encoding=true \
+ --dataset.encoder_threads=2 \
 --display_data=true
+# Optional: append `--dataset.vcodec=auto` to the command above.
```
@@ -195,22 +204,26 @@ Replace `your_username/dataset_name` with your Hugging Face username and a name
Your dataset includes:
-**Your Actions (2 things)**:
+**Your Actions (2 features)**:
-- How much you moved forward/backward
-- How much you turned left/right
+- `linear_velocity`: How much you moved forward/backward
+- `angular_velocity`: How much you turned left/right
-**Robot Observations (12 things)**:
+**Robot Observations (24 features)**:
- Front camera video
- Rear camera video
- Current speed
- Battery level
-- Which way the robot is facing
-- GPS location (latitude, longitude, signal strength)
+- Orientation
+- GPS (latitude, longitude, signal strength)
- Network signal strength
- Vibration level
-- Lamp status (on/off)
+- Lamp state (on/off)
+- Accelerometer (x, y, z)
+- Gyroscope (x, y, z)
+- Magnetometer (x, y, z)
+- Wheel RPMs (4 wheels)
### Where Your Data Goes
diff --git a/docs/source/envhub.mdx b/docs/source/envhub.mdx
index df103d0dd..36c08a8b3 100644
--- a/docs/source/envhub.mdx
+++ b/docs/source/envhub.mdx
@@ -155,10 +155,10 @@ Upload your repository to Hugging Face:
pip install huggingface_hub
# Login to Hugging Face
-huggingface-cli login
+hf auth login
# Create a new repository
-huggingface-cli repo create my-custom-env --type space --org my-org
+hf repo create my-org/my-custom-env
# Initialize git and push
git init
diff --git a/docs/source/groot.mdx b/docs/source/groot.mdx
index 8bfc22996..0ef591466 100644
--- a/docs/source/groot.mdx
+++ b/docs/source/groot.mdx
@@ -120,9 +120,12 @@ lerobot-record \
--display_data=true \
--dataset.repo_id=/eval_groot-bimanual \
--dataset.num_episodes=10 \
- --dataset.single_task="Grab and handover the red cube to the other arm"
- --policy.path=/groot-bimanual # your trained model
- --dataset.episode_time_s=30
+ --dataset.single_task="Grab and handover the red cube to the other arm" \
+ --dataset.streaming_encoding=true \
+ --dataset.encoder_threads=2 \
+ --policy.path=/groot-bimanual \
+ --dataset.episode_time_s=30 \
 --dataset.reset_time_s=10
+# `--policy.path` is your trained model. Optionally append `--dataset.vcodec=auto` to the command above.
```
diff --git a/docs/source/hope_jr.mdx b/docs/source/hope_jr.mdx
index 856febb95..8826d9758 100644
--- a/docs/source/hope_jr.mdx
+++ b/docs/source/hope_jr.mdx
@@ -224,12 +224,15 @@ lerobot-record \
--teleop.port=/dev/tty.usbmodem1201 \
--teleop.id=right \
--teleop.side=right \
- --dataset.repo_id=nepyope/hand_record_test_with_video_data \
+ --dataset.repo_id=${HF_USER}/hand_record_test_with_video_data \
--dataset.single_task="Hand recording test with video data" \
--dataset.num_episodes=1 \
--dataset.episode_time_s=5 \
--dataset.push_to_hub=true \
--dataset.private=true \
+ --dataset.streaming_encoding=true \
+ --dataset.encoder_threads=2 \
 --display_data=true
+# Optional: append `--dataset.vcodec=auto` to the command above.
```
@@ -241,7 +244,7 @@ lerobot-replay \
--robot.port=/dev/tty.usbmodem58760432281 \
--robot.id=right \
--robot.side=right \
- --dataset.repo_id=nepyope/hand_record_test_with_camera \
+ --dataset.repo_id=${HF_USER}/hand_record_test_with_camera \
--dataset.episode=0
```
@@ -249,13 +252,13 @@ lerobot-replay \
```bash
lerobot-train \
- --dataset.repo_id=nepyope/hand_record_test_with_video_data \
+ --dataset.repo_id=${HF_USER}/hand_record_test_with_video_data \
--policy.type=act \
--output_dir=outputs/train/hopejr_hand \
--job_name=hopejr \
--policy.device=mps \
--wandb.enable=true \
- --policy.repo_id=nepyope/hand_test_policy
+ --policy.repo_id=${HF_USER}/hand_test_policy
```
### Evaluate
@@ -270,8 +273,11 @@ lerobot-record \
--robot.side=right \
--robot.cameras='{"main": {"type": "opencv", "index_or_path": 0, "width": 640, "height": 480, "fps": 30}}' \
--display_data=false \
- --dataset.repo_id=nepyope/eval_hopejr \
+ --dataset.repo_id=${HF_USER}/eval_hopejr \
--dataset.single_task="Evaluate hopejr hand policy" \
--dataset.num_episodes=10 \
+ --dataset.streaming_encoding=true \
+ --dataset.encoder_threads=2 \
 --policy.path=outputs/train/hopejr_hand/checkpoints/last/pretrained_model
+# Optional: append `--dataset.vcodec=auto` to the command above.
```
diff --git a/docs/source/il_robots.mdx b/docs/source/il_robots.mdx
index 84dc6f2f6..8e50a2aec 100644
--- a/docs/source/il_robots.mdx
+++ b/docs/source/il_robots.mdx
@@ -159,13 +159,13 @@ We use the Hugging Face hub features for uploading your dataset. If you haven't
Add your token to the CLI by running this command:
```bash
-huggingface-cli login --token ${HUGGINGFACE_TOKEN} --add-to-git-credential
+hf auth login --token ${HUGGINGFACE_TOKEN} --add-to-git-credential
```
Then store your Hugging Face repository name in a variable:
```bash
-HF_USER=$(hf auth whoami | head -n 1)
+HF_USER=$(NO_COLOR=1 hf auth whoami | awk -F': *' 'NR==1 {print $2}')
echo $HF_USER
```
@@ -185,7 +185,10 @@ lerobot-record \
--display_data=true \
--dataset.repo_id=${HF_USER}/record-test \
--dataset.num_episodes=5 \
- --dataset.single_task="Grab the black cube"
+ --dataset.single_task="Grab the black cube" \
+ --dataset.streaming_encoding=true \
+ --dataset.encoder_threads=2
+# Optional: append `--dataset.vcodec=auto` to the command above.
```
@@ -324,7 +327,7 @@ You can look for other LeRobot datasets on the hub by searching for `LeRobot` [t
You can also push your local dataset to the Hub manually, running:
```bash
-huggingface-cli upload ${HF_USER}/record-test ~/.cache/huggingface/lerobot/{repo-id} --repo-type dataset
+hf upload ${HF_USER}/record-test ~/.cache/huggingface/lerobot/{repo-id} --repo-type dataset
```
#### Record function
@@ -421,7 +424,7 @@ robot = SO100Follower(robot_config)
robot.connect()
dataset = LeRobotDataset("/", episodes=[episode_idx])
-actions = dataset.hf_dataset.select_columns("action")
+actions = dataset.select_columns("action")
log_say(f"Replaying episode {episode_idx}")
for idx in range(dataset.num_frames):
@@ -488,7 +491,7 @@ If your local computer doesn't have a powerful GPU you could utilize Google Cola
Once training is done, upload the latest checkpoint with:
```bash
-huggingface-cli upload ${HF_USER}/act_so101_test \
+hf upload ${HF_USER}/act_so101_test \
outputs/train/act_so101_test/checkpoints/last/pretrained_model
```
@@ -496,7 +499,7 @@ You can also upload intermediate checkpoints with:
```bash
CKPT=010000
-huggingface-cli upload ${HF_USER}/act_so101_test${CKPT} \
+hf upload ${HF_USER}/act_so101_test${CKPT} \
outputs/train/act_so101_test/checkpoints/${CKPT}/pretrained_model
```
@@ -515,6 +518,9 @@ lerobot-record \
--display_data=false \
--dataset.repo_id=${HF_USER}/eval_so100 \
--dataset.single_task="Put lego brick into the transparent box" \
+ --dataset.streaming_encoding=true \
+ --dataset.encoder_threads=2 \
+ # --dataset.vcodec=auto \
# <- Teleop optional if you want to teleoperate in between episodes \
# --teleop.type=so100_leader \
# --teleop.port=/dev/ttyACM0 \
diff --git a/docs/source/installation.mdx b/docs/source/installation.mdx
index 44d8c7034..80f705e88 100644
--- a/docs/source/installation.mdx
+++ b/docs/source/installation.mdx
@@ -1,30 +1,57 @@
# Installation
-## Install [`miniforge`](https://conda-forge.org/download/)
+This guide uses `conda` (via miniforge) to manage environments (recommended). If you prefer another environment manager (e.g. `uv`, `venv`), ensure you have Python >=3.12 and `ffmpeg` installed with the `libsvtav1` encoder, then skip ahead to [Environment Setup](#step-2-environment-setup).
+
+## Step 1 (`conda` only): Install [`miniforge`](https://conda-forge.org/download/)
```bash
wget "https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-$(uname)-$(uname -m).sh"
bash Miniforge3-$(uname)-$(uname -m).sh
```
-## Environment Setup
+## Step 2: Environment Setup
-Create a virtual environment with Python 3.10, using conda:
+Create a virtual environment with Python 3.12:
+
+
+
```bash
-conda create -y -n lerobot python=3.10
+conda create -y -n lerobot python=3.12
```
-
-Then activate your conda environment, you have to do this each time you open a shell to use lerobot:
-
+
+
```bash
+uv python install 3.12
+uv venv --python 3.12
+```
+
+
+
+
+Then activate your virtual environment. You have to do this each time you open a shell to use lerobot:
+
+
+
+```bash
conda activate lerobot
+```
+
+```bash
+# Linux/macOS
+source .venv/bin/activate
+# Windows PowerShell
+.venv\Scripts\Activate.ps1
```
+
+
+
When using `conda`, install `ffmpeg` in your environment:
```bash
conda install ffmpeg -c conda-forge
+ffmpeg -version # ffmpeg 8.X is not yet supported!
```
> [!TIP]
@@ -38,7 +65,17 @@ conda install ffmpeg -c conda-forge
>
> - _[On Linux only]_ If you want to bring your own ffmpeg: Install [ffmpeg build dependencies](https://trac.ffmpeg.org/wiki/CompilationGuide/Ubuntu#GettheDependencies) and [compile ffmpeg from source with libsvtav1](https://trac.ffmpeg.org/wiki/CompilationGuide/Ubuntu#libsvtav1), and make sure you use the corresponding ffmpeg binary to your install with `which ffmpeg`.
-## Install LeRobot 🤗
+> [!NOTE]
+> When installing LeRobot inside WSL (Windows Subsystem for Linux), make sure to install `evdev` with the following command:
+>
+> ```bash
+> conda install evdev -c conda-forge
+> ```
+
+> [!IMPORTANT]
+> If you are using `uv`, you will have to install `ffmpeg` system-wide (outside of the virtual environment). With this setup, you rely on the ability of `uv` and `torchcodec` to dynamically link to the system `ffmpeg`.
+
+## Step 3: Install LeRobot 🤗
### From Source
@@ -51,23 +88,45 @@ cd lerobot
Then, install the library in editable mode. This is useful if you plan to contribute to the code.
+
+
+
```bash
pip install -e .
```
+
+
+```bash
+uv pip install -e .
+```
+
+
+
### Installation from PyPI
**Core Library:**
Install the base package with:
+
+
+
```bash
pip install lerobot
```
+
+
+```bash
+uv pip install lerobot
+```
+
+
+
_This installs only the default dependencies._
**Extra Features:**
-To install additional functionality, use one of the following:
+To install additional functionality, use one of the following (if you are using `uv`, replace `pip install` with `uv pip install` in the commands below):
```bash
pip install 'lerobot[all]' # All available features
@@ -81,13 +140,10 @@ _Replace `[...]` with your desired features._
For a full list of optional dependencies, see:
https://pypi.org/project/lerobot/
-> [!NOTE]
-> For lerobot 0.4.0, if you want to install pi, you will have to do: `pip install "lerobot[pi]@git+https://github.com/huggingface/lerobot.git"`
-
### Troubleshooting
If you encounter build errors, you may need to install additional dependencies: `cmake`, `build-essential`, and `ffmpeg libs`.
-To install these for linux run:
+To install these for Linux run:
```bash
sudo apt-get install cmake build-essential python3-dev pkg-config libavformat-dev libavcodec-dev libavdevice-dev libavutil-dev libswscale-dev libswresample-dev libavfilter-dev
@@ -97,7 +153,7 @@ For other systems, see: [Compiling PyAV](https://pyav.org/docs/develop/overview/
## Optional dependencies
-LeRobot provides optional extras for specific functionalities. Multiple extras can be combined (e.g., `.[aloha,feetech]`). For all available extras, refer to `pyproject.toml`.
+LeRobot provides optional extras for specific functionalities. Multiple extras can be combined (e.g., `.[aloha,feetech]`). For all available extras, refer to `pyproject.toml`. If you are using `uv`, replace `pip install` with `uv pip install` in the commands below.
### Simulations
diff --git a/docs/source/lekiwi.mdx b/docs/source/lekiwi.mdx
index 511521580..7e7c1a680 100644
--- a/docs/source/lekiwi.mdx
+++ b/docs/source/lekiwi.mdx
@@ -1,5 +1,11 @@
# LeKiwi
+
+
In the steps below, we explain how to assemble the LeKiwi mobile robot.
## Source the parts
@@ -273,13 +279,13 @@ We use the Hugging Face hub features for uploading your dataset. If you haven't
Add your token to the CLI by running this command:
```bash
-huggingface-cli login --token ${HUGGINGFACE_TOKEN} --add-to-git-credential
+hf auth login --token ${HUGGINGFACE_TOKEN} --add-to-git-credential
```
Then store your Hugging Face repository name in a variable:
```bash
-HF_USER=$(huggingface-cli whoami | head -n 1)
+HF_USER=$(NO_COLOR=1 hf auth whoami | awk -F': *' 'NR==1 {print $2}')
echo $HF_USER
```
diff --git a/docs/source/lerobot-dataset-v3.mdx b/docs/source/lerobot-dataset-v3.mdx
index 3521914f2..235a355bd 100644
--- a/docs/source/lerobot-dataset-v3.mdx
+++ b/docs/source/lerobot-dataset-v3.mdx
@@ -41,7 +41,10 @@ lerobot-record \
--display_data=true \
--dataset.repo_id=${HF_USER}/record-test \
--dataset.num_episodes=5 \
- --dataset.single_task="Grab the black cube"
+ --dataset.single_task="Grab the black cube" \
+ --dataset.streaming_encoding=true \
+ --dataset.encoder_threads=2
+# Optional: append `--dataset.vcodec=auto` to the command above.
```
See the [recording guide](./il_robots#record-a-dataset) for more details.
diff --git a/docs/source/libero.mdx b/docs/source/libero.mdx
index 3617f3b25..def974531 100644
--- a/docs/source/libero.mdx
+++ b/docs/source/libero.mdx
@@ -42,6 +42,7 @@ lerobot-eval \
```
- `--env.task` picks the suite (`libero_object`, `libero_spatial`, etc.).
+- `--env.task_ids` picks task ids to run (`[0]`, `[1,2,3]`, etc.). Omit this flag (or set it to `null`) to run all tasks in the suite.
- `--eval.batch_size` controls how many environments run in parallel.
- `--eval.n_episodes` sets how many episodes to run in total.
diff --git a/docs/source/multi_task_dit.mdx b/docs/source/multi_task_dit.mdx
new file mode 100644
index 000000000..c3cced708
--- /dev/null
+++ b/docs/source/multi_task_dit.mdx
@@ -0,0 +1,340 @@
+# Multitask DiT Policy
+
+Multitask Diffusion Transformer (DiT) Policy is an evolution of the original Diffusion Policy architecture, which leverages a large DiT with text and vision conditioning for multitask robot learning. This implementation supports both diffusion and flow matching objectives for action generation, enabling robots to perform diverse manipulation tasks conditioned on language instructions.
+
+## Model Overview
+
+The model uses:
+
+- **CLIP Vision Encoder**: Processes RGB images from multiple camera views
+- **CLIP Text Encoder**: Encodes language task instructions (frozen weights with learnable projection)
+- **Diffusion Transformer**: Predicts action sequences conditioned on observations and language
+- **Two Objectives**: Supports both diffusion (DDPM/DDIM) and flow matching for action generation
+
+This model is exciting because you can achieve extremely high dexterity, competitive with multi-billion parameter
+VLAs, with only ~450M parameters and significantly less training.
+
+## Installation Requirements
+
+Multitask DiT Policy has additional dependencies. Install it with:
+
+```bash
+pip install "lerobot[multi_task_dit]"
+```
+
+This will install all necessary dependencies including the HuggingFace Transformers library for CLIP models.
+
+## Usage
+
+To use Multitask DiT in your LeRobot configuration, specify the policy type as:
+
+```python
+policy.type=multi_task_dit
+```
+
+## Training
+
+### Basic Training Command
+
+Here's a complete training command for training Multitask DiT on your dataset:
+
+```bash
+lerobot-train \
+ --dataset.repo_id=YOUR_DATASET \
+ --output_dir=./outputs/multitask_dit_training \
+ --batch_size=32 \
+ --steps=5000 \
+ --save_freq=500 \
+ --log_freq=100 \
+ --policy.type=multi_task_dit \
+ --policy.device=cuda \
+ --policy.repo_id="HF_USER/multitask-dit-your-robot" \
+ --wandb.enable=true
+```
+
+### Recommended Hyperparameters and Dataset Details (30Hz Control Frequency)
+
+For reliable performance, start with these suggested default hyperparameters:
+
+```bash
+lerobot-train \
+ --dataset.repo_id=YOUR_DATASET \
+ --output_dir=./outputs/multitask_dit_training \
+ --batch_size=320 \
+ --steps=30000 \
+ --policy.type=multi_task_dit \
+ --policy.device=cuda \
+ --policy.horizon=32 \
+ --policy.n_action_steps=24 \
+ --policy.objective=diffusion \
+ --policy.noise_scheduler_type=DDPM \
+ --policy.num_train_timesteps=100 \
+ --policy.repo_id="HF_USER/multitask-dit-your-robot" \
+ --wandb.enable=true
+```
+
+**Key Parameters:**
+
+- **Batch Size**: 192-320 - If you have access to a GPU that can support this, you will get the best training dynamics
+- **Horizon**: 32 - number of action steps to predict, ~1.0 sec at 30Hz
+- **n_action_steps**: 24 - ~0.8 seconds at 30Hz
+- **Objective**: `diffusion` - start with diffusion and experiment with flow matching if generation quality is poor
+- **Training Steps**: >30k steps recommended for a single task
+
+### Training Configuration Parameters
+
+#### Objective Selection
+
+Choose between diffusion and flow matching:
+
+```bash
+# Diffusion objective (default)
+--policy.objective=diffusion \
+--policy.noise_scheduler_type=DDPM \ # or "DDIM"
+--policy.num_train_timesteps=100 \
+--policy.num_inference_steps=10 \ # For faster inference
+--policy.beta_schedule=squaredcos_cap_v2 \ # Noise schedule type
+--policy.prediction_type=epsilon \ # "epsilon" (predict noise) or "sample" (predict clean)
+--policy.clip_sample=true \ # Clip samples during denoising
+--policy.clip_sample_range=1.0 # Clipping range [-x, x]
+
+# Flow matching objective
+--policy.objective=flow_matching \
+--policy.timestep_sampling_strategy=beta \ # or "uniform" | the beta sampling strategy performance appears much better in practice
+--policy.num_integration_steps=100 \
+--policy.integration_method=euler \ # or "rk4"
+--policy.sigma_min=0.0 # Minimum noise in flow interpolation path
+```
+
+#### Transformer Architecture
+
+Adjust model capacity based on dataset size:
+
+```bash
+# Small datasets (< 100 examples)
+--policy.num_layers=4 \
+--policy.hidden_dim=512 \
+--policy.num_heads=8 # should ideally be hidden_dim // 64
+
+# Medium datasets (100-5k examples) - default
+--policy.num_layers=6 \
+--policy.hidden_dim=512 \
+--policy.num_heads=8 # should ideally be hidden_dim // 64
+
+# Large datasets (> 5k examples)
+--policy.num_layers=8 \
+--policy.hidden_dim=512 \
+--policy.num_heads=8 # should ideally be hidden_dim // 64
+```
+
+**Positional Encoding Options:**
+
+The model supports two positional encoding methods for action sequences:
+
+```bash
+# Rotary Position Embedding (RoPE) - default, recommended
+--policy.use_rope=true \
+--policy.rope_base=10000.0 # Base frequency for RoPE
+
+# Absolute positional encoding
+--policy.use_positional_encoding=true # Disables RoPE when true
+```
+
+**Other Transformer Parameters:**
+
+```bash
+--policy.dropout=0.1 # Dropout rate for DiT blocks (0.0-1.0)
+--policy.timestep_embed_dim=256 # Timestep embedding dimension
+```
+
+#### Vision Encoder Configuration
+
+```bash
+# Use different CLIP model for more expressivity at the cost of inference time
+# experiment with larger or smaller models depending on the complexity of your tasks and size of dataset
+--policy.vision_encoder_name=openai/clip-vit-large-patch14
+
+# Use separate vision encoder per camera
+# This may be useful when cameras have significantly different characteristics, but
+# be wary of increased VRAM footprint.
+--policy.use_separate_rgb_encoder_per_camera=true
+
+# Image preprocessing
+--policy.image_resize_shape=[XXX,YYY] \ # you may need to resize your images for inference speed ups
+--policy.image_crop_shape=[224,224] \
+--policy.image_crop_is_random=true # Random during training, center at inference
+```
+
+#### Text Encoder Configuration
+
+```bash
+# Use different CLIP text encoder model
+# same as vision: experiment with larger or smaller models depending on the
+# complexity of your tasks and size of dataset
+--policy.text_encoder_name=openai/clip-vit-large-patch14
+```
+
+#### Learning Rate Configuration
+
+The vision encoder uses a separate learning rate multiplier, where 1/10th is suggested to be the ideal starting point:
+
+```bash
+--policy.optimizer_lr=2e-5 \
+--policy.vision_encoder_lr_multiplier=0.1 # Vision encoder LR = 0.1 * optimizer_lr
+```
+
+### Training Tuning Guidelines
+
+#### 1. Flow Matching with Beta Sampling
+
+The original diffusion implementation here is based on the work described in [TRI's LBM paper](https://arxiv.org/abs/2507.05331).
+
+Additionally, we have implemented a flow-matching objective, which is described at a high level in [this Boston Dynamics blog post](https://bostondynamics.com/blog/large-behavior-models-atlas-find-new-footing/).
+
+Consider testing the flow-matching objective and evaluating performance differences for your task:
+
+```bash
+--policy.objective=flow_matching \
+--policy.timestep_sampling_strategy=beta \
+--policy.timestep_sampling_alpha=1.5 \
+--policy.timestep_sampling_beta=1.0 \
+--policy.timestep_sampling_s=0.999
+```
+
+This hasn't been shown to be a silver bullet across every use case, but it occasionally results in smoother and more consistent actions.
+
+#### 2. Number of Transformer Layers
+
+Match model capacity to your dataset size:
+
+- **Small datasets** (< 100 examples): Reduce to 4 layers
+- **Large datasets** (> 5k examples): Increase to 8 layers
+
+#### 3. `horizon` Tuning
+
+The model can be sensitive to the horizon you choose. Start with around a 1 second horizon based on your control frequency:
+
+- **30 Hz frequency**: `horizon=30`
+- **10 Hz frequency**: `horizon=10`
+
+Then experiment with increasing from there. The horizon determines how far into the future the model predicts actions.
+
+#### 4. `n_action_steps` Sensitivity
+
+The model can also be very sensitive to `n_action_steps`. Start with it being around 0.8 seconds based on your control frequency and tune from there:
+
+- **Lower values**: More reactive but potentially less stable for long-horizon tasks
+- **Higher values**: Better for long-horizon execution, but more of each chunk is executed open-loop, so recovery from failures is more limited
+
+### Inference Tuning
+
+For faster inference, use DDIM with fewer sampling steps:
+
+```bash
+--policy.noise_scheduler_type=DDIM \
+--policy.num_inference_steps=10
+```
+
+### Resuming Training
+
+To resume training from a checkpoint:
+
+```bash
+lerobot-train \
+ --config_path=./outputs/multitask_dit_training/checkpoints/last/pretrained_model/train_config.json \
+ --resume=true
+```
+
+The checkpoint directory should contain `model.safetensors` and `config.json` files (saved automatically during training). When resuming, the configuration is loaded from the checkpoint, so you don't need to specify other parameters.
+
+## Common Failure Modes and Debugging
+
+Training these models can be finicky. Here are common failure modes and debugging approaches:
+
+### Idling / No Motion
+
+The model may "collapse" during inference, resulting in static or no motion. This can occur when:
+
+1. **Insufficient training data**: If you only have 20-50 examples, try to roughly double your dataset size. Once you have above 300 examples, if you're still seeing this, the task may be too complex.
+
+2. **Multiple similar tasks**: When your dataset contains multiple similar tasks (e.g., picking up 2 different objects), the model may rely too heavily on language conditioning which might not be rich enough.
+
+**Debugging tips:**
+
+- Increase dataset size (double until you get to over 300 examples)
+- Train for longer, up to 100k steps, even when the loss flatlines
+- Check if the model is receiving proper language instructions or increase diversity of instruction
+
+### Executing the Wrong Task
+
+Sometimes the robot will completely ignore your instruction and perform some other task. This generally only happens if you have trained on multiple tasks.
+
+**Potential causes:**
+
+- Language instruction ambiguity
+- Insufficient task-specific training data
+- Model confusion between similar tasks in the multitask dataset
+
+**Debugging tips:**
+
+- Verify language instruction specificity, especially if descriptions are similar between multiple tasks
+- Check task distribution in your training dataset and add weighting to the failing/ignored task
+- Consider task-specific fine-tuning
+
+### Training Instability
+
+If training loss is unstable or diverging:
+
+- Try adjusting learning rate between `1e-5` and `3e-4`
+- Increase batch size if possible
+- Check that your dataset normalization is correct
+- Verify image preprocessing is working correctly
+
+## Performance Considerations
+
+### GPU Requirements
+
+- **Inference**: At least an RTX 5070 Ti (or equivalent GPU) is recommended for reasonable speed performance
+- **Training**: A GPU with enough VRAM to load batch sizes of >64 is ideal, which will vary depending on the number of image observations, etc
+
+### Batch Size Recommendations
+
+- **Minimum**: 64 (less than this may result in unstable training)
+- **Recommended**: 256-320 (best performance, requires larger GPU)
+
+## Example: Training on Custom Dataset
+
+Here's a complete example training on a custom dataset:
+
+```bash
+lerobot-train \
+ --dataset.repo_id=YOUR_DATASET \
+ --output_dir=./outputs/multitask_dit_training \
+ --batch_size=320 \
+ --steps=30000 \
+ --save_freq=1000 \
+ --log_freq=100 \
+ --eval_freq=1000 \
+ --policy.type=multi_task_dit \
+ --policy.device=cuda \
+ --policy.horizon=32 \
+ --policy.n_action_steps=24 \
+ --policy.objective=diffusion \
+ --policy.noise_scheduler_type=DDPM \
+ --policy.num_layers=6 \
+ --policy.hidden_dim=512 \
+ --policy.vision_encoder_name=openai/clip-vit-base-patch16 \
+ --policy.image_resize_shape=[320,240] \
+ --policy.image_crop_shape=[224,224] \
+ --policy.repo_id="HF_USER/multitask-dit-your-robot" \
+ --wandb.enable=true \
+ --wandb.project=multitask_dit
+```
+
+## References
+
+For more details on the technical implementation and architecture, see:
+
+- [A Careful Examination of Large Behavior Models for Multitask Dexterous Manipulation](https://arxiv.org/abs/2507.05331)
+- [Large Behavior Models and Atlas Find New Footing](https://bostondynamics.com/blog/large-behavior-models-atlas-find-new-footing/)
+- [Dissecting and Open-Sourcing Multitask Diffusion Transformer Policy](https://brysonkjones.substack.com/p/dissecting-and-open-sourcing-multitask-diffusion-transformer-policy)
diff --git a/docs/source/omx.mdx b/docs/source/omx.mdx
new file mode 100644
index 000000000..4617ac7bd
--- /dev/null
+++ b/docs/source/omx.mdx
@@ -0,0 +1,197 @@
+## Order and Assemble the parts
+
+First, assemble the OMX hardware following the official assembly guide.
+
+OMX Assembly Guide: https://ai.robotis.com/omx/assembly_guide_omx.html
+
+OMX robots are shipped preconfigured from the factory. Motor IDs, communication parameters, and joint offsets are already set, so no additional motor setup or calibration is required before using LeRobot.
+
+## Install LeRobot 🤗
+
+To install LeRobot, follow our [Installation Guide](./installation)
+
+In addition to these instructions, you need to install the Dynamixel SDK:
+
+```bash
+pip install -e ".[dynamixel]"
+```
+
+## Connect the robot
+
+To find the port for each bus servo adapter, run this script:
+
+```bash
+lerobot-find-port
+```
+
+When prompted, disconnect the USB cable from either the leader or follower arm and press Enter. The output will show 'The port of this MotorsBus is [port]', which identifies the port of the arm you disconnected. Repeat for the other arm to identify both ports.
+
+
+
+
+Example output on macOS:
+
+```
+Finding all available ports for the MotorBus.
+['/dev/tty.usbmodem575E0032081', '/dev/tty.usbmodem575E0031751']
+Remove the USB cable from your MotorsBus and press Enter when done.
+
+[...Disconnect corresponding leader or follower arm and press Enter...]
+
+The port of this MotorsBus is /dev/tty.usbmodem575E0032081
+Reconnect the USB cable.
+```
+
+Where the found port is: `/dev/tty.usbmodem575E0032081` corresponding to your leader or follower arm.
+
+
+
+
+On Linux, we strongly recommend using udev rules to assign persistent and human-readable device names to the OMX leader and follower arms. This avoids issues where device names such as ttyACM0 and ttyACM1 change when the robot is unplugged, replugged, or when the system is rebooted.
+
+#### 1. Find your device serial numbers
+
+You should have obtained the port numbers like ../../ttyACM? for the leader and follower using `lerobot-find-port`. You can match those results with the serial numbers using the `ls -l /dev/serial/by-id/` command.
+To create udev rules, you need the unique serial number for each OMX device. The easiest way is to list devices under:
+
+```bash
+ls -l /dev/serial/by-id/
+```
+
+You will see output similar to:
+
+```bash
+usb-ROBOTIS_OpenRB-150_228BDD7B503059384C2E3120FF0A2B19-if00 -> ../../ttyACM0
+usb-ROBOTIS_OpenRB-150_67E1ED68503059384C2E3120FF092234-if00 -> ../../ttyACM1
+```
+
+In each line, the serial number is the long string after `usb-ROBOTIS_OpenRB-150_` and before `-if00`.
+
+Follower serial: `228BDD7B503059384C2E3120FF0A2B19`
+
+Leader serial: `67E1ED68503059384C2E3120FF092234`
+
+#### 2. Create the udev rule
+
+Create a new udev rule file:
+
+```bash
+sudo nano /etc/udev/rules.d/99-omx.rules
+```
+
+Paste the following lines, replacing the serial numbers with the values you found above:
+
+```bash
+SUBSYSTEM=="tty", ATTRS{idVendor}=="0403", ATTRS{serial}=="228BDD7B503059384C2E3120FF0A2B19", SYMLINK+="omx_follower"
+SUBSYSTEM=="tty", ATTRS{idVendor}=="0403", ATTRS{serial}=="67E1ED68503059384C2E3120FF092234", SYMLINK+="omx_leader"
+```
+
+Save the file and reload udev rules:
+
+```bash
+sudo udevadm control --reload-rules
+sudo udevadm trigger
+```
+
+Now unplug and replug both devices once.
+
+#### 3. Verify the symlinks
+
+Check that the persistent device names exist:
+
+```bash
+ls -l /dev/omx_follower /dev/omx_leader
+```
+
+You should see them pointing to ttyACM\* devices:
+
+```bash
+/dev/omx_follower -> ttyACM*
+/dev/omx_leader -> ttyACM*
+```
+
+These names remain stable across reboots and reconnections.
+
+
+
+
+## Teleoperate
+
+After identifying the correct ports, you can directly teleoperate the follower arm using the leader arm.
+
+
+
+
+### Teleoperate without camera
+
+```bash
+lerobot-teleoperate \
+ --robot.type=omx_follower \
+ --robot.port=<your_follower_port> \
+ --robot.id=omx_follower_arm \
+ --teleop.type=omx_leader \
+ --teleop.port=<your_leader_port> \
+ --teleop.id=omx_leader_arm
+```
+
+During teleoperation, motions of the leader arm are mirrored in real time by the follower arm. Because OMX is already preconfigured, teleoperation can begin immediately without any calibration steps.
+
+### Teleoperate with camera
+
+You can also enable camera input during teleoperation by providing a camera configuration for the follower arm.
+
+```bash
+lerobot-teleoperate \
+ --robot.type=omx_follower \
+ --robot.port=<your_follower_port> \
+ --robot.id=omx_follower_arm \
+ --robot.cameras="{front: {type: opencv, index_or_path: '/dev/video0', width: 640, height: 480, fps: 30}}" \
+ --teleop.type=omx_leader \
+ --teleop.port=<your_leader_port> \
+ --teleop.id=omx_leader_arm \
+ --display_data=true
+```
+
+When the camera is enabled, the camera stream is displayed in real time and synchronized with the robot state. This setup is useful for visual monitoring and can be reused later for demonstration recording and imitation learning.
+
+
+
+
+### Teleoperate without camera
+
+```bash
+lerobot-teleoperate \
+ --robot.type=omx_follower \
+ --robot.port=/dev/omx_follower \
+ --robot.id=omx_follower_arm \
+ --teleop.type=omx_leader \
+ --teleop.port=/dev/omx_leader \
+ --teleop.id=omx_leader_arm
+```
+
+During teleoperation, motions of the leader arm are mirrored in real time by the follower arm. Because OMX is already preconfigured, teleoperation can begin immediately without any calibration steps.
+
+### Teleoperate with camera
+
+You can also enable camera input during teleoperation by providing a camera configuration for the follower arm.
+
+```bash
+lerobot-teleoperate \
+ --robot.type=omx_follower \
+ --robot.port=/dev/omx_follower \
+ --robot.id=omx_follower_arm \
+ --robot.cameras="{front: {type: opencv, index_or_path: '/dev/video0', width: 640, height: 480, fps: 30}}" \
+ --teleop.type=omx_leader \
+ --teleop.port=/dev/omx_leader \
+ --teleop.id=omx_leader_arm \
+ --display_data=true
+```
+
+When the camera is enabled, the camera stream is displayed in real time and synchronized with the robot state. This setup is useful for visual monitoring and can be reused later for demonstration recording and imitation learning.
+
+
+
+
+Congrats 🎉, your robot is all set to learn a task on its own.
+
+> If you have any questions or need help, please reach out on [Discord](https://discord.com/invite/robotis).
diff --git a/docs/source/openarm.mdx b/docs/source/openarm.mdx
new file mode 100644
index 000000000..cd4ace912
--- /dev/null
+++ b/docs/source/openarm.mdx
@@ -0,0 +1,276 @@
+# OpenArm
+
+[OpenArm](https://openarm.dev) is an open-source 7DOF humanoid arm designed for physical AI research and deployment.
+
+To get your OpenArm, assembled or DIY, and join the global community, browse verified and certified manufacturers worldwide at [openarm.dev](https://openarm.dev).
+
+## What's Unique?
+
+- **Human-Scale Design**: OpenArm is designed with human-like proportions, scaled for a person around 160-165cm tall. This provides an optimal balance between practical reach and manageable inertia for safe, responsive operation.
+
+- **Safety-First Architecture**: Built with QDD backdrivable motors and high compliance, OpenArm prioritizes safe human-robot interaction while maintaining practical payload capabilities (6.0kg peak / 4.1kg nominal) for real-world tasks.
+
+- **Built for Durability**: Critical structural components use aluminum and stainless steel construction, ensuring robust performance for repetitive data collection and continuous research use.
+
+- **Fully Accessible & Buildable**: Every component, from CNC parts and 3D-printed casings to electrical wiring, is designed to be purchasable and buildable by individual researchers and labs, with complete fabrication data provided.
+
+- **Practical & Affordable**: At $6,500 USD for a complete bimanual system, OpenArm delivers research-grade capabilities at a fraction of traditional humanoid robot costs.
+
+## Platform Requirements
+
+
+ **Linux Only**: OpenArm currently only works on Linux. The CAN bus USB adapter
+ does not have macOS drivers and has not been tested on Windows.
+
+
+## Safety Guide
+
+Before operating OpenArm, please read the [official safety guide](https://docs.openarm.dev/getting-started/safety-guide). Key points:
+
+- **Secure installation**: Fasten the arm to a flat, stable surface with screws or clamps
+- **Safe distance**: Keep body parts and objects outside the range of motion during operation
+- **Protective equipment**: Always wear safety goggles; use additional PPE as needed
+- **Payload limits**: Do not exceed specified payload limits (6.0kg peak / 4.1kg nominal per arm)
+- **Emergency stop**: Know the location and operation of the emergency stop device
+- **Regular inspection**: Check for loose screws, damaged mechanical limits, unusual noises, and wiring damage
+
+## Hardware Setup
+
+Follow the official [OpenArm hardware documentation](https://docs.openarm.dev) for:
+
+- Bill of materials and sourcing
+- 3D printing instructions
+- Mechanical assembly
+- Electrical wiring
+
+The hardware repositories are available at [github.com/enactic/openarm](https://github.com/enactic/openarm).
+
+## CAN Bus Setup
+
+OpenArm uses CAN bus communication with Damiao motors. Once you have the CAN bus USB adapter plugged into your Linux PC, follow the [Damiao Motors and CAN Bus guide](./damiao) to configure the interface.
+
+Quick setup:
+
+```bash
+# Setup CAN interfaces
+lerobot-setup-can --mode=setup --interfaces=can0,can1
+
+# Test motor communication
+lerobot-setup-can --mode=test --interfaces=can0,can1
+```
+
+## Install LeRobot 🤗
+
+Follow our [Installation Guide](./installation), then install the Damiao motor support:
+
+```bash
+pip install -e ".[damiao]"
+```
+
+## Usage
+
+### Follower Arm (Robot)
+
+
+
+
+```bash
+lerobot-calibrate \
+ --robot.type=openarm_follower \
+ --robot.port=can0 \
+ --robot.side=right \
+ --robot.id=my_openarm_follower
+```
+
+
+
+
+```python
+from lerobot.robots.openarm_follower import OpenArmFollower, OpenArmFollowerConfig
+
+config = OpenArmFollowerConfig(
+ port="can0",
+ side="right", # or "left" for left arm
+ id="my_openarm_follower",
+)
+
+follower = OpenArmFollower(config)
+follower.connect()
+
+# Read current state
+obs = follower.get_observation()
+print(obs)
+
+# Send action (position in degrees)
+action = {
+ "joint_1.pos": 0.0,
+ "joint_2.pos": 0.0,
+ "joint_3.pos": 0.0,
+ "joint_4.pos": 45.0,
+ "joint_5.pos": 0.0,
+ "joint_6.pos": 0.0,
+ "joint_7.pos": 0.0,
+ "gripper.pos": 0.0,
+}
+follower.send_action(action)
+
+follower.disconnect()
+```
+
+
+
+
+### Leader Arm (Teleoperator)
+
+The leader arm is used for teleoperation - manually moving it to control the follower arm.
+
+
+
+
+```bash
+lerobot-calibrate \
+ --teleop.type=openarm_leader \
+ --teleop.port=can1 \
+ --teleop.id=my_openarm_leader
+```
+
+
+
+
+```python
+from lerobot.teleoperators.openarm_leader import OpenArmLeader, OpenArmLeaderConfig
+
+config = OpenArmLeaderConfig(
+ port="can1",
+ id="my_openarm_leader",
+ manual_control=True, # Disable torque for manual movement
+)
+
+leader = OpenArmLeader(config)
+leader.connect()
+
+# Read current position (as action to send to follower)
+action = leader.get_action()
+print(action)
+
+leader.disconnect()
+```
+
+
+
+
+### Teleoperation
+
+To teleoperate OpenArm with leader-follower control:
+
+```bash
+lerobot-teleoperate \
+ --robot.type=openarm_follower \
+ --robot.port=can0 \
+ --robot.side=right \
+ --robot.id=my_follower \
+ --teleop.type=openarm_leader \
+ --teleop.port=can1 \
+ --teleop.id=my_leader
+```
+
+### Bimanual Teleoperation
+
+To teleoperate a bimanual OpenArm setup with two leader and two follower arms:
+
+```bash
+lerobot-teleoperate \
+ --robot.type=bi_openarm_follower \
+ --robot.left_arm_config.port=can0 \
+ --robot.left_arm_config.side=left \
+ --robot.right_arm_config.port=can1 \
+ --robot.right_arm_config.side=right \
+ --robot.id=my_bimanual_follower \
+ --teleop.type=bi_openarm_leader \
+ --teleop.left_arm_config.port=can2 \
+ --teleop.right_arm_config.port=can3 \
+ --teleop.id=my_bimanual_leader
+```
+
+### Recording Data
+
+To record a dataset during teleoperation:
+
+```bash
+lerobot-record \
+ --robot.type=openarm_follower \
+ --robot.port=can0 \
+ --robot.side=right \
+ --robot.id=my_follower \
+ --teleop.type=openarm_leader \
+ --teleop.port=can1 \
+ --teleop.id=my_leader \
+ --dataset.repo_id=my_hf_username/my_openarm_dataset \
+ --dataset.fps=30 \
+ --dataset.num_episodes=10
+```
+
+## Configuration Options
+
+### Follower Configuration
+
+| Parameter | Default | Description |
+| --------------------- | --------- | ---------------------------------------------------------- |
+| `port` | - | CAN interface (e.g., `can0`) |
+| `side` | `None` | Arm side: `"left"`, `"right"`, or `None` for custom limits |
+| `use_can_fd` | `True` | Enable CAN FD for higher data rates |
+| `can_bitrate` | `1000000` | Nominal bitrate (1 Mbps) |
+| `can_data_bitrate` | `5000000` | CAN FD data bitrate (5 Mbps) |
+| `max_relative_target` | `None` | Safety limit for relative target positions |
+| `position_kp` | Per-joint | Position control proportional gains |
+| `position_kd` | Per-joint | Position control derivative gains |
+
+### Leader Configuration
+
+| Parameter | Default | Description |
+| ------------------ | --------- | ----------------------------------- |
+| `port` | - | CAN interface (e.g., `can1`) |
+| `manual_control` | `True` | Disable torque for manual movement |
+| `use_can_fd` | `True` | Enable CAN FD for higher data rates |
+| `can_bitrate` | `1000000` | Nominal bitrate (1 Mbps) |
+| `can_data_bitrate` | `5000000` | CAN FD data bitrate (5 Mbps) |
+
+## Motor Configuration
+
+OpenArm uses Damiao motors with the following default configuration:
+
+| Joint | Motor Type | Send ID | Recv ID |
+| --------------------------- | ---------- | ------- | ------- |
+| joint_1 (Shoulder pan) | DM8009 | 0x01 | 0x11 |
+| joint_2 (Shoulder lift) | DM8009 | 0x02 | 0x12 |
+| joint_3 (Shoulder rotation) | DM4340 | 0x03 | 0x13 |
+| joint_4 (Elbow flex) | DM4340 | 0x04 | 0x14 |
+| joint_5 (Wrist roll) | DM4310 | 0x05 | 0x15 |
+| joint_6 (Wrist pitch) | DM4310 | 0x06 | 0x16 |
+| joint_7 (Wrist rotation) | DM4310 | 0x07 | 0x17 |
+| gripper | DM4310 | 0x08 | 0x18 |
+
+## Troubleshooting
+
+### No Response from Motors
+
+1. Check power supply connections
+2. Verify CAN wiring (CAN-H, CAN-L, GND)
+3. Run diagnostics: `lerobot-setup-can --mode=test --interfaces=can0`
+4. See the [Damiao troubleshooting guide](./damiao#troubleshooting) for more details
+
+### CAN Interface Not Found
+
+Ensure the CAN interface is configured:
+
+```bash
+ip link show can0
+```
+
+## Resources
+
+- [OpenArm Website](https://openarm.dev)
+- [OpenArm Documentation](https://docs.openarm.dev)
+- [OpenArm GitHub](https://github.com/enactic/openarm)
+- [Safety Guide](https://docs.openarm.dev/getting-started/safety-guide)
+- [Damiao Motors and CAN Bus](./damiao)
diff --git a/docs/source/phone_teleop.mdx b/docs/source/phone_teleop.mdx
index 06e524975..678783e7b 100644
--- a/docs/source/phone_teleop.mdx
+++ b/docs/source/phone_teleop.mdx
@@ -66,12 +66,13 @@ Run on of the examples scripts to teleoperate, record a dataset, replay a datase
All scripts assume you configured your robot (e.g., SO-100 follower) and set the correct serial port.
-Additionally you need to **copy the urdf of the robot to the examples folder**. For the examples in this tutorial (Using SO100/SO101) it is highly recommended to use the urdf in the [SO-ARM100 repo](https://github.com/TheRobotStudio/SO-ARM100/blob/main/Simulation/SO101/so101_new_calib.urdf)
+Additionally you need to **copy the URDF of the robot into the examples folder**. For the examples in this tutorial (using SO100/SO101), copy the `SO101` folder from the [SO-ARM100 repo](https://github.com/TheRobotStudio/SO-ARM100/blob/main/Simulation/SO101) into the `examples/phone_to_so100/` directory, so that the URDF file path becomes `examples/phone_to_so100/SO101/so101_new_calib.urdf`.
- Run this example to teleoperate:
```bash
- python examples/phone_to_so100/teleoperate.py
+ cd examples/phone_to_so100
+ python teleoperate.py
```
After running the example:
@@ -84,19 +85,22 @@ Additionally you can customize mapping or safety limits by editing the processor
- Run this example to record a dataset, which saves absolute end effector observations and actions:
```bash
- python examples/phone_to_so100/record.py
+ cd examples/phone_to_so100
+ python record.py
```
- Run this example to replay recorded episodes:
```bash
- python examples/phone_to_so100/replay.py
+ cd examples/phone_to_so100
+ python replay.py
```
- Run this example to evaluate a pretrained policy:
```bash
- python examples/phone_to_so100/evaluate.py
+ cd examples/phone_to_so100
+ python evaluate.py
```
### Important pipeline steps and options
diff --git a/docs/source/pi0.mdx b/docs/source/pi0.mdx
index 93e0b4c88..980490163 100644
--- a/docs/source/pi0.mdx
+++ b/docs/source/pi0.mdx
@@ -34,11 +34,6 @@ As described by Physical Intelligence, while AI has achieved remarkable success
pip install -e ".[pi]"
```
- > [!NOTE]
- > For lerobot 0.4.0, if you want to install pi tag, you will have to do: `pip install "lerobot[pi]@git+https://github.com/huggingface/lerobot.git"`.
- >
- > This will be solved in the next patch release
-
## Training Data and Capabilities
π₀ is trained on the largest robot interaction dataset to date, combining three key data sources:
@@ -60,7 +55,7 @@ policy.type=pi0
For training π₀, you can use the standard LeRobot training script with the appropriate configuration:
```bash
-python src/lerobot/scripts/lerobot_train.py \
+lerobot-train \
--dataset.repo_id=your_dataset \
--policy.type=pi0 \
--output_dir=./outputs/pi0_training \
@@ -96,6 +91,46 @@ python src/lerobot/scripts/lerobot_train.py \
**💡 Tip**: Setting `train_expert_only=true` freezes the VLM and trains only the action expert and projections, allowing finetuning with reduced memory usage.
+## Relative Actions
+
+By default, π₀ predicts absolute actions. You can enable **relative actions** so the model predicts offsets relative to the current robot state. This can improve training stability for certain setups.
+
+To use relative actions, first recompute your dataset stats in relative space via the CLI:
+
+```bash
+lerobot-edit-dataset \
+ --repo_id your_dataset \
+ --operation.type recompute_stats \
+ --operation.relative_action true \
+ --operation.chunk_size 50 \
+ --operation.relative_exclude_joints "['gripper']" \
+ --push_to_hub true
+```
+
+Or equivalently in Python:
+
+```python
+from lerobot.datasets.lerobot_dataset import LeRobotDataset
+from lerobot.datasets.dataset_tools import recompute_stats
+
+dataset = LeRobotDataset("your_dataset")
+recompute_stats(dataset, relative_action=True, chunk_size=50, relative_exclude_joints=["gripper"])
+dataset.push_to_hub()
+```
+
+The `chunk_size` should match your policy's `chunk_size` (default 50 for π₀). `relative_exclude_joints` lists joint names that should remain in absolute space (e.g. gripper commands). Use `--push_to_hub true` to upload the updated stats to the Hub.
+
+Then train with relative actions enabled:
+
+```bash
+lerobot-train \
+ --dataset.repo_id=your_dataset \
+ --policy.type=pi0 \
+ --policy.use_relative_actions=true \
+ --policy.relative_exclude_joints='["gripper"]' \
+ ...
+```
+
## License
This model follows the **Apache 2.0 License**, consistent with the original [OpenPI repository](https://github.com/Physical-Intelligence/openpi).
diff --git a/docs/source/pi05.mdx b/docs/source/pi05.mdx
index dbf118aa3..74e056efa 100644
--- a/docs/source/pi05.mdx
+++ b/docs/source/pi05.mdx
@@ -36,11 +36,6 @@ This diverse training mixture creates a "curriculum" that enables generalization
pip install -e ".[pi]"
```
- > [!NOTE]
- > For lerobot 0.4.0, if you want to install pi tag, you will have to do: `pip install "lerobot[pi]@git+https://github.com/huggingface/lerobot.git"`.
- >
- > This will be solved in the next patch release
-
## Usage
To use π₀.₅ in your LeRobot configuration, specify the policy type as:
@@ -56,7 +51,7 @@ policy.type=pi05
Here's a complete training command for finetuning the base π₀.₅ model on your own dataset:
```bash
-python src/lerobot/scripts/lerobot_train.py\
+lerobot-train \
--dataset.repo_id=your_dataset \
--policy.type=pi05 \
--output_dir=./outputs/pi05_training \
@@ -102,6 +97,46 @@ python src/lerobot/datasets/v30/augment_dataset_quantile_stats.py \
Or train pi05 with this normalization mapping: `--policy.normalization_mapping='{"ACTION": "MEAN_STD", "STATE": "MEAN_STD", "VISUAL": "IDENTITY"}'`
+## Relative Actions
+
+By default, π₀.₅ predicts absolute actions. You can enable **relative actions** so the model predicts offsets relative to the current robot state. This can improve training stability for certain setups.
+
+To use relative actions, first recompute your dataset stats in relative space via the CLI:
+
+```bash
+lerobot-edit-dataset \
+ --repo_id your_dataset \
+ --operation.type recompute_stats \
+ --operation.relative_action true \
+ --operation.chunk_size 50 \
+ --operation.relative_exclude_joints "['gripper']" \
+ --push_to_hub true
+```
+
+Or equivalently in Python:
+
+```python
+from lerobot.datasets.lerobot_dataset import LeRobotDataset
+from lerobot.datasets.dataset_tools import recompute_stats
+
+dataset = LeRobotDataset("your_dataset")
+recompute_stats(dataset, relative_action=True, chunk_size=50, relative_exclude_joints=["gripper"])
+dataset.push_to_hub()
+```
+
+The `chunk_size` should match your policy's `chunk_size` (default 50 for π₀.₅). `relative_exclude_joints` lists joint names that should remain in absolute space (e.g. gripper commands). Use `--push_to_hub true` to upload the updated stats to the Hub.
+
+Then train with relative actions enabled:
+
+```bash
+lerobot-train \
+ --dataset.repo_id=your_dataset \
+ --policy.type=pi05 \
+ --policy.use_relative_actions=true \
+ --policy.relative_exclude_joints='["gripper"]' \
+ ...
+```
+
## Performance Results
### Libero Benchmark Results
diff --git a/docs/source/pi0fast.mdx b/docs/source/pi0fast.mdx
index c4230fa79..f7272acc5 100644
--- a/docs/source/pi0fast.mdx
+++ b/docs/source/pi0fast.mdx
@@ -43,16 +43,11 @@ This approach can transform **any existing VLM** into a VLA by training it to pr
pip install -e ".[pi]"
```
- > [!NOTE]
- > For lerobot 0.4.0, if you want to install the pi tag, you will have to do: `pip install "lerobot[pi]@git+https://github.com/huggingface/lerobot.git"`.
- >
- > This will be solved in the next patch release
-
## Training a Custom FAST Tokenizer
You have two options for the FAST tokenizer:
-1. **Use the pre-trained tokenizer**: The `physical-intelligence/fast` tokenizer was trained on 1M+ real robot action sequences and works as a general-purpose tokenizer.
+1. **Use the pre-trained tokenizer**: The `lerobot/fast-action-tokenizer` tokenizer was trained on 1M+ real robot action sequences and works as a general-purpose tokenizer.
2. **Train your own tokenizer**: For maximum performance on your specific dataset, you can finetune the tokenizer on your own data.
@@ -114,15 +109,15 @@ lerobot-train \
### Key Training Parameters
-| Parameter | Description | Default |
-| -------------------------------------- | -------------------------------------------------- | ---------------------------- |
-| `--policy.gradient_checkpointing=true` | Reduces memory usage significantly during training | `false` |
-| `--policy.dtype=bfloat16` | Use mixed precision training for efficiency | `float32` |
-| `--policy.chunk_size` | Number of action steps to predict (action horizon) | `50` |
-| `--policy.n_action_steps` | Number of action steps to execute | `50` |
-| `--policy.max_action_tokens` | Maximum number of FAST tokens per action chunk | `256` |
-| `--policy.action_tokenizer_name` | FAST tokenizer to use | `physical-intelligence/fast` |
-| `--policy.compile_model=true` | Enable torch.compile for faster training | `false` |
+| Parameter | Description | Default |
+| -------------------------------------- | -------------------------------------------------- | ------------------------------- |
+| `--policy.gradient_checkpointing=true` | Reduces memory usage significantly during training | `false` |
+| `--policy.dtype=bfloat16` | Use mixed precision training for efficiency | `float32` |
+| `--policy.chunk_size` | Number of action steps to predict (action horizon) | `50` |
+| `--policy.n_action_steps` | Number of action steps to execute | `50` |
+| `--policy.max_action_tokens` | Maximum number of FAST tokens per action chunk | `256` |
+| `--policy.action_tokenizer_name` | FAST tokenizer to use | `lerobot/fast-action-tokenizer` |
+| `--policy.compile_model=true` | Enable torch.compile for faster training | `false` |
## Inference
diff --git a/docs/source/policy_multi_task_dit_README.md b/docs/source/policy_multi_task_dit_README.md
new file mode 100644
index 000000000..f24fa927e
--- /dev/null
+++ b/docs/source/policy_multi_task_dit_README.md
@@ -0,0 +1,37 @@
+# Multitask DiT Policy
+
+## Citation
+
+If you use this work, please cite the following works:
+
+```bibtex
+@misc{jones2025multitaskditpolicy,
+ author = {Bryson Jones},
+ title = {Dissecting and Open-Sourcing Multitask Diffusion Transformer Policy},
+ year = {2025},
+ url = {https://brysonkjones.substack.com/p/dissecting-and-open-sourcing-multitask-diffusion-transformer-policy},
+ note = {Blog post}
+}
+```
+
+```bibtex
+@misc{trilbmteam2025carefulexaminationlargebehaviormodels,
+ author = {TRI LBM Team},
+ title = {A Careful Examination of Large Behavior Models for Multitask Dexterous Manipulation},
+ year = {2025},
+ eprint = {2507.05331},
+ archivePrefix = {arXiv},
+ primaryClass = {cs.RO},
+ url = {https://arxiv.org/abs/2507.05331}
+}
+```
+
+```bibtex
+@misc{bostondynamics2025largebehaviormodelsatlas,
+ author = {Boston Dynamics and TRI Research Team},
+ title = {Large Behavior Models and Atlas Find New Footing},
+ year = {2025},
+ url = {https://bostondynamics.com/blog/large-behavior-models-atlas-find-new-footing/},
+ note = {Blog post}
+}
+```
diff --git a/docs/source/reachy2.mdx b/docs/source/reachy2.mdx
index 51b09acd2..1b868711a 100644
--- a/docs/source/reachy2.mdx
+++ b/docs/source/reachy2.mdx
@@ -159,6 +159,9 @@ lerobot-record \
--dataset.fps=15 \
--dataset.push_to_hub=true \
--dataset.private=true \
+ --dataset.streaming_encoding=true \
+ --dataset.encoder_threads=2 \
+ # --dataset.vcodec=auto \
--display_data=true
```
@@ -198,6 +201,9 @@ lerobot-record \
--dataset.fps=15 \
--dataset.push_to_hub=true \
--dataset.private=true \
+ --dataset.streaming_encoding=true \
+ --dataset.encoder_threads=2 \
+ # --dataset.vcodec=auto \
--display_data=true
```
diff --git a/docs/source/rename_map.mdx b/docs/source/rename_map.mdx
new file mode 100644
index 000000000..6249faaca
--- /dev/null
+++ b/docs/source/rename_map.mdx
@@ -0,0 +1,114 @@
+# Rename Map and Empty Cameras
+
+When you train, evaluate, or record with a robot policy, your **dataset** or **environment** provides observations under one set of keys (e.g. `observation.images.front`, `observation.images.eagle`), while your **policy** expects another (e.g. `observation.images.image`, `observation.images.image2`). The **rename map** bridges that gap without changing the policy or data source.
+
+> **Scope:** The rename map only renames **observation** keys (images and state). Action keys are not affected.
+
+## Why observation keys don't always match
+
+Policies have a fixed set of **input feature names** baked into their pretrained config. For example:
+
+- [pi0fast-libero](https://huggingface.co/lerobot/pi0fast-libero) expects `observation.images.base_0_rgb` and `observation.images.left_wrist_0_rgb`.
+- [xvla-base](https://huggingface.co/lerobot/xvla-base) expects `observation.images.image`, `observation.images.image2`, and `observation.images.image3`.
+
+Your dataset might use different names entirely (e.g. `observation.images.front`, `observation.images.eagle`, `observation.images.glove`), and your eval environment might use yet another set. Rather than editing the policy config or renaming columns in the dataset, you pass a **rename map**: a JSON dictionary that maps source keys to the keys the policy expects. Renaming happens inside the preprocessor pipeline, so the policy always sees its expected keys.
+
+## Using the rename map
+
+Pass the mapping as a JSON string on the command line. The convention is always:
+
+```
+--rename_map='{"source_key": "policy_key", ...}'
+```
+
+where **source_key** is what the dataset or environment provides, and **policy_key** is what the policy expects.
+
+Only listed keys are renamed; everything else passes through unchanged. Order of entries doesn't matter.
+
+Supported policies: **PI0**, **PI05**, **PI0Fast**, **SmolVLA**, and **XVLA**.
+
+### Training
+
+Suppose you fine-tune [lerobot/xvla-base](https://huggingface.co/lerobot/xvla-base) on a dataset with images under `observation.images.front`, `observation.images.eagle`, and `observation.images.glove`. XVLA expects `observation.images.image`, `observation.images.image2`, and `observation.images.image3`:
+
+```bash
+lerobot-train \
+ --dataset.repo_id=YOUR_DATASET \
+ --output_dir=./outputs/xvla_training \
+ --job_name=xvla_training \
+ --policy.path="lerobot/xvla-base" \
+ --policy.repo_id="HF_USER/xvla-your-robot" \
+ --policy.dtype=bfloat16 \
+ --policy.action_mode=auto \
+ --steps=20000 \
+ --policy.device=cuda \
+ --policy.freeze_vision_encoder=false \
+ --policy.freeze_language_encoder=false \
+ --policy.train_policy_transformer=true \
+ --policy.train_soft_prompts=true \
+ --rename_map='{"observation.images.front": "observation.images.image", "observation.images.eagle": "observation.images.image2", "observation.images.glove": "observation.images.image3"}'
+```
+
+### Evaluation
+
+Suppose a policy expects `observation.images.base_0_rgb` and `observation.images.left_wrist_0_rgb` (e.g. [pi0fast-libero](https://huggingface.co/lerobot/pi0fast-libero)), but the LIBERO environment returns `observation.images.image` and `observation.images.image2`:
+
+```bash
+lerobot-eval \
+ --policy.path=lerobot/pi0fast-libero \
+ --env.type=libero \
+ ... \
+ --rename_map='{"observation.images.image": "observation.images.base_0_rgb", "observation.images.image2": "observation.images.left_wrist_0_rgb"}'
+```
+
+### Recording
+
+`lerobot-record` also supports rename maps when running inference with a policy; the option is nested under the dataset config:
+
+```bash
+lerobot-record \
+ --policy.path="/smolVLA_finetuned" \
+ ... \
+ --dataset.rename_map='{"observation.images.glove2": "observation.images.image"}'
+```
+
+## Alternative: edit the policy config directly
+
+If you always use the same dataset or environment, you can **edit the policy's `config.json`** so its observation keys match your data source. Then no rename map is needed.
+
+The tradeoff: modifying the policy config ties it to one data source. A rename map keeps one policy usable across many datasets and environments.
+
+## Empty cameras: fewer views than the policy expects
+
+Some policies are built for a fixed number of image inputs. If your dataset has fewer cameras, you can set **`empty_cameras`** in the policy config instead of modifying the model architecture.
+
+### How it works
+
+Setting `empty_cameras=N` adds N placeholder image features to the policy config, named:
+
+```
+observation.images.empty_camera_0
+observation.images.empty_camera_1
+...
+```
+
+At runtime, these keys have no corresponding data in the batch. The policy fills them with masked dummy tensors (padded with `-1` for SigLIP-based vision encoders, with a zero attention mask), so the extra image slots are effectively ignored during training and inference.
+
+### Example
+
+XVLA-base has three visual inputs and `empty_cameras=0` by default. If your dataset only has two cameras:
+
+1. Set `--policy.empty_cameras=1`.
+2. The config adds a third key: `observation.images.empty_camera_0`.
+3. Use the rename map for your two real cameras as usual.
+4. The third slot is masked out — no fake images needed in your dataset.
+
+## Quick reference
+
+| Goal | What to do |
+| ----------------------------------------- | --------------------------------------------------------------------------- |
+| Dataset keys ≠ policy keys | `--rename_map='{"dataset_key": "policy_key", ...}'` |
+| Env keys ≠ policy keys (eval) | `--rename_map='{"env_key": "policy_key", ...}'` |
+| Recording with different keys (inference) | `--dataset.rename_map='{"source_key": "policy_key", ...}'` |
+| Fewer cameras than policy expects | `--policy.empty_cameras=N` (supported by PI0, PI05, PI0Fast, SmolVLA, XVLA) |
+| Avoid passing a rename map | Edit the policy's `config.json` so its keys match your data source |
diff --git a/docs/source/sarm.mdx b/docs/source/sarm.mdx
index 65e49792b..cd488fe1f 100644
--- a/docs/source/sarm.mdx
+++ b/docs/source/sarm.mdx
@@ -269,7 +269,7 @@ This generates visualizations showing video frames with subtask boundaries overl
Train with **no annotations** - uses linear progress from 0 to 1:
```bash
-python src/lerobot/scripts/lerobot_train.py \
+lerobot-train \
--dataset.repo_id=your-username/your-dataset \
--policy.type=sarm \
--policy.annotation_mode=single_stage \
@@ -288,7 +288,7 @@ python src/lerobot/scripts/lerobot_train.py \
Train with **dense annotations only** (sparse auto-generated):
```bash
-python src/lerobot/scripts/lerobot_train.py \
+lerobot-train \
--dataset.repo_id=your-username/your-dataset \
--policy.type=sarm \
--policy.annotation_mode=dense_only \
@@ -307,7 +307,7 @@ python src/lerobot/scripts/lerobot_train.py \
Train with **both sparse and dense annotations**:
```bash
-python src/lerobot/scripts/lerobot_train.py \
+lerobot-train \
--dataset.repo_id=your-username/your-dataset \
--policy.type=sarm \
--policy.annotation_mode=dual \
@@ -468,7 +468,7 @@ This script:
Once you have the progress file, train your policy with RA-BC weighting. The progress file is auto-detected from the dataset path (`sarm_progress.parquet`). Currently PI0, PI0.5 and SmolVLA are supported with RA-BC:
```bash
-python src/lerobot/scripts/lerobot_train.py \
+lerobot-train \
--dataset.repo_id=your-username/your-dataset \
--policy.type=pi0 \
--use_rabc=true \
diff --git a/docs/source/smolvla.mdx b/docs/source/smolvla.mdx
index a56298b5e..bf8a0d2f0 100644
--- a/docs/source/smolvla.mdx
+++ b/docs/source/smolvla.mdx
@@ -106,6 +106,9 @@ lerobot-record \
--dataset.repo_id=${HF_USER}/eval_DATASET_NAME_test \ # <- This will be the dataset name on HF Hub
--dataset.episode_time_s=50 \
--dataset.num_episodes=10 \
+ --dataset.streaming_encoding=true \
+ --dataset.encoder_threads=2 \
+ # --dataset.vcodec=auto \
# <- Teleop optional if you want to teleoperate in between episodes \
# --teleop.type=so100_leader \
# --teleop.port=/dev/ttyACM0 \
diff --git a/docs/source/so101.mdx b/docs/source/so101.mdx
index cf882b373..1274b8282 100644
--- a/docs/source/so101.mdx
+++ b/docs/source/so101.mdx
@@ -1,5 +1,18 @@
# SO-101
+
+

+

+
+
In the steps below, we explain how to assemble our flagship robot, the SO-101.
## Source the parts
@@ -223,10 +236,10 @@ It is advisable to install one 3-pin cable in the motor after placing them befor
### Joint 1
+- Install both motor horns. Secure the top horn with a M3x6mm screw. No screws are required for the bottom horn.
- Place the first motor into the base.
- Fasten the motor with 4 M2x6mm screws (smallest screws). Two from the top and two from the bottom.
- Slide over the first motor holder and fasten it using two M2x6mm screws (one on each side).
-- Install both motor horns, securing the top horn with a M3x6mm screw.
- Attach the shoulder part.
- Tighten the shoulder part with 4 M3x6mm screws on top and 4 M3x6mm screws on the bottom
- Add the shoulder motor holder.
@@ -242,9 +255,9 @@ It is advisable to install one 3-pin cable in the motor after placing them befor
### Joint 2
+- Install both motor horns. Secure the top horn with a M3x6mm screw. No screws are required for the bottom horn.
- Slide the second motor in from the top.
- Fasten the second motor with 4 M2x6mm screws.
-- Attach both motor horns to motor 2, again use the M3x6mm horn screw.
- Attach the upper arm with 4 M3x6mm screws on each side.
@@ -258,8 +271,8 @@ It is advisable to install one 3-pin cable in the motor after placing them befor
### Joint 3
-- Insert motor 3 and fasten using 4 M2x6mm screws
-- Attach both motor horns to motor 3 and secure one again with a M3x6mm horn screw.
+- Install both motor horns. Secure the top horn with a M3x6mm screw. No screws are required for the bottom horn.
+- Insert motor 3 and fasten using 4 M2x6mm screws.
- Connect the forearm to motor 3 using 4 M3x6mm screws on each side.
@@ -273,9 +286,10 @@ It is advisable to install one 3-pin cable in the motor after placing them befor
### Joint 4
+- Install both motor horns. Secure the top horn with a M3x6mm screw. No screws are required for the bottom horn.
- Slide over motor holder 4.
- Slide in motor 4.
-- Fasten motor 4 with 4 M2x6mm screws and attach its motor horns, use a M3x6mm horn screw.
+- Fasten motor 4 with 4 M2x6mm screws.