Compare commits

..

9 Commits

Author SHA1 Message Date
Pepijn a07ff640bb fix metadata info.json 2026-02-05 17:09:01 +01:00
Pepijn 0753415244 fix 2026-02-05 16:56:47 +01:00
Pepijn a054663e38 cleanup output from earlier runs 2026-02-05 16:54:16 +01:00
Pepijn aceb651e40 import logging 2026-02-05 16:15:53 +01:00
Pepijn 76a4529d29 fix bug 2026-02-05 16:03:45 +01:00
Pepijn 39e14c086c add push to hub 2026-02-05 15:20:57 +01:00
Pepijn 0af2029328 add slurm mirror dataset 2026-02-05 15:15:10 +01:00
Pepijn c027b2971c Merge branch 'main' into tmp/fold_training 2026-01-31 13:52:19 +01:00
Michel Aractingi 9cc203034e temp_training 2026-01-26 15:19:52 +00:00
353 changed files with 7143 additions and 20808 deletions
+1 -8
View File
@@ -44,7 +44,7 @@ permissions:
# Sets up the environment variables
env:
UV_VERSION: "0.8.0"
PYTHON_VERSION: "3.12"
PYTHON_VERSION: "3.10"
# Ensures that only the latest commit for a PR or branch is built, canceling older runs.
concurrency:
@@ -61,7 +61,6 @@ jobs:
MUJOCO_GL: egl
HF_HOME: /mnt/cache/.cache/huggingface
HF_LEROBOT_HOME: /mnt/cache/.cache/huggingface/lerobot
HF_USER_TOKEN: ${{ secrets.LEROBOT_HF_USER }}
steps:
- uses: actions/checkout@v6
with:
@@ -90,11 +89,5 @@ jobs:
- name: Install lerobot with test extras
run: uv sync --extra "test"
- name: Login to Hugging Face
if: env.HF_USER_TOKEN != ''
run: |
uv run hf auth login --token "$HF_USER_TOKEN" --add-to-git-credential
uv run hf auth whoami
- name: Run pytest
run: uv run pytest tests -vv --maxfail=10
+4 -21
View File
@@ -37,7 +37,7 @@ permissions:
# Sets up the environment variables
env:
UV_VERSION: "0.8.0"
PYTHON_VERSION: "3.12"
PYTHON_VERSION: "3.10"
DOCKER_IMAGE_NAME: huggingface/lerobot-gpu
# Ensures that only the latest action is built, canceling older runs.
@@ -60,7 +60,6 @@ jobs:
MUJOCO_GL: egl
HF_HOME: /mnt/cache/.cache/huggingface
HF_LEROBOT_HOME: /mnt/cache/.cache/huggingface/lerobot
HF_USER_TOKEN: ${{ secrets.LEROBOT_HF_USER }}
steps:
- uses: actions/checkout@v6
with:
@@ -88,12 +87,6 @@ jobs:
- name: Install lerobot with all extras
run: uv sync --extra all # TODO(Steven): Make flash-attn optional
- name: Login to Hugging Face
if: env.HF_USER_TOKEN != ''
run: |
uv run hf auth login --token "$HF_USER_TOKEN" --add-to-git-credential
uv run hf auth whoami
- name: Run pytest (all extras)
run: uv run pytest tests -vv --maxfail=10
@@ -108,11 +101,9 @@ jobs:
runs-on:
group: aws-general-8-plus
if: |
github.repository == 'huggingface/lerobot' && (
(github.event_name == 'pull_request_review' && github.event.review.state == 'approved' && github.event.pull_request.head.repo.fork == false) ||
github.event_name == 'push' ||
github.event_name == 'workflow_dispatch'
)
(github.event_name == 'pull_request_review' && github.event.review.state == 'approved' && github.event.pull_request.head.repo.fork == false) ||
github.event_name == 'push' ||
github.event_name == 'workflow_dispatch'
outputs:
image_tag: ${{ steps.set_tag.outputs.image_tag }}
env:
@@ -169,7 +160,6 @@ jobs:
HF_LEROBOT_HOME: /home/user_lerobot/.cache/huggingface/lerobot
TORCH_HOME: /home/user_lerobot/.cache/torch
TRITON_CACHE_DIR: /home/user_lerobot/.cache/triton
HF_USER_TOKEN: ${{ secrets.LEROBOT_HF_USER }}
container:
image: ${{ needs.build-and-push-docker.outputs.image_tag }} # zizmor: ignore[unpinned-images]
options: --gpus all --shm-size "16gb"
@@ -181,13 +171,6 @@ jobs:
shell: bash
working-directory: /lerobot
steps:
- name: Login to Hugging Face
if: env.HF_USER_TOKEN != ''
run: |
hf auth login --token "$HF_USER_TOKEN" --add-to-git-credential
hf auth whoami
- name: Fix ptxas permissions
run: chmod +x /lerobot/.venv/lib/python3.12/site-packages/triton/backends/nvidia/bin/ptxas
- name: Run pytest on GPU
run: pytest tests -vv --maxfail=10
- name: Run end-to-end tests
+4 -20
View File
@@ -28,7 +28,7 @@ on:
# Sets up the environment variables
env:
UV_VERSION: "0.8.0"
PYTHON_VERSION: "3.12"
PYTHON_VERSION: "3.10"
DOCKER_IMAGE_NAME_CPU: huggingface/lerobot-cpu:latest
DOCKER_IMAGE_NAME_GPU: huggingface/lerobot-gpu:latest
@@ -119,7 +119,6 @@ jobs:
HF_LEROBOT_HOME: /home/user_lerobot/.cache/huggingface/lerobot
TORCH_HOME: /home/user_lerobot/.cache/torch
TRITON_CACHE_DIR: /home/user_lerobot/.cache/triton
HF_USER_TOKEN: ${{ secrets.LEROBOT_HF_USER }}
container:
image: ${{ needs.build-docker-cpu-nightly.outputs.image_tag }} # zizmor: ignore[unpinned-images]
options: --shm-size "16gb"
@@ -131,11 +130,6 @@ jobs:
shell: bash
working-directory: /lerobot
steps:
- name: Login to Hugging Face
if: env.HF_USER_TOKEN != ''
run: |
hf auth login --token "$HF_USER_TOKEN" --add-to-git-credential
hf auth whoami
- name: Run pytest on CPU
run: pytest tests -vv --maxfail=10
- name: Run end-to-end tests
@@ -152,7 +146,6 @@ jobs:
HF_LEROBOT_HOME: /home/user_lerobot/.cache/huggingface/lerobot
TORCH_HOME: /home/user_lerobot/.cache/torch
TRITON_CACHE_DIR: /home/user_lerobot/.cache/triton
HF_USER_TOKEN: ${{ secrets.LEROBOT_HF_USER }}
container:
image: ${{ needs.build-docker-gpu-nightly.outputs.image_tag }} # zizmor: ignore[unpinned-images]
options: --gpus all --shm-size "16gb"
@@ -164,11 +157,6 @@ jobs:
shell: bash
working-directory: /lerobot
steps:
- name: Login to Hugging Face
if: env.HF_USER_TOKEN != ''
run: |
hf auth login --token "$HF_USER_TOKEN" --add-to-git-credential
hf auth whoami
- name: Run pytest on GPU
run: pytest tests -vv --maxfail=10
- name: Run end-to-end tests
@@ -186,7 +174,6 @@ jobs:
TORCH_HOME: /home/user_lerobot/.cache/torch
TRITON_CACHE_DIR: /home/user_lerobot/.cache/triton
CUDA_VISIBLE_DEVICES: "0,1,2,3"
HF_USER_TOKEN: ${{ secrets.LEROBOT_HF_USER }}
container:
image: ${{ needs.build-docker-gpu-nightly.outputs.image_tag }} # zizmor: ignore[unpinned-images]
options: --gpus all --shm-size "16gb"
@@ -198,15 +185,12 @@ jobs:
shell: bash
working-directory: /lerobot
steps:
- name: Login to Hugging Face
if: env.HF_USER_TOKEN != ''
run: |
hf auth login --token "$HF_USER_TOKEN" --add-to-git-credential
hf auth whoami
- name: Verify GPU availability
run: |
nvidia-smi
python -c "import torch; print(f'PyTorch CUDA available: {torch.cuda.is_available()}'); print(f'Number of GPUs: {torch.cuda.device_count()}')"
- name: Run multi-GPU training tests
run: pytest -vv tests/training/
# TODO(Steven): Investigate why motors tests are failing in multi-GPU setup
run: pytest tests -vv --maxfail=10 --ignore=tests/motors/
timeout-minutes: 10
+1 -1
View File
@@ -50,7 +50,7 @@ jobs:
- name: Set up Python
uses: actions/setup-python@v6
with:
python-version: '3.12'
python-version: '3.10'
- name: Run pre-commit hooks
uses: pre-commit/action@v3.0.1 # zizmor: ignore[unpinned-uses]
+10 -2
View File
@@ -22,7 +22,7 @@ on:
# Sets up the environment variables
env:
UV_VERSION: "0.8.0"
PYTHON_VERSION: "3.12"
PYTHON_VERSION: "3.10"
jobs:
# This job builds the Python package and publishes it to PyPI
@@ -45,7 +45,7 @@ jobs:
- name: Set up Python
uses: actions/setup-python@v6
with:
python-version: '3.12'
python-version: '3.10'
- name: Extract Version
id: extract_info
@@ -83,6 +83,14 @@ jobs:
exit 1
fi
- name: Remove Tags with Git dependencies
# TODO(Steven): Temporary patch to remove pi from PyPi 0.4.0 release due to its reliance on git dependencies.
run: |
echo "::info:: Checking for Git dependencies to remove from pyproject.toml..."
grep -E '@ git\+https|lerobot\[pi\]' pyproject.toml | sed 's/^/::warning:: Removing line: /' || true
sed -E -i '/@ git\+https|lerobot\[pi\]/d' pyproject.toml
echo "::info:: Git dependencies removed. Proceeding with build."
- name: Install build dependencies
run: python -m pip install build
+2 -14
View File
@@ -29,7 +29,7 @@ permissions:
# Sets up the environment variables
env:
UV_VERSION: "0.8.0"
PYTHON_VERSION: "3.12"
PYTHON_VERSION: "3.10"
DOCKER_IMAGE_NAME: huggingface/lerobot-gpu:unbound
# Ensures that only the latest action is built, canceling older runs.
@@ -48,7 +48,6 @@ jobs:
MUJOCO_GL: egl
HF_HOME: /mnt/cache/.cache/huggingface
HF_LEROBOT_HOME: /mnt/cache/.cache/huggingface/lerobot
HF_USER_TOKEN: ${{ secrets.LEROBOT_HF_USER }}
steps:
- uses: actions/checkout@v6
with:
@@ -80,11 +79,7 @@ jobs:
- name: Install lerobot with all extras
run: uv sync --extra all # TODO(Steven): Make flash-attn optional
- name: Login to Hugging Face
if: env.HF_USER_TOKEN != ''
run: |
uv run hf auth login --token "$HF_USER_TOKEN" --add-to-git-credential
uv run hf auth whoami
- name: Run pytest (all extras)
run: uv run pytest tests -vv
@@ -96,7 +91,6 @@ jobs:
name: Build and Push Docker
runs-on:
group: aws-general-8-plus
if: github.repository == 'huggingface/lerobot'
outputs:
image_tag: ${{ env.DOCKER_IMAGE_NAME }}
env:
@@ -142,7 +136,6 @@ jobs:
HF_LEROBOT_HOME: /home/user_lerobot/.cache/huggingface/lerobot
TORCH_HOME: /home/user_lerobot/.cache/torch
TRITON_CACHE_DIR: /home/user_lerobot/.cache/triton
HF_USER_TOKEN: ${{ secrets.LEROBOT_HF_USER }}
container:
image: ${{ needs.build-and-push-docker.outputs.image_tag }} # zizmor: ignore[unpinned-images]
options: --gpus all --shm-size "16gb"
@@ -154,11 +147,6 @@ jobs:
shell: bash
working-directory: /lerobot
steps:
- name: Login to Hugging Face
if: env.HF_USER_TOKEN != ''
run: |
hf auth login --token "$HF_USER_TOKEN" --add-to-git-credential
hf auth whoami
- name: Run pytest on GPU
run: pytest tests -vv
- name: Run end-to-end tests
+2
View File
@@ -173,5 +173,7 @@ outputs/
# Dev folders
.cache/*
*.stl
*.urdf
*.xml
*.part
+2 -2
View File
@@ -13,7 +13,7 @@
# limitations under the License.
default_language_version:
python: python3.12
python: python3.10
exclude: "tests/artifacts/.*\\.safetensors$"
@@ -55,7 +55,7 @@ repos:
rev: v3.21.0
hooks:
- id: pyupgrade
args: [--py312-plus]
args: [--py310-plus]
##### Markdown Quality #####
- repo: https://github.com/rbubley/mirrors-prettier
-25
View File
@@ -1,25 +0,0 @@
# AI Usage Policy
The LeRobot project welcomes contributions from everyone, and we have a few guidelines regarding AI usage to ensure high code quality, clear communication, and a healthy open-source ecosystem:
- **Please disclose significant AI assistance.** If you used AI tools (e.g., Copilot, Claude, Cursor, ChatGPT) to generate a substantial portion of your code or text, let us know in your PR description. Transparency helps us review your changes more effectively.
- **Own your code (The Human-in-the-Loop).** You must fully understand all the changes you are proposing. If you cannot explain what your AI-assisted code does or how it interacts with LeRobot's broader architecture, please take the time to learn and test it before submitting.
- **Keep issues and discussions focused.** You are welcome to use AI to help draft issues or PR descriptions, but please review and edit them carefully before posting. AI can often be overly verbose; trimming the noise and getting straight to the point helps our maintainers address your needs faster.
Our core maintainers also use AI tools to aid their workflows, but they do so while bringing deep contextual knowledge of the LeRobot codebase to validate the output. We ask all contributors to apply that same level of rigor.
## Remember the Human Maintainers
Please remember that LeRobot is maintained by a dedicated team of humans.
Every discussion, issue, and pull request is read and reviewed by real people. While AI tools can generate thousands of lines of code in seconds, reviewing that code still takes human time and energy. Submitting unverified or low-effort AI output puts an unfair burden on our maintainers.
Today, the quality of the AI output still heavily depends on the developer driving the tool. We ask that you respect our maintainers' time by thoroughly vetting, testing, and refining your submissions.
## AI is Welcome Here
LeRobot operates at the cutting edge of AI and robotics, and many of our maintainers actively embrace AI coding assistants as valuable productivity tools. We are a pro-AI project!
Our reason for having an AI policy is not an anti-AI stance. Rather, it exists to ensure that AI is used to enhance human contributions, not replace them with unverified noise. It's about how the tools are used, not the tools themselves.
We value the unique human insight you bring to the LeRobot community. Let AI empower your workflow, but always let your own judgment take the wheel.
+4 -4
View File
@@ -2,7 +2,7 @@
Everyone is welcome to contribute, and we value everybody's contribution. Code is not the only way to help the community. Answering questions, helping others, reaching out, and improving the documentation are immensely valuable.
Whichever way you choose to contribute, please be mindful to respect our [code of conduct](https://github.com/huggingface/lerobot/blob/main/CODE_OF_CONDUCT.md) and our [AI policy](https://github.com/huggingface/lerobot/blob/main/AI_POLICY.md).
Whichever way you choose to contribute, please be mindful to respect our [code of conduct](./CODE_OF_CONDUCT.md).
## Ways to Contribute
@@ -32,7 +32,7 @@ git remote add upstream https://github.com/huggingface/lerobot.git
### 2. Environment Installation
Please follow our [Installation Guide](https://huggingface.co/docs/lerobot/installation) for the environment setup & installation from source.
Please follow our [Installation Guide](./docs/source/installation.mdx) for the environment setup & installation from source.
## Running Tests & Quality Checks
@@ -75,8 +75,8 @@ pytest -sv tests/test_specific_feature.py
Use the templates for required fields and examples.
- **Issues:** Follow the [ticket template](https://github.com/huggingface/lerobot/blob/main/.github/ISSUE_TEMPLATE/bug-report.yml).
- **Pull requests:** Rebase on `upstream/main`, use a descriptive branch (don't work on `main`), run `pre-commit` and tests locally, and follow the [PR template](https://github.com/huggingface/lerobot/blob/main/.github/PULL_REQUEST_TEMPLATE.md).
- **Issues:** Follow the [ticket template](./.github/ISSUE_TEMPLATE/bug-report.yml).
- **Pull requests:** Rebase on `upstream/main`, use a descriptive branch (don't work on `main`), run `pre-commit` and tests locally, and follow the [PR template](./.github/PULL_REQUEST_TEMPLATE.md).
One member of the LeRobot team will then review your contribution.
-1
View File
@@ -1,3 +1,2 @@
include src/lerobot/templates/lerobot_modelcard_template.md
include src/lerobot/datasets/card_template.md
include src/lerobot/envs/metaworld_config.json
+7 -24
View File
@@ -100,11 +100,11 @@ lerobot-train \
--dataset.repo_id=lerobot/aloha_mobile_cabinet
```
| Category | Models |
| -------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| **Imitation Learning** | [ACT](./docs/source/policy_act_README.md), [Diffusion](./docs/source/policy_diffusion_README.md), [VQ-BeT](./docs/source/policy_vqbet_README.md), [Multitask DiT Policy](./docs/source/policy_multi_task_dit_README.md) |
| **Reinforcement Learning** | [HIL-SERL](./docs/source/hilserl.mdx), [TDMPC](./docs/source/policy_tdmpc_README.md) & QC-FQL (coming soon) |
| **VLAs Models** | [Pi0Fast](./docs/source/pi0fast.mdx), [Pi0.5](./docs/source/pi05.mdx), [GR00T N1.5](./docs/source/policy_groot_README.md), [SmolVLA](./docs/source/policy_smolvla_README.md), [XVLA](./docs/source/xvla.mdx) |
| Category | Models |
| -------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
| **Imitation Learning** | [ACT](./docs/source/policy_act_README.md), [Diffusion](./docs/source/policy_diffusion_README.md), [VQ-BeT](./docs/source/policy_vqbet_README.md) |
| **Reinforcement Learning** | [HIL-SERL](./docs/source/hilserl.mdx), [TDMPC](./docs/source/policy_tdmpc_README.md) & QC-FQL (coming soon) |
| **VLAs Models** | [Pi0Fast](./docs/source/pi0fast.mdx), [Pi0.5](./docs/source/pi05.mdx), [GR00T N1.5](./docs/source/policy_groot_README.md), [SmolVLA](./docs/source/policy_smolvla_README.md), [XVLA](./docs/source/xvla.mdx) |
Similarly to the hardware, you can easily implement your own policy & leverage LeRobot's data collection, training, and visualization tools, and share your model to the HF Hub
@@ -135,7 +135,7 @@ Learn how to implement your own simulation environment or benchmark and distribu
## Citation
If you use LeRobot in your project, please cite the GitHub repository to acknowledge the ongoing development and contributors:
If you use LeRobot in your research, please cite:
```bibtex
@misc{cadene2024lerobot,
@@ -146,26 +146,9 @@ If you use LeRobot in your project, please cite the GitHub repository to acknowl
}
```
If you are referencing our research or the academic paper, please also cite our ICLR publication:
<details>
<summary><b>ICLR 2026 Paper</b></summary>
```bibtex
@inproceedings{cadenelerobot,
title={LeRobot: An Open-Source Library for End-to-End Robot Learning},
author={Cadene, Remi and Alibert, Simon and Capuano, Francesco and Aractingi, Michel and Zouitine, Adil and Kooijmans, Pepijn and Choghari, Jade and Russi, Martino and Pascal, Caroline and Palma, Steven and Shukor, Mustafa and Moss, Jess and Soare, Alexander and Aubakirova, Dana and Lhoest, Quentin and Gallou\'edec, Quentin and Wolf, Thomas},
booktitle={The Fourteenth International Conference on Learning Representations},
year={2026},
url={https://arxiv.org/abs/2602.22818}
}
```
</details>
## Contribute
We welcome contributions from everyone in the community! To get started, please read our [CONTRIBUTING.md](https://github.com/huggingface/lerobot/blob/main/CONTRIBUTING.md) guide. Whether you're adding a new feature, improving documentation, or fixing a bug, your help and feedback are invaluable. We're incredibly excited about the future of open-source robotics and can't wait to work with you on what's next—thank you for your support!
We welcome contributions from everyone in the community! To get started, please read our [CONTRIBUTING.md](./CONTRIBUTING.md) guide. Whether you're adding a new feature, improving documentation, or fixing a bug, your help and feedback are invaluable. We're incredibly excited about the future of open-source robotics and can't wait to work with you on what's next—thank you for your support!
<p align="center">
<img alt="SO101 Video" src="./media/readme/so100_video.webp" width="640px">
+42 -42
View File
@@ -28,9 +28,9 @@ We don't expect the same optimal settings for a dataset of images from a simulat
For these reasons, we run this benchmark on four representative datasets:
- `lerobot/pusht_image`: (96 x 96 pixels) simulation with simple geometric shapes, fixed camera.
- `lerobot/aloha_mobile_shrimp_image`: (480 x 640 pixels) real-world indoor, moving camera.
- `lerobot/paris_street`: (720 x 1280 pixels) real-world outdoor, moving camera.
- `lerobot/kitchen`: (1080 x 1920 pixels) real-world indoor, fixed camera.
- `aliberts/aloha_mobile_shrimp_image`: (480 x 640 pixels) real-world indoor, moving camera.
- `aliberts/paris_street`: (720 x 1280 pixels) real-world outdoor, moving camera.
- `aliberts/kitchen`: (1080 x 1920 pixels) real-world indoor, fixed camera.
Note: The datasets used for this benchmark need to be image datasets, not video datasets.
@@ -179,7 +179,7 @@ python benchmark/video/run_video_benchmark.py \
--output-dir outputs/video_benchmark \
--repo-ids \
lerobot/pusht_image \
lerobot/aloha_mobile_shrimp_image \
aliberts/aloha_mobile_shrimp_image \
--vcodec libx264 libx265 \
--pix-fmt yuv444p yuv420p \
--g 2 20 None \
@@ -203,9 +203,9 @@ python benchmark/video/run_video_benchmark.py \
--output-dir outputs/video_benchmark \
--repo-ids \
lerobot/pusht_image \
lerobot/aloha_mobile_shrimp_image \
lerobot/paris_street \
lerobot/kitchen \
aliberts/aloha_mobile_shrimp_image \
aliberts/paris_street \
aliberts/kitchen \
--vcodec libx264 libx265 \
--pix-fmt yuv444p yuv420p \
--g 1 2 3 4 5 6 10 15 20 40 None \
@@ -221,9 +221,9 @@ python benchmark/video/run_video_benchmark.py \
--output-dir outputs/video_benchmark \
--repo-ids \
lerobot/pusht_image \
lerobot/aloha_mobile_shrimp_image \
lerobot/paris_street \
lerobot/kitchen \
aliberts/aloha_mobile_shrimp_image \
aliberts/paris_street \
aliberts/kitchen \
--vcodec libsvtav1 \
--pix-fmt yuv420p \
--g 1 2 3 4 5 6 10 15 20 40 None \
@@ -252,37 +252,37 @@ Since we're using av1 encoding, we're choosing the `pyav` decoder as `video_read
These tables show the results for `g=2` and `crf=30`, using `timestamps-modes=6_frames` and `backend=pyav`
| video_images_size_ratio | vcodec | pix_fmt | | | |
| --------------------------------- | ---------- | ------- | --------- | --------- | --------- |
| | libx264 | | libx265 | | libsvtav1 |
| repo_id | yuv420p | yuv444p | yuv420p | yuv444p | yuv420p |
| lerobot/pusht_image | **16.97%** | 17.58% | 18.57% | 18.86% | 22.06% |
| lerobot/aloha_mobile_shrimp_image | 2.14% | 2.11% | 1.38% | **1.37%** | 5.59% |
| lerobot/paris_street | 2.12% | 2.13% | **1.54%** | **1.54%** | 4.43% |
| lerobot/kitchen | 1.40% | 1.39% | **1.00%** | **1.00%** | 2.52% |
| video_images_size_ratio | vcodec | pix_fmt | | | |
| ---------------------------------- | ---------- | ------- | --------- | --------- | --------- |
| | libx264 | | libx265 | | libsvtav1 |
| repo_id | yuv420p | yuv444p | yuv420p | yuv444p | yuv420p |
| lerobot/pusht_image | **16.97%** | 17.58% | 18.57% | 18.86% | 22.06% |
| aliberts/aloha_mobile_shrimp_image | 2.14% | 2.11% | 1.38% | **1.37%** | 5.59% |
| aliberts/paris_street | 2.12% | 2.13% | **1.54%** | **1.54%** | 4.43% |
| aliberts/kitchen | 1.40% | 1.39% | **1.00%** | **1.00%** | 2.52% |
| video_images_load_time_ratio | vcodec | pix_fmt | | | |
| --------------------------------- | ------- | ------- | -------- | ------- | --------- |
| | libx264 | | libx265 | | libsvtav1 |
| repo_id | yuv420p | yuv444p | yuv420p | yuv444p | yuv420p |
| lerobot/pusht_image | 6.45 | 5.19 | **1.90** | 2.12 | 2.47 |
| lerobot/aloha_mobile_shrimp_image | 11.80 | 7.92 | 0.71 | 0.85 | **0.48** |
| lerobot/paris_street | 2.21 | 2.05 | 0.36 | 0.49 | **0.30** |
| lerobot/kitchen | 1.46 | 1.46 | 0.28 | 0.51 | **0.26** |
| video_images_load_time_ratio | vcodec | pix_fmt | | | |
| ---------------------------------- | ------- | ------- | -------- | ------- | --------- |
| | libx264 | | libx265 | | libsvtav1 |
| repo_id | yuv420p | yuv444p | yuv420p | yuv444p | yuv420p |
| lerobot/pusht_image | 6.45 | 5.19 | **1.90** | 2.12 | 2.47 |
| aliberts/aloha_mobile_shrimp_image | 11.80 | 7.92 | 0.71 | 0.85 | **0.48** |
| aliberts/paris_street | 2.21 | 2.05 | 0.36 | 0.49 | **0.30** |
| aliberts/kitchen | 1.46 | 1.46 | 0.28 | 0.51 | **0.26** |
| | | vcodec | pix_fmt | | | |
| --------------------------------- | -------- | -------- | ------------ | -------- | --------- | ------------ |
| | | libx264 | | libx265 | | libsvtav1 |
| repo_id | metric | yuv420p | yuv444p | yuv420p | yuv444p | yuv420p |
| lerobot/pusht_image | avg_mse | 2.90E-04 | **2.03E-04** | 3.13E-04 | 2.29E-04 | 2.19E-04 |
| | avg_psnr | 35.44 | 37.07 | 35.49 | **37.30** | 37.20 |
| | avg_ssim | 98.28% | **98.85%** | 98.31% | 98.84% | 98.72% |
| lerobot/aloha_mobile_shrimp_image | avg_mse | 2.76E-04 | 2.59E-04 | 3.17E-04 | 3.06E-04 | **1.30E-04** |
| | avg_psnr | 35.91 | 36.21 | 35.88 | 36.09 | **40.17** |
| | avg_ssim | 95.19% | 95.18% | 95.00% | 95.05% | **97.73%** |
| lerobot/paris_street | avg_mse | 6.89E-04 | 6.70E-04 | 4.03E-03 | 4.02E-03 | **3.09E-04** |
| | avg_psnr | 33.48 | 33.68 | 32.05 | 32.15 | **35.40** |
| | avg_ssim | 93.76% | 93.75% | 89.46% | 89.46% | **95.46%** |
| lerobot/kitchen | avg_mse | 2.50E-04 | 2.24E-04 | 4.28E-04 | 4.18E-04 | **1.53E-04** |
| | avg_psnr | 36.73 | 37.33 | 36.56 | 36.75 | **39.12** |
| | avg_ssim | 95.47% | 95.58% | 95.52% | 95.53% | **96.82%** |
| | | vcodec | pix_fmt | | | |
| ---------------------------------- | -------- | -------- | ------------ | -------- | --------- | ------------ |
| | | libx264 | | libx265 | | libsvtav1 |
| repo_id | metric | yuv420p | yuv444p | yuv420p | yuv444p | yuv420p |
| lerobot/pusht_image | avg_mse | 2.90E-04 | **2.03E-04** | 3.13E-04 | 2.29E-04 | 2.19E-04 |
| | avg_psnr | 35.44 | 37.07 | 35.49 | **37.30** | 37.20 |
| | avg_ssim | 98.28% | **98.85%** | 98.31% | 98.84% | 98.72% |
| aliberts/aloha_mobile_shrimp_image | avg_mse | 2.76E-04 | 2.59E-04 | 3.17E-04 | 3.06E-04 | **1.30E-04** |
| | avg_psnr | 35.91 | 36.21 | 35.88 | 36.09 | **40.17** |
| | avg_ssim | 95.19% | 95.18% | 95.00% | 95.05% | **97.73%** |
| aliberts/paris_street | avg_mse | 6.89E-04 | 6.70E-04 | 4.03E-03 | 4.02E-03 | **3.09E-04** |
| | avg_psnr | 33.48 | 33.68 | 32.05 | 32.15 | **35.40** |
| | avg_ssim | 93.76% | 93.75% | 89.46% | 89.46% | **95.46%** |
| aliberts/kitchen | avg_mse | 2.50E-04 | 2.24E-04 | 4.28E-04 | 4.18E-04 | **1.53E-04** |
| | avg_psnr | 36.73 | 37.33 | 36.56 | 36.75 | **39.12** |
| | avg_ssim | 95.47% | 95.58% | 95.52% | 95.53% | **96.82%** |
+1 -3
View File
@@ -24,7 +24,7 @@ ARG OS_VERSION=22.04
FROM nvidia/cuda:${CUDA_VERSION}-base-ubuntu${OS_VERSION}
# Define Python version argument
ARG PYTHON_VERSION=3.12
ARG PYTHON_VERSION=3.10
# Configure environment variables
ENV DEBIAN_FRONTEND=noninteractive \
@@ -85,8 +85,6 @@ RUN if [ "$UNBOUND_DEPS" = "true" ]; then \
RUN uv pip install --no-cache ".[all]"
RUN chmod +x /lerobot/.venv/lib/python${PYTHON_VERSION}/site-packages/triton/backends/nvidia/bin/ptxas
# Copy the rest of the application source code
# Make sure to have the git-LFS files for testing
COPY --chown=user_lerobot:user_lerobot . .
+1 -3
View File
@@ -18,10 +18,8 @@
# docker build -f docker/Dockerfile.user -t lerobot-user .
# docker run -it --rm lerobot-user
# With USB physical access : docker run -it --device=/dev/ -v /dev/:/dev/ --rm lerobot-user
# Configure the base image
ARG PYTHON_VERSION=3.12
ARG PYTHON_VERSION=3.10
FROM python:${PYTHON_VERSION}-slim
# Configure environment variables
-10
View File
@@ -19,10 +19,6 @@
title: Multi GPU training
- local: peft_training
title: Training with PEFT (e.g., LoRA)
- local: rename_map
title: Using Rename Map and Empty Cameras
- local: umi_pi0_relative_ee
title: UMI Data with pi0 Relative EE Actions
title: "Tutorials"
- sections:
- local: lerobot-dataset-v3
@@ -33,8 +29,6 @@
title: Using the Dataset Tools
- local: dataset_subtask
title: Using Subtasks in the Dataset
- local: streaming_video_encoding
title: Streaming Video Encoding
title: "Datasets"
- sections:
- local: act
@@ -51,8 +45,6 @@
title: NVIDIA GR00T N1.5
- local: xvla
title: X-VLA
- local: multi_task_dit
title: Multitask DiT Policy
- local: walloss
title: WALL-OSS
title: "Policies"
@@ -89,8 +81,6 @@
title: Processors for Robots and Teleoperators
- local: env_processor
title: Environment Processors
- local: action_representations
title: Action Representations
title: "Robot Processors"
- sections:
- local: so101
-3
View File
@@ -88,8 +88,5 @@ lerobot-record \
--dataset.repo_id=${HF_USER}/eval_act_your_dataset \
--dataset.num_episodes=10 \
--dataset.single_task="Your task description" \
--dataset.streaming_encoding=true \
--dataset.encoder_threads=2 \
# --dataset.vcodec=auto \
--policy.path=${HF_USER}/act_policy
```
-238
View File
@@ -1,238 +0,0 @@
# Action Representations
This guide explains the different ways robot actions can be represented in LeRobot, how they relate to each other, and when to use each one.
## Joint Space vs End-Effector Space
Before discussing action representations, it helps to understand the two coordinate spaces actions can live in.
### Joint Space
Joint-space actions directly specify target positions for each motor. For a 6-DOF arm with a gripper, a joint-space action might look like:
```
action = [shoulder_pan: 45.0, shoulder_lift: -20.0, elbow: -30.0, wrist_pitch: 10.0, wrist_roll: 0.0, wrist_yaw: 5.0, gripper: 0.8]
```
Joint space is the default in LeRobot. It is simple, requires no kinematics model, and maps directly to motor commands. Most beginner setups (SO-100, Koch) use joint-space actions.
### End-Effector (EE) Space
End-effector-space actions specify the desired position and orientation of the robot's tool tip (gripper) in Cartesian coordinates:
```
action = [x: 0.25, y: -0.10, z: 0.15, wx: 0.0, wy: 0.0, wz: 0.1, gripper: 0.8]
```
EE space is more intuitive for tasks like pick-and-place because it directly describes where the gripper should go, but it requires a kinematics model (URDF) to convert between EE poses and joint angles.
### Converting Between Spaces
LeRobot provides processor steps for converting between joint and EE spaces using forward and inverse kinematics. These are built on top of `RobotKinematics`, which loads a URDF model of your robot.
```python
from lerobot.model.kinematics import RobotKinematics
from lerobot.robots.so_follower.robot_kinematic_processor import (
ForwardKinematicsJointsToEE,
InverseKinematicsEEToJoints,
)
kinematics = RobotKinematics(
urdf_path="./SO101/so101_new_calib.urdf",
target_frame_name="gripper_frame_link",
joint_names=["shoulder", "elbow", "wrist_pitch", "wrist_roll", "wrist_yaw"],
)
# Joints → EE (for observations: "where is my gripper?")
fk_step = ForwardKinematicsJointsToEE(kinematics=kinematics, motor_names=[...])
# EE → Joints (for actions: "move my gripper here")
ik_step = InverseKinematicsEEToJoints(kinematics=kinematics, motor_names=[...])
```
See [`examples/so100_to_so100_EE/`](https://github.com/huggingface/lerobot/tree/main/examples/so100_to_so100_EE) for a complete working example of recording, replaying, and evaluating with EE-space actions on an SO-100 arm.
## Absolute, Relative, and Delta Actions
Regardless of whether you work in joint space or EE space, the action values can be expressed in three different ways. The terminology follows [UMI (Chi et al., 2024)](https://arxiv.org/abs/2402.10329).
### Absolute Actions (LeRobot default)
Each action specifies the target position directly.
**Example** (joint space, chunk of 4):
```
current_state = [45.0, -30.0, 10.0]
action_chunk = [
[46.0, -29.0, 11.0], # go to 46, -29, 11
[47.5, -27.0, 12.0], # go to 47.5, -27, 12
[49.0, -25.0, 13.5], # go to 49, -25, 13.5
[50.0, -24.0, 15.0], # go to 50, -24, 15
]
```
Each value is a target position in the robot's coordinate frame. Simple and direct, but requires a consistent global coordinate frame. This is the default in LeRobot.
### Relative Actions (used by OpenPI / pi0)
Each action in the chunk is an offset from the **current state at the moment of prediction**. All actions in the chunk share the same reference point:
```
current_state = [45.0, -30.0, 10.0]
relative_chunk = [
[1.0, 1.0, 1.0], # +1 from current → target 46, -29, 11
[2.5, 3.0, 2.0], # +2.5 from current → target 47.5, -27, 12
[4.0, 5.0, 3.5], # +4 from current → target 49, -25, 13.5
[5.0, 6.0, 5.0], # +5 from current → target 50, -24, 15
]
```
The conversion is straightforward: `relative = absolute - current_state`. To recover absolute: `absolute = relative + current_state`.
**Why use relative actions?** The model learns to predict offsets centered around zero, which is easier to normalize and leads to more stable training. Because every chunk references the same current state, there is no error accumulation across chunks.
### Delta Actions (sequential differences)
Each action is an offset from the **previous action** (or from the current state for the first step):
```
current_state = [45.0, -30.0, 10.0]
delta_chunk = [
[1.0, 1.0, 1.0], # current → 46, -29, 11
[1.5, 2.0, 1.0], # previous action → 47.5, -27, 12
[1.5, 2.0, 1.5], # previous action → 49, -25, 13.5
[1.0, 1.0, 1.5], # previous action → 50, -24, 15
]
```
Here each step is relative to the one before it. To recover absolute positions you must sum all previous deltas, which means errors accumulate over time. UMI explicitly argues against this representation for this reason.
### Visual Comparison
The figure below (based on a figure from [UMI, Chi et al., 2024](https://arxiv.org/abs/2402.10329)) illustrates the key difference. With **relative trajectory**, every action in the chunk points back to the same origin (current state), so a new inference step cleanly resets the reference. With **delta**, each action depends on the previous one, so errors accumulate. **Absolute** actions require a consistent global coordinate frame.
<img
src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/lerobot/action_representations_umi.png"
alt="Relative Trajectory as Action Representation (UMI, Chi et al., 2024)"
width="85%"
/>
## Using Relative Actions in LeRobot
LeRobot provides `RelativeActionsProcessorStep` to convert between absolute and relative actions inside the processor pipeline. This is how pi0, pi0.5, and pi0_fast support relative actions.
> **Note:** All pi models (pi0, pi0.5, pi0*fast) apply relative conversion \_before* normalization (`relative → normalize`), so the normalizer always sees delta (relative) values. This means **relative action stats are required** for all of them when training with `use_relative_actions=true`. In pi0_fast the `RelativeActionsProcessorStep` only modifies the action — the state observation is unchanged — so `NormalizerProcessorStep` still runs before the state tokenizer and the tokenizer continues to receive normalized state as expected.
### How it works
During **training** (preprocessing), actions are converted from absolute to relative before the model sees them:
```
raw absolute action → RelativeActionsProcessorStep → normalize → model
```
During **inference** (postprocessing), model predictions are converted back to absolute before being sent to the robot:
```
model output → unnormalize → AbsoluteActionsProcessorStep → robot
```
The `AbsoluteActionsProcessorStep` reads the cached current state from its paired `RelativeActionsProcessorStep`, so the two must be wired together (handled automatically by the policy factory).
### Enabling relative actions for the pi family (pi0, pi0.5, pi0_fast)
**Step 1**: Precompute relative action statistics for your dataset:
```bash
lerobot-edit-dataset \
--repo_id your_dataset \
--operation.type recompute_stats \
--operation.relative_action true \
--operation.chunk_size 50 \
--operation.relative_exclude_joints "['gripper']"
```
**Step 2**: Train with relative actions enabled:
```bash
lerobot-train \
--dataset.repo_id=your_dataset \
--policy.type=pi0 \
--policy.use_relative_actions=true \
--policy.relative_exclude_joints='["gripper"]'
```
The `relative_exclude_joints` parameter specifies joints that should remain in absolute space. For example, gripper commands are typically binary (open/close) and don't benefit from relative encoding.
### Combining relative actions with RTC
[RTC](https://arxiv.org/abs/2506.07339) runs policy inference at high frequency and sends actions to the robot as they are predicted rather than waiting for a full chunk. Relative actions and RTC are fully compatible: because every chunk in relative mode references the **same** current state (captured at the start of inference), each predicted action in the chunk remains a valid offset even if the robot has already moved. No special handling is needed — `RelativeActionsProcessorStep` caches the state once per inference call and `AbsoluteActionsProcessorStep` applies it to every action in the streamed output.
### Combining relative actions with EE space
Relative actions work in both joint space and EE space. For example, if your dataset stores EE actions, relative encoding converts them to offsets from the current EE pose:
```
current_ee_state = [x: 0.25, y: -0.10, z: 0.15, gripper: 0.8]
absolute_ee_chunk = [
[0.26, -0.09, 0.16, 0.8],
[0.28, -0.07, 0.18, 0.8],
]
relative_ee_chunk = [
[0.01, 0.01, 0.01, 0.0], # offset from current EE pose
[0.03, 0.03, 0.03, 0.0], # offset from current EE pose
]
```
## Processing Pipeline Summary
Here is how the different processors compose. Each arrow is a processor step, and they can be chained in a `RobotProcessorPipeline` or `PolicyProcessorPipeline`:
```
┌─────────────────────────────────────────┐
Action Space │ Joint Space ←──IK──→ EE Space │
│ ForwardKinematicsJointsToEE │
│ InverseKinematicsEEToJoints │
└─────────────────────────────────────────┘
┌─────────────────────────────────────────┐
State Derivation │ Action column ────→ State + Action │
│ DeriveStateFromActionStep (pre only) │
│ (UMI-style: state from action chunk) │
└─────────────────────────────────────────┘
┌─────────────────────────────────────────┐
Action Repr. │ Absolute ←────→ Relative │
│ RelativeActionsProcessorStep (pre) │
│ AbsoluteActionsProcessorStep (post) │
└─────────────────────────────────────────┘
┌─────────────────────────────────────────┐
State Repr. │ Absolute ────→ Relative │
│ RelativeStateProcessorStep (pre only) │
└─────────────────────────────────────────┘
┌─────────────────────────────────────────┐
Normalization │ Raw ←────→ Normalized │
│ NormalizerProcessorStep (pre) │
│ UnnormalizerProcessorStep (post) │
└─────────────────────────────────────────┘
```
A typical training preprocessor might chain: `raw absolute joint actions → relative → normalize`. A typical inference postprocessor: `unnormalize → absolute → (optionally IK to joints)`.
With UMI-style relative proprioception (`use_relative_state=True`), the preprocessor also converts observation.state to offsets from the current timestep via `RelativeStateProcessorStep` before normalization. This is a pre-processing-only step (state is an input, not an output).
With `derive_state_from_action=True`, the preprocessor first runs `DeriveStateFromActionStep` to extract a 2-step state from the extended action chunk. This enables full UMI-style training without a separate `observation.state` column. See the [UMI pi0 guide](umi_pi0_relative_ee) for details.
## References
- [Universal Manipulation Interface (UMI)](https://arxiv.org/abs/2402.10329) - Chi et al., 2024. Defines the relative trajectory action representation and compares it with absolute and delta actions.
- [Introduction to Processors](./introduction_processors) - How processor pipelines work in LeRobot.
- [`examples/so100_to_so100_EE/`](https://github.com/huggingface/lerobot/tree/main/examples/so100_to_so100_EE) - Complete example of recording and evaluating with EE-space actions.
+2 -2
View File
@@ -48,7 +48,7 @@ python -m lerobot.async_inference.robot_client \
--task="dummy" \ # POLICY: The task to run the policy on (`Fold my t-shirt`). Not necessarily defined for all policies, such as `act`
--policy_type=your_policy_type \ # POLICY: the type of policy to run (smolvla, act, etc)
--pretrained_name_or_path=user/model \ # POLICY: the model name/path on server to the checkpoint to run (e.g., lerobot/smolvla_base)
--policy_device=mps \ # POLICY: the device to run the policy on, on the server (cuda, mps, xpu, cpu)
--policy_device=mps \ # POLICY: the device to run the policy on, on the server
--actions_per_chunk=50 \ # POLICY: the number of actions to output at once
--chunk_size_threshold=0.5 \ # CLIENT: the threshold for the chunk size before sending a new observation to the server
--aggregate_fn_name=weighted_average \ # CLIENT: the function to aggregate actions on overlapping portions
@@ -310,4 +310,4 @@ Asynchronous inference represents a significant advancement in real-time robotic
- **Universal Compatibility**: Works with all LeRobot-supported policies, from lightweight ACT models to vision-language models like SmolVLA
Start experimenting with the default parameters, monitor your action queue sizes, and iteratively refine your setup to achieve optimal performance for your specific use case.
If you want to discuss this further, hop into our [Discord community](https://discord.gg/s3KuuzsPFb), or open an issue on our [GitHub repository](https://github.com/huggingface/lerobot/issues).
If you want to discuss this further, hop into our [Discord community](https://discord.gg/s3KuuzsPFb), or open an issue on our [GitHub repository](https://github.com/lerobot/lerobot/issues).
+17 -89
View File
@@ -32,7 +32,7 @@ version = "0.1.0"
dependencies = [
# your policy-specific dependencies
]
requires-python = ">= 3.12"
requires-python = ">= 3.11"
[build-system]
build-backend = # your-build-backend
@@ -41,15 +41,13 @@ requires = # your-build-system
## Step 2: Define the Policy Configuration
Create a configuration class that inherits from [`PreTrainedConfig`](https://github.com/huggingface/lerobot/blob/main/src/lerobot/configs/policies.py) and registers your policy type:
Here is a template to get you started, customize the parameters and methods as needed for your policy's architecture and training requirements.
Create a configuration class that inherits from `PreTrainedConfig` and registers your policy type:
```python
# configuration_my_custom_policy.py
from dataclasses import dataclass, field
from lerobot.configs.policies import PreTrainedConfig
from lerobot.optim.optimizers import AdamWConfig
from lerobot.optim.schedulers import CosineDecayWithWarmupSchedulerConfig
from lerobot.configs.types import NormalizationMode
@PreTrainedConfig.register_subclass("my_custom_policy")
@dataclass
@@ -63,131 +61,61 @@ class MyCustomPolicyConfig(PreTrainedConfig):
hidden_dim: Hidden dimension for the policy network
# Add your policy-specific parameters here
"""
horizon: int = 50
n_action_steps: int = 50
hidden_dim: int = 256
optimizer_lr: float = 1e-4
optimizer_weight_decay: float = 1e-4
# ...PreTrainedConfig fields...
pass
def __post_init__(self):
super().__post_init__()
if self.n_action_steps > self.horizon:
raise ValueError("n_action_steps cannot exceed horizon")
# Add any validation logic here
def validate_features(self) -> None:
"""Validate input/output feature compatibility."""
if not self.image_features:
raise ValueError("MyCustomPolicy requires at least one image feature.")
if self.action_feature is None:
raise ValueError("MyCustomPolicy requires 'action' in output_features.")
def get_optimizer_preset(self) -> AdamWConfig:
return AdamWConfig(lr=self.optimizer_lr, weight_decay=self.optimizer_weight_decay)
def get_scheduler_preset(self):
return None
@property
def observation_delta_indices(self) -> list[int] | None:
"""Relative timestep offsets the dataset loader provides per observation.
Return `None` for single-frame policies. For temporal policies that consume
multiple past or future frames, return a list of offsets, e.g. `[-20, -10, 0, 10]` for
3 past frames at stride 10 and 1 future frame at stride 10.
"""
return None
@property
def action_delta_indices(self) -> list[int]:
"""Relative timestep offsets for the action chunk the dataset loader returns.
"""
return list(range(self.horizon))
@property
def reward_delta_indices(self) -> None:
return None
# Implement validation logic for your policy's requirements
pass
```
## Step 3: Implement the Policy Class
Create your policy implementation by inheriting from [`PreTrainedPolicy`](https://github.com/huggingface/lerobot/blob/main/src/lerobot/policies/pretrained.py):
Create your policy implementation by inheriting from LeRobot's base `PreTrainedPolicy` class:
```python
# modeling_my_custom_policy.py
import torch
import torch.nn as nn
from typing import Any
from typing import Dict, Any
from lerobot.policies.pretrained import PreTrainedPolicy
from lerobot.utils.constants import ACTION
from .configuration_my_custom_policy import MyCustomPolicyConfig
class MyCustomPolicy(PreTrainedPolicy):
config_class = MyCustomPolicyConfig # must match the string in @register_subclass
config_class = MyCustomPolicyConfig
name = "my_custom_policy"
def __init__(self, config: MyCustomPolicyConfig, dataset_stats: dict[str, Any] = None):
def __init__(self, config: MyCustomPolicyConfig, dataset_stats: Dict[str, Any] = None):
super().__init__(config, dataset_stats)
config.validate_features() # not called automatically by the base class
self.config = config
self.model = ... # your nn.Module here
def reset(self):
"""Reset episode state."""
...
def get_optim_params(self) -> dict:
"""Return parameters to pass to the optimizer (e.g. with per-group lr/wd)."""
return {"params": self.parameters()}
def predict_action_chunk(self, batch: dict[str, torch.Tensor], **kwargs) -> torch.Tensor:
"""Return the full action chunk (B, chunk_size, action_dim) for the current observation."""
...
def select_action(self, batch: dict[str, torch.Tensor], **kwargs) -> torch.Tensor:
"""Return a single action for the current timestep (called at inference)."""
...
def forward(self, batch: dict[str, torch.Tensor]) -> dict[str, torch.Tensor]:
"""Compute the training loss.
`batch["action_is_pad"]` is a bool mask of shape (B, horizon) that marks
timesteps padded because the episode ended before `horizon` steps, you
can exclude those from your loss.
"""
actions = batch[ACTION]
action_is_pad = batch.get("action_is_pad")
...
return {"loss": ...}
```
## Step 4: Add Data Processors
Create processor functions. For a concrete reference, see [processor_act.py](https://github.com/huggingface/lerobot/blob/main/src/lerobot/policies/act/processor_act.py) or [processor_diffusion.py](https://github.com/huggingface/lerobot/blob/main/src/lerobot/policies/diffusion/processor_diffusion.py).
Create processor functions:
```python
# processor_my_custom_policy.py
from typing import Any
from typing import Dict, Any
import torch
from lerobot.processor import PolicyAction, PolicyProcessorPipeline
def make_my_custom_policy_pre_post_processors(
config,
dataset_stats: dict[str, dict[str, torch.Tensor]] | None = None,
) -> tuple[
PolicyProcessorPipeline[dict[str, Any], dict[str, Any]],
PolicyProcessorPipeline[PolicyAction, PolicyAction],
]:
preprocessor = ... # build your PolicyProcessorPipeline for inputs
postprocessor = ... # build your PolicyProcessorPipeline for outputs
return preprocessor, postprocessor
```
"""Create preprocessing and postprocessing functions for your policy."""
pass # Define your preprocessing and postprocessing logic here
**Important - function naming:** LeRobot discovers your processor by name. The function **must** be called `make_{policy_name}_pre_post_processors` (matching the string you passed to `@PreTrainedConfig.register_subclass`).
```
## Step 5: Package Initialization
+11 -18
View File
@@ -13,7 +13,7 @@ The EarthRover Mini Plus is a fully open source mobile robot that connects throu
### Hardware
- EarthRover Mini robot
- Computer with Python 3.12 or newer
- Computer with Python 3.10 or newer
- Internet connection
### Setting Up the Frodobots SDK
@@ -170,13 +170,13 @@ Once you can drive the robot well, you can start recording data to train AI mode
We use Hugging Face to store your data online. First, log in with your token from [Hugging Face settings](https://huggingface.co/settings/tokens):
```bash
hf auth login --token ${HUGGINGFACE_TOKEN} --add-to-git-credential
huggingface-cli login --token ${HUGGINGFACE_TOKEN} --add-to-git-credential
```
Store your Hugging Face username:
```bash
HF_USER=$(hf auth whoami | awk -F': *' 'NR==1 {print $2}')
HF_USER=$(huggingface-cli whoami | head -n 1)
echo $HF_USER
```
@@ -185,16 +185,13 @@ echo $HF_USER
Use the standard recording command:
```bash
lerobot-record \
python src/lerobot/scripts/lerobot_record.py \
--robot.type=earthrover_mini_plus \
--teleop.type=keyboard_rover \
--dataset.repo_id=your_username/dataset_name \
--dataset.num_episodes=2 \
--dataset.fps=10 \
--dataset.single_task="Navigate around obstacles" \
--dataset.streaming_encoding=true \
--dataset.encoder_threads=2 \
# --dataset.vcodec=auto \
--display_data=true
```
@@ -204,26 +201,22 @@ Replace `your_username/dataset_name` with your Hugging Face username and a name
Your dataset includes:
**Your Actions (2 features)**:
**Your Actions (2 things)**:
- `linear_velocity`: How much you moved forward/backward
- `angular_velocity`: How much you turned left/right
- How much you moved forward/backward
- How much you turned left/right
**Robot Observations (24 features)**:
**Robot Observations (12 things)**:
- Front camera video
- Rear camera video
- Current speed
- Battery level
- Orientation
- GPS (latitude, longitude, signal strength)
- Which way the robot is facing
- GPS location (latitude, longitude, signal strength)
- Network signal strength
- Vibration level
- Lamp state (on/off)
- Accelerometer (x, y, z)
- Gyroscope (x, y, z)
- Magnetometer (x, y, z)
- Wheel RPMs (4 wheels)
- Lamp status (on/off)
### Where Your Data Goes
+2 -2
View File
@@ -155,10 +155,10 @@ Upload your repository to Hugging Face:
pip install huggingface_hub
# Login to Hugging Face
hf auth login
huggingface-cli login
# Create a new repository
hf repo create my-org/my-custom-env
huggingface-cli repo create my-custom-env --type space --org my-org
# Initialize git and push
git init
+3 -6
View File
@@ -120,12 +120,9 @@ lerobot-record \
--display_data=true \
--dataset.repo_id=<user>/eval_groot-bimanual \
--dataset.num_episodes=10 \
--dataset.single_task="Grab and handover the red cube to the other arm" \
--dataset.streaming_encoding=true \
--dataset.encoder_threads=2 \
# --dataset.vcodec=auto \
--policy.path=<user>/groot-bimanual \ # your trained model
--dataset.episode_time_s=30 \
--dataset.single_task="Grab and handover the red cube to the other arm"
--policy.path=<user>/groot-bimanual # your trained model
--dataset.episode_time_s=30
--dataset.reset_time_s=10
```
+5 -11
View File
@@ -224,15 +224,12 @@ lerobot-record \
--teleop.port=/dev/tty.usbmodem1201 \
--teleop.id=right \
--teleop.side=right \
--dataset.repo_id=<USER>/hand_record_test_with_video_data \
--dataset.repo_id=nepyope/hand_record_test_with_video_data \
--dataset.single_task="Hand recording test with video data" \
--dataset.num_episodes=1 \
--dataset.episode_time_s=5 \
--dataset.push_to_hub=true \
--dataset.private=true \
--dataset.streaming_encoding=true \
--dataset.encoder_threads=2 \
# --dataset.vcodec=auto \
--display_data=true
```
@@ -244,7 +241,7 @@ lerobot-replay \
--robot.port=/dev/tty.usbmodem58760432281 \
--robot.id=right \
--robot.side=right \
--dataset.repo_id=<USER>/hand_record_test_with_camera \
--dataset.repo_id=nepyope/hand_record_test_with_camera \
--dataset.episode=0
```
@@ -252,13 +249,13 @@ lerobot-replay \
```bash
lerobot-train \
--dataset.repo_id=<USER>/hand_record_test_with_video_data \
--dataset.repo_id=nepyope/hand_record_test_with_video_data \
--policy.type=act \
--output_dir=outputs/train/hopejr_hand \
--job_name=hopejr \
--policy.device=mps \
--wandb.enable=true \
--policy.repo_id=<USER>/hand_test_policy
--policy.repo_id=nepyope/hand_test_policy
```
### Evaluate
@@ -273,11 +270,8 @@ lerobot-record \
--robot.side=right \
--robot.cameras='{"main": {"type": "opencv", "index_or_path": 0, "width": 640, "height": 480, "fps": 30}}' \
--display_data=false \
--dataset.repo_id=<USER>/eval_hopejr \
--dataset.repo_id=nepyope/eval_hopejr \
--dataset.single_task="Evaluate hopejr hand policy" \
--dataset.num_episodes=10 \
--dataset.streaming_encoding=true \
--dataset.encoder_threads=2 \
# --dataset.vcodec=auto \
--policy.path=outputs/train/hopejr_hand/checkpoints/last/pretrained_model
```
+7 -13
View File
@@ -159,13 +159,13 @@ We use the Hugging Face hub features for uploading your dataset. If you haven't
Add your token to the CLI by running this command:
```bash
hf auth login --token ${HUGGINGFACE_TOKEN} --add-to-git-credential
huggingface-cli login --token ${HUGGINGFACE_TOKEN} --add-to-git-credential
```
Then store your Hugging Face repository name in a variable:
```bash
HF_USER=$(NO_COLOR=1 hf auth whoami | awk -F': *' 'NR==1 {print $2}')
HF_USER=$(hf auth whoami | head -n 1)
echo $HF_USER
```
@@ -185,10 +185,7 @@ lerobot-record \
--display_data=true \
--dataset.repo_id=${HF_USER}/record-test \
--dataset.num_episodes=5 \
--dataset.single_task="Grab the black cube" \
--dataset.streaming_encoding=true \
# --dataset.vcodec=auto \
--dataset.encoder_threads=2
--dataset.single_task="Grab the black cube"
```
</hfoption>
<hfoption id="API example">
@@ -327,7 +324,7 @@ You can look for other LeRobot datasets on the hub by searching for `LeRobot` [t
You can also push your local dataset to the Hub manually, running:
```bash
hf upload ${HF_USER}/record-test ~/.cache/huggingface/lerobot/{repo-id} --repo-type dataset
huggingface-cli upload ${HF_USER}/record-test ~/.cache/huggingface/lerobot/{repo-id} --repo-type dataset
```
#### Record function
@@ -424,7 +421,7 @@ robot = SO100Follower(robot_config)
robot.connect()
dataset = LeRobotDataset("<hf_username>/<dataset_repo_id>", episodes=[episode_idx])
actions = dataset.select_columns("action")
actions = dataset.hf_dataset.select_columns("action")
log_say(f"Replaying episode {episode_idx}")
for idx in range(dataset.num_frames):
@@ -491,7 +488,7 @@ If your local computer doesn't have a powerful GPU you could utilize Google Cola
Once training is done, upload the latest checkpoint with:
```bash
hf upload ${HF_USER}/act_so101_test \
huggingface-cli upload ${HF_USER}/act_so101_test \
outputs/train/act_so101_test/checkpoints/last/pretrained_model
```
@@ -499,7 +496,7 @@ You can also upload intermediate checkpoints with:
```bash
CKPT=010000
hf upload ${HF_USER}/act_so101_test${CKPT} \
huggingface-cli upload ${HF_USER}/act_so101_test${CKPT} \
outputs/train/act_so101_test/checkpoints/${CKPT}/pretrained_model
```
@@ -518,9 +515,6 @@ lerobot-record \
--display_data=false \
--dataset.repo_id=${HF_USER}/eval_so100 \
--dataset.single_task="Put lego brick into the transparent box" \
--dataset.streaming_encoding=true \
--dataset.encoder_threads=2 \
# --dataset.vcodec=auto \
# <- Teleop optional if you want to teleoperate in between episodes \
# --teleop.type=so100_leader \
# --teleop.port=/dev/ttyACM0 \
+14 -70
View File
@@ -1,57 +1,30 @@
# Installation
This guide uses `conda` (via miniforge) to manage environments (recommended). If you prefer another environment manager (e.g. `uv`, `venv`), ensure you have Python >=3.12 and `ffmpeg` installed with the `libsvtav1` encoder, then skip ahead to [Environment Setup](#step-2-environment-setup).
## Step 1 (`conda` only): Install [`miniforge`](https://conda-forge.org/download/)
## Install [`miniforge`](https://conda-forge.org/download/)
```bash
wget "https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-$(uname)-$(uname -m).sh"
bash Miniforge3-$(uname)-$(uname -m).sh
```
## Step 2: Environment Setup
## Environment Setup
Create a virtual environment with Python 3.12:
Create a virtual environment with Python 3.10, using conda:
<!-- prettier-ignore-start -->
<hfoptions id="create_venv">
<hfoption id="conda">
```bash
conda create -y -n lerobot python=3.12
conda create -y -n lerobot python=3.10
```
</hfoption>
<hfoption id="uv">
Then activate your conda environment, you have to do this each time you open a shell to use lerobot:
```bash
uv python install 3.12
uv venv --python 3.12
```
</hfoption>
</hfoptions>
<!-- prettier-ignore-end -->
Then activate your virtual environment, you have to do this each time you open a shell to use lerobot:
<!-- prettier-ignore-start -->
<hfoptions id="activate_venv">
<hfoption id="conda">```bash
conda activate lerobot
```</hfoption>
<hfoption id="uv">
```bash
# Linux/macOSsource
source .venv/bin/activate
# Windows PowerShell
source .venv\Scripts\Activate.ps1
```
</hfoption>
</hfoptions>
<!-- prettier-ignore-end -->
When using `conda`, install `ffmpeg` in your environment:
```bash
conda install ffmpeg -c conda-forge
ffmpeg -version # ffmpeg 8.X is not yet supported !
```
> [!TIP]
@@ -65,17 +38,7 @@ ffmpeg -version # ffmpeg 8.X is not yet supported !
>
> - _[On Linux only]_ If you want to bring your own ffmpeg: Install [ffmpeg build dependencies](https://trac.ffmpeg.org/wiki/CompilationGuide/Ubuntu#GettheDependencies) and [compile ffmpeg from source with libsvtav1](https://trac.ffmpeg.org/wiki/CompilationGuide/Ubuntu#libsvtav1), and make sure you use the corresponding ffmpeg binary to your install with `which ffmpeg`.
> [!NOTE]
> When installing LeRobot inside WSL (Windows Subsystem for Linux), make sure to install `evdev` with the following command:
>
> ```bash
> conda install evdev -c conda-forge
> ```
> [!IMPORTANT]
> If you are using `uv` you will have to install `ffmpeg` system-wide (outside of the virtual environment). You rely on `uv` and `torchcodec` ability to dynamically link to the system `ffmpeg`.
## Step 3: Install LeRobot 🤗
## Install LeRobot 🤗
### From Source
@@ -88,45 +51,23 @@ cd lerobot
Then, install the library in editable mode. This is useful if you plan to contribute to the code.
<!-- prettier-ignore-start -->
<hfoptions id="install_lerobot_src">
<hfoption id="conda">
```bash
pip install -e .
```
</hfoption>
<hfoption id="uv">
```bash
uv pip install -e .
```
</hfoption>
</hfoptions>
<!-- prettier-ignore-end -->
### Installation from PyPI
**Core Library:**
Install the base package with:
<!-- prettier-ignore-start -->
<hfoptions id="install_lerobot_pypi">
<hfoption id="conda">
```bash
pip install lerobot
```
</hfoption>
<hfoption id="uv">
```bash
uv pip install lerobot
```
</hfoption>
</hfoptions>
<!-- prettier-ignore-end -->
_This installs only the default dependencies._
**Extra Features:**
To install additional functionality, use one of the following (If you are using `uv`, replace `pip install` with `uv pip install` in the commands below.):
To install additional functionality, use one of the following:
```bash
pip install 'lerobot[all]' # All available features
@@ -140,10 +81,13 @@ _Replace `[...]` with your desired features._
For a full list of optional dependencies, see:
https://pypi.org/project/lerobot/
> [!NOTE]
> For lerobot 0.4.0, if you want to install pi, you will have to do: `pip install "lerobot[pi]@git+https://github.com/huggingface/lerobot.git"`
### Troubleshooting
If you encounter build errors, you may need to install additional dependencies: `cmake`, `build-essential`, and `ffmpeg libs`.
To install these for Linux run:
To install these for linux run:
```bash
sudo apt-get install cmake build-essential python3-dev pkg-config libavformat-dev libavcodec-dev libavdevice-dev libavutil-dev libswscale-dev libswresample-dev libavfilter-dev
@@ -153,7 +97,7 @@ For other systems, see: [Compiling PyAV](https://pyav.org/docs/develop/overview/
## Optional dependencies
LeRobot provides optional extras for specific functionalities. Multiple extras can be combined (e.g., `.[aloha,feetech]`). For all available extras, refer to `pyproject.toml`. If you are using `uv`, replace `pip install` with `uv pip install` in the commands below.
LeRobot provides optional extras for specific functionalities. Multiple extras can be combined (e.g., `.[aloha,feetech]`). For all available extras, refer to `pyproject.toml`.
### Simulations
+2 -2
View File
@@ -279,13 +279,13 @@ We use the Hugging Face hub features for uploading your dataset. If you haven't
Add your token to the CLI by running this command:
```bash
hf auth login --token ${HUGGINGFACE_TOKEN} --add-to-git-credential
huggingface-cli login --token ${HUGGINGFACE_TOKEN} --add-to-git-credential
```
Then store your Hugging Face repository name in a variable:
```bash
HF_USER=$(hf auth whoami | awk -F': *' 'NR==1 {print $2}')
HF_USER=$(huggingface-cli whoami | head -n 1)
echo $HF_USER
```
+1 -4
View File
@@ -41,10 +41,7 @@ lerobot-record \
--display_data=true \
--dataset.repo_id=${HF_USER}/record-test \
--dataset.num_episodes=5 \
--dataset.single_task="Grab the black cube" \
--dataset.streaming_encoding=true \
# --dataset.vcodec=auto \
--dataset.encoder_threads=2
--dataset.single_task="Grab the black cube"
```
See the [recording guide](./il_robots#record-a-dataset) for more details.
-340
View File
@@ -1,340 +0,0 @@
# Multitask DiT Policy
Multitask Diffusion Transformer (DiT) Policy is an evolution of the original Diffusion Policy architecture, which leverages a large DiT with text and vision conditioning for multitask robot learning. This implementation supports both diffusion and flow matching objectives for action generation, enabling robots to perform diverse manipulation tasks conditioned on language instructions.
## Model Overview
The model uses:
- **CLIP Vision Encoder**: Processes RGB images from multiple camera views
- **CLIP Text Encoder**: Encodes language task instructions (frozen weights with learnable projection)
- **Diffusion Transformer**: Predicts action sequences conditioned on observations and language
- **Two Objectives**: Supports both diffusion (DDPM/DDIM) and flow matching for action generation
This model is exciting because you can achieve extremely high dexterity, competitive with multi-billion parameter
VLAs, with only ~450M parameters and significantly less training.
## Installation Requirements
Multitask DiT Policy has additional dependencies. Install it with:
```bash
pip install lerobot[multi_task_dit]
```
This will install all necessary dependencies including the HuggingFace Transformers library for CLIP models.
## Usage
To use Multitask DiT in your LeRobot configuration, specify the policy type as:
```python
policy.type=multi_task_dit
```
## Training
### Basic Training Command
Here's a complete training command for training Multitask DiT on your dataset:
```bash
lerobot-train \
--dataset.repo_id=YOUR_DATASET \
--output_dir=./outputs/multitask_dit_training \
--batch_size=32 \
--steps=5000 \
--save_freq=500 \
--log_freq=100 \
--policy.type=multi_task_dit \
--policy.device=cuda \
--policy.repo_id="HF_USER/multitask-dit-your-robot" \
--wandb.enable=true
```
### Recommended Hyperparameters and Dataset Details (30Hz Control Frequency)
For reliable performance, start with these suggested default hyperparameters:
```bash
lerobot-train \
--dataset.repo_id=YOUR_DATASET \
--output_dir=./outputs/mutitask_dit_training \
--batch_size=320 \
--steps=30000 \
--policy.type=multi_task_dit \
--policy.device=cuda \
--policy.horizon=32 \
--policy.n_action_steps=24 \
--policy.objective=diffusion \
--policy.noise_scheduler_type=DDPM \
--policy.num_train_timesteps=100 \
--policy.repo_id="HF_USER/multitask-dit-your-robot" \
--wandb.enable=true
```
**Key Parameters:**
- **Batch Size**: 192-320 - If you have access to a GPU that can support this, you will get the best training dynamics
- **Horizon**: 32 - number of action steps to predict, ~1.0 sec at 30Hz
- **n_action_steps**: 24 - ~0.8 seconds at 30Hz
- **Objective**: `diffusion` - start with diffusion and experiment with flow matching if generation quality is poor
- **Training Steps**: >30k steps recommended for a single task
### Training Configuration Parameters
#### Objective Selection
Choose between diffusion and flow matching:
```bash
# Diffusion objective (default)
--policy.objective=diffusion \
--policy.noise_scheduler_type=DDPM \ # or "DDIM"
--policy.num_train_timesteps=100 \
--policy.num_inference_steps=10 \ # For faster inference
--policy.beta_schedule=squaredcos_cap_v2 \ # Noise schedule type
--policy.prediction_type=epsilon \ # "epsilon" (predict noise) or "sample" (predict clean)
--policy.clip_sample=true \ # Clip samples during denoising
--policy.clip_sample_range=1.0 # Clipping range [-x, x]
# Flow matching objective
--policy.objective=flow_matching \
--policy.timestep_sampling_strategy=beta \ # or "uniform" | the beta sampling strategy performance appears much better in practice
--policy.num_integration_steps=100 \
--policy.integration_method=euler \ # or "rk4"
--policy.sigma_min=0.0 # Minimum noise in flow interpolation path
```
#### Transformer Architecture
Adjust model capacity based on dataset size:
```bash
# Small datasets (< 100 examples)
--policy.num_layers=4 \
--policy.hidden_dim=512 \
--policy.num_heads=8 # should ideally be hidden_dim // 64
# Medium datasets (100-5k examples) - default
--policy.num_layers=6 \
--policy.hidden_dim=512 \
--policy.num_heads=8 # should ideally be hidden_dim // 64
# Large datasets (> 5k examples)
--policy.num_layers=8 \
--policy.hidden_dim=512 \
--policy.num_heads=8 # should ideally be hidden_dim // 64
```
**Positional Encoding Options:**
The model supports two positional encoding methods for action sequences:
```bash
# Rotary Position Embedding (RoPE) - default, recommended
--policy.use_rope=true \
--policy.rope_base=10000.0 # Base frequency for RoPE
# Absolute positional encoding
--policy.use_positional_encoding=true # Disables RoPE when true
```
**Other Transformer Parameters:**
```bash
--policy.dropout=0.1 # Dropout rate for DiT blocks (0.0-1.0)
--policy.timestep_embed_dim=256 # Timestep embedding dimension
```
#### Vision Encoder Configuration
```bash
# Use different CLIP model for more expressivity at the cost of inference time
# experiment with larger or smaller models depending on the complexity of your tasks and size of dataset
--policy.vision_encoder_name=openai/clip-vit-large-patch14
# Use separate vision encoder per camera
# This may be useful when cameras have significantly different characteristics, but
# be wary of increased VRAM footprint.
--policy.use_separate_rgb_encoder_per_camera=true
# Image preprocessing
--policy.image_resize_shape=[XXX,YYY] \ # you may need to resize your images for inference speed ups
--policy.image_crop_shape=[224,224] \
--policy.image_crop_is_random=true # Random during training, center at inference
```
#### Text Encoder Configuration
```bash
# Use different CLIP text encoder model
# same as vision: experiment with larger or smaller models depending on the
# complexity of your tasks and size of dataset
--policy.text_encoder_name=openai/clip-vit-large-patch14
```
#### Learning Rate Configuration
The vision encoder uses a separate learning rate multiplier, where 1/10th is suggested to be the ideal staritng point:
```bash
--policy.optimizer_lr=2e-5 \
--policy.vision_encoder_lr_multiplier=0.1 # Vision encoder LR = 0.1 * optimizer_lr
```
### Training Tuning Guidelines
#### 1. Flow Matching with Beta Sampling
The original diffusion implementation here is based on the work described in [TRI's LBM paper](https://arxiv.org/abs/2507.05331)
Additionally, we have implemented a flow-matching objective, which is described at a high-level in [Boston Dynamics blog post](https://bostondynamics.com/blog/large-behavior-models-atlas-find-new-footing/).
Consider testing the flow-matching objective and evaluating performance differences for your task:
```bash
--policy.objective=flow_matching \
--policy.timestep_sampling_strategy=beta \
--policy.timestep_sampling_alpha=1.5 \
--policy.timestep_sampling_beta=1.0 \
--policy.timestep_sampling_s=0.999
```
This hasn't been shown to be a silver bullet across every user case, but it occasionally results in smoother and more consistent actions.
#### 2. Number of Transformer Layers
Match model capacity to your dataset size:
- **Small datasets** (< 100 examples): Reduce to 4 layers
- **Large datasets** (> 5k examples): Increase to 8 layers
#### 3. `horizon` Tuning
The model can be sensitive to the horizon you choose. Start with around a 1 second horizon based on your control frequency:
- **30 Hz frequency**: `horizon=30`
- **10 Hz frequency**: `horizon=10`
Then experiment with increasing from there. The horizon determines how far into the future the model predicts actions.
#### 4. `n_action_steps` Sensitivity
The model can also be very sensitive to `n_action_steps`. Start with it being around 0.8 seconds based on your control frequency and tune from there:
- **Lower values**: More reactive but potentially less stable for long-horizon tasks
- **Higher values**: Better for long-horizon execution but open-loop failures are limited in their recovery
### Inference Tuning
For faster inference, use DDIM with fewer sampling steps:
```bash
--policy.noise_scheduler_type=DDIM \
--policy.num_inference_steps=10
```
### Resuming Training
To resume training from a checkpoint:
```bash
lerobot-train \
--config_path=./outputs/mutitask_dit_training/checkpoints/last/pretrained_model/train_config.json \
--resume=true
```
The checkpoint directory should contain `model.safetensors` and `config.json` files (saved automatically during training). When resuming, the configuration is loaded from the checkpoint, so you don't need to specify other parameters.
## Common Failure Modes and Debugging
Training these models can be finicky. Here are common failure modes and debugging approaches:
### Idling / No Motion
The model may "collapse" during inference, resulting in static or no motion. This can occur when:
1. **Insufficient training data**: If you only have 20-50 examples, try to roughly double your dataset size. Once you have above 300 examples, if you're still seeing this, the task may be too complex.
2. **Multiple similar tasks**: When your dataset contains multiple similar tasks (e.g., picking up 2 different objects), the model may rely too heavily on language conditioning which might not be rich enough.
**Debugging tips:**
- Increase dataset size (double until you get to over 300 examples)
- Train for longer, up to 100k steps, even when the loss flatlines
- Check if the model is receiving proper language instructions or increase diversity of instruction
### Executing the Wrong Task
Sometimes the robot will completely ignore your instruction and perform some other task. This generally only happens if you have trained on multiple tasks.
**Potential causes:**
- Language instruction ambiguity
- Insufficient task-specific training data
- Model confusion between similar tasks in the multitask dataset
**Debugging tips:**
- Verify language instruction specificity, especially if descriptions are similar between multiple tasks
- Check task distribution in your training dataset and add weighting to the failing/ignored task
- Consider task-specific fine-tuning
### Training Instability
If training loss is unstable or diverging:
- Try adjusting learning rate between `1e-5` and `3e-4`
- Increase batch size if possible
- Check that your dataset normalization is correct
- Verify image preprocessing is working correctly
## Performance Considerations
### GPU Requirements
- **Inference**: At least an RTX 5070 Ti (or equivalent GPU) is recommended for reasonable speed performance
- **Training**: A GPU with enough VRAM to load batch sizes of >64 is ideal, which will vary depending on the number of image observations, etc
### Batch Size Recommendations
- **Minimum**: 64 (less than this may result in unstable training)
- **Recommended**: 256-320 (best performance, requires larger GPU)
## Example: Training on Custom Dataset
Here's a complete example training on a custom dataset:
```bash
lerobot-train \
--dataset.repo_id=YOUR_DATASET \
--output_dir=./outputs/mutitask_dit_training \
--batch_size=320 \
--steps=30000 \
--save_freq=1000 \
--log_freq=100 \
--eval_freq=1000 \
--policy.type=multi_task_dit \
--policy.device=cuda \
--policy.horizon=32 \
--policy.n_action_steps=24 \
--policy.objective=diffusion \
--policy.noise_scheduler_type=DDPM \
--policy.num_layers=6 \
--policy.hidden_dim=512 \
--policy.vision_encoder_name=openai/clip-vit-base-patch16 \
--policy.image_resize_shape=[320,240] \
--policy.image_crop_shape=[224,224] \
--policy.repo_id="HF_USER/multitask-dit-your-robot" \
--wandb.enable=true \
--wandb.project=multitask_dit
```
## References
For more details on the technical implementation and architecture, see:
- [A Careful Examination of Large Behavior Models for Multitask Dexterous Manipulation](https://arxiv.org/abs/2507.05331)
- [Large Behavior Models and Atlas Find New Footing](https://bostondynamics.com/blog/large-behavior-models-atlas-find-new-footing/)
- [Dissecting and Open-Sourcing Multitask Diffusion Transformer Policy](https://brysonkjones.substack.com/p/dissecting-and-open-sourcing-multitask-diffusion-transformer-policy)
+5 -9
View File
@@ -66,13 +66,12 @@ Run on of the examples scripts to teleoperate, record a dataset, replay a datase
All scripts assume you configured your robot (e.g., SO-100 follower) and set the correct serial port.
Additionally you need to **copy the URDF of the robot into the examples folder**. For the examples in this tutorial (using SO100/SO101), copy the `SO101` folder from the [SO-ARM100 repo](https://github.com/TheRobotStudio/SO-ARM100/blob/main/Simulation/SO101) into the `examples/phone_to_so100/` directory, so that the URDF file path becomes `examples/phone_to_so100/SO101/so101_new_calib.urdf`.
Additionally you need to **copy the urdf of the robot to the examples folder**. For the examples in this tutorial (Using SO100/SO101) it is highly recommended to use the urdf in the [SO-ARM100 repo](https://github.com/TheRobotStudio/SO-ARM100/blob/main/Simulation/SO101/so101_new_calib.urdf)
- Run this example to teleoperate:
```bash
cd examples/phone_to_so100
python teleoperate.py
python examples/phone_to_so100/teleoperate.py
```
After running the example:
@@ -85,22 +84,19 @@ Additionally you can customize mapping or safety limits by editing the processor
- Run this example to record a dataset, which saves absolute end effector observations and actions:
```bash
cd examples/phone_to_so100
python record.py
python examples/phone_to_so100/record.py
```
- Run this example to replay recorded episodes:
```bash
cd examples/phone_to_so100
python replay.py
python examples/phone_to_so100/replay.py
```
- Run this example to evaluate a pretrained policy:
```bash
cd examples/phone_to_so100
python evaluate.py
python examples/phone_to_so100/evaluate.py
```
### Important pipeline steps and options
+6 -41
View File
@@ -34,6 +34,11 @@ As described by Physical Intelligence, while AI has achieved remarkable success
pip install -e ".[pi]"
```
> [!NOTE]
> For lerobot 0.4.0, if you want to install pi tag, you will have to do: `pip install "lerobot[pi]@git+https://github.com/huggingface/lerobot.git"`.
>
> This will be solved in the next patch release
## Training Data and Capabilities
π₀ is trained on the largest robot interaction dataset to date, combining three key data sources:
@@ -55,7 +60,7 @@ policy.type=pi0
For training π₀, you can use the standard LeRobot training script with the appropriate configuration:
```bash
lerobot-train \
python src/lerobot/scripts/lerobot_train.py \
--dataset.repo_id=your_dataset \
--policy.type=pi0 \
--output_dir=./outputs/pi0_training \
@@ -91,46 +96,6 @@ lerobot-train \
**💡 Tip**: Setting `train_expert_only=true` freezes the VLM and trains only the action expert and projections, allowing finetuning with reduced memory usage.
## Relative Actions
By default, π₀ predicts absolute actions. You can enable **relative actions** so the model predicts offsets relative to the current robot state. This can improve training stability for certain setups.
To use relative actions, first recompute your dataset stats in relative space via the CLI:
```bash
lerobot-edit-dataset \
--repo_id your_dataset \
--operation.type recompute_stats \
--operation.relative_action true \
--operation.chunk_size 50 \
--operation.relative_exclude_joints "['gripper']" \
--push_to_hub true
```
Or equivalently in Python:
```python
from lerobot.datasets.lerobot_dataset import LeRobotDataset
from lerobot.datasets.dataset_tools import recompute_stats
dataset = LeRobotDataset("your_dataset")
recompute_stats(dataset, relative_action=True, chunk_size=50, relative_exclude_joints=["gripper"])
dataset.push_to_hub()
```
The `chunk_size` should match your policy's `chunk_size` (default 50 for π₀). `relative_exclude_joints` lists joint names that should remain in absolute space (e.g. gripper commands). Use `--push_to_hub true` to upload the updated stats to the Hub.
Then train with relative actions enabled:
```bash
lerobot-train \
--dataset.repo_id=your_dataset \
--policy.type=pi0 \
--policy.use_relative_actions=true \
--policy.relative_exclude_joints='["gripper"]' \
...
```
## License
This model follows the **Apache 2.0 License**, consistent with the original [OpenPI repository](https://github.com/Physical-Intelligence/openpi).
+6 -41
View File
@@ -36,6 +36,11 @@ This diverse training mixture creates a "curriculum" that enables generalization
pip install -e ".[pi]"
```
> [!NOTE]
> For lerobot 0.4.0, if you want to install pi tag, you will have to do: `pip install "lerobot[pi]@git+https://github.com/huggingface/lerobot.git"`.
>
> This will be solved in the next patch release
## Usage
To use π₀.₅ in your LeRobot configuration, specify the policy type as:
@@ -51,7 +56,7 @@ policy.type=pi05
Here's a complete training command for finetuning the base π₀.₅ model on your own dataset:
```bash
lerobot-train \
python src/lerobot/scripts/lerobot_train.py\
--dataset.repo_id=your_dataset \
--policy.type=pi05 \
--output_dir=./outputs/pi05_training \
@@ -97,46 +102,6 @@ python src/lerobot/datasets/v30/augment_dataset_quantile_stats.py \
Or train pi05 with this normalization mapping: `--policy.normalization_mapping='{"ACTION": "MEAN_STD", "STATE": "MEAN_STD", "VISUAL": "IDENTITY"}'`
## Relative Actions
By default, π₀.₅ predicts absolute actions. You can enable **relative actions** so the model predicts offsets relative to the current robot state. This can improve training stability for certain setups.
To use relative actions, first recompute your dataset stats in relative space via the CLI:
```bash
lerobot-edit-dataset \
--repo_id your_dataset \
--operation.type recompute_stats \
--operation.relative_action true \
--operation.chunk_size 50 \
--operation.relative_exclude_joints "['gripper']" \
--push_to_hub true
```
Or equivalently in Python:
```python
from lerobot.datasets.lerobot_dataset import LeRobotDataset
from lerobot.datasets.dataset_tools import recompute_stats
dataset = LeRobotDataset("your_dataset")
recompute_stats(dataset, relative_action=True, chunk_size=50, relative_exclude_joints=["gripper"])
dataset.push_to_hub()
```
The `chunk_size` should match your policy's `chunk_size` (default 50 for π₀.₅). `relative_exclude_joints` lists joint names that should remain in absolute space (e.g. gripper commands). Use `--push_to_hub true` to upload the updated stats to the Hub.
Then train with relative actions enabled:
```bash
lerobot-train \
--dataset.repo_id=your_dataset \
--policy.type=pi05 \
--policy.use_relative_actions=true \
--policy.relative_exclude_joints='["gripper"]' \
...
```
## Performance Results
### Libero Benchmark Results
+15 -10
View File
@@ -43,11 +43,16 @@ This approach can transform **any existing VLM** into a VLA by training it to pr
pip install -e ".[pi]"
```
> [!NOTE]
> For lerobot 0.4.0, if you want to install the pi tag, you will have to do: `pip install "lerobot[pi]@git+https://github.com/huggingface/lerobot.git"`.
>
> This will be solved in the next patch release
## Training a Custom FAST Tokenizer
You have two options for the FAST tokenizer:
1. **Use the pre-trained tokenizer**: The `lerobot/fast-action-tokenizer` tokenizer was trained on 1M+ real robot action sequences and works as a general-purpose tokenizer.
1. **Use the pre-trained tokenizer**: The `physical-intelligence/fast` tokenizer was trained on 1M+ real robot action sequences and works as a general-purpose tokenizer.
2. **Train your own tokenizer**: For maximum performance on your specific dataset, you can finetune the tokenizer on your own data.
@@ -109,15 +114,15 @@ lerobot-train \
### Key Training Parameters
| Parameter | Description | Default |
| -------------------------------------- | -------------------------------------------------- | ------------------------------- |
| `--policy.gradient_checkpointing=true` | Reduces memory usage significantly during training | `false` |
| `--policy.dtype=bfloat16` | Use mixed precision training for efficiency | `float32` |
| `--policy.chunk_size` | Number of action steps to predict (action horizon) | `50` |
| `--policy.n_action_steps` | Number of action steps to execute | `50` |
| `--policy.max_action_tokens` | Maximum number of FAST tokens per action chunk | `256` |
| `--policy.action_tokenizer_name` | FAST tokenizer to use | `lerobot/fast-action-tokenizer` |
| `--policy.compile_model=true` | Enable torch.compile for faster training | `false` |
| Parameter | Description | Default |
| -------------------------------------- | -------------------------------------------------- | ---------------------------- |
| `--policy.gradient_checkpointing=true` | Reduces memory usage significantly during training | `false` |
| `--policy.dtype=bfloat16` | Use mixed precision training for efficiency | `float32` |
| `--policy.chunk_size` | Number of action steps to predict (action horizon) | `50` |
| `--policy.n_action_steps` | Number of action steps to execute | `50` |
| `--policy.max_action_tokens` | Maximum number of FAST tokens per action chunk | `256` |
| `--policy.action_tokenizer_name` | FAST tokenizer to use | `physical-intelligence/fast` |
| `--policy.compile_model=true` | Enable torch.compile for faster training | `false` |
## Inference
@@ -1,37 +0,0 @@
# Multitask DiT Policy
## Citation
If you use this work, please cite the following works:
```bibtex
@misc{jones2025multitaskditpolicy,
author = {Bryson Jones},
title = {Dissecting and Open-Sourcing Multitask Diffusion Transformer Policy},
year = {2025},
url = {https://brysonkjones.substack.com/p/dissecting-and-open-sourcing-multitask-diffusion-transformer-policy},
note = {Blog post}
}
```
```bibtex
@misc{trilbmteam2025carefulexaminationlargebehaviormodels,
author = {TRI LBM Team},
title = {A Careful Examination of Large Behavior Models for Multitask Dexterous Manipulation},
year = {2025},
eprint = {arXiv:2507.05331},
archivePrefix = {arXiv},
primaryClass = {cs.RO},
url = {https://arxiv.org/abs/2507.05331}
}
```
```bibtex
@misc{bostondynamics2025largebehaviormodelsatlas,
author = {Boston Dynamics and TRI Research Team},
title = {Large Behavior Models and Atlas Find New Footing},
year = {2025},
url = {https://bostondynamics.com/blog/large-behavior-models-atlas-find-new-footing/},
note = {Blog post}
}
```
-6
View File
@@ -159,9 +159,6 @@ lerobot-record \
--dataset.fps=15 \
--dataset.push_to_hub=true \
--dataset.private=true \
--dataset.streaming_encoding=true \
--dataset.encoder_threads=2 \
# --dataset.vcodec=auto \
--display_data=true
```
@@ -201,9 +198,6 @@ lerobot-record \
--dataset.fps=15 \
--dataset.push_to_hub=true \
--dataset.private=true \
--dataset.streaming_encoding=true \
--dataset.encoder_threads=2 \
# --dataset.vcodec=auto \
--display_data=true
```
-114
View File
@@ -1,114 +0,0 @@
# Rename Map and Empty Cameras
When you train, evaluate, or record with a robot policy, your **dataset** or **environment** provides observations under one set of keys (e.g. `observation.images.front`, `observation.images.eagle`), while your **policy** expects another (e.g. `observation.images.image`, `observation.images.image2`). The **rename map** bridges that gap without changing the policy or data source.
> **Scope:** The rename map only renames **observation** keys (images and state). Action keys are not affected.
## Why observation keys don't always match
Policies have a fixed set of **input feature names** baked into their pretrained config. For example:
- [pi0fast-libero](https://huggingface.co/lerobot/pi0fast-libero) expects `observation.images.base_0_rgb` and `observation.images.left_wrist_0_rgb`.
- [xvla-base](https://huggingface.co/lerobot/xvla-base) expects `observation.images.image`, `observation.images.image2`, and `observation.images.image3`.
Your dataset might use different names entirely (e.g. `observation.images.front`, `observation.images.eagle`, `observation.images.glove`), and your eval environment might use yet another set. Rather than editing the policy config or renaming columns in the dataset, you pass a **rename map**: a JSON dictionary that maps source keys to the keys the policy expects. Renaming happens inside the preprocessor pipeline, so the policy always sees its expected keys.
## Using the rename map
Pass the mapping as a JSON string on the command line. The convention is always:
```
--rename_map='{"source_key": "policy_key", ...}'
```
where **source_key** is what the dataset or environment provides, and **policy_key** is what the policy expects.
Only listed keys are renamed; everything else passes through unchanged. Order of entries doesn't matter.
Supported policies: **PI0**, **PI05**, **PI0Fast**, **SmolVLA**, and **XVLA**.
### Training
Suppose you fine-tune [lerobot/xvla-base](https://huggingface.co/lerobot/xvla-base) on a dataset with images under `observation.images.front`, `observation.images.eagle`, and `observation.images.glove`. XVLA expects `observation.images.image`, `observation.images.image2`, and `observation.images.image3`:
```bash
lerobot-train \
--dataset.repo_id=YOUR_DATASET \
--output_dir=./outputs/xvla_training \
--job_name=xvla_training \
--policy.path="lerobot/xvla-base" \
--policy.repo_id="HF_USER/xvla-your-robot" \
--policy.dtype=bfloat16 \
--policy.action_mode=auto \
--steps=20000 \
--policy.device=cuda \
--policy.freeze_vision_encoder=false \
--policy.freeze_language_encoder=false \
--policy.train_policy_transformer=true \
--policy.train_soft_prompts=true \
--rename_map='{"observation.images.front": "observation.images.image", "observation.images.eagle": "observation.images.image2", "observation.images.glove": "observation.images.image3"}'
```
### Evaluation
A policy that expects `observation.images.base_0_rgb` and `observation.images.left_wrist_0_rgb` (e.g. [pi0fast-libero](https://huggingface.co/lerobot/pi0fast-libero)), but the LIBERO environment returns `observation.images.image` and `observation.images.image2`:
```bash
lerobot-eval \
--policy.path=lerobot/pi0fast-libero \
--env.type=libero \
... \
--rename_map='{"observation.images.image": "observation.images.base_0_rgb", "observation.images.image2": "observation.images.left_wrist_0_rgb"}'
```
### Recording
`lerobot-record` also supports rename maps, nested under the dataset config:
```bash
lerobot-record \ # When running inference
--policy.path="<user>/smolVLA_finetuned" \
... \
--dataset.rename_map='{"observation.images.glove2": "observation.images.image"}'
```
## Alternative: edit the policy config directly
If you always use the same dataset or environment, you can **edit the policy's `config.json`** so its observation keys match your data source. Then no rename map is needed.
The tradeoff: modifying the policy config ties it to one data source. A rename map keeps one policy usable across many datasets and environments.
## Empty cameras: fewer views than the policy expects
Some policies are built for a fixed number of image inputs. If your dataset has fewer cameras, you can set **`empty_cameras`** in the policy config instead of modifying the model architecture.
### How it works
Setting `empty_cameras=N` adds N placeholder image features to the policy config, named:
```
observation.images.empty_camera_0
observation.images.empty_camera_1
...
```
At runtime, these keys have no corresponding data in the batch. The policy fills them with masked dummy tensors (padded with `-1` for SigLIP-based vision encoders, with a zero attention mask), so the extra image slots are effectively ignored during training and inference.
### Example
XVLA-base has three visual inputs and `empty_cameras=0` by default. Your dataset only has two cameras:
1. Set `--policy.empty_cameras=1`.
2. The config adds a third key: `observation.images.empty_camera_0`.
3. Use the rename map for your two real cameras as usual.
4. The third slot is masked out — no fake images needed in your dataset.
## Quick reference
| Goal | What to do |
| ----------------------------------------- | --------------------------------------------------------------------------- |
| Dataset keys ≠ policy keys | `--rename_map='{"dataset_key": "policy_key", ...}'` |
| Env keys ≠ policy keys (eval) | `--rename_map='{"env_key": "policy_key", ...}'` |
| Recording with different keys (inference) | `--dataset.rename_map='{"source_key": "policy_key", ...}'`. |
| Fewer cameras than policy expects | `--policy.empty_cameras=N` (supported by PI0, PI05, PI0Fast, SmolVLA, XVLA) |
| Avoid passing a rename map | Edit the policy's `config.json` so its keys match your data source |
+4 -4
View File
@@ -269,7 +269,7 @@ This generates visualizations showing video frames with subtask boundaries overl
Train with **no annotations** - uses linear progress from 0 to 1:
```bash
lerobot-train \
python src/lerobot/scripts/lerobot_train.py \
--dataset.repo_id=your-username/your-dataset \
--policy.type=sarm \
--policy.annotation_mode=single_stage \
@@ -288,7 +288,7 @@ lerobot-train \
Train with **dense annotations only** (sparse auto-generated):
```bash
lerobot-train \
python src/lerobot/scripts/lerobot_train.py \
--dataset.repo_id=your-username/your-dataset \
--policy.type=sarm \
--policy.annotation_mode=dense_only \
@@ -307,7 +307,7 @@ lerobot-train \
Train with **both sparse and dense annotations**:
```bash
lerobot-train \
python src/lerobot/scripts/lerobot_train.py \
--dataset.repo_id=your-username/your-dataset \
--policy.type=sarm \
--policy.annotation_mode=dual \
@@ -468,7 +468,7 @@ This script:
Once you have the progress file, train your policy with RA-BC weighting. The progress file is auto-detected from the dataset path (`sarm_progress.parquet`). Currently PI0, PI0.5 and SmolVLA are supported with RA-BC:
```bash
lerobot-train \
python src/lerobot/scripts/lerobot_train.py \
--dataset.repo_id=your-username/your-dataset \
--policy.type=pi0 \
--use_rabc=true \
-3
View File
@@ -106,9 +106,6 @@ lerobot-record \
--dataset.repo_id=${HF_USER}/eval_DATASET_NAME_test \ # <- This will be the dataset name on HF Hub
--dataset.episode_time_s=50 \
--dataset.num_episodes=10 \
--dataset.streaming_encoding=true \
--dataset.encoder_threads=2 \
# --dataset.vcodec=auto \
# <- Teleop optional if you want to teleoperate in between episodes \
# --teleop.type=so100_leader \
# --teleop.port=/dev/ttyACM0 \
+6 -7
View File
@@ -236,10 +236,10 @@ It is advisable to install one 3-pin cable in the motor after placing them befor
### Joint 1
- Install both motor horns. Secure the top horn with a M3x6mm screw. No screws are required for the bottom horn.
- Place the first motor into the base.
- Fasten the motor with 4 M2x6mm screws (smallest screws). Two from the top and two from the bottom.
- Slide over the first motor holder and fasten it using two M2x6mm screws (one on each side).
- Install both motor horns, securing the top horn with a M3x6mm screw.
- Attach the shoulder part.
- Tighten the shoulder part with 4 M3x6mm screws on top and 4 M3x6mm screws on the bottom
- Add the shoulder motor holder.
@@ -255,9 +255,9 @@ It is advisable to install one 3-pin cable in the motor after placing them befor
### Joint 2
- Install both motor horns. Secure the top horn with a M3x6mm screw. No screws are required for the bottom horn.
- Slide the second motor in from the top.
- Fasten the second motor with 4 M2x6mm screws.
- Attach both motor horns to motor 2, again use the M3x6mm horn screw.
- Attach the upper arm with 4 M3x6mm screws on each side.
<div class="video-container">
@@ -271,8 +271,8 @@ It is advisable to install one 3-pin cable in the motor after placing them befor
### Joint 3
- Install both motor horns. Secure the top horn with a M3x6mm screw. No screws are required for the bottom horn.
- Insert motor 3 and fasten using 4 M2x6mm screws.
- Insert motor 3 and fasten using 4 M2x6mm screws
- Attach both motor horns to motor 3 and secure one again with a M3x6mm horn screw.
- Connect the forearm to motor 3 using 4 M3x6mm screws on each side.
<div class="video-container">
@@ -286,10 +286,9 @@ It is advisable to install one 3-pin cable in the motor after placing them befor
### Joint 4
- Install both motor horns. Secure the top horn with a M3x6mm screw. No screws are required for the bottom horn.
- Slide over motor holder 4.
- Slide in motor 4.
- Fasten motor 4 with 4 M2x6mm screws.
- Fasten motor 4 with 4 M2x6mm screws and attach its motor horns, use a M3x6mm horn screw.
<div class="video-container">
<video controls width="600">
@@ -322,7 +321,7 @@ It is advisable to install one 3-pin cable in the motor after placing them befor
- Attach the gripper to motor 5, attach it to the motor horn on the wrist using 4 M3x6mm screws.
- Insert the gripper motor and secure it with 2 M2x6mm screws on each side.
- Install both motor horns on the gripper motor. Secure the top horn with a M3x6mm screw; no screws are required for the bottom horn.
- Attach the motor horns and again use a M3x6mm horn screw.
- Install the gripper claw and secure it with 4 M3x6mm screws on both sides.
<div class="video-container">
-155
View File
@@ -1,155 +0,0 @@
# Streaming Video Encoding Guide
## 1. Overview
Streaming video encoding eliminates the traditional PNG round-trip during video dataset recording. Instead of:
1. Capture frame -> write PNG to disk -> (at episode end) read PNG's -> encode to MP4 -> delete PNG's
Frames can be encoded in real-time during capture:
1. Capture frame -> queue to encoder thread -> encode to MP4 directly
This makes `save_episode()` near-instant (the video is already encoded by the time the episode ends) and removes the blocking wait that previously occurred between episodes, especially with multiple cameras in long episodes.
## 2. Tuning Parameters
| Parameter | CLI Flag | Type | Default | Description |
| ----------------------- | --------------------------------- | ------------- | ------------- | ----------------------------------------------------------------- |
| `streaming_encoding` | `--dataset.streaming_encoding` | `bool` | `True` | Enable real-time encoding during capture |
| `vcodec` | `--dataset.vcodec` | `str` | `"libsvtav1"` | Video codec. `"auto"` detects best HW encoder |
| `encoder_threads` | `--dataset.encoder_threads` | `int \| None` | `None` (auto) | Threads per encoder instance. `None` will leave the vcoded decide |
| `encoder_queue_maxsize` | `--dataset.encoder_queue_maxsize` | `int` | `60` | Max buffered frames per camera (~2s at 30fps). Consumes RAM |
## 3. Performance Considerations
Streaming encoding means the CPU is encoding video **during** the capture loop, not after. This creates a CPU budget that must be shared between:
- **Control loop** (reading cameras, control the robot, writing non-video data)
- **Encoder threads** (one pool per camera)
- **Rerun visualization** (if enabled)
- **OS and other processes**
### Resolution & Number of Cameras Impact
| Setup | Throughput (px/sec) | CPU Encoding Load | Notes |
| ------------------------- | ------------------- | ----------------- | ------------------------------ |
| 2camsx 640x480x3 @30fps | 55M | Low | Works on most systems |
| 2camsx 1280x720x3 @30fps | 165M | Moderate | Comfortable on modern systems |
| 2camsx 1920x1080x3 @30fps | 373M | High | Requires powerful high-end CPU |
### `encoder_threads` Tuning
This parameter controls how many threads each encoder instance uses internally:
- **Higher values** (e.g., 4-5): Faster encoding, but uses more CPU cores per camera. Good for high-end systems with many cores.
- **Lower values** (e.g., 1-2): Less CPU per camera, freeing cores for capture and visualization. Good for low-res images and capable CPUs.
- **`None` (default)**: Lets the codec decide. Information available in the codec logs.
### Backpressure and Frame Dropping
Each camera has a bounded queue (`encoder_queue_maxsize`, default 60 frames). When the encoder can't keep up:
1. The queue fills up (consuming RAM)
2. New frames are **dropped** (not blocked) — the capture loop continues uninterrupted
3. A warning is logged: `"Encoder queue full for {camera}, dropped N frame(s)"`
4. At episode end, total dropped frames per camera are reported
### Symptoms of Encoder Falling Behind
- **System feels laggy and freezes**: all CPUs are at 100%
- **Dropped frame warnings** in the log or lower frames/FPS than expected in the recorded dataset
- **Choppy robot movement**: If CPU is severely overloaded, even the capture loop may be affected
- **Accumulated rerun lag**: Visualization falls behind real-time
## 4. Hardware-Accelerated Encoding
### When to Use
Use HW encoding when:
- CPU is the bottleneck (dropped frames, choppy robot, rerun lag)
- You have compatible hardware (GPU or dedicated encoder)
- You're recording at high throughput (high resolution or with many cameras)
### Choosing a Codec
| Codec | CPU Usage | File Size | Quality | Notes |
| --------------------- | --------- | -------------- | ------- | ---------------------------------------------------------------- |
| `libsvtav1` (default) | High | Smallest | Best | Default. Best compression but most CPU-intensive |
| `h264` | Medium | ~30-50% larger | Good | Software H.264. Lower CPU |
| HW encoders | Very Low | Largest | Good | Offloads to dedicated hardware. Best for CPU-constrained systems |
### Available HW Encoders
| Encoder | Platform | Hardware | CLI Value |
| ------------------- | ------------- | ------------------------------------------------------------------------------------------------ | ------------------------------------ |
| `h264_videotoolbox` | macOS | Apple Silicon / Intel | `--dataset.vcodec=h264_videotoolbox` |
| `hevc_videotoolbox` | macOS | Apple Silicon / Intel | `--dataset.vcodec=hevc_videotoolbox` |
| `h264_nvenc` | Linux/Windows | NVIDIA GPU | `--dataset.vcodec=h264_nvenc` |
| `hevc_nvenc` | Linux/Windows | NVIDIA GPU | `--dataset.vcodec=hevc_nvenc` |
| `h264_vaapi` | Linux | Intel/AMD GPU | `--dataset.vcodec=h264_vaapi` |
| `h264_qsv` | Linux/Windows | Intel Quick Sync | `--dataset.vcodec=h264_qsv` |
| `auto` | Any | Probes the system for available HW encoders. Falls back to `libsvtav1` if no HW encoder is found | `--dataset.vcodec=auto` |
> [!NOTE]
> In order to use the HW accelerated encoders you might need to upgrade your GPU drivers.
> [!NOTE]
> `libsvtav1` is the default because it provides the best training performance; other vcodecs can reduce CPU usage and be faster, but they typically produce larger files and may affect training time.
## 5. Troubleshooting
| Symptom | Likely Cause | Fix |
| ------------------------------------------------------------------ | -------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
| System freezes or choppy robot movement or Rerun visualization lag | CPU starved (100% load usage) | Close other apps, reduce encoding throughput, lower `encoder_threads`, use `h264`, use `display_data=False`. If the CPU continues to be at 100% then it might be insufficient for your setup, consider `--dataset.streaming_encoding=false` or HW encoding (`--dataset.vcodec=auto`) |
| "Encoder queue full" warnings or dropped frames in dataset | Encoder can't keep up (Queue overflow) | If CPU is not at 100%: Increase `encoder_threads`, increase `encoder_queue_maxsize` or use HW encoding (`--dataset.vcodec=auto`). |
| High RAM usage | Queue filling faster than encoding | `encoder_threads` too low or CPU insufficient. Reduce `encoder_queue_maxsize` or use HW encoding |
| Large video files | Using HW encoder or H.264 | Expected trade-off. Switch to `libsvtav1` if CPU allows |
| `save_episode()` still slow | `streaming_encoding` is `False` | Set `--dataset.streaming_encoding=true` |
| Encoder thread crash | Codec not available or invalid settings | Check `vcodec` is installed, try `--dataset.vcodec=auto` |
| Recorded dataset is missing frames | CPU/GPU starvation or occasional load spikes | If ~5% of frames are missing, your system is likely overloaded — follow the recommendations above. If fewer frames are missing (~2%), they are probably due to occasional transient load spikes (often at startup) and can be considered expected. |
## 6. Recommended Configurations
These estimates are conservative; we recommend testing them on your setup—start with a low load and increase it gradually.
### High-End Systems: modern 12+ cores (24+ threads)
A throughput between ~250-500M px/sec should be comfortable in CPU. For even better results try HW encoding if available.
```bash
# 3camsx 1280x720x3 @30fps: Defaults work well. Optionally increase encoder parallelism.
# 2camsx 1920x1080x3 @30fps: Defaults work well. Optionally increase encoder parallelism.
lerobot-record --dataset.encoder_threads=5 ...
# 3camsx 1920x1080x3 @30fps: Might require some tuning.
```
### Mid-Range Systems: modern 8+ cores (16+ threads) or Apple Silicon
A throughput between ~80-300M px/sec should be possible in CPU.
```bash
# 3camsx 640x480x3 @30fps: Defaults work well. Optionally decrease encoder parallelism.
# 2camsx 1280x720x3 @30fps: Defaults work well. Optionally decrease encoder parallelism.
lerobot-record --dataset.encoder_threads=2 ...
# 2camsx 1920x1080x3 @30fps: Might require some tuning.
```
### Low-Resource Systems: modern 4+ cores (8+ threads) or Raspberry Pi 5
On very constrained systems, streaming encoding may compete too heavily with the capture loop. Disabling it falls back to the PNG-based approach where encoding happens between episodes (blocking, but doesn't interfere with capture). Alternatively, record at a lower throughput to reduce both capture and encoding load. Consider also changing codec to `h264` and using batch encoding.
```bash
# 2camsx 640x480x3 @30fps: Requires some tuning.
# Use H.264, disable streaming, consider batching encoding
lerobot-record --dataset.vcodec=h264 --dataset.streaming_encoding=false ...
```
## 7. Closing note
Performance ultimately depends on your exact setup — frames-per-second, resolution, CPU cores and load, available memory, episode length, and the encoder you choose. Always test with your target workload, be mindful about your CPU & system capabilities and tune `encoder_threads`, `encoder_queue_maxsize`, and
`vcodec` reasonably. That said, a common practical configuration (for many applications) is three cameras at 640×480x3 @30fps; this usually runs fine with the default streaming video encoding settings in modern systems. Always verify your recorded dataset is healthy by comparing the video duration to the CLI episode duration and confirming the row count equals FPS × CLI duration.
-227
View File
@@ -1,227 +0,0 @@
# UMI Data with pi0 Relative EE Actions
This guide explains how to train a pi0 policy with UMI-style relative end-effector (EE) actions and deploy it on a real OpenArm robot.
**What we will do:**
1. Prepare the dataset (EE pose + gripper in the action column).
2. Recompute statistics for relative actions.
3. Train pi0 with `derive_state_from_action=true`.
4. Evaluate the trained policy on a real robot.
## Background
[UMI (Universal Manipulation Interface)](https://umi-gripper.github.io) collects manipulation data with hand-held grippers, recovering 6-DoF EE poses via SLAM. The key insight from UMI (Chi et al., 2024) is that the action space must include **both EE trajectory and gripper width**, and actions should be expressed as **relative trajectories** (offsets from the current pose).
### Dataset layout
The dataset should have this structure:
| Feature | Shape | Content |
| ------------------------- | --------- | -------------------------------------------------------- |
| `observation.images.cam0` | `[3,H,W]` | Wrist camera image |
| `action` | `[8]` | `[x, y, z, ax, ay, az, proximal, distal]` (EE + gripper) |
No separate `observation.pose` or `observation.joints` columns are needed — the model derives its proprioception state directly from the action column (`derive_state_from_action=true`).
### Why relative actions?
With relative actions, each action in a chunk is an **offset from the current state** rather than an absolute target:
```
relative_action[i] = absolute_action[t + i] state[t]
```
UMI ablations show this is critical: absolute actions achieve only 25% success vs 100% for relative trajectory on the cup arrangement task. Compared to delta actions (each step relative to the previous), relative trajectory avoids error accumulation. See the [Action Representations](action_representations) guide for details.
### `derive_state_from_action`
When `derive_state_from_action=true`, pi0 derives `observation.state` from the action column during training — no separate state column needed. Under the hood:
- `action_delta_indices` extends to `[-1, 0, 1, ..., chunk_size-1]` (one extra leading timestep).
- `DeriveStateFromActionStep` extracts `[action[t-1], action[t]]` as a 2-step state and strips the extra timestep from the action chunk.
- `RelativeActionsProcessorStep` converts actions to offsets from `state[t]`.
- `RelativeStateProcessorStep` converts the 2-step state to relative proprioception (velocity + zeros) and flattens.
This implies `use_relative_state=true` and `state_obs_steps=2`.
During **inference**, `DeriveStateFromActionStep` is a no-op — state comes from the robot via forward kinematics. `RelativeStateProcessorStep` buffers the previous state and applies the same conversion automatically.
## Step 1: Recompute Stats
After preparing the dataset with EE pose in the action column, recompute statistics with `derive_state_from_action=true`. This computes relative action and state stats so the normalizer sees offset distributions:
```bash
lerobot-edit-dataset \
--repo-id=glannuzel/grabette-dataset \
--operation=recompute_stats \
--operation.relative_action=true \
--operation.relative_exclude_joints='["proximal", "distal"]' \
--operation.derive_state_from_action=true \
--operation.chunk_size=30 \
--push_to_hub=true
```
| Flag | Purpose |
| ------------------------------- | ------------------------------------------------------------------------------- |
| `relative_action=true` | Compute stats on `action state` (relative actions) |
| `relative_exclude_joints` | Keep gripper dims absolute (they don't benefit from relative encoding) |
| `derive_state_from_action=true` | Derive state from action column (implies `relative_state`, `state_obs_steps=2`) |
| `chunk_size=30` | Must match training chunk size |
## Step 2: Train
```bash
#!/bin/bash
set -euo pipefail
export LD_LIBRARY_PATH=$CONDA_PREFIX/lib:${LD_LIBRARY_PATH:-}
DATASET="glannuzel/grabette-dataset"
NUM_PROCESSES=8
echo "=== Training pi0 on $DATASET (UMI relative EE, ${NUM_PROCESSES} GPUs) ==="
accelerate launch --multi_gpu --num_processes=$NUM_PROCESSES \
-m lerobot.scripts.lerobot_train \
--dataset.repo_id="$DATASET" \
--dataset.video_backend=pyav \
--policy.type=pi0 \
--policy.pretrained_path=lerobot/pi0_base \
--policy.repo_id=pepijn/grabette-umi-pi0 \
--policy.chunk_size=30 \
--policy.n_action_steps=30 \
--policy.derive_state_from_action=true \
--use_relative_actions=true \
--policy.relative_exclude_joints='["proximal", "distal"]' \
--batch_size=32 \
--steps=5000 \
--policy.scheduler_decay_steps=5000 \
--policy.dtype=bfloat16 \
--policy.compile_model=false \
--policy.gradient_checkpointing=true \
--policy.device=cuda \
--output_dir=/fsx/pepijn/outputs/grabette-umi \
--job_name=grabette-umi-v2 \
--wandb.enable=true \
--wandb.disable_artifact=true \
--wandb.project=grabette-umi \
--log_freq=100 \
--save_freq=5000
```
Key flags:
| Flag | Purpose |
| ------------------------------- | ---------------------------------------------------------------------- |
| `derive_state_from_action=true` | Derive proprioception from action column (full UMI mode) |
| `use_relative_actions=true` | Actions are offsets from current state |
| `relative_exclude_joints` | `["proximal", "distal"]` — gripper stays absolute, EE pose is relative |
| `chunk_size=30` | Action horizon: 30 steps (~0.65s at 46 FPS) |
| `n_action_steps=30` | Execute full chunk before replanning |
Note: `derive_state_from_action=true` automatically implies `use_relative_state=true` and `state_obs_steps=2`. No `rename_map` is needed since there are no separate observation columns to rename.
## Step 3: Evaluate
The evaluation script in `examples/umi_pi0_relative_ee/evaluate.py` runs inference on a real OpenArm robot:
```bash
python examples/umi_pi0_relative_ee/evaluate.py
```
Edit `HF_MODEL_ID`, camera index, and robot configuration at the top of the file.
### How inference works
At inference, the training dataset has no `observation.state` — it was derived from actions. The evaluate script provides `observation.state` from the robot via forward kinematics:
1. **Robot → FK** — Arm joint positions → EE pose `[x,y,z,ax,ay,az]`, gripper → `[proximal, distal]`. Combined into `observation.state` (8D).
2. **Preprocessor** (loaded from checkpoint) — `DeriveStateFromActionStep` is a no-op. `RelativeStateProcessorStep` buffers previous state, stacks `[prev, current]`, subtracts current → velocity info. `RelativeActionsProcessorStep` caches state. `NormalizerProcessorStep` normalizes.
3. **pi0 inference** — Predicts normalized relative action chunk (30 steps).
4. **Postprocessor** — `UnnormalizerProcessorStep` unnormalizes, `AbsoluteActionsProcessorStep` adds cached state → absolute EE targets.
5. **IK → Robot** — Absolute `[x,y,z,ax,ay,az]` → arm joint targets with full 6-DOF IK (orientation weight = 1.0). `[proximal, distal]` → direct gripper position commands.
### Latency compensation
Set `LATENCY_SKIP_STEPS` to skip the first few predicted action steps, compensating for system latency:
```python
LATENCY_SKIP_STEPS = 7 # ceil(total_latency_ms / (1000 / FPS))
```
At 46 FPS (~22ms/step) with ~150ms total latency: `ceil(150/22) ≈ 7`. Start with 0 for a safe first test.
## Replay Viewer
Visualize any dataset episode in a browser-based 3D viewer before running on hardware. The viewer shows the EE trajectory overlaid on the OpenArm URDF model.
### Quick start
```bash
python examples/umi_pi0_relative_ee/replay.py
```
### Options
| Flag | Default | Description |
| ----------- | ---------------------------- | ------------------------------------ |
| `--repo-id` | `glannuzel/grabette-dataset` | HuggingFace dataset repo to load |
| `--episode` | `0` | Episode index to replay |
| `--port` | `8765` | HTTP server port |
| `--force` | off | Re-extract trajectory even if cached |
### Viewer controls
The panel in the top-left corner shows live EE coordinates and gripper state. Transport controls:
- **Play / Pause** — toggle automatic playback.
- **Step buttons** (◀ ▶) — advance or rewind one frame.
- **Reset** (⟳) — jump to frame 0.
- **Scrubber** — drag to seek.
- **Speed selector** — 0.25× to 4× playback speed.
### Color legend
| Color | Meaning |
| ------------------ | --------------------------------------------- |
| Red sphere | Current EE position |
| Yellow trail | Past trajectory |
| Dark trail | Future trajectory |
| Orange ring + axes | URDF `ee_target` frame (zero-joint reference) |
## How the Pieces Fit Together
```
Training (derive_state_from_action=true):
DataLoader loads action: [B, 31, 8] (chunk_size=30 + 1 leading)
→ DeriveStateFromActionStep
state = action[:, :2, :] → [B, 2, 8]
action = action[:, 1:, :] → [B, 30, 8]
→ RelativeActionsProcessorStep (action -= state[:, -1, :])
→ RelativeStateProcessorStep (state offsets from current, flatten → [B, 16])
→ NormalizerProcessorStep → pi0 model
Inference:
arm joints → FK → observation.state [8D: x,y,z,ax,ay,az,prox,dist]
DeriveStateFromActionStep (no-op)
RelativeActionsProcessorStep (caches state)
RelativeStateProcessorStep (buffers prev, stacks, subtracts, flattens)
NormalizerProcessorStep → pi0 model → relative action chunk [30, 8]
UnnormalizerProcessorStep
AbsoluteActionsProcessorStep (+ cached state → absolute EE)
IK → joint targets → robot
```
## References
- [UMI: Universal Manipulation Interface](https://umi-gripper.github.io) — Chi et al., 2024. Defines relative trajectory actions.
- [Action Representations](action_representations) — LeRobot guide comparing absolute, relative, and delta actions.
- [pi0 documentation](pi0) — Full pi0 configuration including `use_relative_actions`.
- [`examples/so100_to_so100_EE/`](https://github.com/huggingface/lerobot/tree/main/examples/so100_to_so100_EE) — EE-space evaluation example this builds on.
+200 -201
View File
@@ -1,72 +1,23 @@
# Unitree G1
<img
src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/lerobot/unitree_thumbnail.jpg"
alt="Unitree G1 locomanipulation demo"
style={{ width: "100%" }}
/>
This guide covers the complete setup process for the Unitree G1 humanoid, from initial connection to running gr00t_wbc locomotion.
The Unitree G1 humanoid is now supported in LeRobot! You can teleoperate, train locomanipulation policies, test in sim, and more. Both 29 and 23 DoF variants are supported.
## About
We support both 29 and 23 DOF G1 EDU version. We introduce:
- **`unitree g1` robot class, handling low level read/write from/to the humanoid**
- **ZMQ socket bridge** for remote communication and camera streaming, allowing for remote policy deployment over wlan, eth or directly on the robot
- **Locomotion policies** from NVIDIA gr00t and Amazon FAR Holosoma
- **Simulation mode** for testing policies without the physical robot in mujoco
---
## Part 1: Getting Started
## Connection guide
### Install the Unitree SDK
### Step 1: Configure Ethernet Interface
Follow the [unitree_sdk2_python installation guide](https://github.com/unitreerobotics/unitree_sdk2_python#installation). Tested with `unitree_sdk2py==1.0.1` and `cyclonedds==0.10.2`:
```bash
conda create -y -n lerobot python=3.12
conda activate lerobot
git clone https://github.com/unitreerobotics/unitree_sdk2_python.git
cd unitree_sdk2_python
pip install -e .
cd ..
```
### Install LeRobot
```bash
conda install ffmpeg -c conda-forge
conda install -c conda-forge "pinocchio>=3.0.0,<4.0.0"
git clone https://github.com/huggingface/lerobot.git
cd lerobot
pip install -e '.[unitree_g1]'
```
<Tip>
For now, pinocchio must be installed from conda-forge (not pip) to include the
CasADi bindings needed for arm IK.
</Tip>
### Test the Installation (Simulation)
The simulation environment has its own dependencies. Check the Simulation environment dependencies: [Unitree G1 Mujoco EnvHub](https://huggingface.co/lerobot/unitree-g1-mujoco/tree/main).
```bash
pip install mujoco loguru msgpack msgpack-numpy
```
```bash
lerobot-teleoperate \
--robot.type=unitree_g1 \
--robot.is_simulation=true \
--teleop.type=unitree_g1 \
--teleop.id=wbc_unitree \
--robot.cameras='{"global_view": {"type": "zmq", "server_address": "localhost", "port": 5555, "camera_name": "head_camera", "width": 640, "height": 480, "fps": 30, "warmup_s": 5}}' \
--display_data=true \
--robot.controller=GrootLocomotionController
```
This will launch a [MuJoCo sim instance](https://huggingface.co/lerobot/unitree-g1-mujoco/tree/main) for the G1. You can connect a gamepad to your machine before launching in order to control the robot's locomotion in sim. We support both [HolosomaLocomotionController](https://github.com/amazon-far/holosoma) and [GrootLocomotionController](https://github.com/NVlabs/GR00T-WholeBodyControl) via `--robot.controller`.
- Press `9` to release the robot
- Press `7` / `8` to increase / decrease waist height
### Connect to the Physical Robot
The G1's Ethernet IP is fixed at `192.168.123.164`. Your machine must have a static IP on the same subnet: `192.168.123.x` where `x ≠ 164`.
Set a static IP on the same subnet as the robot:
```bash
# Replace 'enp131s0' with your ethernet interface name (check with `ip a`)
@@ -75,23 +26,47 @@ sudo ip addr add 192.168.123.200/24 dev enp131s0
sudo ip link set enp131s0 up
```
### SSH into the Robot
**Note**: The G1's Ethernet IP is fixed at `192.168.123.164`. Your computer must use `192.168.123.x` with x ≠ 164.
### Step 2: SSH into the Robot
```bash
ssh unitree@192.168.123.164
# Password: 123
```
### Share Internet via Ethernet
You should now be connected to the G1's Orin.
The G1 needs internet access to clone repos and install packages. Share your laptop's connection over Ethernet:
---
## Part 2: Enable WiFi on the Robot
Wlan0 is disabled by default on the G1. To enable it:
### Step 1: Enable WiFi Hardware
```bash
sudo rfkill unblock wifi
sudo rfkill unblock all
# Bring up wlan0
sudo ip link set wlan0 up
# Enable NetworkManager control of wlan0
sudo nmcli radio wifi on
sudo nmcli device set wlan0 managed yes
sudo systemctl restart NetworkManager
```
### Step 2: Enable Internet Forwarding
**On your laptop:**
```bash
# Enable IP forwarding
sudo sysctl -w net.ipv4.ip_forward=1
# Replace wlp132s0f0 with your WiFi interface name
# Set up NAT (replace wlp132s0f0 with your WiFi interface)
sudo iptables -t nat -A POSTROUTING -o wlp132s0f0 -s 192.168.123.0/24 -j MASQUERADE
sudo iptables -A FORWARD -i wlp132s0f0 -o enp131s0 -m state --state RELATED,ESTABLISHED -j ACCEPT
sudo iptables -A FORWARD -i enp131s0 -o wlp132s0f0 -j ACCEPT
@@ -100,193 +75,217 @@ sudo iptables -A FORWARD -i enp131s0 -o wlp132s0f0 -j ACCEPT
**On the G1:**
```bash
# Add laptop as default gateway
sudo ip route del default 2>/dev/null || true
sudo ip route add default via 192.168.123.200 dev eth0
echo "nameserver 8.8.8.8" | sudo tee /etc/resolv.conf
# Verify
# Test connection
ping -c 3 8.8.8.8
```
### Install the Unitree SDK on the G1
Follow the [unitree_sdk2_python installation guide](https://github.com/unitreerobotics/unitree_sdk2_python#installation):
```bash
conda create -y -n lerobot python=3.12
conda activate lerobot
git clone https://github.com/unitreerobotics/unitree_sdk2_python.git
cd unitree_sdk2_python
python -m pip install -e .
cd ..
```
### Install LeRobot on the G1
```bash
git clone https://github.com/huggingface/lerobot.git
cd lerobot
conda install -c conda-forge "pinocchio>=3.0.0,<4.0.0"
python -m pip install -e '.[unitree_g1]'
```
<Tip>
For now, pinocchio must be installed from conda-forge (not pip) to include the
CasADi bindings needed for arm IK.
</Tip>
### (Optional) Enable WiFi on the Robot
For wireless SSH access, you can enable WiFi on the G1 (it's blocked by default):
```bash
sudo rfkill unblock all
sudo ip link set wlan0 up
sudo nmcli radio wifi on
sudo nmcli device set wlan0 managed yes
sudo systemctl restart NetworkManager
```
**Connect to a WiFi network:**
### Step 3: Connect to WiFi Network
```bash
# List available networks
nmcli device wifi list
# Connect to your WiFi (example)
sudo nmcli connection add type wifi ifname wlan0 con-name "YourNetwork" ssid "YourNetwork"
sudo nmcli connection modify "YourNetwork" wifi-sec.key-mgmt wpa-psk
sudo nmcli connection modify "YourNetwork" wifi-sec.psk "YourPassword"
sudo nmcli connection modify "YourNetwork" connection.autoconnect yes
sudo nmcli connection up "YourNetwork"
# Check WiFi IP address
ip a show wlan0
```
You can then SSH over WiFi instead of Ethernet:
### Step 4: SSH Over WiFi
Once connected to WiFi, note the robot's IP address and disconnect the Ethernet cable. You can now SSH over WiFi:
```bash
ssh unitree@<ROBOT_WIFI_IP>
ssh unitree@<YOUR_ROBOT_IP>
# Password: 123
```
---
## Part 2: Teleoperation & Locomotion
### Run the Robot Server
On the robot (from `~/lerobot`):
```bash
cd ~/lerobot
python src/lerobot/robots/unitree_g1/run_g1_server.py --camera
```
### Run the Locomotion Policy
You can run the teleoperation client from your laptop over Ethernet, over WiFi (experimental), or directly on the robot itself. Mind potential latency introduced by your network.
**From your laptop:**
```bash
lerobot-teleoperate \
--robot.type=unitree_g1 \
--robot.is_simulation=false \
--robot.robot_ip=<ROBOT_IP> \
--teleop.type=unitree_g1 \
--teleop.id=wbc_unitree \
--robot.cameras='{"global_view": {"type": "zmq", "server_address": "<ROBOT_IP>", "port": 5555, "camera_name": "head_camera", "width": 640, "height": 480, "fps": 30}}' \
--display_data=true \
--robot.controller=HolosomaLocomotionController
```
We support both [GrootLocomotionController](https://github.com/NVlabs/GR00T-WholeBodyControl) and [HolosomaLocomotionController](https://github.com/amazon-far/holosoma) via `--robot.controller`.
Replace `<YOUR_ROBOT_IP>` with your robot's actual WiFi IP address.
---
## Part 3: Loco-Manipulation with the Homunculus Exoskeleton
## Part 3: Robot Server Setup
We provide a loco-manipulation solution via the Homunculus Exoskeleton — an open-source 7 DoF exoskeleton for whole-body control. Check it out [here](https://github.com/nepyope/hmc_exo).
### Step 1: Install LeRobot on the Orin
### Calibrate
SSH into the robot and install LeRobot:
```bash
ssh unitree@<YOUR_ROBOT_IP>
conda create -y -n lerobot python=3.10
conda activate lerobot
git clone https://github.com/huggingface/lerobot.git
cd lerobot
pip install -e '.[unitree_g1]'
git clone https://github.com/unitreerobotics/unitree_sdk2_python.git
cd unitree_sdk2_python && pip install -e .
```
**Note**: The Unitree SDK requires CycloneDDS v0.10.2 to be installed. See the [Unitree SDK documentation](https://github.com/unitreerobotics/unitree_sdk2_python) for details.
### Step 2: Run the Robot Server
On the robot:
```bash
python src/lerobot/robots/unitree_g1/run_g1_server.py
```
**Important**: Keep this terminal running. The server must be active for remote control.
---
## Part 4: Controlling the robot
With the robot server running, you can now control the robot remotely. Let's launch a locomotion policy
### Step 1: Install LeRobot on your machine
```bash
conda create -y -n lerobot python=3.10
conda activate lerobot
git clone https://github.com/huggingface/lerobot.git
cd lerobot
pip install -e '.[unitree_g1]'
git clone https://github.com/unitreerobotics/unitree_sdk2_python.git
cd unitree_sdk2_python && pip install -e .
```
### Step 2: Update Robot IP in Config
Edit the config file to match your robot's WiFi IP:
```python
# In src/lerobot/robots/unitree_g1/config_unitree_g1.py
robot_ip: str = "<YOUR_ROBOT_IP>" # Replace with your robot's WiFi IP.
```
### Step 3: Run the Locomotion Policy
```bash
# Run GR00T locomotion controller
python examples/unitree_g1/gr00t_locomotion.py --repo-id "nepyope/GR00T-WholeBodyControl_g1"
# Run Holosoma locomotion controller
python examples/unitree_g1/holosoma_locomotion.py
```
Press `Ctrl+C` to stop the policy.
---
## Running in Simulation Mode (MuJoCo)
You can test policies before deploying on the physical robot using MuJoCo simulation. Set `is_simulation=True` in config or pass `--robot.is_simulation=true` via CLI.
### Calibrate Exoskeleton Teleoperator
```bash
lerobot-calibrate \
--teleop.type=unitree_g1 \
--teleop.left_arm_config.port=/dev/ttyACM1 \
--teleop.right_arm_config.port=/dev/ttyACM0 \
--teleop.id=exo
--teleop.type=unitree_g1 \
--teleop.left_arm_config.port=/dev/ttyACM1 \
--teleop.right_arm_config.port=/dev/ttyACM0 \
--teleop.id=exo
```
During calibration move each joint through its entire range. After fitting, move the joint in a neutral position and press `n` to advance.
### Record a Dataset
### Teleoperate in Simulation
```bash
lerobot-record \
--robot.type=unitree_g1 \
--robot.is_simulation=true \
--robot.cameras='{"global_view": {"type": "zmq", "server_address": "localhost", "port": 5555, "camera_name": "head_camera", "width": 640, "height": 480, "fps": 30}}' \
--teleop.type=unitree_g1 \
--teleop.left_arm_config.port=/dev/ttyACM1 \
--teleop.right_arm_config.port=/dev/ttyACM0 \
--teleop.id=exo \
--dataset.repo_id=your-username/dataset-name \
--dataset.single_task="Test" \
--dataset.num_episodes=2 \
--dataset.episode_time_s=5 \
--dataset.reset_time_s=5 \
--dataset.push_to_hub=true \
--dataset.streaming_encoding=true \
--dataset.encoder_threads=2
lerobot-teleoperate \
--robot.type=unitree_g1 \
--robot.is_simulation=true \
--teleop.type=unitree_g1 \
--teleop.left_arm_config.port=/dev/ttyACM1 \
--teleop.right_arm_config.port=/dev/ttyACM0 \
--teleop.id=exo \
--fps=100
```
> **Note:** Omit `--teleop.left_arm_config.port` and `--teleop.right_arm_config.port` if you're only using the joystick.
### Record Dataset in Simulation
Example dataset: [nepyope/unitree_box_move_blue_full](https://huggingface.co/datasets/nepyope/unitree_box_move_blue_full)
```bash
python -m lerobot.scripts.lerobot_record \
--robot.type=unitree_g1 \
--robot.is_simulation=true \
--robot.cameras='{"global_view": {"type": "zmq", "server_address": "localhost", "port": 5555, "camera_name": "head_camera", "width": 640, "height": 480, "fps": 30}}' \
--teleop.type=unitree_g1 \
--teleop.left_arm_config.port=/dev/ttyACM1 \
--teleop.right_arm_config.port=/dev/ttyACM0 \
--teleop.id=exo \
--dataset.repo_id=your-username/dataset-name \
--dataset.single_task="Test" \
--dataset.num_episodes=2 \
--dataset.episode_time_s=5 \
--dataset.reset_time_s=5 \
--dataset.push_to_hub=true
```
Example simulation dataset: [nepyope/teleop_test_sim](https://huggingface.co/datasets/nepyope/teleop_test_sim)
---
## Part 4: Training & Inference
## Running on Real Robot
### Train
Once the robot server is running on the G1 (see Part 3), you can teleoperate and record on the real robot.
### Start the Camera Server
On the robot, start the ZMQ image server:
```bash
python src/lerobot/scripts/lerobot_train.py \
--dataset.repo_id=your-username/dataset-name \
--policy.type=pi05 \
--output_dir=./outputs/pi05_training \
--job_name=pi05_training \
--policy.repo_id=your-username/your-repo-id \
--policy.pretrained_path=lerobot/pi05_base \
--policy.compile_model=true \
--policy.gradient_checkpointing=true \
--wandb.enable=true \
--policy.dtype=bfloat16 \
--policy.freeze_vision_encoder=false \
--policy.train_expert_only=false \
--steps=3000 \
--policy.device=cuda \
--batch_size=32
python src/lerobot/cameras/zmq/image_server.py
```
### Inference with RTC
Keep this running in a separate terminal for camera streaming during recording.
Once trained, we recommend deploying policies using inference-time RTC:
### Teleoperate Real Robot
```bash
python examples/rtc/eval_with_real_robot.py \
--policy.path=your-username/your-repo-id \
--policy.device=cuda \
--robot.type=unitree_g1 \
--robot.is_simulation=false \
--robot.controller=HolosomaLocomotionController \
--robot.cameras='{"global_view": {"type": "zmq", "server_address": "<ROBOT_IP>", "port": 5555, "camera_name": "head_camera", "width": 640, "height": 480, "fps": 30}}' \
--task="task_description" \
--duration=1000 \
--fps=30 \
--rtc.enabled=true
lerobot-teleoperate \
--robot.type=unitree_g1 \
--robot.is_simulation=false \
--teleop.type=unitree_g1 \
--teleop.left_arm_config.port=/dev/ttyACM1 \
--teleop.right_arm_config.port=/dev/ttyACM0 \
--teleop.id=exo \
--fps=100
```
### Record Dataset on Real Robot
```bash
python -m lerobot.scripts.lerobot_record \
--robot.type=unitree_g1 \
--robot.is_simulation=false \
--robot.cameras='{"global_view": {"type": "zmq", "server_address": "172.18.129.215", "port": 5555, "camera_name": "head_camera", "width": 640, "height": 480, "fps": 30}}' \
--teleop.type=unitree_g1 \
--teleop.left_arm_config.port=/dev/ttyACM1 \
--teleop.right_arm_config.port=/dev/ttyACM0 \
--teleop.id=exo \
--dataset.repo_id=your-username/dataset-name \
--dataset.single_task="Test" \
--dataset.num_episodes=2 \
--dataset.episode_time_s=5 \
--dataset.reset_time_s=5 \
--dataset.push_to_hub=true
```
**Note**: Update `server_address` to match your robot's camera server IP.
Example real robot dataset: [nepyope/teleop_test_real](https://huggingface.co/datasets/nepyope/teleop_test_real)
---
## Additional Resources
@@ -295,8 +294,8 @@ python examples/rtc/eval_with_real_robot.py \
- [GR00T-WholeBodyControl](https://github.com/NVlabs/GR00T-WholeBodyControl)
- [Holosoma](https://github.com/amazon-far/holosoma)
- [LeRobot Documentation](https://github.com/huggingface/lerobot)
- [Unitree IL LeRobot](https://github.com/unitreerobotics/unitree_IL_lerobot)
- [Unitree_IL_Lerobot](https://github.com/unitreerobotics/unitree_IL_lerobot)
---
_Last updated: March 2026_
_Last updated: December 2025_
-25
View File
@@ -12,7 +12,6 @@ LeRobot provides several utilities for manipulating datasets:
4. **Add Features** - Add new features to a dataset
5. **Remove Features** - Remove features from a dataset
6. **Convert to Video** - Convert image-based datasets to video format for efficient storage
7. **Show the Info of Datasets** - Show the summary of datasets information such as number of episode etc.
The core implementation is in `lerobot.datasets.dataset_tools`.
An example script detailing how to use the tools API is available in `examples/dataset/use_dataset_tools.py`.
@@ -157,30 +156,6 @@ lerobot-edit-dataset \
**Note:** The resulting dataset will be a proper LeRobotDataset with all cameras encoded as videos in the `videos/` directory, with parquet files containing only metadata (no raw image data). All episodes, stats, and tasks are preserved.
### Show the information of datasets
Show the information of datasets such as number of episode, number of frame, File size and so on.
No change will be made to the dataset
```bash
# Show dataset information without feature details
lerobot-edit-dataset \
--repo_id lerobot/pusht_image \
--operation.type info \
# Show dataset information with feature details
lerobot-edit-dataset \
--repo_id lerobot/pusht_image \
--operation.type info \
--operation.show_features true
```
**Parameters:**
- `parameters`: The flag to control show or no show dataset information with feature details.(default=false)
### Push to Hub
Add the `--push_to_hub true` flag to any command to automatically upload the resulting dataset to the Hugging Face Hub:
+1 -1
View File
@@ -45,7 +45,7 @@ policy.type=wall_x
For training WallX, you can use the standard LeRobot training script with the appropriate configuration:
```bash
lerobot-train \
python src/lerobot/scripts/lerobot_train.py \
--dataset.repo_id=your_dataset \
--policy.type=wall_x \
--output_dir=./outputs/wallx_training \
+1 -1
View File
@@ -154,7 +154,7 @@ lerobot-train \
```bash
lerobot-train \
--dataset.repo_id=<USER>/bimanual-so100-handover-cube \
--dataset.repo_id=pepijn223/bimanual-so100-handover-cube \
--output_dir=./outputs/xvla_bimanual \
--job_name=xvla_so101_training \
--policy.path="lerobot/xvla-base" \
+3 -3
View File
@@ -22,7 +22,7 @@ lerobot-replay \
--robot.type=so100_follower \
--robot.port=/dev/tty.usbmodem58760431541 \
--robot.id=black \
--dataset.repo_id=<USER>/record-test \
--dataset.repo_id=aliberts/record-test \
--dataset.episode=2
```
"""
@@ -57,7 +57,7 @@ class DatasetReplayConfig:
repo_id: str
# Episode to replay.
episode: int
# Root directory where the dataset will be stored (e.g. 'dataset/path'). If None, defaults to $HF_LEROBOT_HOME/repo_id.
# Root directory where the dataset will be stored (e.g. 'dataset/path').
root: str | Path | None = None
# Limit the frames per second. By default, uses the policy fps.
fps: int = 30
@@ -78,7 +78,7 @@ def replay(cfg: ReplayConfig):
robot = make_robot_from_config(cfg.robot)
dataset = LeRobotDataset(cfg.dataset.repo_id, root=cfg.dataset.root, episodes=[cfg.dataset.episode])
actions = dataset.select_columns(ACTION)
actions = dataset.hf_dataset.select_columns(ACTION)
robot.connect()
try:
+4 -4
View File
@@ -32,8 +32,7 @@ import torch
from huggingface_hub import HfApi
import lerobot
from lerobot.datasets.dataset_metadata import LeRobotDatasetMetadata
from lerobot.datasets.lerobot_dataset import LeRobotDataset
from lerobot.datasets.lerobot_dataset import LeRobotDataset, LeRobotDatasetMetadata
def main():
@@ -88,8 +87,9 @@ def main():
# The previous metadata class is contained in the 'meta' attribute of the dataset:
print(dataset.meta)
# You can inspect the dataset using its repr:
print(dataset)
# LeRobotDataset actually wraps an underlying Hugging Face dataset
# (see https://huggingface.co/docs/datasets for more information).
print(dataset.hf_dataset)
# LeRobot datasets also subclasses PyTorch datasets so you can do everything you know and love from working
# with the latter, like iterating through the dataset.
-490
View File
@@ -1,490 +0,0 @@
#!/usr/bin/env python
# Copyright 2025 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
SLURM-distributed SARM RA-BC annotation pipeline.
Computes SARM progress values for all frames in a dataset, distributed across
SLURM workers, then merges the shards into a single sarm_progress.parquet.
Two subcommands, each a separate SLURM submission:
compute N workers, each computes progress for a subset of episodes
aggregate 1 worker, merges N shards into sarm_progress.parquet, pushes to hub
Usage:
python slurm_compute_rabc.py compute \\
--repo-id user/dataset --reward-model-path user/sarm_model \\
--stride 10 --device cpu --workers 50 --partition cpu
python slurm_compute_rabc.py aggregate \\
--repo-id user/dataset --reward-model-path user/sarm_model \\
--partition cpu --push-to-hub
"""
import argparse
from pathlib import Path
from datatrove.executor import LocalPipelineExecutor
from datatrove.executor.slurm import SlurmPipelineExecutor
from datatrove.pipeline.base import PipelineStep
class ComputeProgressShards(PipelineStep):
"""Each worker computes SARM progress for its assigned episodes."""
def __init__(
self, repo_id, reward_model_path, stride=1, head_mode="sparse", device="cpu", shard_dir="rabc_shards"
):
super().__init__()
if stride < 1:
raise ValueError(f"stride must be >= 1, got {stride}")
self.repo_id = repo_id
self.reward_model_path = reward_model_path
self.stride = stride
self.head_mode = head_mode
self.device = device
self.shard_dir = shard_dir
def run(self, data=None, rank: int = 0, world_size: int = 1):
import logging
from pathlib import Path
import numpy as np
import pyarrow as pa
import pyarrow.parquet as pq
import torch
from tqdm import tqdm
from lerobot.policies.sarm.compute_rabc_weights import (
generate_all_frame_indices,
interpolate_progress,
load_sarm_resources,
)
from lerobot.utils.utils import init_logging
init_logging()
dataset, reward_model, preprocess = load_sarm_resources(
self.repo_id,
self.reward_model_path,
self.device,
)
if hasattr(preprocess, "eval"):
preprocess.eval()
for step in preprocess.steps:
if hasattr(step, "eval"):
step.eval()
image_key = reward_model.config.image_key
state_key = reward_model.config.state_key
frame_gap = reward_model.config.frame_gap
center_idx = reward_model.config.n_obs_steps // 2
dual_mode = reward_model.config.uses_dual_heads
compute_sparse = self.head_mode in ("sparse", "both") or not dual_mode
compute_dense = self.head_mode in ("dense", "both") and dual_mode
my_episodes = list(range(dataset.num_episodes))[rank::world_size]
if not my_episodes:
logging.info(f"Rank {rank}: no episodes assigned")
return
logging.info(f"Rank {rank}: {len(my_episodes)} / {dataset.num_episodes} episodes")
all_rows = []
for ep_idx in tqdm(my_episodes, desc=f"Rank {rank}"):
ep = dataset.meta.episodes[ep_idx]
ep_start, ep_end = ep["dataset_from_index"], ep["dataset_to_index"]
task = dataset[ep_start].get("task", "perform the task")
all_ep_indices = generate_all_frame_indices(ep_start, ep_end, frame_gap)
if self.stride > 1:
compute_indices = [i for i in all_ep_indices if (i - ep_start) % self.stride == 0]
if (ep_end - 1) not in compute_indices:
compute_indices.append(ep_end - 1)
compute_indices = sorted(set(compute_indices))
else:
compute_indices = all_ep_indices
frame_results = {}
for qi in tqdm(compute_indices, desc=f" Ep {ep_idx}", leave=False):
try:
sample = dataset[qi]
batch = {
image_key: sample[image_key],
"task": task,
"index": qi,
"episode_index": ep_idx,
}
if state_key in sample:
batch[state_key] = sample[state_key]
with torch.no_grad():
processed = preprocess(batch)
vf = processed["video_features"].to(self.device)
tf = processed["text_features"].to(self.device)
sf = processed.get("state_features")
if sf is not None:
sf = sf.to(self.device)
lengths = processed.get("lengths")
sparse_val = dense_val = np.nan
if compute_sparse:
r = reward_model.calculate_rewards(
text_embeddings=tf,
video_embeddings=vf,
state_features=sf,
lengths=lengths,
return_all_frames=True,
head_mode="sparse",
)
sparse_val = float(r[0, center_idx] if r.ndim == 2 else r[center_idx])
if compute_dense:
r = reward_model.calculate_rewards(
text_embeddings=tf,
video_embeddings=vf,
state_features=sf,
lengths=lengths,
return_all_frames=True,
head_mode="dense",
)
dense_val = float(r[0, center_idx] if r.ndim == 2 else r[center_idx])
frame_results[qi] = (sparse_val, dense_val)
except Exception as e:
logging.warning(f"Failed frame {qi}: {e}")
if not frame_results:
logging.warning(f"Episode {ep_idx}: all frames failed, skipping")
continue
# Interpolate to all frames in this episode
computed_idx = np.array(sorted(frame_results.keys()))
all_frame_arr = np.arange(ep_start, ep_end)
sparse_vals = np.array([frame_results[i][0] for i in computed_idx]) if compute_sparse else None
dense_vals = np.array([frame_results[i][1] for i in computed_idx]) if compute_dense else None
if self.stride > 1 and len(computed_idx) > 1:
if compute_sparse:
sparse_vals = interpolate_progress(computed_idx, sparse_vals, all_frame_arr)
if compute_dense:
dense_vals = interpolate_progress(computed_idx, dense_vals, all_frame_arr)
output_frames = all_frame_arr
else:
# Use only successfully computed frames to avoid indexing mismatch on failures
output_frames = computed_idx
for i, fi in enumerate(output_frames):
row = {"index": int(fi), "episode_index": ep_idx, "frame_index": int(fi - ep_start)}
if compute_sparse:
row["progress_sparse"] = float(sparse_vals[i])
if compute_dense:
row["progress_dense"] = float(dense_vals[i])
all_rows.append(row)
if all_rows:
import pandas as pd
df = pd.DataFrame(all_rows).sort_values("index").reset_index(drop=True)
table = pa.Table.from_pandas(df, preserve_index=False)
table = table.replace_schema_metadata({b"reward_model_path": self.reward_model_path.encode()})
shard_dir = Path(self.shard_dir)
shard_dir.mkdir(parents=True, exist_ok=True)
out = shard_dir / f"shard_{rank:05d}.parquet"
pq.write_table(table, out)
logging.info(f"Rank {rank}: saved {len(df)} rows to {out}")
class AggregateProgress(PipelineStep):
"""Merge all shard parquets into final sarm_progress.parquet."""
def __init__(self, repo_id, reward_model_path, shard_dir="rabc_shards", push_to_hub=False):
super().__init__()
self.repo_id = repo_id
self.reward_model_path = reward_model_path
self.shard_dir = shard_dir
self.push_to_hub = push_to_hub
def run(self, data=None, rank: int = 0, world_size: int = 1):
import datetime
import logging
import os
from pathlib import Path
import pandas as pd
import pyarrow as pa
import pyarrow.parquet as pq
from lerobot.datasets.lerobot_dataset import LeRobotDataset
from lerobot.utils.utils import init_logging
init_logging()
if rank != 0:
return
shard_dir = Path(self.shard_dir)
shards = sorted(shard_dir.glob("shard_*.parquet"))
if not shards:
raise FileNotFoundError(f"No shards found in {shard_dir}")
# Log shard modification time range to help detect stale files
mtimes = [os.path.getmtime(s) for s in shards]
oldest = datetime.datetime.fromtimestamp(min(mtimes)).isoformat(timespec="seconds")
newest = datetime.datetime.fromtimestamp(max(mtimes)).isoformat(timespec="seconds")
logging.info(f"Aggregating {len(shards)} shards (oldest: {oldest}, newest: {newest})")
df = pd.concat([pd.read_parquet(s) for s in shards], ignore_index=True)
df = df.sort_values("index").reset_index(drop=True)
table = pa.Table.from_pandas(df, preserve_index=False)
table = table.replace_schema_metadata({b"reward_model_path": self.reward_model_path.encode()})
temp_ds = LeRobotDataset(self.repo_id, download_videos=False)
out_path = Path(temp_ds.root) / "sarm_progress.parquet"
out_path.parent.mkdir(parents=True, exist_ok=True)
pq.write_table(table, out_path)
logging.info(f"Saved {len(df)} rows to {out_path}")
for col in ["progress_sparse", "progress_dense"]:
if col in df.columns:
v = df[col].dropna()
logging.info(
f"{col}: mean={v.mean():.4f} std={v.std():.4f} min={v.min():.4f} max={v.max():.4f}"
)
if self.push_to_hub:
from huggingface_hub import HfApi
api = HfApi()
hub_path = "sarm_progress.parquet"
logging.info(f"Uploading to {self.repo_id}/{hub_path}")
api.upload_file(
path_or_fileobj=str(out_path),
path_in_repo=hub_path,
repo_id=self.repo_id,
repo_type="dataset",
)
logging.info(f"Uploaded: https://huggingface.co/datasets/{self.repo_id}/blob/main/{hub_path}")
def make_compute_executor(
repo_id,
reward_model_path,
stride,
head_mode,
device,
shard_dir,
logs_dir,
job_name,
slurm,
workers,
partition,
cpus_per_task,
mem_per_cpu,
):
kwargs = {
"pipeline": [
ComputeProgressShards(repo_id, reward_model_path, stride, head_mode, device, str(shard_dir)),
],
"logging_dir": str(logs_dir / job_name),
}
if slurm:
kwargs.update(
{
"job_name": job_name,
"tasks": workers,
"workers": workers,
"time": "24:00:00",
"partition": partition,
"cpus_per_task": cpus_per_task,
"sbatch_args": {"mem-per-cpu": mem_per_cpu},
}
)
return SlurmPipelineExecutor(**kwargs)
kwargs.update({"tasks": workers, "workers": 1})
return LocalPipelineExecutor(**kwargs)
def make_aggregate_executor(
repo_id,
reward_model_path,
shard_dir,
logs_dir,
job_name,
slurm,
partition,
cpus_per_task,
mem_per_cpu,
push_to_hub,
):
kwargs = {
"pipeline": [
AggregateProgress(repo_id, reward_model_path, str(shard_dir), push_to_hub),
],
"logging_dir": str(logs_dir / job_name),
}
if slurm:
kwargs.update(
{
"job_name": job_name,
"tasks": 1,
"workers": 1,
"time": "02:00:00",
"partition": partition,
"cpus_per_task": cpus_per_task,
"sbatch_args": {"mem-per-cpu": mem_per_cpu},
}
)
return SlurmPipelineExecutor(**kwargs)
kwargs.update({"tasks": 1, "workers": 1})
return LocalPipelineExecutor(**kwargs)
def _add_shared_args(p):
p.add_argument(
"--repo-id",
type=str,
required=True,
help="Hugging Face repository identifier, e.g. 'user/dataset'.",
)
p.add_argument(
"--shard-dir",
type=Path,
default=Path("rabc_shards"),
help="Directory to read/write per-rank parquet shards.",
)
p.add_argument(
"--logs-dir",
type=Path,
default=Path("logs"),
help="Directory for datatrove logs.",
)
p.add_argument(
"--job-name",
type=str,
default=None,
help="SLURM job name (defaults to rabc_<subcommand>).",
)
p.add_argument(
"--slurm",
type=int,
default=1,
help="1 = submit via SLURM; 0 = run locally (useful for debugging).",
)
p.add_argument(
"--partition",
type=str,
default=None,
help="SLURM partition to submit to.",
)
p.add_argument(
"--cpus-per-task",
type=int,
default=4,
help="Number of CPUs per SLURM task.",
)
p.add_argument(
"--mem-per-cpu",
type=str,
default="4G",
help="Memory per CPU, e.g. '4G' or '1950M'.",
)
def main():
parser = argparse.ArgumentParser(
description="SLURM-distributed SARM RA-BC annotation pipeline",
formatter_class=argparse.RawDescriptionHelpFormatter,
)
sub = parser.add_subparsers(dest="command", required=True)
# compute subcommand
cp = sub.add_parser(
"compute",
help="Distribute progress computation across SLURM workers.",
)
_add_shared_args(cp)
cp.add_argument(
"--reward-model-path",
type=str,
required=True,
help="Path or HF repo id of the SARM reward model.",
)
cp.add_argument(
"--stride",
type=int,
default=1,
help="Compute every Nth frame; intermediate frames are interpolated (must be >= 1).",
)
cp.add_argument(
"--head-mode",
type=str,
default="sparse",
choices=["sparse", "dense", "both"],
help="Which reward head(s) to compute.",
)
cp.add_argument(
"--device",
type=str,
default="cpu",
help="Device for reward model inference, e.g. 'cpu' or 'cuda'.",
)
cp.add_argument(
"--workers",
type=int,
default=50,
help="Number of parallel SLURM tasks (one shard per worker).",
)
# aggregate subcommand
ap = sub.add_parser(
"aggregate",
help="Merge per-rank shards into a single sarm_progress.parquet.",
)
_add_shared_args(ap)
ap.add_argument(
"--reward-model-path",
type=str,
required=True,
help="Path or HF repo id of the SARM reward model (stored in parquet metadata).",
)
ap.add_argument(
"--push-to-hub",
action="store_true",
help="Upload sarm_progress.parquet to the Hugging Face Hub after aggregation.",
)
args = parser.parse_args()
job_name = args.job_name or f"rabc_{args.command}"
kwargs = vars(args)
kwargs["slurm"] = kwargs.pop("slurm") == 1
kwargs["job_name"] = job_name
command = kwargs.pop("command")
executor = make_compute_executor(**kwargs) if command == "compute" else make_aggregate_executor(**kwargs)
executor.run()
if __name__ == "__main__":
main()
+1 -1
View File
@@ -14,8 +14,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from lerobot.datasets.feature_utils import hw_to_dataset_features
from lerobot.datasets.lerobot_dataset import LeRobotDataset
from lerobot.datasets.utils import hw_to_dataset_features
from lerobot.policies.act.modeling_act import ACTPolicy
from lerobot.policies.factory import make_pre_post_processors
from lerobot.processor import make_default_processors
+1 -1
View File
@@ -14,8 +14,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from lerobot.datasets.feature_utils import hw_to_dataset_features
from lerobot.datasets.lerobot_dataset import LeRobotDataset
from lerobot.datasets.utils import hw_to_dataset_features
from lerobot.processor import make_default_processors
from lerobot.robots.lekiwi.config_lekiwi import LeKiwiClientConfig
from lerobot.robots.lekiwi.lekiwi_client import LeKiwiClient
+4 -2
View File
@@ -35,7 +35,9 @@ def main():
# Fetch the dataset to replay
dataset = LeRobotDataset("<hf_username>/<dataset_repo_id>", episodes=[EPISODE_IDX])
actions = dataset.select_columns(ACTION)
# Filter dataset to only include frames from the specified episode since episodes are chunked in dataset V3.0
episode_frames = dataset.hf_dataset.filter(lambda x: x["episode_index"] == EPISODE_IDX)
actions = episode_frames.select_columns(ACTION)
# Connect to the robot
robot.connect()
@@ -46,7 +48,7 @@ def main():
print("Starting replay loop...")
log_say(f"Replaying episode {EPISODE_IDX}")
for idx in range(dataset.num_frames):
for idx in range(len(episode_frames)):
t0 = time.perf_counter()
# Get recorded action from dataset
+3 -2
View File
@@ -16,13 +16,15 @@
from lerobot.cameras.opencv.configuration_opencv import OpenCVCameraConfig
from lerobot.configs.types import FeatureType, PolicyFeature
from lerobot.datasets.feature_utils import combine_feature_dicts
from lerobot.datasets.lerobot_dataset import LeRobotDataset
from lerobot.datasets.pipeline_features import aggregate_pipeline_dataset_features, create_initial_features
from lerobot.datasets.utils import combine_feature_dicts
from lerobot.model.kinematics import RobotKinematics
from lerobot.policies.act.modeling_act import ACTPolicy
from lerobot.policies.factory import make_pre_post_processors
from lerobot.processor import (
RobotAction,
RobotObservation,
RobotProcessorPipeline,
make_default_teleop_action_processor,
)
@@ -38,7 +40,6 @@ from lerobot.robots.so_follower.robot_kinematic_processor import (
InverseKinematicsEEToJoints,
)
from lerobot.scripts.lerobot_record import record_loop
from lerobot.types import RobotAction, RobotObservation
from lerobot.utils.control_utils import init_keyboard_listener
from lerobot.utils.utils import log_say
from lerobot.utils.visualization_utils import init_rerun
+2 -3
View File
@@ -15,11 +15,11 @@
# limitations under the License.
from lerobot.cameras.opencv.configuration_opencv import OpenCVCameraConfig
from lerobot.datasets.feature_utils import combine_feature_dicts
from lerobot.datasets.lerobot_dataset import LeRobotDataset
from lerobot.datasets.pipeline_features import aggregate_pipeline_dataset_features, create_initial_features
from lerobot.datasets.utils import combine_feature_dicts
from lerobot.model.kinematics import RobotKinematics
from lerobot.processor import RobotProcessorPipeline
from lerobot.processor import RobotAction, RobotObservation, RobotProcessorPipeline
from lerobot.processor.converters import (
observation_to_transition,
robot_action_observation_to_transition,
@@ -38,7 +38,6 @@ from lerobot.scripts.lerobot_record import record_loop
from lerobot.teleoperators.phone.config_phone import PhoneConfig, PhoneOS
from lerobot.teleoperators.phone.phone_processor import MapPhoneActionToRobotAction
from lerobot.teleoperators.phone.teleop_phone import Phone
from lerobot.types import RobotAction, RobotObservation
from lerobot.utils.control_utils import init_keyboard_listener
from lerobot.utils.utils import log_say
from lerobot.utils.visualization_utils import init_rerun
+5 -4
View File
@@ -18,7 +18,7 @@ import time
from lerobot.datasets.lerobot_dataset import LeRobotDataset
from lerobot.model.kinematics import RobotKinematics
from lerobot.processor import RobotProcessorPipeline
from lerobot.processor import RobotAction, RobotObservation, RobotProcessorPipeline
from lerobot.processor.converters import (
robot_action_observation_to_transition,
transition_to_robot_action,
@@ -27,7 +27,6 @@ from lerobot.robots.so_follower import SO100Follower, SO100FollowerConfig
from lerobot.robots.so_follower.robot_kinematic_processor import (
InverseKinematicsEEToJoints,
)
from lerobot.types import RobotAction, RobotObservation
from lerobot.utils.constants import ACTION
from lerobot.utils.robot_utils import precise_sleep
from lerobot.utils.utils import log_say
@@ -67,7 +66,9 @@ def main():
# Fetch the dataset to replay
dataset = LeRobotDataset(HF_REPO_ID, episodes=[EPISODE_IDX])
actions = dataset.select_columns(ACTION)
# Filter dataset to only include frames from the specified episode since episodes are chunked in dataset V3.0
episode_frames = dataset.hf_dataset.filter(lambda x: x["episode_index"] == EPISODE_IDX)
actions = episode_frames.select_columns(ACTION)
# Connect to the robot
robot.connect()
@@ -78,7 +79,7 @@ def main():
print("Starting replay loop...")
log_say(f"Replaying episode {EPISODE_IDX}")
for idx in range(dataset.num_frames):
for idx in range(len(episode_frames)):
t0 = time.perf_counter()
# Get recorded action from dataset
+1 -2
View File
@@ -16,7 +16,7 @@
import time
from lerobot.model.kinematics import RobotKinematics
from lerobot.processor import RobotProcessorPipeline
from lerobot.processor import RobotAction, RobotObservation, RobotProcessorPipeline
from lerobot.processor.converters import (
robot_action_observation_to_transition,
transition_to_robot_action,
@@ -31,7 +31,6 @@ from lerobot.robots.so_follower.robot_kinematic_processor import (
from lerobot.teleoperators.phone.config_phone import PhoneConfig, PhoneOS
from lerobot.teleoperators.phone.phone_processor import MapPhoneActionToRobotAction
from lerobot.teleoperators.phone.teleop_phone import Phone
from lerobot.types import RobotAction, RobotObservation
from lerobot.utils.robot_utils import precise_sleep
from lerobot.utils.visualization_utils import init_rerun, log_rerun_data
+1 -2
View File
@@ -22,8 +22,7 @@ from pathlib import Path
import numpy as np
import tensorflow_datasets as tfds
from lerobot.datasets.dataset_metadata import LeRobotDatasetMetadata
from lerobot.datasets.lerobot_dataset import LeRobotDataset
from lerobot.datasets.lerobot_dataset import LeRobotDataset, LeRobotDatasetMetadata
from lerobot.utils.utils import get_elapsed_time_in_days_hours_minutes_seconds
DROID_SHARDS = 2048
@@ -0,0 +1,480 @@
#!/usr/bin/env python
# Copyright 2025 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Mirror a bimanual robot dataset using SLURM for distributed video processing.
This script creates a mirrored version of a dataset where:
1. Left and right arm observations/actions are swapped
2. Joint values are inverted according to a mirroring mask
3. Video frames are horizontally flipped (parallelized via SLURM)
Example usage:
```shell
# SLURM execution
python examples/port_datasets/slurm_mirror_dataset.py \
--repo-id pepijn/openarm_bimanual \
--output-repo-id pepijn/openarm_bimanual_mirrored \
--logs-dir /fsx/user/logs \
--partition hopper-cpu
# Local execution (for debugging)
python examples/port_datasets/slurm_mirror_dataset.py \
--repo-id pepijn/openarm_bimanual \
--output-repo-id pepijn/openarm_bimanual_mirrored \
--slurm 0 \
--push-to-hub
```
"""
import argparse
import logging
from pathlib import Path
from datatrove.executor import LocalPipelineExecutor
from datatrove.executor.slurm import SlurmPipelineExecutor
from datatrove.pipeline.base import PipelineStep
logger = logging.getLogger(__name__)
OPENARM_MIRRORING_MASK = {
"joint_1": -1,
"joint_2": -1,
"joint_3": -1,
"joint_4": 1,
"joint_5": -1,
"joint_6": -1,
"joint_7": -1,
"gripper": 1,
}
class MirrorVideos(PipelineStep):
"""Pipeline step that mirrors video files for assigned episodes."""
def __init__(
self,
repo_id: str,
output_repo_id: str,
root: str | None = None,
output_root: str | None = None,
vcodec: str = "libsvtav1",
):
super().__init__()
self.repo_id = repo_id
self.output_repo_id = output_repo_id
self.root = root
self.output_root = output_root
self.vcodec = vcodec
def run(self, data=None, rank: int = 0, world_size: int = 1):
import logging
import subprocess
from pathlib import Path
from datasets.utils.tqdm import disable_progress_bars
from lerobot.datasets.lerobot_dataset import LeRobotDataset
from lerobot.utils.constants import HF_LEROBOT_HOME
from lerobot.utils.utils import init_logging
init_logging()
disable_progress_bars()
logger = logging.getLogger(__name__)
def swap_left_right_name(name: str) -> str:
result = name.replace("left_", "LEFT_PLACEHOLDER_")
result = result.replace("right_", "left_")
result = result.replace("LEFT_PLACEHOLDER_", "right_")
return result
def flip_video_frames(input_path: Path, output_path: Path, fps: float, vcodec: str):
output_path.parent.mkdir(parents=True, exist_ok=True)
cmd = [
"ffmpeg", "-y", "-i", str(input_path),
"-vf", "hflip",
"-c:v", vcodec,
"-g", "2",
"-crf", "30",
"-r", str(int(fps)),
"-pix_fmt", "yuv420p",
"-loglevel", "error",
]
if vcodec == "libsvtav1":
cmd.extend(["-preset", "12"])
cmd.append(str(output_path))
result = subprocess.run(cmd, capture_output=True, text=True)
if result.returncode != 0:
raise RuntimeError(f"FFmpeg failed: {result.stderr}")
def video_is_valid(path: Path) -> bool:
if not path.exists():
return False
try:
result = subprocess.run(
["ffprobe", "-v", "error", "-select_streams", "v:0",
"-show_entries", "stream=nb_frames", "-of", "csv=p=0", str(path)],
capture_output=True, text=True, timeout=30
)
return result.returncode == 0 and result.stdout.strip().isdigit()
except Exception:
return False
root = Path(self.root) if self.root else None
output_root = Path(self.output_root) if self.output_root else None
dataset = LeRobotDataset(self.repo_id, root=root)
output_root = output_root or (HF_LEROBOT_HOME / self.output_repo_id)
if not dataset.meta.video_keys:
logger.info(f"Rank {rank}: No videos to process")
return
video_tasks = []
for old_video_key in dataset.meta.video_keys:
new_video_key = swap_left_right_name(old_video_key)
for ep_idx in range(dataset.meta.total_episodes):
try:
src_path = dataset.root / dataset.meta.get_video_file_path(ep_idx, old_video_key)
dst_relative = dataset.meta.get_video_file_path(ep_idx, old_video_key)
dst_relative_str = str(dst_relative).replace(old_video_key, new_video_key)
dst_path = output_root / dst_relative_str
if src_path.exists():
video_tasks.append((src_path, dst_path, ep_idx, old_video_key))
except KeyError:
continue
my_tasks = [t for i, t in enumerate(video_tasks) if i % world_size == rank]
logger.info(f"Rank {rank}/{world_size}: Processing {len(my_tasks)}/{len(video_tasks)} videos")
for src_path, dst_path, ep_idx, video_key in my_tasks:
if video_is_valid(dst_path):
logger.info(f"Rank {rank}: Skipping {dst_path.name} (already done)")
continue
logger.info(f"Rank {rank}: Processing {src_path.name} -> {dst_path.name}")
flip_video_frames(src_path, dst_path, dataset.meta.fps, self.vcodec)
class MirrorDataAndMetadata(PipelineStep):
"""Pipeline step that mirrors parquet data and metadata (runs once on rank 0)."""
def __init__(
self,
repo_id: str,
output_repo_id: str,
root: str | None = None,
output_root: str | None = None,
):
super().__init__()
self.repo_id = repo_id
self.output_repo_id = output_repo_id
self.root = root
self.output_root = output_root
def run(self, data=None, rank: int = 0, world_size: int = 1):
if rank != 0:
return
import logging
from pathlib import Path
import numpy as np
import pandas as pd
from datasets.utils.tqdm import disable_progress_bars
from lerobot.datasets.lerobot_dataset import LeRobotDataset, LeRobotDatasetMetadata
from lerobot.datasets.utils import DATA_DIR, DEFAULT_DATA_PATH, write_info, write_stats, write_tasks
from lerobot.utils.constants import HF_LEROBOT_HOME
from lerobot.utils.utils import init_logging
init_logging()
disable_progress_bars()
logger = logging.getLogger(__name__)
MIRRORING_MASK = {
"joint_1": -1, "joint_2": -1, "joint_3": -1, "joint_4": 1,
"joint_5": -1, "joint_6": -1, "joint_7": -1, "gripper": 1,
}
def get_mirroring_mask(robot_type: str) -> dict[str, int]:
if robot_type in ["bi_openarm_follower", "openarm_follower", "bi_openarms_follower", "openarms_follower"]:
return MIRRORING_MASK
raise ValueError(f"Unknown robot type: {robot_type}. Add a mirroring mask for this robot.")
def swap_left_right_name(name: str) -> str:
result = name.replace("left_", "LEFT_PLACEHOLDER_")
result = result.replace("right_", "left_")
result = result.replace("LEFT_PLACEHOLDER_", "right_")
return result
def mirror_feature_names(names: list[str]) -> tuple[list[str], dict[int, int]]:
mirrored_names = [swap_left_right_name(n) for n in names]
old_to_new_idx = {}
for old_idx, old_name in enumerate(names):
new_name = swap_left_right_name(old_name)
new_idx = mirrored_names.index(new_name)
old_to_new_idx[old_idx] = new_idx
return mirrored_names, old_to_new_idx
def apply_mirroring_mask(value: float, feature_name: str, mirroring_mask: dict[str, int]) -> float:
name_without_prefix = feature_name.split("_", 1)[1] if "_" in feature_name else feature_name
joint_name = name_without_prefix.split(".")[0]
if joint_name in mirroring_mask:
return value * mirroring_mask[joint_name]
return value
def mirror_array(array: np.ndarray, names: list[str], mirroring_mask: dict[str, int]) -> np.ndarray:
mirrored_names, idx_mapping = mirror_feature_names(names)
result = np.zeros_like(array)
for old_idx, new_idx in idx_mapping.items():
new_name = mirrored_names[new_idx]
value = array[old_idx]
mirrored_value = apply_mirroring_mask(value, new_name, mirroring_mask)
result[new_idx] = mirrored_value
return result
def mirror_stats(stats: dict) -> dict:
mirrored = {}
for key, value in stats.items():
new_key = swap_left_right_name(key)
if isinstance(value, dict):
mirrored[new_key] = mirror_stats(value)
else:
mirrored[new_key] = value
return mirrored
import shutil
root = Path(self.root) if self.root else None
output_root = Path(self.output_root) if self.output_root else None
dataset = LeRobotDataset(self.repo_id, root=root)
output_root = output_root or (HF_LEROBOT_HOME / self.output_repo_id)
done_marker = output_root / ".data_mirrored"
if done_marker.exists():
logger.info("Data and metadata already mirrored, skipping")
return
# Clean up partial output from previous failed runs
if output_root.exists():
logger.info(f"Removing existing partial output: {output_root}")
shutil.rmtree(output_root)
robot_type = dataset.meta.robot_type or "bi_openarms_follower"
mirroring_mask = get_mirroring_mask(robot_type)
mirrored_features = {}
for key, feat in dataset.meta.features.items():
new_key = swap_left_right_name(key)
new_feat = feat.copy()
if "names" in new_feat and new_feat["names"]:
new_feat["names"] = [swap_left_right_name(n) for n in new_feat["names"]]
mirrored_features[new_key] = new_feat
new_meta = LeRobotDatasetMetadata.create(
repo_id=self.output_repo_id,
fps=dataset.meta.fps,
features=mirrored_features,
robot_type=dataset.meta.robot_type,
root=output_root,
use_videos=len(dataset.meta.video_keys) > 0,
)
if dataset.meta.tasks is not None:
write_tasks(dataset.meta.tasks, new_meta.root)
data_dir = dataset.root / DATA_DIR
parquet_files = sorted(data_dir.glob("*/*.parquet"))
action_names = dataset.meta.features.get("action", {}).get("names", [])
state_names = dataset.meta.features.get("observation.state", {}).get("names", [])
for src_path in parquet_files:
df = pd.read_parquet(src_path).reset_index(drop=True)
relative_path = src_path.relative_to(dataset.root)
chunk_dir = relative_path.parts[1]
file_name = relative_path.parts[2]
chunk_idx = int(chunk_dir.split("-")[1])
file_idx = int(file_name.split("-")[1].split(".")[0])
if "action" in df.columns and action_names:
actions = np.stack(df["action"].values)
mirrored_actions = np.array([mirror_array(row, action_names, mirroring_mask) for row in actions])
df["action"] = list(mirrored_actions)
if "observation.state" in df.columns and state_names:
states = np.stack(df["observation.state"].values)
mirrored_states = np.array([mirror_array(row, state_names, mirroring_mask) for row in states])
df["observation.state"] = list(mirrored_states)
dst_path = new_meta.root / DEFAULT_DATA_PATH.format(chunk_index=chunk_idx, file_index=file_idx)
dst_path.parent.mkdir(parents=True, exist_ok=True)
df.to_parquet(dst_path, index=False)
episodes_dir = dataset.root / "meta/episodes"
dst_episodes_dir = new_meta.root / "meta/episodes"
if episodes_dir.exists():
dst_episodes_dir.mkdir(parents=True, exist_ok=True)
for src_parquet in episodes_dir.glob("*/*.parquet"):
df = pd.read_parquet(src_parquet)
columns_to_rename = {}
for col in df.columns:
if col.startswith("videos/"):
parts = col.split("/")
if len(parts) >= 2:
video_key = parts[1]
new_video_key = swap_left_right_name(video_key)
new_col = col.replace(f"videos/{video_key}/", f"videos/{new_video_key}/")
columns_to_rename[col] = new_col
if columns_to_rename:
df = df.rename(columns=columns_to_rename)
dst_parquet = dst_episodes_dir / src_parquet.relative_to(episodes_dir)
dst_parquet.parent.mkdir(parents=True, exist_ok=True)
df.to_parquet(dst_parquet, index=False)
new_meta.info.update({
"total_episodes": dataset.meta.info["total_episodes"],
"total_frames": dataset.meta.info["total_frames"],
"total_tasks": dataset.meta.info["total_tasks"],
"splits": dataset.meta.info.get("splits", {}),
})
write_info(new_meta.info, new_meta.root)
if dataset.meta.stats is not None:
mirrored_stats = mirror_stats(dataset.meta.stats)
write_stats(mirrored_stats, new_meta.root)
done_marker.touch()
logger.info(f"Data and metadata mirrored to {output_root}")
def swap_left_right_name(name: str) -> str:
result = name.replace("left_", "LEFT_PLACEHOLDER_")
result = result.replace("right_", "left_")
result = result.replace("LEFT_PLACEHOLDER_", "right_")
return result
def get_num_video_tasks(repo_id: str, root: str | None = None) -> int:
from lerobot.datasets.lerobot_dataset import LeRobotDataset
root_path = Path(root) if root else None
dataset = LeRobotDataset(repo_id, root=root_path)
count = 0
for video_key in dataset.meta.video_keys:
for ep_idx in range(dataset.meta.total_episodes):
try:
src_path = dataset.root / dataset.meta.get_video_file_path(ep_idx, video_key)
if src_path.exists():
count += 1
except KeyError:
continue
return count
def make_mirror_executor(
repo_id: str,
output_repo_id: str,
root: str | None,
output_root: str | None,
vcodec: str,
job_name: str,
logs_dir: Path,
workers: int,
partition: str,
cpus_per_task: int,
mem_per_cpu: str,
time_limit: str,
slurm: bool = True,
):
num_tasks = get_num_video_tasks(repo_id, root) if slurm else 1
num_tasks = max(1, num_tasks)
kwargs = {
"pipeline": [
MirrorDataAndMetadata(repo_id, output_repo_id, root, output_root),
MirrorVideos(repo_id, output_repo_id, root, output_root, vcodec),
],
"logging_dir": str(logs_dir / job_name),
}
if slurm:
kwargs.update({
"job_name": job_name,
"tasks": num_tasks,
"workers": min(workers, num_tasks),
"time": time_limit,
"partition": partition,
"cpus_per_task": cpus_per_task,
"sbatch_args": {
"mem-per-cpu": mem_per_cpu,
"requeue": True,
"signal": "USR1@30",
},
})
return SlurmPipelineExecutor(**kwargs)
else:
kwargs.update({"tasks": 1, "workers": 1})
return LocalPipelineExecutor(**kwargs)
def main():
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s")
parser = argparse.ArgumentParser(description="Mirror a bimanual robot dataset using SLURM")
parser.add_argument("--repo-id", type=str, required=True, help="Source dataset repo_id")
parser.add_argument("--output-repo-id", type=str, required=True, help="Output dataset repo_id")
parser.add_argument("--root", type=str, default=None, help="Source dataset root directory")
parser.add_argument("--output-root", type=str, default=None, help="Output dataset root directory")
parser.add_argument("--vcodec", type=str, default="libsvtav1", help="Video codec")
parser.add_argument("--logs-dir", type=Path, default=Path("logs"), help="Directory for datatrove logs")
parser.add_argument("--job-name", type=str, default="mirror_dataset", help="SLURM job name")
parser.add_argument("--slurm", type=int, default=1, help="Use SLURM (1) or local (0)")
parser.add_argument("--workers", type=int, default=64, help="Number of SLURM workers")
parser.add_argument("--partition", type=str, default="hopper-cpu", help="SLURM partition")
parser.add_argument("--cpus-per-task", type=int, default=4, help="CPUs per task")
parser.add_argument("--mem-per-cpu", type=str, default="2G", help="Memory per CPU")
parser.add_argument("--time-limit", type=str, default="04:00:00", help="SLURM time limit")
parser.add_argument("--push-to-hub", action="store_true", help="Push mirrored dataset to HuggingFace Hub")
args = parser.parse_args()
executor = make_mirror_executor(
repo_id=args.repo_id,
output_repo_id=args.output_repo_id,
root=args.root,
output_root=args.output_root,
vcodec=args.vcodec,
job_name=args.job_name,
logs_dir=args.logs_dir,
workers=args.workers,
partition=args.partition,
cpus_per_task=args.cpus_per_task,
mem_per_cpu=args.mem_per_cpu,
time_limit=args.time_limit,
slurm=args.slurm == 1,
)
executor.run()
if args.push_to_hub:
from lerobot.datasets.lerobot_dataset import LeRobotDataset
from lerobot.utils.constants import HF_LEROBOT_HOME
output_root = Path(args.output_root) if args.output_root else HF_LEROBOT_HOME / args.output_repo_id
logger.info(f"Pushing dataset to HuggingFace Hub: {args.output_repo_id}")
dataset = LeRobotDataset(args.output_repo_id, root=output_root)
dataset.push_to_hub()
if __name__ == "__main__":
main()
+2 -2
View File
@@ -26,7 +26,7 @@ from huggingface_hub import HfApi
from huggingface_hub.constants import REPOCARD_NAME
from port_droid import DROID_SHARDS
from lerobot.datasets.dataset_metadata import CODEBASE_VERSION, LeRobotDatasetMetadata
from lerobot.datasets.lerobot_dataset import CODEBASE_VERSION, LeRobotDatasetMetadata
from lerobot.datasets.utils import create_lerobot_dataset_card
from lerobot.utils.utils import init_logging
@@ -155,7 +155,7 @@ class UploadDataset(PipelineStep):
from datasets.utils.tqdm import disable_progress_bars
from huggingface_hub import CommitOperationAdd, preupload_lfs_files
from lerobot.datasets.dataset_metadata import LeRobotDatasetMetadata
from lerobot.datasets.lerobot_dataset import LeRobotDatasetMetadata
from lerobot.utils.utils import init_logging
init_logging()
+11 -12
View File
@@ -27,8 +27,8 @@ measuring consistency and ground truth alignment.
Usage:
# Basic usage with smolvla policy
uv run python examples/rtc/eval_dataset.py \
--policy.path=<USER>/smolvla_check_rtc_last3 \
--dataset.repo_id=<USER>/check_rtc \
--policy.path=helper2424/smolvla_check_rtc_last3 \
--dataset.repo_id=helper2424/check_rtc \
--rtc.execution_horizon=8 \
--device=mps \
--rtc.max_guidance_weight=10.0 \
@@ -58,16 +58,16 @@ Usage:
--device=cuda
uv run python examples/rtc/eval_dataset.py \
--policy.path=<USER>/reuben_pi0 \
--dataset.repo_id=<USER>/so101_cube_in_cup \
--policy.path=lipsop/reuben_pi0 \
--dataset.repo_id=ReubenLim/so101_cube_in_cup \
--rtc.execution_horizon=8 \
--device=cuda
# With torch.compile for faster inference (PyTorch 2.0+)
# Note: CUDA graphs disabled by default due to in-place ops in denoising loop
uv run python examples/rtc/eval_dataset.py \
--policy.path=<USER>/smolvla_check_rtc_last3 \
--dataset.repo_id=<USER>/check_rtc \
--policy.path=helper2424/smolvla_check_rtc_last3 \
--dataset.repo_id=helper2424/check_rtc \
--rtc.execution_horizon=8 \
--device=mps \
--use_torch_compile=true \
@@ -75,8 +75,8 @@ Usage:
# With torch.compile on CUDA (CUDA graphs disabled by default)
uv run python examples/rtc/eval_dataset.py \
--policy.path=<USER>/smolvla_check_rtc_last3 \
--dataset.repo_id=<USER>/check_rtc \
--policy.path=helper2424/smolvla_check_rtc_last3 \
--dataset.repo_id=helper2424/check_rtc \
--rtc.execution_horizon=8 \
--device=cuda \
--use_torch_compile=true \
@@ -84,8 +84,8 @@ Usage:
# Enable CUDA graphs (advanced - may cause tensor aliasing errors)
uv run python examples/rtc/eval_dataset.py \
--policy.path=<USER>/smolvla_check_rtc_last3 \
--dataset.repo_id=<USER>/check_rtc \
--policy.path=helper2424/smolvla_check_rtc_last3 \
--dataset.repo_id=helper2424/check_rtc \
--use_torch_compile=true \
--torch_compile_backend=inductor \
--torch_compile_mode=max-autotune \
@@ -113,9 +113,8 @@ from lerobot.configs import parser
from lerobot.configs.default import DatasetConfig
from lerobot.configs.policies import PreTrainedConfig
from lerobot.configs.types import RTCAttentionSchedule
from lerobot.datasets.dataset_metadata import LeRobotDatasetMetadata
from lerobot.datasets.factory import resolve_delta_timestamps
from lerobot.datasets.lerobot_dataset import LeRobotDataset
from lerobot.datasets.lerobot_dataset import LeRobotDataset, LeRobotDatasetMetadata
from lerobot.policies.factory import get_policy_class, make_pre_post_processors
from lerobot.policies.rtc.configuration_rtc import RTCConfig
from lerobot.policies.rtc.debug_visualizer import RTCDebugVisualizer
+7 -117
View File
@@ -28,7 +28,7 @@ For simulation environments, see eval_with_simulation.py
Usage:
# Run RTC with Real robot with RTC
uv run examples/rtc/eval_with_real_robot.py \
--policy.path=<USER>/smolvla_check_rtc_last3 \
--policy.path=helper2424/smolvla_check_rtc_last3 \
--policy.device=mps \
--rtc.enabled=true \
--rtc.execution_horizon=20 \
@@ -41,7 +41,7 @@ Usage:
# Run RTC with Real robot without RTC
uv run examples/rtc/eval_with_real_robot.py \
--policy.path=<USER>/smolvla_check_rtc_last3 \
--policy.path=helper2424/smolvla_check_rtc_last3 \
--policy.device=mps \
--rtc.enabled=false \
--robot.type=so100_follower \
@@ -53,7 +53,7 @@ Usage:
# Run RTC with Real robot with pi0.5 policy
uv run examples/rtc/eval_with_real_robot.py \
--policy.path=<USER>/pi05_check_rtc \
--policy.path=helper2424/pi05_check_rtc \
--policy.device=mps \
--rtc.enabled=true \
--rtc.execution_horizon=20 \
@@ -63,26 +63,6 @@ Usage:
--robot.cameras="{ gripper: {type: opencv, index_or_path: 0, width: 640, height: 480, fps: 30}, front: {type: opencv, index_or_path: 1, width: 640, height: 480, fps: 30}}" \
--task="Move green small object into the purple platform" \
--duration=120
# Run RTC with bi_openarm_follower (dual-arm OpenArms) and pi0.5 policy
python examples/rtc/eval_with_real_robot.py \
--policy.path=lerobot-data-collection/folding_final \
--robot.type=bi_openarm_follower \
--robot.cameras='{left_wrist: {type: opencv, index_or_path: "/dev/video4", width: 1280, height: 720, fps: 30}, base: {type: opencv, index_or_path: "/dev/video2", width: 640, height: 480, fps: 30}, right_wrist: {type: opencv, index_or_path: "/dev/video0", width: 1280, height: 720, fps: 30}}' \
--robot.left_arm_config.port=can1 \
--robot.left_arm_config.side=left \
--robot.left_arm_config.can_interface=socketcan \
--robot.right_arm_config.port=can0 \
--robot.right_arm_config.side=right \
--robot.right_arm_config.can_interface=socketcan \
--task="Fold the T-shirt properly" \
--fps=30 \
--duration=2000 \
--rtc.enabled=true \
--rtc.execution_horizon=20 \
--rtc.max_guidance_weight=5.0 \
--rtc.prefix_attention_schedule=LINEAR \
--device=cuda
"""
import logging
@@ -98,38 +78,28 @@ from torch import Tensor
from lerobot.cameras.opencv.configuration_opencv import OpenCVCameraConfig # noqa: F401
from lerobot.cameras.realsense.configuration_realsense import RealSenseCameraConfig # noqa: F401
from lerobot.cameras.zmq.configuration_zmq import ZMQCameraConfig # noqa: F401
from lerobot.configs import parser
from lerobot.configs.policies import PreTrainedConfig
from lerobot.configs.types import RTCAttentionSchedule
from lerobot.datasets.feature_utils import build_dataset_frame, hw_to_dataset_features
from lerobot.datasets.utils import build_dataset_frame, hw_to_dataset_features
from lerobot.policies.factory import get_policy_class, make_pre_post_processors
from lerobot.policies.rtc.action_queue import ActionQueue
from lerobot.policies.rtc.configuration_rtc import RTCConfig
from lerobot.policies.rtc.latency_tracker import LatencyTracker
from lerobot.processor import (
NormalizerProcessorStep,
RelativeActionsProcessorStep,
TransitionKey,
create_transition,
)
from lerobot.processor.factory import (
make_default_robot_action_processor,
make_default_robot_observation_processor,
)
from lerobot.processor.relative_action_processor import to_relative_actions
from lerobot.rl.process import ProcessSignalHandler
from lerobot.robots import ( # noqa: F401
Robot,
RobotConfig,
bi_openarm_follower,
bi_so_follower,
koch_follower,
so_follower,
unitree_g1,
)
from lerobot.robots.utils import make_robot_from_config
from lerobot.utils.constants import OBS_IMAGES, OBS_STATE
from lerobot.utils.constants import OBS_IMAGES
from lerobot.utils.hub import HubMixin
from lerobot.utils.utils import init_logging
@@ -240,35 +210,6 @@ def is_image_key(k: str) -> bool:
return k.startswith(OBS_IMAGES)
def _reanchor_relative_rtc_prefix(
prev_actions_absolute: Tensor,
current_state: Tensor,
relative_step: RelativeActionsProcessorStep,
normalizer_step: NormalizerProcessorStep | None,
policy_device: torch.device | str,
) -> Tensor:
"""Convert absolute leftovers into model-space for relative-action RTC policies.
When a policy uses relative actions, the RTC prefix (leftover actions from
the previous chunk) is stored in absolute space. Before feeding it back to
the policy we need to re-express it relative to the *current* robot state
and then re-normalize.
"""
state = current_state.detach().cpu()
if state.dim() == 1:
state = state.unsqueeze(0)
action_cpu = prev_actions_absolute.detach().cpu()
mask = relative_step._build_mask(action_cpu.shape[-1])
relative_actions = to_relative_actions(action_cpu, state, mask)
transition = create_transition(action=relative_actions)
if normalizer_step is not None:
transition = normalizer_step(transition)
return transition[TransitionKey.ACTION].to(policy_device)
def get_actions(
policy,
robot: RobotWrapper,
@@ -294,15 +235,7 @@ def get_actions(
fps = cfg.fps
time_per_chunk = 1.0 / fps
# Only keep .pos joints + camera streams if the policy was trained on positions,
# not the full pos/vel/torque state the robot exposes.
observation_features_hw = {
key: value
for key, value in robot.observation_features().items()
if key.endswith(".pos") or isinstance(value, tuple)
}
dataset_features = hw_to_dataset_features(observation_features_hw, "observation")
dataset_features = hw_to_dataset_features(robot.observation_features(), "observation")
policy_device = policy.config.device
# Load preprocessor and postprocessor from pretrained files
@@ -320,25 +253,6 @@ def get_actions(
logger.info("[GET_ACTIONS] Preprocessor/postprocessor loaded successfully with embedded stats")
relative_step = next(
(s for s in preprocessor.steps if isinstance(s, RelativeActionsProcessorStep) and s.enabled),
None,
)
normalizer_step = next(
(s for s in preprocessor.steps if isinstance(s, NormalizerProcessorStep)),
None,
)
if relative_step is not None:
if relative_step.action_names is None:
cfg_names = getattr(cfg.policy, "action_feature_names", None)
if cfg_names:
relative_step.action_names = list(cfg_names)
else:
relative_step.action_names = [
k for k in robot.robot.action_features if k.endswith(".pos")
]
logger.info("[GET_ACTIONS] Relative actions enabled: will re-anchor RTC prefix")
get_actions_threshold = cfg.action_queue_size_to_get_new_actions
if not cfg.rtc.enabled:
@@ -381,28 +295,6 @@ def get_actions(
preproceseded_obs = preprocessor(obs_with_policy_features)
# Re-anchor leftover actions for relative-action policies.
# We need the *postprocessed* (absolute) leftover, not the original
# (normalized/relative) one that get_left_over() returns.
if (
prev_actions is not None
and relative_step is not None
and OBS_STATE in obs_with_policy_features
):
with action_queue.lock:
if action_queue.queue is not None:
prev_actions_abs = action_queue.queue[action_queue.last_index :].clone()
else:
prev_actions_abs = None
if prev_actions_abs is not None and prev_actions_abs.numel() > 0:
prev_actions = _reanchor_relative_rtc_prefix(
prev_actions_absolute=prev_actions_abs,
current_state=obs_with_policy_features[OBS_STATE],
relative_step=relative_step,
normalizer_step=normalizer_step,
policy_device=policy_device,
)
# Generate actions WITH RTC
actions = policy.predict_action_chunk(
preproceseded_obs,
@@ -458,8 +350,6 @@ def actor_control(
try:
logger.info("[ACTOR] Starting actor thread")
action_keys = [k for k in robot.action_features() if k.endswith(".pos")]
action_count = 0
action_interval = 1.0 / cfg.fps
@@ -471,7 +361,7 @@ def actor_control(
if action is not None:
action = action.cpu()
action_dict = {key: action[i].item() for i, key in enumerate(action_keys)}
action_dict = {key: action[i].item() for i, key in enumerate(robot.action_features())}
action_processed = robot_action_processor((action_dict, None))
robot.send_action(action_processed)
+3 -2
View File
@@ -16,13 +16,15 @@
from lerobot.cameras.opencv.configuration_opencv import OpenCVCameraConfig
from lerobot.configs.types import FeatureType, PolicyFeature
from lerobot.datasets.feature_utils import combine_feature_dicts
from lerobot.datasets.lerobot_dataset import LeRobotDataset
from lerobot.datasets.pipeline_features import aggregate_pipeline_dataset_features, create_initial_features
from lerobot.datasets.utils import combine_feature_dicts
from lerobot.model.kinematics import RobotKinematics
from lerobot.policies.act.modeling_act import ACTPolicy
from lerobot.policies.factory import make_pre_post_processors
from lerobot.processor import (
RobotAction,
RobotObservation,
RobotProcessorPipeline,
make_default_teleop_action_processor,
)
@@ -38,7 +40,6 @@ from lerobot.robots.so_follower.robot_kinematic_processor import (
InverseKinematicsEEToJoints,
)
from lerobot.scripts.lerobot_record import record_loop
from lerobot.types import RobotAction, RobotObservation
from lerobot.utils.control_utils import init_keyboard_listener
from lerobot.utils.utils import log_say
from lerobot.utils.visualization_utils import init_rerun
+2 -3
View File
@@ -16,11 +16,11 @@
from lerobot.cameras.opencv.configuration_opencv import OpenCVCameraConfig
from lerobot.datasets.feature_utils import combine_feature_dicts
from lerobot.datasets.lerobot_dataset import LeRobotDataset
from lerobot.datasets.pipeline_features import aggregate_pipeline_dataset_features, create_initial_features
from lerobot.datasets.utils import combine_feature_dicts
from lerobot.model.kinematics import RobotKinematics
from lerobot.processor import RobotProcessorPipeline
from lerobot.processor import RobotAction, RobotObservation, RobotProcessorPipeline
from lerobot.processor.converters import (
observation_to_transition,
robot_action_observation_to_transition,
@@ -35,7 +35,6 @@ from lerobot.robots.so_follower.robot_kinematic_processor import (
)
from lerobot.scripts.lerobot_record import record_loop
from lerobot.teleoperators.so_leader import SO100Leader, SO100LeaderConfig
from lerobot.types import RobotAction, RobotObservation
from lerobot.utils.control_utils import init_keyboard_listener
from lerobot.utils.utils import log_say
from lerobot.utils.visualization_utils import init_rerun
+5 -4
View File
@@ -19,7 +19,7 @@ import time
from lerobot.datasets.lerobot_dataset import LeRobotDataset
from lerobot.model.kinematics import RobotKinematics
from lerobot.processor import RobotProcessorPipeline
from lerobot.processor import RobotAction, RobotObservation, RobotProcessorPipeline
from lerobot.processor.converters import (
robot_action_observation_to_transition,
transition_to_robot_action,
@@ -28,7 +28,6 @@ from lerobot.robots.so_follower import SO100Follower, SO100FollowerConfig
from lerobot.robots.so_follower.robot_kinematic_processor import (
InverseKinematicsEEToJoints,
)
from lerobot.types import RobotAction, RobotObservation
from lerobot.utils.constants import ACTION
from lerobot.utils.robot_utils import precise_sleep
from lerobot.utils.utils import log_say
@@ -68,7 +67,9 @@ def main():
# Fetch the dataset to replay
dataset = LeRobotDataset(HF_REPO_ID, episodes=[EPISODE_IDX])
actions = dataset.select_columns(ACTION)
# Filter dataset to only include frames from the specified episode since episodes are chunked in dataset V3.0
episode_frames = dataset.hf_dataset.filter(lambda x: x["episode_index"] == EPISODE_IDX)
actions = episode_frames.select_columns(ACTION)
# Connect to the robot
robot.connect()
@@ -79,7 +80,7 @@ def main():
print("Starting replay loop...")
log_say(f"Replaying episode {EPISODE_IDX}")
for idx in range(dataset.num_frames):
for idx in range(len(episode_frames)):
t0 = time.perf_counter()
# Get recorded action from dataset
+1 -2
View File
@@ -17,7 +17,7 @@
import time
from lerobot.model.kinematics import RobotKinematics
from lerobot.processor import RobotProcessorPipeline
from lerobot.processor import RobotAction, RobotObservation, RobotProcessorPipeline
from lerobot.processor.converters import (
robot_action_observation_to_transition,
robot_action_to_transition,
@@ -30,7 +30,6 @@ from lerobot.robots.so_follower.robot_kinematic_processor import (
InverseKinematicsEEToJoints,
)
from lerobot.teleoperators.so_leader import SO100Leader, SO100LeaderConfig
from lerobot.types import RobotAction, RobotObservation
from lerobot.utils.robot_utils import precise_sleep
from lerobot.utils.visualization_utils import init_rerun, log_rerun_data
+2 -3
View File
@@ -19,9 +19,8 @@ from pathlib import Path
import torch
from lerobot.configs.types import FeatureType
from lerobot.datasets.dataset_metadata import LeRobotDatasetMetadata
from lerobot.datasets.feature_utils import dataset_to_policy_features
from lerobot.datasets.lerobot_dataset import LeRobotDataset
from lerobot.datasets.lerobot_dataset import LeRobotDataset, LeRobotDatasetMetadata
from lerobot.datasets.utils import dataset_to_policy_features
from lerobot.policies.diffusion.configuration_diffusion import DiffusionConfig
from lerobot.policies.diffusion.modeling_diffusion import DiffusionPolicy
from lerobot.policies.factory import make_pre_post_processors
+2 -2
View File
@@ -20,9 +20,9 @@ from pathlib import Path
import torch
from lerobot.configs.types import FeatureType
from lerobot.datasets.dataset_metadata import LeRobotDatasetMetadata
from lerobot.datasets.feature_utils import dataset_to_policy_features
from lerobot.datasets.lerobot_dataset import LeRobotDatasetMetadata
from lerobot.datasets.streaming_dataset import StreamingLeRobotDataset
from lerobot.datasets.utils import dataset_to_policy_features
from lerobot.policies.act.configuration_act import ACTConfig
from lerobot.policies.act.modeling_act import ACTPolicy
from lerobot.policies.factory import make_pre_post_processors
@@ -5,9 +5,8 @@ from pathlib import Path
import torch
from lerobot.configs.types import FeatureType
from lerobot.datasets.dataset_metadata import LeRobotDatasetMetadata
from lerobot.datasets.feature_utils import dataset_to_policy_features
from lerobot.datasets.lerobot_dataset import LeRobotDataset
from lerobot.datasets.lerobot_dataset import LeRobotDataset, LeRobotDatasetMetadata
from lerobot.datasets.utils import dataset_to_policy_features
from lerobot.policies.act.configuration_act import ACTConfig
from lerobot.policies.act.modeling_act import ACTPolicy
from lerobot.policies.factory import make_pre_post_processors
+1 -1
View File
@@ -1,7 +1,7 @@
import torch
from lerobot.cameras.opencv.configuration_opencv import OpenCVCameraConfig
from lerobot.datasets.dataset_metadata import LeRobotDatasetMetadata
from lerobot.datasets.lerobot_dataset import LeRobotDatasetMetadata
from lerobot.policies.act.modeling_act import ACTPolicy
from lerobot.policies.factory import make_pre_post_processors
from lerobot.policies.utils import build_inference_frame, make_robot_action
@@ -5,9 +5,8 @@ from pathlib import Path
import torch
from lerobot.configs.types import FeatureType
from lerobot.datasets.dataset_metadata import LeRobotDatasetMetadata
from lerobot.datasets.feature_utils import dataset_to_policy_features
from lerobot.datasets.lerobot_dataset import LeRobotDataset
from lerobot.datasets.lerobot_dataset import LeRobotDataset, LeRobotDatasetMetadata
from lerobot.datasets.utils import dataset_to_policy_features
from lerobot.policies.diffusion.configuration_diffusion import DiffusionConfig
from lerobot.policies.diffusion.modeling_diffusion import DiffusionPolicy
from lerobot.policies.factory import make_pre_post_processors
@@ -1,7 +1,7 @@
import torch
from lerobot.cameras.opencv.configuration_opencv import OpenCVCameraConfig
from lerobot.datasets.dataset_metadata import LeRobotDatasetMetadata
from lerobot.datasets.lerobot_dataset import LeRobotDatasetMetadata
from lerobot.policies.diffusion.modeling_diffusion import DiffusionPolicy
from lerobot.policies.factory import make_pre_post_processors
from lerobot.policies.utils import build_inference_frame, make_robot_action
+1 -1
View File
@@ -1,7 +1,7 @@
import torch
from lerobot.cameras.opencv.configuration_opencv import OpenCVCameraConfig
from lerobot.datasets.feature_utils import hw_to_dataset_features
from lerobot.datasets.utils import hw_to_dataset_features
from lerobot.policies.factory import make_pre_post_processors
from lerobot.policies.pi0.modeling_pi0 import PI0Policy
from lerobot.policies.utils import build_inference_frame, make_robot_action
+1 -1
View File
@@ -6,8 +6,8 @@ from queue import Empty, Full
import torch
import torch.optim as optim
from lerobot.datasets.feature_utils import hw_to_dataset_features
from lerobot.datasets.lerobot_dataset import LeRobotDataset
from lerobot.datasets.utils import hw_to_dataset_features
from lerobot.envs.configs import HILSerlProcessorConfig, HILSerlRobotEnvConfig
from lerobot.policies.sac.configuration_sac import SACConfig
from lerobot.policies.sac.modeling_sac import SACPolicy
@@ -1,7 +1,7 @@
import torch
from lerobot.cameras.opencv.configuration_opencv import OpenCVCameraConfig
from lerobot.datasets.feature_utils import hw_to_dataset_features
from lerobot.datasets.utils import hw_to_dataset_features
from lerobot.policies.factory import make_pre_post_processors
from lerobot.policies.smolvla.modeling_smolvla import SmolVLAPolicy
from lerobot.policies.utils import build_inference_frame, make_robot_action
-297
View File
@@ -1,297 +0,0 @@
#!/usr/bin/env python
# Copyright 2025 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Inference script for a pi0 model trained with UMI-style relative EE actions
on an OpenArm robot (single right arm, one wrist camera).
Training dataset layout:
observation.images.cam0 [3, 720, 960]
action [x, y, z, ax, ay, az, proximal, distal] (shape 8)
The model uses ``derive_state_from_action=true``, so observation.state is
derived from the action column during training. At inference the state must
be provided by the robot this script uses FK to compute the current EE
pose and gripper position, which it exposes as ``observation.state``.
Pipeline:
1. Read arm joints from robot FK observation.state [x,y,z,ax,ay,az,prox,dist]
2. Read camera image observation.images.cam0
3. pi0 preprocessor (loaded from checkpoint):
- DeriveStateFromActionStep: no-op at inference (state from robot)
- RelativeActionsProcessorStep: caches current state
- RelativeStateProcessorStep: buffers prev state, stacks [prev,cur],
subtracts current velocity info, flattens
- NormalizerProcessorStep: normalizes
4. pi0 predicts relative action chunk (30 steps)
5. pi0 postprocessor: unnormalize, add cached state absolute EE
6. IK: absolute EE [x,y,z,ax,ay,az] arm joint targets
7. Gripper [proximal, distal] gripper motor targets
8. Send to robot
Usage:
python evaluate.py
"""
from __future__ import annotations
import numpy as np
from scipy.spatial.transform import Rotation
from lerobot.cameras.opencv.configuration_opencv import OpenCVCameraConfig
from lerobot.datasets.lerobot_dataset import LeRobotDataset
from lerobot.model.kinematics import RobotKinematics
from lerobot.policies.factory import make_pre_post_processors
from lerobot.policies.pi0.modeling_pi0 import PI0Policy
from lerobot.processor import RelativeStateProcessorStep
from lerobot.robots.openarm_follower import OpenArmFollower, OpenArmFollowerConfig
from lerobot.scripts.lerobot_record import record_loop
from lerobot.types import RobotAction, RobotObservation
from lerobot.utils.control_utils import init_keyboard_listener
from lerobot.utils.utils import log_say
from lerobot.utils.visualization_utils import init_rerun
# ---------------------------------------------------------------------------
# Configuration — adapt these to your setup
# ---------------------------------------------------------------------------
FPS = 46
EPISODE_TIME_SEC = 60
TASK_DESCRIPTION = "red cube"
HF_MODEL_ID = "pepijn223/grabette-umi-pi0"
# Latency compensation: skip this many predicted action steps to account for
# camera + inference + execution latency. Formula: ceil(total_ms / (1000/FPS)).
# At 46 FPS (~22ms/step) with ~150ms total latency: ceil(150/22) ≈ 7.
# Start with 0 for a safe first test, then increase to match measured latency.
LATENCY_SKIP_STEPS = 0
URDF_PATH = "src/lerobot/robots/openarm_follower/urdf/openarm_bimanual_pybullet.urdf"
URDF_EE_FRAME = "openarm_right_ee_target"
IK_POSITION_WEIGHT = 1.0
IK_ORIENTATION_WEIGHT = 1.0
# ---------------------------------------------------------------------------
# Dataset features for inference
#
# The training dataset has only observation.images.cam0 and action.
# observation.state is derived from action during training
# (derive_state_from_action=true) but must be supplied by the robot at
# inference. We define it here so build_dataset_frame can map FK output
# to the right feature.
# ---------------------------------------------------------------------------
DATASET_FEATURES: dict = {
"observation.state": {
"dtype": "float32",
"shape": [8],
"names": ["x", "y", "z", "ax", "ay", "az", "proximal", "distal"],
},
"observation.images.cam0": {
"dtype": "video",
"shape": [3, 720, 960],
"names": ["channels", "height", "width"],
"info": {
"video.height": 720,
"video.width": 960,
"video.codec": "h264",
"video.pix_fmt": "yuv420p",
"video.is_depth_map": False,
"video.fps": FPS,
"video.channels": 3,
"has_audio": False,
},
},
"action": {
"dtype": "float32",
"shape": [8],
"names": ["x", "y", "z", "ax", "ay", "az", "proximal", "distal"],
},
"timestamp": {"dtype": "float32", "shape": [1], "names": None},
"frame_index": {"dtype": "int64", "shape": [1], "names": None},
"episode_index": {"dtype": "int64", "shape": [1], "names": None},
"index": {"dtype": "int64", "shape": [1], "names": None},
"task_index": {"dtype": "int64", "shape": [1], "names": None},
}
# ---------------------------------------------------------------------------
# FK / IK callables
# ---------------------------------------------------------------------------
class JointsToEE:
"""FK: raw robot observation → flat dict matching observation.state names.
Arm joint positions EE pose [x,y,z,ax,ay,az] via forward kinematics.
Gripper motor positions [proximal, distal].
Camera images pass through unchanged.
"""
def __init__(self, kinematics: RobotKinematics, arm_motor_names: list[str]):
self.kin = kinematics
self.arm = arm_motor_names
def __call__(self, obs: RobotObservation) -> RobotObservation:
q = np.array([float(obs[f"{m}.pos"]) for m in self.arm])
t = self.kin.forward_kinematics(q)
rot = Rotation.from_matrix(t[:3, :3]).as_rotvec()
out: dict = {
"x": float(t[0, 3]),
"y": float(t[1, 3]),
"z": float(t[2, 3]),
"ax": float(rot[0]),
"ay": float(rot[1]),
"az": float(rot[2]),
"proximal": float(obs["proximal.pos"]),
"distal": float(obs["distal.pos"]),
}
for k, v in obs.items():
if not k.endswith((".pos", ".vel", ".torque")):
out[k] = v
return out
class EEToJoints:
"""IK: policy action dict → motor position dict for the robot.
Reads [x,y,z,ax,ay,az] from the action, runs IK for arm joint targets.
Passes [proximal, distal] as direct gripper position commands.
"""
def __init__(
self,
kinematics: RobotKinematics,
arm_motor_names: list[str],
position_weight: float = 1.0,
orientation_weight: float = 1.0,
):
self.kin = kinematics
self.arm = arm_motor_names
self.pw = position_weight
self.ow = orientation_weight
self.q_curr: np.ndarray | None = None
def __call__(self, args: tuple[RobotAction, RobotObservation]) -> RobotAction:
action, obs = args
q_raw = np.array([float(obs[f"{m}.pos"]) for m in self.arm])
if self.q_curr is None:
self.q_curr = q_raw
t_des = np.eye(4)
t_des[:3, :3] = Rotation.from_rotvec([action["ax"], action["ay"], action["az"]]).as_matrix()
t_des[:3, 3] = [action["x"], action["y"], action["z"]]
q_target = self.kin.inverse_kinematics(
self.q_curr, t_des, position_weight=self.pw, orientation_weight=self.ow
)
self.q_curr = q_target
out: dict = {f"{m}.pos": float(q_target[i]) for i, m in enumerate(self.arm)}
out["proximal.pos"] = float(action["proximal"])
out["distal.pos"] = float(action["distal"])
return out
# ---------------------------------------------------------------------------
# Main
# ---------------------------------------------------------------------------
def main():
camera_config = {
"cam0": OpenCVCameraConfig(index_or_path=0, width=960, height=720, fps=FPS),
}
robot_config = OpenArmFollowerConfig(
port="can0",
id="right_openarm",
side="right",
cameras=camera_config,
max_relative_target=8.0,
gripper_port="/dev/ttyUSB0",
)
robot = OpenArmFollower(robot_config)
policy = PI0Policy.from_pretrained(HF_MODEL_ID)
policy.config.latency_skip_steps = LATENCY_SKIP_STEPS
arm_motor_names = list(robot.bus.motors.keys())
kinematics = RobotKinematics(
urdf_path=URDF_PATH,
target_frame_name=URDF_EE_FRAME,
joint_names=arm_motor_names,
)
fk = JointsToEE(kinematics, arm_motor_names)
ik = EEToJoints(kinematics, arm_motor_names, IK_POSITION_WEIGHT, IK_ORIENTATION_WEIGHT)
dataset = LeRobotDataset.create(
repo_id="tmp/openarm_eval_scratch",
fps=FPS,
features=DATASET_FEATURES,
robot_type=robot.name,
use_videos=True,
image_writer_threads=4,
)
preprocessor, postprocessor = make_pre_post_processors(
policy_cfg=policy,
pretrained_path=HF_MODEL_ID,
dataset_stats=dataset.meta.stats,
preprocessor_overrides={"device_processor": {"device": str(policy.config.device)}},
)
relative_state_steps = [s for s in preprocessor.steps if isinstance(s, RelativeStateProcessorStep)]
robot.connect()
listener, events = init_keyboard_listener()
init_rerun(session_name="openarm_umi_pi0_relative_ee_evaluate")
try:
if not robot.is_connected:
raise ValueError("Robot is not connected!")
log_say("Starting policy execution")
for step in relative_state_steps:
step.reset()
record_loop(
robot=robot,
events=events,
fps=FPS,
policy=policy,
preprocessor=preprocessor,
postprocessor=postprocessor,
dataset=dataset,
control_time_s=EPISODE_TIME_SEC,
single_task=TASK_DESCRIPTION,
display_data=True,
robot_action_processor=ik,
robot_observation_processor=fk,
)
finally:
robot.disconnect()
listener.stop()
if __name__ == "__main__":
main()
-113
View File
@@ -1,113 +0,0 @@
#!/usr/bin/env python
# Copyright 2025 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Replay a dataset episode in EE frame using a browser-based URDF viewer.
Extracts ``observation.pose`` from the dataset, saves a trajectory JSON file,
then launches a local HTTP server and opens the replay viewer. The trajectory
is re-centered so frame 0 starts at the OpenArm ``openarm_right_ee_target``
EE tip (zero-joint pose).
Usage:
python replay.py
python replay.py --episode 3 --repo-id myuser/mydata
"""
from __future__ import annotations
import argparse
import http.server
import json
import os
import threading
import webbrowser
from pathlib import Path
VIEWER_DIR = Path(__file__).resolve().parents[2] / "src/lerobot/robots/openarm_follower/urdf"
TRAJECTORY_FILENAME = "trajectory_ep0.json"
def extract_trajectory(repo_id: str, episode: int, output_path: Path) -> dict:
from lerobot.datasets.lerobot_dataset import LeRobotDataset
dataset = LeRobotDataset(repo_id, episodes=[episode])
poses = dataset.select_columns("observation.pose")
actions = dataset.select_columns("action")
frames = []
for i in range(dataset.num_frames):
p = poses[i]["observation.pose"]
a = actions[i]["action"]
frames.append(
{
"x": float(p[0]),
"y": float(p[1]),
"z": float(p[2]),
"ax": float(p[3]),
"ay": float(p[4]),
"az": float(p[5]),
"proximal": float(a[0]),
"distal": float(a[1]),
}
)
payload = {"fps": dataset.fps, "num_frames": dataset.num_frames, "frames": frames}
with open(output_path, "w") as f:
json.dump(payload, f)
print(f"Extracted {dataset.num_frames} frames at {dataset.fps} FPS → {output_path}")
return payload
# ---------------------------------------------------------------------------
# Viewer mode
# ---------------------------------------------------------------------------
def serve_and_open(directory: Path, port: int = 8765):
os.chdir(directory)
handler = http.server.SimpleHTTPRequestHandler
httpd = http.server.HTTPServer(("", port), handler)
url = f"http://localhost:{port}/replay_viewer.html"
print(f"Serving at {url}")
threading.Thread(target=lambda: webbrowser.open(url), daemon=True).start()
try:
httpd.serve_forever()
except KeyboardInterrupt:
print("\nServer stopped.")
httpd.server_close()
def run_viewer(args):
trajectory_path = VIEWER_DIR / TRAJECTORY_FILENAME
if not trajectory_path.exists() or args.force:
extract_trajectory(args.repo_id, args.episode, trajectory_path)
else:
print(f"Using cached trajectory at {trajectory_path} (pass --force to re-extract)")
serve_and_open(VIEWER_DIR, args.port)
def main():
parser = argparse.ArgumentParser(description="Replay a dataset episode in EE frame (URDF viewer)")
parser.add_argument("--repo-id", default="glannuzel/grabette-dataset")
parser.add_argument("--episode", type=int, default=0)
parser.add_argument("--port", type=int, default=8765)
parser.add_argument("--force", action="store_true", help="Re-extract trajectory even if cached")
args = parser.parse_args()
run_viewer(args)
if __name__ == "__main__":
main()
@@ -14,20 +14,20 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import logging
import time
from collections import deque
import numpy as np
import onnxruntime as ort
from huggingface_hub import hf_hub_download
from lerobot.robots.unitree_g1.g1_utils import (
REMOTE_AXES,
REMOTE_BUTTONS,
G1_29_JointIndex,
get_gravity_orientation,
)
from lerobot.robots.unitree_g1.config_unitree_g1 import UnitreeG1Config
from lerobot.robots.unitree_g1.g1_utils import G1_29_JointIndex
from lerobot.robots.unitree_g1.unitree_g1 import UnitreeG1
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
@@ -36,13 +36,18 @@ GROOT_DEFAULT_ANGLES[[0, 6]] = -0.1 # Hip pitch
GROOT_DEFAULT_ANGLES[[3, 9]] = 0.3 # Knee
GROOT_DEFAULT_ANGLES[[4, 10]] = -0.2 # Ankle pitch
MISSING_JOINTS = []
G1_MODEL = "g1_23" # Or "g1_29"
if G1_MODEL == "g1_23":
MISSING_JOINTS = [12, 14, 20, 21, 27, 28] # Waist yaw/pitch, wrist pitch/yaw
# Control parameters
ACTION_SCALE = 0.25
CONTROL_DT = 0.02 # 50Hz
ANG_VEL_SCALE: float = 0.25
DOF_POS_SCALE: float = 1.0
DOF_VEL_SCALE: float = 0.05
CMD_SCALE: list[float] = [2.0, 2.0, 0.25]
CMD_SCALE: list = [2.0, 2.0, 0.25]
DEFAULT_GROOT_REPO_ID = "nepyope/GR00T-WholeBodyControl_g1"
@@ -80,11 +85,11 @@ def load_groot_policies(
class GrootLocomotionController:
"""GR00T lower-body locomotion controller for the Unitree G1."""
control_dt = CONTROL_DT # Expose for unitree_g1.py
def __init__(self):
# Load policies
self.policy_balance, self.policy_walk = load_groot_policies()
def __init__(self, policy_balance, policy_walk, robot, config):
self.policy_balance = policy_balance
self.policy_walk = policy_walk
self.robot = robot
self.config = config
self.cmd = np.array([0.0, 0.0, 0.0], dtype=np.float32) # vx, vy, theta_dot
@@ -104,60 +109,45 @@ class GrootLocomotionController:
logger.info("GrootLocomotionController initialized")
def reset(self) -> None:
"""Reset internal state for a new episode."""
self.cmd[:] = 0.0
self.groot_qj_all[:] = 0.0
self.groot_dqj_all[:] = 0.0
self.groot_action[:] = 0.0
self.groot_obs_single[:] = 0.0
self.groot_obs_stacked[:] = 0.0
self.groot_height_cmd = 0.74
self.groot_orientation_cmd[:] = 0.0
self.groot_obs_history.clear()
for _ in range(6):
self.groot_obs_history.append(np.zeros(86, dtype=np.float32))
def run_step(self):
# Get current observation
obs = self.robot.get_observation()
def run_step(self, action: dict, lowstate) -> dict:
"""Run one step of the locomotion controller.
if not obs:
return
Args:
action: Action dict containing remote.lx/ly/rx/ry and buttons
lowstate: Robot lowstate containing motor positions/velocities and IMU
Returns:
Action dict for lower body joints (0-14)
"""
if lowstate is None:
return {}
buttons = [int(action.get(k, 0)) for k in REMOTE_BUTTONS]
if buttons[0]: # R1 - raise waist
# Get command from remote controller
if obs["remote.buttons"][0]: # R1 - raise waist
self.groot_height_cmd += 0.001
self.groot_height_cmd = np.clip(self.groot_height_cmd, 0.50, 1.00)
if buttons[4]: # R2 - lower waist
if obs["remote.buttons"][4]: # R2 - lower waist
self.groot_height_cmd -= 0.001
self.groot_height_cmd = np.clip(self.groot_height_cmd, 0.50, 1.00)
lx, ly, rx, _ry = (action.get(k, 0.0) for k in REMOTE_AXES)
self.cmd[0] = ly # Forward/backward
self.cmd[1] = -lx # Left/right (negated)
self.cmd[2] = -rx # Rotation rate (negated)
self.cmd[0] = obs["remote.ly"] # Forward/backward
self.cmd[1] = obs["remote.lx"] * -1 # Left/right
self.cmd[2] = obs["remote.rx"] * -1 # Rotation rate
# Get joint positions and velocities from lowstate
# Get joint positions and velocities from flat dict
for motor in G1_29_JointIndex:
name = motor.name
idx = motor.value
self.groot_qj_all[idx] = lowstate.motor_state[idx].q
self.groot_dqj_all[idx] = lowstate.motor_state[idx].dq
self.groot_qj_all[idx] = obs[f"{name}.q"]
self.groot_dqj_all[idx] = obs[f"{name}.dq"]
# Adapt observation for g1_23dof
for idx in MISSING_JOINTS:
self.groot_qj_all[idx] = 0.0
self.groot_dqj_all[idx] = 0.0
# Scale joint positions and velocities
qj_obs = self.groot_qj_all.copy()
dqj_obs = self.groot_dqj_all.copy()
# Express IMU data in gravity frame of reference
quat = lowstate.imu_state.quaternion
ang_vel = np.array(lowstate.imu_state.gyroscope, dtype=np.float32)
gravity_orientation = get_gravity_orientation(quat)
quat = [obs["imu.quat.w"], obs["imu.quat.x"], obs["imu.quat.y"], obs["imu.quat.z"]]
ang_vel = np.array([obs["imu.gyro.x"], obs["imu.gyro.y"], obs["imu.gyro.z"]], dtype=np.float32)
gravity_orientation = self.robot.get_gravity_orientation(quat)
# Scale joint positions and velocities before policy inference
qj_obs = (qj_obs - GROOT_DEFAULT_ANGLES) * DOF_POS_SCALE
@@ -196,10 +186,73 @@ class GrootLocomotionController:
# Transform action back to target joint positions
target_dof_pos_15 = GROOT_DEFAULT_ANGLES[:15] + self.groot_action * ACTION_SCALE
# Build action dict
# Build action dict (only first 15 joints for GR00T)
action_dict = {}
for i in range(15):
motor_name = G1_29_JointIndex(i).name
action_dict[f"{motor_name}.q"] = float(target_dof_pos_15[i])
return action_dict
# Zero out missing joints for g1_23dof
for joint_idx in MISSING_JOINTS:
motor_name = G1_29_JointIndex(joint_idx).name
action_dict[f"{motor_name}.q"] = 0.0
# Send action to robot
self.robot.send_action(action_dict)
def run(repo_id: str = DEFAULT_GROOT_REPO_ID) -> None:
"""Main function to run the GR00T locomotion controller.
Args:
repo_id: Hugging Face Hub repository ID for GR00T policies.
"""
# Load policies
policy_balance, policy_walk = load_groot_policies(repo_id=repo_id)
# Initialize robot
config = UnitreeG1Config()
robot = UnitreeG1(config)
robot.connect()
# Initialize gr00T locomotion controller
groot_controller = GrootLocomotionController(
policy_balance=policy_balance,
policy_walk=policy_walk,
robot=robot,
config=config,
)
try:
robot.reset(CONTROL_DT, GROOT_DEFAULT_ANGLES)
logger.info("Use joystick: LY=fwd/back, LX=left/right, RX=rotate, R1=raise waist, R2=lower waist")
logger.info("Press Ctrl+C to stop")
# Run step
while not robot._shutdown_event.is_set():
start_time = time.time()
groot_controller.run_step()
elapsed = time.time() - start_time
sleep_time = max(0, CONTROL_DT - elapsed)
time.sleep(sleep_time)
except KeyboardInterrupt:
logger.info("Stopping locomotion...")
finally:
if robot.is_connected:
robot.disconnect()
logger.info("Done!")
if __name__ == "__main__":
parser = argparse.ArgumentParser(description="GR00T Locomotion Controller for Unitree G1")
parser.add_argument(
"--repo-id",
type=str,
default=DEFAULT_GROOT_REPO_ID,
help=f"Hugging Face Hub repo ID for GR00T policies (default: {DEFAULT_GROOT_REPO_ID})",
)
args = parser.parse_args()
run(repo_id=args.repo_id)
@@ -14,21 +14,21 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import json
import logging
import time
import numpy as np
import onnx
import onnxruntime as ort
from huggingface_hub import hf_hub_download
from lerobot.robots.unitree_g1.g1_utils import (
REMOTE_AXES,
G1_29_JointArmIndex,
G1_29_JointIndex,
get_gravity_orientation,
)
from lerobot.robots.unitree_g1.config_unitree_g1 import UnitreeG1Config
from lerobot.robots.unitree_g1.g1_utils import G1_29_JointIndex
from lerobot.robots.unitree_g1.unitree_g1 import UnitreeG1
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
DEFAULT_ANGLES = np.zeros(29, dtype=np.float32)
@@ -40,13 +40,18 @@ DEFAULT_ANGLES[16] = 0.2 # Left shoulder roll
DEFAULT_ANGLES[23] = -0.2 # Right shoulder roll
DEFAULT_ANGLES[[18, 25]] = 0.6 # Elbow
MISSING_JOINTS = []
G1_MODEL = "g1_23" # Or "g1_29"
if G1_MODEL == "g1_23":
MISSING_JOINTS = [12, 14, 20, 21, 27, 28] # Waist yaw/pitch, wrist pitch/yaw
# Control parameters
ACTION_SCALE = 0.25
CONTROL_DT = 0.005 # 200Hz
CONTROL_DT = 0.02 # 50Hz
ANG_VEL_SCALE = 0.25
DOF_POS_SCALE = 1.0
DOF_VEL_SCALE = 0.05
GAIT_PERIOD = 0.5
GAIT_PERIOD = 1.0
DEFAULT_HOLOSOMA_REPO_ID = "nepyope/holosoma_locomotion"
@@ -82,7 +87,7 @@ def load_policy(
logger.info(f"Policy loaded: {policy.get_inputs()[0].shape}{policy.get_outputs()[0].shape}")
# Extract KP/KD from ONNX metadata
model = onnx.load(policy_path, load_external_data=False)
model = onnx.load(policy_path)
metadata = {prop.key: prop.value for prop in model.metadata_props}
if "kp" not in metadata or "kd" not in metadata:
@@ -96,13 +101,15 @@ def load_policy(
class HolosomaLocomotionController:
"""Holosoma lower-body locomotion controller for Unitree G1."""
"""Holosoma whole-body locomotion controller for Unitree G1."""
control_dt = CONTROL_DT # Expose for unitree_g1.py
def __init__(self, policy, robot, kp: np.ndarray, kd: np.ndarray):
self.policy = policy
self.robot = robot
def __init__(self):
# Load policy and gains
self.policy, self.kp, self.kd = load_policy()
# Override robot's PD gains with policy gains
self.robot.kp = kp
self.robot.kd = kd
self.cmd = np.zeros(3, dtype=np.float32)
@@ -117,55 +124,35 @@ class HolosomaLocomotionController:
self.phase_dt = 2 * np.pi / ((1.0 / CONTROL_DT) * GAIT_PERIOD)
self.is_standing = True
logger.info("HolosomaLocomotionController initialized")
def run_step(self):
# Get current observation
obs = self.robot.get_observation()
def reset(self) -> None:
"""Reset internal state for a new episode."""
self.cmd[:] = 0.0
self.qj[:] = 0.0
self.dqj[:] = 0.0
self.obs[:] = 0.0
self.last_action[:] = 0.0
self.phase = np.array([[0.0, np.pi]], dtype=np.float32)
self.is_standing = True
if not obs:
return
def run_step(self, action: dict, lowstate) -> dict:
"""Run one step of the locomotion controller.
Args:
action: Action dict containing remote.lx/ly/rx/ry
lowstate: Robot lowstate containing motor positions/velocities and IMU
Returns:
Action dict for lower body joints (0-14)
"""
if lowstate is None:
return {}
lx, ly, rx, _ry = (action.get(k, 0.0) for k in REMOTE_AXES)
ly = ly if abs(ly) > 0.1 else 0.0
lx = lx if abs(lx) > 0.1 else 0.0
rx = rx if abs(rx) > 0.1 else 0.0
ly = np.clip(ly, -0.3, 0.3)
lx = np.clip(lx, -0.3, 0.3)
# Get command from remote controller
ly = obs["remote.ly"] if abs(obs["remote.ly"]) > 0.1 else 0.0
lx = obs["remote.lx"] if abs(obs["remote.lx"]) > 0.1 else 0.0
rx = obs["remote.rx"] if abs(obs["remote.rx"]) > 0.1 else 0.0
self.cmd[:] = [ly, -lx, -rx]
# Get joint positions and velocities from lowstate
# Get joint positions and velocities
for motor in G1_29_JointIndex:
name = motor.name
idx = motor.value
self.qj[idx] = lowstate.motor_state[idx].q
self.dqj[idx] = lowstate.motor_state[idx].dq
self.qj[idx] = obs[f"{name}.q"]
self.dqj[idx] = obs[f"{name}.dq"]
# Hide arm positions from policy (show DEFAULT_ANGLES instead)
# This prevents policy from reacting to teleop arm movements
for arm_joint in G1_29_JointArmIndex:
self.qj[arm_joint.value] = DEFAULT_ANGLES[arm_joint.value]
self.dqj[arm_joint.value] = 0.0
# Adapt observation for g1_23dof
for idx in MISSING_JOINTS:
self.qj[idx] = 0.0
self.dqj[idx] = 0.0
# Express IMU data in gravity frame of reference
quat = lowstate.imu_state.quaternion
ang_vel = np.array(lowstate.imu_state.gyroscope, dtype=np.float32)
gravity = get_gravity_orientation(quat)
quat = [obs["imu.quat.w"], obs["imu.quat.x"], obs["imu.quat.y"], obs["imu.quat.z"]]
ang_vel = np.array([obs["imu.gyro.x"], obs["imu.gyro.y"], obs["imu.gyro.z"]], dtype=np.float32)
gravity = self.robot.get_gravity_orientation(quat)
# Scale joint positions and velocities before policy inference
qj_obs = (self.qj - DEFAULT_ANGLES) * DOF_POS_SCALE
@@ -199,16 +186,79 @@ class HolosomaLocomotionController:
# Run policy inference
ort_in = {self.policy.get_inputs()[0].name: self.obs.reshape(1, -1).astype(np.float32)}
raw_action = self.policy.run(None, ort_in)[0].squeeze()
policy_action = np.clip(raw_action, -100.0, 100.0)
self.last_action = policy_action.copy()
action = np.clip(raw_action, -100.0, 100.0)
self.last_action = action.copy()
# Transform action back to target joint positions
target = DEFAULT_ANGLES + policy_action * ACTION_SCALE
target = DEFAULT_ANGLES + action * ACTION_SCALE
# Build action dict (first 15 joints only)
# Build action dict
action_dict = {}
for i in range(15):
motor_name = G1_29_JointIndex(i).name
action_dict[f"{motor_name}.q"] = float(target[i])
for motor in G1_29_JointIndex:
action_dict[f"{motor.name}.q"] = float(target[motor.value])
return action_dict
# Zero out missing joints for g1_23dof
for joint_idx in MISSING_JOINTS:
motor_name = G1_29_JointIndex(joint_idx).name
action_dict[f"{motor_name}.q"] = 0.0
# Send action to robot
self.robot.send_action(action_dict)
def run(repo_id: str = DEFAULT_HOLOSOMA_REPO_ID, policy_type: str = "fastsac") -> None:
"""Main function to run the Holosoma locomotion controller.
Args:
repo_id: Hugging Face Hub repository ID for Holosoma policies.
policy_type: Policy type to use ('fastsac' or 'ppo').
"""
# Load policy and gains
policy, kp, kd = load_policy(repo_id=repo_id, policy_type=policy_type)
# Initialize robot
config = UnitreeG1Config()
robot = UnitreeG1(config)
robot.connect()
holosoma_controller = HolosomaLocomotionController(policy, robot, kp, kd)
try:
robot.reset(CONTROL_DT, DEFAULT_ANGLES)
logger.info("Use joystick: LY=fwd/back, LX=left/right, RX=rotate")
logger.info("Press Ctrl+C to stop")
# Run step
while not robot._shutdown_event.is_set():
start_time = time.time()
holosoma_controller.run_step()
elapsed = time.time() - start_time
sleep_time = max(0, CONTROL_DT - elapsed)
time.sleep(sleep_time)
except KeyboardInterrupt:
logger.info("Stopping locomotion...")
finally:
if robot.is_connected:
robot.disconnect()
logger.info("Done!")
if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Holosoma Locomotion Controller for Unitree G1")
parser.add_argument(
"--repo-id",
type=str,
default=DEFAULT_HOLOSOMA_REPO_ID,
help=f"Hugging Face Hub repo ID for Holosoma policies (default: {DEFAULT_HOLOSOMA_REPO_ID})",
)
parser.add_argument(
"--policy",
type=str,
choices=["fastsac", "ppo"],
default="fastsac",
help="Policy type to use: 'fastsac' (default) or 'ppo'",
)
args = parser.parse_args()
run(repo_id=args.repo_id, policy_type=args.policy)
+126 -66
View File
@@ -25,11 +25,11 @@ discord = "https://discord.gg/s3KuuzsPFb"
[project]
name = "lerobot"
version = "0.5.1"
version = "0.4.4"
description = "🤗 LeRobot: State-of-the-art Machine Learning for Real-World Robotics in Pytorch"
dynamic = ["readme"]
license = { text = "Apache-2.0" }
requires-python = ">=3.12"
requires-python = ">=3.10"
authors = [
{ name = "Rémi Cadène", email = "re.cadene@gmail.com" },
{ name = "Simon Alibert", email = "alibert.sim@gmail.com" },
@@ -50,8 +50,7 @@ classifiers = [
"Intended Audience :: Education",
"Intended Audience :: Science/Research",
"License :: OSI Approved :: Apache Software License",
"Programming Language :: Python :: 3.12",
"Programming Language :: Python :: 3.13",
"Programming Language :: Python :: 3.10",
"Topic :: Software Development :: Build Tools",
"Topic :: Scientific/Engineering :: Artificial Intelligence",
]
@@ -60,30 +59,28 @@ keywords = ["lerobot", "huggingface", "robotics", "machine learning", "artifici
dependencies = [
# Hugging Face dependencies
"datasets>=4.0.0,<5.0.0",
"datasets>=4.0.0,<4.2.0",
"diffusers>=0.27.2,<0.36.0",
"huggingface-hub>=1.0.0,<2.0.0",
"huggingface-hub[hf-transfer,cli]>=0.34.2,<0.36.0",
"accelerate>=1.10.0,<2.0.0",
# Core dependencies
"numpy>=2.0.0,<2.3.0", # NOTE: Explicitly listing numpy helps the resolver converge faster. Upper bound imposed by opencv-python-headless.
"setuptools>=71.0.0,<81.0.0",
"cmake>=3.29.0.1,<4.2.0",
"packaging>=24.2,<26.0",
"torch>=2.2.1,<2.11.0",
"torchcodec>=0.2.1,<0.11.0; sys_platform != 'win32' and (sys_platform != 'linux' or (platform_machine != 'aarch64' and platform_machine != 'arm64' and platform_machine != 'armv7l')) and (sys_platform != 'darwin' or platform_machine != 'x86_64')",
"torchvision>=0.21.0,<0.26.0",
"einops>=0.8.0,<0.9.0",
"opencv-python-headless>=4.9.0,<4.14.0",
"opencv-python-headless>=4.9.0,<4.13.0",
"av>=15.0.0,<16.0.0",
"jsonlines>=4.0.0,<5.0.0",
"pynput>=1.7.8,<1.9.0",
"packaging>=24.2,<26.0",
"pynput>=1.7.7,<1.9.0",
"pyserial>=3.5,<4.0",
"wandb>=0.24.0,<0.25.0",
"draccus==0.10.0", # TODO: Relax version constraint
"torch>=2.2.1,<2.8.0", # TODO: Bumb dependency
"torchcodec>=0.2.1,<0.6.0; sys_platform != 'win32' and (sys_platform != 'linux' or (platform_machine != 'aarch64' and platform_machine != 'arm64' and platform_machine != 'armv7l')) and (sys_platform != 'darwin' or platform_machine != 'x86_64')", # TODO: Bumb dependency
"torchvision>=0.21.0,<0.23.0", # TODO: Bumb dependency
"draccus==0.10.0", # TODO: Remove ==
"gymnasium>=1.1.1,<2.0.0",
"rerun-sdk>=0.24.0,<0.27.0",
@@ -98,20 +95,14 @@ dependencies = [
# Common
pygame-dep = ["pygame>=2.5.1,<2.7.0"]
placo-dep = ["placo>=0.9.6,<0.9.17"]
transformers-dep = ["transformers==5.3.0"] # TODO(Steven): https://github.com/huggingface/lerobot/pull/3249
placo-dep = ["placo>=0.9.6,<0.10.0"]
transformers-dep = ["transformers>=4.57.1,<5.0.0"]
grpcio-dep = ["grpcio==1.73.1", "protobuf>=6.31.1,<6.32.0"]
can-dep = ["python-can>=4.2.0,<5.0.0"]
peft-dep = ["peft>=0.18.0,<1.0.0"]
scipy-dep = ["scipy>=1.14.0,<2.0.0"]
qwen-vl-utils-dep = ["qwen-vl-utils>=0.0.11,<0.1.0"]
matplotlib-dep = ["matplotlib>=3.10.3,<4.0.0", "contourpy>=1.3.0,<2.0.0"] # NOTE: Explicitly listing contourpy helps the resolver converge faster.
# Motors
feetech = ["feetech-servo-sdk>=1.0.0,<2.0.0"]
dynamixel = ["dynamixel-sdk>=3.7.31,<3.9.0"]
damiao = ["lerobot[can-dep]"]
robstride = ["lerobot[can-dep]"]
damiao = ["python-can>=4.2.0,<5.0.0"]
# Robots
openarms = ["lerobot[damiao]"]
@@ -119,36 +110,34 @@ gamepad = ["lerobot[pygame-dep]", "hidapi>=0.14.0,<0.15.0"]
hopejr = ["lerobot[feetech]", "lerobot[pygame-dep]"]
lekiwi = ["lerobot[feetech]", "pyzmq>=26.2.1,<28.0.0"]
unitree_g1 = [
# "unitree-sdk2==1.0.1",
"pyzmq>=26.2.1,<28.0.0",
"onnxruntime>=1.16.0,<2.0.0",
"onnx>=1.16.0,<2.0.0",
"pin>=3.0.0,<4.0.0",
"meshcat>=0.3.0,<0.4.0",
"lerobot[matplotlib-dep]",
"lerobot[pygame-dep]",
"matplotlib>=3.9.0,<4.0.0",
"casadi>=3.6.0,<4.0.0",
]
reachy2 = ["reachy2_sdk>=1.0.15,<1.1.0"]
kinematics = ["lerobot[placo-dep]"]
intelrealsense = [
"pyrealsense2>=2.55.1.6486,<2.57.0 ; sys_platform != 'darwin'",
"pyrealsense2-macosx>=2.54,<2.57.0 ; sys_platform == 'darwin'",
"pyrealsense2-macosx>=2.54,<2.55.0 ; sys_platform == 'darwin'",
]
phone = ["hebi-py>=2.8.0,<2.12.0", "teleop>=0.1.0,<0.2.0", "fastapi<1.0", "lerobot[scipy-dep]"]
phone = ["hebi-py>=2.8.0,<2.12.0", "teleop>=0.1.0,<0.2.0", "fastapi<1.0"]
# Policies
wallx = [
"lerobot[transformers-dep]",
"lerobot[peft]",
"lerobot[scipy-dep]",
"torchdiffeq>=0.2.4,<0.3.0",
"lerobot[qwen-vl-utils-dep]",
"transformers==4.49.0",
"peft==0.17.1",
"scipy==1.15.3",
"torchdiffeq==0.2.5",
"qwen_vl_utils==0.0.11"
]
pi = ["lerobot[transformers-dep]", "lerobot[scipy-dep]"]
pi = ["transformers @ git+https://github.com/huggingface/transformers.git@fix/lerobot_openpi", "scipy>=1.10.1,<1.15"]
smolvla = ["lerobot[transformers-dep]", "num2words>=0.5.14,<0.6.0", "accelerate>=1.7.0,<2.0.0", "safetensors>=0.4.3,<1.0.0"]
multi_task_dit = ["lerobot[transformers-dep]"]
groot = [
"lerobot[transformers-dep]",
"lerobot[peft]",
"peft>=0.13.0,<1.0.0",
"dm-tree>=0.1.8,<1.0.0",
"timm>=1.0.0,<1.1.0",
"safetensors>=0.4.3,<1.0.0",
@@ -157,13 +146,13 @@ groot = [
"ninja>=1.11.1,<2.0.0",
"flash-attn>=2.5.9,<3.0.0 ; sys_platform != 'darwin'"
]
sarm = ["lerobot[transformers-dep]", "faker>=33.0.0,<35.0.0", "lerobot[matplotlib-dep]", "lerobot[qwen-vl-utils-dep]"]
sarm = ["lerobot[transformers-dep]", "faker>=33.0.0,<35.0.0", "matplotlib>=3.10.3,<4.0.0", "qwen-vl-utils>=0.0.14,<0.1.0"]
xvla = ["lerobot[transformers-dep]"]
hilserl = ["lerobot[transformers-dep]", "gym-hil>=0.1.13,<0.2.0", "lerobot[grpcio-dep]", "lerobot[placo-dep]"]
# Features
async = ["lerobot[grpcio-dep]", "lerobot[matplotlib-dep]"]
peft = ["lerobot[transformers-dep]", "lerobot[peft-dep]"]
async = ["lerobot[grpcio-dep]", "matplotlib>=3.10.3,<4.0.0"]
peft = ["lerobot[transformers-dep]", "peft>=0.18.0,<1.0.0"]
# Development
dev = ["pre-commit>=3.7.0,<5.0.0", "debugpy>=1.8.1,<1.9.0", "lerobot[grpcio-dep]", "grpcio-tools==1.73.1", "mypy>=1.19.1"]
@@ -171,19 +160,13 @@ test = ["pytest>=8.1.0,<9.0.0", "pytest-timeout>=2.4.0,<3.0.0", "pytest-cov>=5.0
video_benchmark = ["scikit-image>=0.23.2,<0.26.0", "pandas>=2.2.2,<2.4.0"]
# Simulation
# NOTE: Explicitly listing scipy helps flatten the dependecy tree.
aloha = ["gym-aloha>=0.1.2,<0.2.0", "lerobot[scipy-dep]"]
aloha = ["gym-aloha>=0.1.2,<0.2.0"]
pusht = ["gym-pusht>=0.1.5,<0.2.0", "pymunk>=6.6.0,<7.0.0"] # TODO: Fix pymunk version in gym-pusht instead
libero = ["lerobot[transformers-dep]", "hf-libero>=0.1.3,<0.2.0; sys_platform == 'linux'", "lerobot[scipy-dep]"]
metaworld = ["metaworld==3.0.0", "lerobot[scipy-dep]"]
libero = ["lerobot[transformers-dep]", "hf-libero>=0.1.3,<0.2.0"]
metaworld = ["metaworld==3.0.0"]
# All
all = [
# NOTE(resolver hint): scipy is pulled in transitively via lerobot[scipy-dep] through
# multiple extras (aloha, metaworld, pi, wallx, phone). Listing it explicitly
# helps pip's resolver converge by constraining scipy early, before it encounters
# the loose scipy requirements from transitive deps like dm-control and metaworld.
"scipy>=1.14.0,<2.0.0",
"lerobot[dynamixel]",
"lerobot[gamepad]",
"lerobot[hopejr]",
@@ -191,8 +174,8 @@ all = [
"lerobot[reachy2]",
"lerobot[kinematics]",
"lerobot[intelrealsense]",
"lerobot[wallx]",
"lerobot[pi]",
# "lerobot[wallx]",
# "lerobot[pi]", TODO(Pepijn): Update pi to transformers v5
"lerobot[smolvla]",
# "lerobot[groot]", TODO(Steven): Gr00t requires specific installation instructions for flash-attn
"lerobot[xvla]",
@@ -204,11 +187,10 @@ all = [
"lerobot[aloha]",
"lerobot[pusht]",
"lerobot[phone]",
"lerobot[libero]; sys_platform == 'linux'",
"lerobot[libero]",
"lerobot[metaworld]",
"lerobot[sarm]",
"lerobot[peft]",
# "lerobot[unitree_g1]", TODO: Unitree requires specific installation instructions for unitree_sdk2
]
[project.scripts]
@@ -230,14 +212,11 @@ lerobot-edit-dataset="lerobot.scripts.lerobot_edit_dataset:main"
lerobot-setup-can="lerobot.scripts.lerobot_setup_can:main"
# ---------------- Tool Configurations ----------------
[tool.setuptools.package-data]
lerobot = ["envs/*.json"]
[tool.setuptools.packages.find]
where = ["src"]
[tool.ruff]
target-version = "py312"
target-version = "py310"
line-length = 110
exclude = ["tests/artifacts/**/*.safetensors", "*_pb2.py", "*_pb2_grpc.py"]
@@ -306,8 +285,7 @@ default.extend-ignore-identifiers-re = [
"thw",
"inpt",
"ROBOTIS",
"OT_VALUE",
"metalness",
"OT_VALUE"
]
# TODO: Uncomment when ready to use
@@ -330,7 +308,7 @@ default.extend-ignore-identifiers-re = [
# Uncomment [tool.mypy] first, then uncomment individual module overrides as they get proper type annotations
[tool.mypy]
python_version = "3.12"
python_version = "3.10"
ignore_missing_imports = true
follow_imports = "skip"
# warn_return_any = true
@@ -382,9 +360,9 @@ ignore_errors = false
module = "lerobot.cameras.*"
ignore_errors = false
[[tool.mypy.overrides]]
module = "lerobot.motors.*"
ignore_errors = false
# [[tool.mypy.overrides]]
# module = "lerobot.motors.*"
# ignore_errors = false
# [[tool.mypy.overrides]]
# module = "lerobot.robots.*"
@@ -414,3 +392,85 @@ ignore_errors = false
# [[tool.mypy.overrides]]
# module = "lerobot.scripts.*"
# ignore_errors = false
[tool.uv]
# wallx requires transformers==4.49.0 which conflicts with other extras that need >=4.53.0
conflicts = [
[
{ extra = "wallx" },
{ extra = "transformers-dep" },
],
[
{ extra = "wallx" },
{ extra = "pi" },
],
[
{ extra = "wallx" },
{ extra = "smolvla" },
],
[
{ extra = "wallx" },
{ extra = "groot" },
],
[
{ extra = "wallx" },
{ extra = "xvla" },
],
[
{ extra = "wallx" },
{ extra = "sarm" },
],
[
{ extra = "wallx" },
{ extra = "hilserl" },
],
[
{ extra = "wallx" },
{ extra = "libero" },
],
[
{ extra = "wallx" },
{ extra = "peft" },
],
[
{ extra = "wallx" },
{ extra = "all" },
],
# pi uses custom branch which conflicts with transformers-dep
[
{ extra = "pi" },
{ extra = "transformers-dep" },
],
[
{ extra = "pi" },
{ extra = "smolvla" },
],
[
{ extra = "pi" },
{ extra = "groot" },
],
[
{ extra = "pi" },
{ extra = "xvla" },
],
[
{ extra = "pi" },
{ extra = "sarm" },
],
[
{ extra = "pi" },
{ extra = "hilserl" },
],
[
{ extra = "pi" },
{ extra = "libero" },
],
[
{ extra = "pi" },
{ extra = "peft" },
],
[
{ extra = "pi" },
{ extra = "all" },
],
]
+276 -175
View File
@@ -1,73 +1,76 @@
#
# This file is autogenerated by pip-compile with Python 3.12
# This file is autogenerated by pip-compile with Python 3.10
# by the following command:
#
# pip-compile --output-file=requirements-macos.txt requirements.in
#
-e .[all]
# via -[all]
absl-py==2.4.0
absl-py==2.3.1
# via
# dm-control
# dm-env
# dm-tree
# labmaze
# mujoco
accelerate==1.13.0
# tensorboard
accelerate==1.11.0
# via
# lerobot
# peft
aiohappyeyeballs==2.6.1
# via aiohttp
aiohttp==3.13.3
aiohttp==3.13.1
# via fsspec
aiosignal==1.4.0
# via aiohttp
annotated-doc==0.0.4
# via
# fastapi
# typer
annotated-types==0.7.0
# via pydantic
anyio==4.12.1
antlr4-python3-runtime==4.9.3
# via
# hydra-core
# omegaconf
anyio==4.11.0
# via
# httpx
# starlette
# watchfiles
asttokens==3.0.1
asttokens==3.0.0
# via stack-data
async-timeout==5.0.1
# via aiohttp
attrs==25.4.0
# via
# aiohttp
# dm-tree
# jsonlines
# jsonschema
# referencing
# rerun-sdk
av==15.1.0
# via lerobot
bddl==1.0.1
# via libero
certifi==2025.10.5
# via
# lerobot
# qwen-vl-utils
certifi==2026.2.25
# via
# httpcore
# httpx
# requests
# sentry-sdk
cffi==2.0.0
# via pymunk
cfgv==3.5.0
cfgv==3.4.0
# via pre-commit
charset-normalizer==3.4.5
charset-normalizer==3.4.4
# via requests
click==8.3.1
click==8.3.0
# via
# typer
# uvicorn
# wandb
cloudpickle==3.1.2
# via gymnasium
cmake==4.1.3
cloudpickle==3.1.1
# via
# gymnasium
# libero
cmake==4.1.0
# via lerobot
cmeel==0.59.0
cmeel==0.57.3
# via
# cmeel-assimp
# cmeel-boost
@@ -105,17 +108,15 @@ cmeel-zlib==1.3.1
# via cmeel-assimp
coal-library==3.0.1
# via pin
contourpy==1.3.3
# via
# lerobot
# matplotlib
coverage[toml]==7.13.4
contourpy==1.3.2
# via matplotlib
coverage[toml]==7.11.0
# via pytest-cov
cycler==0.12.1
# via matplotlib
datasets==4.6.1
datasets==4.1.1
# via lerobot
debugpy==1.8.20
debugpy==1.8.17
# via lerobot
decorator==5.2.1
# via ipython
@@ -129,7 +130,7 @@ dill==0.4.0
# multiprocess
distlib==0.4.0
# via virtualenv
dm-control==1.0.37
dm-control==1.0.34
# via gym-aloha
dm-env==1.6
# via dm-control
@@ -137,55 +138,69 @@ dm-tree==0.1.9
# via
# dm-control
# dm-env
# lerobot
docopt==0.6.2
# via num2words
draccus==0.10.0
# via lerobot
dynamixel-sdk==3.8.4
# via lerobot
easydict==1.13
# via libero
egl-probe @ git+https://github.com/huggingface/egl_probe.git
# via
# libero
# robomimic
eigenpy==3.10.3
# via coal-library
einops==0.8.2
# via lerobot
eiquadprog==1.2.9
# via placo
etils[epath,epy]==1.14.0
# via mujoco
executing==2.2.1
# via stack-data
faker==34.0.2
# via lerobot
farama-notifications==0.0.4
# via gymnasium
fastapi==0.135.1
einops==0.8.1
# via
# lerobot
# teleop
# libero
eiquadprog==1.2.9
# via placo
etils[epath,epy]==1.13.0
# via mujoco
exceptiongroup==1.3.0
# via
# anyio
# ipython
# pytest
executing==2.2.1
# via stack-data
farama-notifications==0.0.4
# via gymnasium
fastapi==0.119.1
# via teleop
fastjsonschema==2.21.2
# via nbformat
feetech-servo-sdk==1.0.0
# via lerobot
filelock==3.25.0
filelock==3.20.0
# via
# datasets
# diffusers
# huggingface-hub
# python-discovery
# torch
# transformers
# virtualenv
fonttools==4.61.1
fonttools==4.60.1
# via matplotlib
frozenlist==1.8.0
# via
# aiohttp
# aiosignal
fsspec[http]==2026.2.0
fsspec[http]==2025.9.0
# via
# datasets
# etils
# huggingface-hub
# torch
future==1.0.0
# via libero
gitdb==4.0.12
# via gitpython
gitpython==3.1.46
gitpython==3.1.45
# via wandb
glfw==2.10.0
# via
@@ -197,6 +212,7 @@ grpcio==1.73.1
# lerobot
# reachy2-sdk
# reachy2-sdk-api
# tensorboard
grpcio-tools==1.73.1
# via
# lerobot
@@ -207,67 +223,71 @@ gym-hil==0.1.13
# via lerobot
gym-pusht==0.1.6
# via lerobot
gymnasium==1.2.3
gymnasium==1.2.1
# via
# gym-aloha
# gym-hil
# gym-pusht
# lerobot
# libero
# metaworld
h11==0.16.0
# via
# httpcore
# uvicorn
# via uvicorn
h5py==3.15.1
# via robomimic
hebi-py==2.11.0
# via lerobot
hf-xet==1.3.2
hf-transfer==0.1.9
# via huggingface-hub
hf-xet==1.1.10
# via huggingface-hub
hidapi==0.14.0.post4
# via
# gym-hil
# lerobot
httpcore==1.0.9
# via httpx
httptools==0.7.1
# via uvicorn
httpx==0.28.1
# via
# datasets
# huggingface-hub
huggingface-hub==1.6.0
huggingface-hub[cli,hf-transfer]==0.35.3
# via
# accelerate
# datasets
# diffusers
# lerobot
# peft
# timm
# tokenizers
# transformers
identify==2.6.17
hydra-core==1.3.2
# via libero
identify==2.6.15
# via pre-commit
idna==3.11
# via
# anyio
# httpx
# requests
# yarl
imageio[ffmpeg]==2.37.2
imageio[ffmpeg]==2.37.0
# via
# gym-aloha
# gym-hil
# lerobot
# metaworld
# robomimic
# scikit-image
imageio-ffmpeg==0.6.0
# via imageio
importlib-metadata==8.7.1
# via
# imageio
# robomimic
importlib-metadata==8.7.0
# via diffusers
importlib-resources==6.5.2
# via etils
iniconfig==2.3.0
# via pytest
ipython==9.11.0
inquirerpy==0.3.4
# via huggingface-hub
ipython==8.37.0
# via meshcat
ipython-pygments-lexers==1.1.1
# via ipython
ischedule==1.2.7
# via placo
jedi==0.19.2
@@ -276,24 +296,44 @@ jinja2==3.1.6
# via torch
jsonlines==4.0.0
# via lerobot
jsonschema==4.25.1
# via nbformat
jsonschema-specifications==2025.9.1
# via jsonschema
jupyter-core==5.9.1
# via nbformat
jupytext==1.18.1
# via bddl
kiwisolver==1.4.9
# via matplotlib
labmaze==1.0.6
# via dm-control
lazy-loader==0.5
lazy-loader==0.4
# via scikit-image
librt==0.8.1
# via mypy
libero @ git+https://github.com/huggingface/lerobot-libero.git@main
# via lerobot
llvmlite==0.45.1
# via numba
lxml==6.0.2
# via dm-control
markdown==3.9
# via tensorboard
markdown-it-py==4.0.0
# via rich
# via
# jupytext
# mdit-py-plugins
markupsafe==3.0.3
# via jinja2
matplotlib==3.10.8
# via lerobot
# via
# jinja2
# werkzeug
matplotlib==3.10.7
# via
# lerobot
# libero
matplotlib-inline==0.2.1
# via ipython
mdit-py-plugins==0.5.0
# via jupytext
mdurl==0.1.2
# via markdown-it-py
mergedeep==1.3.4
@@ -306,35 +346,41 @@ mock-serial==0.0.1
# via lerobot
mpmath==1.3.0
# via sympy
mujoco==3.5.0
mujoco==3.3.7
# via
# dm-control
# gym-aloha
# gym-hil
# libero
# metaworld
multidict==6.7.1
# robosuite
multidict==6.7.0
# via
# aiohttp
# yarl
multiprocess==0.70.18
multiprocess==0.70.16
# via datasets
mypy==1.19.1
# via lerobot
mypy-extensions==1.1.0
# via typing-inspect
nbformat==5.10.4
# via jupytext
networkx==3.4.2
# via
# mypy
# typing-inspect
networkx==3.6.1
# via
# bddl
# scikit-image
# torch
nodeenv==1.10.0
ninja==1.13.0
# via lerobot
nodeenv==1.9.1
# via pre-commit
num2words==0.5.14
# via lerobot
numba==0.62.1
# via robosuite
numpy==2.2.6
# via
# accelerate
# bddl
# cmeel-boost
# contourpy
# datasets
@@ -343,14 +389,16 @@ numpy==2.2.6
# dm-env
# dm-tree
# gymnasium
# h5py
# hebi-py
# imageio
# labmaze
# lerobot
# libero
# matplotlib
# meshcat
# metaworld
# mujoco
# numba
# opencv-python
# opencv-python-headless
# pandas
@@ -358,18 +406,26 @@ numpy==2.2.6
# pyquaternion
# reachy2-sdk
# rerun-sdk
# robomimic
# robosuite
# scikit-image
# scipy
# shapely
# teleop
# tensorboard
# tensorboardx
# tifffile
# torchvision
# transformers
# transforms3d
opencv-python==4.13.0.92
omegaconf==2.3.0
# via hydra-core
opencv-python==4.12.0.88
# via
# gym-pusht
# libero
# reachy2-sdk
# robosuite
opencv-python-headless==4.12.0.88
# via lerobot
orderly-set==5.5.0
@@ -379,87 +435,97 @@ packaging==25.0
# accelerate
# datasets
# huggingface-hub
# hydra-core
# jupytext
# lazy-loader
# lerobot
# matplotlib
# peft
# pytest
# qwen-vl-utils
# reachy2-sdk
# scikit-image
# tensorboard
# tensorboardx
# transformers
# wandb
pandas==2.3.3
# via
# datasets
# lerobot
parso==0.8.6
parso==0.8.5
# via jedi
pathspec==1.0.4
# via mypy
peft==0.18.1
peft==0.17.1
# via lerobot
pexpect==4.9.0
# via ipython
pillow==12.1.1
pfzy==0.3.4
# via inquirerpy
pillow==12.0.0
# via
# diffusers
# imageio
# lerobot
# matplotlib
# meshcat
# qwen-vl-utils
# rerun-sdk
# robosuite
# scikit-image
# tensorboard
# torchvision
pin==3.4.0
# via placo
placo==0.9.16
placo==0.9.14
# via lerobot
platformdirs==4.9.4
platformdirs==4.5.0
# via
# python-discovery
# jupyter-core
# virtualenv
# wandb
pluggy==1.6.0
# via
# pytest
# pytest-cov
pre-commit==4.5.1
pre-commit==4.3.0
# via lerobot
prompt-toolkit==3.0.52
# via ipython
# via
# inquirerpy
# ipython
propcache==0.4.1
# via
# aiohttp
# yarl
protobuf==6.31.1
protobuf==6.31.0
# via
# dm-control
# grpcio-tools
# lerobot
# reachy2-sdk
# reachy2-sdk-api
# tensorboard
# tensorboardx
# wandb
psutil==7.2.2
psutil==7.1.1
# via
# accelerate
# imageio
# peft
# robomimic
ptyprocess==0.7.0
# via pexpect
pure-eval==0.2.3
# via stack-data
pyarrow==23.0.1
pyarrow==21.0.0
# via
# datasets
# rerun-sdk
pycparser==3.0
pycparser==2.23
# via cffi
pydantic==2.12.5
pydantic==2.12.3
# via
# fastapi
# wandb
pydantic-core==2.41.5
pydantic-core==2.41.4
# via pydantic
pygame==2.6.1
# via
@@ -469,35 +535,33 @@ pygame==2.6.1
pygments==2.19.2
# via
# ipython
# ipython-pygments-lexers
# pytest
# rich
pymunk==6.11.1
# via
# gym-pusht
# lerobot
pyngrok==7.5.1
pyngrok==7.4.1
# via meshcat
pynput==1.8.1
# via
# gym-hil
# lerobot
pyobjc-core==12.1
pyobjc-core==12.0
# via
# pyobjc-framework-applicationservices
# pyobjc-framework-cocoa
# pyobjc-framework-coretext
# pyobjc-framework-quartz
pyobjc-framework-applicationservices==12.1
pyobjc-framework-applicationservices==12.0
# via pynput
pyobjc-framework-cocoa==12.1
pyobjc-framework-cocoa==12.0
# via
# pyobjc-framework-applicationservices
# pyobjc-framework-coretext
# pyobjc-framework-quartz
pyobjc-framework-coretext==12.1
pyobjc-framework-coretext==12.0
# via pyobjc-framework-applicationservices
pyobjc-framework-quartz==12.1
pyobjc-framework-quartz==12.0
# via
# pynput
# pyobjc-framework-applicationservices
@@ -506,13 +570,13 @@ pyopengl==3.1.10
# via
# dm-control
# mujoco
pyparsing==3.3.2
pyparsing==3.2.5
# via
# dm-control
# matplotlib
pyquaternion==0.9.9
# via reachy2-sdk
pyrealsense2-macosx==2.56.5
pyrealsense2-macosx==2.54.2
# via lerobot
pyserial==3.5
# via
@@ -521,6 +585,7 @@ pyserial==3.5
# lerobot
pytest==8.4.2
# via
# bddl
# lerobot
# pytest-cov
# pytest-timeout
@@ -531,14 +596,11 @@ pytest-timeout==2.4.0
# via lerobot
python-dateutil==2.9.0.post0
# via
# faker
# matplotlib
# pandas
python-discovery==1.1.1
# via virtualenv
python-dotenv==1.2.2
python-dotenv==1.1.1
# via uvicorn
pytz==2026.1.post1
pytz==2025.2
# via pandas
pyyaml==6.0.3
# via
@@ -547,10 +609,13 @@ pyyaml==6.0.3
# draccus
# hebi-py
# huggingface-hub
# jupytext
# omegaconf
# peft
# pre-commit
# pyngrok
# pyyaml-include
# timm
# transformers
# uvicorn
# wandb
@@ -560,13 +625,15 @@ pyzmq==27.1.0
# via
# lerobot
# meshcat
qwen-vl-utils==0.0.14
# via lerobot
reachy2-sdk==1.0.15
reachy2-sdk==1.0.14
# via lerobot
reachy2-sdk-api==1.0.21
# via reachy2-sdk
regex==2026.2.28
referencing==0.37.0
# via
# jsonschema
# jsonschema-specifications
regex==2025.10.23
# via
# diffusers
# transformers
@@ -575,150 +642,184 @@ requests==2.32.5
# datasets
# diffusers
# dm-control
# qwen-vl-utils
# huggingface-hub
# teleop
# transformers
# wandb
rerun-sdk==0.26.2
rerun-sdk==0.26.1
# via lerobot
rhoban-cmeel-jsoncpp==1.9.4.9
# via placo
rich==14.3.3
# via typer
safetensors==0.7.0
robomimic==0.2.0
# via libero
robosuite==1.4.0
# via libero
rpds-py==0.28.0
# via
# jsonschema
# referencing
safetensors==0.6.2
# via
# accelerate
# diffusers
# lerobot
# peft
# timm
# transformers
scikit-image==0.25.2
# via
# gym-pusht
# lerobot
scipy==1.17.1
scipy==1.15.3
# via
# dm-control
# lerobot
# metaworld
# robosuite
# scikit-image
# torchdiffeq
sentry-sdk==2.54.0
sentry-sdk==2.42.1
# via wandb
shapely==2.1.2
# via gym-pusht
shellingham==1.5.4
# via typer
six==1.17.0
# via
# pynput
# python-dateutil
smmap==5.0.3
smmap==5.0.2
# via gitdb
sniffio==1.3.1
# via anyio
stack-data==0.6.3
# via ipython
starlette==0.52.1
starlette==0.48.0
# via fastapi
sympy==1.14.0
# via torch
teleop==0.1.4
teleop==0.1.2
# via lerobot
termcolor==3.3.0
# via lerobot
tifffile==2026.3.3
tensorboard==2.20.0
# via robomimic
tensorboard-data-server==0.7.2
# via tensorboard
tensorboardx==2.6.4
# via robomimic
termcolor==3.1.0
# via
# lerobot
# robomimic
thop==0.1.1.post2209072238
# via libero
tifffile==2025.5.10
# via scikit-image
tokenizers==0.22.2
timm==1.0.20
# via lerobot
tokenizers==0.22.1
# via transformers
toml==0.10.2
# via draccus
torch==2.10.0
tomli==2.3.0
# via
# cmeel
# coverage
# jupytext
# pytest
torch==2.7.1
# via
# accelerate
# lerobot
# peft
# torchdiffeq
# robomimic
# thop
# timm
# torchvision
torchcodec==0.10.0
torchcodec==0.5
# via lerobot
torchdiffeq==0.2.5
# via lerobot
torchvision==0.25.0
# via lerobot
tornado==6.5.4
torchvision==0.22.1
# via
# lerobot
# robomimic
# timm
tornado==6.5.2
# via meshcat
tqdm==4.67.3
tqdm==4.67.1
# via
# datasets
# dm-control
# huggingface-hub
# peft
# robomimic
# transformers
traitlets==5.14.3
# via
# ipython
# jupyter-core
# matplotlib-inline
transformers==5.3.0
# nbformat
transformers==4.57.1
# via
# lerobot
# libero
# peft
transforms3d==0.4.2
# via teleop
typer==0.24.1
# via
# huggingface-hub
# transformers
typing-extensions==4.15.0
# via
# aiosignal
# anyio
# etils
# faker
# exceptiongroup
# fastapi
# gymnasium
# huggingface-hub
# mypy
# ipython
# multidict
# pydantic
# pydantic-core
# referencing
# rerun-sdk
# starlette
# torch
# typing-inspect
# typing-inspection
# uvicorn
# virtualenv
# wandb
typing-inspect==0.9.0
# via draccus
typing-inspection==0.4.2
# via
# fastapi
# pydantic
tzdata==2025.3
# via pydantic
tzdata==2025.2
# via pandas
u-msgpack-python==2.8.0
# via meshcat
urllib3==2.6.3
urllib3==2.5.0
# via
# requests
# sentry-sdk
uvicorn[standard]==0.41.0
uvicorn[standard]==0.38.0
# via teleop
uvloop==0.22.1
# via uvicorn
virtualenv==21.1.0
virtualenv==20.35.3
# via pre-commit
wandb==0.24.2
# via lerobot
wandb==0.21.4
# via
# lerobot
# libero
watchfiles==1.1.1
# via uvicorn
wcwidth==0.6.0
wcwidth==0.2.14
# via prompt-toolkit
websocket-client==1.9.0
# via teleop
websockets==16.0
websockets==15.0.1
# via uvicorn
wrapt==2.1.2
werkzeug==3.1.3
# via tensorboard
wrapt==2.0.0
# via dm-tree
xxhash==3.6.0
# via datasets
yarl==1.23.0
yarl==1.22.0
# via aiohttp
zipp==3.23.0
# via
+187 -208
View File
@@ -1,12 +1,12 @@
#
# This file is autogenerated by pip-compile with Python 3.12
# This file is autogenerated by pip-compile with Python 3.10
# by the following command:
#
# pip-compile --output-file=requirements-ubuntu.txt requirements.in
#
-e .[all]
# via -[all]
absl-py==2.4.0
absl-py==2.3.1
# via
# dm-control
# dm-env
@@ -14,33 +14,30 @@ absl-py==2.4.0
# labmaze
# mujoco
# tensorboard
accelerate==1.13.0
accelerate==1.11.0
# via
# lerobot
# peft
aiohappyeyeballs==2.6.1
# via aiohttp
aiohttp==3.13.3
aiohttp==3.13.1
# via fsspec
aiosignal==1.4.0
# via aiohttp
annotated-doc==0.0.4
# via
# fastapi
# typer
annotated-types==0.7.0
# via pydantic
antlr4-python3-runtime==4.9.3
# via
# hydra-core
# omegaconf
anyio==4.12.1
anyio==4.11.0
# via
# httpx
# starlette
# watchfiles
asttokens==3.0.1
asttokens==3.0.0
# via stack-data
async-timeout==5.0.1
# via aiohttp
attrs==25.4.0
# via
# aiohttp
@@ -50,35 +47,30 @@ attrs==25.4.0
# referencing
# rerun-sdk
av==15.1.0
# via
# lerobot
# qwen-vl-utils
# via lerobot
bddl==1.0.1
# via hf-libero
certifi==2026.2.25
# via libero
certifi==2025.10.5
# via
# httpcore
# httpx
# requests
# sentry-sdk
cffi==2.0.0
# via pymunk
cfgv==3.5.0
cfgv==3.4.0
# via pre-commit
charset-normalizer==3.4.5
charset-normalizer==3.4.4
# via requests
click==8.3.1
click==8.3.0
# via
# typer
# uvicorn
# wandb
cloudpickle==3.1.2
cloudpickle==3.1.1
# via
# gymnasium
# hf-libero
cmake==4.1.3
# libero
cmake==4.1.0
# via lerobot
cmeel==0.59.0
cmeel==0.57.3
# via
# cmeel-assimp
# cmeel-boost
@@ -116,24 +108,20 @@ cmeel-zlib==1.3.1
# via cmeel-assimp
coal-library==3.0.1
# via pin
contourpy==1.3.3
# via
# lerobot
# matplotlib
coverage[toml]==7.13.4
contourpy==1.3.2
# via matplotlib
coverage[toml]==7.11.0
# via pytest-cov
cuda-bindings==12.9.4
# via torch
cuda-pathfinder==1.4.1
# via cuda-bindings
cycler==0.12.1
# via matplotlib
datasets==4.6.1
datasets==4.1.1
# via lerobot
debugpy==1.8.20
debugpy==1.8.17
# via lerobot
decorator==5.2.1
# via ipython
decord==0.6.0
# via lerobot
deepdiff==8.6.1
# via lerobot
diffusers==0.35.2
@@ -144,7 +132,7 @@ dill==0.4.0
# multiprocess
distlib==0.4.0
# via virtualenv
dm-control==1.0.37
dm-control==1.0.34
# via gym-aloha
dm-env==1.6
# via dm-control
@@ -152,6 +140,7 @@ dm-tree==0.1.9
# via
# dm-control
# dm-env
# lerobot
docopt==0.6.2
# via num2words
draccus==0.10.0
@@ -159,60 +148,66 @@ draccus==0.10.0
dynamixel-sdk==3.8.4
# via lerobot
easydict==1.13
# via hf-libero
egl-probe==1.0.2
# via robomimic
# via libero
egl-probe @ git+https://github.com/huggingface/egl_probe.git
# via
# libero
# robomimic
eigenpy==3.10.3
# via coal-library
einops==0.8.2
einops==0.8.1
# via
# hf-libero
# flash-attn
# lerobot
# libero
eiquadprog==1.2.9
# via placo
etils[epath,epy]==1.14.0
etils[epath,epy]==1.13.0
# via mujoco
evdev==1.9.3
evdev==1.9.2
# via pynput
exceptiongroup==1.3.0
# via
# anyio
# ipython
# pytest
executing==2.2.1
# via stack-data
faker==34.0.2
# via lerobot
farama-notifications==0.0.4
# via gymnasium
fastapi==0.135.1
# via
# lerobot
# teleop
fastapi==0.119.1
# via teleop
fastjsonschema==2.21.2
# via nbformat
feetech-servo-sdk==1.0.0
# via lerobot
filelock==3.25.0
filelock==3.20.0
# via
# datasets
# diffusers
# huggingface-hub
# python-discovery
# torch
# transformers
# virtualenv
fonttools==4.61.1
flash-attn==2.8.3
# via lerobot
fonttools==4.60.1
# via matplotlib
frozenlist==1.8.0
# via
# aiohttp
# aiosignal
fsspec[http]==2026.2.0
fsspec[http]==2025.9.0
# via
# datasets
# etils
# huggingface-hub
# torch
future==1.0.0
# via hf-libero
# via libero
gitdb==4.0.12
# via gitpython
gitpython==3.1.46
gitpython==3.1.45
# via wandb
glfw==2.10.0
# via
@@ -235,60 +230,50 @@ gym-hil==0.1.13
# via lerobot
gym-pusht==0.1.6
# via lerobot
gymnasium==1.2.3
gymnasium==1.2.1
# via
# gym-aloha
# gym-hil
# gym-pusht
# hf-libero
# lerobot
# libero
# metaworld
h11==0.16.0
# via
# httpcore
# uvicorn
h5py==3.16.0
# via uvicorn
h5py==3.15.1
# via robomimic
hebi-py==2.11.0
# via lerobot
hf-egl-probe==1.0.2
# via hf-libero
hf-libero==0.1.3
# via lerobot
hf-xet==1.3.2
hf-transfer==0.1.9
# via huggingface-hub
hf-xet==1.1.10
# via huggingface-hub
hidapi==0.14.0.post4
# via
# gym-hil
# lerobot
httpcore==1.0.9
# via httpx
httptools==0.7.1
# via uvicorn
httpx==0.28.1
# via
# datasets
# huggingface-hub
huggingface-hub==1.6.0
huggingface-hub[cli,hf-transfer]==0.35.3
# via
# accelerate
# datasets
# diffusers
# lerobot
# peft
# timm
# tokenizers
# transformers
hydra-core==1.3.2
# via hf-libero
identify==2.6.17
# via libero
identify==2.6.15
# via pre-commit
idna==3.11
# via
# anyio
# httpx
# requests
# yarl
imageio[ffmpeg]==2.37.2
imageio[ffmpeg]==2.37.0
# via
# gym-aloha
# gym-hil
@@ -300,14 +285,16 @@ imageio-ffmpeg==0.6.0
# via
# imageio
# robomimic
importlib-metadata==8.7.1
importlib-metadata==8.7.0
# via diffusers
importlib-resources==6.5.2
# via etils
iniconfig==2.3.0
# via pytest
ipython==9.11.0
inquirerpy==0.3.4
# via huggingface-hub
ipython==8.37.0
# via meshcat
ipython-pygments-lexers==1.1.1
# via ipython
ischedule==1.2.7
# via placo
jedi==0.19.2
@@ -316,41 +303,40 @@ jinja2==3.1.6
# via torch
jsonlines==4.0.0
# via lerobot
jsonschema==4.26.0
jsonschema==4.25.1
# via nbformat
jsonschema-specifications==2025.9.1
# via jsonschema
jupyter-core==5.9.1
# via nbformat
jupytext==1.19.1
jupytext==1.18.1
# via bddl
kiwisolver==1.4.9
# via matplotlib
labmaze==1.0.6
# via dm-control
lazy-loader==0.5
lazy-loader==0.4
# via scikit-image
librt==0.8.1
# via mypy
llvmlite==0.46.0
libero @ git+https://github.com/huggingface/lerobot-libero.git@main
# via lerobot
llvmlite==0.45.1
# via numba
lxml==6.0.2
# via dm-control
markdown==3.10.2
markdown==3.9
# via tensorboard
markdown-it-py==4.0.0
# via
# jupytext
# mdit-py-plugins
# rich
markupsafe==3.0.3
# via
# jinja2
# werkzeug
matplotlib==3.10.8
matplotlib==3.10.7
# via
# hf-libero
# lerobot
# libero
matplotlib-inline==0.2.1
# via ipython
mdit-py-plugins==0.5.0
@@ -367,38 +353,36 @@ mock-serial==0.0.1
# via lerobot
mpmath==1.3.0
# via sympy
mujoco==3.5.0
mujoco==3.3.7
# via
# dm-control
# gym-aloha
# gym-hil
# hf-libero
# libero
# metaworld
# robosuite
multidict==6.7.1
multidict==6.7.0
# via
# aiohttp
# yarl
multiprocess==0.70.18
multiprocess==0.70.16
# via datasets
mypy==1.19.1
# via lerobot
mypy-extensions==1.1.0
# via
# mypy
# typing-inspect
# via typing-inspect
nbformat==5.10.4
# via jupytext
networkx==3.6.1
networkx==3.4.2
# via
# bddl
# scikit-image
# torch
nodeenv==1.10.0
ninja==1.13.0
# via lerobot
nodeenv==1.9.1
# via pre-commit
num2words==0.5.14
# via lerobot
numba==0.64.0
numba==0.62.1
# via robosuite
numpy==2.2.6
# via
@@ -407,6 +391,7 @@ numpy==2.2.6
# cmeel-boost
# contourpy
# datasets
# decord
# diffusers
# dm-control
# dm-env
@@ -414,10 +399,9 @@ numpy==2.2.6
# gymnasium
# h5py
# hebi-py
# hf-libero
# imageio
# labmaze
# lerobot
# libero
# matplotlib
# meshcat
# metaworld
@@ -442,51 +426,49 @@ numpy==2.2.6
# torchvision
# transformers
# transforms3d
nvidia-cublas-cu12==12.8.4.1
nvidia-cublas-cu12==12.6.4.1
# via
# nvidia-cudnn-cu12
# nvidia-cusolver-cu12
# torch
nvidia-cuda-cupti-cu12==12.8.90
nvidia-cuda-cupti-cu12==12.6.80
# via torch
nvidia-cuda-nvrtc-cu12==12.8.93
nvidia-cuda-nvrtc-cu12==12.6.77
# via torch
nvidia-cuda-runtime-cu12==12.8.90
nvidia-cuda-runtime-cu12==12.6.77
# via torch
nvidia-cudnn-cu12==9.10.2.21
nvidia-cudnn-cu12==9.5.1.17
# via torch
nvidia-cufft-cu12==11.3.3.83
nvidia-cufft-cu12==11.3.0.4
# via torch
nvidia-cufile-cu12==1.13.1.3
nvidia-cufile-cu12==1.11.1.6
# via torch
nvidia-curand-cu12==10.3.9.90
nvidia-curand-cu12==10.3.7.77
# via torch
nvidia-cusolver-cu12==11.7.3.90
nvidia-cusolver-cu12==11.7.1.2
# via torch
nvidia-cusparse-cu12==12.5.8.93
nvidia-cusparse-cu12==12.5.4.2
# via
# nvidia-cusolver-cu12
# torch
nvidia-cusparselt-cu12==0.7.1
nvidia-cusparselt-cu12==0.6.3
# via torch
nvidia-nccl-cu12==2.27.5
nvidia-nccl-cu12==2.26.2
# via torch
nvidia-nvjitlink-cu12==12.8.93
nvidia-nvjitlink-cu12==12.6.85
# via
# nvidia-cufft-cu12
# nvidia-cusolver-cu12
# nvidia-cusparse-cu12
# torch
nvidia-nvshmem-cu12==3.4.5
# via torch
nvidia-nvtx-cu12==12.8.90
nvidia-nvtx-cu12==12.6.77
# via torch
omegaconf==2.3.0
# via hydra-core
opencv-python==4.13.0.92
opencv-python==4.12.0.88
# via
# gym-pusht
# hf-libero
# libero
# reachy2-sdk
# robosuite
opencv-python-headless==4.12.0.88
@@ -505,7 +487,6 @@ packaging==25.0
# matplotlib
# peft
# pytest
# qwen-vl-utils
# reachy2-sdk
# scikit-image
# tensorboard
@@ -516,21 +497,21 @@ pandas==2.3.3
# via
# datasets
# lerobot
parso==0.8.6
parso==0.8.5
# via jedi
pathspec==1.0.4
# via mypy
peft==0.18.1
peft==0.17.1
# via lerobot
pexpect==4.9.0
# via ipython
pillow==12.1.1
pfzy==0.3.4
# via inquirerpy
pillow==12.0.0
# via
# diffusers
# imageio
# lerobot
# matplotlib
# meshcat
# qwen-vl-utils
# rerun-sdk
# robosuite
# scikit-image
@@ -538,27 +519,28 @@ pillow==12.1.1
# torchvision
pin==3.4.0
# via placo
placo==0.9.16
placo==0.9.14
# via lerobot
platformdirs==4.9.4
platformdirs==4.5.0
# via
# jupyter-core
# python-discovery
# virtualenv
# wandb
pluggy==1.6.0
# via
# pytest
# pytest-cov
pre-commit==4.5.1
pre-commit==4.3.0
# via lerobot
prompt-toolkit==3.0.52
# via ipython
# via
# inquirerpy
# ipython
propcache==0.4.1
# via
# aiohttp
# yarl
protobuf==6.31.1
protobuf==6.31.0
# via
# dm-control
# grpcio-tools
@@ -568,7 +550,7 @@ protobuf==6.31.1
# tensorboard
# tensorboardx
# wandb
psutil==7.2.2
psutil==7.1.1
# via
# accelerate
# imageio
@@ -578,17 +560,17 @@ ptyprocess==0.7.0
# via pexpect
pure-eval==0.2.3
# via stack-data
pyarrow==23.0.1
pyarrow==21.0.0
# via
# datasets
# rerun-sdk
pycparser==3.0
pycparser==2.23
# via cffi
pydantic==2.12.5
pydantic==2.12.3
# via
# fastapi
# wandb
pydantic-core==2.41.5
pydantic-core==2.41.4
# via pydantic
pygame==2.6.1
# via
@@ -598,14 +580,12 @@ pygame==2.6.1
pygments==2.19.2
# via
# ipython
# ipython-pygments-lexers
# pytest
# rich
pymunk==6.11.1
# via
# gym-pusht
# lerobot
pyngrok==7.5.1
pyngrok==7.4.1
# via meshcat
pynput==1.8.1
# via
@@ -615,7 +595,7 @@ pyopengl==3.1.10
# via
# dm-control
# mujoco
pyparsing==3.3.2
pyparsing==3.2.5
# via
# dm-control
# matplotlib
@@ -641,16 +621,13 @@ pytest-timeout==2.4.0
# via lerobot
python-dateutil==2.9.0.post0
# via
# faker
# matplotlib
# pandas
python-discovery==1.1.1
# via virtualenv
python-dotenv==1.2.2
python-dotenv==1.1.1
# via uvicorn
python-xlib==0.33
# via pynput
pytz==2026.1.post1
pytz==2025.2
# via pandas
pyyaml==6.0.3
# via
@@ -665,6 +642,7 @@ pyyaml==6.0.3
# pre-commit
# pyngrok
# pyyaml-include
# timm
# transformers
# uvicorn
# wandb
@@ -674,9 +652,7 @@ pyzmq==27.1.0
# via
# lerobot
# meshcat
qwen-vl-utils==0.0.14
# via lerobot
reachy2-sdk==1.0.15
reachy2-sdk==1.0.14
# via lerobot
reachy2-sdk-api==1.0.21
# via reachy2-sdk
@@ -684,7 +660,7 @@ referencing==0.37.0
# via
# jsonschema
# jsonschema-specifications
regex==2026.2.28
regex==2025.10.23
# via
# diffusers
# transformers
@@ -693,62 +669,60 @@ requests==2.32.5
# datasets
# diffusers
# dm-control
# qwen-vl-utils
# huggingface-hub
# teleop
# transformers
# wandb
rerun-sdk==0.26.2
rerun-sdk==0.26.1
# via lerobot
rhoban-cmeel-jsoncpp==1.9.4.9
# via placo
rich==14.3.3
# via typer
robomimic==0.2.0
# via hf-libero
# via libero
robosuite==1.4.0
# via hf-libero
rpds-py==0.30.0
# via libero
rpds-py==0.28.0
# via
# jsonschema
# referencing
safetensors==0.7.0
safetensors==0.6.2
# via
# accelerate
# diffusers
# lerobot
# peft
# timm
# transformers
scikit-image==0.25.2
# via
# gym-pusht
# lerobot
scipy==1.17.1
scipy==1.15.3
# via
# dm-control
# lerobot
# metaworld
# robosuite
# scikit-image
# torchdiffeq
sentry-sdk==2.54.0
sentry-sdk==2.42.1
# via wandb
shapely==2.1.2
# via gym-pusht
shellingham==1.5.4
# via typer
six==1.17.0
# via
# pynput
# python-dateutil
# python-xlib
smmap==5.0.3
smmap==5.0.2
# via gitdb
sniffio==1.3.1
# via anyio
stack-data==0.6.3
# via ipython
starlette==0.52.1
starlette==0.48.0
# via fastapi
sympy==1.14.0
# via torch
teleop==0.1.4
teleop==0.1.2
# via lerobot
tensorboard==2.20.0
# via robomimic
@@ -756,38 +730,46 @@ tensorboard-data-server==0.7.2
# via tensorboard
tensorboardx==2.6.4
# via robomimic
termcolor==3.3.0
termcolor==3.1.0
# via
# lerobot
# robomimic
thop==0.1.1.post2209072238
# via hf-libero
tifffile==2026.3.3
# via libero
tifffile==2025.5.10
# via scikit-image
tokenizers==0.22.2
timm==1.0.20
# via lerobot
tokenizers==0.22.1
# via transformers
toml==0.10.2
# via draccus
torch==2.10.0
tomli==2.3.0
# via
# cmeel
# coverage
# jupytext
# pytest
torch==2.7.1
# via
# accelerate
# flash-attn
# lerobot
# peft
# robomimic
# thop
# torchdiffeq
# timm
# torchvision
torchcodec==0.10.0
torchcodec==0.5
# via lerobot
torchdiffeq==0.2.5
# via lerobot
torchvision==0.25.0
torchvision==0.22.1
# via
# lerobot
# robomimic
tornado==6.5.4
# timm
tornado==6.5.2
# via meshcat
tqdm==4.67.3
tqdm==4.67.1
# via
# datasets
# dm-control
@@ -801,29 +783,26 @@ traitlets==5.14.3
# jupyter-core
# matplotlib-inline
# nbformat
transformers==5.3.0
transformers==4.57.1
# via
# hf-libero
# lerobot
# libero
# peft
transforms3d==0.4.2
# via teleop
triton==3.6.0
triton==3.3.1
# via torch
typer==0.24.1
# via
# huggingface-hub
# transformers
typing-extensions==4.15.0
# via
# aiosignal
# anyio
# etils
# faker
# exceptiongroup
# fastapi
# gymnasium
# huggingface-hub
# mypy
# ipython
# multidict
# pydantic
# pydantic-core
# referencing
@@ -832,46 +811,46 @@ typing-extensions==4.15.0
# torch
# typing-inspect
# typing-inspection
# uvicorn
# virtualenv
# wandb
typing-inspect==0.9.0
# via draccus
typing-inspection==0.4.2
# via
# fastapi
# pydantic
tzdata==2025.3
# via pydantic
tzdata==2025.2
# via pandas
u-msgpack-python==2.8.0
# via meshcat
urllib3==2.6.3
urllib3==2.5.0
# via
# requests
# sentry-sdk
uvicorn[standard]==0.41.0
uvicorn[standard]==0.38.0
# via teleop
uvloop==0.22.1
# via uvicorn
virtualenv==21.1.0
virtualenv==20.35.3
# via pre-commit
wandb==0.24.2
wandb==0.21.4
# via
# hf-libero
# lerobot
# libero
watchfiles==1.1.1
# via uvicorn
wcwidth==0.6.0
wcwidth==0.2.14
# via prompt-toolkit
websocket-client==1.9.0
# via teleop
websockets==16.0
websockets==15.0.1
# via uvicorn
werkzeug==3.1.6
werkzeug==3.1.3
# via tensorboard
wrapt==2.1.2
wrapt==2.0.0
# via dm-tree
xxhash==3.6.0
# via datasets
yarl==1.23.0
yarl==1.22.0
# via aiohttp
zipp==3.23.0
# via
+4 -4
View File
@@ -1,9 +1,9 @@
# requirements.in
# requirements-macos.txt was generated on macOS and is platform-specific (macOS 26.3.1 25D2128 arm64).
# Darwin MacBook-Pro.local 25.3.0 Darwin Kernel Version 25.3.0: Wed Jan 28 20:54:55 PST 2026; root:xnu-12377.91.3~2/RELEASE_ARM64_T8132 arm64
# requirements-macos.txt was generated on macOS and is platform-specific (macOS 26.0.1 25A362 arm64).
# Darwin MacBook-Pro.local 25.0.0 Darwin Kernel Version 25.0.0: Wed Sep 17 21:42:08 PDT 2025; root:xnu-12377.1.9~141/RELEASE_ARM64_T8132 arm64
# requirements-ubuntu.txt was generated on Linux and is platform-specific (Ubuntu 24.04.4 LTS x86_64).
# Linux lerobot-linux 6.17.0-14-generic #14~24.04.1-Ubuntu SMP PREEMPT_DYNAMIC Thu Jan 15 15:52:10 UTC 2 x86_64 x86_64 x86_64 GNU/Linux
# requirements-ubuntu.txt was generated on Linux and is platform-specific (Ubuntu 24.04.3 LTS x86_64).
# Linux mlerobot-linux 6.14.0-33-generic #33~24.04.1-Ubuntu SMP PREEMPT_DYNAMIC Fri Sep 19 17:02:30 UTC 2 x86_64 x86_64 x86_64 GNU/Linux
-e .[all]
+1 -1
View File
@@ -23,7 +23,7 @@ from typing import Any
import torch
from lerobot.configs.types import PolicyFeature
from lerobot.datasets.feature_utils import build_dataset_frame, hw_to_dataset_features
from lerobot.datasets.utils import build_dataset_frame, hw_to_dataset_features
# NOTE: Configs need to be loaded for the client to be able to instantiate the policy config
from lerobot.policies import ( # noqa: F401
+4 -2
View File
@@ -39,13 +39,15 @@ import grpc
import torch
from lerobot.policies.factory import get_policy_class, make_pre_post_processors
from lerobot.processor import PolicyProcessorPipeline
from lerobot.processor import (
PolicyAction,
PolicyProcessorPipeline,
)
from lerobot.transport import (
services_pb2, # type: ignore
services_pb2_grpc, # type: ignore
)
from lerobot.transport.utils import receive_bytes_in_chunks
from lerobot.types import PolicyAction
from .configs import PolicyServerConfig
from .constants import SUPPORTED_POLICIES
+3 -5
View File
@@ -63,9 +63,9 @@ from lerobot.transport import (
services_pb2_grpc, # type: ignore
)
from lerobot.transport.utils import grpc_channel_options, send_bytes_in_chunks
from lerobot.utils.import_utils import register_third_party_plugins
from .configs import RobotClientConfig
from .constants import SUPPORTED_ROBOTS
from .helpers import (
Action,
FPSTracker,
@@ -485,9 +485,8 @@ class RobotClient:
def async_client(cfg: RobotClientConfig):
logging.info(pformat(asdict(cfg)))
# TODO: Assert if checking robot support is still needed with the plugin system
# if cfg.robot.type not in SUPPORTED_ROBOTS:
# raise ValueError(f"Robot {cfg.robot.type} not yet supported!")
if cfg.robot.type not in SUPPORTED_ROBOTS:
raise ValueError(f"Robot {cfg.robot.type} not yet supported!")
client = RobotClient(cfg)
@@ -513,5 +512,4 @@ def async_client(cfg: RobotClientConfig):
if __name__ == "__main__":
register_third_party_plugins()
async_client() # run the client
+1 -1
View File
@@ -13,5 +13,5 @@
# limitations under the License.
from .camera import Camera
from .configs import CameraConfig, ColorMode, Cv2Backends, Cv2Rotation
from .configs import CameraConfig, ColorMode, Cv2Rotation
from .utils import make_cameras_from_configs
+1 -1
View File
@@ -150,7 +150,7 @@ class Camera(abc.ABC):
"""
pass
def read_latest(self, max_age_ms: int = 500) -> NDArray[Any]:
def read_latest(self, max_age_ms: int = 1000) -> NDArray[Any]:
"""Return the most recent frame captured immediately (Peeking).
This method is non-blocking and returns whatever is currently in the
-23
View File
@@ -25,10 +25,6 @@ class ColorMode(str, Enum):
RGB = "rgb"
BGR = "bgr"
@classmethod
def _missing_(cls, value: object) -> None:
raise ValueError(f"`color_mode` is expected to be in {list(cls)}, but {value} is provided.")
class Cv2Rotation(int, Enum):
NO_ROTATION = 0
@@ -36,25 +32,6 @@ class Cv2Rotation(int, Enum):
ROTATE_180 = 180
ROTATE_270 = -90
@classmethod
def _missing_(cls, value: object) -> None:
raise ValueError(f"`rotation` is expected to be in {list(cls)}, but {value} is provided.")
# Subset from https://docs.opencv.org/3.4/d4/d15/group__videoio__flags__base.html
class Cv2Backends(int, Enum):
ANY = 0
V4L2 = 200
DSHOW = 700
PVAPI = 800
ANDROID = 1000
AVFOUNDATION = 1200
MSMF = 1400
@classmethod
def _missing_(cls, value: object) -> None:
raise ValueError(f"`backend` is expected to be in {list(cls)}, but {value} is provided.")
@dataclass(kw_only=True)
class CameraConfig(draccus.ChoiceRegistry, abc.ABC): # type: ignore # TODO: add type stubs for draccus
+15 -10
View File
@@ -32,11 +32,10 @@ if platform.system() == "Windows" and "OPENCV_VIDEOIO_MSMF_ENABLE_HW_TRANSFORMS"
os.environ["OPENCV_VIDEOIO_MSMF_ENABLE_HW_TRANSFORMS"] = "0"
import cv2 # type: ignore # TODO: add type stubs for OpenCV
from lerobot.utils.decorators import check_if_already_connected, check_if_not_connected
from lerobot.utils.errors import DeviceNotConnectedError
from lerobot.utils.errors import DeviceAlreadyConnectedError, DeviceNotConnectedError
from ..camera import Camera
from ..utils import get_cv2_rotation
from ..utils import get_cv2_backend, get_cv2_rotation
from .configuration_opencv import ColorMode, OpenCVCameraConfig
# NOTE(Steven): The maximum opencv device index depends on your operating system. For instance,
@@ -118,7 +117,7 @@ class OpenCVCamera(Camera):
self.new_frame_event: Event = Event()
self.rotation: int | None = get_cv2_rotation(config.rotation)
self.backend: int = config.backend
self.backend: int = get_cv2_backend()
if self.height and self.width:
self.capture_width, self.capture_height = self.width, self.height
@@ -133,7 +132,6 @@ class OpenCVCamera(Camera):
"""Checks if the camera is currently connected and opened."""
return isinstance(self.videocapture, cv2.VideoCapture) and self.videocapture.isOpened()
@check_if_already_connected
def connect(self, warmup: bool = True) -> None:
"""
Connects to the OpenCV camera specified in the configuration.
@@ -150,6 +148,8 @@ class OpenCVCamera(Camera):
ConnectionError: If the specified camera index/path is not found or fails to open.
RuntimeError: If the camera opens but fails to apply requested settings.
"""
if self.is_connected:
raise DeviceAlreadyConnectedError(f"{self} is already connected.")
# Use 1 thread for OpenCV operations to avoid potential conflicts or
# blocking in multi-threaded applications, especially during data collection.
@@ -178,7 +178,6 @@ class OpenCVCamera(Camera):
logger.info(f"{self} connected.")
@check_if_not_connected
def _configure_capture_settings(self) -> None:
"""
Applies the specified FOURCC, FPS, width, and height settings to the connected camera.
@@ -198,6 +197,8 @@ class OpenCVCamera(Camera):
to the requested value.
DeviceNotConnectedError: If the camera is not connected.
"""
if not self.is_connected:
raise DeviceNotConnectedError(f"Cannot configure settings for {self} as it is not connected.")
# Set FOURCC first (if specified) as it can affect available FPS/resolution options
if self.config.fourcc is not None:
@@ -347,7 +348,6 @@ class OpenCVCamera(Camera):
return frame
@check_if_not_connected
def read(self, color_mode: ColorMode | None = None) -> NDArray[Any]:
"""
Reads a single frame synchronously from the camera.
@@ -374,6 +374,9 @@ class OpenCVCamera(Camera):
f"{self} read() color_mode parameter is deprecated and will be removed in future versions."
)
if not self.is_connected:
raise DeviceNotConnectedError(f"{self} is not connected.")
if self.thread is None or not self.thread.is_alive():
raise RuntimeError(f"{self} read thread is not running.")
@@ -487,7 +490,6 @@ class OpenCVCamera(Camera):
self.latest_timestamp = None
self.new_frame_event.clear()
@check_if_not_connected
def async_read(self, timeout_ms: float = 200) -> NDArray[Any]:
"""
Reads the latest available frame asynchronously.
@@ -510,6 +512,8 @@ class OpenCVCamera(Camera):
TimeoutError: If no frame becomes available within the specified timeout.
RuntimeError: If an unexpected error occurs.
"""
if not self.is_connected:
raise DeviceNotConnectedError(f"{self} is not connected.")
if self.thread is None or not self.thread.is_alive():
raise RuntimeError(f"{self} read thread is not running.")
@@ -529,8 +533,7 @@ class OpenCVCamera(Camera):
return frame
@check_if_not_connected
def read_latest(self, max_age_ms: int = 500) -> NDArray[Any]:
def read_latest(self, max_age_ms: int = 1000) -> NDArray[Any]:
"""Return the most recent frame captured immediately (Peeking).
This method is non-blocking and returns whatever is currently in the
@@ -545,6 +548,8 @@ class OpenCVCamera(Camera):
DeviceNotConnectedError: If the camera is not connected.
RuntimeError: If the camera is connected but has not captured any frames yet.
"""
if not self.is_connected:
raise DeviceNotConnectedError(f"{self} is not connected.")
if self.thread is None or not self.thread.is_alive():
raise RuntimeError(f"{self} read thread is not running.")
@@ -15,9 +15,9 @@
from dataclasses import dataclass
from pathlib import Path
from ..configs import CameraConfig, ColorMode, Cv2Backends, Cv2Rotation
from ..configs import CameraConfig, ColorMode, Cv2Rotation
__all__ = ["OpenCVCameraConfig", "ColorMode", "Cv2Rotation", "Cv2Backends"]
__all__ = ["OpenCVCameraConfig", "ColorMode", "Cv2Rotation"]
@CameraConfig.register_subclass("opencv")
@@ -50,7 +50,6 @@ class OpenCVCameraConfig(CameraConfig):
rotation: Image rotation setting (0°, 90°, 180°, or 270°). Defaults to no rotation.
warmup_s: Time reading frames before returning from connect (in seconds)
fourcc: FOURCC code for video format (e.g., "MJPG", "YUYV", "I420"). Defaults to None (auto-detect).
backend: OpenCV backend identifier (https://docs.opencv.org/3.4/d4/d15/group__videoio__flags__base.html). Defaults to ANY.
Note:
- Only 3-channel color output (RGB/BGR) is currently supported.
@@ -63,12 +62,22 @@ class OpenCVCameraConfig(CameraConfig):
rotation: Cv2Rotation = Cv2Rotation.NO_ROTATION
warmup_s: int = 1
fourcc: str | None = None
backend: Cv2Backends = Cv2Backends.ANY
def __post_init__(self) -> None:
self.color_mode = ColorMode(self.color_mode)
self.rotation = Cv2Rotation(self.rotation)
self.backend = Cv2Backends(self.backend)
if self.color_mode not in (ColorMode.RGB, ColorMode.BGR):
raise ValueError(
f"`color_mode` is expected to be {ColorMode.RGB.value} or {ColorMode.BGR.value}, but {self.color_mode} is provided."
)
if self.rotation not in (
Cv2Rotation.NO_ROTATION,
Cv2Rotation.ROTATE_90,
Cv2Rotation.ROTATE_180,
Cv2Rotation.ROTATE_270,
):
raise ValueError(
f"`rotation` is expected to be in {(Cv2Rotation.NO_ROTATION, Cv2Rotation.ROTATE_90, Cv2Rotation.ROTATE_180, Cv2Rotation.ROTATE_270)}, but {self.rotation} is provided."
)
if self.fourcc is not None and (not isinstance(self.fourcc, str) or len(self.fourcc) != 4):
raise ValueError(
@@ -74,4 +74,7 @@ class Reachy2CameraConfig(CameraConfig):
f"`image_type` is expected to be 'left' or 'right' for teleop camera, and 'rgb' or 'depth' for depth camera, but {self.image_type} is provided."
)
self.color_mode = ColorMode(self.color_mode)
if self.color_mode not in ["rgb", "bgr"]:
raise ValueError(
f"`color_mode` is expected to be 'rgb' or 'bgr', but {self.color_mode} is provided."
)
@@ -32,7 +32,6 @@ if platform.system() == "Windows" and "OPENCV_VIDEOIO_MSMF_ENABLE_HW_TRANSFORMS"
import cv2 # type: ignore # TODO: add type stubs for OpenCV
import numpy as np # type: ignore # TODO: add type stubs for numpy
from lerobot.utils.decorators import check_if_not_connected
from lerobot.utils.import_utils import _reachy2_sdk_available
if TYPE_CHECKING or _reachy2_sdk_available:
@@ -124,7 +123,6 @@ class Reachy2Camera(Camera):
"""
raise NotImplementedError("Camera detection is not implemented for Reachy2 cameras.")
@check_if_not_connected
def read(self, color_mode: ColorMode | None = None) -> NDArray[Any]:
"""
Reads a single frame synchronously from the camera.
@@ -138,6 +136,9 @@ class Reachy2Camera(Camera):
"""
start_time = time.perf_counter()
if not self.is_connected:
raise DeviceNotConnectedError(f"{self} is not connected.")
if self.cam_manager is None:
raise DeviceNotConnectedError(f"{self} is not connected.")
@@ -183,7 +184,6 @@ class Reachy2Camera(Camera):
return frame
@check_if_not_connected
def async_read(self, timeout_ms: float = 200) -> NDArray[Any]:
"""
Same as read()
@@ -197,11 +197,12 @@ class Reachy2Camera(Camera):
TimeoutError: If no frame becomes available within the specified timeout.
RuntimeError: If an unexpected error occurs.
"""
if not self.is_connected:
raise DeviceNotConnectedError(f"{self} is not connected.")
return self.read()
@check_if_not_connected
def read_latest(self, max_age_ms: int = 500) -> NDArray[Any]:
def read_latest(self, max_age_ms: int = 1000) -> NDArray[Any]:
"""Return the most recent frame captured immediately (Peeking).
This method is non-blocking and returns whatever is currently in the
@@ -218,6 +219,8 @@ class Reachy2Camera(Camera):
DeviceNotConnectedError: If the camera is not connected.
RuntimeError: If the camera is connected but has not captured any frames yet.
"""
if not self.is_connected:
raise DeviceNotConnectedError(f"{self} is not connected.")
if self.latest_frame is None or self.latest_timestamp is None:
raise RuntimeError(f"{self} has not captured any frames yet.")
@@ -230,7 +233,6 @@ class Reachy2Camera(Camera):
return self.latest_frame
@check_if_not_connected
def disconnect(self) -> None:
"""
Stops the background read thread (if running).
@@ -238,6 +240,8 @@ class Reachy2Camera(Camera):
Raises:
DeviceNotConnectedError: If the camera is already disconnected.
"""
if not self.is_connected:
raise DeviceNotConnectedError(f"{self} not connected.")
if self.cam_manager is not None:
self.cam_manager.disconnect()
@@ -30,8 +30,7 @@ try:
except Exception as e:
logging.info(f"Could not import realsense: {e}")
from lerobot.utils.decorators import check_if_already_connected, check_if_not_connected
from lerobot.utils.errors import DeviceNotConnectedError
from lerobot.utils.errors import DeviceAlreadyConnectedError, DeviceNotConnectedError
from ..camera import Camera
from ..configs import ColorMode
@@ -153,7 +152,6 @@ class RealSenseCamera(Camera):
"""Checks if the camera pipeline is started and streams are active."""
return self.rs_pipeline is not None and self.rs_profile is not None
@check_if_already_connected
def connect(self, warmup: bool = True) -> None:
"""
Connects to the RealSense camera specified in the configuration.
@@ -171,6 +169,8 @@ class RealSenseCamera(Camera):
ConnectionError: If the camera is found but fails to start the pipeline or no RealSense devices are detected at all.
RuntimeError: If the pipeline starts but fails to apply requested settings.
"""
if self.is_connected:
raise DeviceAlreadyConnectedError(f"{self} is already connected.")
self.rs_pipeline = rs.pipeline()
rs_config = rs.config()
@@ -290,7 +290,6 @@ class RealSenseCamera(Camera):
if self.use_depth:
rs_config.enable_stream(rs.stream.depth)
@check_if_not_connected
def _configure_capture_settings(self) -> None:
"""Sets fps, width, and height from device stream if not already configured.
@@ -300,6 +299,8 @@ class RealSenseCamera(Camera):
Raises:
DeviceNotConnectedError: If device is not connected.
"""
if not self.is_connected:
raise DeviceNotConnectedError(f"Cannot validate settings for {self} as it is not connected.")
if self.rs_profile is None:
raise RuntimeError(f"{self}: rs_profile must be initialized before use.")
@@ -319,7 +320,6 @@ class RealSenseCamera(Camera):
self.width, self.height = actual_width, actual_height
self.capture_width, self.capture_height = actual_width, actual_height
@check_if_not_connected
def read_depth(self, timeout_ms: int = 200) -> NDArray[Any]:
"""
Reads a single frame (depth) synchronously from the camera.
@@ -345,6 +345,9 @@ class RealSenseCamera(Camera):
f"Failed to capture depth frame '.read_depth()'. Depth stream is not enabled for {self}."
)
if not self.is_connected:
raise DeviceNotConnectedError(f"{self} is not connected.")
if self.thread is None or not self.thread.is_alive():
raise RuntimeError(f"{self} read thread is not running.")
@@ -371,7 +374,6 @@ class RealSenseCamera(Camera):
return frame
@check_if_not_connected
def read(self, color_mode: ColorMode | None = None, timeout_ms: int = 0) -> NDArray[Any]:
"""
Reads a single frame (color) synchronously from the camera.
@@ -401,6 +403,9 @@ class RealSenseCamera(Camera):
f"{self} read() timeout_ms parameter is deprecated and will be removed in future versions."
)
if not self.is_connected:
raise DeviceNotConnectedError(f"{self} is not connected.")
if self.thread is None or not self.thread.is_alive():
raise RuntimeError(f"{self} read thread is not running.")
@@ -529,7 +534,6 @@ class RealSenseCamera(Camera):
self.new_frame_event.clear()
# NOTE(Steven): Missing implementation for depth for now
@check_if_not_connected
def async_read(self, timeout_ms: float = 200) -> NDArray[Any]:
"""
Reads the latest available frame data (color) asynchronously.
@@ -552,6 +556,8 @@ class RealSenseCamera(Camera):
TimeoutError: If no frame data becomes available within the specified timeout.
RuntimeError: If the background thread died unexpectedly or another error occurs.
"""
if not self.is_connected:
raise DeviceNotConnectedError(f"{self} is not connected.")
if self.thread is None or not self.thread.is_alive():
raise RuntimeError(f"{self} read thread is not running.")
@@ -572,8 +578,7 @@ class RealSenseCamera(Camera):
return frame
# NOTE(Steven): Missing implementation for depth for now
@check_if_not_connected
def read_latest(self, max_age_ms: int = 500) -> NDArray[Any]:
def read_latest(self, max_age_ms: int = 1000) -> NDArray[Any]:
"""Return the most recent (color) frame captured immediately (Peeking).
This method is non-blocking and returns whatever is currently in the
@@ -588,6 +593,8 @@ class RealSenseCamera(Camera):
DeviceNotConnectedError: If the camera is not connected.
RuntimeError: If the camera is connected but has not captured any frames yet.
"""
if not self.is_connected:
raise DeviceNotConnectedError(f"{self} is not connected.")
if self.thread is None or not self.thread.is_alive():
raise RuntimeError(f"{self} read thread is not running.")
@@ -60,8 +60,20 @@ class RealSenseCameraConfig(CameraConfig):
warmup_s: int = 1
def __post_init__(self) -> None:
self.color_mode = ColorMode(self.color_mode)
self.rotation = Cv2Rotation(self.rotation)
if self.color_mode not in (ColorMode.RGB, ColorMode.BGR):
raise ValueError(
f"`color_mode` is expected to be {ColorMode.RGB.value} or {ColorMode.BGR.value}, but {self.color_mode} is provided."
)
if self.rotation not in (
Cv2Rotation.NO_ROTATION,
Cv2Rotation.ROTATE_90,
Cv2Rotation.ROTATE_180,
Cv2Rotation.ROTATE_270,
):
raise ValueError(
f"`rotation` is expected to be in {(Cv2Rotation.NO_ROTATION, Cv2Rotation.ROTATE_90, Cv2Rotation.ROTATE_180, Cv2Rotation.ROTATE_270)}, but {self.rotation} is provided."
)
values = (self.fps, self.width, self.height)
if any(v is not None for v in values) and any(v is None for v in values):
+12
View File
@@ -14,6 +14,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import platform
from typing import cast
from lerobot.utils.import_utils import make_device_from_device_class
@@ -67,3 +68,14 @@ def get_cv2_rotation(rotation: Cv2Rotation) -> int | None:
return int(cv2.ROTATE_90_COUNTERCLOCKWISE)
else:
return None
def get_cv2_backend() -> int:
import cv2
if platform.system() == "Windows":
return int(cv2.CAP_MSMF) # Use MSMF for Windows instead of AVFOUNDATION
# elif platform.system() == "Darwin": # macOS
# return cv2.CAP_AVFOUNDATION
else: # Linux and others
return int(cv2.CAP_ANY)
+11 -7
View File
@@ -34,8 +34,7 @@ import cv2
import numpy as np
from numpy.typing import NDArray
from lerobot.utils.decorators import check_if_already_connected, check_if_not_connected
from lerobot.utils.errors import DeviceNotConnectedError
from lerobot.utils.errors import DeviceAlreadyConnectedError, DeviceNotConnectedError
from ..camera import Camera
from ..configs import ColorMode
@@ -105,7 +104,6 @@ class ZMQCamera(Camera):
"""Checks if the ZMQ socket is initialized and connected."""
return self._connected and self.context is not None and self.socket is not None
@check_if_already_connected
def connect(self, warmup: bool = True) -> None:
"""Connect to ZMQ camera server.
@@ -113,6 +111,8 @@ class ZMQCamera(Camera):
warmup (bool): If True, waits for the camera to provide at least one
valid frame before returning. Defaults to True.
"""
if self.is_connected:
raise DeviceAlreadyConnectedError(f"{self} is already connected.")
logger.info(f"Connecting to {self}...")
@@ -181,7 +181,7 @@ class ZMQCamera(Camera):
try:
message = self.socket.recv_string()
except Exception as e:
# zmq is lazy-imported in connect(), so check by name to avoid a top-level import
# Check for ZMQ timeout (EAGAIN/Again) without requiring global zmq import
if type(e).__name__ == "Again":
raise TimeoutError(f"{self} timeout after {self.timeout_ms}ms") from e
raise
@@ -211,7 +211,6 @@ class ZMQCamera(Camera):
return frame
@check_if_not_connected
def read(self, color_mode: ColorMode | None = None) -> NDArray[Any]:
"""
Reads a single frame synchronously from the camera.
@@ -229,6 +228,9 @@ class ZMQCamera(Camera):
f"{self} read() color_mode parameter is deprecated and will be removed in future versions."
)
if not self.is_connected:
raise DeviceNotConnectedError(f"{self} is not connected.")
if self.thread is None or not self.thread.is_alive():
raise RuntimeError(f"{self} read thread is not running.")
@@ -299,7 +301,6 @@ class ZMQCamera(Camera):
self.latest_timestamp = None
self.new_frame_event.clear()
@check_if_not_connected
def async_read(self, timeout_ms: float = 200) -> NDArray[Any]:
"""
Reads the latest available frame asynchronously.
@@ -316,6 +317,8 @@ class ZMQCamera(Camera):
TimeoutError: If no frame data becomes available within the specified timeout.
RuntimeError: If the background thread is not running.
"""
if not self.is_connected:
raise DeviceNotConnectedError(f"{self} is not connected.")
if self.thread is None or not self.thread.is_alive():
raise RuntimeError(f"{self} read thread is not running.")
@@ -332,7 +335,6 @@ class ZMQCamera(Camera):
return frame
@check_if_not_connected
def read_latest(self, max_age_ms: int = 1000) -> NDArray[Any]:
"""Return the most recent frame captured immediately (Peeking).
@@ -348,6 +350,8 @@ class ZMQCamera(Camera):
DeviceNotConnectedError: If the camera is not connected.
RuntimeError: If the camera is connected but has not captured any frames yet.
"""
if not self.is_connected:
raise DeviceNotConnectedError(f"{self} is not connected.")
if self.thread is None or not self.thread.is_alive():
raise RuntimeError(f"{self} read thread is not running.")
+4 -1
View File
@@ -32,7 +32,10 @@ class ZMQCameraConfig(CameraConfig):
warmup_s: int = 1
def __post_init__(self) -> None:
self.color_mode = ColorMode(self.color_mode)
if self.color_mode not in (ColorMode.RGB, ColorMode.BGR):
raise ValueError(
f"`color_mode` is expected to be {ColorMode.RGB.value} or {ColorMode.BGR.value}, but {self.color_mode} is provided."
)
if self.timeout_ms <= 0:
raise ValueError(f"`timeout_ms` must be positive, but {self.timeout_ms} is provided.")
+4 -72
View File
@@ -23,7 +23,6 @@ import base64
import contextlib
import json
import logging
import threading
import time
from collections import deque
@@ -43,57 +42,10 @@ def encode_image(image: np.ndarray, quality: int = 80) -> str:
return base64.b64encode(buffer).decode("utf-8")
class CameraCaptureThread:
"""Background thread that continuously captures and encodes frames from a camera."""
def __init__(self, camera: OpenCVCamera, name: str):
self.camera = camera
self.name = name
self.latest_encoded: str | None = None # Pre-encoded JPEG as base64
self.latest_timestamp: float = 0.0
self.frame_lock = threading.Lock()
self.running = False
self.thread: threading.Thread | None = None
def start(self):
"""Start the capture thread."""
self.running = True
self.thread = threading.Thread(target=self._capture_loop, daemon=True)
self.thread.start()
def stop(self):
"""Stop the capture thread."""
self.running = False
if self.thread:
self.thread.join(timeout=1.0)
def _capture_loop(self):
"""Continuously capture and encode frames at the camera's native rate."""
while self.running:
try:
frame = self.camera.read() # Blocks at camera's native rate
timestamp = time.time()
# Encode immediately in capture thread (this is the slow part)
encoded = encode_image(frame)
with self.frame_lock:
self.latest_encoded = encoded
self.latest_timestamp = timestamp
except Exception as e:
logger.warning(f"Camera {self.name} capture error: {e}")
time.sleep(0.01)
def get_latest(self) -> tuple[str | None, float]:
"""Get the latest encoded frame and its timestamp."""
with self.frame_lock:
return self.latest_encoded, self.latest_timestamp
class ImageServer:
def __init__(self, config: dict, port: int = 5555):
# fps controls the publish loop rate (how often frames are sent over ZMQ), not the camera capture rate
self.fps = config.get("fps", 30)
self.cameras: dict[str, OpenCVCamera] = {}
self.capture_threads: dict[str, CameraCaptureThread] = {}
for name, cfg in config.get("cameras", {}).items():
shape = cfg.get("shape", [480, 640])
@@ -109,10 +61,6 @@ class ImageServer:
self.cameras[name] = camera
logger.info(f"Camera {name}: {shape[1]}x{shape[0]}")
# Create capture thread for this camera
capture_thread = CameraCaptureThread(camera, name)
self.capture_threads[name] = capture_thread
# ZMQ PUB socket
self.context = zmq.Context()
self.socket = self.context.socket(zmq.PUB)
@@ -125,18 +73,6 @@ class ImageServer:
def run(self):
frame_count = 0
frame_times = deque(maxlen=60)
last_published_ts: dict[str, float] = {}
# Start all capture threads
for capture_thread in self.capture_threads.values():
capture_thread.start()
# Wait for first frames to be captured and encoded
logger.info("Waiting for cameras to start capturing...")
for name, capture_thread in self.capture_threads.items():
while capture_thread.get_latest()[0] is None:
time.sleep(0.01)
logger.info(f"Camera {name} ready (capture + encode in background)")
try:
while True:
@@ -144,12 +80,10 @@ class ImageServer:
# Build message
message = {"timestamps": {}, "images": {}}
for name, capture_thread in self.capture_threads.items():
encoded, timestamp = capture_thread.get_latest()
if encoded is not None and timestamp > last_published_ts.get(name, 0.0):
message["timestamps"][name] = timestamp
message["images"][name] = encoded
last_published_ts[name] = timestamp
for name, cam in self.cameras.items():
frame = cam.read() # Returns RGB
message["timestamps"][name] = time.time()
message["images"][name] = encode_image(frame)
# Send as JSON string (suppress if buffer full)
with contextlib.suppress(zmq.Again):
@@ -168,8 +102,6 @@ class ImageServer:
except KeyboardInterrupt:
pass
finally:
for capture_thread in self.capture_threads.values():
capture_thread.stop()
for cam in self.cameras.values():
cam.disconnect()
self.socket.close()
+1 -13
View File
@@ -27,8 +27,7 @@ class DatasetConfig:
# "dataset_index" into the returned item. The index mapping is made according to the order in which the
# datasets are provided.
repo_id: str
# Root directory for a concrete local dataset tree (e.g. 'dataset/path'). If None, local datasets are
# looked up under $HF_LEROBOT_HOME/repo_id and Hub downloads use a revision-safe cache under $HF_LEROBOT_HOME/hub.
# Root directory where the dataset will be stored (e.g. 'dataset/path').
root: str | None = None
episodes: list[int] | None = None
image_transforms: ImageTransformsConfig = field(default_factory=ImageTransformsConfig)
@@ -37,16 +36,6 @@ class DatasetConfig:
video_backend: str = field(default_factory=get_safe_default_codec)
streaming: bool = False
def __post_init__(self) -> None:
if self.episodes is not None:
if any(ep < 0 for ep in self.episodes):
raise ValueError(
f"Episode indices must be non-negative, got: {[ep for ep in self.episodes if ep < 0]}"
)
if len(self.episodes) != len(set(self.episodes)):
duplicates = sorted({ep for ep in self.episodes if self.episodes.count(ep) > 1})
raise ValueError(f"Episode indices contain duplicates: {duplicates}")
@dataclass
class WandBConfig:
@@ -58,7 +47,6 @@ class WandBConfig:
notes: str | None = None
run_id: str | None = None
mode: str | None = None # Allowed values: 'online', 'offline' 'disabled'. Defaults to 'online'
add_tags: bool = True # If True, save configuration as tags in the WandB run.
@dataclass
+7 -18
View File
@@ -30,8 +30,8 @@ from lerobot.configs.types import FeatureType, PolicyFeature
from lerobot.optim.optimizers import OptimizerConfig
from lerobot.optim.schedulers import LRSchedulerConfig
from lerobot.utils.constants import ACTION, OBS_STATE
from lerobot.utils.device_utils import auto_select_torch_device, is_amp_available, is_torch_device_available
from lerobot.utils.hub import HubMixin
from lerobot.utils.utils import auto_select_torch_device, is_amp_available, is_torch_device_available
T = TypeVar("T", bound="PreTrainedConfig")
logger = getLogger(__name__)
@@ -45,12 +45,12 @@ class PreTrainedConfig(draccus.ChoiceRegistry, HubMixin, abc.ABC): # type: igno
Args:
n_obs_steps: Number of environment steps worth of observations to pass to the policy (takes the
current step and additional steps going back).
input_features: A dictionary defining the PolicyFeature of the input data for the policy. The key represents
the input data name, and the value is PolicyFeature, which consists of FeatureType and shape attributes.
output_features: A dictionary defining the PolicyFeature of the output data for the policy. The key represents
the output data name, and the value is PolicyFeature, which consists of FeatureType and shape attributes.
normalization_mapping: A dictionary that maps from a str value of FeatureType (e.g., "STATE", "VISUAL") to
a corresponding NormalizationMode (e.g., NormalizationMode.MIN_MAX)
input_shapes: A dictionary defining the shapes of the input data for the policy.
output_shapes: A dictionary defining the shapes of the output data for the policy.
input_normalization_modes: A dictionary with key representing the modality and the value specifies the
normalization mode to apply.
output_normalization_modes: Similar dictionary as `input_normalization_modes`, but to unnormalize to
the original scale.
"""
n_obs_steps: int = 1
@@ -115,17 +115,6 @@ class PreTrainedConfig(draccus.ChoiceRegistry, HubMixin, abc.ABC): # type: igno
def reward_delta_indices(self) -> list | None: # type: ignore[type-arg] #TODO: No implementation
raise NotImplementedError
@property
def state_delta_indices(self) -> list | None: # type: ignore[type-arg]
"""Delta indices specifically for observation.state.
When not None, overrides ``observation_delta_indices`` for the
``observation.state`` key only. Useful for loading state history
(e.g. ``[-1, 0]`` for UMI-style relative proprioception) without
also loading multiple image timesteps.
"""
return None
@abc.abstractmethod
def get_optimizer_preset(self) -> OptimizerConfig:
raise NotImplementedError
-3
View File
@@ -50,9 +50,6 @@ class TrainPipelineConfig(HubMixin):
# `seed` is used for training (eg: model initialization, dataset shuffling)
# AND for the evaluation environments.
seed: int | None = 1000
# Set to True to use deterministic cuDNN algorithms for reproducibility.
# This disables cudnn.benchmark and may reduce training speed by ~10-20 percent.
cudnn_deterministic: bool = False
# Number of workers for the dataloader.
num_workers: int = 4
batch_size: int = 8

Some files were not shown because too many files have changed in this diff Show More