mirror of
https://github.com/huggingface/lerobot.git
synced 2026-05-11 14:49:43 +00:00
Compare commits
1 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 74b7cd246e |
@@ -18,11 +18,6 @@ name: Documentation
|
||||
on:
|
||||
# Allows running this workflow manually from the Actions tab
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
version:
|
||||
description: 'Version tag (e.g. v0.1.2) - Leave empty for standard main build'
|
||||
required: false
|
||||
type: string
|
||||
|
||||
# Triggers the workflow on push events to main for the docs folder
|
||||
push:
|
||||
@@ -59,13 +54,7 @@ jobs:
|
||||
with:
|
||||
commit_sha: ${{ github.sha }}
|
||||
package: lerobot
|
||||
additional_args: >-
|
||||
--not_python_module
|
||||
${{
|
||||
(github.event_name == 'release' && format('--version {0}', github.event.release.tag_name)) ||
|
||||
(inputs.version != '' && format('--version {0}', inputs.version)) ||
|
||||
''
|
||||
}}
|
||||
additional_args: --not_python_module ${{ github.event_name == 'release' && format('--version {0}', github.event.release.tag_name) || '' }}
|
||||
secrets:
|
||||
token: ${{ secrets.HUGGINGFACE_PUSH }}
|
||||
hf_token: ${{ secrets.HF_DOC_BUILD_PUSH }}
|
||||
|
||||
@@ -27,7 +27,6 @@ on:
|
||||
- "tests/**"
|
||||
- ".github/workflows/**"
|
||||
- "pyproject.toml"
|
||||
- "uv.lock"
|
||||
- "Makefile"
|
||||
push:
|
||||
branches:
|
||||
@@ -37,7 +36,6 @@ on:
|
||||
- "tests/**"
|
||||
- ".github/workflows/**"
|
||||
- "pyproject.toml"
|
||||
- "uv.lock"
|
||||
- "Makefile"
|
||||
|
||||
permissions:
|
||||
@@ -46,7 +44,7 @@ permissions:
|
||||
# Sets up the environment variables
|
||||
env:
|
||||
UV_VERSION: "0.8.0"
|
||||
PYTHON_VERSION: "3.12"
|
||||
PYTHON_VERSION: "3.10"
|
||||
|
||||
# Ensures that only the latest commit for a PR or branch is built, canceling older runs.
|
||||
concurrency:
|
||||
@@ -63,7 +61,6 @@ jobs:
|
||||
MUJOCO_GL: egl
|
||||
HF_HOME: /mnt/cache/.cache/huggingface
|
||||
HF_LEROBOT_HOME: /mnt/cache/.cache/huggingface/lerobot
|
||||
HF_USER_TOKEN: ${{ secrets.LEROBOT_HF_USER }}
|
||||
steps:
|
||||
- uses: actions/checkout@v6
|
||||
with:
|
||||
@@ -90,13 +87,7 @@ jobs:
|
||||
python-version: ${{ env.PYTHON_VERSION }}
|
||||
|
||||
- name: Install lerobot with test extras
|
||||
run: uv sync --locked --extra "test"
|
||||
|
||||
- name: Login to Hugging Face
|
||||
if: env.HF_USER_TOKEN != ''
|
||||
run: |
|
||||
uv run hf auth login --token "$HF_USER_TOKEN" --add-to-git-credential
|
||||
uv run hf auth whoami
|
||||
run: uv sync --extra "test"
|
||||
|
||||
- name: Run pytest
|
||||
run: uv run pytest tests -vv --maxfail=10
|
||||
|
||||
@@ -29,7 +29,6 @@ on:
|
||||
- "tests/**"
|
||||
- ".github/workflows/**"
|
||||
- "pyproject.toml"
|
||||
- "uv.lock"
|
||||
- "Makefile"
|
||||
|
||||
permissions:
|
||||
@@ -38,7 +37,7 @@ permissions:
|
||||
# Sets up the environment variables
|
||||
env:
|
||||
UV_VERSION: "0.8.0"
|
||||
PYTHON_VERSION: "3.12"
|
||||
PYTHON_VERSION: "3.10"
|
||||
DOCKER_IMAGE_NAME: huggingface/lerobot-gpu
|
||||
|
||||
# Ensures that only the latest action is built, canceling older runs.
|
||||
@@ -61,7 +60,6 @@ jobs:
|
||||
MUJOCO_GL: egl
|
||||
HF_HOME: /mnt/cache/.cache/huggingface
|
||||
HF_LEROBOT_HOME: /mnt/cache/.cache/huggingface/lerobot
|
||||
HF_USER_TOKEN: ${{ secrets.LEROBOT_HF_USER }}
|
||||
steps:
|
||||
- uses: actions/checkout@v6
|
||||
with:
|
||||
@@ -87,13 +85,7 @@ jobs:
|
||||
python-version: ${{ env.PYTHON_VERSION }}
|
||||
|
||||
- name: Install lerobot with all extras
|
||||
run: uv sync --locked --extra all # TODO(Steven): Make flash-attn optional
|
||||
|
||||
- name: Login to Hugging Face
|
||||
if: env.HF_USER_TOKEN != ''
|
||||
run: |
|
||||
uv run hf auth login --token "$HF_USER_TOKEN" --add-to-git-credential
|
||||
uv run hf auth whoami
|
||||
run: uv sync --extra all # TODO(Steven): Make flash-attn optional
|
||||
|
||||
- name: Run pytest (all extras)
|
||||
run: uv run pytest tests -vv --maxfail=10
|
||||
@@ -109,11 +101,9 @@ jobs:
|
||||
runs-on:
|
||||
group: aws-general-8-plus
|
||||
if: |
|
||||
github.repository == 'huggingface/lerobot' && (
|
||||
(github.event_name == 'pull_request_review' && github.event.review.state == 'approved' && github.event.pull_request.head.repo.fork == false) ||
|
||||
github.event_name == 'push' ||
|
||||
github.event_name == 'workflow_dispatch'
|
||||
)
|
||||
(github.event_name == 'pull_request_review' && github.event.review.state == 'approved' && github.event.pull_request.head.repo.fork == false) ||
|
||||
github.event_name == 'push' ||
|
||||
github.event_name == 'workflow_dispatch'
|
||||
outputs:
|
||||
image_tag: ${{ steps.set_tag.outputs.image_tag }}
|
||||
env:
|
||||
@@ -170,7 +160,6 @@ jobs:
|
||||
HF_LEROBOT_HOME: /home/user_lerobot/.cache/huggingface/lerobot
|
||||
TORCH_HOME: /home/user_lerobot/.cache/torch
|
||||
TRITON_CACHE_DIR: /home/user_lerobot/.cache/triton
|
||||
HF_USER_TOKEN: ${{ secrets.LEROBOT_HF_USER }}
|
||||
container:
|
||||
image: ${{ needs.build-and-push-docker.outputs.image_tag }} # zizmor: ignore[unpinned-images]
|
||||
options: --gpus all --shm-size "16gb"
|
||||
@@ -182,13 +171,6 @@ jobs:
|
||||
shell: bash
|
||||
working-directory: /lerobot
|
||||
steps:
|
||||
- name: Login to Hugging Face
|
||||
if: env.HF_USER_TOKEN != ''
|
||||
run: |
|
||||
hf auth login --token "$HF_USER_TOKEN" --add-to-git-credential
|
||||
hf auth whoami
|
||||
- name: Fix ptxas permissions
|
||||
run: chmod +x /lerobot/.venv/lib/python3.12/site-packages/triton/backends/nvidia/bin/ptxas
|
||||
- name: Run pytest on GPU
|
||||
run: pytest tests -vv --maxfail=10
|
||||
- name: Run end-to-end tests
|
||||
|
||||
@@ -12,8 +12,8 @@
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# This workflow handles Docker image publishing & testing.
|
||||
name: Docker Publish & Test
|
||||
# This workflow handles nightly testing & docker images publishing.
|
||||
name: Nightly
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
@@ -28,7 +28,7 @@ on:
|
||||
# Sets up the environment variables
|
||||
env:
|
||||
UV_VERSION: "0.8.0"
|
||||
PYTHON_VERSION: "3.12"
|
||||
PYTHON_VERSION: "3.10"
|
||||
DOCKER_IMAGE_NAME_CPU: huggingface/lerobot-cpu:latest
|
||||
DOCKER_IMAGE_NAME_GPU: huggingface/lerobot-gpu:latest
|
||||
|
||||
@@ -39,8 +39,8 @@ concurrency:
|
||||
|
||||
jobs:
|
||||
# This job builds a CPU image for testing & distribution
|
||||
build-docker-cpu:
|
||||
name: Build CPU Docker
|
||||
build-docker-cpu-nightly:
|
||||
name: Build CPU Docker for Nightly
|
||||
runs-on:
|
||||
group: aws-general-8-plus
|
||||
if: github.repository == 'huggingface/lerobot'
|
||||
@@ -74,8 +74,8 @@ jobs:
|
||||
tags: ${{ env.DOCKER_IMAGE_NAME_CPU }}
|
||||
|
||||
# This job builds a GPU image for testing & distribution
|
||||
build-docker-gpu:
|
||||
name: Build GPU Docker
|
||||
build-docker-gpu-nightly:
|
||||
name: Build GPU Docker for Nightly
|
||||
runs-on:
|
||||
group: aws-general-8-plus
|
||||
if: github.repository == 'huggingface/lerobot'
|
||||
@@ -109,9 +109,9 @@ jobs:
|
||||
tags: ${{ env.DOCKER_IMAGE_NAME_GPU }}
|
||||
|
||||
# This job runs the E2E tests + pytest with all extras in the CPU image
|
||||
cpu-tests:
|
||||
name: CPU Tests
|
||||
needs: [build-docker-cpu]
|
||||
nightly-cpu-tests:
|
||||
name: Nightly CPU Tests
|
||||
needs: [build-docker-cpu-nightly]
|
||||
runs-on:
|
||||
group: aws-g6-4xlarge-plus
|
||||
env:
|
||||
@@ -119,9 +119,8 @@ jobs:
|
||||
HF_LEROBOT_HOME: /home/user_lerobot/.cache/huggingface/lerobot
|
||||
TORCH_HOME: /home/user_lerobot/.cache/torch
|
||||
TRITON_CACHE_DIR: /home/user_lerobot/.cache/triton
|
||||
HF_USER_TOKEN: ${{ secrets.LEROBOT_HF_USER }}
|
||||
container:
|
||||
image: ${{ needs.build-docker-cpu.outputs.image_tag }} # zizmor: ignore[unpinned-images]
|
||||
image: ${{ needs.build-docker-cpu-nightly.outputs.image_tag }} # zizmor: ignore[unpinned-images]
|
||||
options: --shm-size "16gb"
|
||||
credentials:
|
||||
username: ${{ secrets.DOCKERHUB_LEROBOT_USERNAME }}
|
||||
@@ -131,20 +130,15 @@ jobs:
|
||||
shell: bash
|
||||
working-directory: /lerobot
|
||||
steps:
|
||||
- name: Login to Hugging Face
|
||||
if: env.HF_USER_TOKEN != ''
|
||||
run: |
|
||||
hf auth login --token "$HF_USER_TOKEN" --add-to-git-credential
|
||||
hf auth whoami
|
||||
- name: Run pytest on CPU
|
||||
run: pytest tests -vv --maxfail=10
|
||||
- name: Run end-to-end tests
|
||||
run: make test-end-to-end
|
||||
|
||||
# This job runs the E2E tests + pytest with all extras in the GPU image
|
||||
gpu-tests:
|
||||
name: GPU Tests
|
||||
needs: [build-docker-gpu]
|
||||
nightly-gpu-tests:
|
||||
name: Nightly GPU Tests
|
||||
needs: [build-docker-gpu-nightly]
|
||||
runs-on:
|
||||
group: aws-g6-4xlarge-plus
|
||||
env:
|
||||
@@ -152,9 +146,8 @@ jobs:
|
||||
HF_LEROBOT_HOME: /home/user_lerobot/.cache/huggingface/lerobot
|
||||
TORCH_HOME: /home/user_lerobot/.cache/torch
|
||||
TRITON_CACHE_DIR: /home/user_lerobot/.cache/triton
|
||||
HF_USER_TOKEN: ${{ secrets.LEROBOT_HF_USER }}
|
||||
container:
|
||||
image: ${{ needs.build-docker-gpu.outputs.image_tag }} # zizmor: ignore[unpinned-images]
|
||||
image: ${{ needs.build-docker-gpu-nightly.outputs.image_tag }} # zizmor: ignore[unpinned-images]
|
||||
options: --gpus all --shm-size "16gb"
|
||||
credentials:
|
||||
username: ${{ secrets.DOCKERHUB_LEROBOT_USERNAME }}
|
||||
@@ -164,20 +157,15 @@ jobs:
|
||||
shell: bash
|
||||
working-directory: /lerobot
|
||||
steps:
|
||||
- name: Login to Hugging Face
|
||||
if: env.HF_USER_TOKEN != ''
|
||||
run: |
|
||||
hf auth login --token "$HF_USER_TOKEN" --add-to-git-credential
|
||||
hf auth whoami
|
||||
- name: Run pytest on GPU
|
||||
run: pytest tests -vv --maxfail=10
|
||||
- name: Run end-to-end tests
|
||||
run: make test-end-to-end
|
||||
|
||||
# This job runs multi-GPU training tests with 4 GPUs
|
||||
multi-gpu-tests:
|
||||
name: Multi-GPU Tests
|
||||
needs: [build-docker-gpu]
|
||||
nightly-multi-gpu-tests:
|
||||
name: Nightly Multi-GPU Tests
|
||||
needs: [build-docker-gpu-nightly]
|
||||
runs-on:
|
||||
group: aws-g4dn-12xlarge # Instance with 4 GPUs
|
||||
env:
|
||||
@@ -186,9 +174,8 @@ jobs:
|
||||
TORCH_HOME: /home/user_lerobot/.cache/torch
|
||||
TRITON_CACHE_DIR: /home/user_lerobot/.cache/triton
|
||||
CUDA_VISIBLE_DEVICES: "0,1,2,3"
|
||||
HF_USER_TOKEN: ${{ secrets.LEROBOT_HF_USER }}
|
||||
container:
|
||||
image: ${{ needs.build-docker-gpu.outputs.image_tag }} # zizmor: ignore[unpinned-images]
|
||||
image: ${{ needs.build-docker-gpu-nightly.outputs.image_tag }} # zizmor: ignore[unpinned-images]
|
||||
options: --gpus all --shm-size "16gb"
|
||||
credentials:
|
||||
username: ${{ secrets.DOCKERHUB_LEROBOT_USERNAME }}
|
||||
@@ -198,15 +185,12 @@ jobs:
|
||||
shell: bash
|
||||
working-directory: /lerobot
|
||||
steps:
|
||||
- name: Login to Hugging Face
|
||||
if: env.HF_USER_TOKEN != ''
|
||||
run: |
|
||||
hf auth login --token "$HF_USER_TOKEN" --add-to-git-credential
|
||||
hf auth whoami
|
||||
- name: Verify GPU availability
|
||||
run: |
|
||||
nvidia-smi
|
||||
python -c "import torch; print(f'PyTorch CUDA available: {torch.cuda.is_available()}'); print(f'Number of GPUs: {torch.cuda.device_count()}')"
|
||||
|
||||
- name: Run multi-GPU training tests
|
||||
run: pytest -vv tests/training/
|
||||
# TODO(Steven): Investigate why motors tests are failing in multi-GPU setup
|
||||
run: pytest tests -vv --maxfail=10 --ignore=tests/motors/
|
||||
timeout-minutes: 10
|
||||
@@ -50,7 +50,7 @@ jobs:
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v6
|
||||
with:
|
||||
python-version: '3.12'
|
||||
python-version: '3.10'
|
||||
|
||||
- name: Run pre-commit hooks
|
||||
uses: pre-commit/action@v3.0.1 # zizmor: ignore[unpinned-uses]
|
||||
|
||||
@@ -22,7 +22,7 @@ on:
|
||||
# Sets up the environment variables
|
||||
env:
|
||||
UV_VERSION: "0.8.0"
|
||||
PYTHON_VERSION: "3.12"
|
||||
PYTHON_VERSION: "3.10"
|
||||
|
||||
jobs:
|
||||
# This job builds the Python package and publishes it to PyPI
|
||||
@@ -45,7 +45,7 @@ jobs:
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v6
|
||||
with:
|
||||
python-version: '3.12'
|
||||
python-version: '3.10'
|
||||
|
||||
- name: Extract Version
|
||||
id: extract_info
|
||||
@@ -83,6 +83,14 @@ jobs:
|
||||
exit 1
|
||||
fi
|
||||
|
||||
- name: Remove Tags with Git dependencies
|
||||
# TODO(Steven): Temporary patch to remove pi from PyPi 0.4.0 release due to its reliance on git dependencies.
|
||||
run: |
|
||||
echo "::info:: Checking for Git dependencies to remove from pyproject.toml..."
|
||||
grep -E '@ git\+https|lerobot\[pi\]' pyproject.toml | sed 's/^/::warning:: Removing line: /' || true
|
||||
sed -E -i '/@ git\+https|lerobot\[pi\]/d' pyproject.toml
|
||||
echo "::info:: Git dependencies removed. Proceeding with build."
|
||||
|
||||
- name: Install build dependencies
|
||||
run: python -m pip install build
|
||||
|
||||
|
||||
+36
-150
@@ -12,97 +12,48 @@
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# This workflow tests the project against the latest upstream dependencies
|
||||
# (within pyproject.toml constraints) and opens a PR to update uv.lock
|
||||
# if the tests pass and the lockfile has changed.
|
||||
name: Latest Dependency Tests
|
||||
# This workflow handles full testing with unboud dependencies versions.
|
||||
name: Unbound Dependency Tests
|
||||
|
||||
on:
|
||||
# Allows running this workflow manually from the Actions tab
|
||||
workflow_dispatch:
|
||||
|
||||
# Runs at 03:00 UTC
|
||||
# Run on the 1st and 15th of every month at 09:00 UTC
|
||||
schedule:
|
||||
- cron: "0 3 * * *"
|
||||
- cron: '0 2 1,15 * *'
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
# Sets up the environment variables
|
||||
env:
|
||||
UV_VERSION: "0.8.0"
|
||||
PYTHON_VERSION: "3.12"
|
||||
DOCKER_IMAGE_NAME: huggingface/lerobot-gpu:latest-deps
|
||||
PYTHON_VERSION: "3.10"
|
||||
DOCKER_IMAGE_NAME: huggingface/lerobot-gpu:unbound
|
||||
|
||||
# Ensures that only the latest run is active, canceling older runs.
|
||||
# Ensures that only the latest action is built, canceling older runs.
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}
|
||||
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
|
||||
# This job upgrades the lockfile and checks if dependencies have changed
|
||||
upgrade-lock:
|
||||
name: Upgrade Lockfile
|
||||
# This job runs the E2E tests + pytest with all unbound extras
|
||||
full-tests:
|
||||
name: Full Unbound Tests
|
||||
runs-on: ubuntu-latest
|
||||
if: github.repository == 'huggingface/lerobot'
|
||||
permissions:
|
||||
contents: read
|
||||
outputs:
|
||||
changed: ${{ steps.diff.outputs.changed }}
|
||||
steps:
|
||||
- uses: actions/checkout@v6
|
||||
with:
|
||||
persist-credentials: false
|
||||
|
||||
- name: Setup uv and Python
|
||||
uses: astral-sh/setup-uv@v6 # zizmor: ignore[unpinned-uses]
|
||||
with:
|
||||
version: ${{ env.UV_VERSION }}
|
||||
python-version: ${{ env.PYTHON_VERSION }}
|
||||
|
||||
- name: Upgrade uv.lock
|
||||
run: uv lock --upgrade
|
||||
|
||||
- name: Check for changes
|
||||
id: diff
|
||||
run: |
|
||||
if git diff --quiet uv.lock; then
|
||||
echo "changed=false" >> "$GITHUB_OUTPUT"
|
||||
echo "uv.lock is up to date — no dependency changes."
|
||||
else
|
||||
echo "changed=true" >> "$GITHUB_OUTPUT"
|
||||
echo "uv.lock has changed — running tests."
|
||||
fi
|
||||
|
||||
- name: Upload updated lockfile
|
||||
if: steps.diff.outputs.changed == 'true'
|
||||
uses: actions/upload-artifact@v4 # zizmor: ignore[unpinned-uses]
|
||||
with:
|
||||
name: uv-lock
|
||||
path: uv.lock
|
||||
|
||||
# This job runs the full test suite with the upgraded dependencies
|
||||
cpu-tests:
|
||||
name: CPU Tests (Latest Deps)
|
||||
needs: [upgrade-lock]
|
||||
if: needs.upgrade-lock.outputs.changed == 'true'
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
contents: read
|
||||
env:
|
||||
MUJOCO_GL: egl
|
||||
HF_HOME: /mnt/cache/.cache/huggingface
|
||||
HF_LEROBOT_HOME: /mnt/cache/.cache/huggingface/lerobot
|
||||
HF_USER_TOKEN: ${{ secrets.LEROBOT_HF_USER }}
|
||||
steps:
|
||||
- uses: actions/checkout@v6
|
||||
with:
|
||||
lfs: true
|
||||
persist-credentials: false
|
||||
|
||||
- name: Download updated lockfile
|
||||
uses: actions/download-artifact@v4 # zizmor: ignore[unpinned-uses]
|
||||
with:
|
||||
name: uv-lock
|
||||
|
||||
# NOTE(Steven): Mount to `/mnt` to avoid the limited storage on `/home`. Consider cleaning default SDKs or using self-hosted runners for more space.
|
||||
# (As of 2024-06-10, the runner's `/home` has only 6.2 GB free—8% of its 72 GB total.)
|
||||
- name: Setup /mnt storage
|
||||
@@ -121,32 +72,29 @@ jobs:
|
||||
version: ${{ env.UV_VERSION }}
|
||||
python-version: ${{ env.PYTHON_VERSION }}
|
||||
|
||||
- name: Install lerobot with all extras
|
||||
run: uv sync --locked --extra all # TODO(Steven): Make flash-attn optional
|
||||
|
||||
- name: Login to Hugging Face
|
||||
if: env.HF_USER_TOKEN != ''
|
||||
- name: Unbound dependencies
|
||||
run: |
|
||||
uv run hf auth login --token "$HF_USER_TOKEN" --add-to-git-credential
|
||||
uv run hf auth whoami
|
||||
sed -i 's/,[[:space:]]*<[0-9\.]*//g' pyproject.toml
|
||||
echo "Dependencies unbound:" && cat pyproject.toml
|
||||
|
||||
- name: Install lerobot with all extras
|
||||
run: uv sync --extra all # TODO(Steven): Make flash-attn optional
|
||||
|
||||
- name: Run pytest (all extras)
|
||||
run: uv run pytest tests -vv --maxfail=10
|
||||
run: uv run pytest tests -vv
|
||||
|
||||
- name: Run end-to-end tests
|
||||
run: uv run make test-end-to-end
|
||||
|
||||
# This job builds a GPU-enabled Docker image with the upgraded dependencies
|
||||
# This job builds a GPU enabled image for testing
|
||||
build-and-push-docker:
|
||||
name: Build and Push Docker
|
||||
needs: [upgrade-lock]
|
||||
if: needs.upgrade-lock.outputs.changed == 'true'
|
||||
permissions:
|
||||
contents: read
|
||||
runs-on:
|
||||
group: aws-general-8-plus
|
||||
outputs:
|
||||
image_tag: ${{ env.DOCKER_IMAGE_NAME }}
|
||||
env:
|
||||
GITHUB_REF: ${{ github.ref }}
|
||||
steps:
|
||||
- name: Install Git LFS
|
||||
run: |
|
||||
@@ -157,12 +105,6 @@ jobs:
|
||||
with:
|
||||
lfs: true
|
||||
persist-credentials: false
|
||||
|
||||
- name: Download updated lockfile
|
||||
uses: actions/download-artifact@v4 # zizmor: ignore[unpinned-uses]
|
||||
with:
|
||||
name: uv-lock
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v3 # zizmor: ignore[unpinned-uses]
|
||||
with:
|
||||
@@ -179,13 +121,14 @@ jobs:
|
||||
file: ./docker/Dockerfile.internal
|
||||
push: true
|
||||
tags: ${{ env.DOCKER_IMAGE_NAME }}
|
||||
build-args: |
|
||||
UNBOUND_DEPS=true
|
||||
|
||||
# This job runs pytest with all extras on a GPU-enabled host
|
||||
# This job runs pytest with all unbound extras in a GPU enabled host
|
||||
# It runs everytime a test image is created
|
||||
gpu-tests:
|
||||
name: GPU Tests (Latest Deps)
|
||||
name: GPU Unbound Tests
|
||||
needs: [build-and-push-docker]
|
||||
permissions:
|
||||
contents: read
|
||||
runs-on:
|
||||
group: aws-g6-4xlarge-plus
|
||||
env:
|
||||
@@ -193,7 +136,6 @@ jobs:
|
||||
HF_LEROBOT_HOME: /home/user_lerobot/.cache/huggingface/lerobot
|
||||
TORCH_HOME: /home/user_lerobot/.cache/torch
|
||||
TRITON_CACHE_DIR: /home/user_lerobot/.cache/triton
|
||||
HF_USER_TOKEN: ${{ secrets.LEROBOT_HF_USER }}
|
||||
container:
|
||||
image: ${{ needs.build-and-push-docker.outputs.image_tag }} # zizmor: ignore[unpinned-images]
|
||||
options: --gpus all --shm-size "16gb"
|
||||
@@ -205,74 +147,17 @@ jobs:
|
||||
shell: bash
|
||||
working-directory: /lerobot
|
||||
steps:
|
||||
- name: Login to Hugging Face
|
||||
if: env.HF_USER_TOKEN != ''
|
||||
run: |
|
||||
hf auth login --token "$HF_USER_TOKEN" --add-to-git-credential
|
||||
hf auth whoami
|
||||
- name: Fix ptxas permissions
|
||||
run: chmod +x /lerobot/.venv/lib/python3.12/site-packages/triton/backends/nvidia/bin/ptxas
|
||||
- name: Run pytest on GPU
|
||||
run: pytest tests -vv --maxfail=10
|
||||
run: pytest tests -vv
|
||||
- name: Run end-to-end tests
|
||||
run: make test-end-to-end
|
||||
|
||||
# This job creates or updates a PR with the upgraded lockfile
|
||||
open-pr:
|
||||
name: Open PR
|
||||
needs: [cpu-tests, gpu-tests, upgrade-lock]
|
||||
if: success() && needs.upgrade-lock.outputs.changed == 'true'
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
contents: write
|
||||
pull-requests: write
|
||||
env:
|
||||
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
steps:
|
||||
- uses: actions/checkout@v6
|
||||
with:
|
||||
persist-credentials: false
|
||||
|
||||
- name: Download updated lockfile
|
||||
uses: actions/download-artifact@v4 # zizmor: ignore[unpinned-uses]
|
||||
with:
|
||||
name: uv-lock
|
||||
|
||||
- name: Create or update PR
|
||||
run: |
|
||||
set -euo pipefail
|
||||
BRANCH="auto/update-uv-lock"
|
||||
|
||||
git config user.name "github-actions[bot]"
|
||||
git config user.email "github-actions[bot]@users.noreply.github.com"
|
||||
git remote set-url origin "https://x-access-token:${GH_TOKEN}@github.com/${{ github.repository }}.git"
|
||||
|
||||
git checkout -B "$BRANCH"
|
||||
git add uv.lock
|
||||
git commit -m "chore(dependencies): update uv.lock"
|
||||
git push --force origin "$BRANCH"
|
||||
|
||||
# Create PR only if one doesn't already exist for this branch
|
||||
EXISTING_PR=$(gh pr list --head "$BRANCH" --state open --json number --jq '.[0].number')
|
||||
if [ -z "$EXISTING_PR" ]; then
|
||||
gh pr create \
|
||||
--title "chore(dependencies): update uv.lock" \
|
||||
--body "Automated update of \`uv.lock\` after successful latest dependency tests (CPU + GPU).
|
||||
|
||||
This PR upgrades all dependencies to their latest versions within the ranges specified in \`pyproject.toml\`." \
|
||||
--head "$BRANCH" \
|
||||
--base main
|
||||
else
|
||||
echo "PR #$EXISTING_PR already exists, branch has been updated."
|
||||
fi
|
||||
|
||||
# This job deletes the temporary Docker image after tests complete
|
||||
cleanup-docker:
|
||||
name: Cleanup Docker Image
|
||||
# This job deletes the test image recently created
|
||||
# It runs everytime after the gpu-tests have finished
|
||||
delete-unbound-image:
|
||||
name: Delete Unbound Image
|
||||
needs: [gpu-tests, build-and-push-docker]
|
||||
if: always() && needs.build-and-push-docker.result == 'success'
|
||||
permissions:
|
||||
contents: read
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Get Docker Hub Token and Delete Image
|
||||
@@ -283,7 +168,8 @@ jobs:
|
||||
IMAGE_FULL: ${{ needs.build-and-push-docker.outputs.image_tag }}
|
||||
run: |
|
||||
IMAGE_NAME=$(echo "$IMAGE_FULL" | cut -d':' -f1)
|
||||
IMAGE_TAG=$(echo "$IMAGE_FULL" | cut -d':' -f2-)
|
||||
IMAGE_TAG=$(echo "$IMAGE_FULL" | cut -d':' -f2)
|
||||
|
||||
echo "Attempting to delete image: $IMAGE_NAME:$IMAGE_TAG"
|
||||
|
||||
TOKEN=$(curl -s -H "Content-Type: application/json" \
|
||||
@@ -25,6 +25,7 @@ node_modules/
|
||||
|
||||
# Lock files
|
||||
poetry.lock
|
||||
uv.lock
|
||||
Pipfile.lock
|
||||
|
||||
### Build & Distribution ###
|
||||
|
||||
@@ -13,7 +13,7 @@
|
||||
# limitations under the License.
|
||||
|
||||
default_language_version:
|
||||
python: python3.12
|
||||
python: python3.10
|
||||
|
||||
exclude: "tests/artifacts/.*\\.safetensors$"
|
||||
|
||||
@@ -55,7 +55,7 @@ repos:
|
||||
rev: v3.21.0
|
||||
hooks:
|
||||
- id: pyupgrade
|
||||
args: [--py312-plus]
|
||||
args: [--py310-plus]
|
||||
|
||||
##### Markdown Quality #####
|
||||
- repo: https://github.com/rbubley/mirrors-prettier
|
||||
|
||||
@@ -1,25 +0,0 @@
|
||||
# AI Usage Policy
|
||||
|
||||
The LeRobot project welcomes contributions from everyone, and we have a few guidelines regarding AI usage to ensure high code quality, clear communication, and a healthy open-source ecosystem:
|
||||
|
||||
- **Please disclose significant AI assistance.** If you used AI tools (e.g., Copilot, Claude, Cursor, ChatGPT) to generate a substantial portion of your code or text, let us know in your PR description. Transparency helps us review your changes more effectively.
|
||||
- **Own your code (The Human-in-the-Loop).** You must fully understand all the changes you are proposing. If you cannot explain what your AI-assisted code does or how it interacts with LeRobot's broader architecture, please take the time to learn and test it before submitting.
|
||||
- **Keep issues and discussions focused.** You are welcome to use AI to help draft issues or PR descriptions, but please review and edit them carefully before posting. AI can often be overly verbose; trimming the noise and getting straight to the point helps our maintainers address your needs faster.
|
||||
|
||||
Our core maintainers also use AI tools to aid their workflows, but they do so while bringing deep contextual knowledge of the LeRobot codebase to validate the output. We ask all contributors to apply that same level of rigor.
|
||||
|
||||
## Remember the Human Maintainers
|
||||
|
||||
Please remember that LeRobot is maintained by a dedicated team of humans.
|
||||
|
||||
Every discussion, issue, and pull request is read and reviewed by real people. While AI tools can generate thousands of lines of code in seconds, reviewing that code still takes human time and energy. Submitting unverified or low-effort AI output puts an unfair burden on our maintainers.
|
||||
|
||||
Today, the quality of the AI output still heavily depends on the developer driving the tool. We ask that you respect our maintainers' time by thoroughly vetting, testing, and refining your submissions.
|
||||
|
||||
## AI is Welcome Here
|
||||
|
||||
LeRobot operates at the cutting edge of AI and robotics, and many of our maintainers actively embrace AI coding assistants as valuable productivity tools. We are a pro-AI project!
|
||||
|
||||
Our reason for having an AI policy is not an anti-AI stance. Rather, it exists to ensure that AI is used to enhance human contributions, not replace them with unverified noise. It's about how the tools are used, not the tools themselves.
|
||||
|
||||
We value the unique human insight you bring to the LeRobot community. Let AI empower your workflow, but always let your own judgment take the wheel.
|
||||
+5
-5
@@ -2,7 +2,7 @@
|
||||
|
||||
Everyone is welcome to contribute, and we value everybody's contribution. Code is not the only way to help the community. Answering questions, helping others, reaching out, and improving the documentation are immensely valuable.
|
||||
|
||||
Whichever way you choose to contribute, please be mindful to respect our [code of conduct](https://github.com/huggingface/lerobot/blob/main/CODE_OF_CONDUCT.md) and our [AI policy](https://github.com/huggingface/lerobot/blob/main/AI_POLICY.md).
|
||||
Whichever way you choose to contribute, please be mindful to respect our [code of conduct](./CODE_OF_CONDUCT.md).
|
||||
|
||||
## Ways to Contribute
|
||||
|
||||
@@ -14,7 +14,7 @@ You can contribute in many ways:
|
||||
- **Documentation:** Improve examples, guides, and docstrings.
|
||||
- **Feedback:** Submit tickets related to bugs or desired new features.
|
||||
|
||||
If you are unsure where to start, join our [Discord Channel](https://discord.gg/q8Dzzpym3f).
|
||||
If you are unsure where to start, join our [Discord Channel](https://discord.gg/JkrYNdmw).
|
||||
|
||||
## Development Setup
|
||||
|
||||
@@ -32,7 +32,7 @@ git remote add upstream https://github.com/huggingface/lerobot.git
|
||||
|
||||
### 2. Environment Installation
|
||||
|
||||
Please follow our [Installation Guide](https://huggingface.co/docs/lerobot/installation) for the environment setup & installation from source.
|
||||
Please follow our [Installation Guide](./docs/source/installation.mdx) for the environment setup & installation from source.
|
||||
|
||||
## Running Tests & Quality Checks
|
||||
|
||||
@@ -75,8 +75,8 @@ pytest -sv tests/test_specific_feature.py
|
||||
|
||||
Use the templates for required fields and examples.
|
||||
|
||||
- **Issues:** Follow the [ticket template](https://github.com/huggingface/lerobot/blob/main/.github/ISSUE_TEMPLATE/bug-report.yml).
|
||||
- **Pull requests:** Rebase on `upstream/main`, use a descriptive branch (don't work on `main`), run `pre-commit` and tests locally, and follow the [PR template](https://github.com/huggingface/lerobot/blob/main/.github/PULL_REQUEST_TEMPLATE.md).
|
||||
- **Issues:** Follow the [ticket template](./.github/ISSUE_TEMPLATE/bug-report.yml).
|
||||
- **Pull requests:** Rebase on `upstream/main`, use a descriptive branch (don't work on `main`), run `pre-commit` and tests locally, and follow the [PR template](./.github/PULL_REQUEST_TEMPLATE.md).
|
||||
|
||||
One member of the LeRobot team will then review your contribution.
|
||||
|
||||
|
||||
@@ -1,3 +1,2 @@
|
||||
include src/lerobot/templates/lerobot_modelcard_template.md
|
||||
include src/lerobot/datasets/card_template.md
|
||||
include src/lerobot/envs/metaworld_config.json
|
||||
|
||||
@@ -4,7 +4,7 @@
|
||||
|
||||
<div align="center">
|
||||
|
||||
[](https://github.com/huggingface/lerobot/actions/workflows/docker_publish.yml?query=branch%3Amain)
|
||||
[](https://github.com/huggingface/lerobot/actions/workflows/nightly.yml?query=branch%3Amain)
|
||||
[](https://www.python.org/downloads/)
|
||||
[](https://github.com/huggingface/lerobot/blob/main/LICENSE)
|
||||
[](https://pypi.org/project/lerobot/)
|
||||
@@ -100,11 +100,11 @@ lerobot-train \
|
||||
--dataset.repo_id=lerobot/aloha_mobile_cabinet
|
||||
```
|
||||
|
||||
| Category | Models |
|
||||
| -------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| **Imitation Learning** | [ACT](./docs/source/policy_act_README.md), [Diffusion](./docs/source/policy_diffusion_README.md), [VQ-BeT](./docs/source/policy_vqbet_README.md), [Multitask DiT Policy](./docs/source/policy_multi_task_dit_README.md) |
|
||||
| **Reinforcement Learning** | [HIL-SERL](./docs/source/hilserl.mdx), [TDMPC](./docs/source/policy_tdmpc_README.md) & QC-FQL (coming soon) |
|
||||
| **VLAs Models** | [Pi0Fast](./docs/source/pi0fast.mdx), [Pi0.5](./docs/source/pi05.mdx), [GR00T N1.5](./docs/source/policy_groot_README.md), [SmolVLA](./docs/source/policy_smolvla_README.md), [XVLA](./docs/source/xvla.mdx) |
|
||||
| Category | Models |
|
||||
| -------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
|
||||
| **Imitation Learning** | [ACT](./docs/source/policy_act_README.md), [Diffusion](./docs/source/policy_diffusion_README.md), [VQ-BeT](./docs/source/policy_vqbet_README.md) |
|
||||
| **Reinforcement Learning** | [HIL-SERL](./docs/source/hilserl.mdx), [TDMPC](./docs/source/policy_tdmpc_README.md) & QC-FQL (coming soon) |
|
||||
| **VLAs Models** | [Pi0Fast](./docs/source/pi0fast.mdx), [Pi0.5](./docs/source/pi05.mdx), [GR00T N1.5](./docs/source/policy_groot_README.md), [SmolVLA](./docs/source/policy_smolvla_README.md), [XVLA](./docs/source/xvla.mdx) |
|
||||
|
||||
Similarly to the hardware, you can easily implement your own policy & leverage LeRobot's data collection, training, and visualization tools, and share your model to the HF Hub
|
||||
|
||||
@@ -128,14 +128,13 @@ Learn how to implement your own simulation environment or benchmark and distribu
|
||||
## Resources
|
||||
|
||||
- **[Documentation](https://huggingface.co/docs/lerobot/index):** The complete guide to tutorials & API.
|
||||
- **[Chinese Tutorials: LeRobot+SO-ARM101中文教程-同济子豪兄](https://zihao-ai.feishu.cn/wiki/space/7589642043471924447)** Detailed doc for assembling, teleoperate, dataset, train, deploy. Verified by Seed Studio and 5 global hackathon players.
|
||||
- **[Discord](https://discord.gg/q8Dzzpym3f):** Join the `LeRobot` server to discuss with the community.
|
||||
- **[X](https://x.com/LeRobotHF):** Follow us on X to stay up-to-date with the latest developments.
|
||||
- **[Robot Learning Tutorial](https://huggingface.co/spaces/lerobot/robot-learning-tutorial):** A free, hands-on course to learn robot learning using LeRobot.
|
||||
|
||||
## Citation
|
||||
|
||||
If you use LeRobot in your project, please cite the GitHub repository to acknowledge the ongoing development and contributors:
|
||||
If you use LeRobot in your research, please cite:
|
||||
|
||||
```bibtex
|
||||
@misc{cadene2024lerobot,
|
||||
@@ -146,26 +145,9 @@ If you use LeRobot in your project, please cite the GitHub repository to acknowl
|
||||
}
|
||||
```
|
||||
|
||||
If you are referencing our research or the academic paper, please also cite our ICLR publication:
|
||||
|
||||
<details>
|
||||
<summary><b>ICLR 2026 Paper</b></summary>
|
||||
|
||||
```bibtex
|
||||
@inproceedings{cadenelerobot,
|
||||
title={LeRobot: An Open-Source Library for End-to-End Robot Learning},
|
||||
author={Cadene, Remi and Alibert, Simon and Capuano, Francesco and Aractingi, Michel and Zouitine, Adil and Kooijmans, Pepijn and Choghari, Jade and Russi, Martino and Pascal, Caroline and Palma, Steven and Shukor, Mustafa and Moss, Jess and Soare, Alexander and Aubakirova, Dana and Lhoest, Quentin and Gallou\'edec, Quentin and Wolf, Thomas},
|
||||
booktitle={The Fourteenth International Conference on Learning Representations},
|
||||
year={2026},
|
||||
url={https://arxiv.org/abs/2602.22818}
|
||||
}
|
||||
```
|
||||
|
||||
</details>
|
||||
|
||||
## Contribute
|
||||
|
||||
We welcome contributions from everyone in the community! To get started, please read our [CONTRIBUTING.md](https://github.com/huggingface/lerobot/blob/main/CONTRIBUTING.md) guide. Whether you're adding a new feature, improving documentation, or fixing a bug, your help and feedback are invaluable. We're incredibly excited about the future of open-source robotics and can't wait to work with you on what's next—thank you for your support!
|
||||
We welcome contributions from everyone in the community! To get started, please read our [CONTRIBUTING.md](./CONTRIBUTING.md) guide. Whether you're adding a new feature, improving documentation, or fixing a bug, your help and feedback are invaluable. We're incredibly excited about the future of open-source robotics and can't wait to work with you on what's next—thank you for your support!
|
||||
|
||||
<p align="center">
|
||||
<img alt="SO101 Video" src="./media/readme/so100_video.webp" width="640px">
|
||||
|
||||
-48
@@ -1,48 +0,0 @@
|
||||
# Security Policy
|
||||
|
||||
## Project Status & Philosophy
|
||||
|
||||
`lerobot` has so far been primarily a research and prototyping tool, which is why deployment security hasn’t been a strong focus until now. As `lerobot` continues to be adopted and deployed in production, we are paying much closer attention to these kinds of issues.
|
||||
|
||||
Fortunately, being an open-source project, the community can also help by reporting and fixing vulnerabilities. We appreciate your efforts to responsibly disclose your findings and will make every effort to acknowledge your contributions.
|
||||
|
||||
## Reporting a Vulnerability
|
||||
|
||||
To report a security issue, please use the GitHub Security Advisory ["Report a Vulnerability"](https://github.com/huggingface/lerobot/security/advisories/new) tab.
|
||||
|
||||
The `lerobot` team will send a response indicating the next steps in handling your report. After the initial reply to your report, the security team will keep you informed of the progress towards a fix and full announcement, and may ask for additional information or guidance.
|
||||
|
||||
#### Hugging Face Security Team
|
||||
|
||||
Since this project is part of the Hugging Face ecosystem, feel free to submit vulnerability reports directly to: **[security@huggingface.co](mailto:security@huggingface.co)**. Someone from the HF security team will review the report and recommend next steps.
|
||||
|
||||
#### Open Source Disclosures
|
||||
|
||||
If reporting a vulnerability specific to the open-source codebase (and not the underlying Hub infrastructure), you may also use [Huntr](https://huntr.com), a vulnerability disclosure program for open source software.
|
||||
|
||||
## Supported Versions
|
||||
|
||||
Currently, we treat `lerobot` as a rolling release. We prioritize security updates for the latest available version (`main` branch).
|
||||
|
||||
| Version | Supported |
|
||||
| -------- | --------- |
|
||||
| Latest | ✅ |
|
||||
| < Latest | ❌ |
|
||||
|
||||
## Secure Usage Guidelines
|
||||
|
||||
`lerobot` is tightly coupled to the Hugging Face Hub for sharing data and pretrained policies. When downloading artifacts uploaded by others, you expose yourself to risks. Please read below for recommendations to keep your runtime and robot environment safe.
|
||||
|
||||
### Remote Artefacts (Weights & Policies)
|
||||
|
||||
Models and policies uploaded to the Hugging Face Hub come in different formats. We heavily recommend uploading and downloading models in the [`safetensors`](https://github.com/huggingface/safetensors) format.
|
||||
|
||||
`safetensors` was developed specifically to prevent arbitrary code execution on your system, which is critical when running software on physical hardware/robots.
|
||||
|
||||
To avoid loading models from unsafe formats (e.g., `pickle`), you should ensure you are prioritizing `safetensors` files.
|
||||
|
||||
### Remote Code
|
||||
|
||||
Some models or environments on the Hub may require `trust_remote_code=True` to run custom architecture code.
|
||||
|
||||
Please **always** verify the content of the modeling files when using this argument. We recommend setting a specific `revision` (commit hash) when loading remote code to ensure you protect yourself from unverified updates to the repository.
|
||||
+42
-42
@@ -28,9 +28,9 @@ We don't expect the same optimal settings for a dataset of images from a simulat
|
||||
For these reasons, we run this benchmark on four representative datasets:
|
||||
|
||||
- `lerobot/pusht_image`: (96 x 96 pixels) simulation with simple geometric shapes, fixed camera.
|
||||
- `lerobot/aloha_mobile_shrimp_image`: (480 x 640 pixels) real-world indoor, moving camera.
|
||||
- `lerobot/paris_street`: (720 x 1280 pixels) real-world outdoor, moving camera.
|
||||
- `lerobot/kitchen`: (1080 x 1920 pixels) real-world indoor, fixed camera.
|
||||
- `aliberts/aloha_mobile_shrimp_image`: (480 x 640 pixels) real-world indoor, moving camera.
|
||||
- `aliberts/paris_street`: (720 x 1280 pixels) real-world outdoor, moving camera.
|
||||
- `aliberts/kitchen`: (1080 x 1920 pixels) real-world indoor, fixed camera.
|
||||
|
||||
Note: The datasets used for this benchmark need to be image datasets, not video datasets.
|
||||
|
||||
@@ -179,7 +179,7 @@ python benchmark/video/run_video_benchmark.py \
|
||||
--output-dir outputs/video_benchmark \
|
||||
--repo-ids \
|
||||
lerobot/pusht_image \
|
||||
lerobot/aloha_mobile_shrimp_image \
|
||||
aliberts/aloha_mobile_shrimp_image \
|
||||
--vcodec libx264 libx265 \
|
||||
--pix-fmt yuv444p yuv420p \
|
||||
--g 2 20 None \
|
||||
@@ -203,9 +203,9 @@ python benchmark/video/run_video_benchmark.py \
|
||||
--output-dir outputs/video_benchmark \
|
||||
--repo-ids \
|
||||
lerobot/pusht_image \
|
||||
lerobot/aloha_mobile_shrimp_image \
|
||||
lerobot/paris_street \
|
||||
lerobot/kitchen \
|
||||
aliberts/aloha_mobile_shrimp_image \
|
||||
aliberts/paris_street \
|
||||
aliberts/kitchen \
|
||||
--vcodec libx264 libx265 \
|
||||
--pix-fmt yuv444p yuv420p \
|
||||
--g 1 2 3 4 5 6 10 15 20 40 None \
|
||||
@@ -221,9 +221,9 @@ python benchmark/video/run_video_benchmark.py \
|
||||
--output-dir outputs/video_benchmark \
|
||||
--repo-ids \
|
||||
lerobot/pusht_image \
|
||||
lerobot/aloha_mobile_shrimp_image \
|
||||
lerobot/paris_street \
|
||||
lerobot/kitchen \
|
||||
aliberts/aloha_mobile_shrimp_image \
|
||||
aliberts/paris_street \
|
||||
aliberts/kitchen \
|
||||
--vcodec libsvtav1 \
|
||||
--pix-fmt yuv420p \
|
||||
--g 1 2 3 4 5 6 10 15 20 40 None \
|
||||
@@ -252,37 +252,37 @@ Since we're using av1 encoding, we're choosing the `pyav` decoder as `video_read
|
||||
|
||||
These tables show the results for `g=2` and `crf=30`, using `timestamps-modes=6_frames` and `backend=pyav`
|
||||
|
||||
| video_images_size_ratio | vcodec | pix_fmt | | | |
|
||||
| --------------------------------- | ---------- | ------- | --------- | --------- | --------- |
|
||||
| | libx264 | | libx265 | | libsvtav1 |
|
||||
| repo_id | yuv420p | yuv444p | yuv420p | yuv444p | yuv420p |
|
||||
| lerobot/pusht_image | **16.97%** | 17.58% | 18.57% | 18.86% | 22.06% |
|
||||
| lerobot/aloha_mobile_shrimp_image | 2.14% | 2.11% | 1.38% | **1.37%** | 5.59% |
|
||||
| lerobot/paris_street | 2.12% | 2.13% | **1.54%** | **1.54%** | 4.43% |
|
||||
| lerobot/kitchen | 1.40% | 1.39% | **1.00%** | **1.00%** | 2.52% |
|
||||
| video_images_size_ratio | vcodec | pix_fmt | | | |
|
||||
| ---------------------------------- | ---------- | ------- | --------- | --------- | --------- |
|
||||
| | libx264 | | libx265 | | libsvtav1 |
|
||||
| repo_id | yuv420p | yuv444p | yuv420p | yuv444p | yuv420p |
|
||||
| lerobot/pusht_image | **16.97%** | 17.58% | 18.57% | 18.86% | 22.06% |
|
||||
| aliberts/aloha_mobile_shrimp_image | 2.14% | 2.11% | 1.38% | **1.37%** | 5.59% |
|
||||
| aliberts/paris_street | 2.12% | 2.13% | **1.54%** | **1.54%** | 4.43% |
|
||||
| aliberts/kitchen | 1.40% | 1.39% | **1.00%** | **1.00%** | 2.52% |
|
||||
|
||||
| video_images_load_time_ratio | vcodec | pix_fmt | | | |
|
||||
| --------------------------------- | ------- | ------- | -------- | ------- | --------- |
|
||||
| | libx264 | | libx265 | | libsvtav1 |
|
||||
| repo_id | yuv420p | yuv444p | yuv420p | yuv444p | yuv420p |
|
||||
| lerobot/pusht_image | 6.45 | 5.19 | **1.90** | 2.12 | 2.47 |
|
||||
| lerobot/aloha_mobile_shrimp_image | 11.80 | 7.92 | 0.71 | 0.85 | **0.48** |
|
||||
| lerobot/paris_street | 2.21 | 2.05 | 0.36 | 0.49 | **0.30** |
|
||||
| lerobot/kitchen | 1.46 | 1.46 | 0.28 | 0.51 | **0.26** |
|
||||
| video_images_load_time_ratio | vcodec | pix_fmt | | | |
|
||||
| ---------------------------------- | ------- | ------- | -------- | ------- | --------- |
|
||||
| | libx264 | | libx265 | | libsvtav1 |
|
||||
| repo_id | yuv420p | yuv444p | yuv420p | yuv444p | yuv420p |
|
||||
| lerobot/pusht_image | 6.45 | 5.19 | **1.90** | 2.12 | 2.47 |
|
||||
| aliberts/aloha_mobile_shrimp_image | 11.80 | 7.92 | 0.71 | 0.85 | **0.48** |
|
||||
| aliberts/paris_street | 2.21 | 2.05 | 0.36 | 0.49 | **0.30** |
|
||||
| aliberts/kitchen | 1.46 | 1.46 | 0.28 | 0.51 | **0.26** |
|
||||
|
||||
| | | vcodec | pix_fmt | | | |
|
||||
| --------------------------------- | -------- | -------- | ------------ | -------- | --------- | ------------ |
|
||||
| | | libx264 | | libx265 | | libsvtav1 |
|
||||
| repo_id | metric | yuv420p | yuv444p | yuv420p | yuv444p | yuv420p |
|
||||
| lerobot/pusht_image | avg_mse | 2.90E-04 | **2.03E-04** | 3.13E-04 | 2.29E-04 | 2.19E-04 |
|
||||
| | avg_psnr | 35.44 | 37.07 | 35.49 | **37.30** | 37.20 |
|
||||
| | avg_ssim | 98.28% | **98.85%** | 98.31% | 98.84% | 98.72% |
|
||||
| lerobot/aloha_mobile_shrimp_image | avg_mse | 2.76E-04 | 2.59E-04 | 3.17E-04 | 3.06E-04 | **1.30E-04** |
|
||||
| | avg_psnr | 35.91 | 36.21 | 35.88 | 36.09 | **40.17** |
|
||||
| | avg_ssim | 95.19% | 95.18% | 95.00% | 95.05% | **97.73%** |
|
||||
| lerobot/paris_street | avg_mse | 6.89E-04 | 6.70E-04 | 4.03E-03 | 4.02E-03 | **3.09E-04** |
|
||||
| | avg_psnr | 33.48 | 33.68 | 32.05 | 32.15 | **35.40** |
|
||||
| | avg_ssim | 93.76% | 93.75% | 89.46% | 89.46% | **95.46%** |
|
||||
| lerobot/kitchen | avg_mse | 2.50E-04 | 2.24E-04 | 4.28E-04 | 4.18E-04 | **1.53E-04** |
|
||||
| | avg_psnr | 36.73 | 37.33 | 36.56 | 36.75 | **39.12** |
|
||||
| | avg_ssim | 95.47% | 95.58% | 95.52% | 95.53% | **96.82%** |
|
||||
| | | vcodec | pix_fmt | | | |
|
||||
| ---------------------------------- | -------- | -------- | ------------ | -------- | --------- | ------------ |
|
||||
| | | libx264 | | libx265 | | libsvtav1 |
|
||||
| repo_id | metric | yuv420p | yuv444p | yuv420p | yuv444p | yuv420p |
|
||||
| lerobot/pusht_image | avg_mse | 2.90E-04 | **2.03E-04** | 3.13E-04 | 2.29E-04 | 2.19E-04 |
|
||||
| | avg_psnr | 35.44 | 37.07 | 35.49 | **37.30** | 37.20 |
|
||||
| | avg_ssim | 98.28% | **98.85%** | 98.31% | 98.84% | 98.72% |
|
||||
| aliberts/aloha_mobile_shrimp_image | avg_mse | 2.76E-04 | 2.59E-04 | 3.17E-04 | 3.06E-04 | **1.30E-04** |
|
||||
| | avg_psnr | 35.91 | 36.21 | 35.88 | 36.09 | **40.17** |
|
||||
| | avg_ssim | 95.19% | 95.18% | 95.00% | 95.05% | **97.73%** |
|
||||
| aliberts/paris_street | avg_mse | 6.89E-04 | 6.70E-04 | 4.03E-03 | 4.02E-03 | **3.09E-04** |
|
||||
| | avg_psnr | 33.48 | 33.68 | 32.05 | 32.15 | **35.40** |
|
||||
| | avg_ssim | 93.76% | 93.75% | 89.46% | 89.46% | **95.46%** |
|
||||
| aliberts/kitchen | avg_mse | 2.50E-04 | 2.24E-04 | 4.28E-04 | 4.18E-04 | **1.53E-04** |
|
||||
| | avg_psnr | 36.73 | 37.33 | 36.56 | 36.75 | **39.12** |
|
||||
| | avg_ssim | 95.47% | 95.58% | 95.52% | 95.53% | **96.82%** |
|
||||
|
||||
@@ -24,7 +24,7 @@ ARG OS_VERSION=22.04
|
||||
FROM nvidia/cuda:${CUDA_VERSION}-base-ubuntu${OS_VERSION}
|
||||
|
||||
# Define Python version argument
|
||||
ARG PYTHON_VERSION=3.12
|
||||
ARG PYTHON_VERSION=3.10
|
||||
|
||||
# Configure environment variables
|
||||
ENV DEBIAN_FRONTEND=noninteractive \
|
||||
@@ -73,12 +73,17 @@ ENV HOME=/home/user_lerobot \
|
||||
RUN uv venv --python python${PYTHON_VERSION}
|
||||
|
||||
# Install Python dependencies for caching
|
||||
COPY --chown=user_lerobot:user_lerobot setup.py pyproject.toml uv.lock README.md MANIFEST.in ./
|
||||
COPY --chown=user_lerobot:user_lerobot setup.py pyproject.toml README.md MANIFEST.in ./
|
||||
COPY --chown=user_lerobot:user_lerobot src/ src/
|
||||
|
||||
RUN uv sync --locked --extra all --no-cache
|
||||
ARG UNBOUND_DEPS=false
|
||||
|
||||
RUN chmod +x /lerobot/.venv/lib/python${PYTHON_VERSION}/site-packages/triton/backends/nvidia/bin/ptxas
|
||||
RUN if [ "$UNBOUND_DEPS" = "true" ]; then \
|
||||
sed -i 's/,[[:space:]]*<[0-9\.]*//g' pyproject.toml; \
|
||||
echo "Dependencies unbound:" && cat pyproject.toml; \
|
||||
fi
|
||||
|
||||
RUN uv pip install --no-cache ".[all]"
|
||||
|
||||
# Copy the rest of the application source code
|
||||
# Make sure to have the git-LFS files for testing
|
||||
|
||||
+10
-5
@@ -18,10 +18,8 @@
|
||||
# docker build -f docker/Dockerfile.user -t lerobot-user .
|
||||
# docker run -it --rm lerobot-user
|
||||
|
||||
# With USB physical access : docker run -it --device=/dev/ -v /dev/:/dev/ --rm lerobot-user
|
||||
|
||||
# Configure the base image
|
||||
ARG PYTHON_VERSION=3.12
|
||||
ARG PYTHON_VERSION=3.10
|
||||
FROM python:${PYTHON_VERSION}-slim
|
||||
|
||||
# Configure environment variables
|
||||
@@ -61,10 +59,17 @@ ENV HOME=/home/user_lerobot \
|
||||
RUN uv venv
|
||||
|
||||
# Install Python dependencies for caching
|
||||
COPY --chown=user_lerobot:user_lerobot setup.py pyproject.toml uv.lock README.md MANIFEST.in ./
|
||||
COPY --chown=user_lerobot:user_lerobot setup.py pyproject.toml README.md MANIFEST.in ./
|
||||
COPY --chown=user_lerobot:user_lerobot src/ src/
|
||||
|
||||
RUN uv sync --locked --extra all --no-cache
|
||||
ARG UNBOUND_DEPS=false
|
||||
|
||||
RUN if [ "$UNBOUND_DEPS" = "true" ]; then \
|
||||
sed -i 's/,[[:space:]]*<[0-9\.]*//g' pyproject.toml; \
|
||||
echo "Dependencies unbound:" && cat pyproject.toml; \
|
||||
fi
|
||||
|
||||
RUN uv pip install --no-cache ".[all]"
|
||||
|
||||
# Copy the rest of the application code
|
||||
# Make sure to have the git-LFS files for testing
|
||||
|
||||
@@ -1,77 +0,0 @@
|
||||
# Docker
|
||||
|
||||
This directory contains Dockerfiles for running LeRobot in containerized environments. Both images are **built nightly from `main`** and published to Docker Hub with the full environment pre-baked — no dependency setup required.
|
||||
|
||||
## Pre-built Images
|
||||
|
||||
```bash
|
||||
# CPU-only image (based on Dockerfile.user)
|
||||
docker pull huggingface/lerobot-cpu:latest
|
||||
|
||||
# GPU image with CUDA support (based on Dockerfile.internal)
|
||||
docker pull huggingface/lerobot-gpu:latest
|
||||
```
|
||||
|
||||
## Quick Start
|
||||
|
||||
The fastest way to start training is to pull the GPU image and run `lerobot-train` directly. This is the same environment used for all of our CI, so it is a well-tested, batteries-included setup.
|
||||
|
||||
```bash
|
||||
docker run -it --rm --gpus all --shm-size 16gb huggingface/lerobot-gpu:latest
|
||||
|
||||
# inside the container:
|
||||
lerobot-train --policy.type=act --dataset.repo_id=lerobot/aloha_sim_transfer_cube_human
|
||||
```
|
||||
|
||||
## Dockerfiles
|
||||
|
||||
### `Dockerfile.user` (CPU)
|
||||
|
||||
A lightweight image based on `python:3.12-slim`. Includes all Python dependencies and system libraries but does not include CUDA — there is no GPU support. Useful for exploring the codebase, running scripts, or working with robots, but not practical for training.
|
||||
|
||||
### `Dockerfile.internal` (GPU)
|
||||
|
||||
A CUDA-enabled image based on `nvidia/cuda`. This is the image for training — mostly used for internal interactions with the GPU cluster.
|
||||
|
||||
## Usage
|
||||
|
||||
### Running a pre-built image
|
||||
|
||||
```bash
|
||||
# CPU
|
||||
docker run -it --rm huggingface/lerobot-cpu:latest
|
||||
|
||||
# GPU
|
||||
docker run -it --rm --gpus all --shm-size 16gb huggingface/lerobot-gpu:latest
|
||||
```
|
||||
|
||||
### Building locally
|
||||
|
||||
From the repo root:
|
||||
|
||||
```bash
|
||||
# CPU
|
||||
docker build -f docker/Dockerfile.user -t lerobot-user .
|
||||
docker run -it --rm lerobot-user
|
||||
|
||||
# GPU
|
||||
docker build -f docker/Dockerfile.internal -t lerobot-internal .
|
||||
docker run -it --rm --gpus all --shm-size 16gb lerobot-internal
|
||||
```
|
||||
|
||||
### Multi-GPU training
|
||||
|
||||
To select specific GPUs, set `CUDA_VISIBLE_DEVICES` when launching the container:
|
||||
|
||||
```bash
|
||||
# Use 4 GPUs
|
||||
docker run -it --rm --gpus all --shm-size 16gb \
|
||||
-e CUDA_VISIBLE_DEVICES=0,1,2,3 \
|
||||
huggingface/lerobot-gpu:latest
|
||||
```
|
||||
|
||||
### USB device access (e.g. robots, cameras)
|
||||
|
||||
```bash
|
||||
docker run -it --device=/dev/ -v /dev/:/dev/ --rm huggingface/lerobot-cpu:latest
|
||||
```
|
||||
@@ -7,6 +7,8 @@
|
||||
- sections:
|
||||
- local: il_robots
|
||||
title: Imitation Learning for Robots
|
||||
- local: cameras
|
||||
title: Cameras
|
||||
- local: bring_your_own_policies
|
||||
title: Bring Your Own Policies
|
||||
- local: integrate_hardware
|
||||
@@ -17,12 +19,8 @@
|
||||
title: Train RL in Simulation
|
||||
- local: multi_gpu_training
|
||||
title: Multi GPU training
|
||||
- local: hil_data_collection
|
||||
title: Human In the Loop Data Collection
|
||||
- local: peft_training
|
||||
title: Training with PEFT (e.g., LoRA)
|
||||
- local: rename_map
|
||||
title: Using Rename Map and Empty Cameras
|
||||
title: "Tutorials"
|
||||
- sections:
|
||||
- local: lerobot-dataset-v3
|
||||
@@ -31,10 +29,6 @@
|
||||
title: Porting Large Datasets
|
||||
- local: using_dataset_tools
|
||||
title: Using the Dataset Tools
|
||||
- local: dataset_subtask
|
||||
title: Using Subtasks in the Dataset
|
||||
- local: streaming_video_encoding
|
||||
title: Streaming Video Encoding
|
||||
title: "Datasets"
|
||||
- sections:
|
||||
- local: act
|
||||
@@ -51,8 +45,6 @@
|
||||
title: NVIDIA GR00T N1.5
|
||||
- local: xvla
|
||||
title: X-VLA
|
||||
- local: multi_task_dit
|
||||
title: Multitask DiT Policy
|
||||
- local: walloss
|
||||
title: WALL-OSS
|
||||
title: "Policies"
|
||||
@@ -71,17 +63,13 @@
|
||||
title: Environments from the Hub
|
||||
- local: envhub_leisaac
|
||||
title: Control & Train Robots in Sim (LeIsaac)
|
||||
title: "Simulation"
|
||||
- sections:
|
||||
- local: adding_benchmarks
|
||||
title: Adding a New Benchmark
|
||||
- local: libero
|
||||
title: LIBERO
|
||||
- local: metaworld
|
||||
title: Meta-World
|
||||
- local: envhub_isaaclab_arena
|
||||
title: NVIDIA IsaacLab Arena Environments
|
||||
title: "Benchmarks"
|
||||
- local: libero
|
||||
title: Using Libero
|
||||
- local: metaworld
|
||||
title: Using MetaWorld
|
||||
title: "Simulation"
|
||||
- sections:
|
||||
- local: introduction_processors
|
||||
title: Introduction to Robot Processors
|
||||
@@ -93,8 +81,6 @@
|
||||
title: Processors for Robots and Teleoperators
|
||||
- local: env_processor
|
||||
title: Environment Processors
|
||||
- local: action_representations
|
||||
title: Action Representations
|
||||
title: "Robot Processors"
|
||||
- sections:
|
||||
- local: so101
|
||||
@@ -113,19 +99,11 @@
|
||||
title: Unitree G1
|
||||
- local: earthrover_mini_plus
|
||||
title: Earth Rover Mini
|
||||
- local: omx
|
||||
title: OMX
|
||||
- local: openarm
|
||||
title: OpenArm
|
||||
title: "Robots"
|
||||
- sections:
|
||||
- local: phone_teleop
|
||||
title: Phone
|
||||
title: "Teleoperators"
|
||||
- sections:
|
||||
- local: cameras
|
||||
title: Cameras
|
||||
title: "Sensors"
|
||||
- sections:
|
||||
- local: torch_accelerators
|
||||
title: PyTorch accelerators
|
||||
@@ -135,8 +113,6 @@
|
||||
title: Notebooks
|
||||
- local: feetech
|
||||
title: Updating Feetech Firmware
|
||||
- local: damiao
|
||||
title: Damiao Motors and CAN Bus
|
||||
title: "Resources"
|
||||
- sections:
|
||||
- local: contributing
|
||||
|
||||
@@ -88,8 +88,5 @@ lerobot-record \
|
||||
--dataset.repo_id=${HF_USER}/eval_act_your_dataset \
|
||||
--dataset.num_episodes=10 \
|
||||
--dataset.single_task="Your task description" \
|
||||
--dataset.streaming_encoding=true \
|
||||
--dataset.encoder_threads=2 \
|
||||
# --dataset.vcodec=auto \
|
||||
--policy.path=${HF_USER}/act_policy
|
||||
```
|
||||
|
||||
@@ -1,223 +0,0 @@
|
||||
# Action Representations
|
||||
|
||||
This guide explains the different ways robot actions can be represented in LeRobot, how they relate to each other, and when to use each one.
|
||||
|
||||
## Joint Space vs End-Effector Space
|
||||
|
||||
Before discussing action representations, it helps to understand the two coordinate spaces actions can live in.
|
||||
|
||||
### Joint Space
|
||||
|
||||
Joint-space actions directly specify target positions for each motor. For a 6-DOF arm with a gripper, a joint-space action might look like:
|
||||
|
||||
```
|
||||
action = [shoulder_pan: 45.0, shoulder_lift: -20.0, elbow: -30.0, wrist_pitch: 10.0, wrist_roll: 0.0, wrist_yaw: 5.0, gripper: 0.8]
|
||||
```
|
||||
|
||||
Joint space is the default in LeRobot. It is simple, requires no kinematics model, and maps directly to motor commands. Most beginner setups (SO-100, Koch) use joint-space actions.
|
||||
|
||||
### End-Effector (EE) Space
|
||||
|
||||
End-effector-space actions specify the desired position and orientation of the robot's tool tip (gripper) in Cartesian coordinates:
|
||||
|
||||
```
|
||||
action = [x: 0.25, y: -0.10, z: 0.15, wx: 0.0, wy: 0.0, wz: 0.1, gripper: 0.8]
|
||||
```
|
||||
|
||||
EE space is more intuitive for tasks like pick-and-place because it directly describes where the gripper should go, but it requires a kinematics model (URDF) to convert between EE poses and joint angles.
|
||||
|
||||
### Converting Between Spaces
|
||||
|
||||
LeRobot provides processor steps for converting between joint and EE spaces using forward and inverse kinematics. These are built on top of `RobotKinematics`, which loads a URDF model of your robot.
|
||||
|
||||
```python
|
||||
from lerobot.model.kinematics import RobotKinematics
|
||||
from lerobot.robots.so_follower.robot_kinematic_processor import (
|
||||
ForwardKinematicsJointsToEE,
|
||||
InverseKinematicsEEToJoints,
|
||||
)
|
||||
|
||||
kinematics = RobotKinematics(
|
||||
urdf_path="./SO101/so101_new_calib.urdf",
|
||||
target_frame_name="gripper_frame_link",
|
||||
joint_names=["shoulder", "elbow", "wrist_pitch", "wrist_roll", "wrist_yaw"],
|
||||
)
|
||||
|
||||
# Joints → EE (for observations: "where is my gripper?")
|
||||
fk_step = ForwardKinematicsJointsToEE(kinematics=kinematics, motor_names=[...])
|
||||
|
||||
# EE → Joints (for actions: "move my gripper here")
|
||||
ik_step = InverseKinematicsEEToJoints(kinematics=kinematics, motor_names=[...])
|
||||
```
|
||||
|
||||
See [`examples/so100_to_so100_EE/`](https://github.com/huggingface/lerobot/tree/main/examples/so100_to_so100_EE) for a complete working example of recording, replaying, and evaluating with EE-space actions on an SO-100 arm.
|
||||
|
||||
## Absolute, Relative, and Delta Actions
|
||||
|
||||
Regardless of whether you work in joint space or EE space, the action values can be expressed in three different ways. The terminology follows [UMI (Chi et al., 2024)](https://arxiv.org/abs/2402.10329).
|
||||
|
||||
### Absolute Actions (LeRobot default)
|
||||
|
||||
Each action specifies the target position directly.
|
||||
|
||||
**Example** (joint space, chunk of 4):
|
||||
|
||||
```
|
||||
current_state = [45.0, -30.0, 10.0]
|
||||
|
||||
action_chunk = [
|
||||
[46.0, -29.0, 11.0], # go to 46, -29, 11
|
||||
[47.5, -27.0, 12.0], # go to 47.5, -27, 12
|
||||
[49.0, -25.0, 13.5], # go to 49, -25, 13.5
|
||||
[50.0, -24.0, 15.0], # go to 50, -24, 15
|
||||
]
|
||||
```
|
||||
|
||||
Each value is a target position in the robot's coordinate frame. Simple and direct, but requires a consistent global coordinate frame. This is the default in LeRobot.
|
||||
|
||||
### Relative Actions (used by OpenPI / pi0)
|
||||
|
||||
Each action in the chunk is an offset from the **current state at the moment of prediction**. All actions in the chunk share the same reference point:
|
||||
|
||||
```
|
||||
current_state = [45.0, -30.0, 10.0]
|
||||
|
||||
relative_chunk = [
|
||||
[1.0, 1.0, 1.0], # +1 from current → target 46, -29, 11
|
||||
[2.5, 3.0, 2.0], # +2.5 from current → target 47.5, -27, 12
|
||||
[4.0, 5.0, 3.5], # +4 from current → target 49, -25, 13.5
|
||||
[5.0, 6.0, 5.0], # +5 from current → target 50, -24, 15
|
||||
]
|
||||
```
|
||||
|
||||
The conversion is straightforward: `relative = absolute - current_state`. To recover absolute: `absolute = relative + current_state`.
|
||||
|
||||
**Why use relative actions?** The model learns to predict offsets centered around zero, which is easier to normalize and leads to more stable training. Because every chunk references the same current state, there is no error accumulation across chunks.
|
||||
|
||||
### Delta Actions (sequential differences)
|
||||
|
||||
Each action is an offset from the **previous action** (or from the current state for the first step):
|
||||
|
||||
```
|
||||
current_state = [45.0, -30.0, 10.0]
|
||||
|
||||
delta_chunk = [
|
||||
[1.0, 1.0, 1.0], # current → 46, -29, 11
|
||||
[1.5, 2.0, 1.0], # previous action → 47.5, -27, 12
|
||||
[1.5, 2.0, 1.5], # previous action → 49, -25, 13.5
|
||||
[1.0, 1.0, 1.5], # previous action → 50, -24, 15
|
||||
]
|
||||
```
|
||||
|
||||
Here each step is relative to the one before it. To recover absolute positions you must sum all previous deltas, which means errors accumulate over time. UMI explicitly argues against this representation for this reason.
|
||||
|
||||
### Visual Comparison
|
||||
|
||||
The figure below (based on a figure from [UMI, Chi et al., 2024](https://arxiv.org/abs/2402.10329)) illustrates the key difference. With **relative trajectory**, every action in the chunk points back to the same origin (current state), so a new inference step cleanly resets the reference. With **delta**, each action depends on the previous one, so errors accumulate. **Absolute** actions require a consistent global coordinate frame.
|
||||
|
||||
<img
|
||||
src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/lerobot/action_representations_umi.png"
|
||||
alt="Relative Trajectory as Action Representation (UMI, Chi et al., 2024)"
|
||||
width="85%"
|
||||
/>
|
||||
|
||||
## Using Relative Actions in LeRobot
|
||||
|
||||
LeRobot provides `RelativeActionsProcessorStep` to convert between absolute and relative actions inside the processor pipeline. This is how pi0, pi0.5, and pi0_fast support relative actions.
|
||||
|
||||
> **Note:** All pi models (pi0, pi0.5, pi0*fast) apply relative conversion \_before* normalization (`relative → normalize`), so the normalizer always sees delta (relative) values. This means **relative action stats are required** for all of them when training with `use_relative_actions=true`. In pi0_fast the `RelativeActionsProcessorStep` only modifies the action — the state observation is unchanged — so `NormalizerProcessorStep` still runs before the state tokenizer and the tokenizer continues to receive normalized state as expected.
|
||||
|
||||
### How it works
|
||||
|
||||
During **training** (preprocessing), actions are converted from absolute to relative before the model sees them:
|
||||
|
||||
```
|
||||
raw absolute action → RelativeActionsProcessorStep → normalize → model
|
||||
```
|
||||
|
||||
During **inference** (postprocessing), model predictions are converted back to absolute before being sent to the robot:
|
||||
|
||||
```
|
||||
model output → unnormalize → AbsoluteActionsProcessorStep → robot
|
||||
```
|
||||
|
||||
The `AbsoluteActionsProcessorStep` reads the cached current state from its paired `RelativeActionsProcessorStep`, so the two must be wired together (handled automatically by the policy factory).
|
||||
|
||||
### Enabling relative actions for the pi family (pi0, pi0.5, pi0_fast)
|
||||
|
||||
**Step 1**: Precompute relative action statistics for your dataset:
|
||||
|
||||
```bash
|
||||
lerobot-edit-dataset \
|
||||
--repo_id your_dataset \
|
||||
--operation.type recompute_stats \
|
||||
--operation.relative_action true \
|
||||
--operation.chunk_size 50 \
|
||||
--operation.relative_exclude_joints "['gripper']"
|
||||
```
|
||||
|
||||
**Step 2**: Train with relative actions enabled:
|
||||
|
||||
```bash
|
||||
lerobot-train \
|
||||
--dataset.repo_id=your_dataset \
|
||||
--policy.type=pi0 \
|
||||
--policy.use_relative_actions=true \
|
||||
--policy.relative_exclude_joints='["gripper"]'
|
||||
```
|
||||
|
||||
The `relative_exclude_joints` parameter specifies joints that should remain in absolute space. For example, gripper commands are typically binary (open/close) and don't benefit from relative encoding.
|
||||
|
||||
### Combining relative actions with RTC
|
||||
|
||||
[RTC](https://arxiv.org/abs/2506.07339) runs policy inference at high frequency and sends actions to the robot as they are predicted rather than waiting for a full chunk. Relative actions and RTC are fully compatible: because every chunk in relative mode references the **same** current state (captured at the start of inference), each predicted action in the chunk remains a valid offset even if the robot has already moved. No special handling is needed — `RelativeActionsProcessorStep` caches the state once per inference call and `AbsoluteActionsProcessorStep` applies it to every action in the streamed output.
|
||||
|
||||
### Combining relative actions with EE space
|
||||
|
||||
Relative actions work in both joint space and EE space. For example, if your dataset stores EE actions, relative encoding converts them to offsets from the current EE pose:
|
||||
|
||||
```
|
||||
current_ee_state = [x: 0.25, y: -0.10, z: 0.15, gripper: 0.8]
|
||||
|
||||
absolute_ee_chunk = [
|
||||
[0.26, -0.09, 0.16, 0.8],
|
||||
[0.28, -0.07, 0.18, 0.8],
|
||||
]
|
||||
|
||||
relative_ee_chunk = [
|
||||
[0.01, 0.01, 0.01, 0.0], # offset from current EE pose
|
||||
[0.03, 0.03, 0.03, 0.0], # offset from current EE pose
|
||||
]
|
||||
```
|
||||
|
||||
## Processing Pipeline Summary
|
||||
|
||||
Here is how the different processors compose. Each arrow is a processor step, and they can be chained in a `RobotProcessorPipeline` or `PolicyProcessorPipeline`:
|
||||
|
||||
```
|
||||
┌─────────────────────────────────────────┐
|
||||
Action Space │ Joint Space ←──IK──→ EE Space │
|
||||
│ ForwardKinematicsJointsToEE │
|
||||
│ InverseKinematicsEEToJoints │
|
||||
└─────────────────────────────────────────┘
|
||||
|
||||
┌─────────────────────────────────────────┐
|
||||
Representation │ Absolute ←────→ Relative │
|
||||
│ RelativeActionsProcessorStep (pre) │
|
||||
│ AbsoluteActionsProcessorStep (post) │
|
||||
└─────────────────────────────────────────┘
|
||||
|
||||
┌─────────────────────────────────────────┐
|
||||
Normalization │ Raw ←────→ Normalized │
|
||||
│ NormalizerProcessorStep (pre) │
|
||||
│ UnnormalizerProcessorStep (post) │
|
||||
└─────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
A typical training preprocessor might chain: `raw absolute joint actions → relative → normalize`. A typical inference postprocessor: `unnormalize → absolute → (optionally IK to joints)`.
|
||||
|
||||
## References
|
||||
|
||||
- [Universal Manipulation Interface (UMI)](https://arxiv.org/abs/2402.10329) - Chi et al., 2024. Defines the relative trajectory action representation and compares it with absolute and delta actions.
|
||||
- [Introduction to Processors](./introduction_processors) - How processor pipelines work in LeRobot.
|
||||
- [`examples/so100_to_so100_EE/`](https://github.com/huggingface/lerobot/tree/main/examples/so100_to_so100_EE) - Complete example of recording and evaluating with EE-space actions.
|
||||
@@ -1,329 +0,0 @@
|
||||
# Adding a New Benchmark
|
||||
|
||||
This guide walks you through adding a new simulation benchmark to LeRobot. Follow the steps in order and use the existing benchmarks as templates.
|
||||
|
||||
A benchmark in LeRobot is a set of [Gymnasium](https://gymnasium.farama.org/) environments that wrap a third-party simulator (like LIBERO or Meta-World) behind a standard `gym.Env` interface. The `lerobot-eval` CLI then runs evaluation uniformly across all benchmarks.
|
||||
|
||||
## Existing benchmarks at a glance
|
||||
|
||||
Before diving in, here is what is already integrated:
|
||||
|
||||
| Benchmark | Env file | Config class | Tasks | Action dim | Processor |
|
||||
| -------------- | ------------------- | ------------------ | ------------------- | ------------ | ---------------------------- |
|
||||
| LIBERO | `envs/libero.py` | `LiberoEnv` | 130 across 5 suites | 7 | `LiberoProcessorStep` |
|
||||
| Meta-World | `envs/metaworld.py` | `MetaworldEnv` | 50 (MT50) | 4 | None |
|
||||
| IsaacLab Arena | Hub-hosted | `IsaaclabArenaEnv` | Configurable | Configurable | `IsaaclabArenaProcessorStep` |
|
||||
|
||||
Use `src/lerobot/envs/libero.py` and `src/lerobot/envs/metaworld.py` as reference implementations.
|
||||
|
||||
## How it all fits together
|
||||
|
||||
### Data flow
|
||||
|
||||
During evaluation, data moves through four stages:
|
||||
|
||||
```
|
||||
1. gym.Env ──→ raw observations (numpy dicts)
|
||||
|
||||
2. Preprocessing ──→ standard LeRobot keys + task description
|
||||
(preprocess_observation, add_envs_task in envs/utils.py)
|
||||
|
||||
3. Processors ──→ env-specific then policy-specific transforms
|
||||
(env_preprocessor, policy_preprocessor)
|
||||
|
||||
4. Policy ──→ select_action() ──→ action tensor
|
||||
then reverse: policy_postprocessor → env_postprocessor → numpy action → env.step()
|
||||
```
|
||||
|
||||
Most benchmarks only need to care about stage 1 (producing observations in the right format) and optionally stage 3 (if env-specific transforms are needed).
|
||||
|
||||
### Environment structure
|
||||
|
||||
`make_env()` returns a nested dict of vectorized environments:
|
||||
|
||||
```python
|
||||
dict[str, dict[int, gym.vector.VectorEnv]]
|
||||
# ^suite ^task_id
|
||||
```
|
||||
|
||||
A single-task env (e.g. PushT) looks like `{"pusht": {0: vec_env}}`.
|
||||
A multi-task benchmark (e.g. LIBERO) looks like `{"libero_spatial": {0: vec0, 1: vec1, ...}, ...}`.
|
||||
|
||||
### How evaluation runs
|
||||
|
||||
All benchmarks are evaluated the same way by `lerobot-eval`:
|
||||
|
||||
1. `make_env()` builds the nested `{suite: {task_id: VectorEnv}}` dict.
|
||||
2. `eval_policy_all()` iterates over every suite and task.
|
||||
3. For each task, it runs `n_episodes` rollouts via `rollout()`.
|
||||
4. Results are aggregated hierarchically: episode, task, suite, overall.
|
||||
5. Metrics include `pc_success` (success rate), `avg_sum_reward`, and `avg_max_reward`.
|
||||
|
||||
The critical piece: your env must return `info["is_success"]` on every `step()` call. This is how the eval loop knows whether a task was completed.
|
||||
|
||||
## What your environment must provide
|
||||
|
||||
LeRobot does not enforce a strict observation schema. Instead it relies on a set of conventions that all benchmarks follow.
|
||||
|
||||
### Env attributes
|
||||
|
||||
Your `gym.Env` must set these attributes:
|
||||
|
||||
| Attribute | Type | Why |
|
||||
| -------------------- | ----- | ---------------------------------------------------- |
|
||||
| `_max_episode_steps` | `int` | `rollout()` uses this to cap episode length |
|
||||
| `task_description` | `str` | Passed to VLA policies as a language instruction |
|
||||
| `task` | `str` | Fallback identifier if `task_description` is not set |
|
||||
|
||||
### Success reporting
|
||||
|
||||
Your `step()` and `reset()` must include `"is_success"` in the `info` dict:
|
||||
|
||||
```python
|
||||
info = {"is_success": True} # or False
|
||||
return observation, reward, terminated, truncated, info
|
||||
```
|
||||
|
||||
### Observations
|
||||
|
||||
The simplest approach is to map your simulator's outputs to the standard keys that `preprocess_observation()` already understands. Do this inside your `gym.Env` (e.g. in a `_format_raw_obs()` helper):
|
||||
|
||||
| Your env should output | LeRobot maps it to | What it is |
|
||||
| ------------------------- | -------------------------- | ------------------------------------- |
|
||||
| `"pixels"` (single array) | `observation.image` | Single camera image, HWC uint8 |
|
||||
| `"pixels"` (dict) | `observation.images.<cam>` | Multiple cameras, each HWC uint8 |
|
||||
| `"agent_pos"` | `observation.state` | Proprioceptive state vector |
|
||||
| `"environment_state"` | `observation.env_state` | Full environment state (e.g. PushT) |
|
||||
| `"robot_state"` | `observation.robot_state` | Nested robot state dict (e.g. LIBERO) |
|
||||
|
||||
If your simulator uses different key names, you have two options:
|
||||
|
||||
1. **Recommended:** Rename them to the standard keys inside your `gym.Env` wrapper.
|
||||
2. **Alternative:** Write an env processor to transform observations after `preprocess_observation()` runs (see step 4 below).
|
||||
|
||||
### Actions
|
||||
|
||||
Actions are continuous numpy arrays in a `gym.spaces.Box`. The dimensionality depends on your benchmark (7 for LIBERO, 4 for Meta-World, etc.). Policies adapt to different action dimensions through their `input_features` / `output_features` config.
|
||||
|
||||
### Feature declaration
|
||||
|
||||
Each `EnvConfig` subclass declares two dicts that tell the policy what to expect:
|
||||
|
||||
- `features` — maps feature names to `PolicyFeature(type, shape)` (e.g. action dim, image shape).
|
||||
- `features_map` — maps raw observation keys to LeRobot convention keys (e.g. `"agent_pos"` to `"observation.state"`).
|
||||
|
||||
## Step by step
|
||||
|
||||
<Tip>
|
||||
At minimum, you need three files: a **gym.Env wrapper**, an **EnvConfig
|
||||
subclass**, and a **factory dispatch branch**. Everything else is optional or
|
||||
documentation.
|
||||
</Tip>
|
||||
|
||||
### Checklist
|
||||
|
||||
| File | Required | Why |
|
||||
| ---------------------------------------- | -------- | ----------------------------------------- |
|
||||
| `src/lerobot/envs/<benchmark>.py` | Yes | Wraps the simulator as a standard gym.Env |
|
||||
| `src/lerobot/envs/configs.py` | Yes | Registers your benchmark for the CLI |
|
||||
| `src/lerobot/envs/factory.py` | Yes | Tells `make_env()` how to build your envs |
|
||||
| `src/lerobot/processor/env_processor.py` | Optional | Custom observation/action transforms |
|
||||
| `src/lerobot/envs/utils.py` | Optional | Only if you need new raw observation keys |
|
||||
| `pyproject.toml` | Yes | Declares benchmark-specific dependencies |
|
||||
| `docs/source/<benchmark>.mdx` | Yes | User-facing documentation page |
|
||||
| `docs/source/_toctree.yml` | Yes | Adds your page to the docs sidebar |
|
||||
|
||||
### 1. The gym.Env wrapper (`src/lerobot/envs/<benchmark>.py`)
|
||||
|
||||
Create a `gym.Env` subclass that wraps the third-party simulator:
|
||||
|
||||
```python
|
||||
class MyBenchmarkEnv(gym.Env):
|
||||
metadata = {"render_modes": ["rgb_array"], "render_fps": <fps>}
|
||||
|
||||
def __init__(self, task_suite, task_id, ...):
|
||||
super().__init__()
|
||||
self.task = <task_name_string>
|
||||
self.task_description = <natural_language_instruction>
|
||||
self._max_episode_steps = <max_steps>
|
||||
self.observation_space = spaces.Dict({...})
|
||||
self.action_space = spaces.Box(low=..., high=..., shape=(...,), dtype=np.float32)
|
||||
|
||||
def reset(self, seed=None, **kwargs):
|
||||
... # return (observation, info) — info must contain {"is_success": False}
|
||||
|
||||
def step(self, action: np.ndarray):
|
||||
... # return (obs, reward, terminated, truncated, info) — info must contain {"is_success": <bool>}
|
||||
|
||||
def render(self):
|
||||
... # return RGB image as numpy array
|
||||
|
||||
def close(self):
|
||||
...
|
||||
```
|
||||
|
||||
Also provide a factory function that returns the nested dict structure:
|
||||
|
||||
```python
|
||||
def create_mybenchmark_envs(
|
||||
task: str,
|
||||
n_envs: int,
|
||||
gym_kwargs: dict | None = None,
|
||||
env_cls: type | None = None,
|
||||
) -> dict[str, dict[int, Any]]:
|
||||
"""Create {suite_name: {task_id: VectorEnv}} for MyBenchmark."""
|
||||
...
|
||||
```
|
||||
|
||||
See `create_libero_envs()` (multi-suite, multi-task) and `create_metaworld_envs()` (difficulty-grouped tasks) for reference.
|
||||
|
||||
### 2. The config (`src/lerobot/envs/configs.py`)
|
||||
|
||||
Register a config dataclass so users can select your benchmark with `--env.type=<name>`:
|
||||
|
||||
```python
|
||||
@EnvConfig.register_subclass("<benchmark_name>")
|
||||
@dataclass
|
||||
class MyBenchmarkEnvConfig(EnvConfig):
|
||||
task: str = "<default_task>"
|
||||
fps: int = <fps>
|
||||
obs_type: str = "pixels_agent_pos"
|
||||
|
||||
features: dict[str, PolicyFeature] = field(default_factory=lambda: {
|
||||
ACTION: PolicyFeature(type=FeatureType.ACTION, shape=(<action_dim>,)),
|
||||
})
|
||||
features_map: dict[str, str] = field(default_factory=lambda: {
|
||||
ACTION: ACTION,
|
||||
"agent_pos": OBS_STATE,
|
||||
"pixels": OBS_IMAGE,
|
||||
})
|
||||
|
||||
def __post_init__(self):
|
||||
... # populate features based on obs_type
|
||||
|
||||
@property
|
||||
def gym_kwargs(self) -> dict:
|
||||
return {"obs_type": self.obs_type, "render_mode": self.render_mode}
|
||||
```
|
||||
|
||||
Key points:
|
||||
|
||||
- The `register_subclass` name is what users pass on the CLI (`--env.type=<name>`).
|
||||
- `features` tells the policy what the environment produces.
|
||||
- `features_map` maps raw observation keys to LeRobot convention keys.
|
||||
|
||||
### 3. The factory dispatch (`src/lerobot/envs/factory.py`)
|
||||
|
||||
Add a branch in `make_env()` to call your factory function:
|
||||
|
||||
```python
|
||||
elif "<benchmark_name>" in cfg.type:
|
||||
from lerobot.envs.<benchmark> import create_<benchmark>_envs
|
||||
|
||||
if cfg.task is None:
|
||||
raise ValueError("<BenchmarkName> requires a task to be specified")
|
||||
|
||||
return create_<benchmark>_envs(
|
||||
task=cfg.task,
|
||||
n_envs=n_envs,
|
||||
gym_kwargs=cfg.gym_kwargs,
|
||||
env_cls=env_cls,
|
||||
)
|
||||
```
|
||||
|
||||
If your benchmark needs an env processor, add it in `make_env_pre_post_processors()`:
|
||||
|
||||
```python
|
||||
if isinstance(env_cfg, MyBenchmarkEnvConfig) or "<benchmark_name>" in env_cfg.type:
|
||||
preprocessor_steps.append(MyBenchmarkProcessorStep())
|
||||
```
|
||||
|
||||
### 4. Env processor (optional — `src/lerobot/processor/env_processor.py`)
|
||||
|
||||
Only needed if your benchmark requires observation transforms beyond what `preprocess_observation()` handles (e.g. image flipping, coordinate conversion):
|
||||
|
||||
```python
|
||||
@dataclass
|
||||
@ProcessorStepRegistry.register(name="<benchmark>_processor")
|
||||
class MyBenchmarkProcessorStep(ObservationProcessorStep):
|
||||
def _process_observation(self, observation):
|
||||
processed = observation.copy()
|
||||
# your transforms here
|
||||
return processed
|
||||
|
||||
def transform_features(self, features):
|
||||
return features # update if shapes change
|
||||
|
||||
def observation(self, observation):
|
||||
return self._process_observation(observation)
|
||||
```
|
||||
|
||||
See `LiberoProcessorStep` for a full example (image rotation, quaternion-to-axis-angle conversion).
|
||||
|
||||
### 5. Dependencies (`pyproject.toml`)
|
||||
|
||||
Add a new optional-dependency group:
|
||||
|
||||
```toml
|
||||
mybenchmark = ["my-benchmark-pkg==1.2.3", "lerobot[scipy-dep]"]
|
||||
```
|
||||
|
||||
Pinning rules:
|
||||
|
||||
- **Always pin** benchmark packages to exact versions for reproducibility (e.g. `metaworld==3.0.0`).
|
||||
- **Add platform markers** when needed (e.g. `; sys_platform == 'linux'`).
|
||||
- **Pin fragile transitive deps** if known (e.g. `gymnasium==1.1.0` for Meta-World).
|
||||
- **Document constraints** in your benchmark doc page.
|
||||
|
||||
Users install with:
|
||||
|
||||
```bash
|
||||
pip install -e ".[mybenchmark]"
|
||||
```
|
||||
|
||||
### 6. Documentation (`docs/source/<benchmark>.mdx`)
|
||||
|
||||
Write a user-facing page following the template in the next section. See `docs/source/libero.mdx` and `docs/source/metaworld.mdx` for full examples.
|
||||
|
||||
### 7. Table of contents (`docs/source/_toctree.yml`)
|
||||
|
||||
Add your benchmark to the "Benchmarks" section:
|
||||
|
||||
```yaml
|
||||
- sections:
|
||||
- local: libero
|
||||
title: LIBERO
|
||||
- local: metaworld
|
||||
title: Meta-World
|
||||
- local: envhub_isaaclab_arena
|
||||
title: NVIDIA IsaacLab Arena Environments
|
||||
- local: <your_benchmark>
|
||||
title: <Your Benchmark Name>
|
||||
title: "Benchmarks"
|
||||
```
|
||||
|
||||
## Verifying your integration
|
||||
|
||||
After completing the steps above, confirm that everything works:
|
||||
|
||||
1. **Install** — `pip install -e ".[mybenchmark]"` and verify the dependency group installs cleanly.
|
||||
2. **Smoke test env creation** — call `make_env()` with your config in Python, check that the returned dict has the expected `{suite: {task_id: VectorEnv}}` shape, and that `reset()` returns observations with the right keys.
|
||||
3. **Run a full eval** — `lerobot-eval --env.type=<name> --env.task=<task> --eval.n_episodes=1 --eval.batch_size=1 --policy.path=<any_compatible_policy>` to exercise the full pipeline end-to-end.
|
||||
4. **Check success detection** — verify that `info["is_success"]` flips to `True` when the task is actually completed. This is what the eval loop uses to compute success rates.
|
||||
|
||||
## Writing a benchmark doc page
|
||||
|
||||
Each benchmark `.mdx` page should include:
|
||||
|
||||
- **Title and description** — 1-2 paragraphs on what the benchmark tests and why it matters.
|
||||
- **Links** — paper, GitHub repo, project website (if available).
|
||||
- **Overview image or GIF.**
|
||||
- **Available tasks** — table of task suites with counts and brief descriptions.
|
||||
- **Installation** — `pip install -e ".[<benchmark>]"` plus any extra steps (env vars, system packages).
|
||||
- **Evaluation** — recommended `lerobot-eval` command with `n_episodes` and `batch_size` for reproducible results. Include single-task and multi-task examples if applicable.
|
||||
- **Policy inputs and outputs** — observation keys with shapes, action space description.
|
||||
- **Recommended evaluation episodes** — how many episodes per task is standard.
|
||||
- **Training** — example `lerobot-train` command.
|
||||
- **Reproducing published results** — link to pretrained model, eval command, results table (if available).
|
||||
|
||||
See `docs/source/libero.mdx` and `docs/source/metaworld.mdx` for complete examples.
|
||||
@@ -48,7 +48,7 @@ python -m lerobot.async_inference.robot_client \
|
||||
--task="dummy" \ # POLICY: The task to run the policy on (`Fold my t-shirt`). Not necessarily defined for all policies, such as `act`
|
||||
--policy_type=your_policy_type \ # POLICY: the type of policy to run (smolvla, act, etc)
|
||||
--pretrained_name_or_path=user/model \ # POLICY: the model name/path on server to the checkpoint to run (e.g., lerobot/smolvla_base)
|
||||
--policy_device=mps \ # POLICY: the device to run the policy on, on the server (cuda, mps, xpu, cpu)
|
||||
--policy_device=mps \ # POLICY: the device to run the policy on, on the server
|
||||
--actions_per_chunk=50 \ # POLICY: the number of actions to output at once
|
||||
--chunk_size_threshold=0.5 \ # CLIENT: the threshold for the chunk size before sending a new observation to the server
|
||||
--aggregate_fn_name=weighted_average \ # CLIENT: the function to aggregate actions on overlapping portions
|
||||
@@ -195,7 +195,6 @@ client_cfg = RobotClientConfig(
|
||||
robot=robot_cfg,
|
||||
server_address="localhost:8080",
|
||||
policy_device="mps",
|
||||
client_device="cpu",
|
||||
policy_type="smolvla",
|
||||
pretrained_name_or_path="<user>/smolvla_async",
|
||||
chunk_size_threshold=0.5,
|
||||
@@ -310,4 +309,4 @@ Asynchronous inference represents a significant advancement in real-time robotic
|
||||
- **Universal Compatibility**: Works with all LeRobot-supported policies, from lightweight ACT models to vision-language models like SmolVLA
|
||||
|
||||
Start experimenting with the default parameters, monitor your action queue sizes, and iteratively refine your setup to achieve optimal performance for your specific use case.
|
||||
If you want to discuss this further, hop into our [Discord community](https://discord.gg/s3KuuzsPFb), or open an issue on our [GitHub repository](https://github.com/huggingface/lerobot/issues).
|
||||
If you want to discuss this further, hop into our [Discord community](https://discord.gg/s3KuuzsPFb), or open an issue on our [GitHub repository](https://github.com/lerobot/lerobot/issues).
|
||||
|
||||
@@ -32,7 +32,7 @@ version = "0.1.0"
|
||||
dependencies = [
|
||||
# your policy-specific dependencies
|
||||
]
|
||||
requires-python = ">= 3.12"
|
||||
requires-python = ">= 3.11"
|
||||
|
||||
[build-system]
|
||||
build-backend = # your-build-backend
|
||||
@@ -41,15 +41,13 @@ requires = # your-build-system
|
||||
|
||||
## Step 2: Define the Policy Configuration
|
||||
|
||||
Create a configuration class that inherits from [`PreTrainedConfig`](https://github.com/huggingface/lerobot/blob/main/src/lerobot/configs/policies.py) and registers your policy type:
|
||||
Here is a template to get you started, customize the parameters and methods as needed for your policy's architecture and training requirements.
|
||||
Create a configuration class that inherits from `PreTrainedConfig` and registers your policy type:
|
||||
|
||||
```python
|
||||
# configuration_my_custom_policy.py
|
||||
from dataclasses import dataclass, field
|
||||
from lerobot.configs.policies import PreTrainedConfig
|
||||
from lerobot.optim.optimizers import AdamWConfig
|
||||
from lerobot.optim.schedulers import CosineDecayWithWarmupSchedulerConfig
|
||||
from lerobot.configs.types import NormalizationMode
|
||||
|
||||
@PreTrainedConfig.register_subclass("my_custom_policy")
|
||||
@dataclass
|
||||
@@ -63,131 +61,61 @@ class MyCustomPolicyConfig(PreTrainedConfig):
|
||||
hidden_dim: Hidden dimension for the policy network
|
||||
# Add your policy-specific parameters here
|
||||
"""
|
||||
|
||||
horizon: int = 50
|
||||
n_action_steps: int = 50
|
||||
hidden_dim: int = 256
|
||||
|
||||
optimizer_lr: float = 1e-4
|
||||
optimizer_weight_decay: float = 1e-4
|
||||
# ...PreTrainedConfig fields...
|
||||
pass
|
||||
|
||||
def __post_init__(self):
|
||||
super().__post_init__()
|
||||
if self.n_action_steps > self.horizon:
|
||||
raise ValueError("n_action_steps cannot exceed horizon")
|
||||
# Add any validation logic here
|
||||
|
||||
def validate_features(self) -> None:
|
||||
"""Validate input/output feature compatibility."""
|
||||
if not self.image_features:
|
||||
raise ValueError("MyCustomPolicy requires at least one image feature.")
|
||||
if self.action_feature is None:
|
||||
raise ValueError("MyCustomPolicy requires 'action' in output_features.")
|
||||
|
||||
def get_optimizer_preset(self) -> AdamWConfig:
|
||||
return AdamWConfig(lr=self.optimizer_lr, weight_decay=self.optimizer_weight_decay)
|
||||
|
||||
def get_scheduler_preset(self):
|
||||
return None
|
||||
|
||||
@property
|
||||
def observation_delta_indices(self) -> list[int] | None:
|
||||
"""Relative timestep offsets the dataset loader provides per observation.
|
||||
|
||||
Return `None` for single-frame policies. For temporal policies that consume
|
||||
multiple past or future frames, return a list of offsets, e.g. `[-20, -10, 0, 10]` for
|
||||
3 past frames at stride 10 and 1 future frame at stride 10.
|
||||
"""
|
||||
return None
|
||||
|
||||
@property
|
||||
def action_delta_indices(self) -> list[int]:
|
||||
"""Relative timestep offsets for the action chunk the dataset loader returns.
|
||||
"""
|
||||
return list(range(self.horizon))
|
||||
|
||||
@property
|
||||
def reward_delta_indices(self) -> None:
|
||||
return None
|
||||
# Implement validation logic for your policy's requirements
|
||||
pass
|
||||
```
|
||||
|
||||
## Step 3: Implement the Policy Class
|
||||
|
||||
Create your policy implementation by inheriting from [`PreTrainedPolicy`](https://github.com/huggingface/lerobot/blob/main/src/lerobot/policies/pretrained.py):
|
||||
Create your policy implementation by inheriting from LeRobot's base `PreTrainedPolicy` class:
|
||||
|
||||
```python
|
||||
# modeling_my_custom_policy.py
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
from typing import Any
|
||||
from typing import Dict, Any
|
||||
|
||||
from lerobot.policies.pretrained import PreTrainedPolicy
|
||||
from lerobot.utils.constants import ACTION
|
||||
from .configuration_my_custom_policy import MyCustomPolicyConfig
|
||||
|
||||
class MyCustomPolicy(PreTrainedPolicy):
|
||||
config_class = MyCustomPolicyConfig # must match the string in @register_subclass
|
||||
config_class = MyCustomPolicyConfig
|
||||
name = "my_custom_policy"
|
||||
|
||||
def __init__(self, config: MyCustomPolicyConfig, dataset_stats: dict[str, Any] = None):
|
||||
def __init__(self, config: MyCustomPolicyConfig, dataset_stats: Dict[str, Any] = None):
|
||||
super().__init__(config, dataset_stats)
|
||||
config.validate_features() # not called automatically by the base class
|
||||
self.config = config
|
||||
self.model = ... # your nn.Module here
|
||||
|
||||
def reset(self):
|
||||
"""Reset episode state."""
|
||||
...
|
||||
|
||||
def get_optim_params(self) -> dict:
|
||||
"""Return parameters to pass to the optimizer (e.g. with per-group lr/wd)."""
|
||||
return {"params": self.parameters()}
|
||||
|
||||
def predict_action_chunk(self, batch: dict[str, torch.Tensor], **kwargs) -> torch.Tensor:
|
||||
"""Return the full action chunk (B, chunk_size, action_dim) for the current observation."""
|
||||
...
|
||||
|
||||
def select_action(self, batch: dict[str, torch.Tensor], **kwargs) -> torch.Tensor:
|
||||
"""Return a single action for the current timestep (called at inference)."""
|
||||
...
|
||||
|
||||
def forward(self, batch: dict[str, torch.Tensor]) -> dict[str, torch.Tensor]:
|
||||
"""Compute the training loss.
|
||||
|
||||
`batch["action_is_pad"]` is a bool mask of shape (B, horizon) that marks
|
||||
timesteps padded because the episode ended before `horizon` steps, you
|
||||
can exclude those from your loss.
|
||||
"""
|
||||
actions = batch[ACTION]
|
||||
action_is_pad = batch.get("action_is_pad")
|
||||
...
|
||||
return {"loss": ...}
|
||||
```
|
||||
|
||||
## Step 4: Add Data Processors
|
||||
|
||||
Create processor functions. For a concrete reference, see [processor_act.py](https://github.com/huggingface/lerobot/blob/main/src/lerobot/policies/act/processor_act.py) or [processor_diffusion.py](https://github.com/huggingface/lerobot/blob/main/src/lerobot/policies/diffusion/processor_diffusion.py).
|
||||
Create processor functions:
|
||||
|
||||
```python
|
||||
# processor_my_custom_policy.py
|
||||
from typing import Any
|
||||
from typing import Dict, Any
|
||||
import torch
|
||||
|
||||
from lerobot.processor import PolicyAction, PolicyProcessorPipeline
|
||||
|
||||
|
||||
def make_my_custom_policy_pre_post_processors(
|
||||
config,
|
||||
dataset_stats: dict[str, dict[str, torch.Tensor]] | None = None,
|
||||
) -> tuple[
|
||||
PolicyProcessorPipeline[dict[str, Any], dict[str, Any]],
|
||||
PolicyProcessorPipeline[PolicyAction, PolicyAction],
|
||||
]:
|
||||
preprocessor = ... # build your PolicyProcessorPipeline for inputs
|
||||
postprocessor = ... # build your PolicyProcessorPipeline for outputs
|
||||
return preprocessor, postprocessor
|
||||
```
|
||||
"""Create preprocessing and postprocessing functions for your policy."""
|
||||
pass # Define your preprocessing and postprocessing logic here
|
||||
|
||||
**Important - function naming:** LeRobot discovers your processor by name. The function **must** be called `make_{policy_name}_pre_post_processors` (matching the string you passed to `@PreTrainedConfig.register_subclass`).
|
||||
```
|
||||
|
||||
## Step 5: Package Initialization
|
||||
|
||||
|
||||
+81
-95
@@ -1,22 +1,12 @@
|
||||
# Cameras
|
||||
|
||||
LeRobot offers multiple options for video capture:
|
||||
LeRobot offers multiple options for video capture, including phone cameras, built-in laptop cameras, external webcams, and Intel RealSense cameras. To efficiently record frames from most cameras, you can use either the `OpenCVCamera` or `RealSenseCamera` class. For additional compatibility details on the `OpenCVCamera` class, refer to the [Video I/O with OpenCV Overview](https://docs.opencv.org/4.x/d0/da7/videoio_overview.html).
|
||||
|
||||
| Class | Supported Cameras |
|
||||
| ----------------- | ----------------------------------- |
|
||||
| `OpenCVCamera` | Phone, built-in laptop, USB webcams |
|
||||
| `ZMQCamera` | Network-connected cameras |
|
||||
| `RealSenseCamera` | Intel RealSense (with depth) |
|
||||
| `Reachy2Camera` | Reachy 2 robot cameras |
|
||||
### Finding your camera
|
||||
|
||||
> [!TIP]
|
||||
> For `OpenCVCamera` compatibility details, see the [Video I/O with OpenCV Overview](https://docs.opencv.org/4.x/d0/da7/videoio_overview.html).
|
||||
To instantiate a camera, you need a camera identifier. This identifier might change if you reboot your computer or re-plug your camera, a behavior mostly dependant on your operating system.
|
||||
|
||||
### Find your camera
|
||||
|
||||
Every camera requires a unique identifier to be instantiated, allowing you to distinguish between multiple connected devices.
|
||||
|
||||
`OpenCVCamera` and `RealSenseCamera` support auto-discovery. Run the command below to list available devices and their identifiers. Note that these identifiers may change after rebooting your computer or re-plugging the camera, depending on your operating system.
|
||||
To find the camera indices of the cameras plugged into your system, run the following script:
|
||||
|
||||
```bash
|
||||
lerobot-find-cameras opencv # or realsense for Intel Realsense cameras
|
||||
@@ -24,7 +14,7 @@ lerobot-find-cameras opencv # or realsense for Intel Realsense cameras
|
||||
|
||||
The output will look something like this if you have two cameras connected:
|
||||
|
||||
```bash
|
||||
```
|
||||
--- Detected Cameras ---
|
||||
Camera #0:
|
||||
Name: OpenCV Camera @ 0
|
||||
@@ -43,37 +33,13 @@ Camera #0:
|
||||
> [!WARNING]
|
||||
> When using Intel RealSense cameras in `macOS`, you could get this [error](https://github.com/IntelRealSense/librealsense/issues/12307): `Error finding RealSense cameras: failed to set power state`, this can be solved by running the same command with `sudo` permissions. Note that using RealSense cameras in `macOS` is unstable.
|
||||
|
||||
`ZMQCamera` and `Reachy2Camera` do not support auto-discovery. They must be configured manually by providing their network address and port or robot SDK settings.
|
||||
## Use Cameras
|
||||
|
||||
## Use cameras
|
||||
Below are two examples, demonstrating how to work with the API.
|
||||
|
||||
### Frame access modes
|
||||
|
||||
All camera classes implement three access modes for capturing frames:
|
||||
|
||||
| Method | Behavior | Blocks? | Best For |
|
||||
| ------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------- | -------------- | ---------------------------------------- |
|
||||
| `read()` | Waits for the camera hardware to return a frame. May block for a long time depending on the camera and SDK. | Yes | Simple scripts, sequential capture |
|
||||
| `async_read(timeout_ms)` | Returns the latest unconsumed frame from background thread. Blocks only if buffer is empty, up to `timeout_ms`. Raises `TimeoutError` if no frame arrives. | With a timeout | Control loops synchronized to camera FPS |
|
||||
| `read_latest(max_age_ms)` | Peeks at the most recent frame in buffer (may be stale). Raises `TimeoutError` if frame is older than `max_age_ms`. | No | UI visualization, logging, monitoring |
|
||||
|
||||
### Usage examples
|
||||
|
||||
The following examples show how to use the camera API to configure and capture frames from different camera types.
|
||||
|
||||
- **Blocking and non-blocking frame capture** using an OpenCV-based camera
|
||||
- **Asynchronous frame capture** using an OpenCV-based camera
|
||||
- **Color and depth capture** using an Intel RealSense camera
|
||||
|
||||
> [!WARNING]
|
||||
> Failing to cleanly disconnect cameras can cause resource leaks. Use the context manager protocol to ensure automatic cleanup:
|
||||
>
|
||||
> ```python
|
||||
> with OpenCVCamera(config) as camera:
|
||||
> ...
|
||||
> ```
|
||||
>
|
||||
> You can also call `connect()` and `disconnect()` manually, but always use a `finally` block for the latter.
|
||||
|
||||
<hfoptions id="shell_restart">
|
||||
<hfoption id="Open CV Camera">
|
||||
|
||||
@@ -94,30 +60,16 @@ config = OpenCVCameraConfig(
|
||||
)
|
||||
|
||||
# Instantiate and connect an `OpenCVCamera`, performing a warm-up read (default).
|
||||
with OpenCVCamera(config) as camera:
|
||||
|
||||
# Read a frame synchronously — blocks until hardware delivers a new frame
|
||||
frame = camera.read()
|
||||
print(f"read() call returned frame with shape:", frame.shape)
|
||||
|
||||
# Read a frame asynchronously with a timeout — returns the latest unconsumed frame or waits up to timeout_ms for a new one
|
||||
try:
|
||||
for i in range(10):
|
||||
frame = camera.async_read(timeout_ms=200)
|
||||
print(f"async_read call returned frame {i} with shape:", frame.shape)
|
||||
except TimeoutError as e:
|
||||
print(f"No frame received within timeout: {e}")
|
||||
|
||||
# Instantly return a frame - returns the most recent frame captured by the camera
|
||||
try:
|
||||
initial_frame = camera.read_latest(max_age_ms=1000)
|
||||
for i in range(10):
|
||||
frame = camera.read_latest(max_age_ms=1000)
|
||||
print(f"read_latest call returned frame {i} with shape:", frame.shape)
|
||||
print(f"Was a new frame received by the camera? {not (initial_frame == frame).any()}")
|
||||
except TimeoutError as e:
|
||||
print(f"Frame too old: {e}")
|
||||
camera = OpenCVCamera(config)
|
||||
camera.connect()
|
||||
|
||||
# Read frames asynchronously in a loop via `async_read(timeout_ms)`
|
||||
try:
|
||||
for i in range(10):
|
||||
frame = camera.async_read(timeout_ms=200)
|
||||
print(f"Async frame {i} shape:", frame.shape)
|
||||
finally:
|
||||
camera.disconnect()
|
||||
```
|
||||
<!-- prettier-ignore-end -->
|
||||
|
||||
@@ -159,10 +111,10 @@ finally:
|
||||
</hfoption>
|
||||
</hfoptions>
|
||||
|
||||
## Use your phone's camera
|
||||
## Use your phone
|
||||
|
||||
<hfoptions id="use phone">
|
||||
<hfoption id="iPhone & macOS">
|
||||
<hfoption id="Mac">
|
||||
|
||||
To use your iPhone as a camera on macOS, enable the Continuity Camera feature:
|
||||
|
||||
@@ -172,49 +124,83 @@ To use your iPhone as a camera on macOS, enable the Continuity Camera feature:
|
||||
|
||||
For more details, visit [Apple support](https://support.apple.com/en-gb/guide/mac-help/mchl77879b8a/mac).
|
||||
|
||||
Your iPhone should be detected automatically when running the camera setup script in the next section.
|
||||
|
||||
</hfoption>
|
||||
<hfoption id="OBS virtual camera">
|
||||
<hfoption id="Linux">
|
||||
|
||||
If you want to use your phone as a camera using OBS, follow these steps to set up a virtual camera.
|
||||
If you want to use your phone as a camera on Linux, follow these steps to set up a virtual camera
|
||||
|
||||
1. _(Linux only) Install `v4l2loopback-dkms` and `v4l-utils`_. These packages create virtual camera devices and verify their settings. Install with:
|
||||
1. _Install `v4l2loopback-dkms` and `v4l-utils`_. Those packages are required to create virtual camera devices (`v4l2loopback`) and verify their settings with the `v4l2-ctl` utility from `v4l-utils`. Install them using:
|
||||
|
||||
```bash
|
||||
<!-- prettier-ignore-start -->
|
||||
```python
|
||||
sudo apt install v4l2loopback-dkms v4l-utils
|
||||
```
|
||||
<!-- prettier-ignore-end -->
|
||||
|
||||
2. _Install the [DroidCam app](https://droidcam.app) on your phone_. This app is available for both iOS and Android.
|
||||
3. _Download and install [OBS Studio](https://obsproject.com)_.
|
||||
4. _Download and install the [DroidCam OBS plugin](https://droidcam.app/obs)_.
|
||||
5. _Start OBS Studio_.
|
||||
2. _Install [DroidCam](https://droidcam.app) on your phone_. This app is available for both iOS and Android.
|
||||
3. _Install [OBS Studio](https://obsproject.com)_. This software will help you manage the camera feed. Install it using [Flatpak](https://flatpak.org):
|
||||
|
||||
6. _Add your phone as a source_. Follow the instructions [here](https://droidcam.app/obs/usage). Be sure to set the resolution to `640x480` to avoid the watermarks.
|
||||
7. _Adjust resolution settings_. In OBS Studio, go to `File > Settings > Video` or `OBS > Preferences... > Video`. Change the `Base(Canvas) Resolution` and the `Output(Scaled) Resolution` to `640x480` by manually typing it.
|
||||
<!-- prettier-ignore-start -->
|
||||
```python
|
||||
flatpak install flathub com.obsproject.Studio
|
||||
```
|
||||
<!-- prettier-ignore-end -->
|
||||
|
||||
4. _Install the DroidCam OBS plugin_. This plugin integrates DroidCam with OBS Studio. Install it with:
|
||||
|
||||
<!-- prettier-ignore-start -->
|
||||
```python
|
||||
flatpak install flathub com.obsproject.Studio.Plugin.DroidCam
|
||||
```
|
||||
<!-- prettier-ignore-end -->
|
||||
|
||||
5. _Start OBS Studio_. Launch with:
|
||||
|
||||
<!-- prettier-ignore-start -->
|
||||
```python
|
||||
flatpak run com.obsproject.Studio
|
||||
```
|
||||
<!-- prettier-ignore-end -->
|
||||
|
||||
6. _Add your phone as a source_. Follow the instructions [here](https://droidcam.app/obs/usage). Be sure to set the resolution to `640x480`.
|
||||
7. _Adjust resolution settings_. In OBS Studio, go to `File > Settings > Video`. Change the `Base(Canvas) Resolution` and the `Output(Scaled) Resolution` to `640x480` by manually typing it in.
|
||||
8. _Start virtual camera_. In OBS Studio, follow the instructions [here](https://obsproject.com/kb/virtual-camera-guide).
|
||||
9. _Verify the virtual camera setup and resolution_.
|
||||
- **Linux**: Use `v4l2-ctl` to list devices and check resolution:
|
||||
```bash
|
||||
v4l2-ctl --list-devices # find VirtualCam and note its /dev/videoX path
|
||||
v4l2-ctl -d /dev/videoX --get-fmt-video # replace with your VirtualCam path
|
||||
```
|
||||
You should see `VirtualCam` listed and resolution `640x480`.
|
||||
- **macOS**: Open Photo Booth or FaceTime and select "OBS Virtual Camera" as the input.
|
||||
- **Windows**: The native Camera app doesn't support virtual cameras. Use a video conferencing app (Zoom, Teams) or run `lerobot-find-cameras opencv` directly to verify.
|
||||
9. _Verify the virtual camera setup_. Use `v4l2-ctl` to list the devices:
|
||||
|
||||
<details>
|
||||
<summary><strong>Troubleshooting</strong></summary>
|
||||
<!-- prettier-ignore-start -->
|
||||
```python
|
||||
v4l2-ctl --list-devices
|
||||
```
|
||||
<!-- prettier-ignore-end -->
|
||||
|
||||
> The virtual camera resolution is incorrect.
|
||||
You should see an entry like:
|
||||
|
||||
Delete the virtual camera source and recreate it. The resolution cannot be changed after creation.
|
||||
```
|
||||
VirtualCam (platform:v4l2loopback-000):
|
||||
/dev/video1
|
||||
```
|
||||
|
||||
> Error reading frame in background thread for OpenCVCamera(X): OpenCVCamera(X) frame width=640 or height=480 do not match configured width=1920 or height=1080.
|
||||
10. _Check the camera resolution_. Use `v4l2-ctl` to ensure that the virtual camera output resolution is `640x480`. Change `/dev/video1` to the port of your virtual camera from the output of `v4l2-ctl --list-devices`.
|
||||
|
||||
This error is caused by OBS Virtual Camera advertising a `1920x1080` resolution despite rescaling. The only fix for now is to comment out the width and height check in `_postprocess_image()`.
|
||||
<!-- prettier-ignore-start -->
|
||||
```python
|
||||
v4l2-ctl -d /dev/video1 --get-fmt-video
|
||||
```
|
||||
<!-- prettier-ignore-end -->
|
||||
|
||||
</details>
|
||||
You should see an entry like:
|
||||
|
||||
```
|
||||
>>> Format Video Capture:
|
||||
>>> Width/Height : 640/480
|
||||
>>> Pixel Format : 'YUYV' (YUYV 4:2:2)
|
||||
```
|
||||
|
||||
Troubleshooting: If the resolution is not correct you will have to delete the Virtual Camera port and try again as it cannot be changed.
|
||||
|
||||
If everything is set up correctly, you can proceed with the rest of the tutorial.
|
||||
|
||||
</hfoption>
|
||||
</hfoptions>
|
||||
|
||||
If everything is set up correctly, your phone will appear as a standard OpenCV camera and can be used with `OpenCVCamera`.
|
||||
|
||||
@@ -1,165 +0,0 @@
|
||||
# Damiao Motors and CAN Bus
|
||||
|
||||
This guide covers setup and usage of Damiao motors with LeRobot via CAN bus communication.
|
||||
|
||||
Currently, only Linux is supported, as the OpenArms CAN adapter only has drivers for Linux.
|
||||
|
||||
## Linux CAN Setup
|
||||
|
||||
Before using Damiao motors, you need to set up the CAN interface on your Linux system.
|
||||
|
||||
### Install CAN Utilities
|
||||
|
||||
```bash
|
||||
sudo apt-get install can-utils
|
||||
```
|
||||
|
||||
### Configure CAN Interface (Manual)
|
||||
|
||||
For standard CAN FD (recommended for OpenArms):
|
||||
|
||||
```bash
|
||||
sudo ip link set can0 down
|
||||
sudo ip link set can0 type can bitrate 1000000 dbitrate 5000000 fd on
|
||||
sudo ip link set can0 up
|
||||
```
|
||||
|
||||
For standard CAN (without FD):
|
||||
|
||||
```bash
|
||||
sudo ip link set can0 down
|
||||
sudo ip link set can0 type can bitrate 1000000
|
||||
sudo ip link set can0 up
|
||||
```
|
||||
|
||||
### Configure CAN Interface (Using LeRobot)
|
||||
|
||||
LeRobot provides a utility script to setup and test CAN interfaces:
|
||||
|
||||
```bash
|
||||
# Setup multiple interfaces (e.g., OpenArms Followers with 2 CAN buses)
|
||||
lerobot-setup-can --mode=setup --interfaces=can0,can1
|
||||
```
|
||||
|
||||
## Debugging CAN Communication
|
||||
|
||||
Use the built-in debug tools to test motor communication:
|
||||
|
||||
```bash
|
||||
# Test motors on all interfaces
|
||||
lerobot-setup-can --mode=test --interfaces=can0,can1
|
||||
|
||||
# Run speed/latency test
|
||||
lerobot-setup-can --mode=speed --interfaces=can0
|
||||
```
|
||||
|
||||
The test mode will scan for motors (IDs 0x01-0x08) and report which ones respond. Example output:
|
||||
|
||||
```
|
||||
can0: UP (CAN FD)
|
||||
Motor 0x01 (joint_1): ✓ FOUND
|
||||
→ Response 0x11 [FD]: 00112233...
|
||||
Motor 0x02 (joint_2): ✓ FOUND
|
||||
Motor 0x03 (joint_3): ✗ No response
|
||||
...
|
||||
Summary: 2/8 motors found
|
||||
```
|
||||
|
||||
## Usage
|
||||
|
||||
### Basic Setup
|
||||
|
||||
```python
|
||||
from lerobot.motors import Motor
|
||||
from lerobot.motors.damiao import DamiaoMotorsBus
|
||||
|
||||
# Define your motors with send/receive CAN IDs
|
||||
motors = {
|
||||
"joint_1": Motor(id=0x01, motor_type_str="dm8009", recv_id=0x11),
|
||||
"joint_2": Motor(id=0x02, motor_type_str="dm4340", recv_id=0x12),
|
||||
"joint_3": Motor(id=0x03, motor_type_str="dm4310", recv_id=0x13),
|
||||
}
|
||||
|
||||
# Create the bus
|
||||
bus = DamiaoMotorsBus(
|
||||
port="can0", # Linux socketcan interface
|
||||
motors=motors,
|
||||
)
|
||||
|
||||
# Connect
|
||||
bus.connect()
|
||||
```
|
||||
|
||||
### Reading Motor States
|
||||
|
||||
```python
|
||||
# Read single motor position (degrees)
|
||||
position = bus.read("Present_Position", "joint_1")
|
||||
|
||||
# Read from multiple motors
|
||||
positions = bus.sync_read("Present_Position") # All motors
|
||||
positions = bus.sync_read("Present_Position", ["joint_1", "joint_2"])
|
||||
|
||||
# Read all states at once (position, velocity, torque)
|
||||
states = bus.sync_read_all_states()
|
||||
# Returns: {'joint_1': {'position': 45.2, 'velocity': 1.3, 'torque': 0.5}, ...}
|
||||
```
|
||||
|
||||
### Writing Motor Commands
|
||||
|
||||
```python
|
||||
# Enable torque
|
||||
bus.enable_torque()
|
||||
|
||||
# Set goal position (degrees)
|
||||
bus.write("Goal_Position", "joint_1", 45.0)
|
||||
|
||||
# Set positions for multiple motors
|
||||
bus.sync_write("Goal_Position", {
|
||||
"joint_1": 45.0,
|
||||
"joint_2": -30.0,
|
||||
"joint_3": 90.0,
|
||||
})
|
||||
|
||||
# Disable torque
|
||||
bus.disable_torque()
|
||||
```
|
||||
|
||||
## Configuration Options
|
||||
|
||||
| Parameter | Default | Description |
|
||||
| -------------- | --------- | ----------------------------------------------------------- |
|
||||
| `port` | - | CAN interface (`can0`) or serial port (`/dev/cu.usbmodem*`) |
|
||||
| `use_can_fd` | `True` | Enable CAN FD for higher data rates |
|
||||
| `bitrate` | `1000000` | Nominal bitrate (1 Mbps) |
|
||||
| `data_bitrate` | `5000000` | CAN FD data bitrate (5 Mbps) |
|
||||
|
||||
## Motor Configuration
|
||||
|
||||
Each motor requires:
|
||||
|
||||
- `id`: CAN ID for sending commands
|
||||
- `motor_type`: One of the supported motor types (e.g., `"dm8009"`, `"dm4340"`)
|
||||
- `recv_id`: CAN ID for receiving responses
|
||||
|
||||
OpenArms default IDs follow the pattern: send ID `0x0N`, receive ID `0x1N` where N is the joint number.
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### No Response from Motors
|
||||
|
||||
1. **Check power**
|
||||
2. **Verify CAN wiring**: Check CAN-H, CAN-L, and GND connections
|
||||
3. **Check motor IDs**: Use Damiao Debugging Tools to verify/configure IDs
|
||||
4. **Test CAN interface**: Run `candump can0` to see if messages are being received
|
||||
5. **Run diagnostics**: `lerobot-setup-can --mode=test --interfaces=can0`
|
||||
|
||||
### Motor Timeout Parameter
|
||||
|
||||
If motors were configured with timeout=0, they won't respond to commands. Use Damiao Debugging Tools to set a non-zero timeout value.
|
||||
|
||||
### Verify CAN FD Status
|
||||
|
||||
```bash
|
||||
ip -d link show can0 | grep fd
|
||||
```
|
||||
@@ -1,278 +0,0 @@
|
||||
# Using Subtasks in LeRobot Datasets
|
||||
|
||||
Subtask support in robotics datasets has proven effective in improving robot reasoning and understanding. Subtasks are particularly useful for:
|
||||
|
||||
- **Hierarchical policies**: Building policies that include subtask predictions to visualize robot reasoning in real time
|
||||
- **Reward modeling**: Helping reward models understand task progression (e.g., SARM-style stage-aware reward models)
|
||||
- **Task decomposition**: Breaking down complex manipulation tasks into atomic, interpretable steps
|
||||
|
||||
LeRobotDataset now supports subtasks as part of its dataset structure, alongside tasks.
|
||||
|
||||
## What are Subtasks?
|
||||
|
||||
While a **task** describes the overall goal (e.g., "Pick up the apple and place it in the basket"), **subtasks** break down the execution into finer-grained steps:
|
||||
|
||||
1. "Approach the apple"
|
||||
2. "Grasp the apple"
|
||||
3. "Lift the apple"
|
||||
4. "Move to basket"
|
||||
5. "Release the apple"
|
||||
|
||||
Each frame in the dataset can be annotated with its corresponding subtask, enabling models to learn and predict these intermediate stages.
|
||||
|
||||
<img
|
||||
src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/lerobot/subtask-asset.png"
|
||||
alt="An overview of subtask annotation showing how frames are labeled with intermediate subtask stages"
|
||||
width="80%"
|
||||
/>
|
||||
|
||||
<p>
|
||||
<em>Figure: Overview of subtask annotation.</em>
|
||||
</p>
|
||||
|
||||
**Reference:** _Subtask-learning based for robot self-assembly in flexible collaborative assembly in manufacturing_, Original Article, Published: 19 April 2022.
|
||||
|
||||
## Dataset Structure
|
||||
|
||||
Subtask information is stored in the dataset metadata:
|
||||
|
||||
```
|
||||
my-dataset/
|
||||
├── data/
|
||||
│ └── ...
|
||||
├── meta/
|
||||
│ ├── info.json
|
||||
│ ├── stats.json
|
||||
│ ├── tasks.parquet
|
||||
│ ├── subtasks.parquet # Subtask index → subtask string mapping
|
||||
│ └── episodes/
|
||||
│ └── ...
|
||||
└── videos/
|
||||
└── ...
|
||||
```
|
||||
|
||||
### Subtasks Parquet File
|
||||
|
||||
The `meta/subtasks.parquet` file maps subtask indices to their natural language descriptions:
|
||||
|
||||
| subtask_index | subtask (index column) |
|
||||
| ------------- | ---------------------- |
|
||||
| 0 | "Approach the apple" |
|
||||
| 1 | "Grasp the apple" |
|
||||
| 2 | "Lift the apple" |
|
||||
| ... | ... |
|
||||
|
||||
### Frame-Level Annotations
|
||||
|
||||
Each frame in the dataset can include a `subtask_index` field that references the subtasks parquet file:
|
||||
|
||||
```python
|
||||
# Example frame data in the parquet file
|
||||
{
|
||||
"index": 42,
|
||||
"timestamp": 1.4,
|
||||
"episode_index": 0,
|
||||
"task_index": 0,
|
||||
"subtask_index": 2, # References "Lift the apple"
|
||||
"observation.state": [...],
|
||||
"action": [...],
|
||||
}
|
||||
```
|
||||
|
||||
## Annotating Datasets with Subtasks
|
||||
|
||||
We provide a HuggingFace Space for easily annotating any LeRobotDataset with subtasks:
|
||||
|
||||
**[https://huggingface.co/spaces/lerobot/annotate](https://huggingface.co/spaces/lerobot/annotate)**
|
||||
|
||||
After completing your annotation:
|
||||
|
||||
1. Click "Push to Hub" to upload your annotated dataset
|
||||
2. You can also run the annotation space locally by following the instructions at [github.com/huggingface/lerobot-annotate](https://github.com/huggingface/lerobot-annotate)
|
||||
|
||||
## Loading Datasets with Subtasks
|
||||
|
||||
When you load a dataset with subtask annotations, the subtask information is automatically available:
|
||||
|
||||
```python
|
||||
from lerobot.datasets.lerobot_dataset import LeRobotDataset
|
||||
|
||||
# Load a dataset with subtask annotations
|
||||
dataset = LeRobotDataset("jadechoghari/collect-fruit-annotated")
|
||||
|
||||
# Access a sample
|
||||
sample = dataset[100]
|
||||
|
||||
# The sample includes both task and subtask information
|
||||
print(sample["task"]) # "Collect the fruit"
|
||||
print(sample["subtask"]) # "Grasp the apple"
|
||||
print(sample["task_index"]) # tensor(0)
|
||||
print(sample["subtask_index"]) # tensor(2)
|
||||
```
|
||||
|
||||
### Checking for Subtask Support
|
||||
|
||||
You can check if a dataset has subtask annotations:
|
||||
|
||||
```python
|
||||
# Check if subtasks are available
|
||||
has_subtasks = (
|
||||
"subtask_index" in dataset.features
|
||||
and dataset.meta.subtasks is not None
|
||||
)
|
||||
|
||||
if has_subtasks:
|
||||
print(f"Dataset has {len(dataset.meta.subtasks)} unique subtasks")
|
||||
print("Subtasks:", list(dataset.meta.subtasks.index))
|
||||
```
|
||||
|
||||
## Using Subtasks for Training
|
||||
|
||||
### With the Tokenizer Processor
|
||||
|
||||
The `TokenizerProcessor` automatically handles subtask tokenization for Vision-Language Action (VLA) models:
|
||||
|
||||
```python
|
||||
from lerobot.processor.tokenizer_processor import TokenizerProcessor
|
||||
from lerobot.processor.pipeline import ProcessorPipeline
|
||||
|
||||
# Create a tokenizer processor
|
||||
tokenizer_processor = TokenizerProcessor(
|
||||
tokenizer_name_or_path="google/paligemma-3b-pt-224",
|
||||
padding="max_length",
|
||||
max_length=64,
|
||||
)
|
||||
|
||||
# The processor will automatically tokenize subtasks if present in the batch
|
||||
# and add them to the observation under:
|
||||
# - "observation.subtask.tokens"
|
||||
# - "observation.subtask.attention_mask"
|
||||
```
|
||||
|
||||
When subtasks are available in the batch, the tokenizer processor adds:
|
||||
|
||||
- `observation.subtask.tokens`: Tokenized subtask text
|
||||
- `observation.subtask.attention_mask`: Attention mask for the subtask tokens
|
||||
|
||||
### DataLoader with Subtasks
|
||||
|
||||
```python
|
||||
import torch
|
||||
from lerobot.datasets.lerobot_dataset import LeRobotDataset
|
||||
|
||||
dataset = LeRobotDataset("jadechoghari/collect-fruit-annotated")
|
||||
|
||||
dataloader = torch.utils.data.DataLoader(
|
||||
dataset,
|
||||
batch_size=16,
|
||||
shuffle=True,
|
||||
)
|
||||
|
||||
for batch in dataloader:
|
||||
# Access subtask information in the batch
|
||||
subtasks = batch["subtask"] # List of subtask strings
|
||||
subtask_indices = batch["subtask_index"] # Tensor of subtask indices
|
||||
|
||||
# Use for training hierarchical policies or reward models
|
||||
print(f"Batch subtasks: {set(subtasks)}")
|
||||
```
|
||||
|
||||
## Example Datasets with Subtask Annotations
|
||||
|
||||
Try loading a dataset with subtask annotations:
|
||||
|
||||
```python
|
||||
from lerobot.datasets.lerobot_dataset import LeRobotDataset
|
||||
|
||||
# Example dataset with subtask annotations
|
||||
dataset = LeRobotDataset("jadechoghari/collect-fruit-annotated")
|
||||
|
||||
# Explore the subtasks
|
||||
print("Available subtasks:")
|
||||
for subtask_name in dataset.meta.subtasks.index:
|
||||
print(f" - {subtask_name}")
|
||||
|
||||
# Get subtask distribution
|
||||
subtask_counts = {}
|
||||
for i in range(len(dataset)):
|
||||
sample = dataset[i]
|
||||
subtask = sample["subtask"]
|
||||
subtask_counts[subtask] = subtask_counts.get(subtask, 0) + 1
|
||||
|
||||
print("\nSubtask distribution:")
|
||||
for subtask, count in sorted(subtask_counts.items(), key=lambda x: -x[1]):
|
||||
print(f" {subtask}: {count} frames")
|
||||
```
|
||||
|
||||
## Use Cases
|
||||
|
||||
### 1. Hierarchical Policy Training
|
||||
|
||||
Train policies that predict both actions and current subtask:
|
||||
|
||||
```python
|
||||
class HierarchicalPolicy(nn.Module):
|
||||
def __init__(self, num_subtasks):
|
||||
super().__init__()
|
||||
self.action_head = nn.Linear(hidden_dim, action_dim)
|
||||
self.subtask_head = nn.Linear(hidden_dim, num_subtasks)
|
||||
|
||||
def forward(self, observations):
|
||||
features = self.encoder(observations)
|
||||
actions = self.action_head(features)
|
||||
subtask_logits = self.subtask_head(features)
|
||||
return actions, subtask_logits
|
||||
```
|
||||
|
||||
### 2. Stage-Aware Reward Modeling (SARM)
|
||||
|
||||
Build reward models that understand task progression:
|
||||
|
||||
```python
|
||||
# SARM predicts:
|
||||
# - Stage: Which subtask is being executed (discrete)
|
||||
# - Progress: How far along the subtask (continuous 0-1)
|
||||
|
||||
class SARMRewardModel(nn.Module):
|
||||
def forward(self, observations):
|
||||
features = self.encoder(observations)
|
||||
stage_logits = self.stage_classifier(features)
|
||||
progress = self.progress_regressor(features)
|
||||
return stage_logits, progress
|
||||
```
|
||||
|
||||
### 3. Progress Visualization
|
||||
|
||||
Monitor robot execution by tracking subtask progression:
|
||||
|
||||
```python
|
||||
def visualize_execution(model, observations):
|
||||
for t, obs in enumerate(observations):
|
||||
action, subtask_logits = model(obs)
|
||||
predicted_subtask = subtask_names[subtask_logits.argmax()]
|
||||
print(f"t={t}: Executing '{predicted_subtask}'")
|
||||
```
|
||||
|
||||
## API Reference
|
||||
|
||||
### LeRobotDataset Properties
|
||||
|
||||
| Property | Type | Description |
|
||||
| --------------------------- | ---------------------- | ------------------------------------------ |
|
||||
| `meta.subtasks` | `pd.DataFrame \| None` | DataFrame mapping subtask names to indices |
|
||||
| `features["subtask_index"]` | `dict` | Feature spec for subtask_index if present |
|
||||
|
||||
### Sample Keys
|
||||
|
||||
When subtasks are available, each sample includes:
|
||||
|
||||
| Key | Type | Description |
|
||||
| --------------- | -------------- | ------------------------------------ |
|
||||
| `subtask_index` | `torch.Tensor` | Integer index of the current subtask |
|
||||
| `subtask` | `str` | Natural language subtask description |
|
||||
|
||||
## Related Resources
|
||||
|
||||
- [SARM Paper](https://arxiv.org/pdf/2509.25358) - Stage-Aware Reward Modeling for Long Horizon Robot Manipulation
|
||||
- [LeRobot Annotate Space](https://huggingface.co/spaces/lerobot/annotate) - Interactive annotation tool
|
||||
- [LeRobotDataset v3.0](./lerobot-dataset-v3) - Dataset format documentation
|
||||
@@ -1,11 +1,5 @@
|
||||
# EarthRover Mini Plus
|
||||
|
||||
<img
|
||||
src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/lerobot/Earth_Rover_Mini_5_240c9adc-4f9e-44b7-982f-5d1dc24af1d8.png.webp"
|
||||
alt="EarthRover Mini Plus"
|
||||
width="70%"
|
||||
/>
|
||||
|
||||
The EarthRover Mini Plus is a fully open source mobile robot that connects through the cloud using the Frodobots SDK. This lets you control the robot and record datasets for training AI models.
|
||||
|
||||
## What You Need
|
||||
@@ -13,7 +7,7 @@ The EarthRover Mini Plus is a fully open source mobile robot that connects throu
|
||||
### Hardware
|
||||
|
||||
- EarthRover Mini robot
|
||||
- Computer with Python 3.12 or newer
|
||||
- Computer with Python 3.10 or newer
|
||||
- Internet connection
|
||||
|
||||
### Setting Up the Frodobots SDK
|
||||
@@ -170,13 +164,13 @@ Once you can drive the robot well, you can start recording data to train AI mode
|
||||
We use Hugging Face to store your data online. First, log in with your token from [Hugging Face settings](https://huggingface.co/settings/tokens):
|
||||
|
||||
```bash
|
||||
hf auth login --token ${HUGGINGFACE_TOKEN} --add-to-git-credential
|
||||
huggingface-cli login --token ${HUGGINGFACE_TOKEN} --add-to-git-credential
|
||||
```
|
||||
|
||||
Store your Hugging Face username:
|
||||
|
||||
```bash
|
||||
HF_USER=$(hf auth whoami | awk -F': *' 'NR==1 {print $2}')
|
||||
HF_USER=$(huggingface-cli whoami | head -n 1)
|
||||
echo $HF_USER
|
||||
```
|
||||
|
||||
@@ -185,16 +179,13 @@ echo $HF_USER
|
||||
Use the standard recording command:
|
||||
|
||||
```bash
|
||||
lerobot-record \
|
||||
python src/lerobot/scripts/lerobot_record.py \
|
||||
--robot.type=earthrover_mini_plus \
|
||||
--teleop.type=keyboard_rover \
|
||||
--dataset.repo_id=your_username/dataset_name \
|
||||
--dataset.num_episodes=2 \
|
||||
--dataset.fps=10 \
|
||||
--dataset.single_task="Navigate around obstacles" \
|
||||
--dataset.streaming_encoding=true \
|
||||
--dataset.encoder_threads=2 \
|
||||
# --dataset.vcodec=auto \
|
||||
--display_data=true
|
||||
```
|
||||
|
||||
@@ -204,26 +195,22 @@ Replace `your_username/dataset_name` with your Hugging Face username and a name
|
||||
|
||||
Your dataset includes:
|
||||
|
||||
**Your Actions (2 features)**:
|
||||
**Your Actions (2 things)**:
|
||||
|
||||
- `linear_velocity`: How much you moved forward/backward
|
||||
- `angular_velocity`: How much you turned left/right
|
||||
- How much you moved forward/backward
|
||||
- How much you turned left/right
|
||||
|
||||
**Robot Observations (24 features)**:
|
||||
**Robot Observations (12 things)**:
|
||||
|
||||
- Front camera video
|
||||
- Rear camera video
|
||||
- Current speed
|
||||
- Battery level
|
||||
- Orientation
|
||||
- GPS (latitude, longitude, signal strength)
|
||||
- Which way the robot is facing
|
||||
- GPS location (latitude, longitude, signal strength)
|
||||
- Network signal strength
|
||||
- Vibration level
|
||||
- Lamp state (on/off)
|
||||
- Accelerometer (x, y, z)
|
||||
- Gyroscope (x, y, z)
|
||||
- Magnetometer (x, y, z)
|
||||
- Wheel RPMs (4 wheels)
|
||||
- Lamp status (on/off)
|
||||
|
||||
### Where Your Data Goes
|
||||
|
||||
|
||||
@@ -155,10 +155,10 @@ Upload your repository to Hugging Face:
|
||||
pip install huggingface_hub
|
||||
|
||||
# Login to Hugging Face
|
||||
hf auth login
|
||||
huggingface-cli login
|
||||
|
||||
# Create a new repository
|
||||
hf repo create my-org/my-custom-env
|
||||
huggingface-cli repo create my-custom-env --type space --org my-org
|
||||
|
||||
# Initialize git and push
|
||||
git init
|
||||
|
||||
@@ -120,15 +120,12 @@ lerobot-record \
|
||||
--display_data=true \
|
||||
--dataset.repo_id=<user>/eval_groot-bimanual \
|
||||
--dataset.num_episodes=10 \
|
||||
--dataset.single_task="Grab and handover the red cube to the other arm" \
|
||||
--dataset.streaming_encoding=true \
|
||||
--dataset.encoder_threads=2 \
|
||||
# --dataset.vcodec=auto \
|
||||
--policy.path=<user>/groot-bimanual \ # your trained model
|
||||
--dataset.episode_time_s=30 \
|
||||
--dataset.single_task="Grab and handover the red cube to the other arm"
|
||||
--policy.path=<user>/groot-bimanual # your trained model
|
||||
--dataset.episode_time_s=30
|
||||
--dataset.reset_time_s=10
|
||||
```
|
||||
|
||||
## License
|
||||
|
||||
This model follows NVIDIA's proprietary license, consistent with the original [GR00T repository](https://github.com/NVIDIA/Isaac-GR00T). Future versions (starting from N1.7) will follow **Apache 2.0 License**.
|
||||
This model follows the **Apache 2.0 License**, consistent with the original [GR00T repository](https://github.com/NVIDIA/Isaac-GR00T).
|
||||
|
||||
@@ -1,269 +0,0 @@
|
||||
# Human-In-the-Loop Data Collection
|
||||
|
||||
Human-In-the-Loop (HIL) data collection lets you improve a trained policy by deploying it on a real robot while a human operator monitors and intervenes when needed. The intervention data (recovery movements and corrections) is recorded alongside autonomous segments, producing a richer training dataset that teaches the policy how to handle failures.
|
||||
|
||||
---
|
||||
|
||||
## Why Human-In-the-Loop?
|
||||
|
||||
Standard behavioral cloning trains policies on successful demonstrations only. During deployment, small errors can compound and push the robot into states never seen during training (distribution shift). HIL data collection addresses this by:
|
||||
|
||||
- Running the trained policy on the real robot
|
||||
- Having a human intervene when the robot is about to fail
|
||||
- Recording the human's recovery and correction as training data
|
||||
- Fine-tuning the policy on the combined dataset
|
||||
|
||||
This produces a policy that not only knows how to perform the task, but also how to recover when things go wrong.
|
||||
|
||||
---
|
||||
|
||||
## How It Works
|
||||
|
||||
During a HIL session, the human operator follows this loop within each episode:
|
||||
|
||||
1. **Watch** the policy run autonomously
|
||||
2. **Pause** when failure is imminent, the robot holds its position
|
||||
3. **Take control** and teleoperate the robot back to a good state (recovery), then correct the behavior
|
||||
4. **Return control to the policy**, the policy resumes autonomous execution
|
||||
5. Repeat steps 2–4 as many times as needed during the episode
|
||||
6. **End the episode** when the task is complete, save and move on to the next rollout
|
||||
|
||||
Both autonomous and human-controlled segments are recorded. The policy and human can alternate control multiple times within a single episode, and the episode continues from the current state after each handoff (no reset required just because intervention happened). This captures autonomous execution, recovery, and correction in one continuous trajectory. After collection, the combined dataset (original demonstrations + HIL data) is used to fine-tune the policy.
|
||||
|
||||
This process can be repeated iteratively: deploy, collect, fine-tune, repeat. Each round targets the current policy's failure modes.
|
||||
|
||||
```
|
||||
┌─────────────────────────────────────────────────────────────────────────┐
|
||||
│ Policy v0 (trained on demos) │
|
||||
│ ↓ │
|
||||
│ HIL Collection (target current failure modes) → Fine-tune → Policy v1 │
|
||||
│ ↓ │
|
||||
│ HIL Collection (target new failure modes) → Fine-tune → Policy v2 │
|
||||
│ ↓ │
|
||||
│ ... (repeat until satisfactory performance) │
|
||||
└─────────────────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Hardware Requirements
|
||||
|
||||
### Teleoperator Requirements
|
||||
|
||||
The `examples/hil` HIL scripts require **teleoperators with active motors** that can:
|
||||
|
||||
- Enable/disable torque programmatically
|
||||
- Move to target positions (to mirror the robot state when pausing)
|
||||
|
||||
**Compatible teleoperators in the current `examples/hil` scripts:**
|
||||
|
||||
- `openarm_mini` - OpenArm Mini
|
||||
- `so_leader` - SO100 / SO101 leader arm
|
||||
|
||||
> [!IMPORTANT]
|
||||
> The provided `examples/hil` commands default to `bi_openarm_follower` + `openarm_mini`.
|
||||
> `so_follower` + `so_leader` configs are also registered and can be used via CLI flags.
|
||||
|
||||
---
|
||||
|
||||
## Script
|
||||
|
||||
A single script handles both synchronous and RTC-based inference. Toggle RTC with `--rtc.enabled=true`:
|
||||
|
||||
| Mode | Flag | Models |
|
||||
| ------------------------ | -------------------- | --------------------- |
|
||||
| Standard (default) | _(no flag needed)_ | ACT, Diffusion Policy |
|
||||
| Real-Time Chunking (RTC) | `--rtc.enabled=true` | Pi0, Pi0.5, SmolVLA |
|
||||
|
||||
---
|
||||
|
||||
## Step-by-Step Guide
|
||||
|
||||
### Step 1: Pre-train a Base Policy
|
||||
|
||||
First, train a policy on your demonstration dataset:
|
||||
|
||||
```bash
|
||||
python src/lerobot/scripts/lerobot_train.py \
|
||||
--dataset.repo_id=your-username/demo-dataset \
|
||||
--policy.type=pi0 \
|
||||
--output_dir=outputs/pretrain \
|
||||
--batch_size=32 \
|
||||
--steps=50000
|
||||
```
|
||||
|
||||
### Step 2: Collect HIL Data
|
||||
|
||||
**Standard inference (ACT, Diffusion Policy):**
|
||||
|
||||
```bash
|
||||
python examples/hil/hil_data_collection.py \
|
||||
--robot.type=bi_openarm_follower \
|
||||
--robot.left_arm_config.port=can1 \
|
||||
--robot.left_arm_config.side=left \
|
||||
--robot.right_arm_config.port=can0 \
|
||||
--robot.right_arm_config.side=right \
|
||||
--robot.cameras='{left_wrist: {type: opencv, index_or_path: "/dev/video0", width: 1280, height: 720, fps: 30}, right_wrist: {type: opencv, index_or_path: "/dev/video4", width: 1280, height: 720, fps: 30}, base: {type: opencv, index_or_path: "/dev/video2", width: 640, height: 480, fps: 30}}' \
|
||||
--teleop.type=openarm_mini \
|
||||
--teleop.port_left=/dev/ttyACM0 \
|
||||
--teleop.port_right=/dev/ttyACM1 \
|
||||
--policy.path=outputs/pretrain/checkpoints/last/pretrained_model \
|
||||
--dataset.repo_id=your-username/hil-dataset \
|
||||
--dataset.single_task="Fold the T-shirt properly" \
|
||||
--dataset.fps=30 \
|
||||
--dataset.episode_time_s=1000 \
|
||||
--dataset.num_episodes=50 \
|
||||
--interpolation_multiplier=2
|
||||
```
|
||||
|
||||
**With RTC for large models (Pi0, Pi0.5, SmolVLA):**
|
||||
|
||||
For models with high inference latency, enable RTC for smooth execution:
|
||||
|
||||
```bash
|
||||
python examples/hil/hil_data_collection.py \
|
||||
--rtc.enabled=true \
|
||||
--rtc.execution_horizon=20 \
|
||||
--rtc.max_guidance_weight=5.0 \
|
||||
--rtc.prefix_attention_schedule=LINEAR \
|
||||
--robot.type=bi_openarm_follower \
|
||||
--robot.left_arm_config.port=can1 \
|
||||
--robot.left_arm_config.side=left \
|
||||
--robot.right_arm_config.port=can0 \
|
||||
--robot.right_arm_config.side=right \
|
||||
--robot.cameras='{left_wrist: {type: opencv, index_or_path: "/dev/video0", width: 1280, height: 720, fps: 30}, right_wrist: {type: opencv, index_or_path: "/dev/video4", width: 1280, height: 720, fps: 30}, base: {type: opencv, index_or_path: "/dev/video2", width: 640, height: 480, fps: 30}}' \
|
||||
--teleop.type=openarm_mini \
|
||||
--teleop.port_left=/dev/ttyACM0 \
|
||||
--teleop.port_right=/dev/ttyACM1 \
|
||||
--policy.path=outputs/pretrain/checkpoints/last/pretrained_model \
|
||||
--dataset.repo_id=your-username/hil-rtc-dataset \
|
||||
--dataset.single_task="Fold the T-shirt properly" \
|
||||
--dataset.fps=30 \
|
||||
--dataset.episode_time_s=1000 \
|
||||
--dataset.num_episodes=50 \
|
||||
--interpolation_multiplier=3
|
||||
```
|
||||
|
||||
**Controls (Conceptual):**
|
||||
|
||||
The interaction model is:
|
||||
|
||||
- **Pause input**: pause autonomous policy execution
|
||||
- **Takeover input**: transfer control to the human operator and record intervention data
|
||||
- **Return-to-policy input**: hand control back to the policy and continue the same episode
|
||||
- **Episode control inputs**: save/re-record/stop/reset as needed
|
||||
|
||||
Exact key/pedal bindings can differ across scripts and hardware integrations. Use each script's printed controls as the source of truth for the concrete mapping on your setup.
|
||||
|
||||
**The HIL Protocol:**
|
||||
|
||||
1. Watch the policy run autonomously (teleop is idle/free)
|
||||
2. When you see imminent failure, trigger the **pause input**
|
||||
- Policy stops
|
||||
- Teleoperator moves to match robot position (torque enabled)
|
||||
- No frames recorded during pause
|
||||
3. Trigger the **takeover input** to take control
|
||||
- Teleoperator torque disabled, free to move
|
||||
- **Recovery**: Teleoperate the robot back to a good state
|
||||
- **Correction**: Correct the behavior
|
||||
- All movements are recorded
|
||||
4. Trigger the **return-to-policy input**
|
||||
- Policy resumes autonomous execution from the current state
|
||||
- You can intervene again at any time (repeat steps 2–4)
|
||||
5. End and save the episode when the task is complete (or episode time limit is reached)
|
||||
6. **Reset**: Teleop moves to robot position, you can move the robot to the starting position
|
||||
7. Start the next episode
|
||||
|
||||
**Foot Pedal Setup (Linux):**
|
||||
|
||||
If using a USB foot pedal (PCsensor FootSwitch), ensure access:
|
||||
|
||||
```bash
|
||||
sudo setfacl -m u:$USER:rw /dev/input/by-id/usb-PCsensor_FootSwitch-event-kbd
|
||||
```
|
||||
|
||||
### Step 3: Fine-tune the Policy
|
||||
|
||||
Fine-tune on the **combined** dataset (`demo-dataset` + `hil-dataset` merged together):
|
||||
|
||||
```bash
|
||||
python src/lerobot/scripts/lerobot_train.py \
|
||||
--dataset.repo_id=your-username/hil-dataset \
|
||||
--policy.type=pi0 \
|
||||
--policy.pretrained_path=outputs/pretrain/checkpoints/last/pretrained_model \
|
||||
--output_dir=outputs/hil_finetune \
|
||||
--steps=20000
|
||||
```
|
||||
|
||||
Then deploy the fine-tuned policy and repeat from Step 2 to target its remaining failure modes.
|
||||
|
||||
---
|
||||
|
||||
## Tips for Effective HIL Collection
|
||||
|
||||
### When to Intervene
|
||||
|
||||
Intervene when you see:
|
||||
|
||||
- Robot about to make an irreversible mistake
|
||||
- Robot hesitating or showing uncertain behavior
|
||||
- Robot deviating from the expected trajectory
|
||||
|
||||
### Recovery: Teleoperating Back to a Good State
|
||||
|
||||
During recovery, teleoperate the robot back to a state where:
|
||||
|
||||
- The robot is in a familiar, in-distribution configuration
|
||||
- The current subtask can still be completed
|
||||
- The recovery trajectory itself is informative training data
|
||||
|
||||
### Quality of Corrections
|
||||
|
||||
During correction:
|
||||
|
||||
- Provide **confident, clean** trajectories
|
||||
- Complete the current subtask fully
|
||||
- Don't overcorrect or add unnecessary movements
|
||||
|
||||
---
|
||||
|
||||
## Related Work
|
||||
|
||||
This HIL data collection approach builds on ideas from interactive imitation learning:
|
||||
|
||||
- **DAgger** (Ross et al., 2011) introduced the core idea: instead of only training on expert demonstrations, query the expert for corrections on states the _learner_ visits. This breaks the compounding-error cycle of standard behavioral cloning by iteratively collecting on-policy data.
|
||||
|
||||
- **HG-DAgger** (Kelly et al., 2019) made this practical for robotics: a human expert monitors the robot and only intervenes when needed, rather than labeling every state. The gating between autonomous and human control is exactly the pause → takeover → return-to-policy loop used in the scripts here.
|
||||
|
||||
- **RaC** (Hu et al., 2025) scales this loop to long-horizon tasks by explicitly decomposing interventions into **recovery** (teleoperating back to a good state) and **correction** (demonstrating the right behavior from there). This decomposition is the protocol followed by the HIL scripts in `examples/hil`.
|
||||
|
||||
- **π0.6/RECAP** (Physical Intelligence, 2025) applies the same iterative collect-and-finetune loop at scale with VLA models, showing that even large pretrained policies benefit substantially from targeted human corrections on their own failure modes. π0.6 is trained using RECAP.
|
||||
|
||||
```bibtex
|
||||
@article{ross2011dagger,
|
||||
title={A Reduction of Imitation Learning and Structured Prediction to No-Regret Online Learning},
|
||||
author={Ross, Stéphane and Gordon, Geoffrey and Bagnell, Drew},
|
||||
journal={Proceedings of the Fourteenth International Conference on Artificial Intelligence and Statistics},
|
||||
year={2011}
|
||||
}
|
||||
|
||||
@article{kelly2019hgdagger,
|
||||
title={HG-DAgger: Interactive Imitation Learning with Human Experts},
|
||||
author={Kelly, Michael and Sidrane, Chelsea and Driggs-Campbell, Katherine and Kochenderfer, Mykel J},
|
||||
journal={arXiv preprint arXiv:1810.02890},
|
||||
year={2019}
|
||||
}
|
||||
|
||||
@article{hu2025rac,
|
||||
title={RaC: Robot Learning for Long-Horizon Tasks by Scaling Recovery and Correction},
|
||||
author={Hu, Zheyuan and Wu, Robyn and Enock, Naveen and Li, Jasmine and Kadakia, Riya and Erickson, Zackory and Kumar, Aviral},
|
||||
journal={arXiv preprint arXiv:2509.07953},
|
||||
year={2025}
|
||||
}
|
||||
|
||||
@article{pi2025recap,
|
||||
title={π0.6: a VLA That Learns From Experience},
|
||||
author={Physical Intelligence},
|
||||
year={2025}
|
||||
}
|
||||
```
|
||||
+5
-11
@@ -224,15 +224,12 @@ lerobot-record \
|
||||
--teleop.port=/dev/tty.usbmodem1201 \
|
||||
--teleop.id=right \
|
||||
--teleop.side=right \
|
||||
--dataset.repo_id=<USER>/hand_record_test_with_video_data \
|
||||
--dataset.repo_id=nepyope/hand_record_test_with_video_data \
|
||||
--dataset.single_task="Hand recording test with video data" \
|
||||
--dataset.num_episodes=1 \
|
||||
--dataset.episode_time_s=5 \
|
||||
--dataset.push_to_hub=true \
|
||||
--dataset.private=true \
|
||||
--dataset.streaming_encoding=true \
|
||||
--dataset.encoder_threads=2 \
|
||||
# --dataset.vcodec=auto \
|
||||
--display_data=true
|
||||
```
|
||||
|
||||
@@ -244,7 +241,7 @@ lerobot-replay \
|
||||
--robot.port=/dev/tty.usbmodem58760432281 \
|
||||
--robot.id=right \
|
||||
--robot.side=right \
|
||||
--dataset.repo_id=<USER>/hand_record_test_with_camera \
|
||||
--dataset.repo_id=nepyope/hand_record_test_with_camera \
|
||||
--dataset.episode=0
|
||||
```
|
||||
|
||||
@@ -252,13 +249,13 @@ lerobot-replay \
|
||||
|
||||
```bash
|
||||
lerobot-train \
|
||||
--dataset.repo_id=<USER>/hand_record_test_with_video_data \
|
||||
--dataset.repo_id=nepyope/hand_record_test_with_video_data \
|
||||
--policy.type=act \
|
||||
--output_dir=outputs/train/hopejr_hand \
|
||||
--job_name=hopejr \
|
||||
--policy.device=mps \
|
||||
--wandb.enable=true \
|
||||
--policy.repo_id=<USER>/hand_test_policy
|
||||
--policy.repo_id=nepyope/hand_test_policy
|
||||
```
|
||||
|
||||
### Evaluate
|
||||
@@ -273,11 +270,8 @@ lerobot-record \
|
||||
--robot.side=right \
|
||||
--robot.cameras='{"main": {"type": "opencv", "index_or_path": 0, "width": 640, "height": 480, "fps": 30}}' \
|
||||
--display_data=false \
|
||||
--dataset.repo_id=<USER>/eval_hopejr \
|
||||
--dataset.repo_id=nepyope/eval_hopejr \
|
||||
--dataset.single_task="Evaluate hopejr hand policy" \
|
||||
--dataset.num_episodes=10 \
|
||||
--dataset.streaming_encoding=true \
|
||||
--dataset.encoder_threads=2 \
|
||||
# --dataset.vcodec=auto \
|
||||
--policy.path=outputs/train/hopejr_hand/checkpoints/last/pretrained_model
|
||||
```
|
||||
|
||||
@@ -159,13 +159,13 @@ We use the Hugging Face hub features for uploading your dataset. If you haven't
|
||||
Add your token to the CLI by running this command:
|
||||
|
||||
```bash
|
||||
hf auth login --token ${HUGGINGFACE_TOKEN} --add-to-git-credential
|
||||
huggingface-cli login --token ${HUGGINGFACE_TOKEN} --add-to-git-credential
|
||||
```
|
||||
|
||||
Then store your Hugging Face repository name in a variable:
|
||||
|
||||
```bash
|
||||
HF_USER=$(NO_COLOR=1 hf auth whoami | awk -F': *' 'NR==1 {print $2}')
|
||||
HF_USER=$(hf auth whoami | head -n 1)
|
||||
echo $HF_USER
|
||||
```
|
||||
|
||||
@@ -185,10 +185,7 @@ lerobot-record \
|
||||
--display_data=true \
|
||||
--dataset.repo_id=${HF_USER}/record-test \
|
||||
--dataset.num_episodes=5 \
|
||||
--dataset.single_task="Grab the black cube" \
|
||||
--dataset.streaming_encoding=true \
|
||||
# --dataset.vcodec=auto \
|
||||
--dataset.encoder_threads=2
|
||||
--dataset.single_task="Grab the black cube"
|
||||
```
|
||||
</hfoption>
|
||||
<hfoption id="API example">
|
||||
@@ -327,7 +324,7 @@ You can look for other LeRobot datasets on the hub by searching for `LeRobot` [t
|
||||
You can also push your local dataset to the Hub manually, running:
|
||||
|
||||
```bash
|
||||
hf upload ${HF_USER}/record-test ~/.cache/huggingface/lerobot/{repo-id} --repo-type dataset
|
||||
huggingface-cli upload ${HF_USER}/record-test ~/.cache/huggingface/lerobot/{repo-id} --repo-type dataset
|
||||
```
|
||||
|
||||
#### Record function
|
||||
@@ -424,7 +421,7 @@ robot = SO100Follower(robot_config)
|
||||
robot.connect()
|
||||
|
||||
dataset = LeRobotDataset("<hf_username>/<dataset_repo_id>", episodes=[episode_idx])
|
||||
actions = dataset.select_columns("action")
|
||||
actions = dataset.hf_dataset.select_columns("action")
|
||||
|
||||
log_say(f"Replaying episode {episode_idx}")
|
||||
for idx in range(dataset.num_frames):
|
||||
@@ -491,7 +488,7 @@ If your local computer doesn't have a powerful GPU you could utilize Google Cola
|
||||
Once training is done, upload the latest checkpoint with:
|
||||
|
||||
```bash
|
||||
hf upload ${HF_USER}/act_so101_test \
|
||||
huggingface-cli upload ${HF_USER}/act_so101_test \
|
||||
outputs/train/act_so101_test/checkpoints/last/pretrained_model
|
||||
```
|
||||
|
||||
@@ -499,7 +496,7 @@ You can also upload intermediate checkpoints with:
|
||||
|
||||
```bash
|
||||
CKPT=010000
|
||||
hf upload ${HF_USER}/act_so101_test${CKPT} \
|
||||
huggingface-cli upload ${HF_USER}/act_so101_test${CKPT} \
|
||||
outputs/train/act_so101_test/checkpoints/${CKPT}/pretrained_model
|
||||
```
|
||||
|
||||
@@ -518,9 +515,6 @@ lerobot-record \
|
||||
--display_data=false \
|
||||
--dataset.repo_id=${HF_USER}/eval_so100 \
|
||||
--dataset.single_task="Put lego brick into the transparent box" \
|
||||
--dataset.streaming_encoding=true \
|
||||
--dataset.encoder_threads=2 \
|
||||
# --dataset.vcodec=auto \
|
||||
# <- Teleop optional if you want to teleoperate in between episodes \
|
||||
# --teleop.type=so100_leader \
|
||||
# --teleop.port=/dev/ttyACM0 \
|
||||
|
||||
+18
-113
@@ -1,120 +1,44 @@
|
||||
# Installation
|
||||
|
||||
This guide uses `conda` (via miniforge) to manage environments (recommended). If you prefer another environment manager (e.g. `uv`, `venv`), ensure you have Python >=3.12 and support PyTorch >= 2.10, then skip ahead to [Environment Setup](#step-2-environment-setup).
|
||||
|
||||
## Step 1 (`conda` only): Install [`miniforge`](https://conda-forge.org/download/)
|
||||
## Install [`miniforge`](https://conda-forge.org/download/)
|
||||
|
||||
```bash
|
||||
wget "https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-$(uname)-$(uname -m).sh"
|
||||
bash Miniforge3-$(uname)-$(uname -m).sh
|
||||
```
|
||||
|
||||
## Step 2: Environment Setup
|
||||
## Environment Setup
|
||||
|
||||
Create a virtual environment with Python 3.12:
|
||||
Create a virtual environment with Python 3.10, using conda:
|
||||
|
||||
<!-- prettier-ignore-start -->
|
||||
<hfoptions id="create_venv">
|
||||
<hfoption id="conda">
|
||||
```bash
|
||||
conda create -y -n lerobot python=3.12
|
||||
conda create -y -n lerobot python=3.10
|
||||
```
|
||||
</hfoption>
|
||||
<hfoption id="uv (PyTorch >= 2.10 only)">
|
||||
```bash
|
||||
uv python install 3.12
|
||||
uv venv --python 3.12
|
||||
```
|
||||
</hfoption>
|
||||
</hfoptions>
|
||||
<!-- prettier-ignore-end -->
|
||||
|
||||
Then activate your virtual environment, you have to do this each time you open a shell to use lerobot:
|
||||
Then activate your conda environment, you have to do this each time you open a shell to use lerobot:
|
||||
|
||||
<!-- prettier-ignore-start -->
|
||||
|
||||
<hfoptions id="activate_venv">
|
||||
<hfoption id="conda">
|
||||
```bash
|
||||
conda activate lerobot
|
||||
```
|
||||
|
||||
> [!NOTE]
|
||||
> When installing LeRobot inside WSL (Windows Subsystem for Linux), make sure to also install `evdev`:
|
||||
>
|
||||
> ```bash
|
||||
> conda install evdev -c conda-forge
|
||||
> ```
|
||||
|
||||
</hfoption>
|
||||
<hfoption id="uv (PyTorch >= 2.10 only)">
|
||||
```bash
|
||||
# Linux/macOS
|
||||
source .venv/bin/activate
|
||||
# Windows PowerShell
|
||||
.venv\Scripts\activate
|
||||
```
|
||||
|
||||
> [!NOTE]
|
||||
> When installing LeRobot inside WSL (Windows Subsystem for Linux), make sure to also install `evdev`:
|
||||
>
|
||||
> ```bash
|
||||
> sudo apt install libevdev-dev
|
||||
> uv pip install evdev
|
||||
> ```
|
||||
|
||||
</hfoption>
|
||||
</hfoptions>
|
||||
<!-- prettier-ignore-end -->
|
||||
|
||||
### Install `ffmpeg` (for video decoding)
|
||||
|
||||
LeRobot uses [TorchCodec](https://github.com/meta-pytorch/torchcodec) for video decoding by default, which requires `ffmpeg`.
|
||||
|
||||
> [!NOTE]
|
||||
> **Platform support:** TorchCodec is **not available** on macOS Intel (x86_64), Linux ARM (aarch64, arm64, armv7l), or Windows with PyTorch < 2.8. On these platforms, LeRobot automatically falls back to `pyav` — so you do not need to install `ffmpeg` and can skip to Step 3.
|
||||
|
||||
If your platform supports TorchCodec, install `ffmpeg` using one of the methods below:
|
||||
|
||||
<!-- prettier-ignore-start -->
|
||||
|
||||
<hfoptions id="install_ffmpeg">
|
||||
<hfoption id="conda (any PyTorch version)">
|
||||
|
||||
Install `ffmpeg` in your conda environment. This works with **all PyTorch versions** and is **required for PyTorch < 2.10**:
|
||||
When using `conda`, install `ffmpeg` in your environment:
|
||||
|
||||
```bash
|
||||
conda install ffmpeg -c conda-forge
|
||||
```
|
||||
|
||||
> [!TIP]
|
||||
> This usually installs `ffmpeg 8.X` with the `libsvtav1` encoder. If you run into issues (e.g. `libsvtav1` missing — check with `ffmpeg -encoders` — or a version mismatch with `torchcodec`), you can explicitly install `ffmpeg 7.1.1` using:
|
||||
> This usually installs `ffmpeg 7.X` for your platform compiled with the `libsvtav1` encoder. If `libsvtav1` is not supported (check supported encoders with `ffmpeg -encoders`), you can:
|
||||
>
|
||||
> - _[On any platform]_ Explicitly install `ffmpeg 7.X` using:
|
||||
>
|
||||
> ```bash
|
||||
> conda install ffmpeg=7.1.1 -c conda-forge
|
||||
> ```
|
||||
>
|
||||
> - _[On Linux only]_ If you want to bring your own ffmpeg: Install [ffmpeg build dependencies](https://trac.ffmpeg.org/wiki/CompilationGuide/Ubuntu#GettheDependencies) and [compile ffmpeg from source with libsvtav1](https://trac.ffmpeg.org/wiki/CompilationGuide/Ubuntu#libsvtav1), and make sure you use the corresponding ffmpeg binary to your install with `which ffmpeg`.
|
||||
|
||||
</hfoption>
|
||||
<hfoption id="uv (PyTorch >= 2.10 only)">
|
||||
|
||||
Starting with **PyTorch >= 2.10** (TorchCodec ≥ 0.10), TorchCodec can dynamically link to a system-wide `ffmpeg` installation. This is useful when using `uv` or other non-`conda` environment managers:
|
||||
|
||||
```bash
|
||||
# Ubuntu/Debian
|
||||
sudo apt install ffmpeg
|
||||
|
||||
# macOS (Apple Silicon)
|
||||
brew install ffmpeg
|
||||
```
|
||||
|
||||
> [!IMPORTANT]
|
||||
> System-wide `ffmpeg` is **only supported with PyTorch >= 2.10** (TorchCodec ≥ 0.10). For older PyTorch versions, you **must** use `conda install ffmpeg -c conda-forge` instead.
|
||||
|
||||
</hfoption>
|
||||
</hfoptions>
|
||||
<!-- prettier-ignore-end -->
|
||||
|
||||
## Step 3: Install LeRobot 🤗
|
||||
## Install LeRobot 🤗
|
||||
|
||||
### From Source
|
||||
|
||||
@@ -127,45 +51,23 @@ cd lerobot
|
||||
|
||||
Then, install the library in editable mode. This is useful if you plan to contribute to the code.
|
||||
|
||||
<!-- prettier-ignore-start -->
|
||||
<hfoptions id="install_lerobot_src">
|
||||
<hfoption id="conda">
|
||||
```bash
|
||||
pip install -e .
|
||||
```
|
||||
</hfoption>
|
||||
<hfoption id="uv">
|
||||
```bash
|
||||
uv pip install -e .
|
||||
```
|
||||
</hfoption>
|
||||
</hfoptions>
|
||||
<!-- prettier-ignore-end -->
|
||||
|
||||
### Installation from PyPI
|
||||
|
||||
**Core Library:**
|
||||
Install the base package with:
|
||||
|
||||
<!-- prettier-ignore-start -->
|
||||
<hfoptions id="install_lerobot_pypi">
|
||||
<hfoption id="conda">
|
||||
```bash
|
||||
pip install lerobot
|
||||
```
|
||||
</hfoption>
|
||||
<hfoption id="uv">
|
||||
```bash
|
||||
uv pip install lerobot
|
||||
```
|
||||
</hfoption>
|
||||
</hfoptions>
|
||||
<!-- prettier-ignore-end -->
|
||||
|
||||
_This installs only the default dependencies._
|
||||
|
||||
**Extra Features:**
|
||||
To install additional functionality, use one of the following (If you are using `uv`, replace `pip install` with `uv pip install` in the commands below.):
|
||||
To install additional functionality, use one of the following:
|
||||
|
||||
```bash
|
||||
pip install 'lerobot[all]' # All available features
|
||||
@@ -179,10 +81,13 @@ _Replace `[...]` with your desired features._
|
||||
For a full list of optional dependencies, see:
|
||||
https://pypi.org/project/lerobot/
|
||||
|
||||
> [!NOTE]
|
||||
> For lerobot 0.4.0, if you want to install pi, you will have to do: `pip install "lerobot[pi]@git+https://github.com/huggingface/lerobot.git"`
|
||||
|
||||
### Troubleshooting
|
||||
|
||||
If you encounter build errors, you may need to install additional dependencies: `cmake`, `build-essential`, and `ffmpeg libs`.
|
||||
To install these for Linux run:
|
||||
To install these for linux run:
|
||||
|
||||
```bash
|
||||
sudo apt-get install cmake build-essential python3-dev pkg-config libavformat-dev libavcodec-dev libavdevice-dev libavutil-dev libswscale-dev libswresample-dev libavfilter-dev
|
||||
@@ -192,7 +97,7 @@ For other systems, see: [Compiling PyAV](https://pyav.org/docs/develop/overview/
|
||||
|
||||
## Optional dependencies
|
||||
|
||||
LeRobot provides optional extras for specific functionalities. Multiple extras can be combined (e.g., `.[aloha,feetech]`). For all available extras, refer to `pyproject.toml`. If you are using `uv`, replace `pip install` with `uv pip install` in the commands below.
|
||||
LeRobot provides optional extras for specific functionalities. Multiple extras can be combined (e.g., `.[aloha,feetech]`). For all available extras, refer to `pyproject.toml`.
|
||||
|
||||
### Simulations
|
||||
|
||||
|
||||
@@ -1,11 +1,5 @@
|
||||
# LeKiwi
|
||||
|
||||
<img
|
||||
src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/lerobot/1740517739083.jpeg"
|
||||
alt="LeKiwi"
|
||||
width="70%"
|
||||
/>
|
||||
|
||||
In the steps below, we explain how to assemble the LeKiwi mobile robot.
|
||||
|
||||
## Source the parts
|
||||
@@ -279,13 +273,13 @@ We use the Hugging Face hub features for uploading your dataset. If you haven't
|
||||
Add your token to the CLI by running this command:
|
||||
|
||||
```bash
|
||||
hf auth login --token ${HUGGINGFACE_TOKEN} --add-to-git-credential
|
||||
huggingface-cli login --token ${HUGGINGFACE_TOKEN} --add-to-git-credential
|
||||
```
|
||||
|
||||
Then store your Hugging Face repository name in a variable:
|
||||
|
||||
```bash
|
||||
HF_USER=$(hf auth whoami | awk -F': *' 'NR==1 {print $2}')
|
||||
HF_USER=$(huggingface-cli whoami | head -n 1)
|
||||
echo $HF_USER
|
||||
```
|
||||
|
||||
|
||||
@@ -41,10 +41,7 @@ lerobot-record \
|
||||
--display_data=true \
|
||||
--dataset.repo_id=${HF_USER}/record-test \
|
||||
--dataset.num_episodes=5 \
|
||||
--dataset.single_task="Grab the black cube" \
|
||||
--dataset.streaming_encoding=true \
|
||||
# --dataset.vcodec=auto \
|
||||
--dataset.encoder_threads=2
|
||||
--dataset.single_task="Grab the black cube"
|
||||
```
|
||||
|
||||
See the [recording guide](./il_robots#record-a-dataset) for more details.
|
||||
|
||||
+80
-90
@@ -1,61 +1,36 @@
|
||||
# LIBERO
|
||||
|
||||
LIBERO is a benchmark designed to study **lifelong robot learning** — the idea that robots need to keep learning and adapting with their users over time, not just be pretrained once. It provides a set of standardized manipulation tasks that focus on **knowledge transfer**: how well a robot can apply what it has already learned to new situations. By evaluating on LIBERO, different algorithms can be compared fairly and researchers can build on each other's work.
|
||||
**LIBERO** is a benchmark designed to study **lifelong robot learning**. The idea is that robots won’t just be pretrained once in a factory, they’ll need to keep learning and adapting with their human users over time. This ongoing adaptation is called **lifelong learning in decision making (LLDM)**, and it’s a key step toward building robots that become truly personalized helpers.
|
||||
|
||||
- Paper: [Benchmarking Knowledge Transfer for Lifelong Robot Learning](https://arxiv.org/abs/2306.03310)
|
||||
- GitHub: [Lifelong-Robot-Learning/LIBERO](https://github.com/Lifelong-Robot-Learning/LIBERO)
|
||||
- Project website: [libero-project.github.io](https://libero-project.github.io)
|
||||
- 📄 [LIBERO paper](https://arxiv.org/abs/2306.03310)
|
||||
- 💻 [Original LIBERO repo](https://github.com/Lifelong-Robot-Learning/LIBERO)
|
||||
|
||||
To make progress on this challenge, LIBERO provides a set of standardized tasks that focus on **knowledge transfer**: how well a robot can apply what it has already learned to new situations. By evaluating on LIBERO, different algorithms can be compared fairly and researchers can build on each other’s work.
|
||||
|
||||
LIBERO includes **five task suites**:
|
||||
|
||||
- **LIBERO-Spatial (`libero_spatial`)** – tasks that require reasoning about spatial relations.
|
||||
- **LIBERO-Object (`libero_object`)** – tasks centered on manipulating different objects.
|
||||
- **LIBERO-Goal (`libero_goal`)** – goal-conditioned tasks where the robot must adapt to changing targets.
|
||||
- **LIBERO-90 (`libero_90`)** – 90 short-horizon tasks from the LIBERO-100 collection.
|
||||
- **LIBERO-Long (`libero_10`)** – 10 long-horizon tasks from the LIBERO-100 collection.
|
||||
|
||||
Together, these suites cover **130 tasks**, ranging from simple object manipulations to complex multi-step scenarios. LIBERO is meant to grow over time, and to serve as a shared benchmark where the community can test and improve lifelong learning algorithms.
|
||||
|
||||

|
||||
|
||||
## Available tasks
|
||||
## Evaluating with LIBERO
|
||||
|
||||
LIBERO includes **five task suites** covering **130 tasks**, ranging from simple object manipulations to complex multi-step scenarios:
|
||||
At **LeRobot**, we ported [LIBERO](https://github.com/Lifelong-Robot-Learning/LIBERO) into our framework and used it mainly to **evaluate [SmolVLA](https://huggingface.co/docs/lerobot/en/smolvla)**, our lightweight Vision-Language-Action model.
|
||||
|
||||
| Suite | CLI name | Tasks | Description |
|
||||
| -------------- | ---------------- | ----- | -------------------------------------------------- |
|
||||
| LIBERO-Spatial | `libero_spatial` | 10 | Tasks requiring reasoning about spatial relations |
|
||||
| LIBERO-Object | `libero_object` | 10 | Tasks centered on manipulating different objects |
|
||||
| LIBERO-Goal | `libero_goal` | 10 | Goal-conditioned tasks with changing targets |
|
||||
| LIBERO-90 | `libero_90` | 90 | Short-horizon tasks from the LIBERO-100 collection |
|
||||
| LIBERO-Long | `libero_10` | 10 | Long-horizon tasks from the LIBERO-100 collection |
|
||||
LIBERO is now part of our **multi-eval supported simulation**, meaning you can benchmark your policies either on a **single suite of tasks** or across **multiple suites at once** with just a flag.
|
||||
|
||||
## Installation
|
||||
|
||||
After following the LeRobot installation instructions:
|
||||
|
||||
```bash
|
||||
pip install -e ".[libero]"
|
||||
```
|
||||
|
||||
<Tip>
|
||||
LIBERO requires Linux (`sys_platform == 'linux'`). LeRobot uses MuJoCo for simulation — set the rendering backend before training or evaluation:
|
||||
|
||||
```bash
|
||||
export MUJOCO_GL=egl # for headless servers (HPC, cloud)
|
||||
```
|
||||
|
||||
</Tip>
|
||||
|
||||
## Evaluation
|
||||
|
||||
### Default evaluation (recommended)
|
||||
|
||||
Evaluate across the four standard suites (10 episodes per task):
|
||||
|
||||
```bash
|
||||
lerobot-eval \
|
||||
--policy.path="your-policy-id" \
|
||||
--env.type=libero \
|
||||
--env.task=libero_spatial,libero_object,libero_goal,libero_10 \
|
||||
--eval.batch_size=1 \
|
||||
--eval.n_episodes=10 \
|
||||
--env.max_parallel_tasks=1
|
||||
```
|
||||
To Install LIBERO, after following LeRobot official instructions, just do:
|
||||
`pip install -e ".[libero]"`
|
||||
|
||||
### Single-suite evaluation
|
||||
|
||||
Evaluate on one LIBERO suite:
|
||||
Evaluate a policy on one LIBERO suite:
|
||||
|
||||
```bash
|
||||
lerobot-eval \
|
||||
@@ -67,13 +42,14 @@ lerobot-eval \
|
||||
```
|
||||
|
||||
- `--env.task` picks the suite (`libero_object`, `libero_spatial`, etc.).
|
||||
- `--env.task_ids` restricts to specific task indices (`[0]`, `[1,2,3]`, etc.). Omit to run all tasks in the suite.
|
||||
- `--eval.batch_size` controls how many environments run in parallel.
|
||||
- `--eval.n_episodes` sets how many episodes to run per task.
|
||||
- `--eval.n_episodes` sets how many episodes to run in total.
|
||||
|
||||
---
|
||||
|
||||
### Multi-suite evaluation
|
||||
|
||||
Benchmark a policy across multiple suites at once by passing a comma-separated list:
|
||||
Benchmark a policy across multiple suites at once:
|
||||
|
||||
```bash
|
||||
lerobot-eval \
|
||||
@@ -84,49 +60,50 @@ lerobot-eval \
|
||||
--eval.n_episodes=2
|
||||
```
|
||||
|
||||
### Control mode
|
||||
- Pass a comma-separated list to `--env.task` for multi-suite evaluation.
|
||||
|
||||
LIBERO supports two control modes — `relative` (default) and `absolute`. Different VLA checkpoints are trained with different action parameterizations, so make sure the mode matches your policy:
|
||||
### Control Mode
|
||||
|
||||
```bash
|
||||
--env.control_mode=relative # or "absolute"
|
||||
```
|
||||
LIBERO now supports two control modes: relative and absolute. This matters because different VLA checkpoints are trained with different mode of action to output hence control parameterizations.
|
||||
You can switch them with: `env.control_mode = "relative"` and `env.control_mode = "absolute"`
|
||||
|
||||
### Policy inputs and outputs
|
||||
|
||||
**Observations:**
|
||||
When using LIBERO through LeRobot, policies interact with the environment via **observations** and **actions**:
|
||||
|
||||
- `observation.state` — 8-dim proprioceptive features (eef position, axis-angle orientation, gripper qpos)
|
||||
- `observation.images.image` — main camera view (`agentview_image`), HWC uint8
|
||||
- `observation.images.image2` — wrist camera view (`robot0_eye_in_hand_image`), HWC uint8
|
||||
- **Observations**
|
||||
- `observation.state` – proprioceptive features (agent state).
|
||||
- `observation.images.image` – main camera view (`agentview_image`).
|
||||
- `observation.images.image2` – wrist camera view (`robot0_eye_in_hand_image`).
|
||||
|
||||
<Tip warning={true}>
|
||||
LeRobot enforces the `.images.*` prefix for visual features. Ensure your
|
||||
policy config `input_features` use the same naming keys, and that your dataset
|
||||
metadata keys follow this convention. If your data contains different keys,
|
||||
you must rename the observations to match what the policy expects, since
|
||||
naming keys are encoded inside the normalization statistics layer.
|
||||
</Tip>
|
||||
⚠️ **Note:** LeRobot enforces the `.images.*` prefix for any multi-modal visual features. Always ensure that your policy config `input_features` use the same naming keys, and that your dataset metadata keys follow this convention during evaluation.
|
||||
If your data contains different keys, you must rename the observations to match what the policy expects, since naming keys are encoded inside the normalization statistics layer.
|
||||
This will be fixed with the upcoming Pipeline PR.
|
||||
|
||||
**Actions:**
|
||||
- **Actions**
|
||||
- Continuous control values in a `Box(-1, 1, shape=(7,))` space.
|
||||
|
||||
- Continuous control in `Box(-1, 1, shape=(7,))` — 6D end-effector delta + 1D gripper
|
||||
We also provide a notebook for quick testing:
|
||||
Training with LIBERO
|
||||
|
||||
### Recommended evaluation episodes
|
||||
## Training with LIBERO
|
||||
|
||||
For reproducible benchmarking, use **10 episodes per task** across all four standard suites (Spatial, Object, Goal, Long). This gives 400 total episodes and matches the protocol used for published results.
|
||||
When training on LIBERO tasks, make sure your dataset parquet and metadata keys follow the LeRobot convention.
|
||||
|
||||
## Training
|
||||
The environment expects:
|
||||
|
||||
### Dataset
|
||||
- `observation.state` → 8-dim agent state
|
||||
- `observation.images.image` → main camera (`agentview_image`)
|
||||
- `observation.images.image2` → wrist camera (`robot0_eye_in_hand_image`)
|
||||
|
||||
We provide a preprocessed LIBERO dataset fully compatible with LeRobot:
|
||||
⚠️ Cleaning the dataset upfront is **cleaner and more efficient** than remapping keys inside the code.
|
||||
To avoid potential mismatches and key errors, we provide a **preprocessed LIBERO dataset** that is fully compatible with the current LeRobot codebase and requires no additional manipulation:
|
||||
👉 [HuggingFaceVLA/libero](https://huggingface.co/datasets/HuggingFaceVLA/libero)
|
||||
|
||||
- [HuggingFaceVLA/libero](https://huggingface.co/datasets/HuggingFaceVLA/libero)
|
||||
For reference, here is the **original dataset** published by Physical Intelligence:
|
||||
👉 [physical-intelligence/libero](https://huggingface.co/datasets/physical-intelligence/libero)
|
||||
|
||||
For reference, the original dataset published by Physical Intelligence:
|
||||
|
||||
- [physical-intelligence/libero](https://huggingface.co/datasets/physical-intelligence/libero)
|
||||
---
|
||||
|
||||
### Example training command
|
||||
|
||||
@@ -143,39 +120,52 @@ lerobot-train \
|
||||
--batch_size=4 \
|
||||
--eval.batch_size=1 \
|
||||
--eval.n_episodes=1 \
|
||||
--eval_freq=1000
|
||||
--eval_freq=1000 \
|
||||
```
|
||||
|
||||
## Reproducing published results
|
||||
---
|
||||
|
||||
We reproduce the results of Pi0.5 on the LIBERO benchmark. We take the Physical Intelligence LIBERO base model (`pi05_libero`) and finetune for an additional 6k steps in bfloat16, with batch size of 256 on 8 H100 GPUs using the [HuggingFace LIBERO dataset](https://huggingface.co/datasets/HuggingFaceVLA/libero).
|
||||
### Note on rendering
|
||||
|
||||
The finetuned model: [lerobot/pi05_libero_finetuned](https://huggingface.co/lerobot/pi05_libero_finetuned)
|
||||
LeRobot uses MuJoCo for simulation. You need to set the rendering backend before training or evaluation:
|
||||
|
||||
### Evaluation command
|
||||
- `export MUJOCO_GL=egl` → for headless servers (e.g. HPC, cloud)
|
||||
|
||||
## Reproducing π₀.₅ results
|
||||
|
||||
We reproduce the results of π₀.₅ on the LIBERO benchmark using the LeRobot implementation. We take the Physical Intelligence LIBERO base model (`pi05_libero`) and finetune for an additional 6k steps in bfloat16, with batch size of 256 on 8 H100 GPUs using the [HuggingFace LIBERO dataset](https://huggingface.co/datasets/HuggingFaceVLA/libero).
|
||||
|
||||
The finetuned model can be found here:
|
||||
|
||||
- **π₀.₅ LIBERO**: [lerobot/pi05_libero_finetuned](https://huggingface.co/lerobot/pi05_libero_finetuned)
|
||||
|
||||
We then evaluate the finetuned model using the LeRobot LIBERO implementation, by running the following command:
|
||||
|
||||
```bash
|
||||
lerobot-eval \
|
||||
--output_dir=./eval_logs/ \
|
||||
--output_dir=/logs/ \
|
||||
--env.type=libero \
|
||||
--env.task=libero_spatial,libero_object,libero_goal,libero_10 \
|
||||
--eval.batch_size=1 \
|
||||
--eval.n_episodes=10 \
|
||||
--policy.path=pi05_libero_finetuned \
|
||||
--policy.n_action_steps=10 \
|
||||
--output_dir=./eval_logs/ \
|
||||
--env.max_parallel_tasks=1
|
||||
```
|
||||
|
||||
We set `n_action_steps=10`, matching the original OpenPI implementation.
|
||||
**Note:** We set `n_action_steps=10`, similar to the original OpenPI implementation.
|
||||
|
||||
### Results
|
||||
|
||||
| Model | LIBERO Spatial | LIBERO Object | LIBERO Goal | LIBERO 10 | Average |
|
||||
| ------------------- | -------------- | ------------- | ----------- | --------- | -------- |
|
||||
| **Pi0.5 (LeRobot)** | 97.0 | 99.0 | 98.0 | 96.0 | **97.5** |
|
||||
We obtain the following results on the LIBERO benchmark:
|
||||
|
||||
These results are consistent with the [original results](https://github.com/Physical-Intelligence/openpi/tree/main/examples/libero#results) reported by Physical Intelligence:
|
||||
| Model | LIBERO Spatial | LIBERO Object | LIBERO Goal | LIBERO 10 | Average |
|
||||
| -------- | -------------- | ------------- | ----------- | --------- | -------- |
|
||||
| **π₀.₅** | 97.0 | 99.0 | 98.0 | 96.0 | **97.5** |
|
||||
|
||||
| Model | LIBERO Spatial | LIBERO Object | LIBERO Goal | LIBERO 10 | Average |
|
||||
| ------------------ | -------------- | ------------- | ----------- | --------- | --------- |
|
||||
| **Pi0.5 (OpenPI)** | 98.8 | 98.2 | 98.0 | 92.4 | **96.85** |
|
||||
These results are consistent with the original [results](https://github.com/Physical-Intelligence/openpi/tree/main/examples/libero#results) reported by Physical Intelligence:
|
||||
|
||||
| Model | LIBERO Spatial | LIBERO Object | LIBERO Goal | LIBERO 10 | Average |
|
||||
| -------- | -------------- | ------------- | ----------- | --------- | --------- |
|
||||
| **π₀.₅** | 98.8 | 98.2 | 98.0 | 92.4 | **96.85** |
|
||||
|
||||
+47
-97
@@ -1,111 +1,32 @@
|
||||
# Meta-World
|
||||
|
||||
Meta-World is an open-source simulation benchmark for **multi-task and meta reinforcement learning** in continuous-control robotic manipulation. It bundles 50 diverse manipulation tasks using everyday objects and a common tabletop Sawyer arm, providing a standardized playground to test whether algorithms can learn many different tasks and generalize quickly to new ones.
|
||||
Meta-World is a well-designed, open-source simulation benchmark for multi-task and meta reinforcement learning in continuous-control robotic manipulation. It gives researchers a shared, realistic playground to test whether algorithms can _learn many different tasks_ and _generalize quickly to new ones_ — two central challenges for real-world robotics.
|
||||
|
||||
- Paper: [Meta-World: A Benchmark and Evaluation for Multi-Task and Meta Reinforcement Learning](https://arxiv.org/abs/1910.10897)
|
||||
- GitHub: [Farama-Foundation/Metaworld](https://github.com/Farama-Foundation/Metaworld)
|
||||
- Project website: [metaworld.farama.org](https://metaworld.farama.org)
|
||||
- 📄 [MetaWorld paper](https://arxiv.org/pdf/1910.10897)
|
||||
- 💻 [Original MetaWorld repo](https://github.com/Farama-Foundation/Metaworld)
|
||||
|
||||

|
||||
|
||||
## Available tasks
|
||||
## Why Meta-World matters
|
||||
|
||||
Meta-World provides 50 tasks organized into difficulty groups. In LeRobot, you can evaluate on individual tasks, difficulty groups, or the full MT50 suite:
|
||||
- **Diverse, realistic tasks.** Meta-World bundles a large suite of simulated manipulation tasks (50 in the MT50 suite) using everyday objects and a common tabletop Sawyer arm. This diversity exposes algorithms to a wide variety of dynamics, contacts and goal specifications while keeping a consistent control and observation structure.
|
||||
- **Focus on generalization and multi-task learning.** By evaluating across task distributions that share structure but differ in goals and objects, Meta-World reveals whether an agent truly learns transferable skills rather than overfitting to a narrow task.
|
||||
- **Standardized evaluation protocol.** It provides clear evaluation modes and difficulty splits, so different methods can be compared fairly across easy, medium, hard and very-hard regimes.
|
||||
- **Empirical insight.** Past evaluations on Meta-World show impressive progress on some fronts, but also highlight that current multi-task and meta-RL methods still struggle with large, diverse task sets. That gap points to important research directions.
|
||||
|
||||
| Group | CLI name | Tasks | Description |
|
||||
| ---------- | -------------------- | ----- | ------------------------------------------------------ |
|
||||
| Easy | `easy` | 28 | Tasks with simple dynamics and single-step goals |
|
||||
| Medium | `medium` | 11 | Tasks requiring multi-step reasoning |
|
||||
| Hard | `hard` | 6 | Tasks with complex contacts and precise manipulation |
|
||||
| Very Hard | `very_hard` | 5 | The most challenging tasks in the suite |
|
||||
| MT50 (all) | Comma-separated list | 50 | All 50 tasks — the most challenging multi-task setting |
|
||||
## What it enables in LeRobot
|
||||
|
||||
You can also pass individual task names directly (e.g., `assembly-v3`, `dial-turn-v3`).
|
||||
In LeRobot, you can evaluate any policy or vision-language-action (VLA) model on Meta-World tasks and get a clear success-rate measure. The integration is designed to be straightforward:
|
||||
|
||||
We provide a LeRobot-ready dataset for Meta-World MT50 on the HF Hub: [lerobot/metaworld_mt50](https://huggingface.co/datasets/lerobot/metaworld_mt50). This dataset is formatted for the MT50 evaluation that uses all 50 tasks with fixed object/goal positions and one-hot task vectors for consistency.
|
||||
- We provide a LeRobot-ready dataset for Meta-World (MT50) on the HF Hub: `https://huggingface.co/datasets/lerobot/metaworld_mt50`.
|
||||
- This dataset is formatted for the MT50 evaluation that uses all 50 tasks (the most challenging multi-task setting).
|
||||
- MT50 gives the policy a one-hot task vector and uses fixed object/goal positions for consistency.
|
||||
|
||||
## Installation
|
||||
- Task descriptions and the exact keys required for evaluation are available in the repo/dataset — use these to ensure your policy outputs the right success signals.
|
||||
|
||||
After following the LeRobot installation instructions:
|
||||
## Quick start, train a SmolVLA policy on Meta-World
|
||||
|
||||
```bash
|
||||
pip install -e ".[metaworld]"
|
||||
```
|
||||
|
||||
<Tip warning={true}>
|
||||
If you encounter an `AssertionError: ['human', 'rgb_array', 'depth_array']` when running Meta-World environments, this is a mismatch between Meta-World and your Gymnasium version. Fix it with:
|
||||
|
||||
```bash
|
||||
pip install "gymnasium==1.1.0"
|
||||
```
|
||||
|
||||
</Tip>
|
||||
|
||||
## Evaluation
|
||||
|
||||
### Default evaluation (recommended)
|
||||
|
||||
Evaluate on the medium difficulty split (a good balance of coverage and compute):
|
||||
|
||||
```bash
|
||||
lerobot-eval \
|
||||
--policy.path="your-policy-id" \
|
||||
--env.type=metaworld \
|
||||
--env.task=medium \
|
||||
--eval.batch_size=1 \
|
||||
--eval.n_episodes=10
|
||||
```
|
||||
|
||||
### Single-task evaluation
|
||||
|
||||
Evaluate on a specific task:
|
||||
|
||||
```bash
|
||||
lerobot-eval \
|
||||
--policy.path="your-policy-id" \
|
||||
--env.type=metaworld \
|
||||
--env.task=assembly-v3 \
|
||||
--eval.batch_size=1 \
|
||||
--eval.n_episodes=10
|
||||
```
|
||||
|
||||
### Multi-task evaluation
|
||||
|
||||
Evaluate across multiple tasks or difficulty groups:
|
||||
|
||||
```bash
|
||||
lerobot-eval \
|
||||
--policy.path="your-policy-id" \
|
||||
--env.type=metaworld \
|
||||
--env.task=assembly-v3,dial-turn-v3,handle-press-side-v3 \
|
||||
--eval.batch_size=1 \
|
||||
--eval.n_episodes=10
|
||||
```
|
||||
|
||||
- `--env.task` accepts explicit task lists (comma-separated) or difficulty groups (e.g., `easy`, `medium`, `hard`, `very_hard`).
|
||||
- `--eval.batch_size` controls how many environments run in parallel.
|
||||
- `--eval.n_episodes` sets how many episodes to run per task.
|
||||
|
||||
### Policy inputs and outputs
|
||||
|
||||
**Observations:**
|
||||
|
||||
- `observation.image` — single camera view (`corner2`), 480x480 HWC uint8
|
||||
- `observation.state` — 4-dim proprioceptive state (end-effector position + gripper)
|
||||
|
||||
**Actions:**
|
||||
|
||||
- Continuous control in `Box(-1, 1, shape=(4,))` — 3D end-effector delta + 1D gripper
|
||||
|
||||
### Recommended evaluation episodes
|
||||
|
||||
For reproducible benchmarking, use **10 episodes per task**. For the full MT50 suite this gives 500 total episodes. If you care about generalization, run on the full MT50 — it is intentionally challenging and reveals strengths/weaknesses better than a few narrow tasks.
|
||||
|
||||
## Training
|
||||
|
||||
### Example training command
|
||||
|
||||
Train a SmolVLA policy on a subset of Meta-World tasks:
|
||||
Example command to train a SmolVLA policy on a subset of tasks:
|
||||
|
||||
```bash
|
||||
lerobot-train \
|
||||
@@ -123,8 +44,37 @@ lerobot-train \
|
||||
--eval_freq=1000
|
||||
```
|
||||
|
||||
Notes:
|
||||
|
||||
- `--env.task` accepts explicit task lists (comma separated) or difficulty groups (e.g., `env.task="hard"`).
|
||||
- Adjust `batch_size`, `steps`, and `eval_freq` to match your compute budget.
|
||||
- **Gymnasium Assertion Error**: if you encounter an error like
|
||||
`AssertionError: ['human', 'rgb_array', 'depth_array']` when running MetaWorld environments, this comes from a mismatch between MetaWorld and your Gymnasium version.
|
||||
We recommend using:
|
||||
|
||||
```bash
|
||||
pip install "gymnasium==1.1.0"
|
||||
```
|
||||
|
||||
to ensure proper compatibility.
|
||||
|
||||
## Quick start — evaluate a trained policy
|
||||
|
||||
To evaluate a trained policy on the Meta-World medium difficulty split:
|
||||
|
||||
```bash
|
||||
lerobot-eval \
|
||||
--policy.path="your-policy-id" \
|
||||
--env.type=metaworld \
|
||||
--env.task=medium \
|
||||
--eval.batch_size=1 \
|
||||
--eval.n_episodes=2
|
||||
```
|
||||
|
||||
This will run episodes and return per-task success rates using the standard Meta-World evaluation keys.
|
||||
|
||||
## Practical tips
|
||||
|
||||
- Use the one-hot task conditioning for multi-task training (MT10/MT50 conventions) so policies have explicit task context.
|
||||
- If you care about generalization, run on the full MT50 suite — it’s intentionally challenging and reveals strengths/weaknesses better than a few narrow tasks.
|
||||
- Use the one-hot task conditioning for multi-task training (MT10 / MT50 conventions) so policies have explicit task context.
|
||||
- Inspect the dataset task descriptions and the `info["is_success"]` keys when writing post-processing or logging so your success metrics line up with the benchmark.
|
||||
- Adjust `batch_size`, `steps`, and `eval_freq` to match your compute budget.
|
||||
|
||||
@@ -1,388 +0,0 @@
|
||||
# Multitask DiT Policy
|
||||
|
||||
Multitask Diffusion Transformer (DiT) Policy is an evolution of the original Diffusion Policy architecture, which leverages a large DiT with text and vision conditioning for multitask robot learning. This implementation supports both diffusion and flow matching objectives for action generation, enabling robots to perform diverse manipulation tasks conditioned on language instructions.
|
||||
|
||||
## Model Overview
|
||||
|
||||
The model uses:
|
||||
|
||||
- **CLIP Vision Encoder**: Processes RGB images from multiple camera views
|
||||
- **CLIP Text Encoder**: Encodes language task instructions (frozen weights with learnable projection)
|
||||
- **Diffusion Transformer**: Predicts action sequences conditioned on observations and language
|
||||
- **Two Objectives**: Supports both diffusion (DDPM/DDIM) and flow matching for action generation
|
||||
|
||||
This model is exciting because you can achieve extremely high dexterity, competitive with multi-billion parameter
|
||||
VLAs, with only ~450M parameters and significantly less training.
|
||||
|
||||
## Installation Requirements
|
||||
|
||||
Multitask DiT Policy has additional dependencies. Install it with:
|
||||
|
||||
```bash
|
||||
pip install lerobot[multi_task_dit]
|
||||
```
|
||||
|
||||
This will install all necessary dependencies including the HuggingFace Transformers library for CLIP models.
|
||||
|
||||
## Usage
|
||||
|
||||
To use Multitask DiT in your LeRobot configuration, specify the policy type as:
|
||||
|
||||
```python
|
||||
policy.type=multi_task_dit
|
||||
```
|
||||
|
||||
## Training
|
||||
|
||||
### Basic Training Command
|
||||
|
||||
Here's a complete training command for training Multitask DiT on your dataset:
|
||||
|
||||
```bash
|
||||
lerobot-train \
|
||||
--dataset.repo_id=YOUR_DATASET \
|
||||
--output_dir=./outputs/multitask_dit_training \
|
||||
--batch_size=32 \
|
||||
--steps=5000 \
|
||||
--save_freq=500 \
|
||||
--log_freq=100 \
|
||||
--policy.type=multi_task_dit \
|
||||
--policy.device=cuda \
|
||||
--policy.repo_id="HF_USER/multitask-dit-your-robot" \
|
||||
--wandb.enable=true
|
||||
```
|
||||
|
||||
### Recommended Hyperparameters and Dataset Details (30Hz Control Frequency)
|
||||
|
||||
For reliable performance, start with these suggested default hyperparameters:
|
||||
|
||||
```bash
|
||||
lerobot-train \
|
||||
--dataset.repo_id=YOUR_DATASET \
|
||||
--output_dir=./outputs/mutitask_dit_training \
|
||||
--batch_size=320 \
|
||||
--steps=30000 \
|
||||
--policy.type=multi_task_dit \
|
||||
--policy.device=cuda \
|
||||
--policy.horizon=32 \
|
||||
--policy.n_action_steps=24 \
|
||||
--policy.objective=diffusion \
|
||||
--policy.noise_scheduler_type=DDPM \
|
||||
--policy.num_train_timesteps=100 \
|
||||
--policy.repo_id="HF_USER/multitask-dit-your-robot" \
|
||||
--wandb.enable=true
|
||||
```
|
||||
|
||||
**Key Parameters:**
|
||||
|
||||
- **Batch Size**: 192-320 - If you have access to a GPU that can support this, you will get the best training dynamics
|
||||
- **Horizon**: 32 - number of action steps to predict, ~1.0 sec at 30Hz
|
||||
- **n_action_steps**: 24 - ~0.8 seconds at 30Hz
|
||||
- **Objective**: `diffusion` - start with diffusion and experiment with flow matching if generation quality is poor
|
||||
- **Training Steps**: >30k steps recommended for a single task
|
||||
|
||||
### Training Configuration Parameters
|
||||
|
||||
#### Objective Selection
|
||||
|
||||
Choose between diffusion and flow matching:
|
||||
|
||||
```bash
|
||||
# Diffusion objective (default)
|
||||
--policy.objective=diffusion \
|
||||
--policy.noise_scheduler_type=DDPM \ # or "DDIM"
|
||||
--policy.num_train_timesteps=100 \
|
||||
--policy.num_inference_steps=10 \ # For faster inference
|
||||
--policy.beta_schedule=squaredcos_cap_v2 \ # Noise schedule type
|
||||
--policy.prediction_type=epsilon \ # "epsilon" (predict noise) or "sample" (predict clean)
|
||||
--policy.clip_sample=true \ # Clip samples during denoising
|
||||
--policy.clip_sample_range=1.0 # Clipping range [-x, x]
|
||||
|
||||
# Flow matching objective
|
||||
--policy.objective=flow_matching \
|
||||
--policy.timestep_sampling_strategy=beta \ # or "uniform" | the beta sampling strategy performance appears much better in practice
|
||||
--policy.num_integration_steps=100 \
|
||||
--policy.integration_method=euler \ # or "rk4"
|
||||
--policy.sigma_min=0.0 # Minimum noise in flow interpolation path
|
||||
```
|
||||
|
||||
#### Transformer Architecture
|
||||
|
||||
Adjust model capacity based on dataset size:
|
||||
|
||||
```bash
|
||||
# Small datasets (< 100 examples)
|
||||
--policy.num_layers=4 \
|
||||
--policy.hidden_dim=512 \
|
||||
--policy.num_heads=8 # should ideally be hidden_dim // 64
|
||||
|
||||
# Medium datasets (100-5k examples) - default
|
||||
--policy.num_layers=6 \
|
||||
--policy.hidden_dim=512 \
|
||||
--policy.num_heads=8 # should ideally be hidden_dim // 64
|
||||
|
||||
# Large datasets (> 5k examples)
|
||||
--policy.num_layers=8 \
|
||||
--policy.hidden_dim=512 \
|
||||
--policy.num_heads=8 # should ideally be hidden_dim // 64
|
||||
```
|
||||
|
||||
**Positional Encoding Options:**
|
||||
|
||||
The model supports two positional encoding methods for action sequences:
|
||||
|
||||
```bash
|
||||
# Rotary Position Embedding (RoPE) - default, recommended
|
||||
--policy.use_rope=true \
|
||||
--policy.rope_base=10000.0 # Base frequency for RoPE
|
||||
|
||||
# Absolute positional encoding
|
||||
--policy.use_positional_encoding=true # Disables RoPE when true
|
||||
```
|
||||
|
||||
**Other Transformer Parameters:**
|
||||
|
||||
```bash
|
||||
--policy.dropout=0.1 # Dropout rate for DiT blocks (0.0-1.0)
|
||||
--policy.timestep_embed_dim=256 # Timestep embedding dimension
|
||||
```
|
||||
|
||||
#### Vision Encoder Configuration
|
||||
|
||||
```bash
|
||||
# Use different CLIP model for more expressivity at the cost of inference time
|
||||
# experiment with larger or smaller models depending on the complexity of your tasks and size of dataset
|
||||
--policy.vision_encoder_name=openai/clip-vit-large-patch14
|
||||
|
||||
# Use separate vision encoder per camera
|
||||
# This may be useful when cameras have significantly different characteristics, but
|
||||
# be wary of increased VRAM footprint.
|
||||
--policy.use_separate_rgb_encoder_per_camera=true
|
||||
|
||||
# Image preprocessing
|
||||
--policy.image_resize_shape=[XXX,YYY] \ # you may need to resize your images for inference speed ups
|
||||
--policy.image_crop_shape=[224,224] \
|
||||
--policy.image_crop_is_random=true # Random during training, center at inference
|
||||
```
|
||||
|
||||
#### Text Encoder Configuration
|
||||
|
||||
```bash
|
||||
# Use different CLIP text encoder model
|
||||
# same as vision: experiment with larger or smaller models depending on the
|
||||
# complexity of your tasks and size of dataset
|
||||
--policy.text_encoder_name=openai/clip-vit-large-patch14
|
||||
```
|
||||
|
||||
#### Learning Rate Configuration
|
||||
|
||||
The vision encoder uses a separate learning rate multiplier, where 1/10th is suggested to be the ideal staritng point:
|
||||
|
||||
```bash
|
||||
--policy.optimizer_lr=2e-5 \
|
||||
--policy.vision_encoder_lr_multiplier=0.1 # Vision encoder LR = 0.1 * optimizer_lr
|
||||
```
|
||||
|
||||
### Training Tuning Guidelines
|
||||
|
||||
#### 1. Flow Matching with Beta Sampling
|
||||
|
||||
The original diffusion implementation here is based on the work described in [TRI's LBM paper](https://arxiv.org/abs/2507.05331)
|
||||
|
||||
Additionally, we have implemented a flow-matching objective, which is described at a high-level in [Boston Dynamics blog post](https://bostondynamics.com/blog/large-behavior-models-atlas-find-new-footing/).
|
||||
|
||||
Consider testing the flow-matching objective and evaluating performance differences for your task:
|
||||
|
||||
```bash
|
||||
--policy.objective=flow_matching \
|
||||
--policy.timestep_sampling_strategy=beta \
|
||||
--policy.timestep_sampling_alpha=1.5 \
|
||||
--policy.timestep_sampling_beta=1.0 \
|
||||
--policy.timestep_sampling_s=0.999
|
||||
```
|
||||
|
||||
This hasn't been shown to be a silver bullet across every user case, but it occasionally results in smoother and more consistent actions.
|
||||
|
||||
#### 2. Number of Transformer Layers
|
||||
|
||||
Match model capacity to your dataset size:
|
||||
|
||||
- **Small datasets** (< 100 examples): Reduce to 4 layers
|
||||
- **Large datasets** (> 5k examples): Increase to 8 layers
|
||||
|
||||
#### 3. `horizon` Tuning
|
||||
|
||||
The model can be sensitive to the horizon you choose. Start with around a 1 second horizon based on your control frequency:
|
||||
|
||||
- **30 Hz frequency**: `horizon=30`
|
||||
- **10 Hz frequency**: `horizon=10`
|
||||
|
||||
Then experiment with increasing from there. The horizon determines how far into the future the model predicts actions.
|
||||
|
||||
#### 4. `n_action_steps` Sensitivity
|
||||
|
||||
The model can also be very sensitive to `n_action_steps`. Start with it being around 0.8 seconds based on your control frequency and tune from there:
|
||||
|
||||
- **Lower values**: More reactive but potentially less stable for long-horizon tasks
|
||||
- **Higher values**: Better for long-horizon execution but open-loop failures are limited in their recovery
|
||||
|
||||
### Inference Tuning
|
||||
|
||||
For faster inference, use DDIM with fewer sampling steps:
|
||||
|
||||
```bash
|
||||
--policy.noise_scheduler_type=DDIM \
|
||||
--policy.num_inference_steps=10
|
||||
```
|
||||
|
||||
### Resuming Training
|
||||
|
||||
To resume training from a checkpoint:
|
||||
|
||||
```bash
|
||||
lerobot-train \
|
||||
--config_path=./outputs/mutitask_dit_training/checkpoints/last/pretrained_model/train_config.json \
|
||||
--resume=true
|
||||
```
|
||||
|
||||
The checkpoint directory should contain `model.safetensors` and `config.json` files (saved automatically during training). When resuming, the configuration is loaded from the checkpoint, so you don't need to specify other parameters.
|
||||
|
||||
## Common Failure Modes and Debugging
|
||||
|
||||
Training these models can be finicky. Here are common failure modes and debugging approaches:
|
||||
|
||||
### Idling / No Motion
|
||||
|
||||
The model may "collapse" during inference, resulting in static or no motion. This can occur when:
|
||||
|
||||
1. **Insufficient training data**: If you only have 20-50 examples, try to roughly double your dataset size. Once you have above 300 examples, if you're still seeing this, the task may be too complex.
|
||||
|
||||
2. **Multiple similar tasks**: When your dataset contains multiple similar tasks (e.g., picking up 2 different objects), the model may rely too heavily on language conditioning which might not be rich enough.
|
||||
|
||||
**Debugging tips:**
|
||||
|
||||
- Increase dataset size (double until you get to over 300 examples)
|
||||
- Train for longer, up to 100k steps, even when the loss flatlines
|
||||
- Check if the model is receiving proper language instructions or increase diversity of instruction
|
||||
|
||||
### Executing the Wrong Task
|
||||
|
||||
Sometimes the robot will completely ignore your instruction and perform some other task. This generally only happens if you have trained on multiple tasks.
|
||||
|
||||
**Potential causes:**
|
||||
|
||||
- Language instruction ambiguity
|
||||
- Insufficient task-specific training data
|
||||
- Model confusion between similar tasks in the multitask dataset
|
||||
|
||||
**Debugging tips:**
|
||||
|
||||
- Verify language instruction specificity, especially if descriptions are similar between multiple tasks
|
||||
- Check task distribution in your training dataset and add weighting to the failing/ignored task
|
||||
- Consider task-specific fine-tuning
|
||||
|
||||
### Training Instability
|
||||
|
||||
If training loss is unstable or diverging:
|
||||
|
||||
- Try adjusting learning rate between `1e-5` and `3e-4`
|
||||
- Increase batch size if possible
|
||||
- Check that your dataset normalization is correct
|
||||
- Verify image preprocessing is working correctly
|
||||
|
||||
## Performance Considerations
|
||||
|
||||
### GPU Requirements
|
||||
|
||||
- **Inference**: At least an RTX 5070 Ti (or equivalent GPU) is recommended for reasonable speed performance
|
||||
- **Training**: A GPU with enough VRAM to load batch sizes of >64 is ideal, which will vary depending on the number of image observations, etc
|
||||
|
||||
### Batch Size Recommendations
|
||||
|
||||
- **Minimum**: 64 (less than this may result in unstable training)
|
||||
- **Recommended**: 256-320 (best performance, requires larger GPU)
|
||||
|
||||
## Example: Training on Custom Dataset
|
||||
|
||||
Here's a complete example training on a custom dataset:
|
||||
|
||||
```bash
|
||||
lerobot-train \
|
||||
--dataset.repo_id=YOUR_DATASET \
|
||||
--output_dir=./outputs/mutitask_dit_training \
|
||||
--batch_size=320 \
|
||||
--steps=30000 \
|
||||
--save_freq=1000 \
|
||||
--log_freq=100 \
|
||||
--eval_freq=1000 \
|
||||
--policy.type=multi_task_dit \
|
||||
--policy.device=cuda \
|
||||
--policy.horizon=32 \
|
||||
--policy.n_action_steps=24 \
|
||||
--policy.objective=diffusion \
|
||||
--policy.noise_scheduler_type=DDPM \
|
||||
--policy.num_layers=6 \
|
||||
--policy.hidden_dim=512 \
|
||||
--policy.vision_encoder_name=openai/clip-vit-base-patch16 \
|
||||
--policy.image_resize_shape=[320,240] \
|
||||
--policy.image_crop_shape=[224,224] \
|
||||
--policy.repo_id="HF_USER/multitask-dit-your-robot" \
|
||||
--wandb.enable=true \
|
||||
--wandb.project=multitask_dit
|
||||
```
|
||||
|
||||
## Libero Results
|
||||
|
||||
```
|
||||
python -m lerobot.scripts.lerobot_train \
|
||||
--dataset.repo_id=HuggingFaceVLA/libero \
|
||||
--policy.type=multi_task_dit \
|
||||
--policy.push_to_hub=false \
|
||||
--output_dir="./outputs/multitask_dit_libero" \
|
||||
--job_name="multitask-dit-libero" \
|
||||
--wandb.enable=true \
|
||||
--wandb.project=multitask_dit_libero \
|
||||
--dataset.image_transforms.enable=true \
|
||||
--dataset.image_transforms.max_num_transforms=4 \
|
||||
--dataset.image_transforms.tfs='{"brightness":{"type":"ColorJitter","kwargs":{"brightness":[0.75,1.25]}},"contrast":{"type":"ColorJitter","kwargs":{"contrast":[0.6,1.4]}},"saturation":{"type":"ColorJitter","kwargs":{"saturation":[0.8,1.2]}},"hue":{"type":"ColorJitter","kwargs":{"hue":[-0.05,0.05]}},"sharpness":{"type":"SharpnessJitter","kwargs":{"sharpness":[0.6,1.4]}},"rotation":{"type":"RandomRotation","kwargs":{"degrees":[-5,5]}},"translation":{"type":"RandomAffine","kwargs":{"degrees":0,"translate":[0.1,0.1]}}}' \
|
||||
--dataset.video_backend=torchcodec \
|
||||
--policy.use_amp=true \
|
||||
--policy.horizon=48 \
|
||||
--policy.n_obs_steps=2 \
|
||||
--policy.use_rope=true \
|
||||
--policy.use_positional_encoding=false \
|
||||
--policy.hidden_dim=768 \
|
||||
--policy.num_layers=8 \
|
||||
--policy.num_heads=12 \
|
||||
--policy.dropout=0.1 \
|
||||
--policy.timestep_embed_dim=256 \
|
||||
--policy.objective=diffusion \
|
||||
--policy.optimizer_lr=3e-4 \
|
||||
--policy.optimizer_weight_decay=0 \
|
||||
--policy.scheduler_warmup_steps=0 \
|
||||
--policy.vision_encoder_name=openai/clip-vit-base-patch16 \
|
||||
--policy.image_resize_shape=[256,256] \
|
||||
--policy.image_crop_is_random=true \
|
||||
--policy.text_encoder_name=openai/clip-vit-base-patch16 \
|
||||
--policy.vision_encoder_lr_multiplier=0.1 \
|
||||
--policy.device=cuda \
|
||||
--num_workers=8 \
|
||||
--save_freq=4000 \
|
||||
--log_freq=100 \
|
||||
--steps=100000 \
|
||||
--batch_size=320
|
||||
```
|
||||
|
||||
Results:
|
||||
|
||||
| LIBERO Spatial | LIBERO Object | LIBERO Goal | LIBERO 10 | Average |
|
||||
| -------------- | ------------- | ----------- | --------- | ------- |
|
||||
| 87.0 | 98.2 | 93.8 | 83.2 | 90.6 |
|
||||
|
||||
## References
|
||||
|
||||
For more details on the technical implementation and architecture, see:
|
||||
|
||||
- [A Careful Examination of Large Behavior Models for Multitask Dexterous Manipulation](https://arxiv.org/abs/2507.05331)
|
||||
- [Large Behavior Models and Atlas Find New Footing](https://bostondynamics.com/blog/large-behavior-models-atlas-find-new-footing/)
|
||||
- [Dissecting and Open-Sourcing Multitask Diffusion Transformer Policy](https://brysonkjones.substack.com/p/dissecting-and-open-sourcing-multitask-diffusion-transformer-policy)
|
||||
@@ -1,197 +0,0 @@
|
||||
## Order and Assemble the parts
|
||||
|
||||
First, assemble the OMX hardware following the official assembly guide.
|
||||
|
||||
OMX Assembly Guide: https://ai.robotis.com/omx/assembly_guide_omx.html
|
||||
|
||||
OMX robots are shipped preconfigured from the factory. Motor IDs, communication parameters, and joint offsets are already set, so no additional motor setup or calibration is required before using LeRobot.
|
||||
|
||||
## Install LeRobot 🤗
|
||||
|
||||
To install LeRobot, follow our [Installation Guide](./installation)
|
||||
|
||||
In addition to these instructions, you need to install the Dynamixel SDK:
|
||||
|
||||
```bash
|
||||
pip install -e ".[dynamixel]"
|
||||
```
|
||||
|
||||
## Connect the robot
|
||||
|
||||
To find the port for each bus servo adapter, run this script:
|
||||
|
||||
```bash
|
||||
lerobot-find-port
|
||||
```
|
||||
|
||||
This command runs and when prompted, disconnect the USB cable from either the leader or follower arm and press Enter. The output will show 'The port of this MotorsBus is [port]'. This identifies the port for the disconnected arm. Repeat for the other arm to identify both ports.
|
||||
|
||||
<hfoptions id="find_port">
|
||||
<hfoption id="Mac">
|
||||
|
||||
Example output on macOS:
|
||||
|
||||
```
|
||||
Finding all available ports for the MotorBus.
|
||||
['/dev/tty.usbmodem575E0032081', '/dev/tty.usbmodem575E0031751']
|
||||
Remove the USB cable from your MotorsBus and press Enter when done.
|
||||
|
||||
[...Disconnect corresponding leader or follower arm and press Enter...]
|
||||
|
||||
The port of this MotorsBus is /dev/tty.usbmodem575E0032081
|
||||
Reconnect the USB cable.
|
||||
```
|
||||
|
||||
Where the found port is: `/dev/tty.usbmodem575E0032081` corresponding to your leader or follower arm.
|
||||
|
||||
</hfoption>
|
||||
<hfoption id="Linux">
|
||||
|
||||
On Linux, we strongly recommend using udev rules to assign persistent and human-readable device names to the OMX leader and follower arms. This avoids issues where device names such as ttyACM0 and ttyACM1 change when the robot is unplugged, replugged, or when the system is rebooted.
|
||||
|
||||
#### 1. Find your device serial numbers
|
||||
|
||||
You should have obtained the port numbers like ../../ttyACM? for the leader and follower using `lerobot-find-port`. You can match those results with the serial numbers using the `ls -l /dev/serial/by-id/` command.
|
||||
To create udev rules, you need the unique serial number for each OMX device. The easiest way is to list devices under:
|
||||
|
||||
```bash
|
||||
ls -l /dev/serial/by-id/
|
||||
```
|
||||
|
||||
You will see output similar to:
|
||||
|
||||
```bash
|
||||
usb-ROBOTIS_OpenRB-150_228BDD7B503059384C2E3120FF0A2B19-if00 -> ../../ttyACM0
|
||||
usb-ROBOTIS_OpenRB-150_67E1ED68503059384C2E3120FF092234-if00 -> ../../ttyACM1
|
||||
```
|
||||
|
||||
In each line, the serial number is the long string after `usb-ROBOTIS_OpenRB-150_` and before `-if00`.
|
||||
|
||||
Follower serial: `228BDD7B503059384C2E3120FF0A2B19`
|
||||
|
||||
Leader serial: `67E1ED68503059384C2E3120FF092234`
|
||||
|
||||
#### 2. Create the udev rule
|
||||
|
||||
Create a new udev rule file:
|
||||
|
||||
```bash
|
||||
sudo nano /etc/udev/rules.d/99-omx.rules
|
||||
```
|
||||
|
||||
Paste the following lines, replacing the serial numbers with the values you found above:
|
||||
|
||||
```bash
|
||||
SUBSYSTEM=="tty", ATTRS{idVendor}=="0403", ATTRS{serial}=="228BDD7B503059384C2E3120FF0A2B19", SYMLINK+="omx_follower"
|
||||
SUBSYSTEM=="tty", ATTRS{idVendor}=="0403", ATTRS{serial}=="67E1ED68503059384C2E3120FF092234", SYMLINK+="omx_leader"
|
||||
```
|
||||
|
||||
Save the file and reload udev rules:
|
||||
|
||||
```bash
|
||||
sudo udevadm control --reload-rules
|
||||
sudo udevadm trigger
|
||||
```
|
||||
|
||||
Now unplug and replug both devices once.
|
||||
|
||||
#### 3. Verify the symlinks
|
||||
|
||||
Check that the persistent device names exist:
|
||||
|
||||
```bash
|
||||
ls -l /dev/omx_follower /dev/omx_leader
|
||||
```
|
||||
|
||||
You should see them pointing to ttyACM\* devices:
|
||||
|
||||
```bash
|
||||
/dev/omx_follower -> ttyACM*
|
||||
/dev/omx_leader -> ttyACM*
|
||||
```
|
||||
|
||||
These names remain stable across reboots and reconnections.
|
||||
|
||||
</hfoption>
|
||||
</hfoptions>
|
||||
|
||||
## Teleoperate
|
||||
|
||||
After identifying the correct ports, you can directly teleoperate the follower arm using the leader arm.
|
||||
|
||||
<hfoptions id="teleoperate">
|
||||
<hfoption id="Mac">
|
||||
|
||||
### Teleoperate without camera
|
||||
|
||||
```bash
|
||||
lerobot-teleoperate \
|
||||
--robot.type=omx_follower \
|
||||
--robot.port=<your_follower_port> \
|
||||
--robot.id=omx_follower_arm \
|
||||
--teleop.type=omx_leader \
|
||||
--teleop.port=<your_leader_port> \
|
||||
--teleop.id=omx_leader_arm
|
||||
```
|
||||
|
||||
During teleoperation, motions of the leader arm are mirrored in real time by the follower arm. OMX is already preconfigured, teleoperation can begin immediately without any calibration steps.
|
||||
|
||||
### Teleoperate with camera
|
||||
|
||||
You can also enable camera input during teleoperation by providing a camera configuration for the follower arm.
|
||||
|
||||
```bash
|
||||
lerobot-teleoperate \
|
||||
--robot.type=omx_follower \
|
||||
--robot.port=<your_follower_port> \
|
||||
--robot.id=omx_follower_arm \
|
||||
--robot.cameras="{front: {type: opencv, index_or_path: '/dev/video0', width: 640, height: 480, fps: 30}}" \
|
||||
--teleop.type=omx_leader \
|
||||
--teleop.port=<your_leader_port> \
|
||||
--teleop.id=omx_leader_arm \
|
||||
--display_data=true
|
||||
```
|
||||
|
||||
When the camera is enabled, the camera stream is displayed in real time and synchronized with the robot state. This setup is useful for visual monitoring and can be reused later for demonstration recording and imitation learning.
|
||||
|
||||
</hfoption>
|
||||
<hfoption id="Linux">
|
||||
|
||||
### Teleoperate without camera
|
||||
|
||||
```bash
|
||||
lerobot-teleoperate \
|
||||
--robot.type=omx_follower \
|
||||
--robot.port=/dev/omx_follower \
|
||||
--robot.id=omx_follower_arm \
|
||||
--teleop.type=omx_leader \
|
||||
--teleop.port=/dev/omx_leader \
|
||||
--teleop.id=omx_leader_arm
|
||||
```
|
||||
|
||||
During teleoperation, motions of the leader arm are mirrored in real time by the follower arm. OMX is already preconfigured, teleoperation can begin immediately without any calibration steps.
|
||||
|
||||
### Teleoperate with camera
|
||||
|
||||
You can also enable camera input during teleoperation by providing a camera configuration for the follower arm.
|
||||
|
||||
```bash
|
||||
lerobot-teleoperate \
|
||||
--robot.type=omx_follower \
|
||||
--robot.port=/dev/omx_follower \
|
||||
--robot.id=omx_follower_arm \
|
||||
--robot.cameras="{front: {type: opencv, index_or_path: '/dev/video0', width: 640, height: 480, fps: 30}}" \
|
||||
--teleop.type=omx_leader \
|
||||
--teleop.port=/dev/omx_leader \
|
||||
--teleop.id=omx_leader_arm \
|
||||
--display_data=true
|
||||
```
|
||||
|
||||
When the camera is enabled, the camera stream is displayed in real time and synchronized with the robot state. This setup is useful for visual monitoring and can be reused later for demonstration recording and imitation learning.
|
||||
|
||||
</hfoption>
|
||||
</hfoptions>
|
||||
|
||||
Congrats 🎉, your robot is all set to learn a task on its own.
|
||||
|
||||
> If you have any questions or need help, please reach out on [Discord](https://discord.com/invite/robotis).
|
||||
@@ -1,276 +0,0 @@
|
||||
# OpenArm
|
||||
|
||||
[OpenArm](https://openarm.dev) is an open-source 7DOF humanoid arm designed for physical AI research and deployment.
|
||||
|
||||
To get your OpenArm, assembled or DIY, and join the global community, browse verified and certified manufacturers worldwide at [openarm.dev](https://openarm.dev).
|
||||
|
||||
## What's Unique?
|
||||
|
||||
- **Human-Scale Design**: OpenArm is designed with human-like proportions, scaled for a person around 160-165cm tall. This provides an optimal balance between practical reach and manageable inertia for safe, responsive operation.
|
||||
|
||||
- **Safety-First Architecture**: Built with QDD backdrivable motors and high compliance, OpenArm prioritizes safe human-robot interaction while maintaining practical payload capabilities (6.0kg peak / 4.1kg nominal) for real-world tasks.
|
||||
|
||||
- **Built for Durability**: Critical structural components use aluminum and stainless steel construction, ensuring robust performance for repetitive data collection and continuous research use.
|
||||
|
||||
- **Fully Accessible & Buildable**: Every component, from CNC parts and 3D-printed casings to electrical wiring is designed to be purchasable and buildable by individual researchers and labs, with complete fabrication data provided.
|
||||
|
||||
- **Practical & Affordable**: At $6,500 USD for a complete bimanual system, OpenArm delivers research-grade capabilities at a fraction of traditional humanoid robot costs.
|
||||
|
||||
## Platform Requirements
|
||||
|
||||
<Tip warning={true}>
|
||||
**Linux Only**: OpenArm currently only works on Linux. The CAN bus USB adapter
|
||||
does not have macOS drivers and has not been tested on Windows.
|
||||
</Tip>
|
||||
|
||||
## Safety Guide
|
||||
|
||||
Before operating OpenArm, please read the [official safety guide](https://docs.openarm.dev/getting-started/safety-guide). Key points:
|
||||
|
||||
- **Secure installation**: Fasten the arm to a flat, stable surface with screws or clamps
|
||||
- **Safe distance**: Keep body parts and objects outside the range of motion during operation
|
||||
- **Protective equipment**: Always wear safety goggles; use additional PPE as needed
|
||||
- **Payload limits**: Do not exceed specified payload limits (6.0kg peak / 4.1kg nominal per arm)
|
||||
- **Emergency stop**: Know the location and operation of the emergency stop device
|
||||
- **Regular inspection**: Check for loose screws, damaged mechanical limits, unusual noises, and wiring damage
|
||||
|
||||
## Hardware Setup
|
||||
|
||||
Follow the official [OpenArm hardware documentation](https://docs.openarm.dev) for:
|
||||
|
||||
- Bill of materials and sourcing
|
||||
- 3D printing instructions
|
||||
- Mechanical assembly
|
||||
- Electrical wiring
|
||||
|
||||
The hardware repositories are available at [github.com/enactic/openarm](https://github.com/enactic/openarm).
|
||||
|
||||
## CAN Bus Setup
|
||||
|
||||
OpenArm uses CAN bus communication with Damiao motors. Once you have the CAN bus USB adapter plugged into your Linux PC, follow the [Damiao Motors and CAN Bus guide](./damiao) to configure the interface.
|
||||
|
||||
Quick setup:
|
||||
|
||||
```bash
|
||||
# Setup CAN interfaces
|
||||
lerobot-setup-can --mode=setup --interfaces=can0,can1
|
||||
|
||||
# Test motor communication
|
||||
lerobot-setup-can --mode=test --interfaces=can0,can1
|
||||
```
|
||||
|
||||
## Install LeRobot 🤗
|
||||
|
||||
Follow our [Installation Guide](./installation), then install the Damiao motor support:
|
||||
|
||||
```bash
|
||||
pip install -e ".[damiao]"
|
||||
```
|
||||
|
||||
## Usage
|
||||
|
||||
### Follower Arm (Robot)
|
||||
|
||||
<hfoptions id="follower">
|
||||
<hfoption id="Command">
|
||||
|
||||
```bash
|
||||
lerobot-calibrate \
|
||||
--robot.type=openarm_follower \
|
||||
--robot.port=can0 \
|
||||
--robot.side=right \
|
||||
--robot.id=my_openarm_follower
|
||||
```
|
||||
|
||||
</hfoption>
|
||||
<hfoption id="API example">
|
||||
|
||||
```python
|
||||
from lerobot.robots.openarm_follower import OpenArmFollower, OpenArmFollowerConfig
|
||||
|
||||
config = OpenArmFollowerConfig(
|
||||
port="can0",
|
||||
side="right", # or "left" for left arm
|
||||
id="my_openarm_follower",
|
||||
)
|
||||
|
||||
follower = OpenArmFollower(config)
|
||||
follower.connect()
|
||||
|
||||
# Read current state
|
||||
obs = follower.get_observation()
|
||||
print(obs)
|
||||
|
||||
# Send action (position in degrees)
|
||||
action = {
|
||||
"joint_1.pos": 0.0,
|
||||
"joint_2.pos": 0.0,
|
||||
"joint_3.pos": 0.0,
|
||||
"joint_4.pos": 45.0,
|
||||
"joint_5.pos": 0.0,
|
||||
"joint_6.pos": 0.0,
|
||||
"joint_7.pos": 0.0,
|
||||
"gripper.pos": 0.0,
|
||||
}
|
||||
follower.send_action(action)
|
||||
|
||||
follower.disconnect()
|
||||
```
|
||||
|
||||
</hfoption>
|
||||
</hfoptions>
|
||||
|
||||
### Leader Arm (Teleoperator)
|
||||
|
||||
The leader arm is used for teleoperation - manually moving it to control the follower arm.
|
||||
|
||||
<hfoptions id="leader">
|
||||
<hfoption id="Command">
|
||||
|
||||
```bash
|
||||
lerobot-calibrate \
|
||||
--teleop.type=openarm_leader \
|
||||
--teleop.port=can1 \
|
||||
--teleop.id=my_openarm_leader
|
||||
```
|
||||
|
||||
</hfoption>
|
||||
<hfoption id="API example">
|
||||
|
||||
```python
|
||||
from lerobot.teleoperators.openarm_leader import OpenArmLeader, OpenArmLeaderConfig
|
||||
|
||||
config = OpenArmLeaderConfig(
|
||||
port="can1",
|
||||
id="my_openarm_leader",
|
||||
manual_control=True, # Disable torque for manual movement
|
||||
)
|
||||
|
||||
leader = OpenArmLeader(config)
|
||||
leader.connect()
|
||||
|
||||
# Read current position (as action to send to follower)
|
||||
action = leader.get_action()
|
||||
print(action)
|
||||
|
||||
leader.disconnect()
|
||||
```
|
||||
|
||||
</hfoption>
|
||||
</hfoptions>
|
||||
|
||||
### Teleoperation
|
||||
|
||||
To teleoperate OpenArm with leader-follower control:
|
||||
|
||||
```bash
|
||||
lerobot-teleoperate \
|
||||
--robot.type=openarm_follower \
|
||||
--robot.port=can0 \
|
||||
--robot.side=right \
|
||||
--robot.id=my_follower \
|
||||
--teleop.type=openarm_leader \
|
||||
--teleop.port=can1 \
|
||||
--teleop.id=my_leader
|
||||
```
|
||||
|
||||
### Bimanual Teleoperation
|
||||
|
||||
To teleoperate a bimanual OpenArm setup with two leader and two follower arms:
|
||||
|
||||
```bash
|
||||
lerobot-teleoperate \
|
||||
--robot.type=bi_openarm_follower \
|
||||
--robot.left_arm_config.port=can0 \
|
||||
--robot.left_arm_config.side=left \
|
||||
--robot.right_arm_config.port=can1 \
|
||||
--robot.right_arm_config.side=right \
|
||||
--robot.id=my_bimanual_follower \
|
||||
--teleop.type=bi_openarm_leader \
|
||||
--teleop.left_arm_config.port=can2 \
|
||||
--teleop.right_arm_config.port=can3 \
|
||||
--teleop.id=my_bimanual_leader
|
||||
```
|
||||
|
||||
### Recording Data
|
||||
|
||||
To record a dataset during teleoperation:
|
||||
|
||||
```bash
|
||||
lerobot-record \
|
||||
--robot.type=openarm_follower \
|
||||
--robot.port=can0 \
|
||||
--robot.side=right \
|
||||
--robot.id=my_follower \
|
||||
--teleop.type=openarm_leader \
|
||||
--teleop.port=can1 \
|
||||
--teleop.id=my_leader \
|
||||
--repo-id=my_hf_username/my_openarm_dataset \
|
||||
--fps=30 \
|
||||
--num-episodes=10
|
||||
```
|
||||
|
||||
## Configuration Options
|
||||
|
||||
### Follower Configuration
|
||||
|
||||
| Parameter | Default | Description |
|
||||
| --------------------- | --------- | ---------------------------------------------------------- |
|
||||
| `port` | - | CAN interface (e.g., `can0`) |
|
||||
| `side` | `None` | Arm side: `"left"`, `"right"`, or `None` for custom limits |
|
||||
| `use_can_fd` | `True` | Enable CAN FD for higher data rates |
|
||||
| `can_bitrate` | `1000000` | Nominal bitrate (1 Mbps) |
|
||||
| `can_data_bitrate` | `5000000` | CAN FD data bitrate (5 Mbps) |
|
||||
| `max_relative_target` | `None` | Safety limit for relative target positions |
|
||||
| `position_kp` | Per-joint | Position control proportional gains |
|
||||
| `position_kd` | Per-joint | Position control derivative gains |
|
||||
|
||||
### Leader Configuration
|
||||
|
||||
| Parameter | Default | Description |
|
||||
| ------------------ | --------- | ----------------------------------- |
|
||||
| `port` | - | CAN interface (e.g., `can1`) |
|
||||
| `manual_control` | `True` | Disable torque for manual movement |
|
||||
| `use_can_fd` | `True` | Enable CAN FD for higher data rates |
|
||||
| `can_bitrate` | `1000000` | Nominal bitrate (1 Mbps) |
|
||||
| `can_data_bitrate` | `5000000` | CAN FD data bitrate (5 Mbps) |
|
||||
|
||||
## Motor Configuration
|
||||
|
||||
OpenArm uses Damiao motors with the following default configuration:
|
||||
|
||||
| Joint | Motor Type | Send ID | Recv ID |
|
||||
| --------------------------- | ---------- | ------- | ------- |
|
||||
| joint_1 (Shoulder pan) | DM8009 | 0x01 | 0x11 |
|
||||
| joint_2 (Shoulder lift) | DM8009 | 0x02 | 0x12 |
|
||||
| joint_3 (Shoulder rotation) | DM4340 | 0x03 | 0x13 |
|
||||
| joint_4 (Elbow flex) | DM4340 | 0x04 | 0x14 |
|
||||
| joint_5 (Wrist roll) | DM4310 | 0x05 | 0x15 |
|
||||
| joint_6 (Wrist pitch) | DM4310 | 0x06 | 0x16 |
|
||||
| joint_7 (Wrist rotation) | DM4310 | 0x07 | 0x17 |
|
||||
| gripper | DM4310 | 0x08 | 0x18 |
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### No Response from Motors
|
||||
|
||||
1. Check power supply connections
|
||||
2. Verify CAN wiring (CAN-H, CAN-L, GND)
|
||||
3. Run diagnostics: `lerobot-setup-can --mode=test --interfaces=can0`
|
||||
4. See the [Damiao troubleshooting guide](./damiao#troubleshooting) for more details
|
||||
|
||||
### CAN Interface Not Found
|
||||
|
||||
Ensure the CAN interface is configured:
|
||||
|
||||
```bash
|
||||
ip link show can0
|
||||
```
|
||||
|
||||
## Resources
|
||||
|
||||
- [OpenArm Website](https://openarm.dev)
|
||||
- [OpenArm Documentation](https://docs.openarm.dev)
|
||||
- [OpenArm GitHub](https://github.com/enactic/openarm)
|
||||
- [Safety Guide](https://docs.openarm.dev/getting-started/safety-guide)
|
||||
- [Damiao Motors and CAN Bus](./damiao)
|
||||
@@ -66,13 +66,12 @@ Run on of the examples scripts to teleoperate, record a dataset, replay a datase
|
||||
|
||||
All scripts assume you configured your robot (e.g., SO-100 follower) and set the correct serial port.
|
||||
|
||||
Additionally you need to **copy the URDF of the robot into the examples folder**. For the examples in this tutorial (using SO100/SO101), copy the `SO101` folder from the [SO-ARM100 repo](https://github.com/TheRobotStudio/SO-ARM100/blob/main/Simulation/SO101) into the `examples/phone_to_so100/` directory, so that the URDF file path becomes `examples/phone_to_so100/SO101/so101_new_calib.urdf`.
|
||||
Additionally you need to **copy the urdf of the robot to the examples folder**. For the examples in this tutorial (Using SO100/SO101) it is highly recommended to use the urdf in the [SO-ARM100 repo](https://github.com/TheRobotStudio/SO-ARM100/blob/main/Simulation/SO101/so101_new_calib.urdf)
|
||||
|
||||
- Run this example to teleoperate:
|
||||
|
||||
```bash
|
||||
cd examples/phone_to_so100
|
||||
python teleoperate.py
|
||||
python examples/phone_to_so100/teleoperate.py
|
||||
```
|
||||
|
||||
After running the example:
|
||||
@@ -85,22 +84,19 @@ Additionally you can customize mapping or safety limits by editing the processor
|
||||
- Run this example to record a dataset, which saves absolute end effector observations and actions:
|
||||
|
||||
```bash
|
||||
cd examples/phone_to_so100
|
||||
python record.py
|
||||
python examples/phone_to_so100/record.py
|
||||
```
|
||||
|
||||
- Run this example to replay recorded episodes:
|
||||
|
||||
```bash
|
||||
cd examples/phone_to_so100
|
||||
python replay.py
|
||||
python examples/phone_to_so100/replay.py
|
||||
```
|
||||
|
||||
- Run this example to evaluate a pretrained policy:
|
||||
|
||||
```bash
|
||||
cd examples/phone_to_so100
|
||||
python evaluate.py
|
||||
python examples/phone_to_so100/evaluate.py
|
||||
```
|
||||
|
||||
### Important pipeline steps and options
|
||||
|
||||
+6
-41
@@ -34,6 +34,11 @@ As described by Physical Intelligence, while AI has achieved remarkable success
|
||||
pip install -e ".[pi]"
|
||||
```
|
||||
|
||||
> [!NOTE]
|
||||
> For lerobot 0.4.0, if you want to install pi tag, you will have to do: `pip install "lerobot[pi]@git+https://github.com/huggingface/lerobot.git"`.
|
||||
>
|
||||
> This will be solved in the next patch release
|
||||
|
||||
## Training Data and Capabilities
|
||||
|
||||
π₀ is trained on the largest robot interaction dataset to date, combining three key data sources:
|
||||
@@ -55,7 +60,7 @@ policy.type=pi0
|
||||
For training π₀, you can use the standard LeRobot training script with the appropriate configuration:
|
||||
|
||||
```bash
|
||||
lerobot-train \
|
||||
python src/lerobot/scripts/lerobot_train.py \
|
||||
--dataset.repo_id=your_dataset \
|
||||
--policy.type=pi0 \
|
||||
--output_dir=./outputs/pi0_training \
|
||||
@@ -91,46 +96,6 @@ lerobot-train \
|
||||
|
||||
**💡 Tip**: Setting `train_expert_only=true` freezes the VLM and trains only the action expert and projections, allowing finetuning with reduced memory usage.
|
||||
|
||||
## Relative Actions
|
||||
|
||||
By default, π₀ predicts absolute actions. You can enable **relative actions** so the model predicts offsets relative to the current robot state. This can improve training stability for certain setups.
|
||||
|
||||
To use relative actions, first recompute your dataset stats in relative space via the CLI:
|
||||
|
||||
```bash
|
||||
lerobot-edit-dataset \
|
||||
--repo_id your_dataset \
|
||||
--operation.type recompute_stats \
|
||||
--operation.relative_action true \
|
||||
--operation.chunk_size 50 \
|
||||
--operation.relative_exclude_joints "['gripper']" \
|
||||
--push_to_hub true
|
||||
```
|
||||
|
||||
Or equivalently in Python:
|
||||
|
||||
```python
|
||||
from lerobot.datasets.lerobot_dataset import LeRobotDataset
|
||||
from lerobot.datasets.dataset_tools import recompute_stats
|
||||
|
||||
dataset = LeRobotDataset("your_dataset")
|
||||
recompute_stats(dataset, relative_action=True, chunk_size=50, relative_exclude_joints=["gripper"])
|
||||
dataset.push_to_hub()
|
||||
```
|
||||
|
||||
The `chunk_size` should match your policy's `chunk_size` (default 50 for π₀). `relative_exclude_joints` lists joint names that should remain in absolute space (e.g. gripper commands). Use `--push_to_hub true` to upload the updated stats to the Hub.
|
||||
|
||||
Then train with relative actions enabled:
|
||||
|
||||
```bash
|
||||
lerobot-train \
|
||||
--dataset.repo_id=your_dataset \
|
||||
--policy.type=pi0 \
|
||||
--policy.use_relative_actions=true \
|
||||
--policy.relative_exclude_joints='["gripper"]' \
|
||||
...
|
||||
```
|
||||
|
||||
## License
|
||||
|
||||
This model follows the **Apache 2.0 License**, consistent with the original [OpenPI repository](https://github.com/Physical-Intelligence/openpi).
|
||||
|
||||
+6
-41
@@ -36,6 +36,11 @@ This diverse training mixture creates a "curriculum" that enables generalization
|
||||
pip install -e ".[pi]"
|
||||
```
|
||||
|
||||
> [!NOTE]
|
||||
> For lerobot 0.4.0, if you want to install pi tag, you will have to do: `pip install "lerobot[pi]@git+https://github.com/huggingface/lerobot.git"`.
|
||||
>
|
||||
> This will be solved in the next patch release
|
||||
|
||||
## Usage
|
||||
|
||||
To use π₀.₅ in your LeRobot configuration, specify the policy type as:
|
||||
@@ -51,7 +56,7 @@ policy.type=pi05
|
||||
Here's a complete training command for finetuning the base π₀.₅ model on your own dataset:
|
||||
|
||||
```bash
|
||||
lerobot-train \
|
||||
python src/lerobot/scripts/lerobot_train.py\
|
||||
--dataset.repo_id=your_dataset \
|
||||
--policy.type=pi05 \
|
||||
--output_dir=./outputs/pi05_training \
|
||||
@@ -97,46 +102,6 @@ python src/lerobot/datasets/v30/augment_dataset_quantile_stats.py \
|
||||
|
||||
Or train pi05 with this normalization mapping: `--policy.normalization_mapping='{"ACTION": "MEAN_STD", "STATE": "MEAN_STD", "VISUAL": "IDENTITY"}'`
|
||||
|
||||
## Relative Actions
|
||||
|
||||
By default, π₀.₅ predicts absolute actions. You can enable **relative actions** so the model predicts offsets relative to the current robot state. This can improve training stability for certain setups.
|
||||
|
||||
To use relative actions, first recompute your dataset stats in relative space via the CLI:
|
||||
|
||||
```bash
|
||||
lerobot-edit-dataset \
|
||||
--repo_id your_dataset \
|
||||
--operation.type recompute_stats \
|
||||
--operation.relative_action true \
|
||||
--operation.chunk_size 50 \
|
||||
--operation.relative_exclude_joints "['gripper']" \
|
||||
--push_to_hub true
|
||||
```
|
||||
|
||||
Or equivalently in Python:
|
||||
|
||||
```python
|
||||
from lerobot.datasets.lerobot_dataset import LeRobotDataset
|
||||
from lerobot.datasets.dataset_tools import recompute_stats
|
||||
|
||||
dataset = LeRobotDataset("your_dataset")
|
||||
recompute_stats(dataset, relative_action=True, chunk_size=50, relative_exclude_joints=["gripper"])
|
||||
dataset.push_to_hub()
|
||||
```
|
||||
|
||||
The `chunk_size` should match your policy's `chunk_size` (default 50 for π₀.₅). `relative_exclude_joints` lists joint names that should remain in absolute space (e.g. gripper commands). Use `--push_to_hub true` to upload the updated stats to the Hub.
|
||||
|
||||
Then train with relative actions enabled:
|
||||
|
||||
```bash
|
||||
lerobot-train \
|
||||
--dataset.repo_id=your_dataset \
|
||||
--policy.type=pi05 \
|
||||
--policy.use_relative_actions=true \
|
||||
--policy.relative_exclude_joints='["gripper"]' \
|
||||
...
|
||||
```
|
||||
|
||||
## Performance Results
|
||||
|
||||
### Libero Benchmark Results
|
||||
|
||||
+15
-10
@@ -43,11 +43,16 @@ This approach can transform **any existing VLM** into a VLA by training it to pr
|
||||
pip install -e ".[pi]"
|
||||
```
|
||||
|
||||
> [!NOTE]
|
||||
> For lerobot 0.4.0, if you want to install the pi tag, you will have to do: `pip install "lerobot[pi]@git+https://github.com/huggingface/lerobot.git"`.
|
||||
>
|
||||
> This will be solved in the next patch release
|
||||
|
||||
## Training a Custom FAST Tokenizer
|
||||
|
||||
You have two options for the FAST tokenizer:
|
||||
|
||||
1. **Use the pre-trained tokenizer**: The `lerobot/fast-action-tokenizer` tokenizer was trained on 1M+ real robot action sequences and works as a general-purpose tokenizer.
|
||||
1. **Use the pre-trained tokenizer**: The `physical-intelligence/fast` tokenizer was trained on 1M+ real robot action sequences and works as a general-purpose tokenizer.
|
||||
|
||||
2. **Train your own tokenizer**: For maximum performance on your specific dataset, you can finetune the tokenizer on your own data.
|
||||
|
||||
@@ -109,15 +114,15 @@ lerobot-train \
|
||||
|
||||
### Key Training Parameters
|
||||
|
||||
| Parameter | Description | Default |
|
||||
| -------------------------------------- | -------------------------------------------------- | ------------------------------- |
|
||||
| `--policy.gradient_checkpointing=true` | Reduces memory usage significantly during training | `false` |
|
||||
| `--policy.dtype=bfloat16` | Use mixed precision training for efficiency | `float32` |
|
||||
| `--policy.chunk_size` | Number of action steps to predict (action horizon) | `50` |
|
||||
| `--policy.n_action_steps` | Number of action steps to execute | `50` |
|
||||
| `--policy.max_action_tokens` | Maximum number of FAST tokens per action chunk | `256` |
|
||||
| `--policy.action_tokenizer_name` | FAST tokenizer to use | `lerobot/fast-action-tokenizer` |
|
||||
| `--policy.compile_model=true` | Enable torch.compile for faster training | `false` |
|
||||
| Parameter | Description | Default |
|
||||
| -------------------------------------- | -------------------------------------------------- | ---------------------------- |
|
||||
| `--policy.gradient_checkpointing=true` | Reduces memory usage significantly during training | `false` |
|
||||
| `--policy.dtype=bfloat16` | Use mixed precision training for efficiency | `float32` |
|
||||
| `--policy.chunk_size` | Number of action steps to predict (action horizon) | `50` |
|
||||
| `--policy.n_action_steps` | Number of action steps to execute | `50` |
|
||||
| `--policy.max_action_tokens` | Maximum number of FAST tokens per action chunk | `256` |
|
||||
| `--policy.action_tokenizer_name` | FAST tokenizer to use | `physical-intelligence/fast` |
|
||||
| `--policy.compile_model=true` | Enable torch.compile for faster training | `false` |
|
||||
|
||||
## Inference
|
||||
|
||||
|
||||
@@ -1,37 +0,0 @@
|
||||
# Multitask DiT Policy
|
||||
|
||||
## Citation
|
||||
|
||||
If you use this work, please cite the following works:
|
||||
|
||||
```bibtex
|
||||
@misc{jones2025multitaskditpolicy,
|
||||
author = {Bryson Jones},
|
||||
title = {Dissecting and Open-Sourcing Multitask Diffusion Transformer Policy},
|
||||
year = {2025},
|
||||
url = {https://brysonkjones.substack.com/p/dissecting-and-open-sourcing-multitask-diffusion-transformer-policy},
|
||||
note = {Blog post}
|
||||
}
|
||||
```
|
||||
|
||||
```bibtex
|
||||
@misc{trilbmteam2025carefulexaminationlargebehaviormodels,
|
||||
author = {TRI LBM Team},
|
||||
title = {A Careful Examination of Large Behavior Models for Multitask Dexterous Manipulation},
|
||||
year = {2025},
|
||||
eprint = {arXiv:2507.05331},
|
||||
archivePrefix = {arXiv},
|
||||
primaryClass = {cs.RO},
|
||||
url = {https://arxiv.org/abs/2507.05331}
|
||||
}
|
||||
```
|
||||
|
||||
```bibtex
|
||||
@misc{bostondynamics2025largebehaviormodelsatlas,
|
||||
author = {Boston Dynamics and TRI Research Team},
|
||||
title = {Large Behavior Models and Atlas Find New Footing},
|
||||
year = {2025},
|
||||
url = {https://bostondynamics.com/blog/large-behavior-models-atlas-find-new-footing/},
|
||||
note = {Blog post}
|
||||
}
|
||||
```
|
||||
@@ -1,91 +0,0 @@
|
||||
# π₀.₅ (pi05)
|
||||
|
||||
This repository contains the Hugging Face port of **π₀.₅**, adapted from [OpenPI](https://github.com/Physical-Intelligence/openpi) by the Physical Intelligence.
|
||||
It is designed as a **Vision-Language-Action model with open-world generalization**.
|
||||
|
||||
---
|
||||
|
||||
## Model Overview
|
||||
|
||||
| Feature | π₀ | π₀.₅ |
|
||||
| -------------------- | ------------------------------------------------------ | ----------------------------------------- |
|
||||
| Time Conditioning | Concatenates time with actions via `action_time_mlp_*` | Uses `time_mlp_*` for AdaRMS conditioning |
|
||||
| AdaRMS | Not used | Used in action expert |
|
||||
| Tokenizer Length | 48 tokens | 200 tokens |
|
||||
| Discrete State Input | False (Uses `state_proj` layer) | True |
|
||||
| Parameter Count | Higher (includes state embedding) | Lower (no state embedding) |
|
||||
|
||||
---
|
||||
|
||||
## Relative Actions
|
||||
|
||||
π₀.₅ supports training with **relative actions**, where the model learns relative offsets
|
||||
from the current robot state instead of absolute joint positions. This mirrors the
|
||||
relative-action transform in OpenPI (`DeltaActions`) and can improve performance.
|
||||
|
||||
### How it works
|
||||
|
||||
1. **During preprocessing**, absolute actions are converted to relative offsets:
|
||||
`relative = action - state` (for selected joints).
|
||||
2. The relative actions are normalized using statistics computed from the relative distribution.
|
||||
3. **During postprocessing**, predicted relative actions are converted back to absolute:
|
||||
`absolute = relative + state`.
|
||||
|
||||
Joints listed in `relative_exclude_joints` (e.g., gripper) are kept absolute.
|
||||
|
||||
### Configuration
|
||||
|
||||
| Parameter | Type | Default | Description |
|
||||
| ------------------------- | ----------- | ------------- | ---------------------------------------------------------------- |
|
||||
| `use_relative_actions` | `bool` | `False` | Enable relative-action training |
|
||||
| `relative_exclude_joints` | `list[str]` | `["gripper"]` | Joint names to keep absolute (matched by substring) |
|
||||
| `action_feature_names` | `list[str]` | `None` | Auto-populated from dataset metadata at runtime by `make_policy` |
|
||||
|
||||
### Training example
|
||||
|
||||
```bash
|
||||
python -m lerobot.scripts.lerobot_train \
|
||||
--policy.type=pi05 \
|
||||
--dataset.repo_id=your_org/your_dataset \
|
||||
--policy.use_relative_actions=true \
|
||||
--policy.relative_exclude_joints='["gripper"]'
|
||||
```
|
||||
|
||||
When `use_relative_actions=true`, the training script automatically:
|
||||
|
||||
- Computes relative action statistics from the dataset (sampled chunk-level relative actions)
|
||||
- Replaces the standard action stats with relative stats for normalization
|
||||
- Broadcasts these stats across all ranks in distributed training
|
||||
|
||||
---
|
||||
|
||||
## Citation
|
||||
|
||||
If you use this work, please cite both **OpenPI** and the π₀.₅ paper:
|
||||
|
||||
```bibtex
|
||||
@misc{openpi2024,
|
||||
author = {Physical Intelligence Lab},
|
||||
title = {OpenPI: PyTorch Implementation of π0 and π0.5 Policies},
|
||||
year = {2024},
|
||||
publisher = {GitHub},
|
||||
howpublished = {\url{https://github.com/Physical-Intelligence/openpi}},
|
||||
license = {Apache-2.0}
|
||||
}
|
||||
|
||||
@misc{intelligence2025pi05visionlanguageactionmodelopenworld,
|
||||
title = {π₀.₅: a Vision-Language-Action Model with Open-World Generalization},
|
||||
author = {Physical Intelligence and Kevin Black and Noah Brown and James Darpinian and Karan Dhabalia and Danny Driess and Adnan Esmail and Michael Equi and Chelsea Finn and Niccolo Fusai and Manuel Y. Galliker and Dibya Ghosh and Lachy Groom and Karol Hausman and Brian Ichter and Szymon Jakubczak and Tim Jones and Liyiming Ke and Devin LeBlanc and Sergey Levine and Adrian Li-Bell and Mohith Mothukuri and Suraj Nair and Karl Pertsch and Allen Z. Ren and Lucy Xiaoyang Shi and Laura Smith and Jost Tobias Springenberg and Kyle Stachowicz and James Tanner and Quan Vuong and Homer Walke and Anna Walling and Haohuan Wang and Lili Yu and Ury Zhilinsky},
|
||||
year = {2025},
|
||||
eprint = {2504.16054},
|
||||
archivePrefix= {arXiv},
|
||||
primaryClass = {cs.LG},
|
||||
url = {https://arxiv.org/abs/2504.16054},
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## License
|
||||
|
||||
This port follows the **Apache 2.0 License**, consistent with the original [OpenPI repository](https://github.com/Physical-Intelligence/openpi).
|
||||
@@ -1,108 +0,0 @@
|
||||
# π₀ (pi0)
|
||||
|
||||
This repository contains the Hugging Face port of **π₀**, adapted from [OpenPI](https://github.com/Physical-Intelligence/openpi) by the Physical Intelligence.
|
||||
It is designed as a **Vision-Language-Action model for general robot control**.
|
||||
|
||||
---
|
||||
|
||||
## Model Overview
|
||||
|
||||
| Feature | π₀ | π₀.₅ |
|
||||
| -------------------- | ------------------------------------------------------ | ----------------------------------------- |
|
||||
| Time Conditioning | Concatenates time with actions via `action_time_mlp_*` | Uses `time_mlp_*` for AdaRMS conditioning |
|
||||
| AdaRMS | Not used | Used in action expert |
|
||||
| Tokenizer Length | 48 tokens | 200 tokens |
|
||||
| Discrete State Input | False (Uses `state_proj` layer) | True |
|
||||
| Parameter Count | Higher (includes state embedding) | Lower (no state embedding) |
|
||||
|
||||
---
|
||||
|
||||
## Relative Actions
|
||||
|
||||
π₀ supports training with **relative actions**, where the model learns relative offsets
|
||||
from the current robot state instead of absolute joint positions. This mirrors the
|
||||
relative-action transform in OpenPI (`DeltaActions`) and can improve performance.
|
||||
|
||||
### How it works
|
||||
|
||||
1. **During preprocessing**, absolute actions are converted to relative offsets:
|
||||
`relative = action - state` (for selected joints).
|
||||
2. The relative actions are normalized using statistics computed from the relative distribution.
|
||||
3. **During postprocessing**, predicted relative actions are converted back to absolute:
|
||||
`absolute = relative + state`.
|
||||
|
||||
Joints listed in `relative_exclude_joints` (e.g., gripper) are kept absolute.
|
||||
|
||||
### Configuration
|
||||
|
||||
| Parameter | Type | Default | Description |
|
||||
| ------------------------- | ----------- | ------------- | ---------------------------------------------------------------- |
|
||||
| `use_relative_actions` | `bool` | `False` | Enable relative-action training |
|
||||
| `relative_exclude_joints` | `list[str]` | `["gripper"]` | Joint names to keep absolute (matched by substring) |
|
||||
| `action_feature_names` | `list[str]` | `None` | Auto-populated from dataset metadata at runtime by `make_policy` |
|
||||
|
||||
### Training example
|
||||
|
||||
```bash
|
||||
python -m lerobot.scripts.lerobot_train \
|
||||
--policy.type=pi0 \
|
||||
--dataset.repo_id=your_org/your_dataset \
|
||||
--policy.use_relative_actions=true \
|
||||
--policy.relative_exclude_joints='["gripper"]'
|
||||
```
|
||||
|
||||
When `use_relative_actions=true`, the training script automatically:
|
||||
|
||||
- Computes relative action statistics from the dataset (sampled chunk-level relative actions)
|
||||
- Replaces the standard action stats with relative stats for normalization
|
||||
- Broadcasts these stats across all ranks in distributed training
|
||||
|
||||
### Recomputing stats for an existing dataset
|
||||
|
||||
If you want to precompute relative action stats offline, use `recompute_stats` from
|
||||
`lerobot.datasets.dataset_tools`:
|
||||
|
||||
```python
|
||||
from lerobot.datasets.lerobot_dataset import LeRobotDataset
|
||||
from lerobot.datasets.dataset_tools import recompute_stats
|
||||
|
||||
dataset = LeRobotDataset("your_org/your_dataset")
|
||||
dataset = recompute_stats(
|
||||
dataset,
|
||||
relative_action=True,
|
||||
relative_exclude_joints=["gripper"],
|
||||
)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Citation
|
||||
|
||||
If you use this work, please cite both **OpenPI** and the π₀ paper:
|
||||
|
||||
```bibtex
|
||||
@misc{openpi2024,
|
||||
author = {Physical Intelligence Lab},
|
||||
title = {OpenPI: PyTorch Implementation of π0 and π0.5 Policies},
|
||||
year = {2024},
|
||||
publisher = {GitHub},
|
||||
howpublished = {\url{https://github.com/Physical-Intelligence/openpi}},
|
||||
license = {Apache-2.0}
|
||||
}
|
||||
|
||||
@misc{black2024pi0visionlanguageactionflowmodel,
|
||||
title = {π₀: A Vision-Language-Action Flow Model for General Robot Control},
|
||||
author = {Kevin Black and Noah Brown and Danny Driess and Adnan Esmail and Michael Equi and Chelsea Finn and Niccolo Fusai and Lachy Groom and Karol Hausman and Brian Ichter and Szymon Jakubczak and Tim Jones and Liyiming Ke and Sergey Levine and Adrian Li-Bell and Mohith Mothukuri and Suraj Nair and Karl Pertsch and Lucy Xiaoyang Shi and James Tanner and Quan Vuong and Anna Walling and Haohuan Wang and Ury Zhilinsky},
|
||||
year = {2024},
|
||||
eprint = {2410.24164},
|
||||
archivePrefix= {arXiv},
|
||||
primaryClass = {cs.LG},
|
||||
url = {https://arxiv.org/abs/2410.24164},
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## License
|
||||
|
||||
This port follows the **Apache 2.0 License**, consistent with the original [OpenPI repository](https://github.com/Physical-Intelligence/openpi).
|
||||
@@ -1,38 +0,0 @@
|
||||
# Real-Time Chunking (RTC)
|
||||
|
||||
This module contains the LeRobot implementation of **Real-Time Chunking (RTC)**, an inference-time technique for flow-matching based policies.
|
||||
|
||||
**Note**: RTC is not a policy itself, but rather an inference enhancement that works with flow-matching based policies including [π₀](../pi0/), [π₀.₅](../pi05/), and [SmolVLA](../smolvla/).
|
||||
|
||||
---
|
||||
|
||||
## Citation
|
||||
|
||||
If you use Real-Time Chunking in your work, please cite:
|
||||
|
||||
```bibtex
|
||||
@misc{openpi2024,
|
||||
author = {Physical Intelligence Lab},
|
||||
title = {OpenPI: PyTorch Implementation of π0 and π0.5 Policies},
|
||||
year = {2024},
|
||||
publisher = {GitHub},
|
||||
howpublished = {\url{https://github.com/Physical-Intelligence/openpi}},
|
||||
license = {Apache-2.0}
|
||||
}
|
||||
|
||||
@misc{black2025realtimeexecutionactionchunking,
|
||||
title={Real-Time Execution of Action Chunking Flow Policies},
|
||||
author={Kevin Black and Manuel Y. Galliker and Sergey Levine},
|
||||
year={2025},
|
||||
eprint={2506.07339},
|
||||
archivePrefix={arXiv},
|
||||
primaryClass={cs.RO},
|
||||
url={https://arxiv.org/abs/2506.07339},
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## License
|
||||
|
||||
This implementation follows the **Apache 2.0 License**, consistent with the LeRobot project.
|
||||
@@ -1,14 +0,0 @@
|
||||
## Paper
|
||||
|
||||
https://arxiv.org/abs/2509.25358
|
||||
|
||||
## Citation
|
||||
|
||||
```bibtex
|
||||
@article{chen2025sarm,
|
||||
title={SARM: Stage-Aware Reward Modeling for Long Horizon Robot Manipulation},
|
||||
author={Chen, Qianzhong and Yu, Justin and Schwager, Mac and Abbeel, Pieter and Shentu, Yide and Wu, Philipp},
|
||||
journal={arXiv preprint arXiv:2509.25358},
|
||||
year={2025}
|
||||
}
|
||||
```
|
||||
@@ -159,9 +159,6 @@ lerobot-record \
|
||||
--dataset.fps=15 \
|
||||
--dataset.push_to_hub=true \
|
||||
--dataset.private=true \
|
||||
--dataset.streaming_encoding=true \
|
||||
--dataset.encoder_threads=2 \
|
||||
# --dataset.vcodec=auto \
|
||||
--display_data=true
|
||||
```
|
||||
|
||||
@@ -201,9 +198,6 @@ lerobot-record \
|
||||
--dataset.fps=15 \
|
||||
--dataset.push_to_hub=true \
|
||||
--dataset.private=true \
|
||||
--dataset.streaming_encoding=true \
|
||||
--dataset.encoder_threads=2 \
|
||||
# --dataset.vcodec=auto \
|
||||
--display_data=true
|
||||
```
|
||||
|
||||
|
||||
@@ -1,114 +0,0 @@
|
||||
# Rename Map and Empty Cameras
|
||||
|
||||
When you train, evaluate, or record with a robot policy, your **dataset** or **environment** provides observations under one set of keys (e.g. `observation.images.front`, `observation.images.eagle`), while your **policy** expects another (e.g. `observation.images.image`, `observation.images.image2`). The **rename map** bridges that gap without changing the policy or data source.
|
||||
|
||||
> **Scope:** The rename map only renames **observation** keys (images and state). Action keys are not affected.
|
||||
|
||||
## Why observation keys don't always match
|
||||
|
||||
Policies have a fixed set of **input feature names** baked into their pretrained config. For example:
|
||||
|
||||
- [pi0fast-libero](https://huggingface.co/lerobot/pi0fast-libero) expects `observation.images.base_0_rgb` and `observation.images.left_wrist_0_rgb`.
|
||||
- [xvla-base](https://huggingface.co/lerobot/xvla-base) expects `observation.images.image`, `observation.images.image2`, and `observation.images.image3`.
|
||||
|
||||
Your dataset might use different names entirely (e.g. `observation.images.front`, `observation.images.eagle`, `observation.images.glove`), and your eval environment might use yet another set. Rather than editing the policy config or renaming columns in the dataset, you pass a **rename map**: a JSON dictionary that maps source keys to the keys the policy expects. Renaming happens inside the preprocessor pipeline, so the policy always sees its expected keys.
|
||||
|
||||
## Using the rename map
|
||||
|
||||
Pass the mapping as a JSON string on the command line. The convention is always:
|
||||
|
||||
```
|
||||
--rename_map='{"source_key": "policy_key", ...}'
|
||||
```
|
||||
|
||||
where **source_key** is what the dataset or environment provides, and **policy_key** is what the policy expects.
|
||||
|
||||
Only listed keys are renamed; everything else passes through unchanged. Order of entries doesn't matter.
|
||||
|
||||
Supported policies: **PI0**, **PI05**, **PI0Fast**, **SmolVLA**, and **XVLA**.
|
||||
|
||||
### Training
|
||||
|
||||
Suppose you fine-tune [lerobot/xvla-base](https://huggingface.co/lerobot/xvla-base) on a dataset with images under `observation.images.front`, `observation.images.eagle`, and `observation.images.glove`. XVLA expects `observation.images.image`, `observation.images.image2`, and `observation.images.image3`:
|
||||
|
||||
```bash
|
||||
lerobot-train \
|
||||
--dataset.repo_id=YOUR_DATASET \
|
||||
--output_dir=./outputs/xvla_training \
|
||||
--job_name=xvla_training \
|
||||
--policy.path="lerobot/xvla-base" \
|
||||
--policy.repo_id="HF_USER/xvla-your-robot" \
|
||||
--policy.dtype=bfloat16 \
|
||||
--policy.action_mode=auto \
|
||||
--steps=20000 \
|
||||
--policy.device=cuda \
|
||||
--policy.freeze_vision_encoder=false \
|
||||
--policy.freeze_language_encoder=false \
|
||||
--policy.train_policy_transformer=true \
|
||||
--policy.train_soft_prompts=true \
|
||||
--rename_map='{"observation.images.front": "observation.images.image", "observation.images.eagle": "observation.images.image2", "observation.images.glove": "observation.images.image3"}'
|
||||
```
|
||||
|
||||
### Evaluation
|
||||
|
||||
A policy that expects `observation.images.base_0_rgb` and `observation.images.left_wrist_0_rgb` (e.g. [pi0fast-libero](https://huggingface.co/lerobot/pi0fast-libero)), but the LIBERO environment returns `observation.images.image` and `observation.images.image2`:
|
||||
|
||||
```bash
|
||||
lerobot-eval \
|
||||
--policy.path=lerobot/pi0fast-libero \
|
||||
--env.type=libero \
|
||||
... \
|
||||
--rename_map='{"observation.images.image": "observation.images.base_0_rgb", "observation.images.image2": "observation.images.left_wrist_0_rgb"}'
|
||||
```
|
||||
|
||||
### Recording
|
||||
|
||||
`lerobot-record` also supports rename maps, nested under the dataset config:
|
||||
|
||||
```bash
|
||||
lerobot-record \ # When running inference
|
||||
--policy.path="<user>/smolVLA_finetuned" \
|
||||
... \
|
||||
--dataset.rename_map='{"observation.images.glove2": "observation.images.image"}'
|
||||
```
|
||||
|
||||
## Alternative: edit the policy config directly
|
||||
|
||||
If you always use the same dataset or environment, you can **edit the policy's `config.json`** so its observation keys match your data source. Then no rename map is needed.
|
||||
|
||||
The tradeoff: modifying the policy config ties it to one data source. A rename map keeps one policy usable across many datasets and environments.
|
||||
|
||||
## Empty cameras: fewer views than the policy expects
|
||||
|
||||
Some policies are built for a fixed number of image inputs. If your dataset has fewer cameras, you can set **`empty_cameras`** in the policy config instead of modifying the model architecture.
|
||||
|
||||
### How it works
|
||||
|
||||
Setting `empty_cameras=N` adds N placeholder image features to the policy config, named:
|
||||
|
||||
```
|
||||
observation.images.empty_camera_0
|
||||
observation.images.empty_camera_1
|
||||
...
|
||||
```
|
||||
|
||||
At runtime, these keys have no corresponding data in the batch. The policy fills them with masked dummy tensors (padded with `-1` for SigLIP-based vision encoders, with a zero attention mask), so the extra image slots are effectively ignored during training and inference.
|
||||
|
||||
### Example
|
||||
|
||||
XVLA-base has three visual inputs and `empty_cameras=0` by default. Your dataset only has two cameras:
|
||||
|
||||
1. Set `--policy.empty_cameras=1`.
|
||||
2. The config adds a third key: `observation.images.empty_camera_0`.
|
||||
3. Use the rename map for your two real cameras as usual.
|
||||
4. The third slot is masked out — no fake images needed in your dataset.
|
||||
|
||||
## Quick reference
|
||||
|
||||
| Goal | What to do |
|
||||
| ----------------------------------------- | --------------------------------------------------------------------------- |
|
||||
| Dataset keys ≠ policy keys | `--rename_map='{"dataset_key": "policy_key", ...}'` |
|
||||
| Env keys ≠ policy keys (eval) | `--rename_map='{"env_key": "policy_key", ...}'` |
|
||||
| Recording with different keys (inference) | `--dataset.rename_map='{"source_key": "policy_key", ...}'`. |
|
||||
| Fewer cameras than policy expects | `--policy.empty_cameras=N` (supported by PI0, PI05, PI0Fast, SmolVLA, XVLA) |
|
||||
| Avoid passing a rename map | Edit the policy's `config.json` so its keys match your data source |
|
||||
@@ -269,7 +269,7 @@ This generates visualizations showing video frames with subtask boundaries overl
|
||||
Train with **no annotations** - uses linear progress from 0 to 1:
|
||||
|
||||
```bash
|
||||
lerobot-train \
|
||||
python src/lerobot/scripts/lerobot_train.py \
|
||||
--dataset.repo_id=your-username/your-dataset \
|
||||
--policy.type=sarm \
|
||||
--policy.annotation_mode=single_stage \
|
||||
@@ -288,7 +288,7 @@ lerobot-train \
|
||||
Train with **dense annotations only** (sparse auto-generated):
|
||||
|
||||
```bash
|
||||
lerobot-train \
|
||||
python src/lerobot/scripts/lerobot_train.py \
|
||||
--dataset.repo_id=your-username/your-dataset \
|
||||
--policy.type=sarm \
|
||||
--policy.annotation_mode=dense_only \
|
||||
@@ -307,7 +307,7 @@ lerobot-train \
|
||||
Train with **both sparse and dense annotations**:
|
||||
|
||||
```bash
|
||||
lerobot-train \
|
||||
python src/lerobot/scripts/lerobot_train.py \
|
||||
--dataset.repo_id=your-username/your-dataset \
|
||||
--policy.type=sarm \
|
||||
--policy.annotation_mode=dual \
|
||||
@@ -468,7 +468,7 @@ This script:
|
||||
Once you have the progress file, train your policy with RA-BC weighting. The progress file is auto-detected from the dataset path (`sarm_progress.parquet`). Currently PI0, PI0.5 and SmolVLA are supported with RA-BC:
|
||||
|
||||
```bash
|
||||
lerobot-train \
|
||||
python src/lerobot/scripts/lerobot_train.py \
|
||||
--dataset.repo_id=your-username/your-dataset \
|
||||
--policy.type=pi0 \
|
||||
--use_rabc=true \
|
||||
|
||||
@@ -106,9 +106,6 @@ lerobot-record \
|
||||
--dataset.repo_id=${HF_USER}/eval_DATASET_NAME_test \ # <- This will be the dataset name on HF Hub
|
||||
--dataset.episode_time_s=50 \
|
||||
--dataset.num_episodes=10 \
|
||||
--dataset.streaming_encoding=true \
|
||||
--dataset.encoder_threads=2 \
|
||||
# --dataset.vcodec=auto \
|
||||
# <- Teleop optional if you want to teleoperate in between episodes \
|
||||
# --teleop.type=so100_leader \
|
||||
# --teleop.port=/dev/ttyACM0 \
|
||||
|
||||
+6
-20
@@ -1,18 +1,5 @@
|
||||
# SO-101
|
||||
|
||||
<div style="display: flex; align-items: center; gap: 10px;">
|
||||
<img
|
||||
src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/lerobot/SO101_Follower.webp"
|
||||
alt="SO-101"
|
||||
width="60%"
|
||||
/>
|
||||
<img
|
||||
src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/lerobot/SO101_Leader.webp"
|
||||
alt="SO-101"
|
||||
width="60%"
|
||||
/>
|
||||
</div>
|
||||
|
||||
In the steps below, we explain how to assemble our flagship robot, the SO-101.
|
||||
|
||||
## Source the parts
|
||||
@@ -236,10 +223,10 @@ It is advisable to install one 3-pin cable in the motor after placing them befor
|
||||
|
||||
### Joint 1
|
||||
|
||||
- Install both motor horns. Secure the top horn with a M3x6mm screw. No screws are required for the bottom horn.
|
||||
- Place the first motor into the base.
|
||||
- Fasten the motor with 4 M2x6mm screws (smallest screws). Two from the top and two from the bottom.
|
||||
- Slide over the first motor holder and fasten it using two M2x6mm screws (one on each side).
|
||||
- Install both motor horns, securing the top horn with a M3x6mm screw.
|
||||
- Attach the shoulder part.
|
||||
- Tighten the shoulder part with 4 M3x6mm screws on top and 4 M3x6mm screws on the bottom
|
||||
- Add the shoulder motor holder.
|
||||
@@ -255,9 +242,9 @@ It is advisable to install one 3-pin cable in the motor after placing them befor
|
||||
|
||||
### Joint 2
|
||||
|
||||
- Install both motor horns. Secure the top horn with a M3x6mm screw. No screws are required for the bottom horn.
|
||||
- Slide the second motor in from the top.
|
||||
- Fasten the second motor with 4 M2x6mm screws.
|
||||
- Attach both motor horns to motor 2, again use the M3x6mm horn screw.
|
||||
- Attach the upper arm with 4 M3x6mm screws on each side.
|
||||
|
||||
<div class="video-container">
|
||||
@@ -271,8 +258,8 @@ It is advisable to install one 3-pin cable in the motor after placing them befor
|
||||
|
||||
### Joint 3
|
||||
|
||||
- Install both motor horns. Secure the top horn with a M3x6mm screw. No screws are required for the bottom horn.
|
||||
- Insert motor 3 and fasten using 4 M2x6mm screws.
|
||||
- Insert motor 3 and fasten using 4 M2x6mm screws
|
||||
- Attach both motor horns to motor 3 and secure one again with a M3x6mm horn screw.
|
||||
- Connect the forearm to motor 3 using 4 M3x6mm screws on each side.
|
||||
|
||||
<div class="video-container">
|
||||
@@ -286,10 +273,9 @@ It is advisable to install one 3-pin cable in the motor after placing them befor
|
||||
|
||||
### Joint 4
|
||||
|
||||
- Install both motor horns. Secure the top horn with a M3x6mm screw. No screws are required for the bottom horn.
|
||||
- Slide over motor holder 4.
|
||||
- Slide in motor 4.
|
||||
- Fasten motor 4 with 4 M2x6mm screws.
|
||||
- Fasten motor 4 with 4 M2x6mm screws and attach its motor horns, use a M3x6mm horn screw.
|
||||
|
||||
<div class="video-container">
|
||||
<video controls width="600">
|
||||
@@ -322,7 +308,7 @@ It is advisable to install one 3-pin cable in the motor after placing them befor
|
||||
|
||||
- Attach the gripper to motor 5, attach it to the motor horn on the wrist using 4 M3x6mm screws.
|
||||
- Insert the gripper motor and secure it with 2 M2x6mm screws on each side.
|
||||
- Install both motor horns on the gripper motor. Secure the top horn with a M3x6mm screw; no screws are required for the bottom horn.
|
||||
- Attach the motor horns and again use a M3x6mm horn screw.
|
||||
- Install the gripper claw and secure it with 4 M3x6mm screws on both sides.
|
||||
|
||||
<div class="video-container">
|
||||
|
||||
@@ -1,155 +0,0 @@
|
||||
# Streaming Video Encoding Guide
|
||||
|
||||
## 1. Overview
|
||||
|
||||
Streaming video encoding eliminates the traditional PNG round-trip during video dataset recording. Instead of:
|
||||
|
||||
1. Capture frame -> write PNG to disk -> (at episode end) read PNG's -> encode to MP4 -> delete PNG's
|
||||
|
||||
Frames can be encoded in real-time during capture:
|
||||
|
||||
1. Capture frame -> queue to encoder thread -> encode to MP4 directly
|
||||
|
||||
This makes `save_episode()` near-instant (the video is already encoded by the time the episode ends) and removes the blocking wait that previously occurred between episodes, especially with multiple cameras in long episodes.
|
||||
|
||||
## 2. Tuning Parameters
|
||||
|
||||
| Parameter | CLI Flag | Type | Default | Description |
|
||||
| ----------------------- | --------------------------------- | ------------- | ------------- | ----------------------------------------------------------------- |
|
||||
| `streaming_encoding` | `--dataset.streaming_encoding` | `bool` | `True` | Enable real-time encoding during capture |
|
||||
| `vcodec` | `--dataset.vcodec` | `str` | `"libsvtav1"` | Video codec. `"auto"` detects best HW encoder |
|
||||
| `encoder_threads` | `--dataset.encoder_threads` | `int \| None` | `None` (auto) | Threads per encoder instance. `None` will leave the vcoded decide |
|
||||
| `encoder_queue_maxsize` | `--dataset.encoder_queue_maxsize` | `int` | `60` | Max buffered frames per camera (~2s at 30fps). Consumes RAM |
|
||||
|
||||
## 3. Performance Considerations
|
||||
|
||||
Streaming encoding means the CPU is encoding video **during** the capture loop, not after. This creates a CPU budget that must be shared between:
|
||||
|
||||
- **Control loop** (reading cameras, control the robot, writing non-video data)
|
||||
- **Encoder threads** (one pool per camera)
|
||||
- **Rerun visualization** (if enabled)
|
||||
- **OS and other processes**
|
||||
|
||||
### Resolution & Number of Cameras Impact
|
||||
|
||||
| Setup | Throughput (px/sec) | CPU Encoding Load | Notes |
|
||||
| ------------------------- | ------------------- | ----------------- | ------------------------------ |
|
||||
| 2camsx 640x480x3 @30fps | 55M | Low | Works on most systems |
|
||||
| 2camsx 1280x720x3 @30fps | 165M | Moderate | Comfortable on modern systems |
|
||||
| 2camsx 1920x1080x3 @30fps | 373M | High | Requires powerful high-end CPU |
|
||||
|
||||
### `encoder_threads` Tuning
|
||||
|
||||
This parameter controls how many threads each encoder instance uses internally:
|
||||
|
||||
- **Higher values** (e.g., 4-5): Faster encoding, but uses more CPU cores per camera. Good for high-end systems with many cores.
|
||||
- **Lower values** (e.g., 1-2): Less CPU per camera, freeing cores for capture and visualization. Good for low-res images and capable CPUs.
|
||||
- **`None` (default)**: Lets the codec decide. Information available in the codec logs.
|
||||
|
||||
### Backpressure and Frame Dropping
|
||||
|
||||
Each camera has a bounded queue (`encoder_queue_maxsize`, default 60 frames). When the encoder can't keep up:
|
||||
|
||||
1. The queue fills up (consuming RAM)
|
||||
2. New frames are **dropped** (not blocked) — the capture loop continues uninterrupted
|
||||
3. A warning is logged: `"Encoder queue full for {camera}, dropped N frame(s)"`
|
||||
4. At episode end, total dropped frames per camera are reported
|
||||
|
||||
### Symptoms of Encoder Falling Behind
|
||||
|
||||
- **System feels laggy and freezes**: all CPUs are at 100%
|
||||
- **Dropped frame warnings** in the log or lower frames/FPS than expected in the recorded dataset
|
||||
- **Choppy robot movement**: If CPU is severely overloaded, even the capture loop may be affected
|
||||
- **Accumulated rerun lag**: Visualization falls behind real-time
|
||||
|
||||
## 4. Hardware-Accelerated Encoding
|
||||
|
||||
### When to Use
|
||||
|
||||
Use HW encoding when:
|
||||
|
||||
- CPU is the bottleneck (dropped frames, choppy robot, rerun lag)
|
||||
- You have compatible hardware (GPU or dedicated encoder)
|
||||
- You're recording at high throughput (high resolution or with many cameras)
|
||||
|
||||
### Choosing a Codec
|
||||
|
||||
| Codec | CPU Usage | File Size | Quality | Notes |
|
||||
| --------------------- | --------- | -------------- | ------- | ---------------------------------------------------------------- |
|
||||
| `libsvtav1` (default) | High | Smallest | Best | Default. Best compression but most CPU-intensive |
|
||||
| `h264` | Medium | ~30-50% larger | Good | Software H.264. Lower CPU |
|
||||
| HW encoders | Very Low | Largest | Good | Offloads to dedicated hardware. Best for CPU-constrained systems |
|
||||
|
||||
### Available HW Encoders
|
||||
|
||||
| Encoder | Platform | Hardware | CLI Value |
|
||||
| ------------------- | ------------- | ------------------------------------------------------------------------------------------------ | ------------------------------------ |
|
||||
| `h264_videotoolbox` | macOS | Apple Silicon / Intel | `--dataset.vcodec=h264_videotoolbox` |
|
||||
| `hevc_videotoolbox` | macOS | Apple Silicon / Intel | `--dataset.vcodec=hevc_videotoolbox` |
|
||||
| `h264_nvenc` | Linux/Windows | NVIDIA GPU | `--dataset.vcodec=h264_nvenc` |
|
||||
| `hevc_nvenc` | Linux/Windows | NVIDIA GPU | `--dataset.vcodec=hevc_nvenc` |
|
||||
| `h264_vaapi` | Linux | Intel/AMD GPU | `--dataset.vcodec=h264_vaapi` |
|
||||
| `h264_qsv` | Linux/Windows | Intel Quick Sync | `--dataset.vcodec=h264_qsv` |
|
||||
| `auto` | Any | Probes the system for available HW encoders. Falls back to `libsvtav1` if no HW encoder is found | `--dataset.vcodec=auto` |
|
||||
|
||||
> [!NOTE]
|
||||
> In order to use the HW accelerated encoders you might need to upgrade your GPU drivers.
|
||||
|
||||
> [!NOTE]
|
||||
> `libsvtav1` is the default because it provides the best training performance; other vcodecs can reduce CPU usage and be faster, but they typically produce larger files and may affect training time.
|
||||
|
||||
## 5. Troubleshooting
|
||||
|
||||
| Symptom | Likely Cause | Fix |
|
||||
| ------------------------------------------------------------------ | -------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
|
||||
| System freezes or choppy robot movement or Rerun visualization lag | CPU starved (100% load usage) | Close other apps, reduce encoding throughput, lower `encoder_threads`, use `h264`, use `display_data=False`. If the CPU continues to be at 100% then it might be insufficient for your setup, consider `--dataset.streaming_encoding=false` or HW encoding (`--dataset.vcodec=auto`) |
|
||||
| "Encoder queue full" warnings or dropped frames in dataset | Encoder can't keep up (Queue overflow) | If CPU is not at 100%: Increase `encoder_threads`, increase `encoder_queue_maxsize` or use HW encoding (`--dataset.vcodec=auto`). |
|
||||
| High RAM usage | Queue filling faster than encoding | `encoder_threads` too low or CPU insufficient. Reduce `encoder_queue_maxsize` or use HW encoding |
|
||||
| Large video files | Using HW encoder or H.264 | Expected trade-off. Switch to `libsvtav1` if CPU allows |
|
||||
| `save_episode()` still slow | `streaming_encoding` is `False` | Set `--dataset.streaming_encoding=true` |
|
||||
| Encoder thread crash | Codec not available or invalid settings | Check `vcodec` is installed, try `--dataset.vcodec=auto` |
|
||||
| Recorded dataset is missing frames | CPU/GPU starvation or occasional load spikes | If ~5% of frames are missing, your system is likely overloaded — follow the recommendations above. If fewer frames are missing (~2%), they are probably due to occasional transient load spikes (often at startup) and can be considered expected. |
|
||||
|
||||
## 6. Recommended Configurations
|
||||
|
||||
These estimates are conservative; we recommend testing them on your setup—start with a low load and increase it gradually.
|
||||
|
||||
### High-End Systems: modern 12+ cores (24+ threads)
|
||||
|
||||
A throughput between ~250-500M px/sec should be comfortable in CPU. For even better results try HW encoding if available.
|
||||
|
||||
```bash
|
||||
# 3camsx 1280x720x3 @30fps: Defaults work well. Optionally increase encoder parallelism.
|
||||
# 2camsx 1920x1080x3 @30fps: Defaults work well. Optionally increase encoder parallelism.
|
||||
lerobot-record --dataset.encoder_threads=5 ...
|
||||
|
||||
# 3camsx 1920x1080x3 @30fps: Might require some tuning.
|
||||
```
|
||||
|
||||
### Mid-Range Systems: modern 8+ cores (16+ threads) or Apple Silicon
|
||||
|
||||
A throughput between ~80-300M px/sec should be possible in CPU.
|
||||
|
||||
```bash
|
||||
# 3camsx 640x480x3 @30fps: Defaults work well. Optionally decrease encoder parallelism.
|
||||
# 2camsx 1280x720x3 @30fps: Defaults work well. Optionally decrease encoder parallelism.
|
||||
lerobot-record --dataset.encoder_threads=2 ...
|
||||
|
||||
# 2camsx 1920x1080x3 @30fps: Might require some tuning.
|
||||
```
|
||||
|
||||
### Low-Resource Systems: modern 4+ cores (8+ threads) or Raspberry Pi 5
|
||||
|
||||
On very constrained systems, streaming encoding may compete too heavily with the capture loop. Disabling it falls back to the PNG-based approach where encoding happens between episodes (blocking, but doesn't interfere with capture). Alternatively, record at a lower throughput to reduce both capture and encoding load. Consider also changing codec to `h264` and using batch encoding.
|
||||
|
||||
```bash
|
||||
# 2camsx 640x480x3 @30fps: Requires some tuning.
|
||||
|
||||
# Use H.264, disable streaming, consider batching encoding
|
||||
lerobot-record --dataset.vcodec=h264 --dataset.streaming_encoding=false ...
|
||||
```
|
||||
|
||||
## 7. Closing note
|
||||
|
||||
Performance ultimately depends on your exact setup — frames-per-second, resolution, CPU cores and load, available memory, episode length, and the encoder you choose. Always test with your target workload, be mindful about your CPU & system capabilities and tune `encoder_threads`, `encoder_queue_maxsize`, and
|
||||
`vcodec` reasonably. That said, a common practical configuration (for many applications) is three cameras at 640×480x3 @30fps; this usually runs fine with the default streaming video encoding settings in modern systems. Always verify your recorded dataset is healthy by comparing the video duration to the CLI episode duration and confirming the row count equals FPS × CLI duration.
|
||||
+103
-202
@@ -1,72 +1,23 @@
|
||||
# Unitree G1
|
||||
|
||||
<img
|
||||
src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/lerobot/unitree_thumbnail.jpg"
|
||||
alt="Unitree G1 locomanipulation demo"
|
||||
style={{ width: "100%" }}
|
||||
/>
|
||||
This guide covers the complete setup process for the Unitree G1 humanoid, from initial connection to running gr00t_wbc locomotion.
|
||||
|
||||
The Unitree G1 humanoid is now supported in LeRobot! You can teleoperate, train locomanipulation policies, test in sim, and more. Both 29 and 23 DoF variants are supported.
|
||||
## About
|
||||
|
||||
We support both 29 and 23 DOF G1 EDU version. We introduce:
|
||||
|
||||
- **`unitree g1` robot class, handling low level read/write from/to the humanoid**
|
||||
- **ZMQ socket bridge** for remote communication and camera streaming, allowing for remote policy deployment over wlan, eth or directly on the robot
|
||||
- **Locomotion policies** from NVIDIA gr00t and Amazon FAR Holosoma
|
||||
- **Simulation mode** for testing policies without the physical robot in mujoco
|
||||
|
||||
---
|
||||
|
||||
## Part 1: Getting Started
|
||||
## Connection guide
|
||||
|
||||
### Install the Unitree SDK
|
||||
### Step 1: Configure Ethernet Interface
|
||||
|
||||
Follow the [unitree_sdk2_python installation guide](https://github.com/unitreerobotics/unitree_sdk2_python#installation). Tested with `unitree_sdk2py==1.0.1` and `cyclonedds==0.10.2`:
|
||||
|
||||
```bash
|
||||
conda create -y -n lerobot python=3.12
|
||||
conda activate lerobot
|
||||
git clone https://github.com/unitreerobotics/unitree_sdk2_python.git
|
||||
cd unitree_sdk2_python
|
||||
pip install -e .
|
||||
cd ..
|
||||
```
|
||||
|
||||
### Install LeRobot
|
||||
|
||||
```bash
|
||||
conda install ffmpeg -c conda-forge
|
||||
conda install -c conda-forge "pinocchio>=3.0.0,<4.0.0"
|
||||
git clone https://github.com/huggingface/lerobot.git
|
||||
cd lerobot
|
||||
pip install -e '.[unitree_g1]'
|
||||
```
|
||||
|
||||
<Tip>
|
||||
For now, pinocchio must be installed from conda-forge (not pip) to include the
|
||||
CasADi bindings needed for arm IK.
|
||||
</Tip>
|
||||
|
||||
### Test the Installation (Simulation)
|
||||
|
||||
The simulation environment has its own dependencies. Check the Simulation environment dependencies: [Unitree G1 Mujoco EnvHub](https://huggingface.co/lerobot/unitree-g1-mujoco/tree/main).
|
||||
|
||||
```bash
|
||||
pip install mujoco loguru msgpack msgpack-numpy
|
||||
```
|
||||
|
||||
```bash
|
||||
lerobot-teleoperate \
|
||||
--robot.type=unitree_g1 \
|
||||
--robot.is_simulation=true \
|
||||
--teleop.type=unitree_g1 \
|
||||
--teleop.id=wbc_unitree \
|
||||
--robot.cameras='{"global_view": {"type": "zmq", "server_address": "localhost", "port": 5555, "camera_name": "head_camera", "width": 640, "height": 480, "fps": 30, "warmup_s": 5}}' \
|
||||
--display_data=true \
|
||||
--robot.controller=GrootLocomotionController
|
||||
```
|
||||
|
||||
This will launch a [MuJoCo sim instance](https://huggingface.co/lerobot/unitree-g1-mujoco/tree/main) for the G1. You can connect a gamepad to your machine before launching in order to control the robot's locomotion in sim. We support both [HolosomaLocomotionController](https://github.com/amazon-far/holosoma) and [GrootLocomotionController](https://github.com/NVlabs/GR00T-WholeBodyControl) via `--robot.controller`.
|
||||
|
||||
- Press `9` to release the robot
|
||||
- Press `7` / `8` to increase / decrease waist height
|
||||
|
||||
### Connect to the Physical Robot
|
||||
|
||||
The G1's Ethernet IP is fixed at `192.168.123.164`. Your machine must have a static IP on the same subnet: `192.168.123.x` where `x ≠ 164`.
|
||||
Set a static IP on the same subnet as the robot:
|
||||
|
||||
```bash
|
||||
# Replace 'enp131s0' with your ethernet interface name (check with `ip a`)
|
||||
@@ -75,23 +26,47 @@ sudo ip addr add 192.168.123.200/24 dev enp131s0
|
||||
sudo ip link set enp131s0 up
|
||||
```
|
||||
|
||||
### SSH into the Robot
|
||||
**Note**: The G1's Ethernet IP is fixed at `192.168.123.164`. Your computer must use `192.168.123.x` with x ≠ 164.
|
||||
|
||||
### Step 2: SSH into the Robot
|
||||
|
||||
```bash
|
||||
ssh unitree@192.168.123.164
|
||||
# Password: 123
|
||||
```
|
||||
|
||||
### Share Internet via Ethernet
|
||||
You should now be connected to the G1's Orin.
|
||||
|
||||
The G1 needs internet access to clone repos and install packages. Share your laptop's connection over Ethernet:
|
||||
---
|
||||
|
||||
## Part 2: Enable WiFi on the Robot
|
||||
|
||||
Wlan0 is disabled by default on the G1. To enable it:
|
||||
|
||||
### Step 1: Enable WiFi Hardware
|
||||
|
||||
```bash
|
||||
sudo rfkill unblock wifi
|
||||
sudo rfkill unblock all
|
||||
|
||||
# Bring up wlan0
|
||||
sudo ip link set wlan0 up
|
||||
|
||||
# Enable NetworkManager control of wlan0
|
||||
sudo nmcli radio wifi on
|
||||
sudo nmcli device set wlan0 managed yes
|
||||
sudo systemctl restart NetworkManager
|
||||
```
|
||||
|
||||
### Step 2: Enable Internet Forwarding
|
||||
|
||||
**On your laptop:**
|
||||
|
||||
```bash
|
||||
# Enable IP forwarding
|
||||
sudo sysctl -w net.ipv4.ip_forward=1
|
||||
|
||||
# Replace wlp132s0f0 with your WiFi interface name
|
||||
# Set up NAT (replace wlp132s0f0 with your WiFi interface)
|
||||
sudo iptables -t nat -A POSTROUTING -o wlp132s0f0 -s 192.168.123.0/24 -j MASQUERADE
|
||||
sudo iptables -A FORWARD -i wlp132s0f0 -o enp131s0 -m state --state RELATED,ESTABLISHED -j ACCEPT
|
||||
sudo iptables -A FORWARD -i enp131s0 -o wlp132s0f0 -j ACCEPT
|
||||
@@ -100,203 +75,129 @@ sudo iptables -A FORWARD -i enp131s0 -o wlp132s0f0 -j ACCEPT
|
||||
**On the G1:**
|
||||
|
||||
```bash
|
||||
# Add laptop as default gateway
|
||||
sudo ip route del default 2>/dev/null || true
|
||||
sudo ip route add default via 192.168.123.200 dev eth0
|
||||
echo "nameserver 8.8.8.8" | sudo tee /etc/resolv.conf
|
||||
|
||||
# Verify
|
||||
# Test connection
|
||||
ping -c 3 8.8.8.8
|
||||
```
|
||||
|
||||
### Install the Unitree SDK on the G1
|
||||
|
||||
Follow the [unitree_sdk2_python installation guide](https://github.com/unitreerobotics/unitree_sdk2_python#installation):
|
||||
|
||||
```bash
|
||||
conda create -y -n lerobot python=3.12
|
||||
conda activate lerobot
|
||||
git clone https://github.com/unitreerobotics/unitree_sdk2_python.git
|
||||
cd unitree_sdk2_python
|
||||
python -m pip install -e .
|
||||
cd ..
|
||||
```
|
||||
|
||||
### Install LeRobot on the G1
|
||||
|
||||
```bash
|
||||
git clone https://github.com/huggingface/lerobot.git
|
||||
cd lerobot
|
||||
conda install -c conda-forge "pinocchio>=3.0.0,<4.0.0"
|
||||
python -m pip install -e '.[unitree_g1]'
|
||||
```
|
||||
|
||||
<Tip>
|
||||
For now, pinocchio must be installed from conda-forge (not pip) to include the
|
||||
CasADi bindings needed for arm IK.
|
||||
</Tip>
|
||||
|
||||
### (Optional) Enable WiFi on the Robot
|
||||
|
||||
For wireless SSH access, you can enable WiFi on the G1 (it's blocked by default):
|
||||
|
||||
```bash
|
||||
sudo rfkill unblock all
|
||||
sudo ip link set wlan0 up
|
||||
sudo nmcli radio wifi on
|
||||
sudo nmcli device set wlan0 managed yes
|
||||
sudo systemctl restart NetworkManager
|
||||
```
|
||||
|
||||
**Connect to a WiFi network:**
|
||||
### Step 3: Connect to WiFi Network
|
||||
|
||||
```bash
|
||||
# List available networks
|
||||
nmcli device wifi list
|
||||
|
||||
# Connect to your WiFi (example)
|
||||
sudo nmcli connection add type wifi ifname wlan0 con-name "YourNetwork" ssid "YourNetwork"
|
||||
sudo nmcli connection modify "YourNetwork" wifi-sec.key-mgmt wpa-psk
|
||||
sudo nmcli connection modify "YourNetwork" wifi-sec.psk "YourPassword"
|
||||
sudo nmcli connection modify "YourNetwork" connection.autoconnect yes
|
||||
sudo nmcli connection up "YourNetwork"
|
||||
|
||||
# Check WiFi IP address
|
||||
ip a show wlan0
|
||||
```
|
||||
|
||||
You can then SSH over WiFi instead of Ethernet:
|
||||
### Step 4: SSH Over WiFi
|
||||
|
||||
Once connected to WiFi, note the robot's IP address and disconnect the Ethernet cable. You can now SSH over WiFi:
|
||||
|
||||
```bash
|
||||
ssh unitree@<ROBOT_WIFI_IP>
|
||||
ssh unitree@<YOUR_ROBOT_IP>
|
||||
# Password: 123
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Part 2: Teleoperation & Locomotion
|
||||
|
||||
### Run the Robot Server
|
||||
|
||||
On the robot (from `~/lerobot`):
|
||||
|
||||
```bash
|
||||
cd ~/lerobot
|
||||
python src/lerobot/robots/unitree_g1/run_g1_server.py --camera
|
||||
```
|
||||
|
||||
### Run the Locomotion Policy
|
||||
|
||||
You can run the teleoperation client from your laptop over Ethernet, over WiFi (experimental), or directly on the robot itself. Mind potential latency introduced by your network.
|
||||
|
||||
**From your laptop:**
|
||||
|
||||
```bash
|
||||
lerobot-teleoperate \
|
||||
--robot.type=unitree_g1 \
|
||||
--robot.is_simulation=false \
|
||||
--robot.robot_ip=<ROBOT_IP> \
|
||||
--teleop.type=unitree_g1 \
|
||||
--teleop.id=wbc_unitree \
|
||||
--robot.cameras='{"global_view": {"type": "zmq", "server_address": "<ROBOT_IP>", "port": 5555, "camera_name": "head_camera", "width": 640, "height": 480, "fps": 30}}' \
|
||||
--display_data=true \
|
||||
--robot.controller=HolosomaLocomotionController
|
||||
```
|
||||
|
||||
We support both [GrootLocomotionController](https://github.com/NVlabs/GR00T-WholeBodyControl) and [HolosomaLocomotionController](https://github.com/amazon-far/holosoma) via `--robot.controller`.
|
||||
Replace `<YOUR_ROBOT_IP>` with your robot's actual WiFi IP address.
|
||||
|
||||
---
|
||||
|
||||
## Part 3: Loco-Manipulation with the Homunculus Exoskeleton
|
||||
## Part 3: Robot Server Setup
|
||||
|
||||
We provide a loco-manipulation solution via the Homunculus Exoskeleton — an open-source 7 DoF exoskeleton for whole-body control. Check it out [here](https://github.com/nepyope/hmc_exo).
|
||||
### Step 1: Install LeRobot on the Orin
|
||||
|
||||
### Calibrate
|
||||
SSH into the robot and install LeRobot:
|
||||
|
||||
```bash
|
||||
lerobot-calibrate \
|
||||
--teleop.type=unitree_g1 \
|
||||
--teleop.left_arm_config.port=/dev/ttyACM1 \
|
||||
--teleop.right_arm_config.port=/dev/ttyACM0 \
|
||||
--teleop.id=exo
|
||||
ssh unitree@<YOUR_ROBOT_IP>
|
||||
|
||||
conda create -y -n lerobot python=3.10
|
||||
conda activate lerobot
|
||||
git clone https://github.com/huggingface/lerobot.git
|
||||
cd lerobot
|
||||
pip install -e '.[unitree_g1]'
|
||||
git clone https://github.com/unitreerobotics/unitree_sdk2_python.git
|
||||
cd unitree_sdk2_python && pip install -e .
|
||||
```
|
||||
|
||||
During calibration move each joint through its entire range. After fitting, move the joint in a neutral position and press `n` to advance.
|
||||
**Note**: The Unitree SDK requires CycloneDDS v0.10.2 to be installed. See the [Unitree SDK documentation](https://github.com/unitreerobotics/unitree_sdk2_python) for details.
|
||||
|
||||
### Record a Dataset
|
||||
### Step 2: Run the Robot Server
|
||||
|
||||
On the robot:
|
||||
|
||||
```bash
|
||||
lerobot-record \
|
||||
--robot.type=unitree_g1 \
|
||||
--robot.is_simulation=true \
|
||||
--robot.cameras='{"global_view": {"type": "zmq", "server_address": "localhost", "port": 5555, "camera_name": "head_camera", "width": 640, "height": 480, "fps": 30}}' \
|
||||
--teleop.type=unitree_g1 \
|
||||
--teleop.left_arm_config.port=/dev/ttyACM1 \
|
||||
--teleop.right_arm_config.port=/dev/ttyACM0 \
|
||||
--teleop.id=exo \
|
||||
--dataset.repo_id=your-username/dataset-name \
|
||||
--dataset.single_task="Test" \
|
||||
--dataset.num_episodes=2 \
|
||||
--dataset.episode_time_s=5 \
|
||||
--dataset.reset_time_s=5 \
|
||||
--dataset.push_to_hub=true \
|
||||
--dataset.streaming_encoding=true \
|
||||
--dataset.encoder_threads=2
|
||||
python src/lerobot/robots/unitree_g1/run_g1_server.py
|
||||
```
|
||||
|
||||
> **Note:** Omit `--teleop.left_arm_config.port` and `--teleop.right_arm_config.port` if you're only using the joystick.
|
||||
|
||||
Example dataset: [nepyope/unitree_box_move_blue_full](https://huggingface.co/datasets/nepyope/unitree_box_move_blue_full)
|
||||
**Important**: Keep this terminal running. The server must be active for remote control.
|
||||
|
||||
---
|
||||
|
||||
## Part 4: Training & Inference
|
||||
## Part 4: Controlling the robot
|
||||
|
||||
### Train
|
||||
With the robot server running, you can now control the robot remotely. Let's launch a locomotion policy
|
||||
|
||||
### Step 1: Install LeRobot on your machine
|
||||
|
||||
```bash
|
||||
python src/lerobot/scripts/lerobot_train.py \
|
||||
--dataset.repo_id=your-username/dataset-name \
|
||||
--policy.type=pi05 \
|
||||
--output_dir=./outputs/pi05_training \
|
||||
--job_name=pi05_training \
|
||||
--policy.repo_id=your-username/your-repo-id \
|
||||
--policy.pretrained_path=lerobot/pi05_base \
|
||||
--policy.compile_model=true \
|
||||
--policy.gradient_checkpointing=true \
|
||||
--wandb.enable=true \
|
||||
--policy.dtype=bfloat16 \
|
||||
--policy.freeze_vision_encoder=false \
|
||||
--policy.train_expert_only=false \
|
||||
--steps=3000 \
|
||||
--policy.device=cuda \
|
||||
--batch_size=32
|
||||
conda create -y -n lerobot python=3.10
|
||||
conda activate lerobot
|
||||
git clone https://github.com/huggingface/lerobot.git
|
||||
cd lerobot
|
||||
pip install -e '.[unitree_g1]'
|
||||
git clone https://github.com/unitreerobotics/unitree_sdk2_python.git
|
||||
cd unitree_sdk2_python && pip install -e .
|
||||
```
|
||||
|
||||
### Inference with RTC
|
||||
### Step 2: Update Robot IP in Config
|
||||
|
||||
Once trained, we recommend deploying policies using inference-time RTC:
|
||||
Edit the config file to match your robot's WiFi IP:
|
||||
|
||||
```python
|
||||
# In src/lerobot/robots/unitree_g1/config_unitree_g1.py
|
||||
robot_ip: str = "<YOUR_ROBOT_IP>" # Replace with your robot's WiFi IP.
|
||||
```
|
||||
|
||||
### Step 3: Run the Locomotion Policy
|
||||
|
||||
```bash
|
||||
python examples/rtc/eval_with_real_robot.py \
|
||||
--policy.path=your-username/your-repo-id \
|
||||
--policy.device=cuda \
|
||||
--robot.type=unitree_g1 \
|
||||
--robot.is_simulation=false \
|
||||
--robot.controller=HolosomaLocomotionController \
|
||||
--robot.cameras='{"global_view": {"type": "zmq", "server_address": "<ROBOT_IP>", "port": 5555, "camera_name": "head_camera", "width": 640, "height": 480, "fps": 30}}' \
|
||||
--task="task_description" \
|
||||
--duration=1000 \
|
||||
--fps=30 \
|
||||
--rtc.enabled=true
|
||||
# Run GR00T locomotion controller
|
||||
python examples/unitree_g1/gr00t_locomotion.py --repo-id "nepyope/GR00T-WholeBodyControl_g1"
|
||||
|
||||
# Run Holosoma locomotion controller
|
||||
python examples/unitree_g1/holosoma_locomotion.py
|
||||
|
||||
```
|
||||
|
||||
Press `Ctrl+C` to stop the policy.
|
||||
|
||||
---
|
||||
|
||||
## Running in Simulation Mode (MuJoCo)
|
||||
|
||||
You can now test policies before unleashing them on the physical robot using MuJoCo. To do so simply set `is_simulation=True` in config.
|
||||
|
||||
## Additional Resources
|
||||
|
||||
- [Unitree SDK Documentation](https://github.com/unitreerobotics/unitree_sdk2_python)
|
||||
- [GR00T-WholeBodyControl](https://github.com/NVlabs/GR00T-WholeBodyControl)
|
||||
- [Holosoma](https://github.com/amazon-far/holosoma)
|
||||
- [LeRobot Documentation](https://github.com/huggingface/lerobot)
|
||||
- [Unitree IL LeRobot](https://github.com/unitreerobotics/unitree_IL_lerobot)
|
||||
- [Unitree_IL_Lerobot](https://github.com/unitreerobotics/unitree_IL_lerobot)
|
||||
|
||||
---
|
||||
|
||||
_Last updated: March 2026_
|
||||
_Last updated: December 2025_
|
||||
|
||||
@@ -12,7 +12,6 @@ LeRobot provides several utilities for manipulating datasets:
|
||||
4. **Add Features** - Add new features to a dataset
|
||||
5. **Remove Features** - Remove features from a dataset
|
||||
6. **Convert to Video** - Convert image-based datasets to video format for efficient storage
|
||||
7. **Show the Info of Datasets** - Show the summary of datasets information such as number of episode etc.
|
||||
|
||||
The core implementation is in `lerobot.datasets.dataset_tools`.
|
||||
An example script detailing how to use the tools API is available in `examples/dataset/use_dataset_tools.py`.
|
||||
@@ -96,26 +95,26 @@ Convert an image-based dataset to video format, creating a new LeRobotDataset wh
|
||||
# Local-only: Save to a custom output directory (no hub push)
|
||||
lerobot-edit-dataset \
|
||||
--repo_id lerobot/pusht_image \
|
||||
--operation.type convert_image_to_video \
|
||||
--operation.type convert_to_video \
|
||||
--operation.output_dir /path/to/output/pusht_video
|
||||
|
||||
# Save with new repo_id (local storage)
|
||||
lerobot-edit-dataset \
|
||||
--repo_id lerobot/pusht_image \
|
||||
--new_repo_id lerobot/pusht_video \
|
||||
--operation.type convert_image_to_video
|
||||
--operation.type convert_to_video
|
||||
|
||||
# Convert and push to Hugging Face Hub
|
||||
lerobot-edit-dataset \
|
||||
--repo_id lerobot/pusht_image \
|
||||
--new_repo_id lerobot/pusht_video \
|
||||
--operation.type convert_image_to_video \
|
||||
--operation.type convert_to_video \
|
||||
--push_to_hub true
|
||||
|
||||
# Convert with custom video codec and quality settings
|
||||
lerobot-edit-dataset \
|
||||
--repo_id lerobot/pusht_image \
|
||||
--operation.type convert_image_to_video \
|
||||
--operation.type convert_to_video \
|
||||
--operation.output_dir outputs/pusht_video \
|
||||
--operation.vcodec libsvtav1 \
|
||||
--operation.pix_fmt yuv420p \
|
||||
@@ -125,23 +124,16 @@ lerobot-edit-dataset \
|
||||
# Convert only specific episodes
|
||||
lerobot-edit-dataset \
|
||||
--repo_id lerobot/pusht_image \
|
||||
--operation.type convert_image_to_video \
|
||||
--operation.type convert_to_video \
|
||||
--operation.output_dir outputs/pusht_video \
|
||||
--operation.episode_indices "[0, 1, 2, 5, 10]"
|
||||
|
||||
# Convert with multiple workers for parallel processing
|
||||
lerobot-edit-dataset \
|
||||
--repo_id lerobot/pusht_image \
|
||||
--operation.type convert_image_to_video \
|
||||
--operation.type convert_to_video \
|
||||
--operation.output_dir outputs/pusht_video \
|
||||
--operation.num_workers 8
|
||||
|
||||
# For memory-constrained systems, users can now specify limits:
|
||||
lerobot-edit-dataset \
|
||||
--repo_id lerobot/pusht_image \
|
||||
--operation.type convert_to_video \
|
||||
--operation.max_episodes_per_batch 50 \
|
||||
--operation.max_frames_per_batch 10000
|
||||
```
|
||||
|
||||
**Parameters:**
|
||||
@@ -157,30 +149,6 @@ lerobot-edit-dataset \
|
||||
|
||||
**Note:** The resulting dataset will be a proper LeRobotDataset with all cameras encoded as videos in the `videos/` directory, with parquet files containing only metadata (no raw image data). All episodes, stats, and tasks are preserved.
|
||||
|
||||
### Show the information of datasets
|
||||
|
||||
Show the information of datasets such as number of episode, number of frame, File size and so on.
|
||||
No change will be made to the dataset
|
||||
|
||||
```bash
|
||||
|
||||
# Show dataset information without feature details
|
||||
lerobot-edit-dataset \
|
||||
--repo_id lerobot/pusht_image \
|
||||
--operation.type info \
|
||||
|
||||
# Show dataset information with feature details
|
||||
lerobot-edit-dataset \
|
||||
--repo_id lerobot/pusht_image \
|
||||
--operation.type info \
|
||||
--operation.show_features true
|
||||
|
||||
```
|
||||
|
||||
**Parameters:**
|
||||
|
||||
- `parameters`: The flag to control show or no show dataset information with feature details.(default=false)
|
||||
|
||||
### Push to Hub
|
||||
|
||||
Add the `--push_to_hub true` flag to any command to automatically upload the resulting dataset to the Hugging Face Hub:
|
||||
|
||||
@@ -45,7 +45,7 @@ policy.type=wall_x
|
||||
For training WallX, you can use the standard LeRobot training script with the appropriate configuration:
|
||||
|
||||
```bash
|
||||
lerobot-train \
|
||||
python src/lerobot/scripts/lerobot_train.py \
|
||||
--dataset.repo_id=your_dataset \
|
||||
--policy.type=wall_x \
|
||||
--output_dir=./outputs/wallx_training \
|
||||
|
||||
@@ -154,7 +154,7 @@ lerobot-train \
|
||||
|
||||
```bash
|
||||
lerobot-train \
|
||||
--dataset.repo_id=<USER>/bimanual-so100-handover-cube \
|
||||
--dataset.repo_id=pepijn223/bimanual-so100-handover-cube \
|
||||
--output_dir=./outputs/xvla_bimanual \
|
||||
--job_name=xvla_so101_training \
|
||||
--policy.path="lerobot/xvla-base" \
|
||||
|
||||
@@ -22,7 +22,7 @@ lerobot-replay \
|
||||
--robot.type=so100_follower \
|
||||
--robot.port=/dev/tty.usbmodem58760431541 \
|
||||
--robot.id=black \
|
||||
--dataset.repo_id=<USER>/record-test \
|
||||
--dataset.repo_id=aliberts/record-test \
|
||||
--dataset.episode=2
|
||||
```
|
||||
"""
|
||||
@@ -57,7 +57,7 @@ class DatasetReplayConfig:
|
||||
repo_id: str
|
||||
# Episode to replay.
|
||||
episode: int
|
||||
# Root directory where the dataset will be stored (e.g. 'dataset/path'). If None, defaults to $HF_LEROBOT_HOME/repo_id.
|
||||
# Root directory where the dataset will be stored (e.g. 'dataset/path').
|
||||
root: str | Path | None = None
|
||||
# Limit the frames per second. By default, uses the policy fps.
|
||||
fps: int = 30
|
||||
@@ -78,28 +78,27 @@ def replay(cfg: ReplayConfig):
|
||||
|
||||
robot = make_robot_from_config(cfg.robot)
|
||||
dataset = LeRobotDataset(cfg.dataset.repo_id, root=cfg.dataset.root, episodes=[cfg.dataset.episode])
|
||||
actions = dataset.select_columns(ACTION)
|
||||
actions = dataset.hf_dataset.select_columns(ACTION)
|
||||
robot.connect()
|
||||
|
||||
try:
|
||||
log_say("Replaying episode", cfg.play_sounds, blocking=True)
|
||||
for idx in range(dataset.num_frames):
|
||||
start_episode_t = time.perf_counter()
|
||||
log_say("Replaying episode", cfg.play_sounds, blocking=True)
|
||||
for idx in range(dataset.num_frames):
|
||||
start_episode_t = time.perf_counter()
|
||||
|
||||
action_array = actions[idx][ACTION]
|
||||
action = {}
|
||||
for i, name in enumerate(dataset.features[ACTION]["names"]):
|
||||
key = f"{name.removeprefix('main_')}.pos"
|
||||
action[key] = action_array[i].item()
|
||||
action_array = actions[idx][ACTION]
|
||||
action = {}
|
||||
for i, name in enumerate(dataset.features[ACTION]["names"]):
|
||||
key = f"{name.removeprefix('main_')}.pos"
|
||||
action[key] = action_array[i].item()
|
||||
|
||||
action["shoulder_lift.pos"] = -(action["shoulder_lift.pos"] - 90)
|
||||
action["elbow_flex.pos"] -= 90
|
||||
robot.send_action(action)
|
||||
action["shoulder_lift.pos"] = -(action["shoulder_lift.pos"] - 90)
|
||||
action["elbow_flex.pos"] -= 90
|
||||
robot.send_action(action)
|
||||
|
||||
dt_s = time.perf_counter() - start_episode_t
|
||||
precise_sleep(max(1 / dataset.fps - dt_s, 0.0))
|
||||
finally:
|
||||
robot.disconnect()
|
||||
dt_s = time.perf_counter() - start_episode_t
|
||||
precise_sleep(max(1 / dataset.fps - dt_s, 0.0))
|
||||
|
||||
robot.disconnect()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
@@ -1,680 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
# Copyright 2025 The HuggingFace Inc. team. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
"""
|
||||
Create MP4 (or GIF) videos with sarm_progress overlay for specified episodes.
|
||||
|
||||
Downloads datasets from HuggingFace, seeks directly into the episode segment
|
||||
of the source video, draws a progress line on each frame, and writes the result.
|
||||
|
||||
Usage:
|
||||
python examples/dataset/create_progress_videos.py \
|
||||
--repo-id lerobot-data-collection/level2_final_quality3 \
|
||||
--episode 1100
|
||||
|
||||
python examples/dataset/create_progress_videos.py \
|
||||
--repo-id lerobot-data-collection/level2_final_quality3 \
|
||||
--episode 1100 \
|
||||
--camera-key observation.images.top \
|
||||
--output-dir ./my_videos \
|
||||
--gif
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import logging
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
from huggingface_hub import snapshot_download
|
||||
|
||||
GRAPH_Y_TOP_FRAC = 0.01
|
||||
GRAPH_Y_BOT_FRAC = 0.99
|
||||
LINE_THICKNESS = 3
|
||||
SHADOW_THICKNESS = 6
|
||||
REF_ALPHA = 0.45
|
||||
FILL_ALPHA = 0.55
|
||||
SCORE_FONT_SCALE = 0.8
|
||||
TASK_FONT_SCALE = 0.55
|
||||
|
||||
|
||||
def download_episode_metadata(repo_id: str, episode: int) -> Path:
|
||||
"""Download only the metadata and sarm_progress files for a dataset.
|
||||
|
||||
Args:
|
||||
repo_id: HuggingFace dataset repository ID.
|
||||
episode: Episode index (used for logging only; all meta is fetched).
|
||||
|
||||
Returns:
|
||||
Local cache path for the downloaded snapshot.
|
||||
"""
|
||||
logging.info("[1/4] Downloading metadata for %s (episode %d) ...", repo_id, episode)
|
||||
local_path = Path(
|
||||
snapshot_download(
|
||||
repo_id=repo_id,
|
||||
repo_type="dataset",
|
||||
allow_patterns=["meta/**", "sarm_progress.parquet"],
|
||||
ignore_patterns=["*.mp4"],
|
||||
)
|
||||
)
|
||||
return local_path
|
||||
|
||||
|
||||
def load_episode_meta(local_path: Path, episode: int, camera_key: str | None) -> dict:
|
||||
"""Read info.json and episode parquet to resolve fps, video path, and timestamps.
|
||||
|
||||
Args:
|
||||
local_path: Local cache directory containing meta/.
|
||||
episode: Episode index to look up.
|
||||
camera_key: Camera observation key (e.g. "observation.images.base").
|
||||
If None, the first available video key is used.
|
||||
|
||||
Returns:
|
||||
Dict with keys: fps, camera, video_rel, chunk_index, file_index,
|
||||
from_ts, to_ts, task_name.
|
||||
"""
|
||||
info = json.loads((local_path / "meta" / "info.json").read_text())
|
||||
fps = info["fps"]
|
||||
features = info["features"]
|
||||
|
||||
video_keys = [k for k, v in features.items() if v.get("dtype") == "video"]
|
||||
if not video_keys:
|
||||
raise RuntimeError("No video keys found in dataset features")
|
||||
|
||||
if camera_key is not None:
|
||||
if camera_key not in video_keys:
|
||||
raise RuntimeError(f"camera_key='{camera_key}' not found. Available: {video_keys}")
|
||||
selected_camera = camera_key
|
||||
else:
|
||||
selected_camera = video_keys[0]
|
||||
logging.info(" fps=%d camera='%s' all_cams=%s", fps, selected_camera, video_keys)
|
||||
|
||||
episode_rows = []
|
||||
for parquet_file in sorted((local_path / "meta" / "episodes").glob("**/*.parquet")):
|
||||
episode_rows.append(pd.read_parquet(parquet_file))
|
||||
episode_df = pd.concat(episode_rows, ignore_index=True)
|
||||
row = episode_df[episode_df["episode_index"] == episode]
|
||||
if row.empty:
|
||||
raise RuntimeError(f"Episode {episode} not found in episode metadata")
|
||||
row = row.iloc[0]
|
||||
|
||||
chunk_col = f"videos/{selected_camera}/chunk_index"
|
||||
file_col = f"videos/{selected_camera}/file_index"
|
||||
ts_from_col = f"videos/{selected_camera}/from_timestamp"
|
||||
ts_to_col = f"videos/{selected_camera}/to_timestamp"
|
||||
|
||||
if chunk_col not in row.index:
|
||||
chunk_col = f"{selected_camera}/chunk_index"
|
||||
file_col = f"{selected_camera}/file_index"
|
||||
ts_from_col = f"{selected_camera}/from_timestamp"
|
||||
ts_to_col = f"{selected_camera}/to_timestamp"
|
||||
if chunk_col not in row.index:
|
||||
raise RuntimeError(
|
||||
f"Cannot find video metadata columns for {selected_camera}.\nAvailable: {list(row.index)}"
|
||||
)
|
||||
|
||||
chunk_index = int(row[chunk_col])
|
||||
file_index = int(row[file_col])
|
||||
from_timestamp = float(row[ts_from_col])
|
||||
to_timestamp = float(row[ts_to_col])
|
||||
|
||||
video_template = info.get(
|
||||
"video_path", "videos/{video_key}/chunk-{chunk_index:03d}/file-{file_index:03d}.mp4"
|
||||
)
|
||||
video_rel = video_template.format(
|
||||
video_key=selected_camera,
|
||||
chunk_index=chunk_index,
|
||||
file_index=file_index,
|
||||
)
|
||||
|
||||
task_name = _resolve_task_name(row, local_path)
|
||||
|
||||
return {
|
||||
"fps": fps,
|
||||
"camera": selected_camera,
|
||||
"video_rel": video_rel,
|
||||
"chunk_index": chunk_index,
|
||||
"file_index": file_index,
|
||||
"from_ts": from_timestamp,
|
||||
"to_ts": to_timestamp,
|
||||
"task_name": task_name,
|
||||
}
|
||||
|
||||
|
||||
def _resolve_task_name(row: pd.Series, local_path: Path) -> str:
|
||||
"""Best-effort extraction of the task name for an episode row.
|
||||
|
||||
Args:
|
||||
row: Single-episode row from the episodes parquet.
|
||||
local_path: Dataset cache root.
|
||||
|
||||
Returns:
|
||||
Task name string, or empty string if unavailable.
|
||||
"""
|
||||
try:
|
||||
if "tasks" in row.index and row["tasks"] is not None:
|
||||
tasks_val = row["tasks"]
|
||||
if isinstance(tasks_val, (list, tuple, np.ndarray)) and len(tasks_val) > 0:
|
||||
return str(tasks_val[0])
|
||||
return str(tasks_val).strip("[]'")
|
||||
|
||||
tasks_parquet = local_path / "meta" / "tasks.parquet"
|
||||
if tasks_parquet.exists():
|
||||
tasks_df = pd.read_parquet(tasks_parquet)
|
||||
task_idx = int(row.get("task_index", 0)) if "task_index" in row.index else 0
|
||||
match = tasks_df[tasks_df["task_index"] == task_idx]
|
||||
if not match.empty:
|
||||
return str(match.index[0])
|
||||
except Exception as exc:
|
||||
logging.warning("Could not load task name: %s", exc)
|
||||
return ""
|
||||
|
||||
|
||||
def download_video_file(repo_id: str, local_path: Path, video_rel: str) -> Path:
|
||||
"""Download the specific video file if not already cached.
|
||||
|
||||
Args:
|
||||
repo_id: HuggingFace dataset repository ID.
|
||||
local_path: Local cache directory.
|
||||
video_rel: Relative path to the video file within the dataset.
|
||||
|
||||
Returns:
|
||||
Absolute path to the downloaded video file.
|
||||
"""
|
||||
video_path = local_path / video_rel
|
||||
if video_path.exists():
|
||||
logging.info(" Video already cached: %s", video_path)
|
||||
return video_path
|
||||
logging.info("[2/4] Downloading video file %s ...", video_rel)
|
||||
snapshot_download(
|
||||
repo_id=repo_id,
|
||||
repo_type="dataset",
|
||||
local_dir=str(local_path),
|
||||
allow_patterns=[video_rel],
|
||||
)
|
||||
if not video_path.exists():
|
||||
raise RuntimeError(f"Video not found after download: {video_path}")
|
||||
return video_path
|
||||
|
||||
|
||||
def load_progress_data(local_path: Path, episode: int) -> np.ndarray | None:
|
||||
"""Load sarm_progress values for an episode.
|
||||
|
||||
Args:
|
||||
local_path: Dataset cache root.
|
||||
episode: Episode index.
|
||||
|
||||
Returns:
|
||||
Sorted (N, 2) array of (frame_index, progress), or None if unavailable.
|
||||
"""
|
||||
parquet_path = local_path / "sarm_progress.parquet"
|
||||
if not parquet_path.exists():
|
||||
logging.warning("sarm_progress.parquet not found")
|
||||
return None
|
||||
df = pd.read_parquet(parquet_path)
|
||||
logging.info(" sarm_progress.parquet columns: %s", list(df.columns))
|
||||
episode_df = df[df["episode_index"] == episode].copy()
|
||||
if episode_df.empty:
|
||||
logging.warning("No sarm_progress rows for episode %d", episode)
|
||||
return None
|
||||
episode_df = episode_df.sort_values("frame_index")
|
||||
|
||||
if "progress_dense" in episode_df.columns and episode_df["progress_dense"].notna().any():
|
||||
progress_column = "progress_dense"
|
||||
elif "progress_sparse" in episode_df.columns:
|
||||
progress_column = "progress_sparse"
|
||||
else:
|
||||
progress_columns = [c for c in episode_df.columns if "progress" in c.lower()]
|
||||
if not progress_columns:
|
||||
return None
|
||||
progress_column = progress_columns[0]
|
||||
|
||||
logging.info(" Using progress column: '%s'", progress_column)
|
||||
return episode_df[["frame_index", progress_column]].rename(columns={progress_column: "progress"}).values
|
||||
|
||||
|
||||
def _precompute_pixel_coords(
|
||||
progress_data: np.ndarray,
|
||||
num_frames: int,
|
||||
frame_width: int,
|
||||
frame_height: int,
|
||||
) -> np.ndarray:
|
||||
"""Map progress samples to pixel coordinates for overlay drawing.
|
||||
|
||||
Args:
|
||||
progress_data: (N, 2) array of (frame_index, progress).
|
||||
num_frames: Total number of video frames.
|
||||
frame_width: Video width in pixels.
|
||||
frame_height: Video height in pixels.
|
||||
|
||||
Returns:
|
||||
(N, 2) array of (x, y) pixel coordinates.
|
||||
"""
|
||||
frame_indices = progress_data[:, 0].astype(float)
|
||||
progress_values = np.clip(progress_data[:, 1].astype(float), 0.0, 1.0)
|
||||
|
||||
y_top = int(frame_height * GRAPH_Y_TOP_FRAC)
|
||||
y_bot = int(frame_height * GRAPH_Y_BOT_FRAC)
|
||||
graph_height = y_bot - y_top
|
||||
|
||||
x_coords = (frame_indices / (num_frames - 1) * (frame_width - 1)).astype(int)
|
||||
y_coords = (y_bot - progress_values * graph_height).astype(int)
|
||||
|
||||
return np.stack([x_coords, y_coords], axis=1)
|
||||
|
||||
|
||||
def _progress_color(normalized_position: float) -> tuple[int, int, int]:
|
||||
"""Interpolate BGR color from red to green based on position in [0, 1].
|
||||
|
||||
Args:
|
||||
normalized_position: Value in [0, 1] indicating how far along the episode.
|
||||
|
||||
Returns:
|
||||
BGR color tuple.
|
||||
"""
|
||||
red = int(255 * (1.0 - normalized_position))
|
||||
green = int(255 * normalized_position)
|
||||
return (0, green, red)
|
||||
|
||||
|
||||
def _prerender_fill_polygon(
|
||||
pixel_coords: np.ndarray,
|
||||
frame_width: int,
|
||||
frame_height: int,
|
||||
) -> np.ndarray:
|
||||
"""Pre-render the grey fill polygon under the progress curve as a BGRA image.
|
||||
|
||||
Args:
|
||||
pixel_coords: (N, 2) array of (x, y) pixel coordinates.
|
||||
frame_width: Video width in pixels.
|
||||
frame_height: Video height in pixels.
|
||||
|
||||
Returns:
|
||||
BGRA image array of shape (frame_height, frame_width, 4).
|
||||
"""
|
||||
y_bot = int(frame_height * GRAPH_Y_BOT_FRAC)
|
||||
fill_image = np.zeros((frame_height, frame_width, 4), dtype=np.uint8)
|
||||
polygon = np.concatenate(
|
||||
[
|
||||
pixel_coords,
|
||||
[[pixel_coords[-1][0], y_bot], [pixel_coords[0][0], y_bot]],
|
||||
],
|
||||
axis=0,
|
||||
).astype(np.int32)
|
||||
cv2.fillPoly(fill_image, [polygon], color=(128, 128, 128, int(255 * FILL_ALPHA)))
|
||||
return fill_image
|
||||
|
||||
|
||||
def _alpha_composite_region(base: np.ndarray, overlay_bgra: np.ndarray, x_limit: int) -> None:
|
||||
"""Blend BGRA overlay onto BGR base in-place, up to x_limit columns.
|
||||
|
||||
Args:
|
||||
base: BGR frame to draw on (modified in-place).
|
||||
overlay_bgra: BGRA overlay image.
|
||||
x_limit: Only blend columns [0, x_limit).
|
||||
"""
|
||||
if x_limit <= 0:
|
||||
return
|
||||
region_base = base[:, :x_limit]
|
||||
region_overlay = overlay_bgra[:, :x_limit]
|
||||
alpha = region_overlay[:, :, 3:4].astype(np.float32) / 255.0
|
||||
region_base[:] = np.clip(
|
||||
region_overlay[:, :, :3].astype(np.float32) * alpha + region_base.astype(np.float32) * (1.0 - alpha),
|
||||
0,
|
||||
255,
|
||||
).astype(np.uint8)
|
||||
|
||||
|
||||
def _draw_text_outlined(
|
||||
frame: np.ndarray,
|
||||
text: str,
|
||||
position: tuple[int, int],
|
||||
font_scale: float,
|
||||
thickness: int = 1,
|
||||
) -> None:
|
||||
"""Draw white text with a dark outline for readability on any background.
|
||||
|
||||
Args:
|
||||
frame: BGR image to draw on (modified in-place).
|
||||
text: String to render.
|
||||
position: (x, y) bottom-left corner of the text.
|
||||
font_scale: OpenCV font scale.
|
||||
thickness: Text stroke thickness.
|
||||
"""
|
||||
font = cv2.FONT_HERSHEY_SIMPLEX
|
||||
cv2.putText(frame, text, position, font, font_scale, (0, 0, 0), thickness + 2, cv2.LINE_AA)
|
||||
cv2.putText(frame, text, position, font, font_scale, (255, 255, 255), thickness, cv2.LINE_AA)
|
||||
|
||||
|
||||
def composite_progress_video(
|
||||
video_path: Path,
|
||||
from_timestamp: float,
|
||||
to_timestamp: float,
|
||||
progress_data: np.ndarray,
|
||||
output_path: Path,
|
||||
fps: float,
|
||||
task_name: str = "",
|
||||
) -> Path:
|
||||
"""Read episode frames by seeking into the source video, draw progress overlay, write output.
|
||||
|
||||
Uses cv2.CAP_PROP_POS_MSEC to seek directly into the source video,
|
||||
eliminating the need for an intermediate clip file.
|
||||
|
||||
Args:
|
||||
video_path: Path to the full source video file.
|
||||
from_timestamp: Start timestamp of the episode in seconds.
|
||||
to_timestamp: End timestamp of the episode in seconds.
|
||||
progress_data: (N, 2) array of (frame_index, progress).
|
||||
output_path: Path to write the output MP4.
|
||||
fps: Frames per second for the output video.
|
||||
task_name: Optional task name to display at the top of the video.
|
||||
|
||||
Returns:
|
||||
Path to the written output file (MP4).
|
||||
"""
|
||||
capture = cv2.VideoCapture(str(video_path))
|
||||
try:
|
||||
capture.set(cv2.CAP_PROP_POS_MSEC, from_timestamp * 1000)
|
||||
|
||||
frame_width = int(capture.get(cv2.CAP_PROP_FRAME_WIDTH))
|
||||
frame_height = int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT))
|
||||
duration_seconds = to_timestamp - from_timestamp
|
||||
num_frames = int(round(duration_seconds * fps))
|
||||
|
||||
logging.info(
|
||||
" Video: %dx%d, %d frames @ %.1f fps (%.2fs)",
|
||||
frame_width,
|
||||
frame_height,
|
||||
num_frames,
|
||||
fps,
|
||||
duration_seconds,
|
||||
)
|
||||
|
||||
pixel_coords = _precompute_pixel_coords(progress_data, num_frames, frame_width, frame_height)
|
||||
y_ref = int(frame_height * GRAPH_Y_TOP_FRAC)
|
||||
|
||||
fill_image = _prerender_fill_polygon(pixel_coords, frame_width, frame_height)
|
||||
|
||||
ref_line_image = np.zeros((frame_height, frame_width, 4), dtype=np.uint8)
|
||||
cv2.line(
|
||||
ref_line_image,
|
||||
(0, y_ref),
|
||||
(frame_width - 1, y_ref),
|
||||
(200, 200, 200, int(255 * REF_ALPHA)),
|
||||
1,
|
||||
cv2.LINE_AA,
|
||||
)
|
||||
|
||||
frame_indices = progress_data[:, 0].astype(int)
|
||||
progress_values = progress_data[:, 1].astype(float)
|
||||
|
||||
logging.info("[3/4] Compositing %d frames ...", num_frames)
|
||||
fourcc = cv2.VideoWriter_fourcc(*"mp4v")
|
||||
writer = cv2.VideoWriter(str(output_path), fourcc, fps, (frame_width, frame_height))
|
||||
|
||||
for frame_idx in range(num_frames):
|
||||
ret, frame = capture.read()
|
||||
if not ret:
|
||||
break
|
||||
|
||||
drawn_count = int(np.searchsorted(frame_indices, frame_idx, side="right"))
|
||||
x_current = (
|
||||
int(pixel_coords[min(drawn_count, len(pixel_coords)) - 1][0]) + 1 if drawn_count > 0 else 0
|
||||
)
|
||||
|
||||
_alpha_composite_region(frame, ref_line_image, frame_width)
|
||||
_alpha_composite_region(frame, fill_image, x_current)
|
||||
|
||||
if drawn_count >= 2:
|
||||
time_position = (drawn_count - 1) / max(len(progress_values) - 1, 1)
|
||||
line_color = _progress_color(time_position)
|
||||
points = pixel_coords[:drawn_count].reshape(-1, 1, 2).astype(np.int32)
|
||||
cv2.polylines(
|
||||
frame,
|
||||
[points],
|
||||
isClosed=False,
|
||||
color=(255, 255, 255),
|
||||
thickness=SHADOW_THICKNESS,
|
||||
lineType=cv2.LINE_AA,
|
||||
)
|
||||
cv2.polylines(
|
||||
frame,
|
||||
[points],
|
||||
isClosed=False,
|
||||
color=line_color,
|
||||
thickness=LINE_THICKNESS,
|
||||
lineType=cv2.LINE_AA,
|
||||
)
|
||||
|
||||
if drawn_count > 0:
|
||||
score = float(progress_values[min(drawn_count, len(progress_values)) - 1])
|
||||
score_text = f"{score:.2f}"
|
||||
(text_width, _), _ = cv2.getTextSize(
|
||||
score_text, cv2.FONT_HERSHEY_SIMPLEX, SCORE_FONT_SCALE, 2
|
||||
)
|
||||
score_x = frame_width - text_width - 12
|
||||
score_y = frame_height - 12
|
||||
time_position = (drawn_count - 1) / max(len(progress_values) - 1, 1)
|
||||
score_color = _progress_color(time_position)
|
||||
cv2.putText(
|
||||
frame,
|
||||
score_text,
|
||||
(score_x, score_y),
|
||||
cv2.FONT_HERSHEY_SIMPLEX,
|
||||
SCORE_FONT_SCALE,
|
||||
(0, 0, 0),
|
||||
4,
|
||||
cv2.LINE_AA,
|
||||
)
|
||||
cv2.putText(
|
||||
frame,
|
||||
score_text,
|
||||
(score_x, score_y),
|
||||
cv2.FONT_HERSHEY_SIMPLEX,
|
||||
SCORE_FONT_SCALE,
|
||||
score_color,
|
||||
2,
|
||||
cv2.LINE_AA,
|
||||
)
|
||||
|
||||
if task_name:
|
||||
(text_width, _), _ = cv2.getTextSize(task_name, cv2.FONT_HERSHEY_SIMPLEX, TASK_FONT_SCALE, 1)
|
||||
task_x = max((frame_width - text_width) // 2, 4)
|
||||
_draw_text_outlined(frame, task_name, (task_x, 22), TASK_FONT_SCALE)
|
||||
|
||||
writer.write(frame)
|
||||
if frame_idx % 100 == 0:
|
||||
logging.info(" Frame %d/%d ...", frame_idx, num_frames)
|
||||
|
||||
writer.release()
|
||||
finally:
|
||||
capture.release()
|
||||
|
||||
logging.info(" MP4 written: %s", output_path)
|
||||
return output_path
|
||||
|
||||
|
||||
def convert_mp4_to_gif(mp4_path: Path) -> Path:
|
||||
"""Convert an MP4 to an optimized GIF using ffmpeg palette generation.
|
||||
|
||||
Args:
|
||||
mp4_path: Path to the source MP4 file.
|
||||
|
||||
Returns:
|
||||
Path to the generated GIF file.
|
||||
"""
|
||||
capture = cv2.VideoCapture(str(mp4_path))
|
||||
frame_width = int(capture.get(cv2.CAP_PROP_FRAME_WIDTH))
|
||||
capture.release()
|
||||
|
||||
gif_path = mp4_path.with_suffix(".gif")
|
||||
palette_path = mp4_path.parent / "_palette.png"
|
||||
|
||||
logging.info("[4/4] Converting to GIF ...")
|
||||
result_palette = subprocess.run( # nosec B607
|
||||
[
|
||||
"ffmpeg",
|
||||
"-y",
|
||||
"-i",
|
||||
str(mp4_path),
|
||||
"-vf",
|
||||
f"fps=10,scale={frame_width}:-1:flags=lanczos,palettegen=max_colors=128:stats_mode=diff",
|
||||
"-update",
|
||||
"1",
|
||||
str(palette_path),
|
||||
],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
)
|
||||
if result_palette.returncode != 0:
|
||||
logging.warning("palettegen failed:\n%s", result_palette.stderr[-500:])
|
||||
|
||||
result_gif = subprocess.run( # nosec B607
|
||||
[
|
||||
"ffmpeg",
|
||||
"-y",
|
||||
"-i",
|
||||
str(mp4_path),
|
||||
"-i",
|
||||
str(palette_path),
|
||||
"-filter_complex",
|
||||
f"fps=10,scale={frame_width}:-1:flags=lanczos[v];[v][1:v]paletteuse=dither=bayer:bayer_scale=3",
|
||||
str(gif_path),
|
||||
],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
)
|
||||
if result_gif.returncode != 0:
|
||||
logging.warning("GIF encode failed:\n%s", result_gif.stderr[-500:])
|
||||
|
||||
palette_path.unlink(missing_ok=True)
|
||||
logging.info(" GIF written: %s", gif_path)
|
||||
return gif_path
|
||||
|
||||
|
||||
def process_dataset(
|
||||
repo_id: str,
|
||||
episode: int,
|
||||
camera_key: str | None,
|
||||
output_dir: Path,
|
||||
create_gif: bool = False,
|
||||
) -> Path | None:
|
||||
"""Full pipeline: download, extract metadata, composite progress, write output.
|
||||
|
||||
Args:
|
||||
repo_id: HuggingFace dataset repository ID.
|
||||
episode: Episode index.
|
||||
camera_key: Camera key to use, or None for auto-selection.
|
||||
output_dir: Directory to write output files.
|
||||
create_gif: If True, also generate a GIF from the MP4.
|
||||
|
||||
Returns:
|
||||
Path to the final output file, or None on failure.
|
||||
"""
|
||||
safe_name = repo_id.replace("/", "_")
|
||||
logging.info("Processing: %s | episode %d", repo_id, episode)
|
||||
|
||||
local_path = download_episode_metadata(repo_id, episode)
|
||||
logging.info(" Local cache: %s", local_path)
|
||||
|
||||
episode_meta = load_episode_meta(local_path, episode, camera_key)
|
||||
logging.info(" Episode meta: %s", episode_meta)
|
||||
|
||||
video_path = download_video_file(repo_id, local_path, episode_meta["video_rel"])
|
||||
|
||||
progress_data = load_progress_data(local_path, episode)
|
||||
if progress_data is None:
|
||||
logging.error("Could not load sarm_progress data. Skipping overlay.")
|
||||
return None
|
||||
|
||||
logging.info(" Progress frames: %d", len(progress_data))
|
||||
|
||||
output_path = output_dir / f"{safe_name}_ep{episode}_progress.mp4"
|
||||
final_path = composite_progress_video(
|
||||
video_path=video_path,
|
||||
from_timestamp=episode_meta["from_ts"],
|
||||
to_timestamp=episode_meta["to_ts"],
|
||||
progress_data=progress_data,
|
||||
output_path=output_path,
|
||||
fps=episode_meta["fps"],
|
||||
task_name=episode_meta.get("task_name", ""),
|
||||
)
|
||||
|
||||
if create_gif:
|
||||
final_path = convert_mp4_to_gif(final_path)
|
||||
|
||||
logging.info("Done: %s", final_path)
|
||||
return final_path
|
||||
|
||||
|
||||
def main() -> None:
|
||||
parser = argparse.ArgumentParser(
|
||||
description="Create MP4/GIF videos with sarm_progress overlay for dataset episodes."
|
||||
)
|
||||
parser.add_argument(
|
||||
"--repo-id",
|
||||
type=str,
|
||||
required=True,
|
||||
help="HuggingFace dataset repository ID (e.g. 'lerobot-data-collection/level2_final_quality3').",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--episode",
|
||||
type=int,
|
||||
required=True,
|
||||
help="Episode index to visualize.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--camera-key",
|
||||
type=str,
|
||||
default=None,
|
||||
help="Camera observation key (e.g. 'observation.images.base'). Auto-selects first camera if omitted.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--output-dir",
|
||||
type=Path,
|
||||
default=Path("progress_videos"),
|
||||
help="Directory to write output files (default: ./progress_videos).",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--gif",
|
||||
action="store_true",
|
||||
help="Also generate a GIF from the MP4 output.",
|
||||
)
|
||||
args = parser.parse_args()
|
||||
|
||||
logging.basicConfig(level=logging.INFO, format="%(levelname)s: %(message)s")
|
||||
|
||||
args.output_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
result = process_dataset(
|
||||
repo_id=args.repo_id,
|
||||
episode=args.episode,
|
||||
camera_key=args.camera_key,
|
||||
output_dir=args.output_dir,
|
||||
create_gif=args.gif,
|
||||
)
|
||||
|
||||
if result:
|
||||
logging.info("Output: %s", result)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -32,8 +32,7 @@ import torch
|
||||
from huggingface_hub import HfApi
|
||||
|
||||
import lerobot
|
||||
from lerobot.datasets.dataset_metadata import LeRobotDatasetMetadata
|
||||
from lerobot.datasets.lerobot_dataset import LeRobotDataset
|
||||
from lerobot.datasets.lerobot_dataset import LeRobotDataset, LeRobotDatasetMetadata
|
||||
|
||||
|
||||
def main():
|
||||
@@ -88,8 +87,9 @@ def main():
|
||||
# The previous metadata class is contained in the 'meta' attribute of the dataset:
|
||||
print(dataset.meta)
|
||||
|
||||
# You can inspect the dataset using its repr:
|
||||
print(dataset)
|
||||
# LeRobotDataset actually wraps an underlying Hugging Face dataset
|
||||
# (see https://huggingface.co/docs/datasets for more information).
|
||||
print(dataset.hf_dataset)
|
||||
|
||||
# LeRobot datasets also subclasses PyTorch datasets so you can do everything you know and love from working
|
||||
# with the latter, like iterating through the dataset.
|
||||
|
||||
@@ -1,490 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
# Copyright 2025 The HuggingFace Inc. team. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
"""
|
||||
SLURM-distributed SARM RA-BC annotation pipeline.
|
||||
|
||||
Computes SARM progress values for all frames in a dataset, distributed across
|
||||
SLURM workers, then merges the shards into a single sarm_progress.parquet.
|
||||
|
||||
Two subcommands, each a separate SLURM submission:
|
||||
|
||||
compute – N workers, each computes progress for a subset of episodes
|
||||
aggregate – 1 worker, merges N shards into sarm_progress.parquet, pushes to hub
|
||||
|
||||
Usage:
|
||||
python slurm_compute_rabc.py compute \\
|
||||
--repo-id user/dataset --reward-model-path user/sarm_model \\
|
||||
--stride 10 --device cpu --workers 50 --partition cpu
|
||||
|
||||
python slurm_compute_rabc.py aggregate \\
|
||||
--repo-id user/dataset --reward-model-path user/sarm_model \\
|
||||
--partition cpu --push-to-hub
|
||||
"""
|
||||
|
||||
import argparse
|
||||
from pathlib import Path
|
||||
|
||||
from datatrove.executor import LocalPipelineExecutor
|
||||
from datatrove.executor.slurm import SlurmPipelineExecutor
|
||||
from datatrove.pipeline.base import PipelineStep
|
||||
|
||||
|
||||
class ComputeProgressShards(PipelineStep):
|
||||
"""Each worker computes SARM progress for its assigned episodes."""
|
||||
|
||||
def __init__(
|
||||
self, repo_id, reward_model_path, stride=1, head_mode="sparse", device="cpu", shard_dir="rabc_shards"
|
||||
):
|
||||
super().__init__()
|
||||
if stride < 1:
|
||||
raise ValueError(f"stride must be >= 1, got {stride}")
|
||||
self.repo_id = repo_id
|
||||
self.reward_model_path = reward_model_path
|
||||
self.stride = stride
|
||||
self.head_mode = head_mode
|
||||
self.device = device
|
||||
self.shard_dir = shard_dir
|
||||
|
||||
def run(self, data=None, rank: int = 0, world_size: int = 1):
|
||||
import logging
|
||||
from pathlib import Path
|
||||
|
||||
import numpy as np
|
||||
import pyarrow as pa
|
||||
import pyarrow.parquet as pq
|
||||
import torch
|
||||
from tqdm import tqdm
|
||||
|
||||
from lerobot.policies.sarm.compute_rabc_weights import (
|
||||
generate_all_frame_indices,
|
||||
interpolate_progress,
|
||||
load_sarm_resources,
|
||||
)
|
||||
from lerobot.utils.utils import init_logging
|
||||
|
||||
init_logging()
|
||||
|
||||
dataset, reward_model, preprocess = load_sarm_resources(
|
||||
self.repo_id,
|
||||
self.reward_model_path,
|
||||
self.device,
|
||||
)
|
||||
|
||||
if hasattr(preprocess, "eval"):
|
||||
preprocess.eval()
|
||||
for step in preprocess.steps:
|
||||
if hasattr(step, "eval"):
|
||||
step.eval()
|
||||
|
||||
image_key = reward_model.config.image_key
|
||||
state_key = reward_model.config.state_key
|
||||
frame_gap = reward_model.config.frame_gap
|
||||
center_idx = reward_model.config.n_obs_steps // 2
|
||||
|
||||
dual_mode = reward_model.config.uses_dual_heads
|
||||
compute_sparse = self.head_mode in ("sparse", "both") or not dual_mode
|
||||
compute_dense = self.head_mode in ("dense", "both") and dual_mode
|
||||
|
||||
my_episodes = list(range(dataset.num_episodes))[rank::world_size]
|
||||
if not my_episodes:
|
||||
logging.info(f"Rank {rank}: no episodes assigned")
|
||||
return
|
||||
logging.info(f"Rank {rank}: {len(my_episodes)} / {dataset.num_episodes} episodes")
|
||||
|
||||
all_rows = []
|
||||
|
||||
for ep_idx in tqdm(my_episodes, desc=f"Rank {rank}"):
|
||||
ep = dataset.meta.episodes[ep_idx]
|
||||
ep_start, ep_end = ep["dataset_from_index"], ep["dataset_to_index"]
|
||||
task = dataset[ep_start].get("task", "perform the task")
|
||||
|
||||
all_ep_indices = generate_all_frame_indices(ep_start, ep_end, frame_gap)
|
||||
if self.stride > 1:
|
||||
compute_indices = [i for i in all_ep_indices if (i - ep_start) % self.stride == 0]
|
||||
if (ep_end - 1) not in compute_indices:
|
||||
compute_indices.append(ep_end - 1)
|
||||
compute_indices = sorted(set(compute_indices))
|
||||
else:
|
||||
compute_indices = all_ep_indices
|
||||
|
||||
frame_results = {}
|
||||
for qi in tqdm(compute_indices, desc=f" Ep {ep_idx}", leave=False):
|
||||
try:
|
||||
sample = dataset[qi]
|
||||
batch = {
|
||||
image_key: sample[image_key],
|
||||
"task": task,
|
||||
"index": qi,
|
||||
"episode_index": ep_idx,
|
||||
}
|
||||
if state_key in sample:
|
||||
batch[state_key] = sample[state_key]
|
||||
|
||||
with torch.no_grad():
|
||||
processed = preprocess(batch)
|
||||
vf = processed["video_features"].to(self.device)
|
||||
tf = processed["text_features"].to(self.device)
|
||||
sf = processed.get("state_features")
|
||||
if sf is not None:
|
||||
sf = sf.to(self.device)
|
||||
lengths = processed.get("lengths")
|
||||
|
||||
sparse_val = dense_val = np.nan
|
||||
if compute_sparse:
|
||||
r = reward_model.calculate_rewards(
|
||||
text_embeddings=tf,
|
||||
video_embeddings=vf,
|
||||
state_features=sf,
|
||||
lengths=lengths,
|
||||
return_all_frames=True,
|
||||
head_mode="sparse",
|
||||
)
|
||||
sparse_val = float(r[0, center_idx] if r.ndim == 2 else r[center_idx])
|
||||
if compute_dense:
|
||||
r = reward_model.calculate_rewards(
|
||||
text_embeddings=tf,
|
||||
video_embeddings=vf,
|
||||
state_features=sf,
|
||||
lengths=lengths,
|
||||
return_all_frames=True,
|
||||
head_mode="dense",
|
||||
)
|
||||
dense_val = float(r[0, center_idx] if r.ndim == 2 else r[center_idx])
|
||||
|
||||
frame_results[qi] = (sparse_val, dense_val)
|
||||
except Exception as e:
|
||||
logging.warning(f"Failed frame {qi}: {e}")
|
||||
|
||||
if not frame_results:
|
||||
logging.warning(f"Episode {ep_idx}: all frames failed, skipping")
|
||||
continue
|
||||
|
||||
# Interpolate to all frames in this episode
|
||||
computed_idx = np.array(sorted(frame_results.keys()))
|
||||
all_frame_arr = np.arange(ep_start, ep_end)
|
||||
|
||||
sparse_vals = np.array([frame_results[i][0] for i in computed_idx]) if compute_sparse else None
|
||||
dense_vals = np.array([frame_results[i][1] for i in computed_idx]) if compute_dense else None
|
||||
|
||||
if self.stride > 1 and len(computed_idx) > 1:
|
||||
if compute_sparse:
|
||||
sparse_vals = interpolate_progress(computed_idx, sparse_vals, all_frame_arr)
|
||||
if compute_dense:
|
||||
dense_vals = interpolate_progress(computed_idx, dense_vals, all_frame_arr)
|
||||
output_frames = all_frame_arr
|
||||
else:
|
||||
# Use only successfully computed frames to avoid indexing mismatch on failures
|
||||
output_frames = computed_idx
|
||||
|
||||
for i, fi in enumerate(output_frames):
|
||||
row = {"index": int(fi), "episode_index": ep_idx, "frame_index": int(fi - ep_start)}
|
||||
if compute_sparse:
|
||||
row["progress_sparse"] = float(sparse_vals[i])
|
||||
if compute_dense:
|
||||
row["progress_dense"] = float(dense_vals[i])
|
||||
all_rows.append(row)
|
||||
|
||||
if all_rows:
|
||||
import pandas as pd
|
||||
|
||||
df = pd.DataFrame(all_rows).sort_values("index").reset_index(drop=True)
|
||||
table = pa.Table.from_pandas(df, preserve_index=False)
|
||||
table = table.replace_schema_metadata({b"reward_model_path": self.reward_model_path.encode()})
|
||||
shard_dir = Path(self.shard_dir)
|
||||
shard_dir.mkdir(parents=True, exist_ok=True)
|
||||
out = shard_dir / f"shard_{rank:05d}.parquet"
|
||||
pq.write_table(table, out)
|
||||
logging.info(f"Rank {rank}: saved {len(df)} rows to {out}")
|
||||
|
||||
|
||||
class AggregateProgress(PipelineStep):
|
||||
"""Merge all shard parquets into final sarm_progress.parquet."""
|
||||
|
||||
def __init__(self, repo_id, reward_model_path, shard_dir="rabc_shards", push_to_hub=False):
|
||||
super().__init__()
|
||||
self.repo_id = repo_id
|
||||
self.reward_model_path = reward_model_path
|
||||
self.shard_dir = shard_dir
|
||||
self.push_to_hub = push_to_hub
|
||||
|
||||
def run(self, data=None, rank: int = 0, world_size: int = 1):
|
||||
import datetime
|
||||
import logging
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
import pandas as pd
|
||||
import pyarrow as pa
|
||||
import pyarrow.parquet as pq
|
||||
|
||||
from lerobot.datasets.lerobot_dataset import LeRobotDataset
|
||||
from lerobot.utils.utils import init_logging
|
||||
|
||||
init_logging()
|
||||
if rank != 0:
|
||||
return
|
||||
|
||||
shard_dir = Path(self.shard_dir)
|
||||
shards = sorted(shard_dir.glob("shard_*.parquet"))
|
||||
if not shards:
|
||||
raise FileNotFoundError(f"No shards found in {shard_dir}")
|
||||
|
||||
# Log shard modification time range to help detect stale files
|
||||
mtimes = [os.path.getmtime(s) for s in shards]
|
||||
oldest = datetime.datetime.fromtimestamp(min(mtimes)).isoformat(timespec="seconds")
|
||||
newest = datetime.datetime.fromtimestamp(max(mtimes)).isoformat(timespec="seconds")
|
||||
logging.info(f"Aggregating {len(shards)} shards (oldest: {oldest}, newest: {newest})")
|
||||
|
||||
df = pd.concat([pd.read_parquet(s) for s in shards], ignore_index=True)
|
||||
df = df.sort_values("index").reset_index(drop=True)
|
||||
|
||||
table = pa.Table.from_pandas(df, preserve_index=False)
|
||||
table = table.replace_schema_metadata({b"reward_model_path": self.reward_model_path.encode()})
|
||||
|
||||
temp_ds = LeRobotDataset(self.repo_id, download_videos=False)
|
||||
out_path = Path(temp_ds.root) / "sarm_progress.parquet"
|
||||
out_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
pq.write_table(table, out_path)
|
||||
logging.info(f"Saved {len(df)} rows to {out_path}")
|
||||
|
||||
for col in ["progress_sparse", "progress_dense"]:
|
||||
if col in df.columns:
|
||||
v = df[col].dropna()
|
||||
logging.info(
|
||||
f"{col}: mean={v.mean():.4f} std={v.std():.4f} min={v.min():.4f} max={v.max():.4f}"
|
||||
)
|
||||
|
||||
if self.push_to_hub:
|
||||
from huggingface_hub import HfApi
|
||||
|
||||
api = HfApi()
|
||||
hub_path = "sarm_progress.parquet"
|
||||
logging.info(f"Uploading to {self.repo_id}/{hub_path}")
|
||||
api.upload_file(
|
||||
path_or_fileobj=str(out_path),
|
||||
path_in_repo=hub_path,
|
||||
repo_id=self.repo_id,
|
||||
repo_type="dataset",
|
||||
)
|
||||
logging.info(f"Uploaded: https://huggingface.co/datasets/{self.repo_id}/blob/main/{hub_path}")
|
||||
|
||||
|
||||
def make_compute_executor(
|
||||
repo_id,
|
||||
reward_model_path,
|
||||
stride,
|
||||
head_mode,
|
||||
device,
|
||||
shard_dir,
|
||||
logs_dir,
|
||||
job_name,
|
||||
slurm,
|
||||
workers,
|
||||
partition,
|
||||
cpus_per_task,
|
||||
mem_per_cpu,
|
||||
):
|
||||
kwargs = {
|
||||
"pipeline": [
|
||||
ComputeProgressShards(repo_id, reward_model_path, stride, head_mode, device, str(shard_dir)),
|
||||
],
|
||||
"logging_dir": str(logs_dir / job_name),
|
||||
}
|
||||
|
||||
if slurm:
|
||||
kwargs.update(
|
||||
{
|
||||
"job_name": job_name,
|
||||
"tasks": workers,
|
||||
"workers": workers,
|
||||
"time": "24:00:00",
|
||||
"partition": partition,
|
||||
"cpus_per_task": cpus_per_task,
|
||||
"sbatch_args": {"mem-per-cpu": mem_per_cpu},
|
||||
}
|
||||
)
|
||||
return SlurmPipelineExecutor(**kwargs)
|
||||
|
||||
kwargs.update({"tasks": workers, "workers": 1})
|
||||
return LocalPipelineExecutor(**kwargs)
|
||||
|
||||
|
||||
def make_aggregate_executor(
|
||||
repo_id,
|
||||
reward_model_path,
|
||||
shard_dir,
|
||||
logs_dir,
|
||||
job_name,
|
||||
slurm,
|
||||
partition,
|
||||
cpus_per_task,
|
||||
mem_per_cpu,
|
||||
push_to_hub,
|
||||
):
|
||||
kwargs = {
|
||||
"pipeline": [
|
||||
AggregateProgress(repo_id, reward_model_path, str(shard_dir), push_to_hub),
|
||||
],
|
||||
"logging_dir": str(logs_dir / job_name),
|
||||
}
|
||||
|
||||
if slurm:
|
||||
kwargs.update(
|
||||
{
|
||||
"job_name": job_name,
|
||||
"tasks": 1,
|
||||
"workers": 1,
|
||||
"time": "02:00:00",
|
||||
"partition": partition,
|
||||
"cpus_per_task": cpus_per_task,
|
||||
"sbatch_args": {"mem-per-cpu": mem_per_cpu},
|
||||
}
|
||||
)
|
||||
return SlurmPipelineExecutor(**kwargs)
|
||||
|
||||
kwargs.update({"tasks": 1, "workers": 1})
|
||||
return LocalPipelineExecutor(**kwargs)
|
||||
|
||||
|
||||
def _add_shared_args(p):
|
||||
p.add_argument(
|
||||
"--repo-id",
|
||||
type=str,
|
||||
required=True,
|
||||
help="Hugging Face repository identifier, e.g. 'user/dataset'.",
|
||||
)
|
||||
p.add_argument(
|
||||
"--shard-dir",
|
||||
type=Path,
|
||||
default=Path("rabc_shards"),
|
||||
help="Directory to read/write per-rank parquet shards.",
|
||||
)
|
||||
p.add_argument(
|
||||
"--logs-dir",
|
||||
type=Path,
|
||||
default=Path("logs"),
|
||||
help="Directory for datatrove logs.",
|
||||
)
|
||||
p.add_argument(
|
||||
"--job-name",
|
||||
type=str,
|
||||
default=None,
|
||||
help="SLURM job name (defaults to rabc_<subcommand>).",
|
||||
)
|
||||
p.add_argument(
|
||||
"--slurm",
|
||||
type=int,
|
||||
default=1,
|
||||
help="1 = submit via SLURM; 0 = run locally (useful for debugging).",
|
||||
)
|
||||
p.add_argument(
|
||||
"--partition",
|
||||
type=str,
|
||||
default=None,
|
||||
help="SLURM partition to submit to.",
|
||||
)
|
||||
p.add_argument(
|
||||
"--cpus-per-task",
|
||||
type=int,
|
||||
default=4,
|
||||
help="Number of CPUs per SLURM task.",
|
||||
)
|
||||
p.add_argument(
|
||||
"--mem-per-cpu",
|
||||
type=str,
|
||||
default="4G",
|
||||
help="Memory per CPU, e.g. '4G' or '1950M'.",
|
||||
)
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(
|
||||
description="SLURM-distributed SARM RA-BC annotation pipeline",
|
||||
formatter_class=argparse.RawDescriptionHelpFormatter,
|
||||
)
|
||||
sub = parser.add_subparsers(dest="command", required=True)
|
||||
|
||||
# compute subcommand
|
||||
cp = sub.add_parser(
|
||||
"compute",
|
||||
help="Distribute progress computation across SLURM workers.",
|
||||
)
|
||||
_add_shared_args(cp)
|
||||
cp.add_argument(
|
||||
"--reward-model-path",
|
||||
type=str,
|
||||
required=True,
|
||||
help="Path or HF repo id of the SARM reward model.",
|
||||
)
|
||||
cp.add_argument(
|
||||
"--stride",
|
||||
type=int,
|
||||
default=1,
|
||||
help="Compute every Nth frame; intermediate frames are interpolated (must be >= 1).",
|
||||
)
|
||||
cp.add_argument(
|
||||
"--head-mode",
|
||||
type=str,
|
||||
default="sparse",
|
||||
choices=["sparse", "dense", "both"],
|
||||
help="Which reward head(s) to compute.",
|
||||
)
|
||||
cp.add_argument(
|
||||
"--device",
|
||||
type=str,
|
||||
default="cpu",
|
||||
help="Device for reward model inference, e.g. 'cpu' or 'cuda'.",
|
||||
)
|
||||
cp.add_argument(
|
||||
"--workers",
|
||||
type=int,
|
||||
default=50,
|
||||
help="Number of parallel SLURM tasks (one shard per worker).",
|
||||
)
|
||||
|
||||
# aggregate subcommand
|
||||
ap = sub.add_parser(
|
||||
"aggregate",
|
||||
help="Merge per-rank shards into a single sarm_progress.parquet.",
|
||||
)
|
||||
_add_shared_args(ap)
|
||||
ap.add_argument(
|
||||
"--reward-model-path",
|
||||
type=str,
|
||||
required=True,
|
||||
help="Path or HF repo id of the SARM reward model (stored in parquet metadata).",
|
||||
)
|
||||
ap.add_argument(
|
||||
"--push-to-hub",
|
||||
action="store_true",
|
||||
help="Upload sarm_progress.parquet to the Hugging Face Hub after aggregation.",
|
||||
)
|
||||
|
||||
args = parser.parse_args()
|
||||
job_name = args.job_name or f"rabc_{args.command}"
|
||||
kwargs = vars(args)
|
||||
kwargs["slurm"] = kwargs.pop("slurm") == 1
|
||||
kwargs["job_name"] = job_name
|
||||
command = kwargs.pop("command")
|
||||
|
||||
executor = make_compute_executor(**kwargs) if command == "compute" else make_aggregate_executor(**kwargs)
|
||||
|
||||
executor.run()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,228 +0,0 @@
|
||||
# Copyright 2025 The HuggingFace Inc. team. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
"""Shared utilities for Human-in-the-Loop data collection scripts."""
|
||||
|
||||
import logging
|
||||
import time
|
||||
from dataclasses import dataclass, field
|
||||
from pathlib import Path
|
||||
|
||||
from lerobot.processor import (
|
||||
IdentityProcessorStep,
|
||||
RobotAction,
|
||||
RobotObservation,
|
||||
RobotProcessorPipeline,
|
||||
)
|
||||
from lerobot.processor.converters import (
|
||||
observation_to_transition,
|
||||
robot_action_observation_to_transition,
|
||||
transition_to_observation,
|
||||
transition_to_robot_action,
|
||||
)
|
||||
from lerobot.robots import Robot
|
||||
from lerobot.teleoperators import Teleoperator
|
||||
from lerobot.utils.control_utils import is_headless
|
||||
from lerobot.utils.robot_utils import precise_sleep
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@dataclass
|
||||
class HILDatasetConfig:
|
||||
repo_id: str
|
||||
single_task: str
|
||||
root: str | Path | None = None
|
||||
fps: int = 30
|
||||
episode_time_s: float = 120
|
||||
num_episodes: int = 50
|
||||
video: bool = True
|
||||
push_to_hub: bool = True
|
||||
private: bool = False
|
||||
tags: list[str] | None = None
|
||||
num_image_writer_processes: int = 0
|
||||
num_image_writer_threads_per_camera: int = 4
|
||||
video_encoding_batch_size: int = 1
|
||||
vcodec: str = "auto"
|
||||
streaming_encoding: bool = True
|
||||
encoder_queue_maxsize: int = 30
|
||||
encoder_threads: int | None = None
|
||||
rename_map: dict[str, str] = field(default_factory=dict)
|
||||
|
||||
|
||||
def teleop_has_motor_control(teleop: Teleoperator) -> bool:
|
||||
"""Check if teleoperator has motor control capabilities."""
|
||||
return all(hasattr(teleop, attr) for attr in ("enable_torque", "disable_torque", "write_goal_positions"))
|
||||
|
||||
|
||||
def teleop_disable_torque(teleop: Teleoperator) -> None:
|
||||
"""Disable teleop torque if supported."""
|
||||
if hasattr(teleop, "disable_torque"):
|
||||
teleop.disable_torque()
|
||||
|
||||
|
||||
def teleop_enable_torque(teleop: Teleoperator) -> None:
|
||||
"""Enable teleop torque if supported."""
|
||||
if hasattr(teleop, "enable_torque"):
|
||||
teleop.enable_torque()
|
||||
|
||||
|
||||
def teleop_smooth_move_to(teleop: Teleoperator, target_pos: dict, duration_s: float = 2.0, fps: int = 50):
|
||||
"""Smoothly move teleop to target position if motor control is available."""
|
||||
if not teleop_has_motor_control(teleop):
|
||||
logger.warning("Teleop does not support motor control - cannot mirror robot position")
|
||||
return
|
||||
|
||||
teleop_enable_torque(teleop)
|
||||
current = teleop.get_action()
|
||||
steps = max(int(duration_s * fps), 1)
|
||||
|
||||
for step in range(steps + 1):
|
||||
t = step / steps
|
||||
interp = {}
|
||||
for k in current:
|
||||
if k in target_pos:
|
||||
interp[k] = current[k] * (1 - t) + target_pos[k] * t
|
||||
else:
|
||||
interp[k] = current[k]
|
||||
teleop.write_goal_positions(interp)
|
||||
time.sleep(1 / fps)
|
||||
|
||||
|
||||
def init_keyboard_listener():
|
||||
"""Initialize keyboard listener with HIL controls."""
|
||||
events = {
|
||||
"exit_early": False,
|
||||
"rerecord_episode": False,
|
||||
"stop_recording": False,
|
||||
"policy_paused": False,
|
||||
"correction_active": False,
|
||||
"resume_policy": False,
|
||||
"in_reset": False,
|
||||
"start_next_episode": False,
|
||||
}
|
||||
|
||||
if is_headless():
|
||||
logger.warning("Headless environment - keyboard controls unavailable")
|
||||
return None, events
|
||||
|
||||
from pynput import keyboard
|
||||
|
||||
def on_press(key):
|
||||
try:
|
||||
if events["in_reset"]:
|
||||
if key in [keyboard.Key.space, keyboard.Key.right]:
|
||||
logger.info("[HIL] Starting next episode...")
|
||||
events["start_next_episode"] = True
|
||||
elif hasattr(key, "char") and key.char == "c":
|
||||
events["start_next_episode"] = True
|
||||
elif key == keyboard.Key.esc:
|
||||
logger.info("[HIL] ESC - Stop recording, pushing to hub...")
|
||||
events["stop_recording"] = True
|
||||
events["start_next_episode"] = True
|
||||
else:
|
||||
if key == keyboard.Key.space:
|
||||
if not events["policy_paused"] and not events["correction_active"]:
|
||||
logger.info("[HIL] PAUSED - Press 'c' to take control or 'p' to resume policy")
|
||||
events["policy_paused"] = True
|
||||
elif hasattr(key, "char") and key.char == "c":
|
||||
if events["policy_paused"] and not events["correction_active"]:
|
||||
logger.info("[HIL] Taking control...")
|
||||
events["start_next_episode"] = True
|
||||
elif hasattr(key, "char") and key.char == "p":
|
||||
if events["policy_paused"] or events["correction_active"]:
|
||||
logger.info("[HIL] Resuming policy...")
|
||||
events["resume_policy"] = True
|
||||
elif key == keyboard.Key.right:
|
||||
logger.info("[HIL] End episode")
|
||||
events["exit_early"] = True
|
||||
elif key == keyboard.Key.left:
|
||||
logger.info("[HIL] Re-record episode")
|
||||
events["rerecord_episode"] = True
|
||||
events["exit_early"] = True
|
||||
elif key == keyboard.Key.esc:
|
||||
logger.info("[HIL] ESC - Stop recording...")
|
||||
events["stop_recording"] = True
|
||||
events["exit_early"] = True
|
||||
except Exception as e:
|
||||
logger.info(f"Key error: {e}")
|
||||
|
||||
listener = keyboard.Listener(on_press=on_press)
|
||||
listener.start()
|
||||
return listener, events
|
||||
|
||||
|
||||
def make_identity_processors():
|
||||
"""Create identity processors for recording."""
|
||||
teleop_proc = RobotProcessorPipeline[tuple[RobotAction, RobotObservation], RobotAction](
|
||||
steps=[IdentityProcessorStep()],
|
||||
to_transition=robot_action_observation_to_transition,
|
||||
to_output=transition_to_robot_action,
|
||||
)
|
||||
obs_proc = RobotProcessorPipeline[RobotObservation, RobotObservation](
|
||||
steps=[IdentityProcessorStep()],
|
||||
to_transition=observation_to_transition,
|
||||
to_output=transition_to_observation,
|
||||
)
|
||||
return teleop_proc, obs_proc
|
||||
|
||||
|
||||
def reset_loop(robot: Robot, teleop: Teleoperator, events: dict, fps: int):
|
||||
"""Reset period where human repositions environment."""
|
||||
logger.info("[HIL] RESET")
|
||||
|
||||
events["in_reset"] = True
|
||||
events["start_next_episode"] = False
|
||||
|
||||
obs = robot.get_observation()
|
||||
robot_pos = {k: v for k, v in obs.items() if k.endswith(".pos") and k in robot.observation_features}
|
||||
teleop_smooth_move_to(teleop, robot_pos, duration_s=2.0, fps=50)
|
||||
|
||||
logger.info("Press any key to enable teleoperation")
|
||||
while not events["start_next_episode"] and not events["stop_recording"]:
|
||||
precise_sleep(0.05)
|
||||
|
||||
if events["stop_recording"]:
|
||||
return
|
||||
|
||||
events["start_next_episode"] = False
|
||||
teleop_disable_torque(teleop)
|
||||
logger.info("Teleop enabled - press any key to start episode")
|
||||
|
||||
while not events["start_next_episode"] and not events["stop_recording"]:
|
||||
loop_start = time.perf_counter()
|
||||
action = teleop.get_action()
|
||||
robot.send_action(action)
|
||||
precise_sleep(1 / fps - (time.perf_counter() - loop_start))
|
||||
|
||||
events["in_reset"] = False
|
||||
events["start_next_episode"] = False
|
||||
events["exit_early"] = False
|
||||
events["policy_paused"] = False
|
||||
events["correction_active"] = False
|
||||
events["resume_policy"] = False
|
||||
|
||||
|
||||
def print_controls(rtc: bool = False):
|
||||
"""Print control instructions."""
|
||||
mode = "Human-in-the-Loop Data Collection" + (" (RTC)" if rtc else "")
|
||||
logger.info(
|
||||
"%s\n Controls:\n"
|
||||
" SPACE - Pause policy\n"
|
||||
" c - Take control\n"
|
||||
" p - Resume policy after pause/correction\n"
|
||||
" → - End episode\n"
|
||||
" ESC - Stop and push to hub",
|
||||
mode,
|
||||
)
|
||||
+44
-46
@@ -14,8 +14,8 @@
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from lerobot.datasets.feature_utils import hw_to_dataset_features
|
||||
from lerobot.datasets.lerobot_dataset import LeRobotDataset
|
||||
from lerobot.datasets.utils import hw_to_dataset_features
|
||||
from lerobot.policies.act.modeling_act import ACTPolicy
|
||||
from lerobot.policies.factory import make_pre_post_processors
|
||||
from lerobot.processor import make_default_processors
|
||||
@@ -78,24 +78,40 @@ def main():
|
||||
listener, events = init_keyboard_listener()
|
||||
init_rerun(session_name="lekiwi_evaluate")
|
||||
|
||||
try:
|
||||
if not robot.is_connected:
|
||||
raise ValueError("Robot is not connected!")
|
||||
if not robot.is_connected:
|
||||
raise ValueError("Robot is not connected!")
|
||||
|
||||
print("Starting evaluate loop...")
|
||||
recorded_episodes = 0
|
||||
while recorded_episodes < NUM_EPISODES and not events["stop_recording"]:
|
||||
log_say(f"Running inference, recording eval episode {recorded_episodes} of {NUM_EPISODES}")
|
||||
print("Starting evaluate loop...")
|
||||
recorded_episodes = 0
|
||||
while recorded_episodes < NUM_EPISODES and not events["stop_recording"]:
|
||||
log_say(f"Running inference, recording eval episode {recorded_episodes} of {NUM_EPISODES}")
|
||||
|
||||
# Main record loop
|
||||
# Main record loop
|
||||
record_loop(
|
||||
robot=robot,
|
||||
events=events,
|
||||
fps=FPS,
|
||||
policy=policy,
|
||||
preprocessor=preprocessor, # Pass the pre and post policy processors
|
||||
postprocessor=postprocessor,
|
||||
dataset=dataset,
|
||||
control_time_s=EPISODE_TIME_SEC,
|
||||
single_task=TASK_DESCRIPTION,
|
||||
display_data=True,
|
||||
teleop_action_processor=teleop_action_processor,
|
||||
robot_action_processor=robot_action_processor,
|
||||
robot_observation_processor=robot_observation_processor,
|
||||
)
|
||||
|
||||
# Reset the environment if not stopping or re-recording
|
||||
if not events["stop_recording"] and (
|
||||
(recorded_episodes < NUM_EPISODES - 1) or events["rerecord_episode"]
|
||||
):
|
||||
log_say("Reset the environment")
|
||||
record_loop(
|
||||
robot=robot,
|
||||
events=events,
|
||||
fps=FPS,
|
||||
policy=policy,
|
||||
preprocessor=preprocessor, # Pass the pre and post policy processors
|
||||
postprocessor=postprocessor,
|
||||
dataset=dataset,
|
||||
control_time_s=EPISODE_TIME_SEC,
|
||||
single_task=TASK_DESCRIPTION,
|
||||
display_data=True,
|
||||
@@ -104,42 +120,24 @@ def main():
|
||||
robot_observation_processor=robot_observation_processor,
|
||||
)
|
||||
|
||||
# Reset the environment if not stopping or re-recording
|
||||
if not events["stop_recording"] and (
|
||||
(recorded_episodes < NUM_EPISODES - 1) or events["rerecord_episode"]
|
||||
):
|
||||
log_say("Reset the environment")
|
||||
record_loop(
|
||||
robot=robot,
|
||||
events=events,
|
||||
fps=FPS,
|
||||
control_time_s=EPISODE_TIME_SEC,
|
||||
single_task=TASK_DESCRIPTION,
|
||||
display_data=True,
|
||||
teleop_action_processor=teleop_action_processor,
|
||||
robot_action_processor=robot_action_processor,
|
||||
robot_observation_processor=robot_observation_processor,
|
||||
)
|
||||
if events["rerecord_episode"]:
|
||||
log_say("Re-record episode")
|
||||
events["rerecord_episode"] = False
|
||||
events["exit_early"] = False
|
||||
dataset.clear_episode_buffer()
|
||||
continue
|
||||
|
||||
if events["rerecord_episode"]:
|
||||
log_say("Re-record episode")
|
||||
events["rerecord_episode"] = False
|
||||
events["exit_early"] = False
|
||||
dataset.clear_episode_buffer()
|
||||
continue
|
||||
# Save episode
|
||||
dataset.save_episode()
|
||||
recorded_episodes += 1
|
||||
|
||||
# Save episode
|
||||
dataset.save_episode()
|
||||
recorded_episodes += 1
|
||||
# Clean up
|
||||
log_say("Stop recording")
|
||||
robot.disconnect()
|
||||
listener.stop()
|
||||
|
||||
finally:
|
||||
# Clean up
|
||||
log_say("Stop recording")
|
||||
robot.disconnect()
|
||||
listener.stop()
|
||||
|
||||
dataset.finalize()
|
||||
dataset.push_to_hub()
|
||||
dataset.finalize()
|
||||
dataset.push_to_hub()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
+45
-46
@@ -14,8 +14,8 @@
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from lerobot.datasets.feature_utils import hw_to_dataset_features
|
||||
from lerobot.datasets.lerobot_dataset import LeRobotDataset
|
||||
from lerobot.datasets.utils import hw_to_dataset_features
|
||||
from lerobot.processor import make_default_processors
|
||||
from lerobot.robots.lekiwi.config_lekiwi import LeKiwiClientConfig
|
||||
from lerobot.robots.lekiwi.lekiwi_client import LeKiwiClient
|
||||
@@ -74,23 +74,40 @@ def main():
|
||||
listener, events = init_keyboard_listener()
|
||||
init_rerun(session_name="lekiwi_record")
|
||||
|
||||
try:
|
||||
if not robot.is_connected or not leader_arm.is_connected or not keyboard.is_connected:
|
||||
raise ValueError("Robot or teleop is not connected!")
|
||||
if not robot.is_connected or not leader_arm.is_connected or not keyboard.is_connected:
|
||||
raise ValueError("Robot or teleop is not connected!")
|
||||
|
||||
print("Starting record loop...")
|
||||
recorded_episodes = 0
|
||||
while recorded_episodes < NUM_EPISODES and not events["stop_recording"]:
|
||||
log_say(f"Recording episode {recorded_episodes}")
|
||||
print("Starting record loop...")
|
||||
recorded_episodes = 0
|
||||
while recorded_episodes < NUM_EPISODES and not events["stop_recording"]:
|
||||
log_say(f"Recording episode {recorded_episodes}")
|
||||
|
||||
# Main record loop
|
||||
# Main record loop
|
||||
record_loop(
|
||||
robot=robot,
|
||||
events=events,
|
||||
fps=FPS,
|
||||
dataset=dataset,
|
||||
teleop=[leader_arm, keyboard],
|
||||
control_time_s=EPISODE_TIME_SEC,
|
||||
single_task=TASK_DESCRIPTION,
|
||||
display_data=True,
|
||||
teleop_action_processor=teleop_action_processor,
|
||||
robot_action_processor=robot_action_processor,
|
||||
robot_observation_processor=robot_observation_processor,
|
||||
)
|
||||
|
||||
# Reset the environment if not stopping or re-recording
|
||||
if not events["stop_recording"] and (
|
||||
(recorded_episodes < NUM_EPISODES - 1) or events["rerecord_episode"]
|
||||
):
|
||||
log_say("Reset the environment")
|
||||
record_loop(
|
||||
robot=robot,
|
||||
events=events,
|
||||
fps=FPS,
|
||||
dataset=dataset,
|
||||
teleop=[leader_arm, keyboard],
|
||||
control_time_s=EPISODE_TIME_SEC,
|
||||
control_time_s=RESET_TIME_SEC,
|
||||
single_task=TASK_DESCRIPTION,
|
||||
display_data=True,
|
||||
teleop_action_processor=teleop_action_processor,
|
||||
@@ -98,44 +115,26 @@ def main():
|
||||
robot_observation_processor=robot_observation_processor,
|
||||
)
|
||||
|
||||
# Reset the environment if not stopping or re-recording
|
||||
if not events["stop_recording"] and (
|
||||
(recorded_episodes < NUM_EPISODES - 1) or events["rerecord_episode"]
|
||||
):
|
||||
log_say("Reset the environment")
|
||||
record_loop(
|
||||
robot=robot,
|
||||
events=events,
|
||||
fps=FPS,
|
||||
teleop=[leader_arm, keyboard],
|
||||
control_time_s=RESET_TIME_SEC,
|
||||
single_task=TASK_DESCRIPTION,
|
||||
display_data=True,
|
||||
teleop_action_processor=teleop_action_processor,
|
||||
robot_action_processor=robot_action_processor,
|
||||
robot_observation_processor=robot_observation_processor,
|
||||
)
|
||||
if events["rerecord_episode"]:
|
||||
log_say("Re-record episode")
|
||||
events["rerecord_episode"] = False
|
||||
events["exit_early"] = False
|
||||
dataset.clear_episode_buffer()
|
||||
continue
|
||||
|
||||
if events["rerecord_episode"]:
|
||||
log_say("Re-record episode")
|
||||
events["rerecord_episode"] = False
|
||||
events["exit_early"] = False
|
||||
dataset.clear_episode_buffer()
|
||||
continue
|
||||
# Save episode
|
||||
dataset.save_episode()
|
||||
recorded_episodes += 1
|
||||
|
||||
# Save episode
|
||||
dataset.save_episode()
|
||||
recorded_episodes += 1
|
||||
finally:
|
||||
# Clean up
|
||||
log_say("Stop recording")
|
||||
robot.disconnect()
|
||||
leader_arm.disconnect()
|
||||
keyboard.disconnect()
|
||||
listener.stop()
|
||||
# Clean up
|
||||
log_say("Stop recording")
|
||||
robot.disconnect()
|
||||
leader_arm.disconnect()
|
||||
keyboard.disconnect()
|
||||
listener.stop()
|
||||
|
||||
dataset.finalize()
|
||||
dataset.push_to_hub()
|
||||
dataset.finalize()
|
||||
dataset.push_to_hub()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
+18
-18
@@ -35,32 +35,32 @@ def main():
|
||||
|
||||
# Fetch the dataset to replay
|
||||
dataset = LeRobotDataset("<hf_username>/<dataset_repo_id>", episodes=[EPISODE_IDX])
|
||||
actions = dataset.select_columns(ACTION)
|
||||
# Filter dataset to only include frames from the specified episode since episodes are chunked in dataset V3.0
|
||||
episode_frames = dataset.hf_dataset.filter(lambda x: x["episode_index"] == EPISODE_IDX)
|
||||
actions = episode_frames.select_columns(ACTION)
|
||||
|
||||
# Connect to the robot
|
||||
robot.connect()
|
||||
|
||||
try:
|
||||
if not robot.is_connected:
|
||||
raise ValueError("Robot is not connected!")
|
||||
if not robot.is_connected:
|
||||
raise ValueError("Robot is not connected!")
|
||||
|
||||
print("Starting replay loop...")
|
||||
log_say(f"Replaying episode {EPISODE_IDX}")
|
||||
for idx in range(dataset.num_frames):
|
||||
t0 = time.perf_counter()
|
||||
print("Starting replay loop...")
|
||||
log_say(f"Replaying episode {EPISODE_IDX}")
|
||||
for idx in range(len(episode_frames)):
|
||||
t0 = time.perf_counter()
|
||||
|
||||
# Get recorded action from dataset
|
||||
action = {
|
||||
name: float(actions[idx][ACTION][i])
|
||||
for i, name in enumerate(dataset.features[ACTION]["names"])
|
||||
}
|
||||
# Get recorded action from dataset
|
||||
action = {
|
||||
name: float(actions[idx][ACTION][i]) for i, name in enumerate(dataset.features[ACTION]["names"])
|
||||
}
|
||||
|
||||
# Send action to robot
|
||||
_ = robot.send_action(action)
|
||||
# Send action to robot
|
||||
_ = robot.send_action(action)
|
||||
|
||||
precise_sleep(max(1.0 / dataset.fps - (time.perf_counter() - t0), 0.0))
|
||||
finally:
|
||||
robot.disconnect()
|
||||
precise_sleep(max(1.0 / dataset.fps - (time.perf_counter() - t0), 0.0))
|
||||
|
||||
robot.disconnect()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
@@ -16,13 +16,15 @@
|
||||
|
||||
from lerobot.cameras.opencv.configuration_opencv import OpenCVCameraConfig
|
||||
from lerobot.configs.types import FeatureType, PolicyFeature
|
||||
from lerobot.datasets.feature_utils import combine_feature_dicts
|
||||
from lerobot.datasets.lerobot_dataset import LeRobotDataset
|
||||
from lerobot.datasets.pipeline_features import aggregate_pipeline_dataset_features, create_initial_features
|
||||
from lerobot.datasets.utils import combine_feature_dicts
|
||||
from lerobot.model.kinematics import RobotKinematics
|
||||
from lerobot.policies.act.modeling_act import ACTPolicy
|
||||
from lerobot.policies.factory import make_pre_post_processors
|
||||
from lerobot.processor import (
|
||||
RobotAction,
|
||||
RobotObservation,
|
||||
RobotProcessorPipeline,
|
||||
make_default_teleop_action_processor,
|
||||
)
|
||||
@@ -38,7 +40,6 @@ from lerobot.robots.so_follower.robot_kinematic_processor import (
|
||||
InverseKinematicsEEToJoints,
|
||||
)
|
||||
from lerobot.scripts.lerobot_record import record_loop
|
||||
from lerobot.types import RobotAction, RobotObservation
|
||||
from lerobot.utils.control_utils import init_keyboard_listener
|
||||
from lerobot.utils.utils import log_say
|
||||
from lerobot.utils.visualization_utils import init_rerun
|
||||
@@ -141,24 +142,38 @@ def main():
|
||||
listener, events = init_keyboard_listener()
|
||||
init_rerun(session_name="phone_so100_evaluate")
|
||||
|
||||
try:
|
||||
if not robot.is_connected:
|
||||
raise ValueError("Robot is not connected!")
|
||||
if not robot.is_connected:
|
||||
raise ValueError("Robot is not connected!")
|
||||
|
||||
print("Starting evaluate loop...")
|
||||
episode_idx = 0
|
||||
for episode_idx in range(NUM_EPISODES):
|
||||
log_say(f"Running inference, recording eval episode {episode_idx + 1} of {NUM_EPISODES}")
|
||||
print("Starting evaluate loop...")
|
||||
episode_idx = 0
|
||||
for episode_idx in range(NUM_EPISODES):
|
||||
log_say(f"Running inference, recording eval episode {episode_idx + 1} of {NUM_EPISODES}")
|
||||
|
||||
# Main record loop
|
||||
# Main record loop
|
||||
record_loop(
|
||||
robot=robot,
|
||||
events=events,
|
||||
fps=FPS,
|
||||
policy=policy,
|
||||
preprocessor=preprocessor, # Pass the pre and post policy processors
|
||||
postprocessor=postprocessor,
|
||||
dataset=dataset,
|
||||
control_time_s=EPISODE_TIME_SEC,
|
||||
single_task=TASK_DESCRIPTION,
|
||||
display_data=True,
|
||||
teleop_action_processor=make_default_teleop_action_processor(),
|
||||
robot_action_processor=robot_ee_to_joints_processor,
|
||||
robot_observation_processor=robot_joints_to_ee_pose_processor,
|
||||
)
|
||||
|
||||
# Reset the environment if not stopping or re-recording
|
||||
if not events["stop_recording"] and ((episode_idx < NUM_EPISODES - 1) or events["rerecord_episode"]):
|
||||
log_say("Reset the environment")
|
||||
record_loop(
|
||||
robot=robot,
|
||||
events=events,
|
||||
fps=FPS,
|
||||
policy=policy,
|
||||
preprocessor=preprocessor, # Pass the pre and post policy processors
|
||||
postprocessor=postprocessor,
|
||||
dataset=dataset,
|
||||
control_time_s=EPISODE_TIME_SEC,
|
||||
single_task=TASK_DESCRIPTION,
|
||||
display_data=True,
|
||||
@@ -167,41 +182,24 @@ def main():
|
||||
robot_observation_processor=robot_joints_to_ee_pose_processor,
|
||||
)
|
||||
|
||||
# Reset the environment if not stopping or re-recording
|
||||
if not events["stop_recording"] and (
|
||||
(episode_idx < NUM_EPISODES - 1) or events["rerecord_episode"]
|
||||
):
|
||||
log_say("Reset the environment")
|
||||
record_loop(
|
||||
robot=robot,
|
||||
events=events,
|
||||
fps=FPS,
|
||||
control_time_s=EPISODE_TIME_SEC,
|
||||
single_task=TASK_DESCRIPTION,
|
||||
display_data=True,
|
||||
teleop_action_processor=make_default_teleop_action_processor(),
|
||||
robot_action_processor=robot_ee_to_joints_processor,
|
||||
robot_observation_processor=robot_joints_to_ee_pose_processor,
|
||||
)
|
||||
if events["rerecord_episode"]:
|
||||
log_say("Re-record episode")
|
||||
events["rerecord_episode"] = False
|
||||
events["exit_early"] = False
|
||||
dataset.clear_episode_buffer()
|
||||
continue
|
||||
|
||||
if events["rerecord_episode"]:
|
||||
log_say("Re-record episode")
|
||||
events["rerecord_episode"] = False
|
||||
events["exit_early"] = False
|
||||
dataset.clear_episode_buffer()
|
||||
continue
|
||||
# Save episode
|
||||
dataset.save_episode()
|
||||
episode_idx += 1
|
||||
|
||||
# Save episode
|
||||
dataset.save_episode()
|
||||
episode_idx += 1
|
||||
finally:
|
||||
# Clean up
|
||||
log_say("Stop recording")
|
||||
robot.disconnect()
|
||||
listener.stop()
|
||||
# Clean up
|
||||
log_say("Stop recording")
|
||||
robot.disconnect()
|
||||
listener.stop()
|
||||
|
||||
dataset.finalize()
|
||||
dataset.push_to_hub()
|
||||
dataset.finalize()
|
||||
dataset.push_to_hub()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
@@ -15,11 +15,11 @@
|
||||
# limitations under the License.
|
||||
|
||||
from lerobot.cameras.opencv.configuration_opencv import OpenCVCameraConfig
|
||||
from lerobot.datasets.feature_utils import combine_feature_dicts
|
||||
from lerobot.datasets.lerobot_dataset import LeRobotDataset
|
||||
from lerobot.datasets.pipeline_features import aggregate_pipeline_dataset_features, create_initial_features
|
||||
from lerobot.datasets.utils import combine_feature_dicts
|
||||
from lerobot.model.kinematics import RobotKinematics
|
||||
from lerobot.processor import RobotProcessorPipeline
|
||||
from lerobot.processor import RobotAction, RobotObservation, RobotProcessorPipeline
|
||||
from lerobot.processor.converters import (
|
||||
observation_to_transition,
|
||||
robot_action_observation_to_transition,
|
||||
@@ -38,7 +38,6 @@ from lerobot.scripts.lerobot_record import record_loop
|
||||
from lerobot.teleoperators.phone.config_phone import PhoneConfig, PhoneOS
|
||||
from lerobot.teleoperators.phone.phone_processor import MapPhoneActionToRobotAction
|
||||
from lerobot.teleoperators.phone.teleop_phone import Phone
|
||||
from lerobot.types import RobotAction, RobotObservation
|
||||
from lerobot.utils.control_utils import init_keyboard_listener
|
||||
from lerobot.utils.utils import log_say
|
||||
from lerobot.utils.visualization_utils import init_rerun
|
||||
@@ -150,23 +149,38 @@ def main():
|
||||
listener, events = init_keyboard_listener()
|
||||
init_rerun(session_name="phone_so100_record")
|
||||
|
||||
try:
|
||||
if not robot.is_connected or not phone.is_connected:
|
||||
raise ValueError("Robot or teleop is not connected!")
|
||||
if not robot.is_connected or not phone.is_connected:
|
||||
raise ValueError("Robot or teleop is not connected!")
|
||||
|
||||
print("Starting record loop. Move your phone to teleoperate the robot...")
|
||||
episode_idx = 0
|
||||
while episode_idx < NUM_EPISODES and not events["stop_recording"]:
|
||||
log_say(f"Recording episode {episode_idx + 1} of {NUM_EPISODES}")
|
||||
print("Starting record loop. Move your phone to teleoperate the robot...")
|
||||
episode_idx = 0
|
||||
while episode_idx < NUM_EPISODES and not events["stop_recording"]:
|
||||
log_say(f"Recording episode {episode_idx + 1} of {NUM_EPISODES}")
|
||||
|
||||
# Main record loop
|
||||
# Main record loop
|
||||
record_loop(
|
||||
robot=robot,
|
||||
events=events,
|
||||
fps=FPS,
|
||||
teleop=phone,
|
||||
dataset=dataset,
|
||||
control_time_s=EPISODE_TIME_SEC,
|
||||
single_task=TASK_DESCRIPTION,
|
||||
display_data=True,
|
||||
teleop_action_processor=phone_to_robot_ee_pose_processor,
|
||||
robot_action_processor=robot_ee_to_joints_processor,
|
||||
robot_observation_processor=robot_joints_to_ee_pose,
|
||||
)
|
||||
|
||||
# Reset the environment if not stopping or re-recording
|
||||
if not events["stop_recording"] and (episode_idx < NUM_EPISODES - 1 or events["rerecord_episode"]):
|
||||
log_say("Reset the environment")
|
||||
record_loop(
|
||||
robot=robot,
|
||||
events=events,
|
||||
fps=FPS,
|
||||
teleop=phone,
|
||||
dataset=dataset,
|
||||
control_time_s=EPISODE_TIME_SEC,
|
||||
control_time_s=RESET_TIME_SEC,
|
||||
single_task=TASK_DESCRIPTION,
|
||||
display_data=True,
|
||||
teleop_action_processor=phone_to_robot_ee_pose_processor,
|
||||
@@ -174,43 +188,25 @@ def main():
|
||||
robot_observation_processor=robot_joints_to_ee_pose,
|
||||
)
|
||||
|
||||
# Reset the environment if not stopping or re-recording
|
||||
if not events["stop_recording"] and (
|
||||
episode_idx < NUM_EPISODES - 1 or events["rerecord_episode"]
|
||||
):
|
||||
log_say("Reset the environment")
|
||||
record_loop(
|
||||
robot=robot,
|
||||
events=events,
|
||||
fps=FPS,
|
||||
teleop=phone,
|
||||
control_time_s=RESET_TIME_SEC,
|
||||
single_task=TASK_DESCRIPTION,
|
||||
display_data=True,
|
||||
teleop_action_processor=phone_to_robot_ee_pose_processor,
|
||||
robot_action_processor=robot_ee_to_joints_processor,
|
||||
robot_observation_processor=robot_joints_to_ee_pose,
|
||||
)
|
||||
if events["rerecord_episode"]:
|
||||
log_say("Re-recording episode")
|
||||
events["rerecord_episode"] = False
|
||||
events["exit_early"] = False
|
||||
dataset.clear_episode_buffer()
|
||||
continue
|
||||
|
||||
if events["rerecord_episode"]:
|
||||
log_say("Re-recording episode")
|
||||
events["rerecord_episode"] = False
|
||||
events["exit_early"] = False
|
||||
dataset.clear_episode_buffer()
|
||||
continue
|
||||
# Save episode
|
||||
dataset.save_episode()
|
||||
episode_idx += 1
|
||||
|
||||
# Save episode
|
||||
dataset.save_episode()
|
||||
episode_idx += 1
|
||||
finally:
|
||||
# Clean up
|
||||
log_say("Stop recording")
|
||||
robot.disconnect()
|
||||
phone.disconnect()
|
||||
listener.stop()
|
||||
# Clean up
|
||||
log_say("Stop recording")
|
||||
robot.disconnect()
|
||||
phone.disconnect()
|
||||
listener.stop()
|
||||
|
||||
dataset.finalize()
|
||||
dataset.push_to_hub()
|
||||
dataset.finalize()
|
||||
dataset.push_to_hub()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
@@ -18,7 +18,7 @@ import time
|
||||
|
||||
from lerobot.datasets.lerobot_dataset import LeRobotDataset
|
||||
from lerobot.model.kinematics import RobotKinematics
|
||||
from lerobot.processor import RobotProcessorPipeline
|
||||
from lerobot.processor import RobotAction, RobotObservation, RobotProcessorPipeline
|
||||
from lerobot.processor.converters import (
|
||||
robot_action_observation_to_transition,
|
||||
transition_to_robot_action,
|
||||
@@ -27,7 +27,6 @@ from lerobot.robots.so_follower import SO100Follower, SO100FollowerConfig
|
||||
from lerobot.robots.so_follower.robot_kinematic_processor import (
|
||||
InverseKinematicsEEToJoints,
|
||||
)
|
||||
from lerobot.types import RobotAction, RobotObservation
|
||||
from lerobot.utils.constants import ACTION
|
||||
from lerobot.utils.robot_utils import precise_sleep
|
||||
from lerobot.utils.utils import log_say
|
||||
@@ -67,39 +66,39 @@ def main():
|
||||
|
||||
# Fetch the dataset to replay
|
||||
dataset = LeRobotDataset(HF_REPO_ID, episodes=[EPISODE_IDX])
|
||||
actions = dataset.select_columns(ACTION)
|
||||
# Filter dataset to only include frames from the specified episode since episodes are chunked in dataset V3.0
|
||||
episode_frames = dataset.hf_dataset.filter(lambda x: x["episode_index"] == EPISODE_IDX)
|
||||
actions = episode_frames.select_columns(ACTION)
|
||||
|
||||
# Connect to the robot
|
||||
robot.connect()
|
||||
|
||||
try:
|
||||
if not robot.is_connected:
|
||||
raise ValueError("Robot is not connected!")
|
||||
if not robot.is_connected:
|
||||
raise ValueError("Robot is not connected!")
|
||||
|
||||
print("Starting replay loop...")
|
||||
log_say(f"Replaying episode {EPISODE_IDX}")
|
||||
for idx in range(dataset.num_frames):
|
||||
t0 = time.perf_counter()
|
||||
print("Starting replay loop...")
|
||||
log_say(f"Replaying episode {EPISODE_IDX}")
|
||||
for idx in range(len(episode_frames)):
|
||||
t0 = time.perf_counter()
|
||||
|
||||
# Get recorded action from dataset
|
||||
ee_action = {
|
||||
name: float(actions[idx][ACTION][i])
|
||||
for i, name in enumerate(dataset.features[ACTION]["names"])
|
||||
}
|
||||
# Get recorded action from dataset
|
||||
ee_action = {
|
||||
name: float(actions[idx][ACTION][i]) for i, name in enumerate(dataset.features[ACTION]["names"])
|
||||
}
|
||||
|
||||
# Get robot observation
|
||||
robot_obs = robot.get_observation()
|
||||
# Get robot observation
|
||||
robot_obs = robot.get_observation()
|
||||
|
||||
# Dataset EE -> robot joints
|
||||
joint_action = robot_ee_to_joints_processor((ee_action, robot_obs))
|
||||
# Dataset EE -> robot joints
|
||||
joint_action = robot_ee_to_joints_processor((ee_action, robot_obs))
|
||||
|
||||
# Send action to robot
|
||||
_ = robot.send_action(joint_action)
|
||||
# Send action to robot
|
||||
_ = robot.send_action(joint_action)
|
||||
|
||||
precise_sleep(max(1.0 / dataset.fps - (time.perf_counter() - t0), 0.0))
|
||||
finally:
|
||||
# Clean up
|
||||
robot.disconnect()
|
||||
precise_sleep(max(1.0 / dataset.fps - (time.perf_counter() - t0), 0.0))
|
||||
|
||||
# Clean up
|
||||
robot.disconnect()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
@@ -16,7 +16,7 @@
|
||||
import time
|
||||
|
||||
from lerobot.model.kinematics import RobotKinematics
|
||||
from lerobot.processor import RobotProcessorPipeline
|
||||
from lerobot.processor import RobotAction, RobotObservation, RobotProcessorPipeline
|
||||
from lerobot.processor.converters import (
|
||||
robot_action_observation_to_transition,
|
||||
transition_to_robot_action,
|
||||
@@ -31,7 +31,6 @@ from lerobot.robots.so_follower.robot_kinematic_processor import (
|
||||
from lerobot.teleoperators.phone.config_phone import PhoneConfig, PhoneOS
|
||||
from lerobot.teleoperators.phone.phone_processor import MapPhoneActionToRobotAction
|
||||
from lerobot.teleoperators.phone.teleop_phone import Phone
|
||||
from lerobot.types import RobotAction, RobotObservation
|
||||
from lerobot.utils.robot_utils import precise_sleep
|
||||
from lerobot.utils.visualization_utils import init_rerun, log_rerun_data
|
||||
|
||||
|
||||
@@ -22,8 +22,7 @@ from pathlib import Path
|
||||
import numpy as np
|
||||
import tensorflow_datasets as tfds
|
||||
|
||||
from lerobot.datasets.dataset_metadata import LeRobotDatasetMetadata
|
||||
from lerobot.datasets.lerobot_dataset import LeRobotDataset
|
||||
from lerobot.datasets.lerobot_dataset import LeRobotDataset, LeRobotDatasetMetadata
|
||||
from lerobot.utils.utils import get_elapsed_time_in_days_hours_minutes_seconds
|
||||
|
||||
DROID_SHARDS = 2048
|
||||
|
||||
@@ -26,7 +26,7 @@ from huggingface_hub import HfApi
|
||||
from huggingface_hub.constants import REPOCARD_NAME
|
||||
from port_droid import DROID_SHARDS
|
||||
|
||||
from lerobot.datasets.dataset_metadata import CODEBASE_VERSION, LeRobotDatasetMetadata
|
||||
from lerobot.datasets.lerobot_dataset import CODEBASE_VERSION, LeRobotDatasetMetadata
|
||||
from lerobot.datasets.utils import create_lerobot_dataset_card
|
||||
from lerobot.utils.utils import init_logging
|
||||
|
||||
@@ -155,7 +155,7 @@ class UploadDataset(PipelineStep):
|
||||
from datasets.utils.tqdm import disable_progress_bars
|
||||
from huggingface_hub import CommitOperationAdd, preupload_lfs_files
|
||||
|
||||
from lerobot.datasets.dataset_metadata import LeRobotDatasetMetadata
|
||||
from lerobot.datasets.lerobot_dataset import LeRobotDatasetMetadata
|
||||
from lerobot.utils.utils import init_logging
|
||||
|
||||
init_logging()
|
||||
|
||||
@@ -27,8 +27,8 @@ measuring consistency and ground truth alignment.
|
||||
Usage:
|
||||
# Basic usage with smolvla policy
|
||||
uv run python examples/rtc/eval_dataset.py \
|
||||
--policy.path=<USER>/smolvla_check_rtc_last3 \
|
||||
--dataset.repo_id=<USER>/check_rtc \
|
||||
--policy.path=helper2424/smolvla_check_rtc_last3 \
|
||||
--dataset.repo_id=helper2424/check_rtc \
|
||||
--rtc.execution_horizon=8 \
|
||||
--device=mps \
|
||||
--rtc.max_guidance_weight=10.0 \
|
||||
@@ -58,16 +58,16 @@ Usage:
|
||||
--device=cuda
|
||||
|
||||
uv run python examples/rtc/eval_dataset.py \
|
||||
--policy.path=<USER>/reuben_pi0 \
|
||||
--dataset.repo_id=<USER>/so101_cube_in_cup \
|
||||
--policy.path=lipsop/reuben_pi0 \
|
||||
--dataset.repo_id=ReubenLim/so101_cube_in_cup \
|
||||
--rtc.execution_horizon=8 \
|
||||
--device=cuda
|
||||
|
||||
# With torch.compile for faster inference (PyTorch 2.0+)
|
||||
# Note: CUDA graphs disabled by default due to in-place ops in denoising loop
|
||||
uv run python examples/rtc/eval_dataset.py \
|
||||
--policy.path=<USER>/smolvla_check_rtc_last3 \
|
||||
--dataset.repo_id=<USER>/check_rtc \
|
||||
--policy.path=helper2424/smolvla_check_rtc_last3 \
|
||||
--dataset.repo_id=helper2424/check_rtc \
|
||||
--rtc.execution_horizon=8 \
|
||||
--device=mps \
|
||||
--use_torch_compile=true \
|
||||
@@ -75,8 +75,8 @@ Usage:
|
||||
|
||||
# With torch.compile on CUDA (CUDA graphs disabled by default)
|
||||
uv run python examples/rtc/eval_dataset.py \
|
||||
--policy.path=<USER>/smolvla_check_rtc_last3 \
|
||||
--dataset.repo_id=<USER>/check_rtc \
|
||||
--policy.path=helper2424/smolvla_check_rtc_last3 \
|
||||
--dataset.repo_id=helper2424/check_rtc \
|
||||
--rtc.execution_horizon=8 \
|
||||
--device=cuda \
|
||||
--use_torch_compile=true \
|
||||
@@ -84,8 +84,8 @@ Usage:
|
||||
|
||||
# Enable CUDA graphs (advanced - may cause tensor aliasing errors)
|
||||
uv run python examples/rtc/eval_dataset.py \
|
||||
--policy.path=<USER>/smolvla_check_rtc_last3 \
|
||||
--dataset.repo_id=<USER>/check_rtc \
|
||||
--policy.path=helper2424/smolvla_check_rtc_last3 \
|
||||
--dataset.repo_id=helper2424/check_rtc \
|
||||
--use_torch_compile=true \
|
||||
--torch_compile_backend=inductor \
|
||||
--torch_compile_mode=max-autotune \
|
||||
@@ -113,9 +113,8 @@ from lerobot.configs import parser
|
||||
from lerobot.configs.default import DatasetConfig
|
||||
from lerobot.configs.policies import PreTrainedConfig
|
||||
from lerobot.configs.types import RTCAttentionSchedule
|
||||
from lerobot.datasets.dataset_metadata import LeRobotDatasetMetadata
|
||||
from lerobot.datasets.factory import resolve_delta_timestamps
|
||||
from lerobot.datasets.lerobot_dataset import LeRobotDataset
|
||||
from lerobot.datasets.lerobot_dataset import LeRobotDataset, LeRobotDatasetMetadata
|
||||
from lerobot.policies.factory import get_policy_class, make_pre_post_processors
|
||||
from lerobot.policies.rtc.configuration_rtc import RTCConfig
|
||||
from lerobot.policies.rtc.debug_visualizer import RTCDebugVisualizer
|
||||
|
||||
@@ -28,7 +28,7 @@ For simulation environments, see eval_with_simulation.py
|
||||
Usage:
|
||||
# Run RTC with Real robot with RTC
|
||||
uv run examples/rtc/eval_with_real_robot.py \
|
||||
--policy.path=<USER>/smolvla_check_rtc_last3 \
|
||||
--policy.path=helper2424/smolvla_check_rtc_last3 \
|
||||
--policy.device=mps \
|
||||
--rtc.enabled=true \
|
||||
--rtc.execution_horizon=20 \
|
||||
@@ -41,7 +41,7 @@ Usage:
|
||||
|
||||
# Run RTC with Real robot without RTC
|
||||
uv run examples/rtc/eval_with_real_robot.py \
|
||||
--policy.path=<USER>/smolvla_check_rtc_last3 \
|
||||
--policy.path=helper2424/smolvla_check_rtc_last3 \
|
||||
--policy.device=mps \
|
||||
--rtc.enabled=false \
|
||||
--robot.type=so100_follower \
|
||||
@@ -53,7 +53,7 @@ Usage:
|
||||
|
||||
# Run RTC with Real robot with pi0.5 policy
|
||||
uv run examples/rtc/eval_with_real_robot.py \
|
||||
--policy.path=<USER>/pi05_check_rtc \
|
||||
--policy.path=helper2424/pi05_check_rtc \
|
||||
--policy.device=mps \
|
||||
--rtc.enabled=true \
|
||||
--rtc.execution_horizon=20 \
|
||||
@@ -63,31 +63,6 @@ Usage:
|
||||
--robot.cameras="{ gripper: {type: opencv, index_or_path: 0, width: 640, height: 480, fps: 30}, front: {type: opencv, index_or_path: 1, width: 640, height: 480, fps: 30}}" \
|
||||
--task="Move green small object into the purple platform" \
|
||||
--duration=120
|
||||
|
||||
# Run RTC with bi_openarm_follower (dual-arm OpenArms) and pi0.5 policy
|
||||
python examples/rtc/eval_with_real_robot.py \
|
||||
--policy.path=lerobot-data-collection/folding_final \
|
||||
--robot.type=bi_openarm_follower \
|
||||
--robot.cameras='{left_wrist: {type: opencv, index_or_path: "/dev/video4", width: 1280, height: 720, fps: 30}, base: {type: opencv, index_or_path: "/dev/video2", width: 640, height: 480, fps: 30}, right_wrist: {type: opencv, index_or_path: "/dev/video0", width: 1280, height: 720, fps: 30}}' \
|
||||
--robot.left_arm_config.port=can0 \
|
||||
--robot.left_arm_config.side=left \
|
||||
--robot.left_arm_config.can_interface=socketcan \
|
||||
--robot.left_arm_config.disable_torque_on_disconnect=true \
|
||||
--robot.left_arm_config.max_relative_target=8.0 \
|
||||
--robot.right_arm_config.port=can1 \
|
||||
--robot.right_arm_config.side=right \
|
||||
--robot.right_arm_config.can_interface=socketcan \
|
||||
--robot.right_arm_config.disable_torque_on_disconnect=true \
|
||||
--robot.right_arm_config.max_relative_target=8.0 \
|
||||
--task="Fold the T-shirt properly" \
|
||||
--fps=30 \
|
||||
--duration=2000 \
|
||||
--interpolation_multiplier=3 \
|
||||
--rtc.enabled=true \
|
||||
--rtc.execution_horizon=20 \
|
||||
--rtc.max_guidance_weight=5.0 \
|
||||
--rtc.prefix_attention_schedule=LINEAR \
|
||||
--device=cuda
|
||||
"""
|
||||
|
||||
import logging
|
||||
@@ -103,36 +78,28 @@ from torch import Tensor
|
||||
|
||||
from lerobot.cameras.opencv.configuration_opencv import OpenCVCameraConfig # noqa: F401
|
||||
from lerobot.cameras.realsense.configuration_realsense import RealSenseCameraConfig # noqa: F401
|
||||
from lerobot.cameras.zmq.configuration_zmq import ZMQCameraConfig # noqa: F401
|
||||
from lerobot.configs import parser
|
||||
from lerobot.configs.policies import PreTrainedConfig
|
||||
from lerobot.configs.types import RTCAttentionSchedule
|
||||
from lerobot.datasets.feature_utils import build_dataset_frame, hw_to_dataset_features
|
||||
from lerobot.datasets.utils import build_dataset_frame, hw_to_dataset_features
|
||||
from lerobot.policies.factory import get_policy_class, make_pre_post_processors
|
||||
from lerobot.policies.rtc import ActionInterpolator, ActionQueue, LatencyTracker, RTCConfig
|
||||
from lerobot.processor import (
|
||||
NormalizerProcessorStep,
|
||||
RelativeActionsProcessorStep,
|
||||
TransitionKey,
|
||||
create_transition,
|
||||
)
|
||||
from lerobot.policies.rtc.action_queue import ActionQueue
|
||||
from lerobot.policies.rtc.configuration_rtc import RTCConfig
|
||||
from lerobot.policies.rtc.latency_tracker import LatencyTracker
|
||||
from lerobot.processor.factory import (
|
||||
make_default_robot_action_processor,
|
||||
make_default_robot_observation_processor,
|
||||
)
|
||||
from lerobot.processor.relative_action_processor import to_relative_actions
|
||||
from lerobot.rl.process import ProcessSignalHandler
|
||||
from lerobot.robots import ( # noqa: F401
|
||||
Robot,
|
||||
RobotConfig,
|
||||
bi_openarm_follower,
|
||||
bi_so_follower,
|
||||
koch_follower,
|
||||
so_follower,
|
||||
unitree_g1,
|
||||
)
|
||||
from lerobot.robots.utils import make_robot_from_config
|
||||
from lerobot.utils.constants import OBS_IMAGES, OBS_STATE
|
||||
from lerobot.utils.constants import OBS_IMAGES
|
||||
from lerobot.utils.hub import HubMixin
|
||||
from lerobot.utils.utils import init_logging
|
||||
|
||||
@@ -184,7 +151,6 @@ class RTCDemoConfig(HubMixin):
|
||||
# Demo parameters
|
||||
duration: float = 30.0 # Duration to run the demo (seconds)
|
||||
fps: float = 10.0 # Action execution frequency (Hz)
|
||||
interpolation_multiplier: int = 1 # Control rate multiplier (1=off, 2=2x, 3=3x)
|
||||
|
||||
# Compute device
|
||||
device: str | None = None # Device to run on (cuda, cpu, auto)
|
||||
@@ -244,35 +210,6 @@ def is_image_key(k: str) -> bool:
|
||||
return k.startswith(OBS_IMAGES)
|
||||
|
||||
|
||||
def _reanchor_relative_rtc_prefix(
|
||||
prev_actions_absolute: Tensor,
|
||||
current_state: Tensor,
|
||||
relative_step: RelativeActionsProcessorStep,
|
||||
normalizer_step: NormalizerProcessorStep | None,
|
||||
policy_device: torch.device | str,
|
||||
) -> Tensor:
|
||||
"""Convert absolute leftovers into model-space for relative-action RTC policies.
|
||||
|
||||
When a policy uses relative actions, the RTC prefix (leftover actions from
|
||||
the previous chunk) is stored in absolute space. Before feeding it back to
|
||||
the policy we need to re-express it relative to the *current* robot state
|
||||
and then re-normalize.
|
||||
"""
|
||||
state = current_state.detach().cpu()
|
||||
if state.dim() == 1:
|
||||
state = state.unsqueeze(0)
|
||||
|
||||
action_cpu = prev_actions_absolute.detach().cpu()
|
||||
mask = relative_step._build_mask(action_cpu.shape[-1])
|
||||
relative_actions = to_relative_actions(action_cpu, state, mask)
|
||||
|
||||
transition = create_transition(action=relative_actions)
|
||||
if normalizer_step is not None:
|
||||
transition = normalizer_step(transition)
|
||||
|
||||
return transition[TransitionKey.ACTION].to(policy_device)
|
||||
|
||||
|
||||
def get_actions(
|
||||
policy,
|
||||
robot: RobotWrapper,
|
||||
@@ -298,15 +235,7 @@ def get_actions(
|
||||
fps = cfg.fps
|
||||
time_per_chunk = 1.0 / fps
|
||||
|
||||
# Only keep .pos joints + camera streams if the policy was trained on positions,
|
||||
# not the full pos/vel/torque state the robot exposes.
|
||||
observation_features_hw = {
|
||||
key: value
|
||||
for key, value in robot.observation_features().items()
|
||||
if key.endswith(".pos") or isinstance(value, tuple)
|
||||
}
|
||||
|
||||
dataset_features = hw_to_dataset_features(observation_features_hw, "observation")
|
||||
dataset_features = hw_to_dataset_features(robot.observation_features(), "observation")
|
||||
policy_device = policy.config.device
|
||||
|
||||
# Load preprocessor and postprocessor from pretrained files
|
||||
@@ -324,25 +253,6 @@ def get_actions(
|
||||
|
||||
logger.info("[GET_ACTIONS] Preprocessor/postprocessor loaded successfully with embedded stats")
|
||||
|
||||
relative_step = next(
|
||||
(s for s in preprocessor.steps if isinstance(s, RelativeActionsProcessorStep) and s.enabled),
|
||||
None,
|
||||
)
|
||||
normalizer_step = next(
|
||||
(s for s in preprocessor.steps if isinstance(s, NormalizerProcessorStep)),
|
||||
None,
|
||||
)
|
||||
if relative_step is not None:
|
||||
if relative_step.action_names is None:
|
||||
cfg_names = getattr(cfg.policy, "action_feature_names", None)
|
||||
if cfg_names:
|
||||
relative_step.action_names = list(cfg_names)
|
||||
else:
|
||||
relative_step.action_names = [
|
||||
k for k in robot.robot.action_features if k.endswith(".pos")
|
||||
]
|
||||
logger.info("[GET_ACTIONS] Relative actions enabled: will re-anchor RTC prefix")
|
||||
|
||||
get_actions_threshold = cfg.action_queue_size_to_get_new_actions
|
||||
|
||||
if not cfg.rtc.enabled:
|
||||
@@ -385,28 +295,6 @@ def get_actions(
|
||||
|
||||
preproceseded_obs = preprocessor(obs_with_policy_features)
|
||||
|
||||
# Re-anchor leftover actions for relative-action policies.
|
||||
# We need the *postprocessed* (absolute) leftover, not the original
|
||||
# (normalized/relative) one that get_left_over() returns.
|
||||
if (
|
||||
prev_actions is not None
|
||||
and relative_step is not None
|
||||
and OBS_STATE in obs_with_policy_features
|
||||
):
|
||||
with action_queue.lock:
|
||||
if action_queue.queue is not None:
|
||||
prev_actions_abs = action_queue.queue[action_queue.last_index :].clone()
|
||||
else:
|
||||
prev_actions_abs = None
|
||||
if prev_actions_abs is not None and prev_actions_abs.numel() > 0:
|
||||
prev_actions = _reanchor_relative_rtc_prefix(
|
||||
prev_actions_absolute=prev_actions_abs,
|
||||
current_state=obs_with_policy_features[OBS_STATE],
|
||||
relative_step=relative_step,
|
||||
normalizer_step=normalizer_step,
|
||||
policy_device=policy_device,
|
||||
)
|
||||
|
||||
# Generate actions WITH RTC
|
||||
actions = policy.predict_action_chunk(
|
||||
preproceseded_obs,
|
||||
@@ -462,26 +350,21 @@ def actor_control(
|
||||
try:
|
||||
logger.info("[ACTOR] Starting actor thread")
|
||||
|
||||
action_keys = [k for k in robot.action_features() if k.endswith(".pos")]
|
||||
|
||||
action_count = 0
|
||||
interpolator = ActionInterpolator(multiplier=cfg.interpolation_multiplier)
|
||||
action_interval = interpolator.get_control_interval(cfg.fps)
|
||||
action_interval = 1.0 / cfg.fps
|
||||
|
||||
while not shutdown_event.is_set():
|
||||
start_time = time.perf_counter()
|
||||
|
||||
if interpolator.needs_new_action():
|
||||
new_action = action_queue.get()
|
||||
if new_action is not None:
|
||||
interpolator.add(new_action.cpu())
|
||||
# Try to get an action from the queue with timeout
|
||||
action = action_queue.get()
|
||||
|
||||
action = interpolator.get()
|
||||
if action is not None:
|
||||
action = action.cpu()
|
||||
action_dict = {key: action[i].item() for i, key in enumerate(action_keys)}
|
||||
action_dict = {key: action[i].item() for i, key in enumerate(robot.action_features())}
|
||||
action_processed = robot_action_processor((action_dict, None))
|
||||
robot.send_action(action_processed)
|
||||
|
||||
action_count += 1
|
||||
|
||||
dt_s = time.perf_counter() - start_time
|
||||
|
||||
@@ -16,13 +16,15 @@
|
||||
|
||||
from lerobot.cameras.opencv.configuration_opencv import OpenCVCameraConfig
|
||||
from lerobot.configs.types import FeatureType, PolicyFeature
|
||||
from lerobot.datasets.feature_utils import combine_feature_dicts
|
||||
from lerobot.datasets.lerobot_dataset import LeRobotDataset
|
||||
from lerobot.datasets.pipeline_features import aggregate_pipeline_dataset_features, create_initial_features
|
||||
from lerobot.datasets.utils import combine_feature_dicts
|
||||
from lerobot.model.kinematics import RobotKinematics
|
||||
from lerobot.policies.act.modeling_act import ACTPolicy
|
||||
from lerobot.policies.factory import make_pre_post_processors
|
||||
from lerobot.processor import (
|
||||
RobotAction,
|
||||
RobotObservation,
|
||||
RobotProcessorPipeline,
|
||||
make_default_teleop_action_processor,
|
||||
)
|
||||
@@ -38,7 +40,6 @@ from lerobot.robots.so_follower.robot_kinematic_processor import (
|
||||
InverseKinematicsEEToJoints,
|
||||
)
|
||||
from lerobot.scripts.lerobot_record import record_loop
|
||||
from lerobot.types import RobotAction, RobotObservation
|
||||
from lerobot.utils.control_utils import init_keyboard_listener
|
||||
from lerobot.utils.utils import log_say
|
||||
from lerobot.utils.visualization_utils import init_rerun
|
||||
@@ -141,24 +142,38 @@ def main():
|
||||
listener, events = init_keyboard_listener()
|
||||
init_rerun(session_name="so100_so100_evaluate")
|
||||
|
||||
try:
|
||||
if not robot.is_connected:
|
||||
raise ValueError("Robot is not connected!")
|
||||
if not robot.is_connected:
|
||||
raise ValueError("Robot is not connected!")
|
||||
|
||||
print("Starting evaluate loop...")
|
||||
episode_idx = 0
|
||||
for episode_idx in range(NUM_EPISODES):
|
||||
log_say(f"Running inference, recording eval episode {episode_idx + 1} of {NUM_EPISODES}")
|
||||
print("Starting evaluate loop...")
|
||||
episode_idx = 0
|
||||
for episode_idx in range(NUM_EPISODES):
|
||||
log_say(f"Running inference, recording eval episode {episode_idx + 1} of {NUM_EPISODES}")
|
||||
|
||||
# Main record loop
|
||||
# Main record loop
|
||||
record_loop(
|
||||
robot=robot,
|
||||
events=events,
|
||||
fps=FPS,
|
||||
policy=policy,
|
||||
preprocessor=preprocessor, # Pass the pre and post policy processors
|
||||
postprocessor=postprocessor,
|
||||
dataset=dataset,
|
||||
control_time_s=EPISODE_TIME_SEC,
|
||||
single_task=TASK_DESCRIPTION,
|
||||
display_data=True,
|
||||
teleop_action_processor=make_default_teleop_action_processor(),
|
||||
robot_action_processor=robot_ee_to_joints_processor,
|
||||
robot_observation_processor=robot_joints_to_ee_pose_processor,
|
||||
)
|
||||
|
||||
# Reset the environment if not stopping or re-recording
|
||||
if not events["stop_recording"] and ((episode_idx < NUM_EPISODES - 1) or events["rerecord_episode"]):
|
||||
log_say("Reset the environment")
|
||||
record_loop(
|
||||
robot=robot,
|
||||
events=events,
|
||||
fps=FPS,
|
||||
policy=policy,
|
||||
preprocessor=preprocessor, # Pass the pre and post policy processors
|
||||
postprocessor=postprocessor,
|
||||
dataset=dataset,
|
||||
control_time_s=EPISODE_TIME_SEC,
|
||||
single_task=TASK_DESCRIPTION,
|
||||
display_data=True,
|
||||
@@ -167,41 +182,24 @@ def main():
|
||||
robot_observation_processor=robot_joints_to_ee_pose_processor,
|
||||
)
|
||||
|
||||
# Reset the environment if not stopping or re-recording
|
||||
if not events["stop_recording"] and (
|
||||
(episode_idx < NUM_EPISODES - 1) or events["rerecord_episode"]
|
||||
):
|
||||
log_say("Reset the environment")
|
||||
record_loop(
|
||||
robot=robot,
|
||||
events=events,
|
||||
fps=FPS,
|
||||
control_time_s=EPISODE_TIME_SEC,
|
||||
single_task=TASK_DESCRIPTION,
|
||||
display_data=True,
|
||||
teleop_action_processor=make_default_teleop_action_processor(),
|
||||
robot_action_processor=robot_ee_to_joints_processor,
|
||||
robot_observation_processor=robot_joints_to_ee_pose_processor,
|
||||
)
|
||||
if events["rerecord_episode"]:
|
||||
log_say("Re-record episode")
|
||||
events["rerecord_episode"] = False
|
||||
events["exit_early"] = False
|
||||
dataset.clear_episode_buffer()
|
||||
continue
|
||||
|
||||
if events["rerecord_episode"]:
|
||||
log_say("Re-record episode")
|
||||
events["rerecord_episode"] = False
|
||||
events["exit_early"] = False
|
||||
dataset.clear_episode_buffer()
|
||||
continue
|
||||
# Save episode
|
||||
dataset.save_episode()
|
||||
episode_idx += 1
|
||||
|
||||
# Save episode
|
||||
dataset.save_episode()
|
||||
episode_idx += 1
|
||||
finally:
|
||||
# Clean up
|
||||
log_say("Stop recording")
|
||||
robot.disconnect()
|
||||
listener.stop()
|
||||
# Clean up
|
||||
log_say("Stop recording")
|
||||
robot.disconnect()
|
||||
listener.stop()
|
||||
|
||||
dataset.finalize()
|
||||
dataset.push_to_hub()
|
||||
dataset.finalize()
|
||||
dataset.push_to_hub()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
@@ -16,11 +16,11 @@
|
||||
|
||||
|
||||
from lerobot.cameras.opencv.configuration_opencv import OpenCVCameraConfig
|
||||
from lerobot.datasets.feature_utils import combine_feature_dicts
|
||||
from lerobot.datasets.lerobot_dataset import LeRobotDataset
|
||||
from lerobot.datasets.pipeline_features import aggregate_pipeline_dataset_features, create_initial_features
|
||||
from lerobot.datasets.utils import combine_feature_dicts
|
||||
from lerobot.model.kinematics import RobotKinematics
|
||||
from lerobot.processor import RobotProcessorPipeline
|
||||
from lerobot.processor import RobotAction, RobotObservation, RobotProcessorPipeline
|
||||
from lerobot.processor.converters import (
|
||||
observation_to_transition,
|
||||
robot_action_observation_to_transition,
|
||||
@@ -35,7 +35,6 @@ from lerobot.robots.so_follower.robot_kinematic_processor import (
|
||||
)
|
||||
from lerobot.scripts.lerobot_record import record_loop
|
||||
from lerobot.teleoperators.so_leader import SO100Leader, SO100LeaderConfig
|
||||
from lerobot.types import RobotAction, RobotObservation
|
||||
from lerobot.utils.control_utils import init_keyboard_listener
|
||||
from lerobot.utils.utils import log_say
|
||||
from lerobot.utils.visualization_utils import init_rerun
|
||||
@@ -147,23 +146,38 @@ def main():
|
||||
listener, events = init_keyboard_listener()
|
||||
init_rerun(session_name="recording_phone")
|
||||
|
||||
try:
|
||||
if not leader.is_connected or not follower.is_connected:
|
||||
raise ValueError("Robot or teleop is not connected!")
|
||||
if not leader.is_connected or not follower.is_connected:
|
||||
raise ValueError("Robot or teleop is not connected!")
|
||||
|
||||
print("Starting record loop...")
|
||||
episode_idx = 0
|
||||
while episode_idx < NUM_EPISODES and not events["stop_recording"]:
|
||||
log_say(f"Recording episode {episode_idx + 1} of {NUM_EPISODES}")
|
||||
print("Starting record loop...")
|
||||
episode_idx = 0
|
||||
while episode_idx < NUM_EPISODES and not events["stop_recording"]:
|
||||
log_say(f"Recording episode {episode_idx + 1} of {NUM_EPISODES}")
|
||||
|
||||
# Main record loop
|
||||
# Main record loop
|
||||
record_loop(
|
||||
robot=follower,
|
||||
events=events,
|
||||
fps=FPS,
|
||||
teleop=leader,
|
||||
dataset=dataset,
|
||||
control_time_s=EPISODE_TIME_SEC,
|
||||
single_task=TASK_DESCRIPTION,
|
||||
display_data=True,
|
||||
teleop_action_processor=leader_joints_to_ee,
|
||||
robot_action_processor=ee_to_follower_joints,
|
||||
robot_observation_processor=follower_joints_to_ee,
|
||||
)
|
||||
|
||||
# Reset the environment if not stopping or re-recording
|
||||
if not events["stop_recording"] and (episode_idx < NUM_EPISODES - 1 or events["rerecord_episode"]):
|
||||
log_say("Reset the environment")
|
||||
record_loop(
|
||||
robot=follower,
|
||||
events=events,
|
||||
fps=FPS,
|
||||
teleop=leader,
|
||||
dataset=dataset,
|
||||
control_time_s=EPISODE_TIME_SEC,
|
||||
control_time_s=RESET_TIME_SEC,
|
||||
single_task=TASK_DESCRIPTION,
|
||||
display_data=True,
|
||||
teleop_action_processor=leader_joints_to_ee,
|
||||
@@ -171,44 +185,25 @@ def main():
|
||||
robot_observation_processor=follower_joints_to_ee,
|
||||
)
|
||||
|
||||
# Reset the environment if not stopping or re-recording
|
||||
if not events["stop_recording"] and (
|
||||
episode_idx < NUM_EPISODES - 1 or events["rerecord_episode"]
|
||||
):
|
||||
log_say("Reset the environment")
|
||||
record_loop(
|
||||
robot=follower,
|
||||
events=events,
|
||||
fps=FPS,
|
||||
teleop=leader,
|
||||
control_time_s=RESET_TIME_SEC,
|
||||
single_task=TASK_DESCRIPTION,
|
||||
display_data=True,
|
||||
teleop_action_processor=leader_joints_to_ee,
|
||||
robot_action_processor=ee_to_follower_joints,
|
||||
robot_observation_processor=follower_joints_to_ee,
|
||||
)
|
||||
if events["rerecord_episode"]:
|
||||
log_say("Re-recording episode")
|
||||
events["rerecord_episode"] = False
|
||||
events["exit_early"] = False
|
||||
dataset.clear_episode_buffer()
|
||||
continue
|
||||
|
||||
if events["rerecord_episode"]:
|
||||
log_say("Re-recording episode")
|
||||
events["rerecord_episode"] = False
|
||||
events["exit_early"] = False
|
||||
dataset.clear_episode_buffer()
|
||||
continue
|
||||
# Save episode
|
||||
dataset.save_episode()
|
||||
episode_idx += 1
|
||||
|
||||
# Save episode
|
||||
dataset.save_episode()
|
||||
episode_idx += 1
|
||||
# Clean up
|
||||
log_say("Stop recording")
|
||||
leader.disconnect()
|
||||
follower.disconnect()
|
||||
listener.stop()
|
||||
|
||||
finally:
|
||||
# Clean up
|
||||
log_say("Stop recording")
|
||||
leader.disconnect()
|
||||
follower.disconnect()
|
||||
listener.stop()
|
||||
|
||||
dataset.finalize()
|
||||
dataset.push_to_hub()
|
||||
dataset.finalize()
|
||||
dataset.push_to_hub()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
@@ -19,7 +19,7 @@ import time
|
||||
|
||||
from lerobot.datasets.lerobot_dataset import LeRobotDataset
|
||||
from lerobot.model.kinematics import RobotKinematics
|
||||
from lerobot.processor import RobotProcessorPipeline
|
||||
from lerobot.processor import RobotAction, RobotObservation, RobotProcessorPipeline
|
||||
from lerobot.processor.converters import (
|
||||
robot_action_observation_to_transition,
|
||||
transition_to_robot_action,
|
||||
@@ -28,7 +28,6 @@ from lerobot.robots.so_follower import SO100Follower, SO100FollowerConfig
|
||||
from lerobot.robots.so_follower.robot_kinematic_processor import (
|
||||
InverseKinematicsEEToJoints,
|
||||
)
|
||||
from lerobot.types import RobotAction, RobotObservation
|
||||
from lerobot.utils.constants import ACTION
|
||||
from lerobot.utils.robot_utils import precise_sleep
|
||||
from lerobot.utils.utils import log_say
|
||||
@@ -68,40 +67,39 @@ def main():
|
||||
|
||||
# Fetch the dataset to replay
|
||||
dataset = LeRobotDataset(HF_REPO_ID, episodes=[EPISODE_IDX])
|
||||
actions = dataset.select_columns(ACTION)
|
||||
# Filter dataset to only include frames from the specified episode since episodes are chunked in dataset V3.0
|
||||
episode_frames = dataset.hf_dataset.filter(lambda x: x["episode_index"] == EPISODE_IDX)
|
||||
actions = episode_frames.select_columns(ACTION)
|
||||
|
||||
# Connect to the robot
|
||||
robot.connect()
|
||||
|
||||
try:
|
||||
if not robot.is_connected:
|
||||
raise ValueError("Robot is not connected!")
|
||||
if not robot.is_connected:
|
||||
raise ValueError("Robot is not connected!")
|
||||
|
||||
print("Starting replay loop...")
|
||||
log_say(f"Replaying episode {EPISODE_IDX}")
|
||||
for idx in range(dataset.num_frames):
|
||||
t0 = time.perf_counter()
|
||||
print("Starting replay loop...")
|
||||
log_say(f"Replaying episode {EPISODE_IDX}")
|
||||
for idx in range(len(episode_frames)):
|
||||
t0 = time.perf_counter()
|
||||
|
||||
# Get recorded action from dataset
|
||||
ee_action = {
|
||||
name: float(actions[idx][ACTION][i])
|
||||
for i, name in enumerate(dataset.features[ACTION]["names"])
|
||||
}
|
||||
# Get recorded action from dataset
|
||||
ee_action = {
|
||||
name: float(actions[idx][ACTION][i]) for i, name in enumerate(dataset.features[ACTION]["names"])
|
||||
}
|
||||
|
||||
# Get robot observation
|
||||
robot_obs = robot.get_observation()
|
||||
# Get robot observation
|
||||
robot_obs = robot.get_observation()
|
||||
|
||||
# Dataset EE -> robot joints
|
||||
joint_action = robot_ee_to_joints_processor((ee_action, robot_obs))
|
||||
# Dataset EE -> robot joints
|
||||
joint_action = robot_ee_to_joints_processor((ee_action, robot_obs))
|
||||
|
||||
# Send action to robot
|
||||
_ = robot.send_action(joint_action)
|
||||
# Send action to robot
|
||||
_ = robot.send_action(joint_action)
|
||||
|
||||
precise_sleep(max(1.0 / dataset.fps - (time.perf_counter() - t0), 0.0))
|
||||
precise_sleep(max(1.0 / dataset.fps - (time.perf_counter() - t0), 0.0))
|
||||
|
||||
finally:
|
||||
# Clean up
|
||||
robot.disconnect()
|
||||
# Clean up
|
||||
robot.disconnect()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
@@ -17,7 +17,7 @@
|
||||
import time
|
||||
|
||||
from lerobot.model.kinematics import RobotKinematics
|
||||
from lerobot.processor import RobotProcessorPipeline
|
||||
from lerobot.processor import RobotAction, RobotObservation, RobotProcessorPipeline
|
||||
from lerobot.processor.converters import (
|
||||
robot_action_observation_to_transition,
|
||||
robot_action_to_transition,
|
||||
@@ -30,7 +30,6 @@ from lerobot.robots.so_follower.robot_kinematic_processor import (
|
||||
InverseKinematicsEEToJoints,
|
||||
)
|
||||
from lerobot.teleoperators.so_leader import SO100Leader, SO100LeaderConfig
|
||||
from lerobot.types import RobotAction, RobotObservation
|
||||
from lerobot.utils.robot_utils import precise_sleep
|
||||
from lerobot.utils.visualization_utils import init_rerun, log_rerun_data
|
||||
|
||||
|
||||
@@ -19,9 +19,8 @@ from pathlib import Path
|
||||
import torch
|
||||
|
||||
from lerobot.configs.types import FeatureType
|
||||
from lerobot.datasets.dataset_metadata import LeRobotDatasetMetadata
|
||||
from lerobot.datasets.feature_utils import dataset_to_policy_features
|
||||
from lerobot.datasets.lerobot_dataset import LeRobotDataset
|
||||
from lerobot.datasets.lerobot_dataset import LeRobotDataset, LeRobotDatasetMetadata
|
||||
from lerobot.datasets.utils import dataset_to_policy_features
|
||||
from lerobot.policies.diffusion.configuration_diffusion import DiffusionConfig
|
||||
from lerobot.policies.diffusion.modeling_diffusion import DiffusionPolicy
|
||||
from lerobot.policies.factory import make_pre_post_processors
|
||||
|
||||
@@ -20,9 +20,9 @@ from pathlib import Path
|
||||
import torch
|
||||
|
||||
from lerobot.configs.types import FeatureType
|
||||
from lerobot.datasets.dataset_metadata import LeRobotDatasetMetadata
|
||||
from lerobot.datasets.feature_utils import dataset_to_policy_features
|
||||
from lerobot.datasets.lerobot_dataset import LeRobotDatasetMetadata
|
||||
from lerobot.datasets.streaming_dataset import StreamingLeRobotDataset
|
||||
from lerobot.datasets.utils import dataset_to_policy_features
|
||||
from lerobot.policies.act.configuration_act import ACTConfig
|
||||
from lerobot.policies.act.modeling_act import ACTPolicy
|
||||
from lerobot.policies.factory import make_pre_post_processors
|
||||
|
||||
@@ -5,9 +5,8 @@ from pathlib import Path
|
||||
import torch
|
||||
|
||||
from lerobot.configs.types import FeatureType
|
||||
from lerobot.datasets.dataset_metadata import LeRobotDatasetMetadata
|
||||
from lerobot.datasets.feature_utils import dataset_to_policy_features
|
||||
from lerobot.datasets.lerobot_dataset import LeRobotDataset
|
||||
from lerobot.datasets.lerobot_dataset import LeRobotDataset, LeRobotDatasetMetadata
|
||||
from lerobot.datasets.utils import dataset_to_policy_features
|
||||
from lerobot.policies.act.configuration_act import ACTConfig
|
||||
from lerobot.policies.act.modeling_act import ACTPolicy
|
||||
from lerobot.policies.factory import make_pre_post_processors
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
import torch
|
||||
|
||||
from lerobot.cameras.opencv.configuration_opencv import OpenCVCameraConfig
|
||||
from lerobot.datasets.dataset_metadata import LeRobotDatasetMetadata
|
||||
from lerobot.datasets.lerobot_dataset import LeRobotDatasetMetadata
|
||||
from lerobot.policies.act.modeling_act import ACTPolicy
|
||||
from lerobot.policies.factory import make_pre_post_processors
|
||||
from lerobot.policies.utils import build_inference_frame, make_robot_action
|
||||
|
||||
@@ -30,7 +30,6 @@ def main():
|
||||
robot=robot_cfg,
|
||||
server_address=server_address,
|
||||
policy_device="mps",
|
||||
client_device="cpu",
|
||||
policy_type="act",
|
||||
pretrained_name_or_path="<user>/robot_learning_tutorial_act",
|
||||
chunk_size_threshold=0.5, # g
|
||||
|
||||
@@ -5,9 +5,8 @@ from pathlib import Path
|
||||
import torch
|
||||
|
||||
from lerobot.configs.types import FeatureType
|
||||
from lerobot.datasets.dataset_metadata import LeRobotDatasetMetadata
|
||||
from lerobot.datasets.feature_utils import dataset_to_policy_features
|
||||
from lerobot.datasets.lerobot_dataset import LeRobotDataset
|
||||
from lerobot.datasets.lerobot_dataset import LeRobotDataset, LeRobotDatasetMetadata
|
||||
from lerobot.datasets.utils import dataset_to_policy_features
|
||||
from lerobot.policies.diffusion.configuration_diffusion import DiffusionConfig
|
||||
from lerobot.policies.diffusion.modeling_diffusion import DiffusionPolicy
|
||||
from lerobot.policies.factory import make_pre_post_processors
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
import torch
|
||||
|
||||
from lerobot.cameras.opencv.configuration_opencv import OpenCVCameraConfig
|
||||
from lerobot.datasets.dataset_metadata import LeRobotDatasetMetadata
|
||||
from lerobot.datasets.lerobot_dataset import LeRobotDatasetMetadata
|
||||
from lerobot.policies.diffusion.modeling_diffusion import DiffusionPolicy
|
||||
from lerobot.policies.factory import make_pre_post_processors
|
||||
from lerobot.policies.utils import build_inference_frame, make_robot_action
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
import torch
|
||||
|
||||
from lerobot.cameras.opencv.configuration_opencv import OpenCVCameraConfig
|
||||
from lerobot.datasets.feature_utils import hw_to_dataset_features
|
||||
from lerobot.datasets.utils import hw_to_dataset_features
|
||||
from lerobot.policies.factory import make_pre_post_processors
|
||||
from lerobot.policies.pi0.modeling_pi0 import PI0Policy
|
||||
from lerobot.policies.utils import build_inference_frame, make_robot_action
|
||||
|
||||
@@ -6,8 +6,8 @@ from queue import Empty, Full
|
||||
import torch
|
||||
import torch.optim as optim
|
||||
|
||||
from lerobot.datasets.feature_utils import hw_to_dataset_features
|
||||
from lerobot.datasets.lerobot_dataset import LeRobotDataset
|
||||
from lerobot.datasets.utils import hw_to_dataset_features
|
||||
from lerobot.envs.configs import HILSerlProcessorConfig, HILSerlRobotEnvConfig
|
||||
from lerobot.policies.sac.configuration_sac import SACConfig
|
||||
from lerobot.policies.sac.modeling_sac import SACPolicy
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
import torch
|
||||
|
||||
from lerobot.cameras.opencv.configuration_opencv import OpenCVCameraConfig
|
||||
from lerobot.datasets.feature_utils import hw_to_dataset_features
|
||||
from lerobot.datasets.utils import hw_to_dataset_features
|
||||
from lerobot.policies.factory import make_pre_post_processors
|
||||
from lerobot.policies.smolvla.modeling_smolvla import SmolVLAPolicy
|
||||
from lerobot.policies.utils import build_inference_frame, make_robot_action
|
||||
|
||||
+105
-52
@@ -14,20 +14,20 @@
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import argparse
|
||||
import logging
|
||||
import time
|
||||
from collections import deque
|
||||
|
||||
import numpy as np
|
||||
import onnxruntime as ort
|
||||
from huggingface_hub import hf_hub_download
|
||||
|
||||
from lerobot.robots.unitree_g1.g1_utils import (
|
||||
REMOTE_AXES,
|
||||
REMOTE_BUTTONS,
|
||||
G1_29_JointIndex,
|
||||
get_gravity_orientation,
|
||||
)
|
||||
from lerobot.robots.unitree_g1.config_unitree_g1 import UnitreeG1Config
|
||||
from lerobot.robots.unitree_g1.g1_utils import G1_29_JointIndex
|
||||
from lerobot.robots.unitree_g1.unitree_g1 import UnitreeG1
|
||||
|
||||
logging.basicConfig(level=logging.INFO)
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@@ -36,13 +36,18 @@ GROOT_DEFAULT_ANGLES[[0, 6]] = -0.1 # Hip pitch
|
||||
GROOT_DEFAULT_ANGLES[[3, 9]] = 0.3 # Knee
|
||||
GROOT_DEFAULT_ANGLES[[4, 10]] = -0.2 # Ankle pitch
|
||||
|
||||
MISSING_JOINTS = []
|
||||
G1_MODEL = "g1_23" # Or "g1_29"
|
||||
if G1_MODEL == "g1_23":
|
||||
MISSING_JOINTS = [12, 14, 20, 21, 27, 28] # Waist yaw/pitch, wrist pitch/yaw
|
||||
|
||||
# Control parameters
|
||||
ACTION_SCALE = 0.25
|
||||
CONTROL_DT = 0.02 # 50Hz
|
||||
ANG_VEL_SCALE: float = 0.25
|
||||
DOF_POS_SCALE: float = 1.0
|
||||
DOF_VEL_SCALE: float = 0.05
|
||||
CMD_SCALE: list[float] = [2.0, 2.0, 0.25]
|
||||
CMD_SCALE: list = [2.0, 2.0, 0.25]
|
||||
|
||||
|
||||
DEFAULT_GROOT_REPO_ID = "nepyope/GR00T-WholeBodyControl_g1"
|
||||
@@ -80,11 +85,11 @@ def load_groot_policies(
|
||||
class GrootLocomotionController:
|
||||
"""GR00T lower-body locomotion controller for the Unitree G1."""
|
||||
|
||||
control_dt = CONTROL_DT # Expose for unitree_g1.py
|
||||
|
||||
def __init__(self):
|
||||
# Load policies
|
||||
self.policy_balance, self.policy_walk = load_groot_policies()
|
||||
def __init__(self, policy_balance, policy_walk, robot, config):
|
||||
self.policy_balance = policy_balance
|
||||
self.policy_walk = policy_walk
|
||||
self.robot = robot
|
||||
self.config = config
|
||||
|
||||
self.cmd = np.array([0.0, 0.0, 0.0], dtype=np.float32) # vx, vy, theta_dot
|
||||
|
||||
@@ -104,60 +109,45 @@ class GrootLocomotionController:
|
||||
|
||||
logger.info("GrootLocomotionController initialized")
|
||||
|
||||
def reset(self) -> None:
|
||||
"""Reset internal state for a new episode."""
|
||||
self.cmd[:] = 0.0
|
||||
self.groot_qj_all[:] = 0.0
|
||||
self.groot_dqj_all[:] = 0.0
|
||||
self.groot_action[:] = 0.0
|
||||
self.groot_obs_single[:] = 0.0
|
||||
self.groot_obs_stacked[:] = 0.0
|
||||
self.groot_height_cmd = 0.74
|
||||
self.groot_orientation_cmd[:] = 0.0
|
||||
self.groot_obs_history.clear()
|
||||
for _ in range(6):
|
||||
self.groot_obs_history.append(np.zeros(86, dtype=np.float32))
|
||||
def run_step(self):
|
||||
# Get current observation
|
||||
obs = self.robot.get_observation()
|
||||
|
||||
def run_step(self, action: dict, lowstate) -> dict:
|
||||
"""Run one step of the locomotion controller.
|
||||
if not obs:
|
||||
return
|
||||
|
||||
Args:
|
||||
action: Action dict containing remote.lx/ly/rx/ry and buttons
|
||||
lowstate: Robot lowstate containing motor positions/velocities and IMU
|
||||
|
||||
Returns:
|
||||
Action dict for lower body joints (0-14)
|
||||
"""
|
||||
if lowstate is None:
|
||||
return {}
|
||||
|
||||
buttons = [int(action.get(k, 0)) for k in REMOTE_BUTTONS]
|
||||
if buttons[0]: # R1 - raise waist
|
||||
# Get command from remote controller
|
||||
if obs["remote.buttons"][0]: # R1 - raise waist
|
||||
self.groot_height_cmd += 0.001
|
||||
self.groot_height_cmd = np.clip(self.groot_height_cmd, 0.50, 1.00)
|
||||
if buttons[4]: # R2 - lower waist
|
||||
if obs["remote.buttons"][4]: # R2 - lower waist
|
||||
self.groot_height_cmd -= 0.001
|
||||
self.groot_height_cmd = np.clip(self.groot_height_cmd, 0.50, 1.00)
|
||||
|
||||
lx, ly, rx, _ry = (action.get(k, 0.0) for k in REMOTE_AXES)
|
||||
self.cmd[0] = ly # Forward/backward
|
||||
self.cmd[1] = -lx # Left/right (negated)
|
||||
self.cmd[2] = -rx # Rotation rate (negated)
|
||||
self.cmd[0] = obs["remote.ly"] # Forward/backward
|
||||
self.cmd[1] = obs["remote.lx"] * -1 # Left/right
|
||||
self.cmd[2] = obs["remote.rx"] * -1 # Rotation rate
|
||||
|
||||
# Get joint positions and velocities from lowstate
|
||||
# Get joint positions and velocities from flat dict
|
||||
for motor in G1_29_JointIndex:
|
||||
name = motor.name
|
||||
idx = motor.value
|
||||
self.groot_qj_all[idx] = lowstate.motor_state[idx].q
|
||||
self.groot_dqj_all[idx] = lowstate.motor_state[idx].dq
|
||||
self.groot_qj_all[idx] = obs[f"{name}.q"]
|
||||
self.groot_dqj_all[idx] = obs[f"{name}.dq"]
|
||||
|
||||
# Adapt observation for g1_23dof
|
||||
for idx in MISSING_JOINTS:
|
||||
self.groot_qj_all[idx] = 0.0
|
||||
self.groot_dqj_all[idx] = 0.0
|
||||
|
||||
# Scale joint positions and velocities
|
||||
qj_obs = self.groot_qj_all.copy()
|
||||
dqj_obs = self.groot_dqj_all.copy()
|
||||
|
||||
# Express IMU data in gravity frame of reference
|
||||
quat = lowstate.imu_state.quaternion
|
||||
ang_vel = np.array(lowstate.imu_state.gyroscope, dtype=np.float32)
|
||||
gravity_orientation = get_gravity_orientation(quat)
|
||||
quat = [obs["imu.quat.w"], obs["imu.quat.x"], obs["imu.quat.y"], obs["imu.quat.z"]]
|
||||
ang_vel = np.array([obs["imu.gyro.x"], obs["imu.gyro.y"], obs["imu.gyro.z"]], dtype=np.float32)
|
||||
gravity_orientation = self.robot.get_gravity_orientation(quat)
|
||||
|
||||
# Scale joint positions and velocities before policy inference
|
||||
qj_obs = (qj_obs - GROOT_DEFAULT_ANGLES) * DOF_POS_SCALE
|
||||
@@ -196,10 +186,73 @@ class GrootLocomotionController:
|
||||
# Transform action back to target joint positions
|
||||
target_dof_pos_15 = GROOT_DEFAULT_ANGLES[:15] + self.groot_action * ACTION_SCALE
|
||||
|
||||
# Build action dict
|
||||
# Build action dict (only first 15 joints for GR00T)
|
||||
action_dict = {}
|
||||
for i in range(15):
|
||||
motor_name = G1_29_JointIndex(i).name
|
||||
action_dict[f"{motor_name}.q"] = float(target_dof_pos_15[i])
|
||||
|
||||
return action_dict
|
||||
# Zero out missing joints for g1_23dof
|
||||
for joint_idx in MISSING_JOINTS:
|
||||
motor_name = G1_29_JointIndex(joint_idx).name
|
||||
action_dict[f"{motor_name}.q"] = 0.0
|
||||
|
||||
# Send action to robot
|
||||
self.robot.send_action(action_dict)
|
||||
|
||||
|
||||
def run(repo_id: str = DEFAULT_GROOT_REPO_ID) -> None:
|
||||
"""Main function to run the GR00T locomotion controller.
|
||||
|
||||
Args:
|
||||
repo_id: Hugging Face Hub repository ID for GR00T policies.
|
||||
"""
|
||||
# Load policies
|
||||
policy_balance, policy_walk = load_groot_policies(repo_id=repo_id)
|
||||
|
||||
# Initialize robot
|
||||
config = UnitreeG1Config()
|
||||
robot = UnitreeG1(config)
|
||||
|
||||
robot.connect()
|
||||
|
||||
# Initialize gr00T locomotion controller
|
||||
groot_controller = GrootLocomotionController(
|
||||
policy_balance=policy_balance,
|
||||
policy_walk=policy_walk,
|
||||
robot=robot,
|
||||
config=config,
|
||||
)
|
||||
|
||||
try:
|
||||
robot.reset(CONTROL_DT, GROOT_DEFAULT_ANGLES)
|
||||
|
||||
logger.info("Use joystick: LY=fwd/back, LX=left/right, RX=rotate, R1=raise waist, R2=lower waist")
|
||||
logger.info("Press Ctrl+C to stop")
|
||||
|
||||
# Run step
|
||||
while not robot._shutdown_event.is_set():
|
||||
start_time = time.time()
|
||||
groot_controller.run_step()
|
||||
elapsed = time.time() - start_time
|
||||
sleep_time = max(0, CONTROL_DT - elapsed)
|
||||
time.sleep(sleep_time)
|
||||
except KeyboardInterrupt:
|
||||
logger.info("Stopping locomotion...")
|
||||
finally:
|
||||
if robot.is_connected:
|
||||
robot.disconnect()
|
||||
logger.info("Done!")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
parser = argparse.ArgumentParser(description="GR00T Locomotion Controller for Unitree G1")
|
||||
parser.add_argument(
|
||||
"--repo-id",
|
||||
type=str,
|
||||
default=DEFAULT_GROOT_REPO_ID,
|
||||
help=f"Hugging Face Hub repo ID for GR00T policies (default: {DEFAULT_GROOT_REPO_ID})",
|
||||
)
|
||||
args = parser.parse_args()
|
||||
|
||||
run(repo_id=args.repo_id)
|
||||
+112
-62
@@ -14,21 +14,21 @@
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import logging
|
||||
import time
|
||||
|
||||
import numpy as np
|
||||
import onnx
|
||||
import onnxruntime as ort
|
||||
from huggingface_hub import hf_hub_download
|
||||
|
||||
from lerobot.robots.unitree_g1.g1_utils import (
|
||||
REMOTE_AXES,
|
||||
G1_29_JointArmIndex,
|
||||
G1_29_JointIndex,
|
||||
get_gravity_orientation,
|
||||
)
|
||||
from lerobot.robots.unitree_g1.config_unitree_g1 import UnitreeG1Config
|
||||
from lerobot.robots.unitree_g1.g1_utils import G1_29_JointIndex
|
||||
from lerobot.robots.unitree_g1.unitree_g1 import UnitreeG1
|
||||
|
||||
logging.basicConfig(level=logging.INFO)
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
DEFAULT_ANGLES = np.zeros(29, dtype=np.float32)
|
||||
@@ -40,13 +40,18 @@ DEFAULT_ANGLES[16] = 0.2 # Left shoulder roll
|
||||
DEFAULT_ANGLES[23] = -0.2 # Right shoulder roll
|
||||
DEFAULT_ANGLES[[18, 25]] = 0.6 # Elbow
|
||||
|
||||
MISSING_JOINTS = []
|
||||
G1_MODEL = "g1_23" # Or "g1_29"
|
||||
if G1_MODEL == "g1_23":
|
||||
MISSING_JOINTS = [12, 14, 20, 21, 27, 28] # Waist yaw/pitch, wrist pitch/yaw
|
||||
|
||||
# Control parameters
|
||||
ACTION_SCALE = 0.25
|
||||
CONTROL_DT = 0.005 # 200Hz
|
||||
CONTROL_DT = 0.02 # 50Hz
|
||||
ANG_VEL_SCALE = 0.25
|
||||
DOF_POS_SCALE = 1.0
|
||||
DOF_VEL_SCALE = 0.05
|
||||
GAIT_PERIOD = 0.5
|
||||
GAIT_PERIOD = 1.0
|
||||
|
||||
|
||||
DEFAULT_HOLOSOMA_REPO_ID = "nepyope/holosoma_locomotion"
|
||||
@@ -82,7 +87,7 @@ def load_policy(
|
||||
logger.info(f"Policy loaded: {policy.get_inputs()[0].shape} → {policy.get_outputs()[0].shape}")
|
||||
|
||||
# Extract KP/KD from ONNX metadata
|
||||
model = onnx.load(policy_path, load_external_data=False)
|
||||
model = onnx.load(policy_path)
|
||||
metadata = {prop.key: prop.value for prop in model.metadata_props}
|
||||
|
||||
if "kp" not in metadata or "kd" not in metadata:
|
||||
@@ -96,13 +101,15 @@ def load_policy(
|
||||
|
||||
|
||||
class HolosomaLocomotionController:
|
||||
"""Holosoma lower-body locomotion controller for Unitree G1."""
|
||||
"""Holosoma whole-body locomotion controller for Unitree G1."""
|
||||
|
||||
control_dt = CONTROL_DT # Expose for unitree_g1.py
|
||||
def __init__(self, policy, robot, kp: np.ndarray, kd: np.ndarray):
|
||||
self.policy = policy
|
||||
self.robot = robot
|
||||
|
||||
def __init__(self):
|
||||
# Load policy and gains
|
||||
self.policy, self.kp, self.kd = load_policy()
|
||||
# Override robot's PD gains with policy gains
|
||||
self.robot.kp = kp
|
||||
self.robot.kd = kd
|
||||
|
||||
self.cmd = np.zeros(3, dtype=np.float32)
|
||||
|
||||
@@ -117,55 +124,35 @@ class HolosomaLocomotionController:
|
||||
self.phase_dt = 2 * np.pi / ((1.0 / CONTROL_DT) * GAIT_PERIOD)
|
||||
self.is_standing = True
|
||||
|
||||
logger.info("HolosomaLocomotionController initialized")
|
||||
def run_step(self):
|
||||
# Get current observation
|
||||
obs = self.robot.get_observation()
|
||||
|
||||
def reset(self) -> None:
|
||||
"""Reset internal state for a new episode."""
|
||||
self.cmd[:] = 0.0
|
||||
self.qj[:] = 0.0
|
||||
self.dqj[:] = 0.0
|
||||
self.obs[:] = 0.0
|
||||
self.last_action[:] = 0.0
|
||||
self.phase = np.array([[0.0, np.pi]], dtype=np.float32)
|
||||
self.is_standing = True
|
||||
if not obs:
|
||||
return
|
||||
|
||||
def run_step(self, action: dict, lowstate) -> dict:
|
||||
"""Run one step of the locomotion controller.
|
||||
|
||||
Args:
|
||||
action: Action dict containing remote.lx/ly/rx/ry
|
||||
lowstate: Robot lowstate containing motor positions/velocities and IMU
|
||||
|
||||
Returns:
|
||||
Action dict for lower body joints (0-14)
|
||||
"""
|
||||
if lowstate is None:
|
||||
return {}
|
||||
|
||||
lx, ly, rx, _ry = (action.get(k, 0.0) for k in REMOTE_AXES)
|
||||
ly = ly if abs(ly) > 0.1 else 0.0
|
||||
lx = lx if abs(lx) > 0.1 else 0.0
|
||||
rx = rx if abs(rx) > 0.1 else 0.0
|
||||
ly = np.clip(ly, -0.3, 0.3)
|
||||
lx = np.clip(lx, -0.3, 0.3)
|
||||
# Get command from remote controller
|
||||
ly = obs["remote.ly"] if abs(obs["remote.ly"]) > 0.1 else 0.0
|
||||
lx = obs["remote.lx"] if abs(obs["remote.lx"]) > 0.1 else 0.0
|
||||
rx = obs["remote.rx"] if abs(obs["remote.rx"]) > 0.1 else 0.0
|
||||
self.cmd[:] = [ly, -lx, -rx]
|
||||
|
||||
# Get joint positions and velocities from lowstate
|
||||
# Get joint positions and velocities
|
||||
for motor in G1_29_JointIndex:
|
||||
name = motor.name
|
||||
idx = motor.value
|
||||
self.qj[idx] = lowstate.motor_state[idx].q
|
||||
self.dqj[idx] = lowstate.motor_state[idx].dq
|
||||
self.qj[idx] = obs[f"{name}.q"]
|
||||
self.dqj[idx] = obs[f"{name}.dq"]
|
||||
|
||||
# Hide arm positions from policy (show DEFAULT_ANGLES instead)
|
||||
# This prevents policy from reacting to teleop arm movements
|
||||
for arm_joint in G1_29_JointArmIndex:
|
||||
self.qj[arm_joint.value] = DEFAULT_ANGLES[arm_joint.value]
|
||||
self.dqj[arm_joint.value] = 0.0
|
||||
# Adapt observation for g1_23dof
|
||||
for idx in MISSING_JOINTS:
|
||||
self.qj[idx] = 0.0
|
||||
self.dqj[idx] = 0.0
|
||||
|
||||
# Express IMU data in gravity frame of reference
|
||||
quat = lowstate.imu_state.quaternion
|
||||
ang_vel = np.array(lowstate.imu_state.gyroscope, dtype=np.float32)
|
||||
gravity = get_gravity_orientation(quat)
|
||||
quat = [obs["imu.quat.w"], obs["imu.quat.x"], obs["imu.quat.y"], obs["imu.quat.z"]]
|
||||
ang_vel = np.array([obs["imu.gyro.x"], obs["imu.gyro.y"], obs["imu.gyro.z"]], dtype=np.float32)
|
||||
gravity = self.robot.get_gravity_orientation(quat)
|
||||
|
||||
# Scale joint positions and velocities before policy inference
|
||||
qj_obs = (self.qj - DEFAULT_ANGLES) * DOF_POS_SCALE
|
||||
@@ -199,16 +186,79 @@ class HolosomaLocomotionController:
|
||||
# Run policy inference
|
||||
ort_in = {self.policy.get_inputs()[0].name: self.obs.reshape(1, -1).astype(np.float32)}
|
||||
raw_action = self.policy.run(None, ort_in)[0].squeeze()
|
||||
policy_action = np.clip(raw_action, -100.0, 100.0)
|
||||
self.last_action = policy_action.copy()
|
||||
action = np.clip(raw_action, -100.0, 100.0)
|
||||
self.last_action = action.copy()
|
||||
|
||||
# Transform action back to target joint positions
|
||||
target = DEFAULT_ANGLES + policy_action * ACTION_SCALE
|
||||
target = DEFAULT_ANGLES + action * ACTION_SCALE
|
||||
|
||||
# Build action dict (first 15 joints only)
|
||||
# Build action dict
|
||||
action_dict = {}
|
||||
for i in range(15):
|
||||
motor_name = G1_29_JointIndex(i).name
|
||||
action_dict[f"{motor_name}.q"] = float(target[i])
|
||||
for motor in G1_29_JointIndex:
|
||||
action_dict[f"{motor.name}.q"] = float(target[motor.value])
|
||||
|
||||
return action_dict
|
||||
# Zero out missing joints for g1_23dof
|
||||
for joint_idx in MISSING_JOINTS:
|
||||
motor_name = G1_29_JointIndex(joint_idx).name
|
||||
action_dict[f"{motor_name}.q"] = 0.0
|
||||
|
||||
# Send action to robot
|
||||
self.robot.send_action(action_dict)
|
||||
|
||||
|
||||
def run(repo_id: str = DEFAULT_HOLOSOMA_REPO_ID, policy_type: str = "fastsac") -> None:
|
||||
"""Main function to run the Holosoma locomotion controller.
|
||||
|
||||
Args:
|
||||
repo_id: Hugging Face Hub repository ID for Holosoma policies.
|
||||
policy_type: Policy type to use ('fastsac' or 'ppo').
|
||||
"""
|
||||
# Load policy and gains
|
||||
policy, kp, kd = load_policy(repo_id=repo_id, policy_type=policy_type)
|
||||
|
||||
# Initialize robot
|
||||
config = UnitreeG1Config()
|
||||
robot = UnitreeG1(config)
|
||||
robot.connect()
|
||||
|
||||
holosoma_controller = HolosomaLocomotionController(policy, robot, kp, kd)
|
||||
|
||||
try:
|
||||
robot.reset(CONTROL_DT, DEFAULT_ANGLES)
|
||||
|
||||
logger.info("Use joystick: LY=fwd/back, LX=left/right, RX=rotate")
|
||||
logger.info("Press Ctrl+C to stop")
|
||||
|
||||
# Run step
|
||||
while not robot._shutdown_event.is_set():
|
||||
start_time = time.time()
|
||||
holosoma_controller.run_step()
|
||||
elapsed = time.time() - start_time
|
||||
sleep_time = max(0, CONTROL_DT - elapsed)
|
||||
time.sleep(sleep_time)
|
||||
except KeyboardInterrupt:
|
||||
logger.info("Stopping locomotion...")
|
||||
finally:
|
||||
if robot.is_connected:
|
||||
robot.disconnect()
|
||||
logger.info("Done!")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
parser = argparse.ArgumentParser(description="Holosoma Locomotion Controller for Unitree G1")
|
||||
parser.add_argument(
|
||||
"--repo-id",
|
||||
type=str,
|
||||
default=DEFAULT_HOLOSOMA_REPO_ID,
|
||||
help=f"Hugging Face Hub repo ID for Holosoma policies (default: {DEFAULT_HOLOSOMA_REPO_ID})",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--policy",
|
||||
type=str,
|
||||
choices=["fastsac", "ppo"],
|
||||
default="fastsac",
|
||||
help="Policy type to use: 'fastsac' (default) or 'ppo'",
|
||||
)
|
||||
args = parser.parse_args()
|
||||
|
||||
run(repo_id=args.repo_id, policy_type=args.policy)
|
||||
+122
-69
@@ -25,11 +25,11 @@ discord = "https://discord.gg/s3KuuzsPFb"
|
||||
|
||||
[project]
|
||||
name = "lerobot"
|
||||
version = "0.5.1"
|
||||
version = "0.4.3"
|
||||
description = "🤗 LeRobot: State-of-the-art Machine Learning for Real-World Robotics in Pytorch"
|
||||
dynamic = ["readme"]
|
||||
license = { text = "Apache-2.0" }
|
||||
requires-python = ">=3.12"
|
||||
requires-python = ">=3.10"
|
||||
authors = [
|
||||
{ name = "Rémi Cadène", email = "re.cadene@gmail.com" },
|
||||
{ name = "Simon Alibert", email = "alibert.sim@gmail.com" },
|
||||
@@ -50,8 +50,7 @@ classifiers = [
|
||||
"Intended Audience :: Education",
|
||||
"Intended Audience :: Science/Research",
|
||||
"License :: OSI Approved :: Apache Software License",
|
||||
"Programming Language :: Python :: 3.12",
|
||||
"Programming Language :: Python :: 3.13",
|
||||
"Programming Language :: Python :: 3.10",
|
||||
"Topic :: Software Development :: Build Tools",
|
||||
"Topic :: Scientific/Engineering :: Artificial Intelligence",
|
||||
]
|
||||
@@ -60,30 +59,28 @@ keywords = ["lerobot", "huggingface", "robotics", "machine learning", "artifici
|
||||
dependencies = [
|
||||
|
||||
# Hugging Face dependencies
|
||||
"datasets>=4.0.0,<5.0.0",
|
||||
"datasets>=4.0.0,<4.2.0",
|
||||
"diffusers>=0.27.2,<0.36.0",
|
||||
"huggingface-hub>=1.0.0,<2.0.0",
|
||||
"huggingface-hub[hf-transfer,cli]>=0.34.2,<0.36.0",
|
||||
"accelerate>=1.10.0,<2.0.0",
|
||||
|
||||
# Core dependencies
|
||||
"numpy>=2.0.0,<2.3.0", # NOTE: Explicitly listing numpy helps the resolver converge faster. Upper bound imposed by opencv-python-headless.
|
||||
"setuptools>=71.0.0,<81.0.0",
|
||||
"cmake>=3.29.0.1,<4.2.0",
|
||||
"packaging>=24.2,<26.0",
|
||||
|
||||
"torch>=2.7,<2.11.0",
|
||||
"torchcodec>=0.3.0,<0.11.0; sys_platform != 'win32' and (sys_platform != 'linux' or (platform_machine != 'aarch64' and platform_machine != 'arm64' and platform_machine != 'armv7l')) and (sys_platform != 'darwin' or platform_machine != 'x86_64')", # NOTE: Windows support starts at version 0.7 (needs torch==2.8), ffmpeg>=8 support starts at version 0.8.1 (needs torch==2.9), system-wide ffmpeg support starts at version 0.10 (needs torch==2.10).
|
||||
"torchvision>=0.22.0,<0.26.0",
|
||||
|
||||
"einops>=0.8.0,<0.9.0",
|
||||
"opencv-python-headless>=4.9.0,<4.14.0",
|
||||
"opencv-python-headless>=4.9.0,<4.13.0",
|
||||
"av>=15.0.0,<16.0.0",
|
||||
"jsonlines>=4.0.0,<5.0.0",
|
||||
"pynput>=1.7.8,<1.9.0",
|
||||
"packaging>=24.2,<26.0",
|
||||
"pynput>=1.7.7,<1.9.0",
|
||||
"pyserial>=3.5,<4.0",
|
||||
|
||||
"wandb>=0.24.0,<0.25.0",
|
||||
"draccus==0.10.0", # TODO: Relax version constraint
|
||||
|
||||
"torch>=2.2.1,<2.8.0", # TODO: Bumb dependency
|
||||
"torchcodec>=0.2.1,<0.6.0; sys_platform != 'win32' and (sys_platform != 'linux' or (platform_machine != 'aarch64' and platform_machine != 'arm64' and platform_machine != 'armv7l')) and (sys_platform != 'darwin' or platform_machine != 'x86_64')", # TODO: Bumb dependency
|
||||
"torchvision>=0.21.0,<0.23.0", # TODO: Bumb dependency
|
||||
|
||||
"draccus==0.10.0", # TODO: Remove ==
|
||||
"gymnasium>=1.1.1,<2.0.0",
|
||||
"rerun-sdk>=0.24.0,<0.27.0",
|
||||
|
||||
@@ -98,57 +95,43 @@ dependencies = [
|
||||
|
||||
# Common
|
||||
pygame-dep = ["pygame>=2.5.1,<2.7.0"]
|
||||
placo-dep = ["placo>=0.9.6,<0.9.17"]
|
||||
transformers-dep = ["transformers==5.3.0"] # TODO(Steven): https://github.com/huggingface/lerobot/pull/3249
|
||||
placo-dep = ["placo>=0.9.6,<0.10.0"]
|
||||
transformers-dep = ["transformers>=4.57.1,<5.0.0"]
|
||||
grpcio-dep = ["grpcio==1.73.1", "protobuf>=6.31.1,<6.32.0"]
|
||||
can-dep = ["python-can>=4.2.0,<5.0.0"]
|
||||
peft-dep = ["peft>=0.18.0,<1.0.0"]
|
||||
scipy-dep = ["scipy>=1.14.0,<2.0.0"]
|
||||
qwen-vl-utils-dep = ["qwen-vl-utils>=0.0.11,<0.1.0"]
|
||||
matplotlib-dep = ["matplotlib>=3.10.3,<4.0.0", "contourpy>=1.3.0,<2.0.0"] # NOTE: Explicitly listing contourpy helps the resolver converge faster.
|
||||
|
||||
# Motors
|
||||
feetech = ["feetech-servo-sdk>=1.0.0,<2.0.0"]
|
||||
dynamixel = ["dynamixel-sdk>=3.7.31,<3.9.0"]
|
||||
damiao = ["lerobot[can-dep]"]
|
||||
robstride = ["lerobot[can-dep]"]
|
||||
|
||||
# Robots
|
||||
openarms = ["lerobot[damiao]"]
|
||||
gamepad = ["lerobot[pygame-dep]", "hidapi>=0.14.0,<0.15.0"]
|
||||
hopejr = ["lerobot[feetech]", "lerobot[pygame-dep]"]
|
||||
lekiwi = ["lerobot[feetech]", "pyzmq>=26.2.1,<28.0.0"]
|
||||
unitree_g1 = [
|
||||
# "unitree-sdk2==1.0.1",
|
||||
"pyzmq>=26.2.1,<28.0.0",
|
||||
"onnxruntime>=1.16.0,<2.0.0",
|
||||
"onnx>=1.16.0,<2.0.0",
|
||||
"meshcat>=0.3.0,<0.4.0",
|
||||
"lerobot[matplotlib-dep]",
|
||||
"lerobot[pygame-dep]",
|
||||
"onnxruntime>=1.16.0,<2.0.0"
|
||||
]
|
||||
reachy2 = ["reachy2_sdk>=1.0.15,<1.1.0"]
|
||||
kinematics = ["lerobot[placo-dep]"]
|
||||
intelrealsense = [
|
||||
"pyrealsense2>=2.55.1.6486,<2.57.0 ; sys_platform != 'darwin'",
|
||||
"pyrealsense2-macosx>=2.54,<2.57.0 ; sys_platform == 'darwin'",
|
||||
"pyrealsense2-macosx>=2.54,<2.55.0 ; sys_platform == 'darwin'",
|
||||
]
|
||||
phone = ["hebi-py>=2.8.0,<2.12.0", "teleop>=0.1.0,<0.2.0", "fastapi<1.0", "lerobot[scipy-dep]"]
|
||||
phone = ["hebi-py>=2.8.0,<2.12.0", "teleop>=0.1.0,<0.2.0", "fastapi<1.0"]
|
||||
|
||||
# Policies
|
||||
wallx = [
|
||||
"lerobot[transformers-dep]",
|
||||
"lerobot[peft]",
|
||||
"lerobot[scipy-dep]",
|
||||
"torchdiffeq>=0.2.4,<0.3.0",
|
||||
"lerobot[qwen-vl-utils-dep]",
|
||||
"transformers==4.49.0",
|
||||
"peft==0.17.1",
|
||||
"scipy==1.15.3",
|
||||
"torchdiffeq==0.2.5",
|
||||
"qwen_vl_utils==0.0.11"
|
||||
]
|
||||
pi = ["lerobot[transformers-dep]", "lerobot[scipy-dep]"]
|
||||
pi = ["transformers @ git+https://github.com/huggingface/transformers.git@fix/lerobot_openpi", "scipy>=1.10.1,<1.15"]
|
||||
smolvla = ["lerobot[transformers-dep]", "num2words>=0.5.14,<0.6.0", "accelerate>=1.7.0,<2.0.0", "safetensors>=0.4.3,<1.0.0"]
|
||||
multi_task_dit = ["lerobot[transformers-dep]"]
|
||||
groot = [
|
||||
"lerobot[transformers-dep]",
|
||||
"lerobot[peft]",
|
||||
"peft>=0.13.0,<1.0.0",
|
||||
"dm-tree>=0.1.8,<1.0.0",
|
||||
"timm>=1.0.0,<1.1.0",
|
||||
"safetensors>=0.4.3,<1.0.0",
|
||||
@@ -157,13 +140,13 @@ groot = [
|
||||
"ninja>=1.11.1,<2.0.0",
|
||||
"flash-attn>=2.5.9,<3.0.0 ; sys_platform != 'darwin'"
|
||||
]
|
||||
sarm = ["lerobot[transformers-dep]", "faker>=33.0.0,<35.0.0", "lerobot[matplotlib-dep]", "lerobot[qwen-vl-utils-dep]"]
|
||||
sarm = ["lerobot[transformers-dep]", "faker>=33.0.0,<35.0.0", "matplotlib>=3.10.3,<4.0.0", "qwen-vl-utils>=0.0.14,<0.1.0"]
|
||||
xvla = ["lerobot[transformers-dep]"]
|
||||
hilserl = ["lerobot[transformers-dep]", "gym-hil>=0.1.13,<0.2.0", "lerobot[grpcio-dep]", "lerobot[placo-dep]"]
|
||||
|
||||
# Features
|
||||
async = ["lerobot[grpcio-dep]", "lerobot[matplotlib-dep]"]
|
||||
peft = ["lerobot[transformers-dep]", "lerobot[peft-dep]"]
|
||||
async = ["lerobot[grpcio-dep]", "matplotlib>=3.10.3,<4.0.0"]
|
||||
peft = ["lerobot[transformers-dep]", "peft>=0.18.0,<1.0.0"]
|
||||
|
||||
# Development
|
||||
dev = ["pre-commit>=3.7.0,<5.0.0", "debugpy>=1.8.1,<1.9.0", "lerobot[grpcio-dep]", "grpcio-tools==1.73.1", "mypy>=1.19.1"]
|
||||
@@ -171,19 +154,13 @@ test = ["pytest>=8.1.0,<9.0.0", "pytest-timeout>=2.4.0,<3.0.0", "pytest-cov>=5.0
|
||||
video_benchmark = ["scikit-image>=0.23.2,<0.26.0", "pandas>=2.2.2,<2.4.0"]
|
||||
|
||||
# Simulation
|
||||
# NOTE: Explicitly listing scipy helps flatten the dependecy tree.
|
||||
aloha = ["gym-aloha>=0.1.2,<0.2.0", "lerobot[scipy-dep]"]
|
||||
aloha = ["gym-aloha>=0.1.2,<0.2.0"]
|
||||
pusht = ["gym-pusht>=0.1.5,<0.2.0", "pymunk>=6.6.0,<7.0.0"] # TODO: Fix pymunk version in gym-pusht instead
|
||||
libero = ["lerobot[transformers-dep]", "hf-libero>=0.1.3,<0.2.0; sys_platform == 'linux'", "lerobot[scipy-dep]"]
|
||||
metaworld = ["metaworld==3.0.0", "lerobot[scipy-dep]"]
|
||||
libero = ["lerobot[transformers-dep]", "hf-libero>=0.1.3,<0.2.0"]
|
||||
metaworld = ["metaworld==3.0.0"]
|
||||
|
||||
# All
|
||||
all = [
|
||||
# NOTE(resolver hint): scipy is pulled in transitively via lerobot[scipy-dep] through
|
||||
# multiple extras (aloha, metaworld, pi, wallx, phone). Listing it explicitly
|
||||
# helps pip's resolver converge by constraining scipy early, before it encounters
|
||||
# the loose scipy requirements from transitive deps like dm-control and metaworld.
|
||||
"scipy>=1.14.0,<2.0.0",
|
||||
"lerobot[dynamixel]",
|
||||
"lerobot[gamepad]",
|
||||
"lerobot[hopejr]",
|
||||
@@ -191,8 +168,8 @@ all = [
|
||||
"lerobot[reachy2]",
|
||||
"lerobot[kinematics]",
|
||||
"lerobot[intelrealsense]",
|
||||
"lerobot[wallx]",
|
||||
"lerobot[pi]",
|
||||
# "lerobot[wallx]",
|
||||
# "lerobot[pi]", TODO(Pepijn): Update pi to transformers v5
|
||||
"lerobot[smolvla]",
|
||||
# "lerobot[groot]", TODO(Steven): Gr00t requires specific installation instructions for flash-attn
|
||||
"lerobot[xvla]",
|
||||
@@ -204,11 +181,10 @@ all = [
|
||||
"lerobot[aloha]",
|
||||
"lerobot[pusht]",
|
||||
"lerobot[phone]",
|
||||
"lerobot[libero]; sys_platform == 'linux'",
|
||||
"lerobot[libero]",
|
||||
"lerobot[metaworld]",
|
||||
"lerobot[sarm]",
|
||||
"lerobot[peft]",
|
||||
# "lerobot[unitree_g1]", TODO: Unitree requires specific installation instructions for unitree_sdk2
|
||||
]
|
||||
|
||||
[project.scripts]
|
||||
@@ -227,17 +203,13 @@ lerobot-info="lerobot.scripts.lerobot_info:main"
|
||||
lerobot-find-joint-limits="lerobot.scripts.lerobot_find_joint_limits:main"
|
||||
lerobot-imgtransform-viz="lerobot.scripts.lerobot_imgtransform_viz:main"
|
||||
lerobot-edit-dataset="lerobot.scripts.lerobot_edit_dataset:main"
|
||||
lerobot-setup-can="lerobot.scripts.lerobot_setup_can:main"
|
||||
|
||||
# ---------------- Tool Configurations ----------------
|
||||
[tool.setuptools.package-data]
|
||||
lerobot = ["envs/*.json"]
|
||||
|
||||
[tool.setuptools.packages.find]
|
||||
where = ["src"]
|
||||
|
||||
[tool.ruff]
|
||||
target-version = "py312"
|
||||
target-version = "py310"
|
||||
line-length = 110
|
||||
exclude = ["tests/artifacts/**/*.safetensors", "*_pb2.py", "*_pb2_grpc.py"]
|
||||
|
||||
@@ -306,7 +278,6 @@ default.extend-ignore-identifiers-re = [
|
||||
"thw",
|
||||
"inpt",
|
||||
"ROBOTIS",
|
||||
"OT_VALUE"
|
||||
]
|
||||
|
||||
# TODO: Uncomment when ready to use
|
||||
@@ -329,7 +300,7 @@ default.extend-ignore-identifiers-re = [
|
||||
# Uncomment [tool.mypy] first, then uncomment individual module overrides as they get proper type annotations
|
||||
|
||||
[tool.mypy]
|
||||
python_version = "3.12"
|
||||
python_version = "3.10"
|
||||
ignore_missing_imports = true
|
||||
follow_imports = "skip"
|
||||
# warn_return_any = true
|
||||
@@ -381,9 +352,9 @@ ignore_errors = false
|
||||
module = "lerobot.cameras.*"
|
||||
ignore_errors = false
|
||||
|
||||
[[tool.mypy.overrides]]
|
||||
module = "lerobot.motors.*"
|
||||
ignore_errors = false
|
||||
# [[tool.mypy.overrides]]
|
||||
# module = "lerobot.motors.*"
|
||||
# ignore_errors = false
|
||||
|
||||
# [[tool.mypy.overrides]]
|
||||
# module = "lerobot.robots.*"
|
||||
@@ -413,3 +384,85 @@ ignore_errors = false
|
||||
# [[tool.mypy.overrides]]
|
||||
# module = "lerobot.scripts.*"
|
||||
# ignore_errors = false
|
||||
|
||||
[tool.uv]
|
||||
# wallx requires transformers==4.49.0 which conflicts with other extras that need >=4.53.0
|
||||
conflicts = [
|
||||
[
|
||||
{ extra = "wallx" },
|
||||
{ extra = "transformers-dep" },
|
||||
],
|
||||
[
|
||||
{ extra = "wallx" },
|
||||
{ extra = "pi" },
|
||||
],
|
||||
[
|
||||
{ extra = "wallx" },
|
||||
{ extra = "smolvla" },
|
||||
],
|
||||
[
|
||||
{ extra = "wallx" },
|
||||
{ extra = "groot" },
|
||||
],
|
||||
[
|
||||
{ extra = "wallx" },
|
||||
{ extra = "xvla" },
|
||||
],
|
||||
[
|
||||
{ extra = "wallx" },
|
||||
{ extra = "sarm" },
|
||||
],
|
||||
[
|
||||
{ extra = "wallx" },
|
||||
{ extra = "hilserl" },
|
||||
],
|
||||
[
|
||||
{ extra = "wallx" },
|
||||
{ extra = "libero" },
|
||||
],
|
||||
[
|
||||
{ extra = "wallx" },
|
||||
{ extra = "peft" },
|
||||
],
|
||||
[
|
||||
{ extra = "wallx" },
|
||||
{ extra = "all" },
|
||||
],
|
||||
# pi uses custom branch which conflicts with transformers-dep
|
||||
[
|
||||
{ extra = "pi" },
|
||||
{ extra = "transformers-dep" },
|
||||
],
|
||||
[
|
||||
{ extra = "pi" },
|
||||
{ extra = "smolvla" },
|
||||
],
|
||||
[
|
||||
{ extra = "pi" },
|
||||
{ extra = "groot" },
|
||||
],
|
||||
[
|
||||
{ extra = "pi" },
|
||||
{ extra = "xvla" },
|
||||
],
|
||||
[
|
||||
{ extra = "pi" },
|
||||
{ extra = "sarm" },
|
||||
],
|
||||
[
|
||||
{ extra = "pi" },
|
||||
{ extra = "hilserl" },
|
||||
],
|
||||
[
|
||||
{ extra = "pi" },
|
||||
{ extra = "libero" },
|
||||
],
|
||||
[
|
||||
{ extra = "pi" },
|
||||
{ extra = "peft" },
|
||||
],
|
||||
[
|
||||
{ extra = "pi" },
|
||||
{ extra = "all" },
|
||||
],
|
||||
]
|
||||
|
||||
+276
-175
@@ -1,73 +1,76 @@
|
||||
#
|
||||
# This file is autogenerated by pip-compile with Python 3.12
|
||||
# This file is autogenerated by pip-compile with Python 3.10
|
||||
# by the following command:
|
||||
#
|
||||
# pip-compile --output-file=requirements-macos.txt requirements.in
|
||||
#
|
||||
-e .[all]
|
||||
# via -[all]
|
||||
absl-py==2.4.0
|
||||
absl-py==2.3.1
|
||||
# via
|
||||
# dm-control
|
||||
# dm-env
|
||||
# dm-tree
|
||||
# labmaze
|
||||
# mujoco
|
||||
accelerate==1.13.0
|
||||
# tensorboard
|
||||
accelerate==1.11.0
|
||||
# via
|
||||
# lerobot
|
||||
# peft
|
||||
aiohappyeyeballs==2.6.1
|
||||
# via aiohttp
|
||||
aiohttp==3.13.3
|
||||
aiohttp==3.13.1
|
||||
# via fsspec
|
||||
aiosignal==1.4.0
|
||||
# via aiohttp
|
||||
annotated-doc==0.0.4
|
||||
# via
|
||||
# fastapi
|
||||
# typer
|
||||
annotated-types==0.7.0
|
||||
# via pydantic
|
||||
anyio==4.12.1
|
||||
antlr4-python3-runtime==4.9.3
|
||||
# via
|
||||
# hydra-core
|
||||
# omegaconf
|
||||
anyio==4.11.0
|
||||
# via
|
||||
# httpx
|
||||
# starlette
|
||||
# watchfiles
|
||||
asttokens==3.0.1
|
||||
asttokens==3.0.0
|
||||
# via stack-data
|
||||
async-timeout==5.0.1
|
||||
# via aiohttp
|
||||
attrs==25.4.0
|
||||
# via
|
||||
# aiohttp
|
||||
# dm-tree
|
||||
# jsonlines
|
||||
# jsonschema
|
||||
# referencing
|
||||
# rerun-sdk
|
||||
av==15.1.0
|
||||
# via lerobot
|
||||
bddl==1.0.1
|
||||
# via libero
|
||||
certifi==2025.10.5
|
||||
# via
|
||||
# lerobot
|
||||
# qwen-vl-utils
|
||||
certifi==2026.2.25
|
||||
# via
|
||||
# httpcore
|
||||
# httpx
|
||||
# requests
|
||||
# sentry-sdk
|
||||
cffi==2.0.0
|
||||
# via pymunk
|
||||
cfgv==3.5.0
|
||||
cfgv==3.4.0
|
||||
# via pre-commit
|
||||
charset-normalizer==3.4.5
|
||||
charset-normalizer==3.4.4
|
||||
# via requests
|
||||
click==8.3.1
|
||||
click==8.3.0
|
||||
# via
|
||||
# typer
|
||||
# uvicorn
|
||||
# wandb
|
||||
cloudpickle==3.1.2
|
||||
# via gymnasium
|
||||
cmake==4.1.3
|
||||
cloudpickle==3.1.1
|
||||
# via
|
||||
# gymnasium
|
||||
# libero
|
||||
cmake==4.1.0
|
||||
# via lerobot
|
||||
cmeel==0.59.0
|
||||
cmeel==0.57.3
|
||||
# via
|
||||
# cmeel-assimp
|
||||
# cmeel-boost
|
||||
@@ -105,17 +108,15 @@ cmeel-zlib==1.3.1
|
||||
# via cmeel-assimp
|
||||
coal-library==3.0.1
|
||||
# via pin
|
||||
contourpy==1.3.3
|
||||
# via
|
||||
# lerobot
|
||||
# matplotlib
|
||||
coverage[toml]==7.13.4
|
||||
contourpy==1.3.2
|
||||
# via matplotlib
|
||||
coverage[toml]==7.11.0
|
||||
# via pytest-cov
|
||||
cycler==0.12.1
|
||||
# via matplotlib
|
||||
datasets==4.6.1
|
||||
datasets==4.1.1
|
||||
# via lerobot
|
||||
debugpy==1.8.20
|
||||
debugpy==1.8.17
|
||||
# via lerobot
|
||||
decorator==5.2.1
|
||||
# via ipython
|
||||
@@ -129,7 +130,7 @@ dill==0.4.0
|
||||
# multiprocess
|
||||
distlib==0.4.0
|
||||
# via virtualenv
|
||||
dm-control==1.0.37
|
||||
dm-control==1.0.34
|
||||
# via gym-aloha
|
||||
dm-env==1.6
|
||||
# via dm-control
|
||||
@@ -137,55 +138,69 @@ dm-tree==0.1.9
|
||||
# via
|
||||
# dm-control
|
||||
# dm-env
|
||||
# lerobot
|
||||
docopt==0.6.2
|
||||
# via num2words
|
||||
draccus==0.10.0
|
||||
# via lerobot
|
||||
dynamixel-sdk==3.8.4
|
||||
# via lerobot
|
||||
easydict==1.13
|
||||
# via libero
|
||||
egl-probe @ git+https://github.com/huggingface/egl_probe.git
|
||||
# via
|
||||
# libero
|
||||
# robomimic
|
||||
eigenpy==3.10.3
|
||||
# via coal-library
|
||||
einops==0.8.2
|
||||
# via lerobot
|
||||
eiquadprog==1.2.9
|
||||
# via placo
|
||||
etils[epath,epy]==1.14.0
|
||||
# via mujoco
|
||||
executing==2.2.1
|
||||
# via stack-data
|
||||
faker==34.0.2
|
||||
# via lerobot
|
||||
farama-notifications==0.0.4
|
||||
# via gymnasium
|
||||
fastapi==0.135.1
|
||||
einops==0.8.1
|
||||
# via
|
||||
# lerobot
|
||||
# teleop
|
||||
# libero
|
||||
eiquadprog==1.2.9
|
||||
# via placo
|
||||
etils[epath,epy]==1.13.0
|
||||
# via mujoco
|
||||
exceptiongroup==1.3.0
|
||||
# via
|
||||
# anyio
|
||||
# ipython
|
||||
# pytest
|
||||
executing==2.2.1
|
||||
# via stack-data
|
||||
farama-notifications==0.0.4
|
||||
# via gymnasium
|
||||
fastapi==0.119.1
|
||||
# via teleop
|
||||
fastjsonschema==2.21.2
|
||||
# via nbformat
|
||||
feetech-servo-sdk==1.0.0
|
||||
# via lerobot
|
||||
filelock==3.25.0
|
||||
filelock==3.20.0
|
||||
# via
|
||||
# datasets
|
||||
# diffusers
|
||||
# huggingface-hub
|
||||
# python-discovery
|
||||
# torch
|
||||
# transformers
|
||||
# virtualenv
|
||||
fonttools==4.61.1
|
||||
fonttools==4.60.1
|
||||
# via matplotlib
|
||||
frozenlist==1.8.0
|
||||
# via
|
||||
# aiohttp
|
||||
# aiosignal
|
||||
fsspec[http]==2026.2.0
|
||||
fsspec[http]==2025.9.0
|
||||
# via
|
||||
# datasets
|
||||
# etils
|
||||
# huggingface-hub
|
||||
# torch
|
||||
future==1.0.0
|
||||
# via libero
|
||||
gitdb==4.0.12
|
||||
# via gitpython
|
||||
gitpython==3.1.46
|
||||
gitpython==3.1.45
|
||||
# via wandb
|
||||
glfw==2.10.0
|
||||
# via
|
||||
@@ -197,6 +212,7 @@ grpcio==1.73.1
|
||||
# lerobot
|
||||
# reachy2-sdk
|
||||
# reachy2-sdk-api
|
||||
# tensorboard
|
||||
grpcio-tools==1.73.1
|
||||
# via
|
||||
# lerobot
|
||||
@@ -207,67 +223,71 @@ gym-hil==0.1.13
|
||||
# via lerobot
|
||||
gym-pusht==0.1.6
|
||||
# via lerobot
|
||||
gymnasium==1.2.3
|
||||
gymnasium==1.2.1
|
||||
# via
|
||||
# gym-aloha
|
||||
# gym-hil
|
||||
# gym-pusht
|
||||
# lerobot
|
||||
# libero
|
||||
# metaworld
|
||||
h11==0.16.0
|
||||
# via
|
||||
# httpcore
|
||||
# uvicorn
|
||||
# via uvicorn
|
||||
h5py==3.15.1
|
||||
# via robomimic
|
||||
hebi-py==2.11.0
|
||||
# via lerobot
|
||||
hf-xet==1.3.2
|
||||
hf-transfer==0.1.9
|
||||
# via huggingface-hub
|
||||
hf-xet==1.1.10
|
||||
# via huggingface-hub
|
||||
hidapi==0.14.0.post4
|
||||
# via
|
||||
# gym-hil
|
||||
# lerobot
|
||||
httpcore==1.0.9
|
||||
# via httpx
|
||||
httptools==0.7.1
|
||||
# via uvicorn
|
||||
httpx==0.28.1
|
||||
# via
|
||||
# datasets
|
||||
# huggingface-hub
|
||||
huggingface-hub==1.6.0
|
||||
huggingface-hub[cli,hf-transfer]==0.35.3
|
||||
# via
|
||||
# accelerate
|
||||
# datasets
|
||||
# diffusers
|
||||
# lerobot
|
||||
# peft
|
||||
# timm
|
||||
# tokenizers
|
||||
# transformers
|
||||
identify==2.6.17
|
||||
hydra-core==1.3.2
|
||||
# via libero
|
||||
identify==2.6.15
|
||||
# via pre-commit
|
||||
idna==3.11
|
||||
# via
|
||||
# anyio
|
||||
# httpx
|
||||
# requests
|
||||
# yarl
|
||||
imageio[ffmpeg]==2.37.2
|
||||
imageio[ffmpeg]==2.37.0
|
||||
# via
|
||||
# gym-aloha
|
||||
# gym-hil
|
||||
# lerobot
|
||||
# metaworld
|
||||
# robomimic
|
||||
# scikit-image
|
||||
imageio-ffmpeg==0.6.0
|
||||
# via imageio
|
||||
importlib-metadata==8.7.1
|
||||
# via
|
||||
# imageio
|
||||
# robomimic
|
||||
importlib-metadata==8.7.0
|
||||
# via diffusers
|
||||
importlib-resources==6.5.2
|
||||
# via etils
|
||||
iniconfig==2.3.0
|
||||
# via pytest
|
||||
ipython==9.11.0
|
||||
inquirerpy==0.3.4
|
||||
# via huggingface-hub
|
||||
ipython==8.37.0
|
||||
# via meshcat
|
||||
ipython-pygments-lexers==1.1.1
|
||||
# via ipython
|
||||
ischedule==1.2.7
|
||||
# via placo
|
||||
jedi==0.19.2
|
||||
@@ -276,24 +296,44 @@ jinja2==3.1.6
|
||||
# via torch
|
||||
jsonlines==4.0.0
|
||||
# via lerobot
|
||||
jsonschema==4.25.1
|
||||
# via nbformat
|
||||
jsonschema-specifications==2025.9.1
|
||||
# via jsonschema
|
||||
jupyter-core==5.9.1
|
||||
# via nbformat
|
||||
jupytext==1.18.1
|
||||
# via bddl
|
||||
kiwisolver==1.4.9
|
||||
# via matplotlib
|
||||
labmaze==1.0.6
|
||||
# via dm-control
|
||||
lazy-loader==0.5
|
||||
lazy-loader==0.4
|
||||
# via scikit-image
|
||||
librt==0.8.1
|
||||
# via mypy
|
||||
libero @ git+https://github.com/huggingface/lerobot-libero.git@main
|
||||
# via lerobot
|
||||
llvmlite==0.45.1
|
||||
# via numba
|
||||
lxml==6.0.2
|
||||
# via dm-control
|
||||
markdown==3.9
|
||||
# via tensorboard
|
||||
markdown-it-py==4.0.0
|
||||
# via rich
|
||||
# via
|
||||
# jupytext
|
||||
# mdit-py-plugins
|
||||
markupsafe==3.0.3
|
||||
# via jinja2
|
||||
matplotlib==3.10.8
|
||||
# via lerobot
|
||||
# via
|
||||
# jinja2
|
||||
# werkzeug
|
||||
matplotlib==3.10.7
|
||||
# via
|
||||
# lerobot
|
||||
# libero
|
||||
matplotlib-inline==0.2.1
|
||||
# via ipython
|
||||
mdit-py-plugins==0.5.0
|
||||
# via jupytext
|
||||
mdurl==0.1.2
|
||||
# via markdown-it-py
|
||||
mergedeep==1.3.4
|
||||
@@ -306,35 +346,41 @@ mock-serial==0.0.1
|
||||
# via lerobot
|
||||
mpmath==1.3.0
|
||||
# via sympy
|
||||
mujoco==3.5.0
|
||||
mujoco==3.3.7
|
||||
# via
|
||||
# dm-control
|
||||
# gym-aloha
|
||||
# gym-hil
|
||||
# libero
|
||||
# metaworld
|
||||
multidict==6.7.1
|
||||
# robosuite
|
||||
multidict==6.7.0
|
||||
# via
|
||||
# aiohttp
|
||||
# yarl
|
||||
multiprocess==0.70.18
|
||||
multiprocess==0.70.16
|
||||
# via datasets
|
||||
mypy==1.19.1
|
||||
# via lerobot
|
||||
mypy-extensions==1.1.0
|
||||
# via typing-inspect
|
||||
nbformat==5.10.4
|
||||
# via jupytext
|
||||
networkx==3.4.2
|
||||
# via
|
||||
# mypy
|
||||
# typing-inspect
|
||||
networkx==3.6.1
|
||||
# via
|
||||
# bddl
|
||||
# scikit-image
|
||||
# torch
|
||||
nodeenv==1.10.0
|
||||
ninja==1.13.0
|
||||
# via lerobot
|
||||
nodeenv==1.9.1
|
||||
# via pre-commit
|
||||
num2words==0.5.14
|
||||
# via lerobot
|
||||
numba==0.62.1
|
||||
# via robosuite
|
||||
numpy==2.2.6
|
||||
# via
|
||||
# accelerate
|
||||
# bddl
|
||||
# cmeel-boost
|
||||
# contourpy
|
||||
# datasets
|
||||
@@ -343,14 +389,16 @@ numpy==2.2.6
|
||||
# dm-env
|
||||
# dm-tree
|
||||
# gymnasium
|
||||
# h5py
|
||||
# hebi-py
|
||||
# imageio
|
||||
# labmaze
|
||||
# lerobot
|
||||
# libero
|
||||
# matplotlib
|
||||
# meshcat
|
||||
# metaworld
|
||||
# mujoco
|
||||
# numba
|
||||
# opencv-python
|
||||
# opencv-python-headless
|
||||
# pandas
|
||||
@@ -358,18 +406,26 @@ numpy==2.2.6
|
||||
# pyquaternion
|
||||
# reachy2-sdk
|
||||
# rerun-sdk
|
||||
# robomimic
|
||||
# robosuite
|
||||
# scikit-image
|
||||
# scipy
|
||||
# shapely
|
||||
# teleop
|
||||
# tensorboard
|
||||
# tensorboardx
|
||||
# tifffile
|
||||
# torchvision
|
||||
# transformers
|
||||
# transforms3d
|
||||
opencv-python==4.13.0.92
|
||||
omegaconf==2.3.0
|
||||
# via hydra-core
|
||||
opencv-python==4.12.0.88
|
||||
# via
|
||||
# gym-pusht
|
||||
# libero
|
||||
# reachy2-sdk
|
||||
# robosuite
|
||||
opencv-python-headless==4.12.0.88
|
||||
# via lerobot
|
||||
orderly-set==5.5.0
|
||||
@@ -379,87 +435,97 @@ packaging==25.0
|
||||
# accelerate
|
||||
# datasets
|
||||
# huggingface-hub
|
||||
# hydra-core
|
||||
# jupytext
|
||||
# lazy-loader
|
||||
# lerobot
|
||||
# matplotlib
|
||||
# peft
|
||||
# pytest
|
||||
# qwen-vl-utils
|
||||
# reachy2-sdk
|
||||
# scikit-image
|
||||
# tensorboard
|
||||
# tensorboardx
|
||||
# transformers
|
||||
# wandb
|
||||
pandas==2.3.3
|
||||
# via
|
||||
# datasets
|
||||
# lerobot
|
||||
parso==0.8.6
|
||||
parso==0.8.5
|
||||
# via jedi
|
||||
pathspec==1.0.4
|
||||
# via mypy
|
||||
peft==0.18.1
|
||||
peft==0.17.1
|
||||
# via lerobot
|
||||
pexpect==4.9.0
|
||||
# via ipython
|
||||
pillow==12.1.1
|
||||
pfzy==0.3.4
|
||||
# via inquirerpy
|
||||
pillow==12.0.0
|
||||
# via
|
||||
# diffusers
|
||||
# imageio
|
||||
# lerobot
|
||||
# matplotlib
|
||||
# meshcat
|
||||
# qwen-vl-utils
|
||||
# rerun-sdk
|
||||
# robosuite
|
||||
# scikit-image
|
||||
# tensorboard
|
||||
# torchvision
|
||||
pin==3.4.0
|
||||
# via placo
|
||||
placo==0.9.16
|
||||
placo==0.9.14
|
||||
# via lerobot
|
||||
platformdirs==4.9.4
|
||||
platformdirs==4.5.0
|
||||
# via
|
||||
# python-discovery
|
||||
# jupyter-core
|
||||
# virtualenv
|
||||
# wandb
|
||||
pluggy==1.6.0
|
||||
# via
|
||||
# pytest
|
||||
# pytest-cov
|
||||
pre-commit==4.5.1
|
||||
pre-commit==4.3.0
|
||||
# via lerobot
|
||||
prompt-toolkit==3.0.52
|
||||
# via ipython
|
||||
# via
|
||||
# inquirerpy
|
||||
# ipython
|
||||
propcache==0.4.1
|
||||
# via
|
||||
# aiohttp
|
||||
# yarl
|
||||
protobuf==6.31.1
|
||||
protobuf==6.31.0
|
||||
# via
|
||||
# dm-control
|
||||
# grpcio-tools
|
||||
# lerobot
|
||||
# reachy2-sdk
|
||||
# reachy2-sdk-api
|
||||
# tensorboard
|
||||
# tensorboardx
|
||||
# wandb
|
||||
psutil==7.2.2
|
||||
psutil==7.1.1
|
||||
# via
|
||||
# accelerate
|
||||
# imageio
|
||||
# peft
|
||||
# robomimic
|
||||
ptyprocess==0.7.0
|
||||
# via pexpect
|
||||
pure-eval==0.2.3
|
||||
# via stack-data
|
||||
pyarrow==23.0.1
|
||||
pyarrow==21.0.0
|
||||
# via
|
||||
# datasets
|
||||
# rerun-sdk
|
||||
pycparser==3.0
|
||||
pycparser==2.23
|
||||
# via cffi
|
||||
pydantic==2.12.5
|
||||
pydantic==2.12.3
|
||||
# via
|
||||
# fastapi
|
||||
# wandb
|
||||
pydantic-core==2.41.5
|
||||
pydantic-core==2.41.4
|
||||
# via pydantic
|
||||
pygame==2.6.1
|
||||
# via
|
||||
@@ -469,35 +535,33 @@ pygame==2.6.1
|
||||
pygments==2.19.2
|
||||
# via
|
||||
# ipython
|
||||
# ipython-pygments-lexers
|
||||
# pytest
|
||||
# rich
|
||||
pymunk==6.11.1
|
||||
# via
|
||||
# gym-pusht
|
||||
# lerobot
|
||||
pyngrok==7.5.1
|
||||
pyngrok==7.4.1
|
||||
# via meshcat
|
||||
pynput==1.8.1
|
||||
# via
|
||||
# gym-hil
|
||||
# lerobot
|
||||
pyobjc-core==12.1
|
||||
pyobjc-core==12.0
|
||||
# via
|
||||
# pyobjc-framework-applicationservices
|
||||
# pyobjc-framework-cocoa
|
||||
# pyobjc-framework-coretext
|
||||
# pyobjc-framework-quartz
|
||||
pyobjc-framework-applicationservices==12.1
|
||||
pyobjc-framework-applicationservices==12.0
|
||||
# via pynput
|
||||
pyobjc-framework-cocoa==12.1
|
||||
pyobjc-framework-cocoa==12.0
|
||||
# via
|
||||
# pyobjc-framework-applicationservices
|
||||
# pyobjc-framework-coretext
|
||||
# pyobjc-framework-quartz
|
||||
pyobjc-framework-coretext==12.1
|
||||
pyobjc-framework-coretext==12.0
|
||||
# via pyobjc-framework-applicationservices
|
||||
pyobjc-framework-quartz==12.1
|
||||
pyobjc-framework-quartz==12.0
|
||||
# via
|
||||
# pynput
|
||||
# pyobjc-framework-applicationservices
|
||||
@@ -506,13 +570,13 @@ pyopengl==3.1.10
|
||||
# via
|
||||
# dm-control
|
||||
# mujoco
|
||||
pyparsing==3.3.2
|
||||
pyparsing==3.2.5
|
||||
# via
|
||||
# dm-control
|
||||
# matplotlib
|
||||
pyquaternion==0.9.9
|
||||
# via reachy2-sdk
|
||||
pyrealsense2-macosx==2.56.5
|
||||
pyrealsense2-macosx==2.54.2
|
||||
# via lerobot
|
||||
pyserial==3.5
|
||||
# via
|
||||
@@ -521,6 +585,7 @@ pyserial==3.5
|
||||
# lerobot
|
||||
pytest==8.4.2
|
||||
# via
|
||||
# bddl
|
||||
# lerobot
|
||||
# pytest-cov
|
||||
# pytest-timeout
|
||||
@@ -531,14 +596,11 @@ pytest-timeout==2.4.0
|
||||
# via lerobot
|
||||
python-dateutil==2.9.0.post0
|
||||
# via
|
||||
# faker
|
||||
# matplotlib
|
||||
# pandas
|
||||
python-discovery==1.1.1
|
||||
# via virtualenv
|
||||
python-dotenv==1.2.2
|
||||
python-dotenv==1.1.1
|
||||
# via uvicorn
|
||||
pytz==2026.1.post1
|
||||
pytz==2025.2
|
||||
# via pandas
|
||||
pyyaml==6.0.3
|
||||
# via
|
||||
@@ -547,10 +609,13 @@ pyyaml==6.0.3
|
||||
# draccus
|
||||
# hebi-py
|
||||
# huggingface-hub
|
||||
# jupytext
|
||||
# omegaconf
|
||||
# peft
|
||||
# pre-commit
|
||||
# pyngrok
|
||||
# pyyaml-include
|
||||
# timm
|
||||
# transformers
|
||||
# uvicorn
|
||||
# wandb
|
||||
@@ -560,13 +625,15 @@ pyzmq==27.1.0
|
||||
# via
|
||||
# lerobot
|
||||
# meshcat
|
||||
qwen-vl-utils==0.0.14
|
||||
# via lerobot
|
||||
reachy2-sdk==1.0.15
|
||||
reachy2-sdk==1.0.14
|
||||
# via lerobot
|
||||
reachy2-sdk-api==1.0.21
|
||||
# via reachy2-sdk
|
||||
regex==2026.2.28
|
||||
referencing==0.37.0
|
||||
# via
|
||||
# jsonschema
|
||||
# jsonschema-specifications
|
||||
regex==2025.10.23
|
||||
# via
|
||||
# diffusers
|
||||
# transformers
|
||||
@@ -575,150 +642,184 @@ requests==2.32.5
|
||||
# datasets
|
||||
# diffusers
|
||||
# dm-control
|
||||
# qwen-vl-utils
|
||||
# huggingface-hub
|
||||
# teleop
|
||||
# transformers
|
||||
# wandb
|
||||
rerun-sdk==0.26.2
|
||||
rerun-sdk==0.26.1
|
||||
# via lerobot
|
||||
rhoban-cmeel-jsoncpp==1.9.4.9
|
||||
# via placo
|
||||
rich==14.3.3
|
||||
# via typer
|
||||
safetensors==0.7.0
|
||||
robomimic==0.2.0
|
||||
# via libero
|
||||
robosuite==1.4.0
|
||||
# via libero
|
||||
rpds-py==0.28.0
|
||||
# via
|
||||
# jsonschema
|
||||
# referencing
|
||||
safetensors==0.6.2
|
||||
# via
|
||||
# accelerate
|
||||
# diffusers
|
||||
# lerobot
|
||||
# peft
|
||||
# timm
|
||||
# transformers
|
||||
scikit-image==0.25.2
|
||||
# via
|
||||
# gym-pusht
|
||||
# lerobot
|
||||
scipy==1.17.1
|
||||
scipy==1.15.3
|
||||
# via
|
||||
# dm-control
|
||||
# lerobot
|
||||
# metaworld
|
||||
# robosuite
|
||||
# scikit-image
|
||||
# torchdiffeq
|
||||
sentry-sdk==2.54.0
|
||||
sentry-sdk==2.42.1
|
||||
# via wandb
|
||||
shapely==2.1.2
|
||||
# via gym-pusht
|
||||
shellingham==1.5.4
|
||||
# via typer
|
||||
six==1.17.0
|
||||
# via
|
||||
# pynput
|
||||
# python-dateutil
|
||||
smmap==5.0.3
|
||||
smmap==5.0.2
|
||||
# via gitdb
|
||||
sniffio==1.3.1
|
||||
# via anyio
|
||||
stack-data==0.6.3
|
||||
# via ipython
|
||||
starlette==0.52.1
|
||||
starlette==0.48.0
|
||||
# via fastapi
|
||||
sympy==1.14.0
|
||||
# via torch
|
||||
teleop==0.1.4
|
||||
teleop==0.1.2
|
||||
# via lerobot
|
||||
termcolor==3.3.0
|
||||
# via lerobot
|
||||
tifffile==2026.3.3
|
||||
tensorboard==2.20.0
|
||||
# via robomimic
|
||||
tensorboard-data-server==0.7.2
|
||||
# via tensorboard
|
||||
tensorboardx==2.6.4
|
||||
# via robomimic
|
||||
termcolor==3.1.0
|
||||
# via
|
||||
# lerobot
|
||||
# robomimic
|
||||
thop==0.1.1.post2209072238
|
||||
# via libero
|
||||
tifffile==2025.5.10
|
||||
# via scikit-image
|
||||
tokenizers==0.22.2
|
||||
timm==1.0.20
|
||||
# via lerobot
|
||||
tokenizers==0.22.1
|
||||
# via transformers
|
||||
toml==0.10.2
|
||||
# via draccus
|
||||
torch==2.10.0
|
||||
tomli==2.3.0
|
||||
# via
|
||||
# cmeel
|
||||
# coverage
|
||||
# jupytext
|
||||
# pytest
|
||||
torch==2.7.1
|
||||
# via
|
||||
# accelerate
|
||||
# lerobot
|
||||
# peft
|
||||
# torchdiffeq
|
||||
# robomimic
|
||||
# thop
|
||||
# timm
|
||||
# torchvision
|
||||
torchcodec==0.10.0
|
||||
torchcodec==0.5
|
||||
# via lerobot
|
||||
torchdiffeq==0.2.5
|
||||
# via lerobot
|
||||
torchvision==0.25.0
|
||||
# via lerobot
|
||||
tornado==6.5.4
|
||||
torchvision==0.22.1
|
||||
# via
|
||||
# lerobot
|
||||
# robomimic
|
||||
# timm
|
||||
tornado==6.5.2
|
||||
# via meshcat
|
||||
tqdm==4.67.3
|
||||
tqdm==4.67.1
|
||||
# via
|
||||
# datasets
|
||||
# dm-control
|
||||
# huggingface-hub
|
||||
# peft
|
||||
# robomimic
|
||||
# transformers
|
||||
traitlets==5.14.3
|
||||
# via
|
||||
# ipython
|
||||
# jupyter-core
|
||||
# matplotlib-inline
|
||||
transformers==5.3.0
|
||||
# nbformat
|
||||
transformers==4.57.1
|
||||
# via
|
||||
# lerobot
|
||||
# libero
|
||||
# peft
|
||||
transforms3d==0.4.2
|
||||
# via teleop
|
||||
typer==0.24.1
|
||||
# via
|
||||
# huggingface-hub
|
||||
# transformers
|
||||
typing-extensions==4.15.0
|
||||
# via
|
||||
# aiosignal
|
||||
# anyio
|
||||
# etils
|
||||
# faker
|
||||
# exceptiongroup
|
||||
# fastapi
|
||||
# gymnasium
|
||||
# huggingface-hub
|
||||
# mypy
|
||||
# ipython
|
||||
# multidict
|
||||
# pydantic
|
||||
# pydantic-core
|
||||
# referencing
|
||||
# rerun-sdk
|
||||
# starlette
|
||||
# torch
|
||||
# typing-inspect
|
||||
# typing-inspection
|
||||
# uvicorn
|
||||
# virtualenv
|
||||
# wandb
|
||||
typing-inspect==0.9.0
|
||||
# via draccus
|
||||
typing-inspection==0.4.2
|
||||
# via
|
||||
# fastapi
|
||||
# pydantic
|
||||
tzdata==2025.3
|
||||
# via pydantic
|
||||
tzdata==2025.2
|
||||
# via pandas
|
||||
u-msgpack-python==2.8.0
|
||||
# via meshcat
|
||||
urllib3==2.6.3
|
||||
urllib3==2.5.0
|
||||
# via
|
||||
# requests
|
||||
# sentry-sdk
|
||||
uvicorn[standard]==0.41.0
|
||||
uvicorn[standard]==0.38.0
|
||||
# via teleop
|
||||
uvloop==0.22.1
|
||||
# via uvicorn
|
||||
virtualenv==21.1.0
|
||||
virtualenv==20.35.3
|
||||
# via pre-commit
|
||||
wandb==0.24.2
|
||||
# via lerobot
|
||||
wandb==0.21.4
|
||||
# via
|
||||
# lerobot
|
||||
# libero
|
||||
watchfiles==1.1.1
|
||||
# via uvicorn
|
||||
wcwidth==0.6.0
|
||||
wcwidth==0.2.14
|
||||
# via prompt-toolkit
|
||||
websocket-client==1.9.0
|
||||
# via teleop
|
||||
websockets==16.0
|
||||
websockets==15.0.1
|
||||
# via uvicorn
|
||||
wrapt==2.1.2
|
||||
werkzeug==3.1.3
|
||||
# via tensorboard
|
||||
wrapt==2.0.0
|
||||
# via dm-tree
|
||||
xxhash==3.6.0
|
||||
# via datasets
|
||||
yarl==1.23.0
|
||||
yarl==1.22.0
|
||||
# via aiohttp
|
||||
zipp==3.23.0
|
||||
# via
|
||||
|
||||
+187
-208
@@ -1,12 +1,12 @@
|
||||
#
|
||||
# This file is autogenerated by pip-compile with Python 3.12
|
||||
# This file is autogenerated by pip-compile with Python 3.10
|
||||
# by the following command:
|
||||
#
|
||||
# pip-compile --output-file=requirements-ubuntu.txt requirements.in
|
||||
#
|
||||
-e .[all]
|
||||
# via -[all]
|
||||
absl-py==2.4.0
|
||||
absl-py==2.3.1
|
||||
# via
|
||||
# dm-control
|
||||
# dm-env
|
||||
@@ -14,33 +14,30 @@ absl-py==2.4.0
|
||||
# labmaze
|
||||
# mujoco
|
||||
# tensorboard
|
||||
accelerate==1.13.0
|
||||
accelerate==1.11.0
|
||||
# via
|
||||
# lerobot
|
||||
# peft
|
||||
aiohappyeyeballs==2.6.1
|
||||
# via aiohttp
|
||||
aiohttp==3.13.3
|
||||
aiohttp==3.13.1
|
||||
# via fsspec
|
||||
aiosignal==1.4.0
|
||||
# via aiohttp
|
||||
annotated-doc==0.0.4
|
||||
# via
|
||||
# fastapi
|
||||
# typer
|
||||
annotated-types==0.7.0
|
||||
# via pydantic
|
||||
antlr4-python3-runtime==4.9.3
|
||||
# via
|
||||
# hydra-core
|
||||
# omegaconf
|
||||
anyio==4.12.1
|
||||
anyio==4.11.0
|
||||
# via
|
||||
# httpx
|
||||
# starlette
|
||||
# watchfiles
|
||||
asttokens==3.0.1
|
||||
asttokens==3.0.0
|
||||
# via stack-data
|
||||
async-timeout==5.0.1
|
||||
# via aiohttp
|
||||
attrs==25.4.0
|
||||
# via
|
||||
# aiohttp
|
||||
@@ -50,35 +47,30 @@ attrs==25.4.0
|
||||
# referencing
|
||||
# rerun-sdk
|
||||
av==15.1.0
|
||||
# via
|
||||
# lerobot
|
||||
# qwen-vl-utils
|
||||
# via lerobot
|
||||
bddl==1.0.1
|
||||
# via hf-libero
|
||||
certifi==2026.2.25
|
||||
# via libero
|
||||
certifi==2025.10.5
|
||||
# via
|
||||
# httpcore
|
||||
# httpx
|
||||
# requests
|
||||
# sentry-sdk
|
||||
cffi==2.0.0
|
||||
# via pymunk
|
||||
cfgv==3.5.0
|
||||
cfgv==3.4.0
|
||||
# via pre-commit
|
||||
charset-normalizer==3.4.5
|
||||
charset-normalizer==3.4.4
|
||||
# via requests
|
||||
click==8.3.1
|
||||
click==8.3.0
|
||||
# via
|
||||
# typer
|
||||
# uvicorn
|
||||
# wandb
|
||||
cloudpickle==3.1.2
|
||||
cloudpickle==3.1.1
|
||||
# via
|
||||
# gymnasium
|
||||
# hf-libero
|
||||
cmake==4.1.3
|
||||
# libero
|
||||
cmake==4.1.0
|
||||
# via lerobot
|
||||
cmeel==0.59.0
|
||||
cmeel==0.57.3
|
||||
# via
|
||||
# cmeel-assimp
|
||||
# cmeel-boost
|
||||
@@ -116,24 +108,20 @@ cmeel-zlib==1.3.1
|
||||
# via cmeel-assimp
|
||||
coal-library==3.0.1
|
||||
# via pin
|
||||
contourpy==1.3.3
|
||||
# via
|
||||
# lerobot
|
||||
# matplotlib
|
||||
coverage[toml]==7.13.4
|
||||
contourpy==1.3.2
|
||||
# via matplotlib
|
||||
coverage[toml]==7.11.0
|
||||
# via pytest-cov
|
||||
cuda-bindings==12.9.4
|
||||
# via torch
|
||||
cuda-pathfinder==1.4.1
|
||||
# via cuda-bindings
|
||||
cycler==0.12.1
|
||||
# via matplotlib
|
||||
datasets==4.6.1
|
||||
datasets==4.1.1
|
||||
# via lerobot
|
||||
debugpy==1.8.20
|
||||
debugpy==1.8.17
|
||||
# via lerobot
|
||||
decorator==5.2.1
|
||||
# via ipython
|
||||
decord==0.6.0
|
||||
# via lerobot
|
||||
deepdiff==8.6.1
|
||||
# via lerobot
|
||||
diffusers==0.35.2
|
||||
@@ -144,7 +132,7 @@ dill==0.4.0
|
||||
# multiprocess
|
||||
distlib==0.4.0
|
||||
# via virtualenv
|
||||
dm-control==1.0.37
|
||||
dm-control==1.0.34
|
||||
# via gym-aloha
|
||||
dm-env==1.6
|
||||
# via dm-control
|
||||
@@ -152,6 +140,7 @@ dm-tree==0.1.9
|
||||
# via
|
||||
# dm-control
|
||||
# dm-env
|
||||
# lerobot
|
||||
docopt==0.6.2
|
||||
# via num2words
|
||||
draccus==0.10.0
|
||||
@@ -159,60 +148,66 @@ draccus==0.10.0
|
||||
dynamixel-sdk==3.8.4
|
||||
# via lerobot
|
||||
easydict==1.13
|
||||
# via hf-libero
|
||||
egl-probe==1.0.2
|
||||
# via robomimic
|
||||
# via libero
|
||||
egl-probe @ git+https://github.com/huggingface/egl_probe.git
|
||||
# via
|
||||
# libero
|
||||
# robomimic
|
||||
eigenpy==3.10.3
|
||||
# via coal-library
|
||||
einops==0.8.2
|
||||
einops==0.8.1
|
||||
# via
|
||||
# hf-libero
|
||||
# flash-attn
|
||||
# lerobot
|
||||
# libero
|
||||
eiquadprog==1.2.9
|
||||
# via placo
|
||||
etils[epath,epy]==1.14.0
|
||||
etils[epath,epy]==1.13.0
|
||||
# via mujoco
|
||||
evdev==1.9.3
|
||||
evdev==1.9.2
|
||||
# via pynput
|
||||
exceptiongroup==1.3.0
|
||||
# via
|
||||
# anyio
|
||||
# ipython
|
||||
# pytest
|
||||
executing==2.2.1
|
||||
# via stack-data
|
||||
faker==34.0.2
|
||||
# via lerobot
|
||||
farama-notifications==0.0.4
|
||||
# via gymnasium
|
||||
fastapi==0.135.1
|
||||
# via
|
||||
# lerobot
|
||||
# teleop
|
||||
fastapi==0.119.1
|
||||
# via teleop
|
||||
fastjsonschema==2.21.2
|
||||
# via nbformat
|
||||
feetech-servo-sdk==1.0.0
|
||||
# via lerobot
|
||||
filelock==3.25.0
|
||||
filelock==3.20.0
|
||||
# via
|
||||
# datasets
|
||||
# diffusers
|
||||
# huggingface-hub
|
||||
# python-discovery
|
||||
# torch
|
||||
# transformers
|
||||
# virtualenv
|
||||
fonttools==4.61.1
|
||||
flash-attn==2.8.3
|
||||
# via lerobot
|
||||
fonttools==4.60.1
|
||||
# via matplotlib
|
||||
frozenlist==1.8.0
|
||||
# via
|
||||
# aiohttp
|
||||
# aiosignal
|
||||
fsspec[http]==2026.2.0
|
||||
fsspec[http]==2025.9.0
|
||||
# via
|
||||
# datasets
|
||||
# etils
|
||||
# huggingface-hub
|
||||
# torch
|
||||
future==1.0.0
|
||||
# via hf-libero
|
||||
# via libero
|
||||
gitdb==4.0.12
|
||||
# via gitpython
|
||||
gitpython==3.1.46
|
||||
gitpython==3.1.45
|
||||
# via wandb
|
||||
glfw==2.10.0
|
||||
# via
|
||||
@@ -235,60 +230,50 @@ gym-hil==0.1.13
|
||||
# via lerobot
|
||||
gym-pusht==0.1.6
|
||||
# via lerobot
|
||||
gymnasium==1.2.3
|
||||
gymnasium==1.2.1
|
||||
# via
|
||||
# gym-aloha
|
||||
# gym-hil
|
||||
# gym-pusht
|
||||
# hf-libero
|
||||
# lerobot
|
||||
# libero
|
||||
# metaworld
|
||||
h11==0.16.0
|
||||
# via
|
||||
# httpcore
|
||||
# uvicorn
|
||||
h5py==3.16.0
|
||||
# via uvicorn
|
||||
h5py==3.15.1
|
||||
# via robomimic
|
||||
hebi-py==2.11.0
|
||||
# via lerobot
|
||||
hf-egl-probe==1.0.2
|
||||
# via hf-libero
|
||||
hf-libero==0.1.3
|
||||
# via lerobot
|
||||
hf-xet==1.3.2
|
||||
hf-transfer==0.1.9
|
||||
# via huggingface-hub
|
||||
hf-xet==1.1.10
|
||||
# via huggingface-hub
|
||||
hidapi==0.14.0.post4
|
||||
# via
|
||||
# gym-hil
|
||||
# lerobot
|
||||
httpcore==1.0.9
|
||||
# via httpx
|
||||
httptools==0.7.1
|
||||
# via uvicorn
|
||||
httpx==0.28.1
|
||||
# via
|
||||
# datasets
|
||||
# huggingface-hub
|
||||
huggingface-hub==1.6.0
|
||||
huggingface-hub[cli,hf-transfer]==0.35.3
|
||||
# via
|
||||
# accelerate
|
||||
# datasets
|
||||
# diffusers
|
||||
# lerobot
|
||||
# peft
|
||||
# timm
|
||||
# tokenizers
|
||||
# transformers
|
||||
hydra-core==1.3.2
|
||||
# via hf-libero
|
||||
identify==2.6.17
|
||||
# via libero
|
||||
identify==2.6.15
|
||||
# via pre-commit
|
||||
idna==3.11
|
||||
# via
|
||||
# anyio
|
||||
# httpx
|
||||
# requests
|
||||
# yarl
|
||||
imageio[ffmpeg]==2.37.2
|
||||
imageio[ffmpeg]==2.37.0
|
||||
# via
|
||||
# gym-aloha
|
||||
# gym-hil
|
||||
@@ -300,14 +285,16 @@ imageio-ffmpeg==0.6.0
|
||||
# via
|
||||
# imageio
|
||||
# robomimic
|
||||
importlib-metadata==8.7.1
|
||||
importlib-metadata==8.7.0
|
||||
# via diffusers
|
||||
importlib-resources==6.5.2
|
||||
# via etils
|
||||
iniconfig==2.3.0
|
||||
# via pytest
|
||||
ipython==9.11.0
|
||||
inquirerpy==0.3.4
|
||||
# via huggingface-hub
|
||||
ipython==8.37.0
|
||||
# via meshcat
|
||||
ipython-pygments-lexers==1.1.1
|
||||
# via ipython
|
||||
ischedule==1.2.7
|
||||
# via placo
|
||||
jedi==0.19.2
|
||||
@@ -316,41 +303,40 @@ jinja2==3.1.6
|
||||
# via torch
|
||||
jsonlines==4.0.0
|
||||
# via lerobot
|
||||
jsonschema==4.26.0
|
||||
jsonschema==4.25.1
|
||||
# via nbformat
|
||||
jsonschema-specifications==2025.9.1
|
||||
# via jsonschema
|
||||
jupyter-core==5.9.1
|
||||
# via nbformat
|
||||
jupytext==1.19.1
|
||||
jupytext==1.18.1
|
||||
# via bddl
|
||||
kiwisolver==1.4.9
|
||||
# via matplotlib
|
||||
labmaze==1.0.6
|
||||
# via dm-control
|
||||
lazy-loader==0.5
|
||||
lazy-loader==0.4
|
||||
# via scikit-image
|
||||
librt==0.8.1
|
||||
# via mypy
|
||||
llvmlite==0.46.0
|
||||
libero @ git+https://github.com/huggingface/lerobot-libero.git@main
|
||||
# via lerobot
|
||||
llvmlite==0.45.1
|
||||
# via numba
|
||||
lxml==6.0.2
|
||||
# via dm-control
|
||||
markdown==3.10.2
|
||||
markdown==3.9
|
||||
# via tensorboard
|
||||
markdown-it-py==4.0.0
|
||||
# via
|
||||
# jupytext
|
||||
# mdit-py-plugins
|
||||
# rich
|
||||
markupsafe==3.0.3
|
||||
# via
|
||||
# jinja2
|
||||
# werkzeug
|
||||
matplotlib==3.10.8
|
||||
matplotlib==3.10.7
|
||||
# via
|
||||
# hf-libero
|
||||
# lerobot
|
||||
# libero
|
||||
matplotlib-inline==0.2.1
|
||||
# via ipython
|
||||
mdit-py-plugins==0.5.0
|
||||
@@ -367,38 +353,36 @@ mock-serial==0.0.1
|
||||
# via lerobot
|
||||
mpmath==1.3.0
|
||||
# via sympy
|
||||
mujoco==3.5.0
|
||||
mujoco==3.3.7
|
||||
# via
|
||||
# dm-control
|
||||
# gym-aloha
|
||||
# gym-hil
|
||||
# hf-libero
|
||||
# libero
|
||||
# metaworld
|
||||
# robosuite
|
||||
multidict==6.7.1
|
||||
multidict==6.7.0
|
||||
# via
|
||||
# aiohttp
|
||||
# yarl
|
||||
multiprocess==0.70.18
|
||||
multiprocess==0.70.16
|
||||
# via datasets
|
||||
mypy==1.19.1
|
||||
# via lerobot
|
||||
mypy-extensions==1.1.0
|
||||
# via
|
||||
# mypy
|
||||
# typing-inspect
|
||||
# via typing-inspect
|
||||
nbformat==5.10.4
|
||||
# via jupytext
|
||||
networkx==3.6.1
|
||||
networkx==3.4.2
|
||||
# via
|
||||
# bddl
|
||||
# scikit-image
|
||||
# torch
|
||||
nodeenv==1.10.0
|
||||
ninja==1.13.0
|
||||
# via lerobot
|
||||
nodeenv==1.9.1
|
||||
# via pre-commit
|
||||
num2words==0.5.14
|
||||
# via lerobot
|
||||
numba==0.64.0
|
||||
numba==0.62.1
|
||||
# via robosuite
|
||||
numpy==2.2.6
|
||||
# via
|
||||
@@ -407,6 +391,7 @@ numpy==2.2.6
|
||||
# cmeel-boost
|
||||
# contourpy
|
||||
# datasets
|
||||
# decord
|
||||
# diffusers
|
||||
# dm-control
|
||||
# dm-env
|
||||
@@ -414,10 +399,9 @@ numpy==2.2.6
|
||||
# gymnasium
|
||||
# h5py
|
||||
# hebi-py
|
||||
# hf-libero
|
||||
# imageio
|
||||
# labmaze
|
||||
# lerobot
|
||||
# libero
|
||||
# matplotlib
|
||||
# meshcat
|
||||
# metaworld
|
||||
@@ -442,51 +426,49 @@ numpy==2.2.6
|
||||
# torchvision
|
||||
# transformers
|
||||
# transforms3d
|
||||
nvidia-cublas-cu12==12.8.4.1
|
||||
nvidia-cublas-cu12==12.6.4.1
|
||||
# via
|
||||
# nvidia-cudnn-cu12
|
||||
# nvidia-cusolver-cu12
|
||||
# torch
|
||||
nvidia-cuda-cupti-cu12==12.8.90
|
||||
nvidia-cuda-cupti-cu12==12.6.80
|
||||
# via torch
|
||||
nvidia-cuda-nvrtc-cu12==12.8.93
|
||||
nvidia-cuda-nvrtc-cu12==12.6.77
|
||||
# via torch
|
||||
nvidia-cuda-runtime-cu12==12.8.90
|
||||
nvidia-cuda-runtime-cu12==12.6.77
|
||||
# via torch
|
||||
nvidia-cudnn-cu12==9.10.2.21
|
||||
nvidia-cudnn-cu12==9.5.1.17
|
||||
# via torch
|
||||
nvidia-cufft-cu12==11.3.3.83
|
||||
nvidia-cufft-cu12==11.3.0.4
|
||||
# via torch
|
||||
nvidia-cufile-cu12==1.13.1.3
|
||||
nvidia-cufile-cu12==1.11.1.6
|
||||
# via torch
|
||||
nvidia-curand-cu12==10.3.9.90
|
||||
nvidia-curand-cu12==10.3.7.77
|
||||
# via torch
|
||||
nvidia-cusolver-cu12==11.7.3.90
|
||||
nvidia-cusolver-cu12==11.7.1.2
|
||||
# via torch
|
||||
nvidia-cusparse-cu12==12.5.8.93
|
||||
nvidia-cusparse-cu12==12.5.4.2
|
||||
# via
|
||||
# nvidia-cusolver-cu12
|
||||
# torch
|
||||
nvidia-cusparselt-cu12==0.7.1
|
||||
nvidia-cusparselt-cu12==0.6.3
|
||||
# via torch
|
||||
nvidia-nccl-cu12==2.27.5
|
||||
nvidia-nccl-cu12==2.26.2
|
||||
# via torch
|
||||
nvidia-nvjitlink-cu12==12.8.93
|
||||
nvidia-nvjitlink-cu12==12.6.85
|
||||
# via
|
||||
# nvidia-cufft-cu12
|
||||
# nvidia-cusolver-cu12
|
||||
# nvidia-cusparse-cu12
|
||||
# torch
|
||||
nvidia-nvshmem-cu12==3.4.5
|
||||
# via torch
|
||||
nvidia-nvtx-cu12==12.8.90
|
||||
nvidia-nvtx-cu12==12.6.77
|
||||
# via torch
|
||||
omegaconf==2.3.0
|
||||
# via hydra-core
|
||||
opencv-python==4.13.0.92
|
||||
opencv-python==4.12.0.88
|
||||
# via
|
||||
# gym-pusht
|
||||
# hf-libero
|
||||
# libero
|
||||
# reachy2-sdk
|
||||
# robosuite
|
||||
opencv-python-headless==4.12.0.88
|
||||
@@ -505,7 +487,6 @@ packaging==25.0
|
||||
# matplotlib
|
||||
# peft
|
||||
# pytest
|
||||
# qwen-vl-utils
|
||||
# reachy2-sdk
|
||||
# scikit-image
|
||||
# tensorboard
|
||||
@@ -516,21 +497,21 @@ pandas==2.3.3
|
||||
# via
|
||||
# datasets
|
||||
# lerobot
|
||||
parso==0.8.6
|
||||
parso==0.8.5
|
||||
# via jedi
|
||||
pathspec==1.0.4
|
||||
# via mypy
|
||||
peft==0.18.1
|
||||
peft==0.17.1
|
||||
# via lerobot
|
||||
pexpect==4.9.0
|
||||
# via ipython
|
||||
pillow==12.1.1
|
||||
pfzy==0.3.4
|
||||
# via inquirerpy
|
||||
pillow==12.0.0
|
||||
# via
|
||||
# diffusers
|
||||
# imageio
|
||||
# lerobot
|
||||
# matplotlib
|
||||
# meshcat
|
||||
# qwen-vl-utils
|
||||
# rerun-sdk
|
||||
# robosuite
|
||||
# scikit-image
|
||||
@@ -538,27 +519,28 @@ pillow==12.1.1
|
||||
# torchvision
|
||||
pin==3.4.0
|
||||
# via placo
|
||||
placo==0.9.16
|
||||
placo==0.9.14
|
||||
# via lerobot
|
||||
platformdirs==4.9.4
|
||||
platformdirs==4.5.0
|
||||
# via
|
||||
# jupyter-core
|
||||
# python-discovery
|
||||
# virtualenv
|
||||
# wandb
|
||||
pluggy==1.6.0
|
||||
# via
|
||||
# pytest
|
||||
# pytest-cov
|
||||
pre-commit==4.5.1
|
||||
pre-commit==4.3.0
|
||||
# via lerobot
|
||||
prompt-toolkit==3.0.52
|
||||
# via ipython
|
||||
# via
|
||||
# inquirerpy
|
||||
# ipython
|
||||
propcache==0.4.1
|
||||
# via
|
||||
# aiohttp
|
||||
# yarl
|
||||
protobuf==6.31.1
|
||||
protobuf==6.31.0
|
||||
# via
|
||||
# dm-control
|
||||
# grpcio-tools
|
||||
@@ -568,7 +550,7 @@ protobuf==6.31.1
|
||||
# tensorboard
|
||||
# tensorboardx
|
||||
# wandb
|
||||
psutil==7.2.2
|
||||
psutil==7.1.1
|
||||
# via
|
||||
# accelerate
|
||||
# imageio
|
||||
@@ -578,17 +560,17 @@ ptyprocess==0.7.0
|
||||
# via pexpect
|
||||
pure-eval==0.2.3
|
||||
# via stack-data
|
||||
pyarrow==23.0.1
|
||||
pyarrow==21.0.0
|
||||
# via
|
||||
# datasets
|
||||
# rerun-sdk
|
||||
pycparser==3.0
|
||||
pycparser==2.23
|
||||
# via cffi
|
||||
pydantic==2.12.5
|
||||
pydantic==2.12.3
|
||||
# via
|
||||
# fastapi
|
||||
# wandb
|
||||
pydantic-core==2.41.5
|
||||
pydantic-core==2.41.4
|
||||
# via pydantic
|
||||
pygame==2.6.1
|
||||
# via
|
||||
@@ -598,14 +580,12 @@ pygame==2.6.1
|
||||
pygments==2.19.2
|
||||
# via
|
||||
# ipython
|
||||
# ipython-pygments-lexers
|
||||
# pytest
|
||||
# rich
|
||||
pymunk==6.11.1
|
||||
# via
|
||||
# gym-pusht
|
||||
# lerobot
|
||||
pyngrok==7.5.1
|
||||
pyngrok==7.4.1
|
||||
# via meshcat
|
||||
pynput==1.8.1
|
||||
# via
|
||||
@@ -615,7 +595,7 @@ pyopengl==3.1.10
|
||||
# via
|
||||
# dm-control
|
||||
# mujoco
|
||||
pyparsing==3.3.2
|
||||
pyparsing==3.2.5
|
||||
# via
|
||||
# dm-control
|
||||
# matplotlib
|
||||
@@ -641,16 +621,13 @@ pytest-timeout==2.4.0
|
||||
# via lerobot
|
||||
python-dateutil==2.9.0.post0
|
||||
# via
|
||||
# faker
|
||||
# matplotlib
|
||||
# pandas
|
||||
python-discovery==1.1.1
|
||||
# via virtualenv
|
||||
python-dotenv==1.2.2
|
||||
python-dotenv==1.1.1
|
||||
# via uvicorn
|
||||
python-xlib==0.33
|
||||
# via pynput
|
||||
pytz==2026.1.post1
|
||||
pytz==2025.2
|
||||
# via pandas
|
||||
pyyaml==6.0.3
|
||||
# via
|
||||
@@ -665,6 +642,7 @@ pyyaml==6.0.3
|
||||
# pre-commit
|
||||
# pyngrok
|
||||
# pyyaml-include
|
||||
# timm
|
||||
# transformers
|
||||
# uvicorn
|
||||
# wandb
|
||||
@@ -674,9 +652,7 @@ pyzmq==27.1.0
|
||||
# via
|
||||
# lerobot
|
||||
# meshcat
|
||||
qwen-vl-utils==0.0.14
|
||||
# via lerobot
|
||||
reachy2-sdk==1.0.15
|
||||
reachy2-sdk==1.0.14
|
||||
# via lerobot
|
||||
reachy2-sdk-api==1.0.21
|
||||
# via reachy2-sdk
|
||||
@@ -684,7 +660,7 @@ referencing==0.37.0
|
||||
# via
|
||||
# jsonschema
|
||||
# jsonschema-specifications
|
||||
regex==2026.2.28
|
||||
regex==2025.10.23
|
||||
# via
|
||||
# diffusers
|
||||
# transformers
|
||||
@@ -693,62 +669,60 @@ requests==2.32.5
|
||||
# datasets
|
||||
# diffusers
|
||||
# dm-control
|
||||
# qwen-vl-utils
|
||||
# huggingface-hub
|
||||
# teleop
|
||||
# transformers
|
||||
# wandb
|
||||
rerun-sdk==0.26.2
|
||||
rerun-sdk==0.26.1
|
||||
# via lerobot
|
||||
rhoban-cmeel-jsoncpp==1.9.4.9
|
||||
# via placo
|
||||
rich==14.3.3
|
||||
# via typer
|
||||
robomimic==0.2.0
|
||||
# via hf-libero
|
||||
# via libero
|
||||
robosuite==1.4.0
|
||||
# via hf-libero
|
||||
rpds-py==0.30.0
|
||||
# via libero
|
||||
rpds-py==0.28.0
|
||||
# via
|
||||
# jsonschema
|
||||
# referencing
|
||||
safetensors==0.7.0
|
||||
safetensors==0.6.2
|
||||
# via
|
||||
# accelerate
|
||||
# diffusers
|
||||
# lerobot
|
||||
# peft
|
||||
# timm
|
||||
# transformers
|
||||
scikit-image==0.25.2
|
||||
# via
|
||||
# gym-pusht
|
||||
# lerobot
|
||||
scipy==1.17.1
|
||||
scipy==1.15.3
|
||||
# via
|
||||
# dm-control
|
||||
# lerobot
|
||||
# metaworld
|
||||
# robosuite
|
||||
# scikit-image
|
||||
# torchdiffeq
|
||||
sentry-sdk==2.54.0
|
||||
sentry-sdk==2.42.1
|
||||
# via wandb
|
||||
shapely==2.1.2
|
||||
# via gym-pusht
|
||||
shellingham==1.5.4
|
||||
# via typer
|
||||
six==1.17.0
|
||||
# via
|
||||
# pynput
|
||||
# python-dateutil
|
||||
# python-xlib
|
||||
smmap==5.0.3
|
||||
smmap==5.0.2
|
||||
# via gitdb
|
||||
sniffio==1.3.1
|
||||
# via anyio
|
||||
stack-data==0.6.3
|
||||
# via ipython
|
||||
starlette==0.52.1
|
||||
starlette==0.48.0
|
||||
# via fastapi
|
||||
sympy==1.14.0
|
||||
# via torch
|
||||
teleop==0.1.4
|
||||
teleop==0.1.2
|
||||
# via lerobot
|
||||
tensorboard==2.20.0
|
||||
# via robomimic
|
||||
@@ -756,38 +730,46 @@ tensorboard-data-server==0.7.2
|
||||
# via tensorboard
|
||||
tensorboardx==2.6.4
|
||||
# via robomimic
|
||||
termcolor==3.3.0
|
||||
termcolor==3.1.0
|
||||
# via
|
||||
# lerobot
|
||||
# robomimic
|
||||
thop==0.1.1.post2209072238
|
||||
# via hf-libero
|
||||
tifffile==2026.3.3
|
||||
# via libero
|
||||
tifffile==2025.5.10
|
||||
# via scikit-image
|
||||
tokenizers==0.22.2
|
||||
timm==1.0.20
|
||||
# via lerobot
|
||||
tokenizers==0.22.1
|
||||
# via transformers
|
||||
toml==0.10.2
|
||||
# via draccus
|
||||
torch==2.10.0
|
||||
tomli==2.3.0
|
||||
# via
|
||||
# cmeel
|
||||
# coverage
|
||||
# jupytext
|
||||
# pytest
|
||||
torch==2.7.1
|
||||
# via
|
||||
# accelerate
|
||||
# flash-attn
|
||||
# lerobot
|
||||
# peft
|
||||
# robomimic
|
||||
# thop
|
||||
# torchdiffeq
|
||||
# timm
|
||||
# torchvision
|
||||
torchcodec==0.10.0
|
||||
torchcodec==0.5
|
||||
# via lerobot
|
||||
torchdiffeq==0.2.5
|
||||
# via lerobot
|
||||
torchvision==0.25.0
|
||||
torchvision==0.22.1
|
||||
# via
|
||||
# lerobot
|
||||
# robomimic
|
||||
tornado==6.5.4
|
||||
# timm
|
||||
tornado==6.5.2
|
||||
# via meshcat
|
||||
tqdm==4.67.3
|
||||
tqdm==4.67.1
|
||||
# via
|
||||
# datasets
|
||||
# dm-control
|
||||
@@ -801,29 +783,26 @@ traitlets==5.14.3
|
||||
# jupyter-core
|
||||
# matplotlib-inline
|
||||
# nbformat
|
||||
transformers==5.3.0
|
||||
transformers==4.57.1
|
||||
# via
|
||||
# hf-libero
|
||||
# lerobot
|
||||
# libero
|
||||
# peft
|
||||
transforms3d==0.4.2
|
||||
# via teleop
|
||||
triton==3.6.0
|
||||
triton==3.3.1
|
||||
# via torch
|
||||
typer==0.24.1
|
||||
# via
|
||||
# huggingface-hub
|
||||
# transformers
|
||||
typing-extensions==4.15.0
|
||||
# via
|
||||
# aiosignal
|
||||
# anyio
|
||||
# etils
|
||||
# faker
|
||||
# exceptiongroup
|
||||
# fastapi
|
||||
# gymnasium
|
||||
# huggingface-hub
|
||||
# mypy
|
||||
# ipython
|
||||
# multidict
|
||||
# pydantic
|
||||
# pydantic-core
|
||||
# referencing
|
||||
@@ -832,46 +811,46 @@ typing-extensions==4.15.0
|
||||
# torch
|
||||
# typing-inspect
|
||||
# typing-inspection
|
||||
# uvicorn
|
||||
# virtualenv
|
||||
# wandb
|
||||
typing-inspect==0.9.0
|
||||
# via draccus
|
||||
typing-inspection==0.4.2
|
||||
# via
|
||||
# fastapi
|
||||
# pydantic
|
||||
tzdata==2025.3
|
||||
# via pydantic
|
||||
tzdata==2025.2
|
||||
# via pandas
|
||||
u-msgpack-python==2.8.0
|
||||
# via meshcat
|
||||
urllib3==2.6.3
|
||||
urllib3==2.5.0
|
||||
# via
|
||||
# requests
|
||||
# sentry-sdk
|
||||
uvicorn[standard]==0.41.0
|
||||
uvicorn[standard]==0.38.0
|
||||
# via teleop
|
||||
uvloop==0.22.1
|
||||
# via uvicorn
|
||||
virtualenv==21.1.0
|
||||
virtualenv==20.35.3
|
||||
# via pre-commit
|
||||
wandb==0.24.2
|
||||
wandb==0.21.4
|
||||
# via
|
||||
# hf-libero
|
||||
# lerobot
|
||||
# libero
|
||||
watchfiles==1.1.1
|
||||
# via uvicorn
|
||||
wcwidth==0.6.0
|
||||
wcwidth==0.2.14
|
||||
# via prompt-toolkit
|
||||
websocket-client==1.9.0
|
||||
# via teleop
|
||||
websockets==16.0
|
||||
websockets==15.0.1
|
||||
# via uvicorn
|
||||
werkzeug==3.1.6
|
||||
werkzeug==3.1.3
|
||||
# via tensorboard
|
||||
wrapt==2.1.2
|
||||
wrapt==2.0.0
|
||||
# via dm-tree
|
||||
xxhash==3.6.0
|
||||
# via datasets
|
||||
yarl==1.23.0
|
||||
yarl==1.22.0
|
||||
# via aiohttp
|
||||
zipp==3.23.0
|
||||
# via
|
||||
|
||||
+4
-4
@@ -1,9 +1,9 @@
|
||||
# requirements.in
|
||||
|
||||
# requirements-macos.txt was generated on macOS and is platform-specific (macOS 26.3.1 25D2128 arm64).
|
||||
# Darwin MacBook-Pro.local 25.3.0 Darwin Kernel Version 25.3.0: Wed Jan 28 20:54:55 PST 2026; root:xnu-12377.91.3~2/RELEASE_ARM64_T8132 arm64
|
||||
# requirements-macos.txt was generated on macOS and is platform-specific (macOS 26.0.1 25A362 arm64).
|
||||
# Darwin MacBook-Pro.local 25.0.0 Darwin Kernel Version 25.0.0: Wed Sep 17 21:42:08 PDT 2025; root:xnu-12377.1.9~141/RELEASE_ARM64_T8132 arm64
|
||||
|
||||
# requirements-ubuntu.txt was generated on Linux and is platform-specific (Ubuntu 24.04.4 LTS x86_64).
|
||||
# Linux lerobot-linux 6.17.0-14-generic #14~24.04.1-Ubuntu SMP PREEMPT_DYNAMIC Thu Jan 15 15:52:10 UTC 2 x86_64 x86_64 x86_64 GNU/Linux
|
||||
# requirements-ubuntu.txt was generated on Linux and is platform-specific (Ubuntu 24.04.3 LTS x86_64).
|
||||
# Linux mlerobot-linux 6.14.0-33-generic #33~24.04.1-Ubuntu SMP PREEMPT_DYNAMIC Fri Sep 19 17:02:30 UTC 2 x86_64 x86_64 x86_64 GNU/Linux
|
||||
|
||||
-e .[all]
|
||||
|
||||
@@ -126,12 +126,6 @@ class RobotClientConfig:
|
||||
|
||||
# Device configuration
|
||||
policy_device: str = field(default="cpu", metadata={"help": "Device for policy inference"})
|
||||
client_device: str = field(
|
||||
default="cpu",
|
||||
metadata={
|
||||
"help": "Device to move actions to after receiving from server (e.g., for downstream planners)"
|
||||
},
|
||||
)
|
||||
|
||||
# Control behavior configuration
|
||||
chunk_size_threshold: float = field(default=0.5, metadata={"help": "Threshold for chunk size control"})
|
||||
@@ -167,9 +161,6 @@ class RobotClientConfig:
|
||||
if not self.policy_device:
|
||||
raise ValueError("policy_device cannot be empty")
|
||||
|
||||
if not self.client_device:
|
||||
raise ValueError("client_device cannot be empty")
|
||||
|
||||
if self.chunk_size_threshold < 0 or self.chunk_size_threshold > 1:
|
||||
raise ValueError(f"chunk_size_threshold must be between 0 and 1, got {self.chunk_size_threshold}")
|
||||
|
||||
@@ -193,7 +184,6 @@ class RobotClientConfig:
|
||||
"policy_type": self.policy_type,
|
||||
"pretrained_name_or_path": self.pretrained_name_or_path,
|
||||
"policy_device": self.policy_device,
|
||||
"client_device": self.client_device,
|
||||
"chunk_size_threshold": self.chunk_size_threshold,
|
||||
"fps": self.fps,
|
||||
"actions_per_chunk": self.actions_per_chunk,
|
||||
|
||||
@@ -23,7 +23,7 @@ DEFAULT_INFERENCE_LATENCY = 1 / DEFAULT_FPS
|
||||
DEFAULT_OBS_QUEUE_TIMEOUT = 2
|
||||
|
||||
# All action chunking policies
|
||||
SUPPORTED_POLICIES = ["act", "smolvla", "diffusion", "tdmpc", "vqbet", "pi0", "pi05", "groot"]
|
||||
SUPPORTED_POLICIES = ["act", "smolvla", "diffusion", "tdmpc", "vqbet", "pi0", "pi05"]
|
||||
|
||||
# TODO: Add all other robots
|
||||
SUPPORTED_ROBOTS = ["so100_follower", "so101_follower", "bi_so_follower", "omx_follower"]
|
||||
|
||||
@@ -18,12 +18,11 @@ import os
|
||||
import time
|
||||
from dataclasses import dataclass, field
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
import torch
|
||||
|
||||
from lerobot.configs.types import PolicyFeature
|
||||
from lerobot.datasets.feature_utils import build_dataset_frame, hw_to_dataset_features
|
||||
from lerobot.datasets.utils import build_dataset_frame, hw_to_dataset_features
|
||||
|
||||
# NOTE: Configs need to be loaded for the client to be able to instantiate the policy config
|
||||
from lerobot.policies import ( # noqa: F401
|
||||
@@ -40,8 +39,8 @@ from lerobot.utils.utils import init_logging
|
||||
|
||||
Action = torch.Tensor
|
||||
|
||||
# observation as received from the robot (can be numpy arrays, floats, etc.)
|
||||
RawObservation = dict[str, Any]
|
||||
# observation as received from the robot
|
||||
RawObservation = dict[str, torch.Tensor]
|
||||
|
||||
# observation as those recorded in LeRobot dataset (keys are different)
|
||||
LeRobotObservation = dict[str, torch.Tensor]
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user