Compare commits

..

7 Commits

Author SHA1 Message Date
CarolinePascal 34454748f4 fix(datasets) 2026-03-09 15:06:55 +01:00
CarolinePascal 8e5763c5ab fix(datasets) 2026-03-08 20:53:16 +01:00
CarolinePascal 388d4518ba fix(datasets) 2026-03-07 17:57:56 +01:00
CarolinePascal 232dbe4176 fix(datasets) 2026-03-07 17:46:09 +01:00
CarolinePascal 10c2e2fc87 fix(datasets) 2026-03-07 01:14:19 +01:00
CarolinePascal 5e74f06b20 fix(datasets) 2026-03-07 00:24:01 +01:00
CarolinePascal 07931b1101 fix(datasets) 2026-03-07 00:18:57 +01:00
53 changed files with 198 additions and 533 deletions
+1 -1
View File
@@ -30,7 +30,7 @@ pytest -sx tests/test_stuff.py::test_something
```
```bash
lerobot-train --some.option=true
python -m lerobot.scripts.train --some.option=true
```
## SECTION TO REMOVE BEFORE SUBMITTING YOUR PR
+2 -2
View File
@@ -29,8 +29,8 @@ on:
env:
UV_VERSION: "0.8.0"
PYTHON_VERSION: "3.10"
DOCKER_IMAGE_NAME_CPU: huggingface/lerobot-cpu:latest
DOCKER_IMAGE_NAME_GPU: huggingface/lerobot-gpu:latest
DOCKER_IMAGE_NAME_CPU: huggingface/lerobot-gpu:latest
DOCKER_IMAGE_NAME_GPU: huggingface/lerobot-cpu:latest
# Ensures that only the latest commit is built, canceling older runs.
concurrency:
+9 -9
View File
@@ -44,7 +44,7 @@ test-end-to-end:
${MAKE} DEVICE=$(DEVICE) test-smolvla-ete-eval
test-act-ete-train:
lerobot-train \
python -m lerobot.scripts.train \
--policy.type=act \
--policy.dim_model=64 \
--policy.n_action_steps=20 \
@@ -68,12 +68,12 @@ test-act-ete-train:
--output_dir=tests/outputs/act/
test-act-ete-train-resume:
lerobot-train \
python -m lerobot.scripts.train \
--config_path=tests/outputs/act/checkpoints/000002/pretrained_model/train_config.json \
--resume=true
test-act-ete-eval:
lerobot-eval \
python -m lerobot.scripts.eval \
--policy.path=tests/outputs/act/checkpoints/000004/pretrained_model \
--policy.device=$(DEVICE) \
--env.type=aloha \
@@ -82,7 +82,7 @@ test-act-ete-eval:
--eval.batch_size=1
test-diffusion-ete-train:
lerobot-train \
python -m lerobot.scripts.train \
--policy.type=diffusion \
--policy.down_dims='[64,128,256]' \
--policy.diffusion_step_embed_dim=32 \
@@ -106,7 +106,7 @@ test-diffusion-ete-train:
--output_dir=tests/outputs/diffusion/
test-diffusion-ete-eval:
lerobot-eval \
python -m lerobot.scripts.eval \
--policy.path=tests/outputs/diffusion/checkpoints/000002/pretrained_model \
--policy.device=$(DEVICE) \
--env.type=pusht \
@@ -115,7 +115,7 @@ test-diffusion-ete-eval:
--eval.batch_size=1
test-tdmpc-ete-train:
lerobot-train \
python -m lerobot.scripts.train \
--policy.type=tdmpc \
--policy.device=$(DEVICE) \
--policy.push_to_hub=false \
@@ -137,7 +137,7 @@ test-tdmpc-ete-train:
--output_dir=tests/outputs/tdmpc/
test-tdmpc-ete-eval:
lerobot-eval \
python -m lerobot.scripts.eval \
--policy.path=tests/outputs/tdmpc/checkpoints/000002/pretrained_model \
--policy.device=$(DEVICE) \
--env.type=xarm \
@@ -148,7 +148,7 @@ test-tdmpc-ete-eval:
test-smolvla-ete-train:
lerobot-train \
python -m lerobot.scripts.train \
--policy.type=smolvla \
--policy.n_action_steps=20 \
--policy.chunk_size=20 \
@@ -171,7 +171,7 @@ test-smolvla-ete-train:
--output_dir=tests/outputs/smolvla/
test-smolvla-ete-eval:
lerobot-eval \
python -m lerobot.scripts.eval \
--policy.path=tests/outputs/smolvla/checkpoints/000004/pretrained_model \
--policy.device=$(DEVICE) \
--env.type=aloha \
+6 -6
View File
@@ -6,7 +6,7 @@
<div align="center">
[![Tests](https://github.com/huggingface/lerobot/actions/workflows/nightly.yml/badge.svg?branch=main)](https://github.com/huggingface/lerobot/actions/workflows/nightly.yml?query=branch%3Amain)
[![Tests](https://github.com/huggingface/lerobot/actions/workflows/nightly.yml/badge.svg?branch=main)](https://github.com/huggingface/lerobot/actions/workflows/nighty.yml?query=branch%3Amain)
[![Python versions](https://img.shields.io/pypi/pyversions/lerobot)](https://www.python.org/downloads/)
[![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://github.com/huggingface/lerobot/blob/main/LICENSE)
[![Status](https://img.shields.io/pypi/status/lerobot)](https://pypi.org/project/lerobot/)
@@ -276,7 +276,7 @@ Check out [example 2](https://github.com/huggingface/lerobot/blob/main/examples/
We also provide a more capable script to parallelize the evaluation over multiple environments during the same rollout. Here is an example with a pretrained model hosted on [lerobot/diffusion_pusht](https://huggingface.co/lerobot/diffusion_pusht):
```bash
lerobot-eval \
python -m lerobot.scripts.eval \
--policy.path=lerobot/diffusion_pusht \
--env.type=pusht \
--eval.batch_size=10 \
@@ -288,10 +288,10 @@ lerobot-eval \
Note: After training your own policy, you can re-evaluate the checkpoints with:
```bash
lerobot-eval --policy.path={OUTPUT_DIR}/checkpoints/last/pretrained_model
python -m lerobot.scripts.eval --policy.path={OUTPUT_DIR}/checkpoints/last/pretrained_model
```
See `lerobot-eval --help` for more instructions.
See `python -m lerobot.scripts.eval --help` for more instructions.
### Train your own policy
@@ -303,7 +303,7 @@ A link to the wandb logs for the run will also show up in yellow in your termina
\<img src="https://raw.githubusercontent.com/huggingface/lerobot/main/media/wandb.png" alt="WandB logs example"\>
Note: For efficiency, during training every checkpoint is evaluated on a low number of episodes. You may use `--eval.n_episodes=500` to evaluate on more episodes than the default. Or, after training, you may want to re-evaluate your best checkpoints on more episodes or change the evaluation settings. See `lerobot-eval --help` for more instructions.
Note: For efficiency, during training every checkpoint is evaluated on a low number of episodes. You may use `--eval.n_episodes=500` to evaluate on more episodes than the default. Or, after training, you may want to re-evaluate your best checkpoints on more episodes or change the evaluation settings. See `python -m lerobot.scripts.eval --help` for more instructions.
#### Reproduce state-of-the-art (SOTA)
@@ -311,7 +311,7 @@ We provide some pretrained policies on our [hub page](https://huggingface.co/ler
You can reproduce their training by loading the config from their run. Simply running:
```bash
lerobot-train --config_path=lerobot/diffusion_pusht
python -m lerobot.scripts.train --config_path=lerobot/diffusion_pusht
```
reproduces SOTA results for Diffusion Policy on the PushT task.
-11
View File
@@ -1,11 +0,0 @@
compute_environment: LOCAL_MACHINE
debug: false
distributed_type: NO
downcast_bf16: 'no'
enable_cpu_affinity: false
machine_rank: 0
main_training_function: main
mixed_precision: 'no'
num_machines: 1
num_processes: 1
use_cpu: false
-18
View File
@@ -1,18 +0,0 @@
compute_environment: LOCAL_MACHINE
debug: false
distributed_type: MULTI_GPU
downcast_bf16: 'no'
enable_cpu_affinity: false
gpu_ids: all
machine_rank: 0
main_training_function: main
mixed_precision: 'no'
num_machines: 1
num_processes: 2
rdzv_backend: static
same_network: true
tpu_env: []
tpu_use_cluster: false
tpu_use_sudo: false
use_cpu: false
dynamo_backend: "no"
+1 -1
View File
@@ -29,7 +29,7 @@ ENV DEBIAN_FRONTEND=noninteractive \
# Install system dependencies and uv (as root)
RUN apt-get update && apt-get install -y --no-install-recommends \
build-essential git curl libglib2.0-0 libegl1-mesa-dev ffmpeg \
build-essential git curl libglib2.0-0 libegl1-mesa ffmpeg \
libusb-1.0-0-dev speech-dispatcher libgeos-dev portaudio19-dev \
&& curl -LsSf https://astral.sh/uv/install.sh | sh \
&& mv /root/.local/bin/uv /usr/local/bin/uv \
-2
View File
@@ -39,8 +39,6 @@
- sections:
- local: notebooks
title: Notebooks
- local: feetech
title: Updating Feetech Firmware
title: "Resources"
- sections:
- local: contributing
+1 -1
View File
@@ -9,7 +9,7 @@ To instantiate a camera, you need a camera identifier. This identifier might cha
To find the camera indices of the cameras plugged into your system, run the following script:
```bash
lerobot-find-cameras opencv # or realsense for Intel Realsense cameras
python -m lerobot.find_cameras opencv # or realsense for Intel Realsense cameras
```
The output will look something like this if you have two cameras connected:
-71
View File
@@ -1,71 +0,0 @@
# Feetech Motor Firmware Update
This tutorial guides you through updating the firmware of Feetech motors using the official Feetech software.
## Prerequisites
- Windows computer (Feetech software is only available for Windows)
- Feetech motor control board
- USB cable to connect the control board to your computer
- Feetech motors connected to the control board
## Step 1: Download Feetech Software
1. Visit the official Feetech software download page: [https://www.feetechrc.com/software.html](https://www.feetechrc.com/software.html)
2. Download the latest version of the Feetech debugging software (FD)
3. Install the software on your Windows computer
## Step 2: Hardware Setup
1. Connect your Feetech motors to the motor control board
2. Connect the motor control board to your Windows computer via USB cable
3. Ensure power is supplied to the motors
## Step 3: Configure Connection
1. Launch the Feetech debugging software
2. Select the correct COM port from the port dropdown menu
- If unsure which port to use, check Windows Device Manager under "Ports (COM & LPT)"
3. Set the appropriate baud rate (typically 1000000 for most Feetech motors)
4. Click "Open" to establish communication with the control board
## Step 4: Scan for Motors
1. Once connected, click the "Search" button to detect all connected motors
2. The software will automatically discover and list all motors on the bus
3. Each motor will appear with its ID number
## Step 5: Update Firmware
For each motor you want to update:
1. **Select the motor** from the list by clicking on it
2. **Click on Upgrade tab**:
3. **Click on Online button**:
- If an potential firmware update is found, it will be displayed in the box
4. **Click on Upgrade button**:
- The update progress will be displayed
## Step 6: Verify Update
1. After the update completes, the software should automatically refresh the motor information
2. Verify that the firmware version has been updated to the expected version
## Important Notes
⚠️ **Warning**: Do not disconnect power or USB during firmware updates, it will potentially brick the motor.
## Bonus: Motor Debugging on Linux/macOS
For debugging purposes only, you can use the open-source Feetech Debug Tool:
- **Repository**: [FT_SCServo_Debug_Qt](https://github.com/CarolinePascal/FT_SCServo_Debug_Qt/tree/fix/port-search-timer)
### Installation Instructions
Follow the instructions in the repository to install the tool, for Ubuntu you can directly install it, for MacOS you need to build it from source.
**Limitations:**
- This tool is for debugging and parameter adjustment only
- Firmware updates must still be done on Windows with official Feetech software
+2 -2
View File
@@ -412,7 +412,7 @@ Example configuration for training the [reward classifier](https://huggingface.c
To train the classifier, use the `train.py` script with your configuration:
```bash
lerobot-train --config_path path/to/reward_classifier_train_config.json
python -m lerobot.scripts.train --config_path path/to/reward_classifier_train_config.json
```
**Deploying and Testing the Model**
@@ -458,7 +458,7 @@ The reward classifier will automatically provide rewards based on the visual inp
3. **Train the classifier**:
```bash
lerobot-train --config_path src/lerobot/configs/reward_classifier_train_config.json
python -m lerobot.scripts.train --config_path src/lerobot/configs/reward_classifier_train_config.json
```
4. **Test the classifier**:
+11 -11
View File
@@ -19,7 +19,7 @@ pip install -e ".[hopejr]"
Before starting calibration and operation, you need to identify the USB ports for each HopeJR component. Run this script to find the USB ports for the arm, hand, glove, and exoskeleton:
```bash
lerobot-find-port
python -m lerobot.find_port
```
This will display the available USB ports and their associated devices. Make note of the port paths (e.g., `/dev/tty.usbmodem58760433331`, `/dev/tty.usbmodem11301`) as you'll need to specify them in the `--robot.port` and `--teleop.port` parameters when recording data, replaying episodes, or running teleoperation scripts.
@@ -31,7 +31,7 @@ Before performing teleoperation, HopeJR's limbs need to be calibrated. Calibrati
### 1.1 Calibrate Robot Hand
```bash
lerobot-calibrate \
python -m lerobot.calibrate \
--robot.type=hope_jr_hand \
--robot.port=/dev/tty.usbmodem58760432281 \
--robot.id=blue \
@@ -81,7 +81,7 @@ Once you have set the appropriate boundaries for all joints, click "Save" to sav
### 1.2 Calibrate Teleoperator Glove
```bash
lerobot-calibrate \
python -m lerobot.calibrate \
--teleop.type=homunculus_glove \
--teleop.port=/dev/tty.usbmodem11201 \
--teleop.id=red \
@@ -120,7 +120,7 @@ Once calibration is complete, the system will save the calibration to `/Users/yo
### 1.3 Calibrate Robot Arm
```bash
lerobot-calibrate \
python -m lerobot.calibrate \
--robot.type=hope_jr_arm \
--robot.port=/dev/tty.usbserial-1110 \
--robot.id=white
@@ -146,7 +146,7 @@ Use the calibration interface to set the range boundaries for each joint. Move e
### 1.4 Calibrate Teleoperator Exoskeleton
```bash
lerobot-calibrate \
python -m lerobot.calibrate \
--teleop.type=homunculus_arm \
--teleop.port=/dev/tty.usbmodem11201 \
--teleop.id=black
@@ -178,7 +178,7 @@ Due to global variable conflicts in the Feetech middleware, teleoperation for ar
### Hand
```bash
lerobot-teleoperate \
python -m lerobot.teleoperate \
--robot.type=hope_jr_hand \
--robot.port=/dev/tty.usbmodem58760432281 \
--robot.id=blue \
@@ -194,7 +194,7 @@ lerobot-teleoperate \
### Arm
```bash
lerobot-teleoperate \
python -m lerobot.teleoperate \
--robot.type=hope_jr_arm \
--robot.port=/dev/tty.usbserial-1110 \
--robot.id=white \
@@ -214,7 +214,7 @@ Record, Replay and Train with Hope-JR is still experimental.
This step records the dataset, which can be seen as an example [here](https://huggingface.co/datasets/nepyope/hand_record_test_with_video_data/settings).
```bash
lerobot-record \
python -m lerobot.record \
--robot.type=hope_jr_hand \
--robot.port=/dev/tty.usbmodem58760432281 \
--robot.id=right \
@@ -236,7 +236,7 @@ lerobot-record \
### Replay
```bash
lerobot-replay \
python -m lerobot.replay \
--robot.type=hope_jr_hand \
--robot.port=/dev/tty.usbmodem58760432281 \
--robot.id=right \
@@ -248,7 +248,7 @@ lerobot-replay \
### Train
```bash
lerobot-train \
python -m lerobot.scripts.train \
--dataset.repo_id=nepyope/hand_record_test_with_video_data \
--policy.type=act \
--output_dir=outputs/train/hopejr_hand \
@@ -263,7 +263,7 @@ lerobot-train \
This training run can be viewed as an example [here](https://wandb.ai/tino/lerobot/runs/rp0k8zvw?nw=nwusertino).
```bash
lerobot-record \
python -m lerobot.record \
--robot.type=hope_jr_hand \
--robot.port=/dev/tty.usbmodem58760432281 \
--robot.id=right \
+8 -8
View File
@@ -45,7 +45,7 @@ Note that the `id` associated with a robot is used to store the calibration file
<hfoptions id="teleoperate_so101">
<hfoption id="Command">
```bash
lerobot-teleoperate \
python -m lerobot.teleoperate \
--robot.type=so101_follower \
--robot.port=/dev/tty.usbmodem58760431541 \
--robot.id=my_awesome_follower_arm \
@@ -101,7 +101,7 @@ With `rerun`, you can teleoperate again while simultaneously visualizing the cam
<hfoptions id="teleoperate_koch_camera">
<hfoption id="Command">
```bash
lerobot-teleoperate \
python -m lerobot.teleoperate \
--robot.type=koch_follower \
--robot.port=/dev/tty.usbmodem58760431541 \
--robot.id=my_awesome_follower_arm \
@@ -174,7 +174,7 @@ Now you can record a dataset. To record 5 episodes and upload your dataset to th
<hfoptions id="record">
<hfoption id="Command">
```bash
lerobot-record \
python -m lerobot.record \
--robot.type=so101_follower \
--robot.port=/dev/tty.usbmodem585A0076841 \
--robot.id=my_awesome_follower_arm \
@@ -376,7 +376,7 @@ You can replay the first episode on your robot with either the command below or
<hfoptions id="replay">
<hfoption id="Command">
```bash
lerobot-replay \
python -m lerobot.replay \
--robot.type=so101_follower \
--robot.port=/dev/tty.usbmodem58760431541 \
--robot.id=my_awesome_follower_arm \
@@ -428,10 +428,10 @@ Your robot should replicate movements similar to those you recorded. For example
## Train a policy
To train a policy to control your robot, use the [`lerobot-train`](https://github.com/huggingface/lerobot/blob/main/src/lerobot/scripts/train.py) script. A few arguments are required. Here is an example command:
To train a policy to control your robot, use the [`python -m lerobot.scripts.train`](https://github.com/huggingface/lerobot/blob/main/src/lerobot/scripts/train.py) script. A few arguments are required. Here is an example command:
```bash
lerobot-train \
python -m lerobot.scripts.train \
--dataset.repo_id=${HF_USER}/so101_test \
--policy.type=act \
--output_dir=outputs/train/act_so101_test \
@@ -453,7 +453,7 @@ Training should take several hours. You will find checkpoints in `outputs/train/
To resume training from a checkpoint, below is an example command to resume from `last` checkpoint of the `act_so101_test` policy:
```bash
lerobot-train \
python -m lerobot.scripts.train \
--config_path=outputs/train/act_so101_test/checkpoints/last/pretrained_model/train_config.json \
--resume=true
```
@@ -490,7 +490,7 @@ You can use the `record` script from [`lerobot/record.py`](https://github.com/hu
<hfoptions id="eval">
<hfoption id="Command">
```bash
lerobot-record \
python -m lerobot.record \
--robot.type=so100_follower \
--robot.port=/dev/ttyACM1 \
--robot.cameras="{ up: {type: opencv, index_or_path: /dev/video10, width: 640, height: 480, fps: 30}, side: {type: intelrealsense, serial_number_or_name: 233522074606, width: 640, height: 480, fps: 30}}" \
+2 -2
View File
@@ -96,10 +96,10 @@ If you uploaded your dataset to the hub you can [visualize your dataset online](
## Train a policy
To train a policy to control your robot, use the [`lerobot-train`](https://github.com/huggingface/lerobot/blob/main/src/lerobot/scripts/train.py) script. A few arguments are required. Here is an example command:
To train a policy to control your robot, use the [`python -m lerobot.scripts.train`](https://github.com/huggingface/lerobot/blob/main/src/lerobot/scripts/train.py) script. A few arguments are required. Here is an example command:
```bash
lerobot-train \
python -m lerobot.scripts.train \
--dataset.repo_id=${HF_USER}/il_gym \
--policy.type=act \
--output_dir=outputs/train/il_sim_test \
+5 -5
View File
@@ -31,7 +31,7 @@ pip install -e ".[dynamixel]"
To find the port for each bus servo adapter, run this script:
```bash
lerobot-find-port
python -m lerobot.find_port
```
<hfoptions id="example">
@@ -98,7 +98,7 @@ For a visual reference on how to set the motor ids please refer to [this video](
<hfoption id="Command">
```bash
lerobot-setup-motors \
python -m lerobot.setup_motors \
--robot.type=koch_follower \
--robot.port=/dev/tty.usbmodem575E0031751 # <- paste here the port found at previous step
```
@@ -174,7 +174,7 @@ Do the same steps for the leader arm but modify the command or script accordingl
<hfoption id="Command">
```bash
lerobot-setup-motors \
python -m lerobot.setup_motors \
--teleop.type=koch_leader \
--teleop.port=/dev/tty.usbmodem575E0031751 \ # <- paste here the port found at previous step
```
@@ -211,7 +211,7 @@ Run the following command or API example to calibrate the follower arm:
<hfoption id="Command">
```bash
lerobot-calibrate \
python -m lerobot.calibrate \
--robot.type=koch_follower \
--robot.port=/dev/tty.usbmodem58760431551 \ # <- The port of your robot
--robot.id=my_awesome_follower_arm # <- Give the robot a unique name
@@ -249,7 +249,7 @@ Do the same steps to calibrate the leader arm, run the following command or API
<hfoption id="Command">
```bash
lerobot-calibrate \
python -m lerobot.calibrate \
--teleop.type=koch_leader \
--teleop.port=/dev/tty.usbmodem58760431551 \ # <- The port of your robot
--teleop.id=my_awesome_leader_arm # <- Give the robot a unique name
+4 -4
View File
@@ -60,7 +60,7 @@ First, we will assemble the two SO100/SO101 arms. One to attach to the mobile ba
To find the port for each bus servo adapter, run this script:
```bash
lerobot-find-port
python -m lerobot.find_port
```
<hfoptions id="example">
@@ -116,7 +116,7 @@ The instructions for configuring the motors can be found in the SO101 [docs](./s
You can run this command to setup motors for LeKiwi. It will first setup the motors for arm (id 6..1) and then setup motors for wheels (9,8,7)
```bash
lerobot-setup-motors \
python -m lerobot.setup_motors \
--robot.type=lekiwi \
--robot.port=/dev/tty.usbmodem58760431551 # <- paste here the port found at previous step
```
@@ -174,7 +174,7 @@ The calibration process is very important because it allows a neural network tra
Make sure the arm is connected to the Raspberry Pi and run this script or API example (on the Raspberry Pi via SSH) to launch calibration of the follower arm:
```bash
lerobot-calibrate \
python -m lerobot.calibrate \
--robot.type=lekiwi \
--robot.id=my_awesome_kiwi # <- Give the robot a unique name
```
@@ -193,7 +193,7 @@ Then, to calibrate the leader arm (which is attached to the laptop/pc). Run the
<hfoption id="Command">
```bash
lerobot-calibrate \
python -m lerobot.calibrate \
--teleop.type=so100_leader \
--teleop.port=/dev/tty.usbmodem58760431551 \ # <- The port of your robot
--teleop.id=my_awesome_leader_arm # <- Give the robot a unique name
+3 -3
View File
@@ -54,7 +54,7 @@ If you don't have a gpu device, you can train using our notebook on [![Google Co
Pass your dataset to the training script using `--dataset.repo_id`. If you want to test your installation, run the following command where we use one of the datasets we collected for the [SmolVLA Paper](https://huggingface.co/papers/2506.01844).
```bash
cd lerobot && lerobot-train \
cd lerobot && python -m lerobot.scripts.train \
--policy.path=lerobot/smolvla_base \
--dataset.repo_id=${HF_USER}/mydataset \
--batch_size=64 \
@@ -73,7 +73,7 @@ cd lerobot && lerobot-train \
Fine-tuning is an art. For a complete overview of the options for finetuning, run
```bash
lerobot-train --help
python -m lerobot.scripts.train --help
```
<p align="center">
@@ -97,7 +97,7 @@ Similarly for when recording an episode, it is recommended that you are logged i
Once you are logged in, you can run inference in your setup by doing:
```bash
lerobot-record \
python -m lerobot.record \
--robot.type=so101_follower \
--robot.port=/dev/ttyACM0 \ # <- Use your port
--robot.id=my_blue_follower_arm \ # <- Use your robot id
+5 -5
View File
@@ -26,7 +26,7 @@ Unlike the SO-101, the motor connectors are not easily accessible once the arm i
To find the port for each bus servo adapter, run this script:
```bash
lerobot-find-port
python -m lerobot.find_port
```
<hfoptions id="example">
@@ -93,7 +93,7 @@ For a visual reference on how to set the motor ids please refer to [this video](
<hfoption id="Command">
```bash
lerobot-setup-motors \
python -m lerobot.setup_motors \
--robot.type=so100_follower \
--robot.port=/dev/tty.usbmodem585A0076841 # <- paste here the port found at previous step
```
@@ -168,7 +168,7 @@ Do the same steps for the leader arm.
<hfoptions id="setup_motors">
<hfoption id="Command">
```bash
lerobot-setup-motors \
python -m lerobot.setup_motors \
--teleop.type=so100_leader \
--teleop.port=/dev/tty.usbmodem575E0031751 # <- paste here the port found at previous step
```
@@ -568,7 +568,7 @@ Run the following command or API example to calibrate the follower arm:
<hfoption id="Command">
```bash
lerobot-calibrate \
python -m lerobot.calibrate \
--robot.type=so100_follower \
--robot.port=/dev/tty.usbmodem58760431551 \ # <- The port of your robot
--robot.id=my_awesome_follower_arm # <- Give the robot a unique name
@@ -606,7 +606,7 @@ Do the same steps to calibrate the leader arm, run the following command or API
<hfoption id="Command">
```bash
lerobot-calibrate \
python -m lerobot.calibrate \
--teleop.type=so100_leader \
--teleop.port=/dev/tty.usbmodem58760431551 \ # <- The port of your robot
--teleop.id=my_awesome_leader_arm # <- Give the robot a unique name
+5 -5
View File
@@ -162,7 +162,7 @@ It is advisable to install one 3-pin cable in the motor after placing them befor
To find the port for each bus servo adapter, connect MotorBus to your computer via USB and power. Run the following script and disconnect the MotorBus when prompted:
```bash
lerobot-find-port
python -m lerobot.find_port
```
<hfoptions id="example">
@@ -240,7 +240,7 @@ Connect the usb cable from your computer and the power supply to the follower ar
<hfoption id="Command">
```bash
lerobot-setup-motors \
python -m lerobot.setup_motors \
--robot.type=so101_follower \
--robot.port=/dev/tty.usbmodem585A0076841 # <- paste here the port found at previous step
```
@@ -316,7 +316,7 @@ Do the same steps for the leader arm.
<hfoption id="Command">
```bash
lerobot-setup-motors \
python -m lerobot.setup_motors \
--teleop.type=so101_leader \
--teleop.port=/dev/tty.usbmodem575E0031751 # <- paste here the port found at previous step
```
@@ -353,7 +353,7 @@ Run the following command or API example to calibrate the follower arm:
<hfoption id="Command">
```bash
lerobot-calibrate \
python -m lerobot.calibrate \
--robot.type=so101_follower \
--robot.port=/dev/tty.usbmodem58760431551 \ # <- The port of your robot
--robot.id=my_awesome_follower_arm # <- Give the robot a unique name
@@ -402,7 +402,7 @@ Do the same steps to calibrate the leader arm, run the following command or API
<hfoption id="Command">
```bash
lerobot-calibrate \
python -m lerobot.calibrate \
--teleop.type=so101_leader \
--teleop.port=/dev/tty.usbmodem58760431551 \ # <- The port of your robot
--teleop.id=my_awesome_leader_arm # <- Give the robot a unique name
+14 -14
View File
@@ -62,7 +62,7 @@ By default, every field takes its default value specified in the dataclass. If a
Let's say that we want to train [Diffusion Policy](../src/lerobot/policies/diffusion) on the [pusht](https://huggingface.co/datasets/lerobot/pusht) dataset, using the [gym_pusht](https://github.com/huggingface/gym-pusht) environment for evaluation. The command to do so would look like this:
```bash
lerobot-train \
python -m lerobot.scripts.train \
--dataset.repo_id=lerobot/pusht \
--policy.type=diffusion \
--env.type=pusht
@@ -77,7 +77,7 @@ Let's break this down:
Let's see another example. Let's say you've been training [ACT](../src/lerobot/policies/act) on [lerobot/aloha_sim_insertion_human](https://huggingface.co/datasets/lerobot/aloha_sim_insertion_human) using the [gym-aloha](https://github.com/huggingface/gym-aloha) environment for evaluation with:
```bash
lerobot-train \
python -m lerobot.scripts.train \
--policy.type=act \
--dataset.repo_id=lerobot/aloha_sim_insertion_human \
--env.type=aloha \
@@ -90,7 +90,7 @@ We now want to train a different policy for aloha on another task. We'll change
Looking at the [`AlohaEnv`](../src/lerobot/envs/configs.py) config, the task is `"AlohaInsertion-v0"` by default, which corresponds to the task we trained on in the command above. The [gym-aloha](https://github.com/huggingface/gym-aloha?tab=readme-ov-file#description) environment also has the `AlohaTransferCube-v0` task which corresponds to this other task we want to train on. Putting this together, we can train this new policy on this different task using:
```bash
lerobot-train \
python -m lerobot.scripts.train \
--policy.type=act \
--dataset.repo_id=lerobot/aloha_sim_transfer_cube_human \
--env.type=aloha \
@@ -127,7 +127,7 @@ Now, let's assume that we want to reproduce the run just above. That run has pro
We can then simply load the config values from this file using:
```bash
lerobot-train \
python -m lerobot.scripts.train \
--config_path=outputs/train/act_aloha_transfer/checkpoints/last/pretrained_model/ \
--output_dir=outputs/train/act_aloha_transfer_2
```
@@ -137,7 +137,7 @@ lerobot-train \
Similarly to Hydra, we can still override some parameters in the CLI if we want to, e.g.:
```bash
lerobot-train \
python -m lerobot.scripts.train \
--config_path=outputs/train/act_aloha_transfer/checkpoints/last/pretrained_model/ \
--output_dir=outputs/train/act_aloha_transfer_2
--policy.n_action_steps=80
@@ -148,7 +148,7 @@ lerobot-train \
`--config_path` can also accept the repo_id of a repo on the hub that contains a `train_config.json` file, e.g. running:
```bash
lerobot-train --config_path=lerobot/diffusion_pusht
python -m lerobot.scripts.train --config_path=lerobot/diffusion_pusht
```
will start a training run with the same configuration used for training [lerobot/diffusion_pusht](https://huggingface.co/lerobot/diffusion_pusht)
@@ -160,7 +160,7 @@ Being able to resume a training run is important in case it crashed or aborted f
Let's reuse the command from the previous run and add a few more options:
```bash
lerobot-train \
python -m lerobot.scripts.train \
--policy.type=act \
--dataset.repo_id=lerobot/aloha_sim_transfer_cube_human \
--env.type=aloha \
@@ -179,7 +179,7 @@ INFO 2025-01-24 16:10:56 ts/train.py:263 Checkpoint policy after step 100
Now let's simulate a crash by killing the process (hit `ctrl`+`c`). We can then simply resume this run from the last checkpoint available with:
```bash
lerobot-train \
python -m lerobot.scripts.train \
--config_path=outputs/train/run_resumption/checkpoints/last/pretrained_model/ \
--resume=true
```
@@ -190,7 +190,7 @@ Another reason for which you might want to resume a run is simply to extend trai
You could double the number of steps of the previous run with:
```bash
lerobot-train \
python -m lerobot.scripts.train \
--config_path=outputs/train/run_resumption/checkpoints/last/pretrained_model/ \
--resume=true \
--steps=200000
@@ -224,7 +224,7 @@ In addition to the features currently in Draccus, we've added a special `.path`
For example, we could fine-tune a [policy pre-trained on the aloha transfer task](https://huggingface.co/lerobot/act_aloha_sim_transfer_cube_human) on the aloha insertion task. We can achieve this with:
```bash
lerobot-train \
python -m lerobot.scripts.train \
--policy.path=lerobot/act_aloha_sim_transfer_cube_human \
--dataset.repo_id=lerobot/aloha_sim_insertion_human \
--env.type=aloha \
@@ -270,7 +270,7 @@ We'll summarize here the main use cases to remember from this tutorial.
#### Train a policy from scratch CLI
```bash
lerobot-train \
python -m lerobot.scripts.train \
--policy.type=act \ # <- select 'act' policy
--env.type=pusht \ # <- select 'pusht' environment
--dataset.repo_id=lerobot/pusht # <- train on this dataset
@@ -279,7 +279,7 @@ lerobot-train \
#### Train a policy from scratch - config file + CLI
```bash
lerobot-train \
python -m lerobot.scripts.train \
--config_path=path/to/pretrained_model \ # <- can also be a repo_id
--policy.n_action_steps=80 # <- you may still override values
```
@@ -287,7 +287,7 @@ lerobot-train \
#### Resume/continue a training run
```bash
lerobot-train \
python -m lerobot.scripts.train \
--config_path=checkpoint/pretrained_model/ \
--resume=true \
--steps=200000 # <- you can change some training parameters
@@ -296,7 +296,7 @@ lerobot-train \
#### Fine-tuning
```bash
lerobot-train \
python -m lerobot.scripts.train \
--policy.path=lerobot/act_aloha_sim_transfer_cube_human \ # <- can also be a local path to a checkpoint
--dataset.repo_id=lerobot/aloha_sim_insertion_human \
--env.type=aloha \
+1 -1
View File
@@ -18,7 +18,7 @@ Replays the actions of an episode from a dataset on a robot.
Example:
```shell
lerobot-replay \
python -m lerobot.replay \
--robot.type=so100_follower \
--robot.port=/dev/tty.usbmodem58760431541 \
--robot.id=black \
+1 -1
View File
@@ -25,7 +25,7 @@ discord = "https://discord.gg/s3KuuzsPFb"
[project]
name = "lerobot"
version = "0.3.4"
version = "0.3.3"
description = "🤗 LeRobot: State-of-the-art Machine Learning for Real-World Robotics in Pytorch"
readme = "README.md"
license = { text = "Apache-2.0" }
+1 -1
View File
@@ -18,7 +18,7 @@ Helper to recalibrate your device (robot or teleoperator).
Example:
```shell
lerobot-calibrate \
python -m lerobot.calibrate \
--teleop.type=so100_leader \
--teleop.port=/dev/tty.usbmodem58760431551 \
--teleop.id=blue
+3 -2
View File
@@ -60,7 +60,7 @@ class OpenCVCamera(Camera):
or port changes, especially on Linux. Use the provided utility script to find
available camera indices or paths:
```bash
lerobot-find-cameras opencv
python -m lerobot.find_cameras opencv
```
The camera's default settings (FPS, resolution, color mode) are used unless
@@ -165,7 +165,8 @@ class OpenCVCamera(Camera):
self.videocapture.release()
self.videocapture = None
raise ConnectionError(
f"Failed to open {self}.Run `lerobot-find-cameras opencv` to find available cameras."
f"Failed to open {self}."
f"Run `python -m lerobot.find_cameras opencv` to find available cameras."
)
self._configure_capture_settings()
@@ -51,7 +51,7 @@ class RealSenseCamera(Camera):
Use the provided utility script to find available camera indices and default profiles:
```bash
lerobot-find-cameras realsense
python -m lerobot.find_cameras realsense
```
A `RealSenseCamera` instance requires a configuration object specifying the
@@ -176,7 +176,8 @@ class RealSenseCamera(Camera):
self.rs_profile = None
self.rs_pipeline = None
raise ConnectionError(
f"Failed to open {self}.Run `lerobot-find-cameras realsense` to find available cameras."
f"Failed to open {self}."
"Run `python -m lerobot.find_cameras realsense` to find available cameras."
) from e
self._configure_capture_settings()
+4 -6
View File
@@ -486,8 +486,8 @@ class LeRobotDataset(torch.utils.data.Dataset):
self.episode_data_index = get_episode_data_index(self.meta.episodes, self.episodes)
# Check timestamps
timestamps = torch.stack(self.hf_dataset["timestamp"]).numpy()
episode_indices = torch.stack(self.hf_dataset["episode_index"]).numpy()
timestamps = torch.tensor(self.hf_dataset["timestamp"]).numpy()
episode_indices = torch.tensor(self.hf_dataset["episode_index"]).numpy()
ep_data_index_np = {k: t.numpy() for k, t in self.episode_data_index.items()}
check_timestamps_sync(timestamps, episode_indices, ep_data_index_np, self.fps, self.tolerance_s)
@@ -667,7 +667,7 @@ class LeRobotDataset(torch.utils.data.Dataset):
for key in self.meta.video_keys:
if query_indices is not None and key in query_indices:
timestamps = self.hf_dataset.select(query_indices[key])["timestamp"]
query_timestamps[key] = torch.stack(timestamps).tolist()
query_timestamps[key] = torch.tensor(timestamps).tolist()
else:
query_timestamps[key] = [current_ts]
@@ -675,7 +675,7 @@ class LeRobotDataset(torch.utils.data.Dataset):
def _query_hf_dataset(self, query_indices: dict[str, list[int]]) -> dict:
return {
key: torch.stack(self.hf_dataset.select(q_idx)[key])
key: torch.tensor(self.hf_dataset.select(q_idx)[key])
for key, q_idx in query_indices.items()
if key not in self.meta.video_keys
}
@@ -825,8 +825,6 @@ class LeRobotDataset(torch.utils.data.Dataset):
"""
if not episode_data:
episode_buffer = self.episode_buffer
else:
episode_buffer = episode_data
validate_episode_buffer(episode_buffer, self.meta.total_episodes, self.features)
+2 -2
View File
@@ -632,7 +632,7 @@ def cycle(iterable):
iterator = iter(iterable)
def create_branch(repo_id, *, branch: str, repo_type: str | None = None) -> None:
def create_branch(repo_id, *, branch: str, repo_type: str | None = None, revision: str | None = None) -> None:
"""Create a branch on a existing Hugging Face repo. Delete the branch if it already
exists before creating it.
"""
@@ -644,7 +644,7 @@ def create_branch(repo_id, *, branch: str, repo_type: str | None = None) -> None
if ref in refs:
api.delete_branch(repo_id, repo_type=repo_type, branch=branch)
api.create_branch(repo_id, repo_type=repo_type, branch=branch)
api.create_branch(repo_id, repo_type=repo_type, branch=branch, revision=revision)
def create_lerobot_dataset_card(
@@ -105,6 +105,7 @@ import filecmp
import json
import logging
import math
import re
import shutil
import subprocess
import tempfile
@@ -119,6 +120,7 @@ from huggingface_hub import HfApi
from huggingface_hub.errors import EntryNotFoundError, HfHubHTTPError
from safetensors.torch import load_file
from lerobot.datasets.backward_compatibility import CompatibilityError
from lerobot.datasets.utils import (
DEFAULT_CHUNK_SIZE,
DEFAULT_PARQUET_PATH,
@@ -130,6 +132,7 @@ from lerobot.datasets.utils import (
create_branch,
create_lerobot_dataset_card,
flatten_dict,
get_repo_versions,
get_safe_version,
load_json,
unflatten_dict,
@@ -205,7 +208,7 @@ def convert_stats_to_json(v1_dir: Path, v2_dir: Path) -> None:
def get_features_from_hf_dataset(
dataset: Dataset, robot_config: RobotConfig | None = None
) -> dict[str, list]:
robot_config = parse_robot_config(robot_config)
robot_config = parse_robot_config(robot_config) if robot_config else None
features = {}
for key, ft in dataset.features.items():
if isinstance(ft, datasets.Value):
@@ -325,7 +328,19 @@ def move_videos(
video_files = [str(f.relative_to(work_dir)) for f in work_dir.glob("videos*/*/*/*.mp4")]
videos_moved = True # Videos have already been moved
assert len(video_files) == total_episodes * len(video_keys)
expected_count = total_episodes * len(video_keys)
if len(video_files) != expected_count:
print(
f"Warning: expected {expected_count} video files "
f"({total_episodes} episodes x {len(video_keys)} keys), "
f"found {len(video_files)}. Keeping only videos matching existing episodes."
)
episode_pattern = re.compile(r"episode_(\d+)")
valid_episodes = set(range(total_episodes))
video_files = [
f for f in video_files
if (m := episode_pattern.search(f)) and int(m.group(1)) in valid_episodes
]
lfs_untracked_videos = _get_lfs_untracked_videos(work_dir, video_files)
@@ -442,8 +457,16 @@ def convert_dataset(
test_branch: str | None = None,
**card_kwargs,
):
v1 = get_safe_version(repo_id, V16)
v1x_dir = local_dir / V16 / repo_id
try:
v1 = get_safe_version(repo_id, V16)
except CompatibilityError:
hub_versions = get_repo_versions(repo_id)
v1x_versions = [v for v in hub_versions if v.major == 1]
if not v1x_versions:
raise
v1 = f"v{max(v1x_versions)}"
logging.warning(f"v1.6 not found for {repo_id}, falling back to {v1}")
v1x_dir = local_dir / v1 / repo_id
v20_dir = local_dir / V20 / repo_id
v1x_dir.mkdir(parents=True, exist_ok=True)
v20_dir.mkdir(parents=True, exist_ok=True)
@@ -455,7 +478,7 @@ def convert_dataset(
branch = "main"
if test_branch:
branch = test_branch
create_branch(repo_id=repo_id, branch=test_branch, repo_type="dataset")
create_branch(repo_id=repo_id, branch=test_branch, repo_type="dataset", revision=v1)
metadata_v1 = load_json(v1x_dir / V1_INFO_PATH)
dataset = datasets.load_dataset("parquet", data_dir=v1x_dir / "data", split="train")
@@ -564,6 +587,12 @@ def convert_dataset(
"features": features,
}
write_json(metadata_v2_0, v20_dir / INFO_PATH)
info = load_json(v20_dir / INFO_PATH)
if "language_instruction" in info.get("features", {}):
del info["features"]["language_instruction"]
write_json(info, v20_dir / INFO_PATH)
convert_stats_to_json(v1x_dir, v20_dir)
card = create_lerobot_dataset_card(tags=repo_tags, dataset_info=metadata_v2_0, **card_kwargs)
@@ -677,6 +706,8 @@ def main():
if args.robot is not None:
robot_config = make_robot_config(args.robot)
else:
robot_config = None
del args.robot
@@ -85,7 +85,7 @@ def convert_dataset(
path_in_repo=STATS_PATH, repo_id=dataset.repo_id, revision=branch, repo_type="dataset"
)
hub_api.create_tag(repo_id, tag=CODEBASE_VERSION, revision=branch, repo_type="dataset")
#hub_api.create_tag(repo_id, tag=CODEBASE_VERSION, revision=branch, repo_type="dataset")
if __name__ == "__main__":
@@ -45,6 +45,8 @@ def convert_episode_stats(dataset: LeRobotDataset, ep_idx: int):
axes_to_reduce = (0, 2, 3) if ft["dtype"] in ["image", "video"] else 0
keepdims = True if ft["dtype"] in ["image", "video"] else ep_ft_data.ndim == 1
if ft["dtype"] in ["image", "video"] and ep_ft_data.ndim == 3:
ep_ft_data = np.expand_dims(ep_ft_data, axis=0)
ep_stats[key] = get_feature_stats(ep_ft_data, axis=axes_to_reduce, keepdims=keepdims)
if ft["dtype"] in ["image", "video"]: # remove batch dim
+1 -1
View File
@@ -20,7 +20,7 @@ Helper to find the camera devices available in your system.
Example:
```shell
lerobot-find-cameras
python -m lerobot.find_cameras
```
"""
+1 -1
View File
@@ -18,7 +18,7 @@ Helper to find the USB port associated with your MotorsBus.
Example:
```shell
lerobot-find-port
python -m lerobot.find_port
```
"""
-2
View File
@@ -107,8 +107,6 @@ X_SERIES_ENCODINGS_TABLE = {
"Goal_PWM": X_SERIES_CONTROL_TABLE["Goal_PWM"][1],
"Goal_Current": X_SERIES_CONTROL_TABLE["Goal_Current"][1],
"Goal_Velocity": X_SERIES_CONTROL_TABLE["Goal_Velocity"][1],
"Goal_Position": X_SERIES_CONTROL_TABLE["Goal_Position"][1],
"Present_Position": X_SERIES_CONTROL_TABLE["Present_Position"][1],
"Present_PWM": X_SERIES_CONTROL_TABLE["Present_PWM"][1],
"Present_Current": X_SERIES_CONTROL_TABLE["Present_Current"][1],
"Present_Velocity": X_SERIES_CONTROL_TABLE["Present_Velocity"][1],
+2 -2
View File
@@ -222,7 +222,7 @@ class MotorsBus(abc.ABC):
A MotorsBus subclass instance requires a port (e.g. `FeetechMotorsBus(port="/dev/tty.usbmodem575E0031751"`)).
To find the port, you can run our utility script:
```bash
lerobot-find-port.py
python -m lerobot.find_port.py
>>> Finding all available ports for the MotorsBus.
>>> ["/dev/tty.usbmodem575E0032081", "/dev/tty.usbmodem575E0031751"]
>>> Remove the usb cable from your MotorsBus and press Enter when done.
@@ -446,7 +446,7 @@ class MotorsBus(abc.ABC):
except (FileNotFoundError, OSError, serial.SerialException) as e:
raise ConnectionError(
f"\nCould not connect on port '{self.port}'. Make sure you are using the correct port."
"\nTry running `lerobot-find-port`\n"
"\nTry running `python -m lerobot.find_port`\n"
) from e
@abc.abstractmethod
+2 -2
View File
@@ -30,7 +30,7 @@ pip install -e ".[pi0]"
Example of finetuning the pi0 pretrained model (`pi0_base` in `openpi`):
```bash
lerobot-train \
python -m lerobot.scripts.train \
--policy.path=lerobot/pi0 \
--dataset.repo_id=danaaubakirova/koch_test
```
@@ -38,7 +38,7 @@ lerobot-train \
Example of finetuning the pi0 neural network with PaliGemma and expert Gemma
pretrained with VLM default parameters before pi0 finetuning:
```bash
lerobot-train \
python -m lerobot.scripts.train \
--policy.type=pi0 \
--dataset.repo_id=danaaubakirova/koch_test
```
@@ -25,14 +25,14 @@ Disclaimer: It is not expected to perform as well as the original implementation
Example of finetuning the pi0+FAST pretrained model (`pi0_fast_base` in `openpi`):
```bash
lerobot-train \
python -m lerobot.scripts.train \
--policy.path=lerobot/pi0fast_base \
--dataset.repo_id=danaaubakirova/koch_test
```
Example of training the pi0+FAST neural network with from scratch:
```bash
lerobot-train \
python -m lerobot.scripts.train \
--policy.type=pi0fast \
--dataset.repo_id=danaaubakirova/koch_test
```
@@ -28,7 +28,7 @@ pip install -e ".[smolvla]"
Example of finetuning the smolvla pretrained model (`smolvla_base`):
```bash
lerobot-train \
python -m lerobot.scripts.train \
--policy.path=lerobot/smolvla_base \
--dataset.repo_id=danaaubakirova/svla_so100_task1_v3 \
--batch_size=64 \
@@ -38,7 +38,7 @@ lerobot-train \
Example of finetuning a smolVLA. SmolVLA is composed of a pretrained VLM,
and an action expert.
```bash
lerobot-train \
python -m lerobot.scripts.train \
--policy.type=smolvla \
--dataset.repo_id=danaaubakirova/svla_so100_task1_v3 \
--batch_size=64 \
+2 -2
View File
@@ -18,7 +18,7 @@ Records a dataset. Actions for the robot can be either generated by teleoperatio
Example:
```shell
lerobot-record \
python -m lerobot.record \
--robot.type=so100_follower \
--robot.port=/dev/tty.usbmodem58760431541 \
--robot.cameras="{laptop: {type: opencv, camera_index: 0, width: 640, height: 480}}" \
@@ -36,7 +36,7 @@ lerobot-record \
Example recording with bimanual so100:
```shell
lerobot-record \
python -m lerobot.record \
--robot.type=bi_so100_follower \
--robot.left_arm_port=/dev/tty.usbmodem5A460851411 \
--robot.right_arm_port=/dev/tty.usbmodem5A460812391 \
+2 -2
View File
@@ -18,7 +18,7 @@ Replays the actions of an episode from a dataset on a robot.
Examples:
```shell
lerobot-replay \
python -m lerobot.replay \
--robot.type=so100_follower \
--robot.port=/dev/tty.usbmodem58760431541 \
--robot.id=black \
@@ -28,7 +28,7 @@ lerobot-replay \
Example replay with bimanual so100:
```shell
lerobot-replay \
python -m lerobot.replay \
--robot.type=bi_so100_follower \
--robot.left_arm_port=/dev/tty.usbmodem5A460851411 \
--robot.right_arm_port=/dev/tty.usbmodem5A460812391 \
@@ -161,11 +161,6 @@ class SO100Follower(Robot):
self.bus.write("I_Coefficient", motor, 0)
self.bus.write("D_Coefficient", motor, 32)
if motor == "gripper":
self.bus.write("Max_Torque_Limit", motor, 500) # 50% of max torque to avoid burnout
self.bus.write("Protection_Current", motor, 250) # 50% of max current to avoid burnout
self.bus.write("Overload_Torque", motor, 25) # 25% torque when overloaded
def setup_motors(self) -> None:
for motor in reversed(self.bus.motors):
input(f"Connect the controller board to the '{motor}' motor only and press enter.")
@@ -157,13 +157,6 @@ class SO101Follower(Robot):
self.bus.write("I_Coefficient", motor, 0)
self.bus.write("D_Coefficient", motor, 32)
if motor == "gripper":
self.bus.write(
"Max_Torque_Limit", motor, 500
) # 50% of the max torque limit to avoid burnout
self.bus.write("Protection_Current", motor, 250) # 50% of max current to avoid burnout
self.bus.write("Overload_Torque", motor, 25) # 25% torque when overloaded
def setup_motors(self) -> None:
for motor in reversed(self.bus.motors):
input(f"Connect the controller board to the '{motor}' motor only and press enter.")
+2 -2
View File
@@ -141,10 +141,10 @@ python lerobot/scripts/control_robot.py \
## Train a policy
To train a policy to control your robot, use the [`lerobot-train`](../src/lerobot/scripts/train.py) script. A few arguments are required. Here is an example command:
To train a policy to control your robot, use the [`python -m lerobot.scripts.train`](../src/lerobot/scripts/train.py) script. A few arguments are required. Here is an example command:
```bash
lerobot-train \
python -m lerobot.scripts.train \
--dataset.repo_id=${HF_USER}/aloha_test \
--policy.type=act \
--output_dir=outputs/train/act_aloha_test \
+3 -7
View File
@@ -21,7 +21,7 @@ You want to evaluate a model from the hub (eg: https://huggingface.co/lerobot/di
for 10 episodes.
```
lerobot-eval \
python -m lerobot.scripts.eval \
--policy.path=lerobot/diffusion_pusht \
--env.type=pusht \
--eval.batch_size=10 \
@@ -32,7 +32,7 @@ lerobot-eval \
OR, you want to evaluate a model checkpoint from the LeRobot training script for 10 episodes.
```
lerobot-eval \
python -m lerobot.scripts.eval \
--policy.path=outputs/train/diffusion_pusht/checkpoints/005000/pretrained_model \
--env.type=pusht \
--eval.batch_size=10 \
@@ -243,11 +243,7 @@ def eval_policy(
if max_episodes_rendered > 0 and not videos_dir:
raise ValueError("If max_episodes_rendered > 0, videos_dir must be provided.")
# Handle accelerate-wrapped models by unwrapping them
if hasattr(policy, 'module') and isinstance(policy.module, PreTrainedPolicy):
# This is likely an accelerate-wrapped model (DistributedDataParallel)
policy = policy.module
elif not isinstance(policy, PreTrainedPolicy):
if not isinstance(policy, PreTrainedPolicy):
raise ValueError(
f"Policy of type 'PreTrainedPolicy' is expected, but type '{type(policy)}' was provided."
)
@@ -302,6 +302,11 @@ class RobotClient:
self.logger.debug(f"Current latest action: {latest_action}")
# Get queue state before changes
old_size, old_timesteps = self._inspect_action_queue()
if not old_timesteps:
old_timesteps = [latest_action] # queue was empty
# Get queue state before changes
old_size, old_timesteps = self._inspect_action_queue()
if not old_timesteps:
+30 -99
View File
@@ -16,7 +16,6 @@
import logging
import time
from contextlib import nullcontext
from functools import partial
from pprint import pformat
from typing import Any
@@ -24,8 +23,6 @@ import torch
from termcolor import colored
from torch.amp import GradScaler
from torch.optim import Optimizer
import os
from datetime import timedelta
from lerobot.configs import parser
from lerobot.configs.train import TrainPipelineConfig
@@ -55,8 +52,6 @@ from lerobot.utils.utils import (
)
from lerobot.utils.wandb_utils import WandBLogger
def is_launched_with_accelerate() -> bool:
return "ACCELERATE_MIXED_PRECISION" in os.environ
def update_policy(
train_metrics: MetricsTracker,
@@ -64,65 +59,36 @@ def update_policy(
batch: Any,
optimizer: Optimizer,
grad_clip_norm: float,
grad_scaler: GradScaler | None,
grad_scaler: GradScaler,
lr_scheduler=None,
use_amp: bool = False,
lock=None,
accelerator=None,
) -> tuple[MetricsTracker, dict]:
start_time = time.perf_counter()
device = get_device_from_parameters(policy)
policy.train()
grad_norm = 0.0 # Initialize grad_norm to avoid undefined variable
if accelerator:
with accelerator.accumulate(policy):
with torch.autocast(device_type=device.type) if use_amp else nullcontext():
loss, output_dict = policy.forward(batch)
# TODO(rcadene): policy.unnormalize_outputs(out_dict)
accelerator.backward(loss)
if accelerator.sync_gradients:
grad_norm = torch.nn.utils.clip_grad_norm_(
policy.parameters(),
grad_clip_norm,
error_if_nonfinite=False,
)
optimizer.step()
optimizer.zero_grad()
else:
# Standard training loop without accelerate
with torch.autocast(device_type=device.type) if use_amp else nullcontext():
loss, output_dict = policy.forward(batch)
with torch.autocast(device_type=device.type) if use_amp else nullcontext():
loss, output_dict = policy.forward(batch)
# TODO(rcadene): policy.unnormalize_outputs(out_dict)
if grad_scaler is not None:
grad_scaler.scale(loss).backward()
# Unscale the gradient of the optimizer's assigned params in-place **prior to gradient clipping**.
grad_scaler.unscale_(optimizer)
grad_norm = torch.nn.utils.clip_grad_norm_(
policy.parameters(),
grad_clip_norm,
error_if_nonfinite=False,
)
# Optimizer's gradients are already unscaled, so scaler.step does not unscale them,
# although it still skips optimizer.step() if the gradients contain infs or NaNs.
with lock if lock is not None else nullcontext():
grad_scaler.step(optimizer)
# Updates the scale for next iteration.
grad_scaler.update()
else:
# Without GradScaler (fallback)
loss.backward()
grad_norm = torch.nn.utils.clip_grad_norm_(
policy.parameters(),
grad_clip_norm,
error_if_nonfinite=False,
)
with lock if lock is not None else nullcontext():
optimizer.step()
grad_scaler.scale(loss).backward()
optimizer.zero_grad()
# Unscale the gradient of the optimizer's assigned params in-place **prior to gradient clipping**.
grad_scaler.unscale_(optimizer)
grad_norm = torch.nn.utils.clip_grad_norm_(
policy.parameters(),
grad_clip_norm,
error_if_nonfinite=False,
)
# Optimizer's gradients are already unscaled, so scaler.step does not unscale them,
# although it still skips optimizer.step() if the gradients contain infs or NaNs.
with lock if lock is not None else nullcontext():
grad_scaler.step(optimizer)
# Updates the scale for next iteration.
grad_scaler.update()
optimizer.zero_grad()
# Step through pytorch scheduler at every batch instead of epoch
if lr_scheduler is not None:
@@ -133,7 +99,7 @@ def update_policy(
policy.update()
train_metrics.loss = loss.item()
train_metrics.grad_norm = grad_norm.item() if isinstance(grad_norm, torch.Tensor) else grad_norm
train_metrics.grad_norm = grad_norm.item()
train_metrics.lr = optimizer.param_groups[0]["lr"]
train_metrics.update_s = time.perf_counter() - start_time
return train_metrics, output_dict
@@ -142,33 +108,8 @@ def update_policy(
@parser.wrap()
def train(cfg: TrainPipelineConfig):
cfg.validate()
accelerator = None
if is_launched_with_accelerate():
import accelerate
# For example pi0 has unused params (last llm block)
from accelerate import DistributedDataParallelKwargs
ddp_kwargs = DistributedDataParallelKwargs(find_unused_parameters=True)
# accelerator = accelerate.Accelerator(step_scheduler_with_optimizer=False, kwargs_handlers=[ddp_kwargs])
from accelerate import InitProcessGroupKwargs
# Set NCCL timeout (default 30 minutes = 1800 seconds)
nccl_timeout = getattr(cfg, 'nccl_timeout', 1800)
ddp_init_kwargs = InitProcessGroupKwargs(timeout=timedelta(seconds=nccl_timeout)) # FIXME(mshukor): allow user to set timeout. This should be longer than the evaluation time
# Set gradient accumulation steps (default 1)
gradient_accumulation_steps = getattr(cfg, 'gradient_accumulation_steps', 1)
accelerator = accelerate.Accelerator(step_scheduler_with_optimizer=False, gradient_accumulation_steps=gradient_accumulation_steps, kwargs_handlers=[ddp_init_kwargs, ddp_kwargs])
if accelerator is not None and not accelerator.is_main_process:
# Disable duplicate logging on non-main processes
logging.info(f"Setting logging level on non-main process {accelerator.process_index} to WARNING.")
logging.getLogger().setLevel(logging.WARNING)
logging.info(pformat(cfg.to_dict()))
if accelerator and not accelerator.is_main_process:
# Disable logging on non-main processes.
cfg.wandb.enable = False
if cfg.wandb.enable and cfg.wandb.project:
wandb_logger = WandBLogger(cfg)
else:
@@ -202,8 +143,7 @@ def train(cfg: TrainPipelineConfig):
logging.info("Creating optimizer and scheduler")
optimizer, lr_scheduler = make_optimizer_and_scheduler(cfg, policy)
# Only use GradScaler when not using accelerate (accelerate handles mixed precision internally)
grad_scaler = None if accelerator else GradScaler(device.type, enabled=cfg.policy.use_amp)
grad_scaler = GradScaler(device.type, enabled=cfg.policy.use_amp)
step = 0 # number of policy updates (forward + backward + optim)
@@ -245,11 +185,6 @@ def train(cfg: TrainPipelineConfig):
)
dl_iter = cycle(dataloader)
# Prepare models for accelerate if using multi-GPU
if accelerator:
policy, optimizer, dataloader = accelerator.prepare(policy, optimizer, dataloader)
dl_iter = cycle(dataloader)
policy.train()
train_metrics = {
@@ -270,10 +205,9 @@ def train(cfg: TrainPipelineConfig):
batch = next(dl_iter)
train_tracker.dataloading_s = time.perf_counter() - start_time
if not accelerator:
for key in batch:
if isinstance(batch[key], torch.Tensor):
batch[key] = batch[key].to(device, non_blocking=device.type == "cuda")
for key in batch:
if isinstance(batch[key], torch.Tensor):
batch[key] = batch[key].to(device, non_blocking=device.type == "cuda")
train_tracker, output_dict = update_policy(
train_tracker,
@@ -284,7 +218,6 @@ def train(cfg: TrainPipelineConfig):
grad_scaler=grad_scaler,
lr_scheduler=lr_scheduler,
use_amp=cfg.policy.use_amp,
accelerator=accelerator,
)
# Note: eval and checkpoint happens *after* the `step`th training update has completed, so we
@@ -304,17 +237,15 @@ def train(cfg: TrainPipelineConfig):
wandb_logger.log_dict(wandb_log_dict, step)
train_tracker.reset_averages()
if cfg.save_checkpoint and is_saving_step and (not accelerator or accelerator.is_main_process):
if cfg.save_checkpoint and is_saving_step:
logging.info(f"Checkpoint policy after step {step}")
checkpoint_dir = get_step_checkpoint_dir(cfg.output_dir, cfg.steps, step)
# Unwrap model for accelerate
policy_to_save = accelerator.unwrap_model(policy) if accelerator else policy
save_checkpoint(checkpoint_dir, step, cfg, policy_to_save, optimizer, lr_scheduler)
save_checkpoint(checkpoint_dir, step, cfg, policy, optimizer, lr_scheduler)
update_last_checkpoint(checkpoint_dir)
if wandb_logger:
wandb_logger.log_policy(checkpoint_dir)
if cfg.env and is_eval_step and (not accelerator or accelerator.is_main_process):
if cfg.env and is_eval_step:
step_id = get_step_identifier(step, cfg.steps)
logging.info(f"Eval policy at step {step}")
with (
@@ -323,7 +254,7 @@ def train(cfg: TrainPipelineConfig):
):
eval_info = eval_policy(
eval_env,
accelerator.unwrap_model(policy) if accelerator else policy,
policy,
cfg.eval.n_episodes,
videos_dir=cfg.output_dir / "eval" / f"videos_step_{step_id}",
max_episodes_rendered=4,
+1 -1
View File
@@ -18,7 +18,7 @@ Helper to set motor ids and baudrate.
Example:
```shell
lerobot-setup-motors \
python -m lerobot.setup_motors \
--teleop.type=so100_leader \
--teleop.port=/dev/tty.usbmodem575E0031751
```
+2 -2
View File
@@ -18,7 +18,7 @@ Simple script to control a robot from teleoperation.
Example:
```shell
lerobot-teleoperate \
python -m lerobot.teleoperate \
--robot.type=so101_follower \
--robot.port=/dev/tty.usbmodem58760431541 \
--robot.cameras="{ front: {type: opencv, index_or_path: 0, width: 1920, height: 1080, fps: 30}}" \
@@ -32,7 +32,7 @@ lerobot-teleoperate \
Example teleoperation with bimanual so100:
```shell
lerobot-teleoperate \
python -m lerobot.teleoperate \
--robot.type=bi_so100_follower \
--robot.left_arm_port=/dev/tty.usbmodem5A460851411 \
--robot.right_arm_port=/dev/tty.usbmodem5A460812391 \
@@ -44,7 +44,7 @@ Below is the short version on how to train and run inference/eval:
### Train from scratch
```bash
lerobot-train \
python -m lerobot.scripts.train \
--dataset.repo_id=${HF_USER}/<dataset> \
--policy.type=act \
--output_dir=outputs/train/<desired_policy_repo_id> \
@@ -59,7 +59,7 @@ _Writes checkpoints to `outputs/train/<desired_policy_repo_id>/checkpoints/`._
### Evaluate the policy/run inference
```bash
lerobot-record \
python -m lerobot.record \
--robot.type=so100_follower \
--dataset.repo_id=<hf_user>/eval_<dataset> \
--policy.path=<hf_user>/<desired_policy_repo_id> \
+3 -2
View File
@@ -17,9 +17,10 @@ import time
def busy_wait(seconds):
if platform.system() == "Darwin" or platform.system() == "Windows":
# On Mac and Windows, `time.sleep` is not accurate and we need to use this while loop trick,
if platform.system() == "Darwin":
# On Mac, `time.sleep` is not accurate and we need to use this while loop trick,
# but it consumes CPU cycles.
# TODO(rcadene): find an alternative: from python 11, time.sleep is precise
end_time = time.perf_counter() + seconds
while time.perf_counter() < end_time:
pass
+3 -31
View File
@@ -60,39 +60,11 @@ def load_training_step(save_dir: Path) -> int:
def update_last_checkpoint(checkpoint_dir: Path) -> Path:
import fcntl
import tempfile
import os
last_checkpoint_dir = checkpoint_dir.parent / LAST_CHECKPOINT_LINK
if last_checkpoint_dir.is_symlink():
last_checkpoint_dir.unlink()
relative_target = checkpoint_dir.relative_to(checkpoint_dir.parent)
# Use file locking to prevent race conditions in multi-GPU training
lock_file = checkpoint_dir.parent / ".symlink_lock"
try:
with open(lock_file, 'w') as f:
# Get exclusive lock
fcntl.flock(f.fileno(), fcntl.LOCK_EX)
# Update symlink atomically
if last_checkpoint_dir.exists() or last_checkpoint_dir.is_symlink():
last_checkpoint_dir.unlink()
last_checkpoint_dir.symlink_to(relative_target)
except (OSError, FileExistsError) as e:
# Handle race conditions gracefully - another process may have already updated
if not last_checkpoint_dir.exists():
try:
last_checkpoint_dir.symlink_to(relative_target)
except FileExistsError:
pass # Another process created it, that's fine
finally:
# Clean up lock file
try:
lock_file.unlink()
except FileNotFoundError:
pass
last_checkpoint_dir.symlink_to(relative_target)
def save_checkpoint(
-45
View File
@@ -1,45 +0,0 @@
#!/bin/bash
echo "=== Local 1-GPU Accelerate Training Test with SmolVLA ==="
echo "Environment: multi"
echo "GPU: 1"
echo "Steps: 50 (quick local test)"
echo ""
# Activate conda environment
source /fsx/dana_aubakirova/miniconda3/etc/profile.d/conda.sh
conda activate multi
# Set CUDA environment for 1 GPU
export CUDA_VISIBLE_DEVICES=0
export PYTORCH_CUDA_ALLOC_CONF=max_split_size_mb:128,expandable_segments:True
export TORCH_DISTRIBUTED_DEBUG=OFF
export CUDA_LAUNCH_BLOCKING=0
export TRANSFORMERS_NO_ADVISORY_WARNINGS=1
# Change to working directory
cd /fsx/dana_aubakirova/vla/pr/lerobot
# Set output directory with timestamp
export OUTPUT_DIR="outputs/test_accelerate_1gpu_local_$(date +%Y%m%d_%H%M%S)"
echo "Output directory: $OUTPUT_DIR"
echo ""
# Test accelerate training with 1 GPU
accelerate launch --config_file accelerate_configs/1gpu_config.yaml -m lerobot.scripts.train \
--policy.path=lerobot/smolvla_base \
--policy.push_to_hub=false \
--dataset.repo_id=lerobot/svla_so100_sorting \
--dataset.video_backend=pyav \
--steps=50 \
--save_freq=25 \
--log_freq=5 \
--batch_size=1 \
--num_workers=0 \
--output_dir=$OUTPUT_DIR \
--wandb.enable=false
echo ""
echo "=== Training completed! ==="
echo "Check outputs in: $OUTPUT_DIR"
-67
View File
@@ -1,67 +0,0 @@
#!/bin/bash
#SBATCH --job-name=test_accelerate
#SBATCH --nodes=1
#SBATCH --ntasks-per-node=1
#SBATCH --cpus-per-task=16
#SBATCH --gres=gpu:2
#SBATCH --time=1:00:00
#SBATCH --partition=hopper-prod
#SBATCH --output=/fsx/dana_aubakirova/vla/logs/test_accelerate_%j.out
#SBATCH --error=/fsx/dana_aubakirova/vla/logs/test_accelerate_%j.err
# Create logs directory if it doesn't exist
mkdir -p /fsx/dana_aubakirova/vla/pr/lerobot/logs
# Activate conda environment
source /fsx/dana_aubakirova/miniconda3/etc/profile.d/conda.sh
conda activate multi
# 2-GPU Test CUDA environment
export CUDA_VISIBLE_DEVICES=0,1
export PYTORCH_CUDA_ALLOC_CONF=max_split_size_mb:128,expandable_segments:True
export TORCH_DISTRIBUTED_DEBUG=OFF
export NCCL_DEBUG=INFO
export CUDA_LAUNCH_BLOCKING=0
export ACCELERATE_USE_FSDP=false
export ACCELERATE_USE_DEEPSPEED=false
export HF_ACCELERATE_DEVICE_MAP=false
export TRANSFORMERS_NO_ADVISORY_WARNINGS=1
export SAFETENSORS_FAST_GPU=1
export HF_HUB_ENABLE_HF_TRANSFER=1
export PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True
export ACCELERATE_TORCH_DEVICE_MAP_AUTO=false
# Change to working directory
cd /fsx/dana_aubakirova/vla/pr/lerobot
echo "=== Testing Accelerate Multi-GPU Training with SmolVLA ==="
echo "Dataset: lerobot/svla_so100_sorting"
echo "GPUs: 2"
echo "Steps: 100 (for quick test)"
echo "Job ID: $SLURM_JOB_ID"
echo ""
# Set output directory with job ID
export OUTPUT_DIR="outputs/test_accelerate_2gpu_job_${SLURM_JOB_ID}"
echo "Output directory: $OUTPUT_DIR"
echo ""
# Test accelerate training
accelerate launch --config_file accelerate_configs/2gpu_config_safe.yaml -m lerobot.scripts.train \
--policy.type=smolvla \
--policy.push_to_hub=false \
--dataset.repo_id=lerobot/svla_so100_sorting \
--dataset.video_backend=pyav \
--steps=100 \
--save_freq=50 \
--log_freq=5 \
--batch_size=2 \
--num_workers=0 \
--output_dir=$OUTPUT_DIR \
--wandb.enable=false
echo ""
echo "=== Training completed! ==="
echo "Check logs and outputs in: $OUTPUT_DIR"
echo "Job ID: $SLURM_JOB_ID"
-45
View File
@@ -1,45 +0,0 @@
#!/bin/bash
echo "=== Direct 1-GPU Training Test with SmolVLA (no accelerate) ==="
echo "Environment: multi"
echo "GPU: 1"
echo "Steps: 50 (quick local test)"
echo ""
# Activate conda environment
source /fsx/dana_aubakirova/miniconda3/etc/profile.d/conda.sh
conda activate multi
# Set CUDA environment for 1 GPU
export CUDA_VISIBLE_DEVICES=0
export PYTORCH_CUDA_ALLOC_CONF=max_split_size_mb:128,expandable_segments:True
export TORCH_DISTRIBUTED_DEBUG=OFF
export CUDA_LAUNCH_BLOCKING=0
export TRANSFORMERS_NO_ADVISORY_WARNINGS=1
# Change to working directory
cd /fsx/dana_aubakirova/vla/pr/lerobot
# Set output directory with timestamp
export OUTPUT_DIR="outputs/test_direct_1gpu_local_$(date +%Y%m%d_%H%M%S)"
echo "Output directory: $OUTPUT_DIR"
echo ""
# Test direct training with 1 GPU (no accelerate)
python -m lerobot.scripts.train \
--policy.path=lerobot/smolvla_base \
--policy.push_to_hub=false \
--dataset.repo_id=lerobot/svla_so100_sorting \
--dataset.video_backend=pyav \
--steps=50 \
--save_freq=25 \
--log_freq=5 \
--batch_size=1 \
--num_workers=0 \
--output_dir=$OUTPUT_DIR \
--wandb.enable=false
echo ""
echo "=== Training completed! ==="
echo "Check outputs in: $OUTPUT_DIR"