From a5727e37b4ff405f9d8de424e07dcc441aa2c82f Mon Sep 17 00:00:00 2001 From: pranavsaroha Date: Mon, 23 Jun 2025 07:49:14 -0700 Subject: [PATCH 1/7] Fix teleop disconnect during eval (#1364) --- lerobot/record.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lerobot/record.py b/lerobot/record.py index acc844ff9..2f443c208 100644 --- a/lerobot/record.py +++ b/lerobot/record.py @@ -330,7 +330,8 @@ def record(cfg: RecordConfig) -> LeRobotDataset: log_say("Stop recording", cfg.play_sounds, blocking=True) robot.disconnect() - teleop.disconnect() + if teleop is not None: + teleop.disconnect() if not is_headless() and listener is not None: listener.stop() From fe88c5942cce222c5463350d999a463b9016cf8c Mon Sep 17 00:00:00 2001 From: Jim Burtoft <39492751+jimburtoft@users.noreply.github.com> Date: Wed, 25 Jun 2025 08:43:14 -0400 Subject: [PATCH 2/7] There can be only one!! (#1343) pkg-config appears twice in the package list. Co-authored-by: Caroline Pascal --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index e98f35663..09398021e 100644 --- a/README.md +++ b/README.md @@ -130,7 +130,7 @@ pip install -e . ``` > **NOTE:** If you encounter build errors, you may need to install additional dependencies (`cmake`, `build-essential`, and `ffmpeg libs`). On Linux, run: -`sudo apt-get install cmake build-essential python3-dev pkg-config libavformat-dev libavcodec-dev libavdevice-dev libavutil-dev libswscale-dev libswresample-dev libavfilter-dev pkg-config`. For other systems, see: [Compiling PyAV](https://pyav.org/docs/develop/overview/installation.html#bring-your-own-ffmpeg) +`sudo apt-get install cmake build-essential python3-dev pkg-config libavformat-dev libavcodec-dev libavdevice-dev libavutil-dev libswscale-dev libswresample-dev libavfilter-dev`. For other systems, see: [Compiling PyAV](https://pyav.org/docs/develop/overview/installation.html#bring-your-own-ffmpeg) For simulations, 🤗 LeRobot comes with gymnasium environments that can be installed as extras: - [aloha](https://github.com/huggingface/gym-aloha) From 06450c6777c40d828fa5f81524bf4c3cd5140932 Mon Sep 17 00:00:00 2001 From: Krzysztof Skrzypski <18378172+KrzysztofSkrzypski@users.noreply.github.com> Date: Thu, 26 Jun 2025 19:15:35 +0900 Subject: [PATCH 3/7] update assembly instructions to match outputs from setup motors 'python -m lerobot.setup_motors' script (#1384) --- lerobot/common/robots/so101_follower/so101.mdx | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/lerobot/common/robots/so101_follower/so101.mdx b/lerobot/common/robots/so101_follower/so101.mdx index 5d39a1780..36bd4fc70 100644 --- a/lerobot/common/robots/so101_follower/so101.mdx +++ b/lerobot/common/robots/so101_follower/so101.mdx @@ -22,11 +22,11 @@ The follower arm uses 6x STS3215 motors with 1/345 gearing. 
The leader, however, | Leader-Arm Axis | Motor | Gear Ratio | |-----------------|:-------:|:----------:| -| Base / Shoulder Yaw | 1 | 1 / 191 | -| Shoulder Pitch | 2 | 1 / 345 | -| Elbow | 3 | 1 / 191 | -| Wrist Roll | 4 | 1 / 147 | -| Wrist Pitch | 5 | 1 / 147 | +| Base / Shoulder Pan | 1 | 1 / 191 | +| Shoulder Lift | 2 | 1 / 345 | +| Elbow Flex | 3 | 1 / 191 | +| Wrist Flex | 4 | 1 / 147 | +| Wrist Roll | 5 | 1 / 147 | | Gripper | 6 | 1 / 147 | ### Clean Parts From a989c795587d122299275c65a38ffdd0a804b8dc Mon Sep 17 00:00:00 2001 From: Jean-Baptiste Cayrou Date: Thu, 26 Jun 2025 13:31:32 +0200 Subject: [PATCH 4/7] docs: Fix the SO-100 documentation, the motors configuration step should be before the assembly instructions (#1315) Co-authored-by: Pepijn <138571049+pkooij@users.noreply.github.com> --- .../common/robots/so100_follower/so100.mdx | 317 +++++++++--------- 1 file changed, 160 insertions(+), 157 deletions(-) diff --git a/lerobot/common/robots/so100_follower/so100.mdx b/lerobot/common/robots/so100_follower/so100.mdx index d6149b5b8..5443a687b 100644 --- a/lerobot/common/robots/so100_follower/so100.mdx +++ b/lerobot/common/robots/so100_follower/so100.mdx @@ -15,6 +15,166 @@ In addition to these instructions, you need to install the Feetech SDK: pip install -e ".[feetech]" ``` +## Configure the motors + +**Note:** +Unlike the SO-101, the motor connectors are not easily accessible once the arm is assembled, so the configuration step must be done beforehand. + +### 1. Find the USB ports associated with each arm + +To find the port for each bus servo adapter, run this script: +```bash +python lerobot/find_port.py +``` + + + + +Example output: + +``` +Finding all available ports for the MotorBus. +['/dev/tty.usbmodem575E0032081', '/dev/tty.usbmodem575E0031751'] +Remove the USB cable from your MotorsBus and press Enter when done. + +[...Disconnect corresponding leader or follower arm and press Enter...] + +The port of this MotorsBus is /dev/tty.usbmodem575E0032081 +Reconnect the USB cable. +``` + +Where the found port is: `/dev/tty.usbmodem575E0032081` corresponding to your leader or follower arm. + + + + +On Linux, you might need to give access to the USB ports by running: +```bash +sudo chmod 666 /dev/ttyACM0 +sudo chmod 666 /dev/ttyACM1 +``` + +Example output: + +``` +Finding all available ports for the MotorBus. +['/dev/ttyACM0', '/dev/ttyACM1'] +Remove the usb cable from your MotorsBus and press Enter when done. + +[...Disconnect corresponding leader or follower arm and press Enter...] + +The port of this MotorsBus is /dev/ttyACM1 +Reconnect the USB cable. +``` + +Where the found port is: `/dev/ttyACM1` corresponding to your leader or follower arm. + + + + +### 2. Set the motors ids and baudrates + +Each motor is identified by a unique id on the bus. When brand new, motors usually come with a default id of `1`. For the communication to work properly between the motors and the controller, we first need to set a unique, different id to each motor. Additionally, the speed at which data is transmitted on the bus is determined by the baudrate. In order to talk to each other, the controller and all the motors need to be configured with the same baudrate. + +To that end, we first need to connect to each motor individually with the controller in order to set these. Since we will write these parameters in the non-volatile section of the motors' internal memory (EEPROM), we'll only need to do this once. 
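Concretely, the `lerobot.setup_motors` script used below just walks the daisy chain one motor at a time. A minimal sketch of that loop, assuming hypothetical `bus.write_id` / `bus.write_baudrate` helpers (the real logic lives inside LeRobot's `setup_motors()`):

```python
# Sketch only: the bus helper names are illustrative, not the actual LeRobot API.
EXPECTED_MOTORS = ["shoulder_pan", "shoulder_lift", "elbow_flex", "wrist_flex", "wrist_roll", "gripper"]

def setup_motors_sketch(bus):
    # Configure one motor at a time, from the gripper (id=6) down to the
    # shoulder pan (id=1), so that each EEPROM write targets a single motor.
    for desired_id, name in reversed(list(enumerate(EXPECTED_MOTORS, start=1))):
        input(f"Connect the controller board to the '{name}' motor only and press enter.")
        bus.write_id(desired_id)       # hypothetical: persist the new id to EEPROM
        bus.write_baudrate(1_000_000)  # hypothetical: one shared baudrate for the whole bus
        print(f"'{name}' motor id set to {desired_id}")
```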
If you are repurposing motors from another robot, you will probably also need to perform this step, as the ids and baudrate likely won't match.

#### Follower

Connect the USB cable from your computer and the power supply to the follower arm's controller board. Then, run the following command, or the API example, with the port you found in the previous step. You'll also need to give your follower arm a name with the `id` parameter.

For a visual reference on how to set the motor ids, please refer to [this video](https://huggingface.co/docs/lerobot/en/so101#setup-motors-video), where we follow the process for the SO101 arm.

```bash
python -m lerobot.setup_motors \
    --robot.type=so100_follower \
    --robot.port=/dev/tty.usbmodem585A0076841 # <- paste here the port found at previous step
```

```python
from lerobot.common.robots.so100_follower import SO100Follower, SO100FollowerConfig

config = SO100FollowerConfig(
    port="/dev/tty.usbmodem585A0076841",
    id="my_awesome_follower_arm",
)
follower = SO100Follower(config)
follower.setup_motors()
```

You should see the following instruction:
```
Connect the controller board to the 'gripper' motor only and press enter.
```

As instructed, plug in the gripper's motor. Make sure it's the only motor connected to the board, and that the motor itself is not yet daisy-chained to any other motor. As you press `[Enter]`, the script will automatically set the id and baudrate for that motor.
Troubleshooting

If you get an error at that point, check your cables and make sure they are plugged in properly:

- Power supply
- USB cable between your computer and the controller board
- The 3-pin cable from the controller board to the motor

If you are using a Waveshare controller board, make sure that the two jumpers are set on the `B` channel (USB).
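If those checks don't surface the issue, it can also help to confirm that the controller board still enumerates as a serial device. A quick check using pyserial (which LeRobot already depends on):

```python
from serial.tools import list_ports  # provided by the pyserial package

# A powered board with a working USB cable should appear in this list.
for port in list_ports.comports():
    print(port.device, "-", port.description)
```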
+ +You should then see the following message: +``` +'gripper' motor id set to 6 +``` + +Followed by the next instruction: +``` +Connect the controller board to the 'wrist_roll' motor only and press enter. +``` + +You can disconnect the 3-pin cable from the controller board, but you can leave it connected to the gripper motor on the other end, as it will already be in the right place. Now, plug in another 3-pin cable to the wrist roll motor and connect it to the controller board. As with the previous motor, make sure it is the only motor connected to the board and that the motor itself isn't connected to any other one. + +Repeat the operation for each motor as instructed. + +> [!TIP] +> Check your cabling at each step before pressing Enter. For instance, the power supply cable might disconnect as you manipulate the board. + +When you are done, the script will simply finish, at which point the motors are ready to be used. You can now plug the 3-pin cable from each motor to the next one, and the cable from the first motor (the 'shoulder pan' with id=1) to the controller board, which can now be attached to the base of the arm. + +#### Leader +Do the same steps for the leader arm. + + + +```bash +python -m lerobot.setup_motors \ + --teleop.type=so100_leader \ + --teleop.port=/dev/tty.usbmodem575E0031751 # <- paste here the port found at previous step +``` + + + +```python +from lerobot.common.teleoperators.so100_leader import SO100Leader, SO100LeaderConfig + +config = SO100LeaderConfig( + port="/dev/tty.usbmodem585A0076841", + id="my_awesome_leader_arm", +) +leader = SO100Leader(config) +leader.setup_motors() +``` + + + ## Step-by-Step Assembly Instructions ## Remove the gears of the 6 leader motors @@ -252,163 +412,6 @@ For the leader configuration, perform **Steps 1–23**. Make sure that you remov -## Configure the motors - -### 1. Find the USB ports associated with each arm - -To find the port for each bus servo adapter, run this script: -```bash -python lerobot/find_port.py -``` - - - - -Example output: - -``` -Finding all available ports for the MotorBus. -['/dev/tty.usbmodem575E0032081', '/dev/tty.usbmodem575E0031751'] -Remove the USB cable from your MotorsBus and press Enter when done. - -[...Disconnect corresponding leader or follower arm and press Enter...] - -The port of this MotorsBus is /dev/tty.usbmodem575E0032081 -Reconnect the USB cable. -``` - -Where the found port is: `/dev/tty.usbmodem575E0032081` corresponding to your leader or follower arm. - - - - -On Linux, you might need to give access to the USB ports by running: -```bash -sudo chmod 666 /dev/ttyACM0 -sudo chmod 666 /dev/ttyACM1 -``` - -Example output: - -``` -Finding all available ports for the MotorBus. -['/dev/ttyACM0', '/dev/ttyACM1'] -Remove the usb cable from your MotorsBus and press Enter when done. - -[...Disconnect corresponding leader or follower arm and press Enter...] - -The port of this MotorsBus is /dev/ttyACM1 -Reconnect the USB cable. -``` - -Where the found port is: `/dev/ttyACM1` corresponding to your leader or follower arm. - - - - -### 2. Set the motors ids and baudrates - -Each motor is identified by a unique id on the bus. When brand new, motors usually come with a default id of `1`. For the communication to work properly between the motors and the controller, we first need to set a unique, different id to each motor. Additionally, the speed at which data is transmitted on the bus is determined by the baudrate. 
In order to talk to each other, the controller and all the motors need to be configured with the same baudrate. - -To that end, we first need to connect to each motor individually with the controller in order to set these. Since we will write these parameters in the non-volatile section of the motors' internal memory (EEPROM), we'll only need to do this once. - -If you are repurposing motors from another robot, you will probably also need to perform this step as the ids and baudrate likely won't match. - -#### Follower - -Connect the usb cable from your computer and the power supply to the follower arm's controller board. Then, run the following command or run the API example with the port you got from the previous step. You'll also need to give your leader arm a name with the `id` parameter. - -For a visual reference on how to set the motor ids please refer to [this video](https://huggingface.co/docs/lerobot/en/so101#setup-motors-video) where we follow the process for the SO101 arm. - - - - -```bash -python -m lerobot.setup_motors \ - --robot.type=so100_follower \ - --robot.port=/dev/tty.usbmodem585A0076841 # <- paste here the port found at previous step -``` - - - -```python -from lerobot.common.robots.so100_follower import SO100Follower, SO100FollowerConfig - -config = SO100FollowerConfig( - port="/dev/tty.usbmodem585A0076841", - id="my_awesome_follower_arm", -) -follower = SO100Follower(config) -follower.setup_motors() -``` - - - -You should see the following instruction -``` -Connect the controller board to the 'gripper' motor only and press enter. -``` - -As instructed, plug the gripper's motor. Make sure it's the only motor connected to the board, and that the motor itself is not yet daisy-chained to any other motor. As you press `[Enter]`, the script will automatically set the id and baudrate for that motor. - -
Troubleshooting

If you get an error at that point, check your cables and make sure they are plugged in properly:

- Power supply
- USB cable between your computer and the controller board
- The 3-pin cable from the controller board to the motor

If you are using a Waveshare controller board, make sure that the two jumpers are set on the `B` channel (USB).
- -You should then see the following message: -``` -'gripper' motor id set to 6 -``` - -Followed by the next instruction: -``` -Connect the controller board to the 'wrist_roll' motor only and press enter. -``` - -You can disconnect the 3-pin cable from the controller board, but you can leave it connected to the gripper motor on the other end, as it will already be in the right place. Now, plug in another 3-pin cable to the wrist roll motor and connect it to the controller board. As with the previous motor, make sure it is the only motor connected to the board and that the motor itself isn't connected to any other one. - -Repeat the operation for each motor as instructed. - -> [!TIP] -> Check your cabling at each step before pressing Enter. For instance, the power supply cable might disconnect as you manipulate the board. - -When you are done, the script will simply finish, at which point the motors are ready to be used. You can now plug the 3-pin cable from each motor to the next one, and the cable from the first motor (the 'shoulder pan' with id=1) to the controller board, which can now be attached to the base of the arm. - -#### Leader -Do the same steps for the leader arm. - - - -```bash -python -m lerobot.setup_motors \ - --teleop.type=so100_leader \ - --teleop.port=/dev/tty.usbmodem575E0031751 # <- paste here the port found at previous step -``` - - - -```python -from lerobot.common.teleoperators.so100_leader import SO100Leader, SO100LeaderConfig - -config = SO100LeaderConfig( - port="/dev/tty.usbmodem585A0076841", - id="my_awesome_leader_arm", -) -leader = SO100Leader(config) -leader.setup_motors() -``` - - - ## Calibrate Next, you'll need to calibrate your robot to ensure that the leader and follower arms have the same position values when they are in the same physical position. 
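A typical invocation is sketched below, using the follower arm as an example (double-check the exact flags with `python -m lerobot.calibrate --help`):

```bash
python -m lerobot.calibrate \
    --robot.type=so100_follower \
    --robot.id=my_awesome_follower_arm \
    --robot.port=/dev/tty.usbmodem585A0076841 # <- paste here the port found earlier
```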
From 0b2285d1ec7454bda6af340f547116f2a4bda855 Mon Sep 17 00:00:00 2001 From: Pepijn <138571049+pkooij@users.noreply.github.com> Date: Thu, 26 Jun 2025 14:36:16 +0200 Subject: [PATCH 5/7] Feat: Improve hub integration (#1382) * feat(policies): Initial setup to push policies to hub with tags and model card * feat: add dataset that is used to train * Add model template summary * fix: Update link model_card template * fix: remove print * fix: change import name * fix: add model summary in template * fix: minor text * fix: comments Lucain * fix: feedback steven * fix: restructure push to hub * fix: remove unneeded changes * fix: import * fix: import 2 * Add MANIFEST.in * fix: feedback pr * Fix tests * tests: Add smolvla end-to-end test * Fix: smolvla test * fix test name * fix policy tests * Add push to hub false policy tests * Do push to hub cleaner * fix(ci): add push_to_hub false in tests --------- Co-authored-by: Steven Palma --- MANIFEST.in | 2 + Makefile | 38 +++++++++ docs/source/il_robots.mdx | 7 +- lerobot/common/policies/pretrained.py | 81 +++++++++++++------ lerobot/configs/policies.py | 10 +++ lerobot/configs/train.py | 5 ++ lerobot/record.py | 5 +- lerobot/scripts/push_pretrained.py | 71 ---------------- lerobot/scripts/train.py | 3 + .../templates/lerobot_modelcard_template.md | 74 +++++++++++++++++ .../policies/save_policy_to_safetensors.py | 2 +- tests/datasets/test_datasets.py | 3 +- tests/policies/test_policies.py | 6 +- 13 files changed, 206 insertions(+), 101 deletions(-) create mode 100644 MANIFEST.in delete mode 100644 lerobot/scripts/push_pretrained.py create mode 100644 lerobot/templates/lerobot_modelcard_template.md diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 000000000..d2608ca0a --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1,2 @@ +include lerobot/templates/lerobot_modelcard_template.md +include lerobot/common/datasets/card_template.md diff --git a/Makefile b/Makefile index c82483cc3..9457dbe6e 100644 --- a/Makefile +++ b/Makefile @@ -40,6 +40,8 @@ test-end-to-end: ${MAKE} DEVICE=$(DEVICE) test-diffusion-ete-eval ${MAKE} DEVICE=$(DEVICE) test-tdmpc-ete-train ${MAKE} DEVICE=$(DEVICE) test-tdmpc-ete-eval + ${MAKE} DEVICE=$(DEVICE) test-smolvla-ete-train + ${MAKE} DEVICE=$(DEVICE) test-smolvla-ete-eval test-act-ete-train: python lerobot/scripts/train.py \ @@ -48,6 +50,7 @@ test-act-ete-train: --policy.n_action_steps=20 \ --policy.chunk_size=20 \ --policy.device=$(DEVICE) \ + --policy.push_to_hub=false \ --env.type=aloha \ --env.episode_length=5 \ --dataset.repo_id=lerobot/aloha_sim_transfer_cube_human \ @@ -85,6 +88,7 @@ test-diffusion-ete-train: --policy.diffusion_step_embed_dim=32 \ --policy.num_inference_steps=10 \ --policy.device=$(DEVICE) \ + --policy.push_to_hub=false \ --env.type=pusht \ --env.episode_length=5 \ --dataset.repo_id=lerobot/pusht \ @@ -114,6 +118,7 @@ test-tdmpc-ete-train: python lerobot/scripts/train.py \ --policy.type=tdmpc \ --policy.device=$(DEVICE) \ + --policy.push_to_hub=false \ --env.type=xarm \ --env.task=XarmLift-v0 \ --env.episode_length=5 \ @@ -140,3 +145,36 @@ test-tdmpc-ete-eval: --env.task=XarmLift-v0 \ --eval.n_episodes=1 \ --eval.batch_size=1 + + +test-smolvla-ete-train: + python lerobot/scripts/train.py \ + --policy.type=smolvla \ + --policy.n_action_steps=20 \ + --policy.chunk_size=20 \ + --policy.device=$(DEVICE) \ + --policy.push_to_hub=false \ + --env.type=aloha \ + --env.episode_length=5 \ + --dataset.repo_id=lerobot/aloha_sim_transfer_cube_human \ + --dataset.image_transforms.enable=true \ + 
--dataset.episodes="[0]" \ + --batch_size=2 \ + --steps=4 \ + --eval_freq=2 \ + --eval.n_episodes=1 \ + --eval.batch_size=1 \ + --save_freq=2 \ + --save_checkpoint=true \ + --log_freq=1 \ + --wandb.enable=false \ + --output_dir=tests/outputs/smolvla/ + +test-smolvla-ete-eval: + python lerobot/scripts/eval.py \ + --policy.path=tests/outputs/smolvla/checkpoints/000004/pretrained_model \ + --policy.device=$(DEVICE) \ + --env.type=aloha \ + --env.episode_length=5 \ + --eval.n_episodes=1 \ + --eval.batch_size=1 diff --git a/docs/source/il_robots.mdx b/docs/source/il_robots.mdx index f3b4b1a25..fb99797ee 100644 --- a/docs/source/il_robots.mdx +++ b/docs/source/il_robots.mdx @@ -255,7 +255,8 @@ python lerobot/scripts/train.py \ --output_dir=outputs/train/act_so101_test \ --job_name=act_so101_test \ --policy.device=cuda \ - --wandb.enable=true + --wandb.enable=true \ + --policy.repo_id=${HF_USER}/my_policy ``` Let's explain the command: @@ -273,6 +274,10 @@ python lerobot/scripts/train.py \ --resume=true ``` +If you do not want to push your model to the hub after training use `--policy.push_to_hub=false`. + +Additionally you can provide extra `tags` or specify a `license` for your model or make the model repo `private` by adding this: `--policy.private=true --policy.tags=\[ppo,rl\] --policy.license=mit` + #### Train using Collab If your local computer doesn't have a powerful GPU you could utilize Google Collab to train your model by following the [ACT training notebook](./notebooks#training-act). diff --git a/lerobot/common/policies/pretrained.py b/lerobot/common/policies/pretrained.py index da4ef1572..58eef9baf 100644 --- a/lerobot/common/policies/pretrained.py +++ b/lerobot/common/policies/pretrained.py @@ -14,12 +14,14 @@ import abc import logging import os +from importlib.resources import files from pathlib import Path -from typing import Type, TypeVar +from tempfile import TemporaryDirectory +from typing import List, Type, TypeVar import packaging import safetensors -from huggingface_hub import hf_hub_download +from huggingface_hub import HfApi, ModelCard, ModelCardData, hf_hub_download from huggingface_hub.constants import SAFETENSORS_SINGLE_FILE from huggingface_hub.errors import HfHubHTTPError from safetensors.torch import load_model as load_model_as_safetensor @@ -28,20 +30,10 @@ from torch import Tensor, nn from lerobot.common.utils.hub import HubMixin from lerobot.configs.policies import PreTrainedConfig +from lerobot.configs.train import TrainPipelineConfig T = TypeVar("T", bound="PreTrainedPolicy") -DEFAULT_POLICY_CARD = """ ---- -# For reference on model card metadata, see the spec: https://github.com/huggingface/hub-docs/blob/main/modelcard.md?plain=1 -# Doc / guide: https://huggingface.co/docs/hub/model-cards -{{ card_data }} ---- - -This policy has been pushed to the Hub using [LeRobot](https://github.com/huggingface/lerobot): -- Docs: {{ docs_url | default("[More Information Needed]", true) }} -""" - class PreTrainedPolicy(nn.Module, HubMixin, abc.ABC): """ @@ -150,16 +142,6 @@ class PreTrainedPolicy(nn.Module, HubMixin, abc.ABC): safetensors.torch.load_model(model, model_file, strict=strict, device=map_location) return model - # def generate_model_card(self, *args, **kwargs) -> ModelCard: - # card = ModelCard.from_template( - # card_data=self._hub_mixin_info.model_card_data, - # template_str=self._hub_mixin_info.model_card_template, - # repo_url=self._hub_mixin_info.repo_url, - # docs_url=self._hub_mixin_info.docs_url, - # **kwargs, - # ) - # return card - @abc.abstractmethod 
def get_optim_params(self) -> dict: """ @@ -197,3 +179,56 @@ class PreTrainedPolicy(nn.Module, HubMixin, abc.ABC): with caching. """ raise NotImplementedError + + def push_model_to_hub( + self, + cfg: TrainPipelineConfig, + ): + api = HfApi() + repo_id = api.create_repo( + repo_id=self.config.repo_id, private=self.config.private, exist_ok=True + ).repo_id + + # Push the files to the repo in a single commit + with TemporaryDirectory(ignore_cleanup_errors=True) as tmp: + saved_path = Path(tmp) / repo_id + + self.save_pretrained(saved_path) # Calls _save_pretrained and stores model tensors + + card = self.generate_model_card( + cfg.dataset.repo_id, self.config.type, self.config.license, self.config.tags + ) + card.save(str(saved_path / "README.md")) + + cfg.save_pretrained(saved_path) # Calls _save_pretrained and stores train config + + commit_info = api.upload_folder( + repo_id=repo_id, + repo_type="model", + folder_path=saved_path, + commit_message="Upload policy weights, train config and readme", + allow_patterns=["*.safetensors", "*.json", "*.yaml", "*.md"], + ignore_patterns=["*.tmp", "*.log"], + ) + + logging.info(f"Model pushed to {commit_info.repo_url.url}") + + def generate_model_card( + self, dataset_repo_id: str, model_type: str, license: str | None, tags: List[str] | None + ) -> ModelCard: + base_model = "lerobot/smolvla_base" if model_type == "smolvla" else None # Set a base model + + card_data = ModelCardData( + license=license or "apache-2.0", + library_name="lerobot", + pipeline_tag="robotics", + tags=list(set(tags or []).union({"robotics", "lerobot", model_type})), + model_name=model_type, + datasets=dataset_repo_id, + base_model=base_model, + ) + + template_card = files("lerobot.templates").joinpath("lerobot_modelcard_template.md").read_text() + card = ModelCard.from_template(card_data, template_str=template_card) + card.validate() + return card diff --git a/lerobot/configs/policies.py b/lerobot/configs/policies.py index 1302db1fa..9e7f3dd56 100644 --- a/lerobot/configs/policies.py +++ b/lerobot/configs/policies.py @@ -60,6 +60,16 @@ class PreTrainedConfig(draccus.ChoiceRegistry, HubMixin, abc.ABC): # automatic gradient scaling is used. use_amp: bool = False + push_to_hub: bool = True + repo_id: str | None = None + + # Upload on private repository on the Hugging Face hub. + private: bool | None = None + # Add tags to your policy on the hub. + tags: list[str] | None = None + # Add tags to your policy on the hub. + license: str | None = None + def __post_init__(self): self.pretrained_path = None if not self.device or not is_torch_device_available(self.device): diff --git a/lerobot/configs/train.py b/lerobot/configs/train.py index 96a460bdf..377fb8a9b 100644 --- a/lerobot/configs/train.py +++ b/lerobot/configs/train.py @@ -116,6 +116,11 @@ class TrainPipelineConfig(HubMixin): self.optimizer = self.policy.get_optimizer_preset() self.scheduler = self.policy.get_scheduler_preset() + if self.policy.push_to_hub and not self.policy.repo_id: + raise ValueError( + "'policy.repo_id' argument missing. Please specify it to push the model to the hub." 
+ ) + @classmethod def __get_path_fields__(cls) -> list[str]: """This enables the parser to load config from the policy using `--policy.path=local/dir`""" diff --git a/lerobot/record.py b/lerobot/record.py index 2f443c208..766cfb35b 100644 --- a/lerobot/record.py +++ b/lerobot/record.py @@ -65,7 +65,10 @@ from lerobot.common.robots import ( # noqa: F401 from lerobot.common.teleoperators import ( # noqa: F401 Teleoperator, TeleoperatorConfig, + koch_leader, make_teleoperator_from_config, + so100_leader, + so101_leader, ) from lerobot.common.utils.control_utils import ( init_keyboard_listener, @@ -84,8 +87,6 @@ from lerobot.common.utils.visualization_utils import _init_rerun from lerobot.configs import parser from lerobot.configs.policies import PreTrainedConfig -from .common.teleoperators import koch_leader, so100_leader, so101_leader # noqa: F401 - @dataclass class DatasetRecordConfig: diff --git a/lerobot/scripts/push_pretrained.py b/lerobot/scripts/push_pretrained.py deleted file mode 100644 index e3c683f96..000000000 --- a/lerobot/scripts/push_pretrained.py +++ /dev/null @@ -1,71 +0,0 @@ -#!/usr/bin/env python - -# Copyright 2024 The HuggingFace Inc. team. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -Once you have trained a policy with our training script (lerobot/scripts/train.py), use this script to push it -to the hub. 
- -Example: - -```bash -python lerobot/scripts/push_pretrained.py \ - --pretrained_path=outputs/train/act_aloha_sim_transfer_cube_human/checkpoints/last/pretrained_model \ - --repo_id=lerobot/act_aloha_sim_transfer_cube_human -``` -""" - -from dataclasses import dataclass -from pathlib import Path - -import draccus -from huggingface_hub import HfApi - - -@dataclass -class PushPreTrainedConfig: - pretrained_path: Path - repo_id: str - branch: str | None = None - private: bool = False - exist_ok: bool = False - - -@draccus.wrap() -def main(cfg: PushPreTrainedConfig): - hub_api = HfApi() - hub_api.create_repo( - repo_id=cfg.repo_id, - private=cfg.private, - repo_type="model", - exist_ok=cfg.exist_ok, - ) - if cfg.branch: - hub_api.create_branch( - repo_id=cfg.repo_id, - branch=cfg.branch, - repo_type="model", - exist_ok=cfg.exist_ok, - ) - - hub_api.upload_folder( - repo_id=cfg.repo_id, - folder_path=cfg.pretrained_path, - repo_type="model", - revision=cfg.branch, - ) - - -if __name__ == "__main__": - main() diff --git a/lerobot/scripts/train.py b/lerobot/scripts/train.py index 0de247be9..bdb17dae5 100644 --- a/lerobot/scripts/train.py +++ b/lerobot/scripts/train.py @@ -282,6 +282,9 @@ def train(cfg: TrainPipelineConfig): eval_env.close() logging.info("End of training") + if cfg.policy.push_to_hub: + policy.push_model_to_hub(cfg) + if __name__ == "__main__": init_logging() diff --git a/lerobot/templates/lerobot_modelcard_template.md b/lerobot/templates/lerobot_modelcard_template.md new file mode 100644 index 000000000..ca5c182ac --- /dev/null +++ b/lerobot/templates/lerobot_modelcard_template.md @@ -0,0 +1,74 @@ +--- +# For reference on model card metadata, see the spec: https://github.com/huggingface/hub-docs/blob/main/modelcard.md?plain=1 +# Doc / guide: https://huggingface.co/docs/hub/model-cards +{{ card_data }} +--- + +# Model Card for {{ model_name | default("Model ID", true) }} + + + +{% if model_name == "smolvla" %} +[SmolVLA](https://huggingface.co/papers/2506.01844) is a compact, efficient vision-language-action model that achieves competitive performance at reduced computational costs and can be deployed on consumer-grade hardware. +{% elif model_name == "act" %} +[Action Chunking with Transformers (ACT)](https://huggingface.co/papers/2304.13705) is an imitation-learning method that predicts short action chunks instead of single steps. It learns from teleoperated data and often achieves high success rates. +{% elif model_name == "tdmpc" %} +[TD-MPC](https://huggingface.co/papers/2203.04955) combines model-free and model-based approaches to improve sample efficiency and performance in continuous control tasks by using a learned latent dynamics model and terminal value function. +{% elif model_name == "diffusion" %} +[Diffusion Policy](https://huggingface.co/papers/2303.04137) treats visuomotor control as a generative diffusion process, producing smooth, multi-step action trajectories that excel at contact-rich manipulation. +{% elif model_name == "vqbet" %} +[VQ-BET](https://huggingface.co/papers/2403.03181) combines vector-quantised action tokens with Behaviour Transformers to discretise control and achieve data-efficient imitation across diverse skills. +{% elif model_name == "pi0" %} +[Pi0](https://huggingface.co/papers/2410.24164) is a generalist vision-language-action transformer that converts multimodal observations and text instructions into robot actions for zero-shot task transfer. 
+{% elif model_name == "pi0fast" %} +[Pi0-Fast](https://huggingface.co/papers/2501.09747) is a variant of Pi0 that uses a new tokenization method called FAST, which enables training of an autoregressive vision-language-action policy for high-frequency robotic tasks with improved performance and reduced training time. +{% elif model_name == "sac" %} +[Soft Actor-Critic (SAC)](https://huggingface.co/papers/1801.01290) is an entropy-regularised actor-critic algorithm offering stable, sample-efficient learning in continuous-control environments. +{% elif model_name == "reward_classifier" %} +A reward classifier is a lightweight neural network that scores observations or trajectories for task success, providing a learned reward signal or offline evaluation when explicit rewards are unavailable. +{% else %} +_Model type not recognized — please update this template._ +{% endif %} + +This policy has been trained and pushed to the Hub using [LeRobot](https://github.com/huggingface/lerobot). +See the full documentation at [LeRobot Docs](https://huggingface.co/docs/lerobot/index). + +--- + +## How to Get Started with the Model + +For a complete walkthrough, see the [training guide](https://huggingface.co/docs/lerobot/il_robots#train-a-policy). +Below is the short version on how to train and run inference/eval: + +### Train from scratch + +```bash +python lerobot/scripts/train.py \ + --dataset.repo_id=${HF_USER}/ \ + --policy.type=act \ + --output_dir=outputs/train/ \ + --job_name=lerobot_training \ + --policy.device=cuda \ + --policy.repo_id=${HF_USER}/ + --wandb.enable=true +``` + +*Writes checkpoints to `outputs/train//checkpoints/`.* + +### Evaluate the policy/run inference + +```bash +python -m lerobot.record \ + --robot.type=so100_follower \ + --dataset.repo_id=/eval_ \ + --policy.path=/ \ + --episodes=10 +``` + +Prefix the dataset repo with **eval\_** and supply `--policy.path` pointing to a local or hub checkpoint. 
+ +--- + +## Model Details + +* **License:** {{ license | default("\[More Information Needed]", true) }} diff --git a/tests/artifacts/policies/save_policy_to_safetensors.py b/tests/artifacts/policies/save_policy_to_safetensors.py index 106f0dc04..785f296c7 100644 --- a/tests/artifacts/policies/save_policy_to_safetensors.py +++ b/tests/artifacts/policies/save_policy_to_safetensors.py @@ -32,7 +32,7 @@ def get_policy_stats(ds_repo_id: str, policy_name: str, policy_kwargs: dict): train_cfg = TrainPipelineConfig( # TODO(rcadene, aliberts): remove dataset download dataset=DatasetConfig(repo_id=ds_repo_id, episodes=[0]), - policy=make_policy_config(policy_name, **policy_kwargs), + policy=make_policy_config(policy_name, push_to_hub=False, **policy_kwargs), ) train_cfg.validate() # Needed for auto-setting some parameters diff --git a/tests/datasets/test_datasets.py b/tests/datasets/test_datasets.py index 55a417c30..b4fca77c1 100644 --- a/tests/datasets/test_datasets.py +++ b/tests/datasets/test_datasets.py @@ -338,8 +338,9 @@ def test_factory(env_name, repo_id, policy_name): # TODO(rcadene, aliberts): remove dataset download dataset=DatasetConfig(repo_id=repo_id, episodes=[0]), env=make_env_config(env_name), - policy=make_policy_config(policy_name), + policy=make_policy_config(policy_name, push_to_hub=False), ) + cfg.validate() dataset = make_dataset(cfg) delta_timestamps = dataset.delta_timestamps diff --git a/tests/policies/test_policies.py b/tests/policies/test_policies.py index dff5975ae..1b40c663b 100644 --- a/tests/policies/test_policies.py +++ b/tests/policies/test_policies.py @@ -142,9 +142,10 @@ def test_policy(ds_repo_id, env_name, env_kwargs, policy_name, policy_kwargs): train_cfg = TrainPipelineConfig( # TODO(rcadene, aliberts): remove dataset download dataset=DatasetConfig(repo_id=ds_repo_id, episodes=[0]), - policy=make_policy_config(policy_name, **policy_kwargs), + policy=make_policy_config(policy_name, push_to_hub=False, **policy_kwargs), env=make_env_config(env_name, **env_kwargs), ) + train_cfg.validate() # Check that we can make the policy object. dataset = make_dataset(train_cfg) @@ -213,7 +214,7 @@ def test_act_backbone_lr(): cfg = TrainPipelineConfig( # TODO(rcadene, aliberts): remove dataset download dataset=DatasetConfig(repo_id="lerobot/aloha_sim_insertion_scripted", episodes=[0]), - policy=make_policy_config("act", optimizer_lr=0.01, optimizer_lr_backbone=0.001), + policy=make_policy_config("act", optimizer_lr=0.01, optimizer_lr_backbone=0.001, push_to_hub=False), ) cfg.validate() # Needed for auto-setting some parameters @@ -415,6 +416,7 @@ def test_backward_compatibility(ds_repo_id: str, policy_name: str, policy_kwargs https://github.com/huggingface/lerobot/pull/1127. """ + # NOTE: ACT policy has different randomness, after PyTorch 2.7.0 if policy_name == "act" and version.parse(torch.__version__) < version.parse("2.7.0"): pytest.skip(f"Skipping act policy test with PyTorch {torch.__version__}. 
Requires PyTorch >= 2.7.0") From f3d931e1b2e4b5b68a988c413fcc74825c02dd01 Mon Sep 17 00:00:00 2001 From: Francesco Capuano <74058581+fracapuano@users.noreply.github.com> Date: Fri, 27 Jun 2025 10:19:19 +0200 Subject: [PATCH 6/7] Add direct access to action chunks (#1020) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix: sharing predicted chunk with user * [pre-commit.ci] pre-commit autoupdate (#1011) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> * Revert "[pre-commit.ci] pre-commit autoupdate" (#1025) * fix(ci): Pin draccus (<0.10.0) and torch (<2.7) to fix pipeline (#1022) Co-authored-by: imstevenpmwork Co-authored-by: Simon Alibert <75076266+aliberts@users.noreply.github.com> * fix(ci): Pin `torchcodec` (==0.2.1) to fix pipeline temporarly (#1030) * Update tutorial (#1021) Co-authored-by: Simon Alibert <75076266+aliberts@users.noreply.github.com> * Add description motor order SO-101 leader (#1051) * feat(encoding): switching to PyAV for ffmpeg related tasks (#983) * feat(docs): Add new docs build process (#1046) Co-authored-by: Mishig Davaadorj Co-authored-by: Steven Palma * Docs: adapt text + fix video code (#1064) * Fix typos (#1070) * docs: minor corrections and clean-up (#1089) * Update 10_use_so100.md; use diff syntax (#944) Co-authored-by: Pepijn <138571049+pkooij@users.noreply.github.com> * Update 12_use_so101.md (#1081) Co-authored-by: Pepijn <138571049+pkooij@users.noreply.github.com> * bug fix for #1071 When --display_data=true, Failed running control_robot. (#1073) * Add editable -e for feetech install command (#1133) * Fix: emptying action queue between resets (#1117) * fix: typos and grammar (#1148) * Update README.md (#1160) * Update README.md (#1163) * [Fix] Unpin torch beyond 2.6.0 & torchcodec beyond 0.2.1 (#1127) * (hotfix): nightly CI by clipping pymunk version below 7.0.0 (#1182) * [pre-commit.ci] pre-commit autoupdate (#1048) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Simon Alibert * Add SmolVLA (#1175) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: fracapuano Co-authored-by: Steven Palma Co-authored-by: Dana Aubakirova <118912928+danaaubakirova@users.noreply.github.com> Co-authored-by: Remi * Fix SmolVLA loss not sent to wandb (#1198) * Hardware API redesign (#777) Co-authored-by: Pepijn <138571049+pkooij@users.noreply.github.com> Co-authored-by: Steven Palma Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Steven Palma Co-authored-by: Adil Zouitine Co-authored-by: Pepijn * fix(smolvla): update record.py, fix populate_queues and remove unused dependencies (#1208) * replaced OBS_ROBOT with OBS_STATE constant (#1211) * Fix test_teleoperate (#1216) * Fix LeKiwi example (#1217) * Fix smolVLA dependencies (#1218) * fix(pyserial): adding pyserial dependency to global ones (#1219) * Update SmolVLA README.md (#1228) * Fix unable to set camera width/height to non-default (#1225) * Update tutorial link (#1250) * update KochFollower.get_observation() so it returns same observation structure as SO101 (#1248) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> * [pre-commit.ci] pre-commit autoupdate (#1185) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Simon Alibert <75076266+aliberts@users.noreply.github.com> * 
Proposal for fix for enter_pressed on Windows (#1230) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Simon Alibert <75076266+aliberts@users.noreply.github.com> * fix: update pi0 dependency version constraint (#1247) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> * Match motor names with ids lekiwi (#1261) * fix issues: checkpoints keys mismatch and 'task' tokenisation in smolvla (#1256) Co-authored-by: danaaubakirova Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Simon Alibert <75076266+aliberts@users.noreply.github.com> Co-authored-by: Simon Alibert * fix(docs): update realsense documentation (#1268) * Use HF Papers (#1120) * Skip normalization parameters in load_smolvla (#1274) * fix(record): no teleop needed when running with policy (#1284) * Port HIL SERL (#644) Co-authored-by: Michel Aractingi Co-authored-by: Eugene Mironov Co-authored-by: s1lent4gnt Co-authored-by: Ke Wang Co-authored-by: Yoel Chornton Co-authored-by: imstevenpmwork Co-authored-by: Simon Alibert * fix(docs): SmolVLA fine-tuning getting started (#1201) Co-authored-by: Pepijn <138571049+pkooij@users.noreply.github.com> Co-authored-by: danaaubakirova Co-authored-by: Simon Alibert <75076266+aliberts@users.noreply.github.com> Co-authored-by: Francesco Capuano Co-authored-by: Steven Palma * chore(teleop): print calibration path saved (#1286) * chore(dependencies): add gamepad support with pygame and hidapi (#1287) * Robot integration tutorial (#1285) * fix(docs): update send_feedback docstrings * Add sim tutorial, fix lekiwi motor config, add notebook links (#1275) Co-authored-by: AdilZouitine Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Michel Aractingi Co-authored-by: s1lent4gnt Co-authored-by: Michel Aractingi Co-authored-by: Eugene Mironov Co-authored-by: imstevenpmwork Co-authored-by: Simon Alibert <75076266+aliberts@users.noreply.github.com> Co-authored-by: Steven Palma * Fixes on robot integration tutorial (#1290) * Add keyboard teleop device to control the end effector robot (#1289) * Improve type hints (#1293) * fix(record): no teleop arg in reset environment (#1294) * `learner.py` import so101_leader instead of so100 (#1295) Co-authored-by: Adil Zouitine * Fixing `PI0` Policy (#1297) * `gym_manipulator.py` Remove None value action_intervention of BaseLeaderTeleoperator (#1299) * (chore): incorrect resume parameter in recording documentation (#1301) * Update lekiwi.mdx (#1229) * bump `pi0` and `hil` transformers version (#1298) * docs: fix imitation learning robots docs command (#1308) * fix(benchmarks): remove .numpy() from frame in benchmark script (#1354) * add smolvla to the supported policies to run tests (: * add: chunk-level access for the policy * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * add: smolvla in availables * remove: smolvla from library supported policies * fix: change env for training, xarm is broken as of now * add: predict_action_chunk to all supported policies * fix: add robot type constants * add: predict action chunk in base policy class * restore original Makefile * fix: minor * fix: dict keys come from lerobot/constants * fix: improve act encapsulation, properly supporting temporal ensembling * fix: smolvla action chunking * fix: very minor, but very annoying * fix: minor * fix minor naming Co-authored-by: 
Steven Palma Signed-off-by: Francesco Capuano <74058581+fracapuano@users.noreply.github.com> * fix: refactoring inference for single actions and chunks into different components * fix: minor * fix: temporal ensembling * fix: moving populate queues out of modular component for batch preparation * fix: minor for CI * fix: smovla debug * fix: reward classifier, maybe the last policy lacking? --------- Signed-off-by: Francesco Capuano <74058581+fracapuano@users.noreply.github.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Simon Alibert <75076266+aliberts@users.noreply.github.com> Co-authored-by: Adil Zouitine Co-authored-by: imstevenpmwork Co-authored-by: Pepijn <138571049+pkooij@users.noreply.github.com> Co-authored-by: Caroline Pascal Co-authored-by: Mishig Davaadorj Co-authored-by: omahs <73983677+omahs@users.noreply.github.com> Co-authored-by: CharlesCNorton <135471798+CharlesCNorton@users.noreply.github.com> Co-authored-by: masato-ka Co-authored-by: Ragnar Co-authored-by: mshukor Co-authored-by: Simon Alibert Co-authored-by: Steven Palma Co-authored-by: Dana Aubakirova <118912928+danaaubakirova@users.noreply.github.com> Co-authored-by: Remi Co-authored-by: Ben Zhang <5977478+ben-z@users.noreply.github.com> Co-authored-by: Pepijn Co-authored-by: Dhruva <51377003+utterwqlnut@users.noreply.github.com> Co-authored-by: Daisuke Sato Co-authored-by: Sarunas Kalade Co-authored-by: koenvanwijk Co-authored-by: Yushun Xiang <73413365+YushunXiang@users.noreply.github.com> Co-authored-by: danaaubakirova Co-authored-by: Quentin Gallouédec <45557362+qgallouedec@users.noreply.github.com> Co-authored-by: Michel Aractingi Co-authored-by: Eugene Mironov Co-authored-by: s1lent4gnt Co-authored-by: Ke Wang Co-authored-by: Yoel Chornton Co-authored-by: Michel Aractingi Co-authored-by: tidely <43219534+tidely@users.noreply.github.com> Co-authored-by: David <17435126+DavidLMS@users.noreply.github.com> --- lerobot/common/constants.py | 1 + lerobot/common/policies/act/modeling_act.py | 36 +++++---- .../policies/diffusion/modeling_diffusion.py | 36 +++++---- lerobot/common/policies/pi0/modeling_pi0.py | 5 ++ .../policies/pi0fast/modeling_pi0fast.py | 5 ++ lerobot/common/policies/pretrained.py | 9 +++ lerobot/common/policies/sac/modeling_sac.py | 5 ++ .../sac/reward_model/modeling_classifier.py | 7 ++ .../policies/smolvla/modeling_smolvla.py | 69 ++++++++++------ .../common/policies/tdmpc/modeling_tdmpc.py | 80 ++++++++++--------- .../common/policies/vqbet/modeling_vqbet.py | 32 ++++---- 11 files changed, 176 insertions(+), 109 deletions(-) diff --git a/lerobot/common/constants.py b/lerobot/common/constants.py index 990f2aa1e..30777239e 100644 --- a/lerobot/common/constants.py +++ b/lerobot/common/constants.py @@ -25,6 +25,7 @@ ACTION = "action" REWARD = "next.reward" ROBOTS = "robots" +ROBOT_TYPE = "robot_type" TELEOPERATORS = "teleoperators" # files & directories diff --git a/lerobot/common/policies/act/modeling_act.py b/lerobot/common/policies/act/modeling_act.py index e7e74bf38..122066577 100644 --- a/lerobot/common/policies/act/modeling_act.py +++ b/lerobot/common/policies/act/modeling_act.py @@ -33,6 +33,7 @@ from torch import Tensor, nn from torchvision.models._utils import IntermediateLayerGetter from torchvision.ops.misc import FrozenBatchNorm2d +from lerobot.common.constants import ACTION, OBS_IMAGES from lerobot.common.policies.act.configuration_act import ACTConfig from lerobot.common.policies.normalize import Normalize, Unnormalize from 
lerobot.common.policies.pretrained import PreTrainedPolicy @@ -114,46 +115,49 @@ class ACTPolicy(PreTrainedPolicy): environment. It works by managing the actions in a queue and only calling `select_actions` when the queue is empty. """ - self.eval() + self.eval() # keeping the policy in eval mode as it could be set to train mode while queue is consumed - batch = self.normalize_inputs(batch) - if self.config.image_features: - batch = dict(batch) # shallow copy so that adding a key doesn't modify the original - batch["observation.images"] = [batch[key] for key in self.config.image_features] - - # If we are doing temporal ensembling, do online updates where we keep track of the number of actions - # we are ensembling over. if self.config.temporal_ensemble_coeff is not None: - actions = self.model(batch)[0] # (batch_size, chunk_size, action_dim) - actions = self.unnormalize_outputs({"action": actions})["action"] + actions = self.predict_action_chunk(batch) action = self.temporal_ensembler.update(actions) return action # Action queue logic for n_action_steps > 1. When the action_queue is depleted, populate it by # querying the policy. if len(self._action_queue) == 0: - actions = self.model(batch)[0][:, : self.config.n_action_steps] - - # TODO(rcadene): make _forward return output dictionary? - actions = self.unnormalize_outputs({"action": actions})["action"] + actions = self.predict_action_chunk(batch)[:, : self.config.n_action_steps] # `self.model.forward` returns a (batch_size, n_action_steps, action_dim) tensor, but the queue # effectively has shape (n_action_steps, batch_size, *), hence the transpose. self._action_queue.extend(actions.transpose(0, 1)) return self._action_queue.popleft() + @torch.no_grad + def predict_action_chunk(self, batch: dict[str, Tensor]) -> Tensor: + """Predict a chunk of actions given environment observations.""" + self.eval() + + batch = self.normalize_inputs(batch) + if self.config.image_features: + batch = dict(batch) # shallow copy so that adding a key doesn't modify the original + batch[OBS_IMAGES] = [batch[key] for key in self.config.image_features] + + actions = self.model(batch)[0] + actions = self.unnormalize_outputs({ACTION: actions})[ACTION] + return actions + def forward(self, batch: dict[str, Tensor]) -> tuple[Tensor, dict]: """Run the batch through the model and compute the loss for training or validation.""" batch = self.normalize_inputs(batch) if self.config.image_features: batch = dict(batch) # shallow copy so that adding a key doesn't modify the original - batch["observation.images"] = [batch[key] for key in self.config.image_features] + batch[OBS_IMAGES] = [batch[key] for key in self.config.image_features] batch = self.normalize_targets(batch) actions_hat, (mu_hat, log_sigma_x2_hat) = self.model(batch) l1_loss = ( - F.l1_loss(batch["action"], actions_hat, reduction="none") * ~batch["action_is_pad"].unsqueeze(-1) + F.l1_loss(batch[ACTION], actions_hat, reduction="none") * ~batch["action_is_pad"].unsqueeze(-1) ).mean() loss_dict = {"l1_loss": l1_loss.item()} diff --git a/lerobot/common/policies/diffusion/modeling_diffusion.py b/lerobot/common/policies/diffusion/modeling_diffusion.py index 446e2cb6e..038136d07 100644 --- a/lerobot/common/policies/diffusion/modeling_diffusion.py +++ b/lerobot/common/policies/diffusion/modeling_diffusion.py @@ -33,7 +33,7 @@ from diffusers.schedulers.scheduling_ddim import DDIMScheduler from diffusers.schedulers.scheduling_ddpm import DDPMScheduler from torch import Tensor, nn -from lerobot.common.constants import 
OBS_ENV_STATE, OBS_STATE +from lerobot.common.constants import ACTION, OBS_ENV_STATE, OBS_IMAGES, OBS_STATE from lerobot.common.policies.diffusion.configuration_diffusion import DiffusionConfig from lerobot.common.policies.normalize import Normalize, Unnormalize from lerobot.common.policies.pretrained import PreTrainedPolicy @@ -99,6 +99,18 @@ class DiffusionPolicy(PreTrainedPolicy): if self.config.env_state_feature: self._queues["observation.environment_state"] = deque(maxlen=self.config.n_obs_steps) + @torch.no_grad + def predict_action_chunk(self, batch: dict[str, Tensor]) -> Tensor: + """Predict a chunk of actions given environment observations.""" + # stack n latest observations from the queue + batch = {k: torch.stack(list(self._queues[k]), dim=1) for k in batch if k in self._queues} + actions = self.diffusion.generate_actions(batch) + + # TODO(rcadene): make above methods return output dictionary? + actions = self.unnormalize_outputs({ACTION: actions})[ACTION] + + return actions + @torch.no_grad def select_action(self, batch: dict[str, Tensor]) -> Tensor: """Select a single action given environment observations. @@ -124,23 +136,15 @@ class DiffusionPolicy(PreTrainedPolicy): batch = self.normalize_inputs(batch) if self.config.image_features: batch = dict(batch) # shallow copy so that adding a key doesn't modify the original - batch["observation.images"] = torch.stack( - [batch[key] for key in self.config.image_features], dim=-4 - ) + batch[OBS_IMAGES] = torch.stack([batch[key] for key in self.config.image_features], dim=-4) # Note: It's important that this happens after stacking the images into a single key. self._queues = populate_queues(self._queues, batch) - if len(self._queues["action"]) == 0: - # stack n latest observations from the queue - batch = {k: torch.stack(list(self._queues[k]), dim=1) for k in batch if k in self._queues} - actions = self.diffusion.generate_actions(batch) + if len(self._queues[ACTION]) == 0: + actions = self.predict_action_chunk(batch) + self._queues[ACTION].extend(actions.transpose(0, 1)) - # TODO(rcadene): make above methods return output dictionary? 
- actions = self.unnormalize_outputs({"action": actions})["action"] - - self._queues["action"].extend(actions.transpose(0, 1)) - - action = self._queues["action"].popleft() + action = self._queues[ACTION].popleft() return action def forward(self, batch: dict[str, Tensor]) -> tuple[Tensor, None]: @@ -148,9 +152,7 @@ class DiffusionPolicy(PreTrainedPolicy): batch = self.normalize_inputs(batch) if self.config.image_features: batch = dict(batch) # shallow copy so that adding a key doesn't modify the original - batch["observation.images"] = torch.stack( - [batch[key] for key in self.config.image_features], dim=-4 - ) + batch[OBS_IMAGES] = torch.stack([batch[key] for key in self.config.image_features], dim=-4) batch = self.normalize_targets(batch) loss = self.diffusion.compute_loss(batch) # no output_dict so returning None diff --git a/lerobot/common/policies/pi0/modeling_pi0.py b/lerobot/common/policies/pi0/modeling_pi0.py index 1d8a50559..97e66a272 100644 --- a/lerobot/common/policies/pi0/modeling_pi0.py +++ b/lerobot/common/policies/pi0/modeling_pi0.py @@ -260,6 +260,11 @@ class PI0Policy(PreTrainedPolicy): def get_optim_params(self) -> dict: return self.parameters() + @torch.no_grad + def predict_action_chunk(self, batch: dict[str, Tensor]) -> Tensor: + """Predict a chunk of actions given environment observations.""" + raise NotImplementedError("Currently not implemented for PI0") + @torch.no_grad def select_action(self, batch: dict[str, Tensor], noise: Tensor | None = None) -> Tensor: """Select a single action given environment observations. diff --git a/lerobot/common/policies/pi0fast/modeling_pi0fast.py b/lerobot/common/policies/pi0fast/modeling_pi0fast.py index 7102bdded..dbf5266b1 100644 --- a/lerobot/common/policies/pi0fast/modeling_pi0fast.py +++ b/lerobot/common/policies/pi0fast/modeling_pi0fast.py @@ -192,6 +192,11 @@ class PI0FASTPolicy(PreTrainedPolicy): actions[:, :, motor_idx] = aloha_gripper_from_angular_inv(actions[:, :, motor_idx]) return actions + @torch.no_grad + def predict_action_chunk(self, batch: dict[str, Tensor]) -> Tensor: + """Predict a chunk of actions given environment observations.""" + raise NotImplementedError("Currently not implemented for PI0FAST") + @torch.no_grad def select_action(self, batch: dict[str, Tensor]) -> Tensor: """Select a single action given environment observations. diff --git a/lerobot/common/policies/pretrained.py b/lerobot/common/policies/pretrained.py index 58eef9baf..bc9276d0c 100644 --- a/lerobot/common/policies/pretrained.py +++ b/lerobot/common/policies/pretrained.py @@ -171,6 +171,15 @@ class PreTrainedPolicy(nn.Module, HubMixin, abc.ABC): """ raise NotImplementedError + @abc.abstractmethod + def predict_action_chunk(self, batch: dict[str, Tensor]) -> Tensor: + """Returns the action chunk (for action chunking policies) for a given observation, potentially in batch mode. + + Child classes using action chunking should use this method within `select_action` to form the action chunk + cached for selection. + """ + raise NotImplementedError + @abc.abstractmethod def select_action(self, batch: dict[str, Tensor]) -> Tensor: """Return one action to run in the environment (potentially in batch mode). 
diff --git a/lerobot/common/policies/sac/modeling_sac.py b/lerobot/common/policies/sac/modeling_sac.py index b588115ea..1ca469351 100644 --- a/lerobot/common/policies/sac/modeling_sac.py +++ b/lerobot/common/policies/sac/modeling_sac.py @@ -76,6 +76,11 @@ class SACPolicy( """Reset the policy""" pass + @torch.no_grad + def predict_action_chunk(self, batch: dict[str, Tensor]) -> Tensor: + """Predict a chunk of actions given environment observations.""" + raise NotImplementedError("SACPolicy does not support action chunking. It returns single actions!") + @torch.no_grad() def select_action(self, batch: dict[str, Tensor]) -> Tensor: """Select action for inference/evaluation""" diff --git a/lerobot/common/policies/sac/reward_model/modeling_classifier.py b/lerobot/common/policies/sac/reward_model/modeling_classifier.py index f537e3aef..7fec67f1a 100644 --- a/lerobot/common/policies/sac/reward_model/modeling_classifier.py +++ b/lerobot/common/policies/sac/reward_model/modeling_classifier.py @@ -308,6 +308,13 @@ class Classifier(PreTrainedPolicy): """ raise NotImplementedError("Reward classifiers do not select actions") + def predict_action_chunk(self, batch: dict[str, Tensor]) -> Tensor: + """ + This method is required by PreTrainedPolicy but not used for reward classifiers. + The reward classifier is not an actor and does not produce action chunks. + """ + raise NotImplementedError("Reward classifiers do not predict action chunks") + def reset(self): """ This method is required by PreTrainedPolicy but not used for reward classifiers. diff --git a/lerobot/common/policies/smolvla/modeling_smolvla.py b/lerobot/common/policies/smolvla/modeling_smolvla.py index 5e0a9622e..361999844 100644 --- a/lerobot/common/policies/smolvla/modeling_smolvla.py +++ b/lerobot/common/policies/smolvla/modeling_smolvla.py @@ -383,6 +383,45 @@ class SmolVLAPolicy(PreTrainedPolicy): def get_optim_params(self) -> dict: return self.parameters() + def _get_action_chunk(self, batch: dict[str, Tensor], noise: Tensor | None = None) -> Tensor: + for k in batch: + if k in self._queues: + batch[k] = torch.stack(list(self._queues[k]), dim=1) + + images, img_masks = self.prepare_images(batch) + state = self.prepare_state(batch) + lang_tokens, lang_masks = self.prepare_language(batch) + + actions = self.model.sample_actions(images, img_masks, lang_tokens, lang_masks, state, noise=noise) + + # Unpad actions + original_action_dim = self.config.action_feature.shape[0] + actions = actions[:, :, :original_action_dim] + + actions = self.unnormalize_outputs({ACTION: actions})[ACTION] + + if self.config.adapt_to_pi_aloha: + actions = self._pi_aloha_encode_actions(actions) + + return actions + + def _prepare_batch(self, batch: dict[str, Tensor]) -> dict[str, Tensor]: + if self.config.adapt_to_pi_aloha: + batch[OBS_STATE] = self._pi_aloha_decode_state(batch[OBS_STATE]) + + batch = self.normalize_inputs(batch) + + return batch + + def predict_action_chunk(self, batch: dict[str, Tensor], noise: Tensor | None = None) -> Tensor: + self.eval() + + batch = self._prepare_batch(batch) + self._queues = populate_queues(self._queues, batch, exclude_keys=[ACTION]) + + actions = self._get_action_chunk(batch, noise) + return actions + @torch.no_grad def select_action(self, batch: dict[str, Tensor], noise: Tensor | None = None) -> Tensor: """Select a single action given environment observations. @@ -392,38 +431,18 @@ class SmolVLAPolicy(PreTrainedPolicy): queue is empty. 
""" self.eval() - - if self.config.adapt_to_pi_aloha: - batch[OBS_STATE] = self._pi_aloha_decode_state(batch[OBS_STATE]) - - batch = self.normalize_inputs(batch) - + batch = self._prepare_batch(batch) self._queues = populate_queues(self._queues, batch, exclude_keys=[ACTION]) + # Action queue logic for n_action_steps > 1. When the action_queue is depleted, populate it by # querying the policy. if len(self._queues[ACTION]) == 0: - for k in batch: - if k in self._queues: - batch[k] = torch.stack(list(self._queues[k]), dim=1) - images, img_masks = self.prepare_images(batch) - state = self.prepare_state(batch) - lang_tokens, lang_masks = self.prepare_language(batch) + actions = self._get_action_chunk(batch, noise) - actions = self.model.sample_actions( - images, img_masks, lang_tokens, lang_masks, state, noise=noise - ) - # Unpad actions - original_action_dim = self.config.action_feature.shape[0] - actions = actions[:, :, :original_action_dim] - - actions = self.unnormalize_outputs({"action": actions})["action"] - - if self.config.adapt_to_pi_aloha: - actions = self._pi_aloha_encode_actions(actions) - - # `self.model.forward` returns a (batch_size, n_action_steps, action_dim) tensor, but the queue + # `self.predict_action_chunk` returns a (batch_size, n_action_steps, action_dim) tensor, but the queue # effectively has shape (n_action_steps, batch_size, *), hence the transpose. self._queues[ACTION].extend(actions.transpose(0, 1)[: self.config.n_action_steps]) + return self._queues[ACTION].popleft() def forward(self, batch: dict[str, Tensor], noise=None, time=None) -> dict[str, Tensor]: diff --git a/lerobot/common/policies/tdmpc/modeling_tdmpc.py b/lerobot/common/policies/tdmpc/modeling_tdmpc.py index 476e6decd..4bb564f8f 100644 --- a/lerobot/common/policies/tdmpc/modeling_tdmpc.py +++ b/lerobot/common/policies/tdmpc/modeling_tdmpc.py @@ -35,7 +35,7 @@ import torch.nn as nn import torch.nn.functional as F # noqa: N812 from torch import Tensor -from lerobot.common.constants import OBS_ENV_STATE, OBS_STATE +from lerobot.common.constants import ACTION, OBS_ENV_STATE, OBS_IMAGE, OBS_STATE, REWARD from lerobot.common.policies.normalize import Normalize, Unnormalize from lerobot.common.policies.pretrained import PreTrainedPolicy from lerobot.common.policies.tdmpc.configuration_tdmpc import TDMPCConfig @@ -110,52 +110,58 @@ class TDMPCPolicy(PreTrainedPolicy): # CEM for the next step. self._prev_mean: torch.Tensor | None = None + @torch.no_grad + def predict_action_chunk(self, batch: dict[str, Tensor]) -> Tensor: + """Predict a chunk of actions given environment observations.""" + batch = {key: torch.stack(list(self._queues[key]), dim=1) for key in batch if key in self._queues} + + # Remove the time dimensions as it is not handled yet. + for key in batch: + assert batch[key].shape[1] == 1 + batch[key] = batch[key][:, 0] + + # NOTE: Order of observations matters here. + encode_keys = [] + if self.config.image_features: + encode_keys.append(OBS_IMAGE) + if self.config.env_state_feature: + encode_keys.append(OBS_ENV_STATE) + encode_keys.append(OBS_STATE) + z = self.model.encode({k: batch[k] for k in encode_keys}) + if self.config.use_mpc: # noqa: SIM108 + actions = self.plan(z) # (horizon, batch, action_dim) + else: + # Plan with the policy (π) alone. This always returns one action so unsqueeze to get a + # sequence dimension like in the MPC branch. 
+ actions = self.model.pi(z).unsqueeze(0) + + actions = torch.clamp(actions, -1, +1) + + actions = self.unnormalize_outputs({ACTION: actions})[ACTION] + return actions + @torch.no_grad() def select_action(self, batch: dict[str, Tensor]) -> Tensor: """Select a single action given environment observations.""" batch = self.normalize_inputs(batch) if self.config.image_features: batch = dict(batch) # shallow copy so that adding a key doesn't modify the original - batch["observation.image"] = batch[next(iter(self.config.image_features))] + batch[OBS_IMAGE] = batch[next(iter(self.config.image_features))] self._queues = populate_queues(self._queues, batch) # When the action queue is depleted, populate it again by querying the policy. - if len(self._queues["action"]) == 0: - batch = {key: torch.stack(list(self._queues[key]), dim=1) for key in batch if key in self._queues} - - # Remove the time dimensions as it is not handled yet. - for key in batch: - assert batch[key].shape[1] == 1 - batch[key] = batch[key][:, 0] - - # NOTE: Order of observations matters here. - encode_keys = [] - if self.config.image_features: - encode_keys.append("observation.image") - if self.config.env_state_feature: - encode_keys.append("observation.environment_state") - encode_keys.append("observation.state") - z = self.model.encode({k: batch[k] for k in encode_keys}) - if self.config.use_mpc: # noqa: SIM108 - actions = self.plan(z) # (horizon, batch, action_dim) - else: - # Plan with the policy (π) alone. This always returns one action so unsqueeze to get a - # sequence dimension like in the MPC branch. - actions = self.model.pi(z).unsqueeze(0) - - actions = torch.clamp(actions, -1, +1) - - actions = self.unnormalize_outputs({"action": actions})["action"] + if len(self._queues[ACTION]) == 0: + actions = self.predict_action_chunk(batch) if self.config.n_action_repeats > 1: for _ in range(self.config.n_action_repeats): - self._queues["action"].append(actions[0]) + self._queues[ACTION].append(actions[0]) else: # Action queue is (n_action_steps, batch_size, action_dim), so we transpose the action. - self._queues["action"].extend(actions[: self.config.n_action_steps]) + self._queues[ACTION].extend(actions[: self.config.n_action_steps]) - action = self._queues["action"].popleft() + action = self._queues[ACTION].popleft() return action @torch.no_grad() @@ -312,7 +318,7 @@ class TDMPCPolicy(PreTrainedPolicy): batch = self.normalize_inputs(batch) if self.config.image_features: batch = dict(batch) # shallow copy so that adding a key doesn't modify the original - batch["observation.image"] = batch[next(iter(self.config.image_features))] + batch[OBS_IMAGE] = batch[next(iter(self.config.image_features))] batch = self.normalize_targets(batch) info = {} @@ -322,15 +328,15 @@ class TDMPCPolicy(PreTrainedPolicy): if isinstance(batch[key], torch.Tensor) and batch[key].ndim > 1: batch[key] = batch[key].transpose(1, 0) - action = batch["action"] # (t, b, action_dim) - reward = batch["next.reward"] # (t, b) + action = batch[ACTION] # (t, b, action_dim) + reward = batch[REWARD] # (t, b) observations = {k: v for k, v in batch.items() if k.startswith("observation.")} # Apply random image augmentations. 
if self.config.image_features and self.config.max_random_shift_ratio > 0: - observations["observation.image"] = flatten_forward_unflatten( + observations[OBS_IMAGE] = flatten_forward_unflatten( partial(random_shifts_aug, max_random_shift_ratio=self.config.max_random_shift_ratio), - observations["observation.image"], + observations[OBS_IMAGE], ) # Get the current observation for predicting trajectories, and all future observations for use in @@ -340,7 +346,7 @@ class TDMPCPolicy(PreTrainedPolicy): current_observation[k] = observations[k][0] next_observations[k] = observations[k][1:] horizon, batch_size = next_observations[ - "observation.image" if self.config.image_features else "observation.environment_state" + OBS_IMAGE if self.config.image_features else OBS_ENV_STATE ].shape[:2] # Run latent rollout using the latent dynamics model and policy model. diff --git a/lerobot/common/policies/vqbet/modeling_vqbet.py b/lerobot/common/policies/vqbet/modeling_vqbet.py index 44006a5b2..a76bea2ab 100644 --- a/lerobot/common/policies/vqbet/modeling_vqbet.py +++ b/lerobot/common/policies/vqbet/modeling_vqbet.py @@ -27,6 +27,7 @@ import torch.nn.functional as F # noqa: N812 import torchvision from torch import Tensor, nn +from lerobot.common.constants import ACTION, OBS_IMAGES, OBS_STATE from lerobot.common.policies.normalize import Normalize, Unnormalize from lerobot.common.policies.pretrained import PreTrainedPolicy from lerobot.common.policies.utils import get_device_from_parameters, get_output_shape, populate_queues @@ -118,11 +119,18 @@ class VQBeTPolicy(PreTrainedPolicy): queues are populated during rollout of the policy, they contain the n latest observations and actions """ self._queues = { - "observation.images": deque(maxlen=self.config.n_obs_steps), - "observation.state": deque(maxlen=self.config.n_obs_steps), - "action": deque(maxlen=self.config.action_chunk_size), + OBS_IMAGES: deque(maxlen=self.config.n_obs_steps), + OBS_STATE: deque(maxlen=self.config.n_obs_steps), + ACTION: deque(maxlen=self.config.action_chunk_size), } + @torch.no_grad + def predict_action_chunk(self, batch: dict[str, Tensor]) -> Tensor: + batch = {k: torch.stack(list(self._queues[k]), dim=1) for k in batch if k in self._queues} + actions = self.vqbet(batch, rollout=True)[:, : self.config.action_chunk_size] + actions = self.unnormalize_outputs({ACTION: actions})[ACTION] + return actions + @torch.no_grad def select_action(self, batch: dict[str, Tensor]) -> Tensor: """Select a single action given environment observations. 
@@ -144,23 +152,19 @@ class VQBeTPolicy(PreTrainedPolicy): stacklevel=1, ) - if len(self._queues["action"]) == 0: - batch = {k: torch.stack(list(self._queues[k]), dim=1) for k in batch if k in self._queues} - actions = self.vqbet(batch, rollout=True)[:, : self.config.action_chunk_size] - - # the dimension of returned action is (batch_size, action_chunk_size, action_dim) - actions = self.unnormalize_outputs({"action": actions})["action"] + if len(self._queues[ACTION]) == 0: + actions = self.predict_action_chunk(batch) # since the data in the action queue's dimension is (action_chunk_size, batch_size, action_dim), we transpose the action and fill the queue - self._queues["action"].extend(actions.transpose(0, 1)) + self._queues[ACTION].extend(actions.transpose(0, 1)) - action = self._queues["action"].popleft() + action = self._queues[ACTION].popleft() return action def forward(self, batch: dict[str, Tensor]) -> tuple[Tensor, dict]: """Run the batch through the model and compute the loss for training or validation.""" batch = self.normalize_inputs(batch) batch = dict(batch) # shallow copy so that adding a key doesn't modify the original - batch["observation.images"] = torch.stack([batch[key] for key in self.config.image_features], dim=-4) + batch[OBS_IMAGES] = torch.stack([batch[key] for key in self.config.image_features], dim=-4) batch = self.normalize_targets(batch) # VQ-BeT discretizes action using VQ-VAE before training BeT (please refer to section 3.2 in the VQ-BeT paper https://huggingface.co/papers/2403.03181) if not self.vqbet.action_head.vqvae_model.discretized.item(): @@ -168,7 +172,7 @@ class VQBeTPolicy(PreTrainedPolicy): # n_different_codes: how many of the total possible VQ codes are being used in single batch (how many of them have at least one encoder embedding as a nearest neighbor). This can be at most `vqvae_n_embed * number of layers of RVQ (=2)`. # n_different_combinations: how many different code combinations are being used out of all possible combinations in single batch. This can be at most `vqvae_n_embed ^ number of layers of RVQ (=2)` (hint consider the RVQ as a decision tree). 
loss, n_different_codes, n_different_combinations, recon_l1_error = ( - self.vqbet.action_head.discretize(self.config.n_vqvae_training_steps, batch["action"]) + self.vqbet.action_head.discretize(self.config.n_vqvae_training_steps, batch[ACTION]) ) return loss, { "n_different_codes": n_different_codes, @@ -404,7 +408,7 @@ class VQBeTModel(nn.Module): ) # else, it calculate overall loss (bin prediction loss, and offset loss) else: - output = batch["action"][:, self.select_target_actions_indices] + output = batch[ACTION][:, self.select_target_actions_indices] loss = self.action_head.loss_fn(action_head_output, output, reduction="mean") return action_head_output, loss From 2f9ba4e2cc97a2be93a2e3a8eb27f498a4eb4861 Mon Sep 17 00:00:00 2001 From: Pepijn <138571049+pkooij@users.noreply.github.com> Date: Fri, 27 Jun 2025 11:57:24 +0200 Subject: [PATCH 7/7] Add api examples IL docs (#1391) * feat: add api examples for record, replay, eval for il * fix: Add typings utils.py * fix: Add inference to text eval * fix: Add placeholders dataset and policy repo_ids * fix: Improve text * fix: Add type to 3rd ;) * chore(docs): update API examples for replay, eval and record --------- Co-authored-by: Steven Palma --- docs/source/il_robots.mdx | 233 +++++++++++++++++++++++++++++++++- lerobot/common/utils/utils.py | 4 +- 2 files changed, 230 insertions(+), 7 deletions(-) diff --git a/docs/source/il_robots.mdx b/docs/source/il_robots.mdx index fb99797ee..8bc16b753 100644 --- a/docs/source/il_robots.mdx +++ b/docs/source/il_robots.mdx @@ -154,7 +154,10 @@ HF_USER=$(huggingface-cli whoami | head -n 1) echo $HF_USER ``` -Now you can record a dataset. To record 2 episodes and upload your dataset to the hub, execute this command tailored to the SO101. +Now you can record a dataset. To record 5 episodes and upload your dataset to the hub, adapt the code below for your robot and execute the command or API example. 
<hfoptions id="record_dataset">
<hfoption id="Command">

```bash
python -m lerobot.record \
    --robot.type=so101_follower \
    --robot.port=/dev/tty.usbmodem58760431541 \
    --robot.cameras="{ front: {type: opencv, index_or_path: 0, width: 1920, height: 1080, fps: 30}}" \
    --robot.id=my_awesome_follower_arm \
    --teleop.type=so101_leader \
    --teleop.port=/dev/tty.usbmodem58760431551 \
    --teleop.id=my_awesome_leader_arm \
    --display_data=true \
    --dataset.repo_id=${HF_USER}/record-test \
-    --dataset.num_episodes=2 \
+    --dataset.num_episodes=5 \
    --dataset.single_task="Grab the black cube"
```

</hfoption>
<hfoption id="API example">

```python
from lerobot.common.cameras.opencv.configuration_opencv import OpenCVCameraConfig
from lerobot.common.datasets.lerobot_dataset import LeRobotDataset
from lerobot.common.datasets.utils import hw_to_dataset_features
from lerobot.common.robots.so100_follower import SO100Follower, SO100FollowerConfig
from lerobot.common.teleoperators.so100_leader.config_so100_leader import SO100LeaderConfig
from lerobot.common.teleoperators.so100_leader.so100_leader import SO100Leader
from lerobot.common.utils.control_utils import init_keyboard_listener
from lerobot.common.utils.utils import log_say
from lerobot.common.utils.visualization_utils import _init_rerun
from lerobot.record import record_loop

NUM_EPISODES = 5
FPS = 30
EPISODE_TIME_SEC = 60
RESET_TIME_SEC = 10
TASK_DESCRIPTION = "My task description"

# Create the robot and teleoperator configurations
camera_config = {"front": OpenCVCameraConfig(index_or_path=0, width=640, height=480, fps=FPS)}
robot_config = SO100FollowerConfig(
    port="/dev/tty.usbmodem58760434471", id="my_awesome_follower_arm", cameras=camera_config
)
teleop_config = SO100LeaderConfig(port="/dev/tty.usbmodem585A0077581", id="my_awesome_leader_arm")

# Initialize the robot and teleoperator
robot = SO100Follower(robot_config)
teleop = SO100Leader(teleop_config)

# Configure the dataset features
action_features = hw_to_dataset_features(robot.action_features, "action")
obs_features = hw_to_dataset_features(robot.observation_features, "observation")
dataset_features = {**action_features, **obs_features}

# Create the dataset
dataset = LeRobotDataset.create(
    repo_id="<hf_username>/<dataset_repo_id>",
    fps=FPS,
    features=dataset_features,
    robot_type=robot.name,
    use_videos=True,
    image_writer_threads=4,
)

# Initialize the keyboard listener and rerun visualization
_, events = init_keyboard_listener()
_init_rerun(session_name="recording")

# Connect the robot and teleoperator
robot.connect()
teleop.connect()

for episode_idx in range(NUM_EPISODES):
    log_say(f"Recording episode {episode_idx + 1} of {NUM_EPISODES}")

    record_loop(
        robot=robot,
        events=events,
        fps=FPS,
        teleop=teleop,
        dataset=dataset,
        control_time_s=EPISODE_TIME_SEC,
        single_task=TASK_DESCRIPTION,
        display_data=True,
    )

    # Reset the environment if not stopping or re-recording
    if not events["stop_recording"] and (episode_idx < NUM_EPISODES - 1 or events["rerecord_episode"]):
        log_say("Reset the environment")
        record_loop(
            robot=robot,
            events=events,
            fps=FPS,
            teleop=teleop,
            control_time_s=RESET_TIME_SEC,
            single_task=TASK_DESCRIPTION,
            display_data=True,
        )

    if events["rerecord_episode"]:
        log_say("Re-recording episode")
        events["rerecord_episode"] = False
        events["exit_early"] = False
        dataset.clear_episode_buffer()
        continue

    dataset.save_episode()

    if events["stop_recording"]:
        log_say("Exiting session")
        break

# Clean up
log_say("Stop recording")
robot.disconnect()
teleop.disconnect()
dataset.push_to_hub()
```

</hfoption>
</hfoptions>

#### Dataset upload

Locally, your dataset is stored in this folder: `~/.cache/huggingface/lerobot/{repo-id}`.
At the end of data recording, your dataset will be uploaded to your Hugging Face page (e.g. https://huggingface.co/datasets/cadene/so101_test), which you can obtain by running:
@@ -233,7 +338,10 @@ echo ${HF_USER}/so101_test
```

A useful feature is the `replay` function, which allows you to replay any episode that you've recorded or episodes from any dataset out there. This function helps you test the repeatability of your robot's actions and assess transferability across robots of the same model.

-You can replay the first episode on your robot with:
+You can replay the first episode on your robot with either the command below or the API example:

<hfoptions id="replay_episode">
<hfoption id="Command">

```bash
python -m lerobot.replay \
    --robot.type=so101_follower \
    --robot.port=/dev/tty.usbmodem58760431541 \
    --robot.id=my_awesome_follower_arm \
    --dataset.repo_id=${HF_USER}/record-test \
    --dataset.episode=0 # choose the episode you want to replay
```

</hfoption>
<hfoption id="API example">

```python
import time

from lerobot.common.datasets.lerobot_dataset import LeRobotDataset
from lerobot.common.robots.so100_follower.config_so100_follower import SO100FollowerConfig
from lerobot.common.robots.so100_follower.so100_follower import SO100Follower
from lerobot.common.utils.robot_utils import busy_wait
from lerobot.common.utils.utils import log_say

episode_idx = 0

robot_config = SO100FollowerConfig(port="/dev/tty.usbmodem58760434471", id="my_awesome_follower_arm")

robot = SO100Follower(robot_config)
robot.connect()

dataset = LeRobotDataset("<hf_username>/<dataset_repo_id>", episodes=[episode_idx])
actions = dataset.hf_dataset.select_columns("action")

log_say(f"Replaying episode {episode_idx}")
for idx in range(dataset.num_frames):
    t0 = time.perf_counter()

    action = {
        name: float(actions[idx]["action"][i]) for i, name in enumerate(dataset.features["action"]["names"])
    }
    robot.send_action(action)

    busy_wait(1.0 / dataset.fps - (time.perf_counter() - t0))

robot.disconnect()
```

</hfoption>
</hfoptions>

Your robot should replicate movements similar to those you recorded. For example, check out [this video](https://x.com/RemiCadene/status/1793654950905680090) where we use `replay` on an Aloha robot from [Trossen Robotics](https://www.trossenrobotics.com).
@@ -296,9 +440,12 @@ huggingface-cli upload ${HF_USER}/act_so101_test${CKPT} \
  outputs/train/act_so101_test/checkpoints/${CKPT}/pretrained_model
```

-## Evaluate your policy
+## Run inference and evaluate your policy

-You can use the `record` script from [`lerobot/record.py`](https://github.com/huggingface/lerobot/blob/main/lerobot/record.py) but with a policy checkpoint as input. For instance, run this command to record 10 evaluation episodes:
+You can use the `record` script from [`lerobot/record.py`](https://github.com/huggingface/lerobot/blob/main/lerobot/record.py) with a policy checkpoint as input, to run inference and evaluate your policy.
For instance, run this command or API example to run inference and record evaluation episodes:

<hfoptions id="eval_policy">
<hfoption id="Command">

```bash
python -m lerobot.record \
    --robot.type=so100_follower \
    --robot.port=/dev/tty.usbmodem58760431541 \
    --robot.cameras="{ front: {type: opencv, index_or_path: 0, width: 1920, height: 1080, fps: 30}}" \
    --robot.id=my_awesome_follower_arm \
    --display_data=false \
    --dataset.repo_id=${HF_USER}/eval_so100 \
    --dataset.single_task="Put lego brick into the transparent box" \
    # <- Teleop optional if you want to teleoperate in between episodes \
    # --teleop.type=so100_leader \
    # --teleop.port=/dev/tty.usbmodem58760431551 \
    # --teleop.id=my_awesome_leader_arm \
    --policy.path=${HF_USER}/my_policy
```

</hfoption>
<hfoption id="API example">

```python
from lerobot.common.cameras.opencv.configuration_opencv import OpenCVCameraConfig
from lerobot.common.datasets.lerobot_dataset import LeRobotDataset
from lerobot.common.datasets.utils import hw_to_dataset_features
from lerobot.common.policies.act.modeling_act import ACTPolicy
from lerobot.common.robots.so100_follower.config_so100_follower import SO100FollowerConfig
from lerobot.common.robots.so100_follower.so100_follower import SO100Follower
from lerobot.common.utils.control_utils import init_keyboard_listener
from lerobot.common.utils.utils import log_say
from lerobot.common.utils.visualization_utils import _init_rerun
from lerobot.record import record_loop

NUM_EPISODES = 5
FPS = 30
EPISODE_TIME_SEC = 60
TASK_DESCRIPTION = "My task description"

# Create the robot configuration
camera_config = {"front": OpenCVCameraConfig(index_or_path=0, width=640, height=480, fps=FPS)}
robot_config = SO100FollowerConfig(
    port="/dev/tty.usbmodem58760434471", id="my_awesome_follower_arm", cameras=camera_config
)

# Initialize the robot
robot = SO100Follower(robot_config)

# Initialize the policy
policy = ACTPolicy.from_pretrained("<hf_username>/<my_policy_repo_id>")

# Configure the dataset features
action_features = hw_to_dataset_features(robot.action_features, "action")
obs_features = hw_to_dataset_features(robot.observation_features, "observation")
dataset_features = {**action_features, **obs_features}

# Create the dataset
dataset = LeRobotDataset.create(
    repo_id="<hf_username>/eval_<dataset_repo_id>",
    fps=FPS,
    features=dataset_features,
    robot_type=robot.name,
    use_videos=True,
    image_writer_threads=4,
)

# Initialize the keyboard listener and rerun visualization
_, events = init_keyboard_listener()
_init_rerun(session_name="recording")

# Connect the robot
robot.connect()

for episode_idx in range(NUM_EPISODES):
    log_say(f"Running inference, recording eval episode {episode_idx + 1} of {NUM_EPISODES}")

    # Run the policy inference loop
    record_loop(
        robot=robot,
        events=events,
        fps=FPS,
        policy=policy,
        dataset=dataset,
        control_time_s=EPISODE_TIME_SEC,
        single_task=TASK_DESCRIPTION,
        display_data=True,
    )

    dataset.save_episode()

# Clean up
robot.disconnect()
dataset.push_to_hub()
```

</hfoption>
</hfoptions>

As you can see, it's almost the same command as previously used to record your training dataset. Two things changed:
1. There is an additional `--policy.path` argument which indicates the path to your policy checkpoint (e.g. `outputs/train/act_so101_test/checkpoints/last/pretrained_model`). You can also use the model repository if you uploaded a model checkpoint to the hub (e.g. `${HF_USER}/act_so101_test`).
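The same flexibility applies to `from_pretrained` in the API example above: like `--policy.path`, it accepts either a local checkpoint directory or a hub repo id. A short sketch (both identifiers below are placeholders):

```python
from lerobot.common.policies.act.modeling_act import ACTPolicy

# Load from a local checkpoint directory produced by training
# (placeholder path, adjust to your own training output).
policy = ACTPolicy.from_pretrained("outputs/train/act_so101_test/checkpoints/last/pretrained_model")

# Or load the same checkpoint from the Hugging Face Hub (placeholder repo id).
policy = ACTPolicy.from_pretrained("<hf_username>/act_so101_test")
```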
diff --git a/lerobot/common/utils/utils.py b/lerobot/common/utils/utils.py index cba65ba45..6d9c0338b 100644 --- a/lerobot/common/utils/utils.py +++ b/lerobot/common/utils/utils.py @@ -184,7 +184,7 @@ def capture_timestamp_utc(): return datetime.now(timezone.utc) -def say(text, blocking=False): +def say(text: str, blocking: bool = False): system = platform.system() if system == "Darwin": @@ -212,7 +212,7 @@ def say(text, blocking=False): subprocess.Popen(cmd, creationflags=subprocess.CREATE_NO_WINDOW if system == "Windows" else 0) -def log_say(text, play_sounds, blocking=False): +def log_say(text: str, play_sounds: bool = True, blocking: bool = False): logging.info(text) if play_sounds: