add umi example

This commit is contained in:
Pepijn
2026-04-01 13:48:06 +02:00
parent 15934d8d08
commit 5ac3e568f1
4 changed files with 528 additions and 0 deletions
@@ -0,0 +1,161 @@
#!/usr/bin/env python
# Copyright 2025 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Add ``observation.state`` to an existing UMI LeRobot dataset and recompute
stats for pi0 training with relative EE actions.
UMI datasets already contain ``action`` (absolute EE pose from SLAM) and
images. This script derives ``observation.state`` from the action column
and recomputes statistics with relative action stats.
State-Action Offset:
UMI SLAM produces a single trajectory of EE poses stored as ``action``.
We derive ``observation.state`` from the same trajectory with a
configurable offset:
state[t] = action[t - STATE_ACTION_OFFSET]
With offset=0, state equals action at the same timestep. With offset=1,
state is the previous timestep's action — representing where the gripper
*arrived* (the result of the previous command), which is what the robot
knows at decision time. Offset=1 is the typical UMI convention.
For the first frame(s) of each episode where t < offset, we use the
earliest available action (action[0]) as state.
After adding state, train with standard lerobot-train:
lerobot-train \\
--dataset.repo_id=<your_dataset> \\
--policy.type=pi0 \\
--policy.use_relative_actions=true \\
--policy.relative_exclude_joints='["gripper"]' \\
--policy.pretrained_path=lerobot/pi0_base
Usage:
python convert_umi_dataset.py
"""
from __future__ import annotations
import logging
import numpy as np
from lerobot.datasets.dataset_tools import add_features, recompute_stats
from lerobot.datasets.lerobot_dataset import LeRobotDataset
# Emit INFO-level progress messages from this script on the root logger.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
# ── Configuration ─────────────────────────────────────────────────────────
# Repo id passed to ``LeRobotDataset(...)``; replace the placeholder before running.
HF_DATASET_ID = "<hf_username>/<dataset_repo_id>"
# Offset between state and action indices within each episode.
# 0 → state[t] = action[t] (same instant)
# 1 → state[t] = action[t-1] (state lags by 1 step — typical for UMI)
# Frames with t < offset reuse the first action of their episode.
STATE_ACTION_OFFSET = 1
# Joint names to keep absolute (not converted to relative).
# Forwarded to ``recompute_stats`` and echoed in the suggested training command.
RELATIVE_EXCLUDE_JOINTS: list[str] = ["gripper"]
# pi0 chunk size (for relative stats computation).
# NOTE(review): presumably must match the policy's action chunk length — confirm.
CHUNK_SIZE = 50
# ── Build state from action with offset ──────────────────────────────────
def build_state_array(dataset: LeRobotDataset, offset: int) -> np.ndarray:
    """Derive ``observation.state`` from the action column with a per-episode offset.

    For each frame ``t`` of an episode (frames ordered by ``frame_index``)::

        state[t] = action[max(0, t - offset)]    (clamped to the episode start)

    Args:
        dataset: Object whose ``hf_dataset`` exposes the ``"action"``,
            ``"episode_index"`` and ``"frame_index"`` columns.
        offset: Number of frames the state lags behind the action. ``0`` copies
            the action of the same frame; must be non-negative.

    Returns:
        A ``float32`` array with the same shape as the action column, laid out
        in the same row order as the dataset.

    Raises:
        ValueError: If ``offset`` is negative. A negative offset would index
            past the end of every episode.
    """
    if offset < 0:
        raise ValueError(f"offset must be non-negative, got {offset}")

    hf = dataset.hf_dataset
    actions = np.array(hf["action"], dtype=np.float32)
    episode_indices = np.array(hf["episode_index"])
    frame_indices = np.array(hf["frame_index"])

    states = np.empty_like(actions)
    for ep_idx in np.unique(episode_indices):
        # Global row positions of this episode, ordered by frame index so the
        # shift is applied along time even if rows are stored out of order.
        rows = np.flatnonzero(episode_indices == ep_idx)
        rows = rows[np.argsort(frame_indices[rows], kind="stable")]

        # Local source frame for every local frame: t - offset, clamped at 0 so
        # the first ``offset`` frames reuse the episode's first action.
        source = np.maximum(np.arange(len(rows)) - offset, 0)
        states[rows] = actions[rows[source]]
    return states
def main():
    """Add ``observation.state`` to the configured dataset and recompute its stats.

    Loads ``HF_DATASET_ID``. If the dataset has no ``observation.state`` feature,
    one is derived from the action column via ``build_state_array`` and attached
    with ``add_features``, reusing the action feature's shape and names.
    Statistics are then recomputed with relative-action settings, and a
    ready-to-paste training command is logged.
    """
    # Only needed to render the exclude list as JSON in the logged command.
    import json

    logger.info("Loading dataset %s", HF_DATASET_ID)
    dataset = LeRobotDataset(HF_DATASET_ID)

    if "observation.state" in dataset.features:
        logger.warning("observation.state already exists — skipping add_features")
        augmented = dataset
    else:
        logger.info("Building observation.state from action with offset=%s", STATE_ACTION_OFFSET)
        state_array = build_state_array(dataset, offset=STATE_ACTION_OFFSET)

        # State is a shifted copy of the action, so it shares its shape and names.
        action_meta = dataset.features["action"]
        state_feature_info = {
            "dtype": "float32",
            "shape": list(action_meta["shape"]),
            "names": action_meta.get("names"),
        }
        augmented = add_features(
            dataset,
            features={
                "observation.state": (state_array, state_feature_info),
            },
        )
        logger.info("observation.state added")

    logger.info("Recomputing stats with relative action statistics...")
    recompute_stats(
        augmented,
        relative_action=True,
        relative_exclude_joints=RELATIVE_EXCLUDE_JOINTS,
        chunk_size=CHUNK_SIZE,
    )
    logger.info("Dataset ready at %s", augmented.root)

    # Render the list as JSON (double-quoted items) so it survives the
    # surrounding single quotes when pasted into a shell; the Python repr
    # would produce '['gripper']', which the shell collapses to [gripper].
    exclude_joints_arg = json.dumps(RELATIVE_EXCLUDE_JOINTS)
    logger.info(
        "Train with:\n"
        " lerobot-train \\\n"
        f" --dataset.repo_id={augmented.repo_id} \\\n"
        " --policy.type=pi0 \\\n"
        " --policy.use_relative_actions=true \\\n"
        f" --policy.relative_exclude_joints='{exclude_joints_arg}' \\\n"
        " --policy.pretrained_path=lerobot/pi0_base"
    )


if __name__ == "__main__":
    main()
+227
View File
@@ -0,0 +1,227 @@
#!/usr/bin/env python
# Copyright 2025 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Inference script for a pi0 model trained with **relative EE actions**.
This uses the built-in ``RelativeActionsProcessorStep`` and
``AbsoluteActionsProcessorStep`` that are already wired into pi0's
processor pipeline when ``use_relative_actions=True``.
The inference loop:
1. Reads joint positions from the robot.
2. Converts to EE pose via forward kinematics (FK).
This produces ``observation.state`` with the current EE pose.
3. The pi0 preprocessor:
a) ``RelativeActionsProcessorStep`` caches the raw state.
b) ``NormalizerProcessorStep`` normalizes state and actions.
4. pi0 predicts relative action chunk.
5. The pi0 postprocessor:
a) ``UnnormalizerProcessorStep`` unnormalizes.
b) ``AbsoluteActionsProcessorStep`` adds cached state → absolute EE.
6. IK converts absolute EE → joint targets → robot.
Based on the so100_to_so100_EE/evaluate.py example.
Usage:
python evaluate.py
"""
from __future__ import annotations
from lerobot.cameras.opencv.configuration_opencv import OpenCVCameraConfig
from lerobot.configs.types import FeatureType, PolicyFeature
from lerobot.datasets.feature_utils import combine_feature_dicts
from lerobot.datasets.lerobot_dataset import LeRobotDataset
from lerobot.datasets.pipeline_features import aggregate_pipeline_dataset_features, create_initial_features
from lerobot.model.kinematics import RobotKinematics
from lerobot.policies.factory import make_pre_post_processors
from lerobot.policies.pi0.modeling_pi0 import PI0Policy
from lerobot.processor import (
RobotProcessorPipeline,
make_default_teleop_action_processor,
)
from lerobot.processor.converters import (
observation_to_transition,
robot_action_observation_to_transition,
transition_to_observation,
transition_to_robot_action,
)
from lerobot.robots.so_follower import SO100Follower, SO100FollowerConfig
from lerobot.robots.so_follower.robot_kinematic_processor import (
ForwardKinematicsJointsToEE,
InverseKinematicsEEToJoints,
)
from lerobot.scripts.lerobot_record import record_loop
from lerobot.types import RobotAction, RobotObservation
from lerobot.utils.control_utils import init_keyboard_listener
from lerobot.utils.utils import log_say
from lerobot.utils.visualization_utils import init_rerun
# Number of evaluation episodes the main loop iterates over.
NUM_EPISODES = 5
# Rate shared by the camera config, the created dataset and the record loop.
FPS = 10
# Passed to ``record_loop`` as ``control_time_s`` for both the episode and the reset phase.
EPISODE_TIME_SEC = 60
# Passed to ``record_loop`` as ``single_task``.
TASK_DESCRIPTION = "manipulation task"
# Repo id the policy and its pre/post processors are loaded from; replace the placeholder.
HF_MODEL_ID = "<hf_username>/<model_repo_id>"
# Repo id of the dataset created to hold the recorded evaluation episodes; replace the placeholder.
HF_DATASET_ID = "<hf_username>/<dataset_repo_id>"
# EE feature keys produced by ForwardKinematicsJointsToEE
# Each key ``k`` becomes an ``ee.<k>`` action feature of shape (1,) in the dataset schema.
EE_KEYS = ["x", "y", "z", "wx", "wy", "wz", "gripper_pos"]
def main():
    """Run the trained pi0 policy on an SO100 follower and record evaluation episodes.

    Builds the robot, the policy, an FK observation pipeline (joints → EE) and
    an IK action pipeline (EE → joints). It then creates the dataset the
    evaluation episodes are written to, builds the policy pre/post processors,
    and records until ``NUM_EPISODES`` episodes have been saved or the operator
    requests a stop. On exit the dataset is always finalized and pushed to the
    hub.

    Raises:
        ValueError: If the robot reports it is not connected after ``connect()``.
    """
    camera_config = {"wrist": OpenCVCameraConfig(index_or_path=0, width=640, height=480, fps=FPS)}
    robot_config = SO100FollowerConfig(
        port="/dev/tty.usbmodem5A460814411",
        id="my_awesome_follower_arm",
        cameras=camera_config,
        use_degrees=True,
    )
    robot = SO100Follower(robot_config)
    policy = PI0Policy.from_pretrained(HF_MODEL_ID)

    kinematics_solver = RobotKinematics(
        urdf_path="./SO101/so101_new_calib.urdf",
        target_frame_name="gripper_frame_link",
        joint_names=list(robot.bus.motors.keys()),
    )

    # FK: joint observation → EE observation (produces observation.state)
    robot_joints_to_ee_processor = RobotProcessorPipeline[RobotObservation, RobotObservation](
        steps=[
            ForwardKinematicsJointsToEE(
                kinematics=kinematics_solver,
                motor_names=list(robot.bus.motors.keys()),
            )
        ],
        to_transition=observation_to_transition,
        to_output=transition_to_observation,
    )

    # IK: EE action → joint targets
    robot_ee_to_joints_processor = RobotProcessorPipeline[tuple[RobotAction, RobotObservation], RobotAction](
        steps=[
            InverseKinematicsEEToJoints(
                kinematics=kinematics_solver,
                motor_names=list(robot.bus.motors.keys()),
                initial_guess_current_joints=True,
            ),
        ],
        to_transition=robot_action_observation_to_transition,
        to_output=transition_to_robot_action,
    )

    # Dataset the evaluation episodes are recorded into; its ``meta.stats`` are
    # also handed to the pre/post processors below.
    dataset = LeRobotDataset.create(
        repo_id=HF_DATASET_ID,
        fps=FPS,
        features=combine_feature_dicts(
            aggregate_pipeline_dataset_features(
                pipeline=robot_joints_to_ee_processor,
                initial_features=create_initial_features(observation=robot.observation_features),
                use_videos=True,
            ),
            aggregate_pipeline_dataset_features(
                pipeline=make_default_teleop_action_processor(),
                initial_features=create_initial_features(
                    action={f"ee.{k}": PolicyFeature(type=FeatureType.ACTION, shape=(1,)) for k in EE_KEYS}
                ),
                use_videos=True,
            ),
        ),
        robot_type=robot.name,
        use_videos=True,
        image_writer_threads=4,
    )

    # Build pre/post processors from the trained model.
    # The pi0 processor pipeline already includes:
    # pre: ... → RelativeActionsProcessorStep → NormalizerProcessorStep
    # post: UnnormalizerProcessorStep → AbsoluteActionsProcessorStep → ...
    # These handle the relative ↔ absolute conversion automatically.
    preprocessor, postprocessor = make_pre_post_processors(
        policy_cfg=policy,
        pretrained_path=HF_MODEL_ID,
        dataset_stats=dataset.meta.stats,
        preprocessor_overrides={"device_processor": {"device": str(policy.config.device)}},
    )

    robot.connect()
    listener, events = init_keyboard_listener()
    init_rerun(session_name="umi_pi0_relative_ee_evaluate")

    try:
        if not robot.is_connected:
            raise ValueError("Robot is not connected!")

        print("Starting evaluate loop...")
        # Count only saved episodes: a re-recorded attempt must not use up one
        # of the NUM_EPISODES slots, and a stop request must end the session
        # instead of starting the next episode.
        recorded_episodes = 0
        while recorded_episodes < NUM_EPISODES and not events["stop_recording"]:
            log_say(f"Running inference, recording eval episode {recorded_episodes + 1} of {NUM_EPISODES}")
            record_loop(
                robot=robot,
                events=events,
                fps=FPS,
                policy=policy,
                preprocessor=preprocessor,
                postprocessor=postprocessor,
                dataset=dataset,
                control_time_s=EPISODE_TIME_SEC,
                single_task=TASK_DESCRIPTION,
                display_data=True,
                teleop_action_processor=make_default_teleop_action_processor(),
                robot_action_processor=robot_ee_to_joints_processor,
                robot_observation_processor=robot_joints_to_ee_processor,
            )

            # Give the operator time to reset the scene unless we are stopping
            # or this was the last episode (a re-record always needs a reset).
            if not events["stop_recording"] and (
                (recorded_episodes < NUM_EPISODES - 1) or events["rerecord_episode"]
            ):
                log_say("Reset the environment")
                record_loop(
                    robot=robot,
                    events=events,
                    fps=FPS,
                    control_time_s=EPISODE_TIME_SEC,
                    single_task=TASK_DESCRIPTION,
                    display_data=True,
                    teleop_action_processor=make_default_teleop_action_processor(),
                    robot_action_processor=robot_ee_to_joints_processor,
                    robot_observation_processor=robot_joints_to_ee_processor,
                )

            if events["rerecord_episode"]:
                log_say("Re-record episode")
                events["rerecord_episode"] = False
                events["exit_early"] = False
                # Drop the buffered frames and retry the same episode slot.
                dataset.clear_episode_buffer()
                continue

            dataset.save_episode()
            recorded_episodes += 1
    finally:
        log_say("Stop recording")
        # Guard each cleanup step so a failure to connect (or a missing
        # listener) cannot prevent the dataset from being finalized.
        if robot.is_connected:
            robot.disconnect()
        # NOTE(review): init_keyboard_listener presumably returns None for the
        # listener in headless environments — confirm.
        if listener is not None:
            listener.stop()
        dataset.finalize()
        dataset.push_to_hub()


if __name__ == "__main__":
    main()