add umi example

2026-05-17 01:30:14 +00:00 · 2026-04-01 13:48:06 +02:00
parent 15934d8d08
commit 5ac3e568f1
4 changed files with 528 additions and 0 deletions
@@ -0,0 +1,161 @@
+#!/usr/bin/env python
+
+# Copyright 2025 The HuggingFace Inc. team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+Add ``observation.state`` to an existing UMI LeRobot dataset and recompute
+stats for pi0 training with relative EE actions.
+
+UMI datasets already contain ``action`` (absolute EE pose from SLAM) and
+images. This script derives ``observation.state`` from the action column
+and recomputes statistics with relative action stats.
+
+State-Action Offset:
+UMI SLAM produces a single trajectory of EE poses stored as ``action``.
+We derive ``observation.state`` from the same trajectory with a
+configurable offset:
+
+  state[t] = action[t - STATE_ACTION_OFFSET]
+
+With offset=0, state equals action at the same timestep. With offset=1,
+state is the previous timestep's action — representing where the gripper
+*arrived* (the result of the previous command), which is what the robot
+knows at decision time. Offset=1 is the typical UMI convention.
+
+For the first frame(s) of each episode where t < offset, we use the
+earliest available action (action[0]) as state.
+
+After adding state, train with standard lerobot-train:
+    lerobot-train \\
+        --dataset.repo_id=<your_dataset> \\
+        --policy.type=pi0 \\
+        --policy.use_relative_actions=true \\
+        --policy.relative_exclude_joints='["gripper"]' \\
+        --policy.pretrained_path=lerobot/pi0_base
+
+Usage:
+    python convert_umi_dataset.py
+"""
+
+from __future__ import annotations
+
+import logging
+
+import numpy as np
+
+from lerobot.datasets.dataset_tools import add_features, recompute_stats
+from lerobot.datasets.lerobot_dataset import LeRobotDataset
+
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
+# ── Configuration ─────────────────────────────────────────────────────────
+
+HF_DATASET_ID = "<hf_username>/<dataset_repo_id>"
+
+# Offset between state and action indices within each episode.
+#   0 → state[t] = action[t]       (same instant)
+#   1 → state[t] = action[t-1]     (state lags by 1 step — typical for UMI)
+STATE_ACTION_OFFSET = 1
+
+# Joint names to keep absolute (not converted to relative).
+RELATIVE_EXCLUDE_JOINTS: list[str] = ["gripper"]
+
+# pi0 chunk size (for relative stats computation).
+CHUNK_SIZE = 50
+
+
+# ── Build state from action with offset ──────────────────────────────────
+
+
+def build_state_array(dataset: LeRobotDataset, offset: int) -> np.ndarray:
+    """Derive observation.state from the action column with a per-episode offset.
+
+    For each frame t in an episode:
+        state[t] = action[max(0, t - offset)]  (clamped to episode start)
+    """
+    hf = dataset.hf_dataset
+    actions = np.array(hf["action"], dtype=np.float32)
+    episode_indices = np.array(hf["episode_index"])
+    frame_indices = np.array(hf["frame_index"])
+
+    states = np.empty_like(actions)
+
+    for ep_idx in np.unique(episode_indices):
+        ep_mask = episode_indices == ep_idx
+        ep_global_indices = np.where(ep_mask)[0]
+        ep_actions = actions[ep_global_indices]
+        ep_frames = frame_indices[ep_global_indices]
+
+        sort_order = np.argsort(ep_frames)
+        ep_global_indices = ep_global_indices[sort_order]
+        ep_actions = ep_actions[sort_order]
+
+        n = len(ep_actions)
+        for local_t in range(n):
+            source_t = max(0, local_t - offset)
+            states[ep_global_indices[local_t]] = ep_actions[source_t]
+
+    return states
+
+
+def main():
+    logger.info(f"Loading dataset {HF_DATASET_ID}")
+    dataset = LeRobotDataset(HF_DATASET_ID)
+
+    if "observation.state" in dataset.features:
+        logger.warning("observation.state already exists — skipping add_features")
+        augmented = dataset
+    else:
+        logger.info(f"Building observation.state from action with offset={STATE_ACTION_OFFSET}")
+        state_array = build_state_array(dataset, offset=STATE_ACTION_OFFSET)
+
+        action_meta = dataset.features["action"]
+        state_feature_info = {
+            "dtype": "float32",
+            "shape": list(action_meta["shape"]),
+            "names": action_meta.get("names"),
+        }
+
+        augmented = add_features(
+            dataset,
+            features={
+                "observation.state": (state_array, state_feature_info),
+            },
+        )
+        logger.info("observation.state added")
+
+    logger.info("Recomputing stats with relative action statistics...")
+    recompute_stats(
+        augmented,
+        relative_action=True,
+        relative_exclude_joints=RELATIVE_EXCLUDE_JOINTS,
+        chunk_size=CHUNK_SIZE,
+    )
+
+    logger.info(f"Dataset ready at {augmented.root}")
+    logger.info(
+        "Train with:\n"
+        "  lerobot-train \\\n"
+        f"    --dataset.repo_id={augmented.repo_id} \\\n"
+        "    --policy.type=pi0 \\\n"
+        "    --policy.use_relative_actions=true \\\n"
+        f"    --policy.relative_exclude_joints='{RELATIVE_EXCLUDE_JOINTS}' \\\n"
+        "    --policy.pretrained_path=lerobot/pi0_base"
+    )
+
+
+if __name__ == "__main__":
+    main()
@@ -0,0 +1,227 @@
+#!/usr/bin/env python
+
+# Copyright 2025 The HuggingFace Inc. team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+Inference script for a pi0 model trained with **relative EE actions**.
+
+This uses the built-in ``RelativeActionsProcessorStep`` and
+``AbsoluteActionsProcessorStep`` that are already wired into pi0's
+processor pipeline when ``use_relative_actions=True``.
+
+The inference loop:
+  1. Reads joint positions from the robot.
+  2. Converts to EE pose via forward kinematics (FK).
+     This produces ``observation.state`` with the current EE pose.
+  3. The pi0 preprocessor:
+     a) ``RelativeActionsProcessorStep`` caches the raw state.
+     b) ``NormalizerProcessorStep`` normalizes state and actions.
+  4. pi0 predicts relative action chunk.
+  5. The pi0 postprocessor:
+     a) ``UnnormalizerProcessorStep`` unnormalizes.
+     b) ``AbsoluteActionsProcessorStep`` adds cached state → absolute EE.
+  6. IK converts absolute EE → joint targets → robot.
+
+Based on the so100_to_so100_EE/evaluate.py example.
+
+Usage:
+    python evaluate.py
+"""
+
+from __future__ import annotations
+
+from lerobot.cameras.opencv.configuration_opencv import OpenCVCameraConfig
+from lerobot.configs.types import FeatureType, PolicyFeature
+from lerobot.datasets.feature_utils import combine_feature_dicts
+from lerobot.datasets.lerobot_dataset import LeRobotDataset
+from lerobot.datasets.pipeline_features import aggregate_pipeline_dataset_features, create_initial_features
+from lerobot.model.kinematics import RobotKinematics
+from lerobot.policies.factory import make_pre_post_processors
+from lerobot.policies.pi0.modeling_pi0 import PI0Policy
+from lerobot.processor import (
+    RobotProcessorPipeline,
+    make_default_teleop_action_processor,
+)
+from lerobot.processor.converters import (
+    observation_to_transition,
+    robot_action_observation_to_transition,
+    transition_to_observation,
+    transition_to_robot_action,
+)
+from lerobot.robots.so_follower import SO100Follower, SO100FollowerConfig
+from lerobot.robots.so_follower.robot_kinematic_processor import (
+    ForwardKinematicsJointsToEE,
+    InverseKinematicsEEToJoints,
+)
+from lerobot.scripts.lerobot_record import record_loop
+from lerobot.types import RobotAction, RobotObservation
+from lerobot.utils.control_utils import init_keyboard_listener
+from lerobot.utils.utils import log_say
+from lerobot.utils.visualization_utils import init_rerun
+
+NUM_EPISODES = 5
+FPS = 10
+EPISODE_TIME_SEC = 60
+TASK_DESCRIPTION = "manipulation task"
+
+HF_MODEL_ID = "<hf_username>/<model_repo_id>"
+HF_DATASET_ID = "<hf_username>/<dataset_repo_id>"
+
+# EE feature keys produced by ForwardKinematicsJointsToEE
+EE_KEYS = ["x", "y", "z", "wx", "wy", "wz", "gripper_pos"]
+
+
+def main():
+    camera_config = {"wrist": OpenCVCameraConfig(index_or_path=0, width=640, height=480, fps=FPS)}
+    robot_config = SO100FollowerConfig(
+        port="/dev/tty.usbmodem5A460814411",
+        id="my_awesome_follower_arm",
+        cameras=camera_config,
+        use_degrees=True,
+    )
+    robot = SO100Follower(robot_config)
+
+    policy = PI0Policy.from_pretrained(HF_MODEL_ID)
+
+    kinematics_solver = RobotKinematics(
+        urdf_path="./SO101/so101_new_calib.urdf",
+        target_frame_name="gripper_frame_link",
+        joint_names=list(robot.bus.motors.keys()),
+    )
+
+    # FK: joint observation → EE observation (produces observation.state)
+    robot_joints_to_ee_processor = RobotProcessorPipeline[RobotObservation, RobotObservation](
+        steps=[
+            ForwardKinematicsJointsToEE(
+                kinematics=kinematics_solver,
+                motor_names=list(robot.bus.motors.keys()),
+            )
+        ],
+        to_transition=observation_to_transition,
+        to_output=transition_to_observation,
+    )
+
+    # IK: EE action → joint targets
+    robot_ee_to_joints_processor = RobotProcessorPipeline[tuple[RobotAction, RobotObservation], RobotAction](
+        steps=[
+            InverseKinematicsEEToJoints(
+                kinematics=kinematics_solver,
+                motor_names=list(robot.bus.motors.keys()),
+                initial_guess_current_joints=True,
+            ),
+        ],
+        to_transition=robot_action_observation_to_transition,
+        to_output=transition_to_robot_action,
+    )
+
+    # Dataset handle for stats (used by preprocessor/postprocessor)
+    dataset = LeRobotDataset.create(
+        repo_id=HF_DATASET_ID,
+        fps=FPS,
+        features=combine_feature_dicts(
+            aggregate_pipeline_dataset_features(
+                pipeline=robot_joints_to_ee_processor,
+                initial_features=create_initial_features(observation=robot.observation_features),
+                use_videos=True,
+            ),
+            aggregate_pipeline_dataset_features(
+                pipeline=make_default_teleop_action_processor(),
+                initial_features=create_initial_features(
+                    action={f"ee.{k}": PolicyFeature(type=FeatureType.ACTION, shape=(1,)) for k in EE_KEYS}
+                ),
+                use_videos=True,
+            ),
+        ),
+        robot_type=robot.name,
+        use_videos=True,
+        image_writer_threads=4,
+    )
+
+    # Build pre/post processors from the trained model.
+    # The pi0 processor pipeline already includes:
+    #   pre:  ... → RelativeActionsProcessorStep → NormalizerProcessorStep
+    #   post: UnnormalizerProcessorStep → AbsoluteActionsProcessorStep → ...
+    # These handle the relative ↔ absolute conversion automatically.
+    preprocessor, postprocessor = make_pre_post_processors(
+        policy_cfg=policy,
+        pretrained_path=HF_MODEL_ID,
+        dataset_stats=dataset.meta.stats,
+        preprocessor_overrides={"device_processor": {"device": str(policy.config.device)}},
+    )
+
+    robot.connect()
+
+    listener, events = init_keyboard_listener()
+    init_rerun(session_name="umi_pi0_relative_ee_evaluate")
+
+    try:
+        if not robot.is_connected:
+            raise ValueError("Robot is not connected!")
+
+        print("Starting evaluate loop...")
+        for episode_idx in range(NUM_EPISODES):
+            log_say(f"Running inference, recording eval episode {episode_idx + 1} of {NUM_EPISODES}")
+
+            record_loop(
+                robot=robot,
+                events=events,
+                fps=FPS,
+                policy=policy,
+                preprocessor=preprocessor,
+                postprocessor=postprocessor,
+                dataset=dataset,
+                control_time_s=EPISODE_TIME_SEC,
+                single_task=TASK_DESCRIPTION,
+                display_data=True,
+                teleop_action_processor=make_default_teleop_action_processor(),
+                robot_action_processor=robot_ee_to_joints_processor,
+                robot_observation_processor=robot_joints_to_ee_processor,
+            )
+
+            if not events["stop_recording"] and (
+                (episode_idx < NUM_EPISODES - 1) or events["rerecord_episode"]
+            ):
+                log_say("Reset the environment")
+                record_loop(
+                    robot=robot,
+                    events=events,
+                    fps=FPS,
+                    control_time_s=EPISODE_TIME_SEC,
+                    single_task=TASK_DESCRIPTION,
+                    display_data=True,
+                    teleop_action_processor=make_default_teleop_action_processor(),
+                    robot_action_processor=robot_ee_to_joints_processor,
+                    robot_observation_processor=robot_joints_to_ee_processor,
+                )
+
+            if events["rerecord_episode"]:
+                log_say("Re-record episode")
+                events["rerecord_episode"] = False
+                events["exit_early"] = False
+                dataset.clear_episode_buffer()
+                continue
+
+            dataset.save_episode()
+    finally:
+        log_say("Stop recording")
+        robot.disconnect()
+        listener.stop()
+
+        dataset.finalize()
+        dataset.push_to_hub()
+
+
+if __name__ == "__main__":
+    main()