Add relative position UMI style

This commit is contained in:
Pepijn
2026-01-02 15:57:39 +01:00
parent cf1d8c3d5b
commit 01c7c74070
4 changed files with 506 additions and 0 deletions
+319
View File
@@ -0,0 +1,319 @@
#!/usr/bin/env python
# Copyright 2025 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
OpenArms Policy Evaluation with UMI-style Relative Actions
Evaluates a policy trained with relative actions (use_relative_actions=True).
During inference, the policy outputs relative deltas which are added to the
current robot position to get absolute targets.
This follows the UMI paper's "relative trajectory" action representation:
action_absolute[t] = action_relative[t] + current_position
Example usage:
python examples/openarms/evaluate_relative.py
"""
import time
from pathlib import Path
import torch
from lerobot.cameras.opencv.configuration_opencv import OpenCVCameraConfig
from lerobot.configs.policies import PreTrainedConfig
from lerobot.datasets.lerobot_dataset import LeRobotDataset
from lerobot.datasets.pipeline_features import aggregate_pipeline_dataset_features, create_initial_features
from lerobot.datasets.utils import build_dataset_frame, combine_feature_dicts
from lerobot.policies.factory import make_policy, make_pre_post_processors
from lerobot.policies.utils import predict_action
from lerobot.processor import make_default_processors
from lerobot.processor.core import RobotAction
from lerobot.robots.openarms.config_openarms_follower import OpenArmsFollowerConfig
from lerobot.robots.openarms.openarms_follower import OpenArmsFollower
from lerobot.utils.constants import ACTION, OBS_STR
from lerobot.utils.control_utils import init_keyboard_listener, precise_sleep
from lerobot.utils.device_utils import get_safe_torch_device
from lerobot.utils.relative_actions import convert_from_relative_actions_dict, convert_state_to_relative
from lerobot.utils.utils import log_say
from lerobot.utils.visualization_utils import init_rerun, log_rerun_data
# Configuration - Update these for your setup
HF_MODEL_ID = "your-org/your-relative-policy" # Policy trained with use_relative_actions=True
HF_EVAL_DATASET_ID = "your-org/your-eval-dataset"
TASK_DESCRIPTION = "your task description"
NUM_EPISODES = 1
FPS = 30
EPISODE_TIME_SEC = 300
# Robot CAN interfaces
FOLLOWER_LEFT_PORT = "can0"
FOLLOWER_RIGHT_PORT = "can1"
# Camera configuration
CAMERA_CONFIG = {
"left_wrist": OpenCVCameraConfig(index_or_path="/dev/video5", width=640, height=480, fps=FPS),
"right_wrist": OpenCVCameraConfig(index_or_path="/dev/video1", width=640, height=480, fps=FPS),
"base": OpenCVCameraConfig(index_or_path="/dev/video3", width=640, height=480, fps=FPS),
}
def make_robot_action(action_values: dict, features: dict) -> RobotAction:
"""Convert action values to robot action dict, filtering by features."""
robot_action = {}
for key in features:
if key.startswith(ACTION + "."):
action_key = key.removeprefix(ACTION + ".")
if action_key in action_values:
robot_action[action_key] = action_values[action_key]
return robot_action
def inference_loop_relative(
robot,
policy,
preprocessor,
postprocessor,
dataset,
events,
fps: int,
control_time_s: float,
single_task: str,
display_data: bool = True,
state_key: str = "observation.state",
):
"""
Inference loop for policies trained with UMI-style relative actions and state.
Key differences from standard inference:
- Observation state is converted to relative (provides velocity info)
- Policy outputs relative deltas (action_relative)
- We add current robot position to get absolute targets:
action_absolute = action_relative + current_position
"""
device = get_safe_torch_device(policy.config.device)
timestamp = 0
start_t = time.perf_counter()
while timestamp < control_time_s:
loop_start = time.perf_counter()
if events["exit_early"] or events["stop_recording"]:
break
# Get current robot observation
obs = robot.get_observation()
observation_frame = build_dataset_frame(dataset.features, obs, prefix=OBS_STR)
# Get current joint positions (reference for relative conversion)
current_pos = {k: v for k, v in obs.items() if k.endswith(".pos")}
# Convert observation state to relative (UMI-style)
# This gives velocity-like information to the policy
if state_key in observation_frame:
state_tensor = observation_frame[state_key]
if isinstance(state_tensor, torch.Tensor):
observation_frame[state_key] = convert_state_to_relative(state_tensor)
# Run policy inference - outputs relative actions
action_values = predict_action(
observation=observation_frame,
policy=policy,
device=device,
preprocessor=preprocessor,
postprocessor=postprocessor,
use_amp=policy.config.use_amp,
task=single_task,
robot_type=robot.robot_type,
)
# Convert relative actions to absolute
# action_values contains relative deltas, current_pos has absolute positions
relative_action = make_robot_action(action_values, dataset.features)
absolute_action = convert_from_relative_actions_dict(relative_action, current_pos)
# Send absolute action to robot
robot.send_action(absolute_action)
# Record to dataset (store the absolute action that was sent)
if dataset is not None:
action_frame = build_dataset_frame(dataset.features, absolute_action, prefix=ACTION)
frame = {**observation_frame, **action_frame, "task": single_task}
dataset.add_frame(frame)
if display_data:
log_rerun_data(observation=obs, action=absolute_action)
dt = time.perf_counter() - loop_start
precise_sleep(1 / fps - dt)
timestamp = time.perf_counter() - start_t
def main():
"""Main evaluation function for relative action policies."""
print("=" * 65)
print(" OpenArms Evaluation - UMI-style Relative Actions")
print("=" * 65)
print(f"\nModel: {HF_MODEL_ID}")
print(f"Evaluation Dataset: {HF_EVAL_DATASET_ID}")
print(f"Task: {TASK_DESCRIPTION}")
print(f"Episodes: {NUM_EPISODES}")
print(f"Episode Duration: {EPISODE_TIME_SEC}s")
print("\nNote: Policy outputs are relative deltas, converted to absolute at inference time")
# Setup robot
follower_config = OpenArmsFollowerConfig(
port_left=FOLLOWER_LEFT_PORT,
port_right=FOLLOWER_RIGHT_PORT,
can_interface="socketcan",
id="openarms_follower",
disable_torque_on_disconnect=True,
max_relative_target=10.0,
cameras=CAMERA_CONFIG,
)
follower = OpenArmsFollower(follower_config)
follower.connect(calibrate=False)
if not follower.is_connected:
raise RuntimeError("Follower robot failed to connect!")
# Build processors
teleop_action_processor, robot_action_processor, robot_observation_processor = make_default_processors()
# Build dataset features
action_features_hw = {k: v for k, v in follower.action_features.items() if k.endswith(".pos")}
dataset_features = combine_feature_dicts(
aggregate_pipeline_dataset_features(
pipeline=teleop_action_processor,
initial_features=create_initial_features(action=action_features_hw),
use_videos=True,
),
aggregate_pipeline_dataset_features(
pipeline=robot_observation_processor,
initial_features=create_initial_features(observation=follower.observation_features),
use_videos=True,
),
)
# Check existing dataset
dataset_path = Path.home() / ".cache" / "huggingface" / "lerobot" / HF_EVAL_DATASET_ID
if dataset_path.exists():
print(f"\nDataset already exists at: {dataset_path}")
choice = input("Continue and append? (y/n): ").strip().lower()
if choice != 'y':
follower.disconnect()
return
# Create dataset
dataset = LeRobotDataset.create(
repo_id=HF_EVAL_DATASET_ID,
fps=FPS,
features=dataset_features,
robot_type=follower.name,
use_videos=True,
image_writer_processes=0,
image_writer_threads=12,
)
# Load policy
policy_config = PreTrainedConfig.from_pretrained(HF_MODEL_ID)
policy_config.pretrained_path = HF_MODEL_ID
policy = make_policy(policy_config, ds_meta=dataset.meta)
preprocessor, postprocessor = make_pre_post_processors(
policy_cfg=policy.config,
pretrained_path=HF_MODEL_ID,
dataset_stats=dataset.meta.stats,
preprocessor_overrides={
"device_processor": {"device": str(policy.config.device)}
},
)
# Initialize controls
listener, events = init_keyboard_listener()
init_rerun(session_name="openarms_eval_relative")
episode_idx = 0
print("\nControls:")
print(" ESC - Stop recording and save")
print(" → - End current episode")
print(" ← - Re-record episode")
try:
while episode_idx < NUM_EPISODES and not events["stop_recording"]:
log_say(f"Evaluating episode {episode_idx + 1} of {NUM_EPISODES}")
print(f"\nRunning relative action inference for episode {episode_idx + 1}...")
# Run inference with relative action conversion
inference_loop_relative(
robot=follower,
policy=policy,
preprocessor=preprocessor,
postprocessor=postprocessor,
dataset=dataset,
events=events,
fps=FPS,
control_time_s=EPISODE_TIME_SEC,
single_task=TASK_DESCRIPTION,
display_data=True,
)
# Handle re-recording
if events.get("rerecord_episode", False):
log_say("Re-recording episode")
events["rerecord_episode"] = False
events["exit_early"] = False
dataset.clear_episode_buffer()
continue
# Save episode
if dataset.episode_buffer is not None and dataset.episode_buffer.get("size", 0) > 0:
print(f"Saving episode {episode_idx + 1} ({dataset.episode_buffer['size']} frames)...")
dataset.save_episode()
episode_idx += 1
events["exit_early"] = False
# Wait for manual reset between episodes
if not events["stop_recording"] and episode_idx < NUM_EPISODES:
log_say("Waiting for manual reset")
input("Press ENTER when ready for next episode...")
print(f"\nEvaluation complete! {episode_idx} episodes recorded")
log_say("Evaluation complete", blocking=True)
except KeyboardInterrupt:
print("\n\nEvaluation interrupted by user")
finally:
follower.disconnect()
if listener is not None:
listener.stop()
dataset.finalize()
print("\nUploading to Hugging Face Hub...")
dataset.push_to_hub(private=True)
if __name__ == "__main__":
main()
+5
View File
@@ -66,6 +66,11 @@ class TrainPipelineConfig(HubMixin):
eval: EvalConfig = field(default_factory=EvalConfig)
wandb: WandBConfig = field(default_factory=WandBConfig)
# UMI-style relative actions: convert absolute joint positions to chunk-relative deltas
# During training, actions become relative to current position at chunk start
# During inference, predicted deltas are added to current robot position
use_relative_actions: bool = False
# RA-BC (Reward-Aligned Behavior Cloning) parameters
use_rabc: bool = False # Enable reward-weighted training
rabc_progress_path: str | None = None # Path to precomputed SARM progress parquet file
+10
View File
@@ -46,6 +46,7 @@ from lerobot.utils.train_utils import (
save_checkpoint,
update_last_checkpoint,
)
from lerobot.utils.relative_actions import convert_to_relative_actions
from lerobot.utils.utils import (
format_big_number,
has_method,
@@ -298,6 +299,10 @@ def train(cfg: TrainPipelineConfig, accelerator: Accelerator | None = None):
device=device,
)
if cfg.use_relative_actions and is_main_process:
logging.info(colored("UMI-style relative actions enabled", "cyan", attrs=["bold"]))
logging.info("Actions will be converted to chunk-relative deltas during training")
step = 0 # number of policy updates (forward + backward + optim)
if cfg.resume:
@@ -385,6 +390,11 @@ def train(cfg: TrainPipelineConfig, accelerator: Accelerator | None = None):
start_time = time.perf_counter()
batch = next(dl_iter)
batch = preprocessor(batch)
# Convert to UMI-style relative actions if enabled
if cfg.use_relative_actions:
batch = convert_to_relative_actions(batch)
train_tracker.dataloading_s = time.perf_counter() - start_time
train_tracker, output_dict = update_policy(
+172
View File
@@ -0,0 +1,172 @@
"""
UMI-style relative action and state utilities.
Implements chunk-relative representation from the UMI paper:
"Universal Manipulation Interface: In-The-Wild Robot Teaching Without In-The-Wild Robots"
For each inference step:
- Actions are relative to current position at chunk start (t0)
- State history is relative to current position (provides velocity info)
Training:
action_relative[t] = action_absolute[t] - position_at_t0
state_relative[t] = state_absolute[t] - current_position
Inference:
action_absolute[t] = action_relative[t] + current_position
"""
import torch
def convert_to_relative(batch: dict, state_key: str = "observation.state") -> dict:
"""
Convert absolute actions AND state to chunk-relative (UMI-style) for training.
Following UMI paper PD2.1 and PD2.2:
- Actions become relative to current position
- State history becomes relative to current position (provides velocity info)
Args:
batch: Training batch containing:
- "action": (batch_size, chunk_size, action_dim) absolute action targets
- state_key: (batch_size, [n_obs_steps,] state_dim) observation state
state_key: Key for the observation state in the batch
Returns:
Modified batch with relative actions and state
"""
if "action" not in batch or state_key not in batch:
return batch
action = batch["action"]
state = batch[state_key]
batch = batch.copy()
# Get current position (reference for relative conversion)
# State shape: (batch, state_dim) or (batch, n_obs_steps, state_dim)
if state.dim() == 3:
current_pos = state[:, -1, :] # (batch, state_dim)
# Convert state history to relative (each timestep relative to current)
# This gives velocity-like information to the policy
relative_state = state.clone()
relative_state = state - current_pos[:, None, :]
batch[state_key] = relative_state
else:
current_pos = state # (batch, state_dim)
# Single timestep state becomes zeros (relative to itself)
batch[state_key] = torch.zeros_like(state)
# Convert actions to relative
action_dim = action.shape[-1]
state_dim = current_pos.shape[-1]
min_dim = min(action_dim, state_dim)
relative_action = action.clone()
relative_action[..., :min_dim] = action[..., :min_dim] - current_pos[:, None, :min_dim]
batch["action"] = relative_action
return batch
# Alias for backward compatibility
convert_to_relative_actions = convert_to_relative
def convert_state_to_relative(
state: torch.Tensor,
current_pos: torch.Tensor | None = None,
) -> torch.Tensor:
"""
Convert absolute state to relative for inference.
Args:
state: State tensor, shape (state_dim,) or (n_obs_steps, state_dim)
current_pos: Current position to use as reference. If None, uses last timestep of state.
Returns:
Relative state tensor
"""
if current_pos is None:
if state.dim() >= 2:
current_pos = state[-1, :] # Last timestep
else:
current_pos = state
if state.dim() == 1:
return torch.zeros_like(state)
elif state.dim() == 2:
# (n_obs_steps, state_dim)
return state - current_pos[None, :]
else:
# (batch, n_obs_steps, state_dim)
return state - current_pos[:, None, :]
def convert_from_relative_actions(
relative_actions: torch.Tensor,
current_pos: torch.Tensor | dict[str, float],
) -> torch.Tensor:
"""
Convert relative actions back to absolute for robot execution.
Args:
relative_actions: Predicted relative actions, shape (chunk_size, action_dim)
or (batch, chunk_size, action_dim)
current_pos: Current robot position as tensor (action_dim,) or dict of joint positions
Returns:
Absolute actions for robot execution
"""
if isinstance(current_pos, dict):
# Convert dict to tensor, maintaining key order
current_pos = torch.tensor(list(current_pos.values()), dtype=relative_actions.dtype)
# Ensure current_pos is on same device
current_pos = current_pos.to(relative_actions.device)
# Match dimensions
action_dim = relative_actions.shape[-1]
pos_dim = current_pos.shape[-1] if current_pos.dim() > 0 else len(current_pos)
min_dim = min(action_dim, pos_dim)
absolute_actions = relative_actions.clone()
if relative_actions.dim() == 2:
# Shape: (chunk_size, action_dim)
absolute_actions[..., :min_dim] = relative_actions[..., :min_dim] + current_pos[:min_dim]
elif relative_actions.dim() == 3:
# Shape: (batch, chunk_size, action_dim)
absolute_actions[..., :min_dim] = relative_actions[..., :min_dim] + current_pos[None, None, :min_dim]
else:
# Shape: (action_dim,)
absolute_actions[..., :min_dim] = relative_actions[..., :min_dim] + current_pos[:min_dim]
return absolute_actions
def convert_from_relative_actions_dict(
relative_actions: dict[str, float],
current_pos: dict[str, float],
) -> dict[str, float]:
"""
Convert relative actions back to absolute for robot execution (dict version).
Args:
relative_actions: Predicted relative actions as dict (e.g., {"joint_1.pos": 0.1, ...})
current_pos: Current robot position as dict (e.g., {"joint_1.pos": 45.0, ...})
Returns:
Absolute actions dict for robot execution
"""
absolute_actions = {}
for key, rel_value in relative_actions.items():
if key in current_pos:
absolute_actions[key] = rel_value + current_pos[key]
else:
# Key not in current position, keep as-is (shouldn't happen normally)
absolute_actions[key] = rel_value
return absolute_actions