mirror of
https://github.com/huggingface/lerobot.git
synced 2026-05-22 12:09:42 +00:00
add code for relative actions and state and unifing tasks
This commit is contained in:
@@ -1,28 +1,14 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
|
|
||||||
# Copyright 2025 The HuggingFace Inc. team. All rights reserved.
|
|
||||||
#
|
|
||||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
# you may not use this file except in compliance with the License.
|
|
||||||
# You may obtain a copy of the License at
|
|
||||||
#
|
|
||||||
# http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
#
|
|
||||||
# Unless required by applicable law or agreed to in writing, software
|
|
||||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
# See the License for the specific language governing permissions and
|
|
||||||
# limitations under the License.
|
|
||||||
|
|
||||||
"""
|
"""
|
||||||
OpenArms Policy Evaluation with UMI-style Relative Actions
|
OpenArms Policy Evaluation with Relative Actions
|
||||||
|
|
||||||
Evaluates a policy trained with relative actions (use_relative_actions=True).
|
Two modes supported (based on training config):
|
||||||
During inference, the policy outputs relative deltas which are added to the
|
Mode 1: Relative actions only (use_relative_state=False)
|
||||||
current robot position to get absolute targets.
|
- Policy outputs relative action deltas
|
||||||
|
- State input is absolute
|
||||||
This follows the UMI paper's "relative trajectory" action representation:
|
Mode 2: Relative actions + state (use_relative_state=True)
|
||||||
action_absolute[t] = action_relative[t] + current_position
|
- Policy outputs relative action deltas
|
||||||
|
- State input is also converted to relative
|
||||||
|
|
||||||
Example usage:
|
Example usage:
|
||||||
python examples/openarms/evaluate_relative.py
|
python examples/openarms/evaluate_relative.py
|
||||||
@@ -35,6 +21,7 @@ import torch
|
|||||||
|
|
||||||
from lerobot.cameras.opencv.configuration_opencv import OpenCVCameraConfig
|
from lerobot.cameras.opencv.configuration_opencv import OpenCVCameraConfig
|
||||||
from lerobot.configs.policies import PreTrainedConfig
|
from lerobot.configs.policies import PreTrainedConfig
|
||||||
|
from lerobot.configs.train import TrainPipelineConfig
|
||||||
from lerobot.datasets.lerobot_dataset import LeRobotDataset
|
from lerobot.datasets.lerobot_dataset import LeRobotDataset
|
||||||
from lerobot.datasets.pipeline_features import aggregate_pipeline_dataset_features, create_initial_features
|
from lerobot.datasets.pipeline_features import aggregate_pipeline_dataset_features, create_initial_features
|
||||||
from lerobot.datasets.utils import build_dataset_frame, combine_feature_dicts
|
from lerobot.datasets.utils import build_dataset_frame, combine_feature_dicts
|
||||||
@@ -47,13 +34,17 @@ from lerobot.robots.openarms.openarms_follower import OpenArmsFollower
|
|||||||
from lerobot.utils.constants import ACTION, OBS_STR
|
from lerobot.utils.constants import ACTION, OBS_STR
|
||||||
from lerobot.utils.control_utils import init_keyboard_listener, precise_sleep
|
from lerobot.utils.control_utils import init_keyboard_listener, precise_sleep
|
||||||
from lerobot.utils.device_utils import get_safe_torch_device
|
from lerobot.utils.device_utils import get_safe_torch_device
|
||||||
from lerobot.utils.relative_actions import convert_from_relative_actions_dict, convert_state_to_relative
|
from lerobot.utils.relative_actions import (
|
||||||
|
convert_from_relative_actions_dict,
|
||||||
|
convert_state_to_relative,
|
||||||
|
PerTimestepNormalizer,
|
||||||
|
)
|
||||||
from lerobot.utils.utils import log_say
|
from lerobot.utils.utils import log_say
|
||||||
from lerobot.utils.visualization_utils import init_rerun, log_rerun_data
|
from lerobot.utils.visualization_utils import init_rerun, log_rerun_data
|
||||||
|
|
||||||
|
|
||||||
# Configuration - Update these for your setup
|
# Configuration
|
||||||
HF_MODEL_ID = "your-org/your-relative-policy" # Policy trained with use_relative_actions=True
|
HF_MODEL_ID = "your-org/your-relative-policy"
|
||||||
HF_EVAL_DATASET_ID = "your-org/your-eval-dataset"
|
HF_EVAL_DATASET_ID = "your-org/your-eval-dataset"
|
||||||
TASK_DESCRIPTION = "your task description"
|
TASK_DESCRIPTION = "your task description"
|
||||||
|
|
||||||
@@ -61,11 +52,9 @@ NUM_EPISODES = 1
|
|||||||
FPS = 30
|
FPS = 30
|
||||||
EPISODE_TIME_SEC = 300
|
EPISODE_TIME_SEC = 300
|
||||||
|
|
||||||
# Robot CAN interfaces
|
|
||||||
FOLLOWER_LEFT_PORT = "can0"
|
FOLLOWER_LEFT_PORT = "can0"
|
||||||
FOLLOWER_RIGHT_PORT = "can1"
|
FOLLOWER_RIGHT_PORT = "can1"
|
||||||
|
|
||||||
# Camera configuration
|
|
||||||
CAMERA_CONFIG = {
|
CAMERA_CONFIG = {
|
||||||
"left_wrist": OpenCVCameraConfig(index_or_path="/dev/video5", width=640, height=480, fps=FPS),
|
"left_wrist": OpenCVCameraConfig(index_or_path="/dev/video5", width=640, height=480, fps=FPS),
|
||||||
"right_wrist": OpenCVCameraConfig(index_or_path="/dev/video1", width=640, height=480, fps=FPS),
|
"right_wrist": OpenCVCameraConfig(index_or_path="/dev/video1", width=640, height=480, fps=FPS),
|
||||||
@@ -74,7 +63,6 @@ CAMERA_CONFIG = {
|
|||||||
|
|
||||||
|
|
||||||
def make_robot_action(action_values: dict, features: dict) -> RobotAction:
|
def make_robot_action(action_values: dict, features: dict) -> RobotAction:
|
||||||
"""Convert action values to robot action dict, filtering by features."""
|
|
||||||
robot_action = {}
|
robot_action = {}
|
||||||
for key in features:
|
for key in features:
|
||||||
if key.startswith(ACTION + "."):
|
if key.startswith(ACTION + "."):
|
||||||
@@ -84,6 +72,40 @@ def make_robot_action(action_values: dict, features: dict) -> RobotAction:
|
|||||||
return robot_action
|
return robot_action
|
||||||
|
|
||||||
|
|
||||||
|
def load_relative_config(model_path: Path | str) -> tuple[PerTimestepNormalizer | None, bool]:
|
||||||
|
"""Load normalizer and relative_state setting from checkpoint."""
|
||||||
|
model_path = Path(model_path) if isinstance(model_path, str) else model_path
|
||||||
|
normalizer = None
|
||||||
|
use_relative_state = False
|
||||||
|
|
||||||
|
# Try local path first
|
||||||
|
if model_path.exists():
|
||||||
|
stats_path = model_path / "relative_stats.pt"
|
||||||
|
if stats_path.exists():
|
||||||
|
normalizer = PerTimestepNormalizer.load(stats_path)
|
||||||
|
print(f"Loaded per-timestep stats from: {stats_path}")
|
||||||
|
|
||||||
|
config_path = model_path / "train_config.json"
|
||||||
|
if config_path.exists():
|
||||||
|
cfg = TrainPipelineConfig.from_pretrained(model_path)
|
||||||
|
use_relative_state = getattr(cfg, "use_relative_state", False)
|
||||||
|
else:
|
||||||
|
# Try hub
|
||||||
|
try:
|
||||||
|
from huggingface_hub import hf_hub_download
|
||||||
|
stats_file = hf_hub_download(repo_id=str(model_path), filename="relative_stats.pt")
|
||||||
|
normalizer = PerTimestepNormalizer.load(stats_file)
|
||||||
|
print("Loaded per-timestep stats from hub")
|
||||||
|
|
||||||
|
config_file = hf_hub_download(repo_id=str(model_path), filename="train_config.json")
|
||||||
|
cfg = TrainPipelineConfig.from_pretrained(Path(config_file).parent)
|
||||||
|
use_relative_state = getattr(cfg, "use_relative_state", False)
|
||||||
|
except Exception as e:
|
||||||
|
print(f"Warning: Could not load relative config: {e}")
|
||||||
|
|
||||||
|
return normalizer, use_relative_state
|
||||||
|
|
||||||
|
|
||||||
def inference_loop_relative(
|
def inference_loop_relative(
|
||||||
robot,
|
robot,
|
||||||
policy,
|
policy,
|
||||||
@@ -96,18 +118,15 @@ def inference_loop_relative(
|
|||||||
single_task: str,
|
single_task: str,
|
||||||
display_data: bool = True,
|
display_data: bool = True,
|
||||||
state_key: str = "observation.state",
|
state_key: str = "observation.state",
|
||||||
|
relative_normalizer: PerTimestepNormalizer | None = None,
|
||||||
|
use_relative_state: bool = False,
|
||||||
):
|
):
|
||||||
"""
|
"""
|
||||||
Inference loop for policies trained with UMI-style relative actions and state.
|
Inference loop for relative action policies.
|
||||||
|
|
||||||
Key differences from standard inference:
|
If use_relative_state=True, also converts observation state to relative.
|
||||||
- Observation state is converted to relative (provides velocity info)
|
|
||||||
- Policy outputs relative deltas (action_relative)
|
|
||||||
- We add current robot position to get absolute targets:
|
|
||||||
action_absolute = action_relative + current_position
|
|
||||||
"""
|
"""
|
||||||
device = get_safe_torch_device(policy.config.device)
|
device = get_safe_torch_device(policy.config.device)
|
||||||
|
|
||||||
timestamp = 0
|
timestamp = 0
|
||||||
start_t = time.perf_counter()
|
start_t = time.perf_counter()
|
||||||
|
|
||||||
@@ -117,21 +136,17 @@ def inference_loop_relative(
|
|||||||
if events["exit_early"] or events["stop_recording"]:
|
if events["exit_early"] or events["stop_recording"]:
|
||||||
break
|
break
|
||||||
|
|
||||||
# Get current robot observation
|
|
||||||
obs = robot.get_observation()
|
obs = robot.get_observation()
|
||||||
observation_frame = build_dataset_frame(dataset.features, obs, prefix=OBS_STR)
|
observation_frame = build_dataset_frame(dataset.features, obs, prefix=OBS_STR)
|
||||||
|
|
||||||
# Get current joint positions (reference for relative conversion)
|
|
||||||
current_pos = {k: v for k, v in obs.items() if k.endswith(".pos")}
|
current_pos = {k: v for k, v in obs.items() if k.endswith(".pos")}
|
||||||
|
|
||||||
# Convert observation state to relative (UMI-style)
|
# Convert state to relative if using full UMI mode
|
||||||
# This gives velocity-like information to the policy
|
if use_relative_state and state_key in observation_frame:
|
||||||
if state_key in observation_frame:
|
|
||||||
state_tensor = observation_frame[state_key]
|
state_tensor = observation_frame[state_key]
|
||||||
if isinstance(state_tensor, torch.Tensor):
|
if isinstance(state_tensor, torch.Tensor):
|
||||||
observation_frame[state_key] = convert_state_to_relative(state_tensor)
|
observation_frame[state_key] = convert_state_to_relative(state_tensor)
|
||||||
|
|
||||||
# Run policy inference - outputs relative actions
|
# Policy inference (outputs normalized relative actions)
|
||||||
action_values = predict_action(
|
action_values = predict_action(
|
||||||
observation=observation_frame,
|
observation=observation_frame,
|
||||||
policy=policy,
|
policy=policy,
|
||||||
@@ -143,15 +158,21 @@ def inference_loop_relative(
|
|||||||
robot_type=robot.robot_type,
|
robot_type=robot.robot_type,
|
||||||
)
|
)
|
||||||
|
|
||||||
# Convert relative actions to absolute
|
# Unnormalize actions
|
||||||
# action_values contains relative deltas, current_pos has absolute positions
|
if relative_normalizer is not None:
|
||||||
|
action_keys = [k for k in action_values.keys() if not k.startswith("task")]
|
||||||
|
action_tensor = torch.tensor([[action_values[k] for k in action_keys]])
|
||||||
|
action_tensor = action_tensor.unsqueeze(1)
|
||||||
|
action_unnorm = relative_normalizer.unnormalize(action_tensor)
|
||||||
|
for i, k in enumerate(action_keys):
|
||||||
|
action_values[k] = action_unnorm[0, 0, i].item()
|
||||||
|
|
||||||
|
# Convert to absolute
|
||||||
relative_action = make_robot_action(action_values, dataset.features)
|
relative_action = make_robot_action(action_values, dataset.features)
|
||||||
absolute_action = convert_from_relative_actions_dict(relative_action, current_pos)
|
absolute_action = convert_from_relative_actions_dict(relative_action, current_pos)
|
||||||
|
|
||||||
# Send absolute action to robot
|
|
||||||
robot.send_action(absolute_action)
|
robot.send_action(absolute_action)
|
||||||
|
|
||||||
# Record to dataset (store the absolute action that was sent)
|
|
||||||
if dataset is not None:
|
if dataset is not None:
|
||||||
action_frame = build_dataset_frame(dataset.features, absolute_action, prefix=ACTION)
|
action_frame = build_dataset_frame(dataset.features, absolute_action, prefix=ACTION)
|
||||||
frame = {**observation_frame, **action_frame, "task": single_task}
|
frame = {**observation_frame, **action_frame, "task": single_task}
|
||||||
@@ -166,16 +187,17 @@ def inference_loop_relative(
|
|||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
"""Main evaluation function for relative action policies."""
|
print("=" * 60)
|
||||||
print("=" * 65)
|
print(" OpenArms Evaluation - Relative Actions")
|
||||||
print(" OpenArms Evaluation - UMI-style Relative Actions")
|
print("=" * 60)
|
||||||
print("=" * 65)
|
|
||||||
print(f"\nModel: {HF_MODEL_ID}")
|
print(f"\nModel: {HF_MODEL_ID}")
|
||||||
print(f"Evaluation Dataset: {HF_EVAL_DATASET_ID}")
|
print(f"Dataset: {HF_EVAL_DATASET_ID}")
|
||||||
print(f"Task: {TASK_DESCRIPTION}")
|
print(f"Episodes: {NUM_EPISODES}, Duration: {EPISODE_TIME_SEC}s")
|
||||||
print(f"Episodes: {NUM_EPISODES}")
|
|
||||||
print(f"Episode Duration: {EPISODE_TIME_SEC}s")
|
# Load relative action config
|
||||||
print("\nNote: Policy outputs are relative deltas, converted to absolute at inference time")
|
relative_normalizer, use_relative_state = load_relative_config(HF_MODEL_ID)
|
||||||
|
mode = "actions + state" if use_relative_state else "actions only"
|
||||||
|
print(f"Mode: relative {mode}")
|
||||||
|
|
||||||
# Setup robot
|
# Setup robot
|
||||||
follower_config = OpenArmsFollowerConfig(
|
follower_config = OpenArmsFollowerConfig(
|
||||||
@@ -192,12 +214,9 @@ def main():
|
|||||||
follower.connect(calibrate=False)
|
follower.connect(calibrate=False)
|
||||||
|
|
||||||
if not follower.is_connected:
|
if not follower.is_connected:
|
||||||
raise RuntimeError("Follower robot failed to connect!")
|
raise RuntimeError("Robot failed to connect!")
|
||||||
|
|
||||||
# Build processors
|
|
||||||
teleop_action_processor, robot_action_processor, robot_observation_processor = make_default_processors()
|
teleop_action_processor, robot_action_processor, robot_observation_processor = make_default_processors()
|
||||||
|
|
||||||
# Build dataset features
|
|
||||||
action_features_hw = {k: v for k, v in follower.action_features.items() if k.endswith(".pos")}
|
action_features_hw = {k: v for k, v in follower.action_features.items() if k.endswith(".pos")}
|
||||||
|
|
||||||
dataset_features = combine_feature_dicts(
|
dataset_features = combine_feature_dicts(
|
||||||
@@ -213,16 +232,13 @@ def main():
|
|||||||
),
|
),
|
||||||
)
|
)
|
||||||
|
|
||||||
# Check existing dataset
|
|
||||||
dataset_path = Path.home() / ".cache" / "huggingface" / "lerobot" / HF_EVAL_DATASET_ID
|
dataset_path = Path.home() / ".cache" / "huggingface" / "lerobot" / HF_EVAL_DATASET_ID
|
||||||
if dataset_path.exists():
|
if dataset_path.exists():
|
||||||
print(f"\nDataset already exists at: {dataset_path}")
|
print(f"\nDataset exists at: {dataset_path}")
|
||||||
choice = input("Continue and append? (y/n): ").strip().lower()
|
if input("Continue? (y/n): ").strip().lower() != 'y':
|
||||||
if choice != 'y':
|
|
||||||
follower.disconnect()
|
follower.disconnect()
|
||||||
return
|
return
|
||||||
|
|
||||||
# Create dataset
|
|
||||||
dataset = LeRobotDataset.create(
|
dataset = LeRobotDataset.create(
|
||||||
repo_id=HF_EVAL_DATASET_ID,
|
repo_id=HF_EVAL_DATASET_ID,
|
||||||
fps=FPS,
|
fps=FPS,
|
||||||
@@ -233,7 +249,6 @@ def main():
|
|||||||
image_writer_threads=12,
|
image_writer_threads=12,
|
||||||
)
|
)
|
||||||
|
|
||||||
# Load policy
|
|
||||||
policy_config = PreTrainedConfig.from_pretrained(HF_MODEL_ID)
|
policy_config = PreTrainedConfig.from_pretrained(HF_MODEL_ID)
|
||||||
policy_config.pretrained_path = HF_MODEL_ID
|
policy_config.pretrained_path = HF_MODEL_ID
|
||||||
policy = make_policy(policy_config, ds_meta=dataset.meta)
|
policy = make_policy(policy_config, ds_meta=dataset.meta)
|
||||||
@@ -242,27 +257,19 @@ def main():
|
|||||||
policy_cfg=policy.config,
|
policy_cfg=policy.config,
|
||||||
pretrained_path=HF_MODEL_ID,
|
pretrained_path=HF_MODEL_ID,
|
||||||
dataset_stats=dataset.meta.stats,
|
dataset_stats=dataset.meta.stats,
|
||||||
preprocessor_overrides={
|
preprocessor_overrides={"device_processor": {"device": str(policy.config.device)}},
|
||||||
"device_processor": {"device": str(policy.config.device)}
|
|
||||||
},
|
|
||||||
)
|
)
|
||||||
|
|
||||||
# Initialize controls
|
|
||||||
listener, events = init_keyboard_listener()
|
listener, events = init_keyboard_listener()
|
||||||
init_rerun(session_name="openarms_eval_relative")
|
init_rerun(session_name="openarms_eval_relative")
|
||||||
episode_idx = 0
|
episode_idx = 0
|
||||||
|
|
||||||
print("\nControls:")
|
print("\nControls: ESC=stop, →=next episode, ←=rerecord")
|
||||||
print(" ESC - Stop recording and save")
|
|
||||||
print(" → - End current episode")
|
|
||||||
print(" ← - Re-record episode")
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
while episode_idx < NUM_EPISODES and not events["stop_recording"]:
|
while episode_idx < NUM_EPISODES and not events["stop_recording"]:
|
||||||
log_say(f"Evaluating episode {episode_idx + 1} of {NUM_EPISODES}")
|
log_say(f"Episode {episode_idx + 1} of {NUM_EPISODES}")
|
||||||
print(f"\nRunning relative action inference for episode {episode_idx + 1}...")
|
|
||||||
|
|
||||||
# Run inference with relative action conversion
|
|
||||||
inference_loop_relative(
|
inference_loop_relative(
|
||||||
robot=follower,
|
robot=follower,
|
||||||
policy=policy,
|
policy=policy,
|
||||||
@@ -274,46 +281,41 @@ def main():
|
|||||||
control_time_s=EPISODE_TIME_SEC,
|
control_time_s=EPISODE_TIME_SEC,
|
||||||
single_task=TASK_DESCRIPTION,
|
single_task=TASK_DESCRIPTION,
|
||||||
display_data=True,
|
display_data=True,
|
||||||
|
relative_normalizer=relative_normalizer,
|
||||||
|
use_relative_state=use_relative_state,
|
||||||
)
|
)
|
||||||
|
|
||||||
# Handle re-recording
|
|
||||||
if events.get("rerecord_episode", False):
|
if events.get("rerecord_episode", False):
|
||||||
log_say("Re-recording episode")
|
log_say("Re-recording")
|
||||||
events["rerecord_episode"] = False
|
events["rerecord_episode"] = False
|
||||||
events["exit_early"] = False
|
events["exit_early"] = False
|
||||||
dataset.clear_episode_buffer()
|
dataset.clear_episode_buffer()
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Save episode
|
|
||||||
if dataset.episode_buffer is not None and dataset.episode_buffer.get("size", 0) > 0:
|
if dataset.episode_buffer is not None and dataset.episode_buffer.get("size", 0) > 0:
|
||||||
print(f"Saving episode {episode_idx + 1} ({dataset.episode_buffer['size']} frames)...")
|
print(f"Saving episode {episode_idx + 1}...")
|
||||||
dataset.save_episode()
|
dataset.save_episode()
|
||||||
episode_idx += 1
|
episode_idx += 1
|
||||||
|
|
||||||
events["exit_early"] = False
|
events["exit_early"] = False
|
||||||
|
|
||||||
# Wait for manual reset between episodes
|
|
||||||
if not events["stop_recording"] and episode_idx < NUM_EPISODES:
|
if not events["stop_recording"] and episode_idx < NUM_EPISODES:
|
||||||
log_say("Waiting for manual reset")
|
input("Press ENTER for next episode...")
|
||||||
input("Press ENTER when ready for next episode...")
|
|
||||||
|
|
||||||
print(f"\nEvaluation complete! {episode_idx} episodes recorded")
|
print(f"\nDone! {episode_idx} episodes recorded")
|
||||||
log_say("Evaluation complete", blocking=True)
|
log_say("Complete", blocking=True)
|
||||||
|
|
||||||
except KeyboardInterrupt:
|
except KeyboardInterrupt:
|
||||||
print("\n\nEvaluation interrupted by user")
|
print("\n\nInterrupted")
|
||||||
|
|
||||||
finally:
|
finally:
|
||||||
follower.disconnect()
|
follower.disconnect()
|
||||||
|
|
||||||
if listener is not None:
|
if listener is not None:
|
||||||
listener.stop()
|
listener.stop()
|
||||||
|
|
||||||
dataset.finalize()
|
dataset.finalize()
|
||||||
print("\nUploading to Hugging Face Hub...")
|
print("Uploading to Hub...")
|
||||||
dataset.push_to_hub(private=True)
|
dataset.push_to_hub(private=True)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
main()
|
main()
|
||||||
|
|
||||||
|
|||||||
@@ -0,0 +1,59 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
"""Unify all tasks in a dataset to a single task."""
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import json
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import pandas as pd
|
||||||
|
|
||||||
|
from lerobot.datasets.lerobot_dataset import LeRobotDataset
|
||||||
|
from lerobot.datasets.utils import write_tasks
|
||||||
|
|
||||||
|
|
||||||
|
def unify_tasks(repo_id: str, new_task: str):
|
||||||
|
"""Set all episodes to use a single task."""
|
||||||
|
print(f"Loading dataset: {repo_id}")
|
||||||
|
dataset = LeRobotDataset(repo_id)
|
||||||
|
root = dataset.root
|
||||||
|
|
||||||
|
print(f"Current tasks: {list(dataset.meta.tasks['task']) if dataset.meta.tasks is not None else []}")
|
||||||
|
|
||||||
|
# 1. Update tasks.parquet to have only one task
|
||||||
|
tasks_df = pd.DataFrame({"task": [new_task]})
|
||||||
|
write_tasks(tasks_df, root)
|
||||||
|
print(f"Set single task: '{new_task}'")
|
||||||
|
|
||||||
|
# 2. Update all data parquet files to set task_index=0
|
||||||
|
data_dir = root / "data"
|
||||||
|
parquet_files = sorted(data_dir.glob("*/*.parquet"))
|
||||||
|
for parquet_path in parquet_files:
|
||||||
|
df = pd.read_parquet(parquet_path)
|
||||||
|
df["task_index"] = 0
|
||||||
|
df.to_parquet(parquet_path)
|
||||||
|
print(f"Updated: {parquet_path.relative_to(root)}")
|
||||||
|
|
||||||
|
# 3. Update info.json
|
||||||
|
info_path = root / "info.json"
|
||||||
|
with open(info_path) as f:
|
||||||
|
info = json.load(f)
|
||||||
|
info["total_tasks"] = 1
|
||||||
|
with open(info_path, "w") as f:
|
||||||
|
json.dump(info, f, indent=2)
|
||||||
|
|
||||||
|
print(f"\nDone! All {dataset.meta.total_episodes} episodes now use task: '{new_task}'")
|
||||||
|
print(f"\nTo push: huggingface-cli upload {repo_id} {root} --repo-type dataset")
|
||||||
|
|
||||||
|
|
||||||
|
def main():
|
||||||
|
parser = argparse.ArgumentParser(description="Unify all tasks in a dataset to a single task")
|
||||||
|
parser.add_argument("--repo_id", type=str, required=True, help="Dataset repo_id")
|
||||||
|
parser.add_argument("--task", type=str, required=True, help="New task description")
|
||||||
|
args = parser.parse_args()
|
||||||
|
|
||||||
|
unify_tasks(args.repo_id, args.task)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
|
|
||||||
@@ -66,10 +66,16 @@ class TrainPipelineConfig(HubMixin):
|
|||||||
eval: EvalConfig = field(default_factory=EvalConfig)
|
eval: EvalConfig = field(default_factory=EvalConfig)
|
||||||
wandb: WandBConfig = field(default_factory=WandBConfig)
|
wandb: WandBConfig = field(default_factory=WandBConfig)
|
||||||
|
|
||||||
# UMI-style relative actions: convert absolute joint positions to chunk-relative deltas
|
# UMI-style relative actions with per-timestep normalization
|
||||||
# During training, actions become relative to current position at chunk start
|
# Mode 1: use_relative_actions=True, use_relative_state=False
|
||||||
# During inference, predicted deltas are added to current robot position
|
# - Actions: relative to current position + per-timestep normalized
|
||||||
|
# - State: absolute (unchanged)
|
||||||
|
# Mode 2: use_relative_actions=True, use_relative_state=True (full UMI)
|
||||||
|
# - Actions: relative to current position + per-timestep normalized
|
||||||
|
# - State: relative to current position (provides velocity info)
|
||||||
|
# Stats are computed automatically from first 1000 batches at training start
|
||||||
use_relative_actions: bool = False
|
use_relative_actions: bool = False
|
||||||
|
use_relative_state: bool = False
|
||||||
|
|
||||||
# RA-BC (Reward-Aligned Behavior Cloning) parameters
|
# RA-BC (Reward-Aligned Behavior Cloning) parameters
|
||||||
use_rabc: bool = False # Enable reward-weighted training
|
use_rabc: bool = False # Enable reward-weighted training
|
||||||
|
|||||||
@@ -46,7 +46,11 @@ from lerobot.utils.train_utils import (
|
|||||||
save_checkpoint,
|
save_checkpoint,
|
||||||
update_last_checkpoint,
|
update_last_checkpoint,
|
||||||
)
|
)
|
||||||
from lerobot.utils.relative_actions import convert_to_relative_actions
|
from lerobot.utils.relative_actions import (
|
||||||
|
convert_to_relative_actions,
|
||||||
|
compute_relative_action_stats,
|
||||||
|
PerTimestepNormalizer,
|
||||||
|
)
|
||||||
from lerobot.utils.utils import (
|
from lerobot.utils.utils import (
|
||||||
format_big_number,
|
format_big_number,
|
||||||
has_method,
|
has_method,
|
||||||
@@ -299,9 +303,26 @@ def train(cfg: TrainPipelineConfig, accelerator: Accelerator | None = None):
|
|||||||
device=device,
|
device=device,
|
||||||
)
|
)
|
||||||
|
|
||||||
if cfg.use_relative_actions and is_main_process:
|
# Compute per-timestep normalizer for relative actions
|
||||||
logging.info(colored("UMI-style relative actions enabled", "cyan", attrs=["bold"]))
|
relative_normalizer = None
|
||||||
logging.info("Actions will be converted to chunk-relative deltas during training")
|
if cfg.use_relative_actions:
|
||||||
|
mode = "actions + state" if cfg.use_relative_state else "actions only"
|
||||||
|
if is_main_process:
|
||||||
|
logging.info(colored(f"Relative mode: {mode}", "cyan", attrs=["bold"]))
|
||||||
|
logging.info("Computing per-timestep stats from dataset (first 1000 batches)...")
|
||||||
|
temp_loader = torch.utils.data.DataLoader(
|
||||||
|
dataset, batch_size=cfg.batch_size, shuffle=True, num_workers=0
|
||||||
|
)
|
||||||
|
mean, std = compute_relative_action_stats(temp_loader, num_batches=1000)
|
||||||
|
relative_normalizer = PerTimestepNormalizer(mean, std)
|
||||||
|
stats_path = cfg.output_dir / "relative_stats.pt"
|
||||||
|
relative_normalizer.save(stats_path)
|
||||||
|
logging.info(f"Saved stats to: {stats_path}")
|
||||||
|
|
||||||
|
accelerator.wait_for_everyone()
|
||||||
|
|
||||||
|
if not is_main_process:
|
||||||
|
relative_normalizer = PerTimestepNormalizer.load(cfg.output_dir / "relative_stats.pt")
|
||||||
|
|
||||||
step = 0 # number of policy updates (forward + backward + optim)
|
step = 0 # number of policy updates (forward + backward + optim)
|
||||||
|
|
||||||
@@ -391,9 +412,11 @@ def train(cfg: TrainPipelineConfig, accelerator: Accelerator | None = None):
|
|||||||
batch = next(dl_iter)
|
batch = next(dl_iter)
|
||||||
batch = preprocessor(batch)
|
batch = preprocessor(batch)
|
||||||
|
|
||||||
# Convert to UMI-style relative actions if enabled
|
# Convert to relative actions (and optionally state) if enabled
|
||||||
if cfg.use_relative_actions:
|
if cfg.use_relative_actions:
|
||||||
batch = convert_to_relative_actions(batch)
|
batch = convert_to_relative_actions(batch, convert_state=cfg.use_relative_state)
|
||||||
|
if relative_normalizer is not None:
|
||||||
|
batch["action"] = relative_normalizer.normalize(batch["action"])
|
||||||
|
|
||||||
train_tracker.dataloading_s = time.perf_counter() - start_time
|
train_tracker.dataloading_s = time.perf_counter() - start_time
|
||||||
|
|
||||||
@@ -449,6 +472,9 @@ def train(cfg: TrainPipelineConfig, accelerator: Accelerator | None = None):
|
|||||||
preprocessor=preprocessor,
|
preprocessor=preprocessor,
|
||||||
postprocessor=postprocessor,
|
postprocessor=postprocessor,
|
||||||
)
|
)
|
||||||
|
# Save relative action stats with checkpoint
|
||||||
|
if relative_normalizer is not None:
|
||||||
|
relative_normalizer.save(checkpoint_dir / "relative_stats.pt")
|
||||||
update_last_checkpoint(checkpoint_dir)
|
update_last_checkpoint(checkpoint_dir)
|
||||||
if wandb_logger:
|
if wandb_logger:
|
||||||
wandb_logger.log_policy(checkpoint_dir)
|
wandb_logger.log_policy(checkpoint_dir)
|
||||||
|
|||||||
@@ -1,172 +1,150 @@
|
|||||||
"""
|
"""
|
||||||
UMI-style relative action and state utilities.
|
UMI-style relative actions with per-timestep normalization.
|
||||||
|
|
||||||
Implements chunk-relative representation from the UMI paper:
|
Two modes supported:
|
||||||
"Universal Manipulation Interface: In-The-Wild Robot Teaching Without In-The-Wild Robots"
|
Mode 1: Relative actions only (use_relative_state=False)
|
||||||
|
- Actions converted to relative, state stays absolute
|
||||||
|
Mode 2: Relative actions + state (use_relative_state=True, full UMI)
|
||||||
|
- Both actions and state converted to relative
|
||||||
|
|
||||||
For each inference step:
|
Per-timestep normalization (TRI LBM / BEHAVIOR style):
|
||||||
- Actions are relative to current position at chunk start (t0)
|
Training: action_norm[t] = (action_rel[t] - mean[t]) / std[t]
|
||||||
- State history is relative to current position (provides velocity info)
|
Inference: action_rel[t] = action_norm[t] * std[t] + mean[t]
|
||||||
|
|
||||||
Training:
|
|
||||||
action_relative[t] = action_absolute[t] - position_at_t0
|
|
||||||
state_relative[t] = state_absolute[t] - current_position
|
|
||||||
|
|
||||||
Inference:
|
|
||||||
action_absolute[t] = action_relative[t] + current_position
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import torch
|
import torch
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
|
||||||
def convert_to_relative(batch: dict, state_key: str = "observation.state") -> dict:
|
class PerTimestepNormalizer:
|
||||||
"""
|
"""Per-timestep normalization using precomputed dataset statistics."""
|
||||||
Convert absolute actions AND state to chunk-relative (UMI-style) for training.
|
|
||||||
|
|
||||||
Following UMI paper PD2.1 and PD2.2:
|
def __init__(self, mean: torch.Tensor, std: torch.Tensor, eps: float = 1e-8):
|
||||||
- Actions become relative to current position
|
self.mean = mean
|
||||||
- State history becomes relative to current position (provides velocity info)
|
self.std = std
|
||||||
|
self.eps = eps
|
||||||
|
|
||||||
|
def normalize(self, x: torch.Tensor) -> torch.Tensor:
|
||||||
|
mean = self.mean.to(x.device, x.dtype)
|
||||||
|
std = self.std.to(x.device, x.dtype)
|
||||||
|
if x.dim() == 3 and mean.dim() == 2:
|
||||||
|
mean, std = mean.unsqueeze(0), std.unsqueeze(0)
|
||||||
|
return (x - mean) / (std + self.eps)
|
||||||
|
|
||||||
|
def unnormalize(self, x: torch.Tensor) -> torch.Tensor:
|
||||||
|
mean = self.mean.to(x.device, x.dtype)
|
||||||
|
std = self.std.to(x.device, x.dtype)
|
||||||
|
if x.dim() == 3 and mean.dim() == 2:
|
||||||
|
mean, std = mean.unsqueeze(0), std.unsqueeze(0)
|
||||||
|
return x * (std + self.eps) + mean
|
||||||
|
|
||||||
|
def save(self, path: Path | str):
|
||||||
|
path = Path(path)
|
||||||
|
path.parent.mkdir(parents=True, exist_ok=True)
|
||||||
|
torch.save({"mean": self.mean.cpu(), "std": self.std.cpu(), "eps": self.eps}, path)
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def load(cls, path: Path | str) -> "PerTimestepNormalizer":
|
||||||
|
data = torch.load(path, weights_only=True, map_location="cpu")
|
||||||
|
return cls(data["mean"], data["std"], data.get("eps", 1e-8))
|
||||||
|
|
||||||
|
|
||||||
|
def compute_relative_action_stats(
|
||||||
|
dataloader,
|
||||||
|
state_key: str = "observation.state",
|
||||||
|
num_batches: int | None = None,
|
||||||
|
) -> tuple[torch.Tensor, torch.Tensor]:
|
||||||
|
"""Compute per-timestep mean/std from relative actions."""
|
||||||
|
all_rel = []
|
||||||
|
for i, batch in enumerate(dataloader):
|
||||||
|
if num_batches is not None and i >= num_batches:
|
||||||
|
break
|
||||||
|
action, state = batch["action"], batch[state_key]
|
||||||
|
current_pos = state[:, -1, :] if state.dim() == 3 else state
|
||||||
|
min_dim = min(action.shape[-1], current_pos.shape[-1])
|
||||||
|
rel = action.clone()
|
||||||
|
rel[..., :min_dim] -= current_pos[:, None, :min_dim]
|
||||||
|
all_rel.append(rel)
|
||||||
|
|
||||||
|
all_rel = torch.cat(all_rel, dim=0)
|
||||||
|
return all_rel.mean(dim=0), all_rel.std(dim=0).clamp(min=1e-6)
|
||||||
|
|
||||||
|
|
||||||
|
def convert_to_relative(
|
||||||
|
batch: dict,
|
||||||
|
state_key: str = "observation.state",
|
||||||
|
convert_state: bool = True,
|
||||||
|
) -> dict:
|
||||||
|
"""
|
||||||
|
Convert actions (and optionally state) to relative.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
batch: Training batch containing:
|
batch: Training batch with "action" and state_key
|
||||||
- "action": (batch_size, chunk_size, action_dim) absolute action targets
|
state_key: Key for observation state
|
||||||
- state_key: (batch_size, [n_obs_steps,] state_dim) observation state
|
convert_state: If True, also convert state to relative (full UMI mode)
|
||||||
state_key: Key for the observation state in the batch
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
Modified batch with relative actions and state
|
|
||||||
"""
|
"""
|
||||||
if "action" not in batch or state_key not in batch:
|
if "action" not in batch or state_key not in batch:
|
||||||
return batch
|
return batch
|
||||||
|
|
||||||
action = batch["action"]
|
action = batch["action"]
|
||||||
state = batch[state_key]
|
state = batch[state_key]
|
||||||
|
|
||||||
batch = batch.copy()
|
batch = batch.copy()
|
||||||
|
|
||||||
# Get current position (reference for relative conversion)
|
# Get current position as reference
|
||||||
# State shape: (batch, state_dim) or (batch, n_obs_steps, state_dim)
|
current_pos = state[:, -1, :] if state.dim() == 3 else state
|
||||||
if state.dim() == 3:
|
|
||||||
current_pos = state[:, -1, :] # (batch, state_dim)
|
# Convert state if requested
|
||||||
|
if convert_state:
|
||||||
# Convert state history to relative (each timestep relative to current)
|
if state.dim() == 3:
|
||||||
# This gives velocity-like information to the policy
|
batch[state_key] = state - current_pos[:, None, :]
|
||||||
relative_state = state.clone()
|
else:
|
||||||
relative_state = state - current_pos[:, None, :]
|
batch[state_key] = torch.zeros_like(state)
|
||||||
batch[state_key] = relative_state
|
|
||||||
else:
|
|
||||||
current_pos = state # (batch, state_dim)
|
|
||||||
# Single timestep state becomes zeros (relative to itself)
|
|
||||||
batch[state_key] = torch.zeros_like(state)
|
|
||||||
|
|
||||||
# Convert actions to relative
|
# Convert actions to relative
|
||||||
action_dim = action.shape[-1]
|
min_dim = min(action.shape[-1], current_pos.shape[-1])
|
||||||
state_dim = current_pos.shape[-1]
|
rel_action = action.clone()
|
||||||
min_dim = min(action_dim, state_dim)
|
rel_action[..., :min_dim] -= current_pos[:, None, :min_dim]
|
||||||
|
batch["action"] = rel_action
|
||||||
relative_action = action.clone()
|
|
||||||
relative_action[..., :min_dim] = action[..., :min_dim] - current_pos[:, None, :min_dim]
|
|
||||||
batch["action"] = relative_action
|
|
||||||
|
|
||||||
return batch
|
return batch
|
||||||
|
|
||||||
|
|
||||||
# Alias for backward compatibility
|
# Backward compatibility alias
|
||||||
convert_to_relative_actions = convert_to_relative
|
convert_to_relative_actions = convert_to_relative
|
||||||
|
|
||||||
|
|
||||||
def convert_state_to_relative(
|
def convert_state_to_relative(state: torch.Tensor) -> torch.Tensor:
|
||||||
state: torch.Tensor,
|
"""Convert state to relative (for inference with use_relative_state=True)."""
|
||||||
current_pos: torch.Tensor | None = None,
|
|
||||||
) -> torch.Tensor:
|
|
||||||
"""
|
|
||||||
Convert absolute state to relative for inference.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
state: State tensor, shape (state_dim,) or (n_obs_steps, state_dim)
|
|
||||||
current_pos: Current position to use as reference. If None, uses last timestep of state.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
Relative state tensor
|
|
||||||
"""
|
|
||||||
if current_pos is None:
|
|
||||||
if state.dim() >= 2:
|
|
||||||
current_pos = state[-1, :] # Last timestep
|
|
||||||
else:
|
|
||||||
current_pos = state
|
|
||||||
|
|
||||||
if state.dim() == 1:
|
if state.dim() == 1:
|
||||||
return torch.zeros_like(state)
|
return torch.zeros_like(state)
|
||||||
elif state.dim() == 2:
|
current_pos = state[-1, :] if state.dim() == 2 else state[:, -1, :]
|
||||||
# (n_obs_steps, state_dim)
|
if state.dim() == 2:
|
||||||
return state - current_pos[None, :]
|
return state - current_pos[None, :]
|
||||||
else:
|
return state - current_pos[:, None, :]
|
||||||
# (batch, n_obs_steps, state_dim)
|
|
||||||
return state - current_pos[:, None, :]
|
|
||||||
|
|
||||||
|
|
||||||
def convert_from_relative_actions(
|
def convert_from_relative_actions(
|
||||||
relative_actions: torch.Tensor,
|
relative_actions: torch.Tensor,
|
||||||
current_pos: torch.Tensor | dict[str, float],
|
current_pos: torch.Tensor,
|
||||||
) -> torch.Tensor:
|
) -> torch.Tensor:
|
||||||
"""
|
"""Convert relative actions back to absolute for robot execution."""
|
||||||
Convert relative actions back to absolute for robot execution.
|
current_pos = current_pos.to(relative_actions.device, relative_actions.dtype)
|
||||||
|
min_dim = min(relative_actions.shape[-1], current_pos.shape[-1])
|
||||||
Args:
|
absolute = relative_actions.clone()
|
||||||
relative_actions: Predicted relative actions, shape (chunk_size, action_dim)
|
|
||||||
or (batch, chunk_size, action_dim)
|
|
||||||
current_pos: Current robot position as tensor (action_dim,) or dict of joint positions
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
Absolute actions for robot execution
|
|
||||||
"""
|
|
||||||
if isinstance(current_pos, dict):
|
|
||||||
# Convert dict to tensor, maintaining key order
|
|
||||||
current_pos = torch.tensor(list(current_pos.values()), dtype=relative_actions.dtype)
|
|
||||||
|
|
||||||
# Ensure current_pos is on same device
|
|
||||||
current_pos = current_pos.to(relative_actions.device)
|
|
||||||
|
|
||||||
# Match dimensions
|
|
||||||
action_dim = relative_actions.shape[-1]
|
|
||||||
pos_dim = current_pos.shape[-1] if current_pos.dim() > 0 else len(current_pos)
|
|
||||||
min_dim = min(action_dim, pos_dim)
|
|
||||||
|
|
||||||
absolute_actions = relative_actions.clone()
|
|
||||||
|
|
||||||
if relative_actions.dim() == 2:
|
if relative_actions.dim() == 2:
|
||||||
# Shape: (chunk_size, action_dim)
|
absolute[..., :min_dim] += current_pos[:min_dim]
|
||||||
absolute_actions[..., :min_dim] = relative_actions[..., :min_dim] + current_pos[:min_dim]
|
|
||||||
elif relative_actions.dim() == 3:
|
elif relative_actions.dim() == 3:
|
||||||
# Shape: (batch, chunk_size, action_dim)
|
absolute[..., :min_dim] += current_pos[None, None, :min_dim]
|
||||||
absolute_actions[..., :min_dim] = relative_actions[..., :min_dim] + current_pos[None, None, :min_dim]
|
|
||||||
else:
|
else:
|
||||||
# Shape: (action_dim,)
|
absolute[..., :min_dim] += current_pos[:min_dim]
|
||||||
absolute_actions[..., :min_dim] = relative_actions[..., :min_dim] + current_pos[:min_dim]
|
|
||||||
|
|
||||||
return absolute_actions
|
return absolute
|
||||||
|
|
||||||
|
|
||||||
def convert_from_relative_actions_dict(
|
def convert_from_relative_actions_dict(
|
||||||
relative_actions: dict[str, float],
|
relative_actions: dict[str, float],
|
||||||
current_pos: dict[str, float],
|
current_pos: dict[str, float],
|
||||||
) -> dict[str, float]:
|
) -> dict[str, float]:
|
||||||
"""
|
"""Convert relative actions back to absolute (dict version for inference)."""
|
||||||
Convert relative actions back to absolute for robot execution (dict version).
|
return {k: v + current_pos.get(k, 0.0) for k, v in relative_actions.items()}
|
||||||
|
|
||||||
Args:
|
|
||||||
relative_actions: Predicted relative actions as dict (e.g., {"joint_1.pos": 0.1, ...})
|
|
||||||
current_pos: Current robot position as dict (e.g., {"joint_1.pos": 45.0, ...})
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
Absolute actions dict for robot execution
|
|
||||||
"""
|
|
||||||
absolute_actions = {}
|
|
||||||
for key, rel_value in relative_actions.items():
|
|
||||||
if key in current_pos:
|
|
||||||
absolute_actions[key] = rel_value + current_pos[key]
|
|
||||||
else:
|
|
||||||
# Key not in current position, keep as-is (shouldn't happen normally)
|
|
||||||
absolute_actions[key] = rel_value
|
|
||||||
return absolute_actions
|
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user