From 01c7c7407029066a3be2b6c99122816f02a34296 Mon Sep 17 00:00:00 2001 From: Pepijn Date: Fri, 2 Jan 2026 15:57:39 +0100 Subject: [PATCH 1/8] Add relative position UMI style --- examples/openarms/evaluate_relative.py | 319 +++++++++++++++++++++++++ src/lerobot/configs/train.py | 5 + src/lerobot/scripts/lerobot_train.py | 10 + src/lerobot/utils/relative_actions.py | 172 +++++++++++++ 4 files changed, 506 insertions(+) create mode 100644 examples/openarms/evaluate_relative.py create mode 100644 src/lerobot/utils/relative_actions.py diff --git a/examples/openarms/evaluate_relative.py b/examples/openarms/evaluate_relative.py new file mode 100644 index 000000000..c9c90355e --- /dev/null +++ b/examples/openarms/evaluate_relative.py @@ -0,0 +1,319 @@ +#!/usr/bin/env python + +# Copyright 2025 The HuggingFace Inc. team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +OpenArms Policy Evaluation with UMI-style Relative Actions + +Evaluates a policy trained with relative actions (use_relative_actions=True). +During inference, the policy outputs relative deltas which are added to the +current robot position to get absolute targets. + +This follows the UMI paper's "relative trajectory" action representation: + action_absolute[t] = action_relative[t] + current_position + +Example usage: + python examples/openarms/evaluate_relative.py +""" + +import time +from pathlib import Path + +import torch + +from lerobot.cameras.opencv.configuration_opencv import OpenCVCameraConfig +from lerobot.configs.policies import PreTrainedConfig +from lerobot.datasets.lerobot_dataset import LeRobotDataset +from lerobot.datasets.pipeline_features import aggregate_pipeline_dataset_features, create_initial_features +from lerobot.datasets.utils import build_dataset_frame, combine_feature_dicts +from lerobot.policies.factory import make_policy, make_pre_post_processors +from lerobot.policies.utils import predict_action +from lerobot.processor import make_default_processors +from lerobot.processor.core import RobotAction +from lerobot.robots.openarms.config_openarms_follower import OpenArmsFollowerConfig +from lerobot.robots.openarms.openarms_follower import OpenArmsFollower +from lerobot.utils.constants import ACTION, OBS_STR +from lerobot.utils.control_utils import init_keyboard_listener, precise_sleep +from lerobot.utils.device_utils import get_safe_torch_device +from lerobot.utils.relative_actions import convert_from_relative_actions_dict, convert_state_to_relative +from lerobot.utils.utils import log_say +from lerobot.utils.visualization_utils import init_rerun, log_rerun_data + + +# Configuration - Update these for your setup +HF_MODEL_ID = "your-org/your-relative-policy" # Policy trained with use_relative_actions=True +HF_EVAL_DATASET_ID = "your-org/your-eval-dataset" +TASK_DESCRIPTION = "your task description" + +NUM_EPISODES = 1 +FPS = 30 +EPISODE_TIME_SEC = 300 + +# Robot CAN interfaces +FOLLOWER_LEFT_PORT = "can0" +FOLLOWER_RIGHT_PORT = "can1" + +# Camera configuration +CAMERA_CONFIG = { + 
"left_wrist": OpenCVCameraConfig(index_or_path="/dev/video5", width=640, height=480, fps=FPS), + "right_wrist": OpenCVCameraConfig(index_or_path="/dev/video1", width=640, height=480, fps=FPS), + "base": OpenCVCameraConfig(index_or_path="/dev/video3", width=640, height=480, fps=FPS), +} + + +def make_robot_action(action_values: dict, features: dict) -> RobotAction: + """Convert action values to robot action dict, filtering by features.""" + robot_action = {} + for key in features: + if key.startswith(ACTION + "."): + action_key = key.removeprefix(ACTION + ".") + if action_key in action_values: + robot_action[action_key] = action_values[action_key] + return robot_action + + +def inference_loop_relative( + robot, + policy, + preprocessor, + postprocessor, + dataset, + events, + fps: int, + control_time_s: float, + single_task: str, + display_data: bool = True, + state_key: str = "observation.state", +): + """ + Inference loop for policies trained with UMI-style relative actions and state. + + Key differences from standard inference: + - Observation state is converted to relative (provides velocity info) + - Policy outputs relative deltas (action_relative) + - We add current robot position to get absolute targets: + action_absolute = action_relative + current_position + """ + device = get_safe_torch_device(policy.config.device) + + timestamp = 0 + start_t = time.perf_counter() + + while timestamp < control_time_s: + loop_start = time.perf_counter() + + if events["exit_early"] or events["stop_recording"]: + break + + # Get current robot observation + obs = robot.get_observation() + observation_frame = build_dataset_frame(dataset.features, obs, prefix=OBS_STR) + + # Get current joint positions (reference for relative conversion) + current_pos = {k: v for k, v in obs.items() if k.endswith(".pos")} + + # Convert observation state to relative (UMI-style) + # This gives velocity-like information to the policy + if state_key in observation_frame: + state_tensor = observation_frame[state_key] + if isinstance(state_tensor, torch.Tensor): + observation_frame[state_key] = convert_state_to_relative(state_tensor) + + # Run policy inference - outputs relative actions + action_values = predict_action( + observation=observation_frame, + policy=policy, + device=device, + preprocessor=preprocessor, + postprocessor=postprocessor, + use_amp=policy.config.use_amp, + task=single_task, + robot_type=robot.robot_type, + ) + + # Convert relative actions to absolute + # action_values contains relative deltas, current_pos has absolute positions + relative_action = make_robot_action(action_values, dataset.features) + absolute_action = convert_from_relative_actions_dict(relative_action, current_pos) + + # Send absolute action to robot + robot.send_action(absolute_action) + + # Record to dataset (store the absolute action that was sent) + if dataset is not None: + action_frame = build_dataset_frame(dataset.features, absolute_action, prefix=ACTION) + frame = {**observation_frame, **action_frame, "task": single_task} + dataset.add_frame(frame) + + if display_data: + log_rerun_data(observation=obs, action=absolute_action) + + dt = time.perf_counter() - loop_start + precise_sleep(1 / fps - dt) + timestamp = time.perf_counter() - start_t + + +def main(): + """Main evaluation function for relative action policies.""" + print("=" * 65) + print(" OpenArms Evaluation - UMI-style Relative Actions") + print("=" * 65) + print(f"\nModel: {HF_MODEL_ID}") + print(f"Evaluation Dataset: {HF_EVAL_DATASET_ID}") + print(f"Task: 
{TASK_DESCRIPTION}") + print(f"Episodes: {NUM_EPISODES}") + print(f"Episode Duration: {EPISODE_TIME_SEC}s") + print("\nNote: Policy outputs are relative deltas, converted to absolute at inference time") + + # Setup robot + follower_config = OpenArmsFollowerConfig( + port_left=FOLLOWER_LEFT_PORT, + port_right=FOLLOWER_RIGHT_PORT, + can_interface="socketcan", + id="openarms_follower", + disable_torque_on_disconnect=True, + max_relative_target=10.0, + cameras=CAMERA_CONFIG, + ) + + follower = OpenArmsFollower(follower_config) + follower.connect(calibrate=False) + + if not follower.is_connected: + raise RuntimeError("Follower robot failed to connect!") + + # Build processors + teleop_action_processor, robot_action_processor, robot_observation_processor = make_default_processors() + + # Build dataset features + action_features_hw = {k: v for k, v in follower.action_features.items() if k.endswith(".pos")} + + dataset_features = combine_feature_dicts( + aggregate_pipeline_dataset_features( + pipeline=teleop_action_processor, + initial_features=create_initial_features(action=action_features_hw), + use_videos=True, + ), + aggregate_pipeline_dataset_features( + pipeline=robot_observation_processor, + initial_features=create_initial_features(observation=follower.observation_features), + use_videos=True, + ), + ) + + # Check existing dataset + dataset_path = Path.home() / ".cache" / "huggingface" / "lerobot" / HF_EVAL_DATASET_ID + if dataset_path.exists(): + print(f"\nDataset already exists at: {dataset_path}") + choice = input("Continue and append? (y/n): ").strip().lower() + if choice != 'y': + follower.disconnect() + return + + # Create dataset + dataset = LeRobotDataset.create( + repo_id=HF_EVAL_DATASET_ID, + fps=FPS, + features=dataset_features, + robot_type=follower.name, + use_videos=True, + image_writer_processes=0, + image_writer_threads=12, + ) + + # Load policy + policy_config = PreTrainedConfig.from_pretrained(HF_MODEL_ID) + policy_config.pretrained_path = HF_MODEL_ID + policy = make_policy(policy_config, ds_meta=dataset.meta) + + preprocessor, postprocessor = make_pre_post_processors( + policy_cfg=policy.config, + pretrained_path=HF_MODEL_ID, + dataset_stats=dataset.meta.stats, + preprocessor_overrides={ + "device_processor": {"device": str(policy.config.device)} + }, + ) + + # Initialize controls + listener, events = init_keyboard_listener() + init_rerun(session_name="openarms_eval_relative") + episode_idx = 0 + + print("\nControls:") + print(" ESC - Stop recording and save") + print(" → - End current episode") + print(" ← - Re-record episode") + + try: + while episode_idx < NUM_EPISODES and not events["stop_recording"]: + log_say(f"Evaluating episode {episode_idx + 1} of {NUM_EPISODES}") + print(f"\nRunning relative action inference for episode {episode_idx + 1}...") + + # Run inference with relative action conversion + inference_loop_relative( + robot=follower, + policy=policy, + preprocessor=preprocessor, + postprocessor=postprocessor, + dataset=dataset, + events=events, + fps=FPS, + control_time_s=EPISODE_TIME_SEC, + single_task=TASK_DESCRIPTION, + display_data=True, + ) + + # Handle re-recording + if events.get("rerecord_episode", False): + log_say("Re-recording episode") + events["rerecord_episode"] = False + events["exit_early"] = False + dataset.clear_episode_buffer() + continue + + # Save episode + if dataset.episode_buffer is not None and dataset.episode_buffer.get("size", 0) > 0: + print(f"Saving episode {episode_idx + 1} ({dataset.episode_buffer['size']} frames)...") + 
dataset.save_episode() + episode_idx += 1 + + events["exit_early"] = False + + # Wait for manual reset between episodes + if not events["stop_recording"] and episode_idx < NUM_EPISODES: + log_say("Waiting for manual reset") + input("Press ENTER when ready for next episode...") + + print(f"\nEvaluation complete! {episode_idx} episodes recorded") + log_say("Evaluation complete", blocking=True) + + except KeyboardInterrupt: + print("\n\nEvaluation interrupted by user") + + finally: + follower.disconnect() + + if listener is not None: + listener.stop() + + dataset.finalize() + print("\nUploading to Hugging Face Hub...") + dataset.push_to_hub(private=True) + + +if __name__ == "__main__": + main() + diff --git a/src/lerobot/configs/train.py b/src/lerobot/configs/train.py index cee9dfdf9..030dec027 100644 --- a/src/lerobot/configs/train.py +++ b/src/lerobot/configs/train.py @@ -66,6 +66,11 @@ class TrainPipelineConfig(HubMixin): eval: EvalConfig = field(default_factory=EvalConfig) wandb: WandBConfig = field(default_factory=WandBConfig) + # UMI-style relative actions: convert absolute joint positions to chunk-relative deltas + # During training, actions become relative to current position at chunk start + # During inference, predicted deltas are added to current robot position + use_relative_actions: bool = False + # RA-BC (Reward-Aligned Behavior Cloning) parameters use_rabc: bool = False # Enable reward-weighted training rabc_progress_path: str | None = None # Path to precomputed SARM progress parquet file diff --git a/src/lerobot/scripts/lerobot_train.py b/src/lerobot/scripts/lerobot_train.py index 6cf733442..c6724c11d 100644 --- a/src/lerobot/scripts/lerobot_train.py +++ b/src/lerobot/scripts/lerobot_train.py @@ -46,6 +46,7 @@ from lerobot.utils.train_utils import ( save_checkpoint, update_last_checkpoint, ) +from lerobot.utils.relative_actions import convert_to_relative_actions from lerobot.utils.utils import ( format_big_number, has_method, @@ -298,6 +299,10 @@ def train(cfg: TrainPipelineConfig, accelerator: Accelerator | None = None): device=device, ) + if cfg.use_relative_actions and is_main_process: + logging.info(colored("UMI-style relative actions enabled", "cyan", attrs=["bold"])) + logging.info("Actions will be converted to chunk-relative deltas during training") + step = 0 # number of policy updates (forward + backward + optim) if cfg.resume: @@ -385,6 +390,11 @@ def train(cfg: TrainPipelineConfig, accelerator: Accelerator | None = None): start_time = time.perf_counter() batch = next(dl_iter) batch = preprocessor(batch) + + # Convert to UMI-style relative actions if enabled + if cfg.use_relative_actions: + batch = convert_to_relative_actions(batch) + train_tracker.dataloading_s = time.perf_counter() - start_time train_tracker, output_dict = update_policy( diff --git a/src/lerobot/utils/relative_actions.py b/src/lerobot/utils/relative_actions.py new file mode 100644 index 000000000..5862498e9 --- /dev/null +++ b/src/lerobot/utils/relative_actions.py @@ -0,0 +1,172 @@ +""" +UMI-style relative action and state utilities. 
+ +Implements chunk-relative representation from the UMI paper: +"Universal Manipulation Interface: In-The-Wild Robot Teaching Without In-The-Wild Robots" + +For each inference step: +- Actions are relative to current position at chunk start (t0) +- State history is relative to current position (provides velocity info) + +Training: + action_relative[t] = action_absolute[t] - position_at_t0 + state_relative[t] = state_absolute[t] - current_position + +Inference: + action_absolute[t] = action_relative[t] + current_position +""" + +import torch + + +def convert_to_relative(batch: dict, state_key: str = "observation.state") -> dict: + """ + Convert absolute actions AND state to chunk-relative (UMI-style) for training. + + Following UMI paper PD2.1 and PD2.2: + - Actions become relative to current position + - State history becomes relative to current position (provides velocity info) + + Args: + batch: Training batch containing: + - "action": (batch_size, chunk_size, action_dim) absolute action targets + - state_key: (batch_size, [n_obs_steps,] state_dim) observation state + state_key: Key for the observation state in the batch + + Returns: + Modified batch with relative actions and state + """ + if "action" not in batch or state_key not in batch: + return batch + + action = batch["action"] + state = batch[state_key] + + batch = batch.copy() + + # Get current position (reference for relative conversion) + # State shape: (batch, state_dim) or (batch, n_obs_steps, state_dim) + if state.dim() == 3: + current_pos = state[:, -1, :] # (batch, state_dim) + + # Convert state history to relative (each timestep relative to current) + # This gives velocity-like information to the policy + relative_state = state.clone() + relative_state = state - current_pos[:, None, :] + batch[state_key] = relative_state + else: + current_pos = state # (batch, state_dim) + # Single timestep state becomes zeros (relative to itself) + batch[state_key] = torch.zeros_like(state) + + # Convert actions to relative + action_dim = action.shape[-1] + state_dim = current_pos.shape[-1] + min_dim = min(action_dim, state_dim) + + relative_action = action.clone() + relative_action[..., :min_dim] = action[..., :min_dim] - current_pos[:, None, :min_dim] + batch["action"] = relative_action + + return batch + + +# Alias for backward compatibility +convert_to_relative_actions = convert_to_relative + + +def convert_state_to_relative( + state: torch.Tensor, + current_pos: torch.Tensor | None = None, +) -> torch.Tensor: + """ + Convert absolute state to relative for inference. + + Args: + state: State tensor, shape (state_dim,) or (n_obs_steps, state_dim) + current_pos: Current position to use as reference. If None, uses last timestep of state. + + Returns: + Relative state tensor + """ + if current_pos is None: + if state.dim() >= 2: + current_pos = state[-1, :] # Last timestep + else: + current_pos = state + + if state.dim() == 1: + return torch.zeros_like(state) + elif state.dim() == 2: + # (n_obs_steps, state_dim) + return state - current_pos[None, :] + else: + # (batch, n_obs_steps, state_dim) + return state - current_pos[:, None, :] + + +def convert_from_relative_actions( + relative_actions: torch.Tensor, + current_pos: torch.Tensor | dict[str, float], +) -> torch.Tensor: + """ + Convert relative actions back to absolute for robot execution. 
+
+    Args:
+        relative_actions: Predicted relative actions, shape (chunk_size, action_dim)
+                         or (batch, chunk_size, action_dim)
+        current_pos: Current robot position as tensor (action_dim,) or dict of joint positions
+
+    Returns:
+        Absolute actions for robot execution
+    """
+    if isinstance(current_pos, dict):
+        # Convert dict to tensor, maintaining key order
+        current_pos = torch.tensor(list(current_pos.values()), dtype=relative_actions.dtype)
+
+    # Ensure current_pos is on same device
+    current_pos = current_pos.to(relative_actions.device)
+
+    # Match dimensions
+    action_dim = relative_actions.shape[-1]
+    pos_dim = current_pos.shape[-1] if current_pos.dim() > 0 else len(current_pos)
+    min_dim = min(action_dim, pos_dim)
+
+    absolute_actions = relative_actions.clone()
+
+    if relative_actions.dim() == 2:
+        # Shape: (chunk_size, action_dim)
+        absolute_actions[..., :min_dim] = relative_actions[..., :min_dim] + current_pos[:min_dim]
+    elif relative_actions.dim() == 3:
+        # Shape: (batch, chunk_size, action_dim)
+        absolute_actions[..., :min_dim] = relative_actions[..., :min_dim] + current_pos[None, None, :min_dim]
+    else:
+        # Shape: (action_dim,)
+        absolute_actions[..., :min_dim] = relative_actions[..., :min_dim] + current_pos[:min_dim]
+
+    return absolute_actions
+
+
+def convert_from_relative_actions_dict(
+    relative_actions: dict[str, float],
+    current_pos: dict[str, float],
+) -> dict[str, float]:
+    """
+    Convert relative actions back to absolute for robot execution (dict version).
+
+    Args:
+        relative_actions: Predicted relative actions as dict (e.g., {"joint_1.pos": 0.1, ...})
+        current_pos: Current robot position as dict (e.g., {"joint_1.pos": 45.0, ...})
+
+    Returns:
+        Absolute actions dict for robot execution
+    """
+    absolute_actions = {}
+    for key, rel_value in relative_actions.items():
+        if key in current_pos:
+            absolute_actions[key] = rel_value + current_pos[key]
+        else:
+            # Key not in current position, keep as-is (shouldn't happen normally)
+            absolute_actions[key] = rel_value
+    return absolute_actions
+

From 036795559033793d2deaef1566c648e82b6540fd Mon Sep 17 00:00:00 2001
From: Pepijn
Date: Fri, 2 Jan 2026 18:58:47 +0100
Subject: [PATCH 2/8] add code for relative actions and state and unifying tasks

---
 examples/openarms/evaluate_relative.py | 182 ++++++++++----------
 scripts/unify_tasks.py                 |  59 +++++++
 src/lerobot/configs/train.py           |  12 +-
 src/lerobot/scripts/lerobot_train.py   |  38 ++++-
 src/lerobot/utils/relative_actions.py  | 228 +++++++++++--------------
 5 files changed, 295 insertions(+), 224 deletions(-)
 create mode 100644 scripts/unify_tasks.py

diff --git a/examples/openarms/evaluate_relative.py b/examples/openarms/evaluate_relative.py
index c9c90355e..a3e29726c 100644
--- a/examples/openarms/evaluate_relative.py
+++ b/examples/openarms/evaluate_relative.py
@@ -1,28 +1,14 @@
 #!/usr/bin/env python
-
-# Copyright 2025 The HuggingFace Inc. team. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
- """ -OpenArms Policy Evaluation with UMI-style Relative Actions +OpenArms Policy Evaluation with Relative Actions -Evaluates a policy trained with relative actions (use_relative_actions=True). -During inference, the policy outputs relative deltas which are added to the -current robot position to get absolute targets. - -This follows the UMI paper's "relative trajectory" action representation: - action_absolute[t] = action_relative[t] + current_position +Two modes supported (based on training config): + Mode 1: Relative actions only (use_relative_state=False) + - Policy outputs relative action deltas + - State input is absolute + Mode 2: Relative actions + state (use_relative_state=True) + - Policy outputs relative action deltas + - State input is also converted to relative Example usage: python examples/openarms/evaluate_relative.py @@ -35,6 +21,7 @@ import torch from lerobot.cameras.opencv.configuration_opencv import OpenCVCameraConfig from lerobot.configs.policies import PreTrainedConfig +from lerobot.configs.train import TrainPipelineConfig from lerobot.datasets.lerobot_dataset import LeRobotDataset from lerobot.datasets.pipeline_features import aggregate_pipeline_dataset_features, create_initial_features from lerobot.datasets.utils import build_dataset_frame, combine_feature_dicts @@ -47,13 +34,17 @@ from lerobot.robots.openarms.openarms_follower import OpenArmsFollower from lerobot.utils.constants import ACTION, OBS_STR from lerobot.utils.control_utils import init_keyboard_listener, precise_sleep from lerobot.utils.device_utils import get_safe_torch_device -from lerobot.utils.relative_actions import convert_from_relative_actions_dict, convert_state_to_relative +from lerobot.utils.relative_actions import ( + convert_from_relative_actions_dict, + convert_state_to_relative, + PerTimestepNormalizer, +) from lerobot.utils.utils import log_say from lerobot.utils.visualization_utils import init_rerun, log_rerun_data -# Configuration - Update these for your setup -HF_MODEL_ID = "your-org/your-relative-policy" # Policy trained with use_relative_actions=True +# Configuration +HF_MODEL_ID = "your-org/your-relative-policy" HF_EVAL_DATASET_ID = "your-org/your-eval-dataset" TASK_DESCRIPTION = "your task description" @@ -61,11 +52,9 @@ NUM_EPISODES = 1 FPS = 30 EPISODE_TIME_SEC = 300 -# Robot CAN interfaces FOLLOWER_LEFT_PORT = "can0" FOLLOWER_RIGHT_PORT = "can1" -# Camera configuration CAMERA_CONFIG = { "left_wrist": OpenCVCameraConfig(index_or_path="/dev/video5", width=640, height=480, fps=FPS), "right_wrist": OpenCVCameraConfig(index_or_path="/dev/video1", width=640, height=480, fps=FPS), @@ -74,7 +63,6 @@ CAMERA_CONFIG = { def make_robot_action(action_values: dict, features: dict) -> RobotAction: - """Convert action values to robot action dict, filtering by features.""" robot_action = {} for key in features: if key.startswith(ACTION + "."): @@ -84,6 +72,40 @@ def make_robot_action(action_values: dict, features: dict) -> RobotAction: return robot_action +def load_relative_config(model_path: Path | str) -> tuple[PerTimestepNormalizer | None, bool]: + """Load normalizer and relative_state setting from checkpoint.""" + model_path = Path(model_path) if isinstance(model_path, str) else model_path + normalizer = None + use_relative_state = False + + # Try local path first + if model_path.exists(): + stats_path = model_path / "relative_stats.pt" + if stats_path.exists(): + normalizer = PerTimestepNormalizer.load(stats_path) + print(f"Loaded per-timestep stats from: {stats_path}") + + config_path = 
model_path / "train_config.json" + if config_path.exists(): + cfg = TrainPipelineConfig.from_pretrained(model_path) + use_relative_state = getattr(cfg, "use_relative_state", False) + else: + # Try hub + try: + from huggingface_hub import hf_hub_download + stats_file = hf_hub_download(repo_id=str(model_path), filename="relative_stats.pt") + normalizer = PerTimestepNormalizer.load(stats_file) + print("Loaded per-timestep stats from hub") + + config_file = hf_hub_download(repo_id=str(model_path), filename="train_config.json") + cfg = TrainPipelineConfig.from_pretrained(Path(config_file).parent) + use_relative_state = getattr(cfg, "use_relative_state", False) + except Exception as e: + print(f"Warning: Could not load relative config: {e}") + + return normalizer, use_relative_state + + def inference_loop_relative( robot, policy, @@ -96,18 +118,15 @@ def inference_loop_relative( single_task: str, display_data: bool = True, state_key: str = "observation.state", + relative_normalizer: PerTimestepNormalizer | None = None, + use_relative_state: bool = False, ): """ - Inference loop for policies trained with UMI-style relative actions and state. + Inference loop for relative action policies. - Key differences from standard inference: - - Observation state is converted to relative (provides velocity info) - - Policy outputs relative deltas (action_relative) - - We add current robot position to get absolute targets: - action_absolute = action_relative + current_position + If use_relative_state=True, also converts observation state to relative. """ device = get_safe_torch_device(policy.config.device) - timestamp = 0 start_t = time.perf_counter() @@ -117,21 +136,17 @@ def inference_loop_relative( if events["exit_early"] or events["stop_recording"]: break - # Get current robot observation obs = robot.get_observation() observation_frame = build_dataset_frame(dataset.features, obs, prefix=OBS_STR) - - # Get current joint positions (reference for relative conversion) current_pos = {k: v for k, v in obs.items() if k.endswith(".pos")} - # Convert observation state to relative (UMI-style) - # This gives velocity-like information to the policy - if state_key in observation_frame: + # Convert state to relative if using full UMI mode + if use_relative_state and state_key in observation_frame: state_tensor = observation_frame[state_key] if isinstance(state_tensor, torch.Tensor): observation_frame[state_key] = convert_state_to_relative(state_tensor) - # Run policy inference - outputs relative actions + # Policy inference (outputs normalized relative actions) action_values = predict_action( observation=observation_frame, policy=policy, @@ -143,15 +158,21 @@ def inference_loop_relative( robot_type=robot.robot_type, ) - # Convert relative actions to absolute - # action_values contains relative deltas, current_pos has absolute positions + # Unnormalize actions + if relative_normalizer is not None: + action_keys = [k for k in action_values.keys() if not k.startswith("task")] + action_tensor = torch.tensor([[action_values[k] for k in action_keys]]) + action_tensor = action_tensor.unsqueeze(1) + action_unnorm = relative_normalizer.unnormalize(action_tensor) + for i, k in enumerate(action_keys): + action_values[k] = action_unnorm[0, 0, i].item() + + # Convert to absolute relative_action = make_robot_action(action_values, dataset.features) absolute_action = convert_from_relative_actions_dict(relative_action, current_pos) - # Send absolute action to robot robot.send_action(absolute_action) - # Record to dataset (store the 
absolute action that was sent) if dataset is not None: action_frame = build_dataset_frame(dataset.features, absolute_action, prefix=ACTION) frame = {**observation_frame, **action_frame, "task": single_task} @@ -166,16 +187,17 @@ def inference_loop_relative( def main(): - """Main evaluation function for relative action policies.""" - print("=" * 65) - print(" OpenArms Evaluation - UMI-style Relative Actions") - print("=" * 65) + print("=" * 60) + print(" OpenArms Evaluation - Relative Actions") + print("=" * 60) print(f"\nModel: {HF_MODEL_ID}") - print(f"Evaluation Dataset: {HF_EVAL_DATASET_ID}") - print(f"Task: {TASK_DESCRIPTION}") - print(f"Episodes: {NUM_EPISODES}") - print(f"Episode Duration: {EPISODE_TIME_SEC}s") - print("\nNote: Policy outputs are relative deltas, converted to absolute at inference time") + print(f"Dataset: {HF_EVAL_DATASET_ID}") + print(f"Episodes: {NUM_EPISODES}, Duration: {EPISODE_TIME_SEC}s") + + # Load relative action config + relative_normalizer, use_relative_state = load_relative_config(HF_MODEL_ID) + mode = "actions + state" if use_relative_state else "actions only" + print(f"Mode: relative {mode}") # Setup robot follower_config = OpenArmsFollowerConfig( @@ -192,12 +214,9 @@ def main(): follower.connect(calibrate=False) if not follower.is_connected: - raise RuntimeError("Follower robot failed to connect!") + raise RuntimeError("Robot failed to connect!") - # Build processors teleop_action_processor, robot_action_processor, robot_observation_processor = make_default_processors() - - # Build dataset features action_features_hw = {k: v for k, v in follower.action_features.items() if k.endswith(".pos")} dataset_features = combine_feature_dicts( @@ -213,16 +232,13 @@ def main(): ), ) - # Check existing dataset dataset_path = Path.home() / ".cache" / "huggingface" / "lerobot" / HF_EVAL_DATASET_ID if dataset_path.exists(): - print(f"\nDataset already exists at: {dataset_path}") - choice = input("Continue and append? (y/n): ").strip().lower() - if choice != 'y': + print(f"\nDataset exists at: {dataset_path}") + if input("Continue? 
(y/n): ").strip().lower() != 'y': follower.disconnect() return - # Create dataset dataset = LeRobotDataset.create( repo_id=HF_EVAL_DATASET_ID, fps=FPS, @@ -233,7 +249,6 @@ def main(): image_writer_threads=12, ) - # Load policy policy_config = PreTrainedConfig.from_pretrained(HF_MODEL_ID) policy_config.pretrained_path = HF_MODEL_ID policy = make_policy(policy_config, ds_meta=dataset.meta) @@ -242,27 +257,19 @@ def main(): policy_cfg=policy.config, pretrained_path=HF_MODEL_ID, dataset_stats=dataset.meta.stats, - preprocessor_overrides={ - "device_processor": {"device": str(policy.config.device)} - }, + preprocessor_overrides={"device_processor": {"device": str(policy.config.device)}}, ) - # Initialize controls listener, events = init_keyboard_listener() init_rerun(session_name="openarms_eval_relative") episode_idx = 0 - print("\nControls:") - print(" ESC - Stop recording and save") - print(" → - End current episode") - print(" ← - Re-record episode") + print("\nControls: ESC=stop, →=next episode, ←=rerecord") try: while episode_idx < NUM_EPISODES and not events["stop_recording"]: - log_say(f"Evaluating episode {episode_idx + 1} of {NUM_EPISODES}") - print(f"\nRunning relative action inference for episode {episode_idx + 1}...") + log_say(f"Episode {episode_idx + 1} of {NUM_EPISODES}") - # Run inference with relative action conversion inference_loop_relative( robot=follower, policy=policy, @@ -274,46 +281,41 @@ def main(): control_time_s=EPISODE_TIME_SEC, single_task=TASK_DESCRIPTION, display_data=True, + relative_normalizer=relative_normalizer, + use_relative_state=use_relative_state, ) - # Handle re-recording if events.get("rerecord_episode", False): - log_say("Re-recording episode") + log_say("Re-recording") events["rerecord_episode"] = False events["exit_early"] = False dataset.clear_episode_buffer() continue - # Save episode if dataset.episode_buffer is not None and dataset.episode_buffer.get("size", 0) > 0: - print(f"Saving episode {episode_idx + 1} ({dataset.episode_buffer['size']} frames)...") + print(f"Saving episode {episode_idx + 1}...") dataset.save_episode() episode_idx += 1 events["exit_early"] = False - # Wait for manual reset between episodes if not events["stop_recording"] and episode_idx < NUM_EPISODES: - log_say("Waiting for manual reset") - input("Press ENTER when ready for next episode...") + input("Press ENTER for next episode...") - print(f"\nEvaluation complete! {episode_idx} episodes recorded") - log_say("Evaluation complete", blocking=True) + print(f"\nDone! 
{episode_idx} episodes recorded") + log_say("Complete", blocking=True) except KeyboardInterrupt: - print("\n\nEvaluation interrupted by user") + print("\n\nInterrupted") finally: follower.disconnect() - if listener is not None: listener.stop() - dataset.finalize() - print("\nUploading to Hugging Face Hub...") + print("Uploading to Hub...") dataset.push_to_hub(private=True) if __name__ == "__main__": main() - diff --git a/scripts/unify_tasks.py b/scripts/unify_tasks.py new file mode 100644 index 000000000..c457b7b31 --- /dev/null +++ b/scripts/unify_tasks.py @@ -0,0 +1,59 @@ +#!/usr/bin/env python +"""Unify all tasks in a dataset to a single task.""" + +import argparse +import json +from pathlib import Path + +import pandas as pd + +from lerobot.datasets.lerobot_dataset import LeRobotDataset +from lerobot.datasets.utils import write_tasks + + +def unify_tasks(repo_id: str, new_task: str): + """Set all episodes to use a single task.""" + print(f"Loading dataset: {repo_id}") + dataset = LeRobotDataset(repo_id) + root = dataset.root + + print(f"Current tasks: {list(dataset.meta.tasks['task']) if dataset.meta.tasks is not None else []}") + + # 1. Update tasks.parquet to have only one task + tasks_df = pd.DataFrame({"task": [new_task]}) + write_tasks(tasks_df, root) + print(f"Set single task: '{new_task}'") + + # 2. Update all data parquet files to set task_index=0 + data_dir = root / "data" + parquet_files = sorted(data_dir.glob("*/*.parquet")) + for parquet_path in parquet_files: + df = pd.read_parquet(parquet_path) + df["task_index"] = 0 + df.to_parquet(parquet_path) + print(f"Updated: {parquet_path.relative_to(root)}") + + # 3. Update info.json + info_path = root / "info.json" + with open(info_path) as f: + info = json.load(f) + info["total_tasks"] = 1 + with open(info_path, "w") as f: + json.dump(info, f, indent=2) + + print(f"\nDone! 
All {dataset.meta.total_episodes} episodes now use task: '{new_task}'") + print(f"\nTo push: huggingface-cli upload {repo_id} {root} --repo-type dataset") + + +def main(): + parser = argparse.ArgumentParser(description="Unify all tasks in a dataset to a single task") + parser.add_argument("--repo_id", type=str, required=True, help="Dataset repo_id") + parser.add_argument("--task", type=str, required=True, help="New task description") + args = parser.parse_args() + + unify_tasks(args.repo_id, args.task) + + +if __name__ == "__main__": + main() + diff --git a/src/lerobot/configs/train.py b/src/lerobot/configs/train.py index 030dec027..384f5041c 100644 --- a/src/lerobot/configs/train.py +++ b/src/lerobot/configs/train.py @@ -66,10 +66,16 @@ class TrainPipelineConfig(HubMixin): eval: EvalConfig = field(default_factory=EvalConfig) wandb: WandBConfig = field(default_factory=WandBConfig) - # UMI-style relative actions: convert absolute joint positions to chunk-relative deltas - # During training, actions become relative to current position at chunk start - # During inference, predicted deltas are added to current robot position + # UMI-style relative actions with per-timestep normalization + # Mode 1: use_relative_actions=True, use_relative_state=False + # - Actions: relative to current position + per-timestep normalized + # - State: absolute (unchanged) + # Mode 2: use_relative_actions=True, use_relative_state=True (full UMI) + # - Actions: relative to current position + per-timestep normalized + # - State: relative to current position (provides velocity info) + # Stats are computed automatically from first 1000 batches at training start use_relative_actions: bool = False + use_relative_state: bool = False # RA-BC (Reward-Aligned Behavior Cloning) parameters use_rabc: bool = False # Enable reward-weighted training diff --git a/src/lerobot/scripts/lerobot_train.py b/src/lerobot/scripts/lerobot_train.py index c6724c11d..9117e64c5 100644 --- a/src/lerobot/scripts/lerobot_train.py +++ b/src/lerobot/scripts/lerobot_train.py @@ -46,7 +46,11 @@ from lerobot.utils.train_utils import ( save_checkpoint, update_last_checkpoint, ) -from lerobot.utils.relative_actions import convert_to_relative_actions +from lerobot.utils.relative_actions import ( + convert_to_relative_actions, + compute_relative_action_stats, + PerTimestepNormalizer, +) from lerobot.utils.utils import ( format_big_number, has_method, @@ -299,9 +303,26 @@ def train(cfg: TrainPipelineConfig, accelerator: Accelerator | None = None): device=device, ) - if cfg.use_relative_actions and is_main_process: - logging.info(colored("UMI-style relative actions enabled", "cyan", attrs=["bold"])) - logging.info("Actions will be converted to chunk-relative deltas during training") + # Compute per-timestep normalizer for relative actions + relative_normalizer = None + if cfg.use_relative_actions: + mode = "actions + state" if cfg.use_relative_state else "actions only" + if is_main_process: + logging.info(colored(f"Relative mode: {mode}", "cyan", attrs=["bold"])) + logging.info("Computing per-timestep stats from dataset (first 1000 batches)...") + temp_loader = torch.utils.data.DataLoader( + dataset, batch_size=cfg.batch_size, shuffle=True, num_workers=0 + ) + mean, std = compute_relative_action_stats(temp_loader, num_batches=1000) + relative_normalizer = PerTimestepNormalizer(mean, std) + stats_path = cfg.output_dir / "relative_stats.pt" + relative_normalizer.save(stats_path) + logging.info(f"Saved stats to: {stats_path}") + + accelerator.wait_for_everyone() 
+ + if not is_main_process: + relative_normalizer = PerTimestepNormalizer.load(cfg.output_dir / "relative_stats.pt") step = 0 # number of policy updates (forward + backward + optim) @@ -391,9 +412,11 @@ def train(cfg: TrainPipelineConfig, accelerator: Accelerator | None = None): batch = next(dl_iter) batch = preprocessor(batch) - # Convert to UMI-style relative actions if enabled + # Convert to relative actions (and optionally state) if enabled if cfg.use_relative_actions: - batch = convert_to_relative_actions(batch) + batch = convert_to_relative_actions(batch, convert_state=cfg.use_relative_state) + if relative_normalizer is not None: + batch["action"] = relative_normalizer.normalize(batch["action"]) train_tracker.dataloading_s = time.perf_counter() - start_time @@ -449,6 +472,9 @@ def train(cfg: TrainPipelineConfig, accelerator: Accelerator | None = None): preprocessor=preprocessor, postprocessor=postprocessor, ) + # Save relative action stats with checkpoint + if relative_normalizer is not None: + relative_normalizer.save(checkpoint_dir / "relative_stats.pt") update_last_checkpoint(checkpoint_dir) if wandb_logger: wandb_logger.log_policy(checkpoint_dir) diff --git a/src/lerobot/utils/relative_actions.py b/src/lerobot/utils/relative_actions.py index 5862498e9..ee58741b9 100644 --- a/src/lerobot/utils/relative_actions.py +++ b/src/lerobot/utils/relative_actions.py @@ -1,172 +1,150 @@ """ -UMI-style relative action and state utilities. +UMI-style relative actions with per-timestep normalization. -Implements chunk-relative representation from the UMI paper: -"Universal Manipulation Interface: In-The-Wild Robot Teaching Without In-The-Wild Robots" +Two modes supported: + Mode 1: Relative actions only (use_relative_state=False) + - Actions converted to relative, state stays absolute + Mode 2: Relative actions + state (use_relative_state=True, full UMI) + - Both actions and state converted to relative -For each inference step: -- Actions are relative to current position at chunk start (t0) -- State history is relative to current position (provides velocity info) - -Training: - action_relative[t] = action_absolute[t] - position_at_t0 - state_relative[t] = state_absolute[t] - current_position - -Inference: - action_absolute[t] = action_relative[t] + current_position +Per-timestep normalization (TRI LBM / BEHAVIOR style): + Training: action_norm[t] = (action_rel[t] - mean[t]) / std[t] + Inference: action_rel[t] = action_norm[t] * std[t] + mean[t] """ import torch +from pathlib import Path -def convert_to_relative(batch: dict, state_key: str = "observation.state") -> dict: - """ - Convert absolute actions AND state to chunk-relative (UMI-style) for training. 
+class PerTimestepNormalizer: + """Per-timestep normalization using precomputed dataset statistics.""" - Following UMI paper PD2.1 and PD2.2: - - Actions become relative to current position - - State history becomes relative to current position (provides velocity info) + def __init__(self, mean: torch.Tensor, std: torch.Tensor, eps: float = 1e-8): + self.mean = mean + self.std = std + self.eps = eps + + def normalize(self, x: torch.Tensor) -> torch.Tensor: + mean = self.mean.to(x.device, x.dtype) + std = self.std.to(x.device, x.dtype) + if x.dim() == 3 and mean.dim() == 2: + mean, std = mean.unsqueeze(0), std.unsqueeze(0) + return (x - mean) / (std + self.eps) + + def unnormalize(self, x: torch.Tensor) -> torch.Tensor: + mean = self.mean.to(x.device, x.dtype) + std = self.std.to(x.device, x.dtype) + if x.dim() == 3 and mean.dim() == 2: + mean, std = mean.unsqueeze(0), std.unsqueeze(0) + return x * (std + self.eps) + mean + + def save(self, path: Path | str): + path = Path(path) + path.parent.mkdir(parents=True, exist_ok=True) + torch.save({"mean": self.mean.cpu(), "std": self.std.cpu(), "eps": self.eps}, path) + + @classmethod + def load(cls, path: Path | str) -> "PerTimestepNormalizer": + data = torch.load(path, weights_only=True, map_location="cpu") + return cls(data["mean"], data["std"], data.get("eps", 1e-8)) + + +def compute_relative_action_stats( + dataloader, + state_key: str = "observation.state", + num_batches: int | None = None, +) -> tuple[torch.Tensor, torch.Tensor]: + """Compute per-timestep mean/std from relative actions.""" + all_rel = [] + for i, batch in enumerate(dataloader): + if num_batches is not None and i >= num_batches: + break + action, state = batch["action"], batch[state_key] + current_pos = state[:, -1, :] if state.dim() == 3 else state + min_dim = min(action.shape[-1], current_pos.shape[-1]) + rel = action.clone() + rel[..., :min_dim] -= current_pos[:, None, :min_dim] + all_rel.append(rel) + + all_rel = torch.cat(all_rel, dim=0) + return all_rel.mean(dim=0), all_rel.std(dim=0).clamp(min=1e-6) + + +def convert_to_relative( + batch: dict, + state_key: str = "observation.state", + convert_state: bool = True, +) -> dict: + """ + Convert actions (and optionally state) to relative. 
Args: - batch: Training batch containing: - - "action": (batch_size, chunk_size, action_dim) absolute action targets - - state_key: (batch_size, [n_obs_steps,] state_dim) observation state - state_key: Key for the observation state in the batch - - Returns: - Modified batch with relative actions and state + batch: Training batch with "action" and state_key + state_key: Key for observation state + convert_state: If True, also convert state to relative (full UMI mode) """ if "action" not in batch or state_key not in batch: return batch action = batch["action"] state = batch[state_key] - batch = batch.copy() - # Get current position (reference for relative conversion) - # State shape: (batch, state_dim) or (batch, n_obs_steps, state_dim) - if state.dim() == 3: - current_pos = state[:, -1, :] # (batch, state_dim) - - # Convert state history to relative (each timestep relative to current) - # This gives velocity-like information to the policy - relative_state = state.clone() - relative_state = state - current_pos[:, None, :] - batch[state_key] = relative_state - else: - current_pos = state # (batch, state_dim) - # Single timestep state becomes zeros (relative to itself) - batch[state_key] = torch.zeros_like(state) + # Get current position as reference + current_pos = state[:, -1, :] if state.dim() == 3 else state + + # Convert state if requested + if convert_state: + if state.dim() == 3: + batch[state_key] = state - current_pos[:, None, :] + else: + batch[state_key] = torch.zeros_like(state) # Convert actions to relative - action_dim = action.shape[-1] - state_dim = current_pos.shape[-1] - min_dim = min(action_dim, state_dim) - - relative_action = action.clone() - relative_action[..., :min_dim] = action[..., :min_dim] - current_pos[:, None, :min_dim] - batch["action"] = relative_action + min_dim = min(action.shape[-1], current_pos.shape[-1]) + rel_action = action.clone() + rel_action[..., :min_dim] -= current_pos[:, None, :min_dim] + batch["action"] = rel_action return batch -# Alias for backward compatibility +# Backward compatibility alias convert_to_relative_actions = convert_to_relative -def convert_state_to_relative( - state: torch.Tensor, - current_pos: torch.Tensor | None = None, -) -> torch.Tensor: - """ - Convert absolute state to relative for inference. - - Args: - state: State tensor, shape (state_dim,) or (n_obs_steps, state_dim) - current_pos: Current position to use as reference. If None, uses last timestep of state. - - Returns: - Relative state tensor - """ - if current_pos is None: - if state.dim() >= 2: - current_pos = state[-1, :] # Last timestep - else: - current_pos = state - +def convert_state_to_relative(state: torch.Tensor) -> torch.Tensor: + """Convert state to relative (for inference with use_relative_state=True).""" if state.dim() == 1: return torch.zeros_like(state) - elif state.dim() == 2: - # (n_obs_steps, state_dim) + current_pos = state[-1, :] if state.dim() == 2 else state[:, -1, :] + if state.dim() == 2: return state - current_pos[None, :] - else: - # (batch, n_obs_steps, state_dim) - return state - current_pos[:, None, :] + return state - current_pos[:, None, :] def convert_from_relative_actions( relative_actions: torch.Tensor, - current_pos: torch.Tensor | dict[str, float], + current_pos: torch.Tensor, ) -> torch.Tensor: - """ - Convert relative actions back to absolute for robot execution. 
- - Args: - relative_actions: Predicted relative actions, shape (chunk_size, action_dim) - or (batch, chunk_size, action_dim) - current_pos: Current robot position as tensor (action_dim,) or dict of joint positions - - Returns: - Absolute actions for robot execution - """ - if isinstance(current_pos, dict): - # Convert dict to tensor, maintaining key order - current_pos = torch.tensor(list(current_pos.values()), dtype=relative_actions.dtype) - - # Ensure current_pos is on same device - current_pos = current_pos.to(relative_actions.device) - - # Match dimensions - action_dim = relative_actions.shape[-1] - pos_dim = current_pos.shape[-1] if current_pos.dim() > 0 else len(current_pos) - min_dim = min(action_dim, pos_dim) - - absolute_actions = relative_actions.clone() + """Convert relative actions back to absolute for robot execution.""" + current_pos = current_pos.to(relative_actions.device, relative_actions.dtype) + min_dim = min(relative_actions.shape[-1], current_pos.shape[-1]) + absolute = relative_actions.clone() if relative_actions.dim() == 2: - # Shape: (chunk_size, action_dim) - absolute_actions[..., :min_dim] = relative_actions[..., :min_dim] + current_pos[:min_dim] + absolute[..., :min_dim] += current_pos[:min_dim] elif relative_actions.dim() == 3: - # Shape: (batch, chunk_size, action_dim) - absolute_actions[..., :min_dim] = relative_actions[..., :min_dim] + current_pos[None, None, :min_dim] + absolute[..., :min_dim] += current_pos[None, None, :min_dim] else: - # Shape: (action_dim,) - absolute_actions[..., :min_dim] = relative_actions[..., :min_dim] + current_pos[:min_dim] + absolute[..., :min_dim] += current_pos[:min_dim] - return absolute_actions + return absolute def convert_from_relative_actions_dict( relative_actions: dict[str, float], current_pos: dict[str, float], ) -> dict[str, float]: - """ - Convert relative actions back to absolute for robot execution (dict version). 
- - Args: - relative_actions: Predicted relative actions as dict (e.g., {"joint_1.pos": 0.1, ...}) - current_pos: Current robot position as dict (e.g., {"joint_1.pos": 45.0, ...}) - - Returns: - Absolute actions dict for robot execution - """ - absolute_actions = {} - for key, rel_value in relative_actions.items(): - if key in current_pos: - absolute_actions[key] = rel_value + current_pos[key] - else: - # Key not in current position, keep as-is (shouldn't happen normally) - absolute_actions[key] = rel_value - return absolute_actions - + """Convert relative actions back to absolute (dict version for inference).""" + return {k: v + current_pos.get(k, 0.0) for k, v in relative_actions.items()} From 7f16e8cb097a3ad31f46fae874b3a9473d02dec3 Mon Sep 17 00:00:00 2001 From: Pepijn Date: Fri, 2 Jan 2026 19:56:42 +0100 Subject: [PATCH 3/8] fix --- src/lerobot/scripts/lerobot_train.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/lerobot/scripts/lerobot_train.py b/src/lerobot/scripts/lerobot_train.py index 9117e64c5..3186eae44 100644 --- a/src/lerobot/scripts/lerobot_train.py +++ b/src/lerobot/scripts/lerobot_train.py @@ -303,26 +303,29 @@ def train(cfg: TrainPipelineConfig, accelerator: Accelerator | None = None): device=device, ) - # Compute per-timestep normalizer for relative actions + # Compute per-timestep normalizer for relative actions (main process computes, others load) relative_normalizer = None if cfg.use_relative_actions: + stats_path = cfg.output_dir / "relative_stats.pt" mode = "actions + state" if cfg.use_relative_state else "actions only" + if is_main_process: logging.info(colored(f"Relative mode: {mode}", "cyan", attrs=["bold"])) logging.info("Computing per-timestep stats from dataset (first 1000 batches)...") + cfg.output_dir.mkdir(parents=True, exist_ok=True) temp_loader = torch.utils.data.DataLoader( dataset, batch_size=cfg.batch_size, shuffle=True, num_workers=0 ) mean, std = compute_relative_action_stats(temp_loader, num_batches=1000) relative_normalizer = PerTimestepNormalizer(mean, std) - stats_path = cfg.output_dir / "relative_stats.pt" relative_normalizer.save(stats_path) logging.info(f"Saved stats to: {stats_path}") + # Barrier: wait for main process to finish computing and saving stats accelerator.wait_for_everyone() if not is_main_process: - relative_normalizer = PerTimestepNormalizer.load(cfg.output_dir / "relative_stats.pt") + relative_normalizer = PerTimestepNormalizer.load(stats_path) step = 0 # number of policy updates (forward + backward + optim) From c5f66edff943d277150830d63353f462fe59d9c4 Mon Sep 17 00:00:00 2001 From: Pepijn Date: Fri, 2 Jan 2026 22:34:57 +0100 Subject: [PATCH 4/8] shuffle false --- src/lerobot/scripts/lerobot_train.py | 28 ++++++++++++---------------- 1 file changed, 12 insertions(+), 16 deletions(-) diff --git a/src/lerobot/scripts/lerobot_train.py b/src/lerobot/scripts/lerobot_train.py index 3186eae44..f88d5c02f 100644 --- a/src/lerobot/scripts/lerobot_train.py +++ b/src/lerobot/scripts/lerobot_train.py @@ -303,29 +303,25 @@ def train(cfg: TrainPipelineConfig, accelerator: Accelerator | None = None): device=device, ) - # Compute per-timestep normalizer for relative actions (main process computes, others load) + # Compute per-timestep normalizer for relative actions + # Each process computes stats independently to avoid distributed sync issues relative_normalizer = None if cfg.use_relative_actions: - stats_path = cfg.output_dir / "relative_stats.pt" mode = "actions + state" if cfg.use_relative_state else 
"actions only" - if is_main_process: logging.info(colored(f"Relative mode: {mode}", "cyan", attrs=["bold"])) logging.info("Computing per-timestep stats from dataset (first 1000 batches)...") + + temp_loader = torch.utils.data.DataLoader( + dataset, batch_size=cfg.batch_size, shuffle=False, num_workers=0 + ) + mean, std = compute_relative_action_stats(temp_loader, num_batches=1000) + relative_normalizer = PerTimestepNormalizer(mean, std) + + if is_main_process: cfg.output_dir.mkdir(parents=True, exist_ok=True) - temp_loader = torch.utils.data.DataLoader( - dataset, batch_size=cfg.batch_size, shuffle=True, num_workers=0 - ) - mean, std = compute_relative_action_stats(temp_loader, num_batches=1000) - relative_normalizer = PerTimestepNormalizer(mean, std) - relative_normalizer.save(stats_path) - logging.info(f"Saved stats to: {stats_path}") - - # Barrier: wait for main process to finish computing and saving stats - accelerator.wait_for_everyone() - - if not is_main_process: - relative_normalizer = PerTimestepNormalizer.load(stats_path) + relative_normalizer.save(cfg.output_dir / "relative_stats.pt") + logging.info(f"Saved stats to: {cfg.output_dir / 'relative_stats.pt'}") step = 0 # number of policy updates (forward + backward + optim) From 574081ac02b7d3da19edb220b2d322ac13677beb Mon Sep 17 00:00:00 2001 From: Pepijn Date: Sat, 3 Jan 2026 11:34:31 +0100 Subject: [PATCH 5/8] fix mem bug --- src/lerobot/utils/relative_actions.py | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/src/lerobot/utils/relative_actions.py b/src/lerobot/utils/relative_actions.py index ee58741b9..888cb63a0 100644 --- a/src/lerobot/utils/relative_actions.py +++ b/src/lerobot/utils/relative_actions.py @@ -23,17 +23,26 @@ class PerTimestepNormalizer: self.mean = mean self.std = std self.eps = eps + self._cache = {} # Cache for device/dtype converted tensors + + def _get_stats(self, device, dtype): + """Get cached stats for device/dtype, or create and cache them.""" + key = (device, dtype) + if key not in self._cache: + self._cache[key] = ( + self.mean.to(device, dtype), + self.std.to(device, dtype), + ) + return self._cache[key] def normalize(self, x: torch.Tensor) -> torch.Tensor: - mean = self.mean.to(x.device, x.dtype) - std = self.std.to(x.device, x.dtype) + mean, std = self._get_stats(x.device, x.dtype) if x.dim() == 3 and mean.dim() == 2: mean, std = mean.unsqueeze(0), std.unsqueeze(0) return (x - mean) / (std + self.eps) def unnormalize(self, x: torch.Tensor) -> torch.Tensor: - mean = self.mean.to(x.device, x.dtype) - std = self.std.to(x.device, x.dtype) + mean, std = self._get_stats(x.device, x.dtype) if x.dim() == 3 and mean.dim() == 2: mean, std = mean.unsqueeze(0), std.unsqueeze(0) return x * (std + self.eps) + mean From 63619619bfbbc0c3b15c5e070c939b19ebdd26c2 Mon Sep 17 00:00:00 2001 From: Pepijn Date: Wed, 7 Jan 2026 10:03:56 +0100 Subject: [PATCH 6/8] fix data loader issue --- src/lerobot/scripts/lerobot_train.py | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/src/lerobot/scripts/lerobot_train.py b/src/lerobot/scripts/lerobot_train.py index f88d5c02f..405d619c8 100644 --- a/src/lerobot/scripts/lerobot_train.py +++ b/src/lerobot/scripts/lerobot_train.py @@ -304,24 +304,29 @@ def train(cfg: TrainPipelineConfig, accelerator: Accelerator | None = None): ) # Compute per-timestep normalizer for relative actions - # Each process computes stats independently to avoid distributed sync issues + # Only main process computes, then 
broadcasts to avoid video decoder issues relative_normalizer = None if cfg.use_relative_actions: mode = "actions + state" if cfg.use_relative_state else "actions only" if is_main_process: logging.info(colored(f"Relative mode: {mode}", "cyan", attrs=["bold"])) logging.info("Computing per-timestep stats from dataset (first 1000 batches)...") - - temp_loader = torch.utils.data.DataLoader( - dataset, batch_size=cfg.batch_size, shuffle=False, num_workers=0 - ) - mean, std = compute_relative_action_stats(temp_loader, num_batches=1000) - relative_normalizer = PerTimestepNormalizer(mean, std) - - if is_main_process: + temp_loader = torch.utils.data.DataLoader( + dataset, batch_size=cfg.batch_size, shuffle=False, num_workers=0 + ) + mean, std = compute_relative_action_stats(temp_loader, num_batches=1000) + del temp_loader cfg.output_dir.mkdir(parents=True, exist_ok=True) - relative_normalizer.save(cfg.output_dir / "relative_stats.pt") - logging.info(f"Saved stats to: {cfg.output_dir / 'relative_stats.pt'}") + stats_path = cfg.output_dir / "relative_stats.pt" + torch.save({"mean": mean, "std": std}, stats_path) + logging.info(f"Saved stats to: {stats_path}") + + accelerator.wait_for_everyone() + + # All ranks load from saved file + stats_path = cfg.output_dir / "relative_stats.pt" + data = torch.load(stats_path, weights_only=True, map_location="cpu") + relative_normalizer = PerTimestepNormalizer(data["mean"], data["std"]) step = 0 # number of policy updates (forward + backward + optim) From c23472e37647eb11fd9c1f0483868191d381f59f Mon Sep 17 00:00:00 2001 From: Pepijn Date: Wed, 7 Jan 2026 15:30:25 +0100 Subject: [PATCH 7/8] only main saves stat file --- src/lerobot/scripts/lerobot_train.py | 31 ++++++++++++++-------------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/src/lerobot/scripts/lerobot_train.py b/src/lerobot/scripts/lerobot_train.py index 405d619c8..dd36f4a39 100644 --- a/src/lerobot/scripts/lerobot_train.py +++ b/src/lerobot/scripts/lerobot_train.py @@ -13,6 +13,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
+import gc
 import logging
 import time
 from contextlib import nullcontext
@@ -304,24 +305,29 @@ def train(cfg: TrainPipelineConfig, accelerator: Accelerator | None = None):
     )
 
     # Compute per-timestep normalizer for relative actions
-    # Only main process computes, then broadcasts to avoid video decoder issues
+    # Compute BEFORE accelerator.prepare() to avoid NCCL timeouts during long computation
     relative_normalizer = None
     if cfg.use_relative_actions:
         mode = "actions + state" if cfg.use_relative_state else "actions only"
         if is_main_process:
             logging.info(colored(f"Relative mode: {mode}", "cyan", attrs=["bold"]))
             logging.info("Computing per-timestep stats from dataset (first 1000 batches)...")
-            temp_loader = torch.utils.data.DataLoader(
-                dataset, batch_size=cfg.batch_size, shuffle=False, num_workers=0
-            )
-            mean, std = compute_relative_action_stats(temp_loader, num_batches=1000)
-            del temp_loader
+
+        # All ranks compute independently to avoid NCCL timeout (computation takes ~10min)
+        temp_loader = torch.utils.data.DataLoader(
+            dataset, batch_size=cfg.batch_size, shuffle=False, num_workers=0
+        )
+        mean, std = compute_relative_action_stats(temp_loader, num_batches=1000)
+        del temp_loader
+        gc.collect()  # Clean up any video decoder resources
+        relative_normalizer = PerTimestepNormalizer(mean, std)
+
+        if is_main_process:
             cfg.output_dir.mkdir(parents=True, exist_ok=True)
-            stats_path = cfg.output_dir / "relative_stats.pt"
-            torch.save({"mean": mean, "std": std}, stats_path)
-            logging.info(f"Saved stats to: {stats_path}")
-
-        accelerator.wait_for_everyone()
-
-        # All ranks load from saved file
-        stats_path = cfg.output_dir / "relative_stats.pt"
-        data = torch.load(stats_path, weights_only=True, map_location="cpu")
-        relative_normalizer = PerTimestepNormalizer(data["mean"], data["std"])
+            relative_normalizer.save(cfg.output_dir / "relative_stats.pt")
+            logging.info(f"Saved stats to: {cfg.output_dir / 'relative_stats.pt'}")
 
     step = 0  # number of policy updates (forward + backward + optim)
 
From 5a15a6a91116eda5fc1305de9e5f22ec651b0b19 Mon Sep 17 00:00:00 2001
From: Pepijn
Date: Wed, 7 Jan 2026 16:52:15 +0100
Subject: [PATCH 8/8] use separate process for stats computation

---
 src/lerobot/scripts/lerobot_train.py | 40 ++++++++++++++++++----------
 1 file changed, 26 insertions(+), 14 deletions(-)

diff --git a/src/lerobot/scripts/lerobot_train.py b/src/lerobot/scripts/lerobot_train.py
index dd36f4a39..deb5a4681 100644
--- a/src/lerobot/scripts/lerobot_train.py
+++ b/src/lerobot/scripts/lerobot_train.py
@@ -305,27 +305,39 @@ def train(cfg: TrainPipelineConfig, accelerator: Accelerator | None = None):
     )
 
     # Compute per-timestep normalizer for relative actions
-    # Compute BEFORE accelerator.prepare() to avoid NCCL timeouts during long computation
     relative_normalizer = None
     if cfg.use_relative_actions:
         mode = "actions + state" if cfg.use_relative_state else "actions only"
+        cfg.output_dir.mkdir(parents=True, exist_ok=True)
+        stats_path = cfg.output_dir / "relative_stats.pt"
+
         if is_main_process:
             logging.info(colored(f"Relative mode: {mode}", "cyan", attrs=["bold"]))
+
+            if stats_path.exists():
+                logging.info(f"Loading pre-computed stats from: {stats_path}")
+            else:
+                logging.info("Computing per-timestep stats (first 1000 batches)...")
+                logging.info("Using fresh dataset to avoid video decoder state issues...")
+                # Create separate dataset instance to avoid corrupting main dataset's video decoders
+                stats_dataset = 
make_dataset(cfg) + temp_loader = torch.utils.data.DataLoader( + stats_dataset, batch_size=cfg.batch_size, shuffle=False, num_workers=0 + ) + mean, std = compute_relative_action_stats(temp_loader, num_batches=1000) + del temp_loader, stats_dataset + gc.collect() + torch.save({"mean": mean, "std": std}, stats_path) + logging.info(f"Saved stats to: {stats_path}") - # All ranks compute independently to avoid NCCL timeout (computation takes ~10min) - temp_loader = torch.utils.data.DataLoader( - dataset, batch_size=cfg.batch_size, shuffle=False, num_workers=0 - ) - mean, std = compute_relative_action_stats(temp_loader, num_batches=1000) - del temp_loader - gc.collect() # Clean up any video decoder resources - relative_normalizer = PerTimestepNormalizer(mean, std) + # Poll for stats file instead of using NCCL barrier (avoids timeout during long computation) + if not is_main_process: + while not stats_path.exists(): + time.sleep(5) - if is_main_process: - cfg.output_dir.mkdir(parents=True, exist_ok=True) - relative_normalizer.save(cfg.output_dir / "relative_stats.pt") - logging.info(f"Saved stats to: {cfg.output_dir / 'relative_stats.pt'}") + data = torch.load(stats_path, weights_only=True, map_location="cpu") + relative_normalizer = PerTimestepNormalizer(data["mean"], data["std"]) + accelerator.wait_for_everyone() # Sync after everyone has loaded step = 0 # number of policy updates (forward + backward + optim)
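
Usage sketch (illustrative, not part of any patch above): the round trip this
series implements, written against the helpers in
src/lerobot/utils/relative_actions.py as of PATCH 5. The shapes (2 samples,
16-step chunks, 14 joints) and the zero-mean/unit-std stats are assumptions
for demonstration only; real stats come from compute_relative_action_stats as
wired into lerobot_train.py.

    import torch

    from lerobot.utils.relative_actions import (
        PerTimestepNormalizer,
        convert_from_relative_actions,
        convert_to_relative,
    )

    # Toy batch: actions are (batch, chunk_size, action_dim), state is (batch, state_dim).
    batch = {
        "action": torch.randn(2, 16, 14),
        "observation.state": torch.randn(2, 14),
    }
    absolute_action = batch["action"].clone()
    current_pos = batch["observation.state"].clone()

    # Training side (Mode 1, state left absolute): absolute -> relative -> normalized.
    rel_batch = convert_to_relative(batch, convert_state=False)
    normalizer = PerTimestepNormalizer(mean=torch.zeros(16, 14), std=torch.ones(16, 14))
    normalized = normalizer.normalize(rel_batch["action"])

    # Inference side: unnormalize, then add the current position back on.
    recovered = convert_from_relative_actions(
        normalizer.unnormalize(normalized)[0], current_pos[0]
    )
    assert torch.allclose(recovered, absolute_action[0], atol=1e-5)

The unify_tasks.py script from PATCH 2 is run standalone, e.g.:

    python scripts/unify_tasks.py --repo_id your-org/your-dataset --task "your task description"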