From 01c7c7407029066a3be2b6c99122816f02a34296 Mon Sep 17 00:00:00 2001 From: Pepijn Date: Fri, 2 Jan 2026 15:57:39 +0100 Subject: [PATCH 1/8] Add relative position UMI style --- examples/openarms/evaluate_relative.py | 319 +++++++++++++++++++++++++ src/lerobot/configs/train.py | 5 + src/lerobot/scripts/lerobot_train.py | 10 + src/lerobot/utils/relative_actions.py | 172 +++++++++++++ 4 files changed, 506 insertions(+) create mode 100644 examples/openarms/evaluate_relative.py create mode 100644 src/lerobot/utils/relative_actions.py diff --git a/examples/openarms/evaluate_relative.py b/examples/openarms/evaluate_relative.py new file mode 100644 index 000000000..c9c90355e --- /dev/null +++ b/examples/openarms/evaluate_relative.py @@ -0,0 +1,319 @@ +#!/usr/bin/env python + +# Copyright 2025 The HuggingFace Inc. team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +OpenArms Policy Evaluation with UMI-style Relative Actions + +Evaluates a policy trained with relative actions (use_relative_actions=True). +During inference, the policy outputs relative deltas which are added to the +current robot position to get absolute targets. + +This follows the UMI paper's "relative trajectory" action representation: + action_absolute[t] = action_relative[t] + current_position + +Example usage: + python examples/openarms/evaluate_relative.py +""" + +import time +from pathlib import Path + +import torch + +from lerobot.cameras.opencv.configuration_opencv import OpenCVCameraConfig +from lerobot.configs.policies import PreTrainedConfig +from lerobot.datasets.lerobot_dataset import LeRobotDataset +from lerobot.datasets.pipeline_features import aggregate_pipeline_dataset_features, create_initial_features +from lerobot.datasets.utils import build_dataset_frame, combine_feature_dicts +from lerobot.policies.factory import make_policy, make_pre_post_processors +from lerobot.policies.utils import predict_action +from lerobot.processor import make_default_processors +from lerobot.processor.core import RobotAction +from lerobot.robots.openarms.config_openarms_follower import OpenArmsFollowerConfig +from lerobot.robots.openarms.openarms_follower import OpenArmsFollower +from lerobot.utils.constants import ACTION, OBS_STR +from lerobot.utils.control_utils import init_keyboard_listener, precise_sleep +from lerobot.utils.device_utils import get_safe_torch_device +from lerobot.utils.relative_actions import convert_from_relative_actions_dict, convert_state_to_relative +from lerobot.utils.utils import log_say +from lerobot.utils.visualization_utils import init_rerun, log_rerun_data + + +# Configuration - Update these for your setup +HF_MODEL_ID = "your-org/your-relative-policy" # Policy trained with use_relative_actions=True +HF_EVAL_DATASET_ID = "your-org/your-eval-dataset" +TASK_DESCRIPTION = "your task description" + +NUM_EPISODES = 1 +FPS = 30 +EPISODE_TIME_SEC = 300 + +# Robot CAN interfaces +FOLLOWER_LEFT_PORT = "can0" +FOLLOWER_RIGHT_PORT = "can1" + +# Camera configuration +CAMERA_CONFIG = { + 
"left_wrist": OpenCVCameraConfig(index_or_path="/dev/video5", width=640, height=480, fps=FPS), + "right_wrist": OpenCVCameraConfig(index_or_path="/dev/video1", width=640, height=480, fps=FPS), + "base": OpenCVCameraConfig(index_or_path="/dev/video3", width=640, height=480, fps=FPS), +} + + +def make_robot_action(action_values: dict, features: dict) -> RobotAction: + """Convert action values to robot action dict, filtering by features.""" + robot_action = {} + for key in features: + if key.startswith(ACTION + "."): + action_key = key.removeprefix(ACTION + ".") + if action_key in action_values: + robot_action[action_key] = action_values[action_key] + return robot_action + + +def inference_loop_relative( + robot, + policy, + preprocessor, + postprocessor, + dataset, + events, + fps: int, + control_time_s: float, + single_task: str, + display_data: bool = True, + state_key: str = "observation.state", +): + """ + Inference loop for policies trained with UMI-style relative actions and state. + + Key differences from standard inference: + - Observation state is converted to relative (provides velocity info) + - Policy outputs relative deltas (action_relative) + - We add current robot position to get absolute targets: + action_absolute = action_relative + current_position + """ + device = get_safe_torch_device(policy.config.device) + + timestamp = 0 + start_t = time.perf_counter() + + while timestamp < control_time_s: + loop_start = time.perf_counter() + + if events["exit_early"] or events["stop_recording"]: + break + + # Get current robot observation + obs = robot.get_observation() + observation_frame = build_dataset_frame(dataset.features, obs, prefix=OBS_STR) + + # Get current joint positions (reference for relative conversion) + current_pos = {k: v for k, v in obs.items() if k.endswith(".pos")} + + # Convert observation state to relative (UMI-style) + # This gives velocity-like information to the policy + if state_key in observation_frame: + state_tensor = observation_frame[state_key] + if isinstance(state_tensor, torch.Tensor): + observation_frame[state_key] = convert_state_to_relative(state_tensor) + + # Run policy inference - outputs relative actions + action_values = predict_action( + observation=observation_frame, + policy=policy, + device=device, + preprocessor=preprocessor, + postprocessor=postprocessor, + use_amp=policy.config.use_amp, + task=single_task, + robot_type=robot.robot_type, + ) + + # Convert relative actions to absolute + # action_values contains relative deltas, current_pos has absolute positions + relative_action = make_robot_action(action_values, dataset.features) + absolute_action = convert_from_relative_actions_dict(relative_action, current_pos) + + # Send absolute action to robot + robot.send_action(absolute_action) + + # Record to dataset (store the absolute action that was sent) + if dataset is not None: + action_frame = build_dataset_frame(dataset.features, absolute_action, prefix=ACTION) + frame = {**observation_frame, **action_frame, "task": single_task} + dataset.add_frame(frame) + + if display_data: + log_rerun_data(observation=obs, action=absolute_action) + + dt = time.perf_counter() - loop_start + precise_sleep(1 / fps - dt) + timestamp = time.perf_counter() - start_t + + +def main(): + """Main evaluation function for relative action policies.""" + print("=" * 65) + print(" OpenArms Evaluation - UMI-style Relative Actions") + print("=" * 65) + print(f"\nModel: {HF_MODEL_ID}") + print(f"Evaluation Dataset: {HF_EVAL_DATASET_ID}") + print(f"Task: 
{TASK_DESCRIPTION}") + print(f"Episodes: {NUM_EPISODES}") + print(f"Episode Duration: {EPISODE_TIME_SEC}s") + print("\nNote: Policy outputs are relative deltas, converted to absolute at inference time") + + # Setup robot + follower_config = OpenArmsFollowerConfig( + port_left=FOLLOWER_LEFT_PORT, + port_right=FOLLOWER_RIGHT_PORT, + can_interface="socketcan", + id="openarms_follower", + disable_torque_on_disconnect=True, + max_relative_target=10.0, + cameras=CAMERA_CONFIG, + ) + + follower = OpenArmsFollower(follower_config) + follower.connect(calibrate=False) + + if not follower.is_connected: + raise RuntimeError("Follower robot failed to connect!") + + # Build processors + teleop_action_processor, robot_action_processor, robot_observation_processor = make_default_processors() + + # Build dataset features + action_features_hw = {k: v for k, v in follower.action_features.items() if k.endswith(".pos")} + + dataset_features = combine_feature_dicts( + aggregate_pipeline_dataset_features( + pipeline=teleop_action_processor, + initial_features=create_initial_features(action=action_features_hw), + use_videos=True, + ), + aggregate_pipeline_dataset_features( + pipeline=robot_observation_processor, + initial_features=create_initial_features(observation=follower.observation_features), + use_videos=True, + ), + ) + + # Check existing dataset + dataset_path = Path.home() / ".cache" / "huggingface" / "lerobot" / HF_EVAL_DATASET_ID + if dataset_path.exists(): + print(f"\nDataset already exists at: {dataset_path}") + choice = input("Continue and append? (y/n): ").strip().lower() + if choice != 'y': + follower.disconnect() + return + + # Create dataset + dataset = LeRobotDataset.create( + repo_id=HF_EVAL_DATASET_ID, + fps=FPS, + features=dataset_features, + robot_type=follower.name, + use_videos=True, + image_writer_processes=0, + image_writer_threads=12, + ) + + # Load policy + policy_config = PreTrainedConfig.from_pretrained(HF_MODEL_ID) + policy_config.pretrained_path = HF_MODEL_ID + policy = make_policy(policy_config, ds_meta=dataset.meta) + + preprocessor, postprocessor = make_pre_post_processors( + policy_cfg=policy.config, + pretrained_path=HF_MODEL_ID, + dataset_stats=dataset.meta.stats, + preprocessor_overrides={ + "device_processor": {"device": str(policy.config.device)} + }, + ) + + # Initialize controls + listener, events = init_keyboard_listener() + init_rerun(session_name="openarms_eval_relative") + episode_idx = 0 + + print("\nControls:") + print(" ESC - Stop recording and save") + print(" → - End current episode") + print(" ← - Re-record episode") + + try: + while episode_idx < NUM_EPISODES and not events["stop_recording"]: + log_say(f"Evaluating episode {episode_idx + 1} of {NUM_EPISODES}") + print(f"\nRunning relative action inference for episode {episode_idx + 1}...") + + # Run inference with relative action conversion + inference_loop_relative( + robot=follower, + policy=policy, + preprocessor=preprocessor, + postprocessor=postprocessor, + dataset=dataset, + events=events, + fps=FPS, + control_time_s=EPISODE_TIME_SEC, + single_task=TASK_DESCRIPTION, + display_data=True, + ) + + # Handle re-recording + if events.get("rerecord_episode", False): + log_say("Re-recording episode") + events["rerecord_episode"] = False + events["exit_early"] = False + dataset.clear_episode_buffer() + continue + + # Save episode + if dataset.episode_buffer is not None and dataset.episode_buffer.get("size", 0) > 0: + print(f"Saving episode {episode_idx + 1} ({dataset.episode_buffer['size']} frames)...") + 
dataset.save_episode() + episode_idx += 1 + + events["exit_early"] = False + + # Wait for manual reset between episodes + if not events["stop_recording"] and episode_idx < NUM_EPISODES: + log_say("Waiting for manual reset") + input("Press ENTER when ready for next episode...") + + print(f"\nEvaluation complete! {episode_idx} episodes recorded") + log_say("Evaluation complete", blocking=True) + + except KeyboardInterrupt: + print("\n\nEvaluation interrupted by user") + + finally: + follower.disconnect() + + if listener is not None: + listener.stop() + + dataset.finalize() + print("\nUploading to Hugging Face Hub...") + dataset.push_to_hub(private=True) + + +if __name__ == "__main__": + main() + diff --git a/src/lerobot/configs/train.py b/src/lerobot/configs/train.py index cee9dfdf9..030dec027 100644 --- a/src/lerobot/configs/train.py +++ b/src/lerobot/configs/train.py @@ -66,6 +66,11 @@ class TrainPipelineConfig(HubMixin): eval: EvalConfig = field(default_factory=EvalConfig) wandb: WandBConfig = field(default_factory=WandBConfig) + # UMI-style relative actions: convert absolute joint positions to chunk-relative deltas + # During training, actions become relative to current position at chunk start + # During inference, predicted deltas are added to current robot position + use_relative_actions: bool = False + # RA-BC (Reward-Aligned Behavior Cloning) parameters use_rabc: bool = False # Enable reward-weighted training rabc_progress_path: str | None = None # Path to precomputed SARM progress parquet file diff --git a/src/lerobot/scripts/lerobot_train.py b/src/lerobot/scripts/lerobot_train.py index 6cf733442..c6724c11d 100644 --- a/src/lerobot/scripts/lerobot_train.py +++ b/src/lerobot/scripts/lerobot_train.py @@ -46,6 +46,7 @@ from lerobot.utils.train_utils import ( save_checkpoint, update_last_checkpoint, ) +from lerobot.utils.relative_actions import convert_to_relative_actions from lerobot.utils.utils import ( format_big_number, has_method, @@ -298,6 +299,10 @@ def train(cfg: TrainPipelineConfig, accelerator: Accelerator | None = None): device=device, ) + if cfg.use_relative_actions and is_main_process: + logging.info(colored("UMI-style relative actions enabled", "cyan", attrs=["bold"])) + logging.info("Actions will be converted to chunk-relative deltas during training") + step = 0 # number of policy updates (forward + backward + optim) if cfg.resume: @@ -385,6 +390,11 @@ def train(cfg: TrainPipelineConfig, accelerator: Accelerator | None = None): start_time = time.perf_counter() batch = next(dl_iter) batch = preprocessor(batch) + + # Convert to UMI-style relative actions if enabled + if cfg.use_relative_actions: + batch = convert_to_relative_actions(batch) + train_tracker.dataloading_s = time.perf_counter() - start_time train_tracker, output_dict = update_policy( diff --git a/src/lerobot/utils/relative_actions.py b/src/lerobot/utils/relative_actions.py new file mode 100644 index 000000000..5862498e9 --- /dev/null +++ b/src/lerobot/utils/relative_actions.py @@ -0,0 +1,172 @@ +""" +UMI-style relative action and state utilities. 
+ +Implements chunk-relative representation from the UMI paper: +"Universal Manipulation Interface: In-The-Wild Robot Teaching Without In-The-Wild Robots" + +For each inference step: +- Actions are relative to current position at chunk start (t0) +- State history is relative to current position (provides velocity info) + +Training: + action_relative[t] = action_absolute[t] - position_at_t0 + state_relative[t] = state_absolute[t] - current_position + +Inference: + action_absolute[t] = action_relative[t] + current_position +""" + +import torch + + +def convert_to_relative(batch: dict, state_key: str = "observation.state") -> dict: + """ + Convert absolute actions AND state to chunk-relative (UMI-style) for training. + + Following UMI paper PD2.1 and PD2.2: + - Actions become relative to current position + - State history becomes relative to current position (provides velocity info) + + Args: + batch: Training batch containing: + - "action": (batch_size, chunk_size, action_dim) absolute action targets + - state_key: (batch_size, [n_obs_steps,] state_dim) observation state + state_key: Key for the observation state in the batch + + Returns: + Modified batch with relative actions and state + """ + if "action" not in batch or state_key not in batch: + return batch + + action = batch["action"] + state = batch[state_key] + + batch = batch.copy() + + # Get current position (reference for relative conversion) + # State shape: (batch, state_dim) or (batch, n_obs_steps, state_dim) + if state.dim() == 3: + current_pos = state[:, -1, :] # (batch, state_dim) + + # Convert state history to relative (each timestep relative to current) + # This gives velocity-like information to the policy + relative_state = state.clone() + relative_state = state - current_pos[:, None, :] + batch[state_key] = relative_state + else: + current_pos = state # (batch, state_dim) + # Single timestep state becomes zeros (relative to itself) + batch[state_key] = torch.zeros_like(state) + + # Convert actions to relative + action_dim = action.shape[-1] + state_dim = current_pos.shape[-1] + min_dim = min(action_dim, state_dim) + + relative_action = action.clone() + relative_action[..., :min_dim] = action[..., :min_dim] - current_pos[:, None, :min_dim] + batch["action"] = relative_action + + return batch + + +# Alias for backward compatibility +convert_to_relative_actions = convert_to_relative + + +def convert_state_to_relative( + state: torch.Tensor, + current_pos: torch.Tensor | None = None, +) -> torch.Tensor: + """ + Convert absolute state to relative for inference. + + Args: + state: State tensor, shape (state_dim,) or (n_obs_steps, state_dim) + current_pos: Current position to use as reference. If None, uses last timestep of state. + + Returns: + Relative state tensor + """ + if current_pos is None: + if state.dim() >= 2: + current_pos = state[-1, :] # Last timestep + else: + current_pos = state + + if state.dim() == 1: + return torch.zeros_like(state) + elif state.dim() == 2: + # (n_obs_steps, state_dim) + return state - current_pos[None, :] + else: + # (batch, n_obs_steps, state_dim) + return state - current_pos[:, None, :] + + +def convert_from_relative_actions( + relative_actions: torch.Tensor, + current_pos: torch.Tensor | dict[str, float], +) -> torch.Tensor: + """ + Convert relative actions back to absolute for robot execution. 
+
+    Args:
+        relative_actions: Predicted relative actions, shape (chunk_size, action_dim)
+                         or (batch, chunk_size, action_dim)
+        current_pos: Current robot position as tensor (action_dim,) or dict of joint positions
+
+    Returns:
+        Absolute actions for robot execution
+    """
+    if isinstance(current_pos, dict):
+        # Convert dict to tensor, maintaining key order
+        current_pos = torch.tensor(list(current_pos.values()), dtype=relative_actions.dtype)
+
+    # Ensure current_pos is on same device
+    current_pos = current_pos.to(relative_actions.device)
+
+    # Match dimensions
+    action_dim = relative_actions.shape[-1]
+    pos_dim = current_pos.shape[-1] if current_pos.dim() > 0 else len(current_pos)
+    min_dim = min(action_dim, pos_dim)
+
+    absolute_actions = relative_actions.clone()
+
+    if relative_actions.dim() == 2:
+        # Shape: (chunk_size, action_dim)
+        absolute_actions[..., :min_dim] = relative_actions[..., :min_dim] + current_pos[:min_dim]
+    elif relative_actions.dim() == 3:
+        # Shape: (batch, chunk_size, action_dim)
+        absolute_actions[..., :min_dim] = relative_actions[..., :min_dim] + current_pos[None, None, :min_dim]
+    else:
+        # Shape: (action_dim,)
+        absolute_actions[..., :min_dim] = relative_actions[..., :min_dim] + current_pos[:min_dim]
+
+    return absolute_actions
+
+
+def convert_from_relative_actions_dict(
+    relative_actions: dict[str, float],
+    current_pos: dict[str, float],
+) -> dict[str, float]:
+    """
+    Convert relative actions back to absolute for robot execution (dict version).
+
+    Args:
+        relative_actions: Predicted relative actions as dict (e.g., {"joint_1.pos": 0.1, ...})
+        current_pos: Current robot position as dict (e.g., {"joint_1.pos": 45.0, ...})
+
+    Returns:
+        Absolute actions dict for robot execution
+    """
+    absolute_actions = {}
+    for key, rel_value in relative_actions.items():
+        if key in current_pos:
+            absolute_actions[key] = rel_value + current_pos[key]
+        else:
+            # Key not in current position, keep as-is (shouldn't happen normally)
+            absolute_actions[key] = rel_value
+    return absolute_actions
+

From 036795559033793d2deaef1566c648e82b6540fd Mon Sep 17 00:00:00 2001
From: Pepijn
Date: Fri, 2 Jan 2026 18:58:47 +0100
Subject: [PATCH 2/8] add code for relative actions and state and unifying tasks

---
 examples/openarms/evaluate_relative.py | 182 ++++++++++----------
 scripts/unify_tasks.py                 |  59 +++++++
 src/lerobot/configs/train.py           |  12 +-
 src/lerobot/scripts/lerobot_train.py   |  38 ++++-
 src/lerobot/utils/relative_actions.py  | 228 +++++++++++--------------
 5 files changed, 295 insertions(+), 224 deletions(-)
 create mode 100644 scripts/unify_tasks.py

diff --git a/examples/openarms/evaluate_relative.py b/examples/openarms/evaluate_relative.py
index c9c90355e..a3e29726c 100644
--- a/examples/openarms/evaluate_relative.py
+++ b/examples/openarms/evaluate_relative.py
@@ -1,28 +1,14 @@
 #!/usr/bin/env python
-
-# Copyright 2025 The HuggingFace Inc. team. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
- """ -OpenArms Policy Evaluation with UMI-style Relative Actions +OpenArms Policy Evaluation with Relative Actions -Evaluates a policy trained with relative actions (use_relative_actions=True). -During inference, the policy outputs relative deltas which are added to the -current robot position to get absolute targets. - -This follows the UMI paper's "relative trajectory" action representation: - action_absolute[t] = action_relative[t] + current_position +Two modes supported (based on training config): + Mode 1: Relative actions only (use_relative_state=False) + - Policy outputs relative action deltas + - State input is absolute + Mode 2: Relative actions + state (use_relative_state=True) + - Policy outputs relative action deltas + - State input is also converted to relative Example usage: python examples/openarms/evaluate_relative.py @@ -35,6 +21,7 @@ import torch from lerobot.cameras.opencv.configuration_opencv import OpenCVCameraConfig from lerobot.configs.policies import PreTrainedConfig +from lerobot.configs.train import TrainPipelineConfig from lerobot.datasets.lerobot_dataset import LeRobotDataset from lerobot.datasets.pipeline_features import aggregate_pipeline_dataset_features, create_initial_features from lerobot.datasets.utils import build_dataset_frame, combine_feature_dicts @@ -47,13 +34,17 @@ from lerobot.robots.openarms.openarms_follower import OpenArmsFollower from lerobot.utils.constants import ACTION, OBS_STR from lerobot.utils.control_utils import init_keyboard_listener, precise_sleep from lerobot.utils.device_utils import get_safe_torch_device -from lerobot.utils.relative_actions import convert_from_relative_actions_dict, convert_state_to_relative +from lerobot.utils.relative_actions import ( + convert_from_relative_actions_dict, + convert_state_to_relative, + PerTimestepNormalizer, +) from lerobot.utils.utils import log_say from lerobot.utils.visualization_utils import init_rerun, log_rerun_data -# Configuration - Update these for your setup -HF_MODEL_ID = "your-org/your-relative-policy" # Policy trained with use_relative_actions=True +# Configuration +HF_MODEL_ID = "your-org/your-relative-policy" HF_EVAL_DATASET_ID = "your-org/your-eval-dataset" TASK_DESCRIPTION = "your task description" @@ -61,11 +52,9 @@ NUM_EPISODES = 1 FPS = 30 EPISODE_TIME_SEC = 300 -# Robot CAN interfaces FOLLOWER_LEFT_PORT = "can0" FOLLOWER_RIGHT_PORT = "can1" -# Camera configuration CAMERA_CONFIG = { "left_wrist": OpenCVCameraConfig(index_or_path="/dev/video5", width=640, height=480, fps=FPS), "right_wrist": OpenCVCameraConfig(index_or_path="/dev/video1", width=640, height=480, fps=FPS), @@ -74,7 +63,6 @@ CAMERA_CONFIG = { def make_robot_action(action_values: dict, features: dict) -> RobotAction: - """Convert action values to robot action dict, filtering by features.""" robot_action = {} for key in features: if key.startswith(ACTION + "."): @@ -84,6 +72,40 @@ def make_robot_action(action_values: dict, features: dict) -> RobotAction: return robot_action +def load_relative_config(model_path: Path | str) -> tuple[PerTimestepNormalizer | None, bool]: + """Load normalizer and relative_state setting from checkpoint.""" + model_path = Path(model_path) if isinstance(model_path, str) else model_path + normalizer = None + use_relative_state = False + + # Try local path first + if model_path.exists(): + stats_path = model_path / "relative_stats.pt" + if stats_path.exists(): + normalizer = PerTimestepNormalizer.load(stats_path) + print(f"Loaded per-timestep stats from: {stats_path}") + + config_path = 
model_path / "train_config.json" + if config_path.exists(): + cfg = TrainPipelineConfig.from_pretrained(model_path) + use_relative_state = getattr(cfg, "use_relative_state", False) + else: + # Try hub + try: + from huggingface_hub import hf_hub_download + stats_file = hf_hub_download(repo_id=str(model_path), filename="relative_stats.pt") + normalizer = PerTimestepNormalizer.load(stats_file) + print("Loaded per-timestep stats from hub") + + config_file = hf_hub_download(repo_id=str(model_path), filename="train_config.json") + cfg = TrainPipelineConfig.from_pretrained(Path(config_file).parent) + use_relative_state = getattr(cfg, "use_relative_state", False) + except Exception as e: + print(f"Warning: Could not load relative config: {e}") + + return normalizer, use_relative_state + + def inference_loop_relative( robot, policy, @@ -96,18 +118,15 @@ def inference_loop_relative( single_task: str, display_data: bool = True, state_key: str = "observation.state", + relative_normalizer: PerTimestepNormalizer | None = None, + use_relative_state: bool = False, ): """ - Inference loop for policies trained with UMI-style relative actions and state. + Inference loop for relative action policies. - Key differences from standard inference: - - Observation state is converted to relative (provides velocity info) - - Policy outputs relative deltas (action_relative) - - We add current robot position to get absolute targets: - action_absolute = action_relative + current_position + If use_relative_state=True, also converts observation state to relative. """ device = get_safe_torch_device(policy.config.device) - timestamp = 0 start_t = time.perf_counter() @@ -117,21 +136,17 @@ def inference_loop_relative( if events["exit_early"] or events["stop_recording"]: break - # Get current robot observation obs = robot.get_observation() observation_frame = build_dataset_frame(dataset.features, obs, prefix=OBS_STR) - - # Get current joint positions (reference for relative conversion) current_pos = {k: v for k, v in obs.items() if k.endswith(".pos")} - # Convert observation state to relative (UMI-style) - # This gives velocity-like information to the policy - if state_key in observation_frame: + # Convert state to relative if using full UMI mode + if use_relative_state and state_key in observation_frame: state_tensor = observation_frame[state_key] if isinstance(state_tensor, torch.Tensor): observation_frame[state_key] = convert_state_to_relative(state_tensor) - # Run policy inference - outputs relative actions + # Policy inference (outputs normalized relative actions) action_values = predict_action( observation=observation_frame, policy=policy, @@ -143,15 +158,21 @@ def inference_loop_relative( robot_type=robot.robot_type, ) - # Convert relative actions to absolute - # action_values contains relative deltas, current_pos has absolute positions + # Unnormalize actions + if relative_normalizer is not None: + action_keys = [k for k in action_values.keys() if not k.startswith("task")] + action_tensor = torch.tensor([[action_values[k] for k in action_keys]]) + action_tensor = action_tensor.unsqueeze(1) + action_unnorm = relative_normalizer.unnormalize(action_tensor) + for i, k in enumerate(action_keys): + action_values[k] = action_unnorm[0, 0, i].item() + + # Convert to absolute relative_action = make_robot_action(action_values, dataset.features) absolute_action = convert_from_relative_actions_dict(relative_action, current_pos) - # Send absolute action to robot robot.send_action(absolute_action) - # Record to dataset (store the 
absolute action that was sent) if dataset is not None: action_frame = build_dataset_frame(dataset.features, absolute_action, prefix=ACTION) frame = {**observation_frame, **action_frame, "task": single_task} @@ -166,16 +187,17 @@ def inference_loop_relative( def main(): - """Main evaluation function for relative action policies.""" - print("=" * 65) - print(" OpenArms Evaluation - UMI-style Relative Actions") - print("=" * 65) + print("=" * 60) + print(" OpenArms Evaluation - Relative Actions") + print("=" * 60) print(f"\nModel: {HF_MODEL_ID}") - print(f"Evaluation Dataset: {HF_EVAL_DATASET_ID}") - print(f"Task: {TASK_DESCRIPTION}") - print(f"Episodes: {NUM_EPISODES}") - print(f"Episode Duration: {EPISODE_TIME_SEC}s") - print("\nNote: Policy outputs are relative deltas, converted to absolute at inference time") + print(f"Dataset: {HF_EVAL_DATASET_ID}") + print(f"Episodes: {NUM_EPISODES}, Duration: {EPISODE_TIME_SEC}s") + + # Load relative action config + relative_normalizer, use_relative_state = load_relative_config(HF_MODEL_ID) + mode = "actions + state" if use_relative_state else "actions only" + print(f"Mode: relative {mode}") # Setup robot follower_config = OpenArmsFollowerConfig( @@ -192,12 +214,9 @@ def main(): follower.connect(calibrate=False) if not follower.is_connected: - raise RuntimeError("Follower robot failed to connect!") + raise RuntimeError("Robot failed to connect!") - # Build processors teleop_action_processor, robot_action_processor, robot_observation_processor = make_default_processors() - - # Build dataset features action_features_hw = {k: v for k, v in follower.action_features.items() if k.endswith(".pos")} dataset_features = combine_feature_dicts( @@ -213,16 +232,13 @@ def main(): ), ) - # Check existing dataset dataset_path = Path.home() / ".cache" / "huggingface" / "lerobot" / HF_EVAL_DATASET_ID if dataset_path.exists(): - print(f"\nDataset already exists at: {dataset_path}") - choice = input("Continue and append? (y/n): ").strip().lower() - if choice != 'y': + print(f"\nDataset exists at: {dataset_path}") + if input("Continue? 
(y/n): ").strip().lower() != 'y': follower.disconnect() return - # Create dataset dataset = LeRobotDataset.create( repo_id=HF_EVAL_DATASET_ID, fps=FPS, @@ -233,7 +249,6 @@ def main(): image_writer_threads=12, ) - # Load policy policy_config = PreTrainedConfig.from_pretrained(HF_MODEL_ID) policy_config.pretrained_path = HF_MODEL_ID policy = make_policy(policy_config, ds_meta=dataset.meta) @@ -242,27 +257,19 @@ def main(): policy_cfg=policy.config, pretrained_path=HF_MODEL_ID, dataset_stats=dataset.meta.stats, - preprocessor_overrides={ - "device_processor": {"device": str(policy.config.device)} - }, + preprocessor_overrides={"device_processor": {"device": str(policy.config.device)}}, ) - # Initialize controls listener, events = init_keyboard_listener() init_rerun(session_name="openarms_eval_relative") episode_idx = 0 - print("\nControls:") - print(" ESC - Stop recording and save") - print(" → - End current episode") - print(" ← - Re-record episode") + print("\nControls: ESC=stop, →=next episode, ←=rerecord") try: while episode_idx < NUM_EPISODES and not events["stop_recording"]: - log_say(f"Evaluating episode {episode_idx + 1} of {NUM_EPISODES}") - print(f"\nRunning relative action inference for episode {episode_idx + 1}...") + log_say(f"Episode {episode_idx + 1} of {NUM_EPISODES}") - # Run inference with relative action conversion inference_loop_relative( robot=follower, policy=policy, @@ -274,46 +281,41 @@ def main(): control_time_s=EPISODE_TIME_SEC, single_task=TASK_DESCRIPTION, display_data=True, + relative_normalizer=relative_normalizer, + use_relative_state=use_relative_state, ) - # Handle re-recording if events.get("rerecord_episode", False): - log_say("Re-recording episode") + log_say("Re-recording") events["rerecord_episode"] = False events["exit_early"] = False dataset.clear_episode_buffer() continue - # Save episode if dataset.episode_buffer is not None and dataset.episode_buffer.get("size", 0) > 0: - print(f"Saving episode {episode_idx + 1} ({dataset.episode_buffer['size']} frames)...") + print(f"Saving episode {episode_idx + 1}...") dataset.save_episode() episode_idx += 1 events["exit_early"] = False - # Wait for manual reset between episodes if not events["stop_recording"] and episode_idx < NUM_EPISODES: - log_say("Waiting for manual reset") - input("Press ENTER when ready for next episode...") + input("Press ENTER for next episode...") - print(f"\nEvaluation complete! {episode_idx} episodes recorded") - log_say("Evaluation complete", blocking=True) + print(f"\nDone! 
{episode_idx} episodes recorded") + log_say("Complete", blocking=True) except KeyboardInterrupt: - print("\n\nEvaluation interrupted by user") + print("\n\nInterrupted") finally: follower.disconnect() - if listener is not None: listener.stop() - dataset.finalize() - print("\nUploading to Hugging Face Hub...") + print("Uploading to Hub...") dataset.push_to_hub(private=True) if __name__ == "__main__": main() - diff --git a/scripts/unify_tasks.py b/scripts/unify_tasks.py new file mode 100644 index 000000000..c457b7b31 --- /dev/null +++ b/scripts/unify_tasks.py @@ -0,0 +1,59 @@ +#!/usr/bin/env python +"""Unify all tasks in a dataset to a single task.""" + +import argparse +import json +from pathlib import Path + +import pandas as pd + +from lerobot.datasets.lerobot_dataset import LeRobotDataset +from lerobot.datasets.utils import write_tasks + + +def unify_tasks(repo_id: str, new_task: str): + """Set all episodes to use a single task.""" + print(f"Loading dataset: {repo_id}") + dataset = LeRobotDataset(repo_id) + root = dataset.root + + print(f"Current tasks: {list(dataset.meta.tasks['task']) if dataset.meta.tasks is not None else []}") + + # 1. Update tasks.parquet to have only one task + tasks_df = pd.DataFrame({"task": [new_task]}) + write_tasks(tasks_df, root) + print(f"Set single task: '{new_task}'") + + # 2. Update all data parquet files to set task_index=0 + data_dir = root / "data" + parquet_files = sorted(data_dir.glob("*/*.parquet")) + for parquet_path in parquet_files: + df = pd.read_parquet(parquet_path) + df["task_index"] = 0 + df.to_parquet(parquet_path) + print(f"Updated: {parquet_path.relative_to(root)}") + + # 3. Update info.json + info_path = root / "info.json" + with open(info_path) as f: + info = json.load(f) + info["total_tasks"] = 1 + with open(info_path, "w") as f: + json.dump(info, f, indent=2) + + print(f"\nDone! 
All {dataset.meta.total_episodes} episodes now use task: '{new_task}'") + print(f"\nTo push: huggingface-cli upload {repo_id} {root} --repo-type dataset") + + +def main(): + parser = argparse.ArgumentParser(description="Unify all tasks in a dataset to a single task") + parser.add_argument("--repo_id", type=str, required=True, help="Dataset repo_id") + parser.add_argument("--task", type=str, required=True, help="New task description") + args = parser.parse_args() + + unify_tasks(args.repo_id, args.task) + + +if __name__ == "__main__": + main() + diff --git a/src/lerobot/configs/train.py b/src/lerobot/configs/train.py index 030dec027..384f5041c 100644 --- a/src/lerobot/configs/train.py +++ b/src/lerobot/configs/train.py @@ -66,10 +66,16 @@ class TrainPipelineConfig(HubMixin): eval: EvalConfig = field(default_factory=EvalConfig) wandb: WandBConfig = field(default_factory=WandBConfig) - # UMI-style relative actions: convert absolute joint positions to chunk-relative deltas - # During training, actions become relative to current position at chunk start - # During inference, predicted deltas are added to current robot position + # UMI-style relative actions with per-timestep normalization + # Mode 1: use_relative_actions=True, use_relative_state=False + # - Actions: relative to current position + per-timestep normalized + # - State: absolute (unchanged) + # Mode 2: use_relative_actions=True, use_relative_state=True (full UMI) + # - Actions: relative to current position + per-timestep normalized + # - State: relative to current position (provides velocity info) + # Stats are computed automatically from first 1000 batches at training start use_relative_actions: bool = False + use_relative_state: bool = False # RA-BC (Reward-Aligned Behavior Cloning) parameters use_rabc: bool = False # Enable reward-weighted training diff --git a/src/lerobot/scripts/lerobot_train.py b/src/lerobot/scripts/lerobot_train.py index c6724c11d..9117e64c5 100644 --- a/src/lerobot/scripts/lerobot_train.py +++ b/src/lerobot/scripts/lerobot_train.py @@ -46,7 +46,11 @@ from lerobot.utils.train_utils import ( save_checkpoint, update_last_checkpoint, ) -from lerobot.utils.relative_actions import convert_to_relative_actions +from lerobot.utils.relative_actions import ( + convert_to_relative_actions, + compute_relative_action_stats, + PerTimestepNormalizer, +) from lerobot.utils.utils import ( format_big_number, has_method, @@ -299,9 +303,26 @@ def train(cfg: TrainPipelineConfig, accelerator: Accelerator | None = None): device=device, ) - if cfg.use_relative_actions and is_main_process: - logging.info(colored("UMI-style relative actions enabled", "cyan", attrs=["bold"])) - logging.info("Actions will be converted to chunk-relative deltas during training") + # Compute per-timestep normalizer for relative actions + relative_normalizer = None + if cfg.use_relative_actions: + mode = "actions + state" if cfg.use_relative_state else "actions only" + if is_main_process: + logging.info(colored(f"Relative mode: {mode}", "cyan", attrs=["bold"])) + logging.info("Computing per-timestep stats from dataset (first 1000 batches)...") + temp_loader = torch.utils.data.DataLoader( + dataset, batch_size=cfg.batch_size, shuffle=True, num_workers=0 + ) + mean, std = compute_relative_action_stats(temp_loader, num_batches=1000) + relative_normalizer = PerTimestepNormalizer(mean, std) + stats_path = cfg.output_dir / "relative_stats.pt" + relative_normalizer.save(stats_path) + logging.info(f"Saved stats to: {stats_path}") + + accelerator.wait_for_everyone() 
+ + if not is_main_process: + relative_normalizer = PerTimestepNormalizer.load(cfg.output_dir / "relative_stats.pt") step = 0 # number of policy updates (forward + backward + optim) @@ -391,9 +412,11 @@ def train(cfg: TrainPipelineConfig, accelerator: Accelerator | None = None): batch = next(dl_iter) batch = preprocessor(batch) - # Convert to UMI-style relative actions if enabled + # Convert to relative actions (and optionally state) if enabled if cfg.use_relative_actions: - batch = convert_to_relative_actions(batch) + batch = convert_to_relative_actions(batch, convert_state=cfg.use_relative_state) + if relative_normalizer is not None: + batch["action"] = relative_normalizer.normalize(batch["action"]) train_tracker.dataloading_s = time.perf_counter() - start_time @@ -449,6 +472,9 @@ def train(cfg: TrainPipelineConfig, accelerator: Accelerator | None = None): preprocessor=preprocessor, postprocessor=postprocessor, ) + # Save relative action stats with checkpoint + if relative_normalizer is not None: + relative_normalizer.save(checkpoint_dir / "relative_stats.pt") update_last_checkpoint(checkpoint_dir) if wandb_logger: wandb_logger.log_policy(checkpoint_dir) diff --git a/src/lerobot/utils/relative_actions.py b/src/lerobot/utils/relative_actions.py index 5862498e9..ee58741b9 100644 --- a/src/lerobot/utils/relative_actions.py +++ b/src/lerobot/utils/relative_actions.py @@ -1,172 +1,150 @@ """ -UMI-style relative action and state utilities. +UMI-style relative actions with per-timestep normalization. -Implements chunk-relative representation from the UMI paper: -"Universal Manipulation Interface: In-The-Wild Robot Teaching Without In-The-Wild Robots" +Two modes supported: + Mode 1: Relative actions only (use_relative_state=False) + - Actions converted to relative, state stays absolute + Mode 2: Relative actions + state (use_relative_state=True, full UMI) + - Both actions and state converted to relative -For each inference step: -- Actions are relative to current position at chunk start (t0) -- State history is relative to current position (provides velocity info) - -Training: - action_relative[t] = action_absolute[t] - position_at_t0 - state_relative[t] = state_absolute[t] - current_position - -Inference: - action_absolute[t] = action_relative[t] + current_position +Per-timestep normalization (TRI LBM / BEHAVIOR style): + Training: action_norm[t] = (action_rel[t] - mean[t]) / std[t] + Inference: action_rel[t] = action_norm[t] * std[t] + mean[t] """ import torch +from pathlib import Path -def convert_to_relative(batch: dict, state_key: str = "observation.state") -> dict: - """ - Convert absolute actions AND state to chunk-relative (UMI-style) for training. 
+class PerTimestepNormalizer: + """Per-timestep normalization using precomputed dataset statistics.""" - Following UMI paper PD2.1 and PD2.2: - - Actions become relative to current position - - State history becomes relative to current position (provides velocity info) + def __init__(self, mean: torch.Tensor, std: torch.Tensor, eps: float = 1e-8): + self.mean = mean + self.std = std + self.eps = eps + + def normalize(self, x: torch.Tensor) -> torch.Tensor: + mean = self.mean.to(x.device, x.dtype) + std = self.std.to(x.device, x.dtype) + if x.dim() == 3 and mean.dim() == 2: + mean, std = mean.unsqueeze(0), std.unsqueeze(0) + return (x - mean) / (std + self.eps) + + def unnormalize(self, x: torch.Tensor) -> torch.Tensor: + mean = self.mean.to(x.device, x.dtype) + std = self.std.to(x.device, x.dtype) + if x.dim() == 3 and mean.dim() == 2: + mean, std = mean.unsqueeze(0), std.unsqueeze(0) + return x * (std + self.eps) + mean + + def save(self, path: Path | str): + path = Path(path) + path.parent.mkdir(parents=True, exist_ok=True) + torch.save({"mean": self.mean.cpu(), "std": self.std.cpu(), "eps": self.eps}, path) + + @classmethod + def load(cls, path: Path | str) -> "PerTimestepNormalizer": + data = torch.load(path, weights_only=True, map_location="cpu") + return cls(data["mean"], data["std"], data.get("eps", 1e-8)) + + +def compute_relative_action_stats( + dataloader, + state_key: str = "observation.state", + num_batches: int | None = None, +) -> tuple[torch.Tensor, torch.Tensor]: + """Compute per-timestep mean/std from relative actions.""" + all_rel = [] + for i, batch in enumerate(dataloader): + if num_batches is not None and i >= num_batches: + break + action, state = batch["action"], batch[state_key] + current_pos = state[:, -1, :] if state.dim() == 3 else state + min_dim = min(action.shape[-1], current_pos.shape[-1]) + rel = action.clone() + rel[..., :min_dim] -= current_pos[:, None, :min_dim] + all_rel.append(rel) + + all_rel = torch.cat(all_rel, dim=0) + return all_rel.mean(dim=0), all_rel.std(dim=0).clamp(min=1e-6) + + +def convert_to_relative( + batch: dict, + state_key: str = "observation.state", + convert_state: bool = True, +) -> dict: + """ + Convert actions (and optionally state) to relative. 
Args: - batch: Training batch containing: - - "action": (batch_size, chunk_size, action_dim) absolute action targets - - state_key: (batch_size, [n_obs_steps,] state_dim) observation state - state_key: Key for the observation state in the batch - - Returns: - Modified batch with relative actions and state + batch: Training batch with "action" and state_key + state_key: Key for observation state + convert_state: If True, also convert state to relative (full UMI mode) """ if "action" not in batch or state_key not in batch: return batch action = batch["action"] state = batch[state_key] - batch = batch.copy() - # Get current position (reference for relative conversion) - # State shape: (batch, state_dim) or (batch, n_obs_steps, state_dim) - if state.dim() == 3: - current_pos = state[:, -1, :] # (batch, state_dim) - - # Convert state history to relative (each timestep relative to current) - # This gives velocity-like information to the policy - relative_state = state.clone() - relative_state = state - current_pos[:, None, :] - batch[state_key] = relative_state - else: - current_pos = state # (batch, state_dim) - # Single timestep state becomes zeros (relative to itself) - batch[state_key] = torch.zeros_like(state) + # Get current position as reference + current_pos = state[:, -1, :] if state.dim() == 3 else state + + # Convert state if requested + if convert_state: + if state.dim() == 3: + batch[state_key] = state - current_pos[:, None, :] + else: + batch[state_key] = torch.zeros_like(state) # Convert actions to relative - action_dim = action.shape[-1] - state_dim = current_pos.shape[-1] - min_dim = min(action_dim, state_dim) - - relative_action = action.clone() - relative_action[..., :min_dim] = action[..., :min_dim] - current_pos[:, None, :min_dim] - batch["action"] = relative_action + min_dim = min(action.shape[-1], current_pos.shape[-1]) + rel_action = action.clone() + rel_action[..., :min_dim] -= current_pos[:, None, :min_dim] + batch["action"] = rel_action return batch -# Alias for backward compatibility +# Backward compatibility alias convert_to_relative_actions = convert_to_relative -def convert_state_to_relative( - state: torch.Tensor, - current_pos: torch.Tensor | None = None, -) -> torch.Tensor: - """ - Convert absolute state to relative for inference. - - Args: - state: State tensor, shape (state_dim,) or (n_obs_steps, state_dim) - current_pos: Current position to use as reference. If None, uses last timestep of state. - - Returns: - Relative state tensor - """ - if current_pos is None: - if state.dim() >= 2: - current_pos = state[-1, :] # Last timestep - else: - current_pos = state - +def convert_state_to_relative(state: torch.Tensor) -> torch.Tensor: + """Convert state to relative (for inference with use_relative_state=True).""" if state.dim() == 1: return torch.zeros_like(state) - elif state.dim() == 2: - # (n_obs_steps, state_dim) + current_pos = state[-1, :] if state.dim() == 2 else state[:, -1, :] + if state.dim() == 2: return state - current_pos[None, :] - else: - # (batch, n_obs_steps, state_dim) - return state - current_pos[:, None, :] + return state - current_pos[:, None, :] def convert_from_relative_actions( relative_actions: torch.Tensor, - current_pos: torch.Tensor | dict[str, float], + current_pos: torch.Tensor, ) -> torch.Tensor: - """ - Convert relative actions back to absolute for robot execution. 
- - Args: - relative_actions: Predicted relative actions, shape (chunk_size, action_dim) - or (batch, chunk_size, action_dim) - current_pos: Current robot position as tensor (action_dim,) or dict of joint positions - - Returns: - Absolute actions for robot execution - """ - if isinstance(current_pos, dict): - # Convert dict to tensor, maintaining key order - current_pos = torch.tensor(list(current_pos.values()), dtype=relative_actions.dtype) - - # Ensure current_pos is on same device - current_pos = current_pos.to(relative_actions.device) - - # Match dimensions - action_dim = relative_actions.shape[-1] - pos_dim = current_pos.shape[-1] if current_pos.dim() > 0 else len(current_pos) - min_dim = min(action_dim, pos_dim) - - absolute_actions = relative_actions.clone() + """Convert relative actions back to absolute for robot execution.""" + current_pos = current_pos.to(relative_actions.device, relative_actions.dtype) + min_dim = min(relative_actions.shape[-1], current_pos.shape[-1]) + absolute = relative_actions.clone() if relative_actions.dim() == 2: - # Shape: (chunk_size, action_dim) - absolute_actions[..., :min_dim] = relative_actions[..., :min_dim] + current_pos[:min_dim] + absolute[..., :min_dim] += current_pos[:min_dim] elif relative_actions.dim() == 3: - # Shape: (batch, chunk_size, action_dim) - absolute_actions[..., :min_dim] = relative_actions[..., :min_dim] + current_pos[None, None, :min_dim] + absolute[..., :min_dim] += current_pos[None, None, :min_dim] else: - # Shape: (action_dim,) - absolute_actions[..., :min_dim] = relative_actions[..., :min_dim] + current_pos[:min_dim] + absolute[..., :min_dim] += current_pos[:min_dim] - return absolute_actions + return absolute def convert_from_relative_actions_dict( relative_actions: dict[str, float], current_pos: dict[str, float], ) -> dict[str, float]: - """ - Convert relative actions back to absolute for robot execution (dict version). 
- - Args: - relative_actions: Predicted relative actions as dict (e.g., {"joint_1.pos": 0.1, ...}) - current_pos: Current robot position as dict (e.g., {"joint_1.pos": 45.0, ...}) - - Returns: - Absolute actions dict for robot execution - """ - absolute_actions = {} - for key, rel_value in relative_actions.items(): - if key in current_pos: - absolute_actions[key] = rel_value + current_pos[key] - else: - # Key not in current position, keep as-is (shouldn't happen normally) - absolute_actions[key] = rel_value - return absolute_actions - + """Convert relative actions back to absolute (dict version for inference).""" + return {k: v + current_pos.get(k, 0.0) for k, v in relative_actions.items()} From 7f16e8cb097a3ad31f46fae874b3a9473d02dec3 Mon Sep 17 00:00:00 2001 From: Pepijn Date: Fri, 2 Jan 2026 19:56:42 +0100 Subject: [PATCH 3/8] fix --- src/lerobot/scripts/lerobot_train.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/lerobot/scripts/lerobot_train.py b/src/lerobot/scripts/lerobot_train.py index 9117e64c5..3186eae44 100644 --- a/src/lerobot/scripts/lerobot_train.py +++ b/src/lerobot/scripts/lerobot_train.py @@ -303,26 +303,29 @@ def train(cfg: TrainPipelineConfig, accelerator: Accelerator | None = None): device=device, ) - # Compute per-timestep normalizer for relative actions + # Compute per-timestep normalizer for relative actions (main process computes, others load) relative_normalizer = None if cfg.use_relative_actions: + stats_path = cfg.output_dir / "relative_stats.pt" mode = "actions + state" if cfg.use_relative_state else "actions only" + if is_main_process: logging.info(colored(f"Relative mode: {mode}", "cyan", attrs=["bold"])) logging.info("Computing per-timestep stats from dataset (first 1000 batches)...") + cfg.output_dir.mkdir(parents=True, exist_ok=True) temp_loader = torch.utils.data.DataLoader( dataset, batch_size=cfg.batch_size, shuffle=True, num_workers=0 ) mean, std = compute_relative_action_stats(temp_loader, num_batches=1000) relative_normalizer = PerTimestepNormalizer(mean, std) - stats_path = cfg.output_dir / "relative_stats.pt" relative_normalizer.save(stats_path) logging.info(f"Saved stats to: {stats_path}") + # Barrier: wait for main process to finish computing and saving stats accelerator.wait_for_everyone() if not is_main_process: - relative_normalizer = PerTimestepNormalizer.load(cfg.output_dir / "relative_stats.pt") + relative_normalizer = PerTimestepNormalizer.load(stats_path) step = 0 # number of policy updates (forward + backward + optim) From c5f66edff943d277150830d63353f462fe59d9c4 Mon Sep 17 00:00:00 2001 From: Pepijn Date: Fri, 2 Jan 2026 22:34:57 +0100 Subject: [PATCH 4/8] shuffle false --- src/lerobot/scripts/lerobot_train.py | 28 ++++++++++++---------------- 1 file changed, 12 insertions(+), 16 deletions(-) diff --git a/src/lerobot/scripts/lerobot_train.py b/src/lerobot/scripts/lerobot_train.py index 3186eae44..f88d5c02f 100644 --- a/src/lerobot/scripts/lerobot_train.py +++ b/src/lerobot/scripts/lerobot_train.py @@ -303,29 +303,25 @@ def train(cfg: TrainPipelineConfig, accelerator: Accelerator | None = None): device=device, ) - # Compute per-timestep normalizer for relative actions (main process computes, others load) + # Compute per-timestep normalizer for relative actions + # Each process computes stats independently to avoid distributed sync issues relative_normalizer = None if cfg.use_relative_actions: - stats_path = cfg.output_dir / "relative_stats.pt" mode = "actions + state" if cfg.use_relative_state else 
"actions only" - if is_main_process: logging.info(colored(f"Relative mode: {mode}", "cyan", attrs=["bold"])) logging.info("Computing per-timestep stats from dataset (first 1000 batches)...") + + temp_loader = torch.utils.data.DataLoader( + dataset, batch_size=cfg.batch_size, shuffle=False, num_workers=0 + ) + mean, std = compute_relative_action_stats(temp_loader, num_batches=1000) + relative_normalizer = PerTimestepNormalizer(mean, std) + + if is_main_process: cfg.output_dir.mkdir(parents=True, exist_ok=True) - temp_loader = torch.utils.data.DataLoader( - dataset, batch_size=cfg.batch_size, shuffle=True, num_workers=0 - ) - mean, std = compute_relative_action_stats(temp_loader, num_batches=1000) - relative_normalizer = PerTimestepNormalizer(mean, std) - relative_normalizer.save(stats_path) - logging.info(f"Saved stats to: {stats_path}") - - # Barrier: wait for main process to finish computing and saving stats - accelerator.wait_for_everyone() - - if not is_main_process: - relative_normalizer = PerTimestepNormalizer.load(stats_path) + relative_normalizer.save(cfg.output_dir / "relative_stats.pt") + logging.info(f"Saved stats to: {cfg.output_dir / 'relative_stats.pt'}") step = 0 # number of policy updates (forward + backward + optim) From 574081ac02b7d3da19edb220b2d322ac13677beb Mon Sep 17 00:00:00 2001 From: Pepijn Date: Sat, 3 Jan 2026 11:34:31 +0100 Subject: [PATCH 5/8] fix mem bug --- src/lerobot/utils/relative_actions.py | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/src/lerobot/utils/relative_actions.py b/src/lerobot/utils/relative_actions.py index ee58741b9..888cb63a0 100644 --- a/src/lerobot/utils/relative_actions.py +++ b/src/lerobot/utils/relative_actions.py @@ -23,17 +23,26 @@ class PerTimestepNormalizer: self.mean = mean self.std = std self.eps = eps + self._cache = {} # Cache for device/dtype converted tensors + + def _get_stats(self, device, dtype): + """Get cached stats for device/dtype, or create and cache them.""" + key = (device, dtype) + if key not in self._cache: + self._cache[key] = ( + self.mean.to(device, dtype), + self.std.to(device, dtype), + ) + return self._cache[key] def normalize(self, x: torch.Tensor) -> torch.Tensor: - mean = self.mean.to(x.device, x.dtype) - std = self.std.to(x.device, x.dtype) + mean, std = self._get_stats(x.device, x.dtype) if x.dim() == 3 and mean.dim() == 2: mean, std = mean.unsqueeze(0), std.unsqueeze(0) return (x - mean) / (std + self.eps) def unnormalize(self, x: torch.Tensor) -> torch.Tensor: - mean = self.mean.to(x.device, x.dtype) - std = self.std.to(x.device, x.dtype) + mean, std = self._get_stats(x.device, x.dtype) if x.dim() == 3 and mean.dim() == 2: mean, std = mean.unsqueeze(0), std.unsqueeze(0) return x * (std + self.eps) + mean From 63619619bfbbc0c3b15c5e070c939b19ebdd26c2 Mon Sep 17 00:00:00 2001 From: Pepijn Date: Wed, 7 Jan 2026 10:03:56 +0100 Subject: [PATCH 6/8] fix data loader issue --- src/lerobot/scripts/lerobot_train.py | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/src/lerobot/scripts/lerobot_train.py b/src/lerobot/scripts/lerobot_train.py index f88d5c02f..405d619c8 100644 --- a/src/lerobot/scripts/lerobot_train.py +++ b/src/lerobot/scripts/lerobot_train.py @@ -304,24 +304,29 @@ def train(cfg: TrainPipelineConfig, accelerator: Accelerator | None = None): ) # Compute per-timestep normalizer for relative actions - # Each process computes stats independently to avoid distributed sync issues + # Only main process computes, then 
broadcasts to avoid video decoder issues relative_normalizer = None if cfg.use_relative_actions: mode = "actions + state" if cfg.use_relative_state else "actions only" if is_main_process: logging.info(colored(f"Relative mode: {mode}", "cyan", attrs=["bold"])) logging.info("Computing per-timestep stats from dataset (first 1000 batches)...") - - temp_loader = torch.utils.data.DataLoader( - dataset, batch_size=cfg.batch_size, shuffle=False, num_workers=0 - ) - mean, std = compute_relative_action_stats(temp_loader, num_batches=1000) - relative_normalizer = PerTimestepNormalizer(mean, std) - - if is_main_process: + temp_loader = torch.utils.data.DataLoader( + dataset, batch_size=cfg.batch_size, shuffle=False, num_workers=0 + ) + mean, std = compute_relative_action_stats(temp_loader, num_batches=1000) + del temp_loader cfg.output_dir.mkdir(parents=True, exist_ok=True) - relative_normalizer.save(cfg.output_dir / "relative_stats.pt") - logging.info(f"Saved stats to: {cfg.output_dir / 'relative_stats.pt'}") + stats_path = cfg.output_dir / "relative_stats.pt" + torch.save({"mean": mean, "std": std}, stats_path) + logging.info(f"Saved stats to: {stats_path}") + + accelerator.wait_for_everyone() + + # All ranks load from saved file + stats_path = cfg.output_dir / "relative_stats.pt" + data = torch.load(stats_path, weights_only=True, map_location="cpu") + relative_normalizer = PerTimestepNormalizer(data["mean"], data["std"]) step = 0 # number of policy updates (forward + backward + optim) From c23472e37647eb11fd9c1f0483868191d381f59f Mon Sep 17 00:00:00 2001 From: Pepijn Date: Wed, 7 Jan 2026 15:30:25 +0100 Subject: [PATCH 7/8] only main saves stat file --- src/lerobot/scripts/lerobot_train.py | 31 ++++++++++++++-------------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/src/lerobot/scripts/lerobot_train.py b/src/lerobot/scripts/lerobot_train.py index 405d619c8..dd36f4a39 100644 --- a/src/lerobot/scripts/lerobot_train.py +++ b/src/lerobot/scripts/lerobot_train.py @@ -13,6 +13,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
+import gc
 import logging
 import time
 from contextlib import nullcontext
@@ -304,24 +305,29 @@ def train(cfg: TrainPipelineConfig, accelerator: Accelerator | None = None):
     )
 
     # Compute per-timestep normalizer for relative actions
-    # Only main process computes, then broadcasts to avoid video decoder issues
+    # Compute BEFORE accelerator.prepare() to avoid NCCL timeouts during long computation
     relative_normalizer = None
     if cfg.use_relative_actions:
         mode = "actions + state" if cfg.use_relative_state else "actions only"
         if is_main_process:
             logging.info(colored(f"Relative mode: {mode}", "cyan", attrs=["bold"]))
             logging.info("Computing per-timestep stats from dataset (first 1000 batches)...")
-            temp_loader = torch.utils.data.DataLoader(
-                dataset, batch_size=cfg.batch_size, shuffle=False, num_workers=0
-            )
-            mean, std = compute_relative_action_stats(temp_loader, num_batches=1000)
-            del temp_loader
+
+        # All ranks compute independently to avoid NCCL timeout (computation takes ~10min)
+        temp_loader = torch.utils.data.DataLoader(
+            dataset, batch_size=cfg.batch_size, shuffle=False, num_workers=0
+        )
+        mean, std = compute_relative_action_stats(temp_loader, num_batches=1000)
+        del temp_loader
+        gc.collect()  # Clean up any video decoder resources
+        relative_normalizer = PerTimestepNormalizer(mean, std)
+
+        if is_main_process:
             cfg.output_dir.mkdir(parents=True, exist_ok=True)
-            stats_path = cfg.output_dir / "relative_stats.pt"
-            torch.save({"mean": mean, "std": std}, stats_path)
-            logging.info(f"Saved stats to: {stats_path}")
-
-        accelerator.wait_for_everyone()
-
-        # All ranks load from saved file
-        stats_path = cfg.output_dir / "relative_stats.pt"
-        data = torch.load(stats_path, weights_only=True, map_location="cpu")
-        relative_normalizer = PerTimestepNormalizer(data["mean"], data["std"])
+            relative_normalizer.save(cfg.output_dir / "relative_stats.pt")
+            logging.info(f"Saved stats to: {cfg.output_dir / 'relative_stats.pt'}")
 
     step = 0  # number of policy updates (forward + backward + optim)
 
From 5a15a6a91116eda5fc1305de9e5f22ec651b0b19 Mon Sep 17 00:00:00 2001
From: Pepijn
Date: Wed, 7 Jan 2026 16:52:15 +0100
Subject: [PATCH 8/8] use separate process for stats computation

---
 src/lerobot/scripts/lerobot_train.py | 40 ++++++++++++++++++----------
 1 file changed, 26 insertions(+), 14 deletions(-)

diff --git a/src/lerobot/scripts/lerobot_train.py b/src/lerobot/scripts/lerobot_train.py
index dd36f4a39..deb5a4681 100644
--- a/src/lerobot/scripts/lerobot_train.py
+++ b/src/lerobot/scripts/lerobot_train.py
@@ -305,27 +305,39 @@ def train(cfg: TrainPipelineConfig, accelerator: Accelerator | None = None):
     )
 
     # Compute per-timestep normalizer for relative actions
-    # Compute BEFORE accelerator.prepare() to avoid NCCL timeouts during long computation
     relative_normalizer = None
     if cfg.use_relative_actions:
         mode = "actions + state" if cfg.use_relative_state else "actions only"
+        cfg.output_dir.mkdir(parents=True, exist_ok=True)
+        stats_path = cfg.output_dir / "relative_stats.pt"
+
         if is_main_process:
             logging.info(colored(f"Relative mode: {mode}", "cyan", attrs=["bold"]))
+
+            if stats_path.exists():
+                logging.info(f"Loading pre-computed stats from: {stats_path}")
+            else:
+                logging.info("Computing per-timestep stats (first 1000 batches)...")
+                logging.info("Using fresh dataset to avoid video decoder state issues...")
+                # Create separate dataset instance to avoid corrupting main dataset's video decoders
+                stats_dataset = 
make_dataset(cfg) + temp_loader = torch.utils.data.DataLoader( + stats_dataset, batch_size=cfg.batch_size, shuffle=False, num_workers=0 + ) + mean, std = compute_relative_action_stats(temp_loader, num_batches=1000) + del temp_loader, stats_dataset + gc.collect() + torch.save({"mean": mean, "std": std}, stats_path) + logging.info(f"Saved stats to: {stats_path}") - # All ranks compute independently to avoid NCCL timeout (computation takes ~10min) - temp_loader = torch.utils.data.DataLoader( - dataset, batch_size=cfg.batch_size, shuffle=False, num_workers=0 - ) - mean, std = compute_relative_action_stats(temp_loader, num_batches=1000) - del temp_loader - gc.collect() # Clean up any video decoder resources - relative_normalizer = PerTimestepNormalizer(mean, std) + # Poll for stats file instead of using NCCL barrier (avoids timeout during long computation) + if not is_main_process: + while not stats_path.exists(): + time.sleep(5) - if is_main_process: - cfg.output_dir.mkdir(parents=True, exist_ok=True) - relative_normalizer.save(cfg.output_dir / "relative_stats.pt") - logging.info(f"Saved stats to: {cfg.output_dir / 'relative_stats.pt'}") + data = torch.load(stats_path, weights_only=True, map_location="cpu") + relative_normalizer = PerTimestepNormalizer(data["mean"], data["std"]) + accelerator.wait_for_everyone() # Sync after everyone has loaded step = 0 # number of policy updates (forward + backward + optim)
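
Usage sketch (illustrative, not part of any patch above): the round trip this
series implements, written against the helpers in
src/lerobot/utils/relative_actions.py as of PATCH 5. The shapes (2 samples,
16-step chunks, 14 joints) and the zero-mean/unit-std stats are assumptions
for demonstration only; real stats come from compute_relative_action_stats as
wired into lerobot_train.py.

    import torch

    from lerobot.utils.relative_actions import (
        PerTimestepNormalizer,
        convert_from_relative_actions,
        convert_to_relative,
    )

    # Toy batch: actions are (batch, chunk_size, action_dim), state is (batch, state_dim).
    batch = {
        "action": torch.randn(2, 16, 14),
        "observation.state": torch.randn(2, 14),
    }
    absolute_action = batch["action"].clone()
    current_pos = batch["observation.state"].clone()

    # Training side (Mode 1, state left absolute): absolute -> relative -> normalized.
    rel_batch = convert_to_relative(batch, convert_state=False)
    normalizer = PerTimestepNormalizer(mean=torch.zeros(16, 14), std=torch.ones(16, 14))
    normalized = normalizer.normalize(rel_batch["action"])

    # Inference side: unnormalize, then add the current position back on.
    recovered = convert_from_relative_actions(
        normalizer.unnormalize(normalized)[0], current_pos[0]
    )
    assert torch.allclose(recovered, absolute_action[0], atol=1e-5)

The unify_tasks.py script from PATCH 2 is run standalone, e.g.:

    python scripts/unify_tasks.py --repo_id your-org/your-dataset --task "your task description"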