fix(scripts): better prints teleop (#2538 )

feat(utils): precise_sleep() less CPU hungry without sacrificing accuracy (#2526 )
fix(benchmark) : fixing video benchmark (#2094 )
2026-06-18 16:57:12 +00:00 · 2025-11-27 16:54:17 +01:00 · 2025-11-26 17:42:16 +01:00 · 2025-11-26 17:41:31 +01:00 · 2025-11-26 14:57:34 +01:00 · 2025-11-26 14:28:04 +01:00
54 changed files with 3088 additions and 2102 deletions
@@ -1,94 +0,0 @@
 #!/usr/bin/env python
 # Copyright 2024 The HuggingFace Inc. team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import threading
 import time
 from contextlib import ContextDecorator
 class TimeBenchmark(ContextDecorator):
    """
    Measures execution time using a context manager or decorator.

    This class supports both context manager and decorator usage. Timing state is stored in a
    `threading.local()`, so each thread reads back the measurement it made itself.

    Args:
        print: If True, prints the elapsed time upon exiting the context or completing the function. Defaults
        to False.

    Attributes exposed after a measurement:
        result: Elapsed time in seconds for the calling thread, or None if that thread has not
            completed a measurement yet.
        result_ms: Same value in milliseconds, or None under the same condition.

    Examples:
        Using as a context manager:

        ```python
        benchmark = TimeBenchmark()
        with benchmark:
            time.sleep(1)
        print(f"Block took {benchmark.result:.4f} seconds")
        ```

        Example output (exact timings vary from run to run):

        Block took 1.0002 seconds

        Using with multithreading:

        ```python
        import threading

        benchmark = TimeBenchmark()

        def context_manager_example():
            with benchmark:
                time.sleep(0.01)
            print(f"Block took {benchmark.result_ms:.2f} milliseconds")

        threads = []
        for _ in range(3):
            t1 = threading.Thread(target=context_manager_example)
            threads.append(t1)
        for t in threads:
            t.start()
        for t in threads:
            t.join()
        ```

        Example output (one line per thread, each close to 10 milliseconds):

        Block took 10.08 milliseconds
        Block took 10.11 milliseconds
        Block took 10.09 milliseconds
    """

    def __init__(self, print=False):
        # Per-thread storage for start/end/elapsed times.
        self.local = threading.local()
        # The parameter is named `print` for callers; store it under a name that does not
        # shadow the builtin used in __exit__.
        self.print_time = print

    def __enter__(self):
        self.local.start_time = time.perf_counter()
        return self

    def __exit__(self, *exc):
        self.local.end_time = time.perf_counter()
        self.local.elapsed_time = self.local.end_time - self.local.start_time
        if self.print_time:
            print(f"Elapsed time: {self.local.elapsed_time:.4f} seconds")
        # Never suppress an exception raised inside the timed block.
        return False

    @property
    def result(self):
        """Elapsed seconds of the calling thread's last measurement, or None if there is none."""
        return getattr(self.local, "elapsed_time", None)

    @property
    def result_ms(self):
        """Elapsed milliseconds of the calling thread's last measurement, or None if there is none."""
        elapsed_s = self.result
        # Mirror `result`: without a measurement there is nothing to convert, so return None
        # instead of failing on `None * 1e3`.
        if elapsed_s is None:
            return None
        return elapsed_s * 1e3
@@ -1,102 +0,0 @@
 #!/usr/bin/env python
 # Copyright 2024 The HuggingFace Inc. team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """Capture video feed from a camera as raw images."""
 import argparse
 import datetime as dt
 import os
 import time
 from pathlib import Path
 import cv2
 import rerun as rr
 # see https://rerun.io/docs/howto/visualization/limit-ram
 RERUN_MEMORY_LIMIT = os.getenv("LEROBOT_RERUN_MEMORY_LIMIT", "5%")
 def display_and_save_video_stream(output_dir: Path, fps: int, width: int, height: int, duration: int):
    """Stream the default webcam to a Rerun viewer and save every frame as a PNG.

    Frames are written to `output_dir/<YYYY-MM-DD>/<HH-MM-SS>/frame_<index>.png`, where the date
    and time are taken when the function starts.

    Args:
        output_dir: Root directory under which the dated capture folder is created.
        fps: Frame rate requested from the capture device.
        width: Frame width requested from the capture device.
        height: Frame height requested from the capture device.
        duration: Number of seconds to keep capturing.
    """
    rr.init("lerobot_capture_camera_feed")
    rr.spawn(memory_limit=RERUN_MEMORY_LIMIT)
    now = dt.datetime.now()
    capture_dir = output_dir / f"{now:%Y-%m-%d}" / f"{now:%H-%M-%S}"
    # exist_ok=True already makes this a no-op when the directory is present.
    capture_dir.mkdir(parents=True, exist_ok=True)
    # Opens the default webcam
    cap = cv2.VideoCapture(0)
    if not cap.isOpened():
        print("Error: Could not open video stream.")
        return
    try:
        cap.set(cv2.CAP_PROP_FPS, fps)
        cap.set(cv2.CAP_PROP_FRAME_WIDTH, width)
        cap.set(cv2.CAP_PROP_FRAME_HEIGHT, height)
        frame_index = 0
        # A monotonic clock keeps the capture duration unaffected by system clock adjustments.
        start_time = time.monotonic()
        while time.monotonic() - start_time < duration:
            ret, frame = cap.read()
            if not ret:
                print("Error: Could not read frame.")
                break
            rr.log("video/stream", rr.Image(frame), static=True)
            cv2.imwrite(str(capture_dir / f"frame_{frame_index:06d}.png"), frame)
            frame_index += 1
    finally:
        # Release the capture even if logging or writing a frame raised.
        cap.release()
    # TODO(Steven): Add a graceful shutdown via a close() method for the Viewer context, though not currently supported in the Rerun API.
 if __name__ == "__main__":
    # Command-line entry point: each option maps one-to-one onto a keyword argument of
    # display_and_save_video_stream.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--output-dir",
        type=Path,
        default=Path("outputs/cam_capture/"),
        help="Directory where the capture images are written. A subfolder named with the current date & time will be created inside it for each capture.",
    )
    # The remaining options are all integers and differ only in flag, default and help text.
    int_options = (
        ("--fps", 30, "Frames Per Second of the capture."),
        ("--width", 1280, "Width of the captured images."),
        ("--height", 720, "Height of the captured images."),
        ("--duration", 20, "Duration in seconds for which the video stream should be captured."),
    )
    for flag, default_value, help_text in int_options:
        parser.add_argument(flag, type=int, default=default_value, help=help_text)
    args = parser.parse_args()
    display_and_save_video_stream(**vars(args))
@@ -21,11 +21,13 @@ See the provided README.md or run `python benchmark/video/run_video_benchmark.py
 import argparse
 import datetime as dt
 import itertools
 import random
 import shutil
 from collections import OrderedDict
 from concurrent.futures import ThreadPoolExecutor, as_completed
 from pathlib import Path
 from threading import Lock
 import einops
 import numpy as np
@@ -35,13 +37,13 @@ import torch
 from skimage.metrics import mean_squared_error, peak_signal_noise_ratio, structural_similarity
 from tqdm import tqdm
 from benchmarks.video.benchmark import TimeBenchmark
 from lerobot.datasets.lerobot_dataset import LeRobotDataset
 from lerobot.datasets.video_utils import (
-    decode_video_frames_torchvision,
+    decode_video_frames,
    encode_video_frames,
 )
 from lerobot.utils.constants import OBS_IMAGE
 from lerobot.utils.utils import TimerManager
 BASE_ENCODING = OrderedDict(
    [
@@ -86,7 +88,7 @@ def load_original_frames(imgs_dir: Path, timestamps: list[float], fps: int) -> t
    frames = []
    for ts in timestamps:
        idx = int(ts * fps)
-        frame = PIL.Image.open(imgs_dir / f"frame_{idx:06d}.png")
+        frame = PIL.Image.open(imgs_dir / f"frame-{idx:06d}.png")
        frame = torch.from_numpy(np.array(frame))
        frame = frame.type(torch.float32) / 255
        frame = einops.rearrange(frame, "h w c -> c h w")
@@ -97,21 +99,21 @@ def load_original_frames(imgs_dir: Path, timestamps: list[float], fps: int) -> t
 def save_decoded_frames(
    imgs_dir: Path, save_dir: Path, frames: torch.Tensor, timestamps: list[float], fps: int
 ) -> None:
-    if save_dir.exists() and len(list(save_dir.glob("frame_*.png"))) == len(timestamps):
+    if save_dir.exists() and len(list(save_dir.glob("frame-*.png"))) == len(timestamps):
        return
    save_dir.mkdir(parents=True, exist_ok=True)
    for i, ts in enumerate(timestamps):
        idx = int(ts * fps)
        frame_hwc = (frames[i].permute((1, 2, 0)) * 255).type(torch.uint8).cpu().numpy()
-        PIL.Image.fromarray(frame_hwc).save(save_dir / f"frame_{idx:06d}_decoded.png")
+        PIL.Image.fromarray(frame_hwc).save(save_dir / f"frame-{idx:06d}_decoded.png")
-        shutil.copyfile(imgs_dir / f"frame_{idx:06d}.png", save_dir / f"frame_{idx:06d}_original.png")
+        shutil.copyfile(imgs_dir / f"frame-{idx:06d}.png", save_dir / f"frame-{idx:06d}_original.png")
 def save_first_episode(imgs_dir: Path, dataset: LeRobotDataset) -> None:
    episode_index = 0
    ep_num_images = dataset.meta.episodes["length"][episode_index]
-    if imgs_dir.exists() and len(list(imgs_dir.glob("frame_*.png"))) == ep_num_images:
+    if imgs_dir.exists() and len(list(imgs_dir.glob("frame-*.png"))) == ep_num_images:
        return
    imgs_dir.mkdir(parents=True, exist_ok=True)
@@ -125,7 +127,7 @@ def save_first_episode(imgs_dir: Path, dataset: LeRobotDataset) -> None:
        tqdm(imgs_dataset, desc=f"saving {dataset.repo_id} first episode images", leave=False)
    ):
        img = item[img_keys[0]]
-        img.save(str(imgs_dir / f"frame_{i:06d}.png"), quality=100)
+        img.save(str(imgs_dir / f"frame-{i:06d}.png"), quality=100)
        if i >= ep_num_images - 1:
            break
@@ -149,18 +151,6 @@ def sample_timestamps(timestamps_mode: str, ep_num_images: int, fps: int) -> lis
    return [idx / fps for idx in frame_indexes]
 def decode_video_frames(
    video_path: str,
    timestamps: list[float],
    tolerance_s: float,
    backend: str,
 ) -> torch.Tensor:
    """Dispatch frame decoding to the implementation that matches `backend`.

    The "pyav" and "video_reader" backends are both forwarded, with all arguments unchanged,
    to `decode_video_frames_torchvision`.

    Raises:
        NotImplementedError: If `backend` is any other value; the backend is passed as the
            exception argument.
    """
    supported_backends = ("pyav", "video_reader")
    # Guard clause: reject unknown backends before touching the decoder.
    if backend not in supported_backends:
        raise NotImplementedError(backend)
    return decode_video_frames_torchvision(video_path, timestamps, tolerance_s, backend)
 def benchmark_decoding(
    imgs_dir: Path,
    video_path: Path,
@@ -172,8 +162,8 @@ def benchmark_decoding(
    num_workers: int = 4,
    save_frames: bool = False,
 ) -> dict:
-    def process_sample(sample: int):
+    def process_sample(sample: int, lock: Lock):
-        time_benchmark = TimeBenchmark()
+        time_benchmark = TimerManager(log=False)
        timestamps = sample_timestamps(timestamps_mode, ep_num_images, fps)
        num_frames = len(timestamps)
        result = {
@@ -182,13 +172,13 @@ def benchmark_decoding(
            "mse_values": [],
        }
-        with time_benchmark:
+        with time_benchmark, lock:
            frames = decode_video_frames(video_path, timestamps=timestamps, tolerance_s=5e-1, backend=backend)
-        result["load_time_video_ms"] = time_benchmark.result_ms / num_frames
+        result["load_time_video_ms"] = (time_benchmark.last * 1000) / num_frames
        with time_benchmark:
            original_frames = load_original_frames(imgs_dir, timestamps, fps)
-        result["load_time_images_ms"] = time_benchmark.result_ms / num_frames
+        result["load_time_images_ms"] = (time_benchmark.last * 1000) / num_frames
        frames_np, original_frames_np = frames.numpy(), original_frames.numpy()
        for i in range(num_frames):
@@ -215,8 +205,10 @@ def benchmark_decoding(
    # A sample is a single set of decoded frames specified by timestamps_mode (e.g. a single frame, 2 frames, etc.).
    # For each sample, we record metrics (loading time and quality metrics) which are then averaged over all samples.
    # As these samples are independent, we run them in parallel threads to speed up the benchmark.
    # Use a single shared lock for all worker threads
    shared_lock = Lock()
    with ThreadPoolExecutor(max_workers=num_workers) as executor:
-        futures = [executor.submit(process_sample, i) for i in range(num_samples)]
+        futures = [executor.submit(process_sample, i, shared_lock) for i in range(num_samples)]
        for future in tqdm(as_completed(futures), total=num_samples, desc="samples", leave=False):
            result = future.result()
            load_times_video_ms.append(result["load_time_video_ms"])
@@ -358,11 +350,14 @@ def main(
                imgs_dir = output_dir / "images" / dataset.repo_id.replace("/", "_")
                # We only use the first episode
                save_first_episode(imgs_dir, dataset)
-                for key, values in tqdm(encoding_benchmarks.items(), desc="encodings (g, crf)", leave=False):
+                for duet in [
-                    for value in tqdm(values, desc=f"encodings ({key})", leave=False):
+                    dict(zip(encoding_benchmarks.keys(), unique_combination, strict=False))
                    for unique_combination in itertools.product(*encoding_benchmarks.values())
                ]:
                    encoding_cfg = BASE_ENCODING.copy()
                    encoding_cfg["vcodec"] = video_codec
                    encoding_cfg["pix_fmt"] = pixel_format
                    for key, value in duet.items():
                        encoding_cfg[key] = value
                    args_path = Path("_".join(str(value) for value in encoding_cfg.values()))
                    video_path = output_dir / "videos" / args_path / f"{repo_id.replace('/', '_')}.mp4"
@@ -409,9 +404,9 @@ if __name__ == "__main__":
        nargs="*",
        default=[
            "lerobot/pusht_image",
-            "aliberts/aloha_mobile_shrimp_image",
+            "lerobot/aloha_mobile_shrimp_image",
-            "aliberts/paris_street",
+            "lerobot/paris_street",
-            "aliberts/kitchen",
+            "lerobot/kitchen",
        ],
        help="Datasets repo-ids to test against. First episodes only are used. Must be images.",
    )
@@ -419,7 +414,7 @@ if __name__ == "__main__":
        "--vcodec",
        type=str,
        nargs="*",
-        default=["libx264", "hevc", "libsvtav1"],
+        default=["h264", "hevc", "libsvtav1"],
        help="Video codecs to be tested",
    )
    parser.add_argument(
@@ -468,7 +463,7 @@ if __name__ == "__main__":
        "--backends",
        type=str,
        nargs="*",
-        default=["pyav", "video_reader"],
+        default=["torchcodec", "pyav"],
        help="Torchvision decoding backend to be tested.",
    )
    parser.add_argument(
@@ -47,8 +47,8 @@
 - sections:
  - local: envhub
    title: Environments from the Hub
-  - local: il_sim
+  - local: envhub_leisaac
-    title: Imitation Learning in Sim
+    title: Control & Train Robots in Sim (LeIsaac)
  - local: libero
    title: Using Libero
  - local: metaworld
@@ -63,6 +63,8 @@
    title: Implement your own processor
  - local: processors_robots_teleop
    title: Processors for Robots and Teleoperators
  - local: env_processor
    title: Environment Processors
  title: "Robot Processors"
 - sections:
  - local: so101
@@ -196,7 +196,7 @@ client_cfg = RobotClientConfig(
    server_address="localhost:8080",
    policy_device="mps",
    policy_type="smolvla",
-    pretrained_name_or_path="fracapuano/smolvla_async",
+    pretrained_name_or_path="<user>/smolvla_async",
    chunk_size_threshold=0.5,
    actions_per_chunk=50,  # make sure this is less than the max actions of the policy
 )
@@ -0,0 +1,418 @@
 # Environment Processors
 Environment processors are a critical layer in LeRobot's data processing architecture that handles **environment-specific** transformations, separate from policy-specific processing. This separation of concerns enables cleaner code, better modularity, and easier experimentation with different environments and policies.
 ## Why Environment Processors?
 When working with different robot environments (LIBERO, MetaWorld, Aloha, etc.), each environment often has unique data formats, coordinate systems, and conventions that need standardization **before** policy processing. Without environment processors, these transformations would be:
 1. **Hardcoded in environment code** - Making it difficult to experiment with different state representations
 2. **Duplicated across policies** - Each policy would need to handle environment-specific quirks
 3. **Mixed with policy logic** - Violating separation of concerns and making debugging harder
 Environment processors solve this by providing a **dedicated processing layer** between raw environment observations and policy inputs.
 ## The Processing Pipeline
 Here's how data flows through the complete processing pipeline during evaluation:
 ```python
 # In lerobot_eval.py rollout() function:
 # 1. Raw environment observation (numpy arrays, various formats)
 raw_observation = env.step(action)
 # 2. Convert numpy to torch, normalize images [0,1]
 observation = preprocess_observation(raw_observation)
 # 3. Add task metadata (for multi-task environments)
 observation = add_envs_task(env, observation)
 # 4. ENVIRONMENT-SPECIFIC preprocessing (NEW!)
 #    - Flatten robot states
 #    - Rotate images to match dataset conventions
 #    - Handle environment-specific coordinate systems
 observation = env_preprocessor(observation)
 # 5. POLICY-SPECIFIC preprocessing
 #    - Normalize with dataset statistics
 #    - Add batch dimensions
 #    - Move to GPU
 #    - Tokenize language instructions
 observation = preprocessor(observation)
 # 6. Policy inference
 action = policy.select_action(observation)
 # 7. POLICY-SPECIFIC postprocessing
 #    - Unnormalize actions
 #    - Remove batch dimensions
 action = postprocessor(action)
 # 8. ENVIRONMENT-SPECIFIC postprocessing (NEW!)
 #    - Convert action formats if needed
 #    - Apply environment-specific constraints
 action_transition = {"action": action}
 action_transition = env_postprocessor(action_transition)
 action = action_transition["action"]
 # 9. Execute in environment
 env.step(action)
 ```
 ## The Benefits
 ### 1. **Separation of Concerns**
 Environment processors handle transformations specific to the **environment's data format**, while policy processors handle transformations specific to the **model's requirements**.
 ```python
 # ❌ Before: Mixed concerns
 class LiberoVLAPolicy:
    def preprocess(self, obs):
        # Environment-specific: Flatten robot state (shouldn't be in policy!)
        state = self._flatten_robot_state(obs["robot_state"])
        # Policy-specific: Normalize with dataset stats
        state = self.normalizer(state)
        return state
 # ✅ After: Clear separation
 # Environment processor: Handles LIBERO's nested robot state
 env_preprocessor = LiberoProcessorStep()  # Flattens robot_state
 # Policy processor: Handles model requirements
 policy_preprocessor = NormalizerProcessorStep(stats=dataset_stats)
 ```
 ### 2. **Flexibility and Reusability**
 The same policy can work with different environment processors, and the same environment processor can work with different policies:
 ```python
 # Use SmolVLA policy with LIBERO environment
 libero_preprocessor, libero_postprocessor = make_env_pre_post_processors(libero_cfg)
 smolvla_preprocessor, smolvla_postprocessor = make_pre_post_processors(smolvla_cfg)
 # Or use ACT policy with the same LIBERO environment
 libero_preprocessor, libero_postprocessor = make_env_pre_post_processors(libero_cfg)
 act_preprocessor, act_postprocessor = make_pre_post_processors(act_cfg)
 ```
 ### 3. **Easier Experimentation**
 Want to try different state representations for LIBERO? Just create a new processor:
 ```python
 # Original: 8D state (pos + quat→axisangle + gripper)
@ProcessorStepRegistry.register("libero_processor")
 class LiberoProcessorStep(ObservationProcessorStep):
    def _process_observation(self, obs):
        eef_pos = robot_state["eef"]["pos"]          # 3D
        eef_axisangle = quat2axisangle(quat)         # 3D
        gripper = robot_state["gripper"]["qpos"]     # 2D
        state = torch.cat([eef_pos, eef_axisangle, gripper], dim=-1)  # 8D
        return state
 # Experiment: Add velocity for better control
@ProcessorStepRegistry.register("libero_velocity_processor")
 class LiberoVelocityProcessorStep(ObservationProcessorStep):
    def _process_observation(self, obs):
        # Include velocities for 14D state
        eef_pos = robot_state["eef"]["pos"]          # 3D
        eef_axisangle = quat2axisangle(quat)         # 3D
        eef_vel = robot_state["eef"]["vel"]          # 3D  (NEW)
        gripper_pos = robot_state["gripper"]["qpos"] # 2D
        gripper_vel = robot_state["gripper"]["qvel"] # 3D  (NEW)
        state = torch.cat([eef_pos, eef_axisangle, eef_vel,
                          gripper_pos, gripper_vel], dim=-1)  # 14D
        return state
 ```
 ### 4. **Cleaner Environment Code**
 Environments expose **all available data** without needing to know what downstream models will use:
 ```python
 # LIBERO environment exposes full robot state
 observation = {
    "pixels": {"image": img, "image2": img2},
    "robot_state": {
        "eef": {"pos": ..., "quat": ..., "vel": ..., "mat": ..., "axisangle": ...},
        "gripper": {"qpos": ..., "qvel": ...},
        "joints": {"pos": ..., "vel": ...}
    }
 }
 # Environment processor decides what to use
 # Policy processor handles model-specific transformations
 ```
 ## Using Environment Processors
 ### Factory Function
 The `make_env_pre_post_processors` function follows the same pattern as `make_pre_post_processors` for policies:
 ```python
 from lerobot.envs.factory import make_env_pre_post_processors
 from lerobot.envs.configs import LiberoEnv, PushtEnv
 # For LIBERO: Returns LiberoProcessorStep in preprocessor
 libero_cfg = LiberoEnv(task="libero_spatial", camera_name=["agentview"])
 env_preprocessor, env_postprocessor = make_env_pre_post_processors(libero_cfg)
 # For other environments: Returns identity processors (no-op)
 pusht_cfg = PushtEnv()
 env_preprocessor, env_postprocessor = make_env_pre_post_processors(pusht_cfg)
 ```
 ### Implementation in `envs/factory.py`
 ```python
 def make_env_pre_post_processors(
    env_cfg: EnvConfig,
 ) -> tuple[
    PolicyProcessorPipeline[dict[str, Any], dict[str, Any]],
    PolicyProcessorPipeline[dict[str, Any], dict[str, Any]],
 ]:
    """
    Create preprocessor and postprocessor pipelines for environment observations.
    Args:
        env_cfg: The configuration of the environment.
    Returns:
        A tuple containing:
            - preprocessor: Pipeline that processes environment observations
            - postprocessor: Pipeline that processes environment outputs
    """
    # For LIBERO environments, add the LiberoProcessorStep to preprocessor
    if isinstance(env_cfg, LiberoEnv) or "libero" in env_cfg.type:
        preprocessor = PolicyProcessorPipeline(steps=[LiberoProcessorStep()])
    else:
        # For all other environments, return an identity preprocessor
        preprocessor = PolicyProcessorPipeline(steps=[])
    # Postprocessor is currently identity for all environments
    # Future: Could add environment-specific action transformations
    postprocessor = PolicyProcessorPipeline(steps=[])
    return preprocessor, postprocessor
 ```
 ### Integration in Evaluation
 In `lerobot_eval.py`, the environment processors are created once and used throughout:
 ```python
 def eval_main(cfg: EvalPipelineConfig):
    # Create environment
    envs = make_env(cfg.env, n_envs=cfg.eval.batch_size)
    # Create policy
    policy = make_policy(cfg=cfg.policy, env_cfg=cfg.env)
    # Create policy processors
    preprocessor, postprocessor = make_pre_post_processors(
        policy_cfg=cfg.policy,
        pretrained_path=cfg.policy.pretrained_path,
    )
    # Create environment processors (NEW!)
    env_preprocessor, env_postprocessor = make_env_pre_post_processors(env_cfg=cfg.env)
    # Run evaluation with both processor types
    eval_policy_all(
        envs=envs,
        policy=policy,
        env_preprocessor=env_preprocessor,      # Environment-specific
        env_postprocessor=env_postprocessor,    # Environment-specific
        preprocessor=preprocessor,              # Policy-specific
        postprocessor=postprocessor,            # Policy-specific
        n_episodes=cfg.eval.n_episodes,
    )
 ```
 ## Example: LIBERO Environment Processor
 The `LiberoProcessorStep` demonstrates a real-world environment processor:
 ```python
 from lerobot.processor.pipeline import ObservationProcessorStep
@dataclass
@ProcessorStepRegistry.register(name="libero_processor")
 class LiberoProcessorStep(ObservationProcessorStep):
    """
    Processes LIBERO observations into the LeRobot format.
    **State Processing:**
    - Extracts end-effector position (3D)
    - Converts quaternion to axis-angle representation (3D)
    - Extracts gripper joint positions (2D)
    - Concatenates into 8D state vector
    **Image Processing:**
    - Rotates images 180° to match HuggingFaceVLA/libero convention
    """
    def _process_observation(self, observation):
        processed_obs = observation.copy()
        # Process images: Flip 180° for camera convention
        for key in list(processed_obs.keys()):
            if key.startswith("observation.images."):
                img = processed_obs[key]
                img = torch.flip(img, dims=[2, 3])  # Flip H and W
                processed_obs[key] = img
        # Process robot_state: Flatten to 8D vector
        if "observation.robot_state" in processed_obs:
            robot_state = processed_obs.pop("observation.robot_state")
            eef_pos = robot_state["eef"]["pos"]           # (B, 3)
            eef_quat = robot_state["eef"]["quat"]         # (B, 4)
            gripper_qpos = robot_state["gripper"]["qpos"] # (B, 2)
            # Convert quaternion to axis-angle
            eef_axisangle = self._quat2axisangle(eef_quat)  # (B, 3)
            # Concatenate into single state vector
            state = torch.cat((eef_pos, eef_axisangle, gripper_qpos), dim=-1)
            state = state.float()
            processed_obs["observation.state"] = state
        return processed_obs
 ```
 ### Why These Transformations?
 1. **Image Rotation**: The HuggingFaceVLA/libero dataset has images rotated 180° from the raw LIBERO simulator. The processor handles this convention mismatch so policies trained on the dataset work seamlessly.
 2. **State Flattening**: The raw LIBERO environment exposes nested dictionaries with all available state information (position, quaternion, velocity, matrix representation, etc.). The processor:
   - Selects the relevant components (pos, quat, gripper)
   - Converts quaternion to axis-angle (more suitable for learning)
   - Flattens to a single 8D vector that policies expect
 3. **Flexibility**: The environment still exposes **all** raw data. If you want to try different state representations (e.g., including velocities, using matrix representation instead of axis-angle), you can create a new processor without modifying the environment code.
 ## Adding Environment Processors for New Environments
 To add environment processors for a new environment:
 ### 1. Create the Processor Step
 ```python
 # In src/lerobot/processor/env_processor.py
@dataclass
@ProcessorStepRegistry.register(name="myenv_processor")
 class MyEnvProcessorStep(ObservationProcessorStep):
    """Process observations from MyEnv."""
    def _process_observation(self, observation):
        processed = observation.copy()
        # Your environment-specific transformations
        if "myenv.specific.state" in processed:
            state = processed.pop("myenv.specific.state")
            # Transform to standard format
            processed["observation.state"] = self._transform_state(state)
        return processed
 ```
 ### 2. Update the Factory
 ```python
 # In src/lerobot/envs/factory.py
 def make_env_pre_post_processors(env_cfg: EnvConfig):
    if isinstance(env_cfg, LiberoEnv) or "libero" in env_cfg.type:
        preprocessor = PolicyProcessorPipeline(steps=[LiberoProcessorStep()])
    elif isinstance(env_cfg, MyEnvConfig) or "myenv" in env_cfg.type:
        preprocessor = PolicyProcessorPipeline(steps=[MyEnvProcessorStep()])
    else:
        preprocessor = PolicyProcessorPipeline(steps=[])
    postprocessor = PolicyProcessorPipeline(steps=[])
    return preprocessor, postprocessor
 ```
 ### 3. Use in Evaluation
 No changes needed! The evaluation script automatically uses the appropriate processor:
 ```bash
 # --env.type=myenv automatically uses MyEnvProcessorStep
 lerobot-eval \
    --policy.path=lerobot/my_policy \
    --env.type=myenv \
    --eval.n_episodes=10
 ```
 ## Future: Environment Postprocessors
 Currently, postprocessors are identity (no-op) for all environments. Future use cases include:
 ### Action Space Transformations
 ```python
@dataclass
 class MyEnvActionPostprocessor(ProcessorStep):
    """Convert policy actions to environment-specific format."""
    def __call__(self, transition: EnvTransition) -> EnvTransition:
        action = transition["action"]
        # Example: Convert from Cartesian to joint space
        if self.action_space == "joint":
            action = self.ik_solver(action)
        # Example: Apply environment-specific safety limits
        action = torch.clamp(action, self.min_action, self.max_action)
        transition["action"] = action
        return transition
 ```
 ### Coordinate System Conversions
 ```python
@dataclass
 class CoordinateTransformPostprocessor(ProcessorStep):
    """Transform actions between coordinate systems."""
    def __call__(self, transition: EnvTransition) -> EnvTransition:
        action = transition["action"]
        # Example: Policy outputs in world frame, env expects base frame
        action = self.world_to_base_transform(action)
        transition["action"] = action
        return transition
 ```
 ## Best Practices
 1. **Keep environment processors simple**: They should only handle environment-specific data format issues, not complex learning-related transformations.
 2. **Use policy processors for model requirements**: Normalization, batching, device placement, and tokenization belong in policy processors.
 3. **Expose all data from environments**: Let processors decide what to use rather than hardcoding choices in the environment.
 4. **Document conventions**: Clearly document any coordinate system conventions, camera orientations, or data formats that your processor handles.
 5. **Test independently**: Environment processors should be testable without loading full policies or environments.
 ## Summary
 Environment processors provide a **clean separation** between environment-specific data transformations and policy-specific model requirements. This architecture:
 - ✅ Enables easy experimentation with different state representations
 - ✅ Allows policies to work seamlessly across different environments
 - ✅ Keeps environment code focused on simulation/hardware interface
 - ✅ Makes processor pipelines more maintainable and debuggable
 - ✅ Follows the single responsibility principle
 The key insight: **Environments define data formats, processors standardize them, policies consume standardized data.** Each layer has a clear, focused responsibility.
@@ -0,0 +1,301 @@
 # LeIsaac × LeRobot EnvHub
 LeRobot EnvHub now supports **imitation learning in simulation** with LeIsaac.
 Spin up everyday manipulation tasks, teleoperate the robot, collect demos, push them to the Hub, and train policies in LeRobot — all in one loop.
 [LeIsaac](https://github.com/LightwheelAI/leisaac) integrates with IsaacLab and the SO101 Leader/Follower setup to provide:
 - 🕹️ **Teleoperation-first workflows** for data collection
 - 📦 **Built-in data conversion** ready for LeRobot training
 - 🤖 **Everyday skills** like picking oranges, lifting cubes, cleaning tables, and folding cloth
 - ☁️ **Ongoing upgrades** from [LightWheel](https://lightwheel.ai/): cloud simulation, EnvHub support, Sim2Real tooling, and more
 Below you’ll find the currently supported LeIsaac tasks exposed through LeRobot EnvHub.
 # Available Environments
 The following table lists all available tasks and environments in LeIsaac × LeRobot EnvHub. You can also get the latest list of environments by running the following command:
 ```bash
 python scripts/environments/list_envs.py
 ```
 | Task                                                                                                                                                            | Environment ID                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                | Task Description                                                                                                           | Related Robot                                              |
 | :-------------------------------------------------------------------------------------------------------------------------------------------------------------- | :-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | :------------------------------------------------------------------------------------------------------------------------- | :--------------------------------------------------------- |
 | <video src="https://github.com/user-attachments/assets/466eddff-f720-4f99-94d5-5e123e4c302c" autoplay loop muted playsinline style="max-width: 300px;"></video> | [LeIsaac-SO101-PickOrange-v0](https://github.com/LightwheelAI/leisaac/blob/main/source/leisaac/leisaac/tasks/pick_orange/pick_orange_env_cfg.py)<br /><br />[LeIsaac-SO101-PickOrange-Direct-v0](https://github.com/LightwheelAI/leisaac/blob/main/source/leisaac/leisaac/tasks/pick_orange/direct/pick_orange_env.py)                                                                                                                                                                                                                        | Pick three oranges and put them into the plate, then reset the arm to rest state.                                          | Single-Arm SO101 Follower                                  |
 | <video src="https://github.com/user-attachments/assets/1e4eb83a-0b38-40fb-a0b2-ddb0fe201e6d" autoplay loop muted playsinline style="max-width: 300px;"></video> | [LeIsaac-SO101-LiftCube-v0](https://github.com/LightwheelAI/leisaac/blob/main/source/leisaac/leisaac/tasks/lift_cube/lift_cube_env_cfg.py)<br /><br />[LeIsaac-SO101-LiftCube-Direct-v0](https://github.com/LightwheelAI/leisaac/blob/main/source/leisaac/leisaac/tasks/lift_cube/direct/lift_cube_env.py)                                                                                                                                                                                                                                    | Lift the red cube up.                                                                                                      | Single-Arm SO101 Follower                                  |
 | <video src="https://github.com/user-attachments/assets/e49d8f1c-dcc9-412b-a88f-100680d8a45b" autoplay loop muted playsinline style="max-width: 300px;"></video> | [LeIsaac-SO101-CleanToyTable-v0](https://github.com/LightwheelAI/leisaac/blob/main/source/leisaac/leisaac/tasks/clean_toy_table/clean_toy_table_env_cfg.py)<br /><br />[LeIsaac-SO101-CleanToyTable-BiArm-v0](https://github.com/LightwheelAI/leisaac/blob/main/source/leisaac/leisaac/tasks/clean_toy_table/clean_toy_table_bi_arm_env_cfg.py)<br /><br />[LeIsaac-SO101-CleanToyTable-BiArm-Direct-v0](https://github.com/LightwheelAI/leisaac/blob/main/source/leisaac/leisaac/tasks/clean_toy_table/direct/clean_toy_table_bi_arm_env.py) | Pick up two letter "e" objects and put them into the box, then reset the arm to rest state.                                | Single-Arm SO101 Follower<br /><br />Bi-Arm SO101 Follower |
 | <video src="https://github.com/user-attachments/assets/e29a0f8a-9286-4ce6-b45d-342c3d3ba754" autoplay loop muted playsinline style="max-width: 300px;"></video> | [LeIsaac-SO101-FoldCloth-BiArm-v0](https://github.com/LightwheelAI/leisaac/blob/main/source/leisaac/leisaac/tasks/fold_cloth/fold_cloth_bi_arm_env_cfg.py)<br /><br />[LeIsaac-SO101-FoldCloth-BiArm-Direct-v0](https://github.com/LightwheelAI/leisaac/blob/main/source/leisaac/leisaac/tasks/fold_cloth/direct/fold_cloth_bi_arm_env.py)                                                                                                                                                                                                    | Fold the cloth, and reset the arm to rest state.<br /><br />_Note: Only the DirectEnv supports check_success in this task._ | Bi-Arm SO101 Follower                                      |
 # Load LeIsaac directly in LeRobot with one line of code
 > EnvHub: Share LeIsaac environments through HuggingFace
 [EnvHub](https://huggingface.co/docs/lerobot/envhub) is our reproducible environment hub: spin up a packaged simulation with one line, experiment immediately, and publish your own tasks for the community.
 LeIsaac offers EnvHub support so you can consume or share tasks with only a few commands.
 <video
  controls
  src="https://github.com/user-attachments/assets/687666f5-ebe0-421d-84a0-eb86116ac5f8"
  style={{ width: "100%", maxWidth: "960px", borderRadius: "8px" }}
 />
 ## Getting Started: Environment Setup
 Run the following commands to set up your environment:
 ```bash
 # Refer to Getting Started/Installation to install leisaac first
 conda create -n leisaac_envhub python=3.11
 conda activate leisaac_envhub
 conda install -c "nvidia/label/cuda-12.8.1" cuda-toolkit
 pip install -U torch==2.7.0 torchvision==0.22.0 --index-url https://download.pytorch.org/whl/cu128
 pip install 'leisaac[isaaclab] @ git+https://github.com/LightwheelAI/leisaac.git#subdirectory=source/leisaac' --extra-index-url https://pypi.nvidia.com
 # Install lerobot
 pip install lerobot==0.4.1
 # Fix numpy version
 pip install numpy==1.26.0
 ```
 ## Usage Example
 EnvHub exposes every LeIsaac-supported task in a uniform interface. The examples below load `so101_pick_orange` and demonstrate a random-action rollout and an interactive teleoperation.
 ### Random Action
 <details>
 <summary>Click to expand code example</summary>
 ```python
 # envhub_random_action.py
 # Minimal example: load a LeIsaac task from EnvHub and drive it with random actions.
 import torch
 from lerobot.envs.factory import make_env
 # Load from the hub
 envs_dict = make_env("LightwheelAI/leisaac_env:envs/so101_pick_orange.py", n_envs=1, trust_remote_code=True)
 # Access the environment
 suite_name = next(iter(envs_dict))
 sync_vector_env = envs_dict[suite_name][0]
 # retrieve the isaac environment from the sync vector env
 env = sync_vector_env.envs[0].unwrapped
 # Use it like any gym environment.
 # The rollout never terminates on its own, so the loop is wrapped in
 # try/finally: stopping it with Ctrl+C (or any error) still closes the env.
 try:
    obs, info = env.reset()
    while True:
        action = torch.tensor(env.action_space.sample())
        obs, reward, terminated, truncated, info = env.step(action)
        if terminated or truncated:
            obs, info = env.reset()
 except KeyboardInterrupt:
    # Ctrl+C is the expected way to stop this example
    pass
 finally:
    env.close()
 ```
 </details>
 ```bash
 python envhub_random_action.py
 ```
 You should see the SO101 arm swinging under purely random commands.
 ### Teleoperation
 LeRobot’s teleoperation stack can drive the simulated arm.
 Connect the SO101 Leader controller and run the calibration command below.
 ```bash
 lerobot-calibrate \
    --teleop.type=so101_leader \
    --teleop.port=/dev/ttyACM0 \
    --teleop.id=leader
 ```
 And then launch the teleop script.
 <details>
 <summary>Click to expand code example</summary>
 ```python
 # envhub_teleop_example.py
 import logging
 import time
 import gymnasium as gym
 from dataclasses import asdict, dataclass
 from pprint import pformat
 from lerobot.teleoperators import (  # noqa: F401
    Teleoperator,
    TeleoperatorConfig,
    make_teleoperator_from_config,
    so101_leader,
 )
 from lerobot.utils.robot_utils import precise_sleep
 from lerobot.utils.utils import init_logging
 from lerobot.envs.factory import make_env
@dataclass
 class TeleoperateConfig:
    """Settings for the EnvHub teleoperation example."""
    # Configuration handed to make_teleoperator_from_config to build the teleoperator
    teleop: TeleoperatorConfig
    # Name of the LeIsaac EnvHub script to load (interpolated as envs/<env_name>.py)
    env_name: str = "so101_pick_orange"
    # Target control-loop rate; each iteration is paced to 1 / fps seconds
    fps: int = 60
@dataclass
 class EnvWrap:
    """Minimal holder that exposes the environment as an ``env`` attribute.

    An instance is passed as the second argument to ``preprocess_device_action``
    in ``teleop_loop``; presumably that helper reads ``.env`` — confirm against leisaac.
    """
    # The unwrapped environment being teleoperated
    env: gym.Env
 def make_env_from_leisaac(env_name: str = "so101_pick_orange"):
    """Load a LeIsaac task from EnvHub and return its unwrapped environment.

    Args:
        env_name: Name of the script under ``envs/`` in the
            ``LightwheelAI/leisaac_env`` hub repo, without the ``.py`` suffix.

    Returns:
        The ``unwrapped`` environment of the first env of the first vector env
        in the first suite returned by ``make_env``.
    """
    hub_script = f'LightwheelAI/leisaac_env:envs/{env_name}.py'
    suites = make_env(hub_script, n_envs=1, trust_remote_code=True)
    # make_env returns a mapping keyed by suite name; only the first suite is used
    first_suite = next(iter(suites))
    vector_env = suites[first_suite][0]
    return vector_env.envs[0].unwrapped
 def teleop_loop(teleop: Teleoperator, env: gym.Env, fps: int):
    """Drive ``env`` with actions read from ``teleop``, paced at ``fps``.

    Loops forever: each iteration reads the teleoperator action, converts it
    with leisaac's ``preprocess_device_action``, steps the environment (resetting
    it when an episode ends), sleeps for the rest of the frame period and prints
    the measured loop time.

    Args:
        teleop: Teleoperator that provides ``get_action()``.
        env: Environment to step; must expose ``cfg.dynamic_reset_gripper_effort_limit``.
        fps: Target number of loop iterations per second.
    """
    # leisaac is imported inside the function, in the same order as before
    from leisaac.devices.action_process import preprocess_device_action
    from leisaac.assets.robots.lerobot import SO101_FOLLOWER_MOTOR_LIMITS
    from leisaac.utils.env_utils import dynamic_reset_gripper_effort_limit_sim

    wrapped_env = EnvWrap(env=env)
    obs, info = env.reset()
    while True:
        tick_start = time.perf_counter()

        if env.cfg.dynamic_reset_gripper_effort_limit:
            dynamic_reset_gripper_effort_limit_sim(env, 'so101leader')

        leader_action = teleop.get_action()
        # Teleoperator keys end in ".pos"; strip that suffix to get the names used as joint_state keys
        joint_state = {name.removesuffix(".pos"): value for name, value in leader_action.items()}
        device_action = {
            "so101_leader": True,
            "joint_state": joint_state,
            "motor_limits": SO101_FOLLOWER_MOTOR_LIMITS,
        }
        env_action = preprocess_device_action(device_action, wrapped_env)

        obs, reward, terminated, truncated, info = env.step(env_action)
        if terminated or truncated:
            obs, info = env.reset()

        # Sleep for whatever is left of this frame's time budget
        dt_s = time.perf_counter() - tick_start
        precise_sleep(1 / fps - dt_s)

        loop_s = time.perf_counter() - tick_start
        print(f"\ntime: {loop_s * 1e3:.2f}ms ({1 / loop_s:.0f} Hz)")
 def teleoperate(cfg: TeleoperateConfig):
    """Build the teleoperator and environment from ``cfg`` and run the teleop loop.

    Device connection and environment initialization happen inside the guarded
    section so that a failure in either still releases what was already
    acquired. ``KeyboardInterrupt`` (Ctrl+C) is treated as a normal stop.

    Args:
        cfg: Teleoperator configuration, environment name and loop rate.
    """
    init_logging()
    logging.info(pformat(asdict(cfg)))
    teleop = make_teleoperator_from_config(cfg.teleop)
    env = make_env_from_leisaac(cfg.env_name)
    try:
        teleop.connect()
        # Only some environments expose initialize(); call it when present
        if hasattr(env, 'initialize'):
            env.initialize()
        teleop_loop(teleop=teleop, env=env, fps=cfg.fps)
    except KeyboardInterrupt:
        pass
    finally:
        try:
            # connect() may have failed, so only disconnect a connected device
            if teleop.is_connected:
                teleop.disconnect()
        finally:
            # Close the env even if disconnecting the device raised
            env.close()
 def main():
    """Run the teleoperation example with an SO101 leader on ``/dev/ttyACM0``."""
    leader_cfg = so101_leader.SO101LeaderConfig(
        port="/dev/ttyACM0",
        id='leader',
        use_degrees=False,
    )
    cfg = TeleoperateConfig(teleop=leader_cfg, env_name="so101_pick_orange", fps=60)
    teleoperate(cfg)
 if __name__ == "__main__":
    main()
 ```
 </details>
 ```bash
 python envhub_teleop_example.py
 ```
 Running the script lets you operate the simulated arm using the physical Leader device.
 ## ☁️ Cloud Simulation (No GPU Required)
 Don’t have a local GPU or the right drivers? No problem! You can run LeIsaac entirely in the cloud with zero setup.
 LeIsaac works out-of-the-box on **NVIDIA Brev**, giving you a fully configured environment directly in your browser.
 👉 **Start here:** [https://lightwheelai.github.io/leisaac/docs/cloud_simulation/nvidia_brev](https://lightwheelai.github.io/leisaac/docs/cloud_simulation/nvidia_brev)
 Once your instance is deployed, simply open the link for **port 80 (HTTP)** to launch **Visual Studio Code Server** (default password: `password`). From there, you can run simulations, edit code, and visualize IsaacLab environments — all from your web browser.
 **No GPU, no drivers, no local installation. Just click and run.**
 ## Additional Notes
 We keep EnvHub coverage aligned with the LeIsaac tasks. Currently supported:
 - `so101_pick_orange`
 - `so101_lift_cube`
 - `so101_clean_toytable`
 - `bi_so101_fold_cloth`
 Switch tasks by targeting a different script when calling `make_env`, for example:
 ```python
 envs_dict_pick_orange = make_env("LightwheelAI/leisaac_env:envs/so101_pick_orange.py", n_envs=1, trust_remote_code=True)
 envs_dict_lift_cube = make_env("LightwheelAI/leisaac_env:envs/so101_lift_cube.py", n_envs=1, trust_remote_code=True)
 envs_dict_clean_toytable = make_env("LightwheelAI/leisaac_env:envs/so101_clean_toytable.py", n_envs=1, trust_remote_code=True)
 envs_dict_fold_cloth = make_env("LightwheelAI/leisaac_env:envs/bi_so101_fold_cloth.py", n_envs=1, trust_remote_code=True)
 ```
 Note: when working with `bi_so101_fold_cloth`, call `initialize()` immediately after retrieving the env before performing any other operations:
 <details>
 <summary>Click to expand code example</summary>
 ```python
 import torch
 from lerobot.envs.factory import make_env
 # Load from the hub
 envs_dict = make_env("LightwheelAI/leisaac_env:envs/bi_so101_fold_cloth.py", n_envs=1, trust_remote_code=True)
 # Access the environment
 suite_name = next(iter(envs_dict))
 sync_vector_env = envs_dict[suite_name][0]
 # retrieve the isaac environment from the sync vector env
 env = sync_vector_env.envs[0].unwrapped
 # NOTE: initialize() first
 env.initialize()
 # other operations with env...
 ```
 </details>
@@ -393,7 +393,7 @@ import time
 from lerobot.datasets.lerobot_dataset import LeRobotDataset
 from lerobot.robots.so100_follower.config_so100_follower import SO100FollowerConfig
 from lerobot.robots.so100_follower.so100_follower import SO100Follower
-from lerobot.utils.robot_utils import busy_wait
+from lerobot.utils.robot_utils import precise_sleep
 from lerobot.utils.utils import log_say
 episode_idx = 0
@@ -415,7 +415,7 @@ for idx in range(dataset.num_frames):
    }
    robot.send_action(action)
-    busy_wait(1.0 / dataset.fps - (time.perf_counter() - t0))
+    precise_sleep(1.0 / dataset.fps - (time.perf_counter() - t0))
 robot.disconnect()
 ```
@@ -1,220 +0,0 @@
 # Imitation Learning in Sim
 This tutorial will explain how to train a neural network to control a robot in simulation with imitation learning.
 **You'll learn:**
 1. How to record a dataset in simulation with [gym-hil](https://github.com/huggingface/gym-hil) and visualize the dataset.
 2. How to train a policy using your data.
 3. How to evaluate your policy in simulation and visualize the results.
 For the simulation environment we use the same [repo](https://github.com/huggingface/gym-hil) that is also being used by the Human-In-the-Loop (HIL) reinforcement learning algorithm.
 This environment is based on [MuJoCo](https://mujoco.org) and allows you to record datasets in LeRobotDataset format.
 Teleoperation is easiest with a controller like the Logitech F710, but you can also use your keyboard if you are up for the challenge.
 ## Installation
 First, install the `gym_hil` package within the LeRobot environment, go to your LeRobot folder and run this command:
 ```bash
 pip install -e ".[hilserl]"
 ```
 ## Teleoperate and Record a Dataset
 To use `gym_hil` with LeRobot, you need to use a configuration file. An example config file can be found [here](https://huggingface.co/datasets/lerobot/config_examples/resolve/main/sim_il/env_config.json).
 To teleoperate and collect a dataset, we need to modify this config file. Here's an example configuration for imitation learning data collection:
 ```json
 {
  "env": {
    "type": "gym_manipulator",
    "name": "gym_hil",
    "task": "PandaPickCubeGamepad-v0",
    "fps": 10
  },
  "dataset": {
    "repo_id": "your_username/il_gym",
    "root": null,
    "task": "pick_cube",
    "num_episodes_to_record": 30,
    "replay_episode": null,
    "push_to_hub": true
  },
  "mode": "record",
  "device": "cuda"
 }
 ```
 Key configuration points:
 - Set your `repo_id` in the `dataset` section: `"repo_id": "your_username/il_gym"`
 - Set `num_episodes_to_record: 30` to collect 30 demonstration episodes
 - Ensure `mode` is set to `"record"`
 - If you don't have an NVIDIA GPU, change `"device": "cuda"` to `"mps"` for macOS or `"cpu"`
 - To use keyboard instead of gamepad, change `"task"` to `"PandaPickCubeKeyboard-v0"`
 Then we can run this command to start:
 <hfoptions id="teleop_sim">
 <hfoption id="Linux">
 ```bash
 python -m lerobot.rl.gym_manipulator --config_path path/to/env_config_gym_hil_il.json
 ```
 </hfoption>
 <hfoption id="MacOS">
 ```bash
 mjpython -m lerobot.rl.gym_manipulator --config_path path/to/env_config_gym_hil_il.json
 ```
 </hfoption>
 </hfoptions>
 Once rendered you can teleoperate the robot with the gamepad or keyboard, below you can find the gamepad/keyboard controls.
 Note that to teleoperate the robot you have to hold the "Human Take Over Pause Policy" Button `RB` to enable control!
 **Gamepad Controls**
 <p align="center">
  <img
    src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/lerobot/gamepad_guide.jpg?raw=true"
    alt="Figure shows the control mappings on a Logitech gamepad."
    title="Gamepad Control Mapping"
    width="100%"
  ></img>
 </p>
 <p align="center">
  <i>Gamepad button mapping for robot control and episode management</i>
 </p>
 **Keyboard controls**
 For keyboard controls use the `spacebar` to enable control and the following keys to move the robot:
 ```bash
  Arrow keys: Move in X-Y plane
  Shift and Shift_R: Move in Z axis
  Right Ctrl and Left Ctrl: Open and close gripper
  ESC: Exit
 ```
 ## Visualize a dataset
 If you uploaded your dataset to the hub you can [visualize your dataset online](https://huggingface.co/spaces/lerobot/visualize_dataset) by copy pasting your repo id.
 <p align="center">
  <img
    src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/lerobot/dataset_visualizer_sim.png"
    alt="Figure shows the dataset visualizer"
    title="Dataset visualization"
    width="100%"
  ></img>
 </p>
 <p align="center">
  <i>Dataset visualizer</i>
 </p>
 ## Train a policy
 To train a policy to control your robot, use the [`lerobot-train`](https://github.com/huggingface/lerobot/blob/main/src/lerobot/scripts/train.py) script. A few arguments are required. Here is an example command:
 ```bash
 lerobot-train \
  --dataset.repo_id=${HF_USER}/il_gym \
  --policy.type=act \
  --output_dir=outputs/train/il_sim_test \
  --job_name=il_sim_test \
  --policy.device=cuda \
  --wandb.enable=true
 ```
 Let's explain the command:
 1. We provided the dataset as argument with `--dataset.repo_id=${HF_USER}/il_gym`.
 2. We provided the policy with `policy.type=act`. This loads configurations from [`configuration_act.py`](https://github.com/huggingface/lerobot/blob/main/src/lerobot/policies/act/configuration_act.py). Importantly, this policy will automatically adapt to the number of motor states, motor actions and cameras of your robot (e.g. `laptop` and `phone`) which have been saved in your dataset.
 3. We provided `policy.device=cuda` since we are training on a Nvidia GPU, but you could use `policy.device=mps` to train on Apple silicon.
 4. We provided `wandb.enable=true` to use [Weights and Biases](https://docs.wandb.ai/quickstart) for visualizing training plots. This is optional but if you use it, make sure you are logged in by running `wandb login`.
 Training can take several hours depending on your hardware; 100k steps (which is the default) will take about 1h on an Nvidia A100. You will find checkpoints in `outputs/train/il_sim_test/checkpoints`.
 #### Train using Colab
 If your local computer doesn't have a powerful GPU you could utilize Google Colab to train your model by following the [ACT training notebook](./notebooks#training-act).
 #### Upload policy checkpoints
 Once training is done, upload the latest checkpoint with:
 ```bash
 huggingface-cli upload ${HF_USER}/il_sim_test \
  outputs/train/il_sim_test/checkpoints/last/pretrained_model
 ```
 You can also upload intermediate checkpoints with:
 ```bash
 CKPT=010000
 huggingface-cli upload ${HF_USER}/il_sim_test${CKPT} \
  outputs/train/il_sim_test/checkpoints/${CKPT}/pretrained_model
 ```
 ## Evaluate your policy in Sim
 To evaluate your policy we have to use a configuration file. An example can be found [here](https://huggingface.co/datasets/lerobot/config_examples/resolve/main/sim_il/eval_config.json).
 Here's an example evaluation configuration:
 ```json
 {
  "env": {
    "type": "gym_manipulator",
    "name": "gym_hil",
    "task": "PandaPickCubeGamepad-v0",
    "fps": 10
  },
  "dataset": {
    "repo_id": "your_username/il_sim_dataset",
    "dataset_root": null,
    "task": "pick_cube"
  },
  "pretrained_policy_name_or_path": "your_username/il_sim_model",
  "device": "cuda"
 }
 ```
 Make sure to replace:
 - `repo_id` with the dataset you trained on (e.g., `your_username/il_sim_dataset`)
 - `pretrained_policy_name_or_path` with your model ID (e.g., `your_username/il_sim_model`)
 Then you can run this command to visualize your trained policy:
 <hfoptions id="eval_policy">
 <hfoption id="Linux">
 ```bash
 python -m lerobot.rl.eval_policy --config_path=path/to/eval_config_gym_hil.json
 ```
 </hfoption>
 <hfoption id="MacOS">
 ```bash
 mjpython -m lerobot.rl.eval_policy --config_path=path/to/eval_config_gym_hil.json
 ```
 </hfoption>
 </hfoptions>
 > [!WARNING]
 > While the main workflow of training ACT in simulation is straightforward, there is significant room for exploring how to set up the task, define the initial state of the environment, and determine the type of data required during collection to learn the most effective policy. If your trained policy doesn't perform well, investigate the quality of the dataset it was trained on using our visualizers, as well as the action values and various hyperparameters related to ACT and the simulation.
 Congrats 🎉, you have finished this tutorial. If you want to continue with using LeRobot in simulation follow this [Tutorial on reinforcement learning in sim with HIL-SERL](https://huggingface.co/docs/lerobot/hilserl_sim)
 > [!TIP]
 > If you have any questions or need help, please reach out on [Discord](https://discord.com/invite/s3KuuzsPFb).
@@ -45,7 +45,7 @@ from lerobot.robots import (  # noqa: F401
    so101_follower,
 )
 from lerobot.utils.constants import ACTION
-from lerobot.utils.robot_utils import busy_wait
+from lerobot.utils.robot_utils import precise_sleep
 from lerobot.utils.utils import (
    init_logging,
    log_say,
@@ -97,7 +97,7 @@ def replay(cfg: ReplayConfig):
        robot.send_action(action)
        dt_s = time.perf_counter() - start_episode_t
-        busy_wait(1 / dataset.fps - dt_s)
+        precise_sleep(1 / dataset.fps - dt_s)
    robot.disconnect()
@@ -34,6 +34,8 @@ from huggingface_hub import HfApi
 import lerobot
 from lerobot.datasets.lerobot_dataset import LeRobotDataset, LeRobotDatasetMetadata
 def main():
    # We ported a number of existing datasets ourselves, use this to see the list:
    print("List of available datasets:")
    pprint(lerobot.available_datasets)
@@ -132,7 +134,6 @@ print(f"\n{dataset[0][camera_key].shape=}")  # (4, c, h, w)
    print(f"{dataset[0]['observation.state'].shape=}")  # (6, c)
    print(f"{dataset[0]['action'].shape=}\n")  # (64, c)
 if __name__ == "__main__":
    dataloader = torch.utils.data.DataLoader(
        dataset,
        num_workers=4,
@@ -144,3 +145,7 @@ if __name__ == "__main__":
        print(f"{batch['observation.state'].shape=}")  # (32, 6, c)
        print(f"{batch['action'].shape=}")  # (32, 64, c)
        break
 if __name__ == "__main__":
    main()
@@ -33,6 +33,8 @@ TASK_DESCRIPTION = "My task description"
 HF_MODEL_ID = "<hf_username>/<model_repo_id>"
 HF_DATASET_ID = "<hf_username>/<eval_dataset_repo_id>"
 def main():
    # Create the robot configuration & robot
    robot_config = LeKiwiClientConfig(remote_ip="172.18.134.136", id="lekiwi")
@@ -136,3 +138,7 @@ listener.stop()
    dataset.finalize()
    dataset.push_to_hub()
 if __name__ == "__main__":
    main()
@@ -34,6 +34,8 @@ RESET_TIME_SEC = 10
 TASK_DESCRIPTION = "My task description"
 HF_REPO_ID = "<hf_username>/<dataset_repo_id>"
 def main():
    # Create the robot and teleoperator configurations
    robot_config = LeKiwiClientConfig(remote_ip="172.18.134.136", id="lekiwi")
    leader_arm_config = SO100LeaderConfig(port="/dev/tty.usbmodem585A0077581", id="my_awesome_leader_arm")
@@ -133,3 +135,7 @@ listener.stop()
    dataset.finalize()
    dataset.push_to_hub()
 if __name__ == "__main__":
    main()
@@ -20,11 +20,13 @@ from lerobot.datasets.lerobot_dataset import LeRobotDataset
 from lerobot.robots.lekiwi.config_lekiwi import LeKiwiClientConfig
 from lerobot.robots.lekiwi.lekiwi_client import LeKiwiClient
 from lerobot.utils.constants import ACTION
-from lerobot.utils.robot_utils import busy_wait
+from lerobot.utils.robot_utils import precise_sleep
 from lerobot.utils.utils import log_say
 EPISODE_IDX = 0
 def main():
    # Initialize the robot config
    robot_config = LeKiwiClientConfig(remote_ip="172.18.134.136", id="lekiwi")
@@ -56,6 +58,10 @@ for idx in range(len(episode_frames)):
        # Send action to robot
        _ = robot.send_action(action)
-    busy_wait(max(1.0 / dataset.fps - (time.perf_counter() - t0), 0.0))
+        precise_sleep(max(1.0 / dataset.fps - (time.perf_counter() - t0), 0.0))
    robot.disconnect()
 if __name__ == "__main__":
    main()
@@ -19,11 +19,13 @@ import time
 from lerobot.robots.lekiwi import LeKiwiClient, LeKiwiClientConfig
 from lerobot.teleoperators.keyboard.teleop_keyboard import KeyboardTeleop, KeyboardTeleopConfig
 from lerobot.teleoperators.so100_leader import SO100Leader, SO100LeaderConfig
-from lerobot.utils.robot_utils import busy_wait
+from lerobot.utils.robot_utils import precise_sleep
 from lerobot.utils.visualization_utils import init_rerun, log_rerun_data
 FPS = 30
 def main():
    # Create the robot and teleoperator configurations
    robot_config = LeKiwiClientConfig(remote_ip="172.18.134.136", id="my_lekiwi")
    teleop_arm_config = SO100LeaderConfig(port="/dev/tty.usbmodem585A0077581", id="my_awesome_leader_arm")
@@ -69,4 +71,8 @@ while True:
        # Visualize
        log_rerun_data(observation=observation, action=action)
-    busy_wait(max(1.0 / FPS - (time.perf_counter() - t0), 0.0))
+        precise_sleep(max(1.0 / FPS - (time.perf_counter() - t0), 0.0))
 if __name__ == "__main__":
    main()
@@ -52,6 +52,8 @@ TASK_DESCRIPTION = "My task description"
 HF_MODEL_ID = "<hf_username>/<model_repo_id>"
 HF_DATASET_ID = "<hf_username>/<dataset_repo_id>"
 def main():
    # Create the robot configuration & robot
    camera_config = {"front": OpenCVCameraConfig(index_or_path=0, width=640, height=480, fps=FPS)}
    robot_config = SO100FollowerConfig(
@@ -89,7 +91,9 @@ robot_ee_to_joints_processor = RobotProcessorPipeline[tuple[RobotAction, RobotOb
    # Build pipeline to convert joints observation to EE observation
    robot_joints_to_ee_pose_processor = RobotProcessorPipeline[RobotObservation, RobotObservation](
        steps=[
-        ForwardKinematicsJointsToEE(kinematics=kinematics_solver, motor_names=list(robot.bus.motors.keys()))
+            ForwardKinematicsJointsToEE(
                kinematics=kinematics_solver, motor_names=list(robot.bus.motors.keys())
            )
        ],
        to_transition=observation_to_transition,
        to_output=transition_to_observation,
@@ -197,3 +201,7 @@ listener.stop()
    dataset.finalize()
    dataset.push_to_hub()
 if __name__ == "__main__":
    main()
@@ -50,6 +50,8 @@ RESET_TIME_SEC = 30
 TASK_DESCRIPTION = "My task description"
 HF_REPO_ID = "<hf_username>/<dataset_repo_id>"
 def main():
    # Create the robot and teleoperator configurations
    camera_config = {"front": OpenCVCameraConfig(index_or_path=0, width=640, height=480, fps=FPS)}
    robot_config = SO100FollowerConfig(
@@ -72,7 +74,9 @@ kinematics_solver = RobotKinematics(
    )
    # Build pipeline to convert phone action to EE action
-phone_to_robot_ee_pose_processor = RobotProcessorPipeline[tuple[RobotAction, RobotObservation], RobotAction](
+    phone_to_robot_ee_pose_processor = RobotProcessorPipeline[
        tuple[RobotAction, RobotObservation], RobotAction
    ](
        steps=[
            MapPhoneActionToRobotAction(platform=teleop_config.phone_os),
            EEReferenceAndDelta(
@@ -107,7 +111,9 @@ robot_ee_to_joints_processor = RobotProcessorPipeline[tuple[RobotAction, RobotOb
    # Build pipeline to convert joint observation to EE observation
    robot_joints_to_ee_pose = RobotProcessorPipeline[RobotObservation, RobotObservation](
        steps=[
-        ForwardKinematicsJointsToEE(kinematics=kinematics_solver, motor_names=list(robot.bus.motors.keys()))
+            ForwardKinematicsJointsToEE(
                kinematics=kinematics_solver, motor_names=list(robot.bus.motors.keys())
            )
        ],
        to_transition=observation_to_transition,
        to_output=transition_to_observation,
@@ -147,7 +153,6 @@ init_rerun(session_name="phone_so100_record")
    if not robot.is_connected or not phone.is_connected:
        raise ValueError("Robot or teleop is not connected!")
    print("Starting record loop. Move your phone to teleoperate the robot...")
    episode_idx = 0
    while episode_idx < NUM_EPISODES and not events["stop_recording"]:
@@ -203,3 +208,7 @@ listener.stop()
    dataset.finalize()
    dataset.push_to_hub()
 if __name__ == "__main__":
    main()
@@ -29,12 +29,14 @@ from lerobot.robots.so100_follower.robot_kinematic_processor import (
 )
 from lerobot.robots.so100_follower.so100_follower import SO100Follower
 from lerobot.utils.constants import ACTION
-from lerobot.utils.robot_utils import busy_wait
+from lerobot.utils.robot_utils import precise_sleep
 from lerobot.utils.utils import log_say
 EPISODE_IDX = 0
 HF_REPO_ID = "<hf_username>/<dataset_repo_id>"
 def main():
    # Initialize the robot config
    robot_config = SO100FollowerConfig(
        port="/dev/tty.usbmodem5A460814411", id="my_awesome_follower_arm", use_degrees=True
@@ -94,7 +96,11 @@ for idx in range(len(episode_frames)):
        # Send action to robot
        _ = robot.send_action(joint_action)
-    busy_wait(1.0 / dataset.fps - (time.perf_counter() - t0))
+        precise_sleep(1.0 / dataset.fps - (time.perf_counter() - t0))
    # Clean up
    robot.disconnect()
 if __name__ == "__main__":
    main()
@@ -32,11 +32,13 @@ from lerobot.robots.so100_follower.so100_follower import SO100Follower
 from lerobot.teleoperators.phone.config_phone import PhoneConfig, PhoneOS
 from lerobot.teleoperators.phone.phone_processor import MapPhoneActionToRobotAction
 from lerobot.teleoperators.phone.teleop_phone import Phone
-from lerobot.utils.robot_utils import busy_wait
+from lerobot.utils.robot_utils import precise_sleep
 from lerobot.utils.visualization_utils import init_rerun, log_rerun_data
 FPS = 30
 def main():
    # Initialize the robot and teleoperator
    robot_config = SO100FollowerConfig(
        port="/dev/tty.usbmodem5A460814411", id="my_awesome_follower_arm", use_degrees=True
@@ -55,7 +57,9 @@ kinematics_solver = RobotKinematics(
    )
    # Build pipeline to convert phone action to ee pose action to joint action
-phone_to_robot_joints_processor = RobotProcessorPipeline[tuple[RobotAction, RobotObservation], RobotAction](
+    phone_to_robot_joints_processor = RobotProcessorPipeline[
        tuple[RobotAction, RobotObservation], RobotAction
    ](
        steps=[
            MapPhoneActionToRobotAction(platform=teleop_config.phone_os),
            EEReferenceAndDelta(
@@ -110,4 +114,8 @@ while True:
        # Visualize
        log_rerun_data(observation=phone_obs, action=joint_action)
-    busy_wait(max(1.0 / FPS - (time.perf_counter() - t0), 0.0))
+        precise_sleep(max(1.0 / FPS - (time.perf_counter() - t0), 0.0))
 if __name__ == "__main__":
    main()
@@ -142,7 +142,7 @@ def _check_matplotlib_available():
        raise ImportError(
            "matplotlib is required for RTC debug visualizations. "
            "Please install it by running:\n"
-            "  uv pip install -e '.[matplotlib-dep]'"
+            "  uv pip install matplotlib"
        )
@@ -52,6 +52,8 @@ TASK_DESCRIPTION = "My task description"
 HF_MODEL_ID = "<hf_username>/<model_repo_id>"
 HF_DATASET_ID = "<hf_username>/<dataset_repo_id>"
 def main():
    # Create the robot configuration & robot
    camera_config = {"front": OpenCVCameraConfig(index_or_path=0, width=640, height=480, fps=FPS)}
    robot_config = SO100FollowerConfig(
@@ -89,13 +91,14 @@ robot_ee_to_joints_processor = RobotProcessorPipeline[tuple[RobotAction, RobotOb
    # Build pipeline to convert joints observation to EE observation
    robot_joints_to_ee_pose_processor = RobotProcessorPipeline[RobotObservation, RobotObservation](
        steps=[
-        ForwardKinematicsJointsToEE(kinematics=kinematics_solver, motor_names=list(robot.bus.motors.keys()))
+            ForwardKinematicsJointsToEE(
                kinematics=kinematics_solver, motor_names=list(robot.bus.motors.keys())
            )
        ],
        to_transition=observation_to_transition,
        to_output=transition_to_observation,
    )
    # Create the dataset
    dataset = LeRobotDataset.create(
        repo_id=HF_DATASET_ID,
@@ -198,3 +201,7 @@ listener.stop()
    dataset.finalize()
    dataset.push_to_hub()
 if __name__ == "__main__":
    main()
@@ -48,10 +48,15 @@ RESET_TIME_SEC = 30
 TASK_DESCRIPTION = "My task description"
 HF_REPO_ID = "<hf_username>/<dataset_repo_id>"
 def main():
    # Create the robot and teleoperator configurations
    camera_config = {"front": OpenCVCameraConfig(index_or_path=0, width=640, height=480, fps=FPS)}
    follower_config = SO100FollowerConfig(
-    port="/dev/tty.usbmodem5A460814411", id="my_awesome_follower_arm", cameras=camera_config, use_degrees=True
+        port="/dev/tty.usbmodem5A460814411",
        id="my_awesome_follower_arm",
        cameras=camera_config,
        use_degrees=True,
    )
    leader_config = SO100LeaderConfig(port="/dev/tty.usbmodem5A460819811", id="my_awesome_leader_arm")
@@ -135,7 +140,6 @@ dataset = LeRobotDataset.create(
        image_writer_threads=4,
    )
    # Connect the robot and teleoperator
    leader.connect()
    follower.connect()
@@ -202,3 +206,7 @@ listener.stop()
    dataset.finalize()
    dataset.push_to_hub()
 if __name__ == "__main__":
    main()
@@ -30,12 +30,14 @@ from lerobot.robots.so100_follower.robot_kinematic_processor import (
 )
 from lerobot.robots.so100_follower.so100_follower import SO100Follower
 from lerobot.utils.constants import ACTION
-from lerobot.utils.robot_utils import busy_wait
+from lerobot.utils.robot_utils import precise_sleep
 from lerobot.utils.utils import log_say
 EPISODE_IDX = 0
 HF_REPO_ID = "<hf_username>/<dataset_repo_id>"
 def main():
    # Initialize the robot config
    robot_config = SO100FollowerConfig(
        port="/dev/tty.usbmodem5A460814411", id="my_awesome_follower_arm", use_degrees=True
@@ -95,7 +97,11 @@ for idx in range(len(episode_frames)):
        # Send action to robot
        _ = robot.send_action(joint_action)
-    busy_wait(1.0 / dataset.fps - (time.perf_counter() - t0))
+        precise_sleep(1.0 / dataset.fps - (time.perf_counter() - t0))
    # Clean up
    robot.disconnect()
 if __name__ == "__main__":
    main()
@@ -32,11 +32,13 @@ from lerobot.robots.so100_follower.robot_kinematic_processor import (
 from lerobot.robots.so100_follower.so100_follower import SO100Follower
 from lerobot.teleoperators.so100_leader.config_so100_leader import SO100LeaderConfig
 from lerobot.teleoperators.so100_leader.so100_leader import SO100Leader
-from lerobot.utils.robot_utils import busy_wait
+from lerobot.utils.robot_utils import precise_sleep
 from lerobot.utils.visualization_utils import init_rerun, log_rerun_data
 FPS = 30
 def main():
    # Initialize the robot and teleoperator config
    follower_config = SO100FollowerConfig(
        port="/dev/tty.usbmodem5A460814411", id="my_awesome_follower_arm", use_degrees=True
@@ -118,4 +120,8 @@ while True:
        # Visualize
        log_rerun_data(observation=leader_ee_act, action=follower_joints_act)
-    busy_wait(max(1.0 / FPS - (time.perf_counter() - t0), 0.0))
+        precise_sleep(max(1.0 / FPS - (time.perf_counter() - t0), 0.0))
 if __name__ == "__main__":
    main()
@@ -19,6 +19,7 @@ def make_delta_timestamps(delta_indices: list[int] | None, fps: int) -> list[flo
    return [i / fps for i in delta_indices]
 def main():
    output_directory = Path("outputs/robot_learning_tutorial/act")
    output_directory.mkdir(parents=True, exist_ok=True)
@@ -48,7 +49,8 @@ delta_timestamps = {
    # add image features if they are present
    delta_timestamps |= {
-    k: make_delta_timestamps(cfg.observation_delta_indices, dataset_metadata.fps) for k in cfg.image_features
+        k: make_delta_timestamps(cfg.observation_delta_indices, dataset_metadata.fps)
        for k in cfg.image_features
    }
    # Instantiate the dataset
@@ -93,6 +95,10 @@ preprocessor.save_pretrained(output_directory)
    postprocessor.save_pretrained(output_directory)
    # Save all assets to the Hub
-policy.push_to_hub("fracapuano/robot_learning_tutorial_act")
+    policy.push_to_hub("<user>/robot_learning_tutorial_act")
-preprocessor.push_to_hub("fracapuano/robot_learning_tutorial_act")
+    preprocessor.push_to_hub("<user>/robot_learning_tutorial_act")
-postprocessor.push_to_hub("fracapuano/robot_learning_tutorial_act")
+    postprocessor.push_to_hub("<user>/robot_learning_tutorial_act")
 if __name__ == "__main__":
    main()
@@ -8,8 +8,13 @@ from lerobot.policies.utils import build_inference_frame, make_robot_action
 from lerobot.robots.so100_follower.config_so100_follower import SO100FollowerConfig
 from lerobot.robots.so100_follower.so100_follower import SO100Follower
 MAX_EPISODES = 5
 MAX_STEPS_PER_EPISODE = 20
 def main():
    device = torch.device("mps")  # or "cuda" or "cpu"
-model_id = "fracapuano/robot_learning_tutorial_act"
+    model_id = "<user>/robot_learning_tutorial_act"
    model = ACTPolicy.from_pretrained(model_id)
    dataset_id = "lerobot/svla_so101_pickplace"
@@ -23,9 +28,6 @@ follower_port = ...  # something like "/dev/tty.usbmodem58760431631"
    # # the robot ids are used to load the right calibration files
    follower_id = ...  # something like "follower_so100"
 MAX_EPISODES = 5
 MAX_STEPS_PER_EPISODE = 20
    # Robot and environment configuration
    # Camera keys must match the name and resolutions of the ones used for training!
    # You can check the camera keys expected by a model in the info.json card on the model card on the Hub
@@ -55,3 +57,7 @@ for _ in range(MAX_EPISODES):
            robot.send_action(action)
        print("Episode finished! Starting new episode...")
 if __name__ == "__main__":
    main()
@@ -1,6 +1,8 @@
 from lerobot.async_inference.configs import PolicyServerConfig
 from lerobot.async_inference.policy_server import serve
 def main():
    host = ...  # something like "127.0.0.1" if you're exposing to localhost
    port = ...  # something like 8080
@@ -9,3 +11,7 @@ config = PolicyServerConfig(
        port=port,
    )
    serve(config)
 if __name__ == "__main__":
    main()
@@ -6,6 +6,8 @@ from lerobot.async_inference.robot_client import RobotClient
 from lerobot.cameras.opencv.configuration_opencv import OpenCVCameraConfig
 from lerobot.robots.so100_follower import SO100FollowerConfig
 def main():
    # these cameras must match the ones expected by the policy - find your cameras with lerobot-find-cameras
    # check the config.json on the Hub for the policy you are using to see the expected camera specs
    camera_cfg = {
@@ -29,7 +31,7 @@ client_cfg = RobotClientConfig(
        server_address=server_address,
        policy_device="mps",
        policy_type="act",
-    pretrained_name_or_path="fracapuano/robot_learning_tutorial_act",
+        pretrained_name_or_path="<user>/robot_learning_tutorial_act",
        chunk_size_threshold=0.5,  # g
        actions_per_chunk=50,  # make sure this is less than the max actions of the policy
    )
@@ -53,3 +55,7 @@ if client.start():
            action_receiver_thread.join()
            # (Optionally) plot the action queue size
            visualize_action_queue_size(client.action_queue_size)
 if __name__ == "__main__":
    main()
@@ -19,6 +19,7 @@ def make_delta_timestamps(delta_indices: list[int] | None, fps: int) -> list[flo
    return [i / fps for i in delta_indices]
 def main():
    output_directory = Path("outputs/robot_learning_tutorial/diffusion")
    output_directory.mkdir(parents=True, exist_ok=True)
@@ -49,7 +50,8 @@ delta_timestamps = {
    # add image features if they are present
    delta_timestamps |= {
-    k: make_delta_timestamps(cfg.observation_delta_indices, dataset_metadata.fps) for k in cfg.image_features
+        k: make_delta_timestamps(cfg.observation_delta_indices, dataset_metadata.fps)
        for k in cfg.image_features
    }
    # Instantiate the dataset
@@ -94,6 +96,10 @@ preprocessor.save_pretrained(output_directory)
    postprocessor.save_pretrained(output_directory)
    # Save all assets to the Hub
-policy.push_to_hub("fracapuano/robot_learning_tutorial_diffusion")
+    policy.push_to_hub("<user>/robot_learning_tutorial_diffusion")
-preprocessor.push_to_hub("fracapuano/robot_learning_tutorial_diffusion")
+    preprocessor.push_to_hub("<user>/robot_learning_tutorial_diffusion")
-postprocessor.push_to_hub("fracapuano/robot_learning_tutorial_diffusion")
+    postprocessor.push_to_hub("<user>/robot_learning_tutorial_diffusion")
 if __name__ == "__main__":
    main()
@@ -8,8 +8,13 @@ from lerobot.policies.utils import build_inference_frame, make_robot_action
 from lerobot.robots.so100_follower.config_so100_follower import SO100FollowerConfig
 from lerobot.robots.so100_follower.so100_follower import SO100Follower
 MAX_EPISODES = 5
 MAX_STEPS_PER_EPISODE = 20
 def main():
    device = torch.device("mps")  # or "cuda" or "cpu"
-model_id = "fracapuano/robot_learning_tutorial_diffusion"
+    model_id = "<user>/robot_learning_tutorial_diffusion"
    model = DiffusionPolicy.from_pretrained(model_id)
@@ -20,10 +25,6 @@ preprocess, postprocess = make_pre_post_processors(
        model.config, model_id, dataset_stats=dataset_metadata.stats
    )
 MAX_EPISODES = 5
 MAX_STEPS_PER_EPISODE = 20
    # # find ports using lerobot-find-port
    follower_port = ...  # something like "/dev/tty.usbmodem58760431631"
@@ -42,7 +43,6 @@ robot_cfg = SO100FollowerConfig(port=follower_port, id=follower_id, cameras=came
    robot = SO100Follower(robot_cfg)
    robot.connect()
    for _ in range(MAX_EPISODES):
        for _ in range(MAX_STEPS_PER_EPISODE):
            obs = robot.get_observation()
@@ -58,3 +58,7 @@ for _ in range(MAX_EPISODES):
            robot.send_action(action)
        print("Episode finished! Starting new episode...")
 if __name__ == "__main__":
    main()
@@ -11,6 +11,8 @@ from lerobot.robots.so100_follower.so100_follower import SO100Follower
 MAX_EPISODES = 5
 MAX_STEPS_PER_EPISODE = 20
 def main():
    device = torch.device("mps")  # or "cuda" or "cpu"
    model_id = "lerobot/pi0_base"
@@ -65,3 +67,7 @@ for _ in range(MAX_EPISODES):
            robot.send_action(action)
        print("Episode finished! Starting new episode...")
 if __name__ == "__main__":
    main()
@@ -20,6 +20,8 @@ from lerobot.teleoperators.utils import TeleopEvents
 LOG_EVERY = 10
 SEND_EVERY = 10
 MAX_EPISODES = 5
 MAX_STEPS_PER_EPISODE = 20
 def run_learner(
@@ -223,6 +225,7 @@ def make_policy_obs(obs, device: torch.device = "cpu"):
    }
 def main():
    """Main function - coordinates actor and learner processes."""
    device = "mps"  # or "cuda" or "cpu"
@@ -238,15 +241,12 @@ follower_id = ...
    leader_id = ...
    # A pretrained model (to be used in-distribution!)
-reward_classifier_id = "fracapuano/reward_classifier_hil_serl_example"
+    reward_classifier_id = "<user>/reward_classifier_hil_serl_example"
    reward_classifier = Classifier.from_pretrained(reward_classifier_id)
    reward_classifier.to(device)
    reward_classifier.eval()
 MAX_EPISODES = 5
 MAX_STEPS_PER_EPISODE = 20
    # Robot and environment configuration
    robot_cfg = SO100FollowerConfig(port=follower_port, id=follower_id)
    teleop_cfg = SO100LeaderConfig(port=leader_port, id=leader_id)
@@ -285,13 +285,11 @@ transitions_queue = mp.Queue(maxsize=10)
    parameters_queue = mp.Queue(maxsize=2)
    shutdown_event = mp.Event()
    # Signal handler for graceful shutdown
    def signal_handler(sig, frame=None):
        """Request a graceful shutdown when a termination signal is received.

        Handlers registered through ``signal.signal`` are invoked by the
        interpreter with two positional arguments: the signal number and the
        interrupted stack frame. The handler therefore has to accept both,
        otherwise the first delivered signal raises ``TypeError`` instead of
        triggering the shutdown.

        Args:
            sig: Number of the signal that was received.
            frame: Stack frame that was executing when the signal arrived.
                Unused; defaults to ``None`` so direct one-argument calls
                remain valid.
        """
        print(f"\nSignal {sig} received, shutting down...")
        # Flip the shared event so the worker processes can exit their loops.
        shutdown_event.set()
    signal.signal(signal.SIGINT, signal_handler)
    signal.signal(signal.SIGTERM, signal_handler)
@@ -343,3 +341,7 @@ finally:
            learner_process.terminate()
        if actor_process.is_alive():
            actor_process.terminate()
 if __name__ == "__main__":
    main()
@@ -4,6 +4,8 @@ from lerobot.datasets.lerobot_dataset import LeRobotDataset
 from lerobot.policies.factory import make_policy, make_pre_post_processors
 from lerobot.policies.sac.reward_model.configuration_classifier import RewardClassifierConfig
 def main():
    # Device to use for training
    device = "mps"  # or "cuda", or "cpu"
@@ -26,8 +28,7 @@ policy = make_policy(config, ds_meta=dataset.meta)
    optimizer = config.get_optimizer_preset().build(policy.parameters())
    preprocessor, _ = make_pre_post_processors(policy_cfg=config, dataset_stats=dataset.meta.stats)
-
+    classifier_id = "<user>/reward_classifier_hil_serl_example"
 classifier_id = "fracapuano/reward_classifier_hil_serl_example"
    # Instantiate a dataloader
    dataloader = torch.utils.data.DataLoader(dataset, batch_size=16, shuffle=True)
@@ -60,3 +61,7 @@ print("Training finished!")
    # You can now save the trained policy.
    policy.push_to_hub(classifier_id)
 if __name__ == "__main__":
    main()
@@ -11,6 +11,8 @@ from lerobot.robots.so100_follower.so100_follower import SO100Follower
 MAX_EPISODES = 5
 MAX_STEPS_PER_EPISODE = 20
 def main():
    device = torch.device("mps")  # or "cuda" or "cpu"
    model_id = "lerobot/smolvla_base"
@@ -64,3 +66,7 @@ for _ in range(MAX_EPISODES):
            robot.send_action(action)
        print("Episode finished! Starting new episode...")
 if __name__ == "__main__":
    main()
@@ -110,8 +110,8 @@ def worker_thread_loop(queue: queue.Queue):
        if item is None:
            queue.task_done()
            break
-        image_array, fpath = item
+        image_array, fpath, compress_level = item
-        write_image(image_array, fpath)
+        write_image(image_array, fpath, compress_level)
        queue.task_done()
@@ -169,11 +169,13 @@ class AsyncImageWriter:
                p.start()
                self.processes.append(p)
-    def save_image(self, image: torch.Tensor | np.ndarray | PIL.Image.Image, fpath: Path):
+    def save_image(
        self, image: torch.Tensor | np.ndarray | PIL.Image.Image, fpath: Path, compress_level: int = 1
    ):
        if isinstance(image, torch.Tensor):
            # Convert tensor to numpy array to minimize main process time
            image = image.cpu().numpy()
-        self.queue.put((image, fpath))
+        self.queue.put((image, fpath, compress_level))
    def wait_until_done(self):
        self.queue.join()
@@ -13,6 +13,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import concurrent.futures
 import contextlib
 import logging
 import shutil
@@ -539,6 +540,15 @@ class LeRobotDatasetMetadata:
        return obj
 def _encode_video_worker(video_key: str, episode_index: int, root: Path, fps: int) -> Path:
    """Encode the saved frames of one video key / episode into a temporary MP4.

    Defined at module level (rather than as a method) so it can be handed to
    an executor as a standalone callable.

    Args:
        video_key: Feature key whose frames should be encoded.
        episode_index: Index of the episode the frames belong to.
        root: Dataset root; the frame directory is resolved relative to it and
            the temporary output directory is created inside it.
        fps: Frame rate forwarded to ``encode_video_frames``.

    Returns:
        Path of the encoded ``.mp4`` inside a freshly created temporary directory.
    """
    # Fresh scratch directory under the dataset root to hold the encoded file.
    scratch_dir = Path(tempfile.mkdtemp(dir=root))
    video_path = scratch_dir / f"{video_key}_{episode_index:03d}.mp4"

    # The frame directory is the parent of the (formatted) first-frame path.
    first_frame_rel = DEFAULT_IMAGE_PATH.format(
        image_key=video_key,
        episode_index=episode_index,
        frame_index=0,
    )
    frames_dir = (root / first_frame_rel).parent

    encode_video_frames(frames_dir, video_path, fps, overwrite=True)
    # The raw frames are removed once the video has been written.
    shutil.rmtree(frames_dir)
    return video_path
 class LeRobotDataset(torch.utils.data.Dataset):
    def __init__(
        self,
@@ -712,6 +722,15 @@ class LeRobotDataset(torch.utils.data.Dataset):
            self.download(download_videos)
            self.hf_dataset = self.load_hf_dataset()
        # Create mapping from absolute indices to relative indices when only a subset of the episodes are loaded
        # Build a mapping: absolute_index -> relative_index_in_filtered_dataset
        self._absolute_to_relative_idx = None
        if self.episodes is not None:
            self._absolute_to_relative_idx = {
                abs_idx.item() if isinstance(abs_idx, torch.Tensor) else abs_idx: rel_idx
                for rel_idx, abs_idx in enumerate(self.hf_dataset["index"])
            }
        # Setup delta_indices
        if self.delta_timestamps is not None:
            check_delta_timestamps(self.delta_timestamps, self.fps, self.tolerance_s)
@@ -830,7 +849,7 @@ class LeRobotDataset(torch.utils.data.Dataset):
    def load_hf_dataset(self) -> datasets.Dataset:
        """hf_dataset contains all the observations, states, actions, rewards, etc."""
        features = get_hf_features_from_features(self.features)
-        hf_dataset = load_nested_dataset(self.root / "data", features=features)
+        hf_dataset = load_nested_dataset(self.root / "data", features=features, episodes=self.episodes)
        hf_dataset.set_transform(hf_transform_to_torch)
        return hf_dataset
@@ -847,10 +866,8 @@ class LeRobotDataset(torch.utils.data.Dataset):
        # Determine requested episodes
        if self.episodes is None:
            # Requesting all episodes - check if we have all episodes from metadata
            requested_episodes = set(range(self.meta.total_episodes))
        else:
            # Requesting specific episodes
            requested_episodes = set(self.episodes)
        # Check if all requested episodes are available in cached data
@@ -932,6 +949,10 @@ class LeRobotDataset(torch.utils.data.Dataset):
        query_timestamps = {}
        for key in self.meta.video_keys:
            if query_indices is not None and key in query_indices:
                if self._absolute_to_relative_idx is not None:
                    relative_indices = [self._absolute_to_relative_idx[idx] for idx in query_indices[key]]
                    timestamps = self.hf_dataset[relative_indices]["timestamp"]
                else:
                    timestamps = self.hf_dataset[query_indices[key]]["timestamp"]
                query_timestamps[key] = torch.stack(timestamps).tolist()
            else:
@@ -955,10 +976,16 @@ class LeRobotDataset(torch.utils.data.Dataset):
        for key, q_idx in query_indices.items():
            if key in self.meta.video_keys:
                continue
            # Map absolute indices to relative indices if needed
            relative_indices = (
                q_idx
                if self._absolute_to_relative_idx is None
                else [self._absolute_to_relative_idx[idx] for idx in q_idx]
            )
            try:
-                result[key] = torch.stack(self.hf_dataset[key][q_idx])
+                result[key] = torch.stack(self.hf_dataset[key][relative_indices])
            except (KeyError, TypeError, IndexError):
-                result[key] = torch.stack(self.hf_dataset[q_idx][key])
+                result[key] = torch.stack(self.hf_dataset[relative_indices][key])
        return result
    def _query_videos(self, query_timestamps: dict[str, list[float]], ep_idx: int) -> dict[str, torch.Tensor]:
@@ -1054,6 +1081,7 @@ class LeRobotDataset(torch.utils.data.Dataset):
            ep_buffer[key] = current_ep_idx if key == "episode_index" else []
        return ep_buffer
    # TODO(Steven): consider move this to utils
    def _get_image_file_path(self, episode_index: int, image_key: str, frame_index: int) -> Path:
        fpath = DEFAULT_IMAGE_PATH.format(
            image_key=image_key, episode_index=episode_index, frame_index=frame_index
@@ -1063,13 +1091,15 @@ class LeRobotDataset(torch.utils.data.Dataset):
    def _get_image_file_dir(self, episode_index: int, image_key: str) -> Path:
        return self._get_image_file_path(episode_index, image_key, frame_index=0).parent
-    def _save_image(self, image: torch.Tensor | np.ndarray | PIL.Image.Image, fpath: Path) -> None:
+    def _save_image(
        self, image: torch.Tensor | np.ndarray | PIL.Image.Image, fpath: Path, compress_level: int = 1
    ) -> None:
        if self.image_writer is None:
            if isinstance(image, torch.Tensor):
                image = image.cpu().numpy()
-            write_image(image, fpath)
+            write_image(image, fpath, compress_level=compress_level)
        else:
-            self.image_writer.save_image(image=image, fpath=fpath)
+            self.image_writer.save_image(image=image, fpath=fpath, compress_level=compress_level)
    def add_frame(self, frame: dict) -> None:
        """
@@ -1107,14 +1137,19 @@ class LeRobotDataset(torch.utils.data.Dataset):
                )
                if frame_index == 0:
                    img_path.parent.mkdir(parents=True, exist_ok=True)
-                self._save_image(frame[key], img_path)
+                compress_level = 1 if self.features[key]["dtype"] == "video" else 6
                self._save_image(frame[key], img_path, compress_level)
                self.episode_buffer[key].append(str(img_path))
            else:
                self.episode_buffer[key].append(frame[key])
        self.episode_buffer["size"] += 1
-    def save_episode(self, episode_data: dict | None = None) -> None:
+    def save_episode(
        self,
        episode_data: dict | None = None,
        parallel_encoding: bool = True,
    ) -> None:
        """
        This will save to disk the current episode in self.episode_buffer.
@@ -1126,6 +1161,8 @@ class LeRobotDataset(torch.utils.data.Dataset):
            episode_data (dict | None, optional): Dict containing the episode data to save. If None, this will
                save the current episode in self.episode_buffer, which is filled with 'add_frame'. Defaults to
                None.
            parallel_encoding (bool, optional): If True, encode videos in parallel using ProcessPoolExecutor.
                Defaults to True on Linux, False on macOS as it tends to use all the CPU available already.
        """
        episode_buffer = episode_data if episode_data is not None else self.episode_buffer
@@ -1162,6 +1199,38 @@ class LeRobotDataset(torch.utils.data.Dataset):
        use_batched_encoding = self.batch_encoding_size > 1
        if has_video_keys and not use_batched_encoding:
            num_cameras = len(self.meta.video_keys)
            if parallel_encoding and num_cameras > 1:
                # TODO(Steven): Ideally we would like to control the number of threads per encoding such that:
                # num_cameras * num_threads = (total_cpu -1)
                with concurrent.futures.ProcessPoolExecutor(max_workers=num_cameras) as executor:
                    future_to_key = {
                        executor.submit(
                            _encode_video_worker,
                            video_key,
                            episode_index,
                            self.root,
                            self.fps,
                        ): video_key
                        for video_key in self.meta.video_keys
                    }
                    results = {}
                    for future in concurrent.futures.as_completed(future_to_key):
                        video_key = future_to_key[future]
                        try:
                            temp_path = future.result()
                            results[video_key] = temp_path
                        except Exception as exc:
                            logging.error(f"Video encoding failed for {video_key}: {exc}")
                            raise exc
                for video_key in self.meta.video_keys:
                    temp_path = results[video_key]
                    ep_metadata.update(
                        self._save_episode_video(video_key, episode_index, temp_path=temp_path)
                    )
            else:
                for video_key in self.meta.video_keys:
                    ep_metadata.update(self._save_episode_video(video_key, episode_index))
@@ -1328,9 +1397,18 @@ class LeRobotDataset(torch.utils.data.Dataset):
        return metadata
-    def _save_episode_video(self, video_key: str, episode_index: int) -> dict:
+    def _save_episode_video(
        self,
        video_key: str,
        episode_index: int,
        temp_path: Path | None = None,
    ) -> dict:
        # Encode episode frames into a temporary video
        if temp_path is None:
            ep_path = self._encode_temporary_episode_video(video_key, episode_index)
        else:
            ep_path = temp_path
        ep_size_in_mb = get_file_size_in_mb(ep_path)
        ep_duration_in_s = get_video_duration_in_s(ep_path)
@@ -1448,11 +1526,7 @@ class LeRobotDataset(torch.utils.data.Dataset):
        Note: `encode_video_frames` is a blocking call. Making it asynchronous shouldn't speedup encoding,
        since video encoding with ffmpeg is already using multithreading.
        """
-        temp_path = Path(tempfile.mkdtemp(dir=self.root)) / f"{video_key}_{episode_index:03d}.mp4"
+        return _encode_video_worker(video_key, episode_index, self.root, self.fps)
        img_dir = self._get_image_file_dir(episode_index, video_key)
        encode_video_frames(img_dir, temp_path, self.fps, overwrite=True)
        shutil.rmtree(img_dir)
        return temp_path
    @classmethod
    def create(
@@ -1498,6 +1572,7 @@ class LeRobotDataset(torch.utils.data.Dataset):
        obj.image_transforms = None
        obj.delta_timestamps = None
        obj.delta_indices = None
        obj._absolute_to_relative_idx = None
        obj.video_backend = video_backend if video_backend is not None else get_safe_default_codec()
        obj.writer = None
        obj.latest_episode = None
@@ -28,6 +28,7 @@ import numpy as np
 import packaging.version
 import pandas
 import pandas as pd
 import pyarrow.dataset as pa_ds
 import pyarrow.parquet as pq
 import torch
 from datasets import Dataset
@@ -48,7 +49,7 @@ from lerobot.utils.utils import SuppressProgressBars, is_valid_numpy_dtype_strin
 DEFAULT_CHUNK_SIZE = 1000  # Max number of files per chunk
 DEFAULT_DATA_FILE_SIZE_IN_MB = 100  # Max size per file
-DEFAULT_VIDEO_FILE_SIZE_IN_MB = 500  # Max size per file
+DEFAULT_VIDEO_FILE_SIZE_IN_MB = 200  # Max size per file
 INFO_PATH = "meta/info.json"
 STATS_PATH = "meta/stats.json"
@@ -103,7 +104,9 @@ def update_chunk_file_indices(chunk_idx: int, file_idx: int, chunks_size: int) -
    return chunk_idx, file_idx
-def load_nested_dataset(pq_dir: Path, features: datasets.Features | None = None) -> Dataset:
+def load_nested_dataset(
    pq_dir: Path, features: datasets.Features | None = None, episodes: list[int] | None = None
 ) -> Dataset:
    """Find parquet files in provided directory {pq_dir}/chunk-xxx/file-xxx.parquet
    Convert parquet files to pyarrow memory mapped in a cache folder for efficient RAM usage
    Concatenate all pyarrow references to return HF Dataset format
@@ -111,15 +114,26 @@ def load_nested_dataset(pq_dir: Path, features: datasets.Features | None = None)
    Args:
        pq_dir: Directory containing parquet files
        features: Optional features schema to ensure consistent loading of complex types like images
        episodes: Optional list of episode indices to filter. Uses PyArrow predicate pushdown for efficiency.
    """
    paths = sorted(pq_dir.glob("*/*.parquet"))
    if len(paths) == 0:
        raise FileNotFoundError(f"Provided directory does not contain any parquet file: {pq_dir}")
    # TODO(rcadene): set num_proc to accelerate conversion to pyarrow
    with SuppressProgressBars():
-        datasets = Dataset.from_parquet([str(path) for path in paths], features=features)
+        # When no filtering needed, Dataset uses memory-mapped loading for efficiency
-    return datasets
+        # PyArrow loads the entire dataset into memory
        if episodes is None:
            return Dataset.from_parquet([str(path) for path in paths], features=features)
        arrow_dataset = pa_ds.dataset(paths, format="parquet")
        filter_expr = pa_ds.field("episode_index").isin(episodes)
        table = arrow_dataset.to_table(filter=filter_expr)
        if features is not None:
            table = table.cast(features.arrow_schema)
        return Dataset(table)
 def get_parquet_num_frames(parquet_path: str | Path) -> int:
@@ -311,6 +311,7 @@ def encode_video_frames(
    fast_decode: int = 0,
    log_level: int | None = av.logging.ERROR,
    overwrite: bool = False,
    preset: int | None = None,
 ) -> None:
    """More info on ffmpeg arguments tuning on `benchmark/video/README.md`"""
    # Check encoder availability
@@ -359,6 +360,9 @@ def encode_video_frames(
        value = f"fast-decode={fast_decode}" if vcodec == "libsvtav1" else "fastdecode"
        video_options[key] = value
    if vcodec == "libsvtav1":
        video_options["preset"] = str(preset) if preset is not None else "12"
    # Set logging level
    if log_level is not None:
        # "While less efficient, it is generally preferable to modify logging with Python's logging"
@@ -21,7 +21,22 @@ import draccus
 from lerobot.configs.types import FeatureType, PolicyFeature
 from lerobot.robots import RobotConfig
 from lerobot.teleoperators.config import TeleoperatorConfig
-from lerobot.utils.constants import ACTION, OBS_ENV_STATE, OBS_IMAGE, OBS_IMAGES, OBS_STATE
+from lerobot.utils.constants import (
    ACTION,
    LIBERO_KEY_EEF_MAT,
    LIBERO_KEY_EEF_POS,
    LIBERO_KEY_EEF_QUAT,
    LIBERO_KEY_GRIPPER_QPOS,
    LIBERO_KEY_GRIPPER_QVEL,
    LIBERO_KEY_JOINTS_POS,
    LIBERO_KEY_JOINTS_VEL,
    LIBERO_KEY_PIXELS_AGENTVIEW,
    LIBERO_KEY_PIXELS_EYE_IN_HAND,
    OBS_ENV_STATE,
    OBS_IMAGE,
    OBS_IMAGES,
    OBS_STATE,
 )
@dataclass
@@ -246,28 +261,61 @@ class LiberoEnv(EnvConfig):
    features_map: dict[str, str] = field(
        default_factory=lambda: {
            ACTION: ACTION,
-            "agent_pos": OBS_STATE,
+            LIBERO_KEY_EEF_POS: f"{OBS_STATE}.eef_pos",
-            "pixels/agentview_image": f"{OBS_IMAGES}.image",
+            LIBERO_KEY_EEF_QUAT: f"{OBS_STATE}.eef_quat",
-            "pixels/robot0_eye_in_hand_image": f"{OBS_IMAGES}.image2",
+            LIBERO_KEY_EEF_MAT: f"{OBS_STATE}.eef_mat",
            LIBERO_KEY_GRIPPER_QPOS: f"{OBS_STATE}.gripper_qpos",
            LIBERO_KEY_GRIPPER_QVEL: f"{OBS_STATE}.gripper_qvel",
            LIBERO_KEY_JOINTS_POS: f"{OBS_STATE}.joint_pos",
            LIBERO_KEY_JOINTS_VEL: f"{OBS_STATE}.joint_vel",
            LIBERO_KEY_PIXELS_AGENTVIEW: f"{OBS_IMAGES}.image",
            LIBERO_KEY_PIXELS_EYE_IN_HAND: f"{OBS_IMAGES}.image2",
        }
    )
    def __post_init__(self):
        if self.obs_type == "pixels":
-            self.features["pixels/agentview_image"] = PolicyFeature(
+            self.features[LIBERO_KEY_PIXELS_AGENTVIEW] = PolicyFeature(
                type=FeatureType.VISUAL, shape=(self.observation_height, self.observation_width, 3)
            )
-            self.features["pixels/robot0_eye_in_hand_image"] = PolicyFeature(
+            self.features[LIBERO_KEY_PIXELS_EYE_IN_HAND] = PolicyFeature(
                type=FeatureType.VISUAL, shape=(self.observation_height, self.observation_width, 3)
            )
        elif self.obs_type == "pixels_agent_pos":
-            self.features["agent_pos"] = PolicyFeature(type=FeatureType.STATE, shape=(8,))
+            self.features[LIBERO_KEY_PIXELS_AGENTVIEW] = PolicyFeature(
            self.features["pixels/agentview_image"] = PolicyFeature(
                type=FeatureType.VISUAL, shape=(self.observation_height, self.observation_width, 3)
            )
-            self.features["pixels/robot0_eye_in_hand_image"] = PolicyFeature(
+            self.features[LIBERO_KEY_PIXELS_EYE_IN_HAND] = PolicyFeature(
                type=FeatureType.VISUAL, shape=(self.observation_height, self.observation_width, 3)
            )
            self.features[LIBERO_KEY_EEF_POS] = PolicyFeature(
                type=FeatureType.STATE,
                shape=(3,),
            )
            self.features[LIBERO_KEY_EEF_QUAT] = PolicyFeature(
                type=FeatureType.STATE,
                shape=(4,),
            )
            self.features[LIBERO_KEY_EEF_MAT] = PolicyFeature(
                type=FeatureType.STATE,
                shape=(3, 3),
            )
            self.features[LIBERO_KEY_GRIPPER_QPOS] = PolicyFeature(
                type=FeatureType.STATE,
                shape=(2,),
            )
            self.features[LIBERO_KEY_GRIPPER_QVEL] = PolicyFeature(
                type=FeatureType.STATE,
                shape=(2,),
            )
            self.features[LIBERO_KEY_JOINTS_POS] = PolicyFeature(
                type=FeatureType.STATE,
                shape=(7,),
            )
            self.features[LIBERO_KEY_JOINTS_VEL] = PolicyFeature(
                type=FeatureType.STATE,
                shape=(7,),
            )
        else:
            raise ValueError(f"Unsupported obs_type: {self.obs_type}")
@@ -14,12 +14,16 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import importlib
 from typing import Any
 import gymnasium as gym
 from gymnasium.envs.registration import registry as gym_registry
 from lerobot.envs.configs import AlohaEnv, EnvConfig, LiberoEnv, PushtEnv
 from lerobot.envs.utils import _call_make_env, _download_hub_file, _import_hub_module, _normalize_hub_result
 from lerobot.processor import ProcessorStep
 from lerobot.processor.env_processor import LiberoProcessorStep
 from lerobot.processor.pipeline import PolicyProcessorPipeline
 def make_env_config(env_type: str, **kwargs) -> EnvConfig:
@@ -33,6 +37,41 @@ def make_env_config(env_type: str, **kwargs) -> EnvConfig:
        raise ValueError(f"Policy type '{env_type}' is not available.")
 def make_env_pre_post_processors(
    env_cfg: EnvConfig,
 ) -> tuple[
    PolicyProcessorPipeline[dict[str, Any], dict[str, Any]],
    PolicyProcessorPipeline[dict[str, Any], dict[str, Any]],
 ]:
    """
    Build the environment-side preprocessor and postprocessor pipelines.
    Both pipelines are created with an empty step list unless the environment is
    a LIBERO one, in which case a `LiberoProcessorStep` is placed in the
    preprocessor.
    Args:
        env_cfg: The configuration of the environment. It is treated as LIBERO when it
            is a `LiberoEnv` instance or when "libero" appears in `env_cfg.type`.
    Returns:
        A `(preprocessor, postprocessor)` tuple of `PolicyProcessorPipeline` objects.
        The postprocessor is always built from an empty step list.
    """
    # LIBERO is the only environment that contributes a step; everything else gets empty pipelines.
    is_libero = isinstance(env_cfg, LiberoEnv) or "libero" in env_cfg.type
    observation_steps: list[ProcessorStep] = [LiberoProcessorStep()] if is_libero else []
    action_steps: list[ProcessorStep] = []
    return (
        PolicyProcessorPipeline(steps=observation_steps),
        PolicyProcessorPipeline(steps=action_steps),
    )
 def make_env(
    cfg: EnvConfig | str,
    n_envs: int = 1,
@@ -28,7 +28,6 @@ import torch
 from gymnasium import spaces
 from libero.libero import benchmark, get_libero_path
 from libero.libero.envs import OffScreenRenderEnv
 from robosuite.utils.transform_utils import quat2axisangle
 def _parse_camera_names(camera_name: str | Sequence[str]) -> list[str]:
@@ -175,11 +174,36 @@ class LiberoEnv(gym.Env):
            self.observation_space = spaces.Dict(
                {
                    "pixels": spaces.Dict(images),
-                    "agent_pos": spaces.Box(
+                    "robot_state": spaces.Dict(
-                        low=AGENT_POS_LOW,
+                        {
-                        high=AGENT_POS_HIGH,
+                            "eef": spaces.Dict(
-                        shape=(OBS_STATE_DIM,),
+                                {
-                        dtype=np.float64,
+                                    "pos": spaces.Box(low=-np.inf, high=np.inf, shape=(3,), dtype=np.float64),
                                    "quat": spaces.Box(
                                        low=-np.inf, high=np.inf, shape=(4,), dtype=np.float64
                                    ),
                                    "mat": spaces.Box(
                                        low=-np.inf, high=np.inf, shape=(3, 3), dtype=np.float64
                                    ),
                                }
                            ),
                            "gripper": spaces.Dict(
                                {
                                    "qpos": spaces.Box(
                                        low=-np.inf, high=np.inf, shape=(2,), dtype=np.float64
                                    ),
                                    "qvel": spaces.Box(
                                        low=-np.inf, high=np.inf, shape=(2,), dtype=np.float64
                                    ),
                                }
                            ),
                            "joints": spaces.Dict(
                                {
                                    "pos": spaces.Box(low=-np.inf, high=np.inf, shape=(7,), dtype=np.float64),
                                    "vel": spaces.Box(low=-np.inf, high=np.inf, shape=(7,), dtype=np.float64),
                                }
                            ),
                        }
                    ),
                }
            )
@@ -191,6 +215,7 @@ class LiberoEnv(gym.Env):
    def render(self):
        """
        Return the current frame for visualization.
        Fetches the raw observations from the wrapped environment, formats them with
        `_format_raw_obs`, and returns the entry stored under `["pixels"]["image"]`
        reversed along its first two axes.
        """
        formatted = self._format_raw_obs(self._env.env._get_observations())
        # Reverse both leading axes (H and W) so the frame is displayed upright.
        return formatted["pixels"]["image"][::-1, ::-1]
    def _make_envs_task(self, task_suite: Any, task_id: int = 0):
@@ -212,23 +237,48 @@ class LiberoEnv(gym.Env):
        images = {}
        for camera_name in self.camera_name:
            image = raw_obs[camera_name]
            image = image[::-1, ::-1]  # rotate 180 degrees
            images[self.camera_name_mapping[camera_name]] = image
-        state = np.concatenate(
+
-            (
+        eef_pos = raw_obs.get("robot0_eef_pos")
-                raw_obs["robot0_eef_pos"],
+        eef_quat = raw_obs.get("robot0_eef_quat")
-                quat2axisangle(raw_obs["robot0_eef_quat"]),
+
-                raw_obs["robot0_gripper_qpos"],
+        # rotation matrix from controller
-            )
+        eef_mat = self._env.robots[0].controller.ee_ori_mat if eef_pos is not None else None
-        )
+        gripper_qpos = raw_obs.get("robot0_gripper_qpos")
-        agent_pos = state
+        gripper_qvel = raw_obs.get("robot0_gripper_qvel")
        joint_pos = raw_obs.get("robot0_joint_pos")
        joint_vel = raw_obs.get("robot0_joint_vel")
        obs = {
            "pixels": images,
            "robot_state": {
                "eef": {
                    "pos": eef_pos,  # (3,)
                    "quat": eef_quat,  # (4,)
                    "mat": eef_mat,  # (3, 3)
                },
                "gripper": {
                    "qpos": gripper_qpos,  # (2,)
                    "qvel": gripper_qvel,  # (2,)
                },
                "joints": {
                    "pos": joint_pos,  # (7,)
                    "vel": joint_vel,  # (7,)
                },
            },
        }
        if self.obs_type == "pixels":
            return {"pixels": images.copy()}
        if self.obs_type == "pixels_agent_pos":
-            return {
+            # Validate required fields are present
-                "pixels": images.copy(),
+            if eef_pos is None or eef_quat is None or gripper_qpos is None:
-                "agent_pos": agent_pos,
+                raise ValueError(
-            }
+                    f"Missing required robot state fields in raw observation. "
                    f"Got eef_pos={eef_pos is not None}, eef_quat={eef_quat is not None}, "
                    f"gripper_qpos={gripper_qpos is not None}"
                )
            return obs
        raise NotImplementedError(
            f"The observation type '{self.obs_type}' is not supported in LiberoEnv. "
            "Please switch to an image-based obs_type (e.g. 'pixels', 'pixels_agent_pos')."
@@ -355,12 +405,10 @@ def create_libero_envs(
        print(f"Restricting to task_ids={task_ids_filter}")
    out: dict[str, dict[int, Any]] = defaultdict(dict)
    for suite_name in suite_names:
        suite = _get_suite(suite_name)
        total = len(suite.tasks)
        selected = _select_task_ids(total, task_ids_filter)
        if not selected:
            raise ValueError(f"No tasks selected for suite '{suite_name}' (available: {total}).")
@@ -29,10 +29,22 @@ from torch import Tensor
 from lerobot.configs.types import FeatureType, PolicyFeature
 from lerobot.envs.configs import EnvConfig
-from lerobot.utils.constants import OBS_ENV_STATE, OBS_IMAGE, OBS_IMAGES, OBS_STATE
+from lerobot.utils.constants import OBS_ENV_STATE, OBS_IMAGE, OBS_IMAGES, OBS_STATE, OBS_STR
 from lerobot.utils.utils import get_channel_first_image_shape
 def _convert_nested_dict(d):
    result = {}
    for k, v in d.items():
        if isinstance(v, dict):
            result[k] = _convert_nested_dict(v)
        elif isinstance(v, np.ndarray):
            result[k] = torch.from_numpy(v)
        else:
            result[k] = v
    return result
 def preprocess_observation(observations: dict[str, np.ndarray]) -> dict[str, Tensor]:
    # TODO(aliberts, rcadene): refactor this to use features from the environment (no hardcoding)
    """Convert environment observation to LeRobot format observation.
@@ -78,12 +90,14 @@ def preprocess_observation(observations: dict[str, np.ndarray]) -> dict[str, Ten
        return_observations[OBS_ENV_STATE] = env_state
-    # TODO(rcadene): enable pixels only baseline with `obs_type="pixels"` in environment by removing
+    if "agent_pos" in observations:
        agent_pos = torch.from_numpy(observations["agent_pos"]).float()
        if agent_pos.dim() == 1:
            agent_pos = agent_pos.unsqueeze(0)
        return_observations[OBS_STATE] = agent_pos
    if "robot_state" in observations:
        return_observations[f"{OBS_STR}.robot_state"] = _convert_nested_dict(observations["robot_state"])
    return return_observations
@@ -0,0 +1,154 @@
 #!/usr/bin/env python
 # Copyright 2025 The HuggingFace Inc. team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 from dataclasses import dataclass
 import torch
 from lerobot.configs.types import PipelineFeatureType, PolicyFeature
 from lerobot.utils.constants import OBS_IMAGES, OBS_STATE
 from .pipeline import ObservationProcessorStep, ProcessorStepRegistry
@dataclass
@ProcessorStepRegistry.register(name="libero_processor")
 class LiberoProcessorStep(ObservationProcessorStep):
    """
    Processes LIBERO observations into the LeRobot format.
    This step handles the specific observation structure from LIBERO environments,
    which includes nested robot_state dictionaries and image observations.
    **State Processing:**
    -   Processes the `robot_state` dictionary which contains nested end-effector,
        gripper, and joint information.
    -   Extracts and concatenates:
        - End-effector position (3D)
        - End-effector quaternion converted to axis-angle (3D)
        - Gripper joint positions (2D)
    -   Maps the concatenated state to `"observation.state"`.
    **Image Processing:**
    -   Rotates images by 180 degrees by flipping both height and width dimensions.
    -   This accounts for the HuggingFaceVLA/libero camera orientation convention.
    """
    def _process_observation(self, observation):
        """
        Processes both image and robot_state observations from LIBERO.
        Args:
            observation: Mapping of observation keys to values. Entries whose key starts
                with `f"{OBS_IMAGES}."` are flipped along dims 2 and 3 (assumes a
                (B, C, H, W) tensor layout; TODO confirm against the caller). An optional
                `"observation.robot_state"` entry must hold the nested `eef`/`gripper`
                tensors read below.
        Returns:
            A shallow copy of `observation` with the images flipped and, when
            `"observation.robot_state"` was present, that entry removed and replaced by a
            float32 state tensor stored under `OBS_STATE`, laid out as
            [eef_pos(3), eef_axis_angle(3), gripper_qpos(2)] with a leading batch dimension.
        """
        processed_obs = observation.copy()
        image_prefix = f"{OBS_IMAGES}."
        for key in list(processed_obs.keys()):
            if key.startswith(image_prefix):
                # Flip both H and W (180-degree rotation).
                processed_obs[key] = torch.flip(processed_obs[key], dims=[2, 3])
        # Process robot_state into a flat state vector
        if "observation.robot_state" in processed_obs:
            robot_state = processed_obs.pop("observation.robot_state")
            components = (
                robot_state["eef"]["pos"],  # (B, 3) or (3,)
                robot_state["eef"]["quat"],  # (B, 4) or (4,)
                robot_state["gripper"]["qpos"],  # (B, 2) or (2,)
            )
            # Add the batch dimension up front: `_quat2axisangle` only accepts (B, 4), so
            # unsqueezing after the concatenation could never be reached for unbatched input.
            eef_pos, eef_quat, gripper_qpos = (
                part.unsqueeze(0) if part.dim() == 1 else part for part in components
            )
            # Convert quaternion to axis-angle
            eef_axisangle = self._quat2axisangle(eef_quat)  # (B, 3), float32
            # Cast every part to float32 before concatenating so the result does not rely
            # on dtype promotion inside `torch.cat`.
            state = torch.cat(
                (eef_pos.float(), eef_axisangle.float(), gripper_qpos.float()), dim=-1
            )
            processed_obs[OBS_STATE] = state
        return processed_obs
    def transform_features(
        self, features: dict[PipelineFeatureType, dict[str, PolicyFeature]]
    ) -> dict[PipelineFeatureType, dict[str, PolicyFeature]]:
        """
        Transforms feature keys from the LIBERO format to the LeRobot standard.
        Args:
            features: Feature descriptions grouped by pipeline feature type.
        Returns:
            A new mapping in which every non-STATE group is shallow-copied as is and the
            STATE group is replaced by a single `"observation.state"` feature of shape
            `(8,)`: [eef_pos(3), axis_angle(3), gripper_qpos(2)].
        """
        # Local import: the file-level import of this module only brings in
        # PipelineFeatureType and PolicyFeature.
        from lerobot.configs.types import FeatureType

        # NOTE(review): this relies on PipelineFeatureType exposing a STATE member; confirm.
        # copy over non-STATE features
        new_features: dict[PipelineFeatureType, dict[str, PolicyFeature]] = {
            ft: feats.copy() for ft, feats in features.items() if ft != PipelineFeatureType.STATE
        }
        # Rebuild STATE with the flattened vector only. PolicyFeature is constructed with
        # `type`/`shape`, matching its other call sites; the previous `key`/`dtype`/
        # `description` keywords are not used by any other PolicyFeature construction.
        new_features[PipelineFeatureType.STATE] = {
            "observation.state": PolicyFeature(
                type=FeatureType.STATE,
                shape=(8,),  # [eef_pos(3), axis_angle(3), gripper(2)]
            ),
        }
        return new_features
    def observation(self, observation):
        """Process an observation by delegating to `_process_observation` and return its result."""
        processed = self._process_observation(observation)
        return processed
    def _quat2axisangle(self, quat: torch.Tensor) -> torch.Tensor:
        """
        Convert batched quaternions to axis-angle format.
        Only accepts torch tensors of shape (B, 4).
        Args:
            quat (Tensor): (B, 4) tensor of quaternions in (x, y, z, w) format
        Returns:
            Tensor: (B, 3) axis-angle vectors
        Raises:
            TypeError: if input is not a torch tensor
            ValueError: if shape is not (B, 4)
        """
        if not isinstance(quat, torch.Tensor):
            raise TypeError(f"_quat2axisangle expected a torch.Tensor, got {type(quat)}")
        if quat.ndim != 2 or quat.shape[1] != 4:
            raise ValueError(f"_quat2axisangle expected shape (B, 4), got {tuple(quat.shape)}")
        quat = quat.to(dtype=torch.float32)
        device = quat.device
        batch_size = quat.shape[0]
        w = quat[:, 3].clamp(-1.0, 1.0)
        den = torch.sqrt(torch.clamp(1.0 - w * w, min=0.0))
        result = torch.zeros((batch_size, 3), device=device)
        mask = den > 1e-10
        if mask.any():
            angle = 2.0 * torch.acos(w[mask])  # (M,)
            axis = quat[mask, :3] / den[mask].unsqueeze(1)
            result[mask] = axis * angle.unsqueeze(1)
        return result
@@ -78,7 +78,7 @@ from lerobot.transport.utils import (
    transitions_to_bytes,
 )
 from lerobot.utils.random_utils import set_seed
-from lerobot.utils.robot_utils import busy_wait
+from lerobot.utils.robot_utils import precise_sleep
 from lerobot.utils.transition import (
    Transition,
    move_state_dict_to_device,
@@ -398,7 +398,7 @@ def act_with_policy(
        if cfg.env.fps is not None:
            dt_time = time.perf_counter() - start_time
-            busy_wait(1 / cfg.env.fps - dt_time)
+            precise_sleep(1 / cfg.env.fps - dt_time)
 #  Communication Functions - Group all gRPC/messaging functions
@@ -74,7 +74,7 @@ from lerobot.teleoperators import (
 from lerobot.teleoperators.teleoperator import Teleoperator
 from lerobot.teleoperators.utils import TeleopEvents
 from lerobot.utils.constants import ACTION, DONE, OBS_IMAGES, OBS_STATE, REWARD
-from lerobot.utils.robot_utils import busy_wait
+from lerobot.utils.robot_utils import precise_sleep
 from lerobot.utils.utils import log_say
 logging.basicConfig(level=logging.INFO)
@@ -114,7 +114,7 @@ def reset_follower_position(robot_arm: Robot, target_position: np.ndarray) -> No
    for pose in trajectory:
        action_dict = dict(zip(current_position_dict, pose, strict=False))
        robot_arm.bus.sync_write("Goal_Position", action_dict)
-        busy_wait(0.015)
+        precise_sleep(0.015)
 class RobotEnv(gym.Env):
@@ -238,7 +238,7 @@ class RobotEnv(gym.Env):
            reset_follower_position(self.robot, np.array(self.reset_pose))
            log_say("Reset the environment done.", play_sounds=True)
-        busy_wait(self.reset_time_s - (time.perf_counter() - start_time))
+        precise_sleep(self.reset_time_s - (time.perf_counter() - start_time))
        super().reset(seed=seed, options=options)
@@ -713,7 +713,7 @@ def control_loop(
            transition = env_processor(transition)
        # Maintain fps timing
-        busy_wait(dt - (time.perf_counter() - step_start_time))
+        precise_sleep(dt - (time.perf_counter() - step_start_time))
    if dataset is not None and cfg.dataset.push_to_hub:
        logging.info("Pushing dataset to hub")
@@ -745,7 +745,7 @@ def replay_trajectory(
        )
        transition = action_processor(transition)
        env.step(transition[TransitionKey.ACTION])
-        busy_wait(1 / cfg.env.fps - (time.perf_counter() - start_time))
+        precise_sleep(1 / cfg.env.fps - (time.perf_counter() - start_time))
@parser.wrap()
@@ -71,7 +71,7 @@ from tqdm import trange
 from lerobot.configs import parser
 from lerobot.configs.eval import EvalPipelineConfig
-from lerobot.envs.factory import make_env
+from lerobot.envs.factory import make_env, make_env_pre_post_processors
 from lerobot.envs.utils import (
    add_envs_task,
    check_env_attributes_and_types,
@@ -94,6 +94,8 @@ from lerobot.utils.utils import (
 def rollout(
    env: gym.vector.VectorEnv,
    policy: PreTrainedPolicy,
    env_preprocessor: PolicyProcessorPipeline[dict[str, Any], dict[str, Any]],
    env_postprocessor: PolicyProcessorPipeline[dict[str, Any], dict[str, Any]],
    preprocessor: PolicyProcessorPipeline[dict[str, Any], dict[str, Any]],
    postprocessor: PolicyProcessorPipeline[PolicyAction, PolicyAction],
    seeds: list[int] | None = None,
@@ -165,11 +167,19 @@ def rollout(
        # Infer "task" from attributes of environments.
        # TODO: works with SyncVectorEnv but not AsyncVectorEnv
        observation = add_envs_task(env, observation)
        # Apply environment-specific preprocessing (e.g., LiberoProcessorStep for LIBERO)
        observation = env_preprocessor(observation)
        observation = preprocessor(observation)
        with torch.inference_mode():
            action = policy.select_action(observation)
        action = postprocessor(action)
        action_transition = {"action": action}
        action_transition = env_postprocessor(action_transition)
        action = action_transition["action"]
        # Convert to CPU / numpy.
        action_numpy: np.ndarray = action.to("cpu").numpy()
        assert action_numpy.ndim == 2, "Action dimensions should be (batch, action_dim)"
@@ -239,6 +249,8 @@ def rollout(
 def eval_policy(
    env: gym.vector.VectorEnv,
    policy: PreTrainedPolicy,
    env_preprocessor: PolicyProcessorPipeline[dict[str, Any], dict[str, Any]],
    env_postprocessor: PolicyProcessorPipeline[dict[str, Any], dict[str, Any]],
    preprocessor: PolicyProcessorPipeline[dict[str, Any], dict[str, Any]],
    postprocessor: PolicyProcessorPipeline[PolicyAction, PolicyAction],
    n_episodes: int,
@@ -319,6 +331,8 @@ def eval_policy(
        rollout_data = rollout(
            env=env,
            policy=policy,
            env_preprocessor=env_preprocessor,
            env_postprocessor=env_postprocessor,
            preprocessor=preprocessor,
            postprocessor=postprocessor,
            seeds=list(seeds) if seeds else None,
@@ -517,10 +531,16 @@ def eval_main(cfg: EvalPipelineConfig):
        pretrained_path=cfg.policy.pretrained_path,
        preprocessor_overrides=preprocessor_overrides,
    )
    # Create environment-specific preprocessor and postprocessor (e.g., for LIBERO environments)
    env_preprocessor, env_postprocessor = make_env_pre_post_processors(env_cfg=cfg.env)
    with torch.no_grad(), torch.autocast(device_type=device.type) if cfg.policy.use_amp else nullcontext():
        info = eval_policy_all(
            envs=envs,
            policy=policy,
            env_preprocessor=env_preprocessor,
            env_postprocessor=env_postprocessor,
            preprocessor=preprocessor,
            postprocessor=postprocessor,
            n_episodes=cfg.eval.n_episodes,
@@ -561,6 +581,8 @@ def eval_one(
    env: gym.vector.VectorEnv,
    *,
    policy: PreTrainedPolicy,
    env_preprocessor: PolicyProcessorPipeline[dict[str, Any], dict[str, Any]],
    env_postprocessor: PolicyProcessorPipeline[dict[str, Any], dict[str, Any]],
    preprocessor: PolicyProcessorPipeline[dict[str, Any], dict[str, Any]],
    postprocessor: PolicyProcessorPipeline[PolicyAction, PolicyAction],
    n_episodes: int,
@@ -576,6 +598,8 @@ def eval_one(
    task_result = eval_policy(
        env=env,
        policy=policy,
        env_preprocessor=env_preprocessor,
        env_postprocessor=env_postprocessor,
        preprocessor=preprocessor,
        postprocessor=postprocessor,
        n_episodes=n_episodes,
@@ -600,6 +624,8 @@ def run_one(
    env,
    *,
    policy,
    env_preprocessor,
    env_postprocessor,
    preprocessor,
    postprocessor,
    n_episodes: int,
@@ -622,6 +648,8 @@ def run_one(
    metrics = eval_one(
        env,
        policy=policy,
        env_preprocessor=env_preprocessor,
        env_postprocessor=env_postprocessor,
        preprocessor=preprocessor,
        postprocessor=postprocessor,
        n_episodes=n_episodes,
@@ -639,6 +667,8 @@ def run_one(
 def eval_policy_all(
    envs: dict[str, dict[int, gym.vector.VectorEnv]],
    policy,
    env_preprocessor: PolicyProcessorPipeline[dict[str, Any], dict[str, Any]],
    env_postprocessor: PolicyProcessorPipeline[dict[str, Any], dict[str, Any]],
    preprocessor: PolicyProcessorPipeline[dict[str, Any], dict[str, Any]],
    postprocessor: PolicyProcessorPipeline[PolicyAction, PolicyAction],
    n_episodes: int,
@@ -694,6 +724,8 @@ def eval_policy_all(
    task_runner = partial(
        run_one,
        policy=policy,
        env_preprocessor=env_preprocessor,
        env_postprocessor=env_postprocessor,
        preprocessor=preprocessor,
        postprocessor=postprocessor,
        n_episodes=n_episodes,
@@ -50,7 +50,7 @@ from lerobot.teleoperators import (  # noqa: F401
    make_teleoperator_from_config,
    so100_leader,
 )
-from lerobot.utils.robot_utils import busy_wait
+from lerobot.utils.robot_utils import precise_sleep
@dataclass
@@ -114,7 +114,7 @@ def find_joint_and_ee_bounds(cfg: FindJointLimitsConfig):
            print(f"Min joint pos position {np.round(min_pos, 4).tolist()}")
            break
-        busy_wait(0.01)
+        precise_sleep(0.01)
 def main():
@@ -119,7 +119,7 @@ from lerobot.utils.control_utils import (
    sanity_check_dataset_robot_compatibility,
 )
 from lerobot.utils.import_utils import register_third_party_devices
-from lerobot.utils.robot_utils import busy_wait
+from lerobot.utils.robot_utils import precise_sleep
 from lerobot.utils.utils import (
    get_safe_torch_device,
    init_logging,
@@ -364,7 +364,7 @@ def record_loop(
            log_rerun_data(observation=obs_processed, action=action_values)
        dt_s = time.perf_counter() - start_loop_t
-        busy_wait(1 / fps - dt_s)
+        precise_sleep(1 / fps - dt_s)
        timestamp = time.perf_counter() - start_episode_t
@@ -62,7 +62,7 @@ from lerobot.robots import (  # noqa: F401
 )
 from lerobot.utils.constants import ACTION
 from lerobot.utils.import_utils import register_third_party_devices
-from lerobot.utils.robot_utils import busy_wait
+from lerobot.utils.robot_utils import precise_sleep
 from lerobot.utils.utils import (
    init_logging,
    log_say,
@@ -121,7 +121,7 @@ def replay(cfg: ReplayConfig):
        _ = robot.send_action(processed_action)
        dt_s = time.perf_counter() - start_episode_t
-        busy_wait(1 / dataset.fps - dt_s)
+        precise_sleep(1 / dataset.fps - dt_s)
    robot.disconnect()
@@ -89,7 +89,7 @@ from lerobot.teleoperators import (  # noqa: F401
    so101_leader,
 )
 from lerobot.utils.import_utils import register_third_party_devices
-from lerobot.utils.robot_utils import busy_wait
+from lerobot.utils.robot_utils import precise_sleep
 from lerobot.utils.utils import init_logging, move_cursor_up
 from lerobot.utils.visualization_utils import init_rerun, log_rerun_data
@@ -170,12 +170,13 @@ def teleop_loop(
            # Display the final robot action that was sent
            for motor, value in robot_action_to_send.items():
                print(f"{motor:<{display_len}} | {value:>7.2f}")
-            move_cursor_up(len(robot_action_to_send) + 5)
+            move_cursor_up(len(robot_action_to_send) + 3)
        dt_s = time.perf_counter() - loop_start
-        busy_wait(1 / fps - dt_s)
+        precise_sleep(1 / fps - dt_s)
        loop_s = time.perf_counter() - loop_start
-        print(f"\ntime: {loop_s * 1e3:.2f}ms ({1 / loop_s:.0f} Hz)")
+        print(f"Teleop loop time: {loop_s * 1e3:.2f}ms ({1 / loop_s:.0f} Hz)")
        move_cursor_up(1)
        if duration is not None and time.perf_counter() - start >= duration:
            return
@@ -29,7 +29,7 @@ from lerobot.configs.train import TrainPipelineConfig
 from lerobot.datasets.factory import make_dataset
 from lerobot.datasets.sampler import EpisodeAwareSampler
 from lerobot.datasets.utils import cycle
-from lerobot.envs.factory import make_env
+from lerobot.envs.factory import make_env, make_env_pre_post_processors
 from lerobot.envs.utils import close_envs
 from lerobot.optim.factory import make_optimizer_and_scheduler
 from lerobot.policies.factory import make_policy, make_pre_post_processors
@@ -259,6 +259,8 @@ def train(cfg: TrainPipelineConfig, accelerator: Accelerator | None = None):
        logging.info(colored("Output dir:", "yellow", attrs=["bold"]) + f" {cfg.output_dir}")
        if cfg.env is not None:
            logging.info(f"{cfg.env.task=}")
            logging.info("Creating environment processors")
            env_preprocessor, env_postprocessor = make_env_pre_post_processors(env_cfg=cfg.env)
        logging.info(f"{cfg.steps=} ({format_big_number(cfg.steps)})")
        logging.info(f"{dataset.num_frames=} ({format_big_number(dataset.num_frames)})")
        logging.info(f"{dataset.num_episodes=}")
@@ -274,6 +276,7 @@ def train(cfg: TrainPipelineConfig, accelerator: Accelerator | None = None):
        sampler = EpisodeAwareSampler(
            dataset.meta.episodes["dataset_from_index"],
            dataset.meta.episodes["dataset_to_index"],
            episode_indices_to_use=dataset.episodes,
            drop_n_last_frames=cfg.policy.drop_n_last_frames,
            shuffle=True,
        )
@@ -384,6 +387,8 @@ def train(cfg: TrainPipelineConfig, accelerator: Accelerator | None = None):
                    eval_info = eval_policy_all(
                        envs=eval_env,  # dict[suite][task_id] -> vec_env
                        policy=accelerator.unwrap_model(policy),
                        env_preprocessor=env_preprocessor,
                        env_postprocessor=env_postprocessor,
                        preprocessor=preprocessor,
                        postprocessor=postprocessor,
                        n_episodes=cfg.eval.n_episodes,
@@ -70,3 +70,15 @@ LOOKAHEAD_BACKTRACKTABLE = 100
 # openpi
 OPENPI_ATTENTION_MASK_VALUE = -2.3819763e38  # TODO(pepijn): Modify this when extending support to fp8 models
 # Constants for LIBERO observation keys
 LIBERO_KEY_EEF_POS = "robot_state/eef/pos"
 LIBERO_KEY_EEF_QUAT = "robot_state/eef/quat"
 LIBERO_KEY_EEF_MAT = "robot_state/eef/mat"
 LIBERO_KEY_EEF_AXISANGLE = "robot_state/eef/axisangle"
 LIBERO_KEY_GRIPPER_QPOS = "robot_state/gripper/qpos"
 LIBERO_KEY_GRIPPER_QVEL = "robot_state/gripper/qvel"
 LIBERO_KEY_JOINTS_POS = "robot_state/joints/pos"
 LIBERO_KEY_JOINTS_VEL = "robot_state/joints/vel"
 LIBERO_KEY_PIXELS_AGENTVIEW = "pixels/agentview_image"
 LIBERO_KEY_PIXELS_EYE_IN_HAND = "pixels/robot0_eye_in_hand_image"
@@ -16,14 +16,40 @@ import platform
 import time
def precise_sleep(seconds: float, spin_threshold: float = 0.010, sleep_margin: float = 0.003) -> None:
    """
    Wait for `seconds` with better precision than time.sleep alone at the expense of more CPU usage.

    On macOS and Windows the wait is a hybrid: `time.sleep` covers most of the
    duration and a short busy-wait on `time.perf_counter` covers the final
    stretch. On every other platform this is a plain `time.sleep(seconds)`.

    Parameters:
      - seconds: duration to wait, in seconds. Zero or negative values return immediately.
      - spin_threshold: if remaining <= spin_threshold -> spin; otherwise sleep (seconds). Default 10ms
      - sleep_margin: when sleeping leave this much time before deadline to avoid oversleep. Default 3ms

    Returns:
        None.

    Note:
        The default parameters are chosen to prioritize timing accuracy over CPU usage for the common 30 FPS use case.
    """
    if seconds <= 0:
        return

    if platform.system() not in ("Darwin", "Windows"):
        # Elsewhere (e.g. Linux) time.sleep is accurate enough for most uses.
        # `seconds` is already known to be positive thanks to the guard above.
        time.sleep(seconds)
        return

    # On macOS and Windows the scheduler / sleep granularity can make
    # short sleeps inaccurate. Instead of burning CPU for the whole
    # duration, sleep for most of the time and spin for the final few
    # milliseconds to achieve good accuracy with much lower CPU usage.
    end_time = time.perf_counter() + seconds
    while True:
        remaining = end_time - time.perf_counter()
        if remaining <= 0:
            break
        if remaining > spin_threshold:
            # Plenty of time left: sleep, but stop `sleep_margin` short of the
            # deadline so an oversleeping scheduler does not push us past it.
            # max(..., 0) keeps the argument valid if sleep_margin > remaining.
            time.sleep(max(remaining - sleep_margin, 0))
        # Otherwise fall through and re-check the clock immediately: this is
        # the final short spin that hits the deadline precisely.
@@ -0,0 +1,72 @@
 #!/usr/bin/env python
 # Copyright 2025 The HuggingFace Inc. team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import numpy as np
 import torch
 from lerobot.envs.utils import preprocess_observation
 from lerobot.processor.env_processor import LiberoProcessorStep
 from lerobot.processor.pipeline import PolicyProcessorPipeline
 # Seed the global NumPy RNG so the synthetic observation below is reproducible.
 seed = 42
 np.random.seed(seed)
 # Batch size shared by every synthetic array.
 B = 5
 # Synthetic raw observation: two uint8 camera images plus nested robot-state arrays.
 # The arrays are drawn in the same order as the keys are listed.
 obs1 = dict(
     pixels=dict(
         image=(np.random.rand(B, 256, 256, 3) * 255).astype(np.uint8),
         image2=(np.random.rand(B, 256, 256, 3) * 255).astype(np.uint8),
     ),
     robot_state=dict(
         eef=dict(
             pos=np.random.randn(B, 3),
             quat=np.random.randn(B, 4),
             mat=np.random.randn(B, 3, 3),
         ),
         gripper=dict(
             qpos=np.random.randn(B, 2),
             qvel=np.random.randn(B, 2),
         ),
         joints=dict(
             pos=np.random.randn(B, 7),
             vel=np.random.randn(B, 7),
         ),
     ),
 )
 # Convert the raw observation, then run it through a pipeline holding only the LIBERO step.
 observation = preprocess_observation(obs1)
 libero_preprocessor = PolicyProcessorPipeline(steps=[LiberoProcessorStep()])
 processed_obs = libero_preprocessor(observation)
 # The processed output must expose a float32 state tensor of shape (B, 8, ...).
 assert "observation.state" in processed_obs
 state = processed_obs["observation.state"]
 assert isinstance(state, torch.Tensor)
 assert state.dtype == torch.float32
 assert state.shape[0] == B
 assert state.shape[1] == 8
 # Both camera images must be present as tensors of shape (B, 3, 256, 256).
 _image_keys = ("observation.images.image", "observation.images.image2")
 assert all(_key in processed_obs for _key in _image_keys)
 assert all(isinstance(processed_obs[_key], torch.Tensor) for _key in _image_keys)
 assert all(processed_obs[_key].shape == (B, 3, 256, 256) for _key in _image_keys)
Author	SHA1	Message	Date
Steven Palma	58f70b6bd3	fix(scripts): better prints teleop (#2538 )	2025-11-27 16:54:17 +01:00
Steven Palma	b07160eb1b	feat(utils): precise_sleep() less CPU hungry without sacrificing accuracy (#2526 )	2025-11-26 17:42:16 +01:00
Caroline Pascal	648ea8f485	fix(benchmark) : fixing video benchmark (#2094 ) * fix(time benchmark): removing deprecated TimeBenchmark dependency * fix(typo): renaming frames in an up-to-date fashion * feat(duets): rearanging crf and g parameters in a proper unique combination manner * fix(segfault): fixing segfault by adding a lock in ThreadPoolExecutor * chore(update) : update datasets, codecs and backends to the latest versions * chore(unused files): removing unused files * fix(dataset paths): fix datasets paths to live among lerobot datasets	2025-11-26 17:41:31 +01:00
Caroline Pascal	581dd45eae	feat(parallel encoding): making parallel encoding the default choice over all platforms (#2525 )	2025-11-26 14:57:34 +01:00
Steven Palma	17581a9449	fix(examples): wrap all of them into a main function (#2524 )	2025-11-26 14:28:04 +01:00
Steven Palma	87bee86640	feat(dataset): dynamic compress_level depending on the type of dataset (video or image) (#2517 )	2025-11-25 19:11:12 +01:00
Steven Palma	18b32dced9	feat(dataset): speed-up encoding time (#2514 ) * feat(dataset): speed-up encoding time * feat(dataset): add parallel encoding option * feat(datasets): parallel encoding only if num_cams > 2 * feat(datasets): implement feedback	2025-11-25 16:46:12 +01:00
Jade Choghari	36e8feefe3	docs: Add LeIsaac x LeRobot Envhub tutorial (#2498 ) * add leisaac doc * depreciate il in sim * fix readme * more * fix styling * update title * more changes * more * fix style * more * fix style	2025-11-25 16:23:12 +01:00
Michel Aractingi	0f551df8f4	add `absolute_to_reative_idx` for remapping indicies when a subset of data is loaded (#2490 )	2025-11-20 14:05:31 +01:00
Jade Choghari	6e86a69dcd	feat(envs): add envs pre-post processor (#2474 ) * more changes * working changes * more changes * more fixes * fix style * more * clean * put axis-1 * more fixes * more styling fixes: * iterate on review: * more changes * add env processor * style * more changes * add docs * fix imports * fix test, add to train * Update src/lerobot/envs/factory.py Co-authored-by: Michel Aractingi <michel.aractingi@huggingface.co> Signed-off-by: Jade Choghari <chogharijade@gmail.com> * iterate on review --------- Signed-off-by: Jade Choghari <chogharijade@gmail.com> Co-authored-by: jade.choghari@huggingface.co <“chogharijade@gmail.com”> Co-authored-by: Michel Aractingi <michel.aractingi@huggingface.co>	2025-11-19 18:36:14 +01:00
Eugene Mironov	8a915c6b6f	[RTC] Real Time Chunking for Pi0, Smolvla, Pi0.5 (#1698 ) * Add Real-Time Chunking (RTC) support for flow matching models Implement Real-Time Chunking (RTC) for action chunking policies using flow matching denoising. RTC enables smooth action transitions between consecutive chunks by using prefix guidance during denoising. Key features: - RTCProcessor class with denoise_step method for RTC guidance - Tracker system for debug tracking using time-based dictionary storage - RTCDebugVisualizer with comprehensive visualization utilities - Integration with SmolVLA policy for flow matching models - Support for multiple prefix attention schedules (ZEROS, ONES, LINEAR, EXP) - Configurable execution horizon and max guidance weight - Example scripts for dataset evaluation and real-time control Technical details: - Uses autograd-based gradient computation for RTC corrections - Time-based tracking eliminates duplicate step issues - Proxy methods in RTCProcessor for cleaner API - Full integration with LeRobot's policy and dataset systems Files added/modified: - src/lerobot/configs/types.py: Add RTCAttentionSchedule enum - src/lerobot/policies/rtc/: Core RTC implementation - configuration_rtc.py: RTC configuration - modeling_rtc.py: RTCProcessor with denoise_step - debug_handler.py: Tracker for debug information - debug_visualizer.py: Visualization utilities - src/lerobot/policies/smolvla/modeling_smolvla.py: RTC integration - examples/rtc/: Example scripts and evaluation tools 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Alexander Soare <alexander.soare159@gmail.com> Co-Authored-By: Claude <noreply@anthropic.com> * Fix rtc_config attribute access in SmolVLA Use getattr() to safely check for rtc_config attribute existence instead of direct attribute access. This fixes AttributeError when loading policies without rtc_config in their config. 
🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Alexander Soare <alexander.soare159@gmail.com> Co-Authored-By: Claude <noreply@anthropic.com> * fixup! Fix rtc_config attribute access in SmolVLA * Add RTCConfig field to SmolVLAConfig Add rtc_config as an optional field in SmolVLAConfig to properly support Real-Time Chunking configuration. This replaces the previous getattr() workarounds with direct attribute access, making the code cleaner and more maintainable. Changes: - Import RTCConfig in configuration_smolvla.py - Add rtc_config: RTCConfig \| None = None field - Revert getattr() calls to direct attribute access in modeling_smolvla.py 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Alexander Soare <alexander.soare159@gmail.com> Co-Authored-By: Claude <noreply@anthropic.com> * Refactor RTC enabled checks to use _rtc_enabled helper Add _rtc_enabled() helper method in VLAFlowMatching class to simplify and clean up RTC enabled checks throughout the code. This reduces code duplication and improves readability. Changes: - Add _rtc_enabled() method in VLAFlowMatching - Replace verbose rtc_config checks with _rtc_enabled() calls - Maintain exact same functionality with cleaner code 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Alexander Soare <alexander.soare159@gmail.com> Co-Authored-By: Claude <noreply@anthropic.com> * Rename track_debug method to track Simplify the method name from track_debug to just track for better readability and consistency. The method already has clear documentation about its debug tracking purpose. 
Changes: - Rename RTCProcessor.track_debug() to track() - Update all call sites in modeling_smolvla.py and modeling_rtc.py 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Alexander Soare <alexander.soare159@gmail.com> Co-Authored-By: Claude <noreply@anthropic.com> * Use output_dir for saving all evaluation images Update eval_dataset.py to save all comparison images to the configured output_dir instead of the current directory. This provides better organization and allows users to specify where outputs should be saved. Changes: - Add os import at top level - Create output_dir at start of run_evaluation() - Save all comparison images to output_dir - Remove duplicate os imports - Update init_rtc_processor() docstring to be more concise 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Alexander Soare <alexander.soare159@gmail.com> Co-Authored-By: Claude <noreply@anthropic.com> * fixup! Use output_dir for saving all evaluation images * Fix logging buffering and enable tracking when RTC config provided - Add force=True to logging.basicConfig to override existing configuration - Enable line buffering for stdout/stderr for real-time log output - Modify init_rtc_processor to create processor when rtc_config exists even if RTC is disabled, allowing tracking of denoising data 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com> Co-Authored-By: Alexander Soare <alexander.soare159@gmail.com> * Refactor SmolVLA plotting to use tracker data instead of local variables Remove local tracking variables (correction, x1_t, error) from the denoising loop and instead retrieve plotting data from the RTC tracker after each denoise step. This makes the code cleaner and uses the tracker as the single source of truth for debug/visualization data. 
Changes: - Remove initialization of correction, x1_t, error before denoising loop - After each Euler step, retrieve most recent debug step from tracker - Extract correction, x1_t, err from debug step for plotting - Update tracking condition to use is_debug_enabled() method 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com> Co-Authored-By: Alexander Soare <alexander.soare159@gmail.com> * Move plotting logic from modeling_smolvla to eval_dataset script Refactor to improve separation of concerns: modeling_smolvla.py changes: - Remove all plotting logic from sample_actions method - Remove viz_xt_axs, viz_vt_axs, viz_x1t_axs parameters - Remove matplotlib and RTCDebugVisualizer imports - Remove viz_fig, viz_axs, denoise_step_counter instance variables - Simplify denoising loop to only track data in rtc_processor eval_dataset.py changes: - Add _plot_denoising_steps_from_tracker helper method - Retrieve debug steps from tracker after inference - Plot x_t, v_t, x1_t, correction, and error from tracker data - Enable debug tracking (cfg.rtc.debug = True) for visualization - Remove viz axes parameters from predict_action_chunk calls modeling_rtc.py changes: - Remove v_t from track() call (handled by user change) Benefits: - Cleaner modeling code focused on inference - Evaluation script owns all visualization logic - Better separation of concerns - Tracker is single source of truth for debug data 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com> Co-Authored-By: Alexander Soare <alexander.soare159@gmail.com> * Refactor plotting loging * fixup! 
Refactor plotting loging * Improve visualization: separate correction plot and fix axis scaling Changes: - Create separate figure for correction data instead of overlaying on v_t - Add _rescale_axes helper method to properly scale all axes - Add 10% margin to y-axis for better visualization - Fix v_t chart vertical compression issue Benefits: - Clearer v_t plot without correction overlay - Better axis scaling with proper margins - Separate correction figure for focused analysis - Improved readability of all denoising visualizations Output files: - denoising_xt_comparison.png (x_t trajectories) - denoising_vt_comparison.png (v_t velocity - now cleaner) - denoising_correction_comparison.png (NEW - separate corrections) - denoising_x1t_comparison.png (x1_t state with error) 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com> Co-Authored-By: Alexander Soare <alexander.soare159@gmail.com> * fixup! Improve visualization: separate correction plot and fix axis scaling * fixup! fixup! Improve visualization: separate correction plot and fix axis scaling * fixup! fixup! fixup! Improve visualization: separate correction plot and fix axis scaling * Fix traacking * Right kwargs for the policy * Add tests for tracker * Fix tests * Drop not required methods * Add torch compilation for eval_dataset * delete policies * Add matplotliv to dev * fixup! Add matplotliv to dev * Experiemnt with late detach * Debug * Fix compilation * Add RTC to PI0 * Pi0 * Pi0 eval dataset * fixup! Pi0 eval dataset * Turn off compilation for pi0/pi05 * fixup! Turn off compilation for pi0/pi05 * fixup! fixup! Turn off compilation for pi0/pi05 * fixup! fixup! fixup! Turn off compilation for pi0/pi05 * fixup! fixup! fixup! fixup! Turn off compilation for pi0/pi05 * fixup! fixup! fixup! fixup! fixup! 
Turn off compilation for pi0/pi05 * Add workable flow * Small fixes * Add more tests * Add validatio at the end * Update README * Silent validation * Fix tests * Add tests for modeling_rtc * Add tests for flow matching models with RTC * fixup! Add tests for flow matching models with RTC * fixup! fixup! Add tests for flow matching models with RTC * Add one more test * fixup! Add one more test * Fix test to use _rtc_enabled() instead of is_rtc_enabled() 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com> * fixup! Fix test to use _rtc_enabled() instead of is_rtc_enabled() * fixup! fixup! Fix test to use _rtc_enabled() instead of is_rtc_enabled() * Add RTC initialization tests without config for PI0.5 and SmolVLA Add test_pi05_rtc_initialization_without_rtc_config and test_smolvla_rtc_initialization_without_rtc_config to verify that policies can initialize without RTC config and that _rtc_enabled() returns False in this case. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com> * Fix PI0.5 init_rtc_processor to use getattr instead of direct model access 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com> * Fix SmolVLA init_rtc_processor to use getattr instead of direct model access 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com> * Fix PI0.5 RTC tests to use quantile stats (q01, q99) for normalization 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com> * fixup! Fix PI0.5 RTC tests to use quantile stats (q01, q99) for normalization * Fixup eval with real robot * fixup! Fixup eval with real robot * fixup! fixup! Fixup eval with real robot * Extract simulator logic from eval_with real robot and add proper headers to files * Update images * Fix tests * fixup! 
Fix tests * add docs for rtc * enhance doc and add images * Fix instal instructions --------- Co-authored-by: Ben Zhang <benzhangniu@gmail.com> Co-authored-by: Alexander Soare <alexander.soare159@gmail.com> Co-authored-by: Michel Aractingi <michel.aractingi@huggingface.co>	2025-11-19 11:19:48 +01:00
Michel Aractingi	b464d9f8bc	Fix episode filtering bug when requesting a subset of the episodes in a dataset (#2456 ) * filter episodes in load_nested_dataset * nit * remove test filtering * move import to module level * added missing episode indices to the EpisodeAwareSampler in lerobot_train.py;	2025-11-18 17:26:41 +01:00