refactor import fixes

2026-05-25 21:50:03 +00:00 · 2026-04-11 18:02:59 +02:00
parent d626964119
commit af0d72bd42
69 changed files with 306 additions and 339 deletions
@@ -15,10 +15,9 @@
 """
 Public API for lightweight, base-dependency-only utilities.

-Heavy utility modules (train_utils, control_utils, visualization_utils)
-are intentionally NOT re-exported here to avoid pulling in optional
-dependencies. Import them directly, e.g.:
-``from lerobot.utils.train_utils import save_checkpoint``
+Heavy cross-cutting modules (train_utils, control_utils) have been moved
+to ``lerobot.common``. ``visualization_utils`` remains here but is
+intentionally NOT re-exported to avoid pulling in optional dependencies.
 """

 from .constants import (
@@ -1,242 +0,0 @@
-# Copyright 2024 The HuggingFace Inc. team. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from __future__ import annotations
-
-########################################################################################
-# Utilities
-########################################################################################
-import logging
-import traceback
-from contextlib import nullcontext
-from copy import copy
-from functools import cache
-from typing import TYPE_CHECKING, Any
-
-import numpy as np
-import torch
-
-from lerobot.policies import PreTrainedPolicy, prepare_observation_for_inference
-
-if TYPE_CHECKING:
-    from lerobot.datasets import LeRobotDataset
-from lerobot.processor import PolicyProcessorPipeline
-from lerobot.robots import Robot
-from lerobot.types import PolicyAction
-
-
@cache
def is_headless():
    """
    Report whether the process should be treated as running without a display.

    The probe is an attempted import of `pynput`. A clean import means "not headless";
    any exception raised by the import means "headless", in which case a notice and the
    traceback are printed. Because of `@cache`, the probe and its output happen only on
    the first call; later calls return the stored answer.

    Returns:
        False if `pynput` imported successfully, True if the import raised.
    """
    try:
        import pynput  # noqa
    except Exception:
        # Deliberately broad: whatever the import raises is treated as "headless".
        print(
            "Error trying to import pynput. Switching to headless mode. "
            "As a result, the video stream from the cameras won't be shown, "
            "and you won't be able to change the control flow with keyboards. "
            "For more info, see traceback below.\n"
        )
        traceback.print_exc()
        print()
        return True
    else:
        return False
-
-
def predict_action(
    observation: dict[str, np.ndarray],
    policy: PreTrainedPolicy,
    device: torch.device,
    preprocessor: PolicyProcessorPipeline[dict[str, Any], dict[str, Any]],
    postprocessor: PolicyProcessorPipeline[PolicyAction, PolicyAction],
    use_amp: bool,
    task: str | None = None,
    robot_type: str | None = None,
):
    """
    Run one inference step: observation in, post-processed action out.

    The pipeline, executed under `torch.inference_mode()`:
    1. Shallow-copy the observation dict so the caller's dict is not rebound.
    2. Hand it to `prepare_observation_for_inference` together with `device`, `task`
       and `robot_type`.
    3. Run `preprocessor` on the prepared observation.
    4. Call `policy.select_action` on the result.
    5. Run `postprocessor` on the selected action and return its output unchanged.

    Autocast is enabled only when `device.type == "cuda"` and `use_amp` is truthy.

    Args:
        observation: Mapping of observation names to NumPy arrays.
        policy: The policy whose `select_action` produces the raw action.
        device: Device forwarded to observation preparation; its type also gates autocast.
        preprocessor: Pipeline applied to the prepared observation.
        postprocessor: Pipeline applied to the raw action.
        use_amp: Whether to use automatic mixed precision (only honoured on CUDA).
        task: Optional task identifier forwarded to observation preparation.
        robot_type: Optional robot-type identifier forwarded to observation preparation.

    Returns:
        Whatever `postprocessor` returns for the selected action.
    """
    observation = copy(observation)

    with torch.inference_mode():
        # Mixed precision is opt-in and only meaningful on CUDA; otherwise use a no-op context.
        if device.type == "cuda" and use_amp:
            amp_context = torch.autocast(device_type=device.type)
        else:
            amp_context = nullcontext()

        with amp_context:
            prepared = prepare_observation_for_inference(observation, device, task, robot_type)
            model_input = preprocessor(prepared)
            raw_action = policy.select_action(model_input)
            action = postprocessor(raw_action)

    return action
-
-
def init_keyboard_listener():
    """
    Start a background keyboard listener and return it with a dict of control flags.

    Key bindings handled by the listener:
    - right arrow: sets `exit_early`
    - left arrow: sets `rerecord_episode` and `exit_early`
    - escape: sets `stop_recording` and `exit_early`

    When `is_headless()` is true no listener is created; a warning is logged and the
    flags are returned with all values False.

    Returns:
        A `(listener, events)` tuple. `listener` is the started `pynput.keyboard.Listener`,
        or `None` in a headless environment. `events` is a dict with the boolean keys
        `exit_early`, `rerecord_episode` and `stop_recording`, mutated by the listener.
    """
    events = {
        "exit_early": False,
        "rerecord_episode": False,
        "stop_recording": False,
    }

    if is_headless():
        logging.warning(
            "Headless environment detected. On-screen cameras display and keyboard inputs will not be available."
        )
        return None, events

    # Imported lazily so that headless environments never touch pynput here.
    from pynput import keyboard

    def on_press(key):
        # Errors are reported instead of raised so a bad key event does not propagate.
        try:
            if key == keyboard.Key.right:
                print("Right arrow key pressed. Exiting loop...")
                events.update(exit_early=True)
            elif key == keyboard.Key.left:
                print("Left arrow key pressed. Exiting loop and rerecord the last episode...")
                events.update(rerecord_episode=True, exit_early=True)
            elif key == keyboard.Key.esc:
                print("Escape key pressed. Stopping data recording...")
                events.update(stop_recording=True, exit_early=True)
        except Exception as e:
            print(f"Error handling key press: {e}")

    listener = keyboard.Listener(on_press=on_press)
    listener.start()
    return listener, events
-
-
def sanity_check_dataset_name(repo_id, policy_cfg):
    """
    Enforce the "eval_" naming convention between a dataset name and a policy config.

    The dataset name is the part of `repo_id` after the last "/" (the whole string when
    there is no "/"). The name must start with "eval_" if and only if `policy_cfg` is
    not None.

    Args:
        repo_id: Dataset repository ID, typically "<namespace>/<dataset_name>".
        policy_cfg: Policy configuration object (its `type` attribute is used in the error
            message), or `None` when no policy is involved.

    Raises:
        ValueError: If the name starts with "eval_" but no policy is given, or a policy is
            given but the name does not start with "eval_".
    """
    # Take the last path segment rather than unpacking a two-way split: an ID without a
    # namespace (or with extra segments) would otherwise fail with an unrelated
    # "values to unpack" ValueError before the convention is even checked.
    dataset_name = repo_id.rpartition("/")[2]
    is_eval_name = dataset_name.startswith("eval_")
    has_policy = policy_cfg is not None

    if is_eval_name and not has_policy:
        raise ValueError(
            f"Your dataset name begins with 'eval_' ({dataset_name}), but no policy is provided."
        )

    if has_policy and not is_eval_name:
        raise ValueError(
            f"Your dataset name does not begin with 'eval_' ({dataset_name}), but a policy is provided ({policy_cfg.type})."
        )
-
-
def sanity_check_dataset_robot_compatibility(
    dataset: LeRobotDataset, robot: Robot, fps: int, features: dict
) -> None:
    """
    Verify that an existing dataset matches the current robot and recording settings.

    Three pairs are compared with `DeepDiff`, ignoring any path ending in `['info']`:
    the dataset's `meta.robot_type` against `robot.robot_type`, the dataset's `fps`
    against `fps`, and the dataset's `features` against `features` merged with
    `DEFAULT_FEATURES` (defaults win on key clashes).

    Args:
        dataset: Dataset whose stored metadata is checked.
        robot: Robot providing the current `robot_type`.
        fps: Current recording frequency.
        features: Feature description of the current recording session.

    Raises:
        ValueError: If at least one of the compared fields differs; the message lists
            every mismatching field.
    """
    from .import_utils import require_package

    # deepdiff is an optional dependency; check for it before importing.
    require_package("deepdiff", extra="hardware")
    from deepdiff import DeepDiff

    from .constants import DEFAULT_FEATURES

    comparisons = (
        ("robot_type", dataset.meta.robot_type, robot.robot_type),
        ("fps", dataset.fps, fps),
        ("features", dataset.features, {**features, **DEFAULT_FEATURES}),
    )

    mismatches = [
        f"{name}: expected {current}, got {stored}"
        for name, stored, current in comparisons
        if DeepDiff(stored, current, exclude_regex_paths=[r".*\['info'\]$"])
    ]

    if not mismatches:
        return

    raise ValueError(
        "Dataset metadata compatibility check failed with mismatches:\n" + "\n".join(mismatches)
    )
@@ -1,174 +0,0 @@
-#!/usr/bin/env python
-
-# Copyright 2024 The HuggingFace Inc. team. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-from pathlib import Path
-
-from torch.optim import Optimizer
-from torch.optim.lr_scheduler import LRScheduler
-
-from lerobot.configs.train import TrainPipelineConfig
-from lerobot.optim import (
-    load_optimizer_state,
-    load_scheduler_state,
-    save_optimizer_state,
-    save_scheduler_state,
-)
-from lerobot.policies import PreTrainedPolicy
-from lerobot.processor import PolicyProcessorPipeline
-
-from .constants import (
-    CHECKPOINTS_DIR,
-    LAST_CHECKPOINT_LINK,
-    PRETRAINED_MODEL_DIR,
-    TRAINING_STATE_DIR,
-    TRAINING_STEP,
-)
-from .io_utils import load_json, write_json
-from .random_utils import load_rng_state, save_rng_state
-
-
def get_step_identifier(step: int, total_steps: int) -> str:
    """Return `step` zero-padded to max(6, number of characters in `str(total_steps)`) digits."""
    width = len(str(total_steps))
    if width < 6:
        width = 6
    return format(step, f"0{width}d")
-
-
def get_step_checkpoint_dir(output_dir: Path, total_steps: int, step: int) -> Path:
    """Build `output_dir / CHECKPOINTS_DIR / <zero-padded step>` for the given step."""
    step_dir_name = get_step_identifier(step, total_steps)
    return (output_dir / CHECKPOINTS_DIR).joinpath(step_dir_name)
-
-
def save_training_step(step: int, save_dir: Path) -> None:
    """Write `{"step": step}` as JSON to `save_dir / TRAINING_STEP`."""
    payload = {"step": step}
    destination = save_dir / TRAINING_STEP
    write_json(payload, destination)
-
-
def load_training_step(save_dir: Path) -> int:
    """Read `save_dir / TRAINING_STEP` as JSON and return the value stored under "step"."""
    return load_json(save_dir / TRAINING_STEP)["step"]
-
-
def update_last_checkpoint(checkpoint_dir: Path) -> Path:
    """
    Point the "last checkpoint" symlink at `checkpoint_dir`.

    The link lives next to `checkpoint_dir` (in its parent directory) under the name
    `LAST_CHECKPOINT_LINK`. An existing symlink of that name is removed first. The new
    link target is relative to the parent directory.

    Args:
        checkpoint_dir: The checkpoint directory the link should point to.

    Returns:
        The path of the symlink. The function was already annotated `-> Path` but used to
        fall off the end and return None.
    """
    last_checkpoint_dir = checkpoint_dir.parent / LAST_CHECKPOINT_LINK
    if last_checkpoint_dir.is_symlink():
        last_checkpoint_dir.unlink()
    # Relative target: just the checkpoint directory's own name within the shared parent.
    relative_target = checkpoint_dir.relative_to(checkpoint_dir.parent)
    last_checkpoint_dir.symlink_to(relative_target)
    return last_checkpoint_dir
-
-
def save_checkpoint(
    checkpoint_dir: Path,
    step: int,
    cfg: TrainPipelineConfig,
    policy: PreTrainedPolicy,
    optimizer: Optimizer,
    scheduler: LRScheduler | None = None,
    preprocessor: PolicyProcessorPipeline | None = None,
    postprocessor: PolicyProcessorPipeline | None = None,
) -> None:
    """Write a full checkpoint (model artifacts + training state) under `checkpoint_dir`.

    Resulting layout:

    005000/  #  training step at checkpoint
    ├── pretrained_model/
    │   ├── config.json  # policy config
    │   ├── model.safetensors  # policy weights
    │   ├── train_config.json  # train config
    │   ├── processor.json  # processor config (if preprocessor provided)
    │   └── step_*.safetensors  # processor state files (if any)
    └── training_state/
        ├── optimizer_param_groups.json  #  optimizer param groups
        ├── optimizer_state.safetensors  # optimizer state
        ├── rng_state.safetensors  # rng states
        ├── scheduler_state.json  # scheduler state
        └── training_step.json  # training step

    Args:
        checkpoint_dir (Path): Directory of this checkpoint (e.g. the step directory).
        step (int): The training step at that checkpoint.
        cfg (TrainPipelineConfig): The training config used for this run.
        policy (PreTrainedPolicy): The policy to save.
        optimizer (Optimizer): The optimizer to save the state from.
        scheduler (LRScheduler | None, optional): The scheduler to save the state from. Defaults to None.
        preprocessor: The preprocessor pipeline to save. Defaults to None.
        postprocessor: The postprocessor pipeline to save. Defaults to None.
    """
    model_dir = checkpoint_dir / PRETRAINED_MODEL_DIR

    policy.save_pretrained(model_dir)
    cfg.save_pretrained(model_dir)

    if cfg.peft is not None:
        # With PEFT, policy.save_pretrained only writes the adapter weights + config, not the
        # policy config needed to load the model, so that config is written explicitly.
        policy.config.save_pretrained(model_dir)

    # Optional pipelines are saved into the same directory, preprocessor first.
    for pipeline in (preprocessor, postprocessor):
        if pipeline is not None:
            pipeline.save_pretrained(model_dir)

    save_training_state(checkpoint_dir, step, optimizer, scheduler)
-
-
def save_training_state(
    checkpoint_dir: Path,
    train_step: int,
    optimizer: Optimizer | None = None,
    scheduler: LRScheduler | None = None,
) -> None:
    """
    Persist the training step, RNG state and (when given) optimizer/scheduler state.

    Everything is written into `checkpoint_dir / TRAINING_STATE_DIR`, which is created
    if it does not exist.

    Args:
        checkpoint_dir (Path): The checkpoint directory that will hold the training-state dir.
        train_step (int): Current training step.
        optimizer (Optimizer | None, optional): The optimizer from which to save the state_dict.
            Skipped when None. Defaults to None.
        scheduler (LRScheduler | None, optional): The scheduler from which to save the state_dict.
            Skipped when None. Defaults to None.
    """
    state_dir = checkpoint_dir / TRAINING_STATE_DIR
    state_dir.mkdir(parents=True, exist_ok=True)

    # Step and RNG state are always written; optimizer and scheduler only when provided.
    save_training_step(train_step, state_dir)
    save_rng_state(state_dir)

    if optimizer is not None:
        save_optimizer_state(optimizer, state_dir)

    if scheduler is not None:
        save_scheduler_state(scheduler, state_dir)
-
-
def load_training_state(
    checkpoint_dir: Path, optimizer: Optimizer, scheduler: LRScheduler | None
) -> tuple[int, Optimizer, LRScheduler | None]:
    """
    Restore RNG state, training step, optimizer state and scheduler state from a checkpoint.
    Used when resuming a training run.

    Args:
        checkpoint_dir (Path): The checkpoint directory. Must contain the training-state dir
            (`TRAINING_STATE_DIR`).
        optimizer (Optimizer): The optimizer to load the state_dict to.
        scheduler (LRScheduler | None): The scheduler to load the state_dict to (can be None,
            in which case None is returned in its place).

    Raises:
        NotADirectoryError: If `checkpoint_dir / TRAINING_STATE_DIR` is not a directory.

    Returns:
        tuple[int, Optimizer, LRScheduler | None]: training step, optimizer and scheduler as
            returned by their respective loaders.
    """
    state_dir = checkpoint_dir / TRAINING_STATE_DIR
    if not state_dir.is_dir():
        raise NotADirectoryError(state_dir)

    # Order kept as-is: RNG state first, then step, optimizer, scheduler.
    load_rng_state(state_dir)
    resumed_step = load_training_step(state_dir)
    restored_optimizer = load_optimizer_state(optimizer, state_dir)
    restored_scheduler = None if scheduler is None else load_scheduler_state(scheduler, state_dir)

    return resumed_step, restored_optimizer, restored_scheduler