diff --git a/src/lerobot/async_inference/helpers.py b/src/lerobot/async_inference/helpers.py
index 9dd44eb44..e0e96c95f 100644
--- a/src/lerobot/async_inference/helpers.py
+++ b/src/lerobot/async_inference/helpers.py
@@ -23,7 +23,6 @@ from typing import Any
 import torch
 
 from lerobot.configs.types import PolicyFeature
-from lerobot.datasets.feature_utils import build_dataset_frame, hw_to_dataset_features
 
 # NOTE: Configs need to be loaded for the client to be able to instantiate the policy config
 from lerobot.policies import (  # noqa: F401
@@ -36,6 +35,7 @@ from lerobot.policies import (  # noqa: F401
 )
 from lerobot.robots.robot import Robot
 from lerobot.utils.constants import OBS_IMAGES, OBS_STATE, OBS_STR
+from lerobot.utils.feature_utils import build_dataset_frame, hw_to_dataset_features
 from lerobot.utils.utils import init_logging
 
 Action = torch.Tensor
diff --git a/src/lerobot/data_processing/sarm_annotations/subtask_annotation.py b/src/lerobot/data_processing/sarm_annotations/subtask_annotation.py
index 8f3a65e39..6865a6d1e 100644
--- a/src/lerobot/data_processing/sarm_annotations/subtask_annotation.py
+++ b/src/lerobot/data_processing/sarm_annotations/subtask_annotation.py
@@ -746,7 +746,7 @@ def save_annotations_to_dataset(
     dataset_path: Path, annotations: dict[int, SubtaskAnnotation], fps: int, prefix: str = "sparse"
 ):
     """Save annotations to LeRobot dataset parquet format."""
-    from lerobot.datasets.io_utils import load_episodes
+    from lerobot.datasets import load_episodes
     from lerobot.datasets.utils import DEFAULT_EPISODES_PATH
 
     episodes_dataset = load_episodes(dataset_path)
@@ -841,7 +841,7 @@ def generate_auto_sparse_annotations(
 
 def load_annotations_from_dataset(dataset_path: Path, prefix: str = "sparse") -> dict[int, SubtaskAnnotation]:
     """Load annotations from LeRobot dataset parquet files."""
-    from lerobot.datasets.io_utils import load_episodes
+    from lerobot.datasets import load_episodes
 
     episodes_dataset = load_episodes(dataset_path)
     if not episodes_dataset or len(episodes_dataset) == 0:
diff --git a/src/lerobot/datasets/__init__.py b/src/lerobot/datasets/__init__.py
index d1522eda2..ec7b9f45f 100644
--- a/src/lerobot/datasets/__init__.py
+++ b/src/lerobot/datasets/__init__.py
@@ -20,10 +20,15 @@ from lerobot.utils.import_utils import require_package
 require_package("datasets", extra="dataset")
 
 from lerobot.datasets.dataset_metadata import LeRobotDatasetMetadata
+from lerobot.datasets.factory import make_dataset
+from lerobot.datasets.image_writer import safe_stop_image_writer
+from lerobot.datasets.io_utils import load_episodes, write_stats
 from lerobot.datasets.lerobot_dataset import LeRobotDataset
 from lerobot.datasets.multi_dataset import MultiLeRobotDataset
+from lerobot.datasets.pipeline_features import aggregate_pipeline_dataset_features, create_initial_features
 from lerobot.datasets.sampler import EpisodeAwareSampler
 from lerobot.datasets.streaming_dataset import StreamingLeRobotDataset
+from lerobot.datasets.video_utils import VideoEncodingManager
 
 __all__ = [
     "EpisodeAwareSampler",
@@ -31,4 +36,11 @@ __all__ = [
     "LeRobotDatasetMetadata",
     "MultiLeRobotDataset",
     "StreamingLeRobotDataset",
+    "VideoEncodingManager",
+    "aggregate_pipeline_dataset_features",
+    "create_initial_features",
+    "load_episodes",
+    "make_dataset",
+    "safe_stop_image_writer",
+    "write_stats",
 ]
diff --git a/src/lerobot/datasets/dataset_metadata.py b/src/lerobot/datasets/dataset_metadata.py
index d79f4bfba..cf150c6dd 100644
--- a/src/lerobot/datasets/dataset_metadata.py
+++ b/src/lerobot/datasets/dataset_metadata.py
@@ -24,7 +24,7 @@ import pyarrow.parquet as pq
 from huggingface_hub import snapshot_download
 
 from lerobot.datasets.compute_stats import aggregate_stats
-from lerobot.datasets.feature_utils import _validate_feature_names, create_empty_dataset_info
+from lerobot.datasets.feature_utils import create_empty_dataset_info
 from lerobot.datasets.io_utils import (
     get_file_size_in_mb,
     load_episodes,
@@ -39,7 +39,6 @@ from lerobot.datasets.io_utils import (
 )
 from lerobot.datasets.utils import (
     DEFAULT_EPISODES_PATH,
-    DEFAULT_FEATURES,
     INFO_PATH,
     check_version_compatibility,
     flatten_dict,
@@ -49,7 +48,8 @@ from lerobot.datasets.utils import (
     update_chunk_file_indices,
 )
 from lerobot.datasets.video_utils import get_video_info
-from lerobot.utils.constants import HF_LEROBOT_HOME, HF_LEROBOT_HUB_CACHE
+from lerobot.utils.constants import DEFAULT_FEATURES, HF_LEROBOT_HOME, HF_LEROBOT_HUB_CACHE
+from lerobot.utils.feature_utils import _validate_feature_names
 
 CODEBASE_VERSION = "v3.0"
 
diff --git a/src/lerobot/datasets/factory.py b/src/lerobot/datasets/factory.py
index d2b3090c2..1baac5e1e 100644
--- a/src/lerobot/datasets/factory.py
+++ b/src/lerobot/datasets/factory.py
@@ -25,12 +25,7 @@ from lerobot.datasets.lerobot_dataset import LeRobotDataset
 from lerobot.datasets.multi_dataset import MultiLeRobotDataset
 from lerobot.datasets.streaming_dataset import StreamingLeRobotDataset
 from lerobot.transforms import ImageTransforms
-from lerobot.utils.constants import ACTION, OBS_PREFIX, REWARD
-
-IMAGENET_STATS = {
-    "mean": [[[0.485]], [[0.456]], [[0.406]]],  # (c,1,1)
-    "std": [[[0.229]], [[0.224]], [[0.225]]],  # (c,1,1)
-}
+from lerobot.utils.constants import ACTION, IMAGENET_STATS, OBS_PREFIX, REWARD
 
 
 def resolve_delta_timestamps(
diff --git a/src/lerobot/datasets/feature_utils.py b/src/lerobot/datasets/feature_utils.py
index 46154d92a..137c9f58a 100644
--- a/src/lerobot/datasets/feature_utils.py
+++ b/src/lerobot/datasets/feature_utils.py
@@ -14,22 +14,19 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 from pprint import pformat
-from typing import Any
 
 import datasets
 import numpy as np
 from PIL import Image as PILImage
 
-from lerobot.configs.types import FeatureType, PolicyFeature
 from lerobot.datasets.utils import (
     DEFAULT_CHUNK_SIZE,
     DEFAULT_DATA_FILE_SIZE_IN_MB,
     DEFAULT_DATA_PATH,
-    DEFAULT_FEATURES,
     DEFAULT_VIDEO_FILE_SIZE_IN_MB,
     DEFAULT_VIDEO_PATH,
 )
-from lerobot.utils.constants import ACTION, OBS_ENV_STATE, OBS_STR
+from lerobot.utils.constants import DEFAULT_FEATURES
 from lerobot.utils.utils import is_valid_numpy_dtype_string
 
 
@@ -71,199 +68,6 @@ def get_hf_features_from_features(features: dict) -> datasets.Features:
     return datasets.Features(hf_features)
 
 
-def _validate_feature_names(features: dict[str, dict]) -> None:
-    """Validate that feature names do not contain invalid characters.
-
-    Args:
-        features (dict): The LeRobot features dictionary.
-
-    Raises:
-        ValueError: If any feature name contains '/'.
-    """
-    invalid_features = {name: ft for name, ft in features.items() if "/" in name}
-    if invalid_features:
-        raise ValueError(f"Feature names should not contain '/'. Found '/' in '{invalid_features}'.")
-
-
-def hw_to_dataset_features(
-    hw_features: dict[str, type | tuple], prefix: str, use_video: bool = True
-) -> dict[str, dict]:
-    """Convert hardware-specific features to a LeRobot dataset feature dictionary.
-
-    This function takes a dictionary describing hardware outputs (like joint states
-    or camera image shapes) and formats it into the standard LeRobot feature
-    specification.
-
-    Args:
-        hw_features (dict): Dictionary mapping feature names to their type (float for
-            joints) or shape (tuple for images).
-        prefix (str): The prefix to add to the feature keys (e.g., "observation"
-            or "action").
-        use_video (bool): If True, image features are marked as "video", otherwise "image".
-
-    Returns:
-        dict: A LeRobot features dictionary.
-    """
-    features = {}
-    joint_fts = {
-        key: ftype
-        for key, ftype in hw_features.items()
-        if ftype is float or (isinstance(ftype, PolicyFeature) and ftype.type != FeatureType.VISUAL)
-    }
-    cam_fts = {key: shape for key, shape in hw_features.items() if isinstance(shape, tuple)}
-
-    if joint_fts and prefix == ACTION:
-        features[prefix] = {
-            "dtype": "float32",
-            "shape": (len(joint_fts),),
-            "names": list(joint_fts),
-        }
-
-    if joint_fts and prefix == OBS_STR:
-        features[f"{prefix}.state"] = {
-            "dtype": "float32",
-            "shape": (len(joint_fts),),
-            "names": list(joint_fts),
-        }
-
-    for key, shape in cam_fts.items():
-        features[f"{prefix}.images.{key}"] = {
-            "dtype": "video" if use_video else "image",
-            "shape": shape,
-            "names": ["height", "width", "channels"],
-        }
-
-    _validate_feature_names(features)
-    return features
-
-
-def build_dataset_frame(
-    ds_features: dict[str, dict], values: dict[str, Any], prefix: str
-) -> dict[str, np.ndarray]:
-    """Construct a single data frame from raw values based on dataset features.
-
-    A "frame" is a dictionary containing all the data for a single timestep,
-    formatted as numpy arrays according to the feature specification.
-
-    Args:
-        ds_features (dict): The LeRobot dataset features dictionary.
-        values (dict): A dictionary of raw values from the hardware/environment.
-        prefix (str): The prefix to filter features by (e.g., "observation"
-            or "action").
-
-    Returns:
-        dict: A dictionary representing a single frame of data.
-    """
-    frame = {}
-    for key, ft in ds_features.items():
-        if key in DEFAULT_FEATURES or not key.startswith(prefix):
-            continue
-        elif ft["dtype"] == "float32" and len(ft["shape"]) == 1:
-            frame[key] = np.array([values[name] for name in ft["names"]], dtype=np.float32)
-        elif ft["dtype"] in ["image", "video"]:
-            frame[key] = values[key.removeprefix(f"{prefix}.images.")]
-
-    return frame
-
-
-def dataset_to_policy_features(features: dict[str, dict]) -> dict[str, PolicyFeature]:
-    """Convert dataset features to policy features.
-
-    This function transforms the dataset's feature specification into a format
-    that a policy can use, classifying features by type (e.g., visual, state,
-    action) and ensuring correct shapes (e.g., channel-first for images).
-
-    Args:
-        features (dict): The LeRobot dataset features dictionary.
-
-    Returns:
-        dict: A dictionary mapping feature keys to `PolicyFeature` objects.
-
-    Raises:
-        ValueError: If an image feature does not have a 3D shape.
-    """
-    # TODO(aliberts): Implement "type" in dataset features and simplify this
-    policy_features = {}
-    for key, ft in features.items():
-        shape = ft["shape"]
-        if ft["dtype"] in ["image", "video"]:
-            type = FeatureType.VISUAL
-            if len(shape) != 3:
-                raise ValueError(f"Number of dimensions of {key} != 3 (shape={shape})")
-
-            names = ft["names"]
-            # Backward compatibility for "channel" which is an error introduced in LeRobotDataset v2.0 for ported datasets.
-            if names[2] in ["channel", "channels"]:  # (h, w, c) -> (c, h, w)
-                shape = (shape[2], shape[0], shape[1])
-        elif key == OBS_ENV_STATE:
-            type = FeatureType.ENV
-        elif key.startswith(OBS_STR):
-            type = FeatureType.STATE
-        elif key.startswith(ACTION):
-            type = FeatureType.ACTION
-        else:
-            continue
-
-        policy_features[key] = PolicyFeature(
-            type=type,
-            shape=shape,
-        )
-
-    return policy_features
-
-
-def combine_feature_dicts(*dicts: dict) -> dict:
-    """Merge LeRobot grouped feature dicts.
-
-    - For 1D numeric specs (dtype not image/video/string) with "names": we merge the names and recompute the shape.
-    - For others (e.g. `observation.images.*`), the last one wins (if they are identical).
-
-    Args:
-        *dicts: A variable number of LeRobot feature dictionaries to merge.
-
-    Returns:
-        dict: A single merged feature dictionary.
-
-    Raises:
-        ValueError: If there's a dtype mismatch for a feature being merged.
-    """
-    out: dict = {}
-    for d in dicts:
-        for key, value in d.items():
-            if not isinstance(value, dict):
-                out[key] = value
-                continue
-
-            dtype = value.get("dtype")
-            shape = value.get("shape")
-            is_vector = (
-                dtype not in ("image", "video", "string")
-                and isinstance(shape, tuple)
-                and len(shape) == 1
-                and "names" in value
-            )
-
-            if is_vector:
-                # Initialize or retrieve the accumulating dict for this feature key
-                target = out.setdefault(key, {"dtype": dtype, "names": [], "shape": (0,)})
-                # Ensure consistent data types across merged entries
-                if "dtype" in target and dtype != target["dtype"]:
-                    raise ValueError(f"dtype mismatch for '{key}': {target['dtype']} vs {dtype}")
-
-                # Merge feature names: append only new ones to preserve order without duplicates
-                seen = set(target["names"])
-                for n in value["names"]:
-                    if n not in seen:
-                        target["names"].append(n)
-                        seen.add(n)
-                # Recompute the shape to reflect the updated number of features
-                target["shape"] = (len(target["names"]),)
-            else:
-                # For images/videos and non-1D entries: override with the latest definition
-                out[key] = value
-    return out
-
-
 def create_empty_dataset_info(
     codebase_version: str,
     fps: int,
diff --git a/src/lerobot/datasets/pipeline_features.py b/src/lerobot/datasets/pipeline_features.py
index 96779fdc6..76e2a6511 100644
--- a/src/lerobot/datasets/pipeline_features.py
+++ b/src/lerobot/datasets/pipeline_features.py
@@ -17,10 +17,10 @@ from collections.abc import Sequence
 from typing import Any
 
 from lerobot.configs.types import PipelineFeatureType
-from lerobot.datasets.feature_utils import hw_to_dataset_features
 from lerobot.processor import DataProcessorPipeline
 from lerobot.types import RobotAction, RobotObservation
 from lerobot.utils.constants import ACTION, OBS_IMAGES, OBS_STATE, OBS_STR
+from lerobot.utils.feature_utils import hw_to_dataset_features
 
 
 def create_initial_features(
diff --git a/src/lerobot/datasets/utils.py b/src/lerobot/datasets/utils.py
index 076b021f3..c6815e0f5 100644
--- a/src/lerobot/datasets/utils.py
+++ b/src/lerobot/datasets/utils.py
@@ -93,14 +93,6 @@ LEGACY_EPISODES_PATH = "meta/episodes.jsonl"
 LEGACY_EPISODES_STATS_PATH = "meta/episodes_stats.jsonl"
 LEGACY_TASKS_PATH = "meta/tasks.jsonl"
 
-DEFAULT_FEATURES = {
-    "timestamp": {"dtype": "float32", "shape": (1,), "names": None},
-    "frame_index": {"dtype": "int64", "shape": (1,), "names": None},
-    "episode_index": {"dtype": "int64", "shape": (1,), "names": None},
-    "index": {"dtype": "int64", "shape": (1,), "names": None},
-    "task_index": {"dtype": "int64", "shape": (1,), "names": None},
-}
-
 
 def has_legacy_hub_download_metadata(root: Path) -> bool:
     """Return ``True`` when *root* looks like a legacy Hub ``local_dir`` mirror.
diff --git a/src/lerobot/policies/diffusion/modeling_diffusion.py b/src/lerobot/policies/diffusion/modeling_diffusion.py
index b3b3da7df..0334f0e70 100644
--- a/src/lerobot/policies/diffusion/modeling_diffusion.py
+++ b/src/lerobot/policies/diffusion/modeling_diffusion.py
@@ -29,24 +29,17 @@ import numpy as np
 import torch
 import torch.nn.functional as F  # noqa: N812
 import torchvision
+from torch import Tensor, nn
 
-from lerobot.utils.import_utils import require_package
-
-require_package("diffusers", extra="training")
-
-from diffusers.schedulers.scheduling_ddim import DDIMScheduler  # noqa: E402
-from diffusers.schedulers.scheduling_ddpm import DDPMScheduler  # noqa: E402
-from torch import Tensor, nn  # noqa: E402
-
-from lerobot.policies.diffusion.configuration_diffusion import DiffusionConfig  # noqa: E402
-from lerobot.policies.pretrained import PreTrainedPolicy  # noqa: E402
-from lerobot.policies.utils import (  # noqa: E402
+from lerobot.policies.diffusion.configuration_diffusion import DiffusionConfig
+from lerobot.policies.pretrained import PreTrainedPolicy
+from lerobot.policies.utils import (
     get_device_from_parameters,
     get_dtype_from_parameters,
     get_output_shape,
     populate_queues,
 )
-from lerobot.utils.constants import ACTION, OBS_ENV_STATE, OBS_IMAGES, OBS_STATE  # noqa: E402
+from lerobot.utils.constants import ACTION, OBS_ENV_STATE, OBS_IMAGES, OBS_STATE
 
 
 class DiffusionPolicy(PreTrainedPolicy):
@@ -156,11 +149,17 @@ class DiffusionPolicy(PreTrainedPolicy):
         return loss, None
 
 
-def _make_noise_scheduler(name: str, **kwargs: dict) -> DDPMScheduler | DDIMScheduler:
+def _make_noise_scheduler(name: str, **kwargs: dict):
     """
     Factory for noise scheduler instances of the requested type. All kwargs are passed
     to the scheduler.
     """
+    from lerobot.utils.import_utils import require_package
+
+    require_package("diffusers", extra="training")
+    from diffusers.schedulers.scheduling_ddim import DDIMScheduler
+    from diffusers.schedulers.scheduling_ddpm import DDPMScheduler
+
     if name == "DDPM":
         return DDPMScheduler(**kwargs)
     elif name == "DDIM":
diff --git a/src/lerobot/policies/factory.py b/src/lerobot/policies/factory.py
index d3864a82d..051a70bcc 100644
--- a/src/lerobot/policies/factory.py
+++ b/src/lerobot/policies/factory.py
@@ -495,7 +495,7 @@ def make_policy(
 
     kwargs = {}
     if ds_meta is not None:
-        from lerobot.datasets.feature_utils import dataset_to_policy_features
+        from lerobot.utils.feature_utils import dataset_to_policy_features
 
         features = dataset_to_policy_features(ds_meta.features)
     else:
diff --git a/src/lerobot/policies/multi_task_dit/modeling_multi_task_dit.py b/src/lerobot/policies/multi_task_dit/modeling_multi_task_dit.py
index 72929198f..45d017b68 100644
--- a/src/lerobot/policies/multi_task_dit/modeling_multi_task_dit.py
+++ b/src/lerobot/policies/multi_task_dit/modeling_multi_task_dit.py
@@ -34,17 +34,10 @@ import torch
 import torch.nn as nn
 import torch.nn.functional as F  # noqa: N812
 import torchvision
+from torch import Tensor
 
-from lerobot.utils.import_utils import require_package
-
-require_package("diffusers", extra="training")
-
-from diffusers.schedulers.scheduling_ddim import DDIMScheduler  # noqa: E402
-from diffusers.schedulers.scheduling_ddpm import DDPMScheduler  # noqa: E402
-from torch import Tensor  # noqa: E402
-
-from lerobot.policies.multi_task_dit.configuration_multi_task_dit import MultiTaskDiTConfig  # noqa: E402
-from lerobot.utils.import_utils import _transformers_available  # noqa: E402
+from lerobot.policies.multi_task_dit.configuration_multi_task_dit import MultiTaskDiTConfig
+from lerobot.utils.import_utils import _transformers_available
 
 # Conditional import for type checking and lazy loading
 if TYPE_CHECKING or _transformers_available:
@@ -52,9 +45,9 @@ if TYPE_CHECKING or _transformers_available:
 else:
     CLIPTextModel = None
     CLIPVisionModel = None
-from lerobot.policies.pretrained import PreTrainedPolicy  # noqa: E402
-from lerobot.policies.utils import populate_queues  # noqa: E402
-from lerobot.utils.constants import (  # noqa: E402
+from lerobot.policies.pretrained import PreTrainedPolicy
+from lerobot.policies.utils import populate_queues
+from lerobot.utils.constants import (
     ACTION,
     OBS_IMAGES,
     OBS_LANGUAGE_ATTENTION_MASK,
@@ -648,6 +641,12 @@ class DiffusionObjective(nn.Module):
             "prediction_type": config.prediction_type,
         }
 
+        from lerobot.utils.import_utils import require_package
+
+        require_package("diffusers", extra="training")
+        from diffusers.schedulers.scheduling_ddim import DDIMScheduler
+        from diffusers.schedulers.scheduling_ddpm import DDPMScheduler
+
         if config.noise_scheduler_type == "DDPM":
             self.noise_scheduler: DDPMScheduler | DDIMScheduler = DDPMScheduler(**scheduler_kwargs)
         elif config.noise_scheduler_type == "DDIM":
diff --git a/src/lerobot/policies/utils.py b/src/lerobot/policies/utils.py
index 635286c48..8c57bcc07 100644
--- a/src/lerobot/policies/utils.py
+++ b/src/lerobot/policies/utils.py
@@ -162,7 +162,7 @@ def build_inference_frame(
     Returns:
         A dictionary of preprocessed tensors ready for model inference.
     """
-    from lerobot.datasets.feature_utils import build_dataset_frame
+    from lerobot.utils.feature_utils import build_dataset_frame
 
     # Extracts the correct keys from the incoming raw observation
     observation = build_dataset_frame(ds_features, observation, prefix=OBS_STR)
diff --git a/src/lerobot/policies/xvla/processor_xvla.py b/src/lerobot/policies/xvla/processor_xvla.py
index 0fa9ffe3f..6fb68587a 100644
--- a/src/lerobot/policies/xvla/processor_xvla.py
+++ b/src/lerobot/policies/xvla/processor_xvla.py
@@ -21,7 +21,6 @@ import numpy as np
 import torch
 
 from lerobot.configs.types import PipelineFeatureType, PolicyFeature
-from lerobot.datasets.factory import IMAGENET_STATS
 from lerobot.policies.xvla.configuration_xvla import XVLAConfig
 from lerobot.policies.xvla.utils import rotate6d_to_axis_angle
 from lerobot.processor import (
@@ -40,6 +39,7 @@ from lerobot.processor import (
 from lerobot.processor.converters import policy_action_to_transition, transition_to_policy_action
 from lerobot.types import EnvTransition, TransitionKey
 from lerobot.utils.constants import (
+    IMAGENET_STATS,
     OBS_IMAGES,
     OBS_PREFIX,
     OBS_STATE,
diff --git a/src/lerobot/rl/learner.py b/src/lerobot/rl/learner.py
index 2853fbcb3..7034b11d8 100644
--- a/src/lerobot/rl/learner.py
+++ b/src/lerobot/rl/learner.py
@@ -62,8 +62,7 @@ from torch.optim.optimizer import Optimizer
 from lerobot.cameras import opencv  # noqa: F401
 from lerobot.configs import parser
 from lerobot.configs.train import TrainRLServerPipelineConfig
-from lerobot.datasets.factory import make_dataset
-from lerobot.datasets.lerobot_dataset import LeRobotDataset
+from lerobot.datasets import LeRobotDataset, make_dataset
 from lerobot.policies.factory import make_policy
 from lerobot.policies.sac.modeling_sac import SACPolicy
 from lerobot.rl.buffer import ReplayBuffer, concatenate_batch_transitions
diff --git a/src/lerobot/scripts/augment_dataset_quantile_stats.py b/src/lerobot/scripts/augment_dataset_quantile_stats.py
index 8ec0dcc13..7ec066b0a 100644
--- a/src/lerobot/scripts/augment_dataset_quantile_stats.py
+++ b/src/lerobot/scripts/augment_dataset_quantile_stats.py
@@ -44,10 +44,9 @@ from huggingface_hub import HfApi
 from requests import HTTPError
 from tqdm import tqdm
 
-from lerobot.datasets import LeRobotDataset
+from lerobot.datasets import LeRobotDataset, write_stats
 from lerobot.datasets.compute_stats import DEFAULT_QUANTILES, aggregate_stats, get_feature_stats
 from lerobot.datasets.dataset_metadata import CODEBASE_VERSION
-from lerobot.datasets.io_utils import write_stats
 from lerobot.utils.utils import init_logging
 
 
diff --git a/src/lerobot/scripts/lerobot_record.py b/src/lerobot/scripts/lerobot_record.py
index badec6051..063489238 100644
--- a/src/lerobot/scripts/lerobot_record.py
+++ b/src/lerobot/scripts/lerobot_record.py
@@ -85,11 +85,13 @@ from lerobot.cameras.realsense.configuration_realsense import RealSenseCameraCon
 from lerobot.cameras.zmq.configuration_zmq import ZMQCameraConfig  # noqa: F401
 from lerobot.configs import parser
 from lerobot.configs.policies import PreTrainedConfig
-from lerobot.datasets import LeRobotDataset
-from lerobot.datasets.feature_utils import build_dataset_frame, combine_feature_dicts
-from lerobot.datasets.image_writer import safe_stop_image_writer
-from lerobot.datasets.pipeline_features import aggregate_pipeline_dataset_features, create_initial_features
-from lerobot.datasets.video_utils import VideoEncodingManager
+from lerobot.datasets import (
+    LeRobotDataset,
+    VideoEncodingManager,
+    aggregate_pipeline_dataset_features,
+    create_initial_features,
+    safe_stop_image_writer,
+)
 from lerobot.policies.factory import make_policy, make_pre_post_processors
 from lerobot.policies.pretrained import PreTrainedPolicy
 from lerobot.policies.rtc import ActionInterpolator
@@ -143,6 +145,7 @@ from lerobot.utils.control_utils import (
     sanity_check_dataset_robot_compatibility,
 )
 from lerobot.utils.device_utils import get_safe_torch_device
+from lerobot.utils.feature_utils import build_dataset_frame, combine_feature_dicts
 from lerobot.utils.import_utils import register_third_party_plugins
 from lerobot.utils.robot_utils import precise_sleep
 from lerobot.utils.utils import (
diff --git a/src/lerobot/scripts/lerobot_train.py b/src/lerobot/scripts/lerobot_train.py
index 33435dfcc..9572c4e1b 100644
--- a/src/lerobot/scripts/lerobot_train.py
+++ b/src/lerobot/scripts/lerobot_train.py
@@ -13,45 +13,44 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+from __future__ import annotations
+
 import dataclasses
 import logging
 import time
 from contextlib import nullcontext
 from pprint import pformat
-from typing import Any
+from typing import TYPE_CHECKING, Any
 
-from lerobot.utils.import_utils import require_package
+if TYPE_CHECKING:
+    from accelerate import Accelerator
 
-require_package("accelerate", extra="training")
+import torch
+from termcolor import colored
+from torch.optim import Optimizer
+from tqdm import tqdm
 
-import torch  # noqa: E402
-from accelerate import Accelerator  # noqa: E402
-from termcolor import colored  # noqa: E402
-from torch.optim import Optimizer  # noqa: E402
-from tqdm import tqdm  # noqa: E402
-
-from lerobot.configs import parser  # noqa: E402
-from lerobot.configs.train import TrainPipelineConfig  # noqa: E402
-from lerobot.datasets import EpisodeAwareSampler  # noqa: E402
-from lerobot.datasets.factory import make_dataset  # noqa: E402
-from lerobot.envs.factory import make_env, make_env_pre_post_processors  # noqa: E402
-from lerobot.envs.utils import close_envs  # noqa: E402
-from lerobot.optim.factory import make_optimizer_and_scheduler  # noqa: E402
-from lerobot.policies.factory import make_policy, make_pre_post_processors  # noqa: E402
-from lerobot.policies.pretrained import PreTrainedPolicy  # noqa: E402
-from lerobot.rl.wandb_utils import WandBLogger  # noqa: E402
-from lerobot.scripts.lerobot_eval import eval_policy_all  # noqa: E402
-from lerobot.utils.import_utils import register_third_party_plugins  # noqa: E402
-from lerobot.utils.logging_utils import AverageMeter, MetricsTracker  # noqa: E402
-from lerobot.utils.random_utils import set_seed  # noqa: E402
-from lerobot.utils.train_utils import (  # noqa: E402
+from lerobot.configs import parser
+from lerobot.configs.train import TrainPipelineConfig
+from lerobot.datasets import EpisodeAwareSampler, make_dataset
+from lerobot.envs.factory import make_env, make_env_pre_post_processors
+from lerobot.envs.utils import close_envs
+from lerobot.optim.factory import make_optimizer_and_scheduler
+from lerobot.policies.factory import make_policy, make_pre_post_processors
+from lerobot.policies.pretrained import PreTrainedPolicy
+from lerobot.rl.wandb_utils import WandBLogger
+from lerobot.scripts.lerobot_eval import eval_policy_all
+from lerobot.utils.import_utils import register_third_party_plugins
+from lerobot.utils.logging_utils import AverageMeter, MetricsTracker
+from lerobot.utils.random_utils import set_seed
+from lerobot.utils.train_utils import (
     get_step_checkpoint_dir,
     get_step_identifier,
     load_training_state,
     save_checkpoint,
     update_last_checkpoint,
 )
-from lerobot.utils.utils import (  # noqa: E402
+from lerobot.utils.utils import (
     cycle,
     format_big_number,
     has_method,
@@ -171,6 +170,11 @@ def train(cfg: TrainPipelineConfig, accelerator: Accelerator | None = None):
         cfg: A `TrainPipelineConfig` object containing all training configurations.
         accelerator: Optional Accelerator instance. If None, one will be created automatically.
     """
+    from lerobot.utils.import_utils import require_package
+
+    require_package("accelerate", extra="training")
+    from accelerate import Accelerator
+
     cfg.validate()
 
     # Create Accelerator if not provided
diff --git a/src/lerobot/utils/constants.py b/src/lerobot/utils/constants.py
index fd10cab35..43869228d 100644
--- a/src/lerobot/utils/constants.py
+++ b/src/lerobot/utils/constants.py
@@ -75,6 +75,21 @@ default_calibration_path = HF_LEROBOT_HOME / "calibration"
 HF_LEROBOT_CALIBRATION = Path(os.getenv("HF_LEROBOT_CALIBRATION", default_calibration_path)).expanduser()
 
 
+# Dataset meta-features (auto-populated by the recording pipeline)
+DEFAULT_FEATURES = {
+    "timestamp": {"dtype": "float32", "shape": (1,), "names": None},
+    "frame_index": {"dtype": "int64", "shape": (1,), "names": None},
+    "episode_index": {"dtype": "int64", "shape": (1,), "names": None},
+    "index": {"dtype": "int64", "shape": (1,), "names": None},
+    "task_index": {"dtype": "int64", "shape": (1,), "names": None},
+}
+
+# ImageNet normalization constants
+IMAGENET_STATS = {
+    "mean": [[[0.485]], [[0.456]], [[0.406]]],  # (c,1,1)
+    "std": [[[0.229]], [[0.224]], [[0.225]]],  # (c,1,1)
+}
+
 # streaming datasets
 LOOKBACK_BACKTRACKTABLE = 100
 LOOKAHEAD_BACKTRACKTABLE = 100
diff --git a/src/lerobot/utils/control_utils.py b/src/lerobot/utils/control_utils.py
index 94ec26714..68aa653f6 100644
--- a/src/lerobot/utils/control_utils.py
+++ b/src/lerobot/utils/control_utils.py
@@ -223,7 +223,7 @@ def sanity_check_dataset_robot_compatibility(
     require_package("deepdiff", extra="hardware")
     from deepdiff import DeepDiff
 
-    from lerobot.datasets.utils import DEFAULT_FEATURES
+    from lerobot.utils.constants import DEFAULT_FEATURES
 
     fields = [
         ("robot_type", dataset.meta.robot_type, robot.robot_type),
diff --git a/src/lerobot/utils/feature_utils.py b/src/lerobot/utils/feature_utils.py
new file mode 100644
index 000000000..45ba80800
--- /dev/null
+++ b/src/lerobot/utils/feature_utils.py
@@ -0,0 +1,222 @@
+#!/usr/bin/env python
+
+# Copyright 2024 The HuggingFace Inc. team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Lightweight feature-manipulation utilities.
+
+These functions are intentionally kept free of heavy dependencies (e.g. the
+HuggingFace ``datasets`` library) so that they can be imported from anywhere
+in the codebase – including modules that are part of the *minimal* install –
+without triggering the ``lerobot.datasets`` package guard.
+"""
+
+from typing import Any
+
+import numpy as np
+
+from lerobot.configs.types import FeatureType, PolicyFeature
+from lerobot.utils.constants import ACTION, DEFAULT_FEATURES, OBS_ENV_STATE, OBS_STR
+
+
+def _validate_feature_names(features: dict[str, dict]) -> None:
+    """Validate that feature names do not contain invalid characters.
+
+    Args:
+        features (dict): The LeRobot features dictionary.
+
+    Raises:
+        ValueError: If any feature name contains '/'.
+    """
+    invalid_features = {name: ft for name, ft in features.items() if "/" in name}
+    if invalid_features:
+        raise ValueError(f"Feature names should not contain '/'. Found '/' in '{invalid_features}'.")
+
+
+def hw_to_dataset_features(
+    hw_features: dict[str, type | tuple], prefix: str, use_video: bool = True
+) -> dict[str, dict]:
+    """Convert hardware-specific features to a LeRobot dataset feature dictionary.
+
+    This function takes a dictionary describing hardware outputs (like joint states
+    or camera image shapes) and formats it into the standard LeRobot feature
+    specification.
+
+    Args:
+        hw_features (dict): Dictionary mapping feature names to their type (float for
+            joints) or shape (tuple for images).
+        prefix (str): The prefix to add to the feature keys (e.g., "observation"
+            or "action").
+        use_video (bool): If True, image features are marked as "video", otherwise "image".
+
+    Returns:
+        dict: A LeRobot features dictionary.
+    """
+    features = {}
+    joint_fts = {
+        key: ftype
+        for key, ftype in hw_features.items()
+        if ftype is float or (isinstance(ftype, PolicyFeature) and ftype.type != FeatureType.VISUAL)
+    }
+    cam_fts = {key: shape for key, shape in hw_features.items() if isinstance(shape, tuple)}
+
+    if joint_fts and prefix == ACTION:
+        features[prefix] = {
+            "dtype": "float32",
+            "shape": (len(joint_fts),),
+            "names": list(joint_fts),
+        }
+
+    if joint_fts and prefix == OBS_STR:
+        features[f"{prefix}.state"] = {
+            "dtype": "float32",
+            "shape": (len(joint_fts),),
+            "names": list(joint_fts),
+        }
+
+    for key, shape in cam_fts.items():
+        features[f"{prefix}.images.{key}"] = {
+            "dtype": "video" if use_video else "image",
+            "shape": shape,
+            "names": ["height", "width", "channels"],
+        }
+
+    _validate_feature_names(features)
+    return features
+
+
+def build_dataset_frame(
+    ds_features: dict[str, dict], values: dict[str, Any], prefix: str
+) -> dict[str, np.ndarray]:
+    """Construct a single data frame from raw values based on dataset features.
+
+    A "frame" is a dictionary containing all the data for a single timestep,
+    formatted as numpy arrays according to the feature specification.
+
+    Args:
+        ds_features (dict): The LeRobot dataset features dictionary.
+        values (dict): A dictionary of raw values from the hardware/environment.
+        prefix (str): The prefix to filter features by (e.g., "observation"
+            or "action").
+
+    Returns:
+        dict: A dictionary representing a single frame of data.
+    """
+    frame = {}
+    for key, ft in ds_features.items():
+        if key in DEFAULT_FEATURES or not key.startswith(prefix):
+            continue
+        elif ft["dtype"] == "float32" and len(ft["shape"]) == 1:
+            frame[key] = np.array([values[name] for name in ft["names"]], dtype=np.float32)
+        elif ft["dtype"] in ["image", "video"]:
+            frame[key] = values[key.removeprefix(f"{prefix}.images.")]
+
+    return frame
+
+
+def dataset_to_policy_features(features: dict[str, dict]) -> dict[str, PolicyFeature]:
+    """Convert dataset features to policy features.
+
+    This function transforms the dataset's feature specification into a format
+    that a policy can use, classifying features by type (e.g., visual, state,
+    action) and ensuring correct shapes (e.g., channel-first for images).
+
+    Args:
+        features (dict): The LeRobot dataset features dictionary.
+
+    Returns:
+        dict: A dictionary mapping feature keys to `PolicyFeature` objects.
+
+    Raises:
+        ValueError: If an image feature does not have a 3D shape.
+    """
+    # TODO(aliberts): Implement "type" in dataset features and simplify this
+    policy_features = {}
+    for key, ft in features.items():
+        shape = ft["shape"]
+        if ft["dtype"] in ["image", "video"]:
+            type = FeatureType.VISUAL
+            if len(shape) != 3:
+                raise ValueError(f"Number of dimensions of {key} != 3 (shape={shape})")
+
+            names = ft["names"]
+            # Backward compatibility for "channel" which is an error introduced in LeRobotDataset v2.0 for ported datasets.
+            if names[2] in ["channel", "channels"]:  # (h, w, c) -> (c, h, w)
+                shape = (shape[2], shape[0], shape[1])
+        elif key == OBS_ENV_STATE:
+            type = FeatureType.ENV
+        elif key.startswith(OBS_STR):
+            type = FeatureType.STATE
+        elif key.startswith(ACTION):
+            type = FeatureType.ACTION
+        else:
+            continue
+
+        policy_features[key] = PolicyFeature(
+            type=type,
+            shape=shape,
+        )
+
+    return policy_features
+
+
+def combine_feature_dicts(*dicts: dict) -> dict:
+    """Merge LeRobot grouped feature dicts.
+
+    - For 1D numeric specs (dtype not image/video/string) with "names": we merge the names and recompute the shape.
+    - For others (e.g. `observation.images.*`), the last one wins (if they are identical).
+
+    Args:
+        *dicts: A variable number of LeRobot feature dictionaries to merge.
+
+    Returns:
+        dict: A single merged feature dictionary.
+
+    Raises:
+        ValueError: If there's a dtype mismatch for a feature being merged.
+    """
+    out: dict = {}
+    for d in dicts:
+        for key, value in d.items():
+            if not isinstance(value, dict):
+                out[key] = value
+                continue
+
+            dtype = value.get("dtype")
+            shape = value.get("shape")
+            is_vector = (
+                dtype not in ("image", "video", "string")
+                and isinstance(shape, tuple)
+                and len(shape) == 1
+                and "names" in value
+            )
+
+            if is_vector:
+                # Initialize or retrieve the accumulating dict for this feature key
+                target = out.setdefault(key, {"dtype": dtype, "names": [], "shape": (0,)})
+                # Ensure consistent data types across merged entries
+                if "dtype" in target and dtype != target["dtype"]:
+                    raise ValueError(f"dtype mismatch for '{key}': {target['dtype']} vs {dtype}")
+
+                # Merge feature names: append only new ones to preserve order without duplicates
+                seen = set(target["names"])
+                for n in value["names"]:
+                    if n not in seen:
+                        target["names"].append(n)
+                        seen.add(n)
+                # Recompute the shape to reflect the updated number of features
+                target["shape"] = (len(target["names"]),)
+            else:
+                # For images/videos and non-1D entries: override with the latest definition
+                out[key] = value
+    return out
diff --git a/src/lerobot/utils/import_utils.py b/src/lerobot/utils/import_utils.py
index e85f265f9..0fefbd973 100644
--- a/src/lerobot/utils/import_utils.py
+++ b/src/lerobot/utils/import_utils.py
@@ -90,7 +90,8 @@ def require_package(pkg_name: str, extra: str, import_name: str | None = None) -
         _require_package_cache[cache_key] = is_package_available(pkg_name, import_name)
     if not _require_package_cache[cache_key]:
         raise ImportError(
-            f"'{pkg_name}' is required but not installed. Install it with: pip install 'lerobot[{extra}]'"
+            f"'{pkg_name}' is required but not installed. Install it with: "
+            f"pip install 'lerobot[{extra}]' (or uv pip install 'lerobot[{extra}]')"
         )
 
 
diff --git a/src/lerobot/utils/io_utils.py b/src/lerobot/utils/io_utils.py
index 554d341aa..227c0265b 100644
--- a/src/lerobot/utils/io_utils.py
+++ b/src/lerobot/utils/io_utils.py
@@ -81,6 +81,8 @@ def write_video(video_path: str | Path, stacked_frames: list, fps: int) -> None:
         stream.height = height
         stream.pix_fmt = "yuv420p"
         for frame_array in stacked_frames:
+            if height != orig_height or width != orig_width:
+                frame_array = frame_array[:height, :width]
             frame = av.VideoFrame.from_ndarray(frame_array, format="rgb24")
             for packet in stream.encode(frame):
                 container.mux(packet)
diff --git a/src/lerobot/utils/utils.py b/src/lerobot/utils/utils.py
index d8b06a56b..2574f1fa3 100644
--- a/src/lerobot/utils/utils.py
+++ b/src/lerobot/utils/utils.py
@@ -292,9 +292,8 @@ class SuppressProgressBars:
 
             disable_progress_bar()
         except ImportError:
-            logging.getLogger(__name__).info(
-                "SuppressProgressBars is a no-op because 'datasets' is not installed. "
-                "Install it with: pip install 'lerobot[dataset]'"
+            logging.getLogger(__name__).debug(
+                "SuppressProgressBars is a no-op because 'datasets' is not installed."
             )
 
     def __exit__(self, exc_type, exc_val, exc_tb):
diff --git a/tests/artifacts/policies/save_policy_to_safetensors.py b/tests/artifacts/policies/save_policy_to_safetensors.py
index 7359f6169..ffb3efd03 100644
--- a/tests/artifacts/policies/save_policy_to_safetensors.py
+++ b/tests/artifacts/policies/save_policy_to_safetensors.py
@@ -21,7 +21,7 @@ from safetensors.torch import save_file
 
 from lerobot.configs.default import DatasetConfig
 from lerobot.configs.train import TrainPipelineConfig
-from lerobot.datasets.factory import make_dataset
+from lerobot.datasets import make_dataset
 from lerobot.optim.factory import make_optimizer_and_scheduler
 from lerobot.policies.factory import make_policy, make_policy_config, make_pre_post_processors
 from lerobot.utils.constants import OBS_STR
diff --git a/tests/datasets/test_dataset_utils.py b/tests/datasets/test_dataset_utils.py
index 874099e2b..0cf9bf4d4 100644
--- a/tests/datasets/test_dataset_utils.py
+++ b/tests/datasets/test_dataset_utils.py
@@ -19,10 +19,10 @@ import torch
 from datasets import Dataset
 from huggingface_hub import DatasetCard
 
-from lerobot.datasets.feature_utils import combine_feature_dicts
 from lerobot.datasets.io_utils import hf_transform_to_torch
 from lerobot.datasets.utils import create_lerobot_dataset_card
 from lerobot.utils.constants import ACTION, OBS_IMAGES
+from lerobot.utils.feature_utils import combine_feature_dicts
 
 
 def calculate_episode_data_index(hf_dataset: Dataset) -> dict[str, torch.Tensor]:
diff --git a/tests/datasets/test_datasets.py b/tests/datasets/test_datasets.py
index af77a40e3..18d844444 100644
--- a/tests/datasets/test_datasets.py
+++ b/tests/datasets/test_datasets.py
@@ -29,8 +29,8 @@ from torchvision.transforms import v2
 import lerobot
 from lerobot.configs.default import DatasetConfig
 from lerobot.configs.train import TrainPipelineConfig
-from lerobot.datasets.factory import make_dataset
-from lerobot.datasets.feature_utils import get_hf_features_from_features, hw_to_dataset_features
+from lerobot.datasets import make_dataset
+from lerobot.datasets.feature_utils import get_hf_features_from_features
 from lerobot.datasets.image_writer import image_array_to_pil_image
 from lerobot.datasets.io_utils import hf_transform_to_torch
 from lerobot.datasets.lerobot_dataset import LeRobotDataset
@@ -47,6 +47,7 @@ from lerobot.policies.factory import make_policy_config
 from lerobot.robots import make_robot_from_config
 from lerobot.transforms import ImageTransforms, ImageTransformsConfig
 from lerobot.utils.constants import ACTION, DONE, OBS_IMAGES, OBS_STATE, OBS_STR, REWARD
+from lerobot.utils.feature_utils import hw_to_dataset_features
 from tests.fixtures.constants import DUMMY_CHW, DUMMY_HWC, DUMMY_REPO_ID
 from tests.mocks.mock_robot import MockRobotConfig
 from tests.utils import require_x86_64_kernel
diff --git a/tests/policies/test_policies.py b/tests/policies/test_policies.py
index 3129e03b7..e9c50d249 100644
--- a/tests/policies/test_policies.py
+++ b/tests/policies/test_policies.py
@@ -27,8 +27,7 @@ from lerobot import available_policies
 from lerobot.configs.default import DatasetConfig
 from lerobot.configs.train import TrainPipelineConfig
 from lerobot.configs.types import FeatureType, PolicyFeature
-from lerobot.datasets.factory import make_dataset
-from lerobot.datasets.feature_utils import dataset_to_policy_features
+from lerobot.datasets import make_dataset
 from lerobot.envs.factory import make_env, make_env_config
 from lerobot.envs.utils import close_envs, preprocess_observation
 from lerobot.optim.factory import make_optimizer_and_scheduler
@@ -44,6 +43,7 @@ from lerobot.policies.pretrained import PreTrainedPolicy
 from lerobot.policies.vqbet.configuration_vqbet import VQBeTConfig
 from lerobot.policies.vqbet.modeling_vqbet import VQBeTHead
 from lerobot.utils.constants import ACTION, OBS_IMAGES, OBS_STATE
+from lerobot.utils.feature_utils import dataset_to_policy_features
 from lerobot.utils.random_utils import seeded_context
 from lerobot.utils.utils import cycle
 from tests.artifacts.policies.save_policy_to_safetensors import get_policy_stats