refactor: more changes

2026-05-25 21:50:03 +00:00 · 2026-04-11 11:13:15 +02:00
parent 4767f51971
commit 964acd0151
27 changed files with 338 additions and 292 deletions
@@ -75,6 +75,21 @@ default_calibration_path = HF_LEROBOT_HOME / "calibration"
 HF_LEROBOT_CALIBRATION = Path(os.getenv("HF_LEROBOT_CALIBRATION", default_calibration_path)).expanduser()


+# Dataset meta-features (auto-populated by the recording pipeline)
+DEFAULT_FEATURES = {
+    "timestamp": {"dtype": "float32", "shape": (1,), "names": None},
+    "frame_index": {"dtype": "int64", "shape": (1,), "names": None},
+    "episode_index": {"dtype": "int64", "shape": (1,), "names": None},
+    "index": {"dtype": "int64", "shape": (1,), "names": None},
+    "task_index": {"dtype": "int64", "shape": (1,), "names": None},
+}
+
+# ImageNet normalization constants
+IMAGENET_STATS = {
+    "mean": [[[0.485]], [[0.456]], [[0.406]]],  # (c,1,1)
+    "std": [[[0.229]], [[0.224]], [[0.225]]],  # (c,1,1)
+}
+
 # streaming datasets
 LOOKBACK_BACKTRACKTABLE = 100
 LOOKAHEAD_BACKTRACKTABLE = 100
@@ -223,7 +223,7 @@ def sanity_check_dataset_robot_compatibility(
    require_package("deepdiff", extra="hardware")
    from deepdiff import DeepDiff

-    from lerobot.datasets.utils import DEFAULT_FEATURES
+    from lerobot.utils.constants import DEFAULT_FEATURES

    fields = [
        ("robot_type", dataset.meta.robot_type, robot.robot_type),
@@ -0,0 +1,222 @@
+#!/usr/bin/env python
+
+# Copyright 2024 The HuggingFace Inc. team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Lightweight feature-manipulation utilities.
+
+These functions are intentionally kept free of heavy dependencies (e.g. the
+HuggingFace ``datasets`` library) so that they can be imported from anywhere
+in the codebase – including modules that are part of the *minimal* install –
+without triggering the ``lerobot.datasets`` package guard.
+"""
+
+from typing import Any
+
+import numpy as np
+
+from lerobot.configs.types import FeatureType, PolicyFeature
+from lerobot.utils.constants import ACTION, DEFAULT_FEATURES, OBS_ENV_STATE, OBS_STR
+
+
+def _validate_feature_names(features: dict[str, dict]) -> None:
+    """Validate that feature names do not contain invalid characters.
+
+    Args:
+        features (dict): The LeRobot features dictionary.
+
+    Raises:
+        ValueError: If any feature name contains '/'.
+    """
+    invalid_features = {name: ft for name, ft in features.items() if "/" in name}
+    if invalid_features:
+        raise ValueError(f"Feature names should not contain '/'. Found '/' in '{invalid_features}'.")
+
+
+def hw_to_dataset_features(
+    hw_features: dict[str, type | tuple], prefix: str, use_video: bool = True
+) -> dict[str, dict]:
+    """Convert hardware-specific features to a LeRobot dataset feature dictionary.
+
+    This function takes a dictionary describing hardware outputs (like joint states
+    or camera image shapes) and formats it into the standard LeRobot feature
+    specification.
+
+    Args:
+        hw_features (dict): Dictionary mapping feature names to their type (float for
+            joints) or shape (tuple for images).
+        prefix (str): The prefix to add to the feature keys (e.g., "observation"
+            or "action").
+        use_video (bool): If True, image features are marked as "video", otherwise "image".
+
+    Returns:
+        dict: A LeRobot features dictionary.
+    """
+    features = {}
+    joint_fts = {
+        key: ftype
+        for key, ftype in hw_features.items()
+        if ftype is float or (isinstance(ftype, PolicyFeature) and ftype.type != FeatureType.VISUAL)
+    }
+    cam_fts = {key: shape for key, shape in hw_features.items() if isinstance(shape, tuple)}
+
+    if joint_fts and prefix == ACTION:
+        features[prefix] = {
+            "dtype": "float32",
+            "shape": (len(joint_fts),),
+            "names": list(joint_fts),
+        }
+
+    if joint_fts and prefix == OBS_STR:
+        features[f"{prefix}.state"] = {
+            "dtype": "float32",
+            "shape": (len(joint_fts),),
+            "names": list(joint_fts),
+        }
+
+    for key, shape in cam_fts.items():
+        features[f"{prefix}.images.{key}"] = {
+            "dtype": "video" if use_video else "image",
+            "shape": shape,
+            "names": ["height", "width", "channels"],
+        }
+
+    _validate_feature_names(features)
+    return features
+
+
+def build_dataset_frame(
+    ds_features: dict[str, dict], values: dict[str, Any], prefix: str
+) -> dict[str, np.ndarray]:
+    """Construct a single data frame from raw values based on dataset features.
+
+    A "frame" is a dictionary containing all the data for a single timestep,
+    formatted as numpy arrays according to the feature specification.
+
+    Args:
+        ds_features (dict): The LeRobot dataset features dictionary.
+        values (dict): A dictionary of raw values from the hardware/environment.
+        prefix (str): The prefix to filter features by (e.g., "observation"
+            or "action").
+
+    Returns:
+        dict: A dictionary representing a single frame of data.
+    """
+    frame = {}
+    for key, ft in ds_features.items():
+        if key in DEFAULT_FEATURES or not key.startswith(prefix):
+            continue
+        elif ft["dtype"] == "float32" and len(ft["shape"]) == 1:
+            frame[key] = np.array([values[name] for name in ft["names"]], dtype=np.float32)
+        elif ft["dtype"] in ["image", "video"]:
+            frame[key] = values[key.removeprefix(f"{prefix}.images.")]
+
+    return frame
+
+
+def dataset_to_policy_features(features: dict[str, dict]) -> dict[str, PolicyFeature]:
+    """Convert dataset features to policy features.
+
+    This function transforms the dataset's feature specification into a format
+    that a policy can use, classifying features by type (e.g., visual, state,
+    action) and ensuring correct shapes (e.g., channel-first for images).
+
+    Args:
+        features (dict): The LeRobot dataset features dictionary.
+
+    Returns:
+        dict: A dictionary mapping feature keys to `PolicyFeature` objects.
+
+    Raises:
+        ValueError: If an image feature does not have a 3D shape.
+    """
+    # TODO(aliberts): Implement "type" in dataset features and simplify this
+    policy_features = {}
+    for key, ft in features.items():
+        shape = ft["shape"]
+        if ft["dtype"] in ["image", "video"]:
+            type = FeatureType.VISUAL
+            if len(shape) != 3:
+                raise ValueError(f"Number of dimensions of {key} != 3 (shape={shape})")
+
+            names = ft["names"]
+            # Backward compatibility for "channel" which is an error introduced in LeRobotDataset v2.0 for ported datasets.
+            if names[2] in ["channel", "channels"]:  # (h, w, c) -> (c, h, w)
+                shape = (shape[2], shape[0], shape[1])
+        elif key == OBS_ENV_STATE:
+            type = FeatureType.ENV
+        elif key.startswith(OBS_STR):
+            type = FeatureType.STATE
+        elif key.startswith(ACTION):
+            type = FeatureType.ACTION
+        else:
+            continue
+
+        policy_features[key] = PolicyFeature(
+            type=type,
+            shape=shape,
+        )
+
+    return policy_features
+
+
+def combine_feature_dicts(*dicts: dict) -> dict:
+    """Merge LeRobot grouped feature dicts.
+
+    - For 1D numeric specs (dtype not image/video/string) with "names": we merge the names and recompute the shape.
+    - For others (e.g. `observation.images.*`), the last one wins (if they are identical).
+
+    Args:
+        *dicts: A variable number of LeRobot feature dictionaries to merge.
+
+    Returns:
+        dict: A single merged feature dictionary.
+
+    Raises:
+        ValueError: If there's a dtype mismatch for a feature being merged.
+    """
+    out: dict = {}
+    for d in dicts:
+        for key, value in d.items():
+            if not isinstance(value, dict):
+                out[key] = value
+                continue
+
+            dtype = value.get("dtype")
+            shape = value.get("shape")
+            is_vector = (
+                dtype not in ("image", "video", "string")
+                and isinstance(shape, tuple)
+                and len(shape) == 1
+                and "names" in value
+            )
+
+            if is_vector:
+                # Initialize or retrieve the accumulating dict for this feature key
+                target = out.setdefault(key, {"dtype": dtype, "names": [], "shape": (0,)})
+                # Ensure consistent data types across merged entries
+                if "dtype" in target and dtype != target["dtype"]:
+                    raise ValueError(f"dtype mismatch for '{key}': {target['dtype']} vs {dtype}")
+
+                # Merge feature names: append only new ones to preserve order without duplicates
+                seen = set(target["names"])
+                for n in value["names"]:
+                    if n not in seen:
+                        target["names"].append(n)
+                        seen.add(n)
+                # Recompute the shape to reflect the updated number of features
+                target["shape"] = (len(target["names"]),)
+            else:
+                # For images/videos and non-1D entries: override with the latest definition
+                out[key] = value
+    return out
@@ -90,7 +90,8 @@ def require_package(pkg_name: str, extra: str, import_name: str | None = None) -
        _require_package_cache[cache_key] = is_package_available(pkg_name, import_name)
    if not _require_package_cache[cache_key]:
        raise ImportError(
-            f"'{pkg_name}' is required but not installed. Install it with: pip install 'lerobot[{extra}]'"
+            f"'{pkg_name}' is required but not installed. Install it with: "
+            f"pip install 'lerobot[{extra}]' (or uv pip install 'lerobot[{extra}]')"
        )


@@ -81,6 +81,8 @@ def write_video(video_path: str | Path, stacked_frames: list, fps: int) -> None:
        stream.height = height
        stream.pix_fmt = "yuv420p"
        for frame_array in stacked_frames:
+            if height != orig_height or width != orig_width:
+                frame_array = frame_array[:height, :width]
            frame = av.VideoFrame.from_ndarray(frame_array, format="rgb24")
            for packet in stream.encode(frame):
                container.mux(packet)
@@ -292,9 +292,8 @@ class SuppressProgressBars:

            disable_progress_bar()
        except ImportError:
-            logging.getLogger(__name__).info(
-                "SuppressProgressBars is a no-op because 'datasets' is not installed. "
-                "Install it with: pip install 'lerobot[dataset]'"
+            logging.getLogger(__name__).debug(
+                "SuppressProgressBars is a no-op because 'datasets' is not installed."
            )

    def __exit__(self, exc_type, exc_val, exc_tb):