From d3fd459f81f57e149175e22241e2e0d1b77230f4 Mon Sep 17 00:00:00 2001
From: CarolinePascal <caroline8.pascal@gmail.com>
Date: Tue, 30 Jun 2026 13:51:07 +0200
Subject: [PATCH] test(depth stats): updating tests

---
 tests/datasets/test_depth.py        | 41 +++++++++++++++++++++++++++++
 tests/fixtures/dataset_factories.py | 19 ++++++++-----
 2 files changed, 53 insertions(+), 7 deletions(-)

diff --git a/tests/datasets/test_depth.py b/tests/datasets/test_depth.py
index a075fa6b5..df5964bd8 100644
--- a/tests/datasets/test_depth.py
+++ b/tests/datasets/test_depth.py
@@ -245,3 +245,44 @@ class TestFeatureFileRouting:
 
         dataset.save_episode()
         dataset.finalize()
+
+
+# ── 5. Depth stats unit canonicalization (millimetres) ────────────────
+
+
+class TestDepthStatsUnit:
+    """Depth stats are always stored in millimetres, regardless of raw frame dtype."""
+
+    NUM_FRAMES = 4
+
+    @pytest.mark.parametrize("use_videos", [False, True])
+    def test_stats_canonicalized_to_mm(self, tmp_path, features_factory, use_videos):
+        """Float (metre) and integer (millimetre) depth over the same physical range
+        yield identical millimetre-scale stats."""
+        from lerobot.datasets.lerobot_dataset import LeRobotDataset
+
+        def _record(depth_dtype, root):
+            features = features_factory(
+                camera_features=DUMMY_CAMERA_FEATURES_WITH_DEPTH, use_videos=use_videos
+            )
+            dataset = LeRobotDataset.create(
+                repo_id=DUMMY_REPO_ID,
+                fps=DEFAULT_FPS,
+                features=features,
+                root=root,
+                use_videos=use_videos,
+                streaming_encoding=use_videos,
+            )
+            add_frames(dataset, num_frames=self.NUM_FRAMES, depth_dtype=depth_dtype)
+            dataset.save_episode()
+            dataset.finalize()
+            return np.asarray(dataset.meta.stats[DEPTH_KEY]["mean"]).reshape(-1)
+
+        # add_frames ramps float depth over 0.1–10 m and integer depth over 100–10000 mm
+        # (the same physical range), so canonicalized stats must match.
+        mean_m = _record(np.float32, tmp_path / "ds_m")
+        mean_mm = _record(np.uint16, tmp_path / "ds_mm")
+
+        # Float (metre) input is scaled to millimetres, not left in the single-digit metre range.
+        assert mean_m.item() > 50.0
+        np.testing.assert_allclose(mean_m, mean_mm, rtol=0.05)
diff --git a/tests/fixtures/dataset_factories.py b/tests/fixtures/dataset_factories.py
index 100922f9c..f2be57822 100644
--- a/tests/fixtures/dataset_factories.py
+++ b/tests/fixtures/dataset_factories.py
@@ -49,16 +49,18 @@ from tests.fixtures.constants import (
 )
 
 
-def add_frames(dataset: LeRobotDataset, num_frames: int) -> None:
+def add_frames(dataset: LeRobotDataset, num_frames: int, depth_dtype: np.dtype = np.uint16) -> None:
     """Append ``num_frames`` synthetic frames to ``dataset``.
 
-    Generates per-feature payloads from ``dataset.meta``: uint16 depth ramps for
-    keys in ``dataset.meta.depth_keys``, uint8 random noise for video/image keys,
-    and float32 zeros for everything else. ``DEFAULT_FEATURES`` (timestamp,
-    frame_index, ...) are auto-populated by ``add_frame`` and skipped here.
+    Generates per-feature payloads from ``dataset.meta``: depth ramps (``depth_dtype``,
+    default ``uint16`` millimetres; pass ``np.float32`` for metres) for keys in
+    ``dataset.meta.depth_keys``, uint8 random noise for video/image keys, and float32
+    zeros for everything else. ``DEFAULT_FEATURES`` (timestamp, frame_index, ...) are
+    auto-populated by ``add_frame`` and skipped here.
     """
     video_keys = dataset.meta.video_keys
     depth_keys = dataset.meta.depth_keys
+    depth_is_float = np.issubdtype(depth_dtype, np.floating)
     # Smooth gradient base reused per (H, W) to keep depth frames cheap to
     # encode (HEVC Main 12 hates white noise).
     _depth_base_cache: dict[tuple[int, int], np.ndarray] = {}
@@ -70,11 +72,14 @@ def add_frames(dataset: LeRobotDataset, num_frames: int) -> None:
             shape = ft["shape"]
             if key in depth_keys:
                 h, w, _ = shape
+                # Float depth is expressed in metres, integer depth in millimetres.
+                lo, hi = (0.1, 10.0) if depth_is_float else (100.0, 10_000.0)
                 base = _depth_base_cache.setdefault(
                     (h, w),
-                    np.linspace(100.0, 10_000.0, h * w, dtype=np.float32).reshape(h, w, 1),
+                    np.linspace(lo, hi, h * w, dtype=np.float32).reshape(h, w, 1),
                 )
-                frame[key] = (base + 50.0 * i).clip(0, 65535).astype(np.uint16)
+                step = (0.05 if depth_is_float else 50.0) * i
+                frame[key] = (base + step).clip(0, 65535).astype(depth_dtype)
             elif key in video_keys:
                 frame[key] = np.random.randint(0, 256, shape, dtype=np.uint8)
             else: