fix(datasets): skip zero-width features in compute_episode_stats

`LeRobotDataset.save_episode()` raised
`ValueError: cannot reshape array of size 0 into shape (0)` whenever a
declared non-string feature had a zero-width dimension (e.g. `shape=(0,)`).
The root cause was `compute_episode_stats` computing stats for every
feature whose dtype is not `string` or `language`, after which
`RunningQuantileStats.update` called
`batch.reshape(-1, batch.shape[-1])` on the empty array.

Skip features whose declared `shape` contains a zero dim, mirroring the
existing skip for `string` / `language` dtype features.

Fixes #3654
This commit is contained in:
Mahbod
2026-05-24 22:09:38 +02:00
committed by CarolinePascal
parent fc262fbc06
commit f59260f4aa
2 changed files with 26 additions and 0 deletions
+3
View File
@@ -519,6 +519,9 @@ def compute_episode_stats(
if features[key]["dtype"] in {"string", "language"}:
continue
if any(d == 0 for d in features[key].get("shape", ())):
continue
if features[key]["dtype"] in ["image", "video"]:
ep_ft_array = sample_images(data)
axes_to_reduce = (0, 2, 3)
+23
View File
@@ -687,6 +687,29 @@ def test_compute_episode_stats_string_features_skipped():
assert "q01" in stats["action"]
def test_compute_episode_stats_zero_width_features_skipped():
    """Check that a feature declared with a zero-sized dimension gets no stats entry.

    An ``empty`` feature declared with ``shape=(0,)`` is passed alongside a
    regular 5-dimensional ``action`` feature. Only ``action`` is expected to
    show up in the returned stats.
    """
    num_frames = 100
    frames = {
        # One row per frame, zero values per row.
        "empty": np.zeros((num_frames, 0), dtype=np.float32),
        "action": np.random.normal(0, 1, (num_frames, 5)),
    }
    feature_specs = {
        "empty": {"dtype": "float32", "shape": (0,)},
        "action": {"dtype": "float32", "shape": (5,)},
    }

    result = compute_episode_stats(frames, feature_specs)

    # The zero-width feature must be absent from the result.
    assert "empty" not in result

    # The regular feature still gets its statistics, quantiles included.
    assert "action" in result
    action_stats = result["action"]
    assert "q01" in action_stats
    assert action_stats["mean"].shape == (5,)
def test_aggregate_feature_stats_with_quantiles():
"""Test aggregating feature stats that include quantiles."""
stats_ft_list = [