From 2726b4e865cd58b4fcae73a44851f2212c8f7118 Mon Sep 17 00:00:00 2001
From: CarolinePascal
Date: Wed, 6 Aug 2025 20:34:06 +0200
Subject: [PATCH] fix(hw-dataset): adding missing support for audio in hw-to-dataset functions

Tuple-valued hardware features are now split by length: 3-item tuples are
handled as camera shapes and 2-item tuples as microphone specs, read as
(sample_rate, channels). Each microphone spec becomes a
"{prefix}.audio.{key}" feature with dtype "audio", and build_dataset_frame
copies the matching value into the frame.

The microphone loop uses its own loop variable so that it does not rebind
the `features` dict that the function fills in, validates and returns.

---
 src/lerobot/datasets/utils.py | 19 ++++++++++++++++++-
 1 file changed, 18 insertions(+), 1 deletion(-)

diff --git a/src/lerobot/datasets/utils.py b/src/lerobot/datasets/utils.py
index fc312b3db..8926e3963 100644
--- a/src/lerobot/datasets/utils.py
+++ b/src/lerobot/datasets/utils.py
@@ -657,7 +657,13 @@ def hw_to_dataset_features(
         for key, ftype in hw_features.items()
         if ftype is float or (isinstance(ftype, PolicyFeature) and ftype.type != FeatureType.VISUAL)
     }
-    cam_fts = {key: shape for key, shape in hw_features.items() if isinstance(shape, tuple)}
+    # Tuple specs are told apart by length: 3 items -> camera shape, 2 items -> microphone spec.
+    cam_fts = {
+        key: shape for key, shape in hw_features.items() if isinstance(shape, tuple) and len(shape) == 3
+    }
+    mic_fts = {
+        key: shape for key, shape in hw_features.items() if isinstance(shape, tuple) and len(shape) == 2
+    }

     if joint_fts and prefix == ACTION:
         features[prefix] = {
@@ -680,6 +686,15 @@ def hw_to_dataset_features(
             "names": ["height", "width", "channels"],
         }

+    # Microphone specs are read as (sample_rate, channels); do not reuse the name `features` here.
+    for key, mic_spec in mic_fts.items():
+        features[f"{prefix}.audio.{key}"] = {
+            "dtype": "audio",
+            "shape": (mic_spec[1],),
+            "names": ["channels"],
+            "sample_rate": mic_spec[0],
+        }
+
     _validate_feature_names(features)
     return features

@@ -709,5 +724,7 @@ def build_dataset_frame(
             frame[key] = np.array([values[name] for name in ft["names"]], dtype=np.float32)
         elif ft["dtype"] in ["image", "video"]:
             frame[key] = values[key.removeprefix(f"{prefix}.images.")]
+        elif ft["dtype"] == "audio":
+            frame[key] = values[key.removeprefix(f"{prefix}.audio.")]

     return frame