fix(audio feature shape): fixing audio feature shape ordering (frames first, channels second)

This commit is contained in:
CarolinePascal
2025-04-22 17:06:10 +02:00
parent e714ff22e2
commit e4dd00c8f5
3 changed files with 5 additions and 7 deletions
+2 -4
View File
@@ -1143,11 +1143,9 @@ def validate_feature_audio(name: str, expected_shape: list[str], value: np.ndarr
if isinstance(value, np.ndarray): if isinstance(value, np.ndarray):
actual_shape = value.shape actual_shape = value.shape
c = expected_shape c = expected_shape
if len(actual_shape) != 2 or ( if len(actual_shape) != 2 or actual_shape[-1] != c[-1]: # The number of frames might be different
actual_shape[-1] != c[-1] and actual_shape[0] != c[0]
): # The number of frames might be different
error_message += ( error_message += (
f"The feature '{name}' of shape '{actual_shape}' does not have the expected shape '{(c,)}'.\n" f"The feature '{name}' of shape '{actual_shape}' does not have the expected shape '{c}'.\n"
) )
else: else:
error_message += f"The feature '{name}' is expected to be of type 'np.ndarray', but type '{type(value)}' provided instead.\n" error_message += f"The feature '{name}' is expected to be of type 'np.ndarray', but type '{type(value)}' provided instead.\n"
+1 -1
View File
@@ -82,7 +82,7 @@ def audio_dataset(tmp_path, empty_lerobot_dataset_factory):
features = { features = {
"audio": { "audio": {
"dtype": "audio", "dtype": "audio",
"shape": (DUMMY_AUDIO_CHANNELS,), "shape": (1, DUMMY_AUDIO_CHANNELS),
"names": [ "names": [
"channels", "channels",
], ],
+2 -2
View File
@@ -41,8 +41,8 @@ DUMMY_VIDEO_INFO = {
"has_audio": False, "has_audio": False,
} }
DUMMY_MICROPHONE_FEATURES = { DUMMY_MICROPHONE_FEATURES = {
"laptop": {"dtype": "audio", "shape": (1,), "names": ["channels"], "info": None}, "laptop": {"dtype": "audio", "shape": (1, 2), "names": ["channels"], "info": None},
"phone": {"dtype": "audio", "shape": (1,), "names": ["channels"], "info": None}, "phone": {"dtype": "audio", "shape": (1, 2), "names": ["channels"], "info": None},
} }
DEFAULT_SAMPLE_RATE = 48000 DEFAULT_SAMPLE_RATE = 48000
DUMMY_AUDIO_CHANNELS = 2 DUMMY_AUDIO_CHANNELS = 2