From e714ff22e28913285b66d2468155ff8d190d035b Mon Sep 17 00:00:00 2001 From: CarolinePascal Date: Thu, 17 Apr 2025 20:04:05 +0200 Subject: [PATCH] [skip ci] doc(benchmark): adding microphones recording benchmark --- benchmarks/audio/run_microphone_benchmark.py | 216 +++++++++++++++++++ 1 file changed, 216 insertions(+) create mode 100644 benchmarks/audio/run_microphone_benchmark.py diff --git a/benchmarks/audio/run_microphone_benchmark.py b/benchmarks/audio/run_microphone_benchmark.py new file mode 100644 index 000000000..f4cf16303 --- /dev/null +++ b/benchmarks/audio/run_microphone_benchmark.py @@ -0,0 +1,216 @@ +#!/usr/bin/env python + +# Copyright 2025 The HuggingFace Inc. team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
import argparse
import time
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import soundfile as sf

from lerobot.microphones.configs import MicrophoneConfig
from lerobot.microphones.portaudio import PortAudioMicrophone, PortAudioMicrophoneConfig
from lerobot.microphones.utils import (
    async_microphones_read,
    async_microphones_start_recording,
    async_microphones_stop_recording,
    make_microphones_from_configs,
)


def main(
    microphones_configs: dict[str, MicrophoneConfig],
    audio_chunks_number: int,
    audio_chunks_duration: float,
    repetitions: int,
    multiprocessing: bool = False,
):
    """Record from every configured microphone and compare chunked reads with the recorded files.

    For each repetition, a recording is started for all microphones (one ``.wav`` file per
    microphone and repetition, written to ``outputs/audio_benchmark``). The microphones are then
    read ``audio_chunks_number`` times, sleeping ``audio_chunks_duration`` seconds before each
    read, and the recording is stopped. The chunks read from each microphone are concatenated and
    compared with the content of the matching ``.wav`` file: both signals and their absolute
    difference are plotted, and the average lengths and length differences are printed.

    Args:
        microphones_configs: Microphone configurations, keyed by the name used for the output
            files, the plot titles and the printed statistics.
        audio_chunks_number: Number of reads performed during each repetition.
        audio_chunks_duration: Time slept before each read, in seconds.
        repetitions: Number of start/read/stop recording cycles.
        multiprocessing: Forwarded to ``async_microphones_start_recording``.
    """
    recording_dir = Path("outputs/audio_benchmark")
    recording_dir.mkdir(parents=True, exist_ok=True)

    # Create microphones
    microphones = make_microphones_from_configs(microphones_configs)

    # Connect microphones
    for microphone in microphones.values():
        microphone.connect()

    all_audio_chunks = []
    for i in range(repetitions):
        # One list of chunks per microphone for this repetition
        audio_chunks = {microphone_key: [] for microphone_key in microphones}

        # Start recording (honour the caller's choice instead of always forcing multiprocessing)
        async_microphones_start_recording(
            microphones,
            output_files=[
                recording_dir / f"{microphone_key}_recording_{i}.wav" for microphone_key in microphones
            ],
            multiprocessing=multiprocessing,
        )

        # Record audio chunks
        for j in range(audio_chunks_number):
            time.sleep(audio_chunks_duration)

            audio_readings = async_microphones_read(microphones)
            for microphone_index, reading in audio_readings.items():
                print(f"{microphone_index} - repetition {i} - chunk {j} - samples {reading.shape[0]}")
                audio_chunks[microphone_index].append(reading)

        # Stop recording
        async_microphones_stop_recording(microphones)

        # Merge the chunks of each microphone along the first axis
        for microphone_key in microphones:
            audio_chunks[microphone_key] = np.concatenate(audio_chunks[microphone_key], axis=0)

        all_audio_chunks.append(audio_chunks)

    # Disconnect microphones
    for microphone in microphones.values():
        microphone.disconnect()

    # Compute statistics
    cmap = plt.get_cmap("tab10")
    # squeeze=False keeps `ax` two-dimensional, so `ax[i, j]` also works with a single
    # repetition or a single microphone.
    _, ax = plt.subplots(nrows=repetitions, ncols=len(microphones), squeeze=False)
    chunk_length = np.zeros((repetitions, len(microphones)))
    record_length = np.zeros((repetitions, len(microphones)))
    for i in range(repetitions):
        for j, (microphone_key, microphone) in enumerate(microphones.items()):
            # Get recorded audio chunks
            recorded_audio_chunks = all_audio_chunks[i][microphone_key]

            # Load recorded file; a 1-D result is given an explicit second axis
            recorded_data, _ = sf.read(recording_dir / f"{microphone_key}_recording_{i}.wav")
            if recorded_data.ndim == 1:
                recorded_data = np.expand_dims(recorded_data, axis=1)

            record_length[i, j] = recorded_data.shape[0]
            chunk_length[i, j] = recorded_audio_chunks.shape[0]
            length_gap = recorded_data.shape[0] - recorded_audio_chunks.shape[0]

            # NOTE(review): assumes the concatenated chunks are 2-D with one column per channel,
            # like the loaded file - confirm against the microphone read API.
            for k, (chunk_data, record_data) in enumerate(
                zip(recorded_audio_chunks.T, recorded_data.T, strict=False)
            ):
                # Plot audio chunks and recorded data
                ax[i, j].plot(
                    np.arange(0, len(chunk_data)) / microphone.sample_rate,
                    chunk_data,
                    label=f"audio chunks - channel {k}",
                    color=cmap(2 * k),
                )
                ax[i, j].plot(
                    np.arange(0, len(record_data)) / microphone.sample_rate,
                    record_data,
                    label=f"recorded data - channel {k}",
                    linestyle="dashed",
                    color=cmap(2 * k + 1),
                )

                # Plot absolute difference (errors should be located at the end of the recordings).
                # The shorter signal is zero-padded so both have the same length.
                if length_gap > 0:
                    chunk_data = np.append(chunk_data, np.zeros(length_gap))
                else:
                    record_data = np.append(record_data, np.zeros(-length_gap))
                ax[i, j].plot(
                    np.arange(0, len(record_data)) / microphone.sample_rate,
                    np.abs(chunk_data - record_data),
                    label=f"differences - channel {k}",
                    color="red",
                    linestyle="dotted",
                )
            ax[i, j].set_title(f"{microphone_key} - repetition {i}")
            ax[i, j].legend()

    plt.show()

    # Print statistics (iterate over items so each line is labelled with its own microphone key)
    differences = record_length - chunk_length
    for i, (microphone_key, microphone) in enumerate(microphones.items()):
        print(
            f"Average recorded duration for {microphone_key} : {np.mean(record_length[:, i]) / microphone.sample_rate:.3f} seconds"
        )
        print(
            f"Average chunk duration for {microphone_key} : {np.mean(chunk_length[:, i]) / microphone.sample_rate:.3f} seconds"
        )
        print(f"Average difference for {microphone_key} : {np.mean(differences[:, i]):.3f} samples")
        print(
            f"Average difference for {microphone_key} : {np.mean(differences[:, i]) / microphone.sample_rate:.3f} seconds"
        )


if __name__ == "__main__":
    # Query the available microphones once and reuse the result for every default value.
    available_microphones = PortAudioMicrophone.find_microphones()

    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--microphones_indices",
        type=int,
        nargs="+",
        default=[microphone["index"] for microphone in available_microphones],
    )
    parser.add_argument(
        "--microphones_sample_rate",
        type=float,
        nargs="+",
        default=[None] * len(available_microphones),
    )
    parser.add_argument(
        "--microphones_channels",
        type=int,
        nargs="+",
        default=[None] * len(available_microphones),
    )
    parser.add_argument("--audio_chunks_number", type=int, default=2)
    parser.add_argument(
        "--audio_chunks_duration",
        type=float,
        default=1.0,
    )
    parser.add_argument(
        "--repetitions",
        type=int,
        default=2,
    )
    parser.add_argument(
        "--multiprocessing",
        action="store_true",
    )

    args = vars(parser.parse_args())

    # Replace the three per-microphone lists with a single dict of configurations, which is
    # what `main` expects. `zip` stops at the shortest list, so surplus entries are ignored.
    microphones_indices = args.pop("microphones_indices")
    microphones_sample_rate = args.pop("microphones_sample_rate")
    microphones_channels = args.pop("microphones_channels")

    args["microphones_configs"] = {
        f"microphone_{index}": PortAudioMicrophoneConfig(
            microphone_index=index,
            sample_rate=sample_rate,
            channels=channels,
        )
        for index, sample_rate, channels in zip(
            microphones_indices,
            microphones_sample_rate,
            microphones_channels,
            strict=False,
        )
    }

    main(**args)