From ad01ef19f4715b232d3b6bd49d27cae6f69e7d77 Mon Sep 17 00:00:00 2001 From: CarolinePascal Date: Thu, 15 May 2025 13:18:01 +0200 Subject: [PATCH] fix(audio buffers): add security crop to avoid audio buffer overfilling --- src/lerobot/scripts/lerobot_record.py | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/src/lerobot/scripts/lerobot_record.py b/src/lerobot/scripts/lerobot_record.py index d0d196f0b..ae62ffe6f 100644 --- a/src/lerobot/scripts/lerobot_record.py +++ b/src/lerobot/scripts/lerobot_record.py @@ -327,12 +327,6 @@ def record_loop( preprocessor.reset() postprocessor.reset() - if dataset is not None and robot.name != "lekiwi": - for microphone_key, microphone in robot.microphones.items(): - dataset.add_microphone_recording(microphone, microphone_key) - else: - async_microphones_start_recording(robot.microphones) - # Create a buffer for audio observations (shifting window of fixed size over audio samples) audio_buffer = { microphone_name: np.zeros( @@ -341,6 +335,14 @@ def record_loop( for microphone_name, microphone in robot.microphones.items() } + if ( + dataset is not None and robot.name != "lekiwi" + ): # For now, LeKiwi only supports frame audio recording (which may lead to audio chunks loss, extended post-processing, increased memory usage) + for microphone_key, microphone in robot.microphones.items(): + dataset.add_microphone_recording(microphone, microphone_key) + else: + async_microphones_start_recording(robot.microphones) + timestamp = 0 start_episode_t = time.perf_counter() while timestamp < control_time_s: @@ -364,10 +366,13 @@ def record_loop( # Transform instantaneous audio samples into a buffer of fixed size buffered_observation_frame = copy(observation_frame) for name in audio_buffer: + buffer_size = audio_buffer[name].shape[0] # Remove as many old audio samples as needed audio_buffer[name] = audio_buffer[name][len(buffered_observation_frame[name]) :] # Add new audio samples - audio_buffer[name] = np.vstack((audio_buffer[name], buffered_observation_frame[name])) + audio_buffer[name] = np.vstack( + (audio_buffer[name], buffered_observation_frame[name][-buffer_size:]) + ) # Add the audio buffer to the observation buffered_observation_frame[name] = audio_buffer[name] @@ -427,7 +432,10 @@ def record_loop( if display_data: log_rerun_data( - observation=obs_processed, action=action_values, compress_images=display_compressed_images + observation=obs_processed, + action=action_values, + compress_images=display_compressed_images, + log_time=time.perf_counter() - start_episode_t, ) dt_s = time.perf_counter() - start_loop_t