feat(rolling vstack): opt for an in-place, copy-efficient implementation of the rolling vstack for the audio buffer

This commit is contained in:
CarolinePascal
2025-08-09 01:14:43 +02:00
parent 0caba222ef
commit af2f044f5a
2 changed files with 40 additions and 17 deletions
+3 -17
View File
@@ -136,6 +136,7 @@ from lerobot.teleoperators import ( # noqa: F401
so_leader,
)
from lerobot.teleoperators.keyboard.teleop_keyboard import KeyboardTeleop
from lerobot.utils.audio_utils import rolling_vstack
from lerobot.utils.constants import ACTION, OBS_STR
from lerobot.utils.control_utils import (
init_keyboard_listener,
@@ -358,17 +359,9 @@ def record_loop(
# (1) ensure that the audio buffers are filled with enough data
# (2) add additional initial samples to the dataset in case of variable audio chunk duration during training
busy_wait(DEFAULT_INITIAL_AUDIO_BUFFER_DURATION)
for microphone_name, microphone in robot.microphones.items():
audio_chunk = microphone.read()
buffer_size = audio_buffer[microphone_name].shape[0]
# Remove as many old audio samples as needed
audio_buffer[microphone_name] = audio_buffer[microphone_name][len(audio_chunk) :]
# Add new audio samples, only the newest if the buffer is already full
audio_buffer[microphone_name] = np.vstack(
(audio_buffer[microphone_name], audio_chunk[-buffer_size:])
)
audio_buffer[microphone_name] = rolling_vstack(audio_buffer[microphone_name], audio_chunk)
timestamp = 0
start_episode_t = time.perf_counter()
@@ -393,15 +386,8 @@ def record_loop(
# Transform instantaneous audio samples into a buffer of fixed size
buffered_observation_frame = copy(observation_frame)
for name in audio_buffer:
buffer_size = audio_buffer[name].shape[0]
# Remove as many old audio samples as needed
audio_buffer[name] = audio_buffer[name][len(buffered_observation_frame[name]) :]
# Add new audio samples
audio_buffer[name] = np.vstack(
(audio_buffer[name], buffered_observation_frame[name][-buffer_size:])
)
# Add the audio buffer to the observation
buffered_observation_frame[name] = audio_buffer[name]
buffered_observation_frame[name] = rolling_vstack(audio_buffer[name], observation_frame[name])
action_values = predict_action(
observation=buffered_observation_frame,
+37
View File
@@ -0,0 +1,37 @@
#!/usr/bin/env python
# Copyright 2025 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
def rolling_vstack(buffer: np.ndarray, new_data: np.ndarray) -> np.ndarray:
    """
    Rolling implementation of numpy.vstack: append new data at the end of a fixed shape buffer,
    discarding the oldest rows so that the buffer shape never changes.

    The buffer is updated *in place* and the very same array object is returned, so any other
    reference to ``buffer`` observes the update. Callers that need a snapshot must copy the result.

    Args:
        buffer: The *fixed* shape buffer to update (rolled along its first axis).
        new_data: The new data to add to the buffer. If it holds more rows than the buffer,
            only its newest (last) rows are kept. If it is empty, the buffer is left untouched.

    Returns:
        The updated buffer (same object as ``buffer``).
    """
    buffer_size = buffer.shape[0]
    num_new = len(new_data)

    # Nothing to add, or nowhere to store it. This guard is required: with num_new == 0 the
    # slices `buffer[:-0]` and `buffer[-0:]` mean "empty" and "everything" respectively, and the
    # assignments below would raise a broadcasting ValueError.
    if num_new == 0 or buffer_size == 0:
        return buffer

    if num_new < buffer_size:
        # Remove as many old samples as needed by shifting the remaining ones to the front.
        # Source and destination overlap; NumPy resolves the overlap so the result is the same
        # as if the source had been copied first.
        buffer[:-num_new] = buffer[num_new:]
        # Add the new samples at the end
        buffer[-num_new:] = new_data
    else:
        # The new data fills (or overflows) the buffer: keep only the newest samples
        buffer[:] = new_data[-buffer_size:]

    return buffer