mirror of
https://github.com/huggingface/lerobot.git
synced 2026-06-18 16:57:12 +00:00
feat(rolling vstack): opting for an inplace copy efficient implementation of the rolling vstack for the audio buffer
This commit is contained in:
@@ -136,6 +136,7 @@ from lerobot.teleoperators import ( # noqa: F401
|
||||
so_leader,
|
||||
)
|
||||
from lerobot.teleoperators.keyboard.teleop_keyboard import KeyboardTeleop
|
||||
from lerobot.utils.audio_utils import rolling_vstack
|
||||
from lerobot.utils.constants import ACTION, OBS_STR
|
||||
from lerobot.utils.control_utils import (
|
||||
init_keyboard_listener,
|
||||
@@ -358,17 +359,9 @@ def record_loop(
|
||||
# (1) ensure that the audio buffers are filled with enough data
|
||||
# (2) add additional initial samples to the dataset in case of variable audio chunk duration during training
|
||||
busy_wait(DEFAULT_INITIAL_AUDIO_BUFFER_DURATION)
|
||||
|
||||
for microphone_name, microphone in robot.microphones.items():
|
||||
audio_chunk = microphone.read()
|
||||
|
||||
buffer_size = audio_buffer[microphone_name].shape[0]
|
||||
# Remove as many old audio samples as needed
|
||||
audio_buffer[microphone_name] = audio_buffer[microphone_name][len(audio_chunk) :]
|
||||
# Add new audio samples, only the newest if the buffer is already full
|
||||
audio_buffer[microphone_name] = np.vstack(
|
||||
(audio_buffer[microphone_name], audio_chunk[-buffer_size:])
|
||||
)
|
||||
audio_buffer[microphone_name] = rolling_vstack(audio_buffer[microphone_name], audio_chunk)
|
||||
|
||||
timestamp = 0
|
||||
start_episode_t = time.perf_counter()
|
||||
@@ -393,15 +386,8 @@ def record_loop(
|
||||
# Transform instantaneous audio samples into a buffer of fixed size
|
||||
buffered_observation_frame = copy(observation_frame)
|
||||
for name in audio_buffer:
|
||||
buffer_size = audio_buffer[name].shape[0]
|
||||
# Remove as many old audio samples as needed
|
||||
audio_buffer[name] = audio_buffer[name][len(buffered_observation_frame[name]) :]
|
||||
# Add new audio samples
|
||||
audio_buffer[name] = np.vstack(
|
||||
(audio_buffer[name], buffered_observation_frame[name][-buffer_size:])
|
||||
)
|
||||
# Add the audio buffer to the observation
|
||||
buffered_observation_frame[name] = audio_buffer[name]
|
||||
buffered_observation_frame[name] = rolling_vstack(audio_buffer[name], observation_frame[name])
|
||||
|
||||
action_values = predict_action(
|
||||
observation=buffered_observation_frame,
|
||||
|
||||
@@ -0,0 +1,37 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
# Copyright 2025 The HuggingFace Inc. team. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import numpy as np
|
||||
|
||||
|
||||
def rolling_vstack(buffer: np.ndarray, new_data: np.ndarray) -> np.ndarray:
    """
    Rolling implementation of numpy.vstack: push new rows at the end of a fixed-shape buffer,
    discarding as many of the oldest rows as needed so that the buffer shape never changes.

    The buffer is modified *in place* and the very same array object is returned. Callers that
    need to keep a snapshot of the buffer content (e.g. to store it in a frame) must copy it,
    otherwise the snapshot will change on the next call.

    Args:
        buffer: The *fixed* shape buffer to update; rows are indexed along axis 0.
        new_data: The new data to add to the buffer, stacked along axis 0. It may be empty
            (the buffer is then left untouched) or longer than the buffer (only its newest
            rows are kept).

    Returns:
        The updated buffer (same object as `buffer`).
    """
    buffer_size = buffer.shape[0]
    n_new = len(new_data)

    # Nothing to add, or nowhere to put it. This guard is required: with n_new == 0 the
    # negative-index slices below would degenerate (`-0 == 0`) and raise a broadcast error.
    if n_new == 0 or buffer_size == 0:
        return buffer

    # The new chunk fills (or overflows) the buffer: keep only its newest rows
    if n_new >= buffer_size:
        buffer[:] = new_data[n_new - buffer_size :]
        return buffer

    # Remove as many old samples as needed by shifting the remaining ones toward the start
    buffer[:-n_new] = buffer[n_new:]
    # Add the new samples at the end
    buffer[-n_new:] = new_data

    return buffer
|
||||
Reference in New Issue
Block a user