mirror of
https://github.com/huggingface/lerobot.git
synced 2026-06-19 01:07:18 +00:00
docs: add methods descriptions and comments on tricky parts
This commit is contained in:
@@ -46,12 +46,12 @@ def decode_audio(
|
||||
Decodes audio using the specified backend.
|
||||
Args:
|
||||
audio_path (Path): Path to the audio file.
|
||||
timestamps (list[float]): List of timestamps to extract frames.
|
||||
tolerance_s (float): Allowed deviation in seconds for frame retrieval.
|
||||
timestamps (list[float]): List of (starting) timestamps to extract audio chunks.
|
||||
duration (float): Duration of the audio chunks in seconds.
|
||||
backend (str, optional): Backend to use for decoding. Defaults to "torchaudio".
|
||||
|
||||
Returns:
|
||||
torch.Tensor: Decoded frames.
|
||||
torch.Tensor: Decoded audio chunks.
|
||||
|
||||
Currently supports torchaudio.
|
||||
"""
|
||||
|
||||
@@ -246,6 +246,7 @@ def sample_images(image_paths: list[str]) -> np.ndarray:
|
||||
|
||||
|
||||
def sample_audio_from_path(audio_path: str) -> np.ndarray:
|
||||
"""Samples audio data from an audio recording stored in a WAV file."""
|
||||
data = load_audio_from_path(audio_path)
|
||||
sampled_indices = sample_indices(len(data))
|
||||
|
||||
@@ -253,6 +254,7 @@ def sample_audio_from_path(audio_path: str) -> np.ndarray:
|
||||
|
||||
|
||||
def sample_audio_from_data(data: np.ndarray) -> np.ndarray:
|
||||
"""Samples audio data from an audio recording stored in a numpy array."""
|
||||
sampled_indices = sample_indices(len(data))
|
||||
return data[sampled_indices]
|
||||
|
||||
@@ -527,7 +529,7 @@ def compute_episode_stats(
|
||||
elif features[key]["dtype"] == "audio":
|
||||
try:
|
||||
ep_ft_array = sample_audio_from_path(data[0])
|
||||
except TypeError: # Should only be triggered for LeKiwi robot
|
||||
except TypeError: # Should only be triggered for LeKiwi robot, for which audio is stored chunk by chunk in a visual frame-like manner
|
||||
ep_ft_array = sample_audio_from_data(data)
|
||||
axes_to_reduce = 0
|
||||
keepdims = True
|
||||
|
||||
@@ -1297,9 +1297,11 @@ class LeRobotDataset(torch.utils.data.Dataset):
|
||||
self._save_image(frame[key], img_path, compress_level)
|
||||
self.episode_buffer[key].append(str(img_path))
|
||||
elif self.features[key]["dtype"] == "audio":
|
||||
if self.meta.robot_type == "lekiwi":
|
||||
if (
|
||||
self.meta.robot_type == "lekiwi"
|
||||
): # Raw data storage should only be triggered for LeKiwi robot, for which audio is stored chunk by chunk in a visual frame-like manner
|
||||
self.episode_buffer[key].append(frame[key])
|
||||
else:
|
||||
else: # Otherwise, only the audio file path is stored in the episode buffer
|
||||
if frame_index == 0:
|
||||
audio_path = self._get_raw_audio_file_path(
|
||||
episode_index=self.episode_buffer["episode_index"], audio_key=key
|
||||
@@ -1312,7 +1314,7 @@ class LeRobotDataset(torch.utils.data.Dataset):
|
||||
|
||||
def add_microphone_recording(self, microphone: Microphone, microphone_key: str) -> None:
|
||||
"""
|
||||
This function will start recording audio from the microphone and save it to disk.
|
||||
Starts recording the audio data provided by the microphone and writes it directly to a .wav file.
|
||||
"""
|
||||
|
||||
audio_dir = self._get_raw_audio_file_path(
|
||||
@@ -1371,7 +1373,9 @@ class LeRobotDataset(torch.utils.data.Dataset):
|
||||
if key in ["index", "episode_index", "task_index"] or ft["dtype"] in ["image", "video"]:
|
||||
continue
|
||||
elif ft["dtype"] == "audio":
|
||||
if self.meta.robot_type == "lekiwi":
|
||||
if (
|
||||
self.meta.robot_type == "lekiwi"
|
||||
): # Raw data storage should only be triggered for LeKiwi robot, for which audio is stored chunk by chunk in a visual frame-like manner
|
||||
episode_buffer[key] = np.concatenate(episode_buffer[key], axis=0)
|
||||
continue
|
||||
episode_buffer[key] = np.stack(episode_buffer[key])
|
||||
|
||||
@@ -64,6 +64,11 @@ def find_microphones(raise_when_empty=False) -> list[dict]:
|
||||
def record_audio_from_microphones(
|
||||
output_dir: Path, microphone_ids: list[int] | None = None, record_time_s: float = 2.0
|
||||
):
|
||||
"""
|
||||
Records audio from all the channels of the specified microphones for the specified duration.
|
||||
If no microphone ids are provided, all available microphones will be used.
|
||||
"""
|
||||
|
||||
if microphone_ids is None or len(microphone_ids) == 0:
|
||||
microphones = find_microphones()
|
||||
microphone_ids = [m["index"] for m in microphones]
|
||||
@@ -138,11 +143,11 @@ class Microphone:
|
||||
# Input audio stream
|
||||
self.stream = None
|
||||
|
||||
# Thread-safe concurrent queue to store the recorded/read audio
|
||||
# Thread/Process-safe concurrent queue to store the recorded/read audio
|
||||
self.record_queue = None
|
||||
self.read_queue = None
|
||||
|
||||
# Thread to handle data reading and file writing in a separate thread (safely)
|
||||
# Thread/Process to handle data reading and file writing in a separate thread/process (safely)
|
||||
self.record_thread = None
|
||||
self.record_stop_event = None
|
||||
|
||||
@@ -152,6 +157,9 @@ class Microphone:
|
||||
self.is_writing = False
|
||||
|
||||
def connect(self) -> None:
|
||||
"""
|
||||
Connects the microphone and checks if the requested acquisition parameters are compatible with the microphone.
|
||||
"""
|
||||
if self.is_connected:
|
||||
raise DeviceAlreadyConnectedError(f"Microphone {self.microphone_index} is already connected.")
|
||||
|
||||
@@ -205,6 +213,9 @@ class Microphone:
|
||||
self.is_connected = True
|
||||
|
||||
def _audio_callback(self, indata, frames, time, status) -> None:
|
||||
"""
|
||||
Low-level sounddevice callback.
|
||||
"""
|
||||
if status:
|
||||
logging.warning(status)
|
||||
# Slicing makes copy unnecessary
|
||||
@@ -215,6 +226,9 @@ class Microphone:
|
||||
|
||||
@staticmethod
|
||||
def _record_loop(queue, event: Event, sample_rate: int, channels: list[int], output_file: Path) -> None:
|
||||
"""
|
||||
Thread/Process-safe loop to write audio data into a file.
|
||||
"""
|
||||
# Can only be run on a single process/thread for file writing safety
|
||||
with sf.SoundFile(
|
||||
output_file,
|
||||
@@ -234,9 +248,7 @@ class Microphone:
|
||||
|
||||
def _read(self) -> np.ndarray:
|
||||
"""
|
||||
Gets audio data from the queue and coverts it to a numpy array.
|
||||
-> PROS : Inherently thread safe, no need to lock the queue, lightweight CPU usage
|
||||
-> CONS : Reading duration does not scale well with the number of channels and reading duration
|
||||
Thread/Process-safe helper that gets the available audio data from the queue and returns it as a numpy array.
|
||||
"""
|
||||
audio_readings = np.empty((0, len(self.channels)))
|
||||
|
||||
@@ -251,6 +263,9 @@ class Microphone:
|
||||
return audio_readings
|
||||
|
||||
def read(self) -> np.ndarray:
|
||||
"""
|
||||
Reads the last audio chunk recorded by the microphone, i.e. all samples recorded since the last read or since the beginning of the recording.
|
||||
"""
|
||||
if not self.is_connected:
|
||||
raise DeviceNotConnectedError(f"Microphone {self.microphone_index} is not connected.")
|
||||
if not self.stream.active:
|
||||
@@ -269,6 +284,9 @@ class Microphone:
|
||||
return audio_readings
|
||||
|
||||
def start_recording(self, output_file: str | None = None, multiprocessing: bool | None = False) -> None:
|
||||
"""
|
||||
Starts the recording of the microphone. If output_file is provided, the audio will be written to this file.
|
||||
"""
|
||||
if not self.is_connected:
|
||||
raise DeviceNotConnectedError(f"Microphone {self.microphone_index} is not connected.")
|
||||
if self.is_recording:
|
||||
@@ -320,6 +338,9 @@ class Microphone:
|
||||
self.stream.start()
|
||||
|
||||
def stop_recording(self) -> None:
|
||||
"""
|
||||
Stops the recording of the microphone.
|
||||
"""
|
||||
if not self.is_connected:
|
||||
raise DeviceNotConnectedError(f"Microphone {self.microphone_index} is not connected.")
|
||||
if not self.is_recording:
|
||||
@@ -341,6 +362,9 @@ class Microphone:
|
||||
self.logs["stop_timestamp"] = capture_timestamp_utc()
|
||||
|
||||
def disconnect(self) -> None:
|
||||
"""
|
||||
Disconnects the microphone and stops the recording.
|
||||
"""
|
||||
if not self.is_connected:
|
||||
raise DeviceNotConnectedError(f"Microphone {self.microphone_index} is not connected.")
|
||||
|
||||
|
||||
Reference in New Issue
Block a user