docs: add method descriptions and comments on tricky parts

2026-06-19 01:07:18 +00:00 · 2025-04-11 13:46:34 +02:00
parent 99eb0bbafc
commit 688195fc46
4 changed files with 43 additions and 13 deletions
@@ -46,12 +46,12 @@ def decode_audio(
    Decodes audio using the specified backend.
    Args:
        audio_path (Path): Path to the audio file.
-        timestamps (list[float]): List of timestamps to extract frames.
-        tolerance_s (float): Allowed deviation in seconds for frame retrieval.
+        timestamps (list[float]): List of (starting) timestamps to extract audio chunks.
+        duration (float): Duration of the audio chunks in seconds.
        backend (str, optional): Backend to use for decoding. Defaults to "torchaudio".

    Returns:
-        torch.Tensor: Decoded frames.
+        torch.Tensor: Decoded audio chunks.

    Currently supports torchaudio.
    """
@@ -246,6 +246,7 @@ def sample_images(image_paths: list[str]) -> np.ndarray:


 def sample_audio_from_path(audio_path: str) -> np.ndarray:
+    """Samples audio data from an audio recording stored in a WAV file."""
    data = load_audio_from_path(audio_path)
    sampled_indices = sample_indices(len(data))

@@ -253,6 +254,7 @@ def sample_audio_from_path(audio_path: str) -> np.ndarray:


 def sample_audio_from_data(data: np.ndarray) -> np.ndarray:
+    """Samples audio data from an audio recording stored in a numpy array."""
    sampled_indices = sample_indices(len(data))
    return data[sampled_indices]

@@ -527,7 +529,7 @@ def compute_episode_stats(
        elif features[key]["dtype"] == "audio":
            try:
                ep_ft_array = sample_audio_from_path(data[0])
-            except TypeError:  # Should only be triggered for LeKiwi robot
+            except TypeError:  # Should only be triggered for LeKiwi robot, for which audio is stored chunk by chunk in a visual frame-like manner
                ep_ft_array = sample_audio_from_data(data)
            axes_to_reduce = 0
            keepdims = True
@@ -1297,9 +1297,11 @@ class LeRobotDataset(torch.utils.data.Dataset):
                self._save_image(frame[key], img_path, compress_level)
                self.episode_buffer[key].append(str(img_path))
            elif self.features[key]["dtype"] == "audio":
-                if self.meta.robot_type == "lekiwi":
+                if (
+                    self.meta.robot_type == "lekiwi"
+                ):  # Raw data storage should only be triggered for LeKiwi robot, for which audio is stored chunk by chunk in a visual frame-like manner
                    self.episode_buffer[key].append(frame[key])
-                else:
+                else:  # Otherwise, only the audio file path is stored in the episode buffer
                    if frame_index == 0:
                        audio_path = self._get_raw_audio_file_path(
                            episode_index=self.episode_buffer["episode_index"], audio_key=key
@@ -1312,7 +1314,7 @@ class LeRobotDataset(torch.utils.data.Dataset):

    def add_microphone_recording(self, microphone: Microphone, microphone_key: str) -> None:
        """
-        This function will start recording audio from the microphone and save it to disk.
+        Starts recording audio data provided by the microphone and directly writes it to a .wav file.
        """

        audio_dir = self._get_raw_audio_file_path(
@@ -1371,7 +1373,9 @@ class LeRobotDataset(torch.utils.data.Dataset):
            if key in ["index", "episode_index", "task_index"] or ft["dtype"] in ["image", "video"]:
                continue
            elif ft["dtype"] == "audio":
-                if self.meta.robot_type == "lekiwi":
+                if (
+                    self.meta.robot_type == "lekiwi"
+                ):  # Raw data storage should only be triggered for LeKiwi robot, for which audio is stored chunk by chunk in a visual frame-like manner
                    episode_buffer[key] = np.concatenate(episode_buffer[key], axis=0)
                continue
            episode_buffer[key] = np.stack(episode_buffer[key])
@@ -64,6 +64,11 @@ def find_microphones(raise_when_empty=False) -> list[dict]:
 def record_audio_from_microphones(
    output_dir: Path, microphone_ids: list[int] | None = None, record_time_s: float = 2.0
 ):
+    """
+    Records audio from all the channels of the specified microphones for the specified duration.
+    If no microphone ids are provided, all available microphones will be used.
+    """
+
    if microphone_ids is None or len(microphone_ids) == 0:
        microphones = find_microphones()
        microphone_ids = [m["index"] for m in microphones]
@@ -138,11 +143,11 @@ class Microphone:
        # Input audio stream
        self.stream = None

-        # Thread-safe concurrent queue to store the recorded/read audio
+        # Thread/Process-safe concurrent queues to store the recorded/read audio
        self.record_queue = None
        self.read_queue = None

-        # Thread to handle data reading and file writing in a separate thread (safely)
+        # Separate thread/process that handles data reading and file writing (safely)
        self.record_thread = None
        self.record_stop_event = None

@@ -152,6 +157,9 @@ class Microphone:
        self.is_writing = False

    def connect(self) -> None:
+        """
+        Connects the microphone and checks if the requested acquisition parameters are compatible with the microphone.
+        """
        if self.is_connected:
            raise DeviceAlreadyConnectedError(f"Microphone {self.microphone_index} is already connected.")

@@ -205,6 +213,9 @@ class Microphone:
        self.is_connected = True

    def _audio_callback(self, indata, frames, time, status) -> None:
+        """
+        Low-level sounddevice callback.
+        """
        if status:
            logging.warning(status)
        # Slicing makes copy unnecessary
@@ -215,6 +226,9 @@ class Microphone:

    @staticmethod
    def _record_loop(queue, event: Event, sample_rate: int, channels: list[int], output_file: Path) -> None:
+        """
+        Thread/Process-safe loop to write audio data into a file.
+        """
        # Can only be run on a single process/thread for file writing safety
        with sf.SoundFile(
            output_file,
@@ -234,9 +248,7 @@ class Microphone:

    def _read(self) -> np.ndarray:
        """
-        Gets audio data from the queue and coverts it to a numpy array.
-        -> PROS : Inherently thread safe, no need to lock the queue, lightweight CPU usage
-        -> CONS : Reading duration does not scale well with the number of channels and reading duration
+        Thread/Process-safe method that gets the available audio data from the queue and returns it as a numpy array.
        """
        audio_readings = np.empty((0, len(self.channels)))

@@ -251,6 +263,9 @@ class Microphone:
        return audio_readings

    def read(self) -> np.ndarray:
+        """
+        Reads the last audio chunk recorded by the microphone, i.e. all samples recorded since the last read or since the beginning of the recording.
+        """
        if not self.is_connected:
            raise DeviceNotConnectedError(f"Microphone {self.microphone_index} is not connected.")
        if not self.stream.active:
@@ -269,6 +284,9 @@ class Microphone:
        return audio_readings

    def start_recording(self, output_file: str | None = None, multiprocessing: bool | None = False) -> None:
+        """
+        Starts the recording of the microphone. If output_file is provided, the audio will be written to this file.
+        """
        if not self.is_connected:
            raise DeviceNotConnectedError(f"Microphone {self.microphone_index} is not connected.")
        if self.is_recording:
@@ -320,6 +338,9 @@ class Microphone:
        self.stream.start()

    def stop_recording(self) -> None:
+        """
+        Stops the recording of the microphone.
+        """
        if not self.is_connected:
            raise DeviceNotConnectedError(f"Microphone {self.microphone_index} is not connected.")
        if not self.is_recording:
@@ -341,6 +362,9 @@ class Microphone:
        self.logs["stop_timestamp"] = capture_timestamp_utc()

    def disconnect(self) -> None:
+        """
+        Disconnects the microphone and stops the recording.
+        """
        if not self.is_connected:
            raise DeviceNotConnectedError(f"Microphone {self.microphone_index} is not connected.")