Adding audio frames reading capability

2026-05-15 08:39:49 +00:00 · 2025-04-03 11:41:05 +02:00
parent 8874547353
commit 6a2882f978
8 changed files with 132 additions and 35 deletions
@@ -112,16 +112,15 @@ class Microphone:
    config = MicrophoneConfig(microphone_index=0, sampling_rate=16000, channels=[1], data_type="int16")
    microphone = Microphone(config)

+    microphone.connect()
    microphone.start_recording("some/output/file.wav")
    ...
-    microphone.stop_recording()
-
-    # OR
-
-    microphone.start_recording()
+    audio_readings = (
+        microphone.read()
+    )  # Gets all recorded audio data since the last read or since the beginning of the recording
    ...
    microphone.stop_recording()
-    last_recorded_audio_chunk = microphone.queue.get()
+    microphone.disconnect()
    ```
    """

@@ -136,12 +135,16 @@ class Microphone:

        # Input audio stream
        self.stream = None
-        # Thread-safe concurrent queue to store the recorded audio
-        self.queue = Queue()
-        self.thread = None
-        self.stop_event = None
-        self.logs = {}

+        # Thread-safe concurrent queue to store the recorded/read audio
+        self.record_queue = Queue()
+        self.read_queue = Queue()
+
+        # Thread to handle data reading and file writing in a separate thread (safely)
+        self.record_thread = None
+        self.record_stop_event = None
+
+        self.logs = {}
        self.is_connected = False

    def connect(self) -> None:
@@ -200,15 +203,12 @@ class Microphone:
    def _audio_callback(self, indata, frames, time, status) -> None:
        if status:
            logging.warning(status)
-        # slicing makes copy unnecessary
-        self.queue.put(indata[:, self.channels])
+        # Slicing makes copy unnecessary
+        # Two separate queues are necessary because .get() also pops the data from the queue
+        self.record_queue.put(indata[:, self.channels])
+        self.read_queue.put(indata[:, self.channels])

-    def _read_write_loop(self, output_file: Path) -> None:
-        output_file = Path(output_file)
-        if output_file.exists():
-            shutil.rmtree(
-                output_file,
-            )
+    def _record_loop(self, output_file: Path) -> None:
        with sf.SoundFile(
            output_file,
            mode="x",
@@ -216,20 +216,73 @@ class Microphone:
            channels=max(self.channels) + 1,
            subtype=sf.default_subtype(output_file.suffix[1:]),
        ) as file:
-            while not self.stop_event.is_set():
-                file.write(self.queue.get())
+            while not self.record_stop_event.is_set():
+                file.write(self.record_queue.get())
+                # self.record_queue.task_done()
+
+    def _read(self) -> np.ndarray:
+        """
+        Gets audio data from the queue and coverts it to a numpy array.
+        -> PROS : Inherently thread safe, no need to lock the queue, lightweight CPU usage
+        -> CONS : Reading duration does not scale well with the number of channels and reading duration
+        """
+        try:
+            audio_readings = self.read_queue.queue
+        except Queue.Empty:
+            audio_readings = np.empty((0, len(self.channels)))
+        else:
+            # TODO(CarolinePascal): Check if this is the fastest way to do it
+            self.read_queue = Queue()
+            with self.read_queue.mutex:
+                self.read_queue.queue.clear()
+                # self.read_queue.all_tasks_done.notify_all()
+            audio_readings = np.array(audio_readings).reshape(-1, len(self.channels))
+
+        return audio_readings
+
+    def read(self) -> np.ndarray:
+        if not self.is_connected:
+            raise DeviceNotConnectedError(f"Microphone {self.microphone_index} is not connected.")
+        if not self.stream.active:
+            raise RuntimeError(f"Microphone {self.microphone_index} is not recording.")
+
+        start_time = time.perf_counter()
+
+        audio_readings = self._read()
+
+        # log the number of seconds it took to read the audio chunk
+        self.logs["delta_timestamp_s"] = time.perf_counter() - start_time
+
+        # log the utc time at which the audio chunk was received
+        self.logs["timestamp_utc"] = capture_timestamp_utc()
+
+        return audio_readings

    def start_recording(self, output_file: str | None = None) -> None:
        if not self.is_connected:
            raise DeviceNotConnectedError(f"Microphone {self.microphone_index} is not connected.")

-        if output_file is not None:
-            self.stop_event = Event()
-            self.thread = Thread(target=self._read_write_loop, args=(output_file,))
-            self.thread.daemon = True
-            self.thread.start()
+        self.read_queue = Queue()
+        with self.read_queue.mutex:
+            self.read_queue.queue.clear()
+            # self.read_queue.all_tasks_done.notify_all()
+
+        self.record_queue = Queue()
+        with self.record_queue.mutex:
+            self.record_queue.queue.clear()
+            # self.record_queue.all_tasks_done.notify_all()
+
+        # Recording case
+        if output_file is not None:
+            output_file = Path(output_file)
+            if output_file.exists():
+                output_file.unlink()
+
+            self.record_stop_event = Event()
+            self.record_thread = Thread(target=self._record_loop, args=(output_file,))
+            self.record_thread.daemon = True
+            self.record_thread.start()

-        self.logs["start_timestamp"] = capture_timestamp_utc()
        self.stream.start()

    def stop_recording(self) -> None:
@@ -238,18 +291,17 @@ class Microphone:

        self.logs["stop_timestamp"] = capture_timestamp_utc()

-        if self.thread is not None:
-            self.stop_event.set()
-            self.thread.join()
-            self.thread = None
-            self.stop_event = None
+        if self.record_thread is not None:
+            # self.record_queue.join()
+            self.record_stop_event.set()
+            self.record_thread.join()
+            self.record_thread = None
+            self.record_stop_event = None

        if self.stream.active:
            self.stream.stop()  # Wait for all buffers to be processed
            # Remark : stream.abort() flushes the buffers !

-        self.logs["duration"] = self.logs["stop_timestamp"] - self.logs["start_timestamp"]
-
    def disconnect(self) -> None:
        if not self.is_connected:
            raise DeviceNotConnectedError(f"Microphone {self.microphone_index} is not connected.")
@@ -144,6 +144,13 @@ class HopeJrArm(Robot):
            dt_ms = (time.perf_counter() - start) * 1e3
            logger.debug(f"{self} read {cam_key}: {dt_ms:.1f}ms")

+        # Read audio frames from microphones
+        for mic_key, mic in self.microphones.items():
+            start = time.perf_counter()
+            obs_dict[mic_key] = mic.read()
+            dt_ms = (time.perf_counter() - start) * 1e3
+            logger.debug(f"{self} read {mic_key}: {dt_ms:.1f}ms")
+
        return obs_dict

    @check_if_not_connected
@@ -175,6 +175,13 @@ class HopeJrHand(Robot):
            dt_ms = (time.perf_counter() - start) * 1e3
            logger.debug(f"{self} read {cam_key}: {dt_ms:.1f}ms")

+        # Read audio frames from microphones
+        for mic_key, mic in self.microphones.items():
+            start = time.perf_counter()
+            obs_dict[mic_key] = mic.read()
+            dt_ms = (time.perf_counter() - start) * 1e3
+            logger.debug(f"{self} read {mic_key}: {dt_ms:.1f}ms")
+
        return obs_dict

    @check_if_not_connected
@@ -213,6 +213,13 @@ class KochFollower(Robot):
            dt_ms = (time.perf_counter() - start) * 1e3
            logger.debug(f"{self} read {cam_key}: {dt_ms:.1f}ms")

+        # Read audio frames from microphones
+        for mic_key, mic in self.microphones.items():
+            start = time.perf_counter()
+            obs_dict[mic_key] = mic.read()
+            dt_ms = (time.perf_counter() - start) * 1e3
+            logger.debug(f"{self} read {mic_key}: {dt_ms:.1f}ms")
+
        return obs_dict

    @check_if_not_connected
@@ -380,6 +380,13 @@ class LeKiwi(Robot):
            dt_ms = (time.perf_counter() - start) * 1e3
            logger.debug(f"{self} read {cam_key}: {dt_ms:.1f}ms")

+        # Read audio frames from microphones
+        for mic_key, mic in self.microphones.items():
+            start = time.perf_counter()
+            obs_dict[mic_key] = mic.read()
+            dt_ms = (time.perf_counter() - start) * 1e3
+            logger.debug(f"{self} read {mic_key}: {dt_ms:.1f}ms")
+
        return obs_dict

    @check_if_not_connected
@@ -207,6 +207,13 @@ class SOFollower(Robot):
            dt_ms = (time.perf_counter() - start) * 1e3
            logger.debug(f"{self} read {cam_key}: {dt_ms:.1f}ms")

+        # Read audio frames from microphones
+        for mic_key, mic in self.microphones.items():
+            start = time.perf_counter()
+            obs_dict[mic_key] = mic.read()
+            dt_ms = (time.perf_counter() - start) * 1e3
+            logger.debug(f"{self} read {mic_key}: {dt_ms:.1f}ms")
+
        return obs_dict

    @check_if_not_connected
@@ -316,6 +316,9 @@ def record_loop(
    if dataset is not None:
        for microphone_key, microphone in robot.microphones.items():
            dataset.add_microphone_recording(microphone, microphone_key)
+    else:
+        for _, microphone in robot.microphones.items():
+            microphone.start_recording()

    timestamp = 0
    start_episode_t = time.perf_counter()
@@ -145,6 +145,10 @@ def teleop_loop(
    """

    display_len = max(len(key) for key in robot.action_features)
+
+    for _, microphone in robot.microphones.items():
+        microphone.start_recording()
+
    start = time.perf_counter()

    while True:
@@ -192,7 +196,10 @@ def teleop_loop(
        move_cursor_up(1)

        if duration is not None and time.perf_counter() - start >= duration:
-            return
+            break
+
+    for _, microphone in robot.microphones.items():
+        microphone.stop_recording()


@parser.wrap()