Files
lerobot/additional_functions/test_speaker_wav.py
T
2025-11-21 14:13:05 +01:00

200 lines
6.2 KiB
Python

#!/usr/bin/env python3
"""
Play a WAV file through the G1 robot's speaker.
Requirements:
- WAV file must be 16kHz, mono, 16-bit PCM
Usage:
python test_speaker_wav.py en7 path/to/audio.wav
"""
import sys
import time
import struct
from unitree_sdk2py.core.channel import ChannelFactoryInitialize
from unitree_sdk2py.g1.audio.g1_audio_client import AudioClient
def read_wav(filename):
    """Read a 16-bit PCM WAV file and return its raw sample bytes.

    The RIFF container is walked chunk by chunk: any chunks that precede
    ``fmt `` (``JUNK``, ``LIST``, ...) and any chunks between ``fmt `` and
    ``data`` are skipped, honouring the pad byte RIFF inserts after chunks
    with an odd payload size.

    Args:
        filename: Path of the WAV file to read.

    Returns:
        A 4-tuple ``(pcm, sample_rate, num_channels, ok)``.

        On success ``pcm`` is a list of ints, one per byte of the ``data``
        chunk, ``sample_rate`` and ``num_channels`` come from the ``fmt ``
        chunk and ``ok`` is ``True``.

        On any failure an ``[ERROR]`` line is printed and
        ``([], -1, -1, False)`` is returned; this function never raises.
    """
    def fail(message):
        # Single place that produces the failure result used by callers.
        print(f"[ERROR] {message}")
        return [], -1, -1, False

    try:
        with open(filename, 'rb') as f:
            def read(fmt):
                # Raises struct.error on a short read; handled by the outer except.
                return struct.unpack(fmt, f.read(struct.calcsize(fmt)))

            def next_chunk():
                # Return (chunk_id, payload_size), or None once the file is exhausted.
                header = f.read(8)
                if len(header) < 8:
                    return None
                return struct.unpack('<4sI', header)

            def skip_chunk(size):
                # RIFF chunks are word-aligned: an odd-sized payload is
                # followed by one pad byte that is not counted in `size`.
                f.seek(size + (size & 1), 1)

            # RIFF header: "RIFF" <size> "WAVE"
            riff_id, = read('<4s')
            if riff_id != b'RIFF':
                return fail("Not a valid WAV file (invalid RIFF header)")
            _riff_size, = read('<I')
            wave_id, = read('<4s')
            if wave_id != b'WAVE':
                return fail("Not a valid WAV file (invalid WAVE format)")

            # Locate the fmt chunk, skipping any metadata chunks before it.
            while True:
                chunk = next_chunk()
                # A data chunk before fmt cannot be interpreted.
                if chunk is None or chunk[0] == b'data':
                    return fail("Invalid fmt chunk")
                chunk_id, fmt_size = chunk
                if chunk_id == b'fmt ':
                    break
                skip_chunk(fmt_size)

            if fmt_size not in (16, 18):
                return fail(f"Unsupported fmt chunk size: {fmt_size}")

            audio_format, = read('<H')
            if audio_format != 1:
                return fail(f"Only PCM format supported, got format {audio_format}")

            num_channels, = read('<H')
            sample_rate, = read('<I')
            _byte_rate, = read('<I')
            _block_align, = read('<H')
            bits_per_sample, = read('<H')
            if bits_per_sample != 16:
                return fail(f"Only 16-bit samples supported, got {bits_per_sample}-bit")

            if fmt_size == 18:
                # 18-byte fmt chunks carry a trailing extension-size field;
                # skip any extension bytes it declares.
                extra_size, = read('<H')
                if extra_size != 0:
                    f.seek(extra_size, 1)

            # Locate the data chunk, skipping anything else in between.
            while True:
                chunk = next_chunk()
                if chunk is None:
                    return fail("No data chunk found")
                chunk_id, data_size = chunk
                if chunk_id == b'data':
                    break
                skip_chunk(data_size)

            raw_pcm = f.read(data_size)
            if len(raw_pcm) != data_size:
                return fail("Failed to read full PCM data")

            return list(raw_pcm), sample_rate, num_channels, True
    except Exception as e:
        # Deliberately broad: the contract is "report via the ok flag,
        # never raise", covering I/O errors and truncated headers alike.
        return fail(f"Failed to read WAV file: {e}")
def play_pcm_stream(client, pcm_list, app_name="example", chunk_size=96000, sleep_time=1.0):
    """Send PCM audio to the audio client in fixed-size chunks.

    All chunks of one call share a single stream id derived from the
    current time in milliseconds. Sending stops at the first chunk whose
    return code is non-zero.

    Args:
        client: Object exposing ``PlayStream(app_name, stream_id, chunk)``
            that returns a ``(ret_code, data)`` pair (the AudioClient).
        pcm_list: PCM bytes as a list of ints (anything ``bytes()`` accepts).
        app_name: Application name for this audio stream.
        chunk_size: Bytes per chunk (96000 = ~3 seconds of 16 kHz, 16-bit
            mono audio). Must be positive.
        sleep_time: Seconds to pause after each successfully sent chunk.

    Raises:
        ValueError: If ``chunk_size`` is not positive (it would otherwise
            never advance through the data).
    """
    if chunk_size <= 0:
        raise ValueError(f"chunk_size must be positive, got {chunk_size}")

    pcm_data = bytes(pcm_list)
    total_size = len(pcm_data)
    # Ceiling division: exact multiples must not count an extra, empty chunk.
    total_chunks = -(-total_size // chunk_size)
    stream_id = str(int(time.time() * 1000))

    print(f"Playing audio: {total_size} bytes in {total_chunks} chunks")

    for chunk_index, offset in enumerate(range(0, total_size, chunk_size)):
        # Slicing clamps at the end, so the final chunk may be shorter.
        chunk = pcm_data[offset:offset + chunk_size]

        ret_code, _ = client.PlayStream(app_name, stream_id, chunk)
        if ret_code != 0:
            print(f"[ERROR] Failed to send chunk {chunk_index}, return code: {ret_code}")
            break

        print(f"[INFO] Sent chunk {chunk_index + 1}/{total_chunks}")
        time.sleep(sleep_time)  # Wait between chunks
def main():
    """Command-line entry point: play a WAV file through the robot speaker.

    Expects ``sys.argv`` to hold ``<network_interface> <wav_file>``. The
    WAV file is read and checked (16000 Hz, mono) before any connection is
    made, so a bad file fails fast without touching the robot. Exits with
    status 1 on a usage error, an unreadable file or an unsupported format.
    """
    if len(sys.argv) < 3:
        print(f"Usage: python3 {sys.argv[0]} <network_interface> <wav_file>")
        print("Example: python3 test_speaker_wav.py en7 audio.wav")
        print("\nWAV file requirements:")
        print(" - Sample rate: 16000 Hz")
        print(" - Channels: 1 (mono)")
        print(" - Bit depth: 16-bit")
        sys.exit(1)

    network_interface = sys.argv[1]
    wav_path = sys.argv[2]

    # Read WAV file
    print(f"Reading WAV file: {wav_path}")
    pcm_list, sample_rate, num_channels, is_ok = read_wav(wav_path)
    if not is_ok:
        print("[ERROR] Failed to read WAV file")
        sys.exit(1)

    print("WAV info:")
    print(f" - Sample rate: {sample_rate} Hz")
    print(f" - Channels: {num_channels}")
    print(f" - Size: {len(pcm_list)} bytes")

    # Verify format
    if sample_rate != 16000:
        print(f"[ERROR] Sample rate must be 16000 Hz, got {sample_rate} Hz")
        print("Use ffmpeg to convert:")
        print(" ffmpeg -i input.wav -ar 16000 -ac 1 -sample_fmt s16 output.wav")
        sys.exit(1)
    if num_channels != 1:
        print(f"[ERROR] Must be mono (1 channel), got {num_channels} channels")
        print("Use ffmpeg to convert:")
        print(" ffmpeg -i input.wav -ar 16000 -ac 1 -sample_fmt s16 output.wav")
        sys.exit(1)

    # Initialize communication on the interface given on the command line
    # (previously the argument was printed but never passed on).
    print(f"Initializing on {network_interface}...")
    ChannelFactoryInitialize(0, network_interface)

    # Create audio client
    audio_client = AudioClient()
    audio_client.SetTimeout(10.0)
    audio_client.Init()
    print("Audio client initialized!")

    # Play audio
    print("Playing audio...")
    try:
        play_pcm_stream(audio_client, pcm_list, "test_wav")

        # Wait for playback to finish
        duration_seconds = len(pcm_list) / (16000 * 2)  # 16kHz, 16-bit (2 bytes)
        print(f"Waiting {duration_seconds:.1f} seconds for playback...")
        time.sleep(duration_seconds + 1)
    finally:
        # Always stop the stream, even if playback is interrupted (Ctrl-C)
        # or sending raises.
        print("Stopping playback...")
        audio_client.PlayStop("test_wav")

    print("Done!")
# Script entry point: run the CLI only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()