Mirror of https://github.com/Aider-AI/aider.git, synced 2025-06-04 11:45:00 +00:00
Refactored voice recording to use a queue and write audio to a file.
parent 1f42b0839f
commit 9b526d51e4
1 changed file with 21 additions and 17 deletions
@@ -3,7 +3,9 @@ import numpy as np
 import keyboard
 import openai
 import io
+import tempfile
+import queue
+import soundfile as sf
 import os
 
 def record_and_transcribe(api_key):
@@ -12,25 +14,27 @@ def record_and_transcribe(api_key):
     sample_rate = 16000 # 16kHz
     duration = 10 # in seconds
 
-    # Create a callback function to stop recording when a key is pressed
-    def on_key_press(e):
-        print("Key pressed, stopping recording...")
-        sd.stop()
+    def callback(indata, frames, time, status):
+        """This is called (from a separate thread) for each audio block."""
+        if status:
+            print(status, file=sys.stderr)
+        q.put(indata.copy())
 
-    # Start the recording
-    print("Recording started, press any key to stop...")
-    # Create an instance of InputStream with the callback
-    stream = sd.InputStream(samplerate=sample_rate, channels=1, callback=on_key_press)
-    stream.start()
-    recording = sd.rec(int(sample_rate * duration), samplerate=sample_rate, channels=1)
+    filename = tempfile.mktemp(prefix='delme_rec_unlimited_', suffix='.wav', dir='')
 
-    # Wait for a key press
-    keyboard.wait()
+    q = queue.Queue()
 
-    # Convert the recording to bytes
-    recording_bytes = io.BytesIO()
-    np.save(recording_bytes, recording, allow_pickle=False)
-    recording_bytes = recording_bytes.getvalue()
+    # Make sure the file is opened before recording anything:
+    with sf.SoundFile(filename, mode='x', samplerate=sample_rate, channels=1) as file:
+        with sd.InputStream(samplerate=sample_rate, channels=1, callback=callback):
+            input('Press enter when done')
+
+        while not q.empty():
+            print('.')
+            file.write(q.get())
+
+    print('done')
 
     # Transcribe the audio using the Whisper API
     response = openai.Whisper.asr.create(audio_data=recording_bytes)
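For reference, below is a minimal, self-contained sketch of the queue-based recording pattern this commit adopts: the sounddevice InputStream callback pushes audio blocks onto a queue.Queue from its own thread, and the main thread later drains the queue into a .wav file with soundfile. The helper name record_to_file and its sample_rate parameter are illustrative only (the commit's function is record_and_transcribe), `import sys` is added here because the diff's callback writes to sys.stderr without importing it, and the Whisper transcription step is omitted.

# Sketch: queue-based microphone capture written to a .wav file.
# Assumes the sounddevice and soundfile packages are installed.
import queue
import sys
import tempfile

import sounddevice as sd
import soundfile as sf


def record_to_file(sample_rate=16000):
    q = queue.Queue()

    def callback(indata, frames, time, status):
        """Called (from a separate thread) for each audio block."""
        if status:
            print(status, file=sys.stderr)
        q.put(indata.copy())

    filename = tempfile.mktemp(prefix='delme_rec_unlimited_', suffix='.wav', dir='')

    # Open the file before recording anything, then record until Enter is pressed.
    with sf.SoundFile(filename, mode='x', samplerate=sample_rate, channels=1) as file:
        with sd.InputStream(samplerate=sample_rate, channels=1, callback=callback):
            input('Press enter when done')

        # Drain everything the callback queued and write it to disk.
        while not q.empty():
            file.write(q.get())

    return filename


if __name__ == "__main__":
    print(record_to_file())

Like the committed code, this sketch only drains the queue after recording stops, so the audio is buffered in memory until then; writing inside the InputStream context, as the upstream sounddevice "rec_unlimited" example does, would stream blocks to disk as they arrive.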