Mirror of https://github.com/Aider-AI/aider.git, synced 2025-06-04 11:45:00 +00:00
Refactored voice recording to use a queue and write audio to a file.
parent 1f42b0839f
commit 9b526d51e4
1 changed file with 21 additions and 17 deletions
@@ -3,7 +3,9 @@ import numpy as np
 import keyboard
 import openai
 import io
+import tempfile
+import queue
+import soundfile as sf
 import os
 
 def record_and_transcribe(api_key):
@@ -12,25 +14,27 @@ def record_and_transcribe(api_key):
     sample_rate = 16000 # 16kHz
     duration = 10 # in seconds
 
-    # Create a callback function to stop recording when a key is pressed
-    def on_key_press(e):
-        print("Key pressed, stopping recording...")
-        sd.stop()
+    def callback(indata, frames, time, status):
+        """This is called (from a separate thread) for each audio block."""
+        if status:
+            print(status, file=sys.stderr)
+        q.put(indata.copy())
 
-    # Start the recording
-    print("Recording started, press any key to stop...")
-    # Create an instance of InputStream with the callback
-    stream = sd.InputStream(samplerate=sample_rate, channels=1, callback=on_key_press)
-    stream.start()
-    recording = sd.rec(int(sample_rate * duration), samplerate=sample_rate, channels=1)
+    filename = tempfile.mktemp(prefix='delme_rec_unlimited_', suffix='.wav', dir='')
 
-    # Wait for a key press
-    keyboard.wait()
+    q = queue.Queue()
 
-    # Convert the recording to bytes
-    recording_bytes = io.BytesIO()
-    np.save(recording_bytes, recording, allow_pickle=False)
-    recording_bytes = recording_bytes.getvalue()
+    # Make sure the file is opened before recording anything:
+    with sf.SoundFile(filename, mode='x', samplerate=sample_rate, channels=1) as file:
+        with sd.InputStream(samplerate=sample_rate, channels=1, callback=callback):
+            input('Press enter when done')
+
+        while not q.empty():
+            print('.')
+            file.write(q.get())
+
+    print('done')
 
     # Transcribe the audio using the Whisper API
     response = openai.Whisper.asr.create(audio_data=recording_bytes)
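For reference, below is a minimal, self-contained sketch of the queue-based recording pattern this commit adopts: the sounddevice InputStream callback pushes audio blocks onto a queue.Queue from its own thread, and the main thread later drains the queue into a .wav file with soundfile. The helper name record_to_file and its sample_rate parameter are illustrative only (the commit's function is record_and_transcribe), `import sys` is added here because the diff's callback writes to sys.stderr without importing it, and the Whisper transcription step is omitted.

# Sketch: queue-based microphone capture written to a .wav file.
# Assumes the sounddevice and soundfile packages are installed.
import queue
import sys
import tempfile

import sounddevice as sd
import soundfile as sf


def record_to_file(sample_rate=16000):
    q = queue.Queue()

    def callback(indata, frames, time, status):
        """Called (from a separate thread) for each audio block."""
        if status:
            print(status, file=sys.stderr)
        q.put(indata.copy())

    filename = tempfile.mktemp(prefix='delme_rec_unlimited_', suffix='.wav', dir='')

    # Open the file before recording anything, then record until Enter is pressed.
    with sf.SoundFile(filename, mode='x', samplerate=sample_rate, channels=1) as file:
        with sd.InputStream(samplerate=sample_rate, channels=1, callback=callback):
            input('Press enter when done')

        # Drain everything the callback queued and write it to disk.
        while not q.empty():
            file.write(q.get())

    return filename


if __name__ == "__main__":
    print(record_to_file())

Like the committed code, this sketch only drains the queue after recording stops, so the audio is buffered in memory until then; writing inside the InputStream context, as the upstream sounddevice "rec_unlimited" example does, would stream blocks to disk as they arrive.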