mirror of
https://github.com/Aider-AI/aider.git
synced 2025-05-25 14:55:00 +00:00
show loudness bar
This commit is contained in:
parent
4dc704272e
commit
053751cb43
2 changed files with 47 additions and 24 deletions
|
@@ -436,11 +436,13 @@ class Commands:
|
||||||
|
|
||||||
def cmd_voice(self, args):
|
def cmd_voice(self, args):
|
||||||
"Record and transcribe voice input"
|
"Record and transcribe voice input"
|
||||||
if not voice.is_audio_available():
|
v = voice.Voice()
|
||||||
|
|
||||||
|
if not v.is_audio_available():
|
||||||
self.io.tool_error("Unable to import `sounddevice`, is portaudio installed?")
|
self.io.tool_error("Unable to import `sounddevice`, is portaudio installed?")
|
||||||
return
|
return
|
||||||
|
|
||||||
text = voice.record_and_transcribe()
|
text = v.record_and_transcribe()
|
||||||
self.io.add_to_file_history(text)
|
self.io.add_to_file_history(text)
|
||||||
print()
|
print()
|
||||||
self.io.user_input(text, log_only=False)
|
self.io.user_input(text, log_only=False)
|
||||||
|
|
|
@@ -1,8 +1,11 @@
|
||||||
import os
|
import os
|
||||||
import queue
|
import queue
|
||||||
import tempfile
|
import tempfile
|
||||||
|
import time
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
import openai
|
import openai
|
||||||
|
from prompt_toolkit.shortcuts import prompt
|
||||||
|
|
||||||
try:
|
try:
|
||||||
import sounddevice as sd
|
import sounddevice as sd
|
||||||
|
@@ -14,41 +17,59 @@ import soundfile as sf
|
||||||
from .dump import dump # noqa: F401
|
from .dump import dump # noqa: F401
|
||||||
|
|
||||||
|
|
||||||
def is_audio_available():
|
class Voice:
|
||||||
return sd is not None
|
max_rms = 0
|
||||||
|
min_rms = 1e5
|
||||||
|
pct = 0
|
||||||
|
|
||||||
|
def is_audio_available(self):
|
||||||
|
return sd is not None
|
||||||
|
|
||||||
def record_and_transcribe():
|
def callback(self, indata, frames, time, status):
|
||||||
q = queue.Queue()
|
|
||||||
|
|
||||||
import numpy as np
|
|
||||||
|
|
||||||
def callback(indata, frames, time, status):
|
|
||||||
"""This is called (from a separate thread) for each audio block."""
|
"""This is called (from a separate thread) for each audio block."""
|
||||||
q.put(indata.copy())
|
self.q.put(indata.copy())
|
||||||
rms = np.sqrt(np.mean(indata**2))
|
rms = np.sqrt(np.mean(indata**2))
|
||||||
dump(rms)
|
self.max_rms = max(self.max_rms, rms)
|
||||||
|
self.min_rms = min(self.min_rms, rms)
|
||||||
|
|
||||||
filename = tempfile.mktemp(suffix=".wav")
|
rng = self.max_rms - self.min_rms
|
||||||
|
if rng > 0.001:
|
||||||
|
self.pct = (rms - self.min_rms) / rng
|
||||||
|
|
||||||
sample_rate = 16000 # 16kHz
|
def get_prompt(self):
|
||||||
|
if np.isnan(self.pct):
|
||||||
|
bar = ""
|
||||||
|
else:
|
||||||
|
bar = "█" * int(self.pct * 10)
|
||||||
|
|
||||||
with sf.SoundFile(filename, mode="x", samplerate=sample_rate, channels=1) as file:
|
dur = time.time() - self.start_time
|
||||||
with sd.InputStream(samplerate=sample_rate, channels=1, callback=callback):
|
return f"Recording, press ENTER when done... {dur:.1f}sec {bar}"
|
||||||
input("Recording... Press ENTER when done speaking...")
|
|
||||||
|
|
||||||
while not q.empty():
|
def record_and_transcribe(self):
|
||||||
file.write(q.get())
|
self.q = queue.Queue()
|
||||||
|
|
||||||
with open(filename, "rb") as fh:
|
filename = tempfile.mktemp(suffix=".wav")
|
||||||
transcript = openai.Audio.transcribe("whisper-1", fh)
|
|
||||||
|
|
||||||
text = transcript["text"]
|
sample_rate = 16000 # 16kHz
|
||||||
return text
|
|
||||||
|
self.start_time = time.time()
|
||||||
|
|
||||||
|
with sf.SoundFile(filename, mode="x", samplerate=sample_rate, channels=1) as file:
|
||||||
|
with sd.InputStream(samplerate=sample_rate, channels=1, callback=self.callback):
|
||||||
|
prompt(self.get_prompt, refresh_interval=0.1)
|
||||||
|
|
||||||
|
while not self.q.empty():
|
||||||
|
file.write(self.q.get())
|
||||||
|
|
||||||
|
with open(filename, "rb") as fh:
|
||||||
|
transcript = openai.Audio.transcribe("whisper-1", fh)
|
||||||
|
|
||||||
|
text = transcript["text"]
|
||||||
|
return text
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
api_key = os.getenv("OPENAI_API_KEY")
|
api_key = os.getenv("OPENAI_API_KEY")
|
||||||
if not api_key:
|
if not api_key:
|
||||||
raise ValueError("Please set the OPENAI_API_KEY environment variable.")
|
raise ValueError("Please set the OPENAI_API_KEY environment variable.")
|
||||||
print(record_and_transcribe())
|
print(Voice().record_and_transcribe())
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue