show loudness bar

This commit is contained in:
Paul Gauthier 2023-08-10 22:17:31 -03:00
parent 4dc704272e
commit 053751cb43
2 changed files with 47 additions and 24 deletions

View file

@ -436,11 +436,13 @@ class Commands:
def cmd_voice(self, args):
"Record and transcribe voice input"
if not voice.is_audio_available():
v = voice.Voice()
if not v.is_audio_available():
self.io.tool_error("Unable to import `sounddevice`, is portaudio installed?")
return
text = voice.record_and_transcribe()
text = v.record_and_transcribe()
self.io.add_to_file_history(text)
print()
self.io.user_input(text, log_only=False)

View file

@ -1,8 +1,11 @@
import os
import queue
import tempfile
import time
import numpy as np
import openai
from prompt_toolkit.shortcuts import prompt
try:
import sounddevice as sd
@ -14,31 +17,49 @@ import soundfile as sf
from .dump import dump # noqa: F401
def is_audio_available():
class Voice:
max_rms = 0
min_rms = 1e5
pct = 0
def is_audio_available(self):
    """Report whether the audio capture backend can be used.

    Returns:
        bool: True when the module-level name ``sd`` is not None.
    """
    # NOTE(review): presumably the guarded `import sounddevice as sd` at the
    # top of the file sets `sd = None` on failure; confirm against that block.
    backend_missing = sd is None
    return not backend_missing
def record_and_transcribe():
q = queue.Queue()
import numpy as np
def callback(indata, frames, time, status):
def callback(self, indata, frames, time, status):
    """This is called (from a separate thread) for each audio block.

    Queues a copy of the block for later writing to disk and updates the
    running loudness statistics that drive the recording prompt.

    Args:
        indata: Array of samples for this block (presumably a numpy array
            supplied by sounddevice; confirm shape/dtype against its docs).
        frames: Unused here.
        time: Unused here. Note that inside this method the name shadows
            the `time` module.
        status: Unused here.

    Side effects:
        Puts ``indata.copy()`` on ``self.q`` and updates ``self.max_rms``,
        ``self.min_rms`` and, once the observed range is wide enough,
        ``self.pct``.
    """
    # Copy before queueing: the consumer reads the queue later, after this
    # callback has returned.
    self.q.put(indata.copy())

    # Root-mean-square level of this block.
    rms = np.sqrt(np.mean(indata**2))
    self.max_rms = max(self.max_rms, rms)
    self.min_rms = min(self.min_rms, rms)

    # Position of the current level within the range seen so far. The
    # threshold avoids dividing by a (near-)zero range; in that case the
    # previous pct is kept.
    rng = self.max_rms - self.min_rms
    if rng > 0.001:
        self.pct = (rms - self.min_rms) / rng
def get_prompt(self):
    """Build the status line shown while recording.

    Passed as a callable to prompt_toolkit's ``prompt`` together with
    ``refresh_interval`` in ``record_and_transcribe``, so the text is
    rebuilt each time it is called.

    Returns:
        str: The fixed instruction text, the elapsed time since
        ``self.start_time`` with one decimal place, and a loudness bar
        whose length is ``int(self.pct * 10)`` block characters (empty
        when ``self.pct`` is NaN).
    """
    if np.isnan(self.pct):
        bar = ""
    else:
        # The bar needs a visible glyph: multiplying an empty string always
        # gives "", which would make the loudness bar invisible.
        bar = "█" * int(self.pct * 10)

    dur = time.time() - self.start_time
    return f"Recording, press ENTER when done... {dur:.1f}sec {bar}"
def record_and_transcribe(self):
self.q = queue.Queue()
filename = tempfile.mktemp(suffix=".wav")
sample_rate = 16000 # 16kHz
with sf.SoundFile(filename, mode="x", samplerate=sample_rate, channels=1) as file:
with sd.InputStream(samplerate=sample_rate, channels=1, callback=callback):
input("Recording... Press ENTER when done speaking...")
self.start_time = time.time()
while not q.empty():
file.write(q.get())
with sf.SoundFile(filename, mode="x", samplerate=sample_rate, channels=1) as file:
with sd.InputStream(samplerate=sample_rate, channels=1, callback=self.callback):
prompt(self.get_prompt, refresh_interval=0.1)
while not self.q.empty():
file.write(self.q.get())
with open(filename, "rb") as fh:
transcript = openai.Audio.transcribe("whisper-1", fh)
@ -51,4 +72,4 @@ if __name__ == "__main__":
api_key = os.getenv("OPENAI_API_KEY")
if not api_key:
raise ValueError("Please set the OPENAI_API_KEY environment variable.")
print(record_and_transcribe())
print(Voice().record_and_transcribe())