From 053751cb4374dcc3a18dddc1b91908a66d80e7a6 Mon Sep 17 00:00:00 2001
From: Paul Gauthier
Date: Thu, 10 Aug 2023 22:17:31 -0300
Subject: [PATCH] show loudness bar

---
 aider/commands.py |  6 +++--
 aider/voice.py    | 65 +++++++++++++++++++++++++++++++----------------
 2 files changed, 47 insertions(+), 24 deletions(-)

diff --git a/aider/commands.py b/aider/commands.py
index 62bdc8c03..622d1fbdf 100644
--- a/aider/commands.py
+++ b/aider/commands.py
@@ -436,11 +436,13 @@ class Commands:
     def cmd_voice(self, args):
         "Record and transcribe voice input"
 
-        if not voice.is_audio_available():
+        v = voice.Voice()
+
+        if not v.is_audio_available():
             self.io.tool_error("Unable to import `sounddevice`, is portaudio installed?")
             return
 
-        text = voice.record_and_transcribe()
+        text = v.record_and_transcribe()
         self.io.add_to_file_history(text)
         print()
         self.io.user_input(text, log_only=False)
diff --git a/aider/voice.py b/aider/voice.py
index e175dfeab..5f6086cf5 100644
--- a/aider/voice.py
+++ b/aider/voice.py
@@ -1,8 +1,11 @@
 import os
 import queue
 import tempfile
+import time
 
+import numpy as np
 import openai
+from prompt_toolkit.shortcuts import prompt
 
 try:
     import sounddevice as sd
@@ -14,41 +17,59 @@ import soundfile as sf
 from .dump import dump  # noqa: F401
 
 
-def is_audio_available():
-    return sd is not None
+class Voice:
+    max_rms = 0
+    min_rms = 1e5
+    pct = 0
 
+    def is_audio_available(self):
+        return sd is not None
 
-def record_and_transcribe():
-    q = queue.Queue()
-
-    import numpy as np
-
-    def callback(indata, frames, time, status):
+    def callback(self, indata, frames, time, status):
         """This is called (from a separate thread) for each audio block."""
-        q.put(indata.copy())
+        self.q.put(indata.copy())
         rms = np.sqrt(np.mean(indata**2))
-        dump(rms)
+        self.max_rms = max(self.max_rms, rms)
+        self.min_rms = min(self.min_rms, rms)
 
-    filename = tempfile.mktemp(suffix=".wav")
+        rng = self.max_rms - self.min_rms
+        if rng > 0.001:
+            self.pct = (rms - self.min_rms) / rng
 
-    sample_rate = 16000  # 16kHz
+    def get_prompt(self):
+        if np.isnan(self.pct):
+            bar = ""
+        else:
+            bar = "█" * int(self.pct * 10)
 
-    with sf.SoundFile(filename, mode="x", samplerate=sample_rate, channels=1) as file:
-        with sd.InputStream(samplerate=sample_rate, channels=1, callback=callback):
-            input("Recording... Press ENTER when done speaking...")
+        dur = time.time() - self.start_time
+        return f"Recording, press ENTER when done... {dur:.1f}sec {bar}"
 
-        while not q.empty():
-            file.write(q.get())
+    def record_and_transcribe(self):
+        self.q = queue.Queue()
 
-    with open(filename, "rb") as fh:
-        transcript = openai.Audio.transcribe("whisper-1", fh)
+        filename = tempfile.mktemp(suffix=".wav")
 
-    text = transcript["text"]
-    return text
+        sample_rate = 16000  # 16kHz
+
+        self.start_time = time.time()
+
+        with sf.SoundFile(filename, mode="x", samplerate=sample_rate, channels=1) as file:
+            with sd.InputStream(samplerate=sample_rate, channels=1, callback=self.callback):
+                prompt(self.get_prompt, refresh_interval=0.1)
+
+            while not self.q.empty():
+                file.write(self.q.get())
+
+        with open(filename, "rb") as fh:
+            transcript = openai.Audio.transcribe("whisper-1", fh)
+
+        text = transcript["text"]
+        return text
 
 
 if __name__ == "__main__":
     api_key = os.getenv("OPENAI_API_KEY")
     if not api_key:
         raise ValueError("Please set the OPENAI_API_KEY environment variable.")
-    print(record_and_transcribe())
+    print(Voice().record_and_transcribe())