Mirror of https://github.com/Aider-AI/aider.git, synced 2025-05-28 00:05:01 +00:00
Refactor voice recording and transcription to allow for specifying the language.
parent f4e1c1010b
commit 23579d4741
2 changed files with 9 additions and 4 deletions
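This change threads an optional voice_language setting from Commands down to the Whisper transcription call, so /voice input can be transcribed with a language hint. A hypothetical construction call, assuming io and coder are already-built aider objects (the variable names here are placeholders, not part of the commit):

    # Hypothetical usage after this commit; io and coder stand in for
    # aider's InputOutput and Coder instances.
    commands = Commands(io, coder, voice_language='fr')    # hint Whisper toward French
    commands = Commands(io, coder, voice_language='auto')  # treated as None: auto-detect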
aider/commands.py
@@ -14,9 +14,14 @@ from .dump import dump  # noqa: F401

 class Commands:
-    def __init__(self, io, coder):
+    def __init__(self, io, coder, voice_language=None):
         self.io = io
         self.coder = coder

+        if voice_language == 'auto':
+            voice_language = None
+
+        self.voice_language = voice_language
+
         self.tokenizer = tiktoken.encoding_for_model(coder.main_model.name)

     def is_command(self, inp):
@@ -460,7 +465,7 @@ class Commands:
         history = "\n".join(history)
         dump(history)

-        text = v.record_and_transcribe(history)
+        text = v.record_and_transcribe(history, self.voice_language)
         if text:
             self.io.add_to_input_history(text)
             print()
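The 'auto' sentinel is collapsed to None at construction time, so downstream code only has to distinguish "caller gave a language hint" from "let Whisper auto-detect". A minimal sketch of that normalization in isolation (the helper name is ours, not part of the commit):

    def normalize_voice_language(voice_language):
        # 'auto' and None both mean: let Whisper detect the language itself.
        if voice_language == 'auto':
            return None
        return voice_language

    assert normalize_voice_language('auto') is None
    assert normalize_voice_language('en') == 'en'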
aider/voice.py
@@ -60,7 +60,7 @@ class Voice:
         except KeyboardInterrupt:
             return

-    def raw_record_and_transcribe(self, history):
+    def raw_record_and_transcribe(self, history, language):
         self.q = queue.Queue()

         filename = tempfile.mktemp(suffix=".wav")
@@ -77,7 +77,7 @@ class Voice:
                 file.write(self.q.get())

         with open(filename, "rb") as fh:
-            transcript = openai.Audio.transcribe("whisper-1", fh, prompt=history)
+            transcript = openai.Audio.transcribe("whisper-1", fh, prompt=history, language=language)

         text = transcript["text"]
         return text
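Whisper's language parameter takes an ISO-639-1 code such as 'en' or 'fr'; when no language is configured, the commit passes language=None, which the diff implies is treated as "no hint, auto-detect". A sketch of the transcription call in isolation, using the legacy openai-python 0.x API this file already uses (the filename and prompt are placeholders):

    import openai

    # Transcribe a recorded WAV with an optional language hint.
    with open('recording.wav', 'rb') as fh:
        transcript = openai.Audio.transcribe(
            'whisper-1',
            fh,
            prompt='',       # prior chat history is used to prime the transcription
            language='en',   # ISO-639-1 code; None lets Whisper auto-detect
        )
    print(transcript['text'])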