From 23579d4741609a3273bc38a76463367319f4d653 Mon Sep 17 00:00:00 2001
From: Paul Gauthier
Date: Mon, 14 Aug 2023 14:11:03 -0300
Subject: [PATCH] Refactor voice recording and transcription to allow for specifying the language.

---
 aider/commands.py | 9 +++++++--
 aider/voice.py    | 4 ++--
 2 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/aider/commands.py b/aider/commands.py
index b562ea4e5..f35eb5d49 100644
--- a/aider/commands.py
+++ b/aider/commands.py
@@ -14,9 +14,14 @@ from .dump import dump # noqa: F401
 
 
 class Commands:
-    def __init__(self, io, coder):
+    def __init__(self, io, coder, voice_language=None):
         self.io = io
         self.coder = coder
+
+        if voice_language == 'auto':
+            voice_language=None
+
+        self.voice_language = voice_language
         self.tokenizer = tiktoken.encoding_for_model(coder.main_model.name)
 
     def is_command(self, inp):
@@ -460,7 +465,7 @@ class Commands:
         history = "\n".join(history)
         dump(history)
 
-        text = v.record_and_transcribe(history)
+        text = v.record_and_transcribe(history, self.voice_language)
         if text:
             self.io.add_to_input_history(text)
             print()
diff --git a/aider/voice.py b/aider/voice.py
index d8b2e92cb..462693ab9 100644
--- a/aider/voice.py
+++ b/aider/voice.py
@@ -60,7 +60,7 @@ class Voice:
         except KeyboardInterrupt:
             return
 
-    def raw_record_and_transcribe(self, history):
+    def raw_record_and_transcribe(self, history, language):
         self.q = queue.Queue()
 
         filename = tempfile.mktemp(suffix=".wav")
@@ -77,7 +77,7 @@ class Voice:
                 file.write(self.q.get())
 
         with open(filename, "rb") as fh:
-            transcript = openai.Audio.transcribe("whisper-1", fh, prompt=history)
+            transcript = openai.Audio.transcribe("whisper-1", fh, prompt=history, language=language)
 
         text = transcript["text"]
         return text