diff --git a/HISTORY.md b/HISTORY.md
index 628caebaa..bacf233c3 100644
--- a/HISTORY.md
+++ b/HISTORY.md
@@ -5,6 +5,7 @@
 - [Only git commit dirty files that GPT tries to edit](https://github.com/paul-gauthier/aider/issues/200#issuecomment-1682750798)
 - Send chat history as prompt/context for Whisper voice transcription
 - Added `--voice-language` switch to constrain `/voice` to transcribe to a specific language
+- Late-bind importing `sounddevice`, as it was slowing down aider startup
 - Improved --foo/--no-foo switch handling for command line and yml config settings
 
 ### v0.12.0
diff --git a/aider/commands.py b/aider/commands.py
index 6047e9438..c05bd1204 100644
--- a/aider/commands.py
+++ b/aider/commands.py
@@ -14,6 +14,8 @@ from .dump import dump  # noqa: F401
 
 
 class Commands:
+    voice = None
+
     def __init__(self, io, coder, voice_language=None):
         self.io = io
         self.coder = coder
@@ -441,11 +443,13 @@ class Commands:
 
     def cmd_voice(self, args):
         "Record and transcribe voice input"
-        v = voice.Voice()
 
-        if not v.is_audio_available():
-            self.io.tool_error("Unable to import `sounddevice`, is portaudio installed?")
-            return
+        if not self.voice:
+            try:
+                self.voice = voice.Voice()
+            except voice.SoundDeviceError:
+                self.io.tool_error("Unable to import `sounddevice`, is portaudio installed?")
+                return
 
         history_iter = self.io.get_input_history()
 
@@ -464,7 +468,7 @@ class Commands:
         history.reverse()
         history = "\n".join(history)
 
-        text = v.record_and_transcribe(history, language=self.voice_language)
+        text = self.voice.record_and_transcribe(history, language=self.voice_language)
         if text:
             self.io.add_to_input_history(text)
             print()
diff --git a/aider/voice.py b/aider/voice.py
index 98a771a32..78f94f4ae 100644
--- a/aider/voice.py
+++ b/aider/voice.py
@@ -5,18 +5,16 @@ import time
 
 import numpy as np
 import openai
+import soundfile as sf
 from prompt_toolkit.shortcuts import prompt
 
-try:
-    import sounddevice as sd
-except OSError:
-    sd = None
-
-import soundfile as sf
-
 from .dump import dump  # noqa: F401
 
 
+class SoundDeviceError(Exception):
+    """Raised by `Voice()` when the `sounddevice` module cannot be imported."""
+
+
 class Voice:
     max_rms = 0
     min_rms = 1e5
@@ -24,8 +22,14 @@ class Voice:
 
     threshold = 0.15
 
-    def is_audio_available(self):
-        return sd is not None
+    def __init__(self):
+        try:  # imported lazily: loading sounddevice at module import slowed aider startup
+            print("Initializing sound device...")
+            import sounddevice as sd
+
+            self.sd = sd
+        except (OSError, ImportError) as err:  # PortAudio or the package itself is missing
+            raise SoundDeviceError from err
 
     def callback(self, indata, frames, time, status):
         """This is called (from a separate thread) for each audio block."""
@@ -69,7 +73,7 @@ class Voice:
 
         self.start_time = time.time()
 
-        with sd.InputStream(samplerate=sample_rate, channels=1, callback=self.callback):
+        with self.sd.InputStream(samplerate=sample_rate, channels=1, callback=self.callback):
             prompt(self.get_prompt, refresh_interval=0.1)
 
         with sf.SoundFile(filename, mode="x", samplerate=sample_rate, channels=1) as file: