Late-bind importing sounddevice, as it was slowing down aider startup

This commit is contained in:
Paul Gauthier 2023-08-19 15:18:46 -07:00
parent 4846317ef1
commit 963bc83a9c
3 changed files with 24 additions and 15 deletions

View file

@ -5,6 +5,7 @@
- [Only git commit dirty files that GPT tries to edit](https://github.com/paul-gauthier/aider/issues/200#issuecomment-1682750798) - [Only git commit dirty files that GPT tries to edit](https://github.com/paul-gauthier/aider/issues/200#issuecomment-1682750798)
- Send chat history as prompt/context for Whisper voice transcription - Send chat history as prompt/context for Whisper voice transcription
- Added `--voice-language` switch to constrain `/voice` to transcribe to a specific language - Added `--voice-language` switch to constrain `/voice` to transcribe to a specific language
- Late-bind importing `sounddevice`, as it was slowing down aider startup
- Improved --foo/--no-foo switch handling for command line and yml config settings - Improved --foo/--no-foo switch handling for command line and yml config settings
### v0.12.0 ### v0.12.0

View file

@ -14,6 +14,8 @@ from .dump import dump # noqa: F401
class Commands: class Commands:
voice = None
def __init__(self, io, coder, voice_language=None): def __init__(self, io, coder, voice_language=None):
self.io = io self.io = io
self.coder = coder self.coder = coder
@ -441,11 +443,13 @@ class Commands:
def cmd_voice(self, args): def cmd_voice(self, args):
"Record and transcribe voice input" "Record and transcribe voice input"
v = voice.Voice()
if not v.is_audio_available(): if not self.voice:
self.io.tool_error("Unable to import `sounddevice`, is portaudio installed?") try:
return self.voice = voice.Voice()
except voice.SoundDeviceError:
self.io.tool_error("Unable to import `sounddevice`, is portaudio installed?")
return
history_iter = self.io.get_input_history() history_iter = self.io.get_input_history()
@ -464,7 +468,7 @@ class Commands:
history.reverse() history.reverse()
history = "\n".join(history) history = "\n".join(history)
text = v.record_and_transcribe(history, language=self.voice_language) text = self.voice.record_and_transcribe(history, language=self.voice_language)
if text: if text:
self.io.add_to_input_history(text) self.io.add_to_input_history(text)
print() print()

View file

@ -5,18 +5,16 @@ import time
import numpy as np import numpy as np
import openai import openai
import soundfile as sf
from prompt_toolkit.shortcuts import prompt from prompt_toolkit.shortcuts import prompt
try:
import sounddevice as sd
except OSError:
sd = None
import soundfile as sf
from .dump import dump # noqa: F401 from .dump import dump # noqa: F401
# Raised by Voice.__init__ when `import sounddevice` fails with OSError;
# Commands.cmd_voice catches it and reports the failure via io.tool_error.
class SoundDeviceError(Exception):
pass
class Voice: class Voice:
max_rms = 0 max_rms = 0
min_rms = 1e5 min_rms = 1e5
@ -24,8 +22,14 @@ class Voice:
threshold = 0.15 threshold = 0.15
def is_audio_available(self): def __init__(self):
return sd is not None try:
print("Initializing sound device...")
import sounddevice as sd
self.sd = sd
except OSError:
raise SoundDeviceError
def callback(self, indata, frames, time, status): def callback(self, indata, frames, time, status):
"""This is called (from a separate thread) for each audio block.""" """This is called (from a separate thread) for each audio block."""
@ -69,7 +73,7 @@ class Voice:
self.start_time = time.time() self.start_time = time.time()
with sd.InputStream(samplerate=sample_rate, channels=1, callback=self.callback): with self.sd.InputStream(samplerate=sample_rate, channels=1, callback=self.callback):
prompt(self.get_prompt, refresh_interval=0.1) prompt(self.get_prompt, refresh_interval=0.1)
with sf.SoundFile(filename, mode="x", samplerate=sample_rate, channels=1) as file: with sf.SoundFile(filename, mode="x", samplerate=sample_rate, channels=1) as file: