feat: ability to select audio input device

This commit is contained in:
Philippe de Reynal 2024-11-30 11:24:34 +01:00
parent e4a1d6fe89
commit e11faadf39
6 changed files with 317 additions and 281 deletions

View file

@ -22,7 +22,7 @@ def default_env_file(git_root):
def get_parser(default_config_files, git_root): def get_parser(default_config_files, git_root):
parser = configargparse.ArgumentParser( parser = configargparse.ArgumentParser(
description="aider is AI pair programming in your terminal", description="Aider is AI pair programming in your terminal",
add_config_file_help=True, add_config_file_help=True,
default_config_files=default_config_files, default_config_files=default_config_files,
config_file_parser_class=configargparse.YAMLConfigFileParser, config_file_parser_class=configargparse.YAMLConfigFileParser,
@ -770,6 +770,12 @@ def get_parser(default_config_files, git_root):
default="en", default="en",
help="Specify the language for voice using ISO 639-1 code (default: auto)", help="Specify the language for voice using ISO 639-1 code (default: auto)",
) )
group.add_argument(
"--voice-input-device",
metavar="VOICE_INPUT_DEVICE",
default=None,
help="Specify the input device name for voice recording",
)
return parser return parser

View file

@ -1085,7 +1085,7 @@ class Commands:
self.io.tool_error("To use /voice you must provide an OpenAI API key.") self.io.tool_error("To use /voice you must provide an OpenAI API key.")
return return
try: try:
self.voice = voice.Voice(audio_format=self.args.voice_format) self.voice = voice.Voice(audio_format=self.args.voice_format, device_name=self.args.voice_input_device)
except voice.SoundDeviceError: except voice.SoundDeviceError:
self.io.tool_error( self.io.tool_error(
"Unable to import `sounddevice` and/or `soundfile`, is portaudio installed?" "Unable to import `sounddevice` and/or `soundfile`, is portaudio installed?"

View file

@ -34,7 +34,7 @@ class Voice:
threshold = 0.15 threshold = 0.15
def __init__(self, audio_format="wav"): def __init__(self, audio_format="wav", device_name=None):
if sf is None: if sf is None:
raise SoundDeviceError raise SoundDeviceError
try: try:
@ -42,6 +42,27 @@ class Voice:
import sounddevice as sd import sounddevice as sd
self.sd = sd self.sd = sd
devices = sd.query_devices()
if device_name:
# Find the device with matching name
device_id = None
for i, device in enumerate(devices):
if device_name in device["name"]:
device_id = i
break
if device_id is None:
available_inputs = [d["name"] for d in devices if d["max_input_channels"] > 0]
raise ValueError(f"Device '{device_name}' not found. Available input devices: {available_inputs}")
print(f"Using input device: {device_name} (ID: {device_id})")
self.device_id = device_id
else:
self.device_id = None
except (OSError, ModuleNotFoundError): except (OSError, ModuleNotFoundError):
raise SoundDeviceError raise SoundDeviceError
if audio_format not in ["wav", "mp3", "webm"]: if audio_format not in ["wav", "mp3", "webm"]:
@ -93,7 +114,7 @@ class Voice:
temp_wav = tempfile.mktemp(suffix=".wav") temp_wav = tempfile.mktemp(suffix=".wav")
try: try:
sample_rate = int(self.sd.query_devices(None, "input")["default_samplerate"]) sample_rate = int(self.sd.query_devices(self.device_id, "input")["default_samplerate"])
except (TypeError, ValueError): except (TypeError, ValueError):
sample_rate = 16000 # fallback to 16kHz if unable to query device sample_rate = 16000 # fallback to 16kHz if unable to query device
except self.sd.PortAudioError: except self.sd.PortAudioError:
@ -104,7 +125,7 @@ class Voice:
self.start_time = time.time() self.start_time = time.time()
try: try:
with self.sd.InputStream(samplerate=sample_rate, channels=1, callback=self.callback): with self.sd.InputStream(samplerate=sample_rate, channels=1, callback=self.callback, device=self.device_id):
prompt(self.get_prompt, refresh_interval=0.1) prompt(self.get_prompt, refresh_interval=0.1)
except self.sd.PortAudioError as err: except self.sd.PortAudioError as err:
raise SoundDeviceError(f"Error accessing audio input device: {err}") raise SoundDeviceError(f"Error accessing audio input device: {err}")

View file

@ -368,3 +368,6 @@
## Specify the language for voice using ISO 639-1 code (default: auto) ## Specify the language for voice using ISO 639-1 code (default: auto)
#AIDER_VOICE_LANGUAGE=en #AIDER_VOICE_LANGUAGE=en
## Specify the voice input device name (default: system default)
#AIDER_VOICE_INPUT_DEVICE="MacBook Pro Microphone"

View file

@ -410,7 +410,8 @@ cog.outl("```")
## Specify the language for voice using ISO 639-1 code (default: auto) ## Specify the language for voice using ISO 639-1 code (default: auto)
#AIDER_VOICE_LANGUAGE=en #AIDER_VOICE_LANGUAGE=en
## Specify the voice input device name (default: system default)
#AIDER_VOICE_INPUT_DEVICE="MacBook Pro Microphone"
``` ```
<!--[[[end]]]--> <!--[[[end]]]-->

View file

@ -701,4 +701,9 @@ Environment variable: `AIDER_VOICE_FORMAT`
Specify the language for voice using ISO 639-1 code (default: auto) Specify the language for voice using ISO 639-1 code (default: auto)
Default: en Default: en
Environment variable: `AIDER_VOICE_LANGUAGE` Environment variable: `AIDER_VOICE_LANGUAGE`
### `--voice-input-device VOICE_INPUT_DEVICE`
Specify the voice input device name used for recording (default: system default)
Default: system default
Environment variable: `AIDER_VOICE_INPUT_DEVICE`
<!--[[[end]]]--> <!--[[[end]]]-->