diff --git a/HISTORY.md b/HISTORY.md
index 628caebaa..bacf233c3 100644
--- a/HISTORY.md
+++ b/HISTORY.md
@@ -5,6 +5,7 @@
 - [Only git commit dirty files that GPT tries to edit](https://github.com/paul-gauthier/aider/issues/200#issuecomment-1682750798)
 - Send chat history as prompt/context for Whisper voice transcription
 - Added `--voice-language` switch to constrain `/voice` to transcribe to a specific language
+- Defer importing `sounddevice` until `/voice` is first used, as it was slowing down aider startup
 - Improved --foo/--no-foo switch handling for command line and yml config settings
 
 ### v0.12.0
diff --git a/aider/commands.py b/aider/commands.py
index 6047e9438..c05bd1204 100644
--- a/aider/commands.py
+++ b/aider/commands.py
@@ -14,6 +14,8 @@ from .dump import dump  # noqa: F401
 
 
 class Commands:
+    voice = None
+
     def __init__(self, io, coder, voice_language=None):
         self.io = io
         self.coder = coder
@@ -441,11 +443,13 @@ class Commands:
 
     def cmd_voice(self, args):
         "Record and transcribe voice input"
-        v = voice.Voice()
 
-        if not v.is_audio_available():
-            self.io.tool_error("Unable to import `sounddevice`, is portaudio installed?")
-            return
+        if not self.voice:
+            try:
+                self.voice = voice.Voice()
+            except voice.SoundDeviceError:
+                self.io.tool_error("Unable to import `sounddevice`, is portaudio installed?")
+                return
 
         history_iter = self.io.get_input_history()
 
@@ -464,7 +468,7 @@ class Commands:
         history.reverse()
         history = "\n".join(history)
 
-        text = v.record_and_transcribe(history, language=self.voice_language)
+        text = self.voice.record_and_transcribe(history, language=self.voice_language)
         if text:
             self.io.add_to_input_history(text)
             print()
diff --git a/aider/voice.py b/aider/voice.py
index 98a771a32..78f94f4ae 100644
--- a/aider/voice.py
+++ b/aider/voice.py
@@ -5,18 +5,16 @@ import time
 
 import numpy as np
 import openai
+import soundfile as sf
 from prompt_toolkit.shortcuts import prompt
 
-try:
-    import sounddevice as sd
-except OSError:
-    sd = None
-
-import soundfile as sf
-
 from .dump import dump  # noqa: F401
 
 
+class SoundDeviceError(Exception):
+    """Raised by Voice() when importing `sounddevice` fails with an OSError."""
+
+
 class Voice:
     max_rms = 0
     min_rms = 1e5
@@ -24,8 +22,14 @@ class Voice:
 
     threshold = 0.15
 
-    def is_audio_available(self):
-        return sd is not None
+    def __init__(self):
+        """Lazily import sounddevice; an OSError on import becomes SoundDeviceError."""
+        print("Initializing sound device...")
+        try:
+            import sounddevice as sd
+        except OSError as err:
+            raise SoundDeviceError from err
+        self.sd = sd
 
     def callback(self, indata, frames, time, status):
         """This is called (from a separate thread) for each audio block."""
@@ -69,7 +73,7 @@ class Voice:
 
         self.start_time = time.time()
 
-        with sd.InputStream(samplerate=sample_rate, channels=1, callback=self.callback):
+        with self.sd.InputStream(samplerate=sample_rate, channels=1, callback=self.callback):
             prompt(self.get_prompt, refresh_interval=0.1)
 
         with sf.SoundFile(filename, mode="x", samplerate=sample_rate, channels=1) as file: