mirror of https://github.com/Aider-AI/aider.git, synced 2025-05-31 17:55:01 +00:00
Submission: Speech transcription with the Web Speech API in the browser
commit e2dc7f6332
parent 3daf7d4df3
3 changed files with 203 additions and 0 deletions
aider/gui.py (10 additions)

@@ -9,6 +9,7 @@ import streamlit as st
 from aider import urls
 from aider.coders import Coder
 from aider.dump import dump  # noqa: F401
+from aider.gui_speech_to_text import SpeechToText
 from aider.io import InputOutput
 from aider.main import main as cli_main
 from aider.scrape import Scraper, has_playwright
@@ -153,6 +154,7 @@ class GUI:
 
             # self.do_recommended_actions()
             self.do_add_to_chat()
+            self.do_speech_to_text()
             self.do_recent_msgs()
             self.do_clear_chat_history()
             # st.container(height=150, border=False)
@@ -211,6 +213,14 @@ class GUI:
         with st.popover("Add a web page to the chat"):
             self.do_web()
 
+    def do_speech_to_text(self):
+        # Initialize the speech-to-text component if not already done
+        if not hasattr(self, "speech_to_text"):
+            self.speech_to_text = SpeechToText()
+
+        # Render the speech-to-text component
+        self.speech_to_text.render()
+
     def do_add_image(self):
         with st.popover("Add image"):
             st.markdown("Hello World 👋")
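The gui.py wiring is intentionally thin: do_speech_to_text() lazily constructs a SpeechToText the first time it is called and then renders it into the sidebar. As a review aid, here is a minimal sketch of exercising the component outside the full GUI (the demo file name is hypothetical; assumes streamlit and this branch of aider are installed):

# demo_speech_to_text.py -- hypothetical demo page, not part of this commit.
# Run with: streamlit run demo_speech_to_text.py
import streamlit as st

from aider.gui_speech_to_text import SpeechToText

st.title("Voice input demo")
SpeechToText().render()  # renders the LED indicator and "Voice Input" button

# The component's JS looks for Streamlit's chat input in the parent page,
# so render one for the transcript to land in.
st.chat_input("Transcribed speech should appear here")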
aider/gui_speech_to_text.js (new file, 156 lines)
@@ -0,0 +1,156 @@
(function() {
  // Generate a unique ID for this component instance
  const compId = 'st-speech-to-text-' + Math.random().toString(36).substring(2, 9);

  // Find the container element
  const container = document.getElementById('speech-to-text-container');
  if (!container) {
    console.error('Could not find speech-to-text-container');
    return;
  }

  // Style the container
  container.style.display = 'flex';
  container.style.alignItems = 'center';
  container.style.padding = '5px';
  container.style.justifyContent = 'space-between';

  // Create LED indicator
  const led = document.createElement('div');
  led.id = 'led-' + compId;
  led.style.width = '12px';
  led.style.height = '12px';
  led.style.borderRadius = '50%';
  led.style.backgroundColor = 'gray';
  led.style.marginRight = '10px';

  // Create button
  const button = document.createElement('button');
  button.id = 'button-' + compId;
  button.textContent = 'Voice Input';
  button.style.padding = '4px 8px';

  // Add elements to container
  container.appendChild(led);
  container.appendChild(button);

  // Check if browser supports the Web Speech API
  if (!('webkitSpeechRecognition' in window) && !('SpeechRecognition' in window)) {
    button.disabled = true;
    button.textContent = 'Not supported';
    return;
  }

  // Function to populate the chat input
  function populateChatInput(text) {
    const parentDoc = window.parent.document;
    let chatInput = parentDoc.querySelector('textarea[data-testid="stChatInputTextArea"]');

    if (!chatInput) {
      console.error("Could not find chat input textarea");
      return false;
    }

    try {
      // Try to access React internals
      const reactProps = Object.keys(chatInput).find(key => key.startsWith('__reactProps$'));

      if (reactProps && chatInput[reactProps] && chatInput[reactProps].onChange) {
        // If we can access React props, use the onChange handler directly

        // Append to the existing value
        chatInput.value = chatInput.value + ' ' + text;

        // Create a synthetic event that React's onChange will accept
        const syntheticEvent = {
          target: chatInput,
          currentTarget: chatInput,
          preventDefault: () => {},
          stopPropagation: () => {},
          persist: () => {},
          isDefaultPrevented: () => false,
          isPropagationStopped: () => false,
          bubbles: true,
          cancelable: true,
          nativeEvent: new Event('input', { bubbles: true })
        };

        // Call React's onChange handler
        chatInput[reactProps].onChange(syntheticEvent);
      } else {
        // Fall back to the standard DOM approach with multiple event types

        // Focus first
        chatInput.focus();

        // Append to the existing value (with the same separating space as
        // the React path above, so words don't run together)
        chatInput.value = chatInput.value + ' ' + text;

        // Dispatch multiple event types to ensure detection
        ['input', 'change', 'blur', 'keydown', 'keyup'].forEach(eventType => {
          const event = new Event(eventType, { bubbles: true, cancelable: true });
          chatInput.dispatchEvent(event);
        });

        // For Streamlit specifically, try to trigger any MutationObservers
        // or polling that might be watching for changes
        setTimeout(() => {
          chatInput.dispatchEvent(new Event('change', { bubbles: true }));
          chatInput.dispatchEvent(new Event('blur', { bubbles: true }));
        }, 100);
      }

      return true;
    } catch (err) {
      console.error("Error setting chat input value:", err);
      return false;
    }
  }

  // Initialize speech recognition
  const SpeechRecognition = window.SpeechRecognition || window.webkitSpeechRecognition;
  const recognition = new SpeechRecognition();

  recognition.continuous = false;
  recognition.interimResults = false;
  recognition.lang = 'en-US';

  // Setup button click handler
  button.addEventListener('click', function() {
    led.style.backgroundColor = 'green';
    recognition.start();
  });

  recognition.onresult = function(event) {
    const transcript = event.results[0][0].transcript;
    led.style.backgroundColor = 'gray';

    // Try to populate the chat input directly
    const success = populateChatInput(transcript);

    if (!success) {
      console.error('populateChatInput failed');

      // Add a debug display with the transcribed text
      const debugContainer = document.createElement('div');
      debugContainer.style.padding = '10px';
      debugContainer.style.marginTop = '10px';
      debugContainer.style.backgroundColor = '#ffeeee';
      debugContainer.style.border = '1px solid #ffcccc';
      debugContainer.innerHTML = `
        <div><strong>Debug: Transcribed text</strong></div>
        <div><textarea style="width: 100%; height: 80px;">${transcript}</textarea></div>
      `;
      container.appendChild(debugContainer);
    }
  };

  recognition.onerror = function(event) {
    console.error('Speech recognition error', event.error);
    led.style.backgroundColor = 'gray';
  };

  recognition.onend = function() {
    led.style.backgroundColor = 'gray';
  };
})();
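Note that populateChatInput() reaches into window.parent.document for the textarea matching data-testid="stChatInputTextArea"; that test id is an internal of Streamlit's chat input and can change between Streamlit versions, which is presumably why the script keeps both the DOM-event fallback and the debug panel. The script also has to ship next to the Python component below, since SpeechToText.render() resolves it relative to its own __file__. A quick hypothetical check (not part of the commit) that the asset resolves in an installed package:

import os

import aider

# Mirror the lookup done in SpeechToText.render(): the JS file must sit
# beside the module inside the installed aider package directory.
js_path = os.path.join(os.path.dirname(aider.__file__), "gui_speech_to_text.js")
print(js_path, "->", "found" if os.path.exists(js_path) else "MISSING")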
aider/gui_speech_to_text.py (new file, 37 lines)
@@ -0,0 +1,37 @@
import base64
import os

import streamlit as st
import streamlit.components.v1 as components


class SpeechToText:
    """Class to handle speech-to-text functionality in the GUI"""

    def render(self):
        """Render the speech-to-text component with LED indicator"""
        self._js_dir = os.path.dirname(__file__)

        # Create JS file path
        js_path = os.path.join(self._js_dir, "gui_speech_to_text.js")
        if not os.path.exists(js_path):
            st.error(f"JavaScript file not found: {js_path}")
            return

        # Read the JS file for data URL
        with open(js_path, "r") as f:
            js_content = f.read()

        # Create data URL for the JS file
        js_b64 = base64.b64encode(js_content.encode("utf-8")).decode("utf-8")
        js_data_url = f"data:text/javascript;base64,{js_b64}"

        # Create simple HTML component with a container for the JS to populate
        components.html(
            f"""
            <div id="speech-to-text-container"></div>
            <!-- Load JS file via data URL since direct src paths don't work in Streamlit iframe -->
            <script src="{js_data_url}"></script>
            """,
            height=50,
        )
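The base64 data URL is the non-obvious step here: components.html renders its markup inside a sandboxed iframe, so (as the inline HTML comment notes) a direct src path to a file on disk does not resolve; encoding the file into the URL inlines it instead. A minimal sketch of the same pattern with throwaway inline JS (illustrative only; assumes streamlit is installed):

# data_url_demo.py -- hypothetical demo; run with: streamlit run data_url_demo.py
import base64

import streamlit.components.v1 as components

js = "document.getElementById('demo').textContent = 'loaded via data URL';"
b64 = base64.b64encode(js.encode("utf-8")).decode("utf-8")

# Same technique as SpeechToText.render(): inline the script as a data URL
# so it loads inside the component iframe.
components.html(
    f'<div id="demo"></div><script src="data:text/javascript;base64,{b64}"></script>',
    height=50,
)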