mirror of https://github.com/Aider-AI/aider.git
synced 2025-05-25 14:55:00 +00:00

commit 24d33d5b94: Merge adeb0dd4dd into 3caab85931

3 changed files with 243 additions and 0 deletions
aider/gui.py (10 additions)

@@ -9,6 +9,7 @@ import streamlit as st
 from aider import urls
 from aider.coders import Coder
 from aider.dump import dump  # noqa: F401
+from aider.gui_speech_to_text import SpeechToText
 from aider.io import InputOutput
 from aider.main import main as cli_main
 from aider.scrape import Scraper, has_playwright
@@ -153,6 +154,7 @@ class GUI:
 
         # self.do_recommended_actions()
         self.do_add_to_chat()
+        self.do_speech_to_text()
         self.do_recent_msgs()
         self.do_clear_chat_history()
         # st.container(height=150, border=False)
@@ -211,6 +213,14 @@ class GUI:
         with st.popover("Add a web page to the chat"):
             self.do_web()
 
+    def do_speech_to_text(self):
+        # Initialize the speech-to-text component if not already done
+        if not hasattr(self, "speech_to_text"):
+            self.speech_to_text = SpeechToText()
+
+        # Render the speech-to-text component
+        self.speech_to_text.render()
+
     def do_add_image(self):
         with st.popover("Add image"):
             st.markdown("Hello World 👋")
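The new do_speech_to_text() hook above is the whole integration point: the sidebar lazily constructs one SpeechToText instance per GUI object and re-renders it on every Streamlit rerun. The component can also be exercised outside the full aider GUI; the sketch below is hypothetical (the file name, page text, and use of st.session_state are not part of this commit) and assumes the transcript lands in the textarea rendered by st.chat_input, which is the element the JS targets.

# speech_demo.py -- hypothetical standalone page: streamlit run speech_demo.py
import streamlit as st

from aider.gui_speech_to_text import SpeechToText

st.title("Speech-to-text demo")

# Same lazy-initialization idea as GUI.do_speech_to_text(), but keyed to the
# Streamlit session instead of a GUI instance.
if "speech_to_text" not in st.session_state:
    st.session_state.speech_to_text = SpeechToText()
st.session_state.speech_to_text.render()

# The component's JS appends each transcript to this chat input's textarea.
prompt = st.chat_input("Click Voice Input, speak, then press Enter")
if prompt:
    st.write(f"Received: {prompt}")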
aider/gui_speech_to_text.js (new file, 196 additions)

@@ -0,0 +1,196 @@
(function() {
  // Generate a unique ID for this component instance
  const compId = 'st-speech-to-text-' + Math.random().toString(36).substring(2, 9);

  // Find the container element
  const container = document.getElementById('speech-to-text-container');
  if (!container) {
    console.error('Could not find speech-to-text-container');
    return;
  }

  // Style the container
  container.style.display = 'flex';
  container.style.alignItems = 'center';
  container.style.padding = '5px';
  container.style.justifyContent = 'space-between';

  // Create LED indicator
  const led = document.createElement('div');
  led.id = 'led-' + compId;
  led.style.width = '12px';
  led.style.height = '12px';
  led.style.borderRadius = '50%';
  led.style.backgroundColor = 'gray';
  led.style.marginRight = '10px';

  // Create button
  const button = document.createElement('button');
  button.id = 'button-' + compId;
  button.textContent = 'Voice Input';
  button.style.padding = '4px 8px';

  // Create stop button (initially hidden)
  const stopButton = document.createElement('button');
  stopButton.id = 'stop-button-' + compId;
  stopButton.textContent = 'Stop';
  stopButton.style.padding = '4px 8px';
  stopButton.style.marginLeft = '5px';
  stopButton.style.display = 'none';

  // Create checkbox and label container
  const checkContainer = document.createElement('div');
  checkContainer.style.display = 'flex';
  checkContainer.style.alignItems = 'center';
  checkContainer.style.marginLeft = '10px';

  // Create auto-transcribe checkbox
  const autoTranscribe = document.createElement('input');
  autoTranscribe.id = 'auto-transcribe-' + compId;
  autoTranscribe.type = 'checkbox';
  autoTranscribe.style.marginRight = '5px';

  // Create label for checkbox
  const label = document.createElement('label');
  label.htmlFor = autoTranscribe.id;
  label.textContent = 'Auto Transcribe';
  label.style.fontSize = '14px';
  label.style.color = 'white';

  // Assemble components
  checkContainer.appendChild(autoTranscribe);
  checkContainer.appendChild(label);

  // Add elements to container
  container.appendChild(led);
  container.appendChild(button);
  container.appendChild(stopButton);
  container.appendChild(checkContainer);

  // Check if browser supports the Web Speech API
  if (!('webkitSpeechRecognition' in window) && !('SpeechRecognition' in window)) {
    button.disabled = true;
    button.textContent = 'Not supported';
    return;
  }

  // Function to populate the chat input in the parent page
  function populateChatInput(text) {
    const parentDoc = window.parent.document;
    const chatInput = parentDoc.querySelector('textarea[data-testid="stChatInputTextArea"]');
    if (!chatInput) {
      console.error('Could not find chat input textarea');
      return false;
    }

    const reactProps = Object.keys(chatInput).find(key => key.startsWith('__reactProps$'));
    if (!reactProps) {
      console.error('Could not find React props on chat input');
      return false;
    }

    const syntheticEvent = {
      target: chatInput,
      currentTarget: chatInput,
      preventDefault: () => {},
      nativeEvent: new Event('input', { bubbles: true }),
    };

    // Append to the existing value
    chatInput.value = chatInput.value + ' ' + text;
    // Call React's onChange handler
    chatInput[reactProps].onChange(syntheticEvent);
    return true;
  }

  // Initialize speech recognition
  const SpeechRecognition = window.SpeechRecognition || window.webkitSpeechRecognition;
  const recognition = new SpeechRecognition();
  let isListening = false;

  recognition.continuous = false;
  recognition.interimResults = false;
  // Use the browser's language or fall back to 'en-US'
  recognition.lang = navigator.language || 'en-US';
  console.log('Speech recognition language:', recognition.lang);

  // Set up button click handler
  button.addEventListener('click', function() {
    if (isListening) return;

    isListening = true;

    // Set initial LED color based on auto-transcribe mode
    if (autoTranscribe.checked) {
      led.style.backgroundColor = 'red'; // Red while waiting for voice
      stopButton.style.display = 'inline-block';
      recognition.continuous = true;
    } else {
      led.style.backgroundColor = 'lime';
    }

    recognition.start();
  });

  // Set up stop button click handler
  stopButton.addEventListener('click', function() {
    if (isListening) {
      recognition.stop();
      stopButton.style.display = 'none';
      isListening = false;
    }
  });

  // Handle speech detection
  recognition.onspeechstart = function() {
    console.log('Speech detected');
    if (autoTranscribe.checked) {
      led.style.backgroundColor = 'lime'; // Lime green while voice is detected
    }
  };

  // Handle speech end
  recognition.onspeechend = function() {
    console.log('Speech ended');
    if (autoTranscribe.checked && isListening) {
      led.style.backgroundColor = 'red'; // Red while waiting for voice
    }
  };

  // Combined event handler function for speech recognition events
  function handleSpeechEvent(eventType, event) {
    if (eventType === 'result') {
      // Get the latest transcript
      const resultIndex = event.resultIndex;
      const transcript = event.results[resultIndex][0].transcript;

      // Try to populate the chat input directly
      const success = populateChatInput(transcript);
      if (!success)
        console.error('populateChatInput failed');

      // Outside auto-transcribe mode, reset the LED. In auto-transcribe mode
      // the LED stays lime while speaking and is set back to red by the
      // speechend handler.
      if (!autoTranscribe.checked) {
        led.style.backgroundColor = 'gray';
      }
    } else if (eventType === 'error') {
      console.error('Speech recognition error', event.error);
      isListening = false;
      stopButton.style.display = 'none';
      led.style.backgroundColor = 'gray';
    } else if (eventType === 'end') {
      // If auto-transcribe is enabled and we're still supposed to be
      // listening, restart recognition
      if (autoTranscribe.checked && isListening) {
        setTimeout(() => recognition.start(), 100);
      } else {
        isListening = false;
        stopButton.style.display = 'none';
        led.style.backgroundColor = 'gray';
      }
    }
  }

  // Set up event handlers using the combined function
  recognition.onresult = function(event) { handleSpeechEvent('result', event); };
  recognition.onerror = function(event) { handleSpeechEvent('error', event); };
  recognition.onend = function() { handleSpeechEvent('end'); };
})();
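Note how the transcript gets back to the app: components.html embeds a write-only iframe with no return channel, so populateChatInput reaches through window.parent.document, finds Streamlit's chat textarea by its data-testid, and fires React's onChange via the element's __reactProps$ key. That trick is fragile across Streamlit/React upgrades, which is presumably why both lookups fail loudly. The conventional bidirectional alternative would be a custom component; the sketch below is hypothetical (the component name and path are illustrative, and its JS half would report results with Streamlit.setComponentValue(transcript) instead of poking the parent DOM):

import streamlit as st
import streamlit.components.v1 as components

# Hypothetical alternative: declare a custom component whose JS side pushes
# the transcript back through Streamlit's component protocol.
speech_to_text = components.declare_component(
    "speech_to_text",          # illustrative component name
    path="speech_component/",  # illustrative dir holding index.html + the JS
)

transcript = speech_to_text(default="")  # value sent from the JS side
if transcript:
    st.write("Heard:", transcript)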
aider/gui_speech_to_text.py (new file, 37 additions)

@@ -0,0 +1,37 @@
import base64
import os

import streamlit as st
import streamlit.components.v1 as components


class SpeechToText:
    """Class to handle speech-to-text functionality in the GUI"""

    def render(self):
        """Render the speech-to-text component with LED indicator"""
        self._js_dir = os.path.dirname(__file__)

        # Create JS file path
        js_path = os.path.join(self._js_dir, "gui_speech_to_text.js")
        if not os.path.exists(js_path):
            st.error(f"JavaScript file not found: {js_path}")
            return

        # Read the JS file for data URL
        with open(js_path, "r") as f:
            js_content = f.read()

        # Create data URL for the JS file
        js_b64 = base64.b64encode(js_content.encode("utf-8")).decode("utf-8")
        js_data_url = f"data:text/javascript;base64,{js_b64}"

        # Create simple HTML component with a container for the JS to populate
        components.html(
            f"""
            <div id="speech-to-text-container"></div>
            <!-- Load JS file via data URL since direct src paths don't work in Streamlit iframe -->
            <script src="{js_data_url}"></script>
            """,
            height=50,
        )
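The base64 data URL is the load-bearing trick here: the component's iframe has no filesystem behind it, so a relative src="gui_speech_to_text.js" would fail to load, while a data: URL carries the script inline. An equivalent variant (a sketch, not what this commit does) skips the encoding and inlines the source directly; it only works while the JS contains no literal "</script>" sequence, an escaping hazard the data-URL route sidesteps entirely:

import streamlit.components.v1 as components


def render_inline(js_content: str) -> None:
    # Inline variant: embed the JS source in the HTML itself. Assumes
    # js_content never contains "</script>", which would end the script
    # element early in the browser.
    components.html(
        f"""
        <div id="speech-to-text-container"></div>
        <script>{js_content}</script>
        """,
        height=50,
    )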