feat: Add OpenAI TTS audio generation and playback for recordings

This commit is contained in:
Paul Gauthier (aider) 2025-03-14 18:47:06 -07:00
parent 874df40303
commit 278f748c1c
3 changed files with 173 additions and 20 deletions

View file

@@ -42,7 +42,7 @@ document.addEventListener('DOMContentLoaded', function() {
// Also trigger toast and speech
showToast(message);
speakText(message);
speakText(message, timeInSeconds);
// Highlight this timestamp
highlightTimestamp(timeInSeconds);
@@ -70,7 +70,7 @@ document.addEventListener('DOMContentLoaded', function() {
// Also trigger toast and speech
showToast(message);
speakText(message);
speakText(message, timeInSeconds);
// Highlight this timestamp
highlightTimestamp(timeInSeconds);
@@ -180,23 +180,40 @@ document.addEventListener('DOMContentLoaded', function() {
}, 3000);
}
// Function to speak text using the Web Speech API
function speakText(text) {
// Check if speech synthesis is supported
if ('speechSynthesis' in window) {
// Create a new speech synthesis utterance
const utterance = new SpeechSynthesisUtterance(text);
// Optional: Configure voice properties
utterance.rate = 1.0; // Speech rate (0.1 to 10)
utterance.pitch = 1.0; // Speech pitch (0 to 2)
utterance.volume = 1.0; // Speech volume (0 to 1)
// Speak the text
window.speechSynthesis.speak(utterance);
} else {
console.warn('Speech synthesis not supported in this browser');
}
/**
 * Play a pre-generated TTS audio clip for a recording marker, falling back
 * to the browser's Web Speech API if the clip fails to load.
 *
 * @param {string} text - Text to speak; used only by the browser-TTS fallback.
 * @param {number} timeInSeconds - Marker time in seconds; selects the MM-SS
 *   audio filename to play.
 */
function speakText(text, timeInSeconds) {
// Format time for filename (MM-SS). Floor BOTH components: the original
// floored only minutes, so a fractional time (e.g. 65.4) produced a
// malformed name like "01-5.4.mp3". Integer inputs are unaffected.
const minutes = Math.floor(timeInSeconds / 60);
const seconds = Math.floor(timeInSeconds % 60);
const formattedTime = `${minutes.toString().padStart(2, '0')}-${seconds.toString().padStart(2, '0')}`;
// Get recording_id from the page or, if the page didn't declare one,
// derive it from the URL's final path segment ("foo.html" -> "foo").
const recordingId = typeof recording_id !== 'undefined' ? recording_id :
window.location.pathname.split('/').pop().replace('.html', '');
// Construct audio file path
const audioPath = `/assets/audio/${recordingId}/${formattedTime}.mp3`;
// Create and play audio
const audio = new Audio(audioPath);
// If the pre-generated clip is missing or unloadable, fall back to the
// browser's built-in speech synthesis (best effort; skipped silently
// when the API is unavailable).
audio.onerror = () => {
console.warn(`Failed to load audio: ${audioPath}`);
if ('speechSynthesis' in window) {
const utterance = new SpeechSynthesisUtterance(text);
utterance.rate = 1.0;
utterance.pitch = 1.0;
utterance.volume = 1.0;
window.speechSynthesis.speak(utterance);
}
};
// Play the audio; play() returns a Promise that rejects e.g. when the
// browser blocks autoplay — log rather than throw.
audio.play().catch(e => {
console.warn(`Error playing audio: ${e.message}`);
});
}
// Function to highlight the active timestamp in the transcript
@@ -243,7 +260,7 @@ document.addEventListener('DOMContentLoaded', function() {
console.log(`marker! ${index} - ${time} - ${label}`);
// Speak the marker label and show toast
speakText(label);
speakText(label, time);
showToast(label);
// Highlight the corresponding timestamp in the transcript

View file

@@ -7,6 +7,7 @@ layout: minimal
# Don't /drop read-only files added at launch
<script>
const recording_id = "dont-drop-original-read-files";
const recording_url = "https://gist.githubusercontent.com/paul-gauthier/c2e7b2751925fb7bb47036cdd37ec40d/raw/08e62ab539e2b5d4b52c15c31d9a0d241377c17c/707583.cast";
</script>