mirror of
https://github.com/Aider-AI/aider.git
synced 2025-05-28 00:05:01 +00:00
feat: Add OpenAI TTS audio generation and playback for recordings
This commit is contained in:
parent
874df40303
commit
278f748c1c
3 changed files with 173 additions and 20 deletions
|
@ -42,7 +42,7 @@ document.addEventListener('DOMContentLoaded', function() {
|
||||||
|
|
||||||
// Also trigger toast and speech
|
// Also trigger toast and speech
|
||||||
showToast(message);
|
showToast(message);
|
||||||
speakText(message);
|
speakText(message, timeInSeconds);
|
||||||
|
|
||||||
// Highlight this timestamp
|
// Highlight this timestamp
|
||||||
highlightTimestamp(timeInSeconds);
|
highlightTimestamp(timeInSeconds);
|
||||||
|
@ -70,7 +70,7 @@ document.addEventListener('DOMContentLoaded', function() {
|
||||||
|
|
||||||
// Also trigger toast and speech
|
// Also trigger toast and speech
|
||||||
showToast(message);
|
showToast(message);
|
||||||
speakText(message);
|
speakText(message, timeInSeconds);
|
||||||
|
|
||||||
// Highlight this timestamp
|
// Highlight this timestamp
|
||||||
highlightTimestamp(timeInSeconds);
|
highlightTimestamp(timeInSeconds);
|
||||||
|
@ -180,23 +180,40 @@ document.addEventListener('DOMContentLoaded', function() {
|
||||||
}, 3000);
|
}, 3000);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Function to play pre-generated TTS audio files for a commentary marker.
// Falls back to the browser's Web Speech API when the audio file is missing.
function speakText(text, timeInSeconds) {
  // Format time for filename (MM-SS), matching the Python generator's
  // f"{minutes:02d}-{seconds:02d}" naming scheme.
  const minutes = Math.floor(timeInSeconds / 60);
  // Floor the remainder too: a fractional timestamp (e.g. 5.5s) would
  // otherwise produce "00-5.5" and never match a generated filename.
  const seconds = Math.floor(timeInSeconds % 60);
  const formattedTime = `${minutes.toString().padStart(2, '0')}-${seconds.toString().padStart(2, '0')}`;

  // Get recording_id from the page or use default from the URL
  const recordingId = typeof recording_id !== 'undefined' ? recording_id :
    window.location.pathname.split('/').pop().replace('.html', '');

  // Construct audio file path
  const audioPath = `/assets/audio/${recordingId}/${formattedTime}.mp3`;

  // Create and play audio
  const audio = new Audio(audioPath);

  // Error handling with fallback to browser TTS
  audio.onerror = () => {
    console.warn(`Failed to load audio: ${audioPath}`);
    // Fallback to browser TTS
    if ('speechSynthesis' in window) {
      const utterance = new SpeechSynthesisUtterance(text);
      utterance.rate = 1.0;
      utterance.pitch = 1.0;
      utterance.volume = 1.0;
      window.speechSynthesis.speak(utterance);
    }
  };

  // Play the audio
  audio.play().catch(e => {
    console.warn(`Error playing audio: ${e.message}`);
  });
}
|
||||||
|
|
||||||
// Function to highlight the active timestamp in the transcript
|
// Function to highlight the active timestamp in the transcript
|
||||||
|
@ -243,7 +260,7 @@ document.addEventListener('DOMContentLoaded', function() {
|
||||||
console.log(`marker! ${index} - ${time} - ${label}`);
|
console.log(`marker! ${index} - ${time} - ${label}`);
|
||||||
|
|
||||||
// Speak the marker label and show toast
|
// Speak the marker label and show toast
|
||||||
speakText(label);
|
speakText(label, time);
|
||||||
showToast(label);
|
showToast(label);
|
||||||
|
|
||||||
// Highlight the corresponding timestamp in the transcript
|
// Highlight the corresponding timestamp in the transcript
|
||||||
|
|
|
@ -7,6 +7,7 @@ layout: minimal
|
||||||
# Don't /drop read-only files added at launch
|
# Don't /drop read-only files added at launch
|
||||||
|
|
||||||
<script>
|
<script>
|
||||||
|
const recording_id = "dont-drop-original-read-files";
|
||||||
const recording_url = "https://gist.githubusercontent.com/paul-gauthier/c2e7b2751925fb7bb47036cdd37ec40d/raw/08e62ab539e2b5d4b52c15c31d9a0d241377c17c/707583.cast";
|
const recording_url = "https://gist.githubusercontent.com/paul-gauthier/c2e7b2751925fb7bb47036cdd37ec40d/raw/08e62ab539e2b5d4b52c15c31d9a0d241377c17c/707583.cast";
|
||||||
</script>
|
</script>
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,135 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
Generate TTS audio files for recording commentary using OpenAI's API.
|
||||||
|
Usage: python scripts/recording_audio.py path/to/recording.md
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
import sys
|
||||||
|
import argparse
|
||||||
|
import requests
|
||||||
|
from pathlib import Path
|
||||||
|
from dotenv import load_dotenv
|
||||||
|
|
||||||
|
# Load environment variables from .env file
|
||||||
|
load_dotenv()
|
||||||
|
|
||||||
|
# Configuration
|
||||||
|
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
|
||||||
|
OUTPUT_DIR = "aider/website/assets/audio"
|
||||||
|
VOICE = "onyx" # Options: alloy, echo, fable, onyx, nova, shimmer
|
||||||
|
|
||||||
|
def extract_recording_id(markdown_file):
    """Return the recording ID: the markdown file's name without its suffix."""
    source_path = Path(markdown_file)
    return source_path.stem
def extract_commentary(markdown_file):
    """Extract commentary markers from a recording markdown file.

    Scans the "## Commentary" section for list items of the form
    "- M:SS message" and returns a list of (time_in_seconds, message)
    tuples. Returns an empty list (after printing a notice) when no
    Commentary section is present.
    """
    # Explicit encoding so parsing doesn't depend on the platform's
    # locale default (e.g. cp1252 on Windows).
    with open(markdown_file, 'r', encoding='utf-8') as f:
        content = f.read()

    # Find Commentary section: everything up to the next heading or EOF
    commentary_match = re.search(r'## Commentary\s+(.*?)(?=##|\Z)', content, re.DOTALL)
    if not commentary_match:
        print(f"No Commentary section found in {markdown_file}")
        return []

    commentary = commentary_match.group(1).strip()

    # Extract timestamp-message pairs from "- M:SS message" list items
    markers = []
    for line in commentary.split('\n'):
        line = line.strip()
        if line.startswith('- '):
            line = line[2:]  # Remove the list marker
            match = re.match(r'(\d+):(\d+)\s+(.*)', line)
            if match:
                minutes, seconds, message = match.groups()
                time_in_seconds = int(minutes) * 60 + int(seconds)
                markers.append((time_in_seconds, message))

    return markers
def generate_audio_openai(text, output_file):
    """Generate an mp3 for *text* via the OpenAI TTS API.

    Writes the audio bytes to *output_file* and returns True on success;
    prints a diagnostic and returns False on any failure.
    """
    if not OPENAI_API_KEY:
        print("Error: OPENAI_API_KEY environment variable not set")
        return False

    endpoint = "https://api.openai.com/v1/audio/speech"
    request_headers = {
        "Authorization": f"Bearer {OPENAI_API_KEY}",
        "Content-Type": "application/json"
    }
    payload = {
        "model": "tts-1",
        "input": text,
        "voice": VOICE
    }

    # Both the request and the file write stay inside the try so any
    # failure is reported the same way (best-effort semantics).
    try:
        response = requests.post(endpoint, headers=request_headers, json=payload)
        if response.status_code == 200:
            with open(output_file, 'wb') as f:
                f.write(response.content)
            return True
        print(f"Error: {response.status_code}, {response.text}")
        return False
    except Exception as e:
        print(f"Exception during API call: {e}")
        return False
def main():
    """Parse CLI arguments and generate one TTS mp3 per commentary marker."""
    # BUG FIX: `global VOICE` must precede any use of VOICE in this
    # function. The original declared it *after* VOICE was read in the
    # argparse defaults, which is a SyntaxError ("name 'VOICE' is used
    # prior to global declaration") and prevented the script from running.
    global VOICE

    parser = argparse.ArgumentParser(description='Generate TTS audio for recording commentary.')
    parser.add_argument('markdown_file', help='Path to the recording markdown file')
    parser.add_argument('--voice', default=VOICE, help=f'OpenAI voice to use (default: {VOICE})')
    parser.add_argument('--output-dir', default=OUTPUT_DIR, help=f'Output directory (default: {OUTPUT_DIR})')
    parser.add_argument('--dry-run', action='store_true', help='Print what would be done without generating audio')

    args = parser.parse_args()

    # Update globals with any command line overrides
    VOICE = args.voice

    recording_id = extract_recording_id(args.markdown_file)
    print(f"Processing recording: {recording_id}")

    # Create output directory (skipped on --dry-run)
    output_dir = os.path.join(args.output_dir, recording_id)
    if not args.dry_run:
        os.makedirs(output_dir, exist_ok=True)

    # Extract commentary markers
    markers = extract_commentary(args.markdown_file)
    if not markers:
        print("No commentary markers found!")
        return

    print(f"Found {len(markers)} commentary markers")

    # Generate audio for each marker; filenames are MM-SS.mp3 to match
    # the playback code's lookup scheme.
    for time_sec, message in markers:
        minutes, seconds = divmod(time_sec, 60)
        timestamp = f"{minutes:02d}-{seconds:02d}"
        output_file = os.path.join(output_dir, f"{timestamp}.mp3")

        print(f"Marker at {minutes}:{seconds:02d} - {message}")
        if args.dry_run:
            print(f"  Would generate: {output_file}")
        else:
            print(f"  Generating: {output_file}")
            success = generate_audio_openai(message, output_file)
            if success:
                print("  ✓ Generated audio file")
            else:
                print("  ✗ Failed to generate audio")

if __name__ == "__main__":
    main()
|
Loading…
Add table
Add a link
Reference in a new issue