feat: Add metadata tracking and TTS fallback improvements

2025-05-20 12:24:59 +00:00 · 2025-03-14 18:52:41 -07:00 · 2025-03-14 18:52:41 -07:00 · 8404165db3
commit 8404165db3
parent aee52c01a3
2 changed files with 99 additions and 12 deletions
--- a/aider/website/_includes/recording.js
+++ b/aider/website/_includes/recording.js
@ -180,6 +180,20 @@ document.addEventListener('DOMContentLoaded', function() {
    }, 3000);
  }
  // Function to use browser's TTS as fallback
  function useBrowserTTS(text) {
    if ('speechSynthesis' in window) {
      console.log('Using browser TTS fallback');
      const utterance = new SpeechSynthesisUtterance(text);
      utterance.rate = 1.0;
      utterance.pitch = 1.0;
      utterance.volume = 1.0;
      window.speechSynthesis.speak(utterance);
      return true;
    }
    return false;
  }
  // Function to play pre-generated TTS audio files
  function speakText(text, timeInSeconds) {
    // Format time for filename (MM-SS)
@ -200,19 +214,14 @@ document.addEventListener('DOMContentLoaded', function() {
    // Error handling with fallback to browser TTS
    audio.onerror = () => {
      console.warn(`Failed to load audio: ${audioPath}`);
-      // Fallback to browser TTS
+      useBrowserTTS(text);
      if ('speechSynthesis' in window) {
        const utterance = new SpeechSynthesisUtterance(text);
        utterance.rate = 1.0;
        utterance.pitch = 1.0;
        utterance.volume = 1.0;
        window.speechSynthesis.speak(utterance);
      }
    };
    // Play the audio
    audio.play().catch(e => {
      console.warn(`Error playing audio: ${e.message}`);
      // Also fall back to browser TTS if play() fails
      useBrowserTTS(text);
    });
  }
--- a/scripts/recording_audio.py
+++ b/scripts/recording_audio.py
@ -5,6 +5,7 @@ Usage: python scripts/recording_audio.py path/to/recording.md
 """
 import argparse
 import json
 import os
 import re
 import sys
@ -80,6 +81,32 @@ def generate_audio_openai(text, output_file, voice=VOICE):
        return False
 def load_metadata(output_dir):
    """Load the audio metadata JSON file if it exists."""
    metadata_file = os.path.join(output_dir, "metadata.json")
    if os.path.exists(metadata_file):
        try:
            with open(metadata_file, "r") as f:
                return json.load(f)
        except json.JSONDecodeError:
            print(f"Warning: Could not parse metadata file {metadata_file}, will recreate it")
    return {}
 def save_metadata(output_dir, metadata):
    """Save the audio metadata to JSON file."""
    metadata_file = os.path.join(output_dir, "metadata.json")
    with open(metadata_file, "w") as f:
        json.dump(metadata, f, indent=2)
 def get_timestamp_key(time_sec):
    """Generate a consistent timestamp key format for metadata."""
    minutes = time_sec // 60
    seconds = time_sec % 60
    return f"{minutes:02d}-{seconds:02d}"
 def main():
    parser = argparse.ArgumentParser(description="Generate TTS audio for recording commentary.")
    parser.add_argument("markdown_file", help="Path to the recording markdown file")
@ -90,6 +117,9 @@ def main():
    parser.add_argument(
        "--dry-run", action="store_true", help="Print what would be done without generating audio"
    )
    parser.add_argument(
        "--force", action="store_true", help="Force regeneration of all audio files"
    )
    args = parser.parse_args()
@ -113,15 +143,52 @@ def main():
    print(f"Found {len(markers)} commentary markers")
    # Load existing metadata
    metadata = load_metadata(output_dir)
    # Create a dictionary of current markers for easier comparison
    current_markers = {}
    for time_sec, message in markers:
        timestamp_key = get_timestamp_key(time_sec)
        current_markers[timestamp_key] = message
    # Track files that need to be deleted (no longer in the markdown)
    files_to_delete = []
    for timestamp_key in metadata:
        if timestamp_key not in current_markers:
            files_to_delete.append(f"{timestamp_key}.mp3")
    # Delete files that are no longer needed
    if files_to_delete and not args.dry_run:
        for filename in files_to_delete:
            file_path = os.path.join(output_dir, filename)
            if os.path.exists(file_path):
                print(f"Removing obsolete file: {filename}")
                os.remove(file_path)
    elif files_to_delete:
        print(f"Would remove {len(files_to_delete)} obsolete files: {', '.join(files_to_delete)}")
    # Generate audio for each marker
    for time_sec, message in markers:
        timestamp_key = get_timestamp_key(time_sec)
        filename = f"{timestamp_key}.mp3"
        output_file = os.path.join(output_dir, filename)
        # Check if we need to generate this file
        needs_update = args.force or (
            timestamp_key not in metadata or 
            metadata[timestamp_key] != message
        )
        minutes = time_sec // 60
        seconds = time_sec % 60
-        timestamp = f"{minutes:02d}-{seconds:02d}"
+        
        filename = f"{timestamp}.mp3"
        output_file = os.path.join(output_dir, filename)
        print(f"Marker at {minutes}:{seconds:02d} - {message}")
        if not needs_update:
            print(f"  ✓ Audio file already exists with correct content")
            continue
        if args.dry_run:
            print(f"  Would generate: {output_file}")
        else:
@ -129,8 +196,19 @@ def main():
            success = generate_audio_openai(message, output_file, voice=selected_voice)
            if success:
                print(f"  ✓ Generated audio file")
                # Update metadata with the new message
                metadata[timestamp_key] = message
            else:
                print(f"  ✗ Failed to generate audio")
    # Save updated metadata
    if not args.dry_run:
        # Remove entries for deleted files
        for timestamp_key in list(metadata.keys()):
            if timestamp_key not in current_markers:
                del metadata[timestamp_key]
        save_metadata(output_dir, metadata)
 if __name__ == "__main__":