# aider/scripts/recording_audio.py

#!/usr/bin/env python3
"""
Generate TTS audio files for recording commentary using OpenAI's API.
Usage: python scripts/recording_audio.py path/to/recording.md
"""

import argparse
import json
import os
import re
import subprocess
import tempfile
from pathlib import Path

import requests
from dotenv import load_dotenv

# Load environment variables from .env file.
# This must run before the configuration below is evaluated, because
# OPENAI_API_KEY is read from the environment at import time.
load_dotenv()

# Configuration
# API key for the OpenAI TTS endpoint; None when the variable is not set
# (generate_audio_openai reports an error and returns False in that case).
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
# Default base directory for generated audio; main() creates one subdirectory
# per recording ID underneath it.
OUTPUT_DIR = "aider/website/assets/audio"
# Default TTS voice (overridable with --voice).
VOICE = "onyx"  # Options: alloy, echo, fable, onyx, nova, shimmer
# Default bitrate handed to FFmpeg's "-b:a" option (overridable with --bitrate).
MP3_BITRATE = "32k"  # Lower bitrate for smaller files


def extract_recording_id(markdown_file):
    """Return the recording ID: the markdown file's name without its final suffix."""
    recording_path = Path(markdown_file)
    return recording_path.stem


def extract_commentary(markdown_file):
    """Extract timestamped commentary markers from a recording markdown file.

    The file is expected to contain a ``## Commentary`` section whose list
    items look like ``- M:SS message``. The section runs until the next line
    that starts with ``##`` (optionally indented) or the end of the file.
    Lines in the section that are not list items, or that do not start with a
    ``M:SS`` timestamp, are ignored.

    Args:
        markdown_file: Path to the markdown file to parse.

    Returns:
        A list of ``(time_in_seconds, message)`` tuples in file order, or an
        empty list if the file has no Commentary section.
    """
    # Read as UTF-8 explicitly so parsing does not depend on the platform's
    # default text encoding.
    with open(markdown_file, "r", encoding="utf-8") as f:
        content = f.read()

    # The terminating lookahead is anchored to the start of a line (MULTILINE)
    # so that a "##" appearing inside a commentary message does not cut the
    # section short and silently drop the remaining markers.
    commentary_match = re.search(
        r"## Commentary\s+(.*?)(?=^[ \t]*##|\Z)", content, re.DOTALL | re.MULTILINE
    )
    if not commentary_match:
        print(f"No Commentary section found in {markdown_file}")
        return []

    commentary = commentary_match.group(1).strip()

    # Extract timestamp-message pairs
    markers = []
    for line in commentary.split("\n"):
        line = line.strip()
        if not line.startswith("- "):
            continue
        # Drop the "- " list marker before matching the "M:SS message" form.
        match = re.match(r"(\d+):(\d+)\s+(.*)", line[2:])
        if match:
            minutes, seconds, message = match.groups()
            markers.append((int(minutes) * 60 + int(seconds), message))

    return markers


def check_ffmpeg():
    """Check whether a working FFmpeg executable is available.

    Runs ``ffmpeg -version`` and discards its output.

    Returns:
        True if the command could be started and exited with status 0,
        False otherwise.
    """
    try:
        result = subprocess.run(
            ["ffmpeg", "-version"], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL
        )
    except (subprocess.SubprocessError, OSError):
        # OSError covers a missing binary (FileNotFoundError) as well as one
        # that exists but cannot be executed (e.g. PermissionError).
        return False

    # A binary that starts but exits with an error is not usable either.
    return result.returncode == 0


def compress_audio(input_file, output_file, bitrate=MP3_BITRATE):
    """Re-encode an audio file as mono at the given bitrate using FFmpeg.

    Args:
        input_file: Path of the audio file to read.
        output_file: Path to write the compressed audio to; an existing file
            is overwritten (``-y``).
        bitrate: Audio bitrate passed to FFmpeg's ``-b:a`` option, e.g. ``"32k"``.

    Returns:
        True if FFmpeg ran and exited with status 0. False if FFmpeg is not
        available, could not be started, or reported a failure.
    """
    if not check_ffmpeg():
        print("Warning: FFmpeg not found, skipping compression")
        return False

    command = [
        "ffmpeg",
        "-i",
        input_file,
        "-b:a",
        bitrate,
        "-ac",
        "1",  # Mono audio
        "-y",  # Overwrite output file
        output_file,
    ]

    try:
        result = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    except (subprocess.SubprocessError, OSError) as e:
        print(f"Error compressing audio: {e}")
        return False

    if result.returncode != 0:
        # subprocess.run() does not raise on a non-zero exit status unless
        # check=True is given, so a failed conversion has to be detected here;
        # otherwise callers would treat a missing/invalid output as success.
        stderr_lines = result.stderr.decode("utf-8", errors="replace").strip().splitlines()
        reason = stderr_lines[-1] if stderr_lines else f"exit code {result.returncode}"
        print(f"Error compressing audio: {reason}")
        return False

    return True


def generate_audio_openai(text, output_file, voice=VOICE, bitrate=MP3_BITRATE, timeout=120):
    """Generate speech audio for ``text`` with OpenAI's TTS API and compress it.

    The API response is written to a temporary file, compressed into
    ``output_file`` with ``compress_audio``, and the temporary file is removed.
    If compression is unavailable or fails, the uncompressed API response is
    written to ``output_file`` instead.

    Args:
        text: The text to synthesize.
        output_file: Destination path of the resulting MP3 file.
        voice: OpenAI voice name sent in the request.
        bitrate: Bitrate forwarded to ``compress_audio``.
        timeout: Timeout in seconds passed to ``requests.post`` so that a
            stalled connection cannot hang the script indefinitely.

    Returns:
        True if an audio file was written to ``output_file``, False if the API
        key is missing, the API returned a non-200 status, or any exception
        occurred.
    """
    if not OPENAI_API_KEY:
        print("Error: OPENAI_API_KEY environment variable not set")
        return False

    url = "https://api.openai.com/v1/audio/speech"
    headers = {"Authorization": f"Bearer {OPENAI_API_KEY}", "Content-Type": "application/json"}
    data = {"model": "tts-1", "input": text, "voice": voice}

    temp_path = None
    try:
        response = requests.post(url, headers=headers, json=data, timeout=timeout)

        if response.status_code != 200:
            print(f"Error: {response.status_code}, {response.text}")
            return False

        # Use a temporary file for the initial audio
        with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as temp_file:
            temp_path = temp_file.name
            temp_file.write(response.content)

        original_size = os.path.getsize(temp_path)

        # Compress the audio to reduce file size
        if compress_audio(temp_path, output_file, bitrate):
            compressed_size = os.path.getsize(output_file)
            # Guard against an empty API response to avoid dividing by zero.
            reduction = (1 - compressed_size / original_size) * 100 if original_size else 0.0
            print(
                f"  ℹ Compressed: {original_size} → {compressed_size} bytes ({reduction:.1f}%"
                " reduction)"
            )
        else:
            # Compression failed or FFmpeg is not available: keep the original audio
            with open(output_file, "wb") as f:
                f.write(response.content)
            print(f"  ℹ Using original file: {original_size} bytes")

        return True
    except Exception as e:
        # Deliberately broad: any failure for one marker is reported and
        # signalled via the return value rather than aborting the whole run.
        print(f"Exception during API call: {e}")
        return False
    finally:
        # Always clean up the temporary file, including on error paths.
        if temp_path is not None:
            try:
                os.unlink(temp_path)
            except OSError:
                # Best-effort cleanup; a leftover temp file is not fatal.
                pass


def load_metadata(output_dir):
    """Return the parsed contents of ``metadata.json`` in ``output_dir``.

    An empty dict is returned when the file does not exist or when it cannot
    be parsed as JSON (a warning is printed in the latter case).
    """
    metadata_path = os.path.join(output_dir, "metadata.json")

    # Nothing stored yet for this directory.
    if not os.path.exists(metadata_path):
        return {}

    try:
        with open(metadata_path, "r") as handle:
            loaded = json.load(handle)
    except json.JSONDecodeError:
        print(f"Warning: Could not parse metadata file {metadata_path}, will recreate it")
        return {}

    return loaded


def save_metadata(output_dir, metadata):
    """Write ``metadata`` as indented JSON to ``metadata.json`` in ``output_dir``."""
    target_path = os.path.join(output_dir, "metadata.json")
    with open(target_path, "w") as out:
        json.dump(metadata, fp=out, indent=2)


def get_timestamp_key(time_sec):
    """Format a time offset in seconds as the zero-padded ``MM-SS`` metadata key."""
    whole_minutes, leftover_seconds = divmod(time_sec, 60)
    return f"{whole_minutes:02d}-{leftover_seconds:02d}"


def _build_arg_parser():
    """Build the command-line argument parser for this script."""
    parser = argparse.ArgumentParser(description="Generate TTS audio for recording commentary.")
    parser.add_argument("markdown_file", help="Path to the recording markdown file")
    parser.add_argument("--voice", default=VOICE, help=f"OpenAI voice to use (default: {VOICE})")
    parser.add_argument(
        "--output-dir", default=OUTPUT_DIR, help=f"Output directory (default: {OUTPUT_DIR})"
    )
    parser.add_argument(
        "--dry-run", action="store_true", help="Print what would be done without generating audio"
    )
    parser.add_argument(
        "--force", action="store_true", help="Force regeneration of all audio files"
    )
    parser.add_argument(
        "--bitrate",
        default=MP3_BITRATE,
        help=f"MP3 bitrate for compression (default: {MP3_BITRATE})",
    )
    parser.add_argument(
        "--compress-only",
        action="store_true",
        help="Only compress existing files without generating new ones",
    )
    return parser


def _compress_existing_files(output_dir, bitrate, dry_run):
    """Re-compress, in place, every existing audio file listed in the directory's metadata."""
    print("Compressing existing files only...")
    metadata = load_metadata(output_dir)
    for timestamp_key in metadata:
        filename = f"{timestamp_key}.mp3"
        file_path = os.path.join(output_dir, filename)

        if not os.path.exists(file_path):
            continue

        print(f"Compressing: {filename}")
        if dry_run:
            print(f"  Would compress: {file_path}")
            continue

        # The temporary file keeps an ".mp3" extension: FFmpeg chooses the
        # output format from the file extension and rejects unknown ones
        # such as ".temp".
        temp_file = os.path.join(output_dir, f"{timestamp_key}.temp.mp3")

        # Also require the output to exist so a failed conversion can never
        # replace the original file.
        if compress_audio(file_path, temp_file, bitrate) and os.path.exists(temp_file):
            # Get file sizes for reporting
            original_size = os.path.getsize(file_path)
            compressed_size = os.path.getsize(temp_file)
            # Guard against an empty original to avoid dividing by zero.
            reduction = (1 - compressed_size / original_size) * 100 if original_size else 0.0

            # Replace original with compressed version
            os.replace(temp_file, file_path)
            print(
                f"  ✓ Compressed: {original_size} → {compressed_size} bytes"
                f" ({reduction:.1f}% reduction)"
            )
        else:
            print("  ✗ Failed to compress")
            if os.path.exists(temp_file):
                os.remove(temp_file)


def main():
    """Command-line entry point: sync TTS audio files with a recording's commentary.

    For the given markdown file this:

    1. derives the recording ID and its audio directory,
    2. with ``--compress-only``, re-compresses the existing files and stops,
    3. otherwise removes audio files whose markers no longer exist,
    4. generates audio for markers that are new, changed, missing on disk,
       or all of them with ``--force``,
    5. writes the updated ``metadata.json``.

    With ``--dry-run`` nothing is created, deleted or written; the planned
    actions are only printed.
    """
    args = _build_arg_parser().parse_args()

    # Use the parsed values directly instead of modifying the module globals
    selected_voice = args.voice
    selected_bitrate = args.bitrate

    # Check if FFmpeg is available for compression
    if not check_ffmpeg() and not args.dry_run:
        print("Warning: FFmpeg not found. Audio compression will be skipped.")
        print("To enable compression, please install FFmpeg: https://ffmpeg.org/download.html")

    recording_id = extract_recording_id(args.markdown_file)
    print(f"Processing recording: {recording_id}")

    # Create output directory
    output_dir = os.path.join(args.output_dir, recording_id)
    print(f"Audio directory: {output_dir}")
    if not args.dry_run:
        os.makedirs(output_dir, exist_ok=True)

    # If compress-only flag is set, just compress existing files
    if args.compress_only:
        _compress_existing_files(output_dir, selected_bitrate, args.dry_run)
        return

    # Extract commentary markers
    markers = extract_commentary(args.markdown_file)

    if not markers:
        print("No commentary markers found!")
        return

    print(f"Found {len(markers)} commentary markers")

    # Load existing metadata
    metadata = load_metadata(output_dir)

    # Map timestamp key -> message for the markers currently in the markdown
    current_markers = {get_timestamp_key(time_sec): message for time_sec, message in markers}

    # Files recorded in the metadata whose marker no longer exists in the markdown
    files_to_delete = [f"{key}.mp3" for key in metadata if key not in current_markers]

    # Delete files that are no longer needed
    if files_to_delete and not args.dry_run:
        for filename in files_to_delete:
            file_path = os.path.join(output_dir, filename)
            if os.path.exists(file_path):
                print(f"Removing obsolete file: {filename}")
                os.remove(file_path)
    elif files_to_delete:
        print(f"Would remove {len(files_to_delete)} obsolete files: {', '.join(files_to_delete)}")

    # Generate audio for each marker
    for time_sec, message in markers:
        timestamp_key = get_timestamp_key(time_sec)
        filename = f"{timestamp_key}.mp3"
        output_file = os.path.join(output_dir, filename)

        # Regenerate when forced, when the marker is new or its text changed,
        # or when the metadata is up to date but the audio file itself is
        # missing from disk.
        needs_update = (
            args.force
            or timestamp_key not in metadata
            or metadata[timestamp_key] != message
            or not os.path.exists(output_file)
        )

        minutes = time_sec // 60
        seconds = time_sec % 60

        print(f"Marker at {minutes}:{seconds:02d} - {message}")

        if not needs_update:
            print("  ✓ Audio file already exists with correct content")
            continue

        if args.dry_run:
            print(f"  Would generate: {output_file}")
            continue

        print(f"  Generating: {output_file}")
        success = generate_audio_openai(
            message, output_file, voice=selected_voice, bitrate=selected_bitrate
        )
        if success:
            print("  ✓ Generated audio file")
            # Update metadata with the new message
            metadata[timestamp_key] = message
        else:
            print("  ✗ Failed to generate audio")

    # Save updated metadata
    if not args.dry_run:
        # Remove entries for deleted files (iterate over a copy while deleting)
        for timestamp_key in list(metadata):
            if timestamp_key not in current_markers:
                del metadata[timestamp_key]

        save_metadata(output_dir, metadata)


# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()