diff --git a/scripts/recording_audio.py b/scripts/recording_audio.py index a1496feee..9073e5df0 100755 --- a/scripts/recording_audio.py +++ b/scripts/recording_audio.py @@ -8,7 +8,9 @@ import argparse import json import os import re +import subprocess import sys +import tempfile from pathlib import Path import requests @@ -21,6 +23,7 @@ load_dotenv() OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY") OUTPUT_DIR = "aider/website/assets/audio" VOICE = "onyx" # Options: alloy, echo, fable, onyx, nova, shimmer +MP3_BITRATE = "32k" # Lower bitrate for smaller files def extract_recording_id(markdown_file): @@ -56,8 +59,42 @@ def extract_commentary(markdown_file): return markers -def generate_audio_openai(text, output_file, voice=VOICE): - """Generate audio using OpenAI TTS API.""" +def check_ffmpeg(): + """Check if FFmpeg is available.""" + try: + subprocess.run(["ffmpeg", "-version"], stdout=subprocess.PIPE, stderr=subprocess.PIPE) + return True + except (subprocess.SubprocessError, FileNotFoundError): + return False + + +def compress_audio(input_file, output_file, bitrate=MP3_BITRATE): + """Compress audio file using FFmpeg.""" + if not check_ffmpeg(): + print("Warning: FFmpeg not found, skipping compression") + return False + + try: + subprocess.run( + [ + "ffmpeg", + "-i", input_file, + "-b:a", bitrate, + "-ac", "1", # Mono audio + "-y", # Overwrite output file + output_file + ], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE + ) + return True + except subprocess.SubprocessError as e: + print(f"Error compressing audio: {e}") + return False + + +def generate_audio_openai(text, output_file, voice=VOICE, bitrate=MP3_BITRATE): + """Generate audio using OpenAI TTS API and compress it.""" if not OPENAI_API_KEY: print("Error: OPENAI_API_KEY environment variable not set") return False @@ -70,8 +107,33 @@ def generate_audio_openai(text, output_file, voice=VOICE): response = requests.post(url, headers=headers, json=data) if response.status_code == 200: - with open(output_file, "wb") as f: - f.write(response.content) + # Use a temporary file for the initial audio + with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as temp_file: + temp_path = temp_file.name + temp_file.write(response.content) + + # Get original file size + original_size = os.path.getsize(temp_path) + + # Compress the audio to reduce file size + success = compress_audio(temp_path, output_file, bitrate) + + # If compression failed or FFmpeg not available, use the original file + if not success: + with open(output_file, "wb") as f: + f.write(response.content) + print(f" ℹ Using original file: {original_size} bytes") + else: + compressed_size = os.path.getsize(output_file) + reduction = (1 - compressed_size / original_size) * 100 + print(f" ℹ Compressed: {original_size} → {compressed_size} bytes ({reduction:.1f}% reduction)") + + # Clean up the temporary file + try: + os.unlink(temp_path) + except OSError: + pass + return True else: print(f"Error: {response.status_code}, {response.text}") @@ -123,11 +185,23 @@ def main(): parser.add_argument( "--force", action="store_true", help="Force regeneration of all audio files" ) + parser.add_argument( + "--bitrate", default=MP3_BITRATE, help=f"MP3 bitrate for compression (default: {MP3_BITRATE})" + ) + parser.add_argument( + "--compress-only", action="store_true", help="Only compress existing files without generating new ones" + ) args = parser.parse_args() # Use args.voice directly instead of modifying global VOICE selected_voice = args.voice + selected_bitrate = args.bitrate + + # Check if FFmpeg is available for compression + if not check_ffmpeg() and not args.dry_run: + print("Warning: FFmpeg not found. Audio compression will be skipped.") + print("To enable compression, please install FFmpeg: https://ffmpeg.org/download.html") recording_id = extract_recording_id(args.markdown_file) print(f"Processing recording: {recording_id}") @@ -137,6 +211,38 @@ def main(): print(f"Audio directory: {output_dir}") if not args.dry_run: os.makedirs(output_dir, exist_ok=True) + + # If compress-only flag is set, just compress existing files + if args.compress_only: + print("Compressing existing files only...") + metadata = load_metadata(output_dir) + for timestamp_key in metadata: + filename = f"{timestamp_key}.mp3" + file_path = os.path.join(output_dir, filename) + + if os.path.exists(file_path): + temp_file = f"{file_path}.temp" + print(f"Compressing: {filename}") + + if not args.dry_run: + success = compress_audio(file_path, temp_file, selected_bitrate) + if success: + # Get file sizes for reporting + original_size = os.path.getsize(file_path) + compressed_size = os.path.getsize(temp_file) + reduction = (1 - compressed_size / original_size) * 100 + + # Replace original with compressed version + os.replace(temp_file, file_path) + print(f" ✓ Compressed: {original_size} → {compressed_size} bytes ({reduction:.1f}% reduction)") + else: + print(f" ✗ Failed to compress") + if os.path.exists(temp_file): + os.remove(temp_file) + else: + print(f" Would compress: {file_path}") + + return # Extract commentary markers markers = extract_commentary(args.markdown_file) @@ -196,7 +302,7 @@ def main(): print(f" Would generate: {output_file}") else: print(f" Generating: {output_file}") - success = generate_audio_openai(message, output_file, voice=selected_voice) + success = generate_audio_openai(message, output_file, voice=selected_voice, bitrate=selected_bitrate) if success: print(f" ✓ Generated audio file") # Update metadata with the new message