feat: compress MP3 files using FFmpeg with configurable bitrate

2025-05-20 04:14:59 +00:00 · 2025-03-14 18:55:31 -07:00 · 2025-03-14 18:55:31 -07:00 · 5cde755976
commit 5cde755976
parent 51aab7b656
1 changed files with 111 additions and 5 deletions
--- a/scripts/recording_audio.py
+++ b/scripts/recording_audio.py
@ -8,7 +8,9 @@ import argparse
 import json
 import os
 import re
+import subprocess
 import sys
+import tempfile
 from pathlib import Path

 import requests
@ -21,6 +23,7 @@ load_dotenv()
 OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
 OUTPUT_DIR = "aider/website/assets/audio"
 VOICE = "onyx"  # Options: alloy, echo, fable, onyx, nova, shimmer
+MP3_BITRATE = "32k"  # Lower bitrate for smaller files


 def extract_recording_id(markdown_file):
@ -56,8 +59,42 @@ def extract_commentary(markdown_file):
    return markers


-def generate_audio_openai(text, output_file, voice=VOICE):
-    """Generate audio using OpenAI TTS API."""
+def check_ffmpeg():
+    """Check if FFmpeg is available."""
+    try:
+        subprocess.run(["ffmpeg", "-version"], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+        return True
+    except (subprocess.SubprocessError, FileNotFoundError):
+        return False
+
+
+def compress_audio(input_file, output_file, bitrate=MP3_BITRATE):
+    """Compress audio file using FFmpeg."""
+    if not check_ffmpeg():
+        print("Warning: FFmpeg not found, skipping compression")
+        return False
+    
+    try:
+        subprocess.run(
+            [
+                "ffmpeg", 
+                "-i", input_file, 
+                "-b:a", bitrate, 
+                "-ac", "1",  # Mono audio
+                "-y",  # Overwrite output file
+                output_file
+            ],
+            stdout=subprocess.PIPE, 
+            stderr=subprocess.PIPE
+        )
+        return True
+    except subprocess.SubprocessError as e:
+        print(f"Error compressing audio: {e}")
+        return False
+
+
+def generate_audio_openai(text, output_file, voice=VOICE, bitrate=MP3_BITRATE):
+    """Generate audio using OpenAI TTS API and compress it."""
    if not OPENAI_API_KEY:
        print("Error: OPENAI_API_KEY environment variable not set")
        return False
@ -70,8 +107,33 @@ def generate_audio_openai(text, output_file, voice=VOICE):
        response = requests.post(url, headers=headers, json=data)

        if response.status_code == 200:
-            with open(output_file, "wb") as f:
-                f.write(response.content)
+            # Use a temporary file for the initial audio
+            with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as temp_file:
+                temp_path = temp_file.name
+                temp_file.write(response.content)
+            
+            # Get original file size
+            original_size = os.path.getsize(temp_path)
+            
+            # Compress the audio to reduce file size
+            success = compress_audio(temp_path, output_file, bitrate)
+            
+            # If compression failed or FFmpeg not available, use the original file
+            if not success:
+                with open(output_file, "wb") as f:
+                    f.write(response.content)
+                print(f"  ℹ Using original file: {original_size} bytes")
+            else:
+                compressed_size = os.path.getsize(output_file)
+                reduction = (1 - compressed_size / original_size) * 100
+                print(f"  ℹ Compressed: {original_size} → {compressed_size} bytes ({reduction:.1f}% reduction)")
+            
+            # Clean up the temporary file
+            try:
+                os.unlink(temp_path)
+            except OSError:
+                pass
+                
            return True
        else:
            print(f"Error: {response.status_code}, {response.text}")
@ -123,11 +185,23 @@ def main():
    parser.add_argument(
        "--force", action="store_true", help="Force regeneration of all audio files"
    )
+    parser.add_argument(
+        "--bitrate", default=MP3_BITRATE, help=f"MP3 bitrate for compression (default: {MP3_BITRATE})"
+    )
+    parser.add_argument(
+        "--compress-only", action="store_true", help="Only compress existing files without generating new ones"
+    )

    args = parser.parse_args()

    # Use args.voice directly instead of modifying global VOICE
    selected_voice = args.voice
+    selected_bitrate = args.bitrate
+
+    # Check if FFmpeg is available for compression
+    if not check_ffmpeg() and not args.dry_run:
+        print("Warning: FFmpeg not found. Audio compression will be skipped.")
+        print("To enable compression, please install FFmpeg: https://ffmpeg.org/download.html")

    recording_id = extract_recording_id(args.markdown_file)
    print(f"Processing recording: {recording_id}")
@ -137,6 +211,38 @@ def main():
    print(f"Audio directory: {output_dir}")
    if not args.dry_run:
        os.makedirs(output_dir, exist_ok=True)
+        
+    # If compress-only flag is set, just compress existing files
+    if args.compress_only:
+        print("Compressing existing files only...")
+        metadata = load_metadata(output_dir)
+        for timestamp_key in metadata:
+            filename = f"{timestamp_key}.mp3"
+            file_path = os.path.join(output_dir, filename)
+            
+            if os.path.exists(file_path):
+                temp_file = f"{file_path}.temp"
+                print(f"Compressing: {filename}")
+                
+                if not args.dry_run:
+                    success = compress_audio(file_path, temp_file, selected_bitrate)
+                    if success:
+                        # Get file sizes for reporting
+                        original_size = os.path.getsize(file_path)
+                        compressed_size = os.path.getsize(temp_file)
+                        reduction = (1 - compressed_size / original_size) * 100
+                        
+                        # Replace original with compressed version
+                        os.replace(temp_file, file_path)
+                        print(f"  ✓ Compressed: {original_size} → {compressed_size} bytes ({reduction:.1f}% reduction)")
+                    else:
+                        print(f"  ✗ Failed to compress")
+                        if os.path.exists(temp_file):
+                            os.remove(temp_file)
+                else:
+                    print(f"  Would compress: {file_path}")
+        
+        return

    # Extract commentary markers
    markers = extract_commentary(args.markdown_file)
@ -196,7 +302,7 @@ def main():
            print(f"  Would generate: {output_file}")
        else:
            print(f"  Generating: {output_file}")
-            success = generate_audio_openai(message, output_file, voice=selected_voice)
+            success = generate_audio_openai(message, output_file, voice=selected_voice, bitrate=selected_bitrate)
            if success:
                print(f"  ✓ Generated audio file")
                # Update metadata with the new message