feat: compress MP3 files using FFmpeg with configurable bitrate

This commit is contained in:
Paul Gauthier (aider) 2025-03-14 18:55:31 -07:00
parent 51aab7b656
commit 5cde755976

View file

@ -8,7 +8,9 @@ import argparse
import json
import os
import re
import subprocess
import sys
import tempfile
from pathlib import Path
import requests
@ -21,6 +23,7 @@ load_dotenv()
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
OUTPUT_DIR = "aider/website/assets/audio"
VOICE = "onyx" # Options: alloy, echo, fable, onyx, nova, shimmer
MP3_BITRATE = "32k" # Lower bitrate for smaller files
def extract_recording_id(markdown_file):
@ -56,8 +59,42 @@ def extract_commentary(markdown_file):
return markers
def generate_audio_openai(text, output_file, voice=VOICE):
"""Generate audio using OpenAI TTS API."""
def check_ffmpeg():
"""Check if FFmpeg is available."""
try:
subprocess.run(["ffmpeg", "-version"], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
return True
except (subprocess.SubprocessError, FileNotFoundError):
return False
def compress_audio(input_file, output_file, bitrate=MP3_BITRATE):
"""Compress audio file using FFmpeg."""
if not check_ffmpeg():
print("Warning: FFmpeg not found, skipping compression")
return False
try:
subprocess.run(
[
"ffmpeg",
"-i", input_file,
"-b:a", bitrate,
"-ac", "1", # Mono audio
"-y", # Overwrite output file
output_file
],
stdout=subprocess.PIPE,
stderr=subprocess.PIPE
)
return True
except subprocess.SubprocessError as e:
print(f"Error compressing audio: {e}")
return False
def generate_audio_openai(text, output_file, voice=VOICE, bitrate=MP3_BITRATE):
"""Generate audio using OpenAI TTS API and compress it."""
if not OPENAI_API_KEY:
print("Error: OPENAI_API_KEY environment variable not set")
return False
@ -70,8 +107,33 @@ def generate_audio_openai(text, output_file, voice=VOICE):
response = requests.post(url, headers=headers, json=data)
if response.status_code == 200:
with open(output_file, "wb") as f:
f.write(response.content)
# Use a temporary file for the initial audio
with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as temp_file:
temp_path = temp_file.name
temp_file.write(response.content)
# Get original file size
original_size = os.path.getsize(temp_path)
# Compress the audio to reduce file size
success = compress_audio(temp_path, output_file, bitrate)
# If compression failed or FFmpeg not available, use the original file
if not success:
with open(output_file, "wb") as f:
f.write(response.content)
print(f" Using original file: {original_size} bytes")
else:
compressed_size = os.path.getsize(output_file)
reduction = (1 - compressed_size / original_size) * 100
print(f" Compressed: {original_size}{compressed_size} bytes ({reduction:.1f}% reduction)")
# Clean up the temporary file
try:
os.unlink(temp_path)
except OSError:
pass
return True
else:
print(f"Error: {response.status_code}, {response.text}")
@ -123,11 +185,23 @@ def main():
parser.add_argument(
"--force", action="store_true", help="Force regeneration of all audio files"
)
parser.add_argument(
"--bitrate", default=MP3_BITRATE, help=f"MP3 bitrate for compression (default: {MP3_BITRATE})"
)
parser.add_argument(
"--compress-only", action="store_true", help="Only compress existing files without generating new ones"
)
args = parser.parse_args()
# Use args.voice directly instead of modifying global VOICE
selected_voice = args.voice
selected_bitrate = args.bitrate
# Check if FFmpeg is available for compression
if not check_ffmpeg() and not args.dry_run:
print("Warning: FFmpeg not found. Audio compression will be skipped.")
print("To enable compression, please install FFmpeg: https://ffmpeg.org/download.html")
recording_id = extract_recording_id(args.markdown_file)
print(f"Processing recording: {recording_id}")
@ -137,6 +211,38 @@ def main():
print(f"Audio directory: {output_dir}")
if not args.dry_run:
os.makedirs(output_dir, exist_ok=True)
# If compress-only flag is set, just compress existing files
if args.compress_only:
print("Compressing existing files only...")
metadata = load_metadata(output_dir)
for timestamp_key in metadata:
filename = f"{timestamp_key}.mp3"
file_path = os.path.join(output_dir, filename)
if os.path.exists(file_path):
temp_file = f"{file_path}.temp"
print(f"Compressing: {filename}")
if not args.dry_run:
success = compress_audio(file_path, temp_file, selected_bitrate)
if success:
# Get file sizes for reporting
original_size = os.path.getsize(file_path)
compressed_size = os.path.getsize(temp_file)
reduction = (1 - compressed_size / original_size) * 100
# Replace original with compressed version
os.replace(temp_file, file_path)
print(f" ✓ Compressed: {original_size}{compressed_size} bytes ({reduction:.1f}% reduction)")
else:
print(f" ✗ Failed to compress")
if os.path.exists(temp_file):
os.remove(temp_file)
else:
print(f" Would compress: {file_path}")
return
# Extract commentary markers
markers = extract_commentary(args.markdown_file)
@ -196,7 +302,7 @@ def main():
print(f" Would generate: {output_file}")
else:
print(f" Generating: {output_file}")
success = generate_audio_openai(message, output_file, voice=selected_voice)
success = generate_audio_openai(message, output_file, voice=selected_voice, bitrate=selected_bitrate)
if success:
print(f" ✓ Generated audio file")
# Update metadata with the new message