feat: compress MP3 files using FFmpeg with configurable bitrate

This commit is contained in:
Paul Gauthier (aider) 2025-03-14 18:55:31 -07:00
parent 51aab7b656
commit 5cde755976

View file

@ -8,7 +8,9 @@ import argparse
import json import json
import os import os
import re import re
import subprocess
import sys import sys
import tempfile
from pathlib import Path from pathlib import Path
import requests import requests
@ -21,6 +23,7 @@ load_dotenv()
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY") OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
OUTPUT_DIR = "aider/website/assets/audio" OUTPUT_DIR = "aider/website/assets/audio"
VOICE = "onyx" # Options: alloy, echo, fable, onyx, nova, shimmer VOICE = "onyx" # Options: alloy, echo, fable, onyx, nova, shimmer
MP3_BITRATE = "32k" # Lower bitrate for smaller files
def extract_recording_id(markdown_file): def extract_recording_id(markdown_file):
@ -56,8 +59,42 @@ def extract_commentary(markdown_file):
return markers return markers
def generate_audio_openai(text, output_file, voice=VOICE): def check_ffmpeg():
"""Generate audio using OpenAI TTS API.""" """Check if FFmpeg is available."""
try:
subprocess.run(["ffmpeg", "-version"], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
return True
except (subprocess.SubprocessError, FileNotFoundError):
return False
def compress_audio(input_file, output_file, bitrate=MP3_BITRATE):
"""Compress audio file using FFmpeg."""
if not check_ffmpeg():
print("Warning: FFmpeg not found, skipping compression")
return False
try:
subprocess.run(
[
"ffmpeg",
"-i", input_file,
"-b:a", bitrate,
"-ac", "1", # Mono audio
"-y", # Overwrite output file
output_file
],
stdout=subprocess.PIPE,
stderr=subprocess.PIPE
)
return True
except subprocess.SubprocessError as e:
print(f"Error compressing audio: {e}")
return False
def generate_audio_openai(text, output_file, voice=VOICE, bitrate=MP3_BITRATE):
"""Generate audio using OpenAI TTS API and compress it."""
if not OPENAI_API_KEY: if not OPENAI_API_KEY:
print("Error: OPENAI_API_KEY environment variable not set") print("Error: OPENAI_API_KEY environment variable not set")
return False return False
@ -70,8 +107,33 @@ def generate_audio_openai(text, output_file, voice=VOICE):
response = requests.post(url, headers=headers, json=data) response = requests.post(url, headers=headers, json=data)
if response.status_code == 200: if response.status_code == 200:
with open(output_file, "wb") as f: # Use a temporary file for the initial audio
f.write(response.content) with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as temp_file:
temp_path = temp_file.name
temp_file.write(response.content)
# Get original file size
original_size = os.path.getsize(temp_path)
# Compress the audio to reduce file size
success = compress_audio(temp_path, output_file, bitrate)
# If compression failed or FFmpeg not available, use the original file
if not success:
with open(output_file, "wb") as f:
f.write(response.content)
print(f" Using original file: {original_size} bytes")
else:
compressed_size = os.path.getsize(output_file)
reduction = (1 - compressed_size / original_size) * 100
print(f" Compressed: {original_size}{compressed_size} bytes ({reduction:.1f}% reduction)")
# Clean up the temporary file
try:
os.unlink(temp_path)
except OSError:
pass
return True return True
else: else:
print(f"Error: {response.status_code}, {response.text}") print(f"Error: {response.status_code}, {response.text}")
@ -123,11 +185,23 @@ def main():
parser.add_argument( parser.add_argument(
"--force", action="store_true", help="Force regeneration of all audio files" "--force", action="store_true", help="Force regeneration of all audio files"
) )
parser.add_argument(
"--bitrate", default=MP3_BITRATE, help=f"MP3 bitrate for compression (default: {MP3_BITRATE})"
)
parser.add_argument(
"--compress-only", action="store_true", help="Only compress existing files without generating new ones"
)
args = parser.parse_args() args = parser.parse_args()
# Use args.voice directly instead of modifying global VOICE # Use args.voice directly instead of modifying global VOICE
selected_voice = args.voice selected_voice = args.voice
selected_bitrate = args.bitrate
# Check if FFmpeg is available for compression
if not check_ffmpeg() and not args.dry_run:
print("Warning: FFmpeg not found. Audio compression will be skipped.")
print("To enable compression, please install FFmpeg: https://ffmpeg.org/download.html")
recording_id = extract_recording_id(args.markdown_file) recording_id = extract_recording_id(args.markdown_file)
print(f"Processing recording: {recording_id}") print(f"Processing recording: {recording_id}")
@ -137,6 +211,38 @@ def main():
print(f"Audio directory: {output_dir}") print(f"Audio directory: {output_dir}")
if not args.dry_run: if not args.dry_run:
os.makedirs(output_dir, exist_ok=True) os.makedirs(output_dir, exist_ok=True)
# If compress-only flag is set, just compress existing files
if args.compress_only:
print("Compressing existing files only...")
metadata = load_metadata(output_dir)
for timestamp_key in metadata:
filename = f"{timestamp_key}.mp3"
file_path = os.path.join(output_dir, filename)
if os.path.exists(file_path):
temp_file = f"{file_path}.temp"
print(f"Compressing: {filename}")
if not args.dry_run:
success = compress_audio(file_path, temp_file, selected_bitrate)
if success:
# Get file sizes for reporting
original_size = os.path.getsize(file_path)
compressed_size = os.path.getsize(temp_file)
reduction = (1 - compressed_size / original_size) * 100
# Replace original with compressed version
os.replace(temp_file, file_path)
print(f" ✓ Compressed: {original_size}{compressed_size} bytes ({reduction:.1f}% reduction)")
else:
print(f" ✗ Failed to compress")
if os.path.exists(temp_file):
os.remove(temp_file)
else:
print(f" Would compress: {file_path}")
return
# Extract commentary markers # Extract commentary markers
markers = extract_commentary(args.markdown_file) markers = extract_commentary(args.markdown_file)
@ -196,7 +302,7 @@ def main():
print(f" Would generate: {output_file}") print(f" Would generate: {output_file}")
else: else:
print(f" Generating: {output_file}") print(f" Generating: {output_file}")
success = generate_audio_openai(message, output_file, voice=selected_voice) success = generate_audio_openai(message, output_file, voice=selected_voice, bitrate=selected_bitrate)
if success: if success:
print(f" ✓ Generated audio file") print(f" ✓ Generated audio file")
# Update metadata with the new message # Update metadata with the new message