feat: add 24-hour cache for BigQuery download stats

This commit is contained in:
Paul Gauthier (aider) 2025-03-21 11:36:24 -07:00
parent ddc1556ae0
commit ac2439e25b

View file

@ -1,8 +1,12 @@
#!/usr/bin/env python3
import argparse
import json
import os
import sys
import time
from datetime import datetime, timedelta
from pathlib import Path
import requests
import yaml
@ -19,11 +23,78 @@ TOKENS_WEEKLY_TOOLTIP = "Number of tokens processed weekly by Aider users"
OPENROUTER_TOOLTIP = "Aider's ranking among applications on the OpenRouter platform"
SINGULARITY_TOOLTIP = "Percentage of the new code in Aider's last release written by Aider itself"
# Cache settings
CACHE_DIR = os.path.expanduser("~/.cache/aider-badges")
CACHE_DURATION = 24 * 60 * 60 # 24 hours in seconds
def ensure_cache_dir():
"""Create the cache directory if it doesn't exist"""
os.makedirs(CACHE_DIR, exist_ok=True)
def get_cache_path(package_name):
"""Get the path to the cache file for a package"""
return os.path.join(CACHE_DIR, f"{package_name}_downloads.json")
def read_from_cache(package_name):
"""
Read download statistics from cache if available and not expired
Returns (downloads, is_valid) tuple where is_valid is True if cache is valid
"""
cache_path = get_cache_path(package_name)
if not os.path.exists(cache_path):
return None, False
try:
with open(cache_path, 'r') as f:
cache_data = json.load(f)
# Check if cache is expired
timestamp = cache_data.get('timestamp', 0)
current_time = time.time()
if current_time - timestamp > CACHE_DURATION:
return None, False
return cache_data.get('downloads'), True
except Exception as e:
print(f"Error reading from cache: {e}", file=sys.stderr)
return None, False
def write_to_cache(package_name, downloads):
"""Write download statistics to cache"""
cache_path = get_cache_path(package_name)
try:
ensure_cache_dir()
cache_data = {
'downloads': downloads,
'timestamp': time.time(),
'datetime': datetime.now().isoformat()
}
with open(cache_path, 'w') as f:
json.dump(cache_data, f)
return True
except Exception as e:
print(f"Error writing to cache: {e}", file=sys.stderr)
return False
def get_downloads_from_bigquery(credentials_path=None, package_name="aider-chat"):
"""
Fetch download statistics for a package from Google BigQuery PyPI dataset
Uses a 24-hour cache to avoid unnecessary API calls
"""
# Check if we have a valid cached value
cached_downloads, is_valid = read_from_cache(package_name)
if is_valid:
print(f"Using cached download statistics for {package_name} (valid for 24 hours)")
return cached_downloads
print(f"Cache invalid or expired, fetching fresh download statistics for {package_name}")
try:
# Initialize credentials if path provided
credentials = None
@ -61,11 +132,18 @@ def get_downloads_from_bigquery(credentials_path=None, package_name="aider-chat"
# Get the first (and only) row
for row in results:
return row.total_downloads
downloads = row.total_downloads
# Write the result to cache
write_to_cache(package_name, downloads)
return downloads
return 0
except Exception as e:
print(f"Error fetching download statistics from BigQuery: {e}", file=sys.stderr)
# If there was an error but we have a cached value, use it even if expired
if cached_downloads is not None:
print("Using expired cached data due to BigQuery error")
return cached_downloads
return None
@ -332,6 +410,9 @@ def get_badges_html():
def main():
# Load environment variables from .env file
load_dotenv()
# Ensure cache directory exists
ensure_cache_dir()
parser = argparse.ArgumentParser(description="Get total downloads and GitHub stars for aider")
parser.add_argument(