aider/scripts/homepage.py
2025-04-11 08:38:22 +12:00

619 lines
23 KiB
Python
Executable file

#!/usr/bin/env python3
import argparse
import json
import os
import sys
import time
from datetime import datetime
import requests
import yaml
from dotenv import load_dotenv
from google.cloud import bigquery
from google.oauth2 import service_account
# Display string for the "Tokens/week" badge; a literal that the badge templates
# interpolate as-is (it is not computed anywhere in this module).
TOKENS_PER_WEEK = "15B"

# Badge tooltip texts (used as the title attribute of each badge)
GITHUB_STARS_TOOLTIP = "Total number of GitHub stars the Aider project has received"
PYPI_DOWNLOADS_TOOLTIP = "Total number of installations via pip from PyPI"
TOKENS_WEEKLY_TOOLTIP = "Number of tokens processed weekly by Aider users"
OPENROUTER_TOOLTIP = "Aider's ranking among applications on the OpenRouter platform"
SINGULARITY_TOOLTIP = "Percentage of the new code in Aider's last release written by Aider itself"

# Cache settings
# Directory (under the user's home) holding the per-package download-count cache files.
CACHE_DIR = os.path.expanduser("~/.cache/aider-badges")
# Maximum age of a cache entry before it is considered expired.
CACHE_DURATION = 24 * 60 * 60  # 24 hours in seconds
def ensure_cache_dir():
    """Make sure CACHE_DIR is present on disk; an already-existing directory is fine."""
    os.makedirs(name=CACHE_DIR, exist_ok=True)
def get_cache_path(package_name):
    """Return the location of the JSON cache file for ``package_name`` inside CACHE_DIR."""
    cache_filename = f"{package_name}_downloads.json"
    return os.path.join(CACHE_DIR, cache_filename)
def read_from_cache(package_name):
    """
    Read download statistics for ``package_name`` from the on-disk cache.

    Returns a ``(downloads, is_valid)`` tuple:

    * ``(value, True)``  - an entry exists and is at most CACHE_DURATION seconds old.
    * ``(value, False)`` - an entry exists but has expired. The stale value is still
      handed back so a caller can use it as a last resort when refreshing fails.
    * ``(None, False)``  - there is no cache file, or it could not be read or parsed.
    """
    cache_path = get_cache_path(package_name)
    if not os.path.exists(cache_path):
        return None, False

    try:
        with open(cache_path, "r", encoding="utf-8") as f:
            cache_data = json.load(f)

        downloads = cache_data.get("downloads")
        # A missing timestamp defaults to 0 (the epoch), so such an entry counts as expired.
        age_seconds = time.time() - cache_data.get("timestamp", 0)
        return downloads, age_seconds <= CACHE_DURATION
    except Exception as e:
        # Deliberately best-effort: a corrupt or unreadable cache must never abort the
        # caller, it only means fresh data has to be fetched.
        print(f"Error reading from cache: {e}", file=sys.stderr)
        return None, False
def write_to_cache(package_name, downloads):
    """
    Store ``downloads`` for ``package_name`` in the cache as JSON.

    The entry records the value together with the current epoch time and an ISO-format
    local timestamp. Returns True when the file was written, False (after reporting
    the error on stderr) otherwise.
    """
    target_path = get_cache_path(package_name)
    try:
        ensure_cache_dir()
        entry = {}
        entry["downloads"] = downloads
        entry["timestamp"] = time.time()
        entry["datetime"] = datetime.now().isoformat()
        with open(target_path, "w") as cache_file:
            json.dump(entry, cache_file)
    except Exception as err:
        print(f"Error writing to cache: {err}", file=sys.stderr)
        return False
    else:
        return True
def get_downloads_from_bigquery(credentials_path=None, package_name="aider-chat"):
    """
    Fetch the total download count for a package from the public PyPI BigQuery dataset.

    The cache is consulted first: when read_from_cache reports a valid entry it is
    returned without touching BigQuery. Otherwise the query is run and its result is
    written back to the cache.

    Args:
        credentials_path: Optional path to a service-account JSON file. When omitted,
            the BigQuery client is created with ``credentials=None``.
        package_name: PyPI project name whose downloads are counted.

    Returns:
        The download count; 0 if the query yields no rows; if the query fails, the
        value handed back by the cache lookup when there is one, else None.
    """
    # Check if we have a valid cached value
    cached_downloads, is_valid = read_from_cache(package_name)
    if is_valid:
        print(f"Using cached download statistics for {package_name} (valid for 24 hours)")
        return cached_downloads

    print(f"Cache invalid or expired, fetching fresh download statistics for {package_name}")
    try:
        # Initialize credentials if path provided
        credentials = None
        if credentials_path:
            credentials = service_account.Credentials.from_service_account_file(
                credentials_path, scopes=["https://www.googleapis.com/auth/cloud-platform"]
            )

        client = bigquery.Client(credentials=credentials)

        # Total downloads for the package, excluding CI/CD systems. The package name is
        # bound as a named query parameter instead of being spliced into the SQL text,
        # so an unusual name (e.g. one containing a quote) cannot alter the query.
        query = """
            SELECT COUNT(*) as total_downloads
            FROM `bigquery-public-data.pypi.file_downloads`
            WHERE file.project = @package_name
            AND NOT (
                -- Exclude common CI/CD systems based on installer name patterns
                LOWER(details.installer.name) LIKE '%github%' OR
                LOWER(details.installer.name) LIKE '%travis%' OR
                LOWER(details.installer.name) LIKE '%circle%' OR
                LOWER(details.installer.name) LIKE '%jenkins%' OR
                LOWER(details.installer.name) LIKE '%gitlab%' OR
                LOWER(details.installer.name) LIKE '%azure%' OR
                LOWER(details.installer.name) LIKE '%ci%' OR
                LOWER(details.installer.name) LIKE '%cd%' OR
                LOWER(details.installer.name) LIKE '%bot%' OR
                LOWER(details.installer.name) LIKE '%build%'
            )
        """
        job_config = bigquery.QueryJobConfig(
            query_parameters=[
                bigquery.ScalarQueryParameter("package_name", "STRING", package_name),
            ]
        )

        # Execute the query and wait for it to finish
        results = client.query(query, job_config=job_config).result()

        # Get the first (and only) row
        for row in results:
            downloads = row.total_downloads
            write_to_cache(package_name, downloads)
            return downloads
        return 0
    except Exception as e:
        print(f"Error fetching download statistics from BigQuery: {e}", file=sys.stderr)
        # Fall back to whatever value the cache lookup handed back, if any.
        if cached_downloads is not None:
            print("Using expired cached data due to BigQuery error")
            return cached_downloads
        return None
def get_total_downloads(
    api_key=None,
    package_name="aider-chat",
    use_bigquery=False,
    credentials_path=None,
    timeout=30,
):
    """
    Fetch total downloads for a Python package.

    Args:
        api_key: pepy.tech API key; required unless ``use_bigquery`` is True.
        package_name: PyPI project name.
        use_bigquery: When True, delegate to get_downloads_from_bigquery.
        credentials_path: Passed through to the BigQuery lookup.
        timeout: Seconds to wait for pepy.tech before giving up. Without a timeout
            ``requests`` can block indefinitely on an unresponsive server.

    Returns:
        The download count (for BigQuery, whatever get_downloads_from_bigquery returns).

    Exits the process with status 1 if the pepy.tech path is chosen without an API
    key, or if the pepy.tech request fails.
    """
    if use_bigquery:
        print(f"Using BigQuery to fetch download statistics for {package_name}")
        return get_downloads_from_bigquery(credentials_path, package_name)

    # Fall back to pepy.tech API
    print(f"Using pepy.tech API to fetch download statistics for {package_name}")
    if not api_key:
        print("API key not provided for pepy.tech", file=sys.stderr)
        sys.exit(1)

    url = f"https://api.pepy.tech/api/v2/projects/{package_name}"
    headers = {"X-API-Key": api_key}
    try:
        response = requests.get(url, headers=headers, timeout=timeout)
        response.raise_for_status()  # Raise an exception for HTTP errors
        return response.json().get("total_downloads", 0)
    except requests.exceptions.RequestException as e:
        # Timeouts are RequestException subclasses, so they land here as well.
        print(f"Error fetching download statistics from pepy.tech: {e}", file=sys.stderr)
        sys.exit(1)
def get_github_stars(repo="paul-gauthier/aider", timeout=30):
    """
    Fetch the number of GitHub stars for a repository.

    Args:
        repo: Repository in ``owner/name`` form.
        timeout: Seconds to wait for the GitHub API before giving up. Without a
            timeout ``requests`` can block indefinitely on an unresponsive server.

    Returns:
        The ``stargazers_count`` reported by the API (0 if the field is absent), or
        None if the request fails.
    """
    url = f"https://api.github.com/repos/{repo}"
    headers = {"Accept": "application/vnd.github.v3+json"}
    try:
        response = requests.get(url, headers=headers, timeout=timeout)
        response.raise_for_status()  # Raise an exception for HTTP errors
        return response.json().get("stargazers_count", 0)
    except requests.exceptions.RequestException as e:
        # Timeouts are RequestException subclasses, so they land here as well.
        print(f"Error fetching GitHub stars: {e}", file=sys.stderr)
        return None
def _parse_version_tag(tag):
    """Turn a tag like "v0.77.0" into (0, 77, 0); return None if it is not of that form."""
    if not isinstance(tag, str) or not tag.startswith("v"):
        return None
    try:
        return tuple(int(part) for part in tag[1:].split("."))
    except ValueError:
        # Non-numeric component (e.g. "v1.0.dev"): not comparable, so not a candidate.
        return None


def get_latest_release_aider_percentage():
    """
    Get the percentage of code written by Aider in the LATEST release
    from the blame.yml file.

    The latest release is the entry whose ``end_tag`` parses to the highest version
    tuple. Entries that are not mappings, or whose ``end_tag`` is missing, not a
    string, or not a parseable "vX.Y.Z" tag, are skipped individually so one
    malformed entry does not discard the whole file.

    Returns:
        ``(percentage, version_tag)``; ``(0, "unknown")`` when the file cannot be
        read, is empty, or contains no usable release.
    """
    blame_path = os.path.join(
        os.path.dirname(os.path.dirname(os.path.abspath(__file__))),
        "aider",
        "website",
        "_data",
        "blame.yml",
    )
    try:
        with open(blame_path, "r", encoding="utf-8") as f:
            blame_data = yaml.safe_load(f)

        if not blame_data:
            return 0, "unknown"

        latest_version = None
        latest_release = None
        for release in blame_data:
            if not isinstance(release, dict):
                continue
            version_parts = _parse_version_tag(release.get("end_tag"))
            if version_parts is None:
                continue
            # Strict ">" keeps the first entry when two releases share a version.
            if latest_version is None or version_parts > latest_version:
                latest_version = version_parts
                latest_release = release

        if latest_release:
            percentage = latest_release.get("aider_percentage", 0)
            version = latest_release.get("end_tag", "unknown")
            return percentage, version
        return 0, "unknown"
    except Exception as e:
        # Best-effort: the badge falls back to 0% rather than failing the whole run.
        print(f"Error reading blame data: {e}", file=sys.stderr)
        return 0, "unknown"
def format_number(number):
    """
    Format a large number with K, M, B suffixes with 1 decimal place.

    Values below 1,000 are returned via ``str()`` unchanged; ``None`` becomes "0".
    A value that would round up to "1000.0" of a unit is reported in the next larger
    unit instead (999_999 -> "1.0M", not "1000.0K").
    """
    if number is None:
        return "0"

    # Largest unit first, so the first threshold that fits is the right one.
    units = ((1_000_000_000, "B"), (1_000_000, "M"), (1_000, "K"))
    for index, (scale, suffix) in enumerate(units):
        if number < scale:
            continue
        text = f"{number / scale:.1f}"
        # One-decimal rounding can spill over the unit boundary; promote when a
        # larger unit is available (there is none above "B").
        if text == "1000.0" and index > 0:
            scale, suffix = units[index - 1]
            text = f"{number / scale:.1f}"
        return f"{text}{suffix}"
    return str(number)
def generate_badges_md(downloads, stars, aider_percentage):
    """
    Generate markdown for badges with updated values

    downloads is abbreviated with format_number and aider_percentage is rounded to a
    whole number before being placed into the shields.io badge URLs. The tokens/week
    value and the tooltips come from module-level constants.

    stars is accepted but not used in the generated markup: the stars badge points at
    a shields.io GitHub-stars URL rather than embedding a number (presumably shields.io
    supplies the live count; that is not verifiable from this file).

    Returns the badge markup as a single string.
    """
    # Abbreviate the download count (K/M/B suffix, one decimal place)
    downloads_formatted = format_number(downloads)
    # Round aider percentage to whole number
    aider_percent_rounded = round(aider_percentage)
    # "%25" in the Singularity URL is a URL-encoded percent sign following the number.
    markdown = f""" <a href="https://github.com/Aider-AI/aider/stargazers"><img alt="GitHub Stars" title="{GITHUB_STARS_TOOLTIP}"
src="https://img.shields.io/github/stars/Aider-AI/aider?style=flat-square&logo=github&color=f1c40f&labelColor=555555"/></a>
<a href="https://pypi.org/project/aider-chat/"><img alt="PyPI Downloads" title="{PYPI_DOWNLOADS_TOOLTIP}"
src="https://img.shields.io/badge/📦%20Installs-{downloads_formatted}-2ecc71?style=flat-square&labelColor=555555"/></a>
<img alt="Tokens per week" title="{TOKENS_WEEKLY_TOOLTIP}"
src="https://img.shields.io/badge/📈%20Tokens%2Fweek-{TOKENS_PER_WEEK}-3498db?style=flat-square&labelColor=555555"/>
<a href="https://openrouter.ai/#options-menu"><img alt="OpenRouter Ranking" title="{OPENROUTER_TOOLTIP}"
src="https://img.shields.io/badge/🏆%20OpenRouter-Top%2020-9b59b6?style=flat-square&labelColor=555555"/></a>
<a href="https://aider.chat/HISTORY.html"><img alt="Singularity" title="{SINGULARITY_TOOLTIP}"
src="https://img.shields.io/badge/🔄%20Singularity-{aider_percent_rounded}%25-e74c3c?style=flat-square&labelColor=555555"/></a>""" # noqa
    return markdown
def get_badges_md():
    """
    Collect the download, star and Aider-authorship figures and render them as the
    markdown badge block.

    USE_BIGQUERY (from the environment / .env) selects the download source: a truthy
    word ("true", "1", "yes") or the path of an existing file enables BigQuery, and
    in the path case the file doubles as the credentials file. Otherwise
    PEPY_API_KEY must be set, or the process exits with status 1.
    """
    load_dotenv()

    env_value = os.environ.get("USE_BIGQUERY", "false")
    env_is_existing_path = os.path.exists(env_value)
    use_bigquery = env_value.lower() in ("true", "1", "yes") or env_is_existing_path
    credentials_path = env_value if env_is_existing_path else None

    # The pepy.tech key is only needed when BigQuery is not in use.
    api_key = None
    if not use_bigquery:
        api_key = os.environ.get("PEPY_API_KEY")
        if not api_key:
            missing_key_message = (
                "API key not provided and BigQuery not enabled. Please set PEPY_API_KEY"
                " environment variable"
            )
            print(missing_key_message, file=sys.stderr)
            sys.exit(1)

    # Gather the three figures for the default package / repository.
    downloads_total = get_total_downloads(api_key, "aider-chat", use_bigquery, credentials_path)
    star_count = get_github_stars("paul-gauthier/aider")
    aider_share, _version = get_latest_release_aider_percentage()

    return generate_badges_md(downloads_total, star_count, aider_share)
def get_badges_html():
    """
    Collect the download, star and Aider-authorship figures and render them as the
    HTML badge block.

    USE_BIGQUERY (from the environment / .env) selects the download source: a truthy
    word ("true", "1", "yes") or the path of an existing file enables BigQuery, and
    in the path case the file doubles as the credentials file. Otherwise
    PEPY_API_KEY must be set, or the process exits with status 1.
    """
    load_dotenv()

    env_value = os.environ.get("USE_BIGQUERY", "false")
    env_is_existing_path = os.path.exists(env_value)
    use_bigquery = env_value.lower() in ("true", "1", "yes") or env_is_existing_path
    credentials_path = env_value if env_is_existing_path else None

    # The pepy.tech key is only needed when BigQuery is not in use.
    api_key = None
    if not use_bigquery:
        api_key = os.environ.get("PEPY_API_KEY")
        if not api_key:
            missing_key_message = (
                "API key not provided and BigQuery not enabled. Please set PEPY_API_KEY"
                " environment variable"
            )
            print(missing_key_message, file=sys.stderr)
            sys.exit(1)

    # Gather the three figures for the default package / repository.
    total_downloads = get_total_downloads(api_key, "aider-chat", use_bigquery, credentials_path)
    stars = get_github_stars("paul-gauthier/aider")
    percentage, _version = get_latest_release_aider_percentage()

    downloads_formatted = format_number(total_downloads)

    # Stars are shown as whole numbers (no decimal place), with a K/M/B suffix.
    if stars is None:
        stars_formatted = "0"
    else:
        for threshold, suffix in ((1_000_000_000, "B"), (1_000_000, "M"), (1_000, "K")):
            if stars >= threshold:
                stars_formatted = f"{round(stars / threshold)}{suffix}"
                break
        else:
            stars_formatted = str(int(round(stars)))

    aider_percent_rounded = round(percentage)

    html = f"""<a href="https://github.com/Aider-AI/aider" class="github-badge badge-stars" title="{GITHUB_STARS_TOOLTIP}">
<span class="badge-label">⭐ GitHub Stars</span>
<span class="badge-value">{stars_formatted}</span>
</a>
<a href="https://pypi.org/project/aider-chat/" class="github-badge badge-installs" title="{PYPI_DOWNLOADS_TOOLTIP}">
<span class="badge-label">📦 Installs</span>
<span class="badge-value">{downloads_formatted}</span>
</a>
<div class="github-badge badge-tokens" title="{TOKENS_WEEKLY_TOOLTIP}">
<span class="badge-label">📈 Tokens/week</span>
<span class="badge-value">{TOKENS_PER_WEEK}</span>
</div>
<a href="https://openrouter.ai/#options-menu" class="github-badge badge-router" title="{OPENROUTER_TOOLTIP}">
<span class="badge-label">🏆 OpenRouter</span>
<span class="badge-value">Top 20</span>
</a>
<a href="/HISTORY.html" class="github-badge badge-coded" title="{SINGULARITY_TOOLTIP}">
<span class="badge-label">🔄 Singularity</span>
<span class="badge-value">{aider_percent_rounded}%</span>
</a>""" # noqa
    return html
def _parse_testimonial_line(line):
    """Parse one ``- *"quote"* — [author](link)`` bullet into a dict, or None if it has no quote."""
    full_line = line.strip()
    if '*"' not in full_line or '"*' not in full_line:
        return None

    # Split into the quote and whatever follows the closing '"*'. The attribution is
    # looked for only in that tail, so dashes inside the quote are not mistaken for it.
    after_open = full_line.partition('*"')[2]
    quote_text, _, tail = after_open.partition('"*')

    author = "Anonymous"
    link = ""
    for marker in ("— [", " - ["):
        # Linked attribution: "— [author](link)" or " - [author](link)"
        if marker in tail and "](" in tail:
            attribution = tail.split(marker, 1)[1]
            author_part, _, link_part = attribution.partition("](")
            author = author_part.split("]")[0].strip()
            link = link_part.split(")")[0].strip()
            break
    else:
        # Plain-text attribution: "— author" or " - author"
        if "—" in tail:
            author = tail.split("—", 1)[1].strip()
        elif " - " in tail:
            author = tail.split(" - ", 1)[1].strip()

    return {"text": quote_text.strip(), "author": author, "link": link}


def _js_string(value):
    """Render ``value`` as a double-quoted JavaScript string literal safe inside <script>."""
    # json.dumps escapes quotes, backslashes and control characters; "</" is escaped
    # as well so a value can never terminate the surrounding <script> element.
    return json.dumps(value, ensure_ascii=False).replace("</", "<\\/")


def get_testimonials_js():
    """
    Extract testimonials from README.md and format them as a JavaScript array.

    Only bullets starting with ``- *"`` inside the "## Kind Words From Users" section
    (up to the next line starting with "##") are considered. Each becomes an object
    with ``text``, ``author`` (default "Anonymous") and ``link`` (default "").

    Returns:
        A ``<script>`` element defining ``const testimonials``. If the README cannot
        be read, or no testimonials are found, the array is empty.
    """
    # Path to README.md, relative to this script
    readme_path = os.path.join(
        os.path.dirname(os.path.dirname(os.path.abspath(__file__))), "README.md"
    )
    empty_result = "<script>\nconst testimonials = [];\n</script>"

    try:
        with open(readme_path, "r", encoding="utf-8") as f:
            lines = f.readlines()

        testimonials = []
        in_testimonials_section = False
        for line in lines:
            if not in_testimonials_section:
                # Skip everything up to and including the section heading.
                in_testimonials_section = line.strip() == "## Kind Words From Users"
                continue
            # If we've hit another section, stop
            if line.startswith("##"):
                break
            if not line.strip().startswith('- *"'):
                continue
            try:
                testimonial = _parse_testimonial_line(line)
            except Exception as e:
                # Best-effort: one unparseable bullet must not lose the others.
                print(
                    f"Error parsing testimonial line: {line}. Error: {e}",
                    file=sys.stderr,
                )
                continue
            if testimonial is not None:
                testimonials.append(testimonial)

        if not testimonials:
            print("No testimonials found in README.md", file=sys.stderr)
            return empty_result

        entries = [
            " {\n"
            f" text: {_js_string(t['text'])},\n"
            f" author: {_js_string(t['author'])},\n"
            f" link: {_js_string(t['link'])}\n"
            " }"
            for t in testimonials
        ]
        return "<script>\nconst testimonials = [\n" + ",\n".join(entries) + "\n];\n</script>"
    except Exception as e:
        print(f"Error reading testimonials from README: {e}", file=sys.stderr)
        # Return empty array as fallback
        return empty_result
def main():
    """
    Command-line entry point: print download, star and Aider-authorship statistics,
    optionally the markdown badge block (``--markdown``), and the testimonials script.

    The download source is BigQuery when ``--use-bigquery`` is given or USE_BIGQUERY
    is a truthy word / the path of an existing file; otherwise pepy.tech, which
    needs an API key (``--api-key`` or PEPY_API_KEY). Exits with status 1 when
    pepy.tech is selected without a key.
    """
    # Load environment variables from .env file
    load_dotenv()
    # Ensure cache directory exists
    ensure_cache_dir()

    parser = argparse.ArgumentParser(description="Get total downloads and GitHub stars for aider")
    parser.add_argument(
        "--api-key",
        help=(
            "pepy.tech API key (can also be set via PEPY_API_KEY in .env file or environment"
            " variable)"
        ),
    )
    parser.add_argument(
        "--package", default="aider-chat", help="Package name (default: aider-chat)"
    )
    parser.add_argument(
        "--github-repo",
        default="paul-gauthier/aider",
        help="GitHub repository (default: paul-gauthier/aider)",
    )
    parser.add_argument("--markdown", action="store_true", help="Generate markdown badges block")
    parser.add_argument(
        "--use-bigquery",
        action="store_true",
        help="Use BigQuery to fetch download statistics instead of pepy.tech",
    )
    parser.add_argument(
        "--credentials-path", help="Path to Google Cloud service account credentials JSON file"
    )
    args = parser.parse_args()

    # Determine whether to use BigQuery and get credentials path
    bigquery_env = os.environ.get("USE_BIGQUERY", "false")
    bigquery_env_is_path = os.path.exists(bigquery_env)
    use_bigquery = (
        args.use_bigquery or bigquery_env.lower() in ("true", "1", "yes") or bigquery_env_is_path
    )
    credentials_path = args.credentials_path or (bigquery_env if bigquery_env_is_path else None)

    # Check for required parameters
    api_key = None
    if not use_bigquery:
        # Get API key from args or environment variable
        api_key = args.api_key or os.environ.get("PEPY_API_KEY")
        if not api_key:
            print(
                (
                    "API key not provided and BigQuery not enabled. Please set PEPY_API_KEY"
                    " environment variable, use --api-key, or enable BigQuery with --use-bigquery"
                ),
                file=sys.stderr,
            )
            sys.exit(1)
    elif not credentials_path:
        print(
            (
                "BigQuery enabled but no credentials provided. Please set"
                " USE_BIGQUERY to path of credentials file or use --credentials-path"
            ),
            file=sys.stderr,
        )
        # Continue execution - BigQuery might work without explicit credentials in some
        # environments

    # Get PyPI downloads. The lookup can return None (e.g. a BigQuery failure with no
    # cached value), which the ":," format spec cannot render.
    total_downloads = get_total_downloads(api_key, args.package, use_bigquery, credentials_path)
    if total_downloads is not None:
        print(f"Total downloads for {args.package}: {total_downloads:,}")
    else:
        print(f"Total downloads for {args.package}: unavailable", file=sys.stderr)

    # Get GitHub stars
    stars = get_github_stars(args.github_repo)
    if stars is not None:
        print(f"GitHub stars for {args.github_repo}: {stars:,}")

    # Get Aider contribution percentage in latest release
    percentage, version = get_latest_release_aider_percentage()
    print(f"Aider wrote {percentage:.2f}% of code in the LATEST release ({version})")

    # Honor --markdown: emit the badge block built from the figures gathered above.
    if args.markdown:
        print("\nBadges markdown:")
        print(generate_badges_md(total_downloads, stars, percentage))

    # Get testimonials JavaScript
    testimonials_js = get_testimonials_js()
    print("\nTestimonials JavaScript:")
    print(testimonials_js)
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()