refactor: Extract code contribution stats logic to separate module

This commit is contained in:
josix 2025-01-17 01:27:49 +08:00
parent 3d4fb68172
commit 3b40edec93
No known key found for this signature in database
GPG key ID: EF4F614562B02881
3 changed files with 109 additions and 71 deletions

View file

@ -4,7 +4,7 @@ import re
import subprocess
import sys
import tempfile
from collections import OrderedDict
from collections import OrderedDict, defaultdict
from os.path import expanduser
from pathlib import Path
@ -23,6 +23,7 @@ from aider.repo import ANY_GIT_ERROR
from aider.run_cmd import run_cmd
from aider.scrape import Scraper, install_playwright
from aider.utils import is_image_file
from aider.stats import hash_len, get_all_commit_hashes_between_tags, get_commit_authors, get_counts_for_file
from .dump import dump # noqa: F401
@ -1520,6 +1521,9 @@ class Commands:
self.io.tool_error("No main or master branch found. Please specify a revision.")
return
source_revision, target_revision = args.split("..") if ".." in args else (args, "HEAD")
commits = get_all_commit_hashes_between_tags(source_revision, target_revision)
commits = [commit[:hash_len] for commit in commits] if commits else []
authors = get_commit_authors(commits)
# Get files changed between revisions
diff_files = self.coder.repo.repo.git.diff(
@ -1536,48 +1540,28 @@ class Commands:
))]
self.io.tool_output(f"Found {len(files)} non-binary tracked files in the repository.")
total_lines = 0
aider_lines = 0
all_file_counts = {}
grand_total = defaultdict(int)
aider_total = 0
for file in files:
try:
# Run git blame for each file
blame_output = self.coder.repo.repo.git.blame(
f"{source_revision}..{target_revision}", "-M", "-C", "--line-porcelain", "--", file
)
# Parse blame output
for line in blame_output.split('filename'):
total_lines += 1
for field in line.split('\n'):
# Check author and committer lines for aider attribution
author_match = False
committer_match = False
if field.startswith("author ") or field.startswith("committer "):
author_match = "(aider)" in field.lower()
committer_match = "(aider)" in field.lower()
if author_match or committer_match:
aider_lines += 1
except Exception as e:
if "no such path" not in str(e).lower():
self.io.tool_error(f"Error processing {file}: {e}")
file_counts = get_counts_for_file(source_revision, target_revision, authors, file)
if file_counts:
all_file_counts[file] = file_counts
for author, count in file_counts.items():
grand_total[author] += count
if "(aider)" in author.lower():
aider_total += count
total_lines = sum(grand_total.values())
aider_percentage = (aider_total / total_lines) * 100 if total_lines > 0 else 0
# Calculate percentages
if total_lines > 0:
aider_percentage = (aider_lines / total_lines) * 100
human_lines = total_lines - aider_lines
human_percentage = (human_lines / total_lines) * 100
# Output overall statistics
self.io.tool_output(f"\nAnalysis from {source_revision} to {target_revision}:")
self.io.tool_output(f"Total lines analyzed: {total_lines:,}")
self.io.tool_output(f"Lines by aider: {aider_total:,} ({aider_percentage:.1f}%)")
self.io.tool_output(f"Lines by humans: {total_lines - aider_total:,} ({100 - aider_percentage:.1f}%)")
# Display results
self.io.tool_output("\nCode contribution statistics:")
self.io.tool_output(f"Total lines of code: {total_lines:,}")
self.io.tool_output(
f"Human-written code: {human_lines:,} lines ({human_percentage:.1f}%)"
)
self.io.tool_output(
f"Aider-written code: {aider_lines:,} lines ({aider_percentage:.1f}%)"
)
else:
self.io.tool_output("No lines of code found in the repository.")

84
aider/stats.py Normal file
View file

@ -0,0 +1,84 @@
import subprocess
import sys
from collections import defaultdict
# Number of leading characters of a commit hash that are used as the lookup
# key for authors (9, i.e. the length of the sample abbreviated hash below).
hash_len = len("44e6fefc2")
def run(cmd):
    """Execute ``cmd`` and return everything it wrote to stdout, as text.

    ``cmd`` is passed straight to ``subprocess.run`` with output capturing
    enabled. Because ``check=True`` is used, a non-zero exit status raises
    ``subprocess.CalledProcessError``.
    """
    completed = subprocess.run(cmd, check=True, text=True, capture_output=True)
    return completed.stdout
def get_all_commit_hashes_between_tags(start_tag, end_tag=None):
    """Return the commit hashes in the range ``start_tag..end_tag``.

    Args:
        start_tag: Revision used as the left side of the ``..`` range
            passed to ``git rev-list``.
        end_tag: Revision used as the right side of the range. ``HEAD`` is
            used when this is ``None`` or otherwise falsy.

    Returns:
        A list with one hash per line printed by ``git rev-list``, in the
        order git printed them. The list is empty when the range contains
        no commits.

    Raises:
        subprocess.CalledProcessError: If ``git rev-list`` exits non-zero.
    """
    upper = end_tag if end_tag else "HEAD"
    res = run(["git", "rev-list", f"{start_tag}..{upper}"])
    # Always hand back a list (never an implicit None) so callers can iterate
    # or slice the result without guarding against an empty range first.
    return res.strip().splitlines()
def get_commit_authors(commits):
    """Map each commit to its author name, tagging commits made by aider.

    Args:
        commits: Iterable of commit identifiers understood by ``git show``.
            ``None`` is accepted and treated as "no commits".

    Returns:
        A dict mapping every entry of ``commits`` (unchanged, as given) to
        the commit's author name. When the commit subject starts with
        ``aider:`` (case-insensitive), `` (aider)`` is appended to the name.

    Raises:
        subprocess.CalledProcessError: If ``git show`` exits non-zero.
    """
    commit_to_author = {}
    # `commits or ()` keeps an empty/None result from an upstream lookup from
    # blowing up with "NoneType is not iterable".
    for commit in commits or ():
        # A single `git show` prints the author name on the first line and
        # the subject on the second, avoiding two processes per commit.
        out = run(["git", "show", "-s", "--format=%an%n%s", commit])
        author, _, subject = out.partition("\n")
        author = author.strip()
        if subject.strip().lower().startswith("aider:"):
            author += " (aider)"
        commit_to_author[commit] = author
    return commit_to_author
def get_counts_for_file(start_tag, end_tag, authors, fname):
    """Count the lines of ``fname`` attributed to each author in a range.

    Runs ``git blame`` restricted to ``start_tag..end_tag`` and tallies one
    count per blame output line, keyed by the author that ``authors`` maps
    the line's abbreviated commit hash to.

    Args:
        start_tag: Revision used as the left side of the ``..`` range.
        end_tag: Revision used as the right side of the range. ``HEAD`` is
            used when this is ``None`` or otherwise falsy.
        authors: Mapping from abbreviated commit hash (the first
            ``hash_len`` characters) to author name. Hashes that are not in
            the mapping are counted under ``"Unknown"``.
        fname: Path of the file to blame.

    Returns:
        A dict mapping author name to line count, or ``None`` when blame
        produced no output or the ``git blame`` command failed.
    """
    upper = end_tag if end_tag else "HEAD"
    cmd = [
        "git",
        "blame",
        # The number given to -M/-C is a minimum count of alphanumeric
        # characters that must match, not a similarity percentage.
        "-M100",  # Detect lines moved or copied within the file
        "-C100",  # Also detect lines moved or copied from other files
        "-C",  # Second -C: widen the search for copy sources
        "-C",  # Third -C: widen the search for copy sources even further
        "--abbrev=9",
        f"{start_tag}..{upper}",
        "--",
        fname,
    ]

    try:
        text = run(cmd)
    except subprocess.CalledProcessError as e:
        # git writes its diagnostic to stderr; str(e) only carries the
        # command line and exit status, so the captured stderr has to be
        # checked to recognise the "no such path" case.
        details = f"{e} {e.stderr or ''}".lower()
        if "no such path" in details:
            # File doesn't exist in this revision range, which is okay
            return None
        # Some other error occurred
        print(f"Warning: Unable to blame file {fname}. Error: {e}", file=sys.stderr)
        return None

    if not text:
        return None

    line_counts = defaultdict(int)
    for line in text.splitlines():
        # git blame prefixes lines from boundary commits (outside the
        # requested range) with a caret; those lines are not counted.
        if line.startswith("^"):
            continue
        hsh = line[:hash_len]
        author = authors.get(hsh, "Unknown")
        line_counts[author] += 1

    return dict(line_counts)

View file

@ -2,8 +2,6 @@
import argparse
import os
import subprocess
import sys
from collections import defaultdict
from datetime import datetime
from operator import itemgetter
@ -12,6 +10,8 @@ import semver
import yaml
from tqdm import tqdm
from aider.stats import get_all_commit_hashes_between_tags, run, hash_len, get_commit_authors, get_counts_for_file
website_files = [
"aider/website/index.html",
"aider/website/share/index.md",
@ -68,36 +68,6 @@ def blame(start_tag, end_tag=None):
return all_file_counts, grand_total, total_lines, aider_total, aider_percentage, end_date
def get_all_commit_hashes_between_tags(start_tag, end_tag=None):
    """Return the commit hashes in the range ``start_tag..end_tag``.

    Args:
        start_tag: Revision used as the left side of the ``..`` range
            passed to ``git rev-list``.
        end_tag: Revision used as the right side of the range. ``HEAD`` is
            used when this is ``None`` or otherwise falsy.

    Returns:
        A list with one hash per line printed by ``git rev-list``, in the
        order git printed them. The list is empty when the range contains
        no commits.

    Raises:
        subprocess.CalledProcessError: If ``git rev-list`` exits non-zero.
    """
    upper = end_tag if end_tag else "HEAD"
    res = run(["git", "rev-list", f"{start_tag}..{upper}"])
    # Always hand back a list (never an implicit None) so callers can iterate
    # or slice the result without guarding against an empty range first.
    return res.strip().splitlines()
def run(cmd):
    """Execute ``cmd`` and return everything it wrote to stdout, as text.

    ``cmd`` is passed straight to ``subprocess.run`` with output capturing
    enabled. Because ``check=True`` is used, a non-zero exit status raises
    ``subprocess.CalledProcessError``.
    """
    completed = subprocess.run(cmd, check=True, text=True, capture_output=True)
    return completed.stdout
def get_commit_authors(commits):
    """Map each commit to its author name, tagging commits made by aider.

    Args:
        commits: Iterable of commit identifiers understood by ``git show``.
            ``None`` is accepted and treated as "no commits".

    Returns:
        A dict mapping every entry of ``commits`` (unchanged, as given) to
        the commit's author name. When the commit subject starts with
        ``aider:`` (case-insensitive), `` (aider)`` is appended to the name.

    Raises:
        subprocess.CalledProcessError: If ``git show`` exits non-zero.
    """
    commit_to_author = {}
    # `commits or ()` keeps an empty/None result from an upstream lookup from
    # blowing up with "NoneType is not iterable".
    for commit in commits or ():
        # A single `git show` prints the author name on the first line and
        # the subject on the second, avoiding two processes per commit.
        out = run(["git", "show", "-s", "--format=%an%n%s", commit])
        author, _, subject = out.partition("\n")
        author = author.strip()
        if subject.strip().lower().startswith("aider:"):
            author += " (aider)"
        commit_to_author[commit] = author
    return commit_to_author
# Number of leading characters of a commit hash that are used as the lookup
# key for authors (9, i.e. the length of the sample abbreviated hash below).
hash_len = len("44e6fefc2")
def process_all_tags_since(start_tag):
tags = get_all_tags_since(start_tag)