mirror of
https://github.com/Aider-AI/aider.git
synced 2025-05-29 16:54:59 +00:00
refactor: Extract code contribution stats logic to separate module
This commit is contained in:
parent
3d4fb68172
commit
3b40edec93
3 changed files with 109 additions and 71 deletions
|
@ -4,7 +4,7 @@ import re
|
||||||
import subprocess
|
import subprocess
|
||||||
import sys
|
import sys
|
||||||
import tempfile
|
import tempfile
|
||||||
from collections import OrderedDict
|
from collections import OrderedDict, defaultdict
|
||||||
from os.path import expanduser
|
from os.path import expanduser
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
|
@ -23,6 +23,7 @@ from aider.repo import ANY_GIT_ERROR
|
||||||
from aider.run_cmd import run_cmd
|
from aider.run_cmd import run_cmd
|
||||||
from aider.scrape import Scraper, install_playwright
|
from aider.scrape import Scraper, install_playwright
|
||||||
from aider.utils import is_image_file
|
from aider.utils import is_image_file
|
||||||
|
from aider.stats import hash_len, get_all_commit_hashes_between_tags, get_commit_authors, get_counts_for_file
|
||||||
|
|
||||||
from .dump import dump # noqa: F401
|
from .dump import dump # noqa: F401
|
||||||
|
|
||||||
|
@ -1520,6 +1521,9 @@ class Commands:
|
||||||
self.io.tool_error("No main or master branch found. Please specify a revision.")
|
self.io.tool_error("No main or master branch found. Please specify a revision.")
|
||||||
return
|
return
|
||||||
source_revision, target_revision = args.split("..") if ".." in args else (args, "HEAD")
|
source_revision, target_revision = args.split("..") if ".." in args else (args, "HEAD")
|
||||||
|
commits = get_all_commit_hashes_between_tags(source_revision, target_revision)
|
||||||
|
commits = [commit[:hash_len] for commit in commits] if commits else []
|
||||||
|
authors = get_commit_authors(commits)
|
||||||
|
|
||||||
# Get files changed between revisions
|
# Get files changed between revisions
|
||||||
diff_files = self.coder.repo.repo.git.diff(
|
diff_files = self.coder.repo.repo.git.diff(
|
||||||
|
@ -1535,49 +1539,29 @@ class Commands:
|
||||||
'.ttf', '.otf', '.woff', '.woff2', '.eot' # fonts
|
'.ttf', '.otf', '.woff', '.woff2', '.eot' # fonts
|
||||||
))]
|
))]
|
||||||
self.io.tool_output(f"Found {len(files)} non-binary tracked files in the repository.")
|
self.io.tool_output(f"Found {len(files)} non-binary tracked files in the repository.")
|
||||||
|
|
||||||
total_lines = 0
|
all_file_counts = {}
|
||||||
aider_lines = 0
|
grand_total = defaultdict(int)
|
||||||
|
aider_total = 0
|
||||||
for file in files:
|
for file in files:
|
||||||
try:
|
file_counts = get_counts_for_file(source_revision, target_revision, authors, file)
|
||||||
# Run git blame for each file
|
if file_counts:
|
||||||
blame_output = self.coder.repo.repo.git.blame(
|
all_file_counts[file] = file_counts
|
||||||
f"{source_revision}..{target_revision}", "-M", "-C", "--line-porcelain", "--", file
|
for author, count in file_counts.items():
|
||||||
)
|
grand_total[author] += count
|
||||||
|
if "(aider)" in author.lower():
|
||||||
# Parse blame output
|
aider_total += count
|
||||||
for line in blame_output.split('filename'):
|
total_lines = sum(grand_total.values())
|
||||||
total_lines += 1
|
aider_percentage = (aider_total / total_lines) * 100 if total_lines > 0 else 0
|
||||||
for field in line.split('\n'):
|
|
||||||
# Check author and committer lines for aider attribution
|
|
||||||
author_match = False
|
|
||||||
committer_match = False
|
|
||||||
if field.startswith("author ") or field.startswith("committer "):
|
|
||||||
author_match = "(aider)" in field.lower()
|
|
||||||
committer_match = "(aider)" in field.lower()
|
|
||||||
if author_match or committer_match:
|
|
||||||
aider_lines += 1
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
if "no such path" not in str(e).lower():
|
|
||||||
self.io.tool_error(f"Error processing {file}: {e}")
|
|
||||||
|
|
||||||
# Calculate percentages
|
# Calculate percentages
|
||||||
if total_lines > 0:
|
if total_lines > 0:
|
||||||
aider_percentage = (aider_lines / total_lines) * 100
|
# Output overall statistics
|
||||||
human_lines = total_lines - aider_lines
|
self.io.tool_output(f"\nAnalysis from {source_revision} to {target_revision}:")
|
||||||
human_percentage = (human_lines / total_lines) * 100
|
self.io.tool_output(f"Total lines analyzed: {total_lines:,}")
|
||||||
|
self.io.tool_output(f"Lines by aider: {aider_total:,} ({aider_percentage:.1f}%)")
|
||||||
|
self.io.tool_output(f"Lines by humans: {total_lines - aider_total:,} ({100 - aider_percentage:.1f}%)")
|
||||||
|
|
||||||
# Display results
|
|
||||||
self.io.tool_output("\nCode contribution statistics:")
|
|
||||||
self.io.tool_output(f"Total lines of code: {total_lines:,}")
|
|
||||||
self.io.tool_output(
|
|
||||||
f"Human-written code: {human_lines:,} lines ({human_percentage:.1f}%)"
|
|
||||||
)
|
|
||||||
self.io.tool_output(
|
|
||||||
f"Aider-written code: {aider_lines:,} lines ({aider_percentage:.1f}%)"
|
|
||||||
)
|
|
||||||
else:
|
else:
|
||||||
self.io.tool_output("No lines of code found in the repository.")
|
self.io.tool_output("No lines of code found in the repository.")
|
||||||
|
|
||||||
|
|
84
aider/stats.py
Normal file
84
aider/stats.py
Normal file
|
@ -0,0 +1,84 @@
|
||||||
|
import subprocess
|
||||||
|
import sys
|
||||||
|
|
||||||
|
from collections import defaultdict
|
||||||
|
|
||||||
|
# Number of leading characters of a commit hash used as a lookup key; taken
# as the length of the sample abbreviated hash below (9 characters).
hash_len = len("44e6fefc2")
|
||||||
|
|
||||||
|
def run(cmd):
    """Execute *cmd* and return whatever it wrote to standard output.

    *cmd* is handed to ``subprocess.run`` unchanged. Both stdout and stderr
    are captured and decoded as text; only stdout is returned.

    Raises:
        subprocess.CalledProcessError: if the command exits with a non-zero
            status.
    """
    completed = subprocess.run(
        cmd,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        text=True,
        check=True,
    )
    return completed.stdout
|
||||||
|
|
||||||
|
def get_all_commit_hashes_between_tags(start_tag, end_tag=None):
    """List the commit hashes that ``git rev-list`` reports for a range.

    The range is ``start_tag..end_tag``; a falsy *end_tag* means the range
    runs up to ``HEAD``.

    Returns:
        A list with one entry per line of ``git rev-list`` output, or
        ``None`` (not an empty list) when git printed nothing, so callers
        must be prepared for ``None``.
    """
    upper_bound = end_tag if end_tag else "HEAD"
    output = run(["git", "rev-list", f"{start_tag}..{upper_bound}"])

    if not output:
        return None
    return output.strip().split("\n")
|
||||||
|
|
||||||
|
def get_commit_authors(commits):
    """Build a mapping from each commit in *commits* to its author label.

    The label is the commit's author name as printed by ``git show``
    (``%an``). When the commit subject (``%s``) starts with ``aider:``,
    compared case-insensitively, `` (aider)`` is appended to the name.

    Returns:
        A dict keyed by the entries of *commits*, exactly as they were
        passed in.
    """
    authors_by_commit = {}
    for sha in commits:
        name = run(["git", "show", "-s", "--format=%an", sha]).strip()
        subject = run(["git", "show", "-s", "--format=%s", sha]).strip()
        written_by_aider = subject.lower().startswith("aider:")
        authors_by_commit[sha] = name + " (aider)" if written_by_aider else name
    return authors_by_commit
|
||||||
|
|
||||||
|
def get_counts_for_file(start_tag, end_tag, authors, fname):
    """Count the lines of *fname* that ``git blame`` credits to each author.

    Blame is run over ``start_tag..end_tag`` (``start_tag..HEAD`` when
    *end_tag* is falsy). Output lines starting with ``^`` are skipped; git
    documents the caret as its boundary-commit marker. Every other line is
    credited to ``authors[line[:hash_len]]``, or to ``"Unknown"`` when that
    prefix is not a key of *authors*.

    Args:
        start_tag: Revision at the start of the blamed range.
        end_tag: Revision at the end of the range; falsy means ``HEAD``.
        authors: Mapping from abbreviated commit hash to author label.
        fname: Path of the file to blame.

    Returns:
        A dict mapping author label to line count, or ``None`` when blame
        printed nothing or the git command failed. A failure whose message
        contains "no such path" is silent; any other failure prints a
        warning to stderr.
    """
    blame_cmd = [
        "git",
        "blame",
        "-M100",  # Detect moved lines within a file with 100% similarity
        "-C100",  # Detect moves across files with 100% similarity
        "-C",  # Increase detection effort
        "-C",  # Increase detection effort even more
        "--abbrev=9",
        f"{start_tag}..{end_tag or 'HEAD'}",
        "--",
        fname,
    ]

    # Only the subprocess call can raise CalledProcessError, so keep the
    # try body down to that one call.
    try:
        text = run(blame_cmd)
    except subprocess.CalledProcessError as e:
        # str(e) holds only the command and its exit status. Git's own
        # diagnostic ("fatal: no such path ...") is in the captured stderr,
        # so both have to be searched for the phrase.
        details = f"{e}\n{e.stderr or ''}".lower()
        if "no such path" not in details:
            # Some other error occurred
            print(f"Warning: Unable to blame file {fname}. Error: {e}", file=sys.stderr)
        # A file missing from this revision range is okay; either way there
        # are no counts to report.
        return None

    if not text:
        return None

    line_counts = defaultdict(int)
    for line in text.splitlines():
        if line.startswith("^"):
            continue
        author = authors.get(line[:hash_len], "Unknown")
        line_counts[author] += 1

    return dict(line_counts)
|
|
@ -2,8 +2,6 @@
|
||||||
|
|
||||||
import argparse
|
import argparse
|
||||||
import os
|
import os
|
||||||
import subprocess
|
|
||||||
import sys
|
|
||||||
from collections import defaultdict
|
from collections import defaultdict
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from operator import itemgetter
|
from operator import itemgetter
|
||||||
|
@ -12,6 +10,8 @@ import semver
|
||||||
import yaml
|
import yaml
|
||||||
from tqdm import tqdm
|
from tqdm import tqdm
|
||||||
|
|
||||||
|
from aider.stats import get_all_commit_hashes_between_tags, run, hash_len, get_commit_authors, get_counts_for_file
|
||||||
|
|
||||||
website_files = [
|
website_files = [
|
||||||
"aider/website/index.html",
|
"aider/website/index.html",
|
||||||
"aider/website/share/index.md",
|
"aider/website/share/index.md",
|
||||||
|
@ -68,36 +68,6 @@ def blame(start_tag, end_tag=None):
|
||||||
return all_file_counts, grand_total, total_lines, aider_total, aider_percentage, end_date
|
return all_file_counts, grand_total, total_lines, aider_total, aider_percentage, end_date
|
||||||
|
|
||||||
|
|
||||||
def get_all_commit_hashes_between_tags(start_tag, end_tag=None):
|
|
||||||
if end_tag:
|
|
||||||
res = run(["git", "rev-list", f"{start_tag}..{end_tag}"])
|
|
||||||
else:
|
|
||||||
res = run(["git", "rev-list", f"{start_tag}..HEAD"])
|
|
||||||
|
|
||||||
if res:
|
|
||||||
commit_hashes = res.strip().split("\n")
|
|
||||||
return commit_hashes
|
|
||||||
|
|
||||||
|
|
||||||
def run(cmd):
|
|
||||||
# Get all commit hashes since the specified tag
|
|
||||||
result = subprocess.run(cmd, capture_output=True, text=True, check=True)
|
|
||||||
return result.stdout
|
|
||||||
|
|
||||||
|
|
||||||
def get_commit_authors(commits):
|
|
||||||
commit_to_author = dict()
|
|
||||||
for commit in commits:
|
|
||||||
author = run(["git", "show", "-s", "--format=%an", commit]).strip()
|
|
||||||
commit_message = run(["git", "show", "-s", "--format=%s", commit]).strip()
|
|
||||||
if commit_message.lower().startswith("aider:"):
|
|
||||||
author += " (aider)"
|
|
||||||
commit_to_author[commit] = author
|
|
||||||
return commit_to_author
|
|
||||||
|
|
||||||
|
|
||||||
hash_len = len("44e6fefc2")
|
|
||||||
|
|
||||||
|
|
||||||
def process_all_tags_since(start_tag):
|
def process_all_tags_since(start_tag):
|
||||||
tags = get_all_tags_since(start_tag)
|
tags = get_all_tags_since(start_tag)
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue