diff --git a/aider/commands.py b/aider/commands.py index ee2ac7632..68938e025 100644 --- a/aider/commands.py +++ b/aider/commands.py @@ -4,7 +4,7 @@ import re import subprocess import sys import tempfile -from collections import OrderedDict +from collections import OrderedDict, defaultdict from os.path import expanduser from pathlib import Path @@ -23,6 +23,7 @@ from aider.repo import ANY_GIT_ERROR from aider.run_cmd import run_cmd from aider.scrape import Scraper, install_playwright from aider.utils import is_image_file +from aider.stats import hash_len, get_all_commit_hashes_between_tags, get_commit_authors, get_counts_for_file from .dump import dump # noqa: F401 @@ -1520,6 +1521,9 @@ class Commands: self.io.tool_error("No main or master branch found. Please specify a revision.") return source_revision, target_revision = args.split("..") if ".." in args else (args, "HEAD") + commits = get_all_commit_hashes_between_tags(source_revision, target_revision) + commits = [commit[:hash_len] for commit in commits] if commits else [] + authors = get_commit_authors(commits) # Get files changed between revisions diff_files = self.coder.repo.repo.git.diff( @@ -1535,49 +1539,29 @@ class Commands: '.ttf', '.otf', '.woff', '.woff2', '.eot' # fonts ))] self.io.tool_output(f"Found {len(files)} non-binary tracked files in the repository.") - - total_lines = 0 - aider_lines = 0 - + + all_file_counts = {} + grand_total = defaultdict(int) + aider_total = 0 for file in files: - try: - # Run git blame for each file - blame_output = self.coder.repo.repo.git.blame( - f"{source_revision}..{target_revision}", "-M", "-C", "--line-porcelain", "--", file - ) - - # Parse blame output - for line in blame_output.split('filename'): - total_lines += 1 - for field in line.split('\n'): - # Check author and committer lines for aider attribution - author_match = False - committer_match = False - if field.startswith("author ") or field.startswith("committer "): - author_match = "(aider)" in field.lower() - committer_match = "(aider)" in field.lower() - if author_match or committer_match: - aider_lines += 1 - - except Exception as e: - if "no such path" not in str(e).lower(): - self.io.tool_error(f"Error processing {file}: {e}") + file_counts = get_counts_for_file(source_revision, target_revision, authors, file) + if file_counts: + all_file_counts[file] = file_counts + for author, count in file_counts.items(): + grand_total[author] += count + if "(aider)" in author.lower(): + aider_total += count + total_lines = sum(grand_total.values()) + aider_percentage = (aider_total / total_lines) * 100 if total_lines > 0 else 0 # Calculate percentages if total_lines > 0: - aider_percentage = (aider_lines / total_lines) * 100 - human_lines = total_lines - aider_lines - human_percentage = (human_lines / total_lines) * 100 + # Output overall statistics + self.io.tool_output(f"\nAnalysis from {source_revision} to {target_revision}:") + self.io.tool_output(f"Total lines analyzed: {total_lines:,}") + self.io.tool_output(f"Lines by aider: {aider_total:,} ({aider_percentage:.1f}%)") + self.io.tool_output(f"Lines by humans: {total_lines - aider_total:,} ({100 - aider_percentage:.1f}%)") - # Display results - self.io.tool_output("\nCode contribution statistics:") - self.io.tool_output(f"Total lines of code: {total_lines:,}") - self.io.tool_output( - f"Human-written code: {human_lines:,} lines ({human_percentage:.1f}%)" - ) - self.io.tool_output( - f"Aider-written code: {aider_lines:,} lines ({aider_percentage:.1f}%)" - ) else: self.io.tool_output("No lines of code found in the repository.") diff --git a/aider/stats.py b/aider/stats.py new file mode 100644 index 000000000..457459782 --- /dev/null +++ b/aider/stats.py @@ -0,0 +1,84 @@ +import subprocess +import sys + +from collections import defaultdict + +hash_len = len("44e6fefc2") + +def run(cmd): + # Get all commit hashes since the specified tag + result = subprocess.run(cmd, capture_output=True, text=True, check=True) + return result.stdout + +def get_all_commit_hashes_between_tags(start_tag, end_tag=None): + if end_tag: + res = run(["git", "rev-list", f"{start_tag}..{end_tag}"]) + else: + res = run(["git", "rev-list", f"{start_tag}..HEAD"]) + + if res: + commit_hashes = res.strip().split("\n") + return commit_hashes + +def get_commit_authors(commits): + commit_to_author = dict() + for commit in commits: + author = run(["git", "show", "-s", "--format=%an", commit]).strip() + commit_message = run(["git", "show", "-s", "--format=%s", commit]).strip() + if commit_message.lower().startswith("aider:"): + author += " (aider)" + commit_to_author[commit] = author + return commit_to_author + +def get_counts_for_file(start_tag, end_tag, authors, fname): + try: + if end_tag: + text = run( + [ + "git", + "blame", + "-M100", # Detect moved lines within a file with 100% similarity + "-C100", # Detect moves across files with 100% similarity + "-C", # Increase detection effort + "-C", # Increase detection effort even more + "--abbrev=9", + f"{start_tag}..{end_tag}", + "--", + fname, + ] + ) + else: + text = run( + [ + "git", + "blame", + "-M100", # Detect moved lines within a file with 100% similarity + "-C100", # Detect moves across files with 100% similarity + "-C", # Increase detection effort + "-C", # Increase detection effort even more + "--abbrev=9", + f"{start_tag}..HEAD", + "--", + fname, + ] + ) + if not text: + return None + text = text.splitlines() + line_counts = defaultdict(int) + for line in text: + if line.startswith("^"): + continue + hsh = line[:hash_len] + author = authors.get(hsh, "Unknown") + line_counts[author] += 1 + + return dict(line_counts) + except subprocess.CalledProcessError as e: + if "no such path" in str(e).lower(): + # File doesn't exist in this revision range, which is okay + return None + else: + # Some other error occurred + print(f"Warning: Unable to blame file {fname}. Error: {e}", file=sys.stderr) + return None \ No newline at end of file diff --git a/scripts/blame.py b/scripts/blame.py index 40a561d8a..fa67c28c1 100755 --- a/scripts/blame.py +++ b/scripts/blame.py @@ -2,8 +2,6 @@ import argparse import os -import subprocess -import sys from collections import defaultdict from datetime import datetime from operator import itemgetter @@ -12,6 +10,8 @@ import semver import yaml from tqdm import tqdm +from aider.stats import get_all_commit_hashes_between_tags, run, hash_len, get_commit_authors, get_counts_for_file + website_files = [ "aider/website/index.html", "aider/website/share/index.md", @@ -68,36 +68,6 @@ def blame(start_tag, end_tag=None): return all_file_counts, grand_total, total_lines, aider_total, aider_percentage, end_date -def get_all_commit_hashes_between_tags(start_tag, end_tag=None): - if end_tag: - res = run(["git", "rev-list", f"{start_tag}..{end_tag}"]) - else: - res = run(["git", "rev-list", f"{start_tag}..HEAD"]) - - if res: - commit_hashes = res.strip().split("\n") - return commit_hashes - - -def run(cmd): - # Get all commit hashes since the specified tag - result = subprocess.run(cmd, capture_output=True, text=True, check=True) - return result.stdout - - -def get_commit_authors(commits): - commit_to_author = dict() - for commit in commits: - author = run(["git", "show", "-s", "--format=%an", commit]).strip() - commit_message = run(["git", "show", "-s", "--format=%s", commit]).strip() - if commit_message.lower().startswith("aider:"): - author += " (aider)" - commit_to_author[commit] = author - return commit_to_author - - -hash_len = len("44e6fefc2") - def process_all_tags_since(start_tag): tags = get_all_tags_since(start_tag)