diff --git a/aider/args.py b/aider/args.py
index 08c9bde76..390fcd341 100644
--- a/aider/args.py
+++ b/aider/args.py
@@ -553,6 +553,13 @@ def get_parser(default_config_files, git_root):
         help="Run tests, fix problems found and then exit",
         default=False,
     )
+    group.add_argument(
+        "--stats",
+        metavar="REVISIONS",
+        nargs="?",
+        const="",
+        help="Show code change statistics between revisions (default: vs main/master) and exit",
+    )
 
     ##########
     group = parser.add_argument_group("Analytics")
diff --git a/aider/commands.py b/aider/commands.py
index aaf6d7ddd..c44eb6f0c 100644
--- a/aider/commands.py
+++ b/aider/commands.py
@@ -4,7 +4,7 @@ import re
 import subprocess
 import sys
 import tempfile
-from collections import OrderedDict
+from collections import OrderedDict, defaultdict
 from os.path import expanduser
 from pathlib import Path
 
@@ -23,6 +23,12 @@ from aider.repo import ANY_GIT_ERROR
 from aider.run_cmd import run_cmd
 from aider.scrape import Scraper, install_playwright
+from aider.stats import (
+    get_all_commit_hashes_between_tags,
+    get_commit_authors,
+    get_counts_for_file,
+    hash_len,
+)
 from aider.utils import is_image_file
 
 from .dump import dump  # noqa: F401
 
@@ -1484,6 +1490,121 @@ class Commands:
         "Toggle multiline mode (swaps behavior of Enter and Meta+Enter)"
         self.io.toggle_multiline_mode()
 
+    def cmd_stats(self, args):
+        """Show statistics about code changes and aider's contributions using git blame.
+
+        Usage:
+          /stats                  Compare HEAD against the main/master branch
+          /stats <revision>       Compare HEAD against a specific revision
+          /stats <rev1>..<rev2>   Compare two specific revisions
+
+        Examples:
+          /stats                  Show stats vs main/master branch
+          /stats HEAD~5           Show stats vs 5 commits ago
+          /stats v1.0.0           Show stats vs version 1.0.0
+          /stats main..HEAD       Show stats between main and current HEAD
+
+        Lines added in the revision range are counted with git blame. A line is attributed
+        to aider when the author name of its commit contains "(aider)" or the commit message
+        starts with "aider:". Files with common binary extensions (images, audio, video,
+        documents, archives, fonts) are excluded from the analysis.
+        """
+        if not self.coder.repo:
+            self.io.tool_error("No git repository found.")
+            return
+
+        # Files with these extensions are skipped: per-line blame is meaningless for them
+        # fmt: off
+        binary_extensions = (
+            ".jpg", ".jpeg", ".png", ".gif", ".bmp", ".tiff", ".ico", ".svg",  # images
+            ".mp3", ".wav", ".ogg", ".m4a", ".flac",  # audio
+            ".mp4", ".avi", ".mov", ".wmv", ".flv", ".webm",  # video
+            ".pdf", ".doc", ".docx", ".ppt", ".pptx", ".xls", ".xlsx",  # documents
+            ".zip", ".tar", ".gz", ".7z", ".rar",  # archives
+            ".ttf", ".otf", ".woff", ".woff2", ".eot",  # fonts
+        )
+        # fmt: on
+
+        try:
+            repo = self.coder.repo.repo
+            # Run git from the work tree root: the paths printed by `git diff --name-only`
+            # are relative to it, not to the directory aider was started in.
+            repo_dir = repo.working_tree_dir
+
+            args = (args or "").strip()
+            if not args:
+                # Default to comparing against main/master branch
+                for default_branch in ("main", "master"):
+                    try:
+                        repo.rev_parse(default_branch)
+                    except Exception:
+                        # This branch cannot be resolved; try the next candidate
+                        continue
+                    args = default_branch
+                    break
+                if not args:
+                    self.io.tool_error(
+                        "No main or master branch found. Please specify a revision."
+                    )
+                    return
+
+            # "rev" means "rev..HEAD"; like git, treat an omitted side of ".." as HEAD
+            source_revision, _, target_revision = args.partition("..")
+            source_revision = source_revision or "HEAD"
+            target_revision = target_revision or "HEAD"
+
+            commits = get_all_commit_hashes_between_tags(
+                source_revision, target_revision, cwd=repo_dir
+            )
+            if not commits:
+                self.io.tool_error(
+                    "There are no commits between the specified revisions from"
+                    f" {source_revision} to {target_revision}."
+                )
+                return
+
+            # git blame prints abbreviated hashes, so key the author map the same way
+            commits = [commit[:hash_len] for commit in commits]
+            authors = get_commit_authors(commits, cwd=repo_dir)
+
+            # Get files changed between revisions
+            diff_files = repo.git.diff(
+                "--name-only", f"{source_revision}..{target_revision}"
+            ).splitlines()
+            files = [f for f in diff_files if not f.lower().endswith(binary_extensions)]
+            self.io.tool_output(f"Found {len(files)} non-binary tracked files in the repository.")
+
+            grand_total = defaultdict(int)
+            for file in files:
+                file_counts = get_counts_for_file(
+                    source_revision, target_revision, authors, file, cwd=repo_dir
+                )
+                if not file_counts:
+                    continue
+                for author, count in file_counts.items():
+                    grand_total[author] += count
+
+            total_lines = sum(grand_total.values())
+            if total_lines == 0:
+                self.io.tool_output("No lines of code found in the repository.")
+                return
+
+            aider_total = sum(
+                count for author, count in grand_total.items() if "(aider)" in author.lower()
+            )
+            aider_percentage = aider_total / total_lines * 100
+            human_total = total_lines - aider_total
+
+            # Output overall statistics
+            self.io.tool_output(f"\nAnalysis from {source_revision} to {target_revision}:")
+            self.io.tool_output(f"Total lines analyzed: {total_lines:,}")
+            self.io.tool_output(f"Lines by aider: {aider_total:,} ({aider_percentage:.1f}%)")
+            self.io.tool_output(
+                f"Lines by humans: {human_total:,} ({100 - aider_percentage:.1f}%)"
+            )
+        except Exception as e:
+            self.io.tool_error(f"Error analyzing aider statistics: {e}")
+
     def cmd_copy(self, args):
         "Copy the last assistant message to the clipboard"
         all_messages = self.coder.done_messages + self.coder.cur_messages
diff --git a/aider/main.py b/aider/main.py
index ea344f0ba..8ae9693b9 100644
--- a/aider/main.py
+++ b/aider/main.py
@@ -1142,6 +1142,12 @@ def main(argv=None, input=None, output=None, force_git_root=None, return_coder=F
         analytics.event("exit", reason="Completed --message-file")
         return
 
+    if args.stats is not None:
+        # A bare --stats yields "", which cmd_stats treats as "compare with main/master"
+        commands.cmd_stats(args.stats)
+        analytics.event("exit", reason="Completed --stats")
+        return
+
     if args.exit:
         analytics.event("exit", reason="Exit flag set")
         return
diff --git a/aider/stats.py b/aider/stats.py
new file mode 100644
index 000000000..f63f2b26e
--- /dev/null
+++ b/aider/stats.py
@@ -0,0 +1,115 @@
+"""Helpers for attributing lines of code to commit authors using git blame."""
+
+import subprocess
+import sys
+from collections import defaultdict
+
+# Length of abbreviated git hash used in blame output
+hash_len = len("44e6fefc2")
+
+
+def run(cmd, cwd=None):
+    """Execute a git command and return its output.
+
+    Args:
+        cmd: List containing the command and its arguments
+        cwd: Directory to run the command in (defaults to the current directory)
+
+    Returns:
+        String output of the command
+
+    Raises:
+        subprocess.CalledProcessError: If the command exits with a non-zero status
+    """
+    result = subprocess.run(cmd, capture_output=True, text=True, check=True, cwd=cwd)
+    return result.stdout
+
+
+def get_all_commit_hashes_between_tags(start_tag, end_tag=None, cwd=None):
+    """Get all commit hashes between two tags or from a tag to HEAD.
+
+    Args:
+        start_tag: Starting tag or commit hash
+        end_tag: Ending tag or commit hash (defaults to HEAD)
+        cwd: Directory to run git in (defaults to the current directory)
+
+    Returns:
+        List of commit hashes or None if no commits found
+    """
+    res = run(["git", "rev-list", f"{start_tag}..{end_tag or 'HEAD'}"], cwd=cwd)
+    # split() drops blank entries, so whitespace-only output also means "no commits"
+    commit_hashes = res.split()
+    return commit_hashes or None
+
+
+def get_commit_authors(commits, cwd=None):
+    """Map commit hashes to their authors, marking aider-generated commits.
+
+    Args:
+        commits: List of commit hashes
+        cwd: Directory to run git in (defaults to the current directory)
+
+    Returns:
+        Dictionary mapping commit hashes to author names; " (aider)" is appended
+        to the name when the commit subject starts with "aider:"
+    """
+    commit_to_author = {}
+    for commit in commits:
+        author = run(["git", "show", "-s", "--format=%an", commit], cwd=cwd).strip()
+        commit_message = run(["git", "show", "-s", "--format=%s", commit], cwd=cwd).strip()
+        if commit_message.lower().startswith("aider:"):
+            author += " (aider)"
+        commit_to_author[commit] = author
+    return commit_to_author
+
+
+def get_counts_for_file(start_tag, end_tag, authors, fname, cwd=None):
+    """Count lines attributed to each author in a file using git blame.
+
+    Args:
+        start_tag: Starting tag or commit hash
+        end_tag: Ending tag or commit hash (defaults to HEAD)
+        authors: Dictionary mapping abbreviated commit hashes to author names
+        fname: File path to analyze
+        cwd: Directory to run git in (defaults to the current directory)
+
+    Returns:
+        Dictionary mapping author names to line counts, or None if the file
+        could not be blamed or produced no output
+    """
+    cmd = [
+        "git",
+        "blame",
+        "-M100",  # Follow in-file moves of at least 100 alphanumeric characters
+        "-C100",  # Same threshold for lines moved/copied from files changed in the same commit
+        "-C",  # Also look for copies in the commit that created the file
+        "-C",  # ...and in any other commit
+        "--abbrev=9",
+        f"{start_tag}..{end_tag or 'HEAD'}",
+        "--",
+        fname,
+    ]
+    try:
+        text = run(cmd, cwd=cwd)
+    except subprocess.CalledProcessError as e:
+        # str(e) only reports the exit status; git's message is in the captured stderr
+        stderr = e.stderr or ""
+        if isinstance(stderr, bytes):
+            stderr = stderr.decode(errors="replace")
+        if "no such path" not in stderr.lower():
+            # A file missing from the revision range is expected; report anything else
+            print(f"Warning: Unable to blame file {fname}. Error: {e}", file=sys.stderr)
+        return None
+
+    if not text:
+        return None
+
+    line_counts = defaultdict(int)
+    for line in text.splitlines():
+        # A leading "^" marks a boundary commit, i.e. a line that predates start_tag
+        if line.startswith("^"):
+            continue
+        author = authors.get(line[:hash_len], "Unknown")
+        line_counts[author] += 1
+
+    return dict(line_counts)
diff --git a/scripts/blame.py b/scripts/blame.py
index 40a561d8a..fa67c28c1 100755
--- a/scripts/blame.py
+++ b/scripts/blame.py
@@ -2,8 +2,6 @@
 
 import argparse
 import os
-import subprocess
-import sys
 from collections import defaultdict
 from datetime import datetime
 from operator import itemgetter
@@ -12,6 +10,14 @@ import semver
 import yaml
 from tqdm import tqdm
 
+from aider.stats import (
+    get_all_commit_hashes_between_tags,
+    get_commit_authors,
+    get_counts_for_file,
+    hash_len,
+    run,
+)
+
 website_files = [
     "aider/website/index.html",
     "aider/website/share/index.md",
@@ -68,36 +74,6 @@ def blame(start_tag, end_tag=None):
     return all_file_counts, grand_total, total_lines, aider_total, aider_percentage, end_date
 
 
-def get_all_commit_hashes_between_tags(start_tag, end_tag=None):
-    if end_tag:
-        res = run(["git", "rev-list", f"{start_tag}..{end_tag}"])
-    else:
-        res = run(["git", "rev-list", f"{start_tag}..HEAD"])
-
-    if res:
-        commit_hashes = res.strip().split("\n")
-        return commit_hashes
-
-
-def run(cmd):
-    # Get all commit hashes since the specified tag
-    result = subprocess.run(cmd, capture_output=True, text=True, check=True)
-    return result.stdout
-
-
-def get_commit_authors(commits):
-    commit_to_author = dict()
-    for commit in commits:
-        author = run(["git", "show", "-s", "--format=%an", commit]).strip()
-        commit_message = run(["git", "show", "-s", "--format=%s", commit]).strip()
-        if commit_message.lower().startswith("aider:"):
-            author += " (aider)"
-        commit_to_author[commit] = author
-    return commit_to_author
-
-
-hash_len = len("44e6fefc2")
-
 
 def process_all_tags_since(start_tag):
     tags = get_all_tags_since(start_tag)
diff --git a/tests/test_stats.py b/tests/test_stats.py
new file mode 100644
index 000000000..86864f0a0
--- /dev/null
+++ b/tests/test_stats.py
@@ -0,0 +1,139 @@
+import subprocess
+import unittest
+from unittest.mock import patch
+
+from aider.stats import (
+    get_all_commit_hashes_between_tags,
+    get_commit_authors,
+    get_counts_for_file,
+    hash_len,
+)
+
+
+def expected_blame_cmd(revisions, fname):
+    """Return the git blame command get_counts_for_file is expected to run."""
+    return [
+        "git",
+        "blame",
+        "-M100",
+        "-C100",
+        "-C",
+        "-C",
+        "--abbrev=9",
+        revisions,
+        "--",
+        fname,
+    ]
+
+
+class TestStats(unittest.TestCase):
+    @patch("aider.stats.run")
+    def test_get_all_commit_hashes_between_tags(self, mock_run):
+        # Test with end_tag
+        mock_run.return_value = "commit1\ncommit2\ncommit3"
+        result = get_all_commit_hashes_between_tags("v1.0.0", "v2.0.0")
+        mock_run.assert_called_with(["git", "rev-list", "v1.0.0..v2.0.0"], cwd=None)
+        self.assertEqual(result, ["commit1", "commit2", "commit3"])
+
+        # Test without end_tag (defaults to HEAD)
+        mock_run.return_value = "commit4\ncommit5"
+        result = get_all_commit_hashes_between_tags("v1.0.0")
+        mock_run.assert_called_with(["git", "rev-list", "v1.0.0..HEAD"], cwd=None)
+        self.assertEqual(result, ["commit4", "commit5"])
+
+        # Test with empty result
+        mock_run.return_value = ""
+        result = get_all_commit_hashes_between_tags("v1.0.0", "v1.0.0")
+        self.assertIsNone(result)
+
+        # Test that the working directory is forwarded to git
+        mock_run.return_value = "commit6\n"
+        result = get_all_commit_hashes_between_tags("v1.0.0", cwd="/repo")
+        mock_run.assert_called_with(["git", "rev-list", "v1.0.0..HEAD"], cwd="/repo")
+        self.assertEqual(result, ["commit6"])
+
+    @patch("aider.stats.run")
+    def test_get_commit_authors(self, mock_run):
+        # Setup mock responses for git show commands
+        def mock_run_side_effect(cmd, cwd=None):
+            if cmd[0:3] == ["git", "show", "-s"]:
+                if "--format=%an" in cmd:
+                    if cmd[-1] == "commit1":
+                        return "Author1\n"
+                    elif cmd[-1] == "commit2":
+                        return "Author2\n"
+                elif "--format=%s" in cmd:
+                    if cmd[-1] == "commit1":
+                        return "Normal commit message\n"
+                    elif cmd[-1] == "commit2":
+                        return "aider: AI generated commit\n"
+            return ""
+
+        mock_run.side_effect = mock_run_side_effect
+
+        # Test author attribution with aider tag
+        commits = ["commit1", "commit2"]
+        result = get_commit_authors(commits)
+
+        expected = {
+            "commit1": "Author1",
+            "commit2": "Author2 (aider)",
+        }
+        self.assertEqual(result, expected)
+
+    @patch("aider.stats.run")
+    def test_get_counts_for_file(self, mock_run):
+        # Setup mock for git blame
+        blame_lines = [
+            f"{'0' * hash_len} (Author1 2023-01-01 12:00:00 +0000 1) Line 1",
+            f"{'1' * hash_len} (Author2 2023-01-02 12:00:00 +0000 2) Line 2",
+            f"{'1' * hash_len} (Author2 2023-01-02 12:00:00 +0000 3) Line 3",
+            # Boundary line from before start_tag: must not be counted
+            f"^{'2' * hash_len} (Author3 2023-01-03 12:00:00 +0000 4) Line 4",
+        ]
+        mock_run.return_value = "\n".join(blame_lines)
+
+        # Mock authors dictionary
+        authors = {
+            "0" * hash_len: "Author1",
+            "1" * hash_len: "Author2 (aider)",
+        }
+
+        # Test with end_tag
+        result = get_counts_for_file("v1.0.0", "v2.0.0", authors, "test_file.py")
+        mock_run.assert_called_with(expected_blame_cmd("v1.0.0..v2.0.0", "test_file.py"), cwd=None)
+
+        expected = {
+            "Author1": 1,
+            "Author2 (aider)": 2,
+        }
+        self.assertEqual(result, expected)
+
+        # Test with no end_tag
+        result = get_counts_for_file("v1.0.0", None, authors, "test_file.py")
+        mock_run.assert_called_with(expected_blame_cmd("v1.0.0..HEAD", "test_file.py"), cwd=None)
+        self.assertEqual(result, expected)
+
+    @patch("aider.stats.run")
+    def test_get_counts_for_file_error_handling(self, mock_run):
+        # Test file not found error: expected, so it must be silent
+        mock_run.side_effect = subprocess.CalledProcessError(
+            1, "git blame", stderr="fatal: no such path 'nonexistent.py' in HEAD"
+        )
+        with patch("sys.stderr") as mock_stderr:
+            result = get_counts_for_file("v1.0.0", "v2.0.0", {}, "nonexistent.py")
+        self.assertIsNone(result)
+        mock_stderr.write.assert_not_called()
+
+        # Test other git error: reported as a warning on stderr
+        mock_run.side_effect = subprocess.CalledProcessError(
+            1, "git blame", stderr="some other git error"
+        )
+        with patch("sys.stderr") as mock_stderr:
+            result = get_counts_for_file("v1.0.0", "v2.0.0", {}, "test_file.py")
+        self.assertIsNone(result)
+        mock_stderr.write.assert_called()
+
+
+if __name__ == "__main__":
+    unittest.main()