This commit is contained in:
Josix 2025-05-16 19:38:37 +08:00 committed by GitHub
commit eb6fa89e65
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 345 additions and 33 deletions

View file

@ -553,6 +553,13 @@ def get_parser(default_config_files, git_root):
help="Run tests, fix problems found and then exit", help="Run tests, fix problems found and then exit",
default=False, default=False,
) )
group.add_argument(
"--stats",
metavar="REVISIONS",
nargs="?",
const="HEAD",
help="Show code changes statistics between revisions",
)
########## ##########
group = parser.add_argument_group("Analytics") group = parser.add_argument_group("Analytics")

View file

@ -4,7 +4,7 @@ import re
import subprocess import subprocess
import sys import sys
import tempfile import tempfile
from collections import OrderedDict from collections import OrderedDict, defaultdict
from os.path import expanduser from os.path import expanduser
from pathlib import Path from pathlib import Path
@ -23,6 +23,7 @@ from aider.repo import ANY_GIT_ERROR
from aider.run_cmd import run_cmd from aider.run_cmd import run_cmd
from aider.scrape import Scraper, install_playwright from aider.scrape import Scraper, install_playwright
from aider.utils import is_image_file from aider.utils import is_image_file
from aider.stats import hash_len, get_all_commit_hashes_between_tags, get_commit_authors, get_counts_for_file
from .dump import dump # noqa: F401 from .dump import dump # noqa: F401
@ -1484,6 +1485,95 @@ class Commands:
"Toggle multiline mode (swaps behavior of Enter and Meta+Enter)" "Toggle multiline mode (swaps behavior of Enter and Meta+Enter)"
self.io.toggle_multiline_mode() self.io.toggle_multiline_mode()
def cmd_stats(self, args):
    """Show statistics about code changes and aider's contributions by counting lines of code through git blame.

    Usage:
      /stats                  Compare against main/master branch
      /stats <revision>       Compare against specific revision
      /stats rev1..rev2       Compare between two specific revisions

    Examples:
      /stats                  Show stats vs main/master branch
      /stats HEAD~5           Show stats vs 5 commits ago
      /stats v1.0.0           Show stats vs version 1.0.0
      /stats main..HEAD       Show stats between main and current HEAD

    Lines are attributed to aider when the git author or committer contains "(aider)".
    Binary files (images, audio, etc.) are excluded from the analysis.
    """
    if not self.coder.repo:
        self.io.tool_error("No git repository found.")
        return

    # Extensions treated as binary/media and left out of the blame pass.
    # Kept as a tuple so it can be handed straight to str.endswith().
    binary_extensions = (
        ".jpg", ".jpeg", ".png", ".gif", ".bmp", ".tiff", ".ico", ".svg",  # images
        ".mp3", ".wav", ".ogg", ".m4a", ".flac",  # audio
        ".mp4", ".avi", ".mov", ".wmv", ".flv", ".webm",  # video
        ".pdf", ".doc", ".docx", ".ppt", ".pptx", ".xls", ".xlsx",  # documents
        ".zip", ".tar", ".gz", ".7z", ".rar",  # archives
        ".ttf", ".otf", ".woff", ".woff2", ".eot",  # fonts
    )

    try:
        # Treat None / whitespace-only input the same as "no argument given".
        args = (args or "").strip()

        if not args:
            # Default to comparing against the main/master branch, whichever exists.
            for default_branch in ("main", "master"):
                try:
                    self.coder.repo.repo.rev_parse(default_branch)
                except Exception:
                    # Branch does not resolve; try the next candidate.  Deliberately
                    # not a bare ``except`` so Ctrl-C / SystemExit still propagate.
                    continue
                args = default_branch
                break

            if not args:
                self.io.tool_error("No main or master branch found. Please specify a revision.")
                return

        # "rev1..rev2" selects an explicit range; a single revision is compared to HEAD.
        source_revision, target_revision = args.split("..") if ".." in args else (args, "HEAD")

        commits = get_all_commit_hashes_between_tags(source_revision, target_revision)
        # Truncate hashes to the same length used when reading git blame output.
        commits = [commit[:hash_len] for commit in commits] if commits else []
        if not commits:
            self.io.tool_error(
                f"There are no commits between the specified revisions from {source_revision} to {target_revision}."
            )
            return

        authors = get_commit_authors(commits)

        # Files changed between the two revisions.
        diff_files = self.coder.repo.repo.git.diff(
            "--name-only", f"{source_revision}..{target_revision}"
        ).splitlines()

        # Filter out media files by extension (case-insensitive).
        files = [f for f in diff_files if not f.lower().endswith(binary_extensions)]
        self.io.tool_output(f"Found {len(files)} non-binary tracked files in the repository.")

        grand_total = defaultdict(int)
        aider_total = 0
        for file in files:
            file_counts = get_counts_for_file(source_revision, target_revision, authors, file)
            if not file_counts:
                continue
            for author, count in file_counts.items():
                grand_total[author] += count
                if "(aider)" in author.lower():
                    aider_total += count

        total_lines = sum(grand_total.values())
        if total_lines > 0:
            aider_percentage = (aider_total / total_lines) * 100
            # Output overall statistics
            self.io.tool_output(f"\nAnalysis from {source_revision} to {target_revision}:")
            self.io.tool_output(f"Total lines analyzed: {total_lines:,}")
            self.io.tool_output(f"Lines by aider: {aider_total:,} ({aider_percentage:.1f}%)")
            self.io.tool_output(f"Lines by humans: {total_lines - aider_total:,} ({100 - aider_percentage:.1f}%)")
        else:
            self.io.tool_output("No lines of code found in the repository.")

    except Exception as e:
        # Top-level boundary for the command: report the failure instead of crashing the session.
        self.io.tool_error(f"Error analyzing aider statistics: {e}")
def cmd_copy(self, args): def cmd_copy(self, args):
"Copy the last assistant message to the clipboard" "Copy the last assistant message to the clipboard"
all_messages = self.coder.done_messages + self.coder.cur_messages all_messages = self.coder.done_messages + self.coder.cur_messages

View file

@ -1142,6 +1142,11 @@ def main(argv=None, input=None, output=None, force_git_root=None, return_coder=F
analytics.event("exit", reason="Completed --message-file") analytics.event("exit", reason="Completed --message-file")
return return
if args.stats:
commands.cmd_stats(args.stats)
analytics.event("exit", reason="Completed --stats")
return
if args.exit: if args.exit:
analytics.event("exit", reason="Exit flag set") analytics.event("exit", reason="Exit flag set")
return return

121
aider/stats.py Normal file
View file

@ -0,0 +1,121 @@
import subprocess
import sys
from collections import defaultdict
# Length of abbreviated git hash used in blame output.
# The first `hash_len` characters of each `git blame` line are used as the
# lookup key into the commit -> author mapping, so commit hashes used as keys
# must be truncated to this same length.
hash_len = len("44e6fefc2")
def run(cmd):
    """Run an external command and hand back what it wrote to stdout.

    Args:
        cmd: List holding the executable followed by its arguments.

    Returns:
        The command's captured standard output as text.

    Raises:
        subprocess.CalledProcessError: If the command exits with a non-zero status.
    """
    completed = subprocess.run(
        cmd,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        text=True,
        check=True,
    )
    return completed.stdout
def get_all_commit_hashes_between_tags(start_tag, end_tag=None):
    """List the commits reachable from the end revision but not from the start.

    Args:
        start_tag: Tag or commit hash marking the (exclusive) start of the range.
        end_tag: Tag or commit hash marking the end of the range; HEAD is used
            when this is omitted or empty.

    Returns:
        List of commit hashes as printed by ``git rev-list``, or None when the
        command produced no output.
    """
    upper_bound = end_tag if end_tag else "HEAD"
    output = run(["git", "rev-list", f"{start_tag}..{upper_bound}"])
    if not output:
        return None
    return output.strip().split("\n")
def get_commit_authors(commits):
    """Build a commit -> author lookup, tagging commits whose subject marks them as aider's.

    Args:
        commits: List of commit hashes.

    Returns:
        Dictionary keyed by the given hashes. Each value is the commit's author
        name, with " (aider)" appended when the commit subject starts with
        "aider:" (case-insensitive).
    """
    authors_by_commit = {}
    for sha in commits:
        name = run(["git", "show", "-s", "--format=%an", sha]).strip()
        subject = run(["git", "show", "-s", "--format=%s", sha]).strip()
        is_aider_commit = subject.lower().startswith("aider:")
        authors_by_commit[sha] = name + " (aider)" if is_aider_commit else name
    return authors_by_commit
def get_counts_for_file(start_tag, end_tag, authors, fname):
    """Count lines attributed to each author in a file using git blame.

    Args:
        start_tag: Starting tag or commit hash
        end_tag: Ending tag or commit hash (defaults to HEAD when falsy)
        authors: Dictionary mapping abbreviated commit hashes to author names
        fname: File path to analyze

    Returns:
        Dictionary mapping author names to line counts. Lines whose commit is
        not in ``authors`` are counted under "Unknown". Returns None when git
        blame produced no output or failed (for example, the path does not
        exist in the revision range).
    """
    revision_range = f"{start_tag}..{end_tag or 'HEAD'}"
    try:
        text = run(
            [
                "git",
                "blame",
                "-M100",  # Detect moved lines within a file with 100% similarity
                "-C100",  # Detect moves across files with 100% similarity
                "-C",  # Increase detection effort
                "-C",  # Increase detection effort even more
                "--abbrev=9",
                revision_range,
                "--",
                fname,
            ]
        )
    except subprocess.CalledProcessError as e:
        # str(e) only contains the command and exit status, so git's actual
        # diagnostic has to be read from the captured stderr.
        stderr = e.stderr or ""
        if isinstance(stderr, bytes):
            stderr = stderr.decode(errors="replace")
        if "no such path" in stderr.lower() or "no such path" in str(e).lower():
            # File doesn't exist in this revision range, which is okay
            return None
        # Some other error occurred
        print(f"Warning: Unable to blame file {fname}. Error: {e}", file=sys.stderr)
        return None

    if not text:
        return None

    line_counts = defaultdict(int)
    for line in text.splitlines():
        # A leading caret marks a boundary commit, i.e. a line that predates
        # the start of the range, so it is not counted.
        if line.startswith("^"):
            continue
        hsh = line[:hash_len]
        author = authors.get(hsh, "Unknown")
        line_counts[author] += 1
    return dict(line_counts)

View file

@ -2,8 +2,6 @@
import argparse import argparse
import os import os
import subprocess
import sys
from collections import defaultdict from collections import defaultdict
from datetime import datetime from datetime import datetime
from operator import itemgetter from operator import itemgetter
@ -12,6 +10,8 @@ import semver
import yaml import yaml
from tqdm import tqdm from tqdm import tqdm
from aider.stats import get_all_commit_hashes_between_tags, run, hash_len, get_commit_authors, get_counts_for_file
website_files = [ website_files = [
"aider/website/index.html", "aider/website/index.html",
"aider/website/share/index.md", "aider/website/share/index.md",
@ -68,36 +68,6 @@ def blame(start_tag, end_tag=None):
return all_file_counts, grand_total, total_lines, aider_total, aider_percentage, end_date return all_file_counts, grand_total, total_lines, aider_total, aider_percentage, end_date
def get_all_commit_hashes_between_tags(start_tag, end_tag=None):
if end_tag:
res = run(["git", "rev-list", f"{start_tag}..{end_tag}"])
else:
res = run(["git", "rev-list", f"{start_tag}..HEAD"])
if res:
commit_hashes = res.strip().split("\n")
return commit_hashes
def run(cmd):
# Get all commit hashes since the specified tag
result = subprocess.run(cmd, capture_output=True, text=True, check=True)
return result.stdout
def get_commit_authors(commits):
commit_to_author = dict()
for commit in commits:
author = run(["git", "show", "-s", "--format=%an", commit]).strip()
commit_message = run(["git", "show", "-s", "--format=%s", commit]).strip()
if commit_message.lower().startswith("aider:"):
author += " (aider)"
commit_to_author[commit] = author
return commit_to_author
hash_len = len("44e6fefc2")
def process_all_tags_since(start_tag): def process_all_tags_since(start_tag):
tags = get_all_tags_since(start_tag) tags = get_all_tags_since(start_tag)

119
tests/test_stats.py Normal file
View file

@ -0,0 +1,119 @@
import unittest
import subprocess
from unittest.mock import patch, MagicMock
from collections import defaultdict
from aider.stats import (
get_all_commit_hashes_between_tags,
get_commit_authors,
get_counts_for_file,
hash_len,
)
class TestStats(unittest.TestCase):
    """Unit tests for the git helpers in aider.stats, with ``run`` patched out."""

    # Leading arguments get_counts_for_file passes to git before the revision range.
    BLAME_CMD = ["git", "blame", "-M100", "-C100", "-C", "-C", "--abbrev=9"]

    @patch("aider.stats.run")
    def test_get_all_commit_hashes_between_tags(self, mock_run):
        # Test with end_tag
        mock_run.return_value = "commit1\ncommit2\ncommit3"
        result = get_all_commit_hashes_between_tags("v1.0.0", "v2.0.0")
        mock_run.assert_called_with(["git", "rev-list", "v1.0.0..v2.0.0"])
        self.assertEqual(result, ["commit1", "commit2", "commit3"])

        # Test without end_tag (defaults to HEAD)
        mock_run.return_value = "commit4\ncommit5"
        result = get_all_commit_hashes_between_tags("v1.0.0")
        mock_run.assert_called_with(["git", "rev-list", "v1.0.0..HEAD"])
        self.assertEqual(result, ["commit4", "commit5"])

        # Test with empty result
        mock_run.return_value = ""
        result = get_all_commit_hashes_between_tags("v1.0.0", "v1.0.0")
        self.assertIsNone(result)

    @patch("aider.stats.run")
    def test_get_commit_authors(self, mock_run):
        # Canned `git show` output keyed by (format flag, commit).
        responses = {
            ("--format=%an", "commit1"): "Author1\n",
            ("--format=%an", "commit2"): "Author2\n",
            ("--format=%s", "commit1"): "Normal commit message\n",
            ("--format=%s", "commit2"): "aider: AI generated commit\n",
        }

        def mock_run_side_effect(cmd):
            if cmd[0:3] == ["git", "show", "-s"]:
                return responses.get((cmd[3], cmd[-1]), "")
            return ""

        mock_run.side_effect = mock_run_side_effect

        # Test author attribution with aider tag
        result = get_commit_authors(["commit1", "commit2"])
        expected = {
            "commit1": "Author1",
            "commit2": "Author2 (aider)",
        }
        self.assertEqual(result, expected)

    @patch("aider.stats.run")
    def test_get_counts_for_file(self, mock_run):
        # Build the blame output line by line so every line starts with the
        # hash (or boundary caret), independent of how this file is indented.
        blame_lines = [
            f"{'0' * hash_len} (Author1 2023-01-01 12:00:00 +0000 1) Line 1",
            f"{'1' * hash_len} (Author2 2023-01-02 12:00:00 +0000 2) Line 2",
            f"{'1' * hash_len} (Author2 2023-01-02 12:00:00 +0000 3) Line 3",
            f"^{'2' * hash_len} (Author3 2023-01-03 12:00:00 +0000 4) Line 4"
            " (not counted - from before start_tag)",
        ]
        mock_run.return_value = "\n".join(blame_lines)

        # Mock authors dictionary
        authors = {
            "0" * hash_len: "Author1",
            "1" * hash_len: "Author2 (aider)",
        }
        expected = {
            "Author1": 1,
            "Author2 (aider)": 2,
        }

        # Test with end_tag
        result = get_counts_for_file("v1.0.0", "v2.0.0", authors, "test_file.py")
        mock_run.assert_called_with(self.BLAME_CMD + ["v1.0.0..v2.0.0", "--", "test_file.py"])
        self.assertEqual(result, expected)

        # Test with no end_tag (range falls back to HEAD)
        result = get_counts_for_file("v1.0.0", None, authors, "test_file.py")
        mock_run.assert_called_with(self.BLAME_CMD + ["v1.0.0..HEAD", "--", "test_file.py"])
        self.assertEqual(result, expected)

    @patch("aider.stats.run")
    def test_get_counts_for_file_error_handling(self, mock_run):
        # Test file not found error
        mock_run.side_effect = subprocess.CalledProcessError(
            1, "git blame", stderr=b"no such path 'nonexistent.py'"
        )
        with patch("sys.stderr"):  # Suppress stderr output during test
            result = get_counts_for_file("v1.0.0", "v2.0.0", {}, "nonexistent.py")
        self.assertIsNone(result)

        # Test other git error
        mock_run.side_effect = subprocess.CalledProcessError(
            1, "git blame", stderr=b"some other git error"
        )
        with patch("sys.stderr"):  # Suppress stderr output during test
            result = get_counts_for_file("v1.0.0", "v2.0.0", {}, "test_file.py")
        self.assertIsNone(result)
if __name__ == "__main__":
unittest.main()