mirror of
https://github.com/Aider-AI/aider.git
synced 2025-05-29 08:44:59 +00:00
Merge 98d114e527
into 3caab85931
This commit is contained in:
commit
eb6fa89e65
6 changed files with 345 additions and 33 deletions
|
@ -553,6 +553,13 @@ def get_parser(default_config_files, git_root):
|
|||
help="Run tests, fix problems found and then exit",
|
||||
default=False,
|
||||
)
|
||||
group.add_argument(
|
||||
"--stats",
|
||||
metavar="REVISIONS",
|
||||
nargs="?",
|
||||
const="HEAD",
|
||||
help="Show code changes statistics between revisions",
|
||||
)
|
||||
|
||||
##########
|
||||
group = parser.add_argument_group("Analytics")
|
||||
|
|
|
@ -4,7 +4,7 @@ import re
|
|||
import subprocess
|
||||
import sys
|
||||
import tempfile
|
||||
from collections import OrderedDict
|
||||
from collections import OrderedDict, defaultdict
|
||||
from os.path import expanduser
|
||||
from pathlib import Path
|
||||
|
||||
|
@ -23,6 +23,7 @@ from aider.repo import ANY_GIT_ERROR
|
|||
from aider.run_cmd import run_cmd
|
||||
from aider.scrape import Scraper, install_playwright
|
||||
from aider.utils import is_image_file
|
||||
from aider.stats import hash_len, get_all_commit_hashes_between_tags, get_commit_authors, get_counts_for_file
|
||||
|
||||
from .dump import dump # noqa: F401
|
||||
|
||||
|
@ -1484,6 +1485,95 @@ class Commands:
|
|||
"Toggle multiline mode (swaps behavior of Enter and Meta+Enter)"
|
||||
self.io.toggle_multiline_mode()
|
||||
|
||||
def cmd_stats(self, args):
    """Show statistics about code changes and aider's contributions by counting lines of code through git blame.

    Usage:
      /stats                  Compare against main/master branch
      /stats <revision>       Compare against specific revision
      /stats rev1..rev2       Compare between two specific revisions

    Examples:
      /stats                  Show stats vs main/master branch
      /stats HEAD~5           Show stats vs 5 commits ago
      /stats v1.0.0           Show stats vs version 1.0.0
      /stats main..HEAD       Show stats between main and current HEAD

    Lines are attributed to aider when the commit's author name contains "(aider)"
    or the commit message starts with "aider:".
    Files with common binary extensions (images, audio, etc.) are excluded from the analysis.
    """
    if not self.coder.repo:
        self.io.tool_error("No git repository found.")
        return

    # Extensions treated as binary/media; such files are left out of the line counts.
    binary_extensions = (
        ".jpg", ".jpeg", ".png", ".gif", ".bmp", ".tiff", ".ico", ".svg",  # images
        ".mp3", ".wav", ".ogg", ".m4a", ".flac",  # audio
        ".mp4", ".avi", ".mov", ".wmv", ".flv", ".webm",  # video
        ".pdf", ".doc", ".docx", ".ppt", ".pptx", ".xls", ".xlsx",  # documents
        ".zip", ".tar", ".gz", ".7z", ".rar",  # archives
        ".ttf", ".otf", ".woff", ".woff2", ".eot",  # fonts
    )

    try:
        args = (args or "").strip()

        if not args:
            # Default to comparing against the first of main/master that resolves.
            for default_branch in ("main", "master"):
                try:
                    self.coder.repo.repo.rev_parse(default_branch)
                except Exception:
                    # Branch does not resolve; try the next candidate. A bare
                    # `except:` here would also swallow Ctrl-C / SystemExit.
                    continue
                args = default_branch
                break

        if not args:
            self.io.tool_error("No main or master branch found. Please specify a revision.")
            return

        # "rev1..rev2" selects an explicit range; a lone revision is compared to HEAD.
        source_revision, _, target_revision = args.partition("..")
        if ".." in target_revision or target_revision.startswith("."):
            self.io.tool_error(
                f"Invalid revision range: {args}. Use <revision> or rev1..rev2."
            )
            return
        # "rev.." has an empty right-hand side, which git treats as HEAD.
        target_revision = target_revision or "HEAD"

        # NOTE(review): the aider.stats helpers invoke git without an explicit
        # working directory, while the diff below goes through
        # self.coder.repo; this assumes the process runs inside that repository.
        commits = get_all_commit_hashes_between_tags(source_revision, target_revision)
        # Abbreviate so the keys line up with the hashes sliced from blame output.
        commits = [commit[:hash_len] for commit in commits] if commits else []
        if not commits:
            self.io.tool_error(
                f"There are no commits between the specified revisions from {source_revision} to {target_revision}."
            )
            return
        authors = get_commit_authors(commits)

        # Only files touched between the two revisions are analysed.
        diff_files = self.coder.repo.repo.git.diff(
            "--name-only", f"{source_revision}..{target_revision}"
        ).splitlines()
        files = [f for f in diff_files if not f.lower().endswith(binary_extensions)]
        self.io.tool_output(
            f"Found {len(files)} non-binary files changed between"
            f" {source_revision} and {target_revision}."
        )

        grand_total = defaultdict(int)
        aider_total = 0
        for file in files:
            file_counts = get_counts_for_file(source_revision, target_revision, authors, file)
            if not file_counts:
                continue
            for author, count in file_counts.items():
                grand_total[author] += count
                if "(aider)" in author.lower():
                    aider_total += count

        total_lines = sum(grand_total.values())
        if total_lines > 0:
            aider_percentage = (aider_total / total_lines) * 100
            self.io.tool_output(f"\nAnalysis from {source_revision} to {target_revision}:")
            self.io.tool_output(f"Total lines analyzed: {total_lines:,}")
            self.io.tool_output(f"Lines by aider: {aider_total:,} ({aider_percentage:.1f}%)")
            self.io.tool_output(
                f"Lines by humans: {total_lines - aider_total:,} ({100 - aider_percentage:.1f}%)"
            )
        else:
            self.io.tool_output("No lines of code found in the repository.")

    except Exception as e:
        # Top-level command boundary: report the failure instead of crashing the session.
        self.io.tool_error(f"Error analyzing aider statistics: {e}")
|
||||
|
||||
|
||||
def cmd_copy(self, args):
|
||||
"Copy the last assistant message to the clipboard"
|
||||
all_messages = self.coder.done_messages + self.coder.cur_messages
|
||||
|
|
|
@ -1142,6 +1142,11 @@ def main(argv=None, input=None, output=None, force_git_root=None, return_coder=F
|
|||
analytics.event("exit", reason="Completed --message-file")
|
||||
return
|
||||
|
||||
if args.stats:
|
||||
commands.cmd_stats(args.stats)
|
||||
analytics.event("exit", reason="Completed --stats")
|
||||
return
|
||||
|
||||
if args.exit:
|
||||
analytics.event("exit", reason="Exit flag set")
|
||||
return
|
||||
|
|
121
aider/stats.py
Normal file
121
aider/stats.py
Normal file
|
@ -0,0 +1,121 @@
|
|||
import subprocess
|
||||
import sys
|
||||
|
||||
from collections import defaultdict
|
||||
|
||||
# Number of leading hex digits used to identify a commit when matching blame
# lines to authors; taken from a sample 9-character abbreviated hash.
_SAMPLE_ABBREVIATED_HASH = "44e6fefc2"
hash_len = len(_SAMPLE_ABBREVIATED_HASH)
|
||||
|
||||
def run(cmd):
    """Run a command and hand back whatever it wrote to standard output.

    Args:
        cmd: List containing the command and its arguments

    Returns:
        The captured stdout, decoded as text

    Raises:
        subprocess.CalledProcessError: if the command exits with a non-zero
            status (``check=True``)
    """
    completed = subprocess.run(cmd, check=True, text=True, capture_output=True)
    return completed.stdout
|
||||
|
||||
def get_all_commit_hashes_between_tags(start_tag, end_tag=None):
    """List the commits reachable from the end revision but not from the start.

    Args:
        start_tag: Starting tag or commit hash (excluded lower bound)
        end_tag: Ending tag or commit hash; HEAD is used when falsy

    Returns:
        List of commit hashes as printed by ``git rev-list``, or None when
        the command printed nothing
    """
    upper_bound = end_tag if end_tag else "HEAD"
    listing = run(["git", "rev-list", f"{start_tag}..{upper_bound}"])

    if not listing:
        return None
    return listing.strip().split("\n")
|
||||
|
||||
def get_commit_authors(commits):
    """Build a commit -> author-label lookup, tagging aider-written commits.

    Args:
        commits: List of commit hashes

    Returns:
        Dictionary mapping each given hash to its author name; the name gets
        an " (aider)" suffix when the commit subject starts with "aider:"
        (case-insensitive)
    """

    def show_field(commit, placeholder):
        # One `git show` per field, with surrounding whitespace trimmed.
        return run(["git", "show", "-s", f"--format={placeholder}", commit]).strip()

    labels = {}
    for sha in commits:
        name = show_field(sha, "%an")
        subject = show_field(sha, "%s")
        is_aider_commit = subject.lower().startswith("aider:")
        labels[sha] = f"{name} (aider)" if is_aider_commit else name
    return labels
|
||||
|
||||
def get_counts_for_file(start_tag, end_tag, authors, fname):
    """Count lines attributed to each author in a file using git blame.

    Args:
        start_tag: Starting tag or commit hash
        end_tag: Ending tag or commit hash (HEAD is used when falsy)
        authors: Dictionary mapping abbreviated commit hashes to author names
        fname: File path to analyze

    Returns:
        Dictionary mapping author names to line counts, or None when blame
        printed nothing or git failed (e.g. the file does not exist in the
        revision range). Lines whose commit is not in ``authors`` are counted
        under "Unknown".
    """
    blame_cmd = [
        "git",
        "blame",
        # The number after -M/-C is git's threshold in alphanumeric
        # characters for treating lines as moved/copied, not a percentage.
        "-M100",  # Detect lines moved or copied within the file
        "-C100",  # Also detect lines moved or copied from other files
        "-C",  # Repeating -C widens where git looks for the copy source
        "-C",
        "--abbrev=9",
        f"{start_tag}..{end_tag or 'HEAD'}",
        "--",
        fname,
    ]

    try:
        text = run(blame_cmd)
    except subprocess.CalledProcessError as e:
        # str(e) only contains the command and exit status; git's actual
        # complaint is in e.stderr, which may be str, bytes or None.
        stderr = e.stderr or ""
        if isinstance(stderr, bytes):
            stderr = stderr.decode(errors="replace")
        if "no such path" not in f"{e} {stderr}".lower():
            # Anything other than a missing path deserves a visible warning.
            detail = f" {stderr.strip()}" if stderr.strip() else ""
            print(
                f"Warning: Unable to blame file {fname}. Error: {e}{detail}",
                file=sys.stderr,
            )
        # A file missing from this revision range is expected and not reported.
        return None

    if not text:
        return None

    line_counts = defaultdict(int)
    for line in text.splitlines():
        # A leading caret marks a boundary commit, i.e. a line that predates
        # start_tag, so it is not part of the analysed range.
        if line.startswith("^"):
            continue
        author = authors.get(line[:hash_len], "Unknown")
        line_counts[author] += 1

    return dict(line_counts)
|
|
@ -2,8 +2,6 @@
|
|||
|
||||
import argparse
|
||||
import os
|
||||
import subprocess
|
||||
import sys
|
||||
from collections import defaultdict
|
||||
from datetime import datetime
|
||||
from operator import itemgetter
|
||||
|
@ -12,6 +10,8 @@ import semver
|
|||
import yaml
|
||||
from tqdm import tqdm
|
||||
|
||||
from aider.stats import get_all_commit_hashes_between_tags, run, hash_len, get_commit_authors, get_counts_for_file
|
||||
|
||||
website_files = [
|
||||
"aider/website/index.html",
|
||||
"aider/website/share/index.md",
|
||||
|
@ -68,36 +68,6 @@ def blame(start_tag, end_tag=None):
|
|||
return all_file_counts, grand_total, total_lines, aider_total, aider_percentage, end_date
|
||||
|
||||
|
||||
def get_all_commit_hashes_between_tags(start_tag, end_tag=None):
|
||||
if end_tag:
|
||||
res = run(["git", "rev-list", f"{start_tag}..{end_tag}"])
|
||||
else:
|
||||
res = run(["git", "rev-list", f"{start_tag}..HEAD"])
|
||||
|
||||
if res:
|
||||
commit_hashes = res.strip().split("\n")
|
||||
return commit_hashes
|
||||
|
||||
|
||||
def run(cmd):
|
||||
# Get all commit hashes since the specified tag
|
||||
result = subprocess.run(cmd, capture_output=True, text=True, check=True)
|
||||
return result.stdout
|
||||
|
||||
|
||||
def get_commit_authors(commits):
|
||||
commit_to_author = dict()
|
||||
for commit in commits:
|
||||
author = run(["git", "show", "-s", "--format=%an", commit]).strip()
|
||||
commit_message = run(["git", "show", "-s", "--format=%s", commit]).strip()
|
||||
if commit_message.lower().startswith("aider:"):
|
||||
author += " (aider)"
|
||||
commit_to_author[commit] = author
|
||||
return commit_to_author
|
||||
|
||||
|
||||
hash_len = len("44e6fefc2")
|
||||
|
||||
|
||||
def process_all_tags_since(start_tag):
|
||||
tags = get_all_tags_since(start_tag)
|
||||
|
|
119
tests/test_stats.py
Normal file
119
tests/test_stats.py
Normal file
|
@ -0,0 +1,119 @@
|
|||
import unittest
|
||||
import subprocess
|
||||
from unittest.mock import patch, MagicMock
|
||||
from collections import defaultdict
|
||||
|
||||
from aider.stats import (
|
||||
get_all_commit_hashes_between_tags,
|
||||
get_commit_authors,
|
||||
get_counts_for_file,
|
||||
hash_len,
|
||||
)
|
||||
|
||||
|
||||
class TestStats(unittest.TestCase):
    """Tests for the git helpers in aider.stats.

    ``aider.stats.run`` is patched in every test, so no git process is spawned.
    """

    @staticmethod
    def _expected_blame_cmd(revision_range, fname):
        # Must mirror the argument list built by get_counts_for_file,
        # including the numeric -M/-C thresholds.
        return [
            "git", "blame", "-M100", "-C100", "-C", "-C", "--abbrev=9",
            revision_range, "--", fname,
        ]

    @patch("aider.stats.run")
    def test_get_all_commit_hashes_between_tags(self, mock_run):
        # Test with end_tag
        mock_run.return_value = "commit1\ncommit2\ncommit3"
        result = get_all_commit_hashes_between_tags("v1.0.0", "v2.0.0")
        mock_run.assert_called_with(["git", "rev-list", "v1.0.0..v2.0.0"])
        self.assertEqual(result, ["commit1", "commit2", "commit3"])

        # Test without end_tag (defaults to HEAD)
        mock_run.return_value = "commit4\ncommit5"
        result = get_all_commit_hashes_between_tags("v1.0.0")
        mock_run.assert_called_with(["git", "rev-list", "v1.0.0..HEAD"])
        self.assertEqual(result, ["commit4", "commit5"])

        # Test with empty result
        mock_run.return_value = ""
        result = get_all_commit_hashes_between_tags("v1.0.0", "v1.0.0")
        self.assertIsNone(result)

    @patch("aider.stats.run")
    def test_get_commit_authors(self, mock_run):
        # Canned `git show` output keyed by (format option, commit).
        responses = {
            ("--format=%an", "commit1"): "Author1\n",
            ("--format=%an", "commit2"): "Author2\n",
            ("--format=%s", "commit1"): "Normal commit message\n",
            ("--format=%s", "commit2"): "aider: AI generated commit\n",
        }

        def mock_run_side_effect(cmd):
            if cmd[0:3] != ["git", "show", "-s"]:
                return ""
            return responses.get((cmd[3], cmd[-1]), "")

        mock_run.side_effect = mock_run_side_effect

        # Test author attribution with aider tag
        result = get_commit_authors(["commit1", "commit2"])

        expected = {
            "commit1": "Author1",
            "commit2": "Author2 (aider)",
        }
        self.assertEqual(result, expected)

    @patch("aider.stats.run")
    def test_get_counts_for_file(self, mock_run):
        # Blame lines are joined explicitly so each one starts with its hash
        # regardless of how this file is indented.
        mock_run.return_value = "\n".join(
            [
                f"{'0' * hash_len} (Author1 2023-01-01 12:00:00 +0000 1) Line 1",
                f"{'1' * hash_len} (Author2 2023-01-02 12:00:00 +0000 2) Line 2",
                f"{'1' * hash_len} (Author2 2023-01-02 12:00:00 +0000 3) Line 3",
                # Caret-prefixed: from before start_tag, must not be counted.
                f"^{'2' * hash_len} (Author3 2023-01-03 12:00:00 +0000 4) Line 4",
            ]
        )

        # Mock authors dictionary
        authors = {
            "0" * hash_len: "Author1",
            "1" * hash_len: "Author2 (aider)",
        }
        expected = {
            "Author1": 1,
            "Author2 (aider)": 2,
        }

        # Test with end_tag
        result = get_counts_for_file("v1.0.0", "v2.0.0", authors, "test_file.py")
        mock_run.assert_called_with(
            self._expected_blame_cmd("v1.0.0..v2.0.0", "test_file.py")
        )
        self.assertEqual(result, expected)

        # Test with no end_tag
        result = get_counts_for_file("v1.0.0", None, authors, "test_file.py")
        mock_run.assert_called_with(
            self._expected_blame_cmd("v1.0.0..HEAD", "test_file.py")
        )
        self.assertEqual(result, expected)

    @patch("aider.stats.run")
    def test_get_counts_for_file_error_handling(self, mock_run):
        # Test file not found error
        mock_run.side_effect = subprocess.CalledProcessError(
            1, "git blame", stderr=b"no such path 'nonexistent.py'"
        )
        result = get_counts_for_file("v1.0.0", "v2.0.0", {}, "nonexistent.py")
        self.assertIsNone(result)

        # Test other git error
        mock_run.side_effect = subprocess.CalledProcessError(
            1, "git blame", stderr=b"some other git error"
        )
        with patch("sys.stderr"):  # Suppress stderr output during test
            result = get_counts_for_file("v1.0.0", "v2.0.0", {}, "test_file.py")
        self.assertIsNone(result)


if __name__ == "__main__":
    unittest.main()
|
Loading…
Add table
Add a link
Reference in a new issue