mirror of
https://github.com/Aider-AI/aider.git
synced 2025-05-29 08:44:59 +00:00
Merge 98d114e527
into 3caab85931
This commit is contained in:
commit
eb6fa89e65
6 changed files with 345 additions and 33 deletions
|
@ -553,6 +553,13 @@ def get_parser(default_config_files, git_root):
|
||||||
help="Run tests, fix problems found and then exit",
|
help="Run tests, fix problems found and then exit",
|
||||||
default=False,
|
default=False,
|
||||||
)
|
)
|
||||||
|
group.add_argument(
|
||||||
|
"--stats",
|
||||||
|
metavar="REVISIONS",
|
||||||
|
nargs="?",
|
||||||
|
const="HEAD",
|
||||||
|
help="Show code changes statistics between revisions",
|
||||||
|
)
|
||||||
|
|
||||||
##########
|
##########
|
||||||
group = parser.add_argument_group("Analytics")
|
group = parser.add_argument_group("Analytics")
|
||||||
|
|
|
@ -4,7 +4,7 @@ import re
|
||||||
import subprocess
|
import subprocess
|
||||||
import sys
|
import sys
|
||||||
import tempfile
|
import tempfile
|
||||||
from collections import OrderedDict
|
from collections import OrderedDict, defaultdict
|
||||||
from os.path import expanduser
|
from os.path import expanduser
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
|
@ -23,6 +23,7 @@ from aider.repo import ANY_GIT_ERROR
|
||||||
from aider.run_cmd import run_cmd
|
from aider.run_cmd import run_cmd
|
||||||
from aider.scrape import Scraper, install_playwright
|
from aider.scrape import Scraper, install_playwright
|
||||||
from aider.utils import is_image_file
|
from aider.utils import is_image_file
|
||||||
|
from aider.stats import hash_len, get_all_commit_hashes_between_tags, get_commit_authors, get_counts_for_file
|
||||||
|
|
||||||
from .dump import dump # noqa: F401
|
from .dump import dump # noqa: F401
|
||||||
|
|
||||||
|
@ -1484,6 +1485,95 @@ class Commands:
|
||||||
"Toggle multiline mode (swaps behavior of Enter and Meta+Enter)"
|
"Toggle multiline mode (swaps behavior of Enter and Meta+Enter)"
|
||||||
self.io.toggle_multiline_mode()
|
self.io.toggle_multiline_mode()
|
||||||
|
|
||||||
|
def cmd_stats(self, args):
    """Show statistics about code changes and aider's contributions by counting lines of code through git blame.

    Usage:
      /stats              Compare against main/master branch
      /stats <revision>   Compare against specific revision
      /stats rev1..rev2   Compare between two specific revisions

    Examples:
      /stats              Show stats vs main/master branch
      /stats HEAD~5       Show stats vs 5 commits ago
      /stats v1.0.0       Show stats vs version 1.0.0
      /stats main..HEAD   Show stats between main and current HEAD

    Lines are attributed to aider when the author name recorded for the
    commit contains "(aider)"; get_commit_authors adds that marker to commits
    whose subject starts with "aider:".
    Files with well-known binary extensions (images, audio, etc.) are
    excluded from the analysis.
    """
    if not self.coder.repo:
        self.io.tool_error("No git repository found.")
        return

    # Extensions skipped because blaming binary content is meaningless.
    binary_extensions = (
        ".jpg", ".jpeg", ".png", ".gif", ".bmp", ".tiff", ".ico", ".svg",  # images
        ".mp3", ".wav", ".ogg", ".m4a", ".flac",  # audio
        ".mp4", ".avi", ".mov", ".wmv", ".flv", ".webm",  # video
        ".pdf", ".doc", ".docx", ".ppt", ".pptx", ".xls", ".xlsx",  # documents
        ".zip", ".tar", ".gz", ".7z", ".rar",  # archives
        ".ttf", ".otf", ".woff", ".woff2", ".eot",  # fonts
    )

    try:
        # Tolerate None (no argument) and stray whitespace from the prompt.
        args = (args or "").strip()

        if not args:
            # Default to comparing against the main/master branch.
            for default_branch in ("main", "master"):
                try:
                    self.coder.repo.repo.rev_parse(default_branch)
                except Exception:
                    # Branch does not resolve; try the next candidate.
                    # (Not a bare except: Ctrl-C must still interrupt.)
                    continue
                args = default_branch
                break
            if not args:
                self.io.tool_error("No main or master branch found. Please specify a revision.")
                return

        # "rev1..rev2" selects an explicit range; a bare revision (or an
        # empty right-hand side such as "main..") is compared against HEAD.
        source_revision, _, target_revision = args.partition("..")
        if not target_revision:
            target_revision = "HEAD"

        commits = get_all_commit_hashes_between_tags(source_revision, target_revision)
        # Blame output is keyed by abbreviated hashes, so abbreviate here too.
        commits = [commit[:hash_len] for commit in commits] if commits else []
        if not commits:
            self.io.tool_error(
                f"There are no commits between the specified revisions from {source_revision} to {target_revision}."
            )
            return
        authors = get_commit_authors(commits)

        # Get files changed between revisions
        diff_files = self.coder.repo.repo.git.diff(
            "--name-only", f"{source_revision}..{target_revision}"
        ).splitlines()
        # Filter out media and other binary files
        files = [f for f in diff_files if not f.lower().endswith(binary_extensions)]
        self.io.tool_output(f"Found {len(files)} non-binary tracked files in the repository.")

        all_file_counts = {}
        grand_total = defaultdict(int)
        aider_total = 0
        for file in files:
            file_counts = get_counts_for_file(source_revision, target_revision, authors, file)
            if not file_counts:
                continue
            all_file_counts[file] = file_counts
            for author, count in file_counts.items():
                grand_total[author] += count
                if "(aider)" in author.lower():
                    aider_total += count

        total_lines = sum(grand_total.values())
        aider_percentage = (aider_total / total_lines) * 100 if total_lines > 0 else 0

        if total_lines > 0:
            # Output overall statistics
            self.io.tool_output(f"\nAnalysis from {source_revision} to {target_revision}:")
            self.io.tool_output(f"Total lines analyzed: {total_lines:,}")
            self.io.tool_output(f"Lines by aider: {aider_total:,} ({aider_percentage:.1f}%)")
            self.io.tool_output(f"Lines by humans: {total_lines - aider_total:,} ({100 - aider_percentage:.1f}%)")
        else:
            self.io.tool_output("No lines of code found in the repository.")

    except Exception as e:
        # Report git/subprocess failures to the user instead of crashing the session.
        self.io.tool_error(f"Error analyzing aider statistics: {e}")
|
||||||
|
|
||||||
|
|
||||||
def cmd_copy(self, args):
|
def cmd_copy(self, args):
|
||||||
"Copy the last assistant message to the clipboard"
|
"Copy the last assistant message to the clipboard"
|
||||||
all_messages = self.coder.done_messages + self.coder.cur_messages
|
all_messages = self.coder.done_messages + self.coder.cur_messages
|
||||||
|
|
|
@ -1142,6 +1142,11 @@ def main(argv=None, input=None, output=None, force_git_root=None, return_coder=F
|
||||||
analytics.event("exit", reason="Completed --message-file")
|
analytics.event("exit", reason="Completed --message-file")
|
||||||
return
|
return
|
||||||
|
|
||||||
|
if args.stats:
|
||||||
|
commands.cmd_stats(args.stats)
|
||||||
|
analytics.event("exit", reason="Completed --stats")
|
||||||
|
return
|
||||||
|
|
||||||
if args.exit:
|
if args.exit:
|
||||||
analytics.event("exit", reason="Exit flag set")
|
analytics.event("exit", reason="Exit flag set")
|
||||||
return
|
return
|
||||||
|
|
121
aider/stats.py
Normal file
121
aider/stats.py
Normal file
|
@ -0,0 +1,121 @@
|
||||||
|
import subprocess
|
||||||
|
import sys
|
||||||
|
|
||||||
|
from collections import defaultdict
|
||||||
|
|
||||||
|
# Length of abbreviated git hash used in blame output
|
||||||
|
hash_len = len("44e6fefc2")
|
||||||
|
|
||||||
|
def run(cmd):
    """Run a command as a subprocess and return what it printed to stdout.

    Args:
        cmd: List holding the executable followed by its arguments

    Returns:
        The captured standard output, decoded as text

    Raises:
        subprocess.CalledProcessError: if the command exits with a non-zero status
    """
    completed = subprocess.run(
        cmd,
        check=True,
        text=True,
        capture_output=True,
    )
    return completed.stdout
|
||||||
|
|
||||||
|
def get_all_commit_hashes_between_tags(start_tag, end_tag=None):
    """List the commits reachable from the end revision but not from the start.

    Args:
        start_tag: Tag or commit hash that opens the range
        end_tag: Tag or commit hash that closes the range; HEAD is used
            when this is None or empty

    Returns:
        List of commit hashes as printed by ``git rev-list``, or None when
        the command produced no output
    """
    upper_bound = end_tag if end_tag else "HEAD"
    output = run(["git", "rev-list", f"{start_tag}..{upper_bound}"])

    if not output:
        return None
    return output.strip().split("\n")
|
||||||
|
|
||||||
|
def get_commit_authors(commits):
    """Look up the author name of each commit, flagging aider commits.

    A commit whose subject line starts with "aider:" (case-insensitive) has
    " (aider)" appended to its author name.

    Args:
        commits: List of commit hashes

    Returns:
        Dictionary mapping each commit hash to its (possibly flagged) author name
    """
    authors_by_commit = {}
    for sha in commits:
        name = run(["git", "show", "-s", "--format=%an", sha]).strip()
        subject = run(["git", "show", "-s", "--format=%s", sha]).strip()
        is_aider_commit = subject.lower().startswith("aider:")
        authors_by_commit[sha] = name + " (aider)" if is_aider_commit else name
    return authors_by_commit
|
||||||
|
|
||||||
|
def get_counts_for_file(start_tag, end_tag, authors, fname):
    """Count lines attributed to each author in a file using git blame.

    Args:
        start_tag: Starting tag or commit hash
        end_tag: Ending tag or commit hash (HEAD is used when None or empty)
        authors: Dictionary mapping abbreviated commit hashes to author names
        fname: File path to analyze

    Returns:
        Dictionary mapping author names to line counts, or None if the file
        does not exist in the range, blame produced no output, or git failed
    """
    revision_range = f"{start_tag}..{end_tag or 'HEAD'}"
    try:
        text = run(
            [
                "git",
                "blame",
                "-M100",  # Detect lines moved within the file (threshold: 100 alphanumeric chars)
                "-C100",  # Detect lines moved/copied from other files (same threshold)
                "-C",  # Widen the search for copies to more commits
                "-C",  # Widen the search even further
                "--abbrev=9",
                revision_range,
                "--",
                fname,
            ]
        )
    except subprocess.CalledProcessError as e:
        # git's diagnostic lives in e.stderr; str(e) only carries the command
        # line and exit status, so it must not be the only text inspected.
        stderr = e.stderr or ""
        if isinstance(stderr, bytes):
            stderr = stderr.decode("utf-8", errors="replace")
        if "no such path" in f"{e} {stderr}".lower():
            # File doesn't exist in this revision range, which is okay
            return None
        # Some other error occurred
        print(f"Warning: Unable to blame file {fname}. Error: {e}", file=sys.stderr)
        return None

    if not text:
        return None

    line_counts = defaultdict(int)
    for line in text.splitlines():
        # Lines starting with "^" come from boundary commits, i.e. they
        # predate the start of the range and are not counted.
        if line.startswith("^"):
            continue
        hsh = line[:hash_len]
        line_counts[authors.get(hsh, "Unknown")] += 1

    return dict(line_counts)
|
|
@ -2,8 +2,6 @@
|
||||||
|
|
||||||
import argparse
|
import argparse
|
||||||
import os
|
import os
|
||||||
import subprocess
|
|
||||||
import sys
|
|
||||||
from collections import defaultdict
|
from collections import defaultdict
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from operator import itemgetter
|
from operator import itemgetter
|
||||||
|
@ -12,6 +10,8 @@ import semver
|
||||||
import yaml
|
import yaml
|
||||||
from tqdm import tqdm
|
from tqdm import tqdm
|
||||||
|
|
||||||
|
from aider.stats import get_all_commit_hashes_between_tags, run, hash_len, get_commit_authors, get_counts_for_file
|
||||||
|
|
||||||
website_files = [
|
website_files = [
|
||||||
"aider/website/index.html",
|
"aider/website/index.html",
|
||||||
"aider/website/share/index.md",
|
"aider/website/share/index.md",
|
||||||
|
@ -68,36 +68,6 @@ def blame(start_tag, end_tag=None):
|
||||||
return all_file_counts, grand_total, total_lines, aider_total, aider_percentage, end_date
|
return all_file_counts, grand_total, total_lines, aider_total, aider_percentage, end_date
|
||||||
|
|
||||||
|
|
||||||
def get_all_commit_hashes_between_tags(start_tag, end_tag=None):
|
|
||||||
if end_tag:
|
|
||||||
res = run(["git", "rev-list", f"{start_tag}..{end_tag}"])
|
|
||||||
else:
|
|
||||||
res = run(["git", "rev-list", f"{start_tag}..HEAD"])
|
|
||||||
|
|
||||||
if res:
|
|
||||||
commit_hashes = res.strip().split("\n")
|
|
||||||
return commit_hashes
|
|
||||||
|
|
||||||
|
|
||||||
def run(cmd):
|
|
||||||
# Get all commit hashes since the specified tag
|
|
||||||
result = subprocess.run(cmd, capture_output=True, text=True, check=True)
|
|
||||||
return result.stdout
|
|
||||||
|
|
||||||
|
|
||||||
def get_commit_authors(commits):
|
|
||||||
commit_to_author = dict()
|
|
||||||
for commit in commits:
|
|
||||||
author = run(["git", "show", "-s", "--format=%an", commit]).strip()
|
|
||||||
commit_message = run(["git", "show", "-s", "--format=%s", commit]).strip()
|
|
||||||
if commit_message.lower().startswith("aider:"):
|
|
||||||
author += " (aider)"
|
|
||||||
commit_to_author[commit] = author
|
|
||||||
return commit_to_author
|
|
||||||
|
|
||||||
|
|
||||||
hash_len = len("44e6fefc2")
|
|
||||||
|
|
||||||
|
|
||||||
def process_all_tags_since(start_tag):
|
def process_all_tags_since(start_tag):
|
||||||
tags = get_all_tags_since(start_tag)
|
tags = get_all_tags_since(start_tag)
|
||||||
|
|
119
tests/test_stats.py
Normal file
119
tests/test_stats.py
Normal file
|
@ -0,0 +1,119 @@
|
||||||
|
import unittest
|
||||||
|
import subprocess
|
||||||
|
from unittest.mock import patch, MagicMock
|
||||||
|
from collections import defaultdict
|
||||||
|
|
||||||
|
from aider.stats import (
|
||||||
|
get_all_commit_hashes_between_tags,
|
||||||
|
get_commit_authors,
|
||||||
|
get_counts_for_file,
|
||||||
|
hash_len,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class TestStats(unittest.TestCase):
    """Unit tests for aider.stats; every git invocation is replaced by a patched ``run``."""

    @patch("aider.stats.run")
    def test_get_all_commit_hashes_between_tags(self, mock_run):
        # Test with end_tag
        mock_run.return_value = "commit1\ncommit2\ncommit3"
        result = get_all_commit_hashes_between_tags("v1.0.0", "v2.0.0")
        mock_run.assert_called_with(["git", "rev-list", "v1.0.0..v2.0.0"])
        self.assertEqual(result, ["commit1", "commit2", "commit3"])

        # Test without end_tag (defaults to HEAD)
        mock_run.return_value = "commit4\ncommit5"
        result = get_all_commit_hashes_between_tags("v1.0.0")
        mock_run.assert_called_with(["git", "rev-list", "v1.0.0..HEAD"])
        self.assertEqual(result, ["commit4", "commit5"])

        # Test with empty result
        mock_run.return_value = ""
        result = get_all_commit_hashes_between_tags("v1.0.0", "v1.0.0")
        self.assertIsNone(result)

    @patch("aider.stats.run")
    def test_get_commit_authors(self, mock_run):
        # Canned `git show` answers keyed by (format flag, commit).
        responses = {
            ("--format=%an", "commit1"): "Author1\n",
            ("--format=%an", "commit2"): "Author2\n",
            ("--format=%s", "commit1"): "Normal commit message\n",
            ("--format=%s", "commit2"): "aider: AI generated commit\n",
        }

        def mock_run_side_effect(cmd):
            if cmd[0:3] == ["git", "show", "-s"]:
                return responses.get((cmd[3], cmd[-1]), "")
            return ""

        mock_run.side_effect = mock_run_side_effect

        # Test author attribution with aider tag
        result = get_commit_authors(["commit1", "commit2"])

        expected = {
            "commit1": "Author1",
            "commit2": "Author2 (aider)",
        }
        self.assertEqual(result, expected)

    @patch("aider.stats.run")
    def test_get_counts_for_file(self, mock_run):
        # Setup mock for git blame. Lines are joined explicitly so that no
        # source indentation leaks in front of the hashes.
        blame_output = "\n".join(
            [
                f"{hash_len * '0'} (Author1 2023-01-01 12:00:00 +0000 1) Line 1",
                f"{hash_len * '1'} (Author2 2023-01-02 12:00:00 +0000 2) Line 2",
                f"{hash_len * '1'} (Author2 2023-01-02 12:00:00 +0000 3) Line 3",
                f"^{hash_len * '2'} (Author3 2023-01-03 12:00:00 +0000 4) Line 4"
                " (not counted - from before start_tag)",
            ]
        )
        mock_run.return_value = blame_output

        # Mock authors dictionary
        authors = {
            "0" * hash_len: "Author1",
            "1" * hash_len: "Author2 (aider)",
        }

        # Flags must mirror what get_counts_for_file actually passes to git.
        blame_prefix = ["git", "blame", "-M100", "-C100", "-C", "-C", "--abbrev=9"]

        # Test with end_tag
        result = get_counts_for_file("v1.0.0", "v2.0.0", authors, "test_file.py")
        mock_run.assert_called_with(blame_prefix + ["v1.0.0..v2.0.0", "--", "test_file.py"])

        expected = {
            "Author1": 1,
            "Author2 (aider)": 2,
        }
        self.assertEqual(result, expected)

        # Test with no end_tag
        result = get_counts_for_file("v1.0.0", None, authors, "test_file.py")
        mock_run.assert_called_with(blame_prefix + ["v1.0.0..HEAD", "--", "test_file.py"])
        self.assertEqual(result, expected)

    @patch("aider.stats.run")
    def test_get_counts_for_file_error_handling(self, mock_run):
        # Test file not found error
        error = subprocess.CalledProcessError(1, "git blame", stderr=b"no such path 'nonexistent.py'")
        mock_run.side_effect = error

        result = get_counts_for_file("v1.0.0", "v2.0.0", {}, "nonexistent.py")
        self.assertIsNone(result)

        # Test other git error
        error = subprocess.CalledProcessError(1, "git blame", stderr=b"some other git error")
        mock_run.side_effect = error

        with patch("sys.stderr"):  # Suppress stderr output during test
            result = get_counts_for_file("v1.0.0", "v2.0.0", {}, "test_file.py")
        self.assertIsNone(result)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
unittest.main()
|
Loading…
Add table
Add a link
Reference in a new issue