Merge 98d114e527 into 3caab85931

2025-05-29 08:44:59 +00:00 · 2025-05-16 19:38:37 +08:00 · 2025-05-16 19:38:37 +08:00 · eb6fa89e65
commit eb6fa89e65
parent 3caab85931 98d114e527
6 changed files with 345 additions and 33 deletions
--- a/aider/args.py
+++ b/aider/args.py
@ -553,6 +553,13 @@ def get_parser(default_config_files, git_root):
        help="Run tests, fix problems found and then exit",
        default=False,
    )
    group.add_argument(
        "--stats",
        metavar="REVISIONS",
        nargs="?",
        const="HEAD",
        help="Show code changes statistics between revisions",
    )
    ##########
    group = parser.add_argument_group("Analytics")
--- a/aider/commands.py
+++ b/aider/commands.py
@ -4,7 +4,7 @@ import re
 import subprocess
 import sys
 import tempfile
-from collections import OrderedDict
+from collections import OrderedDict, defaultdict
 from os.path import expanduser
 from pathlib import Path
@ -23,6 +23,7 @@ from aider.repo import ANY_GIT_ERROR
 from aider.run_cmd import run_cmd
 from aider.scrape import Scraper, install_playwright
 from aider.utils import is_image_file
 from aider.stats import hash_len, get_all_commit_hashes_between_tags, get_commit_authors, get_counts_for_file
 from .dump import dump  # noqa: F401
@ -1484,6 +1485,95 @@ class Commands:
        "Toggle multiline mode (swaps behavior of Enter and Meta+Enter)"
        self.io.toggle_multiline_mode()
    def cmd_stats(self, args):
        """Show statistics about code changes and aider's contributions by counting lines of code through git blame.

        Usage:
            /stats                   Compare against main/master branch
            /stats <revision>        Compare against specific revision
            /stats rev1..rev2        Compare between two specific revisions

        Examples:
            /stats                   Show stats vs main/master branch
            /stats HEAD~5            Show stats vs 5 commits ago
            /stats v1.0.0            Show stats vs version 1.0.0
            /stats main..HEAD        Show stats between main and current HEAD

        Lines are attributed to aider when the commit's author name contains "(aider)"
        or the commit message starts with "aider:".
        Binary files (images, audio, etc.) are excluded from the analysis.
        """
        if not self.coder.repo:
            self.io.tool_error("No git repository found.")
            return

        # File extensions that are skipped because line-based blame is meaningless for them.
        binary_extensions = (
            ".jpg", ".jpeg", ".png", ".gif", ".bmp", ".tiff", ".ico", ".svg",  # images
            ".mp3", ".wav", ".ogg", ".m4a", ".flac",  # audio
            ".mp4", ".avi", ".mov", ".wmv", ".flv", ".webm",  # video
            ".pdf", ".doc", ".docx", ".ppt", ".pptx", ".xls", ".xlsx",  # documents
            ".zip", ".tar", ".gz", ".7z", ".rar",  # archives
            ".ttf", ".otf", ".woff", ".woff2", ".eot",  # fonts
        )

        try:
            # Treat None and whitespace-only input the same as "no argument given".
            args = (args or "").strip()

            if not args:
                # Default to comparing against the main/master branch, whichever resolves first.
                for default_branch in ("main", "master"):
                    try:
                        self.coder.repo.repo.rev_parse(default_branch)
                    except Exception:
                        # Branch does not resolve; try the next candidate.
                        # (Deliberately not a bare except: Ctrl-C must still propagate.)
                        continue
                    args = default_branch
                    break

                if not args:
                    self.io.tool_error("No main or master branch found. Please specify a revision.")
                    return

            # "rev1..rev2" -> (rev1, rev2); a single revision is compared against HEAD.
            # An omitted side ("rev.." or "..rev") means HEAD, mirroring git's range syntax.
            source_revision, _, target_revision = args.partition("..")
            source_revision = source_revision or "HEAD"
            target_revision = target_revision or "HEAD"

            commits = get_all_commit_hashes_between_tags(source_revision, target_revision) or []
            # Shorten to the abbreviated form that is used as the key when parsing blame output.
            commits = [commit[:hash_len] for commit in commits]
            if not commits:
                self.io.tool_error(
                    f"There are no commits between the specified revisions from {source_revision} to {target_revision}."
                )
                return

            authors = get_commit_authors(commits)

            # Only files touched between the two revisions are analyzed.
            diff_files = self.coder.repo.repo.git.diff(
                "--name-only", f"{source_revision}..{target_revision}"
            ).splitlines()
            files = [f for f in diff_files if not f.lower().endswith(binary_extensions)]
            self.io.tool_output(
                f"Found {len(files)} non-binary files changed between"
                f" {source_revision} and {target_revision}."
            )

            total_lines = 0
            aider_total = 0
            for file in files:
                file_counts = get_counts_for_file(source_revision, target_revision, authors, file)
                if not file_counts:
                    continue
                for author, count in file_counts.items():
                    total_lines += count
                    if "(aider)" in author.lower():
                        aider_total += count

            if total_lines > 0:
                aider_percentage = (aider_total / total_lines) * 100
                self.io.tool_output(f"\nAnalysis from {source_revision} to {target_revision}:")
                self.io.tool_output(f"Total lines analyzed: {total_lines:,}")
                self.io.tool_output(f"Lines by aider: {aider_total:,} ({aider_percentage:.1f}%)")
                self.io.tool_output(f"Lines by humans: {total_lines - aider_total:,} ({100 - aider_percentage:.1f}%)")
            else:
                self.io.tool_output("No lines of code found in the repository.")

        except Exception as e:
            # Top-level boundary for the command: report the failure instead of crashing the session.
            self.io.tool_error(f"Error analyzing aider statistics: {e}")
    def cmd_copy(self, args):
        "Copy the last assistant message to the clipboard"
        all_messages = self.coder.done_messages + self.coder.cur_messages
--- a/aider/main.py
+++ b/aider/main.py
@ -1142,6 +1142,11 @@ def main(argv=None, input=None, output=None, force_git_root=None, return_coder=F
        analytics.event("exit", reason="Completed --message-file")
        return
    if args.stats:
        commands.cmd_stats(args.stats)
        analytics.event("exit", reason="Completed --stats")
        return
    if args.exit:
        analytics.event("exit", reason="Exit flag set")
        return
--- a/aider/stats.py
+++ b/aider/stats.py
@ -0,0 +1,121 @@
 import subprocess
 import sys
 from collections import defaultdict
 # Number of leading hash characters used as the commit key when parsing git
 # blame output; corresponds to the "--abbrev=9" option passed to git blame below.
 hash_len = len("44e6fefc2")
 def run(cmd):
    """Run an external command and hand back what it wrote to standard output.

    Args:
        cmd: List holding the program followed by its arguments

    Returns:
        The captured stdout of the command, as text

    Raises:
        subprocess.CalledProcessError: If the command exits with a non-zero status
    """
    completed = subprocess.run(
        cmd,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        text=True,
        check=True,
    )
    return completed.stdout
 def get_all_commit_hashes_between_tags(start_tag, end_tag=None):
    """Ask `git rev-list` for the commits in the range start_tag..end_tag.

    Args:
        start_tag: Tag or commit hash the range starts from
        end_tag: Tag or commit hash the range ends at; HEAD is used when falsy

    Returns:
        List of commit hashes in the order git printed them, or None when
        git produced no output
    """
    upper_bound = end_tag if end_tag else "HEAD"
    listing = run(["git", "rev-list", f"{start_tag}..{upper_bound}"])
    if not listing:
        return None
    return listing.strip().split("\n")
 def get_commit_authors(commits):
    """Look up the author name of every commit, tagging aider-made commits.

    A commit whose subject line starts with "aider:" (case-insensitive) gets
    " (aider)" appended to its author name.

    Args:
        commits: List of commit hashes

    Returns:
        Dictionary mapping each commit hash to its (possibly tagged) author name
    """
    authors_by_commit = {}
    for sha in commits:
        name = run(["git", "show", "-s", "--format=%an", sha]).strip()
        subject = run(["git", "show", "-s", "--format=%s", sha]).strip()
        made_by_aider = subject.lower().startswith("aider:")
        authors_by_commit[sha] = name + " (aider)" if made_by_aider else name
    return authors_by_commit
 def get_counts_for_file(start_tag, end_tag, authors, fname):
    """Count lines attributed to each author in a file using git blame.

    Args:
        start_tag: Starting tag or commit hash
        end_tag: Ending tag or commit hash (HEAD is used when falsy)
        authors: Dictionary mapping abbreviated commit hashes to author names
        fname: File path to analyze

    Returns:
        Dictionary mapping author names to line counts, or None if git blame
        produced no output or failed (a warning is printed to stderr for
        failures other than the file not existing in the range)
    """
    blame_cmd = [
        "git",
        "blame",
        "-M100",  # Detect lines moved within the file (at least 100 alphanumeric chars)
        "-C100",  # Detect lines moved/copied from other files (same 100-char threshold)
        "-C",  # Widen the copy search (second -C)
        "-C",  # Widen the copy search further (third -C)
        "--abbrev=9",
        f"{start_tag}..{end_tag or 'HEAD'}",
        "--",
        fname,
    ]

    try:
        text = run(blame_cmd)
    except subprocess.CalledProcessError as e:
        # str(e) only carries the command and exit status; git's actual
        # message lives in e.stderr, which may be text, bytes or None.
        stderr = e.stderr or ""
        if isinstance(stderr, bytes):
            stderr = stderr.decode(errors="replace")
        if "no such path" not in f"{e} {stderr}".lower():
            # Anything other than "file doesn't exist in this revision range"
            # is unexpected, so surface it without aborting the whole analysis.
            print(f"Warning: Unable to blame file {fname}. Error: {e}", file=sys.stderr)
        return None

    if not text:
        return None

    line_counts = defaultdict(int)
    for line in text.splitlines():
        # Lines starting with "^" are boundary commits, i.e. they predate
        # start_tag and are not part of the analyzed range.
        if line.startswith("^"):
            continue
        author = authors.get(line[:hash_len], "Unknown")
        line_counts[author] += 1
    return dict(line_counts)
--- a/scripts/blame.py
+++ b/scripts/blame.py
@ -2,8 +2,6 @@
 import argparse
 import os
 import subprocess
 import sys
 from collections import defaultdict
 from datetime import datetime
 from operator import itemgetter
@ -12,6 +10,8 @@ import semver
 import yaml
 from tqdm import tqdm
 from aider.stats import get_all_commit_hashes_between_tags, run, hash_len, get_commit_authors, get_counts_for_file
 website_files = [
    "aider/website/index.html",
    "aider/website/share/index.md",
@ -68,36 +68,6 @@ def blame(start_tag, end_tag=None):
    return all_file_counts, grand_total, total_lines, aider_total, aider_percentage, end_date
 def get_all_commit_hashes_between_tags(start_tag, end_tag=None):
    """Return the hashes `git rev-list` prints for start_tag..end_tag.

    HEAD is used as the end of the range when end_tag is falsy. Returns None
    when git prints nothing.
    """
    if end_tag:
        revision_range = f"{start_tag}..{end_tag}"
    else:
        revision_range = f"{start_tag}..HEAD"
    listing = run(["git", "rev-list", revision_range])
    return listing.strip().split("\n") if listing else None
 def run(cmd):
    """Run the command given as an argument list and return its captured stdout as text.

    Raises subprocess.CalledProcessError when the command exits non-zero.
    """
    finished = subprocess.run(
        cmd,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        text=True,
        check=True,
    )
    return finished.stdout
 def get_commit_authors(commits):
    """Map each commit hash to its author name, suffixed with " (aider)" when
    the commit subject starts with "aider:" (case-insensitive)."""
    result = {}
    for sha in commits:
        who = run(["git", "show", "-s", "--format=%an", sha]).strip()
        subject = run(["git", "show", "-s", "--format=%s", sha]).strip()
        if subject.lower().startswith("aider:"):
            who = who + " (aider)"
        result[sha] = who
    return result
 # Length of an abbreviated commit hash, measured from a sample 9-character hash.
 hash_len = len("44e6fefc2")
 def process_all_tags_since(start_tag):
    tags = get_all_tags_since(start_tag)
--- a/tests/test_stats.py
+++ b/tests/test_stats.py
@ -0,0 +1,119 @@
 import unittest
 import subprocess
 from unittest.mock import patch, MagicMock
 from collections import defaultdict
 from aider.stats import (
    get_all_commit_hashes_between_tags,
    get_commit_authors,
    get_counts_for_file,
    hash_len,
 )
 class TestStats(unittest.TestCase):
    """Unit tests for the git helpers in aider.stats.

    Every test replaces aider.stats.run with a mock, so no git process is
    started and no repository is required.
    """

    @patch("aider.stats.run")
    def test_get_all_commit_hashes_between_tags(self, mock_run):
        """rev-list output is split into hashes; empty output yields None."""
        # Test with end_tag
        mock_run.return_value = "commit1\ncommit2\ncommit3"
        result = get_all_commit_hashes_between_tags("v1.0.0", "v2.0.0")
        mock_run.assert_called_with(["git", "rev-list", "v1.0.0..v2.0.0"])
        self.assertEqual(result, ["commit1", "commit2", "commit3"])

        # Test without end_tag (defaults to HEAD)
        mock_run.return_value = "commit4\ncommit5"
        result = get_all_commit_hashes_between_tags("v1.0.0")
        mock_run.assert_called_with(["git", "rev-list", "v1.0.0..HEAD"])
        self.assertEqual(result, ["commit4", "commit5"])

        # Test with empty result
        mock_run.return_value = ""
        result = get_all_commit_hashes_between_tags("v1.0.0", "v1.0.0")
        self.assertIsNone(result)

    @patch("aider.stats.run")
    def test_get_commit_authors(self, mock_run):
        """Commits whose subject starts with "aider:" get the " (aider)" suffix."""
        author_names = {"commit1": "Author1\n", "commit2": "Author2\n"}
        subjects = {
            "commit1": "Normal commit message\n",
            "commit2": "aider: AI generated commit\n",
        }

        # Answer the two `git show -s --format=...` queries issued per commit
        def mock_run_side_effect(cmd):
            if cmd[0:3] == ["git", "show", "-s"]:
                if "--format=%an" in cmd:
                    return author_names.get(cmd[-1], "")
                if "--format=%s" in cmd:
                    return subjects.get(cmd[-1], "")
            return ""

        mock_run.side_effect = mock_run_side_effect

        # Test author attribution with aider tag
        result = get_commit_authors(["commit1", "commit2"])
        expected = {
            "commit1": "Author1",
            "commit2": "Author2 (aider)",
        }
        self.assertEqual(result, expected)

    @patch("aider.stats.run")
    def test_get_counts_for_file(self, mock_run):
        """Blame lines are counted per author; boundary ("^") lines are skipped."""
        # Built with join so every blame line starts at column 0 regardless of
        # how this file is indented.
        blame_lines = [
            f"{'0' * hash_len} (Author1 2023-01-01 12:00:00 +0000 1) Line 1",
            f"{'1' * hash_len} (Author2 2023-01-02 12:00:00 +0000 2) Line 2",
            f"{'1' * hash_len} (Author2 2023-01-02 12:00:00 +0000 3) Line 3",
            f"^{'2' * hash_len} (Author3 2023-01-03 12:00:00 +0000 4) Line 4"
            " (not counted - from before start_tag)",
        ]
        mock_run.return_value = "\n".join(blame_lines)

        # Mock authors dictionary
        authors = {
            "0" * hash_len: "Author1",
            "1" * hash_len: "Author2 (aider)",
        }
        expected = {
            "Author1": 1,
            "Author2 (aider)": 2,
        }
        # Must mirror the options get_counts_for_file passes to git blame
        blame_prefix = ["git", "blame", "-M100", "-C100", "-C", "-C", "--abbrev=9"]

        # Test with end_tag
        result = get_counts_for_file("v1.0.0", "v2.0.0", authors, "test_file.py")
        mock_run.assert_called_with(blame_prefix + ["v1.0.0..v2.0.0", "--", "test_file.py"])
        self.assertEqual(result, expected)

        # Test with no end_tag
        result = get_counts_for_file("v1.0.0", None, authors, "test_file.py")
        mock_run.assert_called_with(blame_prefix + ["v1.0.0..HEAD", "--", "test_file.py"])
        self.assertEqual(result, expected)

    @patch("aider.stats.run")
    def test_get_counts_for_file_error_handling(self, mock_run):
        """A failing git blame yields None instead of raising."""
        # Suppress any warning written to stderr during the test
        with patch("sys.stderr"):
            # Test file not found error
            mock_run.side_effect = subprocess.CalledProcessError(
                1, "git blame", stderr=b"no such path 'nonexistent.py'"
            )
            result = get_counts_for_file("v1.0.0", "v2.0.0", {}, "nonexistent.py")
            self.assertIsNone(result)

            # Test other git error
            mock_run.side_effect = subprocess.CalledProcessError(
                1, "git blame", stderr=b"some other git error"
            )
            result = get_counts_for_file("v1.0.0", "v2.0.0", {}, "test_file.py")
            self.assertIsNone(result)
 # Run the tests when this file is executed directly as a script.
 if __name__ == "__main__":
    unittest.main()