feat: Add comprehensive test suite for stats module with git-related functions

This commit is contained in:
josix 2025-03-14 20:27:23 +08:00
parent 58351eb391
commit 98d114e527
No known key found for this signature in database
GPG key ID: EF4F614562B02881
2 changed files with 158 additions and 2 deletions

View file

@ -3,14 +3,31 @@ import sys
from collections import defaultdict from collections import defaultdict
# Length of abbreviated git hash used in blame output
hash_len = len("44e6fefc2") hash_len = len("44e6fefc2")
def run(cmd):
    """Execute a git command and return its output.

    Args:
        cmd: List containing the command and its arguments

    Returns:
        String output of the command (captured stdout, decoded as text)

    Raises:
        subprocess.CalledProcessError: If the command exits with a non-zero
            status (the call is made with ``check=True``).
    """
    result = subprocess.run(cmd, capture_output=True, text=True, check=True)
    return result.stdout
def get_all_commit_hashes_between_tags(start_tag, end_tag=None): def get_all_commit_hashes_between_tags(start_tag, end_tag=None):
"""Get all commit hashes between two tags or from a tag to HEAD.
Args:
start_tag: Starting tag or commit hash
end_tag: Ending tag or commit hash (defaults to HEAD)
Returns:
List of commit hashes or None if no commits found
"""
if end_tag: if end_tag:
res = run(["git", "rev-list", f"{start_tag}..{end_tag}"]) res = run(["git", "rev-list", f"{start_tag}..{end_tag}"])
else: else:
@ -19,8 +36,17 @@ def get_all_commit_hashes_between_tags(start_tag, end_tag=None):
if res: if res:
commit_hashes = res.strip().split("\n") commit_hashes = res.strip().split("\n")
return commit_hashes return commit_hashes
return None
def get_commit_authors(commits): def get_commit_authors(commits):
"""Map commit hashes to their authors, marking aider-generated commits.
Args:
commits: List of commit hashes
Returns:
Dictionary mapping commit hashes to author names
"""
commit_to_author = dict() commit_to_author = dict()
for commit in commits: for commit in commits:
author = run(["git", "show", "-s", "--format=%an", commit]).strip() author = run(["git", "show", "-s", "--format=%an", commit]).strip()
@ -31,6 +57,17 @@ def get_commit_authors(commits):
return commit_to_author return commit_to_author
def get_counts_for_file(start_tag, end_tag, authors, fname): def get_counts_for_file(start_tag, end_tag, authors, fname):
"""Count lines attributed to each author in a file using git blame.
Args:
start_tag: Starting tag or commit hash
end_tag: Ending tag or commit hash (defaults to HEAD)
authors: Dictionary mapping commit hashes to author names
fname: File path to analyze
Returns:
Dictionary mapping author names to line counts, or None if file not found
"""
try: try:
if end_tag: if end_tag:
text = run( text = run(
@ -81,4 +118,4 @@ def get_counts_for_file(start_tag, end_tag, authors, fname):
else: else:
# Some other error occurred # Some other error occurred
print(f"Warning: Unable to blame file {fname}. Error: {e}", file=sys.stderr) print(f"Warning: Unable to blame file {fname}. Error: {e}", file=sys.stderr)
return None return None

119
tests/test_stats.py Normal file
View file

@ -0,0 +1,119 @@
import unittest
import subprocess
from unittest.mock import patch, MagicMock
from collections import defaultdict
from aider.stats import (
get_all_commit_hashes_between_tags,
get_commit_authors,
get_counts_for_file,
hash_len,
)
class TestStats(unittest.TestCase):
    """Unit tests for the git helper functions imported from ``aider.stats``.

    Every test patches ``aider.stats.run``, so no real git command is executed;
    the mock's return value (or side effect) stands in for the command output.
    """

    @patch("aider.stats.run")
    def test_get_all_commit_hashes_between_tags(self, mock_run):
        """Rev-list output is split into a list; empty output yields None."""
        # Explicit end tag: the revision range is "<start>..<end>".
        mock_run.return_value = "commit1\ncommit2\ncommit3"
        result = get_all_commit_hashes_between_tags("v1.0.0", "v2.0.0")
        mock_run.assert_called_with(["git", "rev-list", "v1.0.0..v2.0.0"])
        self.assertEqual(result, ["commit1", "commit2", "commit3"])

        # No end tag: the range is expected to end at HEAD.
        mock_run.return_value = "commit4\ncommit5"
        result = get_all_commit_hashes_between_tags("v1.0.0")
        mock_run.assert_called_with(["git", "rev-list", "v1.0.0..HEAD"])
        self.assertEqual(result, ["commit4", "commit5"])

        # Empty command output: no commits in the range.
        mock_run.return_value = ""
        result = get_all_commit_hashes_between_tags("v1.0.0", "v1.0.0")
        self.assertIsNone(result)

    @patch("aider.stats.run")
    def test_get_commit_authors(self, mock_run):
        """A commit whose subject is "aider: ..." is expected to get " (aider)"."""
        # Canned `git show -s` output, keyed by commit hash.
        author_names = {"commit1": "Author1\n", "commit2": "Author2\n"}
        subject_lines = {
            "commit1": "Normal commit message\n",
            "commit2": "aider: AI generated commit\n",
        }

        def mock_run_side_effect(cmd):
            # Anything other than a recognised `git show -s` query gets "".
            if cmd[0:3] != ["git", "show", "-s"]:
                return ""
            if "--format=%an" in cmd:
                return author_names.get(cmd[-1], "")
            if "--format=%s" in cmd:
                return subject_lines.get(cmd[-1], "")
            return ""

        mock_run.side_effect = mock_run_side_effect

        result = get_commit_authors(["commit1", "commit2"])

        expected = {
            "commit1": "Author1",
            "commit2": "Author2 (aider)",
        }
        self.assertEqual(result, expected)

    @patch("aider.stats.run")
    def test_get_counts_for_file(self, mock_run):
        """Blame lines are tallied per author; "^"-prefixed lines are excluded."""
        # Fake `git blame` output. Joining explicit lines keeps each hash at
        # column 0 of its line regardless of how this file is indented.
        blame_lines = [
            f"{'0' * hash_len} (Author1 2023-01-01 12:00:00 +0000 1) Line 1",
            f"{'1' * hash_len} (Author2 2023-01-02 12:00:00 +0000 2) Line 2",
            f"{'1' * hash_len} (Author2 2023-01-02 12:00:00 +0000 3) Line 3",
            f"^{'2' * hash_len} (Author3 2023-01-03 12:00:00 +0000 4) "
            "Line 4 (not counted - from before start_tag)",
        ]
        mock_run.return_value = "\n".join(blame_lines)

        # Abbreviated commit hash -> author name, as produced by get_commit_authors.
        authors = {
            "0" * hash_len: "Author1",
            "1" * hash_len: "Author2 (aider)",
        }
        expected = {
            "Author1": 1,
            "Author2 (aider)": 2,
        }

        # Explicit end tag.
        result = get_counts_for_file("v1.0.0", "v2.0.0", authors, "test_file.py")
        mock_run.assert_called_with([
            "git", "blame", "-M", "-C", "-C", "--abbrev=9",
            "v1.0.0..v2.0.0", "--", "test_file.py"
        ])
        self.assertEqual(result, expected)

        # No end tag: the range is expected to end at HEAD. The mocked blame
        # output is unchanged, so the counts must be unchanged too.
        result = get_counts_for_file("v1.0.0", None, authors, "test_file.py")
        mock_run.assert_called_with([
            "git", "blame", "-M", "-C", "-C", "--abbrev=9",
            "v1.0.0..HEAD", "--", "test_file.py"
        ])
        self.assertEqual(result, expected)

    @patch("aider.stats.run")
    def test_get_counts_for_file_error_handling(self, mock_run):
        """A failing blame command results in None rather than an exception."""
        # stderr is given as str: the real command is run in text mode, so a
        # genuine CalledProcessError would carry text, not bytes.

        # File-not-found style error.
        error = subprocess.CalledProcessError(
            1, "git blame", stderr="no such path 'nonexistent.py'"
        )
        mock_run.side_effect = error
        result = get_counts_for_file("v1.0.0", "v2.0.0", {}, "nonexistent.py")
        self.assertIsNone(result)

        # Any other git error.
        error = subprocess.CalledProcessError(
            1, "git blame", stderr="some other git error"
        )
        mock_run.side_effect = error
        with patch("sys.stderr"):  # Suppress stderr output during test
            result = get_counts_for_file("v1.0.0", "v2.0.0", {}, "test_file.py")
        self.assertIsNone(result)
# Run the test suite only when this file is executed directly, not on import.
if __name__ == "__main__":
    unittest.main()