Add support for parsing command line arguments to the blame script

This commit is contained in:
Paul Gauthier 2024-07-29 11:52:49 -03:00 committed by Paul Gauthier (aider)
parent 2ded5e6faa
commit 8ff7242bce

View file

@ -1,15 +1,17 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
import argparse
import re
import subprocess import subprocess
import sys import sys
import argparse
from collections import defaultdict from collections import defaultdict
from aider.dump import dump from datetime import datetime
from operator import itemgetter from operator import itemgetter
import re
import semver import semver
import yaml import yaml
from datetime import datetime
from aider.dump import dump
def blame(start_tag, end_tag=None): def blame(start_tag, end_tag=None):
@ -18,7 +20,7 @@ def blame(start_tag, end_tag=None):
authors = get_commit_authors(commits) authors = get_commit_authors(commits)
py_files = run(['git', 'ls-files', '*.py']).strip().split('\n') py_files = run(["git", "ls-files", "*.py"]).strip().split("\n")
all_file_counts = {} all_file_counts = {}
grand_total = defaultdict(int) grand_total = defaultdict(int)
@ -35,7 +37,7 @@ def blame(start_tag, end_tag=None):
total_lines = sum(grand_total.values()) total_lines = sum(grand_total.values())
aider_percentage = (aider_total / total_lines) * 100 if total_lines > 0 else 0 aider_percentage = (aider_total / total_lines) * 100 if total_lines > 0 else 0
end_date = get_tag_date(end_tag if end_tag else 'HEAD') end_date = get_tag_date(end_tag if end_tag else "HEAD")
return all_file_counts, grand_total, total_lines, aider_total, aider_percentage, end_date return all_file_counts, grand_total, total_lines, aider_total, aider_percentage, end_date
@ -47,19 +49,16 @@ def get_all_commit_hashes_between_tags(start_tag, end_tag=None):
res = run(["git", "rev-list", f"{start_tag}..HEAD"]) res = run(["git", "rev-list", f"{start_tag}..HEAD"])
if res: if res:
commit_hashes = res.strip().split('\n') commit_hashes = res.strip().split("\n")
return commit_hashes return commit_hashes
def run(cmd): def run(cmd):
# Get all commit hashes since the specified tag # Get all commit hashes since the specified tag
result = subprocess.run( result = subprocess.run(cmd, capture_output=True, text=True, check=True)
cmd,
capture_output=True,
text=True,
check=True
)
return result.stdout return result.stdout
def get_commit_authors(commits): def get_commit_authors(commits):
commit_to_author = dict() commit_to_author = dict()
for commit in commits: for commit in commits:
@ -71,64 +70,81 @@ def get_commit_authors(commits):
return commit_to_author return commit_to_author
hash_len = len('44e6fefc2') hash_len = len("44e6fefc2")
def process_all_tags_since(start_tag): def process_all_tags_since(start_tag):
tags = get_all_tags_since(start_tag) tags = get_all_tags_since(start_tag)
tags += ['HEAD'] # tags += ['HEAD']
results = [] results = []
for i in range(len(tags) - 1): for i in range(len(tags) - 1):
start_tag, end_tag = tags[i], tags[i+1] start_tag, end_tag = tags[i], tags[i + 1]
_, _, total_lines, aider_total, aider_percentage, end_date = blame(start_tag, end_tag) _, _, total_lines, aider_total, aider_percentage, end_date = blame(start_tag, end_tag)
results.append({ results.append(
'start_tag': start_tag, {
'end_tag': end_tag, "start_tag": start_tag,
'end_date': end_date.strftime('%Y-%m-%d'), "end_tag": end_tag,
'aider_percentage': round(aider_percentage, 2), "end_date": end_date.strftime("%Y-%m-%d"),
'aider_lines': aider_total, "aider_percentage": round(aider_percentage, 2),
'total_lines': total_lines "aider_lines": aider_total,
}) "total_lines": total_lines,
}
)
return results return results
def main(): def main():
parser = argparse.ArgumentParser(description="Get aider/non-aider blame stats") parser = argparse.ArgumentParser(description="Get aider/non-aider blame stats")
parser.add_argument("start_tag", help="The tag to start from") parser.add_argument("start_tag", help="The tag to start from")
parser.add_argument("--end-tag", help="The tag to end at (default: HEAD)", default=None) parser.add_argument("--end-tag", help="The tag to end at (default: HEAD)", default=None)
parser.add_argument("--all-since", action="store_true", help="Find all tags since the specified tag and print aider percentage between each pair of successive tags") parser.add_argument(
"--all-since",
action="store_true",
help=(
"Find all tags since the specified tag and print aider percentage between each pair of"
" successive tags"
),
)
args = parser.parse_args() args = parser.parse_args()
if args.all_since: if args.all_since:
results = process_all_tags_since(args.start_tag) results = process_all_tags_since(args.start_tag)
print(yaml.dump(results, sort_keys=False)) print(yaml.dump(results, sort_keys=False))
else: else:
all_file_counts, grand_total, total_lines, aider_total, aider_percentage, end_date = blame(args.start_tag, args.end_tag) all_file_counts, grand_total, total_lines, aider_total, aider_percentage, end_date = blame(
args.start_tag, args.end_tag
)
result = { result = {
'start_tag': args.start_tag, "start_tag": args.start_tag,
'end_tag': args.end_tag or 'HEAD', "end_tag": args.end_tag or "HEAD",
'end_date': end_date.strftime('%Y-%m-%d'), "end_date": end_date.strftime("%Y-%m-%d"),
'file_counts': all_file_counts, "file_counts": all_file_counts,
'grand_total': {author: count for author, count in sorted(grand_total.items(), key=itemgetter(1), reverse=True)}, "grand_total": {
'total_lines': total_lines, author: count
'aider_total': aider_total, for author, count in sorted(grand_total.items(), key=itemgetter(1), reverse=True)
'aider_percentage': round(aider_percentage, 2) },
"total_lines": total_lines,
"aider_total": aider_total,
"aider_percentage": round(aider_percentage, 2),
} }
print(yaml.dump(result, sort_keys=False)) print(yaml.dump(result, sort_keys=False))
def get_counts_for_file(start_tag, end_tag, authors, fname): def get_counts_for_file(start_tag, end_tag, authors, fname):
try: try:
if end_tag: if end_tag:
text = run(['git', 'blame', f'{start_tag}..{end_tag}', '--', fname]) text = run(["git", "blame", f"{start_tag}..{end_tag}", "--", fname])
else: else:
text = run(['git', 'blame', f'{start_tag}..HEAD', '--', fname]) text = run(["git", "blame", f"{start_tag}..HEAD", "--", fname])
if not text: if not text:
return None return None
text = text.splitlines() text = text.splitlines()
line_counts = defaultdict(int) line_counts = defaultdict(int)
for line in text: for line in text:
if line.startswith('^'): if line.startswith("^"):
continue continue
hsh = line[:hash_len] hsh = line[:hash_len]
author = authors.get(hsh, "Unknown") author = authors.get(hsh, "Unknown")
@ -136,21 +152,25 @@ def get_counts_for_file(start_tag, end_tag, authors, fname):
return dict(line_counts) return dict(line_counts)
except subprocess.CalledProcessError: except subprocess.CalledProcessError:
#print(f"Warning: Unable to blame file {fname}. It may have been added after {start_tag} or removed before {end_tag or 'HEAD'}.", file=sys.stderr) # print(f"Warning: Unable to blame file {fname}. It may have been added after {start_tag} or removed before {end_tag or 'HEAD'}.", file=sys.stderr)
return None return None
def get_all_tags_since(start_tag): def get_all_tags_since(start_tag):
all_tags = run(['git', 'tag', '--sort=v:refname']).strip().split('\n') all_tags = run(["git", "tag", "--sort=v:refname"]).strip().split("\n")
start_version = semver.Version.parse(start_tag[1:]) # Remove 'v' prefix start_version = semver.Version.parse(start_tag[1:]) # Remove 'v' prefix
filtered_tags = [ filtered_tags = [
tag for tag in all_tags tag
for tag in all_tags
if semver.Version.is_valid(tag[1:]) and semver.Version.parse(tag[1:]) >= start_version if semver.Version.is_valid(tag[1:]) and semver.Version.parse(tag[1:]) >= start_version
] ]
return [tag for tag in filtered_tags if tag.endswith('.0')] return [tag for tag in filtered_tags if tag.endswith(".0")]
def get_tag_date(tag): def get_tag_date(tag):
date_str = run(['git', 'log', '-1', '--format=%ai', tag]).strip() date_str = run(["git", "log", "-1", "--format=%ai", tag]).strip()
return datetime.strptime(date_str, '%Y-%m-%d %H:%M:%S %z') return datetime.strptime(date_str, "%Y-%m-%d %H:%M:%S %z")
if __name__ == "__main__": if __name__ == "__main__":
main() main()