diff --git a/_posts/2024-05-24-self-assembly.html b/_posts/2024-05-24-self-assembly.html new file mode 100644 index 000000000..597d43b20 --- /dev/null +++ b/_posts/2024-05-24-self-assembly.html @@ -0,0 +1,67 @@ +--- +title: Aider has written 7% of its own code +excerpt: Aider is 14K lines of python and 7% of those were written by aider itself. +highlight_image: /assets/self-assembly.jpg +draft: true +--- + +# Aider has written 7% of its own code + +[![self assembly](/assets/self-assembly.jpg)](https://aider.chat/assets/self-assembly.jpg) + +The +[aider git repo](https://github.com/Aider-AI/aider) +currently contains about 4K commits, about 15% of which +were made by aider. + +The code is currently about 14K lines of python. +Aider wrote about 7% of those lines of code. +In the history of the repo, aider has added 4.8K lines of code +and deleted 1.5K lines. + +Here's the breakdown of the code aider wrote in the current code base. + +| File | Lines | Percent | +|---|---:|---:| +|aider/args.py| 6 of 449 | 1.34% | +|aider/coders/base_coder.py| 37 of 1354 | 2.73% | +|aider/coders/editblock_coder.py| 10 of 507 | 1.97% | +|aider/coders/editblock_func_coder.py| 8 of 141 | 5.67% | +|aider/coders/udiff_coder.py| 2 of 421 | 0.48% | +|aider/coders/wholefile_coder.py| 5 of 146 | 3.42% | +|aider/coders/wholefile_func_coder.py| 11 of 134 | 8.21% | +|aider/commands.py| 68 of 703 | 9.67% | +|aider/diffs.py| 15 of 129 | 11.63% | +|aider/gui.py| 2 of 533 | 0.38% | +|aider/history.py| 18 of 124 | 14.52% | +|aider/io.py| 45 of 368 | 12.23% | +|aider/linter.py| 29 of 240 | 12.08% | +|aider/main.py| 26 of 466 | 5.58% | +|aider/mdstream.py| 2 of 122 | 1.64% | +|aider/models.py| 18 of 549 | 3.28% | +|aider/repo.py| 20 of 266 | 7.52% | +|aider/repomap.py| 17 of 518 | 3.28% | +|aider/scrape.py| 11 of 199 | 5.53% | +|aider/versioncheck.py| 10 of 37 | 27.03% | +|aider/voice.py| 8 of 104 | 7.69% | +|benchmark/benchmark.py| 32 of 730 | 4.38% | +|benchmark/over_time.py| 32 of 60 | 53.33% | +|benchmark/swe_bench_lite.py| 
39 of 71 | 54.93% | +|scripts/blame.py| 65 of 214 | 30.37% | +|scripts/versionbump.py| 92 of 123 | 74.80% | +|setup.py| 11 of 47 | 23.40% | +|tests/test_coder.py| 40 of 612 | 6.54% | +|tests/test_commands.py| 138 of 588 | 23.47% | +|tests/test_editblock.py| 23 of 403 | 5.71% | +|tests/test_io.py| 30 of 65 | 46.15% | +|tests/test_main.py| 25 of 239 | 10.46% | +|tests/test_models.py| 6 of 28 | 21.43% | +|tests/test_repo.py| 2 of 296 | 0.68% | +|tests/test_repomap.py| 70 of 217 | 32.26% | +|tests/test_udiff.py| 7 of 119 | 5.88% | +|tests/test_wholefile.py| 23 of 321 | 7.17% | +|**TOTAL**| **1003 of 14221** | **7.05%** | + + + + diff --git a/assets/self-assembly.jpg b/assets/self-assembly.jpg new file mode 100644 index 000000000..963cf07cc Binary files /dev/null and b/assets/self-assembly.jpg differ diff --git a/scripts/blame.py b/scripts/blame.py index 97edfd3d3..e3c7bbb49 100755 --- a/scripts/blame.py +++ b/scripts/blame.py @@ -1,32 +1,32 @@ #!/usr/bin/env python3 -import tempfile -import sys import subprocess +import sys +import tempfile from pathlib import Path + import pylab as plt -from aider.dump import dump from imgcat import imgcat +from aider.dump import dump + + def get_lines_with_commit_hash(filename, aider_commits, git_dname, verbose=False): result = subprocess.run( ["git", "-C", git_dname, "blame", "-l", filename], capture_output=True, text=True, - check=True + check=True, ) - hashes = [ - line.split()[0] - for line in result.stdout.splitlines() - ] + hashes = [line.split()[0] for line in result.stdout.splitlines()] lines = Path(filename).read_text().splitlines() num_aider_lines = 0 - for hsh,line in zip(hashes, lines): + for hsh, line in zip(hashes, lines): if hsh in aider_commits: num_aider_lines += 1 - prefix = '+' + prefix = "+" else: prefix = " " @@ -45,7 +45,7 @@ def get_aider_commits(git_dname): ["git", "-C", git_dname, "log", "--pretty=format:%H %s"], capture_output=True, text=True, - check=True + check=True, ) results = result.stdout.splitlines() 
@@ -59,7 +59,6 @@ def get_aider_commits(git_dname): dump(len(commits)) - return commits @@ -72,17 +71,28 @@ def show_commit_stats(commits): ["git", "show", "--stat", "--oneline", commit], capture_output=True, text=True, - check=True + check=True, ) added_lines = 0 deleted_lines = 0 for line in result.stdout.splitlines(): + if "changed," not in line: + continue + if "insertion" not in line and "deletion" not in line: + continue dump(line) - if "insertion" in line or "insertion(+)" in line: - added_lines += int(line.split()[line.split().index("insertion(+)") - 1]) - if "deletion" in line or "deletion(-)" in line: - deleted_lines += int(line.split()[line.split().index("deletion(-)") - 1]) + pieces = line.split(",") + try: + for piece in pieces: + if "insertion" in piece: + dump(piece) + added_lines += int(piece.strip().split()[0]) + if "deletion" in piece: + dump(piece) + deleted_lines += int(piece.strip().split()[0]) + except ValueError: + pass total_added_lines += added_lines total_deleted_lines += deleted_lines @@ -91,12 +101,13 @@ def show_commit_stats(commits): print(f"Total: +{total_added_lines} -{total_deleted_lines}") + def process_fnames(fnames, git_dname): if not git_dname: git_dname = "." 
aider_commits = get_aider_commits(git_dname) - show_commit_stats(aider_commits) + # show_commit_stats(aider_commits) total_lines = 0 total_aider_lines = 0 @@ -108,24 +119,24 @@ def process_fnames(fnames, git_dname): percent_modified = (num_aider_lines / num_lines) * 100 if num_lines > 0 else 0 if not num_aider_lines: continue - print(f"{fname}: {num_aider_lines}/{num_lines} ({percent_modified:.2f}%)") + print(f"|{fname}| {num_aider_lines} of {num_lines} | {percent_modified:.2f}% |") total_percent_modified = (total_aider_lines / total_lines) * 100 if total_lines > 0 else 0 - print(f"Total: {total_aider_lines}/{total_lines} lines by aider ({total_percent_modified:.2f}%)") + print( + f"Total: {total_aider_lines}/{total_lines} lines by aider ({total_percent_modified:.2f}%)" + ) return total_aider_lines, total_lines, total_percent_modified + def process_repo(git_dname=None): if not git_dname: git_dname = "." result = subprocess.run( - ["git", "-C", git_dname, "ls-files"], - capture_output=True, - text=True, - check=True + ["git", "-C", git_dname, "ls-files"], capture_output=True, text=True, check=True ) git_dname = Path(git_dname) - fnames = [git_dname/fname for fname in result.stdout.splitlines() if fname.endswith('.py')] + fnames = [git_dname / fname for fname in result.stdout.splitlines() if fname.endswith(".py")] return process_fnames(fnames, git_dname) @@ -136,7 +147,7 @@ def history(): ["git", "-C", git_dname, "log", "--pretty=format:%H %s"], capture_output=True, text=True, - check=True + check=True, ) commits = [] @@ -148,8 +159,8 @@ def history(): dump(len(commits)) num_commits = len(commits) - N=10 - step = (num_commits-1)/(N-1) + N = 10 + step = (num_commits - 1) / (N - 1) results = [] i = 0 while i < num_commits: @@ -176,21 +187,19 @@ def history(): aider_lines = [aider_lines for _, aider_lines, _, _ in results] total_lines = [total_lines for _, _, total_lines, _ in results] - plt.fill_between(x, aider_lines, label='Aider Lines', color='skyblue', alpha=0.5) - 
plt.fill_between(x, total_lines, label='Total Lines', color='lightgreen', alpha=0.5) - plt.xlabel('Commit Number') - plt.ylabel('Lines of Code') - plt.title('Aider Lines and Total Lines Over Time') + plt.fill_between(x, aider_lines, label="Aider Lines", color="skyblue", alpha=0.5) + plt.fill_between(x, total_lines, label="Total Lines", color="lightgreen", alpha=0.5) + plt.xlabel("Commit Number") + plt.ylabel("Lines of Code") + plt.title("Aider Lines and Total Lines Over Time") plt.legend() - plt.savefig('aider_plot.png') - with open('aider_plot.png', 'rb') as f: + plt.savefig("aider_plot.png") + with open("aider_plot.png", "rb") as f: imgcat(f.read()) - - def main(): - #return history() + # return history() if len(sys.argv) < 2: return process_repo()