diff --git a/_posts/2024-05-24-self-assembly.html b/_posts/2024-05-24-self-assembly.html
new file mode 100644
index 000000000..597d43b20
--- /dev/null
+++ b/_posts/2024-05-24-self-assembly.html
@@ -0,0 +1,67 @@
+---
+title: Aider has written 7% of its own code
+excerpt: Aider is 14K lines of Python, and 7% of those were written by aider itself.
+highlight_image: /assets/self-assembly.jpg
+draft: true
+---
+
+# Aider has written 7% of its own code
+
+[![self assembly](/assets/self-assembly.jpg)](https://aider.chat/assets/self-assembly.jpg)
+
+The
+[aider git repo](https://github.com/paul-gauthier/aider)
+currently contains about 4K commits, about 15% of which
+were made by aider.
+
+The code is currently about 14K lines of Python.
+Aider wrote about 7% of those lines of code.
+In the history of the repo, aider has added 4.8K lines of code
+and deleted 1.5K lines.
+
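+Here's a per-file breakdown of the lines aider wrote in the current codebase. Files with no aider-written lines are omitted from the table but still count toward the total.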
+
+| File | Lines | Percent |
+|---|---:|---:|
+|aider/args.py| 6 of 449 | 1.34% |
+|aider/coders/base_coder.py| 37 of 1354 | 2.73% |
+|aider/coders/editblock_coder.py| 10 of 507 | 1.97% |
+|aider/coders/editblock_func_coder.py| 8 of 141 | 5.67% |
+|aider/coders/udiff_coder.py| 2 of 421 | 0.48% |
+|aider/coders/wholefile_coder.py| 5 of 146 | 3.42% |
+|aider/coders/wholefile_func_coder.py| 11 of 134 | 8.21% |
+|aider/commands.py| 68 of 703 | 9.67% |
+|aider/diffs.py| 15 of 129 | 11.63% |
+|aider/gui.py| 2 of 533 | 0.38% |
+|aider/history.py| 18 of 124 | 14.52% |
+|aider/io.py| 45 of 368 | 12.23% |
+|aider/linter.py| 29 of 240 | 12.08% |
+|aider/main.py| 26 of 466 | 5.58% |
+|aider/mdstream.py| 2 of 122 | 1.64% |
+|aider/models.py| 18 of 549 | 3.28% |
+|aider/repo.py| 20 of 266 | 7.52% |
+|aider/repomap.py| 17 of 518 | 3.28% |
+|aider/scrape.py| 11 of 199 | 5.53% |
+|aider/versioncheck.py| 10 of 37 | 27.03% |
+|aider/voice.py| 8 of 104 | 7.69% |
+|benchmark/benchmark.py| 32 of 730 | 4.38% |
+|benchmark/over_time.py| 32 of 60 | 53.33% |
+|benchmark/swe_bench_lite.py| 39 of 71 | 54.93% |
+|scripts/blame.py| 65 of 214 | 30.37% |
+|scripts/versionbump.py| 92 of 123 | 74.80% |
+|setup.py| 11 of 47 | 23.40% |
+|tests/test_coder.py| 40 of 612 | 6.54% |
+|tests/test_commands.py| 138 of 588 | 23.47% |
+|tests/test_editblock.py| 23 of 403 | 5.71% |
+|tests/test_io.py| 30 of 65 | 46.15% |
+|tests/test_main.py| 25 of 239 | 10.46% |
+|tests/test_models.py| 6 of 28 | 21.43% |
+|tests/test_repo.py| 2 of 296 | 0.68% |
+|tests/test_repomap.py| 70 of 217 | 32.26% |
+|tests/test_udiff.py| 7 of 119 | 5.88% |
+|tests/test_wholefile.py| 23 of 321 | 7.17% |
+|**TOTAL**| **1003 of 14221** | **7.05%** |
+
+
+
+
diff --git a/assets/self-assembly.jpg b/assets/self-assembly.jpg
new file mode 100644
index 000000000..963cf07cc
Binary files /dev/null and b/assets/self-assembly.jpg differ
diff --git a/scripts/blame.py b/scripts/blame.py
index 97edfd3d3..e3c7bbb49 100755
--- a/scripts/blame.py
+++ b/scripts/blame.py
@@ -1,32 +1,32 @@
#!/usr/bin/env python3
-import tempfile
-import sys
import subprocess
+import sys
+import tempfile
from pathlib import Path
+
import pylab as plt
-from aider.dump import dump
from imgcat import imgcat
+from aider.dump import dump
+
+
def get_lines_with_commit_hash(filename, aider_commits, git_dname, verbose=False):
result = subprocess.run(
["git", "-C", git_dname, "blame", "-l", filename],
capture_output=True,
text=True,
- check=True
+ check=True,
)
- hashes = [
- line.split()[0]
- for line in result.stdout.splitlines()
- ]
+ hashes = [line.split()[0] for line in result.stdout.splitlines()]
lines = Path(filename).read_text().splitlines()
num_aider_lines = 0
- for hsh,line in zip(hashes, lines):
+ for hsh, line in zip(hashes, lines):
if hsh in aider_commits:
num_aider_lines += 1
- prefix = '+'
+ prefix = "+"
else:
prefix = " "
@@ -45,7 +45,7 @@ def get_aider_commits(git_dname):
["git", "-C", git_dname, "log", "--pretty=format:%H %s"],
capture_output=True,
text=True,
- check=True
+ check=True,
)
results = result.stdout.splitlines()
@@ -59,7 +59,6 @@ def get_aider_commits(git_dname):
dump(len(commits))
-
return commits
@@ -72,17 +71,28 @@ def show_commit_stats(commits):
["git", "show", "--stat", "--oneline", commit],
capture_output=True,
text=True,
- check=True
+ check=True,
)
added_lines = 0
deleted_lines = 0
for line in result.stdout.splitlines():
+ if "changed," not in line:
+ continue
+ if "insertion" not in line and "deletion" not in line:
+ continue
dump(line)
- if "insertion" in line or "insertion(+)" in line:
- added_lines += int(line.split()[line.split().index("insertion(+)") - 1])
- if "deletion" in line or "deletion(-)" in line:
- deleted_lines += int(line.split()[line.split().index("deletion(-)") - 1])
+ pieces = line.split(",")
+ try:
+ for piece in pieces:
+ if "insertion" in piece:
+ dump(piece)
+ added_lines += int(piece.strip().split()[0])
+ if "deletion" in piece:
+ dump(piece)
+ deleted_lines += int(piece.strip().split()[0])
+ except ValueError:
+ pass
total_added_lines += added_lines
total_deleted_lines += deleted_lines
@@ -91,12 +101,13 @@ def show_commit_stats(commits):
print(f"Total: +{total_added_lines} -{total_deleted_lines}")
+
def process_fnames(fnames, git_dname):
if not git_dname:
git_dname = "."
aider_commits = get_aider_commits(git_dname)
- show_commit_stats(aider_commits)
+ # show_commit_stats(aider_commits)
total_lines = 0
total_aider_lines = 0
@@ -108,24 +119,24 @@ def process_fnames(fnames, git_dname):
percent_modified = (num_aider_lines / num_lines) * 100 if num_lines > 0 else 0
if not num_aider_lines:
continue
- print(f"{fname}: {num_aider_lines}/{num_lines} ({percent_modified:.2f}%)")
+ print(f"|{fname}| {num_aider_lines} of {num_lines} | {percent_modified:.2f}% |")
total_percent_modified = (total_aider_lines / total_lines) * 100 if total_lines > 0 else 0
- print(f"Total: {total_aider_lines}/{total_lines} lines by aider ({total_percent_modified:.2f}%)")
+ print(
+ f"Total: {total_aider_lines}/{total_lines} lines by aider ({total_percent_modified:.2f}%)"
+ )
return total_aider_lines, total_lines, total_percent_modified
+
def process_repo(git_dname=None):
if not git_dname:
git_dname = "."
result = subprocess.run(
- ["git", "-C", git_dname, "ls-files"],
- capture_output=True,
- text=True,
- check=True
+ ["git", "-C", git_dname, "ls-files"], capture_output=True, text=True, check=True
)
git_dname = Path(git_dname)
- fnames = [git_dname/fname for fname in result.stdout.splitlines() if fname.endswith('.py')]
+ fnames = [git_dname / fname for fname in result.stdout.splitlines() if fname.endswith(".py")]
return process_fnames(fnames, git_dname)
@@ -136,7 +147,7 @@ def history():
["git", "-C", git_dname, "log", "--pretty=format:%H %s"],
capture_output=True,
text=True,
- check=True
+ check=True,
)
commits = []
@@ -148,8 +159,8 @@ def history():
dump(len(commits))
num_commits = len(commits)
- N=10
- step = (num_commits-1)/(N-1)
+ N = 10
+ step = (num_commits - 1) / (N - 1)
results = []
i = 0
while i < num_commits:
@@ -176,21 +187,19 @@ def history():
aider_lines = [aider_lines for _, aider_lines, _, _ in results]
total_lines = [total_lines for _, _, total_lines, _ in results]
- plt.fill_between(x, aider_lines, label='Aider Lines', color='skyblue', alpha=0.5)
- plt.fill_between(x, total_lines, label='Total Lines', color='lightgreen', alpha=0.5)
- plt.xlabel('Commit Number')
- plt.ylabel('Lines of Code')
- plt.title('Aider Lines and Total Lines Over Time')
+ plt.fill_between(x, aider_lines, label="Aider Lines", color="skyblue", alpha=0.5)
+ plt.fill_between(x, total_lines, label="Total Lines", color="lightgreen", alpha=0.5)
+ plt.xlabel("Commit Number")
+ plt.ylabel("Lines of Code")
+ plt.title("Aider Lines and Total Lines Over Time")
plt.legend()
- plt.savefig('aider_plot.png')
- with open('aider_plot.png', 'rb') as f:
+ plt.savefig("aider_plot.png")
+ with open("aider_plot.png", "rb") as f:
imgcat(f.read())
-
-
def main():
- #return history()
+ # return history()
if len(sys.argv) < 2:
return process_repo()