run many tests

2025-05-23 22:04:59 +00:00 · 2023-06-23 15:19:41 -07:00 · 2023-06-23 15:19:41 -07:00 · b16778d0f1
commit b16778d0f1
parent 1239d3e30c
1 changed file with 47 additions and 11 deletions
--- a/scripts/benchmark.py
+++ b/scripts/benchmark.py
@@ -1,3 +1,4 @@
+import json
 import os
 import shutil
 import subprocess
@@ -41,21 +42,45 @@ def main():
        print("Usage: python benchmark.py <dirname>")
        sys.exit(1)

-    dirname = sys.argv[1]
+    dirname = Path(sys.argv[1])

-    # with TemporaryDirectory() as tempdir:
-    tempdir = "tmp.benchmark"
-    os.mkdir(tempdir)
+    cwd = os.getcwd()

-    run_test(dirname, tempdir)
+    total_tests = 0
+    passed_tests = 0
+    for testname in os.listdir(dirname):
+        dump(testname)
+        results = run_test(dirname / testname)
+        os.chdir(cwd)
+
+        if results:
+            total_tests += 1
+            passed = results["tests_passed"]
+            if passed:
+                passed_tests += 1
+
+            dump(passed_tests, total_tests)


-def run_test(dirname, tempdir):
-    fnames = copy_exercise(dirname, tempdir)
-    os.chdir(tempdir)
+def run_test(testdir):
+    if not os.path.isdir(testdir):
+        print("Not a dir:", testdir)
+        return

-    instructions = Path("docs/instructions.md").read_text()
+    os.chdir(testdir)

+    started_fname = Path(".aider.started")
+    if started_fname.exists():
+        print(f"{testdir}/{started_fname} exists, skipping")
+        return
+    started_fname.touch()
+
+    fnames = []
+    for fname in os.listdir("."):
+        if "test" not in fname and os.path.isfile(fname) and fname[0] != ".":
+            fnames.append(fname)
+
+    instructions = Path(".docs/instructions.md").read_text()
    instructions += (
        "\n\n=====\n\nModify these files according to the above instructions: " + " ".join(fnames)
    )
@@ -66,10 +91,11 @@ def run_test(dirname, tempdir):
    )

    main_model = models.Model("gpt-3.5-turbo")
+    edit_format = main_model.edit_format

    coder = Coder.create(
        main_model,
-        None,
+        edit_format,
        io,
        os.environ["OPENAI_API_KEY"],
        fnames=fnames,
@@ -82,7 +108,17 @@ def run_test(dirname, tempdir):

    passed = run_tests()

-    dump(passed)
+    results = dict(
+        model=main_model.name,
+        edit_format=edit_format,
+        tests_passed=passed,
+        cost=coder.total_cost,
+    )
+    dump(results)
+
+    Path(".aider.results.json").write_text(json.dumps(results, indent=4))
+
+    return results


 def run_tests():