diff --git a/aider/coders/base_coder.py b/aider/coders/base_coder.py
index 95ee6b818..81b8695fe 100755
--- a/aider/coders/base_coder.py
+++ b/aider/coders/base_coder.py
@@ -392,6 +392,7 @@ class Coder:
 
         if interrupted:
             self.io.tool_error("\n\n^C KeyboardInterrupt")
+            self.num_control_c += 1
             content += "\n^C KeyboardInterrupt"
 
         self.io.tool_output()
diff --git a/scripts/benchmark.py b/scripts/benchmark.py
index 17d5e1e42..504ce3913 100644
--- a/scripts/benchmark.py
+++ b/scripts/benchmark.py
@@ -1,8 +1,8 @@
 import json
 import os
-import shutil
 import subprocess
 import sys
+from json.decoder import JSONDecodeError
 from pathlib import Path
 
 from aider import models
@@ -10,32 +10,6 @@ from aider.coders import Coder
 from aider.dump import dump  # noqa: F401
 from aider.io import InputOutput
 
-# from tempfile import TemporaryDirectory
-
-
-def copy_exercise(dirname, tempdir):
-    # Copy all files from dirname to tempdir
-    for item in os.listdir(dirname):
-        s = os.path.join(dirname, item)
-        d = os.path.join(tempdir, item)
-        if os.path.isfile(s):
-            shutil.copy2(s, d)
-
-    add_files = []
-    for file in os.listdir(tempdir):
-        dump(file)
-        full_path = os.path.join(tempdir, file)
-
-        if "test" not in file and os.path.isfile(full_path):
-            add_files.append(file)
-
-    # Copy .docs subdir to tempdir as 'docs'
-    docs_src = os.path.join(dirname, ".docs")
-    docs_dst = os.path.join(tempdir, "docs")
-    shutil.copytree(docs_src, docs_dst, False, None)
-
-    return add_files
-
 
 def main():
     if len(sys.argv) != 2:
@@ -48,6 +22,7 @@ def main():
 
     total_tests = 0
     passed_tests = 0
+    total_cost = 0
     for testname in os.listdir(dirname):
         dump(testname)
         results = run_test(dirname / testname)
@@ -61,6 +36,12 @@ def main():
 
             dump(passed_tests, total_tests)
 
+            total_cost += results["cost"]
+            dump(total_cost)
+
+        ###
+        # input('next?')
+
 
 def run_test(testdir):
     if not os.path.isdir(testdir):
@@ -69,6 +50,14 @@ def run_test(testdir):
 
     os.chdir(testdir)
 
+    results_fname = Path(".aider.results.json")
+    if results_fname.exists():
+        try:
+            return json.loads(results_fname.read_text())
+        except JSONDecodeError:
+            print(f"{testdir}/{results_fname} failed to parse, skipping")
+            return
+
     started_fname = Path(".aider.started")
     if started_fname.exists():
         print(f"{testdir}/{started_fname} exists, skipping")
@@ -106,9 +95,13 @@ def run_test(testdir):
 
     coder.run(with_message=instructions)
 
+    if coder.num_control_c:
+        raise KeyboardInterrupt
+
     passed = run_tests()
 
     results = dict(
+        testdir=str(testdir),
         model=main_model.name,
         edit_format=edit_format,
         tests_passed=passed,
@@ -116,17 +109,24 @@ def run_test(testdir):
     )
     dump(results)
 
-    Path(".aider.results.json").write_text(json.dumps(results, indent=4))
+    results_fname.write_text(json.dumps(results, indent=4))
+    started_fname.unlink()
 
     return results
 
 
 def run_tests():
-    test_files = [file for file in os.listdir() if "test" in file and file.endswith(".py")]
+    test_files = [file for file in os.listdir() if file.endswith("_test.py")]
+    assert len(test_files)
+
     all_tests_passed = True
 
     for test_file in test_files:
-        result = subprocess.run(["python", test_file], capture_output=True, text=True)
+        dump(test_file)
+        result = subprocess.run(["pytest", test_file], capture_output=True, text=True)
+        print(result.stdout)
+        print(result.stderr)
+
         if result.returncode != 0:
             all_tests_passed = False
             print(f"Test {test_file} failed with the following output:\n{result.stderr}")