This commit is contained in:
Paul Gauthier 2023-06-23 16:11:38 -07:00
parent ae9ded4eaf
commit 1163ca7db0

View file

@ -1,10 +1,10 @@
import argparse
import json import json
import os import os
import subprocess import subprocess
import sys import time
from json.decoder import JSONDecodeError from json.decoder import JSONDecodeError
from pathlib import Path from pathlib import Path
import argparse
from aider import models from aider import models
from aider.coders import Coder from aider.coders import Coder
@ -13,17 +13,17 @@ from aider.io import InputOutput
def main(): def main():
parser = argparse.ArgumentParser(description='Aider Benchmark') parser = argparse.ArgumentParser(description="Aider Benchmark")
parser.add_argument('dirname', type=str, help='Directory name') parser.add_argument("dirname", type=str, help="Directory name")
parser.add_argument('--model', '-m', type=str, help='Model name') parser.add_argument("--model", "-m", type=str, help="Model name")
parser.add_argument('--edit-format', '-e', type=str, help='Edit format') parser.add_argument("--edit-format", "-e", type=str, help="Edit format")
args = parser.parse_args() args = parser.parse_args()
dirname = Path(args.dirname) dirname = Path(args.dirname)
cwd = os.getcwd() cwd = os.getcwd()
test_dnames = list(os.listdir(dirname)) test_dnames = sorted(os.listdir(dirname))
total_tests = len(test_dnames) total_tests = len(test_dnames)
completed_tests = 0 completed_tests = 0
@ -44,8 +44,15 @@ def main():
dump(passed_tests, completed_tests, total_tests) dump(passed_tests, completed_tests, total_tests)
pass_rate = 100 * passed_tests / completed_tests
dump(pass_rate)
total_cost += results["cost"] total_cost += results["cost"]
dump(total_cost) dump(total_cost)
projected_cost = total_cost * total_tests / completed_tests
dump(projected_cost)
print() print()
### ###
@ -92,6 +99,9 @@ def run_test(testdir, model_name, edit_format):
main_model = models.Model(model_name) main_model = models.Model(model_name)
edit_format = edit_format or main_model.edit_format edit_format = edit_format or main_model.edit_format
dump(main_model)
dump(edit_format)
coder = Coder.create( coder = Coder.create(
main_model, main_model,
edit_format, edit_format,
@ -103,7 +113,9 @@ def run_test(testdir, model_name, edit_format):
stream=False, stream=False,
) )
start = time.time()
coder.run(with_message=instructions) coder.run(with_message=instructions)
dur = time.time() - start
if coder.num_control_c: if coder.num_control_c:
raise KeyboardInterrupt raise KeyboardInterrupt
@ -116,6 +128,7 @@ def run_test(testdir, model_name, edit_format):
edit_format=edit_format, edit_format=edit_format,
tests_passed=passed, tests_passed=passed,
cost=coder.total_cost, cost=coder.total_cost,
duration=dur,
) )
dump(results) dump(results)