Added --keyword (-k) option to run only tests whose name contains the given keyword

This commit is contained in:
Paul Gauthier 2023-06-23 17:38:16 -07:00
parent 0bab66defa
commit 57e16aa657

View file

@ -15,8 +15,10 @@ from aider.io import InputOutput
def main(): def main():
parser = argparse.ArgumentParser(description="Aider Benchmark") parser = argparse.ArgumentParser(description="Aider Benchmark")
parser.add_argument("dirname", type=str, help="Directory name") parser.add_argument("dirname", type=str, help="Directory name")
parser.add_argument("--model", "-m", type=str, help="Model name") parser.add_argument("--model", "-m", type=str, help="Model name", default="gpt-3.5-turbo")
parser.add_argument("--edit-format", "-e", type=str, help="Edit format") parser.add_argument("--edit-format", "-e", type=str, help="Edit format")
parser.add_argument("--keyword", "-k", type=str, help="Only run tests that contain keyword")
args = parser.parse_args() args = parser.parse_args()
dirname = Path(args.dirname) dirname = Path(args.dirname)
@ -32,6 +34,9 @@ def main():
total_cost = 0 total_cost = 0
for testname in test_dnames: for testname in test_dnames:
if args.keyword and args.keyword not in testname:
continue
dump(testname) dump(testname)
results = run_test(dirname / testname, args.model, args.edit_format) results = run_test(dirname / testname, args.model, args.edit_format)
os.chdir(cwd) os.chdir(cwd)
@ -66,6 +71,8 @@ def run_test(testdir, model_name, edit_format):
os.chdir(testdir) os.chdir(testdir)
history_fname = Path(".aider.chat.history.md")
results_fname = Path(".aider.results.json") results_fname = Path(".aider.results.json")
if results_fname.exists(): if results_fname.exists():
try: try:
@ -94,7 +101,7 @@ def run_test(testdir, model_name, edit_format):
io = InputOutput( io = InputOutput(
pretty=True, pretty=True,
yes=False, yes=False,
chat_history_file=".aider.chat.history.md", chat_history_file=history_fname,
) )
main_model = models.Model(model_name) main_model = models.Model(model_name)
@ -121,7 +128,7 @@ def run_test(testdir, model_name, edit_format):
if coder.num_control_c: if coder.num_control_c:
raise KeyboardInterrupt raise KeyboardInterrupt
passed = run_tests() passed = run_tests(history_fname)
results = dict( results = dict(
testdir=str(testdir), testdir=str(testdir),
@ -139,7 +146,7 @@ def run_test(testdir, model_name, edit_format):
return results return results
def run_tests(): def run_tests(history_fname):
test_files = [file for file in os.listdir() if file.endswith("_test.py")] test_files = [file for file in os.listdir() if file.endswith("_test.py")]
assert len(test_files) assert len(test_files)
@ -149,19 +156,26 @@ def run_tests():
dump(test_file) dump(test_file)
try: try:
result = subprocess.run( result = subprocess.run(
["pytest", test_file], capture_output=True, text=True, timeout=60 ["pytest", test_file],
stdout=subprocess.PIPE,
stderr=subprocess.STDOUT,
text=True,
timeout=60,
) )
print(result.stdout)
print(result.stderr)
except subprocess.TimeoutExpired:
all_tests_passed = False
print(f"Test {test_file} timed out")
continue
if result.returncode != 0: if result.returncode != 0:
all_tests_passed = False all_tests_passed = False
print(f"Test {test_file} failed with the following output:\n{result.stderr}") print(f"Test {test_file} failed with the following output:\n{result.stderr}")
res = result.stdout
except subprocess.TimeoutExpired:
all_tests_passed = False
res = f"Test {test_file} timed out"
print(res)
with history_fname.open("a") as fh:
fh.write(f"```\n{res}\n```")
return all_tests_passed return all_tests_passed