aider/benchmark/rungrid.py
Paul Gauthier 426819e703 copy
2023-11-07 10:53:27 -08:00

61 lines
1.3 KiB
Python
Executable file

#!/usr/bin/env python
import subprocess
import sys
from aider.dump import dump # noqa: F401
def main():
    """Run the benchmark once for every enabled (model, edit format) pair.

    Each pair gets its own results directory name of the form
    ``rungrid-nov-<model>-<edit_format>`` and is handed to ``run``.
    Pairs that combine a ``-func`` edit format with a model whose name
    contains ``-03`` are skipped.
    """
    # Disabled for now: gpt-3.5-turbo-16k-0613, gpt-4-0314, gpt-4-0613
    enabled_models = (
        "gpt-3.5-turbo-0301",
        "gpt-3.5-turbo-0613",
        "gpt-3.5-turbo-1106",
    )
    # Disabled for now: diff-func, whole, whole-func
    enabled_formats = ("diff",)

    # NOTE(review): the "-func"/"-03" exclusion presumably exists because the
    # older "-03xx" models cannot use the function-based formats -- confirm.
    # An earlier, now-disabled exclusion also skipped
    # ("gpt-3.5-turbo-16k-0613", "whole-func") with the remark
    # "sublist reliably hangs the API?".
    grid = [
        (model_name, fmt)
        for model_name in enabled_models
        for fmt in enabled_formats
        if not ("-func" in fmt and "-03" in model_name)
    ]

    for model_name, fmt in grid:
        run(f"rungrid-nov-{model_name}-{fmt}", model_name, fmt)
def run(dirname, model, edit_format):
    """Launch a single benchmark run as a subprocess and wait for it.

    Args:
        dirname: Passed to ``benchmark.py`` as its first positional argument.
        model: Value supplied for ``--model``.
        edit_format: Value supplied for ``--edit-format``.

    Raises:
        subprocess.CalledProcessError: If the benchmark process exits with a
            non-zero status (the call uses ``check=True``).
    """
    # The script path is relative, so the current working directory matters.
    argv = ["./benchmark/benchmark.py", dirname]
    argv += ["--model", model]
    argv += ["--edit-format", edit_format]
    argv += ["--threads", "10"]
    argv.append("--cont")

    # Echo the command line before executing it.
    print(" ".join(argv))
    subprocess.run(argv, check=True)
if __name__ == "__main__":
    # Use whatever main() returns as the process exit status.
    sys.exit(main())