diff --git a/scripts/benchmark.py b/scripts/benchmark.py index f23716b5f..e689fcf66 100644 --- a/scripts/benchmark.py +++ b/scripts/benchmark.py @@ -1,6 +1,8 @@ import argparse +import datetime import json import os +import shutil import subprocess import time from json.decoder import JSONDecodeError @@ -11,6 +13,9 @@ from aider.coders import Coder from aider.dump import dump # noqa: F401 from aider.io import InputOutput +ORIGINAL_DNAME = Path("tmp.benchmark/practice") +assert ORIGINAL_DNAME.exists() + def main(): parser = argparse.ArgumentParser(description="Aider Benchmark") @@ -18,11 +23,33 @@ def main(): parser.add_argument("--model", "-m", type=str, help="Model name", default="gpt-3.5-turbo") parser.add_argument("--edit-format", "-e", type=str, help="Edit format") parser.add_argument("--keyword", "-k", type=str, help="Only run tests that contain keyword") + parser.add_argument( + "--clean", + "-c", + action="store_true", + help="Discard the current testdir and make a clean copy", + ) args = parser.parse_args() dirname = Path(args.dirname) + if args.clean and dirname.exists(): + print("Cleaning up and replacing", dirname) + dir_files = set(fn.name for fn in dirname.glob("*")) + original_files = set(fn.name for fn in ORIGINAL_DNAME.glob("*")) + if dir_files != original_files: + print("ERROR: will not delete dir that does not look like original tests", dirname) + return + + now = datetime.datetime.now() + now = now.strftime("%Y-%m-%d-%H-%M-%S-") + dest = dirname.parent / "OLD" / (now + dirname.name) + dirname.rename(dest) + + if not dirname.exists(): + shutil.copytree(ORIGINAL_DNAME, dirname) + cwd = os.getcwd() test_dnames = sorted(os.listdir(dirname))