From 99194e405162dab25ab4062bf570558fa126269f Mon Sep 17 00:00:00 2001 From: Paul Gauthier Date: Fri, 23 Jun 2023 20:54:02 -0700 Subject: [PATCH] Better stats for retry outcomes --- aider/coders/wholefile_coder.py | 7 +++++-- scripts/benchmark.py | 19 ++++++++++++------- 2 files changed, 17 insertions(+), 9 deletions(-) diff --git a/aider/coders/wholefile_coder.py b/aider/coders/wholefile_coder.py index b9499b289..4c862f2e4 100644 --- a/aider/coders/wholefile_coder.py +++ b/aider/coders/wholefile_coder.py @@ -73,6 +73,7 @@ class WholeFileCoder(Coder): fname = list(chat_files)[0] else: show_chat_files = " ".join(chat_files) + # TODO: adopt the new allowed_to_edit() raise ValueError(f"{fname} is not one of: {show_chat_files}") elif fname: @@ -97,7 +98,9 @@ class WholeFileCoder(Coder): return "\n".join(output) - if fname: - raise ValueError("Started a ``` block without closing it") + # TODO: take the file even if it wasn't closed properly? + # + # if fname: + # raise ValueError("Started a ``` block without closing it") return edited diff --git a/scripts/benchmark.py b/scripts/benchmark.py index fa5cd0be0..b67d9372d 100644 --- a/scripts/benchmark.py +++ b/scripts/benchmark.py @@ -63,7 +63,7 @@ def main(): total_tests = len(test_dnames) completed_tests = 0 - passed_tests = 0 + passed_tests = [0] * args.retries total_cost = 0 @@ -79,12 +79,13 @@ def main(): completed_tests += 1 passed = results["tests_outcomes"][-1] if passed: - passed_tests += 1 + for i in range(len(results["tests_outcomes"]) - 1, args.retries): + passed_tests[i] += 1 - dump(passed_tests, completed_tests, total_tests) - - pass_rate = 100 * passed_tests / completed_tests - dump(pass_rate) + dump(completed_tests, total_tests) + for i in range(args.retries): + pass_rate = 100 * passed_tests[i] / completed_tests + dump(i, pass_rate) total_cost += results["cost"] dump(total_cost) @@ -97,6 +98,8 @@ def main(): ### # input('next?') + print(dirname / testname) + def run_test(testdir, model_name, edit_format, 
retries): if not os.path.isdir(testdir): @@ -150,7 +153,6 @@ def run_test(testdir, model_name, edit_format, retries): io, os.environ["OPENAI_API_KEY"], fnames=fnames, - # verbose=True, use_git=False, stream=False, pretty=False, @@ -174,6 +176,9 @@ def run_test(testdir, model_name, edit_format, retries): test_outcomes.append(True) break + errors = errors.splitlines() + errors = errors[:25] + errors = "\n".join(errors) instructions = errors instructions += "\n\nFix the code to resolve the test failures above."