Better stats for retry outcomes

This commit is contained in:
Paul Gauthier 2023-06-23 20:54:02 -07:00
parent 7fc1bfd430
commit 99194e4051
2 changed files with 17 additions and 9 deletions

View file

@ -73,6 +73,7 @@ class WholeFileCoder(Coder):
fname = list(chat_files)[0] fname = list(chat_files)[0]
else: else:
show_chat_files = " ".join(chat_files) show_chat_files = " ".join(chat_files)
# TODO: adopt the new allowed_to_edit()
raise ValueError(f"{fname} is not one of: {show_chat_files}") raise ValueError(f"{fname} is not one of: {show_chat_files}")
elif fname: elif fname:
@ -97,7 +98,9 @@ class WholeFileCoder(Coder):
return "\n".join(output) return "\n".join(output)
if fname: # TODO: take the file even if it wasn't closed properly?
raise ValueError("Started a ``` block without closing it") #
# if fname:
# raise ValueError("Started a ``` block without closing it")
return edited return edited

View file

@ -63,7 +63,7 @@ def main():
total_tests = len(test_dnames) total_tests = len(test_dnames)
completed_tests = 0 completed_tests = 0
passed_tests = 0 passed_tests = [0] * args.retries
total_cost = 0 total_cost = 0
@ -79,12 +79,13 @@ def main():
completed_tests += 1 completed_tests += 1
passed = results["tests_outcomes"][-1] passed = results["tests_outcomes"][-1]
if passed: if passed:
passed_tests += 1 for i in range(len(results["tests_outcomes"]) - 1, args.retries):
passed_tests[i] += 1
dump(passed_tests, completed_tests, total_tests) dump(completed_tests, total_tests)
for i in range(args.retries):
pass_rate = 100 * passed_tests / completed_tests pass_rate = 100 * passed_tests[i] / completed_tests
dump(pass_rate) dump(i, pass_rate)
total_cost += results["cost"] total_cost += results["cost"]
dump(total_cost) dump(total_cost)
@ -97,6 +98,8 @@ def main():
### ###
# input('next?') # input('next?')
print(dirname / testname)
def run_test(testdir, model_name, edit_format, retries): def run_test(testdir, model_name, edit_format, retries):
if not os.path.isdir(testdir): if not os.path.isdir(testdir):
@ -150,7 +153,6 @@ def run_test(testdir, model_name, edit_format, retries):
io, io,
os.environ["OPENAI_API_KEY"], os.environ["OPENAI_API_KEY"],
fnames=fnames, fnames=fnames,
# verbose=True,
use_git=False, use_git=False,
stream=False, stream=False,
pretty=False, pretty=False,
@ -174,6 +176,9 @@ def run_test(testdir, model_name, edit_format, retries):
test_outcomes.append(True) test_outcomes.append(True)
break break
errors = errors.splitlines()
errors = errors[:25]
errors = "\n".join(errors)
instructions = errors instructions = errors
instructions += "\n\nFix the code to resolve the test failures above." instructions += "\n\nFix the code to resolve the test failures above."