track and report errors/asks during benchmarking

2025-05-23 22:04:59 +00:00 · 2023-06-26 10:33:16 -07:00 · 2023-06-26 10:33:16 -07:00 · 1370da14fb
commit 1370da14fb
parent cbfda391bb
2 changed files with 16 additions and 1 deletion
--- a/scripts/benchmark.py
+++ b/scripts/benchmark.py
@@ -180,7 +180,7 @@ def summarize_results(dirname, all_results, total_tests=None):
        total_cost += results["cost"]
        duration += results["duration"]

-        for key in "model edit_format commit_hash".split():
+        for key in "model edit_format commit_hash num_error_outputs num_user_asks".split():
            val = results.get(key)
            variants[key].add(val)

@@ -339,6 +339,8 @@ Only use standard python libraries, don't suggest installing any packages.
        cost=coder.total_cost,
        duration=dur,
        commit_hash=commit_hash,
+        num_error_outputs=io.num_error_outputs,
+        num_user_asks=io.num_user_asks,
    )
    dump(results)