From a98b531bcc9c3738110b2e538a246fb7e4dbe81f Mon Sep 17 00:00:00 2001 From: "Paul Gauthier (aider)" Date: Wed, 7 May 2025 21:02:00 -0700 Subject: [PATCH] feat: add prompt_tokens and completion_tokens to results summary --- benchmark/benchmark.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/benchmark/benchmark.py b/benchmark/benchmark.py index a3c2ca850..b3bbc94ea 100755 --- a/benchmark/benchmark.py +++ b/benchmark/benchmark.py @@ -492,6 +492,8 @@ def summarize_results(dirname, stats_languages=None): res.syntax_errors = 0 res.indentation_errors = 0 res.lazy_comments = 0 + res.prompt_tokens = 0 + res.completion_tokens = 0 res.reasoning_effort = None res.thinking_tokens = None @@ -523,6 +525,9 @@ def summarize_results(dirname, stats_languages=None): res.syntax_errors += results.get("syntax_errors", 0) res.indentation_errors += results.get("indentation_errors", 0) + res.prompt_tokens += results.get("prompt_tokens", 0) + res.completion_tokens += results.get("completion_tokens", 0) + res.reasoning_effort = results.get("reasoning_effort") res.thinking_tokens = results.get("thinking_tokens") @@ -590,6 +595,8 @@ def summarize_results(dirname, stats_languages=None): show("syntax_errors") show("indentation_errors") show("exhausted_context_windows") + show("prompt_tokens", red=None) + show("completion_tokens", red=None) show("test_timeouts") print(f" total_tests: {res.total_tests}") @@ -950,6 +957,8 @@ def run_test_real( indentation_errors=indentation_errors, lazy_comments=lazy_comments, # Add the count of pattern matches to the results reasoning_effort=reasoning_effort, + prompt_tokens=coder.message_tokens_sent, + completion_tokens=coder.message_tokens_received, thinking_tokens=thinking_tokens, chat_hashes=list( zip(