diff --git a/aider/io.py b/aider/io.py index a5d9227f4..3ee5b8317 100644 --- a/aider/io.py +++ b/aider/io.py @@ -83,6 +83,9 @@ class AutoCompleter(Completer): class InputOutput: + num_error_outputs = 0 + num_user_asks = 0 + def __init__( self, pretty=True, @@ -208,6 +211,8 @@ class InputOutput: self.append_chat_history(hist) def confirm_ask(self, question, default="y"): + self.num_user_asks += 1 + if self.yes is True: res = "yes" elif self.yes is False: @@ -217,12 +222,16 @@ class InputOutput: hist = f"{question.strip()} {res.strip()}" self.append_chat_history(hist, linebreak=True, blockquote=True) + if self.yes in (True, False): + self.tool_output(hist) if not res or not res.strip(): return return res.strip().lower().startswith("y") def prompt_ask(self, question, default=None): + self.num_user_asks += 1 + if self.yes is True: res = "yes" elif self.yes is False: @@ -232,10 +241,14 @@ class InputOutput: hist = f"{question.strip()} {res.strip()}" self.append_chat_history(hist, linebreak=True, blockquote=True) + if self.yes in (True, False): + self.tool_output(hist) return res def tool_error(self, message): + self.num_error_outputs += 1 + if message.strip(): hist = f"{message.strip()}" self.append_chat_history(hist, linebreak=True, blockquote=True) diff --git a/scripts/benchmark.py b/scripts/benchmark.py index be4c92f52..41007864b 100644 --- a/scripts/benchmark.py +++ b/scripts/benchmark.py @@ -180,7 +180,7 @@ def summarize_results(dirname, all_results, total_tests=None): total_cost += results["cost"] duration += results["duration"] - for key in "model edit_format commit_hash".split(): + for key in "model edit_format commit_hash num_error_outputs num_user_asks".split(): val = results.get(key) variants[key].add(val) @@ -339,6 +339,8 @@ Only use standard python libraries, don't suggest installing any packages. cost=coder.total_cost, duration=dur, commit_hash=commit_hash, + num_error_outputs=io.num_error_outputs, + num_user_asks=io.num_user_asks, ) dump(results)