Use the tokenizer to count tokens in partial_response_content after streaming responses complete, and show tokens & cost in that case too.

This commit is contained in:
Paul Gauthier (aider) 2024-07-28 15:50:25 -03:00
parent 90b3542e75
commit f81acb839c

View file

@ -1169,18 +1169,7 @@ class Coder:
self.io.tool_error(show_content_err) self.io.tool_error(show_content_err)
raise Exception("No data found in LLM response!") raise Exception("No data found in LLM response!")
tokens = None self.calculate_and_show_tokens_and_cost(completion)
if hasattr(completion, "usage") and completion.usage is not None:
prompt_tokens = completion.usage.prompt_tokens
completion_tokens = completion.usage.completion_tokens
tokens = f"{prompt_tokens} prompt tokens, {completion_tokens} completion tokens"
if self.main_model.info.get("input_cost_per_token"):
cost = prompt_tokens * self.main_model.info.get("input_cost_per_token")
if self.main_model.info.get("output_cost_per_token"):
cost += completion_tokens * self.main_model.info.get("output_cost_per_token")
tokens += f", ${cost:.6f} cost"
self.total_cost += cost
show_resp = self.render_incremental_response(True) show_resp = self.render_incremental_response(True)
if self.show_pretty(): if self.show_pretty():
@ -1192,9 +1181,6 @@ class Coder:
self.io.console.print(show_resp) self.io.console.print(show_resp)
if tokens is not None:
self.io.tool_output(tokens)
if ( if (
hasattr(completion.choices[0], "finish_reason") hasattr(completion.choices[0], "finish_reason")
and completion.choices[0].finish_reason == "length" and completion.choices[0].finish_reason == "length"
@ -1242,6 +1228,8 @@ class Coder:
sys.stdout.flush() sys.stdout.flush()
yield text yield text
self.calculate_and_show_tokens_and_cost()
def live_incremental_response(self, final): def live_incremental_response(self, final):
show_resp = self.render_incremental_response(final) show_resp = self.render_incremental_response(final)
self.mdstream.update(show_resp, final=final) self.mdstream.update(show_resp, final=final)
@ -1249,6 +1237,27 @@ class Coder:
def render_incremental_response(self, final): def render_incremental_response(self, final):
return self.get_multi_response_content() return self.get_multi_response_content()
def calculate_and_show_tokens_and_cost(self, completion=None):
    """Report token usage and dollar cost for the latest LLM response.

    If *completion* carries a ``usage`` attribute (non-streaming
    responses), its exact prompt/completion token counts are used.
    Otherwise (streaming), completion tokens are counted locally by
    running the model's tokenizer over ``self.partial_response_content``;
    the prompt token count is unknown in that case and reported as 0.

    Per-token prices come from ``self.main_model.info``.  The computed
    cost is accumulated into ``self.total_cost`` and a one-line summary
    is printed via ``self.io.tool_output``.
    """
    prompt_tokens = 0
    completion_tokens = 0
    cost = 0

    if completion and hasattr(completion, "usage") and completion.usage is not None:
        # Exact counts reported by the API response object.
        prompt_tokens = completion.usage.prompt_tokens
        completion_tokens = completion.usage.completion_tokens
    else:
        # Streaming response: no usage object is available, so count the
        # accumulated partial content with the model's own tokenizer.
        completion_tokens = self.main_model.token_count(self.partial_response_content)

    tokens = f"{prompt_tokens} prompt tokens, {completion_tokens} completion tokens"

    # Hoist the price lookups so each dict key is fetched only once
    # (the original fetched each key twice: once to test, once to use).
    input_cost_per_token = self.main_model.info.get("input_cost_per_token")
    output_cost_per_token = self.main_model.info.get("output_cost_per_token")
    if input_cost_per_token:
        cost += prompt_tokens * input_cost_per_token
    if output_cost_per_token:
        cost += completion_tokens * output_cost_per_token

    # NOTE(review): the cost suffix is appended even when the model has no
    # pricing info (shows "$0.000000 cost") — preserved original behavior.
    tokens += f", ${cost:.6f} cost"
    self.total_cost += cost

    self.io.tool_output(tokens)
def get_multi_response_content(self, final=False): def get_multi_response_content(self, final=False):
cur = self.multi_response_content cur = self.multi_response_content
new = self.partial_response_content new = self.partial_response_content