Use the tokenizer to count tokens in partial_response_content after streaming responses complete, and show tokens & cost in that case too.

This commit is contained in:
Paul Gauthier (aider) 2024-07-28 15:50:25 -03:00
parent 90b3542e75
commit f81acb839c

View file

@ -1169,18 +1169,7 @@ class Coder:
self.io.tool_error(show_content_err) self.io.tool_error(show_content_err)
raise Exception("No data found in LLM response!") raise Exception("No data found in LLM response!")
tokens = None self.calculate_and_show_tokens_and_cost(completion)
if hasattr(completion, "usage") and completion.usage is not None:
prompt_tokens = completion.usage.prompt_tokens
completion_tokens = completion.usage.completion_tokens
tokens = f"{prompt_tokens} prompt tokens, {completion_tokens} completion tokens"
if self.main_model.info.get("input_cost_per_token"):
cost = prompt_tokens * self.main_model.info.get("input_cost_per_token")
if self.main_model.info.get("output_cost_per_token"):
cost += completion_tokens * self.main_model.info.get("output_cost_per_token")
tokens += f", ${cost:.6f} cost"
self.total_cost += cost
show_resp = self.render_incremental_response(True) show_resp = self.render_incremental_response(True)
if self.show_pretty(): if self.show_pretty():
@ -1192,9 +1181,6 @@ class Coder:
self.io.console.print(show_resp) self.io.console.print(show_resp)
if tokens is not None:
self.io.tool_output(tokens)
if ( if (
hasattr(completion.choices[0], "finish_reason") hasattr(completion.choices[0], "finish_reason")
and completion.choices[0].finish_reason == "length" and completion.choices[0].finish_reason == "length"
@ -1242,6 +1228,8 @@ class Coder:
sys.stdout.flush() sys.stdout.flush()
yield text yield text
self.calculate_and_show_tokens_and_cost()
def live_incremental_response(self, final): def live_incremental_response(self, final):
show_resp = self.render_incremental_response(final) show_resp = self.render_incremental_response(final)
self.mdstream.update(show_resp, final=final) self.mdstream.update(show_resp, final=final)
@ -1249,6 +1237,27 @@ class Coder:
def render_incremental_response(self, final): def render_incremental_response(self, final):
return self.get_multi_response_content() return self.get_multi_response_content()
def calculate_and_show_tokens_and_cost(self, completion=None):
    """Report token usage and dollar cost for the latest LLM response.

    If *completion* carries a ``usage`` attribute (non-streaming
    responses), its exact prompt/completion token counts are used.
    Otherwise (streaming), completion tokens are counted locally by
    running the model's tokenizer over ``self.partial_response_content``;
    the prompt token count is unknown in that case and reported as 0.

    Per-token prices come from ``self.main_model.info``.  The computed
    cost is accumulated into ``self.total_cost`` and a one-line summary
    is printed via ``self.io.tool_output``.
    """
    prompt_tokens = 0
    completion_tokens = 0
    cost = 0

    if completion and hasattr(completion, "usage") and completion.usage is not None:
        # Exact counts reported by the API response object.
        prompt_tokens = completion.usage.prompt_tokens
        completion_tokens = completion.usage.completion_tokens
    else:
        # Streaming response: no usage object is available, so count the
        # accumulated partial content with the model's own tokenizer.
        completion_tokens = self.main_model.token_count(self.partial_response_content)

    tokens = f"{prompt_tokens} prompt tokens, {completion_tokens} completion tokens"

    # Hoist the price lookups so each dict key is fetched only once
    # (the original fetched each key twice: once to test, once to use).
    input_cost_per_token = self.main_model.info.get("input_cost_per_token")
    output_cost_per_token = self.main_model.info.get("output_cost_per_token")
    if input_cost_per_token:
        cost += prompt_tokens * input_cost_per_token
    if output_cost_per_token:
        cost += completion_tokens * output_cost_per_token

    # NOTE(review): the cost suffix is appended even when the model has no
    # pricing info (shows "$0.000000 cost") — preserved original behavior.
    tokens += f", ${cost:.6f} cost"
    self.total_cost += cost

    self.io.tool_output(tokens)
def get_multi_response_content(self, final=False): def get_multi_response_content(self, final=False):
cur = self.multi_response_content cur = self.multi_response_content
new = self.partial_response_content new = self.partial_response_content