Mirror of https://github.com/mudler/LocalAI.git (synced 2025-05-20 10:35:01 +00:00)
feat(grpc): return consumed token count and update response accordingly (#2035)

Fixes: #1920

parent de3a1a0a8e
commit e843d7df0e

4 changed files with 20 additions and 4 deletions
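Both hunks below (from the llama.cpp gRPC backend, grpc-server.cpp) read llama.cpp's usage counters out of the per-task result.result_json and copy them onto the gRPC reply: tokens_predicted becomes the reply's tokens field and tokens_evaluated becomes prompt_tokens. The lookups use nlohmann::json's value(key, default), which returns the default when the key is absent, so a result without counters degrades to 0 instead of throwing. A minimal standalone sketch of that pattern (the sample JSON values here are made up for illustration):

```cpp
// Sketch of the json::value(key, default) pattern used in the diff.
// Requires nlohmann/json, the JSON library llama.cpp's server uses.
#include <cstdint>
#include <iostream>
#include <nlohmann/json.hpp>

int main() {
    nlohmann::json result_json = {
        {"content", "hello"},
        {"tokens_predicted", 12}
        // "tokens_evaluated" deliberately absent
    };
    int32_t tokens_predicted = result_json.value("tokens_predicted", 0); // 12
    int32_t tokens_evaluated = result_json.value("tokens_evaluated", 0); // 0, key missing
    std::cout << tokens_predicted << " " << tokens_evaluated << "\n";
    return 0;
}
```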
```diff
@@ -2332,6 +2332,10 @@ public:
         std::string completion_text = result.result_json.value("content", "");

         reply.set_message(completion_text);
+        int32_t tokens_predicted = result.result_json.value("tokens_predicted", 0);
+        reply.set_tokens(tokens_predicted);
+        int32_t tokens_evaluated = result.result_json.value("tokens_evaluated", 0);
+        reply.set_prompt_tokens(tokens_evaluated);

         // Send the reply
         writer->Write(reply);
```
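The hunk above is the streaming path: the server writes each partial reply through writer->Write(reply), so every chunk now carries the counters alongside the text. A hypothetical client-side sketch of reading them off the stream; the proto package and service names (backend::Backend), the PredictOptions request type and its prompt field, and the RPC name PredictStream are assumptions, while message(), tokens(), and prompt_tokens() mirror the setters visible in the diff:

```cpp
// Hypothetical streaming client; names marked as assumed are not taken
// from this diff.
#include <grpcpp/grpcpp.h>
#include <iostream>
#include <memory>
#include "backend.grpc.pb.h" // assumed generated header

int main() {
    auto channel = grpc::CreateChannel("localhost:50051",
                                       grpc::InsecureChannelCredentials());
    auto stub = backend::Backend::NewStub(channel); // assumed service name

    grpc::ClientContext context;
    backend::PredictOptions request; // assumed request type
    request.set_prompt("Hello");     // assumed field name

    backend::Reply reply;
    auto reader = stub->PredictStream(&context, request); // assumed RPC name
    int32_t tokens = 0, prompt_tokens = 0;
    while (reader->Read(&reply)) {
        std::cout << reply.message();          // partial completion text
        tokens = reply.tokens();               // server's tokens_predicted
        prompt_tokens = reply.prompt_tokens(); // server's tokens_evaluated
    }
    grpc::Status status = reader->Finish();
    std::cout << "\nprompt tokens: " << prompt_tokens
              << ", generated tokens: " << tokens << "\n";
    return status.ok() ? 0 : 1;
}
```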
```diff
@@ -2357,6 +2361,10 @@ public:
         task_result result = llama.queue_results.recv(task_id);
         if (!result.error && result.stop) {
             completion_text = result.result_json.value("content", "");
+            int32_t tokens_predicted = result.result_json.value("tokens_predicted", 0);
+            int32_t tokens_evaluated = result.result_json.value("tokens_evaluated", 0);
+            reply->set_prompt_tokens(tokens_evaluated);
+            reply->set_tokens(tokens_predicted);
+            reply->set_message(completion_text);
         }
         else
```
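This second hunk is the blocking path: the reply is filled in once, and only when the task finished cleanly (!result.error && result.stop); the else branch, truncated in this view, handles the failure case. Under the same assumed proto as the streaming sketch above, a unary call would surface both counters in a single message:

```cpp
// Hypothetical unary call reusing the stub and request from the sketch
// above; Predict is the assumed RPC name.
grpc::ClientContext context;
backend::Reply reply;
grpc::Status status = stub->Predict(&context, request, &reply);
if (status.ok()) {
    std::cout << reply.message() << "\n"
              << "prompt tokens: " << reply.prompt_tokens()
              << ", generated tokens: " << reply.tokens() << "\n";
}
```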