feat(grpc): return consumed token count and update response accordingly (#2035)

Fixes: #1920
This commit is contained in:
Ettore Di Giacinto 2024-04-15 19:47:11 +02:00 committed by GitHub
parent de3a1a0a8e
commit e843d7df0e
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 20 additions and 4 deletions

View file

@@ -2332,6 +2332,10 @@ public:
std::string completion_text = result.result_json.value("content", "");
reply.set_message(completion_text);
int32_t tokens_predicted = result.result_json.value("tokens_predicted", 0);
reply.set_tokens(tokens_predicted);
int32_t tokens_evaluated = result.result_json.value("tokens_evaluated", 0);
reply.set_prompt_tokens(tokens_evaluated);
// Send the reply
writer->Write(reply);
@@ -2357,6 +2361,10 @@ public:
task_result result = llama.queue_results.recv(task_id);
if (!result.error && result.stop) {
completion_text = result.result_json.value("content", "");
int32_t tokens_predicted = result.result_json.value("tokens_predicted", 0);
int32_t tokens_evaluated = result.result_json.value("tokens_evaluated", 0);
reply->set_prompt_tokens(tokens_evaluated);
reply->set_tokens(tokens_predicted);
reply->set_message(completion_text);
}
else