mirror of
https://github.com/mudler/LocalAI.git
synced 2025-05-20 02:24:59 +00:00
feat: add machine tag and inference timings (#4577)
* Add machine tag option, add extraUsage option, grpc-server -> proto -> endpoint extraUsage data is broken for now Signed-off-by: mintyleaf <mintyleafdev@gmail.com> * remove redundant timing fields, fix not working timings output Signed-off-by: mintyleaf <mintyleafdev@gmail.com> * use middleware for Machine-Tag only if tag is specified Signed-off-by: mintyleaf <mintyleafdev@gmail.com> --------- Signed-off-by: mintyleaf <mintyleafdev@gmail.com>
This commit is contained in:
parent
8027fdf1c7
commit
96f8ec0402
15 changed files with 137 additions and 48 deletions
|
@ -2408,6 +2408,13 @@ public:
|
|||
int32_t tokens_evaluated = result.result_json.value("tokens_evaluated", 0);
|
||||
reply.set_prompt_tokens(tokens_evaluated);
|
||||
|
||||
if (result.result_json.contains("timings")) {
|
||||
double timing_prompt_processing = result.result_json.at("timings").value("prompt_ms", 0.0);
|
||||
reply.set_timing_prompt_processing(timing_prompt_processing);
|
||||
double timing_token_generation = result.result_json.at("timings").value("predicted_ms", 0.0);
|
||||
reply.set_timing_token_generation(timing_token_generation);
|
||||
}
|
||||
|
||||
// Log Request Correlation Id
|
||||
LOG_VERBOSE("correlation:", {
|
||||
{ "id", data["correlation_id"] }
|
||||
|
@ -2448,6 +2455,13 @@ public:
|
|||
reply->set_prompt_tokens(tokens_evaluated);
|
||||
reply->set_tokens(tokens_predicted);
|
||||
reply->set_message(completion_text);
|
||||
|
||||
if (result.result_json.contains("timings")) {
|
||||
double timing_prompt_processing = result.result_json.at("timings").value("prompt_ms", 0.0);
|
||||
reply->set_timing_prompt_processing(timing_prompt_processing);
|
||||
double timing_token_generation = result.result_json.at("timings").value("predicted_ms", 0.0);
|
||||
reply->set_timing_token_generation(timing_token_generation);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue