feat: add machine tag and inference timings (#4577)

* Add machine tag option, add extraUsage option, grpc-server -> proto -> endpoint extraUsage data is broken for now

Signed-off-by: mintyleaf <mintyleafdev@gmail.com>

* remove redurant timing fields, fix not working timings output

Signed-off-by: mintyleaf <mintyleafdev@gmail.com>

* use middleware for Machine-Tag only if tag is specified

Signed-off-by: mintyleaf <mintyleafdev@gmail.com>

---------

Signed-off-by: mintyleaf <mintyleafdev@gmail.com>
This commit is contained in:
mintyleaf 2025-01-17 20:05:58 +04:00 committed by GitHub
parent 8027fdf1c7
commit 96f8ec0402
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
15 changed files with 137 additions and 48 deletions

View file

@ -52,6 +52,8 @@ func ComputeChoices(
tokenUsage.Prompt += prediction.Usage.Prompt
tokenUsage.Completion += prediction.Usage.Completion
tokenUsage.TimingPromptProcessing += prediction.Usage.TimingPromptProcessing
tokenUsage.TimingTokenGeneration += prediction.Usage.TimingTokenGeneration
finetunedResponse := backend.Finetune(*config, predInput, prediction.Response)
cb(finetunedResponse, &result)