feat(grpc): return consumed token count and update response accordingly (#2035)

Fixes: #1920
This commit is contained in:
Ettore Di Giacinto 2024-04-15 19:47:11 +02:00 committed by GitHub
parent de3a1a0a8e
commit e843d7df0e
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 20 additions and 4 deletions

View file

@ -189,6 +189,12 @@ func (llmbs *LLMBackendService) Inference(ctx context.Context, req *LLMRequest,
} else {
go func() {
reply, err := inferenceModel.Predict(ctx, grpcPredOpts)
if tokenUsage.Prompt == 0 {
tokenUsage.Prompt = int(reply.PromptTokens)
}
if tokenUsage.Completion == 0 {
tokenUsage.Completion = int(reply.Tokens)
}
if err != nil {
rawResultChannel <- concurrency.ErrorOr[*LLMResponse]{Error: err}
close(rawResultChannel)