Mirror of https://github.com/mudler/LocalAI.git, synced 2025-05-25 13:04:59 +00:00
feat(grpc): return consumed token count and update response accordingly (#2035)
Fixes: #1920
parent de3a1a0a8e
commit e843d7df0e
4 changed files with 20 additions and 4 deletions
```diff
@@ -189,6 +189,12 @@ func (llmbs *LLMBackendService) Inference(ctx context.Context, req *LLMRequest,
 		} else {
 			go func() {
 				reply, err := inferenceModel.Predict(ctx, grpcPredOpts)
+				if tokenUsage.Prompt == 0 {
+					tokenUsage.Prompt = int(reply.PromptTokens)
+				}
+				if tokenUsage.Completion == 0 {
+					tokenUsage.Completion = int(reply.Tokens)
+				}
 				if err != nil {
 					rawResultChannel <- concurrency.ErrorOr[*LLMResponse]{Error: err}
 					close(rawResultChannel)
```
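The added lines implement a simple precedence rule: token counts reported by the gRPC backend are used only as a fallback when the caller has not already populated them (for example, from a local tokenizer estimate). Below is a minimal, runnable sketch of that rule. The `Prompt`/`Completion` field names and the `PromptTokens`/`Tokens` reply fields are taken from the hunk above; `TokenUsage`, `predictReply`, and `mergeUsage` are hypothetical stand-ins for illustration, not LocalAI's actual types.

```go
package main

import "fmt"

// TokenUsage mirrors the shape implied by the diff above; a hypothetical
// stand-in, not the real LocalAI struct.
type TokenUsage struct {
	Prompt     int
	Completion int
}

// predictReply stands in for the gRPC reply returned by
// inferenceModel.Predict; PromptTokens and Tokens are the fields the
// commit starts consuming.
type predictReply struct {
	PromptTokens int32
	Tokens       int32
}

// mergeUsage applies the same rule as the added lines: fall back to the
// backend-reported counts only when the caller has not already filled
// them in.
func mergeUsage(usage *TokenUsage, reply *predictReply) {
	if usage.Prompt == 0 {
		usage.Prompt = int(reply.PromptTokens)
	}
	if usage.Completion == 0 {
		usage.Completion = int(reply.Tokens)
	}
}

func main() {
	usage := TokenUsage{}                              // nothing counted locally
	reply := predictReply{PromptTokens: 42, Tokens: 7} // counts from the backend

	mergeUsage(&usage, &reply)
	fmt.Printf("prompt=%d completion=%d total=%d\n",
		usage.Prompt, usage.Completion, usage.Prompt+usage.Completion)
	// Output: prompt=42 completion=7 total=49
}
```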