Revert "feat: include tokens usage for streamed output (#4282)"

This reverts commit 0d6c3a7d57.
Ettore Di Giacinto 2024-12-08 16:31:48 +01:00
parent 87b7648591
commit 184fbc26bf
5 changed files with 10 additions and 25 deletions

@@ -117,12 +117,8 @@ func ModelInference(ctx context.Context, s string, messages []schema.Message, im
 	ss := ""
 
 	var partialRune []byte
-	err := inferenceModel.PredictStream(ctx, opts, func(reply *proto.Reply) {
-		msg := reply.GetMessage()
-		partialRune = append(partialRune, msg...)
-
-		tokenUsage.Prompt = int(reply.PromptTokens)
-		tokenUsage.Completion = int(reply.Tokens)
+	err := inferenceModel.PredictStream(ctx, opts, func(chars []byte) {
+		partialRune = append(partialRune, chars...)
 
 		for len(partialRune) > 0 {
 			r, size := utf8.DecodeRune(partialRune)
@@ -136,10 +132,6 @@ func ModelInference(ctx context.Context, s string, messages []schema.Message, im
 			partialRune = partialRune[size:]
 		}
-
-		if len(msg) == 0 {
-			tokenCallback("", tokenUsage)
-		}
 	})
 
 	return LLMResponse{
 		Response: ss,
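
The revert swaps the streaming callback signature from the gRPC-reply form func(reply *proto.Reply), which carried per-chunk prompt and completion token counts, back to the plain byte-chunk form func(chars []byte). The partialRune buffer survives the revert: it exists because a streamed chunk boundary can fall in the middle of a multi-byte UTF-8 sequence. Below is a minimal standalone sketch of that buffering loop, assuming the post-revert []byte callback shape; streamRunes and the sample chunks are hypothetical illustrations, not part of LocalAI's API.

package main

import (
	"fmt"
	"unicode/utf8"
)

// streamRunes feeds byte chunks (which may split a multi-byte UTF-8
// sequence) to emit one complete rune at a time, mirroring the loop
// inside the PredictStream callback in the diff above. Hypothetical
// helper for illustration only.
func streamRunes(chunks [][]byte, emit func(string)) string {
	ss := ""
	var partialRune []byte
	for _, chars := range chunks {
		partialRune = append(partialRune, chars...)
		for len(partialRune) > 0 {
			r, size := utf8.DecodeRune(partialRune)
			if r == utf8.RuneError {
				// Likely an incomplete rune at the end of the
				// buffer: stop and wait for the next chunk.
				break
			}
			emit(string(r))
			ss += string(r)
			partialRune = partialRune[size:]
		}
	}
	return ss
}

func main() {
	// "é" (0xC3 0xA9) arrives split across two chunks.
	chunks := [][]byte{[]byte("caf"), {0xC3}, {0xA9}}
	final := streamRunes(chunks, func(s string) { fmt.Printf("emit %q\n", s) })
	fmt.Println("final:", final) // final: café
}

One trade-off worth noting: breaking on utf8.RuneError treats every undecodable prefix as "incomplete", so a genuinely invalid byte would sit in the buffer until the stream ends; a stricter variant could inspect the returned size to flush invalid bytes instead of waiting.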