From 3bac4724ac3895dfb790af66b9fa570c3bb2c03f Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sat, 31 May 2025 08:48:05 +0200 Subject: [PATCH] fix(streaming): stream complete runes (#5539) Signed-off-by: Ettore Di Giacinto --- core/backend/llm.go | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/core/backend/llm.go b/core/backend/llm.go index f36a568a..9d6f771f 100644 --- a/core/backend/llm.go +++ b/core/backend/llm.go @@ -135,19 +135,24 @@ func ModelInference(ctx context.Context, s string, messages []schema.Message, im tokenUsage.TimingTokenGeneration = reply.TimingTokenGeneration tokenUsage.TimingPromptProcessing = reply.TimingPromptProcessing + // Process complete runes and accumulate them + var completeRunes []byte for len(partialRune) > 0 { r, size := utf8.DecodeRune(partialRune) if r == utf8.RuneError { // incomplete rune, wait for more bytes break } - - tokenCallback(string(r), tokenUsage) - ss += string(r) - + completeRunes = append(completeRunes, partialRune[:size]...) partialRune = partialRune[size:] } + // If we have complete runes, send them as a single token + if len(completeRunes) > 0 { + tokenCallback(string(completeRunes), tokenUsage) + ss += string(completeRunes) + } + if len(msg) == 0 { tokenCallback("", tokenUsage) }