fix(streaming): stream complete runes (#5539)

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
This commit is contained in:
Ettore Di Giacinto 2025-05-31 08:48:05 +02:00 committed by GitHub
parent 59db154cbc
commit 3bac4724ac
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@@ -135,19 +135,24 @@ func ModelInference(ctx context.Context, s string, messages []schema.Message, im
tokenUsage.TimingTokenGeneration = reply.TimingTokenGeneration
tokenUsage.TimingPromptProcessing = reply.TimingPromptProcessing
// Process complete runes and accumulate them
var completeRunes []byte
for len(partialRune) > 0 {
r, size := utf8.DecodeRune(partialRune)
if r == utf8.RuneError {
// DecodeRune yields RuneError for an incomplete or invalid sequence (and for a literal U+FFFD); stop and wait for more bytes
break
}
tokenCallback(string(r), tokenUsage)
ss += string(r)
completeRunes = append(completeRunes, partialRune[:size]...)
partialRune = partialRune[size:]
}
// If we have complete runes, send them as a single token
if len(completeRunes) > 0 {
tokenCallback(string(completeRunes), tokenUsage)
ss += string(completeRunes)
}
if len(msg) == 0 {
tokenCallback("", tokenUsage)
}