mirror of
https://github.com/mudler/LocalAI.git
synced 2025-06-05 10:25:00 +00:00
fix(streaming): stream complete runes (#5539)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
This commit is contained in:
parent
59db154cbc
commit
3bac4724ac
1 changed file with 9 additions and 4 deletions
|
@@ -135,19 +135,24 @@ func ModelInference(ctx context.Context, s string, messages []schema.Message, im
|
||||||
tokenUsage.TimingTokenGeneration = reply.TimingTokenGeneration
|
tokenUsage.TimingTokenGeneration = reply.TimingTokenGeneration
|
||||||
tokenUsage.TimingPromptProcessing = reply.TimingPromptProcessing
|
tokenUsage.TimingPromptProcessing = reply.TimingPromptProcessing
|
||||||
|
|
||||||
|
// Process complete runes and accumulate them
|
||||||
|
var completeRunes []byte
|
||||||
for len(partialRune) > 0 {
|
for len(partialRune) > 0 {
|
||||||
r, size := utf8.DecodeRune(partialRune)
|
r, size := utf8.DecodeRune(partialRune)
|
||||||
if r == utf8.RuneError {
|
if r == utf8.RuneError {
|
||||||
// incomplete rune, wait for more bytes
|
// incomplete rune, wait for more bytes
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
|
completeRunes = append(completeRunes, partialRune[:size]...)
|
||||||
tokenCallback(string(r), tokenUsage)
|
|
||||||
ss += string(r)
|
|
||||||
|
|
||||||
partialRune = partialRune[size:]
|
partialRune = partialRune[size:]
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// If we have complete runes, send them as a single token
|
||||||
|
if len(completeRunes) > 0 {
|
||||||
|
tokenCallback(string(completeRunes), tokenUsage)
|
||||||
|
ss += string(completeRunes)
|
||||||
|
}
|
||||||
|
|
||||||
if len(msg) == 0 {
|
if len(msg) == 0 {
|
||||||
tokenCallback("", tokenUsage)
|
tokenCallback("", tokenUsage)
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue