From deeef5fc2426494c1916c252efa29493f2c0f24e Mon Sep 17 00:00:00 2001
From: Samuel Maynard
Date: Thu, 31 Aug 2023 01:56:59 +0200
Subject: [PATCH] fix(utf8): prevent multi-byte utf8 characters from being
 mangled (#981)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

**Description**

This PR fixes #677 using the [suggested solution](https://github.com/go-skynet/LocalAI/issues/677#issuecomment-1695939097) from @yantoz.

before:

```
❯ curl -N http://localhost:57541/v1/completions -H "Content-Type: application/json" -d '{
  "model": "ggml-model-q4_0.bin",
  "prompt": "",
  "max_tokens": 32,
  "temperature": 0.7,
  "stream": true
}'
data: {"object":"text_completion","model":"ggml-model-q4_0.bin","choices":[{"text":"\ufffd"}],"usage":{"prompt_tokens":0,"completion_tokens":0,"total_tokens":0}}
data: {"object":"text_completion","model":"ggml-model-q4_0.bin","choices":[{"text":"\ufffd"}],"usage":{"prompt_tokens":0,"completion_tokens":0,"total_tokens":0}}
data: {"object":"text_completion","model":"ggml-model-q4_0.bin","choices":[{"text":"\ufffd"}],"usage":{"prompt_tokens":0,"completion_tokens":0,"total_tokens":0}}
data: {"object":"text_completion","model":"ggml-model-q4_0.bin","choices":[{"text":"\ufffd"}],"usage":{"prompt_tokens":0,"completion_tokens":0,"total_tokens":0}}
data: {"object":"text_completion","model":"ggml-model-q4_0.bin","choices":[{"text":" |"}],"usage":{"prompt_tokens":0,"completion_tokens":0,"total_tokens":0}}
data: {"object":"text_completion","model":"ggml-model-q4_0.bin","choices":[{"text":" I"}],"usage":{"prompt_tokens":0,"completion_tokens":0,"total_tokens":0}}
data: {"object":"text_completion","model":"ggml-model-q4_0.bin","choices":[{"text":"'"}],"usage":{"prompt_tokens":0,"completion_tokens":0,"total_tokens":0}}
data: {"object":"text_completion","model":"ggml-model-q4_0.bin","choices":[{"text":"m"}],"usage":{"prompt_tokens":0,"completion_tokens":0,"total_tokens":0}}
```

now:

```
❯ curl -N http://localhost:57541/v1/completions -H "Content-Type: application/json" -d '{
  "model": "ggml-model-q4_0.bin",
  "prompt": "",
  "max_tokens": 32,
  "temperature": 0.7,
  "stream": true
}'
data: {"object":"text_completion","model":"ggml-model-q4_0.bin","choices":[{"index":0,"text":"😂"}],"usage":{"prompt_tokens":0,"completion_tokens":0,"total_tokens":0}}
data: {"object":"text_completion","model":"ggml-model-q4_0.bin","choices":[{"index":0,"text":" "}],"usage":{"prompt_tokens":0,"completion_tokens":0,"total_tokens":0}}
data: {"object":"text_completion","model":"ggml-model-q4_0.bin","choices":[{"index":0,"text":"|"}],"usage":{"prompt_tokens":0,"completion_tokens":0,"total_tokens":0}}
data: {"object":"text_completion","model":"ggml-model-q4_0.bin","choices":[{"index":0,"text":" "}],"usage":{"prompt_tokens":0,"completion_tokens":0,"total_tokens":0}}
data: {"object":"text_completion","model":"ggml-model-q4_0.bin","choices":[{"index":0,"text":"I"}],"usage":{"prompt_tokens":0,"completion_tokens":0,"total_tokens":0}}
data: {"object":"text_completion","model":"ggml-model-q4_0.bin","choices":[{"index":0,"text":"'"}],"usage":{"prompt_tokens":0,"completion_tokens":0,"total_tokens":0}}
data: {"object":"text_completion","model":"ggml-model-q4_0.bin","choices":[{"index":0,"text":"m"}],"usage":{"prompt_tokens":0,"completion_tokens":0,"total_tokens":0}}
```

**Notes for Reviewers**

**[Signed commits](../CONTRIBUTING.md#signing-off-on-commits-developer-certificate-of-origin)**

- [X] Yes, I signed my commits.
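For reviewers, a minimal standalone repro of the mangling. The byte-per-callback split is an assumption about how the backend streams, not something taken from the patch; it matches the four `\ufffd` chunks in the "before" output because `encoding/json` replaces each invalid UTF-8 byte with U+FFFD when marshaling:

```
package main

import (
	"encoding/json"
	"fmt"
)

func main() {
	// "😂" (U+1F602) is four bytes in UTF-8: F0 9F 98 82.
	// Assume the backend delivers the token one byte per callback.
	for _, b := range []byte("😂") {
		// A lone byte of a multi-byte sequence is not valid UTF-8,
		// so encoding/json substitutes the replacement character,
		// producing the "\ufffd" chunks seen in the SSE stream.
		out, _ := json.Marshal(string([]byte{b}))
		fmt.Println(string(out))
	}
	// Prints "\ufffd" four times.
}
```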
Co-authored-by: Ettore Di Giacinto
---
 api/backend/llm.go | 21 ++++++++++++++++++---
 1 file changed, 18 insertions(+), 3 deletions(-)

diff --git a/api/backend/llm.go b/api/backend/llm.go
index 01b3eb1b..8639e840 100644
--- a/api/backend/llm.go
+++ b/api/backend/llm.go
@@ -6,6 +6,7 @@ import (
 	"regexp"
 	"strings"
 	"sync"
+	"unicode/utf8"
 
 	config "github.com/go-skynet/LocalAI/api/config"
 	"github.com/go-skynet/LocalAI/api/options"
@@ -97,9 +98,23 @@ func ModelInference(ctx context.Context, s string, loader *model.ModelLoader, c
 
 	if tokenCallback != nil {
 		ss := ""
-		err := inferenceModel.PredictStream(ctx, opts, func(s []byte) {
-			tokenCallback(string(s), tokenUsage)
-			ss += string(s)
+
+		var partialRune []byte
+		err := inferenceModel.PredictStream(ctx, opts, func(chars []byte) {
+			partialRune = append(partialRune, chars...)
+
+			for len(partialRune) > 0 {
+				r, size := utf8.DecodeRune(partialRune)
+				if r == utf8.RuneError {
+					// incomplete rune, wait for more bytes
+					break
+				}
+
+				tokenCallback(string(r), tokenUsage)
+				ss += string(r)
+
+				partialRune = partialRune[size:]
+			}
+		})
 		return LLMResponse{
 			Response: ss,
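For reference, the new buffering logic extracted into a self-contained sketch; the `flush` helper and the simulated byte-at-a-time stream are illustrative, not part of the patch:

```
package main

import (
	"fmt"
	"unicode/utf8"
)

// flush emits every complete rune buffered so far and returns the
// leftover bytes, mirroring the loop inside the PredictStream callback.
func flush(buf []byte, emit func(string)) []byte {
	for len(buf) > 0 {
		r, size := utf8.DecodeRune(buf)
		if r == utf8.RuneError {
			// Incomplete sequence so far; wait for the next chunk.
			break
		}
		emit(string(r))
		buf = buf[size:]
	}
	return buf
}

func main() {
	var partialRune []byte
	// Simulate the backend streaming "😂 |" one byte at a time.
	for _, b := range []byte("😂 |") {
		partialRune = flush(append(partialRune, b), func(tok string) {
			fmt.Printf("token: %q\n", tok)
		})
	}
}
```

One caveat worth noting: `utf8.DecodeRune` also returns `utf8.RuneError` for bytes that can never start a valid rune, so a genuinely corrupt byte would sit in the buffer until the stream ends; `utf8.FullRune` could distinguish "incomplete" from "invalid" if that ever matters in practice.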