From 81be192279e016c2a35dbf130a67cc9e8ccdbc60 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Thu, 6 Feb 2025 00:49:15 +0100 Subject: [PATCH 1/6] chore: :arrow_up: Update leejet/stable-diffusion.cpp to `d46ed5e184b97c2018dc2e8105925bdb8775e02c` (#4769) :arrow_up: Update leejet/stable-diffusion.cpp Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index e541b503..663a95de 100644 --- a/Makefile +++ b/Makefile @@ -24,7 +24,7 @@ BARKCPP_VERSION?=v1.0.0 # stablediffusion.cpp (ggml) STABLEDIFFUSION_GGML_REPO?=https://github.com/leejet/stable-diffusion.cpp -STABLEDIFFUSION_GGML_VERSION?=5eb15ef4d022bef4a391de4f5f6556e81fbb5024 +STABLEDIFFUSION_GGML_VERSION?=d46ed5e184b97c2018dc2e8105925bdb8775e02c ONNX_VERSION?=1.20.0 ONNX_ARCH?=x64 From d35595372d1b3f585175e638814c30bc6a20dd89 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Thu, 6 Feb 2025 09:02:51 +0100 Subject: [PATCH 2/6] chore: :arrow_up: Update ggerganov/llama.cpp to `d774ab3acc4fee41fbed6dbfc192b57d5f79f34b` (#4770) :arrow_up: Update ggerganov/llama.cpp Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 663a95de..7edb6f6a 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ DETECT_LIBS?=true # llama.cpp versions GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be -CPPLLAMA_VERSION?=3ec9fd4b77b6aca03a3c2bf678eae3f9517d6904 +CPPLLAMA_VERSION?=d774ab3acc4fee41fbed6dbfc192b57d5f79f34b # whisper.cpp version WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp From 16ced071025888708a59ee40e740cedf24aff039 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Thu, 6 Feb 2025 11:59:14 +0100 Subject: [PATCH 3/6] chore(model gallery): add arliai_llama-3.3-70b-arliai-rpmax-v1.4 (#4772) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/gallery/index.yaml b/gallery/index.yaml index d55adda9..b57d337f 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -523,6 +523,20 @@ - filename: Nohobby_L3.3-Prikol-70B-v0.4-Q4_K_M.gguf sha256: e1d67a40bdf0526bdfcaa16c6e4dfeecad41651e201b4009b65f4f444b773604 uri: huggingface://bartowski/Nohobby_L3.3-Prikol-70B-v0.4-GGUF/Nohobby_L3.3-Prikol-70B-v0.4-Q4_K_M.gguf +- !!merge <<: *llama33 + name: "arliai_llama-3.3-70b-arliai-rpmax-v1.4" + urls: + - https://huggingface.co/ArliAI/Llama-3.3-70B-ArliAI-RPMax-v1.4 + - https://huggingface.co/bartowski/ArliAI_Llama-3.3-70B-ArliAI-RPMax-v1.4-GGUF + description: | + RPMax is a series of models that are trained on a diverse set of curated creative writing and RP datasets with a focus on variety and deduplication. This model is designed to be highly creative and non-repetitive by making sure no two entries in the dataset have repeated characters or situations, which makes sure the model does not latch on to a certain personality and be capable of understanding and acting appropriately to any characters or situations. + overrides: + parameters: + model: ArliAI_Llama-3.3-70B-ArliAI-RPMax-v1.4-Q4_K_M.gguf + files: + - filename: ArliAI_Llama-3.3-70B-ArliAI-RPMax-v1.4-Q4_K_M.gguf + sha256: 7c79e76e5c057cfe32529d930360fbebd29697948e5bac4e4b2eb6d2ee596e31 + uri: huggingface://bartowski/ArliAI_Llama-3.3-70B-ArliAI-RPMax-v1.4-GGUF/ArliAI_Llama-3.3-70B-ArliAI-RPMax-v1.4-Q4_K_M.gguf - &rwkv url: "github:mudler/LocalAI/gallery/rwkv.yaml@master" name: "rwkv-6-world-7b" @@ -1448,7 +1462,7 @@ sha256: 6063cf3cf90f72cfb6ad7564bca8229806cb9823a055adcbce3dc539c2a75765 uri: huggingface://bartowski/AGI-0_Art-Skynet-3B-GGUF/AGI-0_Art-Skynet-3B-Q4_K_M.gguf - !!merge <<: *llama32 - name: "localai-functioncall-llama3.2-3b-v0.5" + name: "LocalAI-functioncall-llama3.2-3b-v0.5" icon: https://cdn-uploads.huggingface.co/production/uploads/647374aa7ff32a81ac6d35d4/Dzbdzn27KEc3K6zNNi070.png urls: - https://huggingface.co/mudler/LocalAI-functioncall-llama3.2-3b-v0.5 From a801561f819bc79bc6e6c232b55c42586a406e42 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Thu, 6 Feb 2025 12:01:56 +0100 Subject: [PATCH 4/6] chore(model gallery): add tiger-lab_qwen2.5-32b-instruct-cft (#4773) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index b57d337f..98760238 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -3605,6 +3605,20 @@ - filename: rubenroy_Gilgamesh-72B-Q4_K_M.gguf sha256: c6842b3bc882082c63243e762234ae697c1727bebed18b5241eb97e019f0cf68 uri: huggingface://bartowski/rubenroy_Gilgamesh-72B-GGUF/rubenroy_Gilgamesh-72B-Q4_K_M.gguf +- !!merge <<: *qwen25 + name: "tiger-lab_qwen2.5-32b-instruct-cft" + urls: + - https://huggingface.co/TIGER-Lab/Qwen2.5-32B-Instruct-CFT + - https://huggingface.co/bartowski/TIGER-Lab_Qwen2.5-32B-Instruct-CFT-GGUF + description: | + Qwen2.5-32B-Instruct-CFT is a 32B parameter model fine-tuned using our novel Critique Fine-Tuning (CFT) approach. Built upon the Qwen2.5-32B-Instruct base model, this variant is trained to critique and analyze responses rather than simply imitate them, leading to enhanced reasoning capabilities. + overrides: + parameters: + model: TIGER-Lab_Qwen2.5-32B-Instruct-CFT-Q4_K_M.gguf + files: + - filename: TIGER-Lab_Qwen2.5-32B-Instruct-CFT-Q4_K_M.gguf + sha256: 57e87e246db368f39f31f38e44ba8e9dc838a026f729f5a123aacc2aeb5a9402 + uri: huggingface://bartowski/TIGER-Lab_Qwen2.5-32B-Instruct-CFT-GGUF/TIGER-Lab_Qwen2.5-32B-Instruct-CFT-Q4_K_M.gguf - &llama31 url: "github:mudler/LocalAI/gallery/llama3.1-instruct.yaml@master" ## LLama3.1 icon: https://avatars.githubusercontent.com/u/153379578 From e4b8ddb6a1c3f0d14dbdde217b24896951e03da3 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Thu, 6 Feb 2025 12:03:59 +0100 Subject: [PATCH 5/6] chore(model gallery): add black-ink-guild_pernicious_prophecy_70b (#4774) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index 98760238..4e75e71f 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -537,6 +537,22 @@ - filename: ArliAI_Llama-3.3-70B-ArliAI-RPMax-v1.4-Q4_K_M.gguf sha256: 7c79e76e5c057cfe32529d930360fbebd29697948e5bac4e4b2eb6d2ee596e31 uri: huggingface://bartowski/ArliAI_Llama-3.3-70B-ArliAI-RPMax-v1.4-GGUF/ArliAI_Llama-3.3-70B-ArliAI-RPMax-v1.4-Q4_K_M.gguf +- !!merge <<: *llama33 + name: "black-ink-guild_pernicious_prophecy_70b" + icon: https://huggingface.co/Black-Ink-Guild/Pernicious_Prophecy_70B/resolve/main/header.gif + urls: + - https://huggingface.co/Black-Ink-Guild/Pernicious_Prophecy_70B + - https://huggingface.co/bartowski/Black-Ink-Guild_Pernicious_Prophecy_70B-GGUF + description: | + Pernicious Prophecy 70B is a Llama-3.3 70B-based, two-step model designed by Black Ink Guild (SicariusSicariiStuff and invisietch) for uncensored roleplay, assistant tasks, and general usage. + NOTE: Pernicious Prophecy 70B is an uncensored model and can produce deranged, offensive, and dangerous outputs. You are solely responsible for anything that you choose to do with this model. + overrides: + parameters: + model: Black-Ink-Guild_Pernicious_Prophecy_70B-Q4_K_M.gguf + files: + - filename: Black-Ink-Guild_Pernicious_Prophecy_70B-Q4_K_M.gguf + sha256: d8d4874b837993546b750db3faf1c6e5d867883a6750f04f1f4986973d7c107b + uri: huggingface://bartowski/Black-Ink-Guild_Pernicious_Prophecy_70B-GGUF/Black-Ink-Guild_Pernicious_Prophecy_70B-Q4_K_M.gguf - &rwkv url: "github:mudler/LocalAI/gallery/rwkv.yaml@master" name: "rwkv-6-world-7b" From 8d45670e4109db8968ffa5ae426f6656e9e0784c Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Thu, 6 Feb 2025 12:41:08 +0100 Subject: [PATCH 6/6] fix(openai): consistently return stop reason (#4771) We were not returning a stop reason when no tool was actually called (even if specified). Fixes: https://github.com/mudler/LocalAI/issues/4716 Signed-off-by: Ettore Di Giacinto --- core/http/endpoints/openai/chat.go | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/core/http/endpoints/openai/chat.go b/core/http/endpoints/openai/chat.go index 3b8d3056..a94a729a 100644 --- a/core/http/endpoints/openai/chat.go +++ b/core/http/endpoints/openai/chat.go @@ -401,6 +401,11 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, evaluat log.Debug().Msgf("Text content to return: %s", textContentToReturn) noActionsToRun := len(results) > 0 && results[0].Name == noActionName || len(results) == 0 + finishReason := "stop" + if len(input.Tools) > 0 { + finishReason = "tool_calls" + } + switch { case noActionsToRun: result, err := handleQuestion(config, input, ml, startupOptions, results, s, predInput) @@ -408,19 +413,18 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, evaluat log.Error().Err(err).Msg("error handling question") return } + *c = append(*c, schema.Choice{ - Message: &schema.Message{Role: "assistant", Content: &result}}) + FinishReason: finishReason, + Message: &schema.Message{Role: "assistant", Content: &result}}) default: toolChoice := schema.Choice{ + FinishReason: finishReason, Message: &schema.Message{ Role: "assistant", }, } - if len(input.Tools) > 0 { - toolChoice.FinishReason = "tool_calls" - } - for _, ss := range results { name, args := ss.Name, ss.Arguments if len(input.Tools) > 0 { @@ -438,7 +442,7 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, evaluat }, ) } else { - // otherwise we return more choices directly + // otherwise we return more choices directly (deprecated) *c = append(*c, schema.Choice{ FinishReason: "function_call", Message: &schema.Message{