From 296b97925fab0246184ac582621045565ce9a075 Mon Sep 17 00:00:00 2001
From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com>
Date: Sat, 18 Jan 2025 23:21:27 +0100
Subject: [PATCH 01/29] chore: :arrow_up: Update leejet/stable-diffusion.cpp to
`5eb15ef4d022bef4a391de4f5f6556e81fbb5024` (#4636)
:arrow_up: Update leejet/stable-diffusion.cpp
Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
---
Makefile | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/Makefile b/Makefile
index 944cad37..fc4eddf4 100644
--- a/Makefile
+++ b/Makefile
@@ -28,7 +28,7 @@ BARKCPP_VERSION?=v1.0.0
# stablediffusion.cpp (ggml)
STABLEDIFFUSION_GGML_REPO?=https://github.com/leejet/stable-diffusion.cpp
-STABLEDIFFUSION_GGML_VERSION?=dcf91f9e0f2cbf9da472ee2a556751ed4bab2d2a
+STABLEDIFFUSION_GGML_VERSION?=5eb15ef4d022bef4a391de4f5f6556e81fbb5024
ONNX_VERSION?=1.20.0
ONNX_ARCH?=x64
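Because the Makefile uses conditional assignment (?=) for these pins, a local build can still point at a different upstream commit without editing the file. A minimal sketch, assuming LocalAI's default build target:

    # hypothetical local override; any commit SHA from leejet/stable-diffusion.cpp works here
    make build STABLEDIFFUSION_GGML_VERSION=5eb15ef4d022bef4a391de4f5f6556e81fbb5024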
From a752183fb58de465daa35688c93fbe7d4ed324e9 Mon Sep 17 00:00:00 2001
From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com>
Date: Sun, 19 Jan 2025 08:38:33 +0100
Subject: [PATCH 02/29] chore: :arrow_up: Update ggerganov/llama.cpp to
`a1649cc13f89946322358f92ea268ae1b7b5096c` (#4635)
:arrow_up: Update ggerganov/llama.cpp
Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
---
Makefile | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/Makefile b/Makefile
index fc4eddf4..dfa91a15 100644
--- a/Makefile
+++ b/Makefile
@@ -8,7 +8,7 @@ DETECT_LIBS?=true
# llama.cpp versions
GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp
GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
-CPPLLAMA_VERSION?=3edfa7d3753c29e44b964c0ff424d2ea8d5fdee6
+CPPLLAMA_VERSION?=a1649cc13f89946322358f92ea268ae1b7b5096c
# whisper.cpp version
WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp
From f496d0113b722847aaf4775394ccfd814255fef9 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Sun, 19 Jan 2025 09:07:56 +0100
Subject: [PATCH 03/29] chore(deps): pin numba
Signed-off-by: Ettore Di Giacinto
---
backend/python/transformers/requirements-cpu.txt | 3 ++-
backend/python/transformers/requirements-cublas11.txt | 1 +
backend/python/transformers/requirements-cublas12.txt | 1 +
backend/python/transformers/requirements-hipblas.txt | 1 +
backend/python/transformers/requirements-intel.txt | 1 +
backend/python/transformers/requirements.txt | 3 +--
6 files changed, 7 insertions(+), 3 deletions(-)
diff --git a/backend/python/transformers/requirements-cpu.txt b/backend/python/transformers/requirements-cpu.txt
index 421c4b80..c88508e3 100644
--- a/backend/python/transformers/requirements-cpu.txt
+++ b/backend/python/transformers/requirements-cpu.txt
@@ -1,7 +1,8 @@
torch==2.4.1
llvmlite==0.43.0
+numba==0.60.0
accelerate
transformers
bitsandbytes
outetts
-sentence-transformers==3.3.1
+sentence-transformers==3.3.1
\ No newline at end of file
diff --git a/backend/python/transformers/requirements-cublas11.txt b/backend/python/transformers/requirements-cublas11.txt
index c5d18d09..0faa9cec 100644
--- a/backend/python/transformers/requirements-cublas11.txt
+++ b/backend/python/transformers/requirements-cublas11.txt
@@ -1,6 +1,7 @@
--extra-index-url https://download.pytorch.org/whl/cu118
torch==2.4.1+cu118
llvmlite==0.43.0
+numba==0.60.0
accelerate
transformers
bitsandbytes
diff --git a/backend/python/transformers/requirements-cublas12.txt b/backend/python/transformers/requirements-cublas12.txt
index c0bcfc87..1e22312f 100644
--- a/backend/python/transformers/requirements-cublas12.txt
+++ b/backend/python/transformers/requirements-cublas12.txt
@@ -1,6 +1,7 @@
torch==2.4.1
accelerate
llvmlite==0.43.0
+numba==0.60.0
transformers
bitsandbytes
outetts
diff --git a/backend/python/transformers/requirements-hipblas.txt b/backend/python/transformers/requirements-hipblas.txt
index e7f53860..47aa88db 100644
--- a/backend/python/transformers/requirements-hipblas.txt
+++ b/backend/python/transformers/requirements-hipblas.txt
@@ -3,6 +3,7 @@ torch==2.4.1+rocm6.0
accelerate
transformers
llvmlite==0.43.0
+numba==0.60.0
bitsandbytes
outetts
bitsandbytes
diff --git a/backend/python/transformers/requirements-intel.txt b/backend/python/transformers/requirements-intel.txt
index aada6e00..708b0516 100644
--- a/backend/python/transformers/requirements-intel.txt
+++ b/backend/python/transformers/requirements-intel.txt
@@ -4,6 +4,7 @@ torch==2.3.1+cxx11.abi
oneccl_bind_pt==2.3.100+xpu
optimum[openvino]
llvmlite==0.43.0
+numba==0.60.0
intel-extension-for-transformers
bitsandbytes
outetts
diff --git a/backend/python/transformers/requirements.txt b/backend/python/transformers/requirements.txt
index d353e4d0..db41b928 100644
--- a/backend/python/transformers/requirements.txt
+++ b/backend/python/transformers/requirements.txt
@@ -3,5 +3,4 @@ protobuf
certifi
setuptools
scipy==1.15.1
-numpy>=2.0.0
-numba==0.60.0
\ No newline at end of file
+numpy>=2.0.0
\ No newline at end of file
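Pinning numba alongside llvmlite in each per-backend requirements file (instead of only in the shared requirements.txt) keeps the pair consistent: numba 0.60.0 only accepts llvmlite 0.43.x. A minimal sketch for verifying the pair in a throwaway environment (paths are illustrative):

    python3 -m venv /tmp/numba-pin && . /tmp/numba-pin/bin/activate
    pip install "llvmlite==0.43.0" "numba==0.60.0"
    python -c "import numba, llvmlite; print(numba.__version__, llvmlite.__version__)"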
From 83e2dd5dff7b36d8cc9528d63ed0468145ef79df Mon Sep 17 00:00:00 2001
From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com>
Date: Sun, 19 Jan 2025 23:34:32 +0100
Subject: [PATCH 04/29] chore: :arrow_up: Update ggerganov/llama.cpp to
`92bc493917d43b83e592349e138b54c90b1c3ea7` (#4640)
:arrow_up: Update ggerganov/llama.cpp
Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
---
Makefile | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/Makefile b/Makefile
index dfa91a15..7aaad492 100644
--- a/Makefile
+++ b/Makefile
@@ -8,7 +8,7 @@ DETECT_LIBS?=true
# llama.cpp versions
GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp
GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
-CPPLLAMA_VERSION?=a1649cc13f89946322358f92ea268ae1b7b5096c
+CPPLLAMA_VERSION?=92bc493917d43b83e592349e138b54c90b1c3ea7
# whisper.cpp version
WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp
From 30739d94a41139fe5c8cf68239cc7353d102c4fe Mon Sep 17 00:00:00 2001
From: Gianluca Boiano <491117+M0Rf30@users.noreply.github.com>
Date: Mon, 20 Jan 2025 10:34:19 +0100
Subject: [PATCH 05/29] chore(model gallery): add InternLM3-8b-Q4_K_M (#4637)
chore(model gallery): add InternLM3-8b-Q4_K_M
Signed-off-by: Gianluca Boiano
---
gallery/index.yaml | 27 ++++++++++++++++++++++++++-
1 file changed, 26 insertions(+), 1 deletion(-)
diff --git a/gallery/index.yaml b/gallery/index.yaml
index edd52725..61ecd107 100644
--- a/gallery/index.yaml
+++ b/gallery/index.yaml
@@ -10100,7 +10100,7 @@
urls:
- https://huggingface.co/internlm/internlm2_5-7b-chat-1m
- https://huggingface.co/bartowski/internlm2_5-7b-chat-1m-GGUF
- icon: https://github.com/InternLM/InternLM/assets/22529082/b9788105-8892-4398-8b47-b513a292378e
+ icon: https://avatars.githubusercontent.com/u/135356492
tags:
- internlm2
- gguf
@@ -10121,6 +10121,31 @@
- filename: internlm2_5-7b-chat-1m-Q4_K_M.gguf
uri: huggingface://bartowski/internlm2_5-7b-chat-1m-GGUF/internlm2_5-7b-chat-1m-Q4_K_M.gguf
sha256: 10d5e18a4125f9d4d74a9284a21e0c820b150af06dee48665e54ff6e1be3a564
+### Internlm3
+- name: "internlm3-8b-instruct"
+ url: "github:mudler/LocalAI/gallery/chatml.yaml@master"
+ urls:
+ - https://huggingface.co/internlm/internlm3-8b-instruct
+ - https://huggingface.co/bartowski/internlm3-8b-instruct-GGUF
+ icon: https://avatars.githubusercontent.com/u/135356492
+ tags:
+ - internlm3
+ - gguf
+ - cpu
+ - gpu
+ description: |
+ InternLM3 has open-sourced an 8-billion parameter instruction model, InternLM3-8B-Instruct, designed for general-purpose usage and advanced reasoning. The model has the following characteristics:
+
+ Enhanced performance at reduced cost: State-of-the-art performance on reasoning and knowledge-intensive tasks surpasses models like Llama3.1-8B and Qwen2.5-7B.
+
+ Deep thinking capability: InternLM3 supports both the deep thinking mode for solving complicated reasoning tasks via the long chain-of-thought and the normal response mode for fluent user interactions.
+ overrides:
+ parameters:
+ model: internlm3-8b-instruct-Q4_K_M.gguf
+ files:
+ - filename: internlm3-8b-instruct-Q4_K_M.gguf
+ uri: huggingface://bartowski/internlm3-8b-instruct-GGUF/internlm3-8b-instruct-Q4_K_M.gguf
+ sha256: 2a9644687318e8659c9cf9b40730d5cc2f5af06f786a50439c7c51359b23896e
- &phi-3
### START Phi-3
url: "github:mudler/LocalAI/gallery/phi-3-chat.yaml@master"
From 390bb3f58bb5d878c852c71e473ae0754a8d817d Mon Sep 17 00:00:00 2001
From: Gianluca Boiano <491117+M0Rf30@users.noreply.github.com>
Date: Mon, 20 Jan 2025 10:35:05 +0100
Subject: [PATCH 06/29] fix(model gallery): minicpm-v-2.6 is based on qwen2
(#4638)
Signed-off-by: Gianluca Boiano
---
gallery/index.yaml | 54 +++++++++++++++++++++++-----------------------
1 file changed, 27 insertions(+), 27 deletions(-)
diff --git a/gallery/index.yaml b/gallery/index.yaml
index 61ecd107..1c170f99 100644
--- a/gallery/index.yaml
+++ b/gallery/index.yaml
@@ -5583,6 +5583,33 @@
- filename: marco-o1-uncensored.Q4_K_M.gguf
sha256: ad0440270a7254098f90779744d3e5b34fe49b7baf97c819909ba9c5648cc0d9
uri: huggingface://QuantFactory/marco-o1-uncensored-GGUF/marco-o1-uncensored.Q4_K_M.gguf
+- !!merge <<: *qwen2
+ name: "minicpm-v-2_6"
+ license: apache-2.0
+ icon: https://raw.githubusercontent.com/OpenBMB/MiniCPM/main/assets/minicpm_logo.png
+ urls:
+ - https://huggingface.co/openbmb/MiniCPM-V-2_6-gguf
+ - https://huggingface.co/openbmb/MiniCPM-V-2_6
+ description: |
+ MiniCPM-V 2.6 is the latest and most capable model in the MiniCPM-V series. The model is built on SigLip-400M and Qwen2-7B with a total of 8B parameters
+ tags:
+ - llm
+ - multimodal
+ - gguf
+ - gpu
+ - qwen2
+ - cpu
+ overrides:
+ mmproj: minicpm-v-2_6-mmproj-f16.gguf
+ parameters:
+ model: minicpm-v-2_6-Q4_K_M.gguf
+ files:
+ - filename: minicpm-v-2_6-Q4_K_M.gguf
+ sha256: 3a4078d53b46f22989adbf998ce5a3fd090b6541f112d7e936eb4204a04100b1
+ uri: huggingface://openbmb/MiniCPM-V-2_6-gguf/ggml-model-Q4_K_M.gguf
+ - filename: minicpm-v-2_6-mmproj-f16.gguf
+ sha256: f8a805e9e62085805c69c427287acefc284932eb4abfe6e1b1ce431d27e2f4e0
+ uri: huggingface://openbmb/MiniCPM-V-2_6-gguf/mmproj-model-f16.gguf
- &mistral03
## START Mistral
url: "github:mudler/LocalAI/gallery/mistral-0.3.yaml@master"
@@ -9211,33 +9238,6 @@
- filename: minicpm-llama3-mmproj-f16.gguf
sha256: 391d11736c3cd24a90417c47b0c88975e86918fcddb1b00494c4d715b08af13e
uri: huggingface://openbmb/MiniCPM-Llama3-V-2_5-gguf/mmproj-model-f16.gguf
-- !!merge <<: *llama3
- name: "minicpm-v-2_6"
- license: apache-2.0
- icon: https://raw.githubusercontent.com/OpenBMB/MiniCPM/main/assets/minicpm_logo.png
- urls:
- - https://huggingface.co/openbmb/MiniCPM-V-2_6-gguf
- - https://huggingface.co/openbmb/MiniCPM-V-2_6
- description: |
- MiniCPM-V 2.6 is the latest and most capable model in the MiniCPM-V series. The model is built on SigLip-400M and Qwen2-7B with a total of 8B parameters
- tags:
- - llm
- - multimodal
- - gguf
- - gpu
- - llama3
- - cpu
- overrides:
- mmproj: minicpm-v-2_6-mmproj-f16.gguf
- parameters:
- model: minicpm-v-2_6-Q4_K_M.gguf
- files:
- - filename: minicpm-v-2_6-Q4_K_M.gguf
- sha256: 3a4078d53b46f22989adbf998ce5a3fd090b6541f112d7e936eb4204a04100b1
- uri: huggingface://openbmb/MiniCPM-V-2_6-gguf/ggml-model-Q4_K_M.gguf
- - filename: minicpm-v-2_6-mmproj-f16.gguf
- sha256: f8a805e9e62085805c69c427287acefc284932eb4abfe6e1b1ce431d27e2f4e0
- uri: huggingface://openbmb/MiniCPM-V-2_6-gguf/mmproj-model-f16.gguf
- !!merge <<: *llama3
name: "llama-3-cursedstock-v1.8-8b-iq-imatrix"
urls:
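The fix above works through YAML merge keys: "!!merge <<: *qwen2" copies the mappings from the qwen2 anchor (which points at the ChatML template) into the entry, and the per-model keys then override or extend what was inherited. A stripped-down sketch of the mechanism; the field values are illustrative, not the gallery's actual content:

    - &qwen2                 # anchor: shared settings for Qwen2-based entries
      url: "github:mudler/LocalAI/gallery/chatml.yaml@master"
      tags:
        - qwen2
        - gguf
    - !!merge <<: *qwen2     # inherit url and tags from the anchor...
      name: "minicpm-v-2_6"  # ...then add or override per-model fields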
From 0c0e015b3893816a984f59cd5a6cfb25f5cf90c1 Mon Sep 17 00:00:00 2001
From: Gianluca Boiano <491117+M0Rf30@users.noreply.github.com>
Date: Mon, 20 Jan 2025 10:40:46 +0100
Subject: [PATCH 07/29] chore(model gallery): update icons and add missing ones
(#4639)
* chore(model gallery): uniform github URLs for icons
Signed-off-by: Gianluca Boiano
* chore(model gallery): add icons to phi models
Signed-off-by: Gianluca Boiano
* chore(model gallery): add icons to QwenLM models
Signed-off-by: Gianluca Boiano
* chore(model gallery): update icon for Arcee org
Signed-off-by: Gianluca Boiano
* chore(model gallery): update icon for Meta org
Signed-off-by: Gianluca Boiano
* chore(model gallery): update icon url for OpenCoder org
Signed-off-by: Gianluca Boiano
* chore(model gallery): add icon for RWKV org
Signed-off-by: Gianluca Boiano
* chore(model gallery): add icon for IBM-granite org
Signed-off-by: Gianluca Boiano
* chore(model gallery): add icon for OpenBMB org
Signed-off-by: Gianluca Boiano
* chore(model gallery): add icon for KatanemoLabs org
Signed-off-by: Gianluca Boiano
* chore(model gallery): update icon for Meta-Llama-3.1-8B-Instruct-abliterated
Signed-off-by: Gianluca Boiano
* chore(model gallery): update icon for hermes-3-llama-3.1-8b-lorablated
Signed-off-by: Gianluca Boiano
* chore(model gallery): add icon for Google org
Signed-off-by: Gianluca Boiano
---------
Signed-off-by: Gianluca Boiano
Signed-off-by: Ettore Di Giacinto
Co-authored-by: Ettore Di Giacinto
---
gallery/index.yaml | 53 +++++++++++++++++++++++++++-------------------
1 file changed, 31 insertions(+), 22 deletions(-)
diff --git a/gallery/index.yaml b/gallery/index.yaml
index 1c170f99..fb5476f9 100644
--- a/gallery/index.yaml
+++ b/gallery/index.yaml
@@ -2,6 +2,7 @@
- &phi4
url: "github:mudler/LocalAI/gallery/phi-4-chat.yaml@master"
name: "phi-4"
+ icon: https://avatars.githubusercontent.com/u/6154722
license: mit
tags:
- llm
@@ -224,7 +225,7 @@
uri: huggingface://bartowski/INTELLECT-1-Instruct-GGUF/INTELLECT-1-Instruct-Q4_K_M.gguf
- &llama33
url: "github:mudler/LocalAI/gallery/llama3.1-instruct.yaml@master"
- icon: https://cdn-uploads.huggingface.co/production/uploads/642cc1c253e76b4c2286c58e/aJJxKus1wP5N-euvHEUq7.png
+ icon: https://avatars.githubusercontent.com/u/153379578
license: llama3.3
description: |
The Meta Llama 3.3 multilingual large language model (LLM) is a pretrained and instruction tuned generative model in 70B (text in/text out). The Llama 3.3 instruction tuned text only model is optimized for multilingual dialogue use cases and outperforms many of the available open source and closed chat models on common industry benchmarks.
@@ -421,6 +422,7 @@
- &rwkv
url: "github:mudler/LocalAI/gallery/rwkv.yaml@master"
name: "rwkv-6-world-7b"
+ icon: https://avatars.githubusercontent.com/u/132652788
license: apache-2.0
urls:
- https://huggingface.co/RWKV/rwkv-6-world-7b
@@ -443,6 +445,7 @@
uri: huggingface://bartowski/rwkv-6-world-7b-GGUF/rwkv-6-world-7b-Q4_K_M.gguf
- &qwen25coder
name: "qwen2.5-coder-14b"
+ icon: https://avatars.githubusercontent.com/u/141221163
url: "github:mudler/LocalAI/gallery/chatml.yaml@master"
license: apache-2.0
tags:
@@ -628,7 +631,7 @@
uri: huggingface://mradermacher/Qwen2.5-Coder-32B-Instruct-Uncensored-i1-GGUF/Qwen2.5-Coder-32B-Instruct-Uncensored.i1-Q4_K_M.gguf
- &opencoder
name: "opencoder-8b-base"
- icon: https://github.com/OpenCoder-llm/opencoder-llm.github.io/blob/main/static/images/opencoder_icon.jpg?raw=true
+ icon: https://avatars.githubusercontent.com/u/186387526
url: "github:mudler/LocalAI/gallery/codellama.yaml@master"
urls:
- https://huggingface.co/infly/OpenCoder-8B-Base
@@ -694,6 +697,7 @@
uri: huggingface://QuantFactory/OpenCoder-1.5B-Instruct-GGUF/OpenCoder-1.5B-Instruct.Q4_K_M.gguf
- &granite3
name: "granite-3.0-1b-a400m-instruct"
+ icon: https://avatars.githubusercontent.com/u/167822367
urls:
- https://huggingface.co/ibm-granite/granite-3.0-1b-a400m-instruct
- https://huggingface.co/QuantFactory/granite-3.0-1b-a400m-instruct-GGUF
@@ -781,7 +785,7 @@
- &llama32
## llama3.2
url: "github:mudler/LocalAI/gallery/llama3.1-instruct.yaml@master"
- icon: https://cdn-uploads.huggingface.co/production/uploads/642cc1c253e76b4c2286c58e/aJJxKus1wP5N-euvHEUq7.png
+ icon: https://avatars.githubusercontent.com/u/153379578
license: llama3.2
description: |
The Meta Llama 3.2 collection of multilingual large language models (LLMs) is a collection of pretrained and instruction-tuned generative models in 1B and 3B sizes (text in/text out). The Llama 3.2 instruction-tuned text only models are optimized for multilingual dialogue use cases, including agentic retrieval and summarization tasks. They outperform many of the available open source and closed chat models on common industry benchmarks.
@@ -950,7 +954,6 @@
uri: huggingface://mradermacher/Llama-3.2-3B-Reasoning-Time-GGUF/Llama-3.2-3B-Reasoning-Time.Q4_K_M.gguf
- !!merge <<: *llama32
name: "llama-3.2-sun-2.5b-chat"
- icon: https://i.ibb.co/PF0TdMJ/imagine-image-9a56cee7-0f4f-4cc2-b265-a5b8d04f266b.png
urls:
- https://huggingface.co/meditsolutions/Llama-3.2-SUN-2.5B-chat
- https://huggingface.co/mradermacher/Llama-3.2-SUN-2.5B-chat-GGUF
@@ -982,7 +985,6 @@
uri: huggingface://mradermacher/Llama-3.2-SUN-2.5B-chat-GGUF/Llama-3.2-SUN-2.5B-chat.Q4_K_M.gguf
- !!merge <<: *llama32
name: "llama-3.2-3b-instruct-uncensored"
- icon: https://i.imgur.com/JOePyAN.png
urls:
- https://huggingface.co/bartowski/Llama-3.2-3B-Instruct-uncensored-GGUF
- https://huggingface.co/chuanli11/Llama-3.2-3B-Instruct-uncensored
@@ -1319,6 +1321,7 @@
- &qwen25
## Qwen2.5
name: "qwen2.5-14b-instruct"
+ icon: https://avatars.githubusercontent.com/u/141221163
url: "github:mudler/LocalAI/gallery/chatml.yaml@master"
license: apache-2.0
description: |
@@ -1608,6 +1611,7 @@
uri: huggingface://bartowski/qwen2.5-7b-ins-v3-GGUF/qwen2.5-7b-ins-v3-Q4_K_M.gguf
- !!merge <<: *qwen25
name: "supernova-medius"
+ icon: https://avatars.githubusercontent.com/u/126496414
urls:
- https://huggingface.co/arcee-ai/SuperNova-Medius-GGUF
description: |
@@ -1762,7 +1766,7 @@
uri: huggingface://bartowski/TheBeagle-v2beta-32B-MGS-GGUF/TheBeagle-v2beta-32B-MGS-Q4_K_M.gguf
- !!merge <<: *qwen25
name: "meraj-mini"
- icon: https://i.ibb.co/CmPSSpq/Screenshot-2024-10-06-at-9-45-06-PM.png
+ icon: https://avatars.githubusercontent.com/u/126496414
urls:
- https://huggingface.co/arcee-ai/Meraj-Mini
- https://huggingface.co/QuantFactory/Meraj-Mini-GGUF
@@ -2392,7 +2396,7 @@
uri: huggingface://QuantFactory/Math-IIO-7B-Instruct-GGUF/Math-IIO-7B-Instruct.Q4_K_M.gguf
- !!merge <<: *qwen25
name: "virtuoso-small"
- icon: https://i.ibb.co/pXD6Bcv/SW2-U-g-QQLSH1-ZAbxhs-Iu-A.webp
+ icon: https://avatars.githubusercontent.com/u/126496414
urls:
- https://huggingface.co/arcee-ai/Virtuoso-Small-GGUF
description: |
@@ -2670,6 +2674,7 @@
- cpu
- function-calling
name: "arch-function-1.5b"
+ icon: https://avatars.githubusercontent.com/u/112724757
uri: "github:mudler/LocalAI/gallery/arch-function.yaml@master"
urls:
- https://huggingface.co/katanemolabs/Arch-Function-1.5B
@@ -3109,7 +3114,7 @@
uri: huggingface://bartowski/Rombos-Qwen2.5-Writer-32b-GGUF/Rombos-Qwen2.5-Writer-32b-Q4_K_M.gguf
- !!merge <<: *qwen25
name: "sky-t1-32b-preview"
- icon: https://raw.githubusercontent.com/NovaSky-AI/novasky-ai.github.io/main/assets/images/blue-bird-wider.jpeg
+ icon: https://github.com/NovaSky-AI/novasky-ai.github.io/raw/main/assets/images/blue-bird-wider.jpeg
urls:
- https://huggingface.co/NovaSky-AI/Sky-T1-32B-Preview
- https://huggingface.co/bartowski/Sky-T1-32B-Preview-GGUF
@@ -3298,7 +3303,7 @@
- &llama31
## LLama3.1
url: "github:mudler/LocalAI/gallery/llama3.1-instruct.yaml@master"
- icon: https://cdn-uploads.huggingface.co/production/uploads/642cc1c253e76b4c2286c58e/aJJxKus1wP5N-euvHEUq7.png
+ icon: https://avatars.githubusercontent.com/u/153379578
name: "meta-llama-3.1-8b-instruct"
license: llama3.1
description: |
@@ -3387,7 +3392,7 @@
sha256: 6d175432f66d10dfed9737f73a5073d513d18e1ee7bd4b9cf2a59deb359f36ff
- !!merge <<: *llama31
name: "meta-llama-3.1-8b-instruct-abliterated"
- icon: https://i.imgur.com/KhorYYG.png
+ icon: https://cdn-uploads.huggingface.co/production/uploads/61b8e2ba285851687028d395/AsTgL8VCgMHgobq4cr46b.png
urls:
- https://huggingface.co/mlabonne/Meta-Llama-3.1-8B-Instruct-abliterated
- https://huggingface.co/mlabonne/Meta-Llama-3.1-8B-Instruct-abliterated-GGUF
@@ -3416,7 +3421,7 @@
uri: huggingface://mmnga/Llama-3.1-70B-Japanese-Instruct-2407-gguf/Llama-3.1-70B-Japanese-Instruct-2407-Q4_K_M.gguf
- !!merge <<: *llama31
name: "openbuddy-llama3.1-8b-v22.1-131k"
- icon: https://raw.githubusercontent.com/OpenBuddy/OpenBuddy/main/media/demo.png
+ icon: https://github.com/OpenBuddy/OpenBuddy/raw/main/media/demo.png
urls:
- https://huggingface.co/sunnyyy/openbuddy-llama3.1-8b-v22.1-131k-Q4_K_M-GGUF
description: |
@@ -3592,7 +3597,7 @@
sha256: 6557c5d5091f2507d19ab1f8bfb9ceb4e1536a755ab70f148b18aeb33741580f
uri: huggingface://mradermacher/Llama-3.1-Techne-RP-8b-v1-GGUF/Llama-3.1-Techne-RP-8b-v1.Q4_K_M.gguf
- !!merge <<: *llama31
- icon: https://i.ibb.co/9hwFrvL/BLMs-Wkx-NQf-W-46-FZDg-ILhg.jpg
+ icon: https://avatars.githubusercontent.com/u/126496414
name: "llama-spark"
urls:
- https://huggingface.co/arcee-ai/Llama-Spark
@@ -3710,7 +3715,6 @@
- !!merge <<: *llama31
name: "llama-3.1-supernova-lite-reflection-v1.0-i1"
url: "github:mudler/LocalAI/gallery/llama3.1-reflective.yaml@master"
- icon: https://i.ibb.co/r072p7j/eopi-ZVu-SQ0-G-Cav78-Byq-Tg.png
urls:
- https://huggingface.co/SE6446/Llama-3.1-SuperNova-Lite-Reflection-V1.0
- https://huggingface.co/mradermacher/Llama-3.1-SuperNova-Lite-Reflection-V1.0-i1-GGUF
@@ -3725,7 +3729,7 @@
uri: huggingface://mradermacher/Llama-3.1-SuperNova-Lite-Reflection-V1.0-i1-GGUF/Llama-3.1-SuperNova-Lite-Reflection-V1.0.i1-Q4_K_M.gguf
- !!merge <<: *llama31
name: "llama-3.1-supernova-lite"
- icon: https://i.ibb.co/r072p7j/eopi-ZVu-SQ0-G-Cav78-Byq-Tg.png
+ icon: https://avatars.githubusercontent.com/u/126496414
urls:
- https://huggingface.co/arcee-ai/Llama-3.1-SuperNova-Lite
- https://huggingface.co/arcee-ai/Llama-3.1-SuperNova-Lite-GGUF
@@ -4239,6 +4243,7 @@
uri: huggingface://mradermacher/Hermes-3-Llama-3.1-70B-lorablated-GGUF/Hermes-3-Llama-3.1-70B-lorablated.Q4_K_M.gguf
- !!merge <<: *llama31
name: "hermes-3-llama-3.1-8b-lorablated"
+ icon: https://cdn-uploads.huggingface.co/production/uploads/61b8e2ba285851687028d395/4Hbw5n68jKUSBQeTqQIeT.png
urls:
- https://huggingface.co/mlabonne/Hermes-3-Llama-3.1-8B-lorablated-GGUF
description: |
@@ -5254,6 +5259,7 @@
## Start QWEN2
url: "github:mudler/LocalAI/gallery/chatml.yaml@master"
name: "qwen2-7b-instruct"
+ icon: https://avatars.githubusercontent.com/u/141221163
license: apache-2.0
description: |
Qwen2 is the new series of Qwen large language models. For Qwen2, we release a number of base language models and instruction-tuned language models ranging from 0.5 to 72 billion parameters, including a Mixture-of-Experts model. This repo contains the instruction-tuned 7B Qwen2 model.
@@ -5360,7 +5366,7 @@
uri: huggingface://bartowski/Einstein-v7-Qwen2-7B-GGUF/Einstein-v7-Qwen2-7B-Q4_K_M.gguf
- !!merge <<: *qwen2
name: "arcee-spark"
- icon: https://i.ibb.co/80ssNWS/o-Vdk-Qx-ARNmzr-Pi1h-Efj-SA.webp
+ icon: https://avatars.githubusercontent.com/u/126496414
description: |
Arcee Spark is a powerful 7B parameter language model that punches well above its weight class. Initialized from Qwen2, this model underwent a sophisticated training process:
@@ -5398,7 +5404,7 @@
uri: huggingface://Hercules-5.0-Qwen2-7B-Q4_K_M.gguf/Hercules-5.0-Qwen2-7B-Q4_K_M.gguf
- !!merge <<: *qwen2
name: "arcee-agent"
- icon: https://i.ibb.co/CBHmTDn/136719a5-6d8a-4654-a618-46eabc788953.jpg
+ icon: https://avatars.githubusercontent.com/u/126496414
description: |
Arcee Agent is a cutting-edge 7B parameter language model specifically designed for function calling and tool use. Initialized from Qwen2-7B, it rivals the performance of much larger models while maintaining efficiency and speed. This model is particularly suited for developers, researchers, and businesses looking to implement sophisticated AI-driven solutions without the computational overhead of larger language models. Compute for training Arcee-Agent was provided by CrusoeAI. Arcee-Agent was trained using Spectrum.
urls:
@@ -5586,7 +5592,7 @@
- !!merge <<: *qwen2
name: "minicpm-v-2_6"
license: apache-2.0
- icon: https://raw.githubusercontent.com/OpenBMB/MiniCPM/main/assets/minicpm_logo.png
+ icon: https://avatars.githubusercontent.com/u/89920203
urls:
- https://huggingface.co/openbmb/MiniCPM-V-2_6-gguf
- https://huggingface.co/openbmb/MiniCPM-V-2_6
@@ -6321,6 +6327,7 @@
- &gemma
url: "github:mudler/LocalAI/gallery/gemma.yaml@master"
name: "gemma-2b"
+ icon: https://avatars.githubusercontent.com/u/1342004
license: gemma
urls:
- https://ai.google.dev/gemma/docs
@@ -7036,7 +7043,7 @@
uri: huggingface://bartowski/GWQ-9B-Preview2-GGUF/GWQ-9B-Preview2-Q4_K_M.gguf
- &llama3
url: "github:mudler/LocalAI/gallery/llama3-instruct.yaml@master"
- icon: https://cdn-uploads.huggingface.co/production/uploads/642cc1c253e76b4c2286c58e/aJJxKus1wP5N-euvHEUq7.png
+ icon: https://avatars.githubusercontent.com/u/153379578
name: "llama3-8b-instruct"
license: llama3
description: |
@@ -8503,7 +8510,7 @@
urls:
- https://huggingface.co/arcee-ai/Llama-3-SEC-Chat-GGUF
- https://huggingface.co/arcee-ai/Llama-3-SEC-Chat
- icon: https://i.ibb.co/kHtBmDN/w8m6-X4-HCQRa-IR86ar-Cm5gg.webp
+ icon: https://avatars.githubusercontent.com/u/126496414
tags:
- llama3
- gguf
@@ -8536,7 +8543,7 @@
- &yi-chat
### Start Yi
url: "github:mudler/LocalAI/gallery/chatml.yaml@master"
- icon: "https://raw.githubusercontent.com/01-ai/Yi/main/assets/img/Yi_logo_icon_light.svg"
+ icon: "https://github.com/01-ai/Yi/raw/main/assets/img/Yi_logo_icon_light.svg"
name: "yi-1.5-9b-chat"
license: apache-2.0
urls:
@@ -9165,7 +9172,7 @@
urls:
- https://huggingface.co/BAAI/Bunny-Llama-3-8B-V-gguf
description: |
- Bunny is a family of lightweight but powerful multimodal models. It offers multiple plug-and-play vision encoders, like EVA-CLIP, SigLIP and language backbones, including Llama-3-8B, Phi-1.5, StableLM-2, Qwen1.5, MiniCPM and Phi-2. To compensate for the decrease in model size, we construct more informative training data by curated selection from a broader data source.
+ Bunny is a family of lightweight but powerful multimodal models. It offers multiple plug-and-play vision encoders, like EVA-CLIP, SigLIP and language backbones, including Llama-3-8B, Phi-1.5, StableLM-2, Qwen1.5, and Phi-2. To compensate for the decrease in model size, we construct more informative training data by curated selection from a broader data source.
We provide Bunny-Llama-3-8B-V, which is built upon SigLIP and Llama-3-8B-Instruct. More details about this model can be found in GitHub.
icon: https://huggingface.co/BAAI/Bunny-Llama-3-8B-V-gguf/resolve/main/icon.png
@@ -9214,7 +9221,7 @@
uri: huggingface://xtuner/llava-llama-3-8b-v1_1-gguf/llava-llama-3-8b-v1_1-mmproj-f16.gguf
- !!merge <<: *llama3
name: "minicpm-llama3-v-2_5"
- icon: https://raw.githubusercontent.com/OpenBMB/MiniCPM/main/assets/minicpm_logo.png
+ icon: https://avatars.githubusercontent.com/u/89920203
urls:
- https://huggingface.co/openbmb/MiniCPM-Llama3-V-2_5-gguf
- https://huggingface.co/openbmb/MiniCPM-Llama3-V-2_5
@@ -10054,6 +10061,7 @@
- llama2
- cpu
name: "phi-2-chat:Q8_0"
+ icon: https://avatars.githubusercontent.com/u/6154722
overrides:
parameters:
model: phi-2-layla-v1-chatml-Q8_0.gguf
@@ -10150,6 +10158,7 @@
### START Phi-3
url: "github:mudler/LocalAI/gallery/phi-3-chat.yaml@master"
name: "phi-3-mini-4k-instruct"
+ icon: https://avatars.githubusercontent.com/u/6154722
license: mit
description: |
The Phi-3-Mini-4K-Instruct is a 3.8B parameters, lightweight, state-of-the-art open model trained with the Phi-3 datasets that includes both synthetic data and the filtered publicly available websites data with a focus on high-quality and reasoning dense properties. The model belongs to the Phi-3 family with the Mini version in two variants 4K and 128K which is the context length (in tokens) it can support. The model has undergone a post-training process that incorporates both supervised fine-tuning and direct preference optimization to ensure precise instruction adherence and robust safety measures. When assessed against benchmarks testing common sense, language understanding, math, code, long context and logical reasoning, Phi-3 Mini-4K-Instruct showcased a robust and state-of-the-art performance among models with less than 13 billion parameters.
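Most replacement icons above point at https://avatars.githubusercontent.com/u/<id>, i.e. the owning organization's GitHub avatar, which stays valid even when image files move inside a repository. The numeric id can be looked up via the public GitHub API; a small sketch, assuming curl and jq are available (the patch itself maps 141221163 to the Qwen entries and 6154722 to the Phi entries):

    curl -s https://api.github.com/orgs/QwenLM | jq '{id, avatar_url}'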
From adebd557ce8446edbe097b3eeb54c524e6638e78 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Mon, 20 Jan 2025 10:45:10 +0100
Subject: [PATCH 08/29] chore(model gallery): add wayfarer-12b (#4641)
Signed-off-by: Ettore Di Giacinto
---
gallery/index.yaml | 105 +++++++++++++++++++++++----------------------
1 file changed, 54 insertions(+), 51 deletions(-)
diff --git a/gallery/index.yaml b/gallery/index.yaml
index fb5476f9..0397bd75 100644
--- a/gallery/index.yaml
+++ b/gallery/index.yaml
@@ -190,7 +190,7 @@
- https://huggingface.co/Nitral-AI/NightWing3-10B-v0.1
- https://huggingface.co/bartowski/NightWing3-10B-v0.1-GGUF
description: |
- Base model: (Falcon3-10B)
+ Base model: (Falcon3-10B)
overrides:
parameters:
model: NightWing3-10B-v0.1-Q4_K_M.gguf
@@ -782,8 +782,7 @@
- filename: salamandra-7b-instruct.Q4_K_M-f32.gguf
sha256: bac8e8c1d1d9d53cbdb148b8ff9ad378ddb392429207099e85b5aae3a43bff3d
uri: huggingface://cstr/salamandra-7b-instruct-GGUF/salamandra-7b-instruct.Q4_K_M-f32.gguf
-- &llama32
- ## llama3.2
+- &llama32 ## llama3.2
url: "github:mudler/LocalAI/gallery/llama3.1-instruct.yaml@master"
icon: https://avatars.githubusercontent.com/u/153379578
license: llama3.2
@@ -1318,8 +1317,7 @@
- filename: FineMath-Llama-3B-Q4_K_M.gguf
sha256: 16c73b5cf2a417a7e1608bcc9469f1461fc3e759ce04a3a337f48df977dc158c
uri: huggingface://bartowski/FineMath-Llama-3B-GGUF/FineMath-Llama-3B-Q4_K_M.gguf
-- &qwen25
- ## Qwen2.5
+- &qwen25 ## Qwen2.5
name: "qwen2.5-14b-instruct"
icon: https://avatars.githubusercontent.com/u/141221163
url: "github:mudler/LocalAI/gallery/chatml.yaml@master"
@@ -3241,8 +3239,7 @@
- filename: DRT-o1-14B-Q4_K_M.gguf
sha256: 9619ca984cf4ce8e4f69bcde831de17b2ce05dd89536e3130608877521e3d328
uri: huggingface://bartowski/DRT-o1-14B-GGUF/DRT-o1-14B-Q4_K_M.gguf
-- &smollm
- ## SmolLM
+- &smollm ## SmolLM
url: "github:mudler/LocalAI/gallery/chatml.yaml@master"
name: "smollm-1.7b-instruct"
icon: https://huggingface.co/datasets/HuggingFaceTB/images/resolve/main/banner_smol.png
@@ -3300,8 +3297,7 @@
- filename: Vikhr-Qwen-2.5-1.5B-Instruct.Q4_K_M.gguf
sha256: eaeac314e30b461413bc1cc819cdc0cd6a79265711fd0b8268702960a082c7bd
uri: huggingface://QuantFactory/Vikhr-Qwen-2.5-1.5B-Instruct-GGUF/Vikhr-Qwen-2.5-1.5B-Instruct.Q4_K_M.gguf
-- &llama31
- ## LLama3.1
+- &llama31 ## LLama3.1
url: "github:mudler/LocalAI/gallery/llama3.1-instruct.yaml@master"
icon: https://avatars.githubusercontent.com/u/153379578
name: "meta-llama-3.1-8b-instruct"
@@ -5189,8 +5185,7 @@
- filename: Dolphin3.0-Llama3.1-8B-Q4_K_M.gguf
sha256: 268390e07edd407ad93ea21a868b7ae995b5950e01cad0db9e1802ae5049d405
uri: huggingface://bartowski/Dolphin3.0-Llama3.1-8B-GGUF/Dolphin3.0-Llama3.1-8B-Q4_K_M.gguf
-- &deepseek
- ## Deepseek
+- &deepseek ## Deepseek
url: "github:mudler/LocalAI/gallery/deepseek.yaml@master"
name: "deepseek-coder-v2-lite-instruct"
icon: "https://github.com/deepseek-ai/DeepSeek-V2/blob/main/figures/logo.svg?raw=true"
@@ -5255,8 +5250,7 @@
- filename: archangel_sft_pythia2-8b.Q4_K_M.gguf
sha256: a47782c55ef2b39b19644213720a599d9849511a73c9ebb0c1de749383c0a0f8
uri: huggingface://RichardErkhov/ContextualAI_-_archangel_sft_pythia2-8b-gguf/archangel_sft_pythia2-8b.Q4_K_M.gguf
-- &qwen2
- ## Start QWEN2
+- &qwen2 ## Start QWEN2
url: "github:mudler/LocalAI/gallery/chatml.yaml@master"
name: "qwen2-7b-instruct"
icon: https://avatars.githubusercontent.com/u/141221163
@@ -5616,8 +5610,7 @@
- filename: minicpm-v-2_6-mmproj-f16.gguf
sha256: f8a805e9e62085805c69c427287acefc284932eb4abfe6e1b1ce431d27e2f4e0
uri: huggingface://openbmb/MiniCPM-V-2_6-gguf/mmproj-model-f16.gguf
-- &mistral03
- ## START Mistral
+- &mistral03 ## START Mistral
url: "github:mudler/LocalAI/gallery/mistral-0.3.yaml@master"
name: "mistral-7b-instruct-v0.3"
icon: https://cdn-avatars.huggingface.co/v1/production/uploads/62dac1c7a8ead43d20e3e17a/wrLf5yaGC6ng4XME70w6Z.png
@@ -6222,8 +6215,35 @@
- filename: Nera_Noctis-12B-Q4_K_M.gguf
sha256: 0662a9a847adde046e6255c15d5a677ebf09ab00841547c8963668d14baf00ff
uri: huggingface://bartowski/Nera_Noctis-12B-GGUF/Nera_Noctis-12B-Q4_K_M.gguf
-- &mudler
- ### START mudler's LocalAI specific-models
+- !!merge <<: *mistral03
+ name: "wayfarer-12b"
+ url: "github:mudler/LocalAI/gallery/chatml.yaml@master"
+ icon: https://huggingface.co/LatitudeGames/Wayfarer-12B/resolve/main/wayfarer.jpg
+ urls:
+ - https://huggingface.co/LatitudeGames/Wayfarer-12B
+ - https://huggingface.co/bartowski/Wayfarer-12B-GGUF
+ description: |
+ We’ve heard over and over from AI Dungeon players that modern AI models are too nice, never letting them fail or die. While it may be good for a chatbot to be nice and helpful, great stories and games aren’t all rainbows and unicorns. They have conflict, tension, and even death. These create real stakes and consequences for characters and the journeys they go on.
+
+ Similarly, great games need opposition. You must be able to fail, die, and may even have to start over. This makes games more fun!
+
+ However, the vast majority of AI models, through alignment RLHF, have been trained away from darkness, violence, or conflict, preventing them from fulfilling this role. To give our players better options, we decided to train our own model to fix these issues.
+
+ Wayfarer is an adventure role-play model specifically trained to give players a challenging and dangerous experience. We thought they would like it, but since releasing it on AI Dungeon, players have reacted even more positively than we expected.
+
+ Because they loved it so much, we’ve decided to open-source the model so anyone can experience unforgivingly brutal AI adventures! Anyone can download the model to run locally.
+
+ Or if you want to easily try this model for free, you can do so at https://aidungeon.com.
+
+ We plan to continue improving and open-sourcing similar models, so please share any and all feedback on how we can improve model behavior. Below we share more details on how Wayfarer was created.
+ overrides:
+ parameters:
+ model: Wayfarer-12B-Q4_K_M.gguf
+ files:
+ - filename: Wayfarer-12B-Q4_K_M.gguf
+ sha256: 6cd9f290c820c64854fcdcfd312b066447acc2f63abe2e2e71af9bc4f1946c08
+ uri: huggingface://bartowski/Wayfarer-12B-GGUF/Wayfarer-12B-Q4_K_M.gguf
+- &mudler ### START mudler's LocalAI specific-models
url: "github:mudler/LocalAI/gallery/mudler.yaml@master"
name: "LocalAI-llama3-8b-function-call-v0.2"
icon: "https://cdn-uploads.huggingface.co/production/uploads/647374aa7ff32a81ac6d35d4/us5JKi9z046p8K-cn_M0w.webp"
@@ -6268,8 +6288,7 @@
- filename: Mirai-Nova-Llama3-LocalAI-8B-v0.1-q4_k_m.bin
sha256: 579cbb229f9c11d0330759ff4733102d2491615a4c61289e26c09d1b3a583fec
uri: huggingface://mudler/Mirai-Nova-Llama3-LocalAI-8B-v0.1-GGUF/Mirai-Nova-Llama3-LocalAI-8B-v0.1-q4_k_m.bin
-- &parler-tts
- ### START parler-tts
+- &parler-tts ### START parler-tts
url: "github:mudler/LocalAI/gallery/parler-tts.yaml@master"
name: parler-tts-mini-v0.1
overrides:
@@ -6286,8 +6305,7 @@
- cpu
- text-to-speech
- python
-- &rerankers
- ### START rerankers
+- &rerankers ### START rerankers
url: "github:mudler/LocalAI/gallery/rerankers.yaml@master"
name: cross-encoder
parameters:
@@ -8540,8 +8558,7 @@
- filename: Copus-2x8B.i1-Q4_K_M.gguf
sha256: 685da1ba49e203e8f491105585143d76044286d4b4687bed37d325f6b55501e5
uri: huggingface://mradermacher/Copus-2x8B-i1-GGUF/Copus-2x8B.i1-Q4_K_M.gguf
-- &yi-chat
- ### Start Yi
+- &yi-chat ### Start Yi
url: "github:mudler/LocalAI/gallery/chatml.yaml@master"
icon: "https://github.com/01-ai/Yi/raw/main/assets/img/Yi_logo_icon_light.svg"
name: "yi-1.5-9b-chat"
@@ -8752,8 +8769,7 @@
- filename: Fimbulvetr-11B-v2-Q4_K_M-imat.gguf
sha256: 3f309b59508342536a70edd6c4be6cf4f2cb97f2e32cbc79ad2ab3f4c02933a4
uri: huggingface://Lewdiculous/Fimbulvetr-11B-v2-GGUF-IQ-Imatrix/Fimbulvetr-11B-v2-Q4_K_M-imat.gguf
-- &noromaid
- ### Start noromaid
+- &noromaid ### Start noromaid
url: "github:mudler/LocalAI/gallery/noromaid.yaml@master"
name: "noromaid-13b-0.4-DPO"
icon: https://cdn-uploads.huggingface.co/production/uploads/630dfb008df86f1e5becadc3/VKX2Z2yjZX5J8kXzgeCYO.png
@@ -8773,8 +8789,7 @@
- filename: Noromaid-13B-0.4-DPO.q4_k_m.gguf
sha256: cb28e878d034fae3d0b43326c5fc1cfb4ab583b17c56e41d6ce023caec03c1c1
uri: huggingface://NeverSleep/Noromaid-13B-0.4-DPO-GGUF/Noromaid-13B-0.4-DPO.q4_k_m.gguf
-- &wizardlm2
- ### START Vicuna based
+- &wizardlm2 ### START Vicuna based
url: "github:mudler/LocalAI/gallery/wizardlm2.yaml@master"
name: "wizardlm2-7b"
description: |
@@ -8829,8 +8844,7 @@
- filename: moondream2-mmproj-f16.gguf
sha256: 4cc1cb3660d87ff56432ebeb7884ad35d67c48c7b9f6b2856f305e39c38eed8f
uri: huggingface://moondream/moondream2-gguf/moondream2-mmproj-f16.gguf
-- &llava
- ### START LLaVa
+- &llava ### START LLaVa
url: "github:mudler/LocalAI/gallery/llava.yaml@master"
license: apache-2.0
description: |
@@ -9688,8 +9702,7 @@
- filename: Freyja-v4.95-maldv-7b-NON-FICTION.i1-Q4_K_M.gguf
sha256: cdc0f4de6df2ba120835fbd25c2a0ae2af8548f46d2c40c7a018c51c3d19e0c0
uri: huggingface://mradermacher/Freyja-v4.95-maldv-7b-NON-FICTION-i1-GGUF/Freyja-v4.95-maldv-7b-NON-FICTION.i1-Q4_K_M.gguf
-- &chatml
- ### ChatML
+- &chatml ### ChatML
url: "github:mudler/LocalAI/gallery/chatml.yaml@master"
name: "una-thepitbull-21.4b-v2"
license: afl-3.0
@@ -9975,8 +9988,7 @@
- filename: Triangulum-10B.Q4_K_M.gguf
sha256: dd071f99edf6b166044bf229cdeec19419c4c348e3fc3d6587cfcc55e6fb85fa
uri: huggingface://mradermacher/Triangulum-10B-GGUF/Triangulum-10B.Q4_K_M.gguf
-- &command-R
- ### START Command-r
+- &command-R ### START Command-r
url: "github:mudler/LocalAI/gallery/command-r.yaml@master"
name: "command-r-v01:q1_s"
license: "cc-by-nc-4.0"
@@ -10031,8 +10043,7 @@
- filename: "aya-23-35B-Q4_K_M.gguf"
sha256: "57824768c1a945e21e028c8e9a29b39adb4838d489f5865c82601ab9ad98065d"
uri: "huggingface://bartowski/aya-23-35B-GGUF/aya-23-35B-Q4_K_M.gguf"
-- &phi-2-chat
- ### START Phi-2
+- &phi-2-chat ### START Phi-2
url: "github:mudler/LocalAI/gallery/phi-2-chat.yaml@master"
license: mit
description: |
@@ -10154,8 +10165,7 @@
- filename: internlm3-8b-instruct-Q4_K_M.gguf
uri: huggingface://bartowski/internlm3-8b-instruct-GGUF/internlm3-8b-instruct-Q4_K_M.gguf
sha256: 2a9644687318e8659c9cf9b40730d5cc2f5af06f786a50439c7c51359b23896e
-- &phi-3
- ### START Phi-3
+- &phi-3 ### START Phi-3
url: "github:mudler/LocalAI/gallery/phi-3-chat.yaml@master"
name: "phi-3-mini-4k-instruct"
icon: https://avatars.githubusercontent.com/u/6154722
@@ -10355,8 +10365,7 @@
- filename: Phi-3.5-MoE-instruct-Q4_K_M.gguf
sha256: 43e91bb720869bd8a92d8eb86bc3c74a52c49cf61642ca709b3d7bb89644df36
uri: huggingface://bartowski/Phi-3.5-MoE-instruct-GGUF/Phi-3.5-MoE-instruct-Q4_K_M.gguf
-- &hermes-2-pro-mistral
- ### START Hermes
+- &hermes-2-pro-mistral ### START Hermes
url: "github:mudler/LocalAI/gallery/hermes-2-pro-mistral.yaml@master"
name: "hermes-2-pro-mistral"
icon: https://cdn-uploads.huggingface.co/production/uploads/6317aade83d8d2fd903192d9/ggO2sBDJ8Bhc6w-zwTx5j.png
@@ -10692,8 +10701,7 @@
- filename: "galatolo-Q4_K.gguf"
sha256: "ca0cfd5a9ad40dc16416aa3a277015d0299b62c0803b67f5709580042202c172"
uri: "huggingface://galatolo/cerbero-7b-gguf/ggml-model-Q4_K.gguf"
-- &codellama
- ### START Codellama
+- &codellama ### START Codellama
url: "github:mudler/LocalAI/gallery/codellama.yaml@master"
name: "codellama-7b"
license: llama2
@@ -10824,8 +10832,7 @@
- filename: "llm-compiler-7b-ftd.Q4_K.gguf"
uri: "huggingface://legraphista/llm-compiler-7b-ftd-IMat-GGUF/llm-compiler-7b-ftd.Q4_K.gguf"
sha256: d862dd18ed335413787d0ad196522a9902a3c10a6456afdab8721822cb0ddde8
-- &openvino
- ### START OpenVINO
+- &openvino ### START OpenVINO
url: "github:mudler/LocalAI/gallery/openvino.yaml@master"
name: "openvino-llama-3-8b-instruct-ov-int8"
license: llama3
@@ -10939,8 +10946,7 @@
- gpu
- embedding
- cpu
-- &sentencentransformers
- ### START Embeddings
+- &sentencentransformers ### START Embeddings
description: |
This framework provides an easy method to compute dense vector representations for sentences, paragraphs, and images. The models are based on transformer networks like BERT / RoBERTa / XLM-RoBERTa etc. and achieve state-of-the-art performance in various tasks. Text is embedded in vector space such that similar texts are closer and can efficiently be found using cosine similarity.
urls:
@@ -10955,8 +10961,7 @@
overrides:
parameters:
model: all-MiniLM-L6-v2
-- &dreamshaper
- ### START Image generation
+- &dreamshaper ### START Image generation
name: dreamshaper
icon: https://image.civitai.com/xG1nkqKTMzGDvpLrqFT7WA/dd9b038c-bd15-43ab-86ab-66e145ad7ff2/width=450/26072158-132340247-8k%20portrait%20of%20beautiful%20cyborg%20with%20brown%20hair,%20intricate,%20elegant,%20highly%20detailed,%20majestic,%20digital%20photography,%20art%20by%20artg_ed.jpeg
license: other
@@ -11068,8 +11073,7 @@
- filename: t5xxl_fp16.safetensors
sha256: 6e480b09fae049a72d2a8c5fbccb8d3e92febeb233bbe9dfe7256958a9167635
uri: https://huggingface.co/comfyanonymous/flux_text_encoders/resolve/main/t5xxl_fp16.safetensors
-- &whisper
- ## Whisper
+- &whisper ## Whisper
url: "github:mudler/LocalAI/gallery/whisper-base.yaml@master"
name: "whisper-1"
license: "MIT"
@@ -11249,8 +11253,7 @@
description: |
Stable Diffusion in NCNN with c++, supported txt2img and img2img
name: stablediffusion-cpp
-- &piper
- ## Piper TTS
+- &piper ## Piper TTS
url: github:mudler/LocalAI/gallery/piper.yaml@master
name: voice-en-us-kathleen-low
icon: https://github.com/rhasspy/piper/raw/master/etc/logo.png
From 83a8d90c52816832bd3362d6455501d479ce16ab Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Mon, 20 Jan 2025 10:50:29 +0100
Subject: [PATCH 09/29] chore(model gallery): add l3.3-70b-magnum-v4-se (#4642)
Signed-off-by: Ettore Di Giacinto
---
gallery/index.yaml | 16 ++++++++++++++++
1 file changed, 16 insertions(+)
diff --git a/gallery/index.yaml b/gallery/index.yaml
index 0397bd75..d10cd32e 100644
--- a/gallery/index.yaml
+++ b/gallery/index.yaml
@@ -419,6 +419,22 @@
- filename: L3.3-MS-Nevoria-70b-Q4_K_M.gguf
sha256: e8b0763f263089a19d4b112b7ed5085cc5f1ed9ca49c5085baa8d51f4ded1f94
uri: huggingface://bartowski/L3.3-MS-Nevoria-70b-GGUF/L3.3-MS-Nevoria-70b-Q4_K_M.gguf
+- !!merge <<: *llama33
+ name: "l3.3-70b-magnum-v4-se"
+ urls:
+ - https://huggingface.co/Doctor-Shotgun/L3.3-70B-Magnum-v4-SE
+ - https://huggingface.co/bartowski/L3.3-70B-Magnum-v4-SE-GGUF
+ description: |
+ The Magnum v4 series is complete, but here's something a little extra I wanted to tack on as I wasn't entirely satisfied with the results of v4 72B. "SE" for Special Edition - this model is finetuned from meta-llama/Llama-3.3-70B-Instruct as an rsLoRA adapter. The dataset is a slightly revised variant of the v4 data with some elements of the v2 data re-introduced.
+
+ The objective, as with the other Magnum models, is to emulate the prose style and quality of the Claude 3 Sonnet/Opus series of models on a local scale, so don't be surprised to see "Claude-isms" in its output.
+ overrides:
+ parameters:
+ model: L3.3-70B-Magnum-v4-SE-Q4_K_M.gguf
+ files:
+ - filename: L3.3-70B-Magnum-v4-SE-Q4_K_M.gguf
+ sha256: 9724a6364a42caa3d5a1687258eb329c9af6cbb2ce01c8dd556c1a222a2e0352
+ uri: huggingface://bartowski/L3.3-70B-Magnum-v4-SE-GGUF/L3.3-70B-Magnum-v4-SE-Q4_K_M.gguf
- &rwkv
url: "github:mudler/LocalAI/gallery/rwkv.yaml@master"
name: "rwkv-6-world-7b"
From aeb1dca52ef940ec23f3ffddc7af2cc9afac69a7 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Mon, 20 Jan 2025 11:03:35 +0100
Subject: [PATCH 10/29] chore(model gallery): add l3.3-prikol-70b-v0.2 (#4643)
Signed-off-by: Ettore Di Giacinto
---
gallery/index.yaml | 21 +++++++++++++++++++++
1 file changed, 21 insertions(+)
diff --git a/gallery/index.yaml b/gallery/index.yaml
index d10cd32e..679ab002 100644
--- a/gallery/index.yaml
+++ b/gallery/index.yaml
@@ -435,6 +435,27 @@
- filename: L3.3-70B-Magnum-v4-SE-Q4_K_M.gguf
sha256: 9724a6364a42caa3d5a1687258eb329c9af6cbb2ce01c8dd556c1a222a2e0352
uri: huggingface://bartowski/L3.3-70B-Magnum-v4-SE-GGUF/L3.3-70B-Magnum-v4-SE-Q4_K_M.gguf
+- !!merge <<: *llama33
+ name: "l3.3-prikol-70b-v0.2"
+ icon: https://files.catbox.moe/x9t3zo.png
+ urls:
+ - https://huggingface.co/Nohobby/L3.3-Prikol-70B-v0.2
+ - https://huggingface.co/bartowski/L3.3-Prikol-70B-v0.2-GGUF
+ description: |
+ A merge of some Llama 3.3 models because um uh yeah
+
+ Went extra schizo on the recipe, hoping for an extra fun result, and... Well, I guess it's an overall improvement over the previous revision. It's a tiny bit smarter, has even more distinct swipes and nice dialogues, but for some reason it's damn sloppy.
+
+ I've published the second step of this merge as a separate model, and I'd say the results are more interesting, but not as usable as this one. https://huggingface.co/Nohobby/AbominationSnowPig
+
+ Prompt format: Llama3 OR Llama3 Context and ChatML Instruct. It actually works a bit better this way
+ overrides:
+ parameters:
+ model: L3.3-Prikol-70B-v0.2-Q4_K_M.gguf
+ files:
+ - filename: L3.3-Prikol-70B-v0.2-Q4_K_M.gguf
+ sha256: fc0ff514efbc0b67981c2bf1423d5a2e1b8801e4266ba0c653ea148414fe5ffc
+ uri: huggingface://bartowski/L3.3-Prikol-70B-v0.2-GGUF/L3.3-Prikol-70B-v0.2-Q4_K_M.gguf
- &rwkv
url: "github:mudler/LocalAI/gallery/rwkv.yaml@master"
name: "rwkv-6-world-7b"
From a396040886fb5e2e13dee72811605956c7506ebc Mon Sep 17 00:00:00 2001
From: Gianluca Boiano <491117+M0Rf30@users.noreply.github.com>
Date: Mon, 20 Jan 2025 16:13:19 +0100
Subject: [PATCH 11/29] chore(model gallery): remove dead icons and update
LLAVA and DeepSeek ones (#4645)
* chore(model gallery): update icons and add LLAVA ones
Signed-off-by: Gianluca Boiano
* chore(model gallery): fix all complaints related to yamllint
Signed-off-by: Gianluca Boiano
---------
Signed-off-by: Gianluca Boiano
---
gallery/index.yaml | 69 +++++++++++++++++++++-------------------------
1 file changed, 31 insertions(+), 38 deletions(-)
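Most of the churn in this patch is whitespace only: yamllint's comments rule requires at least two spaces between content and an inline "#" comment, which is why anchor lines such as "- &llama32 ## llama3.2" pick up an extra space. A minimal sketch of running the same check locally; the inline rule set is an assumption, not necessarily the repository's actual yamllint configuration:

    pip install yamllint
    yamllint -d '{extends: default, rules: {comments: {min-spaces-from-content: 2}}}' gallery/index.yaml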
diff --git a/gallery/index.yaml b/gallery/index.yaml
index 679ab002..30687062 100644
--- a/gallery/index.yaml
+++ b/gallery/index.yaml
@@ -819,7 +819,7 @@
- filename: salamandra-7b-instruct.Q4_K_M-f32.gguf
sha256: bac8e8c1d1d9d53cbdb148b8ff9ad378ddb392429207099e85b5aae3a43bff3d
uri: huggingface://cstr/salamandra-7b-instruct-GGUF/salamandra-7b-instruct.Q4_K_M-f32.gguf
-- &llama32 ## llama3.2
+- &llama32 ## llama3.2
url: "github:mudler/LocalAI/gallery/llama3.1-instruct.yaml@master"
icon: https://avatars.githubusercontent.com/u/153379578
license: llama3.2
@@ -1354,7 +1354,7 @@
- filename: FineMath-Llama-3B-Q4_K_M.gguf
sha256: 16c73b5cf2a417a7e1608bcc9469f1461fc3e759ce04a3a337f48df977dc158c
uri: huggingface://bartowski/FineMath-Llama-3B-GGUF/FineMath-Llama-3B-Q4_K_M.gguf
-- &qwen25 ## Qwen2.5
+- &qwen25 ## Qwen2.5
name: "qwen2.5-14b-instruct"
icon: https://avatars.githubusercontent.com/u/141221163
url: "github:mudler/LocalAI/gallery/chatml.yaml@master"
@@ -2181,7 +2181,6 @@
sha256: 42cf7a96784dc8f25c61c2404620c3e6548a024caa8dff6e435d7c86400d7ab8
uri: huggingface://mradermacher/Qwen2.5-7B-nerd-uncensored-v1.7-GGUF/Qwen2.5-7B-nerd-uncensored-v1.7.Q4_K_M.gguf
- !!merge <<: *qwen25
- icon: https://i.imgur.com/OxX2Usi.png
name: "evathene-v1.0"
urls:
- https://huggingface.co/sophosympatheia/Evathene-v1.0
@@ -2540,7 +2539,6 @@
sha256: 91907f29746625a62885793475956220b81d8a5a34b53686a1acd1d03fd403ea
uri: huggingface://bartowski/72B-Qwen2.5-Kunou-v1-GGUF/72B-Qwen2.5-Kunou-v1-Q4_K_M.gguf
- !!merge <<: *qwen25
- icon: https://i.imgur.com/OxX2Usi.png
name: "evathene-v1.3"
urls:
- https://huggingface.co/sophosympatheia/Evathene-v1.3
@@ -3276,7 +3274,7 @@
- filename: DRT-o1-14B-Q4_K_M.gguf
sha256: 9619ca984cf4ce8e4f69bcde831de17b2ce05dd89536e3130608877521e3d328
uri: huggingface://bartowski/DRT-o1-14B-GGUF/DRT-o1-14B-Q4_K_M.gguf
-- &smollm ## SmolLM
+- &smollm ## SmolLM
url: "github:mudler/LocalAI/gallery/chatml.yaml@master"
name: "smollm-1.7b-instruct"
icon: https://huggingface.co/datasets/HuggingFaceTB/images/resolve/main/banner_smol.png
@@ -3334,7 +3332,7 @@
- filename: Vikhr-Qwen-2.5-1.5B-Instruct.Q4_K_M.gguf
sha256: eaeac314e30b461413bc1cc819cdc0cd6a79265711fd0b8268702960a082c7bd
uri: huggingface://QuantFactory/Vikhr-Qwen-2.5-1.5B-Instruct-GGUF/Vikhr-Qwen-2.5-1.5B-Instruct.Q4_K_M.gguf
-- &llama31 ## LLama3.1
+- &llama31 ## LLama3.1
url: "github:mudler/LocalAI/gallery/llama3.1-instruct.yaml@master"
icon: https://avatars.githubusercontent.com/u/153379578
name: "meta-llama-3.1-8b-instruct"
@@ -4485,7 +4483,6 @@
sha256: 27b10c3ca4507e8bf7d305d60e5313b54ef5fffdb43a03f36223d19d906e39f3
uri: huggingface://mradermacher/L3.1-70Blivion-v0.1-rc1-70B-i1-GGUF/L3.1-70Blivion-v0.1-rc1-70B.i1-Q4_K_M.gguf
- !!merge <<: *llama31
- icon: https://i.imgur.com/sdN0Aqg.jpeg
name: "llama-3.1-hawkish-8b"
urls:
- https://huggingface.co/mukaj/Llama-3.1-Hawkish-8B
@@ -5222,10 +5219,10 @@
- filename: Dolphin3.0-Llama3.1-8B-Q4_K_M.gguf
sha256: 268390e07edd407ad93ea21a868b7ae995b5950e01cad0db9e1802ae5049d405
uri: huggingface://bartowski/Dolphin3.0-Llama3.1-8B-GGUF/Dolphin3.0-Llama3.1-8B-Q4_K_M.gguf
-- &deepseek ## Deepseek
+- &deepseek ## Deepseek
url: "github:mudler/LocalAI/gallery/deepseek.yaml@master"
name: "deepseek-coder-v2-lite-instruct"
- icon: "https://github.com/deepseek-ai/DeepSeek-V2/blob/main/figures/logo.svg?raw=true"
+ icon: "https://avatars.githubusercontent.com/u/148330874"
license: deepseek
description: |
DeepSeek-Coder-V2, an open-source Mixture-of-Experts (MoE) code language model that achieves performance comparable to GPT4-Turbo in code-specific tasks. Specifically, DeepSeek-Coder-V2 is further pre-trained from DeepSeek-Coder-V2-Base with 6 trillion tokens sourced from a high-quality and multi-source corpus. Through this continued pre-training, DeepSeek-Coder-V2 substantially enhances the coding and mathematical reasoning capabilities of DeepSeek-Coder-V2-Base, while maintaining comparable performance in general language tasks. Compared to DeepSeek-Coder, DeepSeek-Coder-V2 demonstrates significant advancements in various aspects of code-related tasks, as well as reasoning and general capabilities. Additionally, DeepSeek-Coder-V2 expands its support for programming languages from 86 to 338, while extending the context length from 16K to 128K.
@@ -5287,7 +5284,7 @@
- filename: archangel_sft_pythia2-8b.Q4_K_M.gguf
sha256: a47782c55ef2b39b19644213720a599d9849511a73c9ebb0c1de749383c0a0f8
uri: huggingface://RichardErkhov/ContextualAI_-_archangel_sft_pythia2-8b-gguf/archangel_sft_pythia2-8b.Q4_K_M.gguf
-- &qwen2 ## Start QWEN2
+- &qwen2 ## Start QWEN2
url: "github:mudler/LocalAI/gallery/chatml.yaml@master"
name: "qwen2-7b-instruct"
icon: https://avatars.githubusercontent.com/u/141221163
@@ -5647,7 +5644,7 @@
- filename: minicpm-v-2_6-mmproj-f16.gguf
sha256: f8a805e9e62085805c69c427287acefc284932eb4abfe6e1b1ce431d27e2f4e0
uri: huggingface://openbmb/MiniCPM-V-2_6-gguf/mmproj-model-f16.gguf
-- &mistral03 ## START Mistral
+- &mistral03 ## START Mistral
url: "github:mudler/LocalAI/gallery/mistral-0.3.yaml@master"
name: "mistral-7b-instruct-v0.3"
icon: https://cdn-avatars.huggingface.co/v1/production/uploads/62dac1c7a8ead43d20e3e17a/wrLf5yaGC6ng4XME70w6Z.png
@@ -6155,7 +6152,6 @@
- !!merge <<: *mistral03
name: "mn-12b-mag-mell-r1-iq-arm-imatrix"
url: "github:mudler/LocalAI/gallery/chatml.yaml@master"
- icon: "https://i.imgur.com/wjyAaTO.png"
urls:
- https://huggingface.co/inflatebot/MN-12B-Mag-Mell-R1
- https://huggingface.co/Lewdiculous/MN-12B-Mag-Mell-R1-GGUF-IQ-ARM-Imatrix
@@ -6280,7 +6276,7 @@
- filename: Wayfarer-12B-Q4_K_M.gguf
sha256: 6cd9f290c820c64854fcdcfd312b066447acc2f63abe2e2e71af9bc4f1946c08
uri: huggingface://bartowski/Wayfarer-12B-GGUF/Wayfarer-12B-Q4_K_M.gguf
-- &mudler ### START mudler's LocalAI specific-models
+- &mudler ### START mudler's LocalAI specific-models
url: "github:mudler/LocalAI/gallery/mudler.yaml@master"
name: "LocalAI-llama3-8b-function-call-v0.2"
icon: "https://cdn-uploads.huggingface.co/production/uploads/647374aa7ff32a81ac6d35d4/us5JKi9z046p8K-cn_M0w.webp"
@@ -6325,7 +6321,7 @@
- filename: Mirai-Nova-Llama3-LocalAI-8B-v0.1-q4_k_m.bin
sha256: 579cbb229f9c11d0330759ff4733102d2491615a4c61289e26c09d1b3a583fec
uri: huggingface://mudler/Mirai-Nova-Llama3-LocalAI-8B-v0.1-GGUF/Mirai-Nova-Llama3-LocalAI-8B-v0.1-q4_k_m.bin
-- &parler-tts ### START parler-tts
+- &parler-tts ### START parler-tts
url: "github:mudler/LocalAI/gallery/parler-tts.yaml@master"
name: parler-tts-mini-v0.1
overrides:
@@ -6342,7 +6338,7 @@
- cpu
- text-to-speech
- python
-- &rerankers ### START rerankers
+- &rerankers ### START rerankers
url: "github:mudler/LocalAI/gallery/rerankers.yaml@master"
name: cross-encoder
parameters:
@@ -7265,10 +7261,9 @@
name: "l3-8b-stheno-v3.1"
urls:
- https://huggingface.co/Sao10K/L3-8B-Stheno-v3.1
- icon: https://w.forfun.com/fetch/cb/cba2205390e517bea1ea60ca0b491af4.jpeg
description: |
- A model made for 1-on-1 Roleplay ideally, but one that is able to handle scenarios, RPGs and storywriting fine.
- - Uncensored during actual roleplay scenarios. # I do not care for zero-shot prompting like what some people do. It is uncensored enough in actual usecases.
+ - Uncensored during actual roleplay scenarios. # I do not care for zero-shot prompting like what some people do. It is uncensored enough in actual usecases.
- I quite like the prose and style for this model.
overrides:
parameters:
@@ -8059,7 +8054,6 @@
urls:
- https://huggingface.co/bartowski/New-Dawn-Llama-3-70B-32K-v1.0-GGUF
- https://huggingface.co/sophosympatheia/New-Dawn-Llama-3-70B-32K-v1.0
- icon: https://imgur.com/tKzncGo.png
description: |
This model is a multi-level SLERP merge of several Llama 3 70B variants. See the merge recipe below for details. I extended the context window for this model out to 32K by snagging some layers from abacusai/Smaug-Llama-3-70B-Instruct-32K using a technique similar to what I used for Midnight Miqu, which was further honed by jukofyork.
This model is uncensored. You are responsible for whatever you do with it.
@@ -8411,7 +8405,8 @@
- filename: dolphin-2.9.2-Phi-3-Medium-abliterated-Q4_K_M.gguf
sha256: 566331c2efe87725310aacb709ca15088a0063fa0ddc14a345bf20d69982156b
uri: huggingface://bartowski/dolphin-2.9.2-Phi-3-Medium-abliterated-GGUF/dolphin-2.9.2-Phi-3-Medium-abliterated-Q4_K_M.gguf
-- url: "github:mudler/LocalAI/gallery/chatml.yaml@master"
+- !!merge <<: *llama3
+ url: "github:mudler/LocalAI/gallery/chatml.yaml@master"
name: "llama-3-8b-instruct-dpo-v0.3-32k"
license: llama3
urls:
@@ -8595,7 +8590,7 @@
- filename: Copus-2x8B.i1-Q4_K_M.gguf
sha256: 685da1ba49e203e8f491105585143d76044286d4b4687bed37d325f6b55501e5
uri: huggingface://mradermacher/Copus-2x8B-i1-GGUF/Copus-2x8B.i1-Q4_K_M.gguf
-- &yi-chat ### Start Yi
+- &yi-chat ### Start Yi
url: "github:mudler/LocalAI/gallery/chatml.yaml@master"
icon: "https://github.com/01-ai/Yi/raw/main/assets/img/Yi_logo_icon_light.svg"
name: "yi-1.5-9b-chat"
@@ -8806,7 +8801,7 @@
- filename: Fimbulvetr-11B-v2-Q4_K_M-imat.gguf
sha256: 3f309b59508342536a70edd6c4be6cf4f2cb97f2e32cbc79ad2ab3f4c02933a4
uri: huggingface://Lewdiculous/Fimbulvetr-11B-v2-GGUF-IQ-Imatrix/Fimbulvetr-11B-v2-Q4_K_M-imat.gguf
-- &noromaid ### Start noromaid
+- &noromaid ### Start noromaid
url: "github:mudler/LocalAI/gallery/noromaid.yaml@master"
name: "noromaid-13b-0.4-DPO"
icon: https://cdn-uploads.huggingface.co/production/uploads/630dfb008df86f1e5becadc3/VKX2Z2yjZX5J8kXzgeCYO.png
@@ -8826,7 +8821,7 @@
- filename: Noromaid-13B-0.4-DPO.q4_k_m.gguf
sha256: cb28e878d034fae3d0b43326c5fc1cfb4ab583b17c56e41d6ce023caec03c1c1
uri: huggingface://NeverSleep/Noromaid-13B-0.4-DPO-GGUF/Noromaid-13B-0.4-DPO.q4_k_m.gguf
-- &wizardlm2 ### START Vicuna based
+- &wizardlm2 ### START Vicuna based
url: "github:mudler/LocalAI/gallery/wizardlm2.yaml@master"
name: "wizardlm2-7b"
description: |
@@ -8881,7 +8876,9 @@
- filename: moondream2-mmproj-f16.gguf
sha256: 4cc1cb3660d87ff56432ebeb7884ad35d67c48c7b9f6b2856f305e39c38eed8f
uri: huggingface://moondream/moondream2-gguf/moondream2-mmproj-f16.gguf
-- &llava ### START LLaVa
+- &llava ### START LLaVa
+ name: "llava-1.6-vicuna"
+ icon: https://github.com/lobehub/lobe-icons/raw/master/packages/static-png/dark/llava-color.png
url: "github:mudler/LocalAI/gallery/llava.yaml@master"
license: apache-2.0
description: |
@@ -8895,7 +8892,6 @@
- gpu
- llama2
- cpu
- name: "llava-1.6-vicuna"
overrides:
mmproj: mmproj-vicuna7b-f16.gguf
parameters:
@@ -9363,7 +9359,6 @@
June 18, 2024 Update, After extensive testing of the intermediate checkpoints, significant progress has been made.
The model is slowly — I mean, really slowly — unlearning its alignment. By significantly lowering the learning rate, I was able to visibly observe deep behavioral changes, this process is taking longer than anticipated, but it's going to be worth it. Estimated time to completion: 4 more days.. I'm pleased to report that in several tests, the model not only maintained its intelligence but actually showed a slight improvement, especially in terms of common sense. An intermediate checkpoint of this model was used to create invisietch/EtherealRainbow-v0.3-rc7, with promising results. Currently, it seems like I'm on the right track. I hope this model will serve as a solid foundation for further merges, whether for role-playing (RP) or for uncensoring. This approach also allows us to save on actual fine-tuning, thereby reducing our carbon footprint. The merge process takes just a few minutes of CPU time, instead of days of GPU work.
June 20, 2024 Update, Unaligning was partially successful, and the results are decent, but I am not fully satisfied. I decided to bite the bullet, and do a full finetune, god have mercy on my GPUs. I am also releasing the intermediate checkpoint of this model.
- icon: https://i.imgur.com/Kpk1PgZ.png
overrides:
parameters:
model: LLAMA-3_8B_Unaligned_Alpha-Q4_K_M.gguf
@@ -9389,7 +9384,6 @@
uri: huggingface://bartowski/L3-8B-Lunaris-v1-GGUF/L3-8B-Lunaris-v1-Q4_K_M.gguf
- !!merge <<: *llama3
name: "llama-3_8b_unaligned_alpha_rp_soup-i1"
- icon: https://i.imgur.com/pXcjpoV.png
urls:
- https://huggingface.co/SicariusSicariiStuff/LLAMA-3_8B_Unaligned_Alpha_RP_Soup
- https://huggingface.co/mradermacher/LLAMA-3_8B_Unaligned_Alpha_RP_Soup-i1-GGUF
@@ -9739,7 +9733,7 @@
- filename: Freyja-v4.95-maldv-7b-NON-FICTION.i1-Q4_K_M.gguf
sha256: cdc0f4de6df2ba120835fbd25c2a0ae2af8548f46d2c40c7a018c51c3d19e0c0
uri: huggingface://mradermacher/Freyja-v4.95-maldv-7b-NON-FICTION-i1-GGUF/Freyja-v4.95-maldv-7b-NON-FICTION.i1-Q4_K_M.gguf
-- &chatml ### ChatML
+- &chatml ### ChatML
url: "github:mudler/LocalAI/gallery/chatml.yaml@master"
name: "una-thepitbull-21.4b-v2"
license: afl-3.0
@@ -9787,7 +9781,6 @@
sha256: 9c90f3a65332a03a6cbb563eee19c7586d9544f646ff9f33f7f1904b3d415ae2
uri: huggingface://nold/HelpingAI-9B-GGUF/HelpingAI-9B_Q4_K_M.gguf
- url: "github:mudler/LocalAI/gallery/chatml-hercules.yaml@master"
- icon: "https://tse3.mm.bing.net/th/id/OIG1.vnrl3xpEcypR3McLW63q?pid=ImgGn"
urls:
- https://huggingface.co/Locutusque/Llama-3-Hercules-5.0-8B
- https://huggingface.co/bartowski/Llama-3-Hercules-5.0-8B-GGUF
@@ -10025,7 +10018,7 @@
- filename: Triangulum-10B.Q4_K_M.gguf
sha256: dd071f99edf6b166044bf229cdeec19419c4c348e3fc3d6587cfcc55e6fb85fa
uri: huggingface://mradermacher/Triangulum-10B-GGUF/Triangulum-10B.Q4_K_M.gguf
-- &command-R ### START Command-r
+- &command-R ### START Command-r
url: "github:mudler/LocalAI/gallery/command-r.yaml@master"
name: "command-r-v01:q1_s"
license: "cc-by-nc-4.0"
@@ -10080,7 +10073,7 @@
- filename: "aya-23-35B-Q4_K_M.gguf"
sha256: "57824768c1a945e21e028c8e9a29b39adb4838d489f5865c82601ab9ad98065d"
uri: "huggingface://bartowski/aya-23-35B-GGUF/aya-23-35B-Q4_K_M.gguf"
-- &phi-2-chat ### START Phi-2
+- &phi-2-chat ### START Phi-2
url: "github:mudler/LocalAI/gallery/phi-2-chat.yaml@master"
license: mit
description: |
@@ -10202,7 +10195,7 @@
- filename: internlm3-8b-instruct-Q4_K_M.gguf
uri: huggingface://bartowski/internlm3-8b-instruct-GGUF/internlm3-8b-instruct-Q4_K_M.gguf
sha256: 2a9644687318e8659c9cf9b40730d5cc2f5af06f786a50439c7c51359b23896e
-- &phi-3 ### START Phi-3
+- &phi-3 ### START Phi-3
url: "github:mudler/LocalAI/gallery/phi-3-chat.yaml@master"
name: "phi-3-mini-4k-instruct"
icon: https://avatars.githubusercontent.com/u/6154722
@@ -10402,7 +10395,7 @@
- filename: Phi-3.5-MoE-instruct-Q4_K_M.gguf
sha256: 43e91bb720869bd8a92d8eb86bc3c74a52c49cf61642ca709b3d7bb89644df36
uri: huggingface://bartowski/Phi-3.5-MoE-instruct-GGUF/Phi-3.5-MoE-instruct-Q4_K_M.gguf
-- &hermes-2-pro-mistral ### START Hermes
+- &hermes-2-pro-mistral ### START Hermes
url: "github:mudler/LocalAI/gallery/hermes-2-pro-mistral.yaml@master"
name: "hermes-2-pro-mistral"
icon: https://cdn-uploads.huggingface.co/production/uploads/6317aade83d8d2fd903192d9/ggO2sBDJ8Bhc6w-zwTx5j.png
@@ -10738,7 +10731,7 @@
- filename: "galatolo-Q4_K.gguf"
sha256: "ca0cfd5a9ad40dc16416aa3a277015d0299b62c0803b67f5709580042202c172"
uri: "huggingface://galatolo/cerbero-7b-gguf/ggml-model-Q4_K.gguf"
-- &codellama ### START Codellama
+- &codellama ### START Codellama
url: "github:mudler/LocalAI/gallery/codellama.yaml@master"
name: "codellama-7b"
license: llama2
@@ -10869,7 +10862,7 @@
- filename: "llm-compiler-7b-ftd.Q4_K.gguf"
uri: "huggingface://legraphista/llm-compiler-7b-ftd-IMat-GGUF/llm-compiler-7b-ftd.Q4_K.gguf"
sha256: d862dd18ed335413787d0ad196522a9902a3c10a6456afdab8721822cb0ddde8
-- &openvino ### START OpenVINO
+- &openvino ### START OpenVINO
url: "github:mudler/LocalAI/gallery/openvino.yaml@master"
name: "openvino-llama-3-8b-instruct-ov-int8"
license: llama3
@@ -10983,7 +10976,7 @@
- gpu
- embedding
- cpu
-- &sentencentransformers ### START Embeddings
+- &sentencentransformers ### START Embeddings
description: |
This framework provides an easy method to compute dense vector representations for sentences, paragraphs, and images. The models are based on transformer networks like BERT / RoBERTa / XLM-RoBERTa etc. and achieve state-of-the-art performance in various tasks. Text is embedded in vector space such that similar text are closer and can efficiently be found using cosine similarity.
urls:
@@ -10998,7 +10991,7 @@
overrides:
parameters:
model: all-MiniLM-L6-v2
-- &dreamshaper ### START Image generation
+- &dreamshaper ### START Image generation
name: dreamshaper
icon: https://image.civitai.com/xG1nkqKTMzGDvpLrqFT7WA/dd9b038c-bd15-43ab-86ab-66e145ad7ff2/width=450/26072158-132340247-8k%20portrait%20of%20beautiful%20cyborg%20with%20brown%20hair,%20intricate,%20elegant,%20highly%20detailed,%20majestic,%20digital%20photography,%20art%20by%20artg_ed.jpeg
license: other
@@ -11110,7 +11103,7 @@
- filename: t5xxl_fp16.safetensors
sha256: 6e480b09fae049a72d2a8c5fbccb8d3e92febeb233bbe9dfe7256958a9167635
uri: https://huggingface.co/comfyanonymous/flux_text_encoders/resolve/main/t5xxl_fp16.safetensors
-- &whisper ## Whisper
+- &whisper ## Whisper
url: "github:mudler/LocalAI/gallery/whisper-base.yaml@master"
name: "whisper-1"
license: "MIT"
@@ -11290,7 +11283,7 @@
description: |
Stable Diffusion in NCNN with c++, supported txt2img and img2img
name: stablediffusion-cpp
-- &piper ## Piper TTS
+- &piper ## Piper TTS
url: github:mudler/LocalAI/gallery/piper.yaml@master
name: voice-en-us-kathleen-low
icon: https://github.com/rhasspy/piper/raw/master/etc/logo.png
From 2f09aa1b850535d2cb820a49c19c9159867c1f0b Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Mon, 20 Jan 2025 19:04:23 +0100
Subject: [PATCH 12/29] chore(model gallery): add sd-3.5-large-ggml (#4647)
Signed-off-by: Ettore Di Giacinto
---
gallery/index.yaml | 30 ++++++++++++++++++++++++++++++
gallery/sd-ggml.yaml | 12 ++++++++++++
2 files changed, 42 insertions(+)
create mode 100644 gallery/sd-ggml.yaml
diff --git a/gallery/index.yaml b/gallery/index.yaml
index 30687062..bcb7866a 100644
--- a/gallery/index.yaml
+++ b/gallery/index.yaml
@@ -11028,6 +11028,36 @@
- sd-3
- gpu
url: "github:mudler/LocalAI/gallery/stablediffusion3.yaml@master"
+- name: sd-3.5-large-ggml
+ license: stabilityai-ai-community
+ url: "github:mudler/LocalAI/gallery/sd-ggml.yaml@master"
+ description: |
+ Stable Diffusion 3.5 Large is a Multimodal Diffusion Transformer (MMDiT) text-to-image model that features improved performance in image quality, typography, complex prompt understanding, and resource-efficiency.
+ urls:
+ - https://huggingface.co/stabilityai/stable-diffusion-3.5-large
+ - https://huggingface.co/second-state/stable-diffusion-3.5-large-GGUF
+ tags:
+ - text-to-image
+ - flux
+ - gpu
+ - cpu
+ icon: https://huggingface.co/stabilityai/stable-diffusion-3.5-large/media/main/sd3.5_large_demo.png
+ overrides:
+ parameters:
+ model: sd3.5_large-Q4_0.gguf
+ files:
+ - filename: "sd3.5_large-Q4_0.gguf"
+ sha256: "c79ed6cdaa7decaca6b05ccc636b956b37c47de9b104c56315ca8ed086347b00"
+ uri: "huggingface://second-state/stable-diffusion-3.5-large-GGUF/sd3.5_large-Q4_0.gguf"
+ - filename: clip_g.safetensors
+ sha256: ec310df2af79c318e24d20511b601a591ca8cd4f1fce1d8dff822a356bcdb1f4
+ uri: huggingface://second-state/stable-diffusion-3.5-large-GGUF/clip_g.safetensors
+ - filename: clip_l.safetensors
+ sha256: 660c6f5b1abae9dc498ac2d21e1347d2abdb0cf6c0c0c8576cd796491d9a6cdd
+ uri: huggingface://second-state/stable-diffusion-3.5-large-GGUF/clip_l.safetensors
+ - filename: t5xxl-Q5_0.gguf
+ sha256: f4df16c641a05c4a6ca717068ba3ee312875000f6fac0efbd152915553b5fc3e
+ uri: huggingface://second-state/stable-diffusion-3.5-large-GGUF/t5xxl-Q5_0.gguf
- &flux
name: flux.1-dev
license: flux-1-dev-non-commercial-license
diff --git a/gallery/sd-ggml.yaml b/gallery/sd-ggml.yaml
new file mode 100644
index 00000000..d819eba8
--- /dev/null
+++ b/gallery/sd-ggml.yaml
@@ -0,0 +1,12 @@
+---
+name: "sd-ggml"
+
+config_file: |
+ backend: stablediffusion-ggml
+ step: 25
+ cfg_scale: 4.5
+ options:
+ - "clip_l_path:clip_l.safetensors"
+ - "clip_g_path:clip_g.safetensors"
+ - "t5xxl_path:t5xxl-Q5_0.gguf"
+ - "sampler:euler"
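For quick reference, a minimal sketch of generating an image with the entry added above through LocalAI's OpenAI-compatible images endpoint. It assumes a local instance on localhost:8080 with the gallery model already installed and the default URL response format; the prompt and size are illustrative placeholders only.

import requests

# Assumes LocalAI is running locally and sd-3.5-large-ggml was installed from the gallery;
# the endpoint and response shape follow the OpenAI-compatible images API LocalAI exposes.
resp = requests.post(
    "http://localhost:8080/v1/images/generations",
    json={
        "model": "sd-3.5-large-ggml",
        "prompt": "a lighthouse at dusk, photorealistic",
        "size": "512x512",
    },
    timeout=600,
)
resp.raise_for_status()
print(resp.json()["data"][0]["url"])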
From 14a1e02f4478cef20d723f9fa91f0645c856b7c8 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Mon, 20 Jan 2025 23:33:40 +0000
Subject: [PATCH 13/29] chore(deps): Bump docs/themes/hugo-theme-relearn from
`80e448e` to `8dad5ee` (#4656)
chore(deps): Bump docs/themes/hugo-theme-relearn
Bumps [docs/themes/hugo-theme-relearn](https://github.com/McShelby/hugo-theme-relearn) from `80e448e` to `8dad5ee`.
- [Release notes](https://github.com/McShelby/hugo-theme-relearn/releases)
- [Commits](https://github.com/McShelby/hugo-theme-relearn/compare/80e448e5bdaa92c87ee0d0d86f1125c8606ebf5f...8dad5ee419e5bb2a0b380aa72d7a7389af4945f6)
---
updated-dependencies:
- dependency-name: docs/themes/hugo-theme-relearn
dependency-type: direct:production
...
Signed-off-by: dependabot[bot]
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
docs/themes/hugo-theme-relearn | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/docs/themes/hugo-theme-relearn b/docs/themes/hugo-theme-relearn
index 80e448e5..8dad5ee4 160000
--- a/docs/themes/hugo-theme-relearn
+++ b/docs/themes/hugo-theme-relearn
@@ -1 +1 @@
-Subproject commit 80e448e5bdaa92c87ee0d0d86f1125c8606ebf5f
+Subproject commit 8dad5ee419e5bb2a0b380aa72d7a7389af4945f6
From 1a08948e63ce48dd32524cf4f7df88e6b69e639d Mon Sep 17 00:00:00 2001
From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com>
Date: Tue, 21 Jan 2025 08:37:13 +0100
Subject: [PATCH 14/29] chore: :arrow_up: Update ggerganov/llama.cpp to
`aea8ddd5165d525a449e2fc3839db77a71f4a318` (#4657)
:arrow_up: Update ggerganov/llama.cpp
Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
---
Makefile | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/Makefile b/Makefile
index 7aaad492..53e5af7e 100644
--- a/Makefile
+++ b/Makefile
@@ -8,7 +8,7 @@ DETECT_LIBS?=true
# llama.cpp versions
GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp
GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
-CPPLLAMA_VERSION?=92bc493917d43b83e592349e138b54c90b1c3ea7
+CPPLLAMA_VERSION?=aea8ddd5165d525a449e2fc3839db77a71f4a318
# whisper.cpp version
WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp
From b264a91b3f24ed8b2ec4c3161a8405be4e7019ad Mon Sep 17 00:00:00 2001
From: Gianluca Boiano <491117+M0Rf30@users.noreply.github.com>
Date: Tue, 21 Jan 2025 10:37:05 +0100
Subject: [PATCH 15/29] chore(model gallery): add Deepseek-R1-Distill models
(#4646)
* chore(model gallery): add Deepseek-R1-Distill-Llama-8b
Signed-off-by: Gianluca Boiano
* chore(model gallery): add Deepseek-R1-Distill-Qwen-1.5b
Signed-off-by: Gianluca Boiano
---------
Signed-off-by: Gianluca Boiano
---
gallery/index.yaml | 34 ++++++++++++++++++++++++++++++++++
1 file changed, 34 insertions(+)
diff --git a/gallery/index.yaml b/gallery/index.yaml
index bcb7866a..126bd14a 100644
--- a/gallery/index.yaml
+++ b/gallery/index.yaml
@@ -2696,6 +2696,23 @@
- filename: Qwentile2.5-32B-Instruct-Q4_K_M.gguf
sha256: e476d6e3c15c78fc3f986d7ae8fa35c16116843827f2e6243c05767cef2f3615
uri: huggingface://bartowski/Qwentile2.5-32B-Instruct-GGUF/Qwentile2.5-32B-Instruct-Q4_K_M.gguf
+- !!merge <<: *qwen25
+ name: "deepseek-r1-distill-qwen-1.5b"
+ icon: "https://avatars.githubusercontent.com/u/148330874"
+ urls:
+ - https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5b
+ - https://huggingface.co/unsloth/DeepSeek-R1-Distill-Qwen-1.5B-GGUF
+ description: |
+ DeepSeek-R1 is our advanced first-generation reasoning model designed to enhance performance in reasoning tasks.
+ Building on the foundation laid by its predecessor, DeepSeek-R1-Zero, which was trained using large-scale reinforcement learning (RL) without supervised fine-tuning, DeepSeek-R1 addresses the challenges faced by R1-Zero, such as endless repetition, poor readability, and language mixing.
+    By incorporating cold-start data prior to the RL phase, DeepSeek-R1 significantly improves reasoning capabilities and achieves performance levels comparable to OpenAI-o1 across a variety of domains, including mathematics, coding, and complex reasoning tasks.
+ overrides:
+ parameters:
+ model: deepseek-r1-distill-qwen-1.5b-Q4_K_M.gguf
+ files:
+ - filename: deepseek-r1-distill-qwen-1.5b-Q4_K_M.gguf
+ sha256: c2c43b6018cf7700ce0ddee8807deb1a9a26758ef878232f3a142d16df81f0fe
+ uri: huggingface://unsloth/DeepSeek-R1-Distill-Qwen-1.5B-GGUF/DeepSeek-R1-Distill-Qwen-1.5B-Q4_K_M.gguf
- &archfunct
license: apache-2.0
tags:
@@ -5219,6 +5236,23 @@
- filename: Dolphin3.0-Llama3.1-8B-Q4_K_M.gguf
sha256: 268390e07edd407ad93ea21a868b7ae995b5950e01cad0db9e1802ae5049d405
uri: huggingface://bartowski/Dolphin3.0-Llama3.1-8B-GGUF/Dolphin3.0-Llama3.1-8B-Q4_K_M.gguf
+- !!merge <<: *llama31
+ name: "deepseek-r1-distill-llama-8b"
+ icon: "https://avatars.githubusercontent.com/u/148330874"
+ urls:
+ - https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Llama-8B
+ - https://huggingface.co/unsloth/DeepSeek-R1-Distill-Llama-8B-GGUF
+ description: |
+ DeepSeek-R1 is our advanced first-generation reasoning model designed to enhance performance in reasoning tasks.
+ Building on the foundation laid by its predecessor, DeepSeek-R1-Zero, which was trained using large-scale reinforcement learning (RL) without supervised fine-tuning, DeepSeek-R1 addresses the challenges faced by R1-Zero, such as endless repetition, poor readability, and language mixing.
+    By incorporating cold-start data prior to the RL phase, DeepSeek-R1 significantly improves reasoning capabilities and achieves performance levels comparable to OpenAI-o1 across a variety of domains, including mathematics, coding, and complex reasoning tasks.
+ overrides:
+ parameters:
+ model: deepseek-r1-distill-llama-8b-Q4_K_M.gguf
+ files:
+ - filename: deepseek-r1-distill-llama-8b-Q4_K_M.gguf
+ sha256: f8eba201522ab44b79bc54166126bfaf836111ff4cbf2d13c59c3b57da10573b
+ uri: huggingface://unsloth/DeepSeek-R1-Distill-Llama-8B-GGUF/DeepSeek-R1-Distill-Llama-8B-Q4_K_M.gguf
- &deepseek ## Deepseek
url: "github:mudler/LocalAI/gallery/deepseek.yaml@master"
name: "deepseek-coder-v2-lite-instruct"
From 6831719e1e74f5ed0f58c40999bce9a8f4066959 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Tue, 21 Jan 2025 15:09:36 +0100
Subject: [PATCH 16/29] chore(model gallery): add deepseek-r1-distill-qwen-7b
(#4660)
Signed-off-by: Ettore Di Giacinto
---
gallery/index.yaml | 16 ++++++++++++++++
1 file changed, 16 insertions(+)
diff --git a/gallery/index.yaml b/gallery/index.yaml
index 126bd14a..c56e37b1 100644
--- a/gallery/index.yaml
+++ b/gallery/index.yaml
@@ -2713,6 +2713,22 @@
- filename: deepseek-r1-distill-qwen-1.5b-Q4_K_M.gguf
sha256: c2c43b6018cf7700ce0ddee8807deb1a9a26758ef878232f3a142d16df81f0fe
uri: huggingface://unsloth/DeepSeek-R1-Distill-Qwen-1.5B-GGUF/DeepSeek-R1-Distill-Qwen-1.5B-Q4_K_M.gguf
+- !!merge <<: *qwen25
+ name: "deepseek-r1-distill-qwen-7b"
+ urls:
+ - https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B
+ - https://huggingface.co/bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF
+ description: |
+ DeepSeek-R1 is our advanced first-generation reasoning model designed to enhance performance in reasoning tasks.
+ Building on the foundation laid by its predecessor, DeepSeek-R1-Zero, which was trained using large-scale reinforcement learning (RL) without supervised fine-tuning, DeepSeek-R1 addresses the challenges faced by R1-Zero, such as endless repetition, poor readability, and language mixing.
+    By incorporating cold-start data prior to the RL phase, DeepSeek-R1 significantly improves reasoning capabilities and achieves performance levels comparable to OpenAI-o1 across a variety of domains, including mathematics, coding, and complex reasoning tasks.
+ overrides:
+ parameters:
+ model: DeepSeek-R1-Distill-Qwen-7B-Q4_K_M.gguf
+ files:
+ - filename: DeepSeek-R1-Distill-Qwen-7B-Q4_K_M.gguf
+ sha256: 731ece8d06dc7eda6f6572997feb9ee1258db0784827e642909d9b565641937b
+ uri: huggingface://bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF/DeepSeek-R1-Distill-Qwen-7B-Q4_K_M.gguf
- &archfunct
license: apache-2.0
tags:
From e81ceff6812c43c401c110eafbcc140747266ea2 Mon Sep 17 00:00:00 2001
From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com>
Date: Tue, 21 Jan 2025 23:04:29 +0100
Subject: [PATCH 17/29] chore: :arrow_up: Update ggerganov/llama.cpp to
`6171c9d25820ccf676b243c172868819d882848f` (#4661)
:arrow_up: Update ggerganov/llama.cpp
Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
---
Makefile | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/Makefile b/Makefile
index 53e5af7e..44959fd3 100644
--- a/Makefile
+++ b/Makefile
@@ -8,7 +8,7 @@ DETECT_LIBS?=true
# llama.cpp versions
GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp
GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
-CPPLLAMA_VERSION?=aea8ddd5165d525a449e2fc3839db77a71f4a318
+CPPLLAMA_VERSION?=6171c9d25820ccf676b243c172868819d882848f
# whisper.cpp version
WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp
From 0ec25b8b0743416a7ddd6f66f09dc1d1dd7fe07f Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Wed, 22 Jan 2025 16:37:20 +0100
Subject: [PATCH 18/29] chore(model gallery): add sd-1.5-ggml and
sd-3.5-medium-ggml (#4664)
Signed-off-by: Ettore Di Giacinto
---
gallery/index.yaml | 58 +++++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 57 insertions(+), 1 deletion(-)
diff --git a/gallery/index.yaml b/gallery/index.yaml
index c56e37b1..4ce19bb4 100644
--- a/gallery/index.yaml
+++ b/gallery/index.yaml
@@ -11078,6 +11078,62 @@
- sd-3
- gpu
url: "github:mudler/LocalAI/gallery/stablediffusion3.yaml@master"
+- name: sd-1.5-ggml
+ license: creativeml-openrail-m
+ url: "github:mudler/LocalAI/gallery/sd-ggml.yaml@master"
+ description: |
+ Stable Diffusion 1.5
+ urls:
+ - https://huggingface.co/second-state/stable-diffusion-v1-5-GGUF
+ tags:
+ - text-to-image
+ - stablediffusion
+ - gpu
+ - cpu
+ overrides:
+ options:
+ - "sampler:euler"
+ parameters:
+ model: stable-diffusion-v1-5-pruned-emaonly-Q4_0.gguf
+ files:
+ - filename: "stable-diffusion-v1-5-pruned-emaonly-Q4_0.gguf"
+ sha256: "b8944e9fe0b69b36ae1b5bb0185b3a7b8ef14347fe0fa9af6c64c4829022261f"
+ uri: "huggingface://second-state/stable-diffusion-v1-5-GGUF/stable-diffusion-v1-5-pruned-emaonly-Q4_0.gguf"
+- name: sd-3.5-medium-ggml
+ license: stabilityai-ai-community
+ url: "github:mudler/LocalAI/gallery/sd-ggml.yaml@master"
+ description: |
+ Stable Diffusion 3.5 Medium is a Multimodal Diffusion Transformer (MMDiT) text-to-image model that features improved performance in image quality, typography, complex prompt understanding, and resource-efficiency.
+ urls:
+ - https://huggingface.co/stabilityai/stable-diffusion-3.5-medium
+ - https://huggingface.co/second-state/stable-diffusion-3.5-medium-GGUF
+ tags:
+ - text-to-image
+ - stablediffusion
+ - gpu
+ - cpu
+ icon: https://huggingface.co/stabilityai/stable-diffusion-3.5-medium/media/main/sd3.5_medium_demo.jpg
+ overrides:
+ options:
+ - "clip_l_path:clip_l-Q4_0.gguf"
+ - "clip_g_path:clip_g-Q4_0.gguf"
+ - "t5xxl_path:t5xxl-Q4_0.gguf"
+ - "sampler:euler"
+ parameters:
+ model: sd3.5_medium-Q4_0.gguf
+ files:
+ - filename: "sd3.5_medium-Q4_0.gguf"
+ sha256: "3bb8c5e9ab0a841117089ed4ed81d885bb85161df2a766b812f829bc55b31adf"
+ uri: "huggingface://second-state/stable-diffusion-3.5-medium-GGUF/sd3.5_medium-Q4_0.gguf"
+ - filename: clip_g-Q4_0.gguf
+ sha256: c142411147e16b7c4b9cc1f5d977cbe596104435d76fde47172d3d35c5e58bb8
+ uri: huggingface://second-state/stable-diffusion-3.5-medium-GGUF/clip_g-Q4_0.gguf
+ - filename: clip_l-Q4_0.gguf
+ sha256: f5ad88ae2ac924eb4ac0298b77afa304b5e6014fc0c4128f0e3df40fdfcc0f8a
+ uri: huggingface://second-state/stable-diffusion-3.5-medium-GGUF/clip_l-Q4_0.gguf
+ - filename: t5xxl-Q4_0.gguf
+ sha256: 987ba47c158b890c274f78fd35324419f50941e846a49789f0977e9fe9d97ab7
+ uri: huggingface://second-state/stable-diffusion-3.5-medium-GGUF/t5xxl-Q4_0.gguf
- name: sd-3.5-large-ggml
license: stabilityai-ai-community
url: "github:mudler/LocalAI/gallery/sd-ggml.yaml@master"
@@ -11088,7 +11144,7 @@
- https://huggingface.co/second-state/stable-diffusion-3.5-large-GGUF
tags:
- text-to-image
- - flux
+ - stablediffusion
- gpu
- cpu
icon: https://huggingface.co/stabilityai/stable-diffusion-3.5-large/media/main/sd3.5_large_demo.png
From 10675ac28e80e990832c650174efec0e0d006838 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Wed, 22 Jan 2025 18:07:30 +0100
Subject: [PATCH 19/29] Update README.md
Signed-off-by: Ettore Di Giacinto
---
README.md | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/README.md b/README.md
index 4d415d16..78267e04 100644
--- a/README.md
+++ b/README.md
@@ -39,7 +39,7 @@
-
+
> :bulb: Get help - [❓FAQ](https://localai.io/faq/) [💭Discussions](https://github.com/go-skynet/LocalAI/discussions) [:speech_balloon: Discord](https://discord.gg/uJAeKSAGDy) [:book: Documentation website](https://localai.io/)
From e15d29aba2982d07cb2bfec9267c076d73eab2b5 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Wed, 22 Jan 2025 19:34:16 +0100
Subject: [PATCH 20/29] chore(stablediffusion-ncn): drop in favor of ggml
implementation (#4652)
* chore(stablediffusion-ncn): drop in favor of ggml implementation
Signed-off-by: Ettore Di Giacinto
* chore(ci): drop stablediffusion build
Signed-off-by: Ettore Di Giacinto
* chore(tests): add
Signed-off-by: Ettore Di Giacinto
* chore(tests): try to fixup current tests
Signed-off-by: Ettore Di Giacinto
* Try to fix tests
Signed-off-by: Ettore Di Giacinto
* Tests improvements
Signed-off-by: Ettore Di Giacinto
* chore(tests): use quality to specify step
Signed-off-by: Ettore Di Giacinto
* chore(tests): switch to sd-1.5
also increase prep time for downloading models
Signed-off-by: Ettore Di Giacinto
---------
Signed-off-by: Ettore Di Giacinto
---
.devcontainer/docker-compose-devcontainer.yml | 2 +-
.env | 6 +-
.github/workflows/release.yaml | 35 +----------
.github/workflows/test.yml | 6 +-
.vscode/launch.json | 2 +-
Dockerfile | 38 +-----------
Makefile | 36 +----------
aio/cpu/image-gen.yaml | 59 +++---------------
backend/go/image/stablediffusion/main.go | 21 -------
.../image/stablediffusion/stablediffusion.go | 33 ----------
core/config/backend_config.go | 2 +-
core/config/config_test.go | 61 +++++++++++++++++++
core/http/app_test.go | 17 +++---
core/http/endpoints/openai/image.go | 6 +-
core/http/endpoints/openai/request.go | 9 +++
core/schema/openai.go | 5 +-
pkg/model/initializers.go | 9 +--
pkg/stablediffusion/generate.go | 35 -----------
pkg/stablediffusion/generate_unsupported.go | 10 ---
pkg/stablediffusion/stablediffusion.go | 20 ------
tests/e2e-aio/e2e_suite_test.go | 2 +-
tests/e2e-aio/e2e_test.go | 11 ++--
22 files changed, 123 insertions(+), 302 deletions(-)
delete mode 100644 backend/go/image/stablediffusion/main.go
delete mode 100644 backend/go/image/stablediffusion/stablediffusion.go
delete mode 100644 pkg/stablediffusion/generate.go
delete mode 100644 pkg/stablediffusion/generate_unsupported.go
delete mode 100644 pkg/stablediffusion/stablediffusion.go
diff --git a/.devcontainer/docker-compose-devcontainer.yml b/.devcontainer/docker-compose-devcontainer.yml
index 8795d64d..7ef22099 100644
--- a/.devcontainer/docker-compose-devcontainer.yml
+++ b/.devcontainer/docker-compose-devcontainer.yml
@@ -7,7 +7,7 @@ services:
args:
- FFMPEG=true
- IMAGE_TYPE=extras
- - GO_TAGS=stablediffusion p2p tts
+ - GO_TAGS=p2p tts
env_file:
- ../.env
ports:
diff --git a/.env b/.env
index e92f7f3b..ee8db74e 100644
--- a/.env
+++ b/.env
@@ -38,12 +38,12 @@
## Uncomment and set to true to enable rebuilding from source
# REBUILD=true
-## Enable go tags, available: stablediffusion, tts
-## stablediffusion: image generation with stablediffusion
+## Enable go tags, available: p2p, tts
+## p2p: enable distributed inferencing
## tts: enables text-to-speech with go-piper
## (requires REBUILD=true)
#
-# GO_TAGS=stablediffusion
+# GO_TAGS=p2p
## Path where to store generated images
# LOCALAI_IMAGE_PATH=/tmp/generated/images
diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml
index 47a69b0f..e133ecb6 100644
--- a/.github/workflows/release.yaml
+++ b/.github/workflows/release.yaml
@@ -237,40 +237,7 @@ jobs:
detached: true
connect-timeout-seconds: 180
limit-access-to-actor: true
- build-stablediffusion:
- runs-on: ubuntu-latest
- steps:
- - name: Clone
- uses: actions/checkout@v4
- with:
- submodules: true
- - uses: actions/setup-go@v5
- with:
- go-version: '1.21.x'
- cache: false
- - name: Dependencies
- run: |
- sudo apt-get update
- sudo apt-get install -y --no-install-recommends libopencv-dev protobuf-compiler ccache upx-ucl
- go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
- go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
- - name: Build stablediffusion
- run: |
- export PATH=$PATH:$GOPATH/bin
- make backend-assets/grpc/stablediffusion
- mkdir -p release && cp backend-assets/grpc/stablediffusion release
- env:
- GO_TAGS: stablediffusion
- - uses: actions/upload-artifact@v4
- with:
- name: stablediffusion
- path: release/
- - name: Release
- uses: softprops/action-gh-release@v2
- if: startsWith(github.ref, 'refs/tags/')
- with:
- files: |
- release/*
+
build-macOS-x86_64:
runs-on: macos-13
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 0ee93afa..444c89fb 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -105,9 +105,7 @@ jobs:
# Pre-build piper before we start tests in order to have shared libraries in place
make sources/go-piper && \
GO_TAGS="tts" make -C sources/go-piper piper.o && \
- sudo cp -rfv sources/go-piper/piper-phonemize/pi/lib/. /usr/lib/ && \
- # Pre-build stable diffusion before we install a newer version of abseil (not compatible with stablediffusion-ncn)
- PATH="$PATH:/root/go/bin" GO_TAGS="stablediffusion tts" GRPC_BACKENDS=backend-assets/grpc/stablediffusion make build
+ sudo cp -rfv sources/go-piper/piper-phonemize/pi/lib/. /usr/lib/
env:
CUDA_VERSION: 12-4
- name: Cache grpc
@@ -129,7 +127,7 @@ jobs:
cd grpc && cd cmake/build && sudo make --jobs 5 install
- name: Test
run: |
- PATH="$PATH:/root/go/bin" GO_TAGS="stablediffusion tts" make --jobs 5 --output-sync=target test
+ PATH="$PATH:/root/go/bin" GO_TAGS="tts" make --jobs 5 --output-sync=target test
- name: Setup tmate session if tests fail
if: ${{ failure() }}
uses: mxschmitt/action-tmate@v3.19
diff --git a/.vscode/launch.json b/.vscode/launch.json
index 50493421..f5e91508 100644
--- a/.vscode/launch.json
+++ b/.vscode/launch.json
@@ -26,7 +26,7 @@
"LOCALAI_P2P": "true",
"LOCALAI_FEDERATED": "true"
},
- "buildFlags": ["-tags", "stablediffusion p2p tts", "-v"],
+ "buildFlags": ["-tags", "p2p tts", "-v"],
"envFile": "${workspaceFolder}/.env",
"cwd": "${workspaceRoot}"
}
diff --git a/Dockerfile b/Dockerfile
index 4ddc921d..8594c2a1 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -69,14 +69,10 @@ ENV PATH=/opt/rocm/bin:${PATH}
# OpenBLAS requirements and stable diffusion
RUN apt-get update && \
apt-get install -y --no-install-recommends \
- libopenblas-dev \
- libopencv-dev && \
+ libopenblas-dev && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*
-# Set up OpenCV
-RUN ln -s /usr/include/opencv4/opencv2 /usr/include/opencv2
-
WORKDIR /build
###################################
@@ -251,7 +247,7 @@ RUN git clone --recurse-submodules --jobs 4 -b ${GRPC_VERSION} --depth 1 --shall
FROM requirements-drivers AS builder-base
-ARG GO_TAGS="stablediffusion tts p2p"
+ARG GO_TAGS="tts p2p"
ARG GRPC_BACKENDS
ARG MAKEFLAGS
ARG LD_FLAGS="-s -w"
@@ -285,35 +281,12 @@ RUN < 512 || width > 512 {
- return stableDiffusion.GenerateImageUpscaled(
- height,
- width,
- step,
- seed,
- positive_prompt,
- negative_prompt,
- dst,
- asset_dir,
- )
- }
- return stableDiffusion.GenerateImage(
- height,
- width,
- mode,
- step,
- seed,
- positive_prompt,
- negative_prompt,
- dst,
- "",
- asset_dir,
- )
-}
diff --git a/pkg/stablediffusion/generate_unsupported.go b/pkg/stablediffusion/generate_unsupported.go
deleted file mode 100644
index 9563bae0..00000000
--- a/pkg/stablediffusion/generate_unsupported.go
+++ /dev/null
@@ -1,10 +0,0 @@
-//go:build !stablediffusion
-// +build !stablediffusion
-
-package stablediffusion
-
-import "fmt"
-
-func GenerateImage(height, width, mode, step, seed int, positive_prompt, negative_prompt, dst, asset_dir string) error {
- return fmt.Errorf("This version of LocalAI was built without the stablediffusion tag")
-}
diff --git a/pkg/stablediffusion/stablediffusion.go b/pkg/stablediffusion/stablediffusion.go
deleted file mode 100644
index e38db17f..00000000
--- a/pkg/stablediffusion/stablediffusion.go
+++ /dev/null
@@ -1,20 +0,0 @@
-package stablediffusion
-
-import "os"
-
-type StableDiffusion struct {
- assetDir string
-}
-
-func New(assetDir string) (*StableDiffusion, error) {
- if _, err := os.Stat(assetDir); err != nil {
- return nil, err
- }
- return &StableDiffusion{
- assetDir: assetDir,
- }, nil
-}
-
-func (s *StableDiffusion) GenerateImage(height, width, mode, step, seed int, positive_prompt, negative_prompt, dst string) error {
- return GenerateImage(height, width, mode, step, seed, positive_prompt, negative_prompt, dst, s.assetDir)
-}
diff --git a/tests/e2e-aio/e2e_suite_test.go b/tests/e2e-aio/e2e_suite_test.go
index 680bd3a5..4a10d41b 100644
--- a/tests/e2e-aio/e2e_suite_test.go
+++ b/tests/e2e-aio/e2e_suite_test.go
@@ -54,7 +54,7 @@ var _ = BeforeSuite(func() {
Eventually(func() error {
_, err := client.ListModels(context.TODO())
return err
- }, "20m").ShouldNot(HaveOccurred())
+ }, "50m").ShouldNot(HaveOccurred())
})
var _ = AfterSuite(func() {
diff --git a/tests/e2e-aio/e2e_test.go b/tests/e2e-aio/e2e_test.go
index a9c55497..4d9eb4d8 100644
--- a/tests/e2e-aio/e2e_test.go
+++ b/tests/e2e-aio/e2e_test.go
@@ -123,8 +123,9 @@ var _ = Describe("E2E test", func() {
It("correctly", func() {
resp, err := client.CreateImage(context.TODO(),
openai.ImageRequest{
- Prompt: "test",
- Size: openai.CreateImageSize512x512,
+ Prompt: "test",
+ Quality: "1",
+ Size: openai.CreateImageSize256x256,
},
)
Expect(err).ToNot(HaveOccurred())
@@ -135,7 +136,8 @@ var _ = Describe("E2E test", func() {
resp, err := client.CreateImage(context.TODO(),
openai.ImageRequest{
Prompt: "test",
- Size: openai.CreateImageSize512x512,
+ Size: openai.CreateImageSize256x256,
+ Quality: "1",
ResponseFormat: openai.CreateImageResponseFormatURL,
},
)
@@ -147,7 +149,8 @@ var _ = Describe("E2E test", func() {
resp, err := client.CreateImage(context.TODO(),
openai.ImageRequest{
Prompt: "test",
- Size: openai.CreateImageSize512x512,
+ Size: openai.CreateImageSize256x256,
+ Quality: "1",
ResponseFormat: openai.CreateImageResponseFormatB64JSON,
},
)
From e8eb0b2c50a7653c9d8dc3e2388eb4074705b4b7 Mon Sep 17 00:00:00 2001
From: Richard Palethorpe
Date: Wed, 22 Jan 2025 18:35:05 +0000
Subject: [PATCH 21/29] fix(stores): Stores fixes and testing (#4663)
* fix(stores): Actually check a vector is a unit vector/normalized
Instead of just summing the components to see if they equal 1.0, take
the actual magnitude/p-norm of the vector and check that is
approximately 1.0.
Note that this shouldn't change the order of results except in edge
cases if I am too lax with the precision of the equality
comparison. However it should improve performance for normalized
vectors which were being misclassified.
Signed-off-by: Richard Palethorpe
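In other words, the check now accepts a key k as normalized when its Euclidean norm is approximately one (the tolerance below mirrors the bounds used in the diff), roughly

    $0.99 \le \lVert k \rVert_2 = \sqrt{\sum_i k_i^2} \le 1.01$

whereas the old check only required the raw components to sum to 1.0, which is neither necessary nor sufficient for a unit vector.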
* fix(stores): Add tests for known results and triangle inequality
This adds some more tests to check the cosine similarity function has
some expected mathematical properties.
Signed-off-by: Richard Palethorpe
---------
Signed-off-by: Richard Palethorpe
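For unit keys the similarity returned is just the dot product, so the angular distance $d(u, w) = \arccos(u \cdot w)$ is a metric; the added tests check the corresponding triangle inequality over both hand-picked and randomly generated keys:

    $\arccos(u \cdot w) \le \arccos(u \cdot v) + \arccos(v \cdot w)$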
---
backend/go/stores/store.go | 14 +--
tests/integration/stores_test.go | 143 ++++++++++++++++++++++++++++---
2 files changed, 141 insertions(+), 16 deletions(-)
diff --git a/backend/go/stores/store.go b/backend/go/stores/store.go
index a4849b57..c8788a9c 100644
--- a/backend/go/stores/store.go
+++ b/backend/go/stores/store.go
@@ -311,12 +311,16 @@ func (s *Store) StoresGet(opts *pb.StoresGetOptions) (pb.StoresGetResult, error)
}
func isNormalized(k []float32) bool {
- var sum float32
+ var sum float64
+
for _, v := range k {
- sum += v
+ v64 := float64(v)
+ sum += v64*v64
}
- return sum == 1.0
+ s := math.Sqrt(sum)
+
+ return s >= 0.99 && s <= 1.01
}
// TODO: This we could replace with handwritten SIMD code
@@ -328,7 +332,7 @@ func normalizedCosineSimilarity(k1, k2 []float32) float32 {
dot += k1[i] * k2[i]
}
- assert(dot >= -1 && dot <= 1, fmt.Sprintf("dot = %f", dot))
+ assert(dot >= -1.01 && dot <= 1.01, fmt.Sprintf("dot = %f", dot))
// 2.0 * (1.0 - dot) would be the Euclidean distance
return dot
@@ -418,7 +422,7 @@ func cosineSimilarity(k1, k2 []float32, mag1 float64) float32 {
sim := float32(dot / (mag1 * math.Sqrt(mag2)))
- assert(sim >= -1 && sim <= 1, fmt.Sprintf("sim = %f", sim))
+ assert(sim >= -1.01 && sim <= 1.01, fmt.Sprintf("sim = %f", sim))
return sim
}
diff --git a/tests/integration/stores_test.go b/tests/integration/stores_test.go
index 5ed46b19..9612bec0 100644
--- a/tests/integration/stores_test.go
+++ b/tests/integration/stores_test.go
@@ -4,6 +4,7 @@ import (
"context"
"embed"
"math"
+ "math/rand"
"os"
"path/filepath"
@@ -22,6 +23,19 @@ import (
//go:embed backend-assets/*
var backendAssets embed.FS
+func normalize(vecs [][]float32) {
+ for i, k := range vecs {
+ norm := float64(0)
+ for _, x := range k {
+ norm += float64(x * x)
+ }
+ norm = math.Sqrt(norm)
+ for j, x := range k {
+ vecs[i][j] = x / float32(norm)
+ }
+ }
+}
+
var _ = Describe("Integration tests for the stores backend(s) and internal APIs", Label("stores"), func() {
Context("Embedded Store get,set and delete", func() {
var sl *model.ModelLoader
@@ -192,17 +206,8 @@ var _ = Describe("Integration tests for the stores backend(s) and internal APIs"
// set 3 vectors that are at varying angles to {0.5, 0.5, 0.5}
keys := [][]float32{{0.1, 0.3, 0.5}, {0.5, 0.5, 0.5}, {0.6, 0.6, -0.6}, {0.7, -0.7, -0.7}}
vals := [][]byte{[]byte("test0"), []byte("test1"), []byte("test2"), []byte("test3")}
- // normalize the keys
- for i, k := range keys {
- norm := float64(0)
- for _, x := range k {
- norm += float64(x * x)
- }
- norm = math.Sqrt(norm)
- for j, x := range k {
- keys[i][j] = x / float32(norm)
- }
- }
+
+ normalize(keys)
err := store.SetCols(context.Background(), sc, keys, vals)
Expect(err).ToNot(HaveOccurred())
@@ -225,5 +230,121 @@ var _ = Describe("Integration tests for the stores backend(s) and internal APIs"
Expect(ks[1]).To(Equal(keys[1]))
Expect(vals[1]).To(Equal(vals[1]))
})
+
+ It("It produces the correct cosine similarities for orthogonal and opposite unit vectors", func() {
+ keys := [][]float32{{1.0, 0.0, 0.0}, {0.0, 1.0, 0.0}, {0.0, 0.0, 1.0}, {-1.0, 0.0, 0.0}}
+ vals := [][]byte{[]byte("x"), []byte("y"), []byte("z"), []byte("-z")}
+
+ err := store.SetCols(context.Background(), sc, keys, vals);
+ Expect(err).ToNot(HaveOccurred())
+
+ _, _, sims, err := store.Find(context.Background(), sc, keys[0], 4)
+ Expect(err).ToNot(HaveOccurred())
+ Expect(sims).To(Equal([]float32{1.0, 0.0, 0.0, -1.0}))
+ })
+
+ It("It produces the correct cosine similarities for orthogonal and opposite vectors", func() {
+ keys := [][]float32{{1.0, 0.0, 1.0}, {0.0, 2.0, 0.0}, {0.0, 0.0, -1.0}, {-1.0, 0.0, -1.0}}
+ vals := [][]byte{[]byte("x"), []byte("y"), []byte("z"), []byte("-z")}
+
+ err := store.SetCols(context.Background(), sc, keys, vals);
+ Expect(err).ToNot(HaveOccurred())
+
+ _, _, sims, err := store.Find(context.Background(), sc, keys[0], 4)
+ Expect(err).ToNot(HaveOccurred())
+ Expect(sims[0]).To(BeNumerically("~", 1, 0.1))
+ Expect(sims[1]).To(BeNumerically("~", 0, 0.1))
+ Expect(sims[2]).To(BeNumerically("~", -0.7, 0.1))
+ Expect(sims[3]).To(BeNumerically("~", -1, 0.1))
+ })
+
+ expectTriangleEq := func(keys [][]float32, vals [][]byte) {
+ sims := map[string]map[string]float32{}
+
+ // compare every key vector pair and store the similarities in a lookup table
+ // that uses the values as keys
+ for i, k := range keys {
+ _, valsk, simsk, err := store.Find(context.Background(), sc, k, 9)
+ Expect(err).ToNot(HaveOccurred())
+
+ for j, v := range valsk {
+ p := string(vals[i])
+ q := string(v)
+
+ if sims[p] == nil {
+ sims[p] = map[string]float32{}
+ }
+
+ //log.Debug().Strs("vals", []string{p, q}).Float32("similarity", simsk[j]).Send()
+
+ sims[p][q] = simsk[j]
+ }
+ }
+
+ // Check that the triangle inequality holds for every combination of the triplet
+ // u, v and w
+ for _, simsu := range sims {
+ for w, simw := range simsu {
+ // acos(u,w) <= ...
+ uws := math.Acos(float64(simw))
+
+ // ... acos(u,v) + acos(v,w)
+ for v, _ := range simsu {
+ uvws := math.Acos(float64(simsu[v])) + math.Acos(float64(sims[v][w]))
+
+ //log.Debug().Str("u", u).Str("v", v).Str("w", w).Send()
+ //log.Debug().Float32("uw", simw).Float32("uv", simsu[v]).Float32("vw", sims[v][w]).Send()
+ Expect(uws).To(BeNumerically("<=", uvws))
+ }
+ }
+ }
+ }
+
+ It("It obeys the triangle inequality for normalized values", func() {
+ keys := [][]float32{
+ {1.0, 0.0, 0.0}, {0.0, 1.0, 0.0}, {0.0, 0.0, 1.0},
+ {-1.0, 0.0, 0.0}, {0.0, -1.0, 0.0}, {0.0, 0.0, -1.0},
+ {2.0, 3.0, 4.0}, {9.0, 7.0, 1.0}, {0.0, -1.2, 2.3},
+ }
+ vals := [][]byte{
+ []byte("x"), []byte("y"), []byte("z"),
+ []byte("-x"), []byte("-y"), []byte("-z"),
+ []byte("u"), []byte("v"), []byte("w"),
+ }
+
+ normalize(keys[6:])
+
+ err := store.SetCols(context.Background(), sc, keys, vals);
+ Expect(err).ToNot(HaveOccurred())
+
+ expectTriangleEq(keys, vals)
+ })
+
+ It("It obeys the triangle inequality", func() {
+ rnd := rand.New(rand.NewSource(151))
+ keys := make([][]float32, 20)
+ vals := make([][]byte, 20)
+
+ for i := range keys {
+ k := make([]float32, 768)
+
+ for j := range k {
+ k[j] = rnd.Float32()
+ }
+
+ keys[i] = k
+ }
+
+ c := byte('a')
+ for i := range vals {
+ vals[i] = []byte{c}
+ c += 1
+ }
+
+ err := store.SetCols(context.Background(), sc, keys, vals);
+ Expect(err).ToNot(HaveOccurred())
+
+ expectTriangleEq(keys, vals)
+ })
})
})
From a05737c7e43224c66eb0b995be54834747d0dd04 Mon Sep 17 00:00:00 2001
From: Peter Cover
Date: Thu, 23 Jan 2025 02:35:53 +0800
Subject: [PATCH 22/29] chore: fix some function names in comment (#4665)
Signed-off-by: petercover
---
core/http/endpoints/localai/backend_monitor.go | 2 +-
pkg/functions/functions.go | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/core/http/endpoints/localai/backend_monitor.go b/core/http/endpoints/localai/backend_monitor.go
index fa11b5c3..a1b93ac3 100644
--- a/core/http/endpoints/localai/backend_monitor.go
+++ b/core/http/endpoints/localai/backend_monitor.go
@@ -28,7 +28,7 @@ func BackendMonitorEndpoint(bm *services.BackendMonitorService) func(c *fiber.Ct
}
}
-// BackendMonitorEndpoint shuts down the specified backend
+// BackendShutdownEndpoint shuts down the specified backend
// @Summary Backend monitor endpoint
// @Param request body schema.BackendMonitorRequest true "Backend statistics request"
// @Router /backend/shutdown [post]
diff --git a/pkg/functions/functions.go b/pkg/functions/functions.go
index 1a7e1ff1..477a43bb 100644
--- a/pkg/functions/functions.go
+++ b/pkg/functions/functions.go
@@ -34,7 +34,7 @@ type Tool struct {
}
type Tools []Tool
-// ToJSONNameStructure converts a list of functions to a JSON structure that can be parsed to a grammar
+// ToJSONStructure converts a list of functions to a JSON structure that can be parsed to a grammar
// This allows the LLM to return a response of the type: { "name": "function_name", "arguments": { "arg1": "value1", "arg2": "value2" } }
func (f Functions) ToJSONStructure(name, args string) JSONFunctionStructure {
nameKey := defaultFunctionNameKey
From 715071b68dce1ed5d9691f55ee8d9e1571cd6fe4 Mon Sep 17 00:00:00 2001
From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com>
Date: Wed, 22 Jan 2025 21:51:38 +0100
Subject: [PATCH 23/29] feat(swagger): update swagger (#4667)
Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
---
swagger/docs.go | 3 +++
swagger/swagger.json | 3 +++
swagger/swagger.yaml | 2 ++
3 files changed, 8 insertions(+)
diff --git a/swagger/docs.go b/swagger/docs.go
index 13a3d3f3..43bc8822 100644
--- a/swagger/docs.go
+++ b/swagger/docs.go
@@ -1645,6 +1645,9 @@ const docTemplate = `{
"prompt": {
"description": "Prompt is read only by completion/image API calls"
},
+ "quality": {
+ "type": "string"
+ },
"repeat_last_n": {
"type": "integer"
},
diff --git a/swagger/swagger.json b/swagger/swagger.json
index 1c38e9da..7d39e5e9 100644
--- a/swagger/swagger.json
+++ b/swagger/swagger.json
@@ -1638,6 +1638,9 @@
"prompt": {
"description": "Prompt is read only by completion/image API calls"
},
+ "quality": {
+ "type": "string"
+ },
"repeat_last_n": {
"type": "integer"
},
diff --git a/swagger/swagger.yaml b/swagger/swagger.yaml
index 1692f4bb..e747464f 100644
--- a/swagger/swagger.yaml
+++ b/swagger/swagger.yaml
@@ -570,6 +570,8 @@ definitions:
type: number
prompt:
description: Prompt is read only by completion/image API calls
+ quality:
+ type: string
repeat_last_n:
type: integer
repeat_penalty:
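For context, the e2e changes earlier in this series pass Quality: "1" and note that quality is used to specify the step count for the ggml image backend. A minimal sketch of a request exercising the new field, assuming a local instance on localhost:8080; the mapping of quality to diffusion steps is this patch series' convention, not part of the OpenAI spec.

import requests

# "quality" is a free-form string here; per the e2e tests in this series it is
# used to pass the diffusion step count to the stablediffusion-ggml backend.
resp = requests.post(
    "http://localhost:8080/v1/images/generations",
    json={
        "model": "sd-1.5-ggml",
        "prompt": "test",
        "size": "256x256",
        "quality": "1",
    },
    timeout=600,
)
print(resp.json())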
From e426ab7c23308ecf618e766345a0985c826423a1 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Thu, 23 Jan 2025 08:06:18 +0100
Subject: [PATCH 24/29] feat(faster-whisper): add backend (#4666)
Signed-off-by: Ettore Di Giacinto
---
Dockerfile | 5 +-
Makefile | 13 ++-
backend/python/faster-whisper/Makefile | 20 ++++
backend/python/faster-whisper/backend.py | 94 +++++++++++++++++++
backend/python/faster-whisper/install.sh | 14 +++
backend/python/faster-whisper/protogen.sh | 6 ++
.../faster-whisper/requirements-cpu.txt | 8 ++
.../faster-whisper/requirements-cublas11.txt | 9 ++
.../faster-whisper/requirements-cublas12.txt | 8 ++
.../faster-whisper/requirements-hipblas.txt | 3 +
.../faster-whisper/requirements-intel.txt | 6 ++
.../python/faster-whisper/requirements.txt | 3 +
backend/python/faster-whisper/run.sh | 4 +
backend/python/faster-whisper/test.sh | 6 ++
14 files changed, 196 insertions(+), 3 deletions(-)
create mode 100644 backend/python/faster-whisper/Makefile
create mode 100755 backend/python/faster-whisper/backend.py
create mode 100755 backend/python/faster-whisper/install.sh
create mode 100644 backend/python/faster-whisper/protogen.sh
create mode 100644 backend/python/faster-whisper/requirements-cpu.txt
create mode 100644 backend/python/faster-whisper/requirements-cublas11.txt
create mode 100644 backend/python/faster-whisper/requirements-cublas12.txt
create mode 100644 backend/python/faster-whisper/requirements-hipblas.txt
create mode 100644 backend/python/faster-whisper/requirements-intel.txt
create mode 100644 backend/python/faster-whisper/requirements.txt
create mode 100755 backend/python/faster-whisper/run.sh
create mode 100755 backend/python/faster-whisper/test.sh
diff --git a/Dockerfile b/Dockerfile
index 8594c2a1..b01f071d 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -15,7 +15,7 @@ ARG TARGETARCH
ARG TARGETVARIANT
ENV DEBIAN_FRONTEND=noninteractive
-ENV EXTERNAL_GRPC_BACKENDS="coqui:/build/backend/python/coqui/run.sh,transformers:/build/backend/python/transformers/run.sh,rerankers:/build/backend/python/rerankers/run.sh,autogptq:/build/backend/python/autogptq/run.sh,bark:/build/backend/python/bark/run.sh,diffusers:/build/backend/python/diffusers/run.sh,openvoice:/build/backend/python/openvoice/run.sh,kokoro:/build/backend/python/kokoro/run.sh,vllm:/build/backend/python/vllm/run.sh,mamba:/build/backend/python/mamba/run.sh,exllama2:/build/backend/python/exllama2/run.sh,parler-tts:/build/backend/python/parler-tts/run.sh"
+ENV EXTERNAL_GRPC_BACKENDS="coqui:/build/backend/python/coqui/run.sh,transformers:/build/backend/python/transformers/run.sh,rerankers:/build/backend/python/rerankers/run.sh,autogptq:/build/backend/python/autogptq/run.sh,bark:/build/backend/python/bark/run.sh,diffusers:/build/backend/python/diffusers/run.sh,faster-whisper:/build/backend/python/faster-whisper/run.sh,openvoice:/build/backend/python/openvoice/run.sh,kokoro:/build/backend/python/kokoro/run.sh,vllm:/build/backend/python/vllm/run.sh,mamba:/build/backend/python/mamba/run.sh,exllama2:/build/backend/python/exllama2/run.sh,parler-tts:/build/backend/python/parler-tts/run.sh"
RUN apt-get update && \
@@ -414,6 +414,9 @@ RUN if [[ ( "${EXTRA_BACKENDS}" =~ "coqui" || -z "${EXTRA_BACKENDS}" ) && "$IMAG
if [[ ( "${EXTRA_BACKENDS}" =~ "parler-tts" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
make -C backend/python/parler-tts \
; fi && \
+ if [[ ( "${EXTRA_BACKENDS}" =~ "faster-whisper" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
+        make -C backend/python/faster-whisper \
+ ; fi && \
if [[ ( "${EXTRA_BACKENDS}" =~ "diffusers" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
make -C backend/python/diffusers \
; fi
diff --git a/Makefile b/Makefile
index 312bfcc4..efc5812b 100644
--- a/Makefile
+++ b/Makefile
@@ -533,10 +533,10 @@ protogen-go-clean:
$(RM) bin/*
.PHONY: protogen-python
-protogen-python: autogptq-protogen bark-protogen coqui-protogen diffusers-protogen exllama2-protogen mamba-protogen rerankers-protogen transformers-protogen parler-tts-protogen kokoro-protogen vllm-protogen openvoice-protogen
+protogen-python: autogptq-protogen bark-protogen coqui-protogen diffusers-protogen exllama2-protogen mamba-protogen rerankers-protogen transformers-protogen parler-tts-protogen kokoro-protogen vllm-protogen openvoice-protogen faster-whisper-protogen
.PHONY: protogen-python-clean
-protogen-python-clean: autogptq-protogen-clean bark-protogen-clean coqui-protogen-clean diffusers-protogen-clean exllama2-protogen-clean mamba-protogen-clean rerankers-protogen-clean transformers-protogen-clean parler-tts-protogen-clean kokoro-protogen-clean vllm-protogen-clean openvoice-protogen-clean
+protogen-python-clean: autogptq-protogen-clean bark-protogen-clean coqui-protogen-clean diffusers-protogen-clean exllama2-protogen-clean mamba-protogen-clean rerankers-protogen-clean transformers-protogen-clean parler-tts-protogen-clean kokoro-protogen-clean vllm-protogen-clean openvoice-protogen-clean faster-whisper-protogen-clean
.PHONY: autogptq-protogen
autogptq-protogen:
@@ -570,6 +570,14 @@ diffusers-protogen:
diffusers-protogen-clean:
$(MAKE) -C backend/python/diffusers protogen-clean
+.PHONY: faster-whisper-protogen
+faster-whisper-protogen:
+ $(MAKE) -C backend/python/faster-whisper protogen
+
+.PHONY: faster-whisper-protogen-clean
+faster-whisper-protogen-clean:
+ $(MAKE) -C backend/python/faster-whisper protogen-clean
+
.PHONY: exllama2-protogen
exllama2-protogen:
$(MAKE) -C backend/python/exllama2 protogen
@@ -641,6 +649,7 @@ prepare-extra-conda-environments: protogen-python
$(MAKE) -C backend/python/bark
$(MAKE) -C backend/python/coqui
$(MAKE) -C backend/python/diffusers
+ $(MAKE) -C backend/python/faster-whisper
$(MAKE) -C backend/python/vllm
$(MAKE) -C backend/python/mamba
$(MAKE) -C backend/python/rerankers
diff --git a/backend/python/faster-whisper/Makefile b/backend/python/faster-whisper/Makefile
new file mode 100644
index 00000000..c0e5169f
--- /dev/null
+++ b/backend/python/faster-whisper/Makefile
@@ -0,0 +1,20 @@
+.DEFAULT_GOAL := install
+
+.PHONY: install
+install:
+ bash install.sh
+ $(MAKE) protogen
+
+.PHONY: protogen
+protogen: backend_pb2_grpc.py backend_pb2.py
+
+.PHONY: protogen-clean
+protogen-clean:
+ $(RM) backend_pb2_grpc.py backend_pb2.py
+
+backend_pb2_grpc.py backend_pb2.py:
+ bash protogen.sh
+
+.PHONY: clean
+clean: protogen-clean
+ rm -rf venv __pycache__
\ No newline at end of file
diff --git a/backend/python/faster-whisper/backend.py b/backend/python/faster-whisper/backend.py
new file mode 100755
index 00000000..dbb8b3d9
--- /dev/null
+++ b/backend/python/faster-whisper/backend.py
@@ -0,0 +1,94 @@
+#!/usr/bin/env python3
+"""
+This is an extra gRPC server of LocalAI for audio transcription with faster-whisper
+"""
+from concurrent import futures
+import time
+import argparse
+import signal
+import sys
+import os
+import backend_pb2
+import backend_pb2_grpc
+
+from faster_whisper import WhisperModel
+
+import grpc
+
+
+_ONE_DAY_IN_SECONDS = 60 * 60 * 24
+
+# If MAX_WORKERS are specified in the environment use it, otherwise default to 1
+MAX_WORKERS = int(os.environ.get('PYTHON_GRPC_MAX_WORKERS', '1'))
+COQUI_LANGUAGE = os.environ.get('COQUI_LANGUAGE', None)
+
+# Implement the BackendServicer class with the service methods
+class BackendServicer(backend_pb2_grpc.BackendServicer):
+ """
+ BackendServicer is the class that implements the gRPC service
+ """
+ def Health(self, request, context):
+ return backend_pb2.Reply(message=bytes("OK", 'utf-8'))
+ def LoadModel(self, request, context):
+ device = "cpu"
+ # Get device
+ # device = "cuda" if request.CUDA else "cpu"
+ if request.CUDA:
+ device = "cuda"
+
+ try:
+ print("Preparing models, please wait", file=sys.stderr)
+ self.model = WhisperModel(request.Model, device=device, compute_type="float16")
+ except Exception as err:
+ return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
+ # Implement your logic here for the LoadModel service
+ # Replace this with your desired response
+ return backend_pb2.Result(message="Model loaded successfully", success=True)
+
+ def AudioTranscription(self, request, context):
+ resultSegments = []
+ text = ""
+ try:
+ segments, info = self.model.transcribe(request.dst, beam_size=5, condition_on_previous_text=False)
+ id = 0
+ for segment in segments:
+ print("[%.2fs -> %.2fs] %s" % (segment.start, segment.end, segment.text))
+ resultSegments.append(backend_pb2.TranscriptSegment(id=id, start=segment.start, end=segment.end, text=segment.text))
+ text += segment.text
+ id += 1
+ except Exception as err:
+ print(f"Unexpected {err=}, {type(err)=}", file=sys.stderr)
+
+ return backend_pb2.TranscriptResult(segments=resultSegments, text=text)
+
+def serve(address):
+ server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS))
+ backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server)
+ server.add_insecure_port(address)
+ server.start()
+ print("Server started. Listening on: " + address, file=sys.stderr)
+
+ # Define the signal handler function
+ def signal_handler(sig, frame):
+ print("Received termination signal. Shutting down...")
+ server.stop(0)
+ sys.exit(0)
+
+ # Set the signal handlers for SIGINT and SIGTERM
+ signal.signal(signal.SIGINT, signal_handler)
+ signal.signal(signal.SIGTERM, signal_handler)
+
+ try:
+ while True:
+ time.sleep(_ONE_DAY_IN_SECONDS)
+ except KeyboardInterrupt:
+ server.stop(0)
+
+if __name__ == "__main__":
+ parser = argparse.ArgumentParser(description="Run the gRPC server.")
+ parser.add_argument(
+ "--addr", default="localhost:50051", help="The address to bind the server to."
+ )
+ args = parser.parse_args()
+
+ serve(args.addr)
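Outside of gRPC, the faster-whisper calls wrapped by the servicer above boil down to the following sketch; the model size, audio path, and compute type are placeholders (the backend itself uses the model name from the LoadModel request with float16).

from faster_whisper import WhisperModel

# Mirrors what LoadModel and AudioTranscription do, without the gRPC plumbing.
model = WhisperModel("small", device="cpu", compute_type="int8")  # placeholder model/compute type
segments, info = model.transcribe("audio.wav", beam_size=5,
                                  condition_on_previous_text=False)
for segment in segments:
    print("[%.2fs -> %.2fs] %s" % (segment.start, segment.end, segment.text))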
diff --git a/backend/python/faster-whisper/install.sh b/backend/python/faster-whisper/install.sh
new file mode 100755
index 00000000..36443ef1
--- /dev/null
+++ b/backend/python/faster-whisper/install.sh
@@ -0,0 +1,14 @@
+#!/bin/bash
+set -e
+
+source $(dirname $0)/../common/libbackend.sh
+
+# This is here because the Intel pip index is broken and returns 200 status codes for every package name; it just doesn't return any package links.
+# This makes uv think that the package exists in the Intel pip index, and by default it stops looking at other pip indexes once it finds a match.
+# We need uv to continue falling through to the pypi default index to find optimum[openvino] in the pypi index
+# the --upgrade actually allows us to *downgrade* torch to the version provided in the Intel pip index
+if [ "x${BUILD_PROFILE}" == "xintel" ]; then
+ EXTRA_PIP_INSTALL_FLAGS+=" --upgrade --index-strategy=unsafe-first-match"
+fi
+
+installRequirements
diff --git a/backend/python/faster-whisper/protogen.sh b/backend/python/faster-whisper/protogen.sh
new file mode 100644
index 00000000..32f39fbb
--- /dev/null
+++ b/backend/python/faster-whisper/protogen.sh
@@ -0,0 +1,6 @@
+#!/bin/bash
+set -e
+
+source $(dirname $0)/../common/libbackend.sh
+
+python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto
\ No newline at end of file
diff --git a/backend/python/faster-whisper/requirements-cpu.txt b/backend/python/faster-whisper/requirements-cpu.txt
new file mode 100644
index 00000000..3e03f3ad
--- /dev/null
+++ b/backend/python/faster-whisper/requirements-cpu.txt
@@ -0,0 +1,8 @@
+faster-whisper
+opencv-python
+accelerate
+compel
+peft
+sentencepiece
+torch==2.4.1
+optimum-quanto
\ No newline at end of file
diff --git a/backend/python/faster-whisper/requirements-cublas11.txt b/backend/python/faster-whisper/requirements-cublas11.txt
new file mode 100644
index 00000000..b7453295
--- /dev/null
+++ b/backend/python/faster-whisper/requirements-cublas11.txt
@@ -0,0 +1,9 @@
+--extra-index-url https://download.pytorch.org/whl/cu118
+torch==2.4.1+cu118
+faster-whisper
+opencv-python
+accelerate
+compel
+peft
+sentencepiece
+optimum-quanto
\ No newline at end of file
diff --git a/backend/python/faster-whisper/requirements-cublas12.txt b/backend/python/faster-whisper/requirements-cublas12.txt
new file mode 100644
index 00000000..8f46fa4a
--- /dev/null
+++ b/backend/python/faster-whisper/requirements-cublas12.txt
@@ -0,0 +1,8 @@
+torch==2.4.1
+faster-whisper
+opencv-python
+accelerate
+compel
+peft
+sentencepiece
+optimum-quanto
\ No newline at end of file
diff --git a/backend/python/faster-whisper/requirements-hipblas.txt b/backend/python/faster-whisper/requirements-hipblas.txt
new file mode 100644
index 00000000..29413f05
--- /dev/null
+++ b/backend/python/faster-whisper/requirements-hipblas.txt
@@ -0,0 +1,3 @@
+--extra-index-url https://download.pytorch.org/whl/rocm6.0
+torch
+faster-whisper
\ No newline at end of file
diff --git a/backend/python/faster-whisper/requirements-intel.txt b/backend/python/faster-whisper/requirements-intel.txt
new file mode 100644
index 00000000..417aa0b4
--- /dev/null
+++ b/backend/python/faster-whisper/requirements-intel.txt
@@ -0,0 +1,6 @@
+--extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
+intel-extension-for-pytorch==2.3.110+xpu
+torch==2.3.1+cxx11.abi
+oneccl_bind_pt==2.3.100+xpu
+optimum[openvino]
+faster-whisper
\ No newline at end of file
diff --git a/backend/python/faster-whisper/requirements.txt b/backend/python/faster-whisper/requirements.txt
new file mode 100644
index 00000000..0f43df10
--- /dev/null
+++ b/backend/python/faster-whisper/requirements.txt
@@ -0,0 +1,3 @@
+grpcio==1.69.0
+protobuf
+grpcio-tools
\ No newline at end of file
diff --git a/backend/python/faster-whisper/run.sh b/backend/python/faster-whisper/run.sh
new file mode 100755
index 00000000..375c07e5
--- /dev/null
+++ b/backend/python/faster-whisper/run.sh
@@ -0,0 +1,4 @@
+#!/bin/bash
+source $(dirname $0)/../common/libbackend.sh
+
+startBackend $@
\ No newline at end of file
diff --git a/backend/python/faster-whisper/test.sh b/backend/python/faster-whisper/test.sh
new file mode 100755
index 00000000..6940b066
--- /dev/null
+++ b/backend/python/faster-whisper/test.sh
@@ -0,0 +1,6 @@
+#!/bin/bash
+set -e
+
+source $(dirname $0)/../common/libbackend.sh
+
+runUnittests
From 200fe358f0c2f25a61b0b64478f10be945021f75 Mon Sep 17 00:00:00 2001
From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com>
Date: Thu, 23 Jan 2025 08:06:43 +0100
Subject: [PATCH 25/29] chore: :arrow_up: Update ggerganov/llama.cpp to
`6152129d05870cb38162c422c6ba80434e021e9f` (#4668)
:arrow_up: Update ggerganov/llama.cpp
Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
---
Makefile | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/Makefile b/Makefile
index efc5812b..467b2d39 100644
--- a/Makefile
+++ b/Makefile
@@ -8,7 +8,7 @@ DETECT_LIBS?=true
# llama.cpp versions
GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp
GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
-CPPLLAMA_VERSION?=6171c9d25820ccf676b243c172868819d882848f
+CPPLLAMA_VERSION?=6152129d05870cb38162c422c6ba80434e021e9f
# whisper.cpp version
WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp
From 89429a439b3a5c5571f8bfe9be228a56f94f7a84 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Thu, 23 Jan 2025 09:30:47 +0100
Subject: [PATCH 26/29] feat(transformers): add support for Mamba (#4669)

Signed-off-by: Ettore Di Giacinto
---
Dockerfile | 6 +-
Makefile | 13 +-
backend/python/mamba/Makefile | 29 ---
backend/python/mamba/README.md | 5 -
backend/python/mamba/backend.py | 179 ------------------
backend/python/mamba/install.sh | 9 -
backend/python/mamba/requirements-after.txt | 2 -
backend/python/mamba/requirements-cpu.txt | 2 -
.../python/mamba/requirements-cublas11.txt | 3 -
.../python/mamba/requirements-cublas12.txt | 2 -
backend/python/mamba/requirements-install.txt | 6 -
backend/python/mamba/requirements.txt | 3 -
backend/python/mamba/run.sh | 6 -
backend/python/mamba/test.py | 76 --------
backend/python/mamba/test.sh | 6 -
backend/python/transformers/backend.py | 6 +-
pkg/model/initializers.go | 2 +
17 files changed, 10 insertions(+), 345 deletions(-)
delete mode 100644 backend/python/mamba/Makefile
delete mode 100644 backend/python/mamba/README.md
delete mode 100644 backend/python/mamba/backend.py
delete mode 100755 backend/python/mamba/install.sh
delete mode 100644 backend/python/mamba/requirements-after.txt
delete mode 100644 backend/python/mamba/requirements-cpu.txt
delete mode 100644 backend/python/mamba/requirements-cublas11.txt
delete mode 100644 backend/python/mamba/requirements-cublas12.txt
delete mode 100644 backend/python/mamba/requirements-install.txt
delete mode 100644 backend/python/mamba/requirements.txt
delete mode 100755 backend/python/mamba/run.sh
delete mode 100644 backend/python/mamba/test.py
delete mode 100755 backend/python/mamba/test.sh
diff --git a/Dockerfile b/Dockerfile
index b01f071d..9f699ac9 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -15,8 +15,7 @@ ARG TARGETARCH
ARG TARGETVARIANT
ENV DEBIAN_FRONTEND=noninteractive
-ENV EXTERNAL_GRPC_BACKENDS="coqui:/build/backend/python/coqui/run.sh,transformers:/build/backend/python/transformers/run.sh,rerankers:/build/backend/python/rerankers/run.sh,autogptq:/build/backend/python/autogptq/run.sh,bark:/build/backend/python/bark/run.sh,diffusers:/build/backend/python/diffusers/run.sh,faster-whisper:/build/backend/python/faster-whisper/run.sh,openvoice:/build/backend/python/openvoice/run.sh,kokoro:/build/backend/python/kokoro/run.sh,vllm:/build/backend/python/vllm/run.sh,mamba:/build/backend/python/mamba/run.sh,exllama2:/build/backend/python/exllama2/run.sh,parler-tts:/build/backend/python/parler-tts/run.sh"
-
+ENV EXTERNAL_GRPC_BACKENDS="coqui:/build/backend/python/coqui/run.sh,transformers:/build/backend/python/transformers/run.sh,rerankers:/build/backend/python/rerankers/run.sh,autogptq:/build/backend/python/autogptq/run.sh,bark:/build/backend/python/bark/run.sh,diffusers:/build/backend/python/diffusers/run.sh,faster-whisper:/build/backend/python/faster-whisper/run.sh,openvoice:/build/backend/python/openvoice/run.sh,kokoro:/build/backend/python/kokoro/run.sh,vllm:/build/backend/python/vllm/run.sh,exllama2:/build/backend/python/exllama2/run.sh,parler-tts:/build/backend/python/parler-tts/run.sh"
RUN apt-get update && \
apt-get install -y --no-install-recommends \
@@ -445,9 +444,6 @@ RUN if [[ ( "${EXTRA_BACKENDS}" =~ "vllm" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE
; fi && \
if [[ ( "${EXTRA_BACKENDS}" =~ "rerankers" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
make -C backend/python/rerankers \
- ; fi && \
- if [[ ( "${EXTRA_BACKENDS}" =~ "mamba" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
- make -C backend/python/mamba \
; fi
# Make sure the models directory exists
diff --git a/Makefile b/Makefile
index 467b2d39..fc649c4f 100644
--- a/Makefile
+++ b/Makefile
@@ -533,10 +533,10 @@ protogen-go-clean:
$(RM) bin/*
.PHONY: protogen-python
-protogen-python: autogptq-protogen bark-protogen coqui-protogen diffusers-protogen exllama2-protogen mamba-protogen rerankers-protogen transformers-protogen parler-tts-protogen kokoro-protogen vllm-protogen openvoice-protogen faster-whisper-protogen
+protogen-python: autogptq-protogen bark-protogen coqui-protogen diffusers-protogen exllama2-protogen rerankers-protogen transformers-protogen parler-tts-protogen kokoro-protogen vllm-protogen openvoice-protogen faster-whisper-protogen
.PHONY: protogen-python-clean
-protogen-python-clean: autogptq-protogen-clean bark-protogen-clean coqui-protogen-clean diffusers-protogen-clean exllama2-protogen-clean mamba-protogen-clean rerankers-protogen-clean transformers-protogen-clean parler-tts-protogen-clean kokoro-protogen-clean vllm-protogen-clean openvoice-protogen-clean faster-whisper-protogen-clean
+protogen-python-clean: autogptq-protogen-clean bark-protogen-clean coqui-protogen-clean diffusers-protogen-clean exllama2-protogen-clean rerankers-protogen-clean transformers-protogen-clean parler-tts-protogen-clean kokoro-protogen-clean vllm-protogen-clean openvoice-protogen-clean faster-whisper-protogen-clean
.PHONY: autogptq-protogen
autogptq-protogen:
@@ -586,14 +586,6 @@ exllama2-protogen:
exllama2-protogen-clean:
$(MAKE) -C backend/python/exllama2 protogen-clean
-.PHONY: mamba-protogen
-mamba-protogen:
- $(MAKE) -C backend/python/mamba protogen
-
-.PHONY: mamba-protogen-clean
-mamba-protogen-clean:
- $(MAKE) -C backend/python/mamba protogen-clean
-
.PHONY: rerankers-protogen
rerankers-protogen:
$(MAKE) -C backend/python/rerankers protogen
@@ -651,7 +643,6 @@ prepare-extra-conda-environments: protogen-python
$(MAKE) -C backend/python/diffusers
$(MAKE) -C backend/python/faster-whisper
$(MAKE) -C backend/python/vllm
- $(MAKE) -C backend/python/mamba
$(MAKE) -C backend/python/rerankers
$(MAKE) -C backend/python/transformers
$(MAKE) -C backend/python/parler-tts
diff --git a/backend/python/mamba/Makefile b/backend/python/mamba/Makefile
deleted file mode 100644
index 52b1c53a..00000000
--- a/backend/python/mamba/Makefile
+++ /dev/null
@@ -1,29 +0,0 @@
-.PHONY: mamba
-mamba: protogen
- bash install.sh
-
-.PHONY: run
-run: protogen
- @echo "Running mamba..."
- bash run.sh
- @echo "mamba run."
-
-.PHONY: test
-test: protogen
- @echo "Testing mamba..."
- bash test.sh
- @echo "mamba tested."
-
-.PHONY: protogen
-protogen: backend_pb2_grpc.py backend_pb2.py
-
-.PHONY: protogen-clean
-protogen-clean:
- $(RM) backend_pb2_grpc.py backend_pb2.py
-
-backend_pb2_grpc.py backend_pb2.py:
- python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto
-
-.PHONY: clean
-clean: protogen-clean
- $(RM) -r venv __pycache__
\ No newline at end of file
diff --git a/backend/python/mamba/README.md b/backend/python/mamba/README.md
deleted file mode 100644
index d6ead917..00000000
--- a/backend/python/mamba/README.md
+++ /dev/null
@@ -1,5 +0,0 @@
-# Creating a separate environment for the mamba project
-
-```
-make mamba
-```
\ No newline at end of file
diff --git a/backend/python/mamba/backend.py b/backend/python/mamba/backend.py
deleted file mode 100644
index 3c15fea7..00000000
--- a/backend/python/mamba/backend.py
+++ /dev/null
@@ -1,179 +0,0 @@
-#!/usr/bin/env python3
-from concurrent import futures
-import time
-import argparse
-import signal
-import sys
-import os
-
-import backend_pb2
-import backend_pb2_grpc
-
-import grpc
-
-import torch
-from transformers import AutoTokenizer, AutoModelForCausalLM
-from mamba_ssm.models.mixer_seq_simple import MambaLMHeadModel
-
-_ONE_DAY_IN_SECONDS = 60 * 60 * 24
-
-# If MAX_WORKERS are specified in the environment use it, otherwise default to 1
-MAX_WORKERS = int(os.environ.get('PYTHON_GRPC_MAX_WORKERS', '1'))
-MAMBA_CHAT= os.environ.get('MAMBA_CHAT', '1') == '1'
-
-# Implement the BackendServicer class with the service methods
-class BackendServicer(backend_pb2_grpc.BackendServicer):
- """
- A gRPC servicer that implements the Backend service defined in backend.proto.
- """
- def generate(self,prompt, max_new_tokens):
- """
- Generates text based on the given prompt and maximum number of new tokens.
-
- Args:
- prompt (str): The prompt to generate text from.
- max_new_tokens (int): The maximum number of new tokens to generate.
-
- Returns:
- str: The generated text.
- """
- self.generator.end_beam_search()
-
- # Tokenizing the input
- ids = self.generator.tokenizer.encode(prompt)
-
- self.generator.gen_begin_reuse(ids)
- initial_len = self.generator.sequence[0].shape[0]
- has_leading_space = False
- decoded_text = ''
- for i in range(max_new_tokens):
- token = self.generator.gen_single_token()
- if i == 0 and self.generator.tokenizer.tokenizer.IdToPiece(int(token)).startswith('▁'):
- has_leading_space = True
-
- decoded_text = self.generator.tokenizer.decode(self.generator.sequence[0][initial_len:])
- if has_leading_space:
- decoded_text = ' ' + decoded_text
-
- if token.item() == self.generator.tokenizer.eos_token_id:
- break
- return decoded_text
-
- def Health(self, request, context):
- """
- Returns a health check message.
-
- Args:
- request: The health check request.
- context: The gRPC context.
-
- Returns:
- backend_pb2.Reply: The health check reply.
- """
- return backend_pb2.Reply(message=bytes("OK", 'utf-8'))
-
- def LoadModel(self, request, context):
- """
- Loads a language model.
-
- Args:
- request: The load model request.
- context: The gRPC context.
-
- Returns:
- backend_pb2.Result: The load model result.
- """
- try:
- tokenizerModel = request.Tokenizer
- if tokenizerModel == "":
- tokenizerModel = request.Model
-
- tokenizer = AutoTokenizer.from_pretrained(tokenizerModel)
- if MAMBA_CHAT:
- tokenizer.eos_token = "<|endoftext|>"
- tokenizer.pad_token = tokenizer.eos_token
- self.tokenizer = tokenizer
- self.model = MambaLMHeadModel.from_pretrained(request.Model, device="cuda", dtype=torch.float16)
- except Exception as err:
- return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
- return backend_pb2.Result(message="Model loaded successfully", success=True)
-
- def Predict(self, request, context):
- """
- Generates text based on the given prompt and sampling parameters.
-
- Args:
- request: The predict request.
- context: The gRPC context.
-
- Returns:
- backend_pb2.Result: The predict result.
- """
- if request.TopP == 0:
- request.TopP = 0.9
-
- max_tokens = request.Tokens
-
- if request.Tokens == 0:
- max_tokens = 2000
-
- # encoded_input = self.tokenizer(request.Prompt)
- tokens = self.tokenizer(request.Prompt, return_tensors="pt")
- input_ids = tokens.input_ids.to(device="cuda")
- out = self.model.generate(input_ids=input_ids, max_length=max_tokens, temperature=request.Temperature,
- top_p=request.TopP, eos_token_id=self.tokenizer.eos_token_id)
-
- decoded = self.tokenizer.batch_decode(out)
-
- generated_text = decoded[0]
-
- # Remove prompt from response if present
- if request.Prompt in generated_text:
- generated_text = generated_text.replace(request.Prompt, "")
-
- return backend_pb2.Reply(message=bytes(generated_text, encoding='utf-8'))
-
- def PredictStream(self, request, context):
- """
- Generates text based on the given prompt and sampling parameters, and streams the results.
-
- Args:
- request: The predict stream request.
- context: The gRPC context.
-
- Returns:
- backend_pb2.Result: The predict stream result.
- """
- yield self.Predict(request, context)
-
-def serve(address):
- server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS))
- backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server)
- server.add_insecure_port(address)
- server.start()
- print("Server started. Listening on: " + address, file=sys.stderr)
-
- # Define the signal handler function
- def signal_handler(sig, frame):
- print("Received termination signal. Shutting down...")
- server.stop(0)
- sys.exit(0)
-
- # Set the signal handlers for SIGINT and SIGTERM
- signal.signal(signal.SIGINT, signal_handler)
- signal.signal(signal.SIGTERM, signal_handler)
-
- try:
- while True:
- time.sleep(_ONE_DAY_IN_SECONDS)
- except KeyboardInterrupt:
- server.stop(0)
-
-if __name__ == "__main__":
- parser = argparse.ArgumentParser(description="Run the gRPC server.")
- parser.add_argument(
- "--addr", default="localhost:50051", help="The address to bind the server to."
- )
- args = parser.parse_args()
-
- serve(args.addr)
diff --git a/backend/python/mamba/install.sh b/backend/python/mamba/install.sh
deleted file mode 100755
index db18eefc..00000000
--- a/backend/python/mamba/install.sh
+++ /dev/null
@@ -1,9 +0,0 @@
-#!/bin/bash
-set -e
-
-LIMIT_TARGETS="cublas"
-EXTRA_PIP_INSTALL_FLAGS="--no-build-isolation"
-
-source $(dirname $0)/../common/libbackend.sh
-
-installRequirements
\ No newline at end of file
diff --git a/backend/python/mamba/requirements-after.txt b/backend/python/mamba/requirements-after.txt
deleted file mode 100644
index ea6890eb..00000000
--- a/backend/python/mamba/requirements-after.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-causal-conv1d==1.4.0
-mamba-ssm==2.2.2
\ No newline at end of file
diff --git a/backend/python/mamba/requirements-cpu.txt b/backend/python/mamba/requirements-cpu.txt
deleted file mode 100644
index b4f1261f..00000000
--- a/backend/python/mamba/requirements-cpu.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-torch==2.4.1
-transformers
\ No newline at end of file
diff --git a/backend/python/mamba/requirements-cublas11.txt b/backend/python/mamba/requirements-cublas11.txt
deleted file mode 100644
index ed0d4df5..00000000
--- a/backend/python/mamba/requirements-cublas11.txt
+++ /dev/null
@@ -1,3 +0,0 @@
---extra-index-url https://download.pytorch.org/whl/cu118
-torch==2.4.1+cu118
-transformers
\ No newline at end of file
diff --git a/backend/python/mamba/requirements-cublas12.txt b/backend/python/mamba/requirements-cublas12.txt
deleted file mode 100644
index b4f1261f..00000000
--- a/backend/python/mamba/requirements-cublas12.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-torch==2.4.1
-transformers
\ No newline at end of file
diff --git a/backend/python/mamba/requirements-install.txt b/backend/python/mamba/requirements-install.txt
deleted file mode 100644
index 69d263f0..00000000
--- a/backend/python/mamba/requirements-install.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-# mabma does not specify it's build dependencies per PEP517, so we need to disable build isolation
-# this also means that we need to install the basic build dependencies into the venv ourselves
-# https://github.com/Dao-AILab/causal-conv1d/issues/24
-packaging
-setuptools
-wheel
\ No newline at end of file
diff --git a/backend/python/mamba/requirements.txt b/backend/python/mamba/requirements.txt
deleted file mode 100644
index afc8b2a9..00000000
--- a/backend/python/mamba/requirements.txt
+++ /dev/null
@@ -1,3 +0,0 @@
-grpcio==1.69.0
-protobuf
-certifi
\ No newline at end of file
diff --git a/backend/python/mamba/run.sh b/backend/python/mamba/run.sh
deleted file mode 100755
index 1afc3984..00000000
--- a/backend/python/mamba/run.sh
+++ /dev/null
@@ -1,6 +0,0 @@
-#!/bin/bash
-LIMIT_TARGETS="cublas"
-
-source $(dirname $0)/../common/libbackend.sh
-
-startBackend $@
\ No newline at end of file
diff --git a/backend/python/mamba/test.py b/backend/python/mamba/test.py
deleted file mode 100644
index 83fb2651..00000000
--- a/backend/python/mamba/test.py
+++ /dev/null
@@ -1,76 +0,0 @@
-import unittest
-import subprocess
-import time
-import backend_pb2
-import backend_pb2_grpc
-
-import grpc
-
-import unittest
-import subprocess
-import time
-import grpc
-import backend_pb2_grpc
-import backend_pb2
-
-class TestBackendServicer(unittest.TestCase):
- """
- TestBackendServicer is the class that tests the gRPC service.
-
- This class contains methods to test the startup and shutdown of the gRPC service.
- """
- def setUp(self):
- self.service = subprocess.Popen(["python", "backend.py", "--addr", "localhost:50051"])
- time.sleep(10)
-
- def tearDown(self) -> None:
- self.service.terminate()
- self.service.wait()
-
- def test_server_startup(self):
- try:
- self.setUp()
- with grpc.insecure_channel("localhost:50051") as channel:
- stub = backend_pb2_grpc.BackendStub(channel)
- response = stub.Health(backend_pb2.HealthMessage())
- self.assertEqual(response.message, b'OK')
- except Exception as err:
- print(err)
- self.fail("Server failed to start")
- finally:
- self.tearDown()
- def test_load_model(self):
- """
- This method tests if the model is loaded successfully
- """
- try:
- self.setUp()
- with grpc.insecure_channel("localhost:50051") as channel:
- stub = backend_pb2_grpc.BackendStub(channel)
- response = stub.LoadModel(backend_pb2.ModelOptions(Model="facebook/opt-125m"))
- self.assertTrue(response.success)
- self.assertEqual(response.message, "Model loaded successfully")
- except Exception as err:
- print(err)
- self.fail("LoadModel service failed")
- finally:
- self.tearDown()
-
- def test_text(self):
- """
- This method tests if the embeddings are generated successfully
- """
- try:
- self.setUp()
- with grpc.insecure_channel("localhost:50051") as channel:
- stub = backend_pb2_grpc.BackendStub(channel)
- response = stub.LoadModel(backend_pb2.ModelOptions(Model="facebook/opt-125m"))
- self.assertTrue(response.success)
- req = backend_pb2.PredictOptions(Prompt="The capital of France is")
- resp = stub.Predict(req)
- self.assertIsNotNone(resp.message)
- except Exception as err:
- print(err)
- self.fail("text service failed")
- finally:
- self.tearDown()
\ No newline at end of file
diff --git a/backend/python/mamba/test.sh b/backend/python/mamba/test.sh
deleted file mode 100755
index 6940b066..00000000
--- a/backend/python/mamba/test.sh
+++ /dev/null
@@ -1,6 +0,0 @@
-#!/bin/bash
-set -e
-
-source $(dirname $0)/../common/libbackend.sh
-
-runUnittests
diff --git a/backend/python/transformers/backend.py b/backend/python/transformers/backend.py
index 9b65c6db..b0d5875b 100644
--- a/backend/python/transformers/backend.py
+++ b/backend/python/transformers/backend.py
@@ -21,7 +21,7 @@ import torch.cuda
XPU=os.environ.get("XPU", "0") == "1"
-from transformers import AutoTokenizer, AutoModel, set_seed, TextIteratorStreamer, StoppingCriteriaList, StopStringCriteria
+from transformers import AutoTokenizer, AutoModel, set_seed, TextIteratorStreamer, StoppingCriteriaList, StopStringCriteria, MambaConfig, MambaForCausalLM
from transformers import AutoProcessor, MusicgenForConditionalGeneration
from scipy.io import wavfile
import outetts
@@ -245,6 +245,10 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
autoTokenizer = False
self.model = SentenceTransformer(model_name, trust_remote_code=request.TrustRemoteCode)
self.SentenceTransformer = True
+ elif request.Type == "Mamba":
+ autoTokenizer = False
+ self.tokenizer = AutoTokenizer.from_pretrained(model_name)
+ self.model = MambaForCausalLM.from_pretrained(model_name)
else:
print("Automodel", file=sys.stderr)
self.model = AutoModel.from_pretrained(model_name,
diff --git a/pkg/model/initializers.go b/pkg/model/initializers.go
index b2a5293b..d5f1459b 100644
--- a/pkg/model/initializers.go
+++ b/pkg/model/initializers.go
@@ -29,12 +29,14 @@ var Aliases map[string]string = map[string]string{
"langchain-huggingface": LCHuggingFaceBackend,
"transformers-musicgen": TransformersBackend,
"sentencetransformers": TransformersBackend,
+ "mamba": TransformersBackend,
"stablediffusion": StableDiffusionGGMLBackend,
}
var TypeAlias map[string]string = map[string]string{
"sentencetransformers": "SentenceTransformer",
"huggingface-embeddings": "SentenceTransformer",
+ "mamba": "Mamba",
"transformers-musicgen": "MusicgenForConditionalGeneration",
}
From 318225f631189c6d8952eac5125d220ca76246f5 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Thu, 23 Jan 2025 09:46:16 +0100
Subject: [PATCH 27/29] chore(parler-tts): drop backend (#4672)
We now support more capable, state-of-the-art backends that also offer
voice cloning and many other features. This backend is superseded and
poses a significant maintenance burden: the long-standing issue
https://github.com/mudler/LocalAI/issues/3941 remains open because its
dependencies pin old versions of grpc.
Closes https://github.com/mudler/LocalAI/issues/3941
Signed-off-by: Ettore Di Giacinto
---
Dockerfile | 7 +-
Makefile | 13 +-
backend/python/parler-tts/Makefile | 44 ------
backend/python/parler-tts/backend.py | 125 ------------------
backend/python/parler-tts/install.sh | 28 ----
backend/python/parler-tts/protogen.sh | 6 -
.../python/parler-tts/requirements-after.txt | 4 -
.../python/parler-tts/requirements-cpu.txt | 3 -
.../parler-tts/requirements-cublas11.txt | 5 -
.../parler-tts/requirements-cublas12.txt | 4 -
.../parler-tts/requirements-hipblas.txt | 5 -
.../python/parler-tts/requirements-intel.txt | 8 --
backend/python/parler-tts/requirements.txt | 4 -
backend/python/parler-tts/run.sh | 4 -
backend/python/parler-tts/test.py | 81 ------------
backend/python/parler-tts/test.sh | 6 -
16 files changed, 4 insertions(+), 343 deletions(-)
delete mode 100644 backend/python/parler-tts/Makefile
delete mode 100644 backend/python/parler-tts/backend.py
delete mode 100755 backend/python/parler-tts/install.sh
delete mode 100755 backend/python/parler-tts/protogen.sh
delete mode 100644 backend/python/parler-tts/requirements-after.txt
delete mode 100644 backend/python/parler-tts/requirements-cpu.txt
delete mode 100644 backend/python/parler-tts/requirements-cublas11.txt
delete mode 100644 backend/python/parler-tts/requirements-cublas12.txt
delete mode 100644 backend/python/parler-tts/requirements-hipblas.txt
delete mode 100644 backend/python/parler-tts/requirements-intel.txt
delete mode 100644 backend/python/parler-tts/requirements.txt
delete mode 100755 backend/python/parler-tts/run.sh
delete mode 100644 backend/python/parler-tts/test.py
delete mode 100755 backend/python/parler-tts/test.sh
diff --git a/Dockerfile b/Dockerfile
index 9f699ac9..625d2869 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -15,7 +15,7 @@ ARG TARGETARCH
ARG TARGETVARIANT
ENV DEBIAN_FRONTEND=noninteractive
-ENV EXTERNAL_GRPC_BACKENDS="coqui:/build/backend/python/coqui/run.sh,transformers:/build/backend/python/transformers/run.sh,rerankers:/build/backend/python/rerankers/run.sh,autogptq:/build/backend/python/autogptq/run.sh,bark:/build/backend/python/bark/run.sh,diffusers:/build/backend/python/diffusers/run.sh,faster-whisper:/build/backend/python/faster-whisper/run.sh,openvoice:/build/backend/python/openvoice/run.sh,kokoro:/build/backend/python/kokoro/run.sh,vllm:/build/backend/python/vllm/run.sh,exllama2:/build/backend/python/exllama2/run.sh,parler-tts:/build/backend/python/parler-tts/run.sh"
+ENV EXTERNAL_GRPC_BACKENDS="coqui:/build/backend/python/coqui/run.sh,transformers:/build/backend/python/transformers/run.sh,rerankers:/build/backend/python/rerankers/run.sh,autogptq:/build/backend/python/autogptq/run.sh,bark:/build/backend/python/bark/run.sh,diffusers:/build/backend/python/diffusers/run.sh,faster-whisper:/build/backend/python/faster-whisper/run.sh,openvoice:/build/backend/python/openvoice/run.sh,kokoro:/build/backend/python/kokoro/run.sh,vllm:/build/backend/python/vllm/run.sh,exllama2:/build/backend/python/exllama2/run.sh"
RUN apt-get update && \
apt-get install -y --no-install-recommends \
@@ -410,11 +410,8 @@ RUN if [[ ( "${IMAGE_TYPE}" == "extras ")]]; then \
RUN if [[ ( "${EXTRA_BACKENDS}" =~ "coqui" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
make -C backend/python/coqui \
; fi && \
- if [[ ( "${EXTRA_BACKENDS}" =~ "parler-tts" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
- make -C backend/python/parler-tts \
- ; fi && \
if [[ ( "${EXTRA_BACKENDS}" =~ "faster-whisper" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
- make -C backend/python/parler-tts \
+ make -C backend/python/faster-whisper \
; fi && \
if [[ ( "${EXTRA_BACKENDS}" =~ "diffusers" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
make -C backend/python/diffusers \
diff --git a/Makefile b/Makefile
index fc649c4f..04e280d8 100644
--- a/Makefile
+++ b/Makefile
@@ -533,10 +533,10 @@ protogen-go-clean:
$(RM) bin/*
.PHONY: protogen-python
-protogen-python: autogptq-protogen bark-protogen coqui-protogen diffusers-protogen exllama2-protogen rerankers-protogen transformers-protogen parler-tts-protogen kokoro-protogen vllm-protogen openvoice-protogen faster-whisper-protogen
+protogen-python: autogptq-protogen bark-protogen coqui-protogen diffusers-protogen exllama2-protogen rerankers-protogen transformers-protogen kokoro-protogen vllm-protogen openvoice-protogen faster-whisper-protogen
.PHONY: protogen-python-clean
-protogen-python-clean: autogptq-protogen-clean bark-protogen-clean coqui-protogen-clean diffusers-protogen-clean exllama2-protogen-clean rerankers-protogen-clean transformers-protogen-clean parler-tts-protogen-clean kokoro-protogen-clean vllm-protogen-clean openvoice-protogen-clean faster-whisper-protogen-clean
+protogen-python-clean: autogptq-protogen-clean bark-protogen-clean coqui-protogen-clean diffusers-protogen-clean exllama2-protogen-clean rerankers-protogen-clean transformers-protogen-clean kokoro-protogen-clean vllm-protogen-clean openvoice-protogen-clean faster-whisper-protogen-clean
.PHONY: autogptq-protogen
autogptq-protogen:
@@ -602,14 +602,6 @@ transformers-protogen:
transformers-protogen-clean:
$(MAKE) -C backend/python/transformers protogen-clean
-.PHONY: parler-tts-protogen
-parler-tts-protogen:
- $(MAKE) -C backend/python/parler-tts protogen
-
-.PHONY: parler-tts-protogen-clean
-parler-tts-protogen-clean:
- $(MAKE) -C backend/python/parler-tts protogen-clean
-
.PHONY: kokoro-protogen
kokoro-protogen:
$(MAKE) -C backend/python/kokoro protogen
@@ -645,7 +637,6 @@ prepare-extra-conda-environments: protogen-python
$(MAKE) -C backend/python/vllm
$(MAKE) -C backend/python/rerankers
$(MAKE) -C backend/python/transformers
- $(MAKE) -C backend/python/parler-tts
$(MAKE) -C backend/python/kokoro
$(MAKE) -C backend/python/openvoice
$(MAKE) -C backend/python/exllama2
diff --git a/backend/python/parler-tts/Makefile b/backend/python/parler-tts/Makefile
deleted file mode 100644
index 48da2f3f..00000000
--- a/backend/python/parler-tts/Makefile
+++ /dev/null
@@ -1,44 +0,0 @@
-export CONDA_ENV_PATH = "parler.yml"
-SKIP_CONDA?=0
-ifeq ($(BUILD_TYPE), cublas)
-export CONDA_ENV_PATH = "parler-nvidia.yml"
-endif
-
-# Intel GPU are supposed to have dependencies installed in the main python
-# environment, so we skip conda installation for SYCL builds.
-# https://github.com/intel/intel-extension-for-pytorch/issues/538
-ifneq (,$(findstring sycl,$(BUILD_TYPE)))
-export SKIP_CONDA=1
-endif
-
-.PHONY: parler-tts
-parler-tts:
- @echo "Installing $(CONDA_ENV_PATH)..."
- bash install.sh $(CONDA_ENV_PATH)
- $(MAKE) protogen
-
-.PHONY: run
-run: protogen
- @echo "Running transformers..."
- bash run.sh
- @echo "transformers run."
-
-.PHONY: test
-test: protogen
- @echo "Testing transformers..."
- bash test.sh
- @echo "transformers tested."
-
-.PHONY: protogen
-protogen: backend_pb2_grpc.py backend_pb2.py
-
-.PHONY: protogen-clean
-protogen-clean:
- $(RM) backend_pb2_grpc.py backend_pb2.py
-
-backend_pb2_grpc.py backend_pb2.py:
- bash protogen.sh
-
-.PHONY: clean
-clean: protogen-clean
- $(RM) -r venv __pycache__
\ No newline at end of file
diff --git a/backend/python/parler-tts/backend.py b/backend/python/parler-tts/backend.py
deleted file mode 100644
index 655990d7..00000000
--- a/backend/python/parler-tts/backend.py
+++ /dev/null
@@ -1,125 +0,0 @@
-#!/usr/bin/env python3
-"""
-Extra gRPC server for MusicgenForConditionalGeneration models.
-"""
-from concurrent import futures
-
-import argparse
-import signal
-import sys
-import os
-
-import time
-import backend_pb2
-import backend_pb2_grpc
-
-import grpc
-
-from scipy.io.wavfile import write as write_wav
-
-from parler_tts import ParlerTTSForConditionalGeneration
-from transformers import AutoTokenizer
-import soundfile as sf
-import torch
-
-_ONE_DAY_IN_SECONDS = 60 * 60 * 24
-
-# If MAX_WORKERS are specified in the environment use it, otherwise default to 1
-MAX_WORKERS = int(os.environ.get('PYTHON_GRPC_MAX_WORKERS', '1'))
-
-# Implement the BackendServicer class with the service methods
-class BackendServicer(backend_pb2_grpc.BackendServicer):
- """
- A gRPC servicer for the backend service.
-
- This class implements the gRPC methods for the backend service, including Health, LoadModel, and Embedding.
- """
- def Health(self, request, context):
- """
- A gRPC method that returns the health status of the backend service.
-
- Args:
- request: A HealthRequest object that contains the request parameters.
- context: A grpc.ServicerContext object that provides information about the RPC.
-
- Returns:
- A Reply object that contains the health status of the backend service.
- """
- return backend_pb2.Reply(message=bytes("OK", 'utf-8'))
-
- def LoadModel(self, request, context):
- """
- A gRPC method that loads a model into memory.
-
- Args:
- request: A LoadModelRequest object that contains the request parameters.
- context: A grpc.ServicerContext object that provides information about the RPC.
-
- Returns:
- A Result object that contains the result of the LoadModel operation.
- """
- model_name = request.Model
- device = "cuda:0" if torch.cuda.is_available() else "cpu"
- try:
- self.model = ParlerTTSForConditionalGeneration.from_pretrained(model_name).to(device)
- self.tokenizer = AutoTokenizer.from_pretrained(model_name)
- except Exception as err:
- return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
-
- return backend_pb2.Result(message="Model loaded successfully", success=True)
-
- def TTS(self, request, context):
- model_name = request.model
- voice = request.voice
- if voice == "":
- voice = "A female speaker with a slightly low-pitched voice delivers her words quite expressively, in a very confined sounding environment with clear audio quality. She speaks very fast."
- if model_name == "":
- return backend_pb2.Result(success=False, message="request.model is required")
- try:
- device = "cuda:0" if torch.cuda.is_available() else "cpu"
- input_ids = self.tokenizer(voice, return_tensors="pt").input_ids.to(device)
- prompt_input_ids = self.tokenizer(request.text, return_tensors="pt").input_ids.to(device)
-
- generation = self.model.generate(input_ids=input_ids, prompt_input_ids=prompt_input_ids)
- audio_arr = generation.cpu().numpy().squeeze()
- print("[parler-tts] TTS generated!", file=sys.stderr)
- sf.write(request.dst, audio_arr, self.model.config.sampling_rate)
- print("[parler-tts] TTS saved to", request.dst, file=sys.stderr)
- print("[parler-tts] TTS for", file=sys.stderr)
- print(request, file=sys.stderr)
- except Exception as err:
- return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
- return backend_pb2.Result(success=True)
-
-
-def serve(address):
- server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS))
- backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server)
- server.add_insecure_port(address)
- server.start()
- print("[parler-tts] Server started. Listening on: " + address, file=sys.stderr)
-
- # Define the signal handler function
- def signal_handler(sig, frame):
- print("[parler-tts] Received termination signal. Shutting down...")
- server.stop(0)
- sys.exit(0)
-
- # Set the signal handlers for SIGINT and SIGTERM
- signal.signal(signal.SIGINT, signal_handler)
- signal.signal(signal.SIGTERM, signal_handler)
-
- try:
- while True:
- time.sleep(_ONE_DAY_IN_SECONDS)
- except KeyboardInterrupt:
- server.stop(0)
-
-if __name__ == "__main__":
- parser = argparse.ArgumentParser(description="Run the gRPC server.")
- parser.add_argument(
- "--addr", default="localhost:50051", help="The address to bind the server to."
- )
- args = parser.parse_args()
- print(f"[parler-tts] startup: {args}", file=sys.stderr)
- serve(args.addr)
diff --git a/backend/python/parler-tts/install.sh b/backend/python/parler-tts/install.sh
deleted file mode 100755
index 14df9b14..00000000
--- a/backend/python/parler-tts/install.sh
+++ /dev/null
@@ -1,28 +0,0 @@
-#!/bin/bash
-set -e
-
-source $(dirname $0)/../common/libbackend.sh
-
-# This is here because the Intel pip index is broken and returns 200 status codes for every package name, it just doesn't return any package links.
-# This makes uv think that the package exists in the Intel pip index, and by default it stops looking at other pip indexes once it finds a match.
-# We need uv to continue falling through to the pypi default index to find optimum[openvino] in the pypi index
-# the --upgrade actually allows us to *downgrade* torch to the version provided in the Intel pip index
-if [ "x${BUILD_PROFILE}" == "xintel" ]; then
- EXTRA_PIP_INSTALL_FLAGS+=" --upgrade --index-strategy=unsafe-first-match"
-fi
-
-
-installRequirements
-
-
-# https://github.com/descriptinc/audiotools/issues/101
-# incompatible protobuf versions.
-PYDIR=python3.10
-pyenv="${MY_DIR}/venv/lib/${PYDIR}/site-packages/google/protobuf/internal/"
-
-if [ ! -d ${pyenv} ]; then
- echo "(parler-tts/install.sh): Error: ${pyenv} does not exist"
- exit 1
-fi
-
-curl -L https://raw.githubusercontent.com/protocolbuffers/protobuf/main/python/google/protobuf/internal/builder.py -o ${pyenv}/builder.py
diff --git a/backend/python/parler-tts/protogen.sh b/backend/python/parler-tts/protogen.sh
deleted file mode 100755
index 32f39fbb..00000000
--- a/backend/python/parler-tts/protogen.sh
+++ /dev/null
@@ -1,6 +0,0 @@
-#!/bin/bash
-set -e
-
-source $(dirname $0)/../common/libbackend.sh
-
-python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto
\ No newline at end of file
diff --git a/backend/python/parler-tts/requirements-after.txt b/backend/python/parler-tts/requirements-after.txt
deleted file mode 100644
index 702074de..00000000
--- a/backend/python/parler-tts/requirements-after.txt
+++ /dev/null
@@ -1,4 +0,0 @@
-git+https://github.com/huggingface/parler-tts.git@8e465f1b5fcd223478e07175cb40494d19ffbe17
-llvmlite==0.43.0
-numba==0.60.0
-grpcio-tools==1.42.0
\ No newline at end of file
diff --git a/backend/python/parler-tts/requirements-cpu.txt b/backend/python/parler-tts/requirements-cpu.txt
deleted file mode 100644
index 2021fc20..00000000
--- a/backend/python/parler-tts/requirements-cpu.txt
+++ /dev/null
@@ -1,3 +0,0 @@
-transformers
-accelerate
-torch==2.4.1
\ No newline at end of file
diff --git a/backend/python/parler-tts/requirements-cublas11.txt b/backend/python/parler-tts/requirements-cublas11.txt
deleted file mode 100644
index 9f8fe9ff..00000000
--- a/backend/python/parler-tts/requirements-cublas11.txt
+++ /dev/null
@@ -1,5 +0,0 @@
---extra-index-url https://download.pytorch.org/whl/cu118
-torch==2.4.1+cu118
-torchaudio==2.4.1+cu118
-transformers
-accelerate
\ No newline at end of file
diff --git a/backend/python/parler-tts/requirements-cublas12.txt b/backend/python/parler-tts/requirements-cublas12.txt
deleted file mode 100644
index 53716949..00000000
--- a/backend/python/parler-tts/requirements-cublas12.txt
+++ /dev/null
@@ -1,4 +0,0 @@
-torch==2.4.1
-torchaudio==2.4.1
-transformers
-accelerate
\ No newline at end of file
diff --git a/backend/python/parler-tts/requirements-hipblas.txt b/backend/python/parler-tts/requirements-hipblas.txt
deleted file mode 100644
index b8758537..00000000
--- a/backend/python/parler-tts/requirements-hipblas.txt
+++ /dev/null
@@ -1,5 +0,0 @@
---extra-index-url https://download.pytorch.org/whl/rocm6.0
-torch==2.3.0+rocm6.0
-torchaudio==2.3.0+rocm6.0
-transformers
-accelerate
diff --git a/backend/python/parler-tts/requirements-intel.txt b/backend/python/parler-tts/requirements-intel.txt
deleted file mode 100644
index f6814bd9..00000000
--- a/backend/python/parler-tts/requirements-intel.txt
+++ /dev/null
@@ -1,8 +0,0 @@
---extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
-intel-extension-for-pytorch==2.3.110+xpu
-torch==2.3.1+cxx11.abi
-torchaudio==2.3.1+cxx11.abi
-oneccl_bind_pt==2.3.100+xpu
-optimum[openvino]
-transformers
-accelerate
\ No newline at end of file
diff --git a/backend/python/parler-tts/requirements.txt b/backend/python/parler-tts/requirements.txt
deleted file mode 100644
index e6ba016b..00000000
--- a/backend/python/parler-tts/requirements.txt
+++ /dev/null
@@ -1,4 +0,0 @@
-grpcio==1.69.0
-certifi
-llvmlite==0.43.0
-setuptools
\ No newline at end of file
diff --git a/backend/python/parler-tts/run.sh b/backend/python/parler-tts/run.sh
deleted file mode 100755
index 375c07e5..00000000
--- a/backend/python/parler-tts/run.sh
+++ /dev/null
@@ -1,4 +0,0 @@
-#!/bin/bash
-source $(dirname $0)/../common/libbackend.sh
-
-startBackend $@
\ No newline at end of file
diff --git a/backend/python/parler-tts/test.py b/backend/python/parler-tts/test.py
deleted file mode 100644
index 639d43a9..00000000
--- a/backend/python/parler-tts/test.py
+++ /dev/null
@@ -1,81 +0,0 @@
-"""
-A test script to test the gRPC service
-"""
-import unittest
-import subprocess
-import time
-import backend_pb2
-import backend_pb2_grpc
-
-import grpc
-
-
-class TestBackendServicer(unittest.TestCase):
- """
- TestBackendServicer is the class that tests the gRPC service
- """
- def setUp(self):
- """
- This method sets up the gRPC service by starting the server
- """
- self.service = subprocess.Popen(["python3", "backend.py", "--addr", "localhost:50051"])
- time.sleep(10)
-
- def tearDown(self) -> None:
- """
- This method tears down the gRPC service by terminating the server
- """
- self.service.terminate()
- self.service.wait()
-
- def test_server_startup(self):
- """
- This method tests if the server starts up successfully
- """
- try:
- self.setUp()
- with grpc.insecure_channel("localhost:50051") as channel:
- stub = backend_pb2_grpc.BackendStub(channel)
- response = stub.Health(backend_pb2.HealthMessage())
- self.assertEqual(response.message, b'OK')
- except Exception as err:
- print(err)
- self.fail("Server failed to start")
- finally:
- self.tearDown()
-
- def test_load_model(self):
- """
- This method tests if the model is loaded successfully
- """
- try:
- self.setUp()
- with grpc.insecure_channel("localhost:50051") as channel:
- stub = backend_pb2_grpc.BackendStub(channel)
- response = stub.LoadModel(backend_pb2.ModelOptions(Model="parler-tts/parler_tts_mini_v0.1"))
- self.assertTrue(response.success)
- self.assertEqual(response.message, "Model loaded successfully")
- except Exception as err:
- print(err)
- self.fail("LoadModel service failed")
- finally:
- self.tearDown()
-
- def test_tts(self):
- """
- This method tests if the embeddings are generated successfully
- """
- try:
- self.setUp()
- with grpc.insecure_channel("localhost:50051") as channel:
- stub = backend_pb2_grpc.BackendStub(channel)
- response = stub.LoadModel(backend_pb2.ModelOptions(Model="parler-tts/parler_tts_mini_v0.1"))
- self.assertTrue(response.success)
- tts_request = backend_pb2.TTSRequest(text="Hey, how are you doing today?")
- tts_response = stub.TTS(tts_request)
- self.assertIsNotNone(tts_response)
- except Exception as err:
- print(err)
- self.fail("TTS service failed")
- finally:
- self.tearDown()
\ No newline at end of file
diff --git a/backend/python/parler-tts/test.sh b/backend/python/parler-tts/test.sh
deleted file mode 100755
index 6940b066..00000000
--- a/backend/python/parler-tts/test.sh
+++ /dev/null
@@ -1,6 +0,0 @@
-#!/bin/bash
-set -e
-
-source $(dirname $0)/../common/libbackend.sh
-
-runUnittests
From 073eaec7295fe1fc5c9f2297fc6de6c0a85c36a1 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Thu, 23 Jan 2025 10:00:36 +0100
Subject: [PATCH 28/29] chore(openvoice): drop backend (#4673)
The project (MeloTTS) has been quiet for a long time; newer backends are
much more performant and offer better quality overall.
Signed-off-by: Ettore Di Giacinto
---
Dockerfile | 5 +-
Makefile | 13 +-
backend/python/openvoice/Makefile | 25 ---
backend/python/openvoice/backend.py | 158 ------------------
backend/python/openvoice/install.sh | 16 --
backend/python/openvoice/requirements-cpu.txt | 7 -
.../openvoice/requirements-cublas11.txt | 8 -
.../openvoice/requirements-cublas12.txt | 7 -
.../python/openvoice/requirements-hipblas.txt | 8 -
.../python/openvoice/requirements-intel.txt | 24 ---
backend/python/openvoice/requirements.txt | 17 --
backend/python/openvoice/run.sh | 4 -
backend/python/openvoice/test.py | 82 ---------
backend/python/openvoice/test.sh | 12 --
14 files changed, 3 insertions(+), 383 deletions(-)
delete mode 100644 backend/python/openvoice/Makefile
delete mode 100755 backend/python/openvoice/backend.py
delete mode 100755 backend/python/openvoice/install.sh
delete mode 100644 backend/python/openvoice/requirements-cpu.txt
delete mode 100644 backend/python/openvoice/requirements-cublas11.txt
delete mode 100644 backend/python/openvoice/requirements-cublas12.txt
delete mode 100644 backend/python/openvoice/requirements-hipblas.txt
delete mode 100644 backend/python/openvoice/requirements-intel.txt
delete mode 100644 backend/python/openvoice/requirements.txt
delete mode 100755 backend/python/openvoice/run.sh
delete mode 100644 backend/python/openvoice/test.py
delete mode 100755 backend/python/openvoice/test.sh
diff --git a/Dockerfile b/Dockerfile
index 625d2869..566e03bc 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -15,7 +15,7 @@ ARG TARGETARCH
ARG TARGETVARIANT
ENV DEBIAN_FRONTEND=noninteractive
-ENV EXTERNAL_GRPC_BACKENDS="coqui:/build/backend/python/coqui/run.sh,transformers:/build/backend/python/transformers/run.sh,rerankers:/build/backend/python/rerankers/run.sh,autogptq:/build/backend/python/autogptq/run.sh,bark:/build/backend/python/bark/run.sh,diffusers:/build/backend/python/diffusers/run.sh,faster-whisper:/build/backend/python/faster-whisper/run.sh,openvoice:/build/backend/python/openvoice/run.sh,kokoro:/build/backend/python/kokoro/run.sh,vllm:/build/backend/python/vllm/run.sh,exllama2:/build/backend/python/exllama2/run.sh"
+ENV EXTERNAL_GRPC_BACKENDS="coqui:/build/backend/python/coqui/run.sh,transformers:/build/backend/python/transformers/run.sh,rerankers:/build/backend/python/rerankers/run.sh,autogptq:/build/backend/python/autogptq/run.sh,bark:/build/backend/python/bark/run.sh,diffusers:/build/backend/python/diffusers/run.sh,faster-whisper:/build/backend/python/faster-whisper/run.sh,kokoro:/build/backend/python/kokoro/run.sh,vllm:/build/backend/python/vllm/run.sh,exllama2:/build/backend/python/exllama2/run.sh"
RUN apt-get update && \
apt-get install -y --no-install-recommends \
@@ -420,9 +420,6 @@ RUN if [[ ( "${EXTRA_BACKENDS}" =~ "coqui" || -z "${EXTRA_BACKENDS}" ) && "$IMAG
RUN if [[ ( "${EXTRA_BACKENDS}" =~ "kokoro" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
make -C backend/python/kokoro \
; fi && \
- if [[ ( "${EXTRA_BACKENDS}" =~ "openvoice" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
- make -C backend/python/openvoice \
- ; fi && \
if [[ ( "${EXTRA_BACKENDS}" =~ "exllama2" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
make -C backend/python/exllama2 \
; fi && \
diff --git a/Makefile b/Makefile
index 04e280d8..9c4f3778 100644
--- a/Makefile
+++ b/Makefile
@@ -533,10 +533,10 @@ protogen-go-clean:
$(RM) bin/*
.PHONY: protogen-python
-protogen-python: autogptq-protogen bark-protogen coqui-protogen diffusers-protogen exllama2-protogen rerankers-protogen transformers-protogen kokoro-protogen vllm-protogen openvoice-protogen faster-whisper-protogen
+protogen-python: autogptq-protogen bark-protogen coqui-protogen diffusers-protogen exllama2-protogen rerankers-protogen transformers-protogen kokoro-protogen vllm-protogen faster-whisper-protogen
.PHONY: protogen-python-clean
-protogen-python-clean: autogptq-protogen-clean bark-protogen-clean coqui-protogen-clean diffusers-protogen-clean exllama2-protogen-clean rerankers-protogen-clean transformers-protogen-clean kokoro-protogen-clean vllm-protogen-clean openvoice-protogen-clean faster-whisper-protogen-clean
+protogen-python-clean: autogptq-protogen-clean bark-protogen-clean coqui-protogen-clean diffusers-protogen-clean exllama2-protogen-clean rerankers-protogen-clean transformers-protogen-clean kokoro-protogen-clean vllm-protogen-clean faster-whisper-protogen-clean
.PHONY: autogptq-protogen
autogptq-protogen:
@@ -610,14 +610,6 @@ kokoro-protogen:
kokoro-protogen-clean:
$(MAKE) -C backend/python/kokoro protogen-clean
-.PHONY: openvoice-protogen
-openvoice-protogen:
- $(MAKE) -C backend/python/openvoice protogen
-
-.PHONY: openvoice-protogen-clean
-openvoice-protogen-clean:
- $(MAKE) -C backend/python/openvoice protogen-clean
-
.PHONY: vllm-protogen
vllm-protogen:
$(MAKE) -C backend/python/vllm protogen
@@ -638,7 +630,6 @@ prepare-extra-conda-environments: protogen-python
$(MAKE) -C backend/python/rerankers
$(MAKE) -C backend/python/transformers
$(MAKE) -C backend/python/kokoro
- $(MAKE) -C backend/python/openvoice
$(MAKE) -C backend/python/exllama2
prepare-test-extra: protogen-python
diff --git a/backend/python/openvoice/Makefile b/backend/python/openvoice/Makefile
deleted file mode 100644
index a187a00f..00000000
--- a/backend/python/openvoice/Makefile
+++ /dev/null
@@ -1,25 +0,0 @@
-.DEFAULT_GOAL := install
-
-.PHONY: install
-install: protogen
- bash install.sh
-
-.PHONY: protogen
-protogen: backend_pb2_grpc.py backend_pb2.py
-
-.PHONY: protogen-clean
-protogen-clean:
- $(RM) backend_pb2_grpc.py backend_pb2.py
-
-backend_pb2_grpc.py backend_pb2.py:
- python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto
-
-.PHONY: clean
-clean: protogen-clean
- rm -rf venv __pycache__
-
-.PHONY: test
-test: protogen
- @echo "Testing openvoice..."
- bash test.sh
- @echo "openvoice tested."
\ No newline at end of file
diff --git a/backend/python/openvoice/backend.py b/backend/python/openvoice/backend.py
deleted file mode 100755
index 7dde08cf..00000000
--- a/backend/python/openvoice/backend.py
+++ /dev/null
@@ -1,158 +0,0 @@
-#!/usr/bin/env python3
-"""
-Extra gRPC server for OpenVoice models.
-"""
-from concurrent import futures
-
-import argparse
-import signal
-import sys
-import os
-import torch
-from openvoice import se_extractor
-from openvoice.api import ToneColorConverter
-from melo.api import TTS
-
-import time
-import backend_pb2
-import backend_pb2_grpc
-
-import grpc
-
-
-_ONE_DAY_IN_SECONDS = 60 * 60 * 24
-
-# If MAX_WORKERS are specified in the environment use it, otherwise default to 1
-MAX_WORKERS = int(os.environ.get('PYTHON_GRPC_MAX_WORKERS', '1'))
-
-# Implement the BackendServicer class with the service methods
-class BackendServicer(backend_pb2_grpc.BackendServicer):
- """
- A gRPC servicer for the backend service.
-
- This class implements the gRPC methods for the backend service, including Health, LoadModel, and Embedding.
- """
- def Health(self, request, context):
- """
- A gRPC method that returns the health status of the backend service.
-
- Args:
- request: A HealthRequest object that contains the request parameters.
- context: A grpc.ServicerContext object that provides information about the RPC.
-
- Returns:
- A Reply object that contains the health status of the backend service.
- """
- return backend_pb2.Reply(message=bytes("OK", 'utf-8'))
-
- def LoadModel(self, request, context):
- """
- A gRPC method that loads a model into memory.
-
- Args:
- request: A LoadModelRequest object that contains the request parameters.
- context: A grpc.ServicerContext object that provides information about the RPC.
-
- Returns:
- A Result object that contains the result of the LoadModel operation.
- """
- model_name = request.Model
- try:
-
- self.clonedVoice = False
- # Assume directory from request.ModelFile.
- # Only if request.LoraAdapter it's not an absolute path
- if request.AudioPath and request.ModelFile != "" and not os.path.isabs(request.AudioPath):
- # get base path of modelFile
- modelFileBase = os.path.dirname(request.ModelFile)
- request.AudioPath = os.path.join(modelFileBase, request.AudioPath)
- if request.AudioPath != "":
- self.clonedVoice = True
-
- self.modelpath = request.ModelFile
- self.speaker = request.Type
- self.ClonedVoicePath = request.AudioPath
-
- ckpt_converter = request.Model+'/converter'
- device = "cuda:0" if torch.cuda.is_available() else "cpu"
- self.device = device
- self.tone_color_converter = None
- if self.clonedVoice:
- self.tone_color_converter = ToneColorConverter(f'{ckpt_converter}/config.json', device=device)
- self.tone_color_converter.load_ckpt(f'{ckpt_converter}/checkpoint.pth')
-
- except Exception as err:
- return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
-
- return backend_pb2.Result(message="Model loaded successfully", success=True)
-
- def TTS(self, request, context):
- model_name = request.model
- if model_name == "":
- return backend_pb2.Result(success=False, message="request.model is required")
- try:
- # Speed is adjustable
- speed = 1.0
- voice = "EN"
- if request.voice:
- voice = request.voice
- model = TTS(language=voice, device=self.device)
- speaker_ids = model.hps.data.spk2id
- speaker_key = self.speaker
- modelpath = self.modelpath
- for s in speaker_ids.keys():
- print(f"Speaker: {s} - ID: {speaker_ids[s]}")
- speaker_id = speaker_ids[speaker_key]
- speaker_key = speaker_key.lower().replace('_', '-')
- source_se = torch.load(f'{modelpath}/base_speakers/ses/{speaker_key}.pth', map_location=self.device)
- model.tts_to_file(request.text, speaker_id, request.dst, speed=speed)
- if self.clonedVoice:
- reference_speaker = self.ClonedVoicePath
- target_se, audio_name = se_extractor.get_se(reference_speaker, self.tone_color_converter, vad=False)
- # Run the tone color converter
- encode_message = "@MyShell"
- self.tone_color_converter.convert(
- audio_src_path=request.dst,
- src_se=source_se,
- tgt_se=target_se,
- output_path=request.dst,
- message=encode_message)
-
- print("[OpenVoice] TTS generated!", file=sys.stderr)
- print("[OpenVoice] TTS saved to", request.dst, file=sys.stderr)
- print(request, file=sys.stderr)
- except Exception as err:
- return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
- return backend_pb2.Result(success=True)
-
-def serve(address):
- server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS))
- backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server)
- server.add_insecure_port(address)
- server.start()
- print("[OpenVoice] Server started. Listening on: " + address, file=sys.stderr)
-
- # Define the signal handler function
- def signal_handler(sig, frame):
- print("[OpenVoice] Received termination signal. Shutting down...")
- server.stop(0)
- sys.exit(0)
-
- # Set the signal handlers for SIGINT and SIGTERM
- signal.signal(signal.SIGINT, signal_handler)
- signal.signal(signal.SIGTERM, signal_handler)
-
- try:
- while True:
- time.sleep(_ONE_DAY_IN_SECONDS)
- except KeyboardInterrupt:
- server.stop(0)
-
-if __name__ == "__main__":
- parser = argparse.ArgumentParser(description="Run the gRPC server.")
- parser.add_argument(
- "--addr", default="localhost:50051", help="The address to bind the server to."
- )
- args = parser.parse_args()
- print(f"[OpenVoice] startup: {args}", file=sys.stderr)
- serve(args.addr)
diff --git a/backend/python/openvoice/install.sh b/backend/python/openvoice/install.sh
deleted file mode 100755
index 24db146b..00000000
--- a/backend/python/openvoice/install.sh
+++ /dev/null
@@ -1,16 +0,0 @@
-#!/bin/bash
-set -e
-
-source $(dirname $0)/../common/libbackend.sh
-
-# This is here because the Intel pip index is broken and returns 200 status codes for every package name, it just doesn't return any package links.
-# This makes uv think that the package exists in the Intel pip index, and by default it stops looking at other pip indexes once it finds a match.
-# We need uv to continue falling through to the pypi default index to find optimum[openvino] in the pypi index
-# the --upgrade actually allows us to *downgrade* torch to the version provided in the Intel pip index
-if [ "x${BUILD_PROFILE}" == "xintel" ]; then
- EXTRA_PIP_INSTALL_FLAGS+=" --upgrade --index-strategy=unsafe-first-match"
-fi
-
-installRequirements
-
-python -m unidic download
diff --git a/backend/python/openvoice/requirements-cpu.txt b/backend/python/openvoice/requirements-cpu.txt
deleted file mode 100644
index dd2eb221..00000000
--- a/backend/python/openvoice/requirements-cpu.txt
+++ /dev/null
@@ -1,7 +0,0 @@
-torch==2.4.1
-git+https://github.com/myshell-ai/MeloTTS.git
-git+https://github.com/myshell-ai/OpenVoice.git
-whisper-timestamped
-pydub==0.25.1
-wavmark==0.0.3
-eng_to_ipa==0.0.2
\ No newline at end of file
diff --git a/backend/python/openvoice/requirements-cublas11.txt b/backend/python/openvoice/requirements-cublas11.txt
deleted file mode 100644
index 84ecc344..00000000
--- a/backend/python/openvoice/requirements-cublas11.txt
+++ /dev/null
@@ -1,8 +0,0 @@
---extra-index-url https://download.pytorch.org/whl/cu118
-torch==2.4.1+cu118
-git+https://github.com/myshell-ai/MeloTTS.git
-git+https://github.com/myshell-ai/OpenVoice.git
-whisper-timestamped
-pydub==0.25.1
-wavmark==0.0.3
-eng_to_ipa==0.0.2
\ No newline at end of file
diff --git a/backend/python/openvoice/requirements-cublas12.txt b/backend/python/openvoice/requirements-cublas12.txt
deleted file mode 100644
index dd2eb221..00000000
--- a/backend/python/openvoice/requirements-cublas12.txt
+++ /dev/null
@@ -1,7 +0,0 @@
-torch==2.4.1
-git+https://github.com/myshell-ai/MeloTTS.git
-git+https://github.com/myshell-ai/OpenVoice.git
-whisper-timestamped
-pydub==0.25.1
-wavmark==0.0.3
-eng_to_ipa==0.0.2
\ No newline at end of file
diff --git a/backend/python/openvoice/requirements-hipblas.txt b/backend/python/openvoice/requirements-hipblas.txt
deleted file mode 100644
index 4c2d6649..00000000
--- a/backend/python/openvoice/requirements-hipblas.txt
+++ /dev/null
@@ -1,8 +0,0 @@
---extra-index-url https://download.pytorch.org/whl/rocm6.0
-torch==2.4.1+rocm6.0
-git+https://github.com/myshell-ai/MeloTTS.git
-git+https://github.com/myshell-ai/OpenVoice.git
-whisper-timestamped
-pydub==0.25.1
-wavmark==0.0.3
-eng_to_ipa==0.0.2
\ No newline at end of file
diff --git a/backend/python/openvoice/requirements-intel.txt b/backend/python/openvoice/requirements-intel.txt
deleted file mode 100644
index 39b2b8b0..00000000
--- a/backend/python/openvoice/requirements-intel.txt
+++ /dev/null
@@ -1,24 +0,0 @@
---extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
-intel-extension-for-pytorch==2.3.110+xpu
-torch==2.3.1+cxx11.abi
-torchaudio==2.3.1+cxx11.abi
-oneccl_bind_pt==2.3.100+xpu
-optimum[openvino]
-grpcio==1.69.0
-protobuf
-librosa==0.9.1
-faster-whisper==0.9.0
-pydub==0.25.1
-wavmark==0.0.3
-eng_to_ipa==0.0.2
-inflect==7.0.0
-unidecode==1.3.7
-whisper-timestamped==1.14.2
-openai
-python-dotenv
-pypinyin==0.50.0
-cn2an==0.5.22
-jieba==0.42.1
-langid==1.1.6
-git+https://github.com/myshell-ai/MeloTTS.git
-git+https://github.com/myshell-ai/OpenVoice.git
diff --git a/backend/python/openvoice/requirements.txt b/backend/python/openvoice/requirements.txt
deleted file mode 100644
index 62b886bb..00000000
--- a/backend/python/openvoice/requirements.txt
+++ /dev/null
@@ -1,17 +0,0 @@
-grpcio==1.69.0
-protobuf
-librosa
-faster-whisper
-inflect
-unidecode
-openai
-python-dotenv
-pypinyin
-cn2an==0.5.22
-numpy==1.22.0
-networkx==2.8.8
-jieba==0.42.1
-gradio==5.9.1
-langid==1.1.6
-llvmlite==0.43.0
-setuptools
\ No newline at end of file
diff --git a/backend/python/openvoice/run.sh b/backend/python/openvoice/run.sh
deleted file mode 100755
index 375c07e5..00000000
--- a/backend/python/openvoice/run.sh
+++ /dev/null
@@ -1,4 +0,0 @@
-#!/bin/bash
-source $(dirname $0)/../common/libbackend.sh
-
-startBackend $@
\ No newline at end of file
diff --git a/backend/python/openvoice/test.py b/backend/python/openvoice/test.py
deleted file mode 100644
index 82f08785..00000000
--- a/backend/python/openvoice/test.py
+++ /dev/null
@@ -1,82 +0,0 @@
-"""
-A test script to test the gRPC service
-"""
-import unittest
-import subprocess
-import time
-import backend_pb2
-import backend_pb2_grpc
-
-import grpc
-
-
-class TestBackendServicer(unittest.TestCase):
- """
- TestBackendServicer is the class that tests the gRPC service
- """
- def setUp(self):
- """
- This method sets up the gRPC service by starting the server
- """
- self.service = subprocess.Popen(["python3", "backend.py", "--addr", "localhost:50051"])
- time.sleep(30)
-
- def tearDown(self) -> None:
- """
- This method tears down the gRPC service by terminating the server
- """
- self.service.terminate()
- self.service.wait()
-
- def test_server_startup(self):
- """
- This method tests if the server starts up successfully
- """
- try:
- self.setUp()
- with grpc.insecure_channel("localhost:50051") as channel:
- stub = backend_pb2_grpc.BackendStub(channel)
- response = stub.Health(backend_pb2.HealthMessage())
- self.assertEqual(response.message, b'OK')
- except Exception as err:
- print(err)
- self.fail("Server failed to start")
- finally:
- self.tearDown()
-
- def test_load_model(self):
- """
- This method tests if the model is loaded successfully
- """
- try:
- self.setUp()
- with grpc.insecure_channel("localhost:50051") as channel:
- stub = backend_pb2_grpc.BackendStub(channel)
- response = stub.LoadModel(backend_pb2.ModelOptions(Model="checkpoints_v2",
- Type="en-us"))
- self.assertTrue(response.success)
- self.assertEqual(response.message, "Model loaded successfully")
- except Exception as err:
- print(err)
- self.fail("LoadModel service failed")
- finally:
- self.tearDown()
-
- def test_tts(self):
- """
- This method tests if the embeddings are generated successfully
- """
- try:
- self.setUp()
- with grpc.insecure_channel("localhost:50051") as channel:
- stub = backend_pb2_grpc.BackendStub(channel)
- response = stub.LoadModel(backend_pb2.ModelOptions(Model="dingzhen"))
- self.assertTrue(response.success)
- tts_request = backend_pb2.TTSRequest(text="80s TV news production music hit for tonight's biggest story", voice="EN")
- tts_response = stub.TTS(tts_request)
- self.assertIsNotNone(tts_response)
- except Exception as err:
- print(err)
- self.fail("TTS service failed")
- finally:
- self.tearDown()
\ No newline at end of file
diff --git a/backend/python/openvoice/test.sh b/backend/python/openvoice/test.sh
deleted file mode 100755
index 6c0a840f..00000000
--- a/backend/python/openvoice/test.sh
+++ /dev/null
@@ -1,12 +0,0 @@
-#!/bin/bash
-set -e
-
-source $(dirname $0)/../common/libbackend.sh
-
-# Download checkpoints if not present
-if [ ! -d "checkpoints_v2" ]; then
-  wget https://myshell-public-repo-host.s3.amazonaws.com/openvoice/checkpoints_v2_0417.zip -O checkpoints_v2.zip
-  unzip checkpoints_v2.zip
-fi
-
-runUnittests
From eef80b9880f6d5bc875c0a2b57d289fde7248566 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Thu, 23 Jan 2025 10:02:57 +0100
Subject: [PATCH 29/29] chore(ci): cleanup tests
Signed-off-by: Ettore Di Giacinto
---
.github/workflows/test-extra.yml | 51 --------------------------------
1 file changed, 51 deletions(-)
diff --git a/.github/workflows/test-extra.yml b/.github/workflows/test-extra.yml
index e99ea516..7f2445c8 100644
--- a/.github/workflows/test-extra.yml
+++ b/.github/workflows/test-extra.yml
@@ -78,57 +78,6 @@ jobs:
           make --jobs=5 --output-sync=target -C backend/python/diffusers
           make --jobs=5 --output-sync=target -C backend/python/diffusers test
-  tests-parler-tts:
-    runs-on: ubuntu-latest
-    steps:
-      - name: Clone
-        uses: actions/checkout@v4
-        with:
-          submodules: true
-      - name: Dependencies
-        run: |
-          sudo apt-get update
-          sudo apt-get install build-essential ffmpeg
-          # Install UV
-          curl -LsSf https://astral.sh/uv/install.sh | sh
-          sudo apt-get install -y ca-certificates cmake curl patch python3-pip
-          sudo apt-get install -y libopencv-dev
-          pip install --user --no-cache-dir grpcio-tools==1.64.1
-
-      - name: Test parler-tts
-        run: |
-          make --jobs=5 --output-sync=target -C backend/python/parler-tts
-          make --jobs=5 --output-sync=target -C backend/python/parler-tts test
-      - name: Setup tmate session if tests fail
-        if: ${{ failure() }}
-        uses: mxschmitt/action-tmate@v3.19
-        with:
-          detached: true
-          connect-timeout-seconds: 180
-          limit-access-to-actor: true
-
-  tests-openvoice:
-    runs-on: ubuntu-latest
-    steps:
-      - name: Clone
-        uses: actions/checkout@v4
-        with:
-          submodules: true
-      - name: Dependencies
-        run: |
-          sudo apt-get update
-          sudo apt-get install build-essential ffmpeg
-          # Install UV
-          curl -LsSf https://astral.sh/uv/install.sh | sh
-          sudo apt-get install -y ca-certificates cmake curl patch python3-pip
-          sudo apt-get install -y libopencv-dev
-          pip install --user --no-cache-dir grpcio-tools==1.64.1
-
-      - name: Test openvoice
-        run: |
-          make --jobs=5 --output-sync=target -C backend/python/openvoice
-          make --jobs=5 --output-sync=target -C backend/python/openvoice test
-
   # tests-transformers-musicgen:
   #   runs-on: ubuntu-latest
   #   steps: