feat(backends): Drop bert.cpp (#4272)

* feat(backends): Drop bert.cpp

  use llama.cpp 3.2 as a drop-in replacement for bert.cpp

  Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* chore(tests): make test more robust

  Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

---------

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-05-30 23:44:59 +00:00 · 2024-11-27 16:34:28 +01:00 · 2024-11-27 16:34:28 +01:00 · 3c3050f68e
commit 3c3050f68e
parent 1688ba7f2a
13 changed files with 40 additions and 184 deletions
--- a/gallery/bert-embeddings.yaml
+++ b/gallery/bert-embeddings.yaml
@@ -1,12 +0,0 @@
----
-name: "bert-embeddings"
-
-config_file: |
-    parameters:
-      model: bert-MiniLM-L6-v2q4_0.bin
-    backend: bert-embeddings
-    embeddings: true
-files:
-  - filename: "bert-MiniLM-L6-v2q4_0.bin"
-    sha256: "a5a174d8772c8a569faf9f3136c441f2c3855b5bf35ed32274294219533feaad"
-    uri: "https://huggingface.co/mudler/all-MiniLM-L6-v2/resolve/main/ggml-model-q4_0.bin"
--- a/gallery/index.yaml
+++ b/gallery/index.yaml
@@ -380,6 +380,7 @@
  urls:
    - https://huggingface.co/hugging-quants/Llama-3.2-1B-Instruct-Q4_K_M-GGUF
  overrides:
+    embeddings: true
    parameters:
      model: llama-3.2-1b-instruct-q4_k_m.gguf
  files:
@@ -8732,16 +8733,13 @@
    - filename: "ggml-model-whisper-tiny.en-q8_0.bin"
      uri: "https://ggml.ggerganov.com/ggml-model-whisper-tiny.en-q8_0.bin"
      sha256: 5bc2b3860aa151a4c6e7bb095e1fcce7cf12c7b020ca08dcec0c6d018bb7dd94
-## Bert embeddings
-- url: "github:mudler/LocalAI/gallery/bert-embeddings.yaml@master"
+## Bert embeddings (llama3.2 drop-in)
+- !!merge <<: *llama32
  name: "bert-embeddings"
-  license: "Apache 2.0"
-  urls:
-    - https://huggingface.co/skeskinen/ggml
+  description: |
+      llama3.2 embeddings model. Using as drop-in replacement for bert-embeddings
  tags:
    - embeddings
-  description: |
-    Bert model that can be used for embeddings
 ## Stable Diffusion
 - url: github:mudler/LocalAI/gallery/stablediffusion.yaml@master
  license: "BSD-3"