feat(backends): Drop bert.cpp (#4272)

* feat(backends): Drop bert.cpp

  use llama.cpp 3.2 as a drop-in replacement for bert.cpp

  Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* chore(tests): make test more robust

  Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

---------

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-05-25 21:15:00 +00:00 · 2024-11-27 16:34:28 +01:00 · 2024-11-27 16:34:28 +01:00 · 3c3050f68e
commit 3c3050f68e
parent 1688ba7f2a
13 changed files with 40 additions and 184 deletions
--- a/embedded/models/bert-cpp.yaml
+++ b/embedded/models/bert-cpp.yaml
@@ -1,23 +0,0 @@
-backend: bert-embeddings
-embeddings: true
-f16: true
-
-gpu_layers: 90
-mmap: true
-name: bert-cpp-minilm-v6
-
-parameters:
-  model: bert-MiniLM-L6-v2q4_0.bin
-
-download_files:
-- filename: "bert-MiniLM-L6-v2q4_0.bin"
-  sha256: "a5a174d8772c8a569faf9f3136c441f2c3855b5bf35ed32274294219533feaad"
-  uri: "https://huggingface.co/mudler/all-MiniLM-L6-v2/resolve/main/ggml-model-q4_0.bin"
-
-usage: |
-    You can test this model with curl like this:
-
-    curl http://localhost:8080/embeddings -X POST -H "Content-Type: application/json" -d '{
-      "input": "Your text string goes here",
-      "model": "bert-cpp-minilm-v6"
-    }'