feat(backends): Drop bert.cpp (#4272)

* feat(backends): Drop bert.cpp

Use llama.cpp with a Llama 3.2 model as a drop-in replacement for bert.cpp.

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* chore(tests): make test more robust

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

---------

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
This commit is contained in:
Ettore Di Giacinto 2024-11-27 16:34:28 +01:00 committed by GitHub
parent 1688ba7f2a
commit 3c3050f68e
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
13 changed files with 40 additions and 184 deletions

View file

@@ -1,12 +0,0 @@
---
name: "bert-embeddings"
config_file: |
parameters:
model: bert-MiniLM-L6-v2q4_0.bin
backend: bert-embeddings
embeddings: true
files:
- filename: "bert-MiniLM-L6-v2q4_0.bin"
sha256: "a5a174d8772c8a569faf9f3136c441f2c3855b5bf35ed32274294219533feaad"
uri: "https://huggingface.co/mudler/all-MiniLM-L6-v2/resolve/main/ggml-model-q4_0.bin"

View file

@@ -380,6 +380,7 @@
urls:
- https://huggingface.co/hugging-quants/Llama-3.2-1B-Instruct-Q4_K_M-GGUF
overrides:
embeddings: true
parameters:
model: llama-3.2-1b-instruct-q4_k_m.gguf
files:
@@ -8732,16 +8733,13 @@
- filename: "ggml-model-whisper-tiny.en-q8_0.bin"
uri: "https://ggml.ggerganov.com/ggml-model-whisper-tiny.en-q8_0.bin"
sha256: 5bc2b3860aa151a4c6e7bb095e1fcce7cf12c7b020ca08dcec0c6d018bb7dd94
## Bert embeddings
- url: "github:mudler/LocalAI/gallery/bert-embeddings.yaml@master"
## Bert embeddings (llama3.2 drop-in)
- !!merge <<: *llama32
name: "bert-embeddings"
license: "Apache 2.0"
urls:
- https://huggingface.co/skeskinen/ggml
description: |
llama3.2 embeddings model. Using as drop-in replacement for bert-embeddings
tags:
- embeddings
description: |
Bert model that can be used for embeddings
## Stable Diffusion
- url: github:mudler/LocalAI/gallery/stablediffusion.yaml@master
license: "BSD-3"