mirror of
https://github.com/mudler/LocalAI.git
synced 2025-05-25 21:15:00 +00:00
feat: add bert.cpp embeddings (#222)
This commit is contained in:
parent
e6db14e2f1
commit
f8ee20991c
14 changed files with 104 additions and 53 deletions
|
@ -14,6 +14,7 @@ import (
|
|||
"github.com/rs/zerolog/log"
|
||||
|
||||
rwkv "github.com/donomii/go-rwkv.cpp"
|
||||
bert "github.com/go-skynet/go-bert.cpp"
|
||||
gpt2 "github.com/go-skynet/go-gpt2.cpp"
|
||||
gptj "github.com/go-skynet/go-gpt4all-j.cpp"
|
||||
llama "github.com/go-skynet/go-llama.cpp"
|
||||
|
@ -22,13 +23,15 @@ import (
|
|||
type ModelLoader struct {
|
||||
ModelPath string
|
||||
mu sync.Mutex
|
||||
|
||||
// TODO: this needs generics
|
||||
models map[string]*llama.LLama
|
||||
gptmodels map[string]*gptj.GPTJ
|
||||
gpt2models map[string]*gpt2.GPT2
|
||||
gptstablelmmodels map[string]*gpt2.StableLM
|
||||
rwkv map[string]*rwkv.RwkvState
|
||||
promptsTemplates map[string]*template.Template
|
||||
bert map[string]*bert.Bert
|
||||
|
||||
promptsTemplates map[string]*template.Template
|
||||
}
|
||||
|
||||
func NewModelLoader(modelPath string) *ModelLoader {
|
||||
|
@ -39,6 +42,7 @@ func NewModelLoader(modelPath string) *ModelLoader {
|
|||
gptstablelmmodels: make(map[string]*gpt2.StableLM),
|
||||
models: make(map[string]*llama.LLama),
|
||||
rwkv: make(map[string]*rwkv.RwkvState),
|
||||
bert: make(map[string]*bert.Bert),
|
||||
promptsTemplates: make(map[string]*template.Template),
|
||||
}
|
||||
}
|
||||
|
@ -156,6 +160,38 @@ func (ml *ModelLoader) LoadStableLMModel(modelName string) (*gpt2.StableLM, erro
|
|||
return model, err
|
||||
}
|
||||
|
||||
func (ml *ModelLoader) LoadBERT(modelName string) (*bert.Bert, error) {
|
||||
ml.mu.Lock()
|
||||
defer ml.mu.Unlock()
|
||||
|
||||
// Check if we already have a loaded model
|
||||
if !ml.ExistsInModelPath(modelName) {
|
||||
return nil, fmt.Errorf("model does not exist")
|
||||
}
|
||||
|
||||
if m, ok := ml.bert[modelName]; ok {
|
||||
log.Debug().Msgf("Model already loaded in memory: %s", modelName)
|
||||
return m, nil
|
||||
}
|
||||
|
||||
// Load the model and keep it in memory for later use
|
||||
modelFile := filepath.Join(ml.ModelPath, modelName)
|
||||
log.Debug().Msgf("Loading model in memory from file: %s", modelFile)
|
||||
|
||||
model, err := bert.New(modelFile)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// If there is a prompt template, load it
|
||||
if err := ml.loadTemplateIfExists(modelName, modelFile); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
ml.bert[modelName] = model
|
||||
return model, err
|
||||
}
|
||||
|
||||
func (ml *ModelLoader) LoadGPT2Model(modelName string) (*gpt2.GPT2, error) {
|
||||
ml.mu.Lock()
|
||||
defer ml.mu.Unlock()
|
||||
|
@ -299,6 +335,8 @@ func (ml *ModelLoader) BackendLoader(backendString string, modelFile string, lla
|
|||
return ml.LoadGPT2Model(modelFile)
|
||||
case "gptj":
|
||||
return ml.LoadGPTJModel(modelFile)
|
||||
case "bert-embeddings":
|
||||
return ml.LoadBERT(modelFile)
|
||||
case "rwkv":
|
||||
return ml.LoadRWKV(modelFile, modelFile+tokenizerSuffix, threads)
|
||||
default:
|
||||
|
@ -361,5 +399,13 @@ func (ml *ModelLoader) GreedyLoader(modelFile string, llamaOpts []llama.ModelOpt
|
|||
err = multierror.Append(err, modelerr)
|
||||
}
|
||||
|
||||
model, modelerr = ml.LoadBERT(modelFile)
|
||||
if modelerr == nil {
|
||||
updateModels(model)
|
||||
return model, nil
|
||||
} else {
|
||||
err = multierror.Append(err, modelerr)
|
||||
}
|
||||
|
||||
return nil, fmt.Errorf("could not load model - all backends returned error: %s", err.Error())
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue