feat: llama.cpp gRPC C++ backend (#1170)

* wip: llama.cpp C++ gRPC server

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* make it work, attach it to the build process

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* update deps

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* fix: add protobuf dep

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* try fix protobuf on cmake

* cmake: workarounds

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* add packages

* cmake: use fixed version of grpc

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* cmake(grpc): install locally

* install grpc

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* install required deps for grpc on debian bullseye

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* debug

* debug

* Fixups

* no need to install cmake manually

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* ci: fixup macOS

* use brew whenever possible

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* macOS fixups

* debug

* fix container build

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* workaround

* try mac: clang on macOS does not search /usr/local/include and /usr/local/lib by default

https://stackoverflow.com/questions/23905661/on-mac-g-clang-fails-to-search-usr-local-include-and-usr-local-lib-by-def

* Temporarily disable arm64 Docker image builds

---------

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
Commit 128694213f (parent 8034ed3473), authored by Ettore Di Giacinto on 2023-10-16 21:46:29 +02:00, committed by GitHub.
10 changed files with 1145 additions and 16 deletions.
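
Context for the Go-side diff below: the new backend wraps llama.cpp in a standalone C++ gRPC server, so the Go frontend drives inference over RPC instead of binding the library in-process. A rough sketch of the contract such a backend fulfils, in Go terms (type and method names here are illustrative stand-ins, not the actual generated protobuf API):

package backend

import "context"

// Illustrative stand-ins for the protobuf request/response messages.
type ModelOptions struct{ ModelFile string }
type PredictOptions struct{ Prompt string }

// Contract an out-of-process backend is expected to fulfil; the real
// interface is generated from the gRPC service definition.
type Client interface {
	Health(ctx context.Context) error
	LoadModel(ctx context.Context, o ModelOptions) error
	Predict(ctx context.Context, o PredictOptions) (string, error)
}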


@@ -17,6 +17,7 @@ import (
 const (
 	LlamaBackend       = "llama"
 	LlamaStableBackend = "llama-stable"
+	LLamaCPP           = "llama-cpp"
 	BloomzBackend      = "bloomz"
 	StarcoderBackend   = "starcoder"
 	GPTJBackend        = "gptj"
@@ -41,8 +42,9 @@ const (
 )
 
 var AutoLoadBackends []string = []string{
-	LlamaBackend,
+	LLamaCPP,
 	LlamaStableBackend,
+	LlamaBackend,
 	Gpt4All,
 	FalconBackend,
 	GPTNeoXBackend,
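
Note on the reordering above: AutoLoadBackends is a priority list; when no backend is configured explicitly, each entry is tried in order and the first one that loads the model wins, so putting LLamaCPP first makes the new C++ server the preferred default. A minimal, self-contained sketch of that greedy pattern (tryLoad is a hypothetical stand-in for a real per-backend load attempt):

package main

import (
	"errors"
	"fmt"
)

// tryLoad is a hypothetical stand-in for a per-backend load attempt.
func tryLoad(backend, model string) error {
	if backend == "llama-cpp" {
		return nil // pretend the C++ gRPC backend accepts the model
	}
	return fmt.Errorf("%s: cannot load %s", backend, model)
}

// greedyLoad walks the priority list and returns the first backend that works.
func greedyLoad(backends []string, model string) (string, error) {
	var errs error
	for _, b := range backends {
		if err := tryLoad(b, model); err != nil {
			errs = errors.Join(errs, err)
			continue
		}
		return b, nil
	}
	return "", fmt.Errorf("no backend could load %s: %w", model, errs)
}

func main() {
	b, err := greedyLoad([]string{"llama-cpp", "llama-stable", "llama"}, "model.gguf")
	if err != nil {
		panic(err)
	}
	fmt.Println("loaded with backend:", b)
}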
@@ -175,11 +177,6 @@ func (ml *ModelLoader) BackendLoader(opts ...Option) (model *grpc.Client, err error) {
 	}
 
 	switch backend {
-	case LlamaBackend, LlamaStableBackend, GPTJBackend, DollyBackend,
-		MPTBackend, Gpt2Backend, FalconBackend,
-		GPTNeoXBackend, ReplitBackend, StarcoderBackend, BloomzBackend,
-		RwkvBackend, LCHuggingFaceBackend, BertEmbeddingsBackend, FalconGGMLBackend, StableDiffusionBackend, WhisperBackend:
-		return ml.LoadModel(o.model, ml.grpcModel(backend, o))
 	case Gpt4AllLlamaBackend, Gpt4AllMptBackend, Gpt4AllJBackend, Gpt4All:
 		o.gRPCOptions.LibrarySearchPath = filepath.Join(o.assetDir, "backend-assets", "gpt4all")
 		return ml.LoadModel(o.model, ml.grpcModel(Gpt4All, o))
@@ -187,7 +184,7 @@ func (ml *ModelLoader) BackendLoader(opts ...Option) (model *grpc.Client, err error) {
 		o.gRPCOptions.LibrarySearchPath = filepath.Join(o.assetDir, "backend-assets", "espeak-ng-data")
 		return ml.LoadModel(o.model, ml.grpcModel(PiperBackend, o))
 	default:
-		return nil, fmt.Errorf("backend unsupported: %s", o.backendString)
+		return ml.LoadModel(o.model, ml.grpcModel(backend, o))
 	}
 }
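
The default-case change above is what makes the new backend reachable: instead of rejecting unknown backend strings, BackendLoader now hands any name to the generic gRPC loader, which launches the matching backend binary and connects to it over gRPC. A minimal sketch of that spawn-and-dial pattern, assuming a backend binary path and an --addr flag that are illustrative only, not LocalAI's exact conventions:

package main

import (
	"context"
	"fmt"
	"os/exec"
	"time"

	"google.golang.org/grpc"
	"google.golang.org/grpc/credentials/insecure"
)

func main() {
	// Illustrative path and flag; the real loader derives these per backend.
	cmd := exec.Command("./backend-assets/grpc/llama-cpp", "--addr", "127.0.0.1:50051")
	if err := cmd.Start(); err != nil {
		panic(err)
	}
	defer cmd.Process.Kill()

	// Crude readiness wait; production code would poll a health endpoint.
	time.Sleep(500 * time.Millisecond)

	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()
	conn, err := grpc.DialContext(ctx, "127.0.0.1:50051",
		grpc.WithTransportCredentials(insecure.NewCredentials()),
		grpc.WithBlock())
	if err != nil {
		panic(err)
	}
	defer conn.Close()
	fmt.Println("connected to backend at", conn.Target())
}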