feat: move other backends to grpc

This migrates the remaining in-process backends (bloomz, bert-embeddings, rwkv, whisper, langchain-huggingface, stablediffusion, piper/tts) to the gRPC backend interface, so all backends are now loaded and managed the same way.

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
This commit is contained in:
Ettore Di Giacinto 2023-07-15 01:19:43 +02:00
parent 5dcfdbe51d
commit 1d0ed95a54
54 changed files with 3171 additions and 1712 deletions

View file

@ -4,18 +4,13 @@ import (
"context"
"fmt"
"os"
"os/signal"
"path/filepath"
"strings"
"syscall"
"time"
rwkv "github.com/donomii/go-rwkv.cpp"
whisper "github.com/ggerganov/whisper.cpp/bindings/go/pkg/whisper"
grpc "github.com/go-skynet/LocalAI/pkg/grpc"
"github.com/go-skynet/LocalAI/pkg/langchain"
"github.com/go-skynet/LocalAI/pkg/stablediffusion"
"github.com/go-skynet/LocalAI/pkg/tts"
bloomz "github.com/go-skynet/bloomz.cpp"
bert "github.com/go-skynet/go-bert.cpp"
"github.com/hashicorp/go-multierror"
"github.com/hpcloud/tail"
"github.com/phayes/freeport"
@ -27,20 +22,22 @@ import (
const tokenizerSuffix = ".tokenizer.json"
const (
LlamaBackend = "llama"
BloomzBackend = "bloomz"
StarcoderBackend = "starcoder"
GPTJBackend = "gptj"
DollyBackend = "dolly"
MPTBackend = "mpt"
GPTNeoXBackend = "gptneox"
ReplitBackend = "replit"
Gpt2Backend = "gpt2"
Gpt4AllLlamaBackend = "gpt4all-llama"
Gpt4AllMptBackend = "gpt4all-mpt"
Gpt4AllJBackend = "gpt4all-j"
Gpt4All = "gpt4all"
FalconBackend = "falcon"
LlamaBackend = "llama"
BloomzBackend = "bloomz"
StarcoderBackend = "starcoder"
GPTJBackend = "gptj"
DollyBackend = "dolly"
MPTBackend = "mpt"
GPTNeoXBackend = "gptneox"
ReplitBackend = "replit"
Gpt2Backend = "gpt2"
Gpt4AllLlamaBackend = "gpt4all-llama"
Gpt4AllMptBackend = "gpt4all-mpt"
Gpt4AllJBackend = "gpt4all-j"
Gpt4All = "gpt4all"
FalconBackend = "falcon"
FalconGGMLBackend = "falcon-ggml"
BertEmbeddingsBackend = "bert-embeddings"
RwkvBackend = "rwkv"
WhisperBackend = "whisper"
@ -54,77 +51,39 @@ var autoLoadBackends []string = []string{
LlamaBackend,
Gpt4All,
RwkvBackend,
FalconBackend,
WhisperBackend,
BertEmbeddingsBackend,
GPTNeoXBackend,
BertEmbeddingsBackend,
FalconGGMLBackend,
GPTJBackend,
Gpt2Backend,
DollyBackend,
MPTBackend,
ReplitBackend,
StarcoderBackend,
FalconBackend,
BloomzBackend,
}
// bertEmbeddings is the legacy in-process loader for a BERT embeddings model;
// it wraps go-bert's constructor. Removed in this commit in favor of the
// generic gRPC backend loader.
var bertEmbeddings = func(modelFile string) (interface{}, error) {
	return bert.New(modelFile)
}

// bloomzLM is the legacy in-process loader for a bloomz.cpp model.
var bloomzLM = func(modelFile string) (interface{}, error) {
	return bloomz.New(modelFile)
}

// stableDiffusion constructs a Stable Diffusion backend from an asset
// directory (not a model file, unlike the loaders above).
var stableDiffusion = func(assetDir string) (interface{}, error) {
	return stablediffusion.New(assetDir)
}
// piperTTS returns a loader closure that creates a Piper TTS instance backed
// by the given asset directory.
//
// NOTE(review): the inner closure ignores its string argument s and always
// builds from assetDir — presumably the TTS engine discovers voices itself;
// confirm against tts.New before relying on per-model behavior.
func piperTTS(assetDir string) func(s string) (interface{}, error) {
	return func(s string) (interface{}, error) {
		return tts.New(assetDir)
	}
}
// whisperModel is the legacy in-process loader for a whisper.cpp model file.
var whisperModel = func(modelFile string) (interface{}, error) {
	return whisper.New(modelFile)
}

// lcHuggingFace builds a langchain HuggingFace client; here the "model file"
// argument is actually a HuggingFace repository ID, not a local path.
var lcHuggingFace = func(repoId string) (interface{}, error) {
	return langchain.NewHuggingFace(repoId)
}
// func llamaLM(opts ...llama.ModelOption) func(string) (interface{}, error) {
// return func(s string) (interface{}, error) {
// return llama.New(s, opts...)
// }
// }
// func gpt4allLM(opts ...gpt4all.ModelOption) func(string) (interface{}, error) {
// return func(s string) (interface{}, error) {
// return gpt4all.New(s, opts...)
// }
// }
func rwkvLM(tokenFile string, threads uint32) func(string) (interface{}, error) {
return func(s string) (interface{}, error) {
log.Debug().Msgf("Loading RWKV", s, tokenFile)
model := rwkv.LoadFiles(s, tokenFile, threads)
if model == nil {
return nil, fmt.Errorf("could not load model")
}
return model, nil
// StopGRPC terminates every backend gRPC process this loader has spawned.
func (ml *ModelLoader) StopGRPC() {
	for _, proc := range ml.grpcProcesses {
		proc.Stop()
	}
}
// starts the grpcModelProcess for the backend, and returns a grpc client
// It also loads the model
func (ml *ModelLoader) grpcModel(backend string, o *Options) func(string) (interface{}, error) {
return func(s string) (interface{}, error) {
func (ml *ModelLoader) grpcModel(backend string, o *Options) func(string) (*grpc.Client, error) {
return func(s string) (*grpc.Client, error) {
log.Debug().Msgf("Loading GRPC Model", backend, *o)
grpcProcess := filepath.Join(o.assetDir, "backend-assets", "grpc", backend)
// Check if the file exists
if _, err := os.Stat(grpcProcess); os.IsNotExist(err) {
return nil, fmt.Errorf("grpc process not found: %s. some backends(stablediffusion, tts) require LocalAI compiled with GO_TAGS", grpcProcess)
}
// Make sure the process is executable
if err := os.Chmod(grpcProcess, 0755); err != nil {
return nil, err
@ -151,6 +110,14 @@ func (ml *ModelLoader) grpcModel(backend string, o *Options) func(string) (inter
return nil, err
}
// clean up process
go func() {
c := make(chan os.Signal, 1)
signal.Notify(c, os.Interrupt, syscall.SIGTERM)
<-c
grpcControlProcess.Stop()
}()
go func() {
t, err := tail.TailFile(grpcControlProcess.StderrPath(), tail.Config{Follow: true})
if err != nil {
@ -200,7 +167,7 @@ func (ml *ModelLoader) grpcModel(backend string, o *Options) func(string) (inter
log.Debug().Msgf("GRPC: Loading model with options: %+v", options)
res, err := client.LoadModel(context.TODO(), &options)
res, err := client.LoadModel(o.context, &options)
if err != nil {
return nil, err
}
@ -212,63 +179,37 @@ func (ml *ModelLoader) grpcModel(backend string, o *Options) func(string) (inter
}
}
func (ml *ModelLoader) BackendLoader(opts ...Option) (model interface{}, err error) {
//backendString string, modelFile string, llamaOpts []llama.ModelOption, threads uint32, assetDir string) (model interface{}, err error) {
func (ml *ModelLoader) BackendLoader(opts ...Option) (model *grpc.Client, err error) {
o := NewOptions(opts...)
log.Debug().Msgf("Loading model %s from %s", o.backendString, o.modelFile)
switch strings.ToLower(o.backendString) {
case LlamaBackend:
return ml.LoadModel(o.modelFile, ml.grpcModel(LlamaBackend, o))
case BloomzBackend:
return ml.LoadModel(o.modelFile, bloomzLM)
case GPTJBackend:
return ml.LoadModel(o.modelFile, ml.grpcModel(GPTJBackend, o))
case DollyBackend:
return ml.LoadModel(o.modelFile, ml.grpcModel(DollyBackend, o))
case MPTBackend:
return ml.LoadModel(o.modelFile, ml.grpcModel(MPTBackend, o))
case Gpt2Backend:
return ml.LoadModel(o.modelFile, ml.grpcModel(Gpt2Backend, o))
case FalconBackend:
return ml.LoadModel(o.modelFile, ml.grpcModel(FalconBackend, o))
case GPTNeoXBackend:
return ml.LoadModel(o.modelFile, ml.grpcModel(GPTNeoXBackend, o))
case ReplitBackend:
return ml.LoadModel(o.modelFile, ml.grpcModel(ReplitBackend, o))
case StableDiffusionBackend:
return ml.LoadModel(o.modelFile, stableDiffusion)
case PiperBackend:
return ml.LoadModel(o.modelFile, piperTTS(filepath.Join(o.assetDir, "backend-assets", "espeak-ng-data")))
case StarcoderBackend:
return ml.LoadModel(o.modelFile, ml.grpcModel(StarcoderBackend, o))
backend := strings.ToLower(o.backendString)
switch backend {
case LlamaBackend, GPTJBackend, DollyBackend,
MPTBackend, Gpt2Backend, FalconBackend,
GPTNeoXBackend, ReplitBackend, StarcoderBackend, BloomzBackend,
RwkvBackend, LCHuggingFaceBackend, BertEmbeddingsBackend, FalconGGMLBackend, StableDiffusionBackend, WhisperBackend:
return ml.LoadModel(o.modelFile, ml.grpcModel(backend, o))
case Gpt4AllLlamaBackend, Gpt4AllMptBackend, Gpt4AllJBackend, Gpt4All:
o.gRPCOptions.LibrarySearchPath = filepath.Join(o.assetDir, "backend-assets", "gpt4all")
return ml.LoadModel(o.modelFile, ml.grpcModel(Gpt4All, o))
// return ml.LoadModel(o.modelFile, gpt4allLM(gpt4all.SetThreads(int(o.threads)), gpt4all.SetLibrarySearchPath(filepath.Join(o.assetDir, "backend-assets", "gpt4all"))))
case BertEmbeddingsBackend:
return ml.LoadModel(o.modelFile, bertEmbeddings)
case RwkvBackend:
return ml.LoadModel(o.modelFile, rwkvLM(filepath.Join(ml.ModelPath, o.modelFile+tokenizerSuffix), o.threads))
case WhisperBackend:
return ml.LoadModel(o.modelFile, whisperModel)
case LCHuggingFaceBackend:
return ml.LoadModel(o.modelFile, lcHuggingFace)
case PiperBackend:
o.gRPCOptions.LibrarySearchPath = filepath.Join(o.assetDir, "backend-assets", "espeak-ng-data")
return ml.LoadModel(o.modelFile, ml.grpcModel(PiperBackend, o))
default:
return nil, fmt.Errorf("backend unsupported: %s", o.backendString)
}
}
func (ml *ModelLoader) GreedyLoader(opts ...Option) (interface{}, error) {
func (ml *ModelLoader) GreedyLoader(opts ...Option) (*grpc.Client, error) {
o := NewOptions(opts...)
log.Debug().Msgf("Loading model '%s' greedly", o.modelFile)
// Is this really needed? BackendLoader already does this
ml.mu.Lock()
m, exists := ml.models[o.modelFile]
if exists {
if m := ml.checkIsLoaded(o.modelFile); m != nil {
log.Debug().Msgf("Model '%s' already loaded", o.modelFile)
ml.mu.Unlock()
return m, nil
@ -285,7 +226,7 @@ func (ml *ModelLoader) GreedyLoader(opts ...Option) (interface{}, error) {
model, modelerr := ml.BackendLoader(
WithBackendString(b),
WithModelFile(o.modelFile),
WithLoadGRPCOpts(o.gRPCOptions),
WithLoadGRPCLLMModelOpts(o.gRPCOptions),
WithThreads(o.threads),
WithAssetDir(o.assetDir),
)

View file

@ -2,6 +2,7 @@ package model
import (
"bytes"
"context"
"fmt"
"io/ioutil"
"os"
@ -10,6 +11,7 @@ import (
"sync"
"text/template"
"github.com/go-skynet/LocalAI/pkg/grpc"
process "github.com/mudler/go-processmanager"
"github.com/rs/zerolog/log"
)
@ -18,7 +20,7 @@ type ModelLoader struct {
ModelPath string
mu sync.Mutex
// TODO: this needs generics
models map[string]interface{}
models map[string]*grpc.Client
grpcProcesses map[string]*process.Process
promptsTemplates map[string]*template.Template
}
@ -26,7 +28,7 @@ type ModelLoader struct {
func NewModelLoader(modelPath string) *ModelLoader {
return &ModelLoader{
ModelPath: modelPath,
models: make(map[string]interface{}),
models: make(map[string]*grpc.Client),
promptsTemplates: make(map[string]*template.Template),
grpcProcesses: make(map[string]*process.Process),
}
@ -113,14 +115,14 @@ func (ml *ModelLoader) loadTemplateIfExists(modelName, modelFile string) error {
return nil
}
func (ml *ModelLoader) LoadModel(modelName string, loader func(string) (interface{}, error)) (interface{}, error) {
func (ml *ModelLoader) LoadModel(modelName string, loader func(string) (*grpc.Client, error)) (*grpc.Client, error) {
ml.mu.Lock()
defer ml.mu.Unlock()
// Check if we already have a loaded model
if m, ok := ml.models[modelName]; ok {
if model := ml.checkIsLoaded(modelName); model != nil {
log.Debug().Msgf("Model already loaded in memory: %s", modelName)
return m, nil
return model, nil
}
// Load the model and keep it in memory for later use
@ -140,3 +142,25 @@ func (ml *ModelLoader) LoadModel(modelName string, loader func(string) (interfac
ml.models[modelName] = model
return model, nil
}
// checkIsLoaded returns the cached gRPC client for model s if it is still
// usable, or nil when the model must be (re)loaded.
//
// A cached client is evicted (process stopped, both map entries deleted) when
// its gRPC health check fails AND its backing process is missing or dead; the
// next load then recreates process and client. If the health check fails but
// the process is still alive, the existing client is returned anyway —
// presumably the service is only temporarily unresponsive.
func (ml *ModelLoader) checkIsLoaded(s string) *grpc.Client {
	m, ok := ml.models[s]
	if !ok {
		return nil
	}

	// Fixed: the original Msgf calls passed extra args with no format verbs
	// (go vet printf violations), and ml.grpcProcesses[s].IsAlive() would
	// panic with a nil-pointer dereference if no process was tracked for s.
	log.Debug().Msgf("Model already loaded in memory: %s", s)
	if !m.HealthCheck(context.Background()) {
		log.Debug().Msgf("GRPC Model not responding: %s", s)
		proc, hasProc := ml.grpcProcesses[s]
		if !hasProc || !proc.IsAlive() {
			log.Debug().Msgf("GRPC Process is not responding: %s", s)
			// stop and delete the process, this forces to re-load the model
			// and re-create again the service
			if hasProc {
				proc.Stop()
			}
			delete(ml.grpcProcesses, s)
			delete(ml.models, s)
			return nil
		}
	}
	return m
}

View file

@ -1,6 +1,8 @@
package model
import (
"context"
pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
)
@ -9,6 +11,7 @@ type Options struct {
modelFile string
threads uint32
assetDir string
context context.Context
gRPCOptions *pb.ModelOptions
}
@ -27,7 +30,7 @@ func WithModelFile(modelFile string) Option {
}
}
func WithLoadGRPCOpts(opts *pb.ModelOptions) Option {
func WithLoadGRPCLLMModelOpts(opts *pb.ModelOptions) Option {
return func(o *Options) {
o.gRPCOptions = opts
}
@ -45,8 +48,17 @@ func WithAssetDir(assetDir string) Option {
}
}
// WithContext sets the context stored in the loader Options.
func WithContext(ctx context.Context) Option {
	return func(opts *Options) {
		opts.context = ctx
	}
}
func NewOptions(opts ...Option) *Options {
o := &Options{}
o := &Options{
gRPCOptions: &pb.ModelOptions{},
context: context.Background(),
}
for _, opt := range opts {
opt(o)
}