mirror of
https://github.com/mudler/LocalAI.git
synced 2025-05-22 19:44:59 +00:00
feat(grpc): backend SPI pluggable in embedding mode (#1621)
* run server
* grpc backend embedded support
* backend providable
This commit is contained in:
parent
efe2883c5d
commit
d5d82ba344
8 changed files with 196 additions and 20 deletions
|
@ -166,7 +166,7 @@ func (ml *ModelLoader) grpcModel(backend string, o *Options) func(string, string
|
|||
}
|
||||
}
|
||||
|
||||
func (ml *ModelLoader) resolveAddress(addr ModelAddress, parallel bool) (*grpc.Client, error) {
|
||||
func (ml *ModelLoader) resolveAddress(addr ModelAddress, parallel bool) (grpc.Backend, error) {
|
||||
if parallel {
|
||||
return addr.GRPC(parallel, ml.wd), nil
|
||||
}
|
||||
|
@ -177,7 +177,7 @@ func (ml *ModelLoader) resolveAddress(addr ModelAddress, parallel bool) (*grpc.C
|
|||
return ml.grpcClients[string(addr)], nil
|
||||
}
|
||||
|
||||
func (ml *ModelLoader) BackendLoader(opts ...Option) (client *grpc.Client, err error) {
|
||||
func (ml *ModelLoader) BackendLoader(opts ...Option) (client grpc.Backend, err error) {
|
||||
o := NewOptions(opts...)
|
||||
|
||||
if o.model != "" {
|
||||
|
@ -220,7 +220,7 @@ func (ml *ModelLoader) BackendLoader(opts ...Option) (client *grpc.Client, err e
|
|||
return ml.resolveAddress(addr, o.parallelRequests)
|
||||
}
|
||||
|
||||
func (ml *ModelLoader) GreedyLoader(opts ...Option) (*grpc.Client, error) {
|
||||
func (ml *ModelLoader) GreedyLoader(opts ...Option) (grpc.Backend, error) {
|
||||
o := NewOptions(opts...)
|
||||
|
||||
ml.mu.Lock()
|
||||
|
|
|
@ -59,7 +59,7 @@ type ModelLoader struct {
|
|||
ModelPath string
|
||||
mu sync.Mutex
|
||||
// TODO: this needs generics
|
||||
grpcClients map[string]*grpc.Client
|
||||
grpcClients map[string]grpc.Backend
|
||||
models map[string]ModelAddress
|
||||
grpcProcesses map[string]*process.Process
|
||||
templates map[TemplateType]map[string]*template.Template
|
||||
|
@ -68,7 +68,7 @@ type ModelLoader struct {
|
|||
|
||||
type ModelAddress string
|
||||
|
||||
func (m ModelAddress) GRPC(parallel bool, wd *WatchDog) *grpc.Client {
|
||||
func (m ModelAddress) GRPC(parallel bool, wd *WatchDog) grpc.Backend {
|
||||
enableWD := false
|
||||
if wd != nil {
|
||||
enableWD = true
|
||||
|
@ -79,7 +79,7 @@ func (m ModelAddress) GRPC(parallel bool, wd *WatchDog) *grpc.Client {
|
|||
func NewModelLoader(modelPath string) *ModelLoader {
|
||||
nml := &ModelLoader{
|
||||
ModelPath: modelPath,
|
||||
grpcClients: make(map[string]*grpc.Client),
|
||||
grpcClients: make(map[string]grpc.Backend),
|
||||
models: make(map[string]ModelAddress),
|
||||
templates: make(map[TemplateType]map[string]*template.Template),
|
||||
grpcProcesses: make(map[string]*process.Process),
|
||||
|
@ -163,7 +163,7 @@ func (ml *ModelLoader) StopModel(modelName string) error {
|
|||
}
|
||||
|
||||
func (ml *ModelLoader) CheckIsLoaded(s string) ModelAddress {
|
||||
var client *grpc.Client
|
||||
var client grpc.Backend
|
||||
if m, ok := ml.models[s]; ok {
|
||||
log.Debug().Msgf("Model already loaded in memory: %s", s)
|
||||
if c, ok := ml.grpcClients[s]; ok {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue