feat(grpc): backend SPI pluggable in embedding mode (#1621)

* run server

* grpc backend embedded support

* backend providable
This commit is contained in:
coyzeng 2024-01-23 15:56:36 +08:00 committed by GitHub
parent efe2883c5d
commit d5d82ba344
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
8 changed files with 196 additions and 20 deletions

View file

@@ -166,7 +166,7 @@ func (ml *ModelLoader) grpcModel(backend string, o *Options) func(string, string
}
}
func (ml *ModelLoader) resolveAddress(addr ModelAddress, parallel bool) (*grpc.Client, error) {
func (ml *ModelLoader) resolveAddress(addr ModelAddress, parallel bool) (grpc.Backend, error) {
if parallel {
return addr.GRPC(parallel, ml.wd), nil
}
@@ -177,7 +177,7 @@ func (ml *ModelLoader) resolveAddress(addr ModelAddress, parallel bool) (*grpc.C
return ml.grpcClients[string(addr)], nil
}
func (ml *ModelLoader) BackendLoader(opts ...Option) (client *grpc.Client, err error) {
func (ml *ModelLoader) BackendLoader(opts ...Option) (client grpc.Backend, err error) {
o := NewOptions(opts...)
if o.model != "" {
@@ -220,7 +220,7 @@ func (ml *ModelLoader) BackendLoader(opts ...Option) (client *grpc.Client, err e
return ml.resolveAddress(addr, o.parallelRequests)
}
func (ml *ModelLoader) GreedyLoader(opts ...Option) (*grpc.Client, error) {
func (ml *ModelLoader) GreedyLoader(opts ...Option) (grpc.Backend, error) {
o := NewOptions(opts...)
ml.mu.Lock()

View file

@@ -59,7 +59,7 @@ type ModelLoader struct {
ModelPath string
mu sync.Mutex
// TODO: this needs generics
grpcClients map[string]*grpc.Client
grpcClients map[string]grpc.Backend
models map[string]ModelAddress
grpcProcesses map[string]*process.Process
templates map[TemplateType]map[string]*template.Template
@@ -68,7 +68,7 @@ type ModelLoader struct {
type ModelAddress string
func (m ModelAddress) GRPC(parallel bool, wd *WatchDog) *grpc.Client {
func (m ModelAddress) GRPC(parallel bool, wd *WatchDog) grpc.Backend {
enableWD := false
if wd != nil {
enableWD = true
@@ -79,7 +79,7 @@ func (m ModelAddress) GRPC(parallel bool, wd *WatchDog) *grpc.Client {
func NewModelLoader(modelPath string) *ModelLoader {
nml := &ModelLoader{
ModelPath: modelPath,
grpcClients: make(map[string]*grpc.Client),
grpcClients: make(map[string]grpc.Backend),
models: make(map[string]ModelAddress),
templates: make(map[TemplateType]map[string]*template.Template),
grpcProcesses: make(map[string]*process.Process),
@@ -163,7 +163,7 @@ func (ml *ModelLoader) StopModel(modelName string) error {
}
func (ml *ModelLoader) CheckIsLoaded(s string) ModelAddress {
var client *grpc.Client
var client grpc.Backend
if m, ok := ml.models[s]; ok {
log.Debug().Msgf("Model already loaded in memory: %s", s)
if c, ok := ml.grpcClients[s]; ok {