mirror of
https://github.com/mudler/LocalAI.git
synced 2025-05-20 10:35:01 +00:00
feat(loader): enhance single active backend by treating as singleton (#5107)
feat(loader): enhance single active backend by treating as singleton
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
This commit is contained in:
parent
c59975ab05
commit
2c425e9c69
24 changed files with 92 additions and 71 deletions
|
@ -509,7 +509,23 @@ func (ml *ModelLoader) stopActiveBackends(modelID string, singleActiveBackend bo
|
|||
}
|
||||
}
|
||||
|
||||
func (ml *ModelLoader) Close() {
|
||||
if !ml.singletonMode {
|
||||
return
|
||||
}
|
||||
ml.singletonLock.Unlock()
|
||||
}
|
||||
|
||||
func (ml *ModelLoader) lockBackend() {
|
||||
if !ml.singletonMode {
|
||||
return
|
||||
}
|
||||
ml.singletonLock.Lock()
|
||||
}
|
||||
|
||||
func (ml *ModelLoader) Load(opts ...Option) (grpc.Backend, error) {
|
||||
ml.lockBackend() // grab the singleton lock if needed
|
||||
|
||||
o := NewOptions(opts...)
|
||||
|
||||
// Return earlier if we have a model already loaded
|
||||
|
@ -520,7 +536,7 @@ func (ml *ModelLoader) Load(opts ...Option) (grpc.Backend, error) {
|
|||
return m.GRPC(o.parallelRequests, ml.wd), nil
|
||||
}
|
||||
|
||||
ml.stopActiveBackends(o.modelID, o.singleActiveBackend)
|
||||
ml.stopActiveBackends(o.modelID, ml.singletonMode)
|
||||
|
||||
// if a backend is defined, return the loader directly
|
||||
if o.backendString != "" {
|
||||
|
@ -533,6 +549,7 @@ func (ml *ModelLoader) Load(opts ...Option) (grpc.Backend, error) {
|
|||
// get backends embedded in the binary
|
||||
autoLoadBackends, err := ml.ListAvailableBackends(o.assetDir)
|
||||
if err != nil {
|
||||
ml.Close() // we failed, release the lock
|
||||
return nil, err
|
||||
}
|
||||
|
||||
|
@ -564,5 +581,7 @@ func (ml *ModelLoader) Load(opts ...Option) (grpc.Backend, error) {
|
|||
}
|
||||
}
|
||||
|
||||
ml.Close() // make sure to release the lock in case of failure
|
||||
|
||||
return nil, fmt.Errorf("could not load model - all backends returned error: %s", err.Error())
|
||||
}
|
||||
|
|
|
@ -18,16 +18,19 @@ import (
|
|||
|
||||
// TODO: Split ModelLoader and TemplateLoader? Just to keep things more organized. Left together to share a mutex until I look into that. Would split if we separate directories for .bin/.yaml and .tmpl
|
||||
type ModelLoader struct {
|
||||
ModelPath string
|
||||
mu sync.Mutex
|
||||
models map[string]*Model
|
||||
wd *WatchDog
|
||||
ModelPath string
|
||||
mu sync.Mutex
|
||||
singletonLock sync.Mutex
|
||||
singletonMode bool
|
||||
models map[string]*Model
|
||||
wd *WatchDog
|
||||
}
|
||||
|
||||
func NewModelLoader(modelPath string) *ModelLoader {
|
||||
func NewModelLoader(modelPath string, singleActiveBackend bool) *ModelLoader {
|
||||
nml := &ModelLoader{
|
||||
ModelPath: modelPath,
|
||||
models: make(map[string]*Model),
|
||||
ModelPath: modelPath,
|
||||
models: make(map[string]*Model),
|
||||
singletonMode: singleActiveBackend,
|
||||
}
|
||||
|
||||
return nml
|
||||
|
|
|
@ -17,10 +17,9 @@ type Options struct {
|
|||
|
||||
externalBackends map[string]string
|
||||
|
||||
grpcAttempts int
|
||||
grpcAttemptsDelay int
|
||||
singleActiveBackend bool
|
||||
parallelRequests bool
|
||||
grpcAttempts int
|
||||
grpcAttemptsDelay int
|
||||
parallelRequests bool
|
||||
}
|
||||
|
||||
type Option func(*Options)
|
||||
|
@ -88,12 +87,6 @@ func WithContext(ctx context.Context) Option {
|
|||
}
|
||||
}
|
||||
|
||||
func WithSingleActiveBackend() Option {
|
||||
return func(o *Options) {
|
||||
o.singleActiveBackend = true
|
||||
}
|
||||
}
|
||||
|
||||
func WithModelID(id string) Option {
|
||||
return func(o *Options) {
|
||||
o.modelID = id
|
||||
|
|
|
@ -21,7 +21,7 @@ var _ = Describe("ModelLoader", func() {
|
|||
// Setup the model loader with a test directory
|
||||
modelPath = "/tmp/test_model_path"
|
||||
os.Mkdir(modelPath, 0755)
|
||||
modelLoader = model.NewModelLoader(modelPath)
|
||||
modelLoader = model.NewModelLoader(modelPath, false)
|
||||
})
|
||||
|
||||
AfterEach(func() {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue