feat(loader): enhance single active backend by treating as singleton (#5107)

feat(loader): enhance single active backend by treating as singleton

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
This commit is contained in:
Ettore Di Giacinto 2025-04-01 20:58:11 +02:00 committed by GitHub
parent c59975ab05
commit 2c425e9c69
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
24 changed files with 92 additions and 71 deletions

View file

@ -509,7 +509,23 @@ func (ml *ModelLoader) stopActiveBackends(modelID string, singleActiveBackend bo
}
}
// Close releases the singleton lock when the loader runs in singleton mode.
// When singleton mode is disabled it does nothing.
//
// It is the counterpart of lockBackend, which Load calls on entry. The lock
// must currently be held: unlocking a sync.Mutex that is not locked is a
// run-time error.
func (ml *ModelLoader) Close() {
	if ml.singletonMode {
		ml.singletonLock.Unlock()
	}
}
// lockBackend acquires the singleton lock when the loader runs in singleton
// mode, blocking until the lock is available. When singleton mode is
// disabled it returns immediately without touching the lock.
//
// The lock is released again by Close.
func (ml *ModelLoader) lockBackend() {
	if ml.singletonMode {
		ml.singletonLock.Lock()
	}
}
func (ml *ModelLoader) Load(opts ...Option) (grpc.Backend, error) {
ml.lockBackend() // grab the singleton lock if needed
o := NewOptions(opts...)
// Return earlier if we have a model already loaded
@ -520,7 +536,7 @@ func (ml *ModelLoader) Load(opts ...Option) (grpc.Backend, error) {
return m.GRPC(o.parallelRequests, ml.wd), nil
}
ml.stopActiveBackends(o.modelID, o.singleActiveBackend)
ml.stopActiveBackends(o.modelID, ml.singletonMode)
// if a backend is defined, return the loader directly
if o.backendString != "" {
@ -533,6 +549,7 @@ func (ml *ModelLoader) Load(opts ...Option) (grpc.Backend, error) {
// get backends embedded in the binary
autoLoadBackends, err := ml.ListAvailableBackends(o.assetDir)
if err != nil {
ml.Close() // we failed, release the lock
return nil, err
}
@ -564,5 +581,7 @@ func (ml *ModelLoader) Load(opts ...Option) (grpc.Backend, error) {
}
}
ml.Close() // make sure to release the lock in case of failure
return nil, fmt.Errorf("could not load model - all backends returned error: %s", err.Error())
}

View file

@ -18,16 +18,19 @@ import (
// TODO: Split ModelLoader and TemplateLoader? Just to keep things more organized. Left together to share a mutex until I look into that. Would split if we separate directories for .bin/.yaml and .tmpl
type ModelLoader struct {
ModelPath string
mu sync.Mutex
models map[string]*Model
wd *WatchDog
ModelPath string
mu sync.Mutex
singletonLock sync.Mutex
singletonMode bool
models map[string]*Model
wd *WatchDog
}
func NewModelLoader(modelPath string) *ModelLoader {
func NewModelLoader(modelPath string, singleActiveBackend bool) *ModelLoader {
nml := &ModelLoader{
ModelPath: modelPath,
models: make(map[string]*Model),
ModelPath: modelPath,
models: make(map[string]*Model),
singletonMode: singleActiveBackend,
}
return nml

View file

@ -17,10 +17,9 @@ type Options struct {
externalBackends map[string]string
grpcAttempts int
grpcAttemptsDelay int
singleActiveBackend bool
parallelRequests bool
grpcAttempts int
grpcAttemptsDelay int
parallelRequests bool
}
type Option func(*Options)
@ -88,12 +87,6 @@ func WithContext(ctx context.Context) Option {
}
}
// WithSingleActiveBackend returns an Option that sets the
// singleActiveBackend flag to true on the Options it is applied to.
func WithSingleActiveBackend() Option {
	enable := func(opts *Options) {
		opts.singleActiveBackend = true
	}
	return enable
}
func WithModelID(id string) Option {
return func(o *Options) {
o.modelID = id

View file

@ -21,7 +21,7 @@ var _ = Describe("ModelLoader", func() {
// Setup the model loader with a test directory
modelPath = "/tmp/test_model_path"
os.Mkdir(modelPath, 0755)
modelLoader = model.NewModelLoader(modelPath)
modelLoader = model.NewModelLoader(modelPath, false)
})
AfterEach(func() {