mirror of
https://github.com/mudler/LocalAI.git
synced 2025-05-20 10:35:01 +00:00
fix: race during stop of active backends (#5106)
* chore: drop double call to stop all backends, refactors
  Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
* fix: do lock when cycling to models to delete
  Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
---------
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
This commit is contained in:
parent
2f9203cd2a
commit
05f7004487
4 changed files with 45 additions and 37 deletions
3
.env
3
.env
|
@ -29,6 +29,9 @@
|
||||||
## Enable/Disable single backend (useful if only one GPU is available)
|
## Enable/Disable single backend (useful if only one GPU is available)
|
||||||
# LOCALAI_SINGLE_ACTIVE_BACKEND=true
|
# LOCALAI_SINGLE_ACTIVE_BACKEND=true
|
||||||
|
|
||||||
|
# Force shutdown of backends that are still busy after repeated wait retries (only if LOCALAI_SINGLE_ACTIVE_BACKEND is set)
|
||||||
|
# LOCALAI_FORCE_BACKEND_SHUTDOWN=true
|
||||||
|
|
||||||
## Specify a build type. Available: cublas, openblas, clblas.
|
## Specify a build type. Available: cublas, openblas, clblas.
|
||||||
## cuBLAS: This is a GPU-accelerated version of the complete standard BLAS (Basic Linear Algebra Subprograms) library. It's provided by Nvidia and is part of their CUDA toolkit.
|
## cuBLAS: This is a GPU-accelerated version of the complete standard BLAS (Basic Linear Algebra Subprograms) library. It's provided by Nvidia and is part of their CUDA toolkit.
|
||||||
## OpenBLAS: This is an open-source implementation of the BLAS library that aims to provide highly optimized code for various platforms. It includes support for multi-threading and can be compiled to use hardware-specific features for additional performance. OpenBLAS can run on many kinds of hardware, including CPUs from Intel, AMD, and ARM.
|
## OpenBLAS: This is an open-source implementation of the BLAS library that aims to provide highly optimized code for various platforms. It includes support for multi-threading and can be compiled to use hardware-specific features for additional performance. OpenBLAS can run on many kinds of hardware, including CPUs from Intel, AMD, and ARM.
|
||||||
|
|
|
@ -473,8 +473,6 @@ func (ml *ModelLoader) backendLoader(opts ...Option) (client grpc.Backend, err e
|
||||||
backend = realBackend
|
backend = realBackend
|
||||||
}
|
}
|
||||||
|
|
||||||
ml.stopActiveBackends(o.modelID, o.singleActiveBackend)
|
|
||||||
|
|
||||||
var backendToConsume string
|
var backendToConsume string
|
||||||
|
|
||||||
switch backend {
|
switch backend {
|
||||||
|
@ -497,13 +495,17 @@ func (ml *ModelLoader) backendLoader(opts ...Option) (client grpc.Backend, err e
|
||||||
}
|
}
|
||||||
|
|
||||||
func (ml *ModelLoader) stopActiveBackends(modelID string, singleActiveBackend bool) {
|
func (ml *ModelLoader) stopActiveBackends(modelID string, singleActiveBackend bool) {
|
||||||
|
if !singleActiveBackend {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
// If we can have only one backend active, kill all the others (except external backends)
|
// If we can have only one backend active, kill all the others (except external backends)
|
||||||
if singleActiveBackend {
|
|
||||||
log.Debug().Msgf("Stopping all backends except '%s'", modelID)
|
// Stop all backends except the one we are going to load
|
||||||
err := ml.StopGRPC(allExcept(modelID))
|
log.Debug().Msgf("Stopping all backends except '%s'", modelID)
|
||||||
if err != nil {
|
err := ml.StopGRPC(allExcept(modelID))
|
||||||
log.Error().Err(err).Str("keptModel", modelID).Msg("error while shutting down all backends except for the keptModel - greedyloader continuing")
|
if err != nil {
|
||||||
}
|
log.Error().Err(err).Str("keptModel", modelID).Msg("error while shutting down all backends except for the keptModel - greedyloader continuing")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -520,10 +522,12 @@ func (ml *ModelLoader) Load(opts ...Option) (grpc.Backend, error) {
|
||||||
|
|
||||||
ml.stopActiveBackends(o.modelID, o.singleActiveBackend)
|
ml.stopActiveBackends(o.modelID, o.singleActiveBackend)
|
||||||
|
|
||||||
|
// if a backend is defined, return the loader directly
|
||||||
if o.backendString != "" {
|
if o.backendString != "" {
|
||||||
return ml.backendLoader(opts...)
|
return ml.backendLoader(opts...)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Otherwise scan for backends in the asset directory
|
||||||
var err error
|
var err error
|
||||||
|
|
||||||
// get backends embedded in the binary
|
// get backends embedded in the binary
|
||||||
|
|
|
@ -142,26 +142,6 @@ func (ml *ModelLoader) LoadModel(modelID, modelName string, loader func(string,
|
||||||
func (ml *ModelLoader) ShutdownModel(modelName string) error {
|
func (ml *ModelLoader) ShutdownModel(modelName string) error {
|
||||||
ml.mu.Lock()
|
ml.mu.Lock()
|
||||||
defer ml.mu.Unlock()
|
defer ml.mu.Unlock()
|
||||||
model, ok := ml.models[modelName]
|
|
||||||
if !ok {
|
|
||||||
return fmt.Errorf("model %s not found", modelName)
|
|
||||||
}
|
|
||||||
|
|
||||||
retries := 1
|
|
||||||
for model.GRPC(false, ml.wd).IsBusy() {
|
|
||||||
log.Debug().Msgf("%s busy. Waiting.", modelName)
|
|
||||||
dur := time.Duration(retries*2) * time.Second
|
|
||||||
if dur > retryTimeout {
|
|
||||||
dur = retryTimeout
|
|
||||||
}
|
|
||||||
time.Sleep(dur)
|
|
||||||
retries++
|
|
||||||
|
|
||||||
if retries > 10 && os.Getenv("LOCALAI_FORCE_BACKEND_SHUTDOWN") == "true" {
|
|
||||||
log.Warn().Msgf("Model %s is still busy after %d retries. Forcing shutdown.", modelName, retries)
|
|
||||||
break
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return ml.deleteProcess(modelName)
|
return ml.deleteProcess(modelName)
|
||||||
}
|
}
|
||||||
|
|
|
@ -9,25 +9,43 @@ import (
|
||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
"syscall"
|
"syscall"
|
||||||
|
"time"
|
||||||
|
|
||||||
"github.com/hpcloud/tail"
|
"github.com/hpcloud/tail"
|
||||||
process "github.com/mudler/go-processmanager"
|
process "github.com/mudler/go-processmanager"
|
||||||
"github.com/rs/zerolog/log"
|
"github.com/rs/zerolog/log"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
var forceBackendShutdown bool = os.Getenv("LOCALAI_FORCE_BACKEND_SHUTDOWN") == "true"
|
||||||
|
|
||||||
func (ml *ModelLoader) deleteProcess(s string) error {
|
func (ml *ModelLoader) deleteProcess(s string) error {
|
||||||
|
model, ok := ml.models[s]
|
||||||
|
if !ok {
|
||||||
|
log.Debug().Msgf("Model %s not found", s)
|
||||||
|
return fmt.Errorf("model %s not found", s)
|
||||||
|
}
|
||||||
|
|
||||||
defer delete(ml.models, s)
|
defer delete(ml.models, s)
|
||||||
|
|
||||||
|
retries := 1
|
||||||
|
for model.GRPC(false, ml.wd).IsBusy() {
|
||||||
|
log.Debug().Msgf("%s busy. Waiting.", s)
|
||||||
|
dur := time.Duration(retries*2) * time.Second
|
||||||
|
if dur > retryTimeout {
|
||||||
|
dur = retryTimeout
|
||||||
|
}
|
||||||
|
time.Sleep(dur)
|
||||||
|
retries++
|
||||||
|
|
||||||
|
if retries > 10 && forceBackendShutdown {
|
||||||
|
log.Warn().Msgf("Model %s is still busy after %d retries. Forcing shutdown.", s, retries)
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
log.Debug().Msgf("Deleting process %s", s)
|
log.Debug().Msgf("Deleting process %s", s)
|
||||||
|
|
||||||
m, exists := ml.models[s]
|
process := model.Process()
|
||||||
if !exists {
|
|
||||||
log.Error().Msgf("Model does not exist %s", s)
|
|
||||||
// Nothing to do
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
process := m.Process()
|
|
||||||
if process == nil {
|
if process == nil {
|
||||||
log.Error().Msgf("No process for %s", s)
|
log.Error().Msgf("No process for %s", s)
|
||||||
// Nothing to do as there is no process
|
// Nothing to do as there is no process
|
||||||
|
@ -44,9 +62,12 @@ func (ml *ModelLoader) deleteProcess(s string) error {
|
||||||
|
|
||||||
func (ml *ModelLoader) StopGRPC(filter GRPCProcessFilter) error {
|
func (ml *ModelLoader) StopGRPC(filter GRPCProcessFilter) error {
|
||||||
var err error = nil
|
var err error = nil
|
||||||
|
ml.mu.Lock()
|
||||||
|
defer ml.mu.Unlock()
|
||||||
|
|
||||||
for k, m := range ml.models {
|
for k, m := range ml.models {
|
||||||
if filter(k, m.Process()) {
|
if filter(k, m.Process()) {
|
||||||
e := ml.ShutdownModel(k)
|
e := ml.deleteProcess(k)
|
||||||
err = errors.Join(err, e)
|
err = errors.Join(err, e)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue