fix: hold the lock when cycling through the models to delete

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-06-26 12:45:01 +00:00 · 2025-03-31 22:42:15 +02:00 · 2025-03-31 22:42:15 +02:00 · ce703bf6b6
commit ce703bf6b6
parent 04aafc4173
3 changed files with 33 additions and 29 deletions
--- a/.env
+++ b/.env
@ -29,6 +29,9 @@
 ## Enable/Disable single backend (useful if only one GPU is available)
 # LOCALAI_SINGLE_ACTIVE_BACKEND=true
 # Forces shutdown of the backends if busy (only if LOCALAI_SINGLE_ACTIVE_BACKEND is set)
 # LOCALAI_FORCE_BACKEND_SHUTDOWN=true
 ## Specify a build type. Available: cublas, openblas, clblas.
 ## cuBLAS: This is a GPU-accelerated version of the complete standard BLAS (Basic Linear Algebra Subprograms) library. It's provided by Nvidia and is part of their CUDA toolkit.
 ## OpenBLAS: This is an open-source implementation of the BLAS library that aims to provide highly optimized code for various platforms. It includes support for multi-threading and can be compiled to use hardware-specific features for additional performance. OpenBLAS can run on many kinds of hardware, including CPUs from Intel, AMD, and ARM.
--- a/pkg/model/loader.go
+++ b/pkg/model/loader.go
@ -142,26 +142,6 @@ func (ml *ModelLoader) LoadModel(modelID, modelName string, loader func(string,
 // ShutdownModel waits for the backend of the model registered under modelName
 // to stop reporting busy and then hands the model over to deleteProcess.
 //
 // ml.mu is held for the entire call, including every sleep in the wait loop.
 // It returns an error when modelName is not present in ml.models; otherwise it
 // returns whatever deleteProcess returns.
 //
 // NOTE(review): the hunk header (-142,26 +142,6) suggests this commit removes
 // the wait loop from this function, and deleteProcess as shown in this patch
 // contains the same loop; the +/- markers appear to be missing in this view,
 // so confirm against the real diff.
 func (ml *ModelLoader) ShutdownModel(modelName string) error {
 	ml.mu.Lock()
 	defer ml.mu.Unlock()
 	model, ok := ml.models[modelName]
 	if !ok {
 		return fmt.Errorf("model %s not found", modelName)
 	}
 	// retries starts at 1, so the first wait is 2 seconds.
 	retries := 1
 	for model.GRPC(false, ml.wd).IsBusy() {
 		log.Debug().Msgf("%s busy. Waiting.", modelName)
 		// The wait grows by 2 seconds per attempt and is capped at retryTimeout
 		// (declared outside this block).
 		dur := time.Duration(retries*2) * time.Second
 		if dur > retryTimeout {
 			dur = retryTimeout
 		}
 		time.Sleep(dur)
 		retries++
 		// The loop only gives up on a busy backend when the environment variable
 		// is exactly "true"; otherwise it keeps waiting until IsBusy is false.
 		if retries > 10 && os.Getenv("LOCALAI_FORCE_BACKEND_SHUTDOWN") == "true" {
 			log.Warn().Msgf("Model %s is still busy after %d retries. Forcing shutdown.", modelName, retries)
 			break
 		}
 	}
 	return ml.deleteProcess(modelName)
 }
--- a/pkg/model/process.go
+++ b/pkg/model/process.go
@ -9,25 +9,43 @@ import (
 	"strconv"
 	"strings"
 	"syscall"
 	"time"
 	"github.com/hpcloud/tail"
 	process "github.com/mudler/go-processmanager"
 	"github.com/rs/zerolog/log"
 )
 // forceBackendShutdown is true only when the LOCALAI_FORCE_BACKEND_SHUTDOWN
 // environment variable equals exactly "true". The value is computed once, in
 // this package-level initializer, so later changes to the environment are not
 // picked up.
 var forceBackendShutdown bool = os.Getenv("LOCALAI_FORCE_BACKEND_SHUTDOWN") == "true"
 func (ml *ModelLoader) deleteProcess(s string) error {
 	model, ok := ml.models[s]
 	if !ok {
 		log.Debug().Msgf("Model %s not found", s)
 		return fmt.Errorf("model %s not found", s)
 	}
 	defer delete(ml.models, s)
 	retries := 1
 	for model.GRPC(false, ml.wd).IsBusy() {
 		log.Debug().Msgf("%s busy. Waiting.", s)
 		dur := time.Duration(retries*2) * time.Second
 		if dur > retryTimeout {
 			dur = retryTimeout
 		}
 		time.Sleep(dur)
 		retries++
 		if retries > 10 && forceBackendShutdown {
 			log.Warn().Msgf("Model %s is still busy after %d retries. Forcing shutdown.", s, retries)
 			break
 		}
 	}
 	log.Debug().Msgf("Deleting process %s", s)
-	m, exists := ml.models[s]
+	process := model.Process()
 	if !exists {
 		log.Error().Msgf("Model does not exist %s", s)
 		// Nothing to do
 		return nil
 	}
 	process := m.Process()
 	if process == nil {
 		log.Error().Msgf("No process for %s", s)
 		// Nothing to do as there is no process
@ -44,9 +62,12 @@ func (ml *ModelLoader) deleteProcess(s string) error {
 func (ml *ModelLoader) StopGRPC(filter GRPCProcessFilter) error {
 	var err error = nil
 	ml.mu.Lock()
 	defer ml.mu.Unlock()
 	for k, m := range ml.models {
 		if filter(k, m.Process()) {
-			e := ml.ShutdownModel(k)
+			e := ml.deleteProcess(k)
 			err = errors.Join(err, e)
 		}
 	}