feat: queue up requests if not running parallel requests (#1296)

Return a gRPC client that handles a lock when it is not meant to serve requests in parallel. Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-05-20 10:35:01 +00:00 · 2023-11-16 22:20:16 +01:00 · 2023-11-16 22:20:16 +01:00 · 548959b50f
commit 548959b50f
parent 2addb9f99a
5 changed files with 64 additions and 16 deletions
--- a/pkg/model/process.go
+++ b/pkg/model/process.go
@@ -17,7 +17,7 @@ import (
 func (ml *ModelLoader) StopAllExcept(s string) {
 	ml.StopGRPC(func(id string, p *process.Process) bool {
 		if id != s {
-			for ml.models[id].GRPC().IsBusy() {
+			for ml.models[id].GRPC(false).IsBusy() {
 				log.Debug().Msgf("%s busy. Waiting.", id)
 				time.Sleep(2 * time.Second)
 			}