feat: queue up requests if not running parallel requests (#1296)

Return a gRPC client which handles a lock in case the backend is not meant
to serve parallel requests, so concurrent requests queue up instead of
reaching the backend at the same time.
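
A minimal sketch of that pattern, with illustrative names rather than
LocalAI's actual types: a factory hands back either the raw client or a
wrapper that holds a mutex for the duration of each call, so requests queue
whenever parallelism is off.

```go
package model

import "sync"

// Backend stands in for the real gRPC backend client (assumed interface,
// not LocalAI's actual API).
type Backend interface {
	Predict(prompt string) (string, error)
}

// lockedBackend admits one call at a time by holding a mutex for the
// duration of each request.
type lockedBackend struct {
	mu    sync.Mutex
	inner Backend
}

func (l *lockedBackend) Predict(prompt string) (string, error) {
	l.mu.Lock() // later callers queue here until this request finishes
	defer l.mu.Unlock()
	return l.inner.Predict(prompt)
}

// GRPC hands back the raw client when parallel requests are allowed,
// otherwise the serializing wrapper.
func GRPC(b Backend, parallel bool) Backend {
	if parallel {
		return b
	}
	return &lockedBackend{inner: b}
}
```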

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
commit 548959b50f (parent 2addb9f99a)
Author: Ettore Di Giacinto
Date:   2023-11-16 22:20:16 +01:00
5 changed files with 64 additions and 16 deletions


@@ -123,13 +123,12 @@ func BackendMonitorEndpoint(bm BackendMonitor) func(c *fiber.Ctx) error {
 		return err
 	}
-	client := bm.options.Loader.CheckIsLoaded(backendId)
-	if client == "" {
+	model := bm.options.Loader.CheckIsLoaded(backendId)
+	if model == "" {
 		return fmt.Errorf("backend %s is not currently loaded", backendId)
 	}
-	status, rpcErr := client.GRPC().Status(context.TODO())
+	status, rpcErr := model.GRPC(false).Status(context.TODO())
 	if rpcErr != nil {
 		log.Warn().Msgf("backend %s experienced an error retrieving status info: %s", backendId, rpcErr.Error())
 		val, slbErr := bm.SampleLocalBackendProcess(backendId)
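
The call-site change in this hunk is the visible half of that design:
GRPC now takes a flag saying whether the returned client may be used in
parallel, and BackendMonitor passes false, so its Status probe queues behind
any in-flight request. A self-contained sketch of that behavior, with every
name assumed rather than taken from LocalAI's real client:

```go
package main

import (
	"context"
	"fmt"
	"sync"
	"time"
)

// Client stands in for the gRPC backend client: it carries its own
// parallel flag, and every RPC-like method takes an internal lock when
// the flag is off.
type Client struct {
	parallel bool
	opMutex  sync.Mutex
}

// Status mimics an RPC; under parallel=false callers are serialized.
func (c *Client) Status(ctx context.Context) (string, error) {
	if !c.parallel {
		c.opMutex.Lock()
		defer c.opMutex.Unlock()
	}
	time.Sleep(50 * time.Millisecond) // simulate backend work
	return "ok", nil
}

func main() {
	c := &Client{parallel: false} // as in model.GRPC(false) above
	var wg sync.WaitGroup
	start := time.Now()
	for i := 0; i < 3; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			c.Status(context.Background())
		}()
	}
	wg.Wait()
	// With parallel=false the three calls run back to back (~150ms);
	// with parallel=true they would overlap (~50ms).
	fmt.Println("elapsed:", time.Since(start).Round(10*time.Millisecond))
}
```

Keeping the flag inside the client, rather than at every call site, means the
queueing stays invisible to callers: they issue RPCs as usual and simply wait
their turn when the backend cannot serve requests in parallel.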