feat: allow running parallel requests (#1290)

* feat: allow running parallel requests Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * fixup Signed-off-by: Ettore Di Giacinto <mudler@localai.io> --------- Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-05-20 02:24:59 +00:00 · 2023-11-16 08:20:05 +01:00 · 2023-11-16 08:20:05 +01:00 · fdd95d1d86
commit fdd95d1d86
parent 66a558ff41
9 changed files with 91 additions and 44 deletions
--- a/main.go
+++ b/main.go
@@ -16,9 +16,9 @@ import (
 	config "github.com/go-skynet/LocalAI/api/config"
 	"github.com/go-skynet/LocalAI/api/options"
 	"github.com/go-skynet/LocalAI/internal"
+	"github.com/go-skynet/LocalAI/metrics"
 	"github.com/go-skynet/LocalAI/pkg/gallery"
 	model "github.com/go-skynet/LocalAI/pkg/model"
-	"github.com/go-skynet/LocalAI/metrics"
 	"github.com/rs/zerolog"
 	"github.com/rs/zerolog/log"
 	progressbar "github.com/schollz/progressbar/v3"
@@ -63,6 +63,11 @@ func main() {
 				EnvVars: []string{"SINGLE_ACTIVE_BACKEND"},
 				Usage:   "Allow only one backend to be running.",
 			},
+			&cli.BoolFlag{
+				Name:    "parallel-requests",
+				EnvVars: []string{"PARALLEL_REQUESTS"},
+				Usage:   "Enable backends to handle multiple requests in parallel. This is for backends that supports multiple requests in parallel, like llama.cpp or vllm",
+			},
 			&cli.BoolFlag{
 				Name:    "cors",
 				EnvVars: []string{"CORS"},
@@ -193,7 +198,9 @@ For a list of compatible model, check out: https://localai.io/model-compatibilit
 				options.WithUploadLimitMB(ctx.Int("upload-limit")),
 				options.WithApiKeys(ctx.StringSlice("api-keys")),
 			}
-
+			if ctx.Bool("parallel-requests") {
+				opts = append(opts, options.EnableParallelBackendRequests)
+			}
 			if ctx.Bool("single-active-backend") {
 				opts = append(opts, options.EnableSingleBackend)
 			}