Merge branch 'master' into default_miro

Ettore Di Giacinto 2024-09-17 12:24:39 +02:00, committed by GitHub
commit 3a1727a4fe
141 changed files with 2956 additions and 1498 deletions

@@ -0,0 +1,13 @@
package backend_test
import (
"testing"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
)
func TestBackend(t *testing.T) {
RegisterFailHandler(Fail)
RunSpecs(t, "Backend test suite")
}
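This is the standard Ginkgo v2 bootstrap: RegisterFailHandler wires Gomega assertion failures into Ginkgo, and RunSpecs hands the whole suite to go test. Assuming the file sits alongside llm_test.go under core/backend, the suite runs with:

go test ./core/backend/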

@@ -9,6 +9,8 @@ import (
"sync"
"unicode/utf8"
"github.com/rs/zerolog/log"
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/schema"
@@ -87,7 +89,7 @@ func ModelInference(ctx context.Context, s string, messages []schema.Message, im
case string:
protoMessages[i].Content = ct
default:
return nil, fmt.Errorf("Unsupported type for schema.Message.Content for inference: %T", ct)
return nil, fmt.Errorf("unsupported type for schema.Message.Content for inference: %T", ct)
}
}
}
@@ -181,13 +183,37 @@ func Finetune(config config.BackendConfig, input, prediction string) string {
mu.Lock()
reg, ok := cutstrings[c]
if !ok {
cutstrings[c] = regexp.MustCompile(c)
r, err := regexp.Compile(c)
if err != nil {
log.Fatal().Err(err).Msg("failed to compile regex")
}
cutstrings[c] = r
reg = cutstrings[c]
}
mu.Unlock()
prediction = reg.ReplaceAllString(prediction, "")
}
// extract results from the response, which can for instance be inside XML tags
var predResult string
for _, r := range config.ExtractRegex {
mu.Lock()
reg, ok := cutstrings[r]
if !ok {
regex, err := regexp.Compile(r)
if err != nil {
log.Fatal().Err(err).Msg("failed to compile regex")
}
cutstrings[r] = regex
reg = regex
}
mu.Unlock()
predResult += reg.FindString(prediction)
}
if predResult != "" {
prediction = predResult
}
for _, c := range config.TrimSpace {
prediction = strings.TrimSpace(strings.TrimPrefix(prediction, c))
}
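Both loops above use the same compile-once pattern: look the pattern up in a shared map under a mutex, and compile-and-store on a miss. A minimal standalone sketch of that pattern (hypothetical names, "regexp" and "sync" imported; it returns the error instead of calling log.Fatal):

var (
	regexCacheMu sync.Mutex
	regexCache   = map[string]*regexp.Regexp{}
)

func cachedRegex(pattern string) (*regexp.Regexp, error) {
	regexCacheMu.Lock()
	defer regexCacheMu.Unlock()
	// fast path: the pattern was compiled on an earlier call
	if r, ok := regexCache[pattern]; ok {
		return r, nil
	}
	r, err := regexp.Compile(pattern)
	if err != nil {
		return nil, err // let the caller decide whether this is fatal
	}
	regexCache[pattern] = r
	return r, nil
}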

core/backend/llm_test.go (new file, 109 lines)

@@ -0,0 +1,109 @@
package backend_test
import (
. "github.com/mudler/LocalAI/core/backend"
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/schema"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
)
var _ = Describe("LLM tests", func() {
Context("Finetune LLM output", func() {
var (
testConfig config.BackendConfig
input string
prediction string
result string
)
BeforeEach(func() {
testConfig = config.BackendConfig{
PredictionOptions: schema.PredictionOptions{
Echo: false,
},
LLMConfig: config.LLMConfig{
Cutstrings: []string{`<.*?>`}, // Example regex for removing XML tags
ExtractRegex: []string{`<result>(.*?)</result>`}, // Example regex to extract from tags
TrimSpace: []string{" ", "\n"},
TrimSuffix: []string{".", "!"},
},
}
})
Context("when echo is enabled", func() {
BeforeEach(func() {
testConfig.Echo = true
input = "Hello"
prediction = "World"
})
It("should prepend input to prediction", func() {
result = Finetune(testConfig, input, prediction)
Expect(result).To(Equal("HelloWorld"))
})
})
Context("when echo is disabled", func() {
BeforeEach(func() {
testConfig.Echo = false
input = "Hello"
prediction = "World"
})
It("should not modify the prediction with input", func() {
result = Finetune(testConfig, input, prediction)
Expect(result).To(Equal("World"))
})
})
Context("when cutstrings regex is applied", func() {
BeforeEach(func() {
input = ""
prediction = "<div>Hello</div> World"
})
It("should remove substrings matching cutstrings regex", func() {
result = Finetune(testConfig, input, prediction)
Expect(result).To(Equal("Hello World"))
})
})
Context("when extract regex is applied", func() {
BeforeEach(func() {
input = ""
prediction = "<response><result>42</result></response>"
})
It("should extract substrings matching the extract regex", func() {
result = Finetune(testConfig, input, prediction)
Expect(result).To(Equal("42"))
})
})
Context("when trimming spaces", func() {
BeforeEach(func() {
input = ""
prediction = " Hello World "
})
It("should trim spaces from the prediction", func() {
result = Finetune(testConfig, input, prediction)
Expect(result).To(Equal("Hello World"))
})
})
Context("when trimming suffixes", func() {
BeforeEach(func() {
input = ""
prediction = "Hello World."
})
It("should trim suffixes from the prediction", func() {
result = Finetune(testConfig, input, prediction)
Expect(result).To(Equal("Hello World"))
})
})
})
})

@@ -0,0 +1,74 @@
package backend
import (
"context"
"fmt"
"os"
"path/filepath"
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/pkg/grpc/proto"
"github.com/mudler/LocalAI/pkg/model"
"github.com/mudler/LocalAI/pkg/utils"
)
func SoundGeneration(
backend string,
modelFile string,
text string,
duration *float32,
temperature *float32,
doSample *bool,
sourceFile *string,
sourceDivisor *int32,
loader *model.ModelLoader,
appConfig *config.ApplicationConfig,
backendConfig config.BackendConfig,
) (string, *proto.Result, error) {
if backend == "" {
return "", nil, fmt.Errorf("backend is a required parameter")
}
grpcOpts := gRPCModelOpts(backendConfig)
opts := modelOpts(config.BackendConfig{}, appConfig, []model.Option{
model.WithBackendString(backend),
model.WithModel(modelFile),
model.WithContext(appConfig.Context),
model.WithAssetDir(appConfig.AssetsDestination),
model.WithLoadGRPCLoadModelOpts(grpcOpts),
})
soundGenModel, err := loader.BackendLoader(opts...)
if err != nil {
return "", nil, err
}
if soundGenModel == nil {
return "", nil, fmt.Errorf("could not load sound generation model")
}
if err := os.MkdirAll(appConfig.AudioDir, 0750); err != nil {
return "", nil, fmt.Errorf("failed creating audio directory: %s", err)
}
fileName := utils.GenerateUniqueFileName(appConfig.AudioDir, "sound_generation", ".wav")
filePath := filepath.Join(appConfig.AudioDir, fileName)
res, err := soundGenModel.SoundGeneration(context.Background(), &proto.SoundGenerationRequest{
Text: text,
Model: modelFile,
Dst: filePath,
Sample: doSample,
Duration: duration,
Temperature: temperature,
Src: sourceFile,
SrcDivisor: sourceDivisor,
})
// surface the transport error before inspecting the result
if err != nil {
return "", nil, err
}
// return RPC error if any
if !res.Success {
return "", nil, fmt.Errorf(res.Message)
}
return filePath, res, nil
}

@@ -3,12 +3,13 @@ package backend
import (
"context"
"fmt"
"time"
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/schema"
"github.com/mudler/LocalAI/pkg/grpc/proto"
model "github.com/mudler/LocalAI/pkg/model"
"github.com/mudler/LocalAI/pkg/model"
)
func ModelTranscription(audio, language string, translate bool, ml *model.ModelLoader, backendConfig config.BackendConfig, appConfig *config.ApplicationConfig) (*schema.TranscriptionResult, error) {
@@ -21,19 +22,40 @@ func ModelTranscription(audio, language string, translate bool, ml *model.ModelL
model.WithAssetDir(appConfig.AssetsDestination),
})
whisperModel, err := ml.BackendLoader(opts...)
transcriptionModel, err := ml.BackendLoader(opts...)
if err != nil {
return nil, err
}
if whisperModel == nil {
return nil, fmt.Errorf("could not load whisper model")
if transcriptionModel == nil {
return nil, fmt.Errorf("could not load transcription model")
}
return whisperModel.AudioTranscription(context.Background(), &proto.TranscriptRequest{
r, err := transcriptionModel.AudioTranscription(context.Background(), &proto.TranscriptRequest{
Dst: audio,
Language: language,
Translate: translate,
Threads: uint32(*backendConfig.Threads),
})
if err != nil {
return nil, err
}
tr := &schema.TranscriptionResult{
Text: r.Text,
}
for _, s := range r.Segments {
var tks []int
for _, t := range s.Tokens {
tks = append(tks, int(t))
}
tr.Segments = append(tr.Segments,
schema.Segment{
Text: s.Text,
Id: int(s.Id),
Start: time.Duration(s.Start),
End: time.Duration(s.End),
Tokens: tks,
})
}
return tr, err
}

@@ -9,31 +9,15 @@ import (
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/pkg/grpc/proto"
model "github.com/mudler/LocalAI/pkg/model"
"github.com/mudler/LocalAI/pkg/model"
"github.com/mudler/LocalAI/pkg/utils"
)
func generateUniqueFileName(dir, baseName, ext string) string {
counter := 1
fileName := baseName + ext
for {
filePath := filepath.Join(dir, fileName)
_, err := os.Stat(filePath)
if os.IsNotExist(err) {
return fileName
}
counter++
fileName = fmt.Sprintf("%s_%d%s", baseName, counter, ext)
}
}
func ModelTTS(
backend,
text,
modelFile,
voice ,
voice,
language string,
loader *model.ModelLoader,
appConfig *config.ApplicationConfig,
@@ -66,7 +50,7 @@ func ModelTTS(
return "", nil, fmt.Errorf("failed creating audio directory: %s", err)
}
fileName := generateUniqueFileName(appConfig.AudioDir, "tts", ".wav")
fileName := utils.GenerateUniqueFileName(appConfig.AudioDir, "tts", ".wav")
filePath := filepath.Join(appConfig.AudioDir, fileName)
// If the model file is not empty, we pass it joined with the model path
@@ -88,12 +72,15 @@ func ModelTTS(
}
res, err := ttsModel.TTS(context.Background(), &proto.TTSRequest{
Text: text,
Model: modelPath,
Voice: voice,
Dst: filePath,
Text: text,
Model: modelPath,
Voice: voice,
Dst: filePath,
Language: &language,
})
if err != nil {
return "", nil, err
}
// return RPC error if any
if !res.Success {

core/cli/api/p2p.go (new file, 80 lines)

@@ -0,0 +1,80 @@
package cli_api
import (
"context"
"fmt"
"net"
"os"
"strings"
"github.com/mudler/LocalAI/core/p2p"
"github.com/mudler/edgevpn/pkg/node"
"github.com/rs/zerolog/log"
)
func StartP2PStack(ctx context.Context, address, token, networkID string, federated bool) error {
var n *node.Node
// Here we are avoiding creating multiple nodes:
// - if the federated mode is enabled, we create a federated node and expose a service
// - exposing a service creates a node with specific options, and we don't want to create another node
// If the federated mode is enabled, we expose a service to the local instance running
// at the given address
if federated {
_, port, err := net.SplitHostPort(address)
if err != nil {
return err
}
// Here a new node is created and started
// and a service is exposed by the node
node, err := p2p.ExposeService(ctx, "localhost", port, token, p2p.NetworkID(networkID, p2p.FederatedID))
if err != nil {
return err
}
if err := p2p.ServiceDiscoverer(ctx, node, token, p2p.NetworkID(networkID, p2p.FederatedID), nil, false); err != nil {
return err
}
n = node
}
// If the p2p mode is enabled, we start the service discovery
if token != "" {
// If a node wasn't created previously, create it
if n == nil {
node, err := p2p.NewNode(token)
if err != nil {
return err
}
err = node.Start(ctx)
if err != nil {
return fmt.Errorf("starting new node: %w", err)
}
n = node
}
// Attach a ServiceDiscoverer to the p2p node
log.Info().Msg("Starting P2P server discovery...")
if err := p2p.ServiceDiscoverer(ctx, n, token, p2p.NetworkID(networkID, p2p.WorkerID), func(serviceID string, node p2p.NodeData) {
var tunnelAddresses []string
for _, v := range p2p.GetAvailableNodes(p2p.NetworkID(networkID, p2p.WorkerID)) {
if v.IsOnline() {
tunnelAddresses = append(tunnelAddresses, v.TunnelAddress)
} else {
log.Info().Msgf("Node %s is offline", v.ID)
}
}
tunnelEnvVar := strings.Join(tunnelAddresses, ",")
os.Setenv("LLAMACPP_GRPC_SERVERS", tunnelEnvVar)
log.Debug().Msgf("setting LLAMACPP_GRPC_SERVERS to %s", tunnelEnvVar)
}, true); err != nil {
return err
}
}
return nil
}
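This helper is meant to be called once at startup; the reworked run command below invokes it roughly like this (sketch, placeholder values):

// address, token, networkID and federated come from the CLI flags.
if err := cli_api.StartP2PStack(context.Background(), ":8080", token, networkID, federated); err != nil {
	return err
}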

@@ -8,12 +8,13 @@ import (
var CLI struct {
cliContext.Context `embed:""`
Run RunCMD `cmd:"" help:"Run LocalAI, this is the default command if no other command is specified. Run 'local-ai run --help' for more information" default:"withargs"`
Federated FederatedCLI `cmd:"" help:"Run LocalAI in federated mode"`
Models ModelsCMD `cmd:"" help:"Manage LocalAI models and definitions"`
TTS TTSCMD `cmd:"" help:"Convert text to speech"`
Transcript TranscriptCMD `cmd:"" help:"Convert audio to text"`
Worker worker.Worker `cmd:"" help:"Run workers to distribute workload (llama.cpp-only)"`
Util UtilCMD `cmd:"" help:"Utility commands"`
Explorer ExplorerCMD `cmd:"" help:"Run p2p explorer"`
Run RunCMD `cmd:"" help:"Run LocalAI, this is the default command if no other command is specified. Run 'local-ai run --help' for more information" default:"withargs"`
Federated FederatedCLI `cmd:"" help:"Run LocalAI in federated mode"`
Models ModelsCMD `cmd:"" help:"Manage LocalAI models and definitions"`
TTS TTSCMD `cmd:"" help:"Convert text to speech"`
SoundGeneration SoundGenerationCMD `cmd:"" help:"Generates audio files from text or audio"`
Transcript TranscriptCMD `cmd:"" help:"Convert audio to text"`
Worker worker.Worker `cmd:"" help:"Run workers to distribute workload (llama.cpp-only)"`
Util UtilCMD `cmd:"" help:"Utility commands"`
Explorer ExplorerCMD `cmd:"" help:"Run p2p explorer"`
}

@@ -3,11 +3,10 @@ package cli
import (
"context"
"fmt"
"net"
"os"
"strings"
"time"
cli_api "github.com/mudler/LocalAI/core/cli/api"
cliContext "github.com/mudler/LocalAI/core/cli/context"
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/http"
@@ -42,29 +41,34 @@ type RunCMD struct {
Threads int `env:"LOCALAI_THREADS,THREADS" short:"t" help:"Number of threads used for parallel computation. Usage of the number of physical cores in the system is suggested" group:"performance"`
ContextSize int `env:"LOCALAI_CONTEXT_SIZE,CONTEXT_SIZE" default:"512" help:"Default context size for models" group:"performance"`
Address string `env:"LOCALAI_ADDRESS,ADDRESS" default:":8080" help:"Bind address for the API server" group:"api"`
CORS bool `env:"LOCALAI_CORS,CORS" help:"" group:"api"`
CORSAllowOrigins string `env:"LOCALAI_CORS_ALLOW_ORIGINS,CORS_ALLOW_ORIGINS" group:"api"`
LibraryPath string `env:"LOCALAI_LIBRARY_PATH,LIBRARY_PATH" help:"Path to the library directory (for e.g. external libraries used by backends)" default:"/usr/share/local-ai/libs" group:"backends"`
CSRF bool `env:"LOCALAI_CSRF" help:"Enables fiber CSRF middleware" group:"api"`
UploadLimit int `env:"LOCALAI_UPLOAD_LIMIT,UPLOAD_LIMIT" default:"15" help:"Default upload-limit in MB" group:"api"`
APIKeys []string `env:"LOCALAI_API_KEY,API_KEY" help:"List of API Keys to enable API authentication. When this is set, all the requests must be authenticated with one of these API keys" group:"api"`
DisableWebUI bool `env:"LOCALAI_DISABLE_WEBUI,DISABLE_WEBUI" default:"false" help:"Disable webui" group:"api"`
DisablePredownloadScan bool `env:"LOCALAI_DISABLE_PREDOWNLOAD_SCAN" help:"If true, disables the best-effort security scanner before downloading any files." group:"hardening" default:"false"`
OpaqueErrors bool `env:"LOCALAI_OPAQUE_ERRORS" default:"false" help:"If true, all error responses are replaced with blank 500 errors. This is intended only for hardening against information leaks and is normally not recommended." group:"hardening"`
Peer2Peer bool `env:"LOCALAI_P2P,P2P" name:"p2p" default:"false" help:"Enable P2P mode" group:"p2p"`
Peer2PeerToken string `env:"LOCALAI_P2P_TOKEN,P2P_TOKEN,TOKEN" name:"p2ptoken" help:"Token for P2P mode (optional)" group:"p2p"`
Peer2PeerNetworkID string `env:"LOCALAI_P2P_NETWORK_ID,P2P_NETWORK_ID" help:"Network ID for P2P mode, can be set arbitrarily by the user for grouping a set of instances" group:"p2p"`
ParallelRequests bool `env:"LOCALAI_PARALLEL_REQUESTS,PARALLEL_REQUESTS" help:"Enable backends to handle multiple requests in parallel if they support it (e.g.: llama.cpp or vllm)" group:"backends"`
SingleActiveBackend bool `env:"LOCALAI_SINGLE_ACTIVE_BACKEND,SINGLE_ACTIVE_BACKEND" help:"Allow only one backend to be run at a time" group:"backends"`
PreloadBackendOnly bool `env:"LOCALAI_PRELOAD_BACKEND_ONLY,PRELOAD_BACKEND_ONLY" default:"false" help:"Do not launch the API services, only the preloaded models / backends are started (useful for multi-node setups)" group:"backends"`
ExternalGRPCBackends []string `env:"LOCALAI_EXTERNAL_GRPC_BACKENDS,EXTERNAL_GRPC_BACKENDS" help:"A list of external grpc backends" group:"backends"`
EnableWatchdogIdle bool `env:"LOCALAI_WATCHDOG_IDLE,WATCHDOG_IDLE" default:"false" help:"Enable watchdog for stopping backends that are idle longer than the watchdog-idle-timeout" group:"backends"`
WatchdogIdleTimeout string `env:"LOCALAI_WATCHDOG_IDLE_TIMEOUT,WATCHDOG_IDLE_TIMEOUT" default:"15m" help:"Threshold beyond which an idle backend should be stopped" group:"backends"`
EnableWatchdogBusy bool `env:"LOCALAI_WATCHDOG_BUSY,WATCHDOG_BUSY" default:"false" help:"Enable watchdog for stopping backends that are busy longer than the watchdog-busy-timeout" group:"backends"`
WatchdogBusyTimeout string `env:"LOCALAI_WATCHDOG_BUSY_TIMEOUT,WATCHDOG_BUSY_TIMEOUT" default:"5m" help:"Threshold beyond which a busy backend should be stopped" group:"backends"`
Federated bool `env:"LOCALAI_FEDERATED,FEDERATED" help:"Enable federated instance" group:"federated"`
DisableGalleryEndpoint bool `env:"LOCALAI_DISABLE_GALLERY_ENDPOINT,DISABLE_GALLERY_ENDPOINT" help:"Disable the gallery endpoints" group:"api"`
Address string `env:"LOCALAI_ADDRESS,ADDRESS" default:":8080" help:"Bind address for the API server" group:"api"`
CORS bool `env:"LOCALAI_CORS,CORS" help:"" group:"api"`
CORSAllowOrigins string `env:"LOCALAI_CORS_ALLOW_ORIGINS,CORS_ALLOW_ORIGINS" group:"api"`
LibraryPath string `env:"LOCALAI_LIBRARY_PATH,LIBRARY_PATH" help:"Path to the library directory (for e.g. external libraries used by backends)" default:"/usr/share/local-ai/libs" group:"backends"`
CSRF bool `env:"LOCALAI_CSRF" help:"Enables fiber CSRF middleware" group:"api"`
UploadLimit int `env:"LOCALAI_UPLOAD_LIMIT,UPLOAD_LIMIT" default:"15" help:"Default upload-limit in MB" group:"api"`
APIKeys []string `env:"LOCALAI_API_KEY,API_KEY" help:"List of API Keys to enable API authentication. When this is set, all the requests must be authenticated with one of these API keys" group:"api"`
DisableWebUI bool `env:"LOCALAI_DISABLE_WEBUI,DISABLE_WEBUI" default:"false" help:"Disable webui" group:"api"`
DisablePredownloadScan bool `env:"LOCALAI_DISABLE_PREDOWNLOAD_SCAN" help:"If true, disables the best-effort security scanner before downloading any files." group:"hardening" default:"false"`
OpaqueErrors bool `env:"LOCALAI_OPAQUE_ERRORS" default:"false" help:"If true, all error responses are replaced with blank 500 errors. This is intended only for hardening against information leaks and is normally not recommended." group:"hardening"`
UseSubtleKeyComparison bool `env:"LOCALAI_SUBTLE_KEY_COMPARISON" default:"false" help:"If true, API Key validation comparisons will be performed using constant-time comparisons rather than simple equality. This trades off performance on each request for resiliency against timing attacks." group:"hardening"`
DisableApiKeyRequirementForHttpGet bool `env:"LOCALAI_DISABLE_API_KEY_REQUIREMENT_FOR_HTTP_GET" default:"false" help:"If true, a valid API key is not required to issue GET requests to portions of the web ui. This should only be enabled in secure testing environments" group:"hardening"`
HttpGetExemptedEndpoints []string `env:"LOCALAI_HTTP_GET_EXEMPTED_ENDPOINTS" default:"^/$,^/browse/?$,^/talk/?$,^/p2p/?$,^/chat/?$,^/text2image/?$,^/tts/?$,^/static/.*$,^/swagger.*$" help:"If LOCALAI_DISABLE_API_KEY_REQUIREMENT_FOR_HTTP_GET is overridden to true, this is the list of endpoints to exempt. Only adjust this in case of a security incident or as a result of a personal security posture review" group:"hardening"`
Peer2Peer bool `env:"LOCALAI_P2P,P2P" name:"p2p" default:"false" help:"Enable P2P mode" group:"p2p"`
Peer2PeerDHTInterval int `env:"LOCALAI_P2P_DHT_INTERVAL,P2P_DHT_INTERVAL" default:"360" name:"p2p-dht-interval" help:"Interval for DHT refresh (used during token generation)" group:"p2p"`
Peer2PeerOTPInterval int `env:"LOCALAI_P2P_OTP_INTERVAL,P2P_OTP_INTERVAL" default:"9000" name:"p2p-otp-interval" help:"Interval for OTP refresh (used during token generation)" group:"p2p"`
Peer2PeerToken string `env:"LOCALAI_P2P_TOKEN,P2P_TOKEN,TOKEN" name:"p2ptoken" help:"Token for P2P mode (optional)" group:"p2p"`
Peer2PeerNetworkID string `env:"LOCALAI_P2P_NETWORK_ID,P2P_NETWORK_ID" help:"Network ID for P2P mode, can be set arbitrarily by the user for grouping a set of instances" group:"p2p"`
ParallelRequests bool `env:"LOCALAI_PARALLEL_REQUESTS,PARALLEL_REQUESTS" help:"Enable backends to handle multiple requests in parallel if they support it (e.g.: llama.cpp or vllm)" group:"backends"`
SingleActiveBackend bool `env:"LOCALAI_SINGLE_ACTIVE_BACKEND,SINGLE_ACTIVE_BACKEND" help:"Allow only one backend to be run at a time" group:"backends"`
PreloadBackendOnly bool `env:"LOCALAI_PRELOAD_BACKEND_ONLY,PRELOAD_BACKEND_ONLY" default:"false" help:"Do not launch the API services, only the preloaded models / backends are started (useful for multi-node setups)" group:"backends"`
ExternalGRPCBackends []string `env:"LOCALAI_EXTERNAL_GRPC_BACKENDS,EXTERNAL_GRPC_BACKENDS" help:"A list of external grpc backends" group:"backends"`
EnableWatchdogIdle bool `env:"LOCALAI_WATCHDOG_IDLE,WATCHDOG_IDLE" default:"false" help:"Enable watchdog for stopping backends that are idle longer than the watchdog-idle-timeout" group:"backends"`
WatchdogIdleTimeout string `env:"LOCALAI_WATCHDOG_IDLE_TIMEOUT,WATCHDOG_IDLE_TIMEOUT" default:"15m" help:"Threshold beyond which an idle backend should be stopped" group:"backends"`
EnableWatchdogBusy bool `env:"LOCALAI_WATCHDOG_BUSY,WATCHDOG_BUSY" default:"false" help:"Enable watchdog for stopping backends that are busy longer than the watchdog-busy-timeout" group:"backends"`
WatchdogBusyTimeout string `env:"LOCALAI_WATCHDOG_BUSY_TIMEOUT,WATCHDOG_BUSY_TIMEOUT" default:"5m" help:"Threshold beyond which a busy backend should be stopped" group:"backends"`
Federated bool `env:"LOCALAI_FEDERATED,FEDERATED" help:"Enable federated instance" group:"federated"`
DisableGalleryEndpoint bool `env:"LOCALAI_DISABLE_GALLERY_ENDPOINT,DISABLE_GALLERY_ENDPOINT" help:"Disable the gallery endpoints" group:"api"`
}
func (r *RunCMD) Run(ctx *cliContext.Context) error {
@@ -96,6 +100,9 @@ func (r *RunCMD) Run(ctx *cliContext.Context) error {
config.WithModelsURL(append(r.Models, r.ModelArgs...)...),
config.WithOpaqueErrors(r.OpaqueErrors),
config.WithEnforcedPredownloadScans(!r.DisablePredownloadScan),
config.WithSubtleKeyComparison(r.UseSubtleKeyComparison),
config.WithDisableApiKeyRequirementForHttpGet(r.DisableApiKeyRequirementForHttpGet),
config.WithHttpGetExemptedEndpoints(r.HttpGetExemptedEndpoints),
config.WithP2PNetworkID(r.Peer2PeerNetworkID),
}
@@ -107,7 +114,7 @@ func (r *RunCMD) Run(ctx *cliContext.Context) error {
// If no token is provided and p2p is enabled,
// we generate one and wait for the user to pick up the token (this is for interactive)
log.Info().Msg("No token provided, generating one")
token = p2p.GenerateToken()
token = p2p.GenerateToken(r.Peer2PeerDHTInterval, r.Peer2PeerOTPInterval)
log.Info().Msg("Generated Token:")
fmt.Println(token)
@@ -115,45 +122,12 @@ func (r *RunCMD) Run(ctx *cliContext.Context) error {
fmt.Printf("export TOKEN=\"%s\"\nlocal-ai worker p2p-llama-cpp-rpc\n", token)
}
opts = append(opts, config.WithP2PToken(token))
node, err := p2p.NewNode(token)
if err != nil {
return err
}
log.Info().Msg("Starting P2P server discovery...")
if err := p2p.ServiceDiscoverer(context.Background(), node, token, p2p.NetworkID(r.Peer2PeerNetworkID, p2p.WorkerID), func(serviceID string, node p2p.NodeData) {
var tunnelAddresses []string
for _, v := range p2p.GetAvailableNodes(p2p.NetworkID(r.Peer2PeerNetworkID, p2p.WorkerID)) {
if v.IsOnline() {
tunnelAddresses = append(tunnelAddresses, v.TunnelAddress)
} else {
log.Info().Msgf("Node %s is offline", v.ID)
}
}
tunnelEnvVar := strings.Join(tunnelAddresses, ",")
os.Setenv("LLAMACPP_GRPC_SERVERS", tunnelEnvVar)
log.Debug().Msgf("setting LLAMACPP_GRPC_SERVERS to %s", tunnelEnvVar)
}, true); err != nil {
return err
}
}
if r.Federated {
_, port, err := net.SplitHostPort(r.Address)
if err != nil {
return err
}
fedCtx := context.Background()
node, err := p2p.ExposeService(fedCtx, "localhost", port, token, p2p.NetworkID(r.Peer2PeerNetworkID, p2p.FederatedID))
if err != nil {
return err
}
backgroundCtx := context.Background()
if err := p2p.ServiceDiscoverer(fedCtx, node, token, p2p.NetworkID(r.Peer2PeerNetworkID, p2p.FederatedID), nil, false); err != nil {
return err
}
if err := cli_api.StartP2PStack(backgroundCtx, r.Address, token, r.Peer2PeerNetworkID, r.Federated); err != nil {
return err
}
idleWatchDog := r.EnableWatchdogIdle

core/cli/soundgeneration.go (new file, 110 lines)

@@ -0,0 +1,110 @@
package cli
import (
"context"
"fmt"
"os"
"path/filepath"
"strconv"
"strings"
"github.com/mudler/LocalAI/core/backend"
cliContext "github.com/mudler/LocalAI/core/cli/context"
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/pkg/model"
"github.com/rs/zerolog/log"
)
type SoundGenerationCMD struct {
Text []string `arg:""`
Backend string `short:"b" required:"" help:"Backend to run the SoundGeneration model"`
Model string `short:"m" required:"" help:"Model name to run the SoundGeneration"`
Duration string `short:"d" help:"If specified, the length of audio to generate in seconds"`
Temperature string `short:"t" help:"If specified, the temperature of the generation"`
InputFile string `short:"i" help:"If specified, the input file to condition generation upon"`
InputFileSampleDivisor string `short:"f" help:"If InputFile and this divisor is specified, the first portion of the sample file will be used"`
DoSample bool `short:"s" default:"true" help:"Enables sampling from the model. Better quality at the cost of speed. Defaults to enabled."`
OutputFile string `short:"o" type:"path" help:"The path to write the output wav file"`
ModelsPath string `env:"LOCALAI_MODELS_PATH,MODELS_PATH" type:"path" default:"${basepath}/models" help:"Path containing models used for inferencing" group:"storage"`
BackendAssetsPath string `env:"LOCALAI_BACKEND_ASSETS_PATH,BACKEND_ASSETS_PATH" type:"path" default:"/tmp/localai/backend_data" help:"Path used to extract libraries that are required by some of the backends in runtime" group:"storage"`
ExternalGRPCBackends []string `env:"LOCALAI_EXTERNAL_GRPC_BACKENDS,EXTERNAL_GRPC_BACKENDS" help:"A list of external grpc backends" group:"backends"`
}
func parseToFloat32Ptr(input string) *float32 {
f, err := strconv.ParseFloat(input, 32)
if err != nil {
return nil
}
f2 := float32(f)
return &f2
}
func parseToInt32Ptr(input string) *int32 {
i, err := strconv.ParseInt(input, 10, 32)
if err != nil {
return nil
}
i2 := int32(i)
return &i2
}
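Note the failure mode of these helpers: a value that fails to parse yields nil, which the backend then treats as "option not specified" rather than reporting an error. Illustrative calls:

parseToFloat32Ptr("2.5")  // pointer to 2.5
parseToFloat32Ptr("oops") // nil: the flag is silently ignored
parseToInt32Ptr("4")      // pointer to 4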
func (t *SoundGenerationCMD) Run(ctx *cliContext.Context) error {
outputFile := t.OutputFile
outputDir := t.BackendAssetsPath
if outputFile != "" {
outputDir = filepath.Dir(outputFile)
}
text := strings.Join(t.Text, " ")
externalBackends := make(map[string]string)
// split on ":" to get the backend name and the URI
for _, v := range t.ExternalGRPCBackends {
backend := v[:strings.IndexByte(v, ':')]
uri := v[strings.IndexByte(v, ':')+1:]
externalBackends[backend] = uri
fmt.Printf("TMP externalBackends[%q]=%q\n\n", backend, uri)
}
opts := &config.ApplicationConfig{
ModelPath: t.ModelsPath,
Context: context.Background(),
AudioDir: outputDir,
AssetsDestination: t.BackendAssetsPath,
ExternalGRPCBackends: externalBackends,
}
ml := model.NewModelLoader(opts.ModelPath)
defer func() {
err := ml.StopAllGRPC()
if err != nil {
log.Error().Err(err).Msg("unable to stop all grpc processes")
}
}()
options := config.BackendConfig{}
options.SetDefaults()
var inputFile *string
if t.InputFile != "" {
inputFile = &t.InputFile
}
filePath, _, err := backend.SoundGeneration(t.Backend, t.Model, text,
parseToFloat32Ptr(t.Duration), parseToFloat32Ptr(t.Temperature), &t.DoSample,
inputFile, parseToInt32Ptr(t.InputFileSampleDivisor), ml, opts, options)
if err != nil {
return err
}
if outputFile != "" {
if err := os.Rename(filePath, outputFile); err != nil {
return err
}
fmt.Printf("Generate file %s\n", outputFile)
} else {
fmt.Printf("Generate file %s\n", filePath)
}
return nil
}
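Assuming kong derives the subcommand name from the SoundGeneration field in the CLI struct above, an invocation might look like this (the subcommand spelling, backend, and model names are placeholders, not verified):

local-ai sound-generation -b <backend> -m <model-file> -o out.wav "ocean waves crashing"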

@@ -2,6 +2,7 @@ package worker
type WorkerFlags struct {
BackendAssetsPath string `env:"LOCALAI_BACKEND_ASSETS_PATH,BACKEND_ASSETS_PATH" type:"path" default:"/tmp/localai/backend_data" help:"Path used to extract libraries that are required by some of the backends in runtime" group:"storage"`
ExtraLLamaCPPArgs string `name:"llama-cpp-args" env:"LOCALAI_EXTRA_LLAMA_CPP_ARGS,EXTRA_LLAMA_CPP_ARGS" help:"Extra arguments to pass to llama-cpp-rpc-server"`
}
type Worker struct {

@@ -3,6 +3,7 @@ package worker
import (
"fmt"
"os"
"strings"
"syscall"
cliContext "github.com/mudler/LocalAI/core/cli/context"
@@ -12,7 +13,6 @@ import (
)
type LLamaCPP struct {
Args []string `arg:"" optional:"" name:"models" help:"Model configuration URLs to load"`
WorkerFlags `embed:""`
}
@@ -34,9 +34,8 @@ func (r *LLamaCPP) Run(ctx *cliContext.Context) error {
"llama-cpp-rpc-server",
)
args := os.Args[4:]
args := strings.Split(r.ExtraLLamaCPPArgs, " ")
args, grpcProcess = library.LoadLDSO(r.BackendAssetsPath, args, grpcProcess)
args = append([]string{grpcProcess}, args...)
return syscall.Exec(
grpcProcess,

@@ -8,6 +8,7 @@ import (
"fmt"
"os"
"os/exec"
"strings"
"time"
cliContext "github.com/mudler/LocalAI/core/cli/context"
@@ -20,12 +21,11 @@ import (
type P2P struct {
WorkerFlags `embed:""`
Token string `env:"LOCALAI_TOKEN,LOCALAI_P2P_TOKEN,TOKEN" help:"P2P token to use"`
NoRunner bool `env:"LOCALAI_NO_RUNNER,NO_RUNNER" help:"Do not start the llama-cpp-rpc-server"`
RunnerAddress string `env:"LOCALAI_RUNNER_ADDRESS,RUNNER_ADDRESS" help:"Address of the llama-cpp-rpc-server"`
RunnerPort string `env:"LOCALAI_RUNNER_PORT,RUNNER_PORT" help:"Port of the llama-cpp-rpc-server"`
ExtraLLamaCPPArgs []string `env:"LOCALAI_EXTRA_LLAMA_CPP_ARGS,EXTRA_LLAMA_CPP_ARGS" help:"Extra arguments to pass to llama-cpp-rpc-server"`
Peer2PeerNetworkID string `env:"LOCALAI_P2P_NETWORK_ID,P2P_NETWORK_ID" help:"Network ID for P2P mode, can be set arbitrarily by the user for grouping a set of instances" group:"p2p"`
Token string `env:"LOCALAI_TOKEN,LOCALAI_P2P_TOKEN,TOKEN" help:"P2P token to use"`
NoRunner bool `env:"LOCALAI_NO_RUNNER,NO_RUNNER" help:"Do not start the llama-cpp-rpc-server"`
RunnerAddress string `env:"LOCALAI_RUNNER_ADDRESS,RUNNER_ADDRESS" help:"Address of the llama-cpp-rpc-server"`
RunnerPort string `env:"LOCALAI_RUNNER_PORT,RUNNER_PORT" help:"Port of the llama-cpp-rpc-server"`
Peer2PeerNetworkID string `env:"LOCALAI_P2P_NETWORK_ID,P2P_NETWORK_ID" help:"Network ID for P2P mode, can be set arbitrarily by the user for grouping a set of instances" group:"p2p"`
}
func (r *P2P) Run(ctx *cliContext.Context) error {
@@ -65,44 +65,42 @@ func (r *P2P) Run(ctx *cliContext.Context) error {
return err
}
log.Info().Msgf("You need to start llama-cpp-rpc-server on '%s:%s'", address, p)
} else {
// Start llama.cpp directly from the version we have pre-packaged
go func() {
for {
log.Info().Msgf("Starting llama-cpp-rpc-server on '%s:%d'", address, port)
return nil
}
grpcProcess := assets.ResolvePath(
r.BackendAssetsPath,
"util",
"llama-cpp-rpc-server",
)
extraArgs := strings.Split(r.ExtraLLamaCPPArgs, " ")
args := append([]string{"--host", address, "--port", fmt.Sprint(port)}, extraArgs...)
args, grpcProcess = library.LoadLDSO(r.BackendAssetsPath, args, grpcProcess)
// Start llama.cpp directly from the version we have pre-packaged
go func() {
for {
log.Info().Msgf("Starting llama-cpp-rpc-server on '%s:%d'", address, port)
cmd := exec.Command(
grpcProcess, args...,
)
grpcProcess := assets.ResolvePath(
r.BackendAssetsPath,
"util",
"llama-cpp-rpc-server",
)
cmd.Env = os.Environ()
args := append([]string{"--host", address, "--port", fmt.Sprint(port)}, r.ExtraLLamaCPPArgs...)
args, grpcProcess = library.LoadLDSO(r.BackendAssetsPath, args, grpcProcess)
cmd.Stderr = os.Stdout
cmd.Stdout = os.Stdout
cmd := exec.Command(
grpcProcess, args...,
)
if err := cmd.Start(); err != nil {
log.Error().Any("grpcProcess", grpcProcess).Any("args", args).Err(err).Msg("Failed to start llama-cpp-rpc-server")
}
cmd.Env = os.Environ()
cmd.Stderr = os.Stdout
cmd.Stdout = os.Stdout
if err := cmd.Start(); err != nil {
log.Error().Any("grpcProcess", grpcProcess).Any("args", args).Err(err).Msg("Failed to start llama-cpp-rpc-server")
cmd.Wait()
}
}()
cmd.Wait()
_, err = p2p.ExposeService(context.Background(), address, fmt.Sprint(port), r.Token, p2p.NetworkID(r.Peer2PeerNetworkID, p2p.WorkerID))
if err != nil {
return err
}
}()
_, err = p2p.ExposeService(context.Background(), address, fmt.Sprint(port), r.Token, p2p.NetworkID(r.Peer2PeerNetworkID, p2p.WorkerID))
if err != nil {
return err
}
for {

@@ -4,6 +4,7 @@ import (
"context"
"embed"
"encoding/json"
"regexp"
"time"
"github.com/mudler/LocalAI/pkg/xsysinfo"
@@ -16,7 +17,6 @@ type ApplicationConfig struct {
ModelPath string
LibPath string
UploadLimitMB, Threads, ContextSize int
DisableWebUI bool
F16 bool
Debug bool
ImageDir string
@@ -31,11 +31,17 @@ type ApplicationConfig struct {
PreloadModelsFromPath string
CORSAllowOrigins string
ApiKeys []string
EnforcePredownloadScans bool
OpaqueErrors bool
P2PToken string
P2PNetworkID string
DisableWebUI bool
EnforcePredownloadScans bool
OpaqueErrors bool
UseSubtleKeyComparison bool
DisableApiKeyRequirementForHttpGet bool
HttpGetExemptedEndpoints []*regexp.Regexp
DisableGalleryEndpoint bool
ModelLibraryURL string
Galleries []Gallery
@@ -57,8 +63,6 @@ type ApplicationConfig struct {
ModelsURL []string
WatchDogBusyTimeout, WatchDogIdleTimeout time.Duration
DisableGalleryEndpoint bool
}
type AppOption func(*ApplicationConfig)
@@ -327,6 +331,32 @@ func WithOpaqueErrors(opaque bool) AppOption {
}
}
func WithSubtleKeyComparison(subtle bool) AppOption {
return func(o *ApplicationConfig) {
o.UseSubtleKeyComparison = subtle
}
}
func WithDisableApiKeyRequirementForHttpGet(required bool) AppOption {
return func(o *ApplicationConfig) {
o.DisableApiKeyRequirementForHttpGet = required
}
}
func WithHttpGetExemptedEndpoints(endpoints []string) AppOption {
return func(o *ApplicationConfig) {
o.HttpGetExemptedEndpoints = []*regexp.Regexp{}
for _, epr := range endpoints {
r, err := regexp.Compile(epr)
if err == nil && r != nil {
o.HttpGetExemptedEndpoints = append(o.HttpGetExemptedEndpoints, r)
} else {
log.Warn().Err(err).Str("regex", epr).Msg("Error while compiling HTTP Get Exemption regex, skipping this entry.")
}
}
}
}
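As wired up in run.go above, invalid patterns are skipped with a warning rather than aborting startup. A caller sketch, using patterns taken from the flag's defaults:

opts = append(opts, config.WithHttpGetExemptedEndpoints([]string{`^/$`, `^/static/.*$`, `^/swagger.*$`}))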
// ToConfigLoaderOptions returns a slice of ConfigLoader Option.
// Some options defined at the application level are going to be passed as defaults for
// all the configuration for the models.

@@ -126,6 +126,7 @@ type LLMConfig struct {
Grammar string `yaml:"grammar"`
StopWords []string `yaml:"stopwords"`
Cutstrings []string `yaml:"cutstrings"`
ExtractRegex []string `yaml:"extract_regex"`
TrimSpace []string `yaml:"trimspace"`
TrimSuffix []string `yaml:"trimsuffix"`
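In a model's YAML config this surfaces through the yaml tags above; for example (illustrative values, reusing the regexes from llm_test.go):

cutstrings:
- "<.*?>"
extract_regex:
- "<result>(.*?)</result>"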

@@ -3,13 +3,15 @@ package http
import (
"embed"
"errors"
"fmt"
"net/http"
"strings"
"github.com/dave-gray101/v2keyauth"
"github.com/mudler/LocalAI/pkg/utils"
"github.com/mudler/LocalAI/core/http/endpoints/localai"
"github.com/mudler/LocalAI/core/http/endpoints/openai"
"github.com/mudler/LocalAI/core/http/middleware"
"github.com/mudler/LocalAI/core/http/routes"
"github.com/mudler/LocalAI/core/config"
@@ -137,37 +139,14 @@ func App(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *confi
})
}
// Auth middleware checking if API key is valid. If no API key is set, no auth is required.
auth := func(c *fiber.Ctx) error {
if len(appConfig.ApiKeys) == 0 {
return c.Next()
}
if len(appConfig.ApiKeys) == 0 {
return c.Next()
}
authHeader := readAuthHeader(c)
if authHeader == "" {
return c.Status(fiber.StatusUnauthorized).JSON(fiber.Map{"message": "Authorization header missing"})
}
// If it's a bearer token
authHeaderParts := strings.Split(authHeader, " ")
if len(authHeaderParts) != 2 || authHeaderParts[0] != "Bearer" {
return c.Status(fiber.StatusUnauthorized).JSON(fiber.Map{"message": "Invalid Authorization header format"})
}
apiKey := authHeaderParts[1]
for _, key := range appConfig.ApiKeys {
if apiKey == key {
return c.Next()
}
}
return c.Status(fiber.StatusUnauthorized).JSON(fiber.Map{"message": "Invalid API key"})
kaConfig, err := middleware.GetKeyAuthConfig(appConfig)
if err != nil || kaConfig == nil {
return nil, fmt.Errorf("failed to create key auth config: %w", err)
}
// Auth is applied to _all_ endpoints. No exceptions. Filtering out endpoints to bypass is the role of the Filter property of the KeyAuth Configuration
app.Use(v2keyauth.New(*kaConfig))
if appConfig.CORS {
var c func(ctx *fiber.Ctx) error
if appConfig.CORSAllowOrigins == "" {
@@ -192,13 +171,13 @@ func App(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfi
galleryService := services.NewGalleryService(appConfig)
galleryService.Start(appConfig.Context, cl)
routes.RegisterElevenLabsRoutes(app, cl, ml, appConfig, auth)
routes.RegisterLocalAIRoutes(app, cl, ml, appConfig, galleryService, auth)
routes.RegisterOpenAIRoutes(app, cl, ml, appConfig, auth)
routes.RegisterElevenLabsRoutes(app, cl, ml, appConfig)
routes.RegisterLocalAIRoutes(app, cl, ml, appConfig, galleryService)
routes.RegisterOpenAIRoutes(app, cl, ml, appConfig)
if !appConfig.DisableWebUI {
routes.RegisterUIRoutes(app, cl, ml, appConfig, galleryService, auth)
routes.RegisterUIRoutes(app, cl, ml, appConfig, galleryService)
}
routes.RegisterJINARoutes(app, cl, ml, appConfig, auth)
routes.RegisterJINARoutes(app, cl, ml, appConfig)
httpFS := http.FS(embedDirStatic)

@@ -772,6 +772,17 @@ var _ = Describe("API test", func() {
Expect(err.Error()).To(ContainSubstring("error, status code: 500, message: could not load model - all backends returned error:"))
})
It("shows the external backend", func() {
// do an http request to the /system endpoint
resp, err := http.Get("http://127.0.0.1:9090/system")
Expect(err).ToNot(HaveOccurred())
Expect(resp.StatusCode).To(Equal(200))
dat, err := io.ReadAll(resp.Body)
Expect(err).ToNot(HaveOccurred())
Expect(string(dat)).To(ContainSubstring("huggingface"))
Expect(string(dat)).To(ContainSubstring("llama-cpp"))
})
It("transcribes audio", func() {
if runtime.GOOS != "linux" {
Skip("test supported only on linux")

@@ -0,0 +1,65 @@
package elevenlabs
import (
"github.com/gofiber/fiber/v2"
"github.com/mudler/LocalAI/core/backend"
"github.com/mudler/LocalAI/core/config"
fiberContext "github.com/mudler/LocalAI/core/http/ctx"
"github.com/mudler/LocalAI/core/schema"
"github.com/mudler/LocalAI/pkg/model"
"github.com/rs/zerolog/log"
)
// SoundGenerationEndpoint is the ElevenLabs SoundGeneration endpoint https://elevenlabs.io/docs/api-reference/sound-generation
// @Summary Generates audio from the input text.
// @Param request body schema.ElevenLabsSoundGenerationRequest true "query params"
// @Success 200 {string} binary "Response"
// @Router /v1/sound-generation [post]
func SoundGenerationEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
input := new(schema.ElevenLabsSoundGenerationRequest)
// Get input data from the request body
if err := c.BodyParser(input); err != nil {
return err
}
modelFile, err := fiberContext.ModelFromContext(c, cl, ml, input.ModelID, false)
if err != nil {
modelFile = input.ModelID
log.Warn().Str("ModelID", input.ModelID).Msg("Model not found in context")
}
cfg, err := cl.LoadBackendConfigFileByName(modelFile, appConfig.ModelPath,
config.LoadOptionDebug(appConfig.Debug),
config.LoadOptionThreads(appConfig.Threads),
config.LoadOptionContextSize(appConfig.ContextSize),
config.LoadOptionF16(appConfig.F16),
)
if err != nil {
modelFile = input.ModelID
log.Warn().Str("Request ModelID", input.ModelID).Err(err).Msg("error during LoadBackendConfigFileByName, using request ModelID")
} else {
if input.ModelID != "" {
modelFile = input.ModelID
} else {
modelFile = cfg.Model
}
}
log.Debug().Str("modelFile", "modelFile").Str("backend", cfg.Backend).Msg("Sound Generation Request about to be sent to backend")
if input.Duration != nil {
log.Debug().Float32("duration", *input.Duration).Msg("duration set")
}
if input.Temperature != nil {
log.Debug().Float32("temperature", *input.Temperature).Msg("temperature set")
}
// TODO: Support uploading files?
filePath, _, err := backend.SoundGeneration(cfg.Backend, modelFile, input.Text, input.Duration, input.Temperature, input.DoSample, nil, nil, ml, appConfig, *cfg)
if err != nil {
return err
}
return c.Download(filePath)
}
}

@@ -0,0 +1,29 @@
package localai
import (
"github.com/gofiber/fiber/v2"
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/schema"
"github.com/mudler/LocalAI/pkg/model"
)
// SystemInformations returns the system information
// @Summary Show the LocalAI instance information
// @Success 200 {object} schema.SystemInformationResponse "Response"
// @Router /system [get]
func SystemInformations(ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(*fiber.Ctx) error {
return func(c *fiber.Ctx) error {
availableBackends, err := ml.ListAvailableBackends(appConfig.AssetsDestination)
if err != nil {
return err
}
for b := range appConfig.ExternalGRPCBackends {
availableBackends = append(availableBackends, b)
}
return c.JSON(
schema.SystemInformationResponse{
Backends: availableBackends,
},
)
}
}
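For illustration, with the llama-cpp assets extracted and an external huggingface backend registered, the response would be shaped roughly like this (field name assumed from schema.SystemInformationResponse; see the API test above):

{"backends":["huggingface","llama-cpp"]}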

@@ -25,9 +25,8 @@ import (
// @Success 200 {object} schema.OpenAIResponse "Response"
// @Router /v1/chat/completions [post]
func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startupOptions *config.ApplicationConfig) func(c *fiber.Ctx) error {
textContentToReturn := ""
id := uuid.New().String()
created := int(time.Now().Unix())
var id, textContentToReturn string
var created int
process := func(s string, req *schema.OpenAIRequest, config *config.BackendConfig, loader *model.ModelLoader, responses chan schema.OpenAIResponse) {
initialMessage := schema.OpenAIResponse{
@@ -69,9 +68,9 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup
textContentToReturn = functions.ParseTextContent(result, config.FunctionsConfig)
result = functions.CleanupLLMResult(result, config.FunctionsConfig)
results := functions.ParseFunctionCall(result, config.FunctionsConfig)
functionResults := functions.ParseFunctionCall(result, config.FunctionsConfig)
log.Debug().Msgf("Text content to return: %s", textContentToReturn)
noActionToRun := len(results) > 0 && results[0].Name == noAction || len(results) == 0
noActionToRun := len(functionResults) > 0 && functionResults[0].Name == noAction || len(functionResults) == 0
switch {
case noActionToRun:
@@ -84,7 +83,7 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup
}
responses <- initialMessage
result, err := handleQuestion(config, req, ml, startupOptions, results, result, prompt)
result, err := handleQuestion(config, req, ml, startupOptions, functionResults, result, prompt)
if err != nil {
log.Error().Err(err).Msg("error handling question")
return
@@ -106,7 +105,7 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup
responses <- resp
default:
for i, ss := range results {
for i, ss := range functionResults {
name, args := ss.Name, ss.Arguments
initialMessage := schema.OpenAIResponse{
@@ -159,6 +158,10 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup
}
return func(c *fiber.Ctx) error {
textContentToReturn = ""
id = uuid.New().String()
created = int(time.Now().Unix())
modelFile, input, err := readRequest(c, cl, ml, startupOptions, true)
if err != nil {
return fmt.Errorf("failed reading parameters from request:%w", err)
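The point of moving id, created, and textContentToReturn inside the returned handler: previously they were initialized once at route-registration time, so every request shared the same completion id and creation timestamp. A minimal sketch of the hazard being fixed (hypothetical names):

func newHandler() func() string {
	id := uuid.New().String() // evaluated once, when the route is registered
	return func() string {
		return id // every request would see the same id
	}
}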

@@ -0,0 +1,93 @@
package middleware
import (
"crypto/subtle"
"errors"
"github.com/dave-gray101/v2keyauth"
"github.com/gofiber/fiber/v2"
"github.com/gofiber/fiber/v2/middleware/keyauth"
"github.com/mudler/LocalAI/core/config"
)
// This file contains the configuration generators and handler functions that are used along with the fiber/keyauth middleware
// Currently this requires an upstream patch - and feature patches are no longer accepted to v2
// Therefore `dave-gray101/v2keyauth` contains the v2 backport of the middleware until v3 stabilizes and we migrate.
func GetKeyAuthConfig(applicationConfig *config.ApplicationConfig) (*v2keyauth.Config, error) {
customLookup, err := v2keyauth.MultipleKeySourceLookup([]string{"header:Authorization", "header:x-api-key", "header:xi-api-key"}, keyauth.ConfigDefault.AuthScheme)
if err != nil {
return nil, err
}
return &v2keyauth.Config{
CustomKeyLookup: customLookup,
Next: getApiKeyRequiredFilterFunction(applicationConfig),
Validator: getApiKeyValidationFunction(applicationConfig),
ErrorHandler: getApiKeyErrorHandler(applicationConfig),
AuthScheme: "Bearer",
}, nil
}
func getApiKeyErrorHandler(applicationConfig *config.ApplicationConfig) fiber.ErrorHandler {
return func(ctx *fiber.Ctx, err error) error {
if errors.Is(err, v2keyauth.ErrMissingOrMalformedAPIKey) {
if len(applicationConfig.ApiKeys) == 0 {
return ctx.Next() // if no keys are set up, any error we get here is not an error.
}
if applicationConfig.OpaqueErrors {
return ctx.SendStatus(403)
}
}
if applicationConfig.OpaqueErrors {
return ctx.SendStatus(500)
}
return err
}
}
func getApiKeyValidationFunction(applicationConfig *config.ApplicationConfig) func(*fiber.Ctx, string) (bool, error) {
if applicationConfig.UseSubtleKeyComparison {
return func(ctx *fiber.Ctx, apiKey string) (bool, error) {
if len(applicationConfig.ApiKeys) == 0 {
return true, nil // If no keys are setup, accept everything
}
for _, validKey := range applicationConfig.ApiKeys {
if subtle.ConstantTimeCompare([]byte(apiKey), []byte(validKey)) == 1 {
return true, nil
}
}
return false, v2keyauth.ErrMissingOrMalformedAPIKey
}
}
return func(ctx *fiber.Ctx, apiKey string) (bool, error) {
if len(applicationConfig.ApiKeys) == 0 {
return true, nil // If no keys are setup, accept everything
}
for _, validKey := range applicationConfig.ApiKeys {
if apiKey == validKey {
return true, nil
}
}
return false, v2keyauth.ErrMissingOrMalformedAPIKey
}
}
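The subtle path trades a little per-request speed for resistance to timing attacks. One documented property worth knowing: subtle.ConstantTimeCompare returns 0 immediately when the slice lengths differ, so only equal-length keys are compared in constant time. A small runnable demonstration:

package main

import (
	"crypto/subtle"
	"fmt"
)

func main() {
	fmt.Println(subtle.ConstantTimeCompare([]byte("secret"), []byte("secret"))) // 1: equal
	fmt.Println(subtle.ConstantTimeCompare([]byte("secret"), []byte("Secret"))) // 0: compared in constant time
	fmt.Println(subtle.ConstantTimeCompare([]byte("secret"), []byte("nope")))   // 0: length mismatch, returns early
}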
func getApiKeyRequiredFilterFunction(applicationConfig *config.ApplicationConfig) func(*fiber.Ctx) bool {
if applicationConfig.DisableApiKeyRequirementForHttpGet {
return func(c *fiber.Ctx) bool {
if c.Method() != "GET" {
return false
}
for _, rx := range applicationConfig.HttpGetExemptedEndpoints {
if rx.MatchString(c.Path()) {
return true
}
}
return false
}
}
return func(c *fiber.Ctx) bool { return false }
}

@@ -10,10 +10,11 @@ import (
func RegisterElevenLabsRoutes(app *fiber.App,
cl *config.BackendConfigLoader,
ml *model.ModelLoader,
appConfig *config.ApplicationConfig,
auth func(*fiber.Ctx) error) {
appConfig *config.ApplicationConfig) {
// Elevenlabs
app.Post("/v1/text-to-speech/:voice-id", auth, elevenlabs.TTSEndpoint(cl, ml, appConfig))
app.Post("/v1/text-to-speech/:voice-id", elevenlabs.TTSEndpoint(cl, ml, appConfig))
app.Post("/v1/sound-generation", elevenlabs.SoundGenerationEndpoint(cl, ml, appConfig))
}

@@ -11,8 +11,7 @@ import (
func RegisterJINARoutes(app *fiber.App,
cl *config.BackendConfigLoader,
ml *model.ModelLoader,
appConfig *config.ApplicationConfig,
auth func(*fiber.Ctx) error) {
appConfig *config.ApplicationConfig) {
// POST endpoint to mimic the reranking
app.Post("/v1/rerank", jina.JINARerankEndpoint(cl, ml, appConfig))

@@ -15,33 +15,32 @@ func RegisterLocalAIRoutes(app *fiber.App,
cl *config.BackendConfigLoader,
ml *model.ModelLoader,
appConfig *config.ApplicationConfig,
galleryService *services.GalleryService,
auth func(*fiber.Ctx) error) {
galleryService *services.GalleryService) {
app.Get("/swagger/*", swagger.HandlerDefault) // default
// LocalAI API endpoints
if !appConfig.DisableGalleryEndpoint {
modelGalleryEndpointService := localai.CreateModelGalleryEndpointService(appConfig.Galleries, appConfig.ModelPath, galleryService)
app.Post("/models/apply", auth, modelGalleryEndpointService.ApplyModelGalleryEndpoint())
app.Post("/models/delete/:name", auth, modelGalleryEndpointService.DeleteModelGalleryEndpoint())
app.Post("/models/apply", modelGalleryEndpointService.ApplyModelGalleryEndpoint())
app.Post("/models/delete/:name", modelGalleryEndpointService.DeleteModelGalleryEndpoint())
app.Get("/models/available", auth, modelGalleryEndpointService.ListModelFromGalleryEndpoint())
app.Get("/models/galleries", auth, modelGalleryEndpointService.ListModelGalleriesEndpoint())
app.Post("/models/galleries", auth, modelGalleryEndpointService.AddModelGalleryEndpoint())
app.Delete("/models/galleries", auth, modelGalleryEndpointService.RemoveModelGalleryEndpoint())
app.Get("/models/jobs/:uuid", auth, modelGalleryEndpointService.GetOpStatusEndpoint())
app.Get("/models/jobs", auth, modelGalleryEndpointService.GetAllStatusEndpoint())
app.Get("/models/available", modelGalleryEndpointService.ListModelFromGalleryEndpoint())
app.Get("/models/galleries", modelGalleryEndpointService.ListModelGalleriesEndpoint())
app.Post("/models/galleries", modelGalleryEndpointService.AddModelGalleryEndpoint())
app.Delete("/models/galleries", modelGalleryEndpointService.RemoveModelGalleryEndpoint())
app.Get("/models/jobs/:uuid", modelGalleryEndpointService.GetOpStatusEndpoint())
app.Get("/models/jobs", modelGalleryEndpointService.GetAllStatusEndpoint())
}
app.Post("/tts", auth, localai.TTSEndpoint(cl, ml, appConfig))
app.Post("/tts", localai.TTSEndpoint(cl, ml, appConfig))
// Stores
sl := model.NewModelLoader("")
app.Post("/stores/set", auth, localai.StoresSetEndpoint(sl, appConfig))
app.Post("/stores/delete", auth, localai.StoresDeleteEndpoint(sl, appConfig))
app.Post("/stores/get", auth, localai.StoresGetEndpoint(sl, appConfig))
app.Post("/stores/find", auth, localai.StoresFindEndpoint(sl, appConfig))
app.Post("/stores/set", localai.StoresSetEndpoint(sl, appConfig))
app.Post("/stores/delete", localai.StoresDeleteEndpoint(sl, appConfig))
app.Post("/stores/get", localai.StoresGetEndpoint(sl, appConfig))
app.Post("/stores/find", localai.StoresFindEndpoint(sl, appConfig))
// Kubernetes health checks
ok := func(c *fiber.Ctx) error {
@@ -51,23 +50,25 @@ func RegisterLocalAIRoutes(app *fiber.App,
app.Get("/healthz", ok)
app.Get("/readyz", ok)
app.Get("/metrics", auth, localai.LocalAIMetricsEndpoint())
app.Get("/metrics", localai.LocalAIMetricsEndpoint())
// Experimental Backend Statistics Module
backendMonitorService := services.NewBackendMonitorService(ml, cl, appConfig) // Split out for now
app.Get("/backend/monitor", auth, localai.BackendMonitorEndpoint(backendMonitorService))
app.Post("/backend/shutdown", auth, localai.BackendShutdownEndpoint(backendMonitorService))
app.Get("/backend/monitor", localai.BackendMonitorEndpoint(backendMonitorService))
app.Post("/backend/shutdown", localai.BackendShutdownEndpoint(backendMonitorService))
// p2p
if p2p.IsP2PEnabled() {
app.Get("/api/p2p", auth, localai.ShowP2PNodes(appConfig))
app.Get("/api/p2p/token", auth, localai.ShowP2PToken(appConfig))
app.Get("/api/p2p", localai.ShowP2PNodes(appConfig))
app.Get("/api/p2p/token", localai.ShowP2PToken(appConfig))
}
app.Get("/version", auth, func(c *fiber.Ctx) error {
app.Get("/version", func(c *fiber.Ctx) error {
return c.JSON(struct {
Version string `json:"version"`
}{Version: internal.PrintableVersion()})
})
app.Get("/system", auth, localai.SystemInformations(ml, appConfig))
}

@@ -11,66 +11,65 @@ import (
func RegisterOpenAIRoutes(app *fiber.App,
cl *config.BackendConfigLoader,
ml *model.ModelLoader,
appConfig *config.ApplicationConfig,
auth func(*fiber.Ctx) error) {
appConfig *config.ApplicationConfig) {
// openAI compatible API endpoint
// chat
app.Post("/v1/chat/completions", auth, openai.ChatEndpoint(cl, ml, appConfig))
app.Post("/chat/completions", auth, openai.ChatEndpoint(cl, ml, appConfig))
app.Post("/v1/chat/completions", openai.ChatEndpoint(cl, ml, appConfig))
app.Post("/chat/completions", openai.ChatEndpoint(cl, ml, appConfig))
// edit
app.Post("/v1/edits", auth, openai.EditEndpoint(cl, ml, appConfig))
app.Post("/edits", auth, openai.EditEndpoint(cl, ml, appConfig))
app.Post("/v1/edits", openai.EditEndpoint(cl, ml, appConfig))
app.Post("/edits", openai.EditEndpoint(cl, ml, appConfig))
// assistant
app.Get("/v1/assistants", auth, openai.ListAssistantsEndpoint(cl, ml, appConfig))
app.Get("/assistants", auth, openai.ListAssistantsEndpoint(cl, ml, appConfig))
app.Post("/v1/assistants", auth, openai.CreateAssistantEndpoint(cl, ml, appConfig))
app.Post("/assistants", auth, openai.CreateAssistantEndpoint(cl, ml, appConfig))
app.Delete("/v1/assistants/:assistant_id", auth, openai.DeleteAssistantEndpoint(cl, ml, appConfig))
app.Delete("/assistants/:assistant_id", auth, openai.DeleteAssistantEndpoint(cl, ml, appConfig))
app.Get("/v1/assistants/:assistant_id", auth, openai.GetAssistantEndpoint(cl, ml, appConfig))
app.Get("/assistants/:assistant_id", auth, openai.GetAssistantEndpoint(cl, ml, appConfig))
app.Post("/v1/assistants/:assistant_id", auth, openai.ModifyAssistantEndpoint(cl, ml, appConfig))
app.Post("/assistants/:assistant_id", auth, openai.ModifyAssistantEndpoint(cl, ml, appConfig))
app.Get("/v1/assistants/:assistant_id/files", auth, openai.ListAssistantFilesEndpoint(cl, ml, appConfig))
app.Get("/assistants/:assistant_id/files", auth, openai.ListAssistantFilesEndpoint(cl, ml, appConfig))
app.Post("/v1/assistants/:assistant_id/files", auth, openai.CreateAssistantFileEndpoint(cl, ml, appConfig))
app.Post("/assistants/:assistant_id/files", auth, openai.CreateAssistantFileEndpoint(cl, ml, appConfig))
app.Delete("/v1/assistants/:assistant_id/files/:file_id", auth, openai.DeleteAssistantFileEndpoint(cl, ml, appConfig))
app.Delete("/assistants/:assistant_id/files/:file_id", auth, openai.DeleteAssistantFileEndpoint(cl, ml, appConfig))
app.Get("/v1/assistants/:assistant_id/files/:file_id", auth, openai.GetAssistantFileEndpoint(cl, ml, appConfig))
app.Get("/assistants/:assistant_id/files/:file_id", auth, openai.GetAssistantFileEndpoint(cl, ml, appConfig))
app.Get("/v1/assistants", openai.ListAssistantsEndpoint(cl, ml, appConfig))
app.Get("/assistants", openai.ListAssistantsEndpoint(cl, ml, appConfig))
app.Post("/v1/assistants", openai.CreateAssistantEndpoint(cl, ml, appConfig))
app.Post("/assistants", openai.CreateAssistantEndpoint(cl, ml, appConfig))
app.Delete("/v1/assistants/:assistant_id", openai.DeleteAssistantEndpoint(cl, ml, appConfig))
app.Delete("/assistants/:assistant_id", openai.DeleteAssistantEndpoint(cl, ml, appConfig))
app.Get("/v1/assistants/:assistant_id", openai.GetAssistantEndpoint(cl, ml, appConfig))
app.Get("/assistants/:assistant_id", openai.GetAssistantEndpoint(cl, ml, appConfig))
app.Post("/v1/assistants/:assistant_id", openai.ModifyAssistantEndpoint(cl, ml, appConfig))
app.Post("/assistants/:assistant_id", openai.ModifyAssistantEndpoint(cl, ml, appConfig))
app.Get("/v1/assistants/:assistant_id/files", openai.ListAssistantFilesEndpoint(cl, ml, appConfig))
app.Get("/assistants/:assistant_id/files", openai.ListAssistantFilesEndpoint(cl, ml, appConfig))
app.Post("/v1/assistants/:assistant_id/files", openai.CreateAssistantFileEndpoint(cl, ml, appConfig))
app.Post("/assistants/:assistant_id/files", openai.CreateAssistantFileEndpoint(cl, ml, appConfig))
app.Delete("/v1/assistants/:assistant_id/files/:file_id", openai.DeleteAssistantFileEndpoint(cl, ml, appConfig))
app.Delete("/assistants/:assistant_id/files/:file_id", openai.DeleteAssistantFileEndpoint(cl, ml, appConfig))
app.Get("/v1/assistants/:assistant_id/files/:file_id", openai.GetAssistantFileEndpoint(cl, ml, appConfig))
app.Get("/assistants/:assistant_id/files/:file_id", openai.GetAssistantFileEndpoint(cl, ml, appConfig))
// files
app.Post("/v1/files", auth, openai.UploadFilesEndpoint(cl, appConfig))
app.Post("/files", auth, openai.UploadFilesEndpoint(cl, appConfig))
app.Get("/v1/files", auth, openai.ListFilesEndpoint(cl, appConfig))
app.Get("/files", auth, openai.ListFilesEndpoint(cl, appConfig))
app.Get("/v1/files/:file_id", auth, openai.GetFilesEndpoint(cl, appConfig))
app.Get("/files/:file_id", auth, openai.GetFilesEndpoint(cl, appConfig))
app.Delete("/v1/files/:file_id", auth, openai.DeleteFilesEndpoint(cl, appConfig))
app.Delete("/files/:file_id", auth, openai.DeleteFilesEndpoint(cl, appConfig))
app.Get("/v1/files/:file_id/content", auth, openai.GetFilesContentsEndpoint(cl, appConfig))
app.Get("/files/:file_id/content", auth, openai.GetFilesContentsEndpoint(cl, appConfig))
app.Post("/v1/files", openai.UploadFilesEndpoint(cl, appConfig))
app.Post("/files", openai.UploadFilesEndpoint(cl, appConfig))
app.Get("/v1/files", openai.ListFilesEndpoint(cl, appConfig))
app.Get("/files", openai.ListFilesEndpoint(cl, appConfig))
app.Get("/v1/files/:file_id", openai.GetFilesEndpoint(cl, appConfig))
app.Get("/files/:file_id", openai.GetFilesEndpoint(cl, appConfig))
app.Delete("/v1/files/:file_id", openai.DeleteFilesEndpoint(cl, appConfig))
app.Delete("/files/:file_id", openai.DeleteFilesEndpoint(cl, appConfig))
app.Get("/v1/files/:file_id/content", openai.GetFilesContentsEndpoint(cl, appConfig))
app.Get("/files/:file_id/content", openai.GetFilesContentsEndpoint(cl, appConfig))
// completion
app.Post("/v1/completions", auth, openai.CompletionEndpoint(cl, ml, appConfig))
app.Post("/completions", auth, openai.CompletionEndpoint(cl, ml, appConfig))
app.Post("/v1/engines/:model/completions", auth, openai.CompletionEndpoint(cl, ml, appConfig))
app.Post("/v1/completions", openai.CompletionEndpoint(cl, ml, appConfig))
app.Post("/completions", openai.CompletionEndpoint(cl, ml, appConfig))
app.Post("/v1/engines/:model/completions", openai.CompletionEndpoint(cl, ml, appConfig))
// embeddings
app.Post("/v1/embeddings", auth, openai.EmbeddingsEndpoint(cl, ml, appConfig))
app.Post("/embeddings", auth, openai.EmbeddingsEndpoint(cl, ml, appConfig))
app.Post("/v1/engines/:model/embeddings", auth, openai.EmbeddingsEndpoint(cl, ml, appConfig))
app.Post("/v1/embeddings", openai.EmbeddingsEndpoint(cl, ml, appConfig))
app.Post("/embeddings", openai.EmbeddingsEndpoint(cl, ml, appConfig))
app.Post("/v1/engines/:model/embeddings", openai.EmbeddingsEndpoint(cl, ml, appConfig))
// audio
app.Post("/v1/audio/transcriptions", auth, openai.TranscriptEndpoint(cl, ml, appConfig))
app.Post("/v1/audio/speech", auth, localai.TTSEndpoint(cl, ml, appConfig))
app.Post("/v1/audio/transcriptions", openai.TranscriptEndpoint(cl, ml, appConfig))
app.Post("/v1/audio/speech", localai.TTSEndpoint(cl, ml, appConfig))
// images
app.Post("/v1/images/generations", auth, openai.ImageEndpoint(cl, ml, appConfig))
app.Post("/v1/images/generations", openai.ImageEndpoint(cl, ml, appConfig))
if appConfig.ImageDir != "" {
app.Static("/generated-images", appConfig.ImageDir)
@ -81,6 +80,6 @@ func RegisterOpenAIRoutes(app *fiber.App,
}
// List models
app.Get("/v1/models", auth, openai.ListModelsEndpoint(cl, ml))
app.Get("/models", auth, openai.ListModelsEndpoint(cl, ml))
app.Get("/v1/models", openai.ListModelsEndpoint(cl, ml))
app.Get("/models", openai.ListModelsEndpoint(cl, ml))
}
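// A minimal sketch of the pattern the routes above move toward: rather than
// passing an `auth` handler to every registration, the middleware is mounted
// once so it runs before each matched route. `mountAuth` and `requireAPIKey`
// are hypothetical names, not part of this change.
func mountAuth(app *fiber.App, requireAPIKey fiber.Handler) {
	app.Use(requireAPIKey) // applies to every route registered after this call
}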


@ -59,8 +59,7 @@ func RegisterUIRoutes(app *fiber.App,
cl *config.BackendConfigLoader,
ml *model.ModelLoader,
appConfig *config.ApplicationConfig,
galleryService *services.GalleryService,
auth func(*fiber.Ctx) error) {
galleryService *services.GalleryService) {
// keeps the state of models that are being installed from the UI
var processingModels = NewModelOpCache()
@ -85,10 +84,10 @@ func RegisterUIRoutes(app *fiber.App,
return processingModelsData, taskTypes
}
app.Get("/", auth, localai.WelcomeEndpoint(appConfig, cl, ml, modelStatus))
app.Get("/", localai.WelcomeEndpoint(appConfig, cl, ml, modelStatus))
if p2p.IsP2PEnabled() {
app.Get("/p2p", auth, func(c *fiber.Ctx) error {
app.Get("/p2p", func(c *fiber.Ctx) error {
summary := fiber.Map{
"Title": "LocalAI - P2P dashboard",
"Version": internal.PrintableVersion(),
@ -104,17 +103,17 @@ func RegisterUIRoutes(app *fiber.App,
})
/* show nodes live! */
app.Get("/p2p/ui/workers", auth, func(c *fiber.Ctx) error {
app.Get("/p2p/ui/workers", func(c *fiber.Ctx) error {
return c.SendString(elements.P2PNodeBoxes(p2p.GetAvailableNodes(p2p.NetworkID(appConfig.P2PNetworkID, p2p.WorkerID))))
})
app.Get("/p2p/ui/workers-federation", auth, func(c *fiber.Ctx) error {
app.Get("/p2p/ui/workers-federation", func(c *fiber.Ctx) error {
return c.SendString(elements.P2PNodeBoxes(p2p.GetAvailableNodes(p2p.NetworkID(appConfig.P2PNetworkID, p2p.FederatedID))))
})
app.Get("/p2p/ui/workers-stats", auth, func(c *fiber.Ctx) error {
app.Get("/p2p/ui/workers-stats", func(c *fiber.Ctx) error {
return c.SendString(elements.P2PNodeStats(p2p.GetAvailableNodes(p2p.NetworkID(appConfig.P2PNetworkID, p2p.WorkerID))))
})
app.Get("/p2p/ui/workers-federation-stats", auth, func(c *fiber.Ctx) error {
app.Get("/p2p/ui/workers-federation-stats", func(c *fiber.Ctx) error {
return c.SendString(elements.P2PNodeStats(p2p.GetAvailableNodes(p2p.NetworkID(appConfig.P2PNetworkID, p2p.FederatedID))))
})
}
@ -122,7 +121,7 @@ func RegisterUIRoutes(app *fiber.App,
if !appConfig.DisableGalleryEndpoint {
// Show the Models page (all models)
app.Get("/browse", auth, func(c *fiber.Ctx) error {
app.Get("/browse", func(c *fiber.Ctx) error {
term := c.Query("term")
models, _ := gallery.AvailableGalleryModels(appConfig.Galleries, appConfig.ModelPath)
@ -167,7 +166,7 @@ func RegisterUIRoutes(app *fiber.App,
// Show the models, filtered by the user input
// https://htmx.org/examples/active-search/
app.Post("/browse/search/models", auth, func(c *fiber.Ctx) error {
app.Post("/browse/search/models", func(c *fiber.Ctx) error {
form := struct {
Search string `form:"search"`
}{}
@ -188,7 +187,7 @@ func RegisterUIRoutes(app *fiber.App,
// This route is used when the "Install" button is pressed; we submit a new job to the gallery service here
// https://htmx.org/examples/progress-bar/
app.Post("/browse/install/model/:id", auth, func(c *fiber.Ctx) error {
app.Post("/browse/install/model/:id", func(c *fiber.Ctx) error {
galleryID := strings.Clone(c.Params("id")) // note: strings.Clone is required for multiple requests!
log.Debug().Msgf("UI job submitted to install : %+v\n", galleryID)
@ -215,7 +214,7 @@ func RegisterUIRoutes(app *fiber.App,
// This route is used when the "Delete" button is pressed; we submit a new job to the gallery service here
// https://htmx.org/examples/progress-bar/
app.Post("/browse/delete/model/:id", auth, func(c *fiber.Ctx) error {
app.Post("/browse/delete/model/:id", func(c *fiber.Ctx) error {
galleryID := strings.Clone(c.Params("id")) // note: strings.Clone is required for multiple requests!
log.Debug().Msgf("UI job submitted to delete : %+v\n", galleryID)
var galleryName = galleryID
@ -255,7 +254,7 @@ func RegisterUIRoutes(app *fiber.App,
// Display the job's current progress status
// If the job is done, we trigger the /browse/job/:uid route
// https://htmx.org/examples/progress-bar/
app.Get("/browse/job/progress/:uid", auth, func(c *fiber.Ctx) error {
app.Get("/browse/job/progress/:uid", func(c *fiber.Ctx) error {
jobUID := strings.Clone(c.Params("uid")) // note: strings.Clone is required for multiple requests!
status := galleryService.GetStatus(jobUID)
@ -279,7 +278,7 @@ func RegisterUIRoutes(app *fiber.App,
// This route is hit when the job is done, and we display the
// final state (for now it just displays "Installation completed")
app.Get("/browse/job/:uid", auth, func(c *fiber.Ctx) error {
app.Get("/browse/job/:uid", func(c *fiber.Ctx) error {
jobUID := strings.Clone(c.Params("uid")) // note: strings.Clone is required for multiple requests!
status := galleryService.GetStatus(jobUID)
@ -303,7 +302,7 @@ func RegisterUIRoutes(app *fiber.App,
}
// Show the Chat page
app.Get("/chat/:model", auth, func(c *fiber.Ctx) error {
app.Get("/chat/:model", func(c *fiber.Ctx) error {
backendConfigs, _ := services.ListModels(cl, ml, "", true)
summary := fiber.Map{
@ -318,7 +317,7 @@ func RegisterUIRoutes(app *fiber.App,
return c.Render("views/chat", summary)
})
app.Get("/talk/", auth, func(c *fiber.Ctx) error {
app.Get("/talk/", func(c *fiber.Ctx) error {
backendConfigs, _ := services.ListModels(cl, ml, "", true)
if len(backendConfigs) == 0 {
@ -338,7 +337,7 @@ func RegisterUIRoutes(app *fiber.App,
return c.Render("views/talk", summary)
})
app.Get("/chat/", auth, func(c *fiber.Ctx) error {
app.Get("/chat/", func(c *fiber.Ctx) error {
backendConfigs, _ := services.ListModels(cl, ml, "", true)
@ -359,7 +358,7 @@ func RegisterUIRoutes(app *fiber.App,
return c.Render("views/chat", summary)
})
app.Get("/text2image/:model", auth, func(c *fiber.Ctx) error {
app.Get("/text2image/:model", func(c *fiber.Ctx) error {
backendConfigs := cl.GetAllBackendConfigs()
summary := fiber.Map{
@ -374,7 +373,7 @@ func RegisterUIRoutes(app *fiber.App,
return c.Render("views/text2image", summary)
})
app.Get("/text2image/", auth, func(c *fiber.Ctx) error {
app.Get("/text2image/", func(c *fiber.Ctx) error {
backendConfigs := cl.GetAllBackendConfigs()
@ -395,7 +394,7 @@ func RegisterUIRoutes(app *fiber.App,
return c.Render("views/text2image", summary)
})
app.Get("/tts/:model", auth, func(c *fiber.Ctx) error {
app.Get("/tts/:model", func(c *fiber.Ctx) error {
backendConfigs := cl.GetAllBackendConfigs()
summary := fiber.Map{
@ -410,7 +409,7 @@ func RegisterUIRoutes(app *fiber.App,
return c.Render("views/tts", summary)
})
app.Get("/tts/", auth, func(c *fiber.Ctx) error {
app.Get("/tts/", func(c *fiber.Ctx) error {
backendConfigs := cl.GetAllBackendConfigs()


@ -6,11 +6,7 @@
rel="stylesheet"
href="/static/assets/highlightjs.css"
/>
<script defer src="/static/assets/anime.min.js"></script>
<script
defer
src="/static/assets/highlightjs.js"
></script>
<script defer src="/static/assets/highlightjs.js"></script>
<script
defer
src="/static/assets/alpine.js"


@ -28,9 +28,15 @@ import (
"github.com/mudler/edgevpn/pkg/logger"
)
func generateNewConnectionData() *node.YAMLConnectionConfig {
func generateNewConnectionData(DHTInterval, OTPInterval int) *node.YAMLConnectionConfig {
maxMessSize := 20 << 20 // 20MB
keyLength := 43
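// Zero means "use the default"; the intervals are assumed to be seconds, as
// consumed by edgevpn's OTPConfig key-rotation settings below.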
if DHTInterval == 0 {
DHTInterval = 360
}
if OTPInterval == 0 {
OTPInterval = 9000
}
return &node.YAMLConnectionConfig{
MaxMessageSize: maxMessSize,
@ -40,21 +46,21 @@ func generateNewConnectionData() *node.YAMLConnectionConfig {
OTP: node.OTP{
DHT: node.OTPConfig{
Key: eutils.RandStringRunes(keyLength),
Interval: 120,
Interval: DHTInterval,
Length: keyLength,
},
Crypto: node.OTPConfig{
Key: eutils.RandStringRunes(keyLength),
Interval: 9000,
Interval: OTPInterval,
Length: keyLength,
},
},
}
}
func GenerateToken() string {
func GenerateToken(DHTInterval, OTPInterval int) string {
// Generates a new connection config and returns it as a base64-encoded token
return generateNewConnectionData().Base64()
return generateNewConnectionData(DHTInterval, OTPInterval).Base64()
}
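// A short usage sketch (not part of this change): passing zeros falls back
// to the defaults applied in generateNewConnectionData.
func exampleToken() string {
	return GenerateToken(0, 0) // DHT key rotates every 360, crypto OTP every 9000
}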
func IsP2PEnabled() bool {
@ -202,13 +208,9 @@ func ServiceDiscoverer(ctx context.Context, n *node.Node, token, servicesID stri
func discoveryTunnels(ctx context.Context, n *node.Node, token, servicesID string, allocate bool) (chan NodeData, error) {
tunnels := make(chan NodeData)
err := n.Start(ctx)
if err != nil {
return nil, fmt.Errorf("creating a new node: %w", err)
}
ledger, err := n.Ledger()
if err != nil {
return nil, fmt.Errorf("creating a new node: %w", err)
return nil, fmt.Errorf("getting the ledger: %w", err)
}
// get new services, allocate them, and send them to the channel


@ -10,7 +10,7 @@ import (
"github.com/mudler/edgevpn/pkg/node"
)
func GenerateToken() string {
func GenerateToken(DHTInterval, OTPInterval int) string {
return "not implemented"
}
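// A sketch of how the stub can be kept in lockstep with the real
// implementation: call sites compile against either build, so a compile-time
// assertion (hypothetical, not in this change) pins the shared signature.
var _ func(int, int) string = GenerateToken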


@ -4,3 +4,11 @@ type ElevenLabsTTSRequest struct {
Text string `json:"text" yaml:"text"`
ModelID string `json:"model_id" yaml:"model_id"`
}
type ElevenLabsSoundGenerationRequest struct {
Text string `json:"text" yaml:"text"`
ModelID string `json:"model_id" yaml:"model_id"`
Duration *float32 `json:"duration_seconds,omitempty" yaml:"duration_seconds,omitempty"`
Temperature *float32 `json:"prompt_influence,omitempty" yaml:"prompt_influence,omitempty"`
DoSample *bool `json:"do_sample,omitempty" yaml:"do_sample,omitempty"`
}
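// An illustrative way to build the request above from Go (values are made
// up); the pointer fields distinguish "unset" from zero, so the optional
// knobs are only serialized when explicitly provided.
func exampleSoundGenerationRequest() ElevenLabsSoundGenerationRequest {
	duration := float32(4.0)   // duration_seconds
	influence := float32(0.65) // prompt_influence
	return ElevenLabsSoundGenerationRequest{
		Text:        "rain on a tin roof",
		ModelID:     "sound-generation", // hypothetical model id
		Duration:    &duration,
		Temperature: &influence,
	}
}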


@ -70,3 +70,7 @@ type P2PNodesResponse struct {
Nodes []p2p.NodeData `json:"nodes" yaml:"nodes"`
FederatedNodes []p2p.NodeData `json:"federated_nodes" yaml:"federated_nodes"`
}
type SystemInformationResponse struct {
Backends []string `json:"backends"`
}
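// Sketch of a handler returning the struct above; the handler name, the
// fiber dependency, and the backend names are illustrative assumptions only.
func systemInformationHandler(backends []string) fiber.Handler {
	return func(c *fiber.Ctx) error {
		return c.JSON(SystemInformationResponse{Backends: backends})
	}
}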


@ -107,7 +107,7 @@ func (bms BackendMonitorService) CheckAndSample(modelName string) (*proto.Status
return nil, err
}
modelAddr := bms.modelLoader.CheckIsLoaded(backendId)
if modelAddr == "" {
if modelAddr == nil {
return nil, fmt.Errorf("backend %s is not currently loaded", backendId)
}