Revert "[Refactor]: Core/API Split" (#1550)

Revert "[Refactor]: Core/API Split (#1506)" This reverts commit ab7b4d5ee9.
2025-05-20 10:35:01 +00:00 · 2024-01-05 12:04:46 -05:00 · 2024-01-05 12:04:46 -05:00 · db926896bd
commit db926896bd
parent ab7b4d5ee9
77 changed files with 3132 additions and 3456 deletions
--- a/core/http/api.go
+++ b/core/http/api.go
@ -1,169 +0,0 @@
-package http
-
-import (
-	"errors"
-	"strings"
-
-	"github.com/go-skynet/LocalAI/core/http/endpoints/localai"
-	"github.com/go-skynet/LocalAI/core/http/endpoints/openai"
-	"github.com/go-skynet/LocalAI/core/services"
-	"github.com/go-skynet/LocalAI/internal"
-	"github.com/go-skynet/LocalAI/pkg/model"
-	"github.com/go-skynet/LocalAI/pkg/schema"
-
-	"github.com/gofiber/fiber/v2"
-	"github.com/gofiber/fiber/v2/middleware/cors"
-	"github.com/gofiber/fiber/v2/middleware/logger"
-	"github.com/gofiber/fiber/v2/middleware/recover"
-)
-
-func App(cl *services.ConfigLoader, ml *model.ModelLoader, options *schema.StartupOptions) (*fiber.App, error) {
-
-	// Return errors as JSON responses
-	app := fiber.New(fiber.Config{
-		BodyLimit:             options.UploadLimitMB * 1024 * 1024, // this is the default limit of 4MB
-		DisableStartupMessage: options.DisableMessage,
-		// Override default error handler
-		ErrorHandler: func(ctx *fiber.Ctx, err error) error {
-			// Status code defaults to 500
-			code := fiber.StatusInternalServerError
-
-			// Retrieve the custom status code if it's a *fiber.Error
-			var e *fiber.Error
-			if errors.As(err, &e) {
-				code = e.Code
-			}
-
-			// Send custom error page
-			return ctx.Status(code).JSON(
-				schema.ErrorResponse{
-					Error: &schema.APIError{Message: err.Error(), Code: code},
-				},
-			)
-		},
-	})
-
-	if options.Debug {
-		app.Use(logger.New(logger.Config{
-			Format: "[${ip}]:${port} ${status} - ${method} ${path}\n",
-		}))
-	}
-
-	// Default middleware config
-	app.Use(recover.New())
-
-	if options.Metrics != nil {
-		app.Use(localai.MetricsAPIMiddleware(options.Metrics))
-	}
-
-	// Auth middleware checking if API key is valid. If no API key is set, no auth is required.
-	auth := func(c *fiber.Ctx) error {
-		if len(options.ApiKeys) == 0 {
-			return c.Next()
-		}
-
-		authHeader := c.Get("Authorization")
-		if authHeader == "" {
-			return c.Status(fiber.StatusUnauthorized).JSON(fiber.Map{"message": "Authorization header missing"})
-		}
-		authHeaderParts := strings.Split(authHeader, " ")
-		if len(authHeaderParts) != 2 || authHeaderParts[0] != "Bearer" {
-			return c.Status(fiber.StatusUnauthorized).JSON(fiber.Map{"message": "Invalid Authorization header format"})
-		}
-
-		apiKey := authHeaderParts[1]
-		for _, key := range options.ApiKeys {
-			if apiKey == key {
-				return c.Next()
-			}
-		}
-
-		return c.Status(fiber.StatusUnauthorized).JSON(fiber.Map{"message": "Invalid API key"})
-
-	}
-
-	if options.CORS {
-		var c func(ctx *fiber.Ctx) error
-		if options.CORSAllowOrigins == "" {
-			c = cors.New()
-		} else {
-			c = cors.New(cors.Config{AllowOrigins: options.CORSAllowOrigins})
-		}
-
-		app.Use(c)
-	}
-
-	// LocalAI API endpoints
-	galleryService := services.NewGalleryApplier(options.ModelPath)
-	galleryService.Start(options.Context, cl)
-
-	app.Get("/version", auth, func(c *fiber.Ctx) error {
-		return c.JSON(struct {
-			Version string `json:"version"`
-		}{Version: internal.PrintableVersion()})
-	})
-
-	modelGalleryService := localai.CreateModelGalleryEndpointService(options.Galleries, options.ModelPath, galleryService)
-	app.Post("/models/apply", auth, modelGalleryService.ApplyModelGalleryEndpoint())
-	app.Get("/models/available", auth, modelGalleryService.ListModelFromGalleryEndpoint())
-	app.Get("/models/galleries", auth, modelGalleryService.ListModelGalleriesEndpoint())
-	app.Post("/models/galleries", auth, modelGalleryService.AddModelGalleryEndpoint())
-	app.Delete("/models/galleries", auth, modelGalleryService.RemoveModelGalleryEndpoint())
-	app.Get("/models/jobs/:uuid", auth, modelGalleryService.GetOpStatusEndpoint())
-	app.Get("/models/jobs", auth, modelGalleryService.GetAllStatusEndpoint())
-
-	// openAI compatible API endpoint
-
-	// chat
-	app.Post("/v1/chat/completions", auth, openai.ChatEndpoint(cl, ml, options))
-	app.Post("/chat/completions", auth, openai.ChatEndpoint(cl, ml, options))
-
-	// edit
-	app.Post("/v1/edits", auth, openai.EditEndpoint(cl, ml, options))
-	app.Post("/edits", auth, openai.EditEndpoint(cl, ml, options))
-
-	// completion
-	app.Post("/v1/completions", auth, openai.CompletionEndpoint(cl, ml, options))
-	app.Post("/completions", auth, openai.CompletionEndpoint(cl, ml, options))
-	app.Post("/v1/engines/:model/completions", auth, openai.CompletionEndpoint(cl, ml, options))
-
-	// embeddings
-	app.Post("/v1/embeddings", auth, openai.EmbeddingsEndpoint(cl, ml, options))
-	app.Post("/embeddings", auth, openai.EmbeddingsEndpoint(cl, ml, options))
-	app.Post("/v1/engines/:model/embeddings", auth, openai.EmbeddingsEndpoint(cl, ml, options))
-
-	// audio
-	app.Post("/v1/audio/transcriptions", auth, openai.TranscriptEndpoint(cl, ml, options))
-	app.Post("/tts", auth, localai.TTSEndpoint(cl, ml, options))
-
-	// images
-	app.Post("/v1/images/generations", auth, openai.ImageEndpoint(cl, ml, options))
-
-	if options.ImageDir != "" {
-		app.Static("/generated-images", options.ImageDir)
-	}
-
-	if options.AudioDir != "" {
-		app.Static("/generated-audio", options.AudioDir)
-	}
-
-	ok := func(c *fiber.Ctx) error {
-		return c.SendStatus(200)
-	}
-
-	// Kubernetes health checks
-	app.Get("/healthz", ok)
-	app.Get("/readyz", ok)
-
-	app.Get("/metrics", localai.MetricsHandler())
-
-	backendMonitor := services.NewBackendMonitor(cl, ml, options)
-	app.Get("/backend/monitor", localai.BackendMonitorEndpoint(backendMonitor))
-	app.Post("/backend/shutdown", localai.BackendShutdownEndpoint(backendMonitor))
-
-	// model listing
-	app.Get("/v1/models", auth, openai.ListModelsEndpoint(cl, ml))
-	app.Get("/models", auth, openai.ListModelsEndpoint(cl, ml))
-
-	return app, nil
-}
--- a/core/http/api_test.go
+++ b/core/http/api_test.go
@ -1,867 +0,0 @@
-package http_test
-
-import (
-	"bytes"
-	"context"
-	"embed"
-	"encoding/json"
-	"errors"
-	"fmt"
-	"io"
-	"net/http"
-	"os"
-	"path/filepath"
-	"runtime"
-
-	server "github.com/go-skynet/LocalAI/core/http"
-	"github.com/go-skynet/LocalAI/core/services"
-	"github.com/go-skynet/LocalAI/core/startup"
-	"github.com/go-skynet/LocalAI/pkg/gallery"
-	"github.com/go-skynet/LocalAI/pkg/model"
-	"github.com/go-skynet/LocalAI/pkg/schema"
-	"github.com/go-skynet/LocalAI/pkg/utils"
-	"github.com/gofiber/fiber/v2"
-	. "github.com/onsi/ginkgo/v2"
-	. "github.com/onsi/gomega"
-	"gopkg.in/yaml.v3"
-
-	openaigo "github.com/otiai10/openaigo"
-	"github.com/sashabaranov/go-openai"
-	"github.com/sashabaranov/go-openai/jsonschema"
-)
-
-type modelApplyRequest struct {
-	ID        string                 `json:"id"`
-	URL       string                 `json:"url"`
-	Name      string                 `json:"name"`
-	Overrides map[string]interface{} `json:"overrides"`
-}
-
-func getModelStatus(url string) (response map[string]interface{}) {
-	// Create the HTTP request
-	resp, err := http.Get(url)
-	if err != nil {
-		fmt.Println("Error creating request:", err)
-		return
-	}
-	defer resp.Body.Close()
-
-	body, err := io.ReadAll(resp.Body)
-	if err != nil {
-		fmt.Println("Error reading response body:", err)
-		return
-	}
-
-	// Unmarshal the response into a map[string]interface{}
-	err = json.Unmarshal(body, &response)
-	if err != nil {
-		fmt.Println("Error unmarshaling JSON response:", err)
-		return
-	}
-	return
-}
-
-func getModels(url string) (response []gallery.GalleryModel) {
-	utils.GetURI(url, func(url string, i []byte) error {
-		// Unmarshal YAML data into a struct
-		return json.Unmarshal(i, &response)
-	})
-	return
-}
-
-func postModelApplyRequest(url string, request modelApplyRequest) (response map[string]interface{}) {
-
-	//url := "http://localhost:AI/models/apply"
-
-	// Create the request payload
-
-	payload, err := json.Marshal(request)
-	if err != nil {
-		fmt.Println("Error marshaling JSON:", err)
-		return
-	}
-
-	// Create the HTTP request
-	req, err := http.NewRequest("POST", url, bytes.NewBuffer(payload))
-	if err != nil {
-		fmt.Println("Error creating request:", err)
-		return
-	}
-	req.Header.Set("Content-Type", "application/json")
-
-	// Make the request
-	client := &http.Client{}
-	resp, err := client.Do(req)
-	if err != nil {
-		fmt.Println("Error making request:", err)
-		return
-	}
-	defer resp.Body.Close()
-
-	body, err := io.ReadAll(resp.Body)
-	if err != nil {
-		fmt.Println("Error reading response body:", err)
-		return
-	}
-
-	// Unmarshal the response into a map[string]interface{}
-	err = json.Unmarshal(body, &response)
-	if err != nil {
-		fmt.Println("Error unmarshaling JSON response:", err)
-		return
-	}
-	return
-}
-
-//go:embed backend-assets/*
-var backendAssets embed.FS
-
-var _ = Describe("API test", func() {
-
-	var app *fiber.App
-	var client *openai.Client
-	var client2 *openaigo.Client
-	var c context.Context
-	var cancel context.CancelFunc
-	var tmpdir string
-
-	commonOpts := []schema.AppOption{
-		schema.WithDebug(true),
-		schema.WithDisableMessage(true),
-	}
-
-	Context("API with ephemeral models", func() {
-		BeforeEach(func() {
-			var err error
-			tmpdir, err = os.MkdirTemp("", "")
-			Expect(err).ToNot(HaveOccurred())
-
-			c, cancel = context.WithCancel(context.Background())
-
-			g := []gallery.GalleryModel{
-				{
-					Name: "bert",
-					URL:  "https://raw.githubusercontent.com/go-skynet/model-gallery/main/bert-embeddings.yaml",
-				},
-				{
-					Name:            "bert2",
-					URL:             "https://raw.githubusercontent.com/go-skynet/model-gallery/main/bert-embeddings.yaml",
-					Overrides:       map[string]interface{}{"foo": "bar"},
-					AdditionalFiles: []gallery.File{{Filename: "foo.yaml", URI: "https://raw.githubusercontent.com/go-skynet/model-gallery/main/bert-embeddings.yaml"}},
-				},
-			}
-			out, err := yaml.Marshal(g)
-			Expect(err).ToNot(HaveOccurred())
-			err = os.WriteFile(filepath.Join(tmpdir, "gallery_simple.yaml"), out, 0644)
-			Expect(err).ToNot(HaveOccurred())
-
-			galleries := []gallery.Gallery{
-				{
-					Name: "test",
-					URL:  "file://" + filepath.Join(tmpdir, "gallery_simple.yaml"),
-				},
-			}
-
-			metricsService, err := services.SetupMetrics()
-			Expect(err).ToNot(HaveOccurred())
-
-			cl, ml, options, err := startup.Startup(
-				append(commonOpts,
-					schema.WithMetrics(metricsService),
-					schema.WithContext(c),
-					schema.WithGalleries(galleries),
-					schema.WithModelPath(tmpdir),
-					schema.WithBackendAssets(backendAssets),
-					schema.WithBackendAssetsOutput(tmpdir))...)
-
-			Expect(err).ToNot(HaveOccurred())
-			app, err = server.App(cl, ml, options)
-			Expect(err).ToNot(HaveOccurred())
-			go app.Listen("127.0.0.1:9090")
-
-			defaultConfig := openai.DefaultConfig("")
-			defaultConfig.BaseURL = "http://127.0.0.1:9090/v1"
-
-			client2 = openaigo.NewClient("")
-			client2.BaseURL = defaultConfig.BaseURL
-
-			// Wait for API to be ready
-			client = openai.NewClientWithConfig(defaultConfig)
-			Eventually(func() error {
-				_, err := client.ListModels(context.TODO())
-				return err
-			}, "2m").ShouldNot(HaveOccurred())
-		})
-
-		AfterEach(func() {
-			cancel()
-			app.Shutdown()
-			os.RemoveAll(tmpdir)
-		})
-
-		Context("Applying models", func() {
-			It("applies models from a gallery", func() {
-
-				models := getModels("http://127.0.0.1:9090/models/available")
-				Expect(len(models)).To(Equal(2), fmt.Sprint(models))
-				Expect(models[0].Installed).To(BeFalse(), fmt.Sprint(models))
-				Expect(models[1].Installed).To(BeFalse(), fmt.Sprint(models))
-
-				response := postModelApplyRequest("http://127.0.0.1:9090/models/apply", modelApplyRequest{
-					ID: "test@bert2",
-				})
-
-				Expect(response["uuid"]).ToNot(BeEmpty(), fmt.Sprint(response))
-
-				uuid := response["uuid"].(string)
-				resp := map[string]interface{}{}
-				Eventually(func() bool {
-					response := getModelStatus("http://127.0.0.1:9090/models/jobs/" + uuid)
-					fmt.Println(response)
-					resp = response
-					return response["processed"].(bool)
-				}, "360s", "10s").Should(Equal(true))
-				Expect(resp["message"]).ToNot(ContainSubstring("error"))
-
-				dat, err := os.ReadFile(filepath.Join(tmpdir, "bert2.yaml"))
-				Expect(err).ToNot(HaveOccurred())
-
-				_, err = os.ReadFile(filepath.Join(tmpdir, "foo.yaml"))
-				Expect(err).ToNot(HaveOccurred())
-
-				content := map[string]interface{}{}
-				err = yaml.Unmarshal(dat, &content)
-				Expect(err).ToNot(HaveOccurred())
-				Expect(content["backend"]).To(Equal("bert-embeddings"))
-				Expect(content["foo"]).To(Equal("bar"))
-
-				models = getModels("http://127.0.0.1:9090/models/available")
-				Expect(len(models)).To(Equal(2), fmt.Sprint(models))
-				Expect(models[0].Name).To(Or(Equal("bert"), Equal("bert2")))
-				Expect(models[1].Name).To(Or(Equal("bert"), Equal("bert2")))
-				for _, m := range models {
-					if m.Name == "bert2" {
-						Expect(m.Installed).To(BeTrue())
-					} else {
-						Expect(m.Installed).To(BeFalse())
-					}
-				}
-			})
-			It("overrides models", func() {
-				response := postModelApplyRequest("http://127.0.0.1:9090/models/apply", modelApplyRequest{
-					URL:  "https://raw.githubusercontent.com/go-skynet/model-gallery/main/bert-embeddings.yaml",
-					Name: "bert",
-					Overrides: map[string]interface{}{
-						"backend": "llama",
-					},
-				})
-
-				Expect(response["uuid"]).ToNot(BeEmpty(), fmt.Sprint(response))
-
-				uuid := response["uuid"].(string)
-
-				Eventually(func() bool {
-					response := getModelStatus("http://127.0.0.1:9090/models/jobs/" + uuid)
-					return response["processed"].(bool)
-				}, "360s", "10s").Should(Equal(true))
-
-				dat, err := os.ReadFile(filepath.Join(tmpdir, "bert.yaml"))
-				Expect(err).ToNot(HaveOccurred())
-
-				content := map[string]interface{}{}
-				err = yaml.Unmarshal(dat, &content)
-				Expect(err).ToNot(HaveOccurred())
-				Expect(content["backend"]).To(Equal("llama"))
-			})
-			It("apply models without overrides", func() {
-				response := postModelApplyRequest("http://127.0.0.1:9090/models/apply", modelApplyRequest{
-					URL:       "https://raw.githubusercontent.com/go-skynet/model-gallery/main/bert-embeddings.yaml",
-					Name:      "bert",
-					Overrides: map[string]interface{}{},
-				})
-
-				Expect(response["uuid"]).ToNot(BeEmpty(), fmt.Sprint(response))
-
-				uuid := response["uuid"].(string)
-
-				Eventually(func() bool {
-					response := getModelStatus("http://127.0.0.1:9090/models/jobs/" + uuid)
-					return response["processed"].(bool)
-				}, "360s", "10s").Should(Equal(true))
-
-				dat, err := os.ReadFile(filepath.Join(tmpdir, "bert.yaml"))
-				Expect(err).ToNot(HaveOccurred())
-
-				content := map[string]interface{}{}
-				err = yaml.Unmarshal(dat, &content)
-				Expect(err).ToNot(HaveOccurred())
-				Expect(content["backend"]).To(Equal("bert-embeddings"))
-			})
-
-			It("runs openllama(llama-ggml backend)", Label("llama"), func() {
-				if runtime.GOOS != "linux" {
-					Skip("test supported only on linux")
-				}
-				response := postModelApplyRequest("http://127.0.0.1:9090/models/apply", modelApplyRequest{
-					URL:       "github:go-skynet/model-gallery/openllama_3b.yaml",
-					Name:      "openllama_3b",
-					Overrides: map[string]interface{}{"backend": "llama-ggml", "mmap": true, "f16": true, "context_size": 128},
-				})
-
-				Expect(response["uuid"]).ToNot(BeEmpty(), fmt.Sprint(response))
-
-				uuid := response["uuid"].(string)
-
-				Eventually(func() bool {
-					response := getModelStatus("http://127.0.0.1:9090/models/jobs/" + uuid)
-					return response["processed"].(bool)
-				}, "360s", "10s").Should(Equal(true))
-
-				By("testing completion")
-				resp, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "openllama_3b", Prompt: "Count up to five: one, two, three, four, "})
-				Expect(err).ToNot(HaveOccurred())
-				Expect(len(resp.Choices)).To(Equal(1))
-				Expect(resp.Choices[0].Text).To(ContainSubstring("five"))
-
-				By("testing functions")
-				resp2, err := client.CreateChatCompletion(
-					context.TODO(),
-					openai.ChatCompletionRequest{
-						Model: "openllama_3b",
-						Messages: []openai.ChatCompletionMessage{
-							{
-								Role:    "user",
-								Content: "What is the weather like in San Francisco (celsius)?",
-							},
-						},
-						Functions: []openai.FunctionDefinition{
-							openai.FunctionDefinition{
-								Name:        "get_current_weather",
-								Description: "Get the current weather",
-								Parameters: jsonschema.Definition{
-									Type: jsonschema.Object,
-									Properties: map[string]jsonschema.Definition{
-										"location": {
-											Type:        jsonschema.String,
-											Description: "The city and state, e.g. San Francisco, CA",
-										},
-										"unit": {
-											Type: jsonschema.String,
-											Enum: []string{"celcius", "fahrenheit"},
-										},
-									},
-									Required: []string{"location"},
-								},
-							},
-						},
-					})
-				Expect(err).ToNot(HaveOccurred())
-				Expect(len(resp2.Choices)).To(Equal(1))
-				Expect(resp2.Choices[0].Message.FunctionCall).ToNot(BeNil())
-				Expect(resp2.Choices[0].Message.FunctionCall.Name).To(Equal("get_current_weather"), resp2.Choices[0].Message.FunctionCall.Name)
-
-				var res map[string]string
-				err = json.Unmarshal([]byte(resp2.Choices[0].Message.FunctionCall.Arguments), &res)
-				Expect(err).ToNot(HaveOccurred())
-				Expect(res["location"]).To(Equal("San Francisco, California, United States"), fmt.Sprint(res))
-				Expect(res["unit"]).To(Equal("celcius"), fmt.Sprint(res))
-				Expect(string(resp2.Choices[0].FinishReason)).To(Equal("function_call"), fmt.Sprint(resp2.Choices[0].FinishReason))
-
-			})
-
-			It("runs openllama gguf(llama-cpp)", Label("llama-gguf"), func() {
-				if runtime.GOOS != "linux" {
-					Skip("test supported only on linux")
-				}
-				modelName := "codellama"
-				response := postModelApplyRequest("http://127.0.0.1:9090/models/apply", modelApplyRequest{
-					URL:       "github:go-skynet/model-gallery/codellama-7b-instruct.yaml",
-					Name:      modelName,
-					Overrides: map[string]interface{}{"backend": "llama", "mmap": true, "f16": true, "context_size": 128},
-				})
-
-				Expect(response["uuid"]).ToNot(BeEmpty(), fmt.Sprint(response))
-
-				uuid := response["uuid"].(string)
-
-				Eventually(func() bool {
-					response := getModelStatus("http://127.0.0.1:9090/models/jobs/" + uuid)
-					return response["processed"].(bool)
-				}, "360s", "10s").Should(Equal(true))
-
-				By("testing chat")
-				resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: modelName, Messages: []openai.ChatCompletionMessage{
-					{
-						Role:    "user",
-						Content: "How much is 2+2?",
-					},
-				}})
-				Expect(err).ToNot(HaveOccurred())
-				Expect(len(resp.Choices)).To(Equal(1))
-				Expect(resp.Choices[0].Message.Content).To(Or(ContainSubstring("4"), ContainSubstring("four")))
-
-				By("testing functions")
-				resp2, err := client.CreateChatCompletion(
-					context.TODO(),
-					openai.ChatCompletionRequest{
-						Model: modelName,
-						Messages: []openai.ChatCompletionMessage{
-							{
-								Role:    "user",
-								Content: "What is the weather like in San Francisco (celsius)?",
-							},
-						},
-						Functions: []openai.FunctionDefinition{
-							openai.FunctionDefinition{
-								Name:        "get_current_weather",
-								Description: "Get the current weather",
-								Parameters: jsonschema.Definition{
-									Type: jsonschema.Object,
-									Properties: map[string]jsonschema.Definition{
-										"location": {
-											Type:        jsonschema.String,
-											Description: "The city and state, e.g. San Francisco, CA",
-										},
-										"unit": {
-											Type: jsonschema.String,
-											Enum: []string{"celcius", "fahrenheit"},
-										},
-									},
-									Required: []string{"location"},
-								},
-							},
-						},
-					})
-				Expect(err).ToNot(HaveOccurred())
-				Expect(len(resp2.Choices)).To(Equal(1))
-				Expect(resp2.Choices[0].Message.FunctionCall).ToNot(BeNil())
-				Expect(resp2.Choices[0].Message.FunctionCall.Name).To(Equal("get_current_weather"), resp2.Choices[0].Message.FunctionCall.Name)
-
-				var res map[string]string
-				err = json.Unmarshal([]byte(resp2.Choices[0].Message.FunctionCall.Arguments), &res)
-				Expect(err).ToNot(HaveOccurred())
-				Expect(res["location"]).To(Equal("San Francisco"), fmt.Sprint(res))
-				Expect(res["unit"]).To(Equal("celcius"), fmt.Sprint(res))
-				Expect(string(resp2.Choices[0].FinishReason)).To(Equal("function_call"), fmt.Sprint(resp2.Choices[0].FinishReason))
-			})
-
-			It("runs gpt4all", Label("gpt4all"), func() {
-				if runtime.GOOS != "linux" {
-					Skip("test supported only on linux")
-				}
-
-				response := postModelApplyRequest("http://127.0.0.1:9090/models/apply", modelApplyRequest{
-					URL:  "github:go-skynet/model-gallery/gpt4all-j.yaml",
-					Name: "gpt4all-j",
-				})
-
-				Expect(response["uuid"]).ToNot(BeEmpty(), fmt.Sprint(response))
-
-				uuid := response["uuid"].(string)
-
-				Eventually(func() bool {
-					response := getModelStatus("http://127.0.0.1:9090/models/jobs/" + uuid)
-					return response["processed"].(bool)
-				}, "960s", "10s").Should(Equal(true))
-
-				resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: "gpt4all-j", Messages: []openai.ChatCompletionMessage{openai.ChatCompletionMessage{Role: "user", Content: "How are you?"}}})
-				Expect(err).ToNot(HaveOccurred())
-				Expect(len(resp.Choices)).To(Equal(1))
-				Expect(resp.Choices[0].Message.Content).To(ContainSubstring("well"))
-			})
-
-		})
-	})
-
-	Context("Model gallery", func() {
-		BeforeEach(func() {
-			var err error
-			tmpdir, err = os.MkdirTemp("", "")
-			Expect(err).ToNot(HaveOccurred())
-
-			c, cancel = context.WithCancel(context.Background())
-
-			galleries := []gallery.Gallery{
-				{
-					Name: "model-gallery",
-					URL:  "https://raw.githubusercontent.com/go-skynet/model-gallery/main/index.yaml",
-				},
-			}
-
-			metricsService, err := services.SetupMetrics()
-			Expect(err).ToNot(HaveOccurred())
-
-			cl, ml, options, err := startup.Startup(
-				append(commonOpts,
-					schema.WithContext(c),
-					schema.WithMetrics(metricsService),
-					schema.WithAudioDir(tmpdir),
-					schema.WithImageDir(tmpdir),
-					schema.WithGalleries(galleries),
-					schema.WithModelPath(tmpdir),
-					schema.WithBackendAssets(backendAssets),
-					schema.WithBackendAssetsOutput(tmpdir))...,
-			)
-			Expect(err).ToNot(HaveOccurred())
-			app, err = server.App(cl, ml, options)
-			go app.Listen("127.0.0.1:9090")
-
-			defaultConfig := openai.DefaultConfig("")
-			defaultConfig.BaseURL = "http://127.0.0.1:9090/v1"
-
-			client2 = openaigo.NewClient("")
-			client2.BaseURL = defaultConfig.BaseURL
-
-			// Wait for API to be ready
-			client = openai.NewClientWithConfig(defaultConfig)
-			Eventually(func() error {
-				_, err := client.ListModels(context.TODO())
-				return err
-			}, "2m").ShouldNot(HaveOccurred())
-		})
-
-		AfterEach(func() {
-			cancel()
-			app.Shutdown()
-			os.RemoveAll(tmpdir)
-		})
-		It("installs and is capable to run tts", Label("tts"), func() {
-			if runtime.GOOS != "linux" {
-				Skip("test supported only on linux")
-			}
-
-			response := postModelApplyRequest("http://127.0.0.1:9090/models/apply", modelApplyRequest{
-				ID: "model-gallery@voice-en-us-kathleen-low",
-			})
-
-			Expect(response["uuid"]).ToNot(BeEmpty(), fmt.Sprint(response))
-
-			uuid := response["uuid"].(string)
-
-			Eventually(func() bool {
-				response := getModelStatus("http://127.0.0.1:9090/models/jobs/" + uuid)
-				fmt.Println(response)
-				return response["processed"].(bool)
-			}, "360s", "10s").Should(Equal(true))
-
-			// An HTTP Post to the /tts endpoint should return a wav audio file
-			resp, err := http.Post("http://127.0.0.1:9090/tts", "application/json", bytes.NewBuffer([]byte(`{"input": "Hello world", "model": "en-us-kathleen-low.onnx"}`)))
-			Expect(err).ToNot(HaveOccurred(), fmt.Sprint(resp))
-			dat, err := io.ReadAll(resp.Body)
-			Expect(err).ToNot(HaveOccurred(), fmt.Sprint(resp))
-
-			Expect(resp.StatusCode).To(Equal(200), fmt.Sprint(string(dat)))
-			Expect(resp.Header.Get("Content-Type")).To(Equal("audio/x-wav"))
-		})
-		It("installs and is capable to generate images", Label("stablediffusion"), func() {
-			if runtime.GOOS != "linux" {
-				Skip("test supported only on linux")
-			}
-
-			response := postModelApplyRequest("http://127.0.0.1:9090/models/apply", modelApplyRequest{
-				ID: "model-gallery@stablediffusion",
-				Overrides: map[string]interface{}{
-					"parameters": map[string]interface{}{"model": "stablediffusion_assets"},
-				},
-			})
-
-			Expect(response["uuid"]).ToNot(BeEmpty(), fmt.Sprint(response))
-
-			uuid := response["uuid"].(string)
-
-			Eventually(func() bool {
-				response := getModelStatus("http://127.0.0.1:9090/models/jobs/" + uuid)
-				fmt.Println(response)
-				return response["processed"].(bool)
-			}, "360s", "10s").Should(Equal(true))
-
-			resp, err := http.Post(
-				"http://127.0.0.1:9090/v1/images/generations",
-				"application/json",
-				bytes.NewBuffer([]byte(`{
-					 			"prompt": "floating hair, portrait, ((loli)), ((one girl)), cute face, hidden hands, asymmetrical bangs, beautiful detailed eyes, eye shadow, hair ornament, ribbons, bowties, buttons, pleated skirt, (((masterpiece))), ((best quality)), colorful|((part of the head)), ((((mutated hands and fingers)))), deformed, blurry, bad anatomy, disfigured, poorly drawn face, mutation, mutated, extra limb, ugly, poorly drawn hands, missing limb, blurry, floating limbs, disconnected limbs, malformed hands, blur, out of focus, long neck, long body, Octane renderer, lowres, bad anatomy, bad hands, text",
-								"mode": 2,  "seed":9000,
-					 			"size": "256x256", "n":2}`)))
-			// The response should contain an URL
-			Expect(err).ToNot(HaveOccurred(), fmt.Sprint(resp))
-			dat, err := io.ReadAll(resp.Body)
-			Expect(err).ToNot(HaveOccurred(), string(dat))
-			Expect(string(dat)).To(ContainSubstring("http://127.0.0.1:9090/"), string(dat))
-			Expect(string(dat)).To(ContainSubstring(".png"), string(dat))
-
-		})
-	})
-
-	Context("API query", func() {
-		BeforeEach(func() {
-			c, cancel = context.WithCancel(context.Background())
-
-			metricsService, err := services.SetupMetrics()
-			Expect(err).ToNot(HaveOccurred())
-
-			cl, ml, options, err := startup.Startup(
-				append(commonOpts,
-					schema.WithExternalBackend("huggingface", os.Getenv("HUGGINGFACE_GRPC")),
-					schema.WithContext(c),
-					schema.WithModelPath(os.Getenv("MODELS_PATH")),
-					schema.WithMetrics(metricsService),
-				)...)
-			Expect(err).ToNot(HaveOccurred())
-			app, err = server.App(cl, ml, options)
-			Expect(err).ToNot(HaveOccurred())
-			go app.Listen("127.0.0.1:9090")
-
-			defaultConfig := openai.DefaultConfig("")
-			defaultConfig.BaseURL = "http://127.0.0.1:9090/v1"
-
-			client2 = openaigo.NewClient("")
-			client2.BaseURL = defaultConfig.BaseURL
-
-			// Wait for API to be ready
-			client = openai.NewClientWithConfig(defaultConfig)
-			Eventually(func() error {
-				_, err := client.ListModels(context.TODO())
-				return err
-			}, "2m").ShouldNot(HaveOccurred())
-		})
-		AfterEach(func() {
-			cancel()
-			app.Shutdown()
-		})
-		It("returns the models list", func() {
-			models, err := client.ListModels(context.TODO())
-			Expect(err).ToNot(HaveOccurred())
-			Expect(len(models.Models)).To(Equal(6)) // If "config.yaml" should be included, this should be 8?
-		})
-		It("can generate completions", func() {
-			resp, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "testmodel", Prompt: "abcdedfghikl"})
-			Expect(err).ToNot(HaveOccurred())
-			Expect(len(resp.Choices)).To(Equal(1))
-			Expect(resp.Choices[0].Text).ToNot(BeEmpty())
-		})
-
-		It("can generate chat completions ", func() {
-			resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: "testmodel", Messages: []openai.ChatCompletionMessage{openai.ChatCompletionMessage{Role: "user", Content: "abcdedfghikl"}}})
-			Expect(err).ToNot(HaveOccurred())
-			Expect(len(resp.Choices)).To(Equal(1))
-			Expect(resp.Choices[0].Message.Content).ToNot(BeEmpty())
-		})
-
-		It("can generate completions from model configs", func() {
-			resp, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "gpt4all", Prompt: "abcdedfghikl"})
-			Expect(err).ToNot(HaveOccurred())
-			Expect(len(resp.Choices)).To(Equal(1))
-			Expect(resp.Choices[0].Text).ToNot(BeEmpty())
-		})
-
-		It("can generate chat completions from model configs", func() {
-			resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: "gpt4all-2", Messages: []openai.ChatCompletionMessage{openai.ChatCompletionMessage{Role: "user", Content: "abcdedfghikl"}}})
-			Expect(err).ToNot(HaveOccurred())
-			Expect(len(resp.Choices)).To(Equal(1))
-			Expect(resp.Choices[0].Message.Content).ToNot(BeEmpty())
-		})
-
-		It("returns errors", func() {
-			backends := len(model.AutoLoadBackends) + 1 // +1 for huggingface
-			_, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "foomodel", Prompt: "abcdedfghikl"})
-			Expect(err).To(HaveOccurred())
-			Expect(err.Error()).To(ContainSubstring(fmt.Sprintf("error, status code: 500, message: could not load model - all backends returned error: %d errors occurred:", backends)))
-		})
-		It("transcribes audio", func() {
-			if runtime.GOOS != "linux" {
-				Skip("test supported only on linux")
-			}
-			resp, err := client.CreateTranscription(
-				context.Background(),
-				openai.AudioRequest{
-					Model:    openai.Whisper1,
-					FilePath: filepath.Join(os.Getenv("TEST_DIR"), "audio.wav"),
-				},
-			)
-			Expect(err).ToNot(HaveOccurred())
-			Expect(resp.Text).To(ContainSubstring("This is the Micro Machine Man presenting"))
-		})
-
-		It("calculate embeddings", func() {
-			if runtime.GOOS != "linux" {
-				Skip("test supported only on linux")
-			}
-			resp, err := client.CreateEmbeddings(
-				context.Background(),
-				openai.EmbeddingRequest{
-					Model: openai.AdaEmbeddingV2,
-					Input: []string{"sun", "cat"},
-				},
-			)
-			Expect(err).ToNot(HaveOccurred(), err)
-			Expect(len(resp.Data[0].Embedding)).To(BeNumerically("==", 384))
-			Expect(len(resp.Data[1].Embedding)).To(BeNumerically("==", 384))
-
-			sunEmbedding := resp.Data[0].Embedding
-			resp2, err := client.CreateEmbeddings(
-				context.Background(),
-				openai.EmbeddingRequest{
-					Model: openai.AdaEmbeddingV2,
-					Input: []string{"sun"},
-				},
-			)
-			Expect(err).ToNot(HaveOccurred())
-			Expect(resp2.Data[0].Embedding).To(Equal(sunEmbedding))
-		})
-
-		Context("External gRPC calls", func() {
-			It("calculate embeddings with sentencetransformers", func() {
-				if runtime.GOOS != "linux" {
-					Skip("test supported only on linux")
-				}
-				resp, err := client.CreateEmbeddings(
-					context.Background(),
-					openai.EmbeddingRequest{
-						Model: openai.AdaCodeSearchCode,
-						Input: []string{"sun", "cat"},
-					},
-				)
-				Expect(err).ToNot(HaveOccurred())
-				Expect(len(resp.Data[0].Embedding)).To(BeNumerically("==", 384))
-				Expect(len(resp.Data[1].Embedding)).To(BeNumerically("==", 384))
-
-				sunEmbedding := resp.Data[0].Embedding
-				resp2, err := client.CreateEmbeddings(
-					context.Background(),
-					openai.EmbeddingRequest{
-						Model: openai.AdaCodeSearchCode,
-						Input: []string{"sun"},
-					},
-				)
-				Expect(err).ToNot(HaveOccurred())
-				Expect(resp2.Data[0].Embedding).To(Equal(sunEmbedding))
-				Expect(resp2.Data[0].Embedding).ToNot(Equal(resp.Data[1].Embedding))
-			})
-		})
-
-		Context("backends", func() {
-			It("runs rwkv completion", func() {
-				if runtime.GOOS != "linux" {
-					Skip("test supported only on linux")
-				}
-				resp, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "rwkv_test", Prompt: "Count up to five: one, two, three, four,"})
-				Expect(err).ToNot(HaveOccurred())
-				Expect(len(resp.Choices) > 0).To(BeTrue())
-				Expect(resp.Choices[0].Text).To(ContainSubstring("five"))
-
-				stream, err := client.CreateCompletionStream(context.TODO(), openai.CompletionRequest{
-					Model: "rwkv_test", Prompt: "Count up to five: one, two, three, four,", Stream: true,
-				})
-				Expect(err).ToNot(HaveOccurred())
-				defer stream.Close()
-
-				tokens := 0
-				text := ""
-				for {
-					response, err := stream.Recv()
-					if errors.Is(err, io.EOF) {
-						break
-					}
-
-					Expect(err).ToNot(HaveOccurred())
-					text += response.Choices[0].Text
-					tokens++
-				}
-				Expect(text).ToNot(BeEmpty())
-				Expect(text).To(ContainSubstring("five"))
-				Expect(tokens).ToNot(Or(Equal(1), Equal(0)))
-			})
-			It("runs rwkv chat completion", func() {
-				if runtime.GOOS != "linux" {
-					Skip("test supported only on linux")
-				}
-				resp, err := client.CreateChatCompletion(context.TODO(),
-					openai.ChatCompletionRequest{Model: "rwkv_test", Messages: []openai.ChatCompletionMessage{{Content: "Can you count up to five?", Role: "user"}}})
-				Expect(err).ToNot(HaveOccurred())
-				Expect(len(resp.Choices) > 0).To(BeTrue())
-				Expect(resp.Choices[0].Message.Content).To(Or(ContainSubstring("Sure"), ContainSubstring("five")))
-
-				stream, err := client.CreateChatCompletionStream(context.TODO(), openai.ChatCompletionRequest{Model: "rwkv_test", Messages: []openai.ChatCompletionMessage{{Content: "Can you count up to five?", Role: "user"}}})
-				Expect(err).ToNot(HaveOccurred())
-				defer stream.Close()
-
-				tokens := 0
-				text := ""
-				for {
-					response, err := stream.Recv()
-					if errors.Is(err, io.EOF) {
-						break
-					}
-
-					Expect(err).ToNot(HaveOccurred())
-					text += response.Choices[0].Delta.Content
-					tokens++
-				}
-				Expect(text).ToNot(BeEmpty())
-				Expect(text).To(Or(ContainSubstring("Sure"), ContainSubstring("five")))
-
-				Expect(tokens).ToNot(Or(Equal(1), Equal(0)))
-			})
-		})
-	})
-
-	Context("Config file", func() {
-		BeforeEach(func() {
-			c, cancel = context.WithCancel(context.Background())
-
-			metricsService, err := services.SetupMetrics()
-			Expect(err).ToNot(HaveOccurred())
-
-			cl, ml, options, err := startup.Startup(
-				append(commonOpts,
-					schema.WithContext(c),
-					schema.WithMetrics(metricsService),
-					schema.WithModelPath(os.Getenv("MODELS_PATH")),
-					schema.WithConfigFile(os.Getenv("CONFIG_FILE")))...,
-			)
-			Expect(err).ToNot(HaveOccurred())
-			app, err = server.App(cl, ml, options)
-			Expect(err).ToNot(HaveOccurred())
-			go app.Listen("127.0.0.1:9090")
-
-			defaultConfig := openai.DefaultConfig("")
-			defaultConfig.BaseURL = "http://127.0.0.1:9090/v1"
-			client2 = openaigo.NewClient("")
-			client2.BaseURL = defaultConfig.BaseURL
-			// Wait for API to be ready
-			client = openai.NewClientWithConfig(defaultConfig)
-			Eventually(func() error {
-				_, err := client.ListModels(context.TODO())
-				return err
-			}, "2m").ShouldNot(HaveOccurred())
-		})
-		AfterEach(func() {
-			cancel()
-			app.Shutdown()
-		})
-		It("can generate chat completions from config file (list1)", func() {
-			resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: "list1", Messages: []openai.ChatCompletionMessage{{Role: "user", Content: "abcdedfghikl"}}})
-			Expect(err).ToNot(HaveOccurred())
-			Expect(len(resp.Choices)).To(Equal(1))
-			Expect(resp.Choices[0].Message.Content).ToNot(BeEmpty())
-		})
-		It("can generate chat completions from config file (list2)", func() {
-			resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: "list2", Messages: []openai.ChatCompletionMessage{{Role: "user", Content: "abcdedfghikl"}}})
-			Expect(err).ToNot(HaveOccurred())
-			Expect(len(resp.Choices)).To(Equal(1))
-			Expect(resp.Choices[0].Message.Content).ToNot(BeEmpty())
-		})
-		It("can generate edit completions from config file", func() {
-			request := openaigo.EditCreateRequestBody{
-				Model:       "list2",
-				Instruction: "foo",
-				Input:       "bar",
-			}
-			resp, err := client2.CreateEdit(context.Background(), request)
-			Expect(err).ToNot(HaveOccurred())
-			Expect(len(resp.Choices)).To(Equal(1))
-			Expect(resp.Choices[0].Text).ToNot(BeEmpty())
-		})
-
-	})
-})
--- a/core/http/apt_suite_test.go
+++ b/core/http/apt_suite_test.go
@ -1,13 +0,0 @@
-package http_test
-
-import (
-	"testing"
-
-	. "github.com/onsi/ginkgo/v2"
-	. "github.com/onsi/gomega"
-)
-
-func TestLocalAI(t *testing.T) {
-	RegisterFailHandler(Fail)
-	RunSpecs(t, "LocalAI test suite")
-}
--- a/core/http/endpoints/localai/backend_monitor.go
+++ b/core/http/endpoints/localai/backend_monitor.go
@ -1,34 +0,0 @@
-package localai
-
-import (
-	"github.com/go-skynet/LocalAI/core/services"
-	"github.com/go-skynet/LocalAI/pkg/schema"
-	"github.com/gofiber/fiber/v2"
-)
-
-func BackendMonitorEndpoint(bm *services.BackendMonitor) func(c *fiber.Ctx) error {
-	return func(c *fiber.Ctx) error {
-		input := new(schema.BackendMonitorRequest)
-		// Get input data from the request body
-		if err := c.BodyParser(input); err != nil {
-			return err
-		}
-
-		resp, err := bm.CheckAndSample(input.Model)
-		if err != nil {
-			return err
-		}
-		return c.JSON(resp)
-	}
-}
-
-func BackendShutdownEndpoint(bm *services.BackendMonitor) func(c *fiber.Ctx) error {
-	return func(c *fiber.Ctx) error {
-		input := new(schema.BackendMonitorRequest)
-		// Get input data from the request body
-		if err := c.BodyParser(input); err != nil {
-			return err
-		}
-		return bm.ShutdownModel(input.Model)
-	}
-}
--- a/core/http/endpoints/localai/gallery.go
+++ b/core/http/endpoints/localai/gallery.go
@ -1,148 +0,0 @@
-package localai
-
-import (
-	"encoding/json"
-	"fmt"
-	"slices"
-
-	"github.com/go-skynet/LocalAI/core/services"
-	"github.com/go-skynet/LocalAI/pkg/gallery"
-	"github.com/gofiber/fiber/v2"
-	"github.com/google/uuid"
-	"github.com/rs/zerolog/log"
-)
-
-/// Endpoint Service
-
-type ModelGalleryEndpointService struct {
-	galleries      []gallery.Gallery
-	modelPath      string
-	galleryApplier *services.GalleryApplier
-}
-
-type GalleryModel struct {
-	ID string `json:"id"`
-	gallery.GalleryModel
-}
-
-func CreateModelGalleryEndpointService(galleries []gallery.Gallery, modelPath string, galleryApplier *services.GalleryApplier) ModelGalleryEndpointService {
-	return ModelGalleryEndpointService{
-		galleries:      galleries,
-		modelPath:      modelPath,
-		galleryApplier: galleryApplier,
-	}
-}
-
-func (mgs *ModelGalleryEndpointService) GetOpStatusEndpoint() func(c *fiber.Ctx) error {
-	return func(c *fiber.Ctx) error {
-		status := mgs.galleryApplier.GetStatus(c.Params("uuid"))
-		if status == nil {
-			return fmt.Errorf("could not find any status for ID")
-		}
-		return c.JSON(status)
-	}
-}
-
-func (mgs *ModelGalleryEndpointService) GetAllStatusEndpoint() func(c *fiber.Ctx) error {
-	return func(c *fiber.Ctx) error {
-		return c.JSON(mgs.galleryApplier.GetAllStatus())
-	}
-}
-
-func (mgs *ModelGalleryEndpointService) ApplyModelGalleryEndpoint() func(c *fiber.Ctx) error {
-	return func(c *fiber.Ctx) error {
-		input := new(GalleryModel)
-		// Get input data from the request body
-		if err := c.BodyParser(input); err != nil {
-			return err
-		}
-
-		uuid, err := uuid.NewUUID()
-		if err != nil {
-			return err
-		}
-		mgs.galleryApplier.C <- gallery.GalleryOp{
-			Req:         input.GalleryModel,
-			Id:          uuid.String(),
-			GalleryName: input.ID,
-			Galleries:   mgs.galleries,
-		}
-		return c.JSON(struct {
-			ID        string `json:"uuid"`
-			StatusURL string `json:"status"`
-		}{ID: uuid.String(), StatusURL: c.BaseURL() + "/models/jobs/" + uuid.String()})
-	}
-}
-
-func (mgs *ModelGalleryEndpointService) ListModelFromGalleryEndpoint() func(c *fiber.Ctx) error {
-	return func(c *fiber.Ctx) error {
-		log.Debug().Msgf("Listing models from galleries: %+v", mgs.galleries)
-
-		models, err := gallery.AvailableGalleryModels(mgs.galleries, mgs.modelPath)
-		if err != nil {
-			return err
-		}
-		log.Debug().Msgf("Models found from galleries: %+v", models)
-		for _, m := range models {
-			log.Debug().Msgf("Model found from galleries: %+v", m)
-		}
-		dat, err := json.Marshal(models)
-		if err != nil {
-			return err
-		}
-		return c.Send(dat)
-	}
-}
-
-// NOTE: This is different (and much simpler!) than above! This JUST lists the model galleries that have been loaded, not their contents!
-func (mgs *ModelGalleryEndpointService) ListModelGalleriesEndpoint() func(c *fiber.Ctx) error {
-	return func(c *fiber.Ctx) error {
-		log.Debug().Msgf("Listing model galleries %+v", mgs.galleries)
-		dat, err := json.Marshal(mgs.galleries)
-		if err != nil {
-			return err
-		}
-		return c.Send(dat)
-	}
-}
-
-func (mgs *ModelGalleryEndpointService) AddModelGalleryEndpoint() func(c *fiber.Ctx) error {
-	return func(c *fiber.Ctx) error {
-		input := new(gallery.Gallery)
-		// Get input data from the request body
-		if err := c.BodyParser(input); err != nil {
-			return err
-		}
-		if slices.ContainsFunc(mgs.galleries, func(gallery gallery.Gallery) bool {
-			return gallery.Name == input.Name
-		}) {
-			return fmt.Errorf("%s already exists", input.Name)
-		}
-		dat, err := json.Marshal(mgs.galleries)
-		if err != nil {
-			return err
-		}
-		log.Debug().Msgf("Adding %+v to gallery list", *input)
-		mgs.galleries = append(mgs.galleries, *input)
-		return c.Send(dat)
-	}
-}
-
-func (mgs *ModelGalleryEndpointService) RemoveModelGalleryEndpoint() func(c *fiber.Ctx) error {
-	return func(c *fiber.Ctx) error {
-		input := new(gallery.Gallery)
-		// Get input data from the request body
-		if err := c.BodyParser(input); err != nil {
-			return err
-		}
-		if !slices.ContainsFunc(mgs.galleries, func(gallery gallery.Gallery) bool {
-			return gallery.Name == input.Name
-		}) {
-			return fmt.Errorf("%s is not currently registered", input.Name)
-		}
-		mgs.galleries = slices.DeleteFunc(mgs.galleries, func(gallery gallery.Gallery) bool {
-			return gallery.Name == input.Name
-		})
-		return c.Send(nil)
-	}
-}
--- a/core/http/endpoints/localai/metrics.go
+++ b/core/http/endpoints/localai/metrics.go
@ -1,42 +0,0 @@
-package localai
-
-import (
-	"time"
-
-	"github.com/go-skynet/LocalAI/pkg/schema"
-	"github.com/gofiber/fiber/v2"
-	"github.com/gofiber/fiber/v2/middleware/adaptor"
-	"github.com/prometheus/client_golang/prometheus/promhttp"
-)
-
-func MetricsHandler() fiber.Handler {
-	return adaptor.HTTPHandler(promhttp.Handler())
-}
-
-type apiMiddlewareConfig struct {
-	Filter  func(c *fiber.Ctx) bool
-	metrics *schema.LocalAIMetrics
-}
-
-func MetricsAPIMiddleware(metrics *schema.LocalAIMetrics) fiber.Handler {
-	cfg := apiMiddlewareConfig{
-		metrics: metrics,
-		Filter: func(c *fiber.Ctx) bool {
-			return c.Path() == "/metrics"
-		},
-	}
-
-	return func(c *fiber.Ctx) error {
-		if cfg.Filter != nil && cfg.Filter(c) {
-			return c.Next()
-		}
-		path := c.Path()
-		method := c.Method()
-
-		start := time.Now()
-		err := c.Next()
-		elapsed := float64(time.Since(start)) / float64(time.Second)
-		cfg.metrics.ObserveAPICall(method, path, elapsed)
-		return err
-	}
-}
--- a/core/http/endpoints/localai/tts.go
+++ b/core/http/endpoints/localai/tts.go
@ -1,25 +0,0 @@
-package localai
-
-import (
-	"github.com/go-skynet/LocalAI/core/backend"
-	"github.com/go-skynet/LocalAI/core/services"
-	"github.com/go-skynet/LocalAI/pkg/model"
-	"github.com/go-skynet/LocalAI/pkg/schema"
-	"github.com/gofiber/fiber/v2"
-)
-
-func TTSEndpoint(cl *services.ConfigLoader, ml *model.ModelLoader, so *schema.StartupOptions) func(c *fiber.Ctx) error {
-	return func(c *fiber.Ctx) error {
-		input := new(schema.TTSRequest)
-		// Get input data from the request body
-		if err := c.BodyParser(input); err != nil {
-			return err
-		}
-
-		filePath, _, err := backend.ModelTTS(input.Backend, input.Input, input.Model, ml, so)
-		if err != nil {
-			return err
-		}
-		return c.Download(filePath)
-	}
-}
--- a/core/http/endpoints/openai/chat.go
+++ b/core/http/endpoints/openai/chat.go
@ -1,97 +0,0 @@
-package openai
-
-import (
-	"bufio"
-	"bytes"
-	"encoding/json"
-	"fmt"
-
-	"github.com/go-skynet/LocalAI/core/backend"
-	"github.com/go-skynet/LocalAI/core/services"
-	"github.com/go-skynet/LocalAI/pkg/model"
-	"github.com/go-skynet/LocalAI/pkg/schema"
-	"github.com/gofiber/fiber/v2"
-	"github.com/rs/zerolog/log"
-	"github.com/valyala/fasthttp"
-)
-
-func ChatEndpoint(cl *services.ConfigLoader, ml *model.ModelLoader, startupOptions *schema.StartupOptions) func(c *fiber.Ctx) error {
-
-	emptyMessage := ""
-
-	return func(c *fiber.Ctx) error {
-		modelName, input, err := readInput(c, startupOptions, ml, true)
-		if err != nil {
-			return fmt.Errorf("failed reading parameters from request:%w", err)
-		}
-
-		// The scary comment I feel like I forgot about along the way:
-		//
-		// functions are not supported in stream mode (yet?)
-		//
-		if input.Stream {
-			log.Debug().Msgf("Stream request received")
-			c.Context().SetContentType("text/event-stream")
-			//c.Response().Header.SetContentType(fiber.MIMETextHTMLCharsetUTF8)
-			//	c.Set("Content-Type", "text/event-stream")
-			c.Set("Cache-Control", "no-cache")
-			c.Set("Connection", "keep-alive")
-			c.Set("Transfer-Encoding", "chunked")
-
-			responses, err := backend.StreamingChatGenerationOpenAIRequest(modelName, input, cl, ml, startupOptions)
-			if err != nil {
-				return fmt.Errorf("failed establishing streaming chat request :%w", err)
-			}
-			c.Context().SetBodyStreamWriter(fasthttp.StreamWriter(func(w *bufio.Writer) {
-				usage := &schema.OpenAIUsage{}
-				id := ""
-				created := 0
-				for ev := range responses {
-					usage = &ev.Usage // Copy a pointer to the latest usage chunk so that the stop message can reference it
-					id = ev.ID
-					created = ev.Created // Similarly, grab the ID and created from any / the last response so we can use it for the stop
-					var buf bytes.Buffer
-					enc := json.NewEncoder(&buf)
-					enc.Encode(ev)
-					log.Debug().Msgf("Sending chunk: %s", buf.String())
-					_, err := fmt.Fprintf(w, "data: %v\n", buf.String())
-					if err != nil {
-						log.Debug().Msgf("Sending chunk failed: %v", err)
-						input.Cancel()
-						break
-					}
-					w.Flush()
-				}
-
-				resp := &schema.OpenAIResponse{
-					ID:      id,
-					Created: created,
-					Model:   input.Model, // we have to return what the user sent here, due to OpenAI spec.
-					Choices: []schema.Choice{
-						{
-							FinishReason: "stop",
-							Index:        0,
-							Delta:        &schema.Message{Content: &emptyMessage},
-						}},
-					Object: "chat.completion.chunk",
-					Usage:  *usage,
-				}
-				respData, _ := json.Marshal(resp)
-
-				w.WriteString(fmt.Sprintf("data: %s\n\n", respData))
-				w.WriteString("data: [DONE]\n\n")
-				w.Flush()
-			}))
-			return nil
-		}
-		//////////////////////////////////////////
-
-		resp, err := backend.ChatGenerationOpenAIRequest(modelName, input, cl, ml, startupOptions)
-		if err != nil {
-			return fmt.Errorf("error generating chat request: +%w", err)
-		}
-		respData, _ := json.Marshal(resp) // TODO this is only used for the debug log and costs performance. monitor this?
-		log.Debug().Msgf("Response: %s", respData)
-		return c.JSON(resp)
-	}
-}
--- a/core/http/endpoints/openai/completion.go
+++ b/core/http/endpoints/openai/completion.go
@ -1,91 +0,0 @@
-package openai
-
-import (
-	"bufio"
-	"bytes"
-	"encoding/json"
-	"fmt"
-	"time"
-
-	"github.com/go-skynet/LocalAI/core/backend"
-	"github.com/go-skynet/LocalAI/core/services"
-	"github.com/go-skynet/LocalAI/pkg/model"
-	"github.com/go-skynet/LocalAI/pkg/schema"
-	"github.com/gofiber/fiber/v2"
-	"github.com/google/uuid"
-	"github.com/rs/zerolog/log"
-	"github.com/valyala/fasthttp"
-)
-
-// https://platform.openai.com/docs/api-reference/completions
-func CompletionEndpoint(cl *services.ConfigLoader, ml *model.ModelLoader, so *schema.StartupOptions) func(c *fiber.Ctx) error {
-	id := uuid.New().String()
-	created := int(time.Now().Unix())
-
-	return func(c *fiber.Ctx) error {
-		modelName, input, err := readInput(c, so, ml, true)
-		if err != nil {
-			return fmt.Errorf("failed reading parameters from request:%w", err)
-		}
-
-		log.Debug().Msgf("`input`: %+v", input)
-
-		if input.Stream {
-			log.Debug().Msgf("Stream request received")
-			c.Context().SetContentType("text/event-stream")
-			//c.Response().Header.SetContentType(fiber.MIMETextHTMLCharsetUTF8)
-			//c.Set("Content-Type", "text/event-stream")
-			c.Set("Cache-Control", "no-cache")
-			c.Set("Connection", "keep-alive")
-			c.Set("Transfer-Encoding", "chunked")
-
-			responses, err := backend.StreamingCompletionGenerationOpenAIRequest(modelName, input, cl, ml, so)
-			if err != nil {
-				return fmt.Errorf("failed establishing streaming completion request :%w", err)
-			}
-			c.Context().SetBodyStreamWriter(fasthttp.StreamWriter(func(w *bufio.Writer) {
-
-				for ev := range responses {
-					var buf bytes.Buffer
-					enc := json.NewEncoder(&buf)
-					enc.Encode(ev)
-
-					log.Debug().Msgf("Sending chunk: %s", buf.String())
-					fmt.Fprintf(w, "data: %v\n", buf.String())
-					w.Flush()
-				}
-
-				resp := &schema.OpenAIResponse{
-					ID:      id,
-					Created: created,
-					Model:   input.Model, // we have to return what the user sent here, due to OpenAI spec.
-					Choices: []schema.Choice{
-						{
-							Index:        0,
-							FinishReason: "stop",
-						},
-					},
-					Object: "text_completion",
-				}
-				respData, _ := json.Marshal(resp)
-
-				w.WriteString(fmt.Sprintf("data: %s\n\n", respData))
-				w.WriteString("data: [DONE]\n\n")
-				w.Flush()
-			}))
-			return nil
-		}
-
-		///////////
-
-		resp, err := backend.CompletionGenerationOpenAIRequest(modelName, input, cl, ml, so)
-		if err != nil {
-			return fmt.Errorf("error generating completion request: +%w", err)
-		}
-		jsonResult, _ := json.Marshal(resp)
-		log.Debug().Msgf("Response: %s", jsonResult)
-
-		// Return the prediction in the response body
-		return c.JSON(resp)
-	}
-}
--- a/core/http/endpoints/openai/edit.go
+++ b/core/http/endpoints/openai/edit.go
@ -1,34 +0,0 @@
-package openai
-
-import (
-	"encoding/json"
-	"fmt"
-
-	"github.com/go-skynet/LocalAI/core/backend"
-	"github.com/go-skynet/LocalAI/core/services"
-	"github.com/go-skynet/LocalAI/pkg/model"
-	"github.com/go-skynet/LocalAI/pkg/schema"
-	"github.com/gofiber/fiber/v2"
-
-	"github.com/rs/zerolog/log"
-)
-
-func EditEndpoint(cl *services.ConfigLoader, ml *model.ModelLoader, so *schema.StartupOptions) func(c *fiber.Ctx) error {
-	return func(c *fiber.Ctx) error {
-		modelFile, input, err := readInput(c, so, ml, true)
-		if err != nil {
-			return fmt.Errorf("failed reading parameters from request:%w", err)
-		}
-
-		resp, err := backend.EditGenerationOpenAIRequest(modelFile, input, cl, ml, so)
-		if err != nil {
-			return err
-		}
-
-		jsonResult, _ := json.Marshal(resp)
-		log.Debug().Msgf("Response: %s", jsonResult)
-
-		// Return the prediction in the response body
-		return c.JSON(resp)
-	}
-}
--- a/core/http/endpoints/openai/embeddings.go
+++ b/core/http/endpoints/openai/embeddings.go
@ -1,35 +0,0 @@
-package openai
-
-import (
-	"encoding/json"
-	"fmt"
-
-	"github.com/go-skynet/LocalAI/core/backend"
-	"github.com/go-skynet/LocalAI/core/services"
-	"github.com/go-skynet/LocalAI/pkg/model"
-	"github.com/go-skynet/LocalAI/pkg/schema"
-
-	"github.com/gofiber/fiber/v2"
-	"github.com/rs/zerolog/log"
-)
-
-// https://platform.openai.com/docs/api-reference/embeddings
-func EmbeddingsEndpoint(cl *services.ConfigLoader, ml *model.ModelLoader, so *schema.StartupOptions) func(c *fiber.Ctx) error {
-	return func(c *fiber.Ctx) error {
-		modelFile, input, err := readInput(c, so, ml, true)
-		if err != nil {
-			return fmt.Errorf("failed reading parameters from request:%w", err)
-		}
-
-		resp, err := backend.EmbeddingOpenAIRequest(modelFile, input, cl, ml, so)
-		if err != nil {
-			return err
-		}
-
-		jsonResult, _ := json.Marshal(resp)
-		log.Debug().Msgf("Response: %s", jsonResult)
-
-		// Return the prediction in the response body
-		return c.JSON(resp)
-	}
-}
--- a/core/http/endpoints/openai/image.go
+++ b/core/http/endpoints/openai/image.go
@ -1,48 +0,0 @@
-package openai
-
-import (
-	"encoding/json"
-	"fmt"
-
-	"github.com/go-skynet/LocalAI/core/backend"
-	"github.com/go-skynet/LocalAI/core/services"
-	"github.com/go-skynet/LocalAI/pkg/model"
-	"github.com/go-skynet/LocalAI/pkg/schema"
-	"github.com/gofiber/fiber/v2"
-	"github.com/rs/zerolog/log"
-)
-
-// https://platform.openai.com/docs/api-reference/images/create
-
-/*
-*
-
-	curl http://localhost:8080/v1/images/generations \
-	  -H "Content-Type: application/json" \
-	  -d '{
-	    "prompt": "A cute baby sea otter",
-	    "n": 1,
-	    "size": "512x512"
-	  }'
-
-*
-*/
-func ImageEndpoint(cl *services.ConfigLoader, ml *model.ModelLoader, so *schema.StartupOptions) func(c *fiber.Ctx) error {
-	return func(c *fiber.Ctx) error {
-		modelName, input, err := readInput(c, so, ml, true)
-		if err != nil {
-			return fmt.Errorf("failed reading parameters from request:%w", err)
-		}
-
-		resp, err := backend.ImageGenerationOpenAIRequest(modelName, input, cl, ml, so)
-		if err != nil {
-			return fmt.Errorf("error generating image request: +%w", err)
-		}
-
-		jsonResult, _ := json.Marshal(resp)
-		log.Debug().Msgf("Response: %s", jsonResult)
-
-		// Return the prediction in the response body
-		return c.JSON(resp)
-	}
-}
--- a/core/http/endpoints/openai/list.go
+++ b/core/http/endpoints/openai/list.go
@ -1,69 +0,0 @@
-package openai
-
-import (
-	"regexp"
-
-	"github.com/go-skynet/LocalAI/core/services"
-	"github.com/go-skynet/LocalAI/pkg/model"
-	"github.com/go-skynet/LocalAI/pkg/schema"
-	"github.com/gofiber/fiber/v2"
-)
-
-func ListModelsEndpoint(cl *services.ConfigLoader, ml *model.ModelLoader) func(ctx *fiber.Ctx) error {
-	return func(c *fiber.Ctx) error {
-		models, err := ml.ListModels()
-		if err != nil {
-			return err
-		}
-		var mm map[string]interface{} = map[string]interface{}{}
-
-		openAIModels := []schema.OpenAIModel{}
-
-		var filterFn func(name string) bool
-		filter := c.Query("filter")
-
-		// If filter is not specified, do not filter the list by model name
-		if filter == "" {
-			filterFn = func(_ string) bool { return true }
-		} else {
-			// If filter _IS_ specified, we compile it to a regex which is used to create the filterFn
-			rxp, err := regexp.Compile(filter)
-			if err != nil {
-				return err
-			}
-			filterFn = func(name string) bool {
-				return rxp.MatchString(name)
-			}
-		}
-
-		// By default, exclude any loose files that are already referenced by a configuration file.
-		excludeConfigured := c.QueryBool("excludeConfigured", true)
-
-		// Start with the known configurations
-		for _, c := range cl.GetAllConfigs() {
-			if excludeConfigured {
-				mm[c.Model] = nil
-			}
-
-			if filterFn(c.Name) {
-				openAIModels = append(openAIModels, schema.OpenAIModel{ID: c.Name, Object: "model"})
-			}
-		}
-
-		// Then iterate through the loose files:
-		for _, m := range models {
-			// And only adds them if they shouldn't be skipped.
-			if _, exists := mm[m]; !exists && filterFn(m) {
-				openAIModels = append(openAIModels, schema.OpenAIModel{ID: m, Object: "model"})
-			}
-		}
-
-		return c.JSON(struct {
-			Object string               `json:"object"`
-			Data   []schema.OpenAIModel `json:"data"`
-		}{
-			Object: "list",
-			Data:   openAIModels,
-		})
-	}
-}
--- a/core/http/endpoints/openai/request.go
+++ b/core/http/endpoints/openai/request.go
@ -1,57 +0,0 @@
-package openai
-
-import (
-	"context"
-	"encoding/json"
-	"fmt"
-	"strings"
-
-	"github.com/go-skynet/LocalAI/pkg/model"
-	"github.com/go-skynet/LocalAI/pkg/schema"
-	"github.com/gofiber/fiber/v2"
-	"github.com/rs/zerolog/log"
-)
-
-func readInput(c *fiber.Ctx, o *schema.StartupOptions, ml *model.ModelLoader, randomModel bool) (string, *schema.OpenAIRequest, error) {
-	input := new(schema.OpenAIRequest)
-	ctx, cancel := context.WithCancel(o.Context)
-	input.Context = ctx
-	input.Cancel = cancel
-	// Get input data from the request body
-	if err := c.BodyParser(input); err != nil {
-		return "", nil, fmt.Errorf("failed parsing request body: %w", err)
-	}
-
-	modelFile := input.Model
-
-	if c.Params("model") != "" {
-		modelFile = c.Params("model")
-	}
-
-	received, _ := json.Marshal(input)
-
-	log.Debug().Msgf("Request received: %s", string(received))
-
-	// Set model from bearer token, if available
-	bearer := strings.TrimLeft(c.Get("authorization"), "Bearer ")
-	bearerExists := bearer != "" && ml.ExistsInModelPath(bearer)
-
-	// If no model was specified, take the first available
-	if modelFile == "" && !bearerExists && randomModel {
-		models, _ := ml.ListModels()
-		if len(models) > 0 {
-			modelFile = models[0]
-			log.Debug().Msgf("No model specified, using: %s", modelFile)
-		} else {
-			log.Debug().Msgf("No model specified, returning error")
-			return "", nil, fmt.Errorf("no model specified")
-		}
-	}
-
-	// If a model is found in bearer token takes precedence
-	if bearerExists {
-		log.Debug().Msgf("Using model from bearer token: %s", bearer)
-		modelFile = bearer
-	}
-	return modelFile, input, nil
-}
--- a/core/http/endpoints/openai/transcription.go
+++ b/core/http/endpoints/openai/transcription.go
@ -1,49 +0,0 @@
-package openai
-
-import (
-	"fmt"
-	"net/http"
-	"os"
-	"path"
-
-	"github.com/go-skynet/LocalAI/core/backend"
-	"github.com/go-skynet/LocalAI/core/services"
-	"github.com/go-skynet/LocalAI/pkg/model"
-	"github.com/go-skynet/LocalAI/pkg/schema"
-	"github.com/go-skynet/LocalAI/pkg/utils"
-
-	"github.com/gofiber/fiber/v2"
-	"github.com/rs/zerolog/log"
-)
-
-// https://platform.openai.com/docs/api-reference/audio/create
-func TranscriptEndpoint(cl *services.ConfigLoader, ml *model.ModelLoader, so *schema.StartupOptions) func(c *fiber.Ctx) error {
-	return func(c *fiber.Ctx) error {
-		modelName, input, err := readInput(c, so, ml, true)
-		if err != nil {
-			return fmt.Errorf("failed reading parameters from request:%w", err)
-		}
-
-		// retrieve the file data from the request
-		file, err := c.FormFile("file")
-		if err != nil {
-			return err
-		}
-
-		dst, err := utils.CreateTempFileFromMultipartFile(file, "", "transcription") // 3rd param formerly whisper
-		if err != nil {
-			return err
-		}
-
-		log.Debug().Msgf("Audio file copied to: %+v", dst)
-		defer os.RemoveAll(path.Dir(dst))
-
-		tr, err := backend.TranscriptionOpenAIRequest(modelName, input, dst, cl, ml, so)
-		if err != nil {
-			return fmt.Errorf("error generating transcription request: +%w", err)
-		}
-		log.Debug().Msgf("Trascribed: %+v", tr)
-		// TODO: handle different outputs here
-		return c.Status(http.StatusOK).JSON(tr)
-	}
-}