Mirror of https://github.com/mudler/LocalAI.git (synced 2025-05-28 14:35:00 +00:00)
refactor: move remaining api packages to core (#1731)
* core 1
* api/openai/files fix
* core 2 - core/config
* move over core api.go and tests to the start of core/http
* move over localai specific endpoints to core/http, begin the service/endpoint split there
* refactor big chunk on the plane
* refactor chunk 2 on plane, next step: port and modify changes to request.go
* easy fixes for request.go, major changes not done yet
* lintfix
* json tag lintfix?
* gitignore and .keep files
* strange fix attempt: rename the config dir?
This commit is contained in:
Parent: 316de82f51
Commit: 1c312685aa
50 changed files with 1440 additions and 1206 deletions
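For orientation before the file-by-file diff: the core of this refactor is that the per-model config.Config becomes config.BackendConfig and the application-wide options.Option becomes config.ApplicationConfig, with both types now living under core/config. The snippet below is a minimal caller-side sketch of how the two are constructed after this change, using only the constructors and options visible in this diff; the model path and model file name are placeholders.

package main

import (
	"context"

	"github.com/go-skynet/LocalAI/core/config"
)

func main() {
	// Application-wide settings, assembled with the functional options that moved into core/config.
	appConfig := config.NewApplicationConfig(
		config.WithContext(context.Background()),
		config.WithModelPath("/models"), // placeholder path
		config.WithDebug(true),
		config.WithUploadLimitMB(32),
	)

	// Per-model settings; DefaultConfig seeds the prediction defaults (TopP, TopK, Maxtokens, Temperature).
	// Dereferenced because the backend helpers in this change take the struct by value.
	backendCfg := *config.DefaultConfig("my-model.gguf") // placeholder model file

	_ = appConfig
	_ = backendCfg
}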
@@ -3,36 +3,36 @@ package backend
 import (
 	"fmt"
 
-	config "github.com/go-skynet/LocalAI/core/config"
-	"github.com/go-skynet/LocalAI/core/options"
+	"github.com/go-skynet/LocalAI/core/config"
 
 	"github.com/go-skynet/LocalAI/pkg/grpc"
 	model "github.com/go-skynet/LocalAI/pkg/model"
 )
 
-func ModelEmbedding(s string, tokens []int, loader *model.ModelLoader, c config.Config, o *options.Option) (func() ([]float32, error), error) {
-	if !c.Embeddings {
+func ModelEmbedding(s string, tokens []int, loader *model.ModelLoader, backendConfig config.BackendConfig, appConfig *config.ApplicationConfig) (func() ([]float32, error), error) {
+	if !backendConfig.Embeddings {
 		return nil, fmt.Errorf("endpoint disabled for this model by API configuration")
 	}
 
-	modelFile := c.Model
+	modelFile := backendConfig.Model
 
-	grpcOpts := gRPCModelOpts(c)
+	grpcOpts := gRPCModelOpts(backendConfig)
 
 	var inferenceModel interface{}
 	var err error
 
-	opts := modelOpts(c, o, []model.Option{
+	opts := modelOpts(backendConfig, appConfig, []model.Option{
 		model.WithLoadGRPCLoadModelOpts(grpcOpts),
-		model.WithThreads(uint32(c.Threads)),
-		model.WithAssetDir(o.AssetsDestination),
+		model.WithThreads(uint32(backendConfig.Threads)),
+		model.WithAssetDir(appConfig.AssetsDestination),
 		model.WithModel(modelFile),
-		model.WithContext(o.Context),
+		model.WithContext(appConfig.Context),
 	})
 
-	if c.Backend == "" {
+	if backendConfig.Backend == "" {
 		inferenceModel, err = loader.GreedyLoader(opts...)
 	} else {
-		opts = append(opts, model.WithBackendString(c.Backend))
+		opts = append(opts, model.WithBackendString(backendConfig.Backend))
 		inferenceModel, err = loader.BackendLoader(opts...)
 	}
 	if err != nil {
@@ -43,7 +43,7 @@ func ModelEmbedding(s string, tokens []int, loader *model.ModelLoader, c config.
 	switch model := inferenceModel.(type) {
 	case grpc.Backend:
 		fn = func() ([]float32, error) {
-			predictOptions := gRPCPredictOpts(c, loader.ModelPath)
+			predictOptions := gRPCPredictOpts(backendConfig, loader.ModelPath)
 			if len(tokens) > 0 {
 				embeds := []int32{}
@@ -52,7 +52,7 @@ func ModelEmbedding(s string, tokens []int, loader *model.ModelLoader, c config.
 			}
 			predictOptions.EmbeddingTokens = embeds
 
-			res, err := model.Embeddings(o.Context, predictOptions)
+			res, err := model.Embeddings(appConfig.Context, predictOptions)
 			if err != nil {
 				return nil, err
 			}
@@ -61,7 +61,7 @@ func ModelEmbedding(s string, tokens []int, loader *model.ModelLoader, c config.
 			}
 			predictOptions.Embeddings = s
 
-			res, err := model.Embeddings(o.Context, predictOptions)
+			res, err := model.Embeddings(appConfig.Context, predictOptions)
 			if err != nil {
 				return nil, err
 			}
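The refactored ModelEmbedding above now takes config.BackendConfig and *config.ApplicationConfig directly. A caller-side sketch, assuming backendCfg and appConfig built as in the earlier example and the usual imports for core/backend and pkg/model; the model directory and input text are placeholders.

loader := model.NewModelLoader("/models") // placeholder model directory

// The returned closure defers the actual gRPC call until it is invoked.
embedFn, err := backend.ModelEmbedding("hello world", nil, loader, backendCfg, appConfig)
if err != nil {
	log.Fatal(err)
}

vector, err := embedFn()
if err != nil {
	log.Fatal(err)
}
fmt.Printf("embedding length: %d\n", len(vector))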
@ -1,33 +1,33 @@
|
|||
package backend
|
||||
|
||||
import (
|
||||
config "github.com/go-skynet/LocalAI/core/config"
|
||||
"github.com/go-skynet/LocalAI/core/options"
|
||||
"github.com/go-skynet/LocalAI/core/config"
|
||||
|
||||
"github.com/go-skynet/LocalAI/pkg/grpc/proto"
|
||||
model "github.com/go-skynet/LocalAI/pkg/model"
|
||||
)
|
||||
|
||||
func ImageGeneration(height, width, mode, step, seed int, positive_prompt, negative_prompt, src, dst string, loader *model.ModelLoader, c config.Config, o *options.Option) (func() error, error) {
|
||||
func ImageGeneration(height, width, mode, step, seed int, positive_prompt, negative_prompt, src, dst string, loader *model.ModelLoader, backendConfig config.BackendConfig, appConfig *config.ApplicationConfig) (func() error, error) {
|
||||
|
||||
opts := modelOpts(c, o, []model.Option{
|
||||
model.WithBackendString(c.Backend),
|
||||
model.WithAssetDir(o.AssetsDestination),
|
||||
model.WithThreads(uint32(c.Threads)),
|
||||
model.WithContext(o.Context),
|
||||
model.WithModel(c.Model),
|
||||
opts := modelOpts(backendConfig, appConfig, []model.Option{
|
||||
model.WithBackendString(backendConfig.Backend),
|
||||
model.WithAssetDir(appConfig.AssetsDestination),
|
||||
model.WithThreads(uint32(backendConfig.Threads)),
|
||||
model.WithContext(appConfig.Context),
|
||||
model.WithModel(backendConfig.Model),
|
||||
model.WithLoadGRPCLoadModelOpts(&proto.ModelOptions{
|
||||
CUDA: c.CUDA || c.Diffusers.CUDA,
|
||||
SchedulerType: c.Diffusers.SchedulerType,
|
||||
PipelineType: c.Diffusers.PipelineType,
|
||||
CFGScale: c.Diffusers.CFGScale,
|
||||
LoraAdapter: c.LoraAdapter,
|
||||
LoraScale: c.LoraScale,
|
||||
LoraBase: c.LoraBase,
|
||||
IMG2IMG: c.Diffusers.IMG2IMG,
|
||||
CLIPModel: c.Diffusers.ClipModel,
|
||||
CLIPSubfolder: c.Diffusers.ClipSubFolder,
|
||||
CLIPSkip: int32(c.Diffusers.ClipSkip),
|
||||
ControlNet: c.Diffusers.ControlNet,
|
||||
CUDA: backendConfig.CUDA || backendConfig.Diffusers.CUDA,
|
||||
SchedulerType: backendConfig.Diffusers.SchedulerType,
|
||||
PipelineType: backendConfig.Diffusers.PipelineType,
|
||||
CFGScale: backendConfig.Diffusers.CFGScale,
|
||||
LoraAdapter: backendConfig.LoraAdapter,
|
||||
LoraScale: backendConfig.LoraScale,
|
||||
LoraBase: backendConfig.LoraBase,
|
||||
IMG2IMG: backendConfig.Diffusers.IMG2IMG,
|
||||
CLIPModel: backendConfig.Diffusers.ClipModel,
|
||||
CLIPSubfolder: backendConfig.Diffusers.ClipSubFolder,
|
||||
CLIPSkip: int32(backendConfig.Diffusers.ClipSkip),
|
||||
ControlNet: backendConfig.Diffusers.ControlNet,
|
||||
}),
|
||||
})
|
||||
|
||||
|
@ -40,19 +40,19 @@ func ImageGeneration(height, width, mode, step, seed int, positive_prompt, negat
|
|||
|
||||
fn := func() error {
|
||||
_, err := inferenceModel.GenerateImage(
|
||||
o.Context,
|
||||
appConfig.Context,
|
||||
&proto.GenerateImageRequest{
|
||||
Height: int32(height),
|
||||
Width: int32(width),
|
||||
Mode: int32(mode),
|
||||
Step: int32(step),
|
||||
Seed: int32(seed),
|
||||
CLIPSkip: int32(c.Diffusers.ClipSkip),
|
||||
CLIPSkip: int32(backendConfig.Diffusers.ClipSkip),
|
||||
PositivePrompt: positive_prompt,
|
||||
NegativePrompt: negative_prompt,
|
||||
Dst: dst,
|
||||
Src: src,
|
||||
EnableParameters: c.Diffusers.EnableParameters,
|
||||
EnableParameters: backendConfig.Diffusers.EnableParameters,
|
||||
})
|
||||
return err
|
||||
}
|
||||
|
|
|
@ -8,8 +8,8 @@ import (
|
|||
"sync"
|
||||
"unicode/utf8"
|
||||
|
||||
config "github.com/go-skynet/LocalAI/core/config"
|
||||
"github.com/go-skynet/LocalAI/core/options"
|
||||
"github.com/go-skynet/LocalAI/core/config"
|
||||
|
||||
"github.com/go-skynet/LocalAI/pkg/gallery"
|
||||
"github.com/go-skynet/LocalAI/pkg/grpc"
|
||||
model "github.com/go-skynet/LocalAI/pkg/model"
|
||||
|
@ -26,7 +26,7 @@ type TokenUsage struct {
|
|||
Completion int
|
||||
}
|
||||
|
||||
func ModelInference(ctx context.Context, s string, images []string, loader *model.ModelLoader, c config.Config, o *options.Option, tokenCallback func(string, TokenUsage) bool) (func() (LLMResponse, error), error) {
|
||||
func ModelInference(ctx context.Context, s string, images []string, loader *model.ModelLoader, c config.BackendConfig, o *config.ApplicationConfig, tokenCallback func(string, TokenUsage) bool) (func() (LLMResponse, error), error) {
|
||||
modelFile := c.Model
|
||||
|
||||
grpcOpts := gRPCModelOpts(c)
|
||||
|
@ -140,7 +140,7 @@ func ModelInference(ctx context.Context, s string, images []string, loader *mode
|
|||
var cutstrings map[string]*regexp.Regexp = make(map[string]*regexp.Regexp)
|
||||
var mu sync.Mutex = sync.Mutex{}
|
||||
|
||||
func Finetune(config config.Config, input, prediction string) string {
|
||||
func Finetune(config config.BackendConfig, input, prediction string) string {
|
||||
if config.Echo {
|
||||
prediction = input + prediction
|
||||
}
|
||||
|
|
|
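ModelInference in core/backend follows the same pattern: the old config.Config and *options.Option pair becomes config.BackendConfig and *config.ApplicationConfig, with a token callback for streaming. A rough usage sketch under the same assumptions as the earlier examples; the prompt is a placeholder and the callback semantics are inferred from the signature only.

fn, err := backend.ModelInference(appConfig.Context, "Tell me a joke", nil, loader, backendCfg, appConfig,
	func(token string, usage backend.TokenUsage) bool {
		fmt.Print(token) // called per token while the model streams
		return true      // assumption: returning true keeps streaming
	})
if err != nil {
	log.Fatal(err)
}

resp, err := fn() // runs the full inference and returns the aggregated LLMResponse
if err != nil {
	log.Fatal(err)
}
fmt.Printf("\n%+v\n", resp)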
@@ -4,19 +4,17 @@ import (
 	"os"
 	"path/filepath"
 
+	"github.com/go-skynet/LocalAI/core/config"
 	pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
 	model "github.com/go-skynet/LocalAI/pkg/model"
-
-	config "github.com/go-skynet/LocalAI/core/config"
-	"github.com/go-skynet/LocalAI/core/options"
 )
 
-func modelOpts(c config.Config, o *options.Option, opts []model.Option) []model.Option {
-	if o.SingleBackend {
+func modelOpts(c config.BackendConfig, so *config.ApplicationConfig, opts []model.Option) []model.Option {
+	if so.SingleBackend {
 		opts = append(opts, model.WithSingleActiveBackend())
 	}
 
-	if o.ParallelBackendRequests {
+	if so.ParallelBackendRequests {
 		opts = append(opts, model.EnableParallelRequests)
 	}
 
@@ -28,14 +26,14 @@ func modelOpts(c config.Config, o *options.Option, opts []model.Option) []model.
 		opts = append(opts, model.WithGRPCAttemptsDelay(c.GRPC.AttemptsSleepTime))
 	}
 
-	for k, v := range o.ExternalGRPCBackends {
+	for k, v := range so.ExternalGRPCBackends {
 		opts = append(opts, model.WithExternalBackend(k, v))
 	}
 
 	return opts
 }
 
-func gRPCModelOpts(c config.Config) *pb.ModelOptions {
+func gRPCModelOpts(c config.BackendConfig) *pb.ModelOptions {
 	b := 512
 	if c.Batch != 0 {
 		b = c.Batch
@@ -84,7 +82,7 @@ func gRPCModelOpts(c config.Config) *pb.ModelOptions {
 	}
 }
 
-func gRPCPredictOpts(c config.Config, modelPath string) *pb.PredictOptions {
+func gRPCPredictOpts(c config.BackendConfig, modelPath string) *pb.PredictOptions {
 	promptCachePath := ""
 	if c.PromptCachePath != "" {
 		p := filepath.Join(modelPath, c.PromptCachePath)
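The unexported modelOpts helper now pulls shared knobs from *config.ApplicationConfig instead of *options.Option; the mechanism is plain option accumulation. A small sketch of the same idea, restricted to fields and options that appear in this diff, for readers who want to see how app-level settings become model.Option values.

// Mirror of what modelOpts assembles internally (sketch, not the exported API).
opts := []model.Option{
	model.WithModel(backendCfg.Model),
	model.WithContext(appConfig.Context),
	model.WithAssetDir(appConfig.AssetsDestination),
}
if appConfig.SingleBackend {
	opts = append(opts, model.WithSingleActiveBackend())
}
if appConfig.ParallelBackendRequests {
	opts = append(opts, model.EnableParallelRequests)
}
for name, uri := range appConfig.ExternalGRPCBackends {
	opts = append(opts, model.WithExternalBackend(name, uri))
}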
@ -4,25 +4,24 @@ import (
|
|||
"context"
|
||||
"fmt"
|
||||
|
||||
config "github.com/go-skynet/LocalAI/core/config"
|
||||
"github.com/go-skynet/LocalAI/core/config"
|
||||
"github.com/go-skynet/LocalAI/core/schema"
|
||||
|
||||
"github.com/go-skynet/LocalAI/core/options"
|
||||
"github.com/go-skynet/LocalAI/pkg/grpc/proto"
|
||||
model "github.com/go-skynet/LocalAI/pkg/model"
|
||||
)
|
||||
|
||||
func ModelTranscription(audio, language string, loader *model.ModelLoader, c config.Config, o *options.Option) (*schema.Result, error) {
|
||||
func ModelTranscription(audio, language string, ml *model.ModelLoader, backendConfig config.BackendConfig, appConfig *config.ApplicationConfig) (*schema.Result, error) {
|
||||
|
||||
opts := modelOpts(c, o, []model.Option{
|
||||
opts := modelOpts(backendConfig, appConfig, []model.Option{
|
||||
model.WithBackendString(model.WhisperBackend),
|
||||
model.WithModel(c.Model),
|
||||
model.WithContext(o.Context),
|
||||
model.WithThreads(uint32(c.Threads)),
|
||||
model.WithAssetDir(o.AssetsDestination),
|
||||
model.WithModel(backendConfig.Model),
|
||||
model.WithContext(appConfig.Context),
|
||||
model.WithThreads(uint32(backendConfig.Threads)),
|
||||
model.WithAssetDir(appConfig.AssetsDestination),
|
||||
})
|
||||
|
||||
whisperModel, err := o.Loader.BackendLoader(opts...)
|
||||
whisperModel, err := ml.BackendLoader(opts...)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
@ -34,6 +33,6 @@ func ModelTranscription(audio, language string, loader *model.ModelLoader, c con
|
|||
return whisperModel.AudioTranscription(context.Background(), &proto.TranscriptRequest{
|
||||
Dst: audio,
|
||||
Language: language,
|
||||
Threads: uint32(c.Threads),
|
||||
Threads: uint32(backendConfig.Threads),
|
||||
})
|
||||
}
|
||||
|
|
|
@ -6,8 +6,8 @@ import (
|
|||
"os"
|
||||
"path/filepath"
|
||||
|
||||
config "github.com/go-skynet/LocalAI/core/config"
|
||||
"github.com/go-skynet/LocalAI/core/options"
|
||||
"github.com/go-skynet/LocalAI/core/config"
|
||||
|
||||
"github.com/go-skynet/LocalAI/pkg/grpc/proto"
|
||||
model "github.com/go-skynet/LocalAI/pkg/model"
|
||||
"github.com/go-skynet/LocalAI/pkg/utils"
|
||||
|
@ -29,22 +29,22 @@ func generateUniqueFileName(dir, baseName, ext string) string {
|
|||
}
|
||||
}
|
||||
|
||||
func ModelTTS(backend, text, modelFile string, loader *model.ModelLoader, o *options.Option, c config.Config) (string, *proto.Result, error) {
|
||||
func ModelTTS(backend, text, modelFile string, loader *model.ModelLoader, appConfig *config.ApplicationConfig, backendConfig config.BackendConfig) (string, *proto.Result, error) {
|
||||
bb := backend
|
||||
if bb == "" {
|
||||
bb = model.PiperBackend
|
||||
}
|
||||
|
||||
grpcOpts := gRPCModelOpts(c)
|
||||
grpcOpts := gRPCModelOpts(backendConfig)
|
||||
|
||||
opts := modelOpts(config.Config{}, o, []model.Option{
|
||||
opts := modelOpts(config.BackendConfig{}, appConfig, []model.Option{
|
||||
model.WithBackendString(bb),
|
||||
model.WithModel(modelFile),
|
||||
model.WithContext(o.Context),
|
||||
model.WithAssetDir(o.AssetsDestination),
|
||||
model.WithContext(appConfig.Context),
|
||||
model.WithAssetDir(appConfig.AssetsDestination),
|
||||
model.WithLoadGRPCLoadModelOpts(grpcOpts),
|
||||
})
|
||||
piperModel, err := o.Loader.BackendLoader(opts...)
|
||||
piperModel, err := loader.BackendLoader(opts...)
|
||||
if err != nil {
|
||||
return "", nil, err
|
||||
}
|
||||
|
@ -53,19 +53,19 @@ func ModelTTS(backend, text, modelFile string, loader *model.ModelLoader, o *opt
|
|||
return "", nil, fmt.Errorf("could not load piper model")
|
||||
}
|
||||
|
||||
if err := os.MkdirAll(o.AudioDir, 0755); err != nil {
|
||||
if err := os.MkdirAll(appConfig.AudioDir, 0755); err != nil {
|
||||
return "", nil, fmt.Errorf("failed creating audio directory: %s", err)
|
||||
}
|
||||
|
||||
fileName := generateUniqueFileName(o.AudioDir, "piper", ".wav")
|
||||
filePath := filepath.Join(o.AudioDir, fileName)
|
||||
fileName := generateUniqueFileName(appConfig.AudioDir, "piper", ".wav")
|
||||
filePath := filepath.Join(appConfig.AudioDir, fileName)
|
||||
|
||||
// If the model file is not empty, we pass it joined with the model path
|
||||
modelPath := ""
|
||||
if modelFile != "" {
|
||||
if bb != model.TransformersMusicGen {
|
||||
modelPath = filepath.Join(o.Loader.ModelPath, modelFile)
|
||||
if err := utils.VerifyPath(modelPath, o.Loader.ModelPath); err != nil {
|
||||
modelPath = filepath.Join(loader.ModelPath, modelFile)
|
||||
if err := utils.VerifyPath(modelPath, appConfig.ModelPath); err != nil {
|
||||
return "", nil, err
|
||||
}
|
||||
} else {
|
||||
|
|
|
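ModelTTS now receives the *config.ApplicationConfig (for the audio output directory and context) ahead of the config.BackendConfig. A caller-side sketch under the same assumptions as the earlier examples; the text and voice model file are placeholders, and an empty backend name falls back to the piper backend as the code above shows.

filePath, _, err := backend.ModelTTS("", "Hello from LocalAI", "voice-model.bin", loader, appConfig, backendCfg)
if err != nil {
	log.Fatal(err)
}
fmt.Println("generated audio at", filePath) // written under appConfig.AudioDir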
@@ -1,4 +1,4 @@
-package options
+package config
 
 import (
 	"context"
@@ -6,16 +6,14 @@ import (
 	"encoding/json"
 	"time"
 
-	"github.com/go-skynet/LocalAI/metrics"
 	"github.com/go-skynet/LocalAI/pkg/gallery"
-	model "github.com/go-skynet/LocalAI/pkg/model"
 	"github.com/rs/zerolog/log"
 )
 
-type Option struct {
+type ApplicationConfig struct {
 	Context context.Context
 	ConfigFile string
-	Loader *model.ModelLoader
+	ModelPath string
 	UploadLimitMB, Threads, ContextSize int
 	F16 bool
 	Debug, DisableMessage bool
@@ -27,7 +25,6 @@ type Option struct {
 	PreloadModelsFromPath string
 	CORSAllowOrigins string
 	ApiKeys []string
-	Metrics *metrics.Metrics
 
 	ModelLibraryURL string
 
@@ -52,10 +49,10 @@ type Option struct {
 	WatchDogBusyTimeout, WatchDogIdleTimeout time.Duration
 }
 
-type AppOption func(*Option)
+type AppOption func(*ApplicationConfig)
 
-func NewOptions(o ...AppOption) *Option {
-	opt := &Option{
+func NewApplicationConfig(o ...AppOption) *ApplicationConfig {
+	opt := &ApplicationConfig{
 		Context: context.Background(),
 		UploadLimitMB: 15,
 		Threads: 1,
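Every option constructor in this file now closes over *ApplicationConfig instead of *Option. Since AppOption is just func(*ApplicationConfig), code outside the package can define its own options in the same style; the example below is hypothetical (not part of this change set) and only touches fields that exist on the struct.

// A user-defined option following the same functional-options pattern.
withSmallUploads := func(o *config.ApplicationConfig) {
	o.UploadLimitMB = 4
	o.Threads = 2
}

appCfg := config.NewApplicationConfig(
	withSmallUploads,
	config.EnableWatchDogIdleCheck, // exported option values mix in directly
)
_ = appCfg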
@ -70,63 +67,69 @@ func NewOptions(o ...AppOption) *Option {
|
|||
}
|
||||
|
||||
func WithModelsURL(urls ...string) AppOption {
|
||||
return func(o *Option) {
|
||||
return func(o *ApplicationConfig) {
|
||||
o.ModelsURL = urls
|
||||
}
|
||||
}
|
||||
|
||||
func WithModelPath(path string) AppOption {
|
||||
return func(o *ApplicationConfig) {
|
||||
o.ModelPath = path
|
||||
}
|
||||
}
|
||||
|
||||
func WithCors(b bool) AppOption {
|
||||
return func(o *Option) {
|
||||
return func(o *ApplicationConfig) {
|
||||
o.CORS = b
|
||||
}
|
||||
}
|
||||
|
||||
func WithModelLibraryURL(url string) AppOption {
|
||||
return func(o *Option) {
|
||||
return func(o *ApplicationConfig) {
|
||||
o.ModelLibraryURL = url
|
||||
}
|
||||
}
|
||||
|
||||
var EnableWatchDog = func(o *Option) {
|
||||
var EnableWatchDog = func(o *ApplicationConfig) {
|
||||
o.WatchDog = true
|
||||
}
|
||||
|
||||
var EnableWatchDogIdleCheck = func(o *Option) {
|
||||
var EnableWatchDogIdleCheck = func(o *ApplicationConfig) {
|
||||
o.WatchDog = true
|
||||
o.WatchDogIdle = true
|
||||
}
|
||||
|
||||
var EnableWatchDogBusyCheck = func(o *Option) {
|
||||
var EnableWatchDogBusyCheck = func(o *ApplicationConfig) {
|
||||
o.WatchDog = true
|
||||
o.WatchDogBusy = true
|
||||
}
|
||||
|
||||
func SetWatchDogBusyTimeout(t time.Duration) AppOption {
|
||||
return func(o *Option) {
|
||||
return func(o *ApplicationConfig) {
|
||||
o.WatchDogBusyTimeout = t
|
||||
}
|
||||
}
|
||||
|
||||
func SetWatchDogIdleTimeout(t time.Duration) AppOption {
|
||||
return func(o *Option) {
|
||||
return func(o *ApplicationConfig) {
|
||||
o.WatchDogIdleTimeout = t
|
||||
}
|
||||
}
|
||||
|
||||
var EnableSingleBackend = func(o *Option) {
|
||||
var EnableSingleBackend = func(o *ApplicationConfig) {
|
||||
o.SingleBackend = true
|
||||
}
|
||||
|
||||
var EnableParallelBackendRequests = func(o *Option) {
|
||||
var EnableParallelBackendRequests = func(o *ApplicationConfig) {
|
||||
o.ParallelBackendRequests = true
|
||||
}
|
||||
|
||||
var EnableGalleriesAutoload = func(o *Option) {
|
||||
var EnableGalleriesAutoload = func(o *ApplicationConfig) {
|
||||
o.AutoloadGalleries = true
|
||||
}
|
||||
|
||||
func WithExternalBackend(name string, uri string) AppOption {
|
||||
return func(o *Option) {
|
||||
return func(o *ApplicationConfig) {
|
||||
if o.ExternalGRPCBackends == nil {
|
||||
o.ExternalGRPCBackends = make(map[string]string)
|
||||
}
|
||||
|
@ -135,25 +138,25 @@ func WithExternalBackend(name string, uri string) AppOption {
|
|||
}
|
||||
|
||||
func WithCorsAllowOrigins(b string) AppOption {
|
||||
return func(o *Option) {
|
||||
return func(o *ApplicationConfig) {
|
||||
o.CORSAllowOrigins = b
|
||||
}
|
||||
}
|
||||
|
||||
func WithBackendAssetsOutput(out string) AppOption {
|
||||
return func(o *Option) {
|
||||
return func(o *ApplicationConfig) {
|
||||
o.AssetsDestination = out
|
||||
}
|
||||
}
|
||||
|
||||
func WithBackendAssets(f embed.FS) AppOption {
|
||||
return func(o *Option) {
|
||||
return func(o *ApplicationConfig) {
|
||||
o.BackendAssets = f
|
||||
}
|
||||
}
|
||||
|
||||
func WithStringGalleries(galls string) AppOption {
|
||||
return func(o *Option) {
|
||||
return func(o *ApplicationConfig) {
|
||||
if galls == "" {
|
||||
log.Debug().Msgf("no galleries to load")
|
||||
o.Galleries = []gallery.Gallery{}
|
||||
|
@ -168,102 +171,96 @@ func WithStringGalleries(galls string) AppOption {
|
|||
}
|
||||
|
||||
func WithGalleries(galleries []gallery.Gallery) AppOption {
|
||||
return func(o *Option) {
|
||||
return func(o *ApplicationConfig) {
|
||||
o.Galleries = append(o.Galleries, galleries...)
|
||||
}
|
||||
}
|
||||
|
||||
func WithContext(ctx context.Context) AppOption {
|
||||
return func(o *Option) {
|
||||
return func(o *ApplicationConfig) {
|
||||
o.Context = ctx
|
||||
}
|
||||
}
|
||||
|
||||
func WithYAMLConfigPreload(configFile string) AppOption {
|
||||
return func(o *Option) {
|
||||
return func(o *ApplicationConfig) {
|
||||
o.PreloadModelsFromPath = configFile
|
||||
}
|
||||
}
|
||||
|
||||
func WithJSONStringPreload(configFile string) AppOption {
|
||||
return func(o *Option) {
|
||||
return func(o *ApplicationConfig) {
|
||||
o.PreloadJSONModels = configFile
|
||||
}
|
||||
}
|
||||
func WithConfigFile(configFile string) AppOption {
|
||||
return func(o *Option) {
|
||||
return func(o *ApplicationConfig) {
|
||||
o.ConfigFile = configFile
|
||||
}
|
||||
}
|
||||
|
||||
func WithModelLoader(loader *model.ModelLoader) AppOption {
|
||||
return func(o *Option) {
|
||||
o.Loader = loader
|
||||
}
|
||||
}
|
||||
|
||||
func WithUploadLimitMB(limit int) AppOption {
|
||||
return func(o *Option) {
|
||||
return func(o *ApplicationConfig) {
|
||||
o.UploadLimitMB = limit
|
||||
}
|
||||
}
|
||||
|
||||
func WithThreads(threads int) AppOption {
|
||||
return func(o *Option) {
|
||||
return func(o *ApplicationConfig) {
|
||||
o.Threads = threads
|
||||
}
|
||||
}
|
||||
|
||||
func WithContextSize(ctxSize int) AppOption {
|
||||
return func(o *Option) {
|
||||
return func(o *ApplicationConfig) {
|
||||
o.ContextSize = ctxSize
|
||||
}
|
||||
}
|
||||
|
||||
func WithF16(f16 bool) AppOption {
|
||||
return func(o *Option) {
|
||||
return func(o *ApplicationConfig) {
|
||||
o.F16 = f16
|
||||
}
|
||||
}
|
||||
|
||||
func WithDebug(debug bool) AppOption {
|
||||
return func(o *Option) {
|
||||
return func(o *ApplicationConfig) {
|
||||
o.Debug = debug
|
||||
}
|
||||
}
|
||||
|
||||
func WithDisableMessage(disableMessage bool) AppOption {
|
||||
return func(o *Option) {
|
||||
return func(o *ApplicationConfig) {
|
||||
o.DisableMessage = disableMessage
|
||||
}
|
||||
}
|
||||
|
||||
func WithAudioDir(audioDir string) AppOption {
|
||||
return func(o *Option) {
|
||||
return func(o *ApplicationConfig) {
|
||||
o.AudioDir = audioDir
|
||||
}
|
||||
}
|
||||
|
||||
func WithImageDir(imageDir string) AppOption {
|
||||
return func(o *Option) {
|
||||
return func(o *ApplicationConfig) {
|
||||
o.ImageDir = imageDir
|
||||
}
|
||||
}
|
||||
|
||||
func WithUploadDir(uploadDir string) AppOption {
|
||||
return func(o *Option) {
|
||||
return func(o *ApplicationConfig) {
|
||||
o.UploadDir = uploadDir
|
||||
}
|
||||
}
|
||||
|
||||
func WithApiKeys(apiKeys []string) AppOption {
|
||||
return func(o *Option) {
|
||||
return func(o *ApplicationConfig) {
|
||||
o.ApiKeys = apiKeys
|
||||
}
|
||||
}
|
||||
|
||||
func WithMetrics(meter *metrics.Metrics) AppOption {
|
||||
return func(o *Option) {
|
||||
o.Metrics = meter
|
||||
}
|
||||
}
|
||||
// func WithMetrics(meter *metrics.Metrics) AppOption {
|
||||
// return func(o *StartupOptions) {
|
||||
// o.Metrics = meter
|
||||
// }
|
||||
// }
|
|
@ -9,15 +9,16 @@ import (
|
|||
"strings"
|
||||
"sync"
|
||||
|
||||
"github.com/go-skynet/LocalAI/core/schema"
|
||||
"github.com/go-skynet/LocalAI/pkg/downloader"
|
||||
"github.com/go-skynet/LocalAI/pkg/utils"
|
||||
"github.com/rs/zerolog/log"
|
||||
"gopkg.in/yaml.v3"
|
||||
)
|
||||
|
||||
type Config struct {
|
||||
PredictionOptions `yaml:"parameters"`
|
||||
Name string `yaml:"name"`
|
||||
type BackendConfig struct {
|
||||
schema.PredictionOptions `yaml:"parameters"`
|
||||
Name string `yaml:"name"`
|
||||
|
||||
F16 bool `yaml:"f16"`
|
||||
Threads int `yaml:"threads"`
|
||||
|
@ -159,37 +160,55 @@ type TemplateConfig struct {
|
|||
Functions string `yaml:"function"`
|
||||
}
|
||||
|
||||
type ConfigLoader struct {
|
||||
configs map[string]Config
|
||||
sync.Mutex
|
||||
}
|
||||
|
||||
func (c *Config) SetFunctionCallString(s string) {
|
||||
func (c *BackendConfig) SetFunctionCallString(s string) {
|
||||
c.functionCallString = s
|
||||
}
|
||||
|
||||
func (c *Config) SetFunctionCallNameString(s string) {
|
||||
func (c *BackendConfig) SetFunctionCallNameString(s string) {
|
||||
c.functionCallNameString = s
|
||||
}
|
||||
|
||||
func (c *Config) ShouldUseFunctions() bool {
|
||||
func (c *BackendConfig) ShouldUseFunctions() bool {
|
||||
return ((c.functionCallString != "none" || c.functionCallString == "") || c.ShouldCallSpecificFunction())
|
||||
}
|
||||
|
||||
func (c *Config) ShouldCallSpecificFunction() bool {
|
||||
func (c *BackendConfig) ShouldCallSpecificFunction() bool {
|
||||
return len(c.functionCallNameString) > 0
|
||||
}
|
||||
|
||||
func (c *Config) FunctionToCall() string {
|
||||
func (c *BackendConfig) FunctionToCall() string {
|
||||
return c.functionCallNameString
|
||||
}
|
||||
|
||||
func defaultPredictOptions(modelFile string) schema.PredictionOptions {
|
||||
return schema.PredictionOptions{
|
||||
TopP: 0.7,
|
||||
TopK: 80,
|
||||
Maxtokens: 512,
|
||||
Temperature: 0.9,
|
||||
Model: modelFile,
|
||||
}
|
||||
}
|
||||
|
||||
func DefaultConfig(modelFile string) *BackendConfig {
|
||||
return &BackendConfig{
|
||||
PredictionOptions: defaultPredictOptions(modelFile),
|
||||
}
|
||||
}
|
||||
|
||||
////// Config Loader ////////
|
||||
|
||||
type BackendConfigLoader struct {
|
||||
configs map[string]BackendConfig
|
||||
sync.Mutex
|
||||
}
|
||||
|
||||
// Load a config file for a model
|
||||
func Load(modelName, modelPath string, cm *ConfigLoader, debug bool, threads, ctx int, f16 bool) (*Config, error) {
|
||||
func LoadBackendConfigFileByName(modelName, modelPath string, cl *BackendConfigLoader, debug bool, threads, ctx int, f16 bool) (*BackendConfig, error) {
|
||||
// Load a config file if present after the model name
|
||||
modelConfig := filepath.Join(modelPath, modelName+".yaml")
|
||||
|
||||
var cfg *Config
|
||||
var cfg *BackendConfig
|
||||
|
||||
defaults := func() {
|
||||
cfg = DefaultConfig(modelName)
|
||||
|
@ -199,13 +218,13 @@ func Load(modelName, modelPath string, cm *ConfigLoader, debug bool, threads, ct
|
|||
cfg.Debug = debug
|
||||
}
|
||||
|
||||
cfgExisting, exists := cm.GetConfig(modelName)
|
||||
cfgExisting, exists := cl.GetBackendConfig(modelName)
|
||||
if !exists {
|
||||
if _, err := os.Stat(modelConfig); err == nil {
|
||||
if err := cm.LoadConfig(modelConfig); err != nil {
|
||||
if err := cl.LoadBackendConfig(modelConfig); err != nil {
|
||||
return nil, fmt.Errorf("failed loading model config (%s) %s", modelConfig, err.Error())
|
||||
}
|
||||
cfgExisting, exists = cm.GetConfig(modelName)
|
||||
cfgExisting, exists = cl.GetBackendConfig(modelName)
|
||||
if exists {
|
||||
cfg = &cfgExisting
|
||||
} else {
|
||||
|
@ -238,29 +257,13 @@ func Load(modelName, modelPath string, cm *ConfigLoader, debug bool, threads, ct
|
|||
return cfg, nil
|
||||
}
|
||||
|
||||
func defaultPredictOptions(modelFile string) PredictionOptions {
|
||||
return PredictionOptions{
|
||||
TopP: 0.7,
|
||||
TopK: 80,
|
||||
Maxtokens: 512,
|
||||
Temperature: 0.9,
|
||||
Model: modelFile,
|
||||
func NewBackendConfigLoader() *BackendConfigLoader {
|
||||
return &BackendConfigLoader{
|
||||
configs: make(map[string]BackendConfig),
|
||||
}
|
||||
}
|
||||
|
||||
func DefaultConfig(modelFile string) *Config {
|
||||
return &Config{
|
||||
PredictionOptions: defaultPredictOptions(modelFile),
|
||||
}
|
||||
}
|
||||
|
||||
func NewConfigLoader() *ConfigLoader {
|
||||
return &ConfigLoader{
|
||||
configs: make(map[string]Config),
|
||||
}
|
||||
}
|
||||
func ReadConfigFile(file string) ([]*Config, error) {
|
||||
c := &[]*Config{}
|
||||
func ReadBackendConfigFile(file string) ([]*BackendConfig, error) {
|
||||
c := &[]*BackendConfig{}
|
||||
f, err := os.ReadFile(file)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot read config file: %w", err)
|
||||
|
@ -272,8 +275,8 @@ func ReadConfigFile(file string) ([]*Config, error) {
|
|||
return *c, nil
|
||||
}
|
||||
|
||||
func ReadConfig(file string) (*Config, error) {
|
||||
c := &Config{}
|
||||
func ReadBackendConfig(file string) (*BackendConfig, error) {
|
||||
c := &BackendConfig{}
|
||||
f, err := os.ReadFile(file)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot read config file: %w", err)
|
||||
|
@ -285,10 +288,10 @@ func ReadConfig(file string) (*Config, error) {
|
|||
return c, nil
|
||||
}
|
||||
|
||||
func (cm *ConfigLoader) LoadConfigFile(file string) error {
|
||||
func (cm *BackendConfigLoader) LoadBackendConfigFile(file string) error {
|
||||
cm.Lock()
|
||||
defer cm.Unlock()
|
||||
c, err := ReadConfigFile(file)
|
||||
c, err := ReadBackendConfigFile(file)
|
||||
if err != nil {
|
||||
return fmt.Errorf("cannot load config file: %w", err)
|
||||
}
|
||||
|
@ -299,49 +302,49 @@ func (cm *ConfigLoader) LoadConfigFile(file string) error {
|
|||
return nil
|
||||
}
|
||||
|
||||
func (cm *ConfigLoader) LoadConfig(file string) error {
|
||||
cm.Lock()
|
||||
defer cm.Unlock()
|
||||
c, err := ReadConfig(file)
|
||||
func (cl *BackendConfigLoader) LoadBackendConfig(file string) error {
|
||||
cl.Lock()
|
||||
defer cl.Unlock()
|
||||
c, err := ReadBackendConfig(file)
|
||||
if err != nil {
|
||||
return fmt.Errorf("cannot read config file: %w", err)
|
||||
}
|
||||
|
||||
cm.configs[c.Name] = *c
|
||||
cl.configs[c.Name] = *c
|
||||
return nil
|
||||
}
|
||||
|
||||
func (cm *ConfigLoader) GetConfig(m string) (Config, bool) {
|
||||
cm.Lock()
|
||||
defer cm.Unlock()
|
||||
v, exists := cm.configs[m]
|
||||
func (cl *BackendConfigLoader) GetBackendConfig(m string) (BackendConfig, bool) {
|
||||
cl.Lock()
|
||||
defer cl.Unlock()
|
||||
v, exists := cl.configs[m]
|
||||
return v, exists
|
||||
}
|
||||
|
||||
func (cm *ConfigLoader) GetAllConfigs() []Config {
|
||||
cm.Lock()
|
||||
defer cm.Unlock()
|
||||
var res []Config
|
||||
for _, v := range cm.configs {
|
||||
func (cl *BackendConfigLoader) GetAllBackendConfigs() []BackendConfig {
|
||||
cl.Lock()
|
||||
defer cl.Unlock()
|
||||
var res []BackendConfig
|
||||
for _, v := range cl.configs {
|
||||
res = append(res, v)
|
||||
}
|
||||
return res
|
||||
}
|
||||
|
||||
func (cm *ConfigLoader) ListConfigs() []string {
|
||||
cm.Lock()
|
||||
defer cm.Unlock()
|
||||
func (cl *BackendConfigLoader) ListBackendConfigs() []string {
|
||||
cl.Lock()
|
||||
defer cl.Unlock()
|
||||
var res []string
|
||||
for k := range cm.configs {
|
||||
for k := range cl.configs {
|
||||
res = append(res, k)
|
||||
}
|
||||
return res
|
||||
}
|
||||
|
||||
// Preload prepare models if they are not local but url or huggingface repositories
|
||||
func (cm *ConfigLoader) Preload(modelPath string) error {
|
||||
cm.Lock()
|
||||
defer cm.Unlock()
|
||||
func (cl *BackendConfigLoader) Preload(modelPath string) error {
|
||||
cl.Lock()
|
||||
defer cl.Unlock()
|
||||
|
||||
status := func(fileName, current, total string, percent float64) {
|
||||
utils.DisplayDownloadFunction(fileName, current, total, percent)
|
||||
|
@ -349,7 +352,7 @@ func (cm *ConfigLoader) Preload(modelPath string) error {
|
|||
|
||||
log.Info().Msgf("Preloading models from %s", modelPath)
|
||||
|
||||
for i, config := range cm.configs {
|
||||
for i, config := range cl.configs {
|
||||
|
||||
// Download files and verify their SHA
|
||||
for _, file := range config.DownloadFiles {
|
||||
|
@ -381,25 +384,25 @@ func (cm *ConfigLoader) Preload(modelPath string) error {
|
|||
}
|
||||
}
|
||||
|
||||
cc := cm.configs[i]
|
||||
cc := cl.configs[i]
|
||||
c := &cc
|
||||
c.PredictionOptions.Model = md5Name
|
||||
cm.configs[i] = *c
|
||||
cl.configs[i] = *c
|
||||
}
|
||||
if cm.configs[i].Name != "" {
|
||||
log.Info().Msgf("Model name: %s", cm.configs[i].Name)
|
||||
if cl.configs[i].Name != "" {
|
||||
log.Info().Msgf("Model name: %s", cl.configs[i].Name)
|
||||
}
|
||||
if cm.configs[i].Description != "" {
|
||||
log.Info().Msgf("Model description: %s", cm.configs[i].Description)
|
||||
if cl.configs[i].Description != "" {
|
||||
log.Info().Msgf("Model description: %s", cl.configs[i].Description)
|
||||
}
|
||||
if cm.configs[i].Usage != "" {
|
||||
log.Info().Msgf("Model usage: \n%s", cm.configs[i].Usage)
|
||||
if cl.configs[i].Usage != "" {
|
||||
log.Info().Msgf("Model usage: \n%s", cl.configs[i].Usage)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (cm *ConfigLoader) LoadConfigs(path string) error {
|
||||
func (cm *BackendConfigLoader) LoadBackendConfigsFromPath(path string) error {
|
||||
cm.Lock()
|
||||
defer cm.Unlock()
|
||||
entries, err := os.ReadDir(path)
|
||||
|
@ -419,7 +422,7 @@ func (cm *ConfigLoader) LoadConfigs(path string) error {
|
|||
if !strings.Contains(file.Name(), ".yaml") && !strings.Contains(file.Name(), ".yml") {
|
||||
continue
|
||||
}
|
||||
c, err := ReadConfig(filepath.Join(path, file.Name()))
|
||||
c, err := ReadBackendConfig(filepath.Join(path, file.Name()))
|
||||
if err == nil {
|
||||
cm.configs[c.Name] = *c
|
||||
}
|
|
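The old ConfigLoader becomes BackendConfigLoader and each method gains a Backend in its name (GetConfig to GetBackendConfig, ListConfigs to ListBackendConfigs, and so on). A short sketch of loading and inspecting model configs through the renamed API, assuming only the functions shown above; the models path is a placeholder.

cl := config.NewBackendConfigLoader()
if err := cl.LoadBackendConfigsFromPath("/models"); err != nil { // reads the *.yaml / *.yml files in the directory
	log.Fatal(err)
}

for _, name := range cl.ListBackendConfigs() {
	if cfg, ok := cl.GetBackendConfig(name); ok {
		fmt.Printf("model %s uses backend %q\n", name, cfg.Backend)
	}
}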
@ -4,8 +4,7 @@ import (
|
|||
"os"
|
||||
|
||||
. "github.com/go-skynet/LocalAI/core/config"
|
||||
"github.com/go-skynet/LocalAI/core/options"
|
||||
"github.com/go-skynet/LocalAI/pkg/model"
|
||||
|
||||
. "github.com/onsi/ginkgo/v2"
|
||||
. "github.com/onsi/gomega"
|
||||
)
|
||||
|
@ -19,7 +18,7 @@ var _ = Describe("Test cases for config related functions", func() {
|
|||
Context("Test Read configuration functions", func() {
|
||||
configFile = os.Getenv("CONFIG_FILE")
|
||||
It("Test ReadConfigFile", func() {
|
||||
config, err := ReadConfigFile(configFile)
|
||||
config, err := ReadBackendConfigFile(configFile)
|
||||
Expect(err).To(BeNil())
|
||||
Expect(config).ToNot(BeNil())
|
||||
// two configs in config.yaml
|
||||
|
@ -28,29 +27,26 @@ var _ = Describe("Test cases for config related functions", func() {
|
|||
})
|
||||
|
||||
It("Test LoadConfigs", func() {
|
||||
cm := NewConfigLoader()
|
||||
opts := options.NewOptions()
|
||||
modelLoader := model.NewModelLoader(os.Getenv("MODELS_PATH"))
|
||||
options.WithModelLoader(modelLoader)(opts)
|
||||
|
||||
err := cm.LoadConfigs(opts.Loader.ModelPath)
|
||||
cm := NewBackendConfigLoader()
|
||||
opts := NewApplicationConfig()
|
||||
err := cm.LoadBackendConfigsFromPath(opts.ModelPath)
|
||||
Expect(err).To(BeNil())
|
||||
Expect(cm.ListConfigs()).ToNot(BeNil())
|
||||
Expect(cm.ListBackendConfigs()).ToNot(BeNil())
|
||||
|
||||
// config should includes gpt4all models's api.config
|
||||
Expect(cm.ListConfigs()).To(ContainElements("gpt4all"))
|
||||
Expect(cm.ListBackendConfigs()).To(ContainElements("gpt4all"))
|
||||
|
||||
// config should includes gpt2 models's api.config
|
||||
Expect(cm.ListConfigs()).To(ContainElements("gpt4all-2"))
|
||||
Expect(cm.ListBackendConfigs()).To(ContainElements("gpt4all-2"))
|
||||
|
||||
// config should includes text-embedding-ada-002 models's api.config
|
||||
Expect(cm.ListConfigs()).To(ContainElements("text-embedding-ada-002"))
|
||||
Expect(cm.ListBackendConfigs()).To(ContainElements("text-embedding-ada-002"))
|
||||
|
||||
// config should includes rwkv_test models's api.config
|
||||
Expect(cm.ListConfigs()).To(ContainElements("rwkv_test"))
|
||||
Expect(cm.ListBackendConfigs()).To(ContainElements("rwkv_test"))
|
||||
|
||||
// config should includes whisper-1 models's api.config
|
||||
Expect(cm.ListConfigs()).To(ContainElements("whisper-1"))
|
||||
Expect(cm.ListBackendConfigs()).To(ContainElements("whisper-1"))
|
||||
})
|
||||
})
|
||||
})
|
||||
|
|
core/http/api.go: 229 changed lines
|
@ -3,122 +3,29 @@ package http
|
|||
import (
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"os"
|
||||
"strings"
|
||||
|
||||
"github.com/go-skynet/LocalAI/api/localai"
|
||||
"github.com/go-skynet/LocalAI/api/openai"
|
||||
config "github.com/go-skynet/LocalAI/core/config"
|
||||
"github.com/go-skynet/LocalAI/core/options"
|
||||
"github.com/go-skynet/LocalAI/core/http/endpoints/localai"
|
||||
"github.com/go-skynet/LocalAI/core/http/endpoints/openai"
|
||||
|
||||
"github.com/go-skynet/LocalAI/core/config"
|
||||
"github.com/go-skynet/LocalAI/core/schema"
|
||||
"github.com/go-skynet/LocalAI/core/services"
|
||||
"github.com/go-skynet/LocalAI/internal"
|
||||
"github.com/go-skynet/LocalAI/metrics"
|
||||
"github.com/go-skynet/LocalAI/pkg/assets"
|
||||
"github.com/go-skynet/LocalAI/pkg/model"
|
||||
"github.com/go-skynet/LocalAI/pkg/startup"
|
||||
|
||||
"github.com/gofiber/fiber/v2"
|
||||
"github.com/gofiber/fiber/v2/middleware/cors"
|
||||
"github.com/gofiber/fiber/v2/middleware/logger"
|
||||
"github.com/gofiber/fiber/v2/middleware/recover"
|
||||
"github.com/rs/zerolog"
|
||||
"github.com/rs/zerolog/log"
|
||||
)
|
||||
|
||||
func Startup(opts ...options.AppOption) (*options.Option, *config.ConfigLoader, error) {
|
||||
options := options.NewOptions(opts...)
|
||||
|
||||
zerolog.SetGlobalLevel(zerolog.InfoLevel)
|
||||
if options.Debug {
|
||||
zerolog.SetGlobalLevel(zerolog.DebugLevel)
|
||||
}
|
||||
|
||||
log.Info().Msgf("Starting LocalAI using %d threads, with models path: %s", options.Threads, options.Loader.ModelPath)
|
||||
log.Info().Msgf("LocalAI version: %s", internal.PrintableVersion())
|
||||
|
||||
startup.PreloadModelsConfigurations(options.ModelLibraryURL, options.Loader.ModelPath, options.ModelsURL...)
|
||||
|
||||
cl := config.NewConfigLoader()
|
||||
if err := cl.LoadConfigs(options.Loader.ModelPath); err != nil {
|
||||
log.Error().Msgf("error loading config files: %s", err.Error())
|
||||
}
|
||||
|
||||
if options.ConfigFile != "" {
|
||||
if err := cl.LoadConfigFile(options.ConfigFile); err != nil {
|
||||
log.Error().Msgf("error loading config file: %s", err.Error())
|
||||
}
|
||||
}
|
||||
|
||||
if err := cl.Preload(options.Loader.ModelPath); err != nil {
|
||||
log.Error().Msgf("error downloading models: %s", err.Error())
|
||||
}
|
||||
|
||||
if options.PreloadJSONModels != "" {
|
||||
if err := localai.ApplyGalleryFromString(options.Loader.ModelPath, options.PreloadJSONModels, cl, options.Galleries); err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
}
|
||||
|
||||
if options.PreloadModelsFromPath != "" {
|
||||
if err := localai.ApplyGalleryFromFile(options.Loader.ModelPath, options.PreloadModelsFromPath, cl, options.Galleries); err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
}
|
||||
|
||||
if options.Debug {
|
||||
for _, v := range cl.ListConfigs() {
|
||||
cfg, _ := cl.GetConfig(v)
|
||||
log.Debug().Msgf("Model: %s (config: %+v)", v, cfg)
|
||||
}
|
||||
}
|
||||
|
||||
if options.AssetsDestination != "" {
|
||||
// Extract files from the embedded FS
|
||||
err := assets.ExtractFiles(options.BackendAssets, options.AssetsDestination)
|
||||
log.Debug().Msgf("Extracting backend assets files to %s", options.AssetsDestination)
|
||||
if err != nil {
|
||||
log.Warn().Msgf("Failed extracting backend assets files: %s (might be required for some backends to work properly, like gpt4all)", err)
|
||||
}
|
||||
}
|
||||
|
||||
// turn off any process that was started by GRPC if the context is canceled
|
||||
go func() {
|
||||
<-options.Context.Done()
|
||||
log.Debug().Msgf("Context canceled, shutting down")
|
||||
options.Loader.StopAllGRPC()
|
||||
}()
|
||||
|
||||
if options.WatchDog {
|
||||
wd := model.NewWatchDog(
|
||||
options.Loader,
|
||||
options.WatchDogBusyTimeout,
|
||||
options.WatchDogIdleTimeout,
|
||||
options.WatchDogBusy,
|
||||
options.WatchDogIdle)
|
||||
options.Loader.SetWatchDog(wd)
|
||||
go wd.Run()
|
||||
go func() {
|
||||
<-options.Context.Done()
|
||||
log.Debug().Msgf("Context canceled, shutting down")
|
||||
wd.Shutdown()
|
||||
}()
|
||||
}
|
||||
|
||||
return options, cl, nil
|
||||
}
|
||||
|
||||
func App(opts ...options.AppOption) (*fiber.App, error) {
|
||||
|
||||
options, cl, err := Startup(opts...)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed basic startup tasks with error %s", err.Error())
|
||||
}
|
||||
|
||||
func App(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) (*fiber.App, error) {
|
||||
// Return errors as JSON responses
|
||||
app := fiber.New(fiber.Config{
|
||||
BodyLimit: options.UploadLimitMB * 1024 * 1024, // this is the default limit of 4MB
|
||||
DisableStartupMessage: options.DisableMessage,
|
||||
BodyLimit: appConfig.UploadLimitMB * 1024 * 1024, // this is the default limit of 4MB
|
||||
DisableStartupMessage: appConfig.DisableMessage,
|
||||
// Override default error handler
|
||||
ErrorHandler: func(ctx *fiber.Ctx, err error) error {
|
||||
// Status code defaults to 500
|
||||
|
@ -139,7 +46,7 @@ func App(opts ...options.AppOption) (*fiber.App, error) {
|
|||
},
|
||||
})
|
||||
|
||||
if options.Debug {
|
||||
if appConfig.Debug {
|
||||
app.Use(logger.New(logger.Config{
|
||||
Format: "[${ip}]:${port} ${status} - ${method} ${path}\n",
|
||||
}))
|
||||
|
@ -147,17 +54,25 @@ func App(opts ...options.AppOption) (*fiber.App, error) {
|
|||
|
||||
// Default middleware config
|
||||
|
||||
if !options.Debug {
|
||||
if !appConfig.Debug {
|
||||
app.Use(recover.New())
|
||||
}
|
||||
|
||||
if options.Metrics != nil {
|
||||
app.Use(metrics.APIMiddleware(options.Metrics))
|
||||
metricsService, err := services.NewLocalAIMetricsService()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if metricsService != nil {
|
||||
app.Use(localai.LocalAIMetricsAPIMiddleware(metricsService))
|
||||
app.Hooks().OnShutdown(func() error {
|
||||
return metricsService.Shutdown()
|
||||
})
|
||||
}
|
||||
|
||||
// Auth middleware checking if API key is valid. If no API key is set, no auth is required.
|
||||
auth := func(c *fiber.Ctx) error {
|
||||
if len(options.ApiKeys) == 0 {
|
||||
if len(appConfig.ApiKeys) == 0 {
|
||||
return c.Next()
|
||||
}
|
||||
|
||||
|
@ -172,10 +87,10 @@ func App(opts ...options.AppOption) (*fiber.App, error) {
|
|||
}
|
||||
|
||||
// Add file keys to options.ApiKeys
|
||||
options.ApiKeys = append(options.ApiKeys, fileKeys...)
|
||||
appConfig.ApiKeys = append(appConfig.ApiKeys, fileKeys...)
|
||||
}
|
||||
|
||||
if len(options.ApiKeys) == 0 {
|
||||
if len(appConfig.ApiKeys) == 0 {
|
||||
return c.Next()
|
||||
}
|
||||
|
||||
|
@ -189,7 +104,7 @@ func App(opts ...options.AppOption) (*fiber.App, error) {
|
|||
}
|
||||
|
||||
apiKey := authHeaderParts[1]
|
||||
for _, key := range options.ApiKeys {
|
||||
for _, key := range appConfig.ApiKeys {
|
||||
if apiKey == key {
|
||||
return c.Next()
|
||||
}
|
||||
|
@ -199,20 +114,20 @@ func App(opts ...options.AppOption) (*fiber.App, error) {
|
|||
|
||||
}
|
||||
|
||||
if options.CORS {
|
||||
if appConfig.CORS {
|
||||
var c func(ctx *fiber.Ctx) error
|
||||
if options.CORSAllowOrigins == "" {
|
||||
if appConfig.CORSAllowOrigins == "" {
|
||||
c = cors.New()
|
||||
} else {
|
||||
c = cors.New(cors.Config{AllowOrigins: options.CORSAllowOrigins})
|
||||
c = cors.New(cors.Config{AllowOrigins: appConfig.CORSAllowOrigins})
|
||||
}
|
||||
|
||||
app.Use(c)
|
||||
}
|
||||
|
||||
// LocalAI API endpoints
|
||||
galleryService := localai.NewGalleryService(options.Loader.ModelPath)
|
||||
galleryService.Start(options.Context, cl)
|
||||
galleryService := services.NewGalleryService(appConfig.ModelPath)
|
||||
galleryService.Start(appConfig.Context, cl)
|
||||
|
||||
app.Get("/version", auth, func(c *fiber.Ctx) error {
|
||||
return c.JSON(struct {
|
||||
|
@ -220,69 +135,63 @@ func App(opts ...options.AppOption) (*fiber.App, error) {
|
|||
}{Version: internal.PrintableVersion()})
|
||||
})
|
||||
|
||||
// Make sure directories exists
|
||||
os.MkdirAll(options.ImageDir, 0755)
|
||||
os.MkdirAll(options.AudioDir, 0755)
|
||||
os.MkdirAll(options.UploadDir, 0755)
|
||||
os.MkdirAll(options.Loader.ModelPath, 0755)
|
||||
|
||||
// Load upload json
|
||||
openai.LoadUploadConfig(options.UploadDir)
|
||||
openai.LoadUploadConfig(appConfig.UploadDir)
|
||||
|
||||
modelGalleryService := localai.CreateModelGalleryService(options.Galleries, options.Loader.ModelPath, galleryService)
|
||||
app.Post("/models/apply", auth, modelGalleryService.ApplyModelGalleryEndpoint())
|
||||
app.Get("/models/available", auth, modelGalleryService.ListModelFromGalleryEndpoint())
|
||||
app.Get("/models/galleries", auth, modelGalleryService.ListModelGalleriesEndpoint())
|
||||
app.Post("/models/galleries", auth, modelGalleryService.AddModelGalleryEndpoint())
|
||||
app.Delete("/models/galleries", auth, modelGalleryService.RemoveModelGalleryEndpoint())
|
||||
app.Get("/models/jobs/:uuid", auth, modelGalleryService.GetOpStatusEndpoint())
|
||||
app.Get("/models/jobs", auth, modelGalleryService.GetAllStatusEndpoint())
|
||||
modelGalleryEndpointService := localai.CreateModelGalleryEndpointService(appConfig.Galleries, appConfig.ModelPath, galleryService)
|
||||
app.Post("/models/apply", auth, modelGalleryEndpointService.ApplyModelGalleryEndpoint())
|
||||
app.Get("/models/available", auth, modelGalleryEndpointService.ListModelFromGalleryEndpoint())
|
||||
app.Get("/models/galleries", auth, modelGalleryEndpointService.ListModelGalleriesEndpoint())
|
||||
app.Post("/models/galleries", auth, modelGalleryEndpointService.AddModelGalleryEndpoint())
|
||||
app.Delete("/models/galleries", auth, modelGalleryEndpointService.RemoveModelGalleryEndpoint())
|
||||
app.Get("/models/jobs/:uuid", auth, modelGalleryEndpointService.GetOpStatusEndpoint())
|
||||
app.Get("/models/jobs", auth, modelGalleryEndpointService.GetAllStatusEndpoint())
|
||||
|
||||
// openAI compatible API endpoint
|
||||
|
||||
// chat
|
||||
app.Post("/v1/chat/completions", auth, openai.ChatEndpoint(cl, options))
|
||||
app.Post("/chat/completions", auth, openai.ChatEndpoint(cl, options))
|
||||
app.Post("/v1/chat/completions", auth, openai.ChatEndpoint(cl, ml, appConfig))
|
||||
app.Post("/chat/completions", auth, openai.ChatEndpoint(cl, ml, appConfig))
|
||||
|
||||
// edit
|
||||
app.Post("/v1/edits", auth, openai.EditEndpoint(cl, options))
|
||||
app.Post("/edits", auth, openai.EditEndpoint(cl, options))
|
||||
app.Post("/v1/edits", auth, openai.EditEndpoint(cl, ml, appConfig))
|
||||
app.Post("/edits", auth, openai.EditEndpoint(cl, ml, appConfig))
|
||||
|
||||
// files
|
||||
app.Post("/v1/files", auth, openai.UploadFilesEndpoint(cl, options))
|
||||
app.Post("/files", auth, openai.UploadFilesEndpoint(cl, options))
|
||||
app.Get("/v1/files", auth, openai.ListFilesEndpoint(cl, options))
|
||||
app.Get("/files", auth, openai.ListFilesEndpoint(cl, options))
|
||||
app.Get("/v1/files/:file_id", auth, openai.GetFilesEndpoint(cl, options))
|
||||
app.Get("/files/:file_id", auth, openai.GetFilesEndpoint(cl, options))
|
||||
app.Delete("/v1/files/:file_id", auth, openai.DeleteFilesEndpoint(cl, options))
|
||||
app.Delete("/files/:file_id", auth, openai.DeleteFilesEndpoint(cl, options))
|
||||
app.Get("/v1/files/:file_id/content", auth, openai.GetFilesContentsEndpoint(cl, options))
|
||||
app.Get("/files/:file_id/content", auth, openai.GetFilesContentsEndpoint(cl, options))
|
||||
app.Post("/v1/files", auth, openai.UploadFilesEndpoint(cl, appConfig))
|
||||
app.Post("/files", auth, openai.UploadFilesEndpoint(cl, appConfig))
|
||||
app.Get("/v1/files", auth, openai.ListFilesEndpoint(cl, appConfig))
|
||||
app.Get("/files", auth, openai.ListFilesEndpoint(cl, appConfig))
|
||||
app.Get("/v1/files/:file_id", auth, openai.GetFilesEndpoint(cl, appConfig))
|
||||
app.Get("/files/:file_id", auth, openai.GetFilesEndpoint(cl, appConfig))
|
||||
app.Delete("/v1/files/:file_id", auth, openai.DeleteFilesEndpoint(cl, appConfig))
|
||||
app.Delete("/files/:file_id", auth, openai.DeleteFilesEndpoint(cl, appConfig))
|
||||
app.Get("/v1/files/:file_id/content", auth, openai.GetFilesContentsEndpoint(cl, appConfig))
|
||||
app.Get("/files/:file_id/content", auth, openai.GetFilesContentsEndpoint(cl, appConfig))
|
||||
|
||||
// completion
|
||||
app.Post("/v1/completions", auth, openai.CompletionEndpoint(cl, options))
|
||||
app.Post("/completions", auth, openai.CompletionEndpoint(cl, options))
|
||||
app.Post("/v1/engines/:model/completions", auth, openai.CompletionEndpoint(cl, options))
|
||||
app.Post("/v1/completions", auth, openai.CompletionEndpoint(cl, ml, appConfig))
|
||||
app.Post("/completions", auth, openai.CompletionEndpoint(cl, ml, appConfig))
|
||||
app.Post("/v1/engines/:model/completions", auth, openai.CompletionEndpoint(cl, ml, appConfig))
|
||||
|
||||
// embeddings
|
||||
app.Post("/v1/embeddings", auth, openai.EmbeddingsEndpoint(cl, options))
|
||||
app.Post("/embeddings", auth, openai.EmbeddingsEndpoint(cl, options))
|
||||
app.Post("/v1/engines/:model/embeddings", auth, openai.EmbeddingsEndpoint(cl, options))
|
||||
app.Post("/v1/embeddings", auth, openai.EmbeddingsEndpoint(cl, ml, appConfig))
|
||||
app.Post("/embeddings", auth, openai.EmbeddingsEndpoint(cl, ml, appConfig))
|
||||
app.Post("/v1/engines/:model/embeddings", auth, openai.EmbeddingsEndpoint(cl, ml, appConfig))
|
||||
|
||||
// audio
|
||||
app.Post("/v1/audio/transcriptions", auth, openai.TranscriptEndpoint(cl, options))
|
||||
app.Post("/tts", auth, localai.TTSEndpoint(cl, options))
|
||||
app.Post("/v1/audio/transcriptions", auth, openai.TranscriptEndpoint(cl, ml, appConfig))
|
||||
app.Post("/tts", auth, localai.TTSEndpoint(cl, ml, appConfig))
|
||||
|
||||
// images
|
||||
app.Post("/v1/images/generations", auth, openai.ImageEndpoint(cl, options))
|
||||
app.Post("/v1/images/generations", auth, openai.ImageEndpoint(cl, ml, appConfig))
|
||||
|
||||
if options.ImageDir != "" {
|
||||
app.Static("/generated-images", options.ImageDir)
|
||||
if appConfig.ImageDir != "" {
|
||||
app.Static("/generated-images", appConfig.ImageDir)
|
||||
}
|
||||
|
||||
if options.AudioDir != "" {
|
||||
app.Static("/generated-audio", options.AudioDir)
|
||||
if appConfig.AudioDir != "" {
|
||||
app.Static("/generated-audio", appConfig.AudioDir)
|
||||
}
|
||||
|
||||
ok := func(c *fiber.Ctx) error {
|
||||
|
@ -294,15 +203,15 @@ func App(opts ...options.AppOption) (*fiber.App, error) {
|
|||
app.Get("/readyz", ok)
|
||||
|
||||
// Experimental Backend Statistics Module
|
||||
backendMonitor := localai.NewBackendMonitor(cl, options) // Split out for now
|
||||
backendMonitor := services.NewBackendMonitor(cl, ml, appConfig) // Split out for now
|
||||
app.Get("/backend/monitor", localai.BackendMonitorEndpoint(backendMonitor))
|
||||
app.Post("/backend/shutdown", localai.BackendShutdownEndpoint(backendMonitor))
|
||||
|
||||
// models
|
||||
app.Get("/v1/models", auth, openai.ListModelsEndpoint(options.Loader, cl))
|
||||
app.Get("/models", auth, openai.ListModelsEndpoint(options.Loader, cl))
|
||||
app.Get("/v1/models", auth, openai.ListModelsEndpoint(cl, ml))
|
||||
app.Get("/models", auth, openai.ListModelsEndpoint(cl, ml))
|
||||
|
||||
app.Get("/metrics", metrics.MetricsHandler())
|
||||
app.Get("/metrics", localai.LocalAIMetricsEndpoint())
|
||||
|
||||
return app, nil
|
||||
}
|
||||
|
|
|
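The test changes below show the new composition root: the old Startup helper is removed from core/http, and the updated tests instead call startup.Startup, which returns the backend config loader, model loader, and application config, while http.App now only wires the Fiber routes around them. A rough end-to-end sketch, assuming the import paths visible in this diff; the listen address and model path are placeholders.

// assuming: import localhttp "github.com/go-skynet/LocalAI/core/http"
bcl, ml, appConfig, err := startup.Startup(
	config.WithContext(context.Background()),
	config.WithModelPath("/models"),
	config.WithDebug(true),
)
if err != nil {
	log.Fatal(err)
}

app, err := localhttp.App(bcl, ml, appConfig)
if err != nil {
	log.Fatal(err)
}
log.Fatal(app.Listen("127.0.0.1:8080"))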
@ -13,9 +13,10 @@ import (
|
|||
"path/filepath"
|
||||
"runtime"
|
||||
|
||||
"github.com/go-skynet/LocalAI/core/config"
|
||||
. "github.com/go-skynet/LocalAI/core/http"
|
||||
"github.com/go-skynet/LocalAI/core/options"
|
||||
"github.com/go-skynet/LocalAI/metrics"
|
||||
"github.com/go-skynet/LocalAI/core/startup"
|
||||
|
||||
"github.com/go-skynet/LocalAI/pkg/downloader"
|
||||
"github.com/go-skynet/LocalAI/pkg/gallery"
|
||||
"github.com/go-skynet/LocalAI/pkg/model"
|
||||
|
@ -127,25 +128,33 @@ var backendAssets embed.FS
|
|||
var _ = Describe("API test", func() {
|
||||
|
||||
var app *fiber.App
|
||||
var modelLoader *model.ModelLoader
|
||||
var client *openai.Client
|
||||
var client2 *openaigo.Client
|
||||
var c context.Context
|
||||
var cancel context.CancelFunc
|
||||
var tmpdir string
|
||||
var modelDir string
|
||||
var bcl *config.BackendConfigLoader
|
||||
var ml *model.ModelLoader
|
||||
var applicationConfig *config.ApplicationConfig
|
||||
|
||||
commonOpts := []options.AppOption{
|
||||
options.WithDebug(true),
|
||||
options.WithDisableMessage(true),
|
||||
commonOpts := []config.AppOption{
|
||||
config.WithDebug(true),
|
||||
config.WithDisableMessage(true),
|
||||
}
|
||||
|
||||
Context("API with ephemeral models", func() {
|
||||
BeforeEach(func() {
|
||||
|
||||
BeforeEach(func(sc SpecContext) {
|
||||
var err error
|
||||
tmpdir, err = os.MkdirTemp("", "")
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
|
||||
modelLoader = model.NewModelLoader(tmpdir)
|
||||
modelDir = filepath.Join(tmpdir, "models")
|
||||
backendAssetsDir := filepath.Join(tmpdir, "backend-assets")
|
||||
err = os.Mkdir(backendAssetsDir, 0755)
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
|
||||
c, cancel = context.WithCancel(context.Background())
|
||||
|
||||
g := []gallery.GalleryModel{
|
||||
|
@ -172,16 +181,18 @@ var _ = Describe("API test", func() {
|
|||
},
|
||||
}
|
||||
|
||||
metricsService, err := metrics.SetupMetrics()
|
||||
bcl, ml, applicationConfig, err = startup.Startup(
|
||||
append(commonOpts,
|
||||
config.WithContext(c),
|
||||
config.WithGalleries(galleries),
|
||||
config.WithModelPath(modelDir),
|
||||
config.WithBackendAssets(backendAssets),
|
||||
config.WithBackendAssetsOutput(backendAssetsDir))...)
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
|
||||
app, err = App(
|
||||
append(commonOpts,
|
||||
options.WithMetrics(metricsService),
|
||||
options.WithContext(c),
|
||||
options.WithGalleries(galleries),
|
||||
options.WithModelLoader(modelLoader), options.WithBackendAssets(backendAssets), options.WithBackendAssetsOutput(tmpdir))...)
|
||||
app, err = App(bcl, ml, applicationConfig)
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
|
||||
go app.Listen("127.0.0.1:9090")
|
||||
|
||||
defaultConfig := openai.DefaultConfig("")
|
||||
|
@ -198,15 +209,21 @@ var _ = Describe("API test", func() {
|
|||
}, "2m").ShouldNot(HaveOccurred())
|
||||
})
|
||||
|
||||
AfterEach(func() {
|
||||
AfterEach(func(sc SpecContext) {
|
||||
cancel()
|
||||
app.Shutdown()
|
||||
os.RemoveAll(tmpdir)
|
||||
if app != nil {
|
||||
err := app.Shutdown()
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
}
|
||||
err := os.RemoveAll(tmpdir)
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
_, err = os.ReadDir(tmpdir)
|
||||
Expect(err).To(HaveOccurred())
|
||||
})
|
||||
|
||||
Context("Applying models", func() {
|
||||
It("applies models from a gallery", func() {
|
||||
|
||||
It("applies models from a gallery", func() {
|
||||
models := getModels("http://127.0.0.1:9090/models/available")
|
||||
Expect(len(models)).To(Equal(2), fmt.Sprint(models))
|
||||
Expect(models[0].Installed).To(BeFalse(), fmt.Sprint(models))
|
||||
|
@ -228,10 +245,10 @@ var _ = Describe("API test", func() {
|
|||
}, "360s", "10s").Should(Equal(true))
|
||||
Expect(resp["message"]).ToNot(ContainSubstring("error"))
|
||||
|
||||
dat, err := os.ReadFile(filepath.Join(tmpdir, "bert2.yaml"))
|
||||
dat, err := os.ReadFile(filepath.Join(modelDir, "bert2.yaml"))
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
|
||||
_, err = os.ReadFile(filepath.Join(tmpdir, "foo.yaml"))
|
||||
_, err = os.ReadFile(filepath.Join(modelDir, "foo.yaml"))
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
|
||||
content := map[string]interface{}{}
|
||||
|
@ -253,6 +270,7 @@ var _ = Describe("API test", func() {
|
|||
}
|
||||
})
|
||||
It("overrides models", func() {
|
||||
|
||||
response := postModelApplyRequest("http://127.0.0.1:9090/models/apply", modelApplyRequest{
|
||||
URL: "https://raw.githubusercontent.com/go-skynet/model-gallery/main/bert-embeddings.yaml",
|
||||
Name: "bert",
|
||||
|
@ -270,7 +288,7 @@ var _ = Describe("API test", func() {
|
|||
return response["processed"].(bool)
|
||||
}, "360s", "10s").Should(Equal(true))
|
||||
|
||||
dat, err := os.ReadFile(filepath.Join(tmpdir, "bert.yaml"))
|
||||
dat, err := os.ReadFile(filepath.Join(modelDir, "bert.yaml"))
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
|
||||
content := map[string]interface{}{}
|
||||
|
@ -294,7 +312,7 @@ var _ = Describe("API test", func() {
|
|||
return response["processed"].(bool)
|
||||
}, "360s", "10s").Should(Equal(true))
|
||||
|
||||
dat, err := os.ReadFile(filepath.Join(tmpdir, "bert.yaml"))
|
||||
dat, err := os.ReadFile(filepath.Join(modelDir, "bert.yaml"))
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
|
||||
content := map[string]interface{}{}
|
||||
|
@ -483,8 +501,11 @@ var _ = Describe("API test", func() {
|
|||
var err error
|
||||
tmpdir, err = os.MkdirTemp("", "")
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
modelDir = filepath.Join(tmpdir, "models")
|
||||
backendAssetsDir := filepath.Join(tmpdir, "backend-assets")
|
||||
err = os.Mkdir(backendAssetsDir, 0755)
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
|
||||
modelLoader = model.NewModelLoader(tmpdir)
|
||||
c, cancel = context.WithCancel(context.Background())
|
||||
|
||||
galleries := []gallery.Gallery{
|
||||
|
@ -494,21 +515,20 @@ var _ = Describe("API test", func() {
|
|||
},
|
||||
}
|
||||
|
||||
metricsService, err := metrics.SetupMetrics()
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
|
||||
app, err = App(
|
||||
bcl, ml, applicationConfig, err = startup.Startup(
|
||||
append(commonOpts,
|
||||
options.WithContext(c),
|
||||
options.WithMetrics(metricsService),
|
||||
options.WithAudioDir(tmpdir),
|
||||
options.WithImageDir(tmpdir),
|
||||
options.WithGalleries(galleries),
|
||||
options.WithModelLoader(modelLoader),
|
||||
options.WithBackendAssets(backendAssets),
|
||||
options.WithBackendAssetsOutput(tmpdir))...,
|
||||
config.WithContext(c),
|
||||
config.WithAudioDir(tmpdir),
|
||||
config.WithImageDir(tmpdir),
|
||||
config.WithGalleries(galleries),
|
||||
config.WithModelPath(modelDir),
|
||||
config.WithBackendAssets(backendAssets),
|
||||
config.WithBackendAssetsOutput(tmpdir))...,
|
||||
)
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
app, err = App(bcl, ml, applicationConfig)
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
|
||||
go app.Listen("127.0.0.1:9090")
|
||||
|
||||
defaultConfig := openai.DefaultConfig("")
|
||||
|
@ -527,8 +547,14 @@ var _ = Describe("API test", func() {
|
|||
|
||||
AfterEach(func() {
|
||||
cancel()
|
||||
app.Shutdown()
|
||||
os.RemoveAll(tmpdir)
|
||||
if app != nil {
|
||||
err := app.Shutdown()
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
}
|
||||
err := os.RemoveAll(tmpdir)
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
_, err = os.ReadDir(tmpdir)
|
||||
Expect(err).To(HaveOccurred())
|
||||
})
|
||||
It("installs and is capable to run tts", Label("tts"), func() {
|
||||
if runtime.GOOS != "linux" {
|
||||
|
@ -599,20 +625,20 @@ var _ = Describe("API test", func() {
|
|||
|
||||
Context("API query", func() {
|
||||
BeforeEach(func() {
|
||||
modelLoader = model.NewModelLoader(os.Getenv("MODELS_PATH"))
|
||||
modelPath := os.Getenv("MODELS_PATH")
|
||||
c, cancel = context.WithCancel(context.Background())
|
||||
|
||||
metricsService, err := metrics.SetupMetrics()
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
var err error
|
||||
|
||||
app, err = App(
|
||||
bcl, ml, applicationConfig, err = startup.Startup(
|
||||
append(commonOpts,
|
||||
options.WithExternalBackend("huggingface", os.Getenv("HUGGINGFACE_GRPC")),
|
||||
options.WithContext(c),
|
||||
options.WithModelLoader(modelLoader),
|
||||
options.WithMetrics(metricsService),
|
||||
config.WithExternalBackend("huggingface", os.Getenv("HUGGINGFACE_GRPC")),
|
||||
config.WithContext(c),
|
||||
config.WithModelPath(modelPath),
|
||||
)...)
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
app, err = App(bcl, ml, applicationConfig)
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
go app.Listen("127.0.0.1:9090")
|
||||
|
||||
defaultConfig := openai.DefaultConfig("")
|
||||
|
@ -630,7 +656,10 @@ var _ = Describe("API test", func() {
|
|||
})
|
||||
AfterEach(func() {
|
||||
cancel()
|
||||
app.Shutdown()
|
||||
if app != nil {
|
||||
err := app.Shutdown()
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
}
|
||||
})
|
||||
It("returns the models list", func() {
|
||||
models, err := client.ListModels(context.TODO())
|
||||
|
@ -811,20 +840,20 @@ var _ = Describe("API test", func() {
|
|||
|
||||
Context("Config file", func() {
|
||||
BeforeEach(func() {
|
||||
modelLoader = model.NewModelLoader(os.Getenv("MODELS_PATH"))
|
||||
modelPath := os.Getenv("MODELS_PATH")
|
||||
c, cancel = context.WithCancel(context.Background())
|
||||
|
||||
metricsService, err := metrics.SetupMetrics()
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
|
||||
app, err = App(
|
||||
var err error
|
||||
bcl, ml, applicationConfig, err = startup.Startup(
|
||||
append(commonOpts,
|
||||
options.WithContext(c),
|
||||
options.WithMetrics(metricsService),
|
||||
options.WithModelLoader(modelLoader),
|
||||
options.WithConfigFile(os.Getenv("CONFIG_FILE")))...,
|
||||
config.WithContext(c),
|
||||
config.WithModelPath(modelPath),
|
||||
config.WithConfigFile(os.Getenv("CONFIG_FILE")))...,
|
||||
)
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
app, err = App(bcl, ml, applicationConfig)
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
|
||||
go app.Listen("127.0.0.1:9090")
|
||||
|
||||
defaultConfig := openai.DefaultConfig("")
|
||||
|
@ -840,7 +869,10 @@ var _ = Describe("API test", func() {
|
|||
})
|
||||
AfterEach(func() {
|
||||
cancel()
|
||||
app.Shutdown()
|
||||
if app != nil {
|
||||
err := app.Shutdown()
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
}
|
||||
})
|
||||
It("can generate chat completions from config file (list1)", func() {
|
||||
resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: "list1", Messages: []openai.ChatCompletionMessage{{Role: "user", Content: testPrompt}}})
|
||||
|
|
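For orientation, the test changes above capture the heart of this refactor: the old single-step App(append(commonOpts, options.With...)...) construction becomes a two-phase boot, where startup.Startup(...) assembles the BackendConfigLoader, ModelLoader and ApplicationConfig from config.With... options, and App(bcl, ml, applicationConfig) only wires the HTTP layer on top. A minimal sketch of that sequence outside the test suite follows; the import paths marked in the comments are assumptions, only the option names and call signatures are taken from the diff.

package main

import (
	"context"
	"log"

	"github.com/go-skynet/LocalAI/core/config"
	localhttp "github.com/go-skynet/LocalAI/core/http" // assumed import path for App
	"github.com/go-skynet/LocalAI/core/startup"        // assumed import path for Startup
)

func main() {
	ctx := context.Background()

	// Phase 1: build configuration and services; no HTTP involved yet.
	bcl, ml, appConfig, err := startup.Startup(
		config.WithContext(ctx),
		config.WithModelPath("/models"),
	)
	if err != nil {
		log.Fatal(err)
	}

	// Phase 2: construct the Fiber application from the prepared services.
	app, err := localhttp.App(bcl, ml, appConfig)
	if err != nil {
		log.Fatal(err)
	}
	log.Fatal(app.Listen("127.0.0.1:8080"))
}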
43 core/http/ctx/fiber.go Normal file
|
@ -0,0 +1,43 @@
|
|||
package fiberContext
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
"github.com/go-skynet/LocalAI/pkg/model"
|
||||
"github.com/gofiber/fiber/v2"
|
||||
"github.com/rs/zerolog/log"
|
||||
)
|
||||
|
||||
// ModelFromContext returns the model from the context
|
||||
// If no model is specified, it will take the first available
|
||||
// Takes a model string as input which should be the one received from the user request.
|
||||
// It returns the model name resolved from the context and an error if any.
|
||||
func ModelFromContext(ctx *fiber.Ctx, loader *model.ModelLoader, modelInput string, firstModel bool) (string, error) {
|
||||
if ctx.Params("model") != "" {
|
||||
modelInput = ctx.Params("model")
|
||||
}
|
||||
|
||||
// Set model from bearer token, if available
|
||||
bearer := strings.TrimLeft(ctx.Get("authorization"), "Bearer ")
|
||||
bearerExists := bearer != "" && loader.ExistsInModelPath(bearer)
|
||||
|
||||
// If no model was specified, take the first available
|
||||
if modelInput == "" && !bearerExists && firstModel {
|
||||
models, _ := loader.ListModels()
|
||||
if len(models) > 0 {
|
||||
modelInput = models[0]
|
||||
log.Debug().Msgf("No model specified, using: %s", modelInput)
|
||||
} else {
|
||||
log.Debug().Msgf("No model specified, returning error")
|
||||
return "", fmt.Errorf("no model specified")
|
||||
}
|
||||
}
|
||||
|
||||
// If a model is found in the bearer token, it takes precedence
|
||||
if bearerExists {
|
||||
log.Debug().Msgf("Using model from bearer token: %s", bearer)
|
||||
modelInput = bearer
|
||||
}
|
||||
return modelInput, nil
|
||||
}
|
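A short usage sketch for the helper above: the resolution order is path parameter, then bearer token, then (optionally) the first model found on disk, so a handler only forwards whatever the request carried. The handler below is purely illustrative and not part of this commit.

package localai

import (
	fiberContext "github.com/go-skynet/LocalAI/core/http/ctx"
	"github.com/go-skynet/LocalAI/pkg/model"
	"github.com/gofiber/fiber/v2"
)

// exampleEndpoint is a hypothetical handler showing how ModelFromContext is consumed.
func exampleEndpoint(ml *model.ModelLoader) fiber.Handler {
	return func(c *fiber.Ctx) error {
		// ":model" path param > bearer token > first model on disk (firstModel=true).
		modelName, err := fiberContext.ModelFromContext(c, ml, c.Query("model"), true)
		if err != nil {
			return fiber.ErrBadRequest
		}
		return c.JSON(fiber.Map{"model": modelName})
	}
}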
36 core/http/endpoints/localai/backend_monitor.go Normal file
|
@ -0,0 +1,36 @@
|
|||
package localai
|
||||
|
||||
import (
|
||||
"github.com/go-skynet/LocalAI/core/schema"
|
||||
"github.com/go-skynet/LocalAI/core/services"
|
||||
"github.com/gofiber/fiber/v2"
|
||||
)
|
||||
|
||||
func BackendMonitorEndpoint(bm services.BackendMonitor) func(c *fiber.Ctx) error {
|
||||
return func(c *fiber.Ctx) error {
|
||||
|
||||
input := new(schema.BackendMonitorRequest)
|
||||
// Get input data from the request body
|
||||
if err := c.BodyParser(input); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
resp, err := bm.CheckAndSample(input.Model)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return c.JSON(resp)
|
||||
}
|
||||
}
|
||||
|
||||
func BackendShutdownEndpoint(bm services.BackendMonitor) func(c *fiber.Ctx) error {
|
||||
return func(c *fiber.Ctx) error {
|
||||
input := new(schema.BackendMonitorRequest)
|
||||
// Get input data from the request body
|
||||
if err := c.BodyParser(input); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return bm.ShutdownModel(input.Model)
|
||||
}
|
||||
}
|
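Both handlers above decode a schema.BackendMonitorRequest from the body and delegate to the services.BackendMonitor. A sketch of how they could be mounted on the router; the route paths here are illustrative, the actual wiring lives in the app setup.

package http

import (
	"github.com/go-skynet/LocalAI/core/http/endpoints/localai"
	"github.com/go-skynet/LocalAI/core/services"
	"github.com/gofiber/fiber/v2"
)

// registerBackendMonitorRoutes wires the monitor endpoints; the paths are illustrative.
func registerBackendMonitorRoutes(app *fiber.App, bm services.BackendMonitor) {
	app.Get("/backend/monitor", localai.BackendMonitorEndpoint(bm))
	app.Post("/backend/shutdown", localai.BackendShutdownEndpoint(bm))
}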
146 core/http/endpoints/localai/gallery.go Normal file
|
@ -0,0 +1,146 @@
|
|||
package localai
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"slices"
|
||||
|
||||
"github.com/go-skynet/LocalAI/core/services"
|
||||
"github.com/go-skynet/LocalAI/pkg/gallery"
|
||||
"github.com/gofiber/fiber/v2"
|
||||
"github.com/google/uuid"
|
||||
"github.com/rs/zerolog/log"
|
||||
)
|
||||
|
||||
type ModelGalleryEndpointService struct {
|
||||
galleries []gallery.Gallery
|
||||
modelPath string
|
||||
galleryApplier *services.GalleryService
|
||||
}
|
||||
|
||||
type GalleryModel struct {
|
||||
ID string `json:"id"`
|
||||
gallery.GalleryModel
|
||||
}
|
||||
|
||||
func CreateModelGalleryEndpointService(galleries []gallery.Gallery, modelPath string, galleryApplier *services.GalleryService) ModelGalleryEndpointService {
|
||||
return ModelGalleryEndpointService{
|
||||
galleries: galleries,
|
||||
modelPath: modelPath,
|
||||
galleryApplier: galleryApplier,
|
||||
}
|
||||
}
|
||||
|
||||
func (mgs *ModelGalleryEndpointService) GetOpStatusEndpoint() func(c *fiber.Ctx) error {
|
||||
return func(c *fiber.Ctx) error {
|
||||
status := mgs.galleryApplier.GetStatus(c.Params("uuid"))
|
||||
if status == nil {
|
||||
return fmt.Errorf("could not find any status for ID")
|
||||
}
|
||||
return c.JSON(status)
|
||||
}
|
||||
}
|
||||
|
||||
func (mgs *ModelGalleryEndpointService) GetAllStatusEndpoint() func(c *fiber.Ctx) error {
|
||||
return func(c *fiber.Ctx) error {
|
||||
return c.JSON(mgs.galleryApplier.GetAllStatus())
|
||||
}
|
||||
}
|
||||
|
||||
func (mgs *ModelGalleryEndpointService) ApplyModelGalleryEndpoint() func(c *fiber.Ctx) error {
|
||||
return func(c *fiber.Ctx) error {
|
||||
input := new(GalleryModel)
|
||||
// Get input data from the request body
|
||||
if err := c.BodyParser(input); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
uuid, err := uuid.NewUUID()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
mgs.galleryApplier.C <- gallery.GalleryOp{
|
||||
Req: input.GalleryModel,
|
||||
Id: uuid.String(),
|
||||
GalleryName: input.ID,
|
||||
Galleries: mgs.galleries,
|
||||
}
|
||||
return c.JSON(struct {
|
||||
ID string `json:"uuid"`
|
||||
StatusURL string `json:"status"`
|
||||
}{ID: uuid.String(), StatusURL: c.BaseURL() + "/models/jobs/" + uuid.String()})
|
||||
}
|
||||
}
|
||||
|
||||
func (mgs *ModelGalleryEndpointService) ListModelFromGalleryEndpoint() func(c *fiber.Ctx) error {
|
||||
return func(c *fiber.Ctx) error {
|
||||
log.Debug().Msgf("Listing models from galleries: %+v", mgs.galleries)
|
||||
|
||||
models, err := gallery.AvailableGalleryModels(mgs.galleries, mgs.modelPath)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
log.Debug().Msgf("Models found from galleries: %+v", models)
|
||||
for _, m := range models {
|
||||
log.Debug().Msgf("Model found from galleries: %+v", m)
|
||||
}
|
||||
dat, err := json.Marshal(models)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return c.Send(dat)
|
||||
}
|
||||
}
|
||||
|
||||
// NOTE: This is different (and much simpler!) than above! This JUST lists the model galleries that have been loaded, not their contents!
|
||||
func (mgs *ModelGalleryEndpointService) ListModelGalleriesEndpoint() func(c *fiber.Ctx) error {
|
||||
return func(c *fiber.Ctx) error {
|
||||
log.Debug().Msgf("Listing model galleries %+v", mgs.galleries)
|
||||
dat, err := json.Marshal(mgs.galleries)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return c.Send(dat)
|
||||
}
|
||||
}
|
||||
|
||||
func (mgs *ModelGalleryEndpointService) AddModelGalleryEndpoint() func(c *fiber.Ctx) error {
|
||||
return func(c *fiber.Ctx) error {
|
||||
input := new(gallery.Gallery)
|
||||
// Get input data from the request body
|
||||
if err := c.BodyParser(input); err != nil {
|
||||
return err
|
||||
}
|
||||
if slices.ContainsFunc(mgs.galleries, func(gallery gallery.Gallery) bool {
|
||||
return gallery.Name == input.Name
|
||||
}) {
|
||||
return fmt.Errorf("%s already exists", input.Name)
|
||||
}
|
||||
dat, err := json.Marshal(mgs.galleries)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
log.Debug().Msgf("Adding %+v to gallery list", *input)
|
||||
mgs.galleries = append(mgs.galleries, *input)
|
||||
return c.Send(dat)
|
||||
}
|
||||
}
|
||||
|
||||
func (mgs *ModelGalleryEndpointService) RemoveModelGalleryEndpoint() func(c *fiber.Ctx) error {
|
||||
return func(c *fiber.Ctx) error {
|
||||
input := new(gallery.Gallery)
|
||||
// Get input data from the request body
|
||||
if err := c.BodyParser(input); err != nil {
|
||||
return err
|
||||
}
|
||||
if !slices.ContainsFunc(mgs.galleries, func(gallery gallery.Gallery) bool {
|
||||
return gallery.Name == input.Name
|
||||
}) {
|
||||
return fmt.Errorf("%s is not currently registered", input.Name)
|
||||
}
|
||||
mgs.galleries = slices.DeleteFunc(mgs.galleries, func(gallery gallery.Gallery) bool {
|
||||
return gallery.Name == input.Name
|
||||
})
|
||||
return c.Send(nil)
|
||||
}
|
||||
}
|
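ApplyModelGalleryEndpoint is asynchronous: it enqueues a gallery.GalleryOp on the service channel and immediately answers with a job UUID plus a status URL, which GetOpStatusEndpoint then serves until the job reports processed (the same field the test suite above polls). Below is a client-side sketch of that flow; the base URL and request payload are placeholders, while the "uuid", "status" and "processed" fields come from the code and tests above.

package examples

import (
	"bytes"
	"encoding/json"
	"fmt"
	"net/http"
	"time"
)

// ApplyAndWait posts a gallery model request and polls the returned status URL
// until the gallery service marks the job as processed.
func ApplyAndWait(base string, payload []byte) error {
	resp, err := http.Post(base+"/models/apply", "application/json", bytes.NewReader(payload))
	if err != nil {
		return err
	}
	defer resp.Body.Close()

	var job struct {
		ID        string `json:"uuid"`
		StatusURL string `json:"status"`
	}
	if err := json.NewDecoder(resp.Body).Decode(&job); err != nil {
		return err
	}

	for {
		st, err := http.Get(job.StatusURL)
		if err != nil {
			return err
		}
		var status map[string]interface{}
		err = json.NewDecoder(st.Body).Decode(&status)
		st.Body.Close()
		if err != nil {
			return err
		}
		if processed, _ := status["processed"].(bool); processed {
			fmt.Println("job finished:", status["message"])
			return nil
		}
		time.Sleep(2 * time.Second)
	}
}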
43 core/http/endpoints/localai/metrics.go Normal file
|
@ -0,0 +1,43 @@
|
|||
package localai
|
||||
|
||||
import (
|
||||
"time"
|
||||
|
||||
"github.com/go-skynet/LocalAI/core/services"
|
||||
"github.com/gofiber/fiber/v2"
|
||||
"github.com/gofiber/fiber/v2/middleware/adaptor"
|
||||
"github.com/prometheus/client_golang/prometheus/promhttp"
|
||||
)
|
||||
|
||||
func LocalAIMetricsEndpoint() fiber.Handler {
|
||||
|
||||
return adaptor.HTTPHandler(promhttp.Handler())
|
||||
}
|
||||
|
||||
type apiMiddlewareConfig struct {
|
||||
Filter func(c *fiber.Ctx) bool
|
||||
metricsService *services.LocalAIMetricsService
|
||||
}
|
||||
|
||||
func LocalAIMetricsAPIMiddleware(metrics *services.LocalAIMetricsService) fiber.Handler {
|
||||
cfg := apiMiddlewareConfig{
|
||||
metricsService: metrics,
|
||||
Filter: func(c *fiber.Ctx) bool {
|
||||
return c.Path() == "/metrics"
|
||||
},
|
||||
}
|
||||
|
||||
return func(c *fiber.Ctx) error {
|
||||
if cfg.Filter != nil && cfg.Filter(c) {
|
||||
return c.Next()
|
||||
}
|
||||
path := c.Path()
|
||||
method := c.Method()
|
||||
|
||||
start := time.Now()
|
||||
err := c.Next()
|
||||
elapsed := float64(time.Since(start)) / float64(time.Second)
|
||||
cfg.metricsService.ObserveAPICall(method, path, elapsed)
|
||||
return err
|
||||
}
|
||||
}
|
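To tie the two pieces above together: the middleware times every request except /metrics, and the Prometheus handler is what actually serves that path. A wiring sketch (illustrative; the real registration happens in the app setup):

package http

import (
	"github.com/go-skynet/LocalAI/core/http/endpoints/localai"
	"github.com/go-skynet/LocalAI/core/services"
	"github.com/gofiber/fiber/v2"
)

// registerMetrics attaches the timing middleware and exposes the Prometheus scrape endpoint.
func registerMetrics(app *fiber.App, metrics *services.LocalAIMetricsService) {
	// Observe method, path and latency for every call except /metrics itself.
	app.Use(localai.LocalAIMetricsAPIMiddleware(metrics))
	// Serve the Prometheus registry at the path the middleware filters out.
	app.Get("/metrics", localai.LocalAIMetricsEndpoint())
}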
48 core/http/endpoints/localai/tts.go Normal file
|
@ -0,0 +1,48 @@
|
|||
package localai
|
||||
|
||||
import (
|
||||
"github.com/go-skynet/LocalAI/core/backend"
|
||||
"github.com/go-skynet/LocalAI/core/config"
|
||||
fiberContext "github.com/go-skynet/LocalAI/core/http/ctx"
|
||||
"github.com/go-skynet/LocalAI/pkg/model"
|
||||
|
||||
"github.com/go-skynet/LocalAI/core/schema"
|
||||
"github.com/gofiber/fiber/v2"
|
||||
"github.com/rs/zerolog/log"
|
||||
)
|
||||
|
||||
func TTSEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
|
||||
return func(c *fiber.Ctx) error {
|
||||
|
||||
input := new(schema.TTSRequest)
|
||||
|
||||
// Get input data from the request body
|
||||
if err := c.BodyParser(input); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
modelFile, err := fiberContext.ModelFromContext(c, ml, input.Model, false)
|
||||
if err != nil {
|
||||
modelFile = input.Model
|
||||
log.Warn().Msgf("Model not found in context: %s", input.Model)
|
||||
}
|
||||
cfg, err := config.LoadBackendConfigFileByName(modelFile, appConfig.ModelPath, cl, false, 0, 0, false)
|
||||
if err != nil {
|
||||
modelFile = input.Model
|
||||
log.Warn().Msgf("Model not found in context: %s", input.Model)
|
||||
} else {
|
||||
modelFile = cfg.Model
|
||||
}
|
||||
log.Debug().Msgf("Request for model: %s", modelFile)
|
||||
|
||||
if input.Backend != "" {
|
||||
cfg.Backend = input.Backend
|
||||
}
|
||||
|
||||
filePath, _, err := backend.ModelTTS(cfg.Backend, input.Input, modelFile, ml, appConfig, *cfg)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return c.Download(filePath)
|
||||
}
|
||||
}
|
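The endpoint above reads a schema.TTSRequest, resolves the model (falling back to the raw request value when no config is found), and returns the synthesized audio through c.Download. A client-side sketch of calling it; the /tts path and the JSON field names are assumptions based on the schema fields referenced above (Model, Input), not taken from this diff.

package examples

import (
	"bytes"
	"encoding/json"
	"io"
	"net/http"
	"os"
)

// RequestTTS posts a synthesis request and writes the returned audio file to disk.
func RequestTTS(base, model, text, outPath string) error {
	payload, err := json.Marshal(map[string]string{"model": model, "input": text})
	if err != nil {
		return err
	}
	resp, err := http.Post(base+"/tts", "application/json", bytes.NewReader(payload))
	if err != nil {
		return err
	}
	defer resp.Body.Close()

	out, err := os.Create(outPath)
	if err != nil {
		return err
	}
	defer out.Close()
	// The handler streams the generated file back through c.Download.
	_, err = io.Copy(out, resp.Body)
	return err
}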
609 core/http/endpoints/openai/chat.go Normal file
|
@ -0,0 +1,609 @@
|
|||
package openai
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"bytes"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/go-skynet/LocalAI/core/backend"
|
||||
"github.com/go-skynet/LocalAI/core/config"
|
||||
"github.com/go-skynet/LocalAI/core/schema"
|
||||
"github.com/go-skynet/LocalAI/pkg/grammar"
|
||||
model "github.com/go-skynet/LocalAI/pkg/model"
|
||||
"github.com/go-skynet/LocalAI/pkg/utils"
|
||||
"github.com/gofiber/fiber/v2"
|
||||
"github.com/google/uuid"
|
||||
"github.com/rs/zerolog/log"
|
||||
"github.com/valyala/fasthttp"
|
||||
)
|
||||
|
||||
func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startupOptions *config.ApplicationConfig) func(c *fiber.Ctx) error {
|
||||
emptyMessage := ""
|
||||
id := uuid.New().String()
|
||||
created := int(time.Now().Unix())
|
||||
|
||||
process := func(s string, req *schema.OpenAIRequest, config *config.BackendConfig, loader *model.ModelLoader, responses chan schema.OpenAIResponse) {
|
||||
initialMessage := schema.OpenAIResponse{
|
||||
ID: id,
|
||||
Created: created,
|
||||
Model: req.Model, // we have to return what the user sent here, due to OpenAI spec.
|
||||
Choices: []schema.Choice{{Delta: &schema.Message{Role: "assistant", Content: &emptyMessage}}},
|
||||
Object: "chat.completion.chunk",
|
||||
}
|
||||
responses <- initialMessage
|
||||
|
||||
ComputeChoices(req, s, config, startupOptions, loader, func(s string, c *[]schema.Choice) {}, func(s string, usage backend.TokenUsage) bool {
|
||||
resp := schema.OpenAIResponse{
|
||||
ID: id,
|
||||
Created: created,
|
||||
Model: req.Model, // we have to return what the user sent here, due to OpenAI spec.
|
||||
Choices: []schema.Choice{{Delta: &schema.Message{Content: &s}, Index: 0}},
|
||||
Object: "chat.completion.chunk",
|
||||
Usage: schema.OpenAIUsage{
|
||||
PromptTokens: usage.Prompt,
|
||||
CompletionTokens: usage.Completion,
|
||||
TotalTokens: usage.Prompt + usage.Completion,
|
||||
},
|
||||
}
|
||||
|
||||
responses <- resp
|
||||
return true
|
||||
})
|
||||
close(responses)
|
||||
}
|
||||
processTools := func(noAction string, prompt string, req *schema.OpenAIRequest, config *config.BackendConfig, loader *model.ModelLoader, responses chan schema.OpenAIResponse) {
|
||||
result := ""
|
||||
_, tokenUsage, _ := ComputeChoices(req, prompt, config, startupOptions, loader, func(s string, c *[]schema.Choice) {}, func(s string, usage backend.TokenUsage) bool {
|
||||
result += s
|
||||
// TODO: Change generated BNF grammar to be compliant with the schema so we can
|
||||
// stream the result token by token here.
|
||||
return true
|
||||
})
|
||||
|
||||
results := parseFunctionCall(result, config.FunctionsConfig.ParallelCalls)
|
||||
noActionToRun := len(results) > 0 && results[0].name == noAction
|
||||
|
||||
switch {
|
||||
case noActionToRun:
|
||||
initialMessage := schema.OpenAIResponse{
|
||||
ID: id,
|
||||
Created: created,
|
||||
Model: req.Model, // we have to return what the user sent here, due to OpenAI spec.
|
||||
Choices: []schema.Choice{{Delta: &schema.Message{Role: "assistant", Content: &emptyMessage}}},
|
||||
Object: "chat.completion.chunk",
|
||||
}
|
||||
responses <- initialMessage
|
||||
|
||||
result, err := handleQuestion(config, req, ml, startupOptions, results[0].arguments, prompt)
|
||||
if err != nil {
|
||||
log.Error().Msgf("error handling question: %s", err.Error())
|
||||
return
|
||||
}
|
||||
|
||||
resp := schema.OpenAIResponse{
|
||||
ID: id,
|
||||
Created: created,
|
||||
Model: req.Model, // we have to return what the user sent here, due to OpenAI spec.
|
||||
Choices: []schema.Choice{{Delta: &schema.Message{Content: &result}, Index: 0}},
|
||||
Object: "chat.completion.chunk",
|
||||
Usage: schema.OpenAIUsage{
|
||||
PromptTokens: tokenUsage.Prompt,
|
||||
CompletionTokens: tokenUsage.Completion,
|
||||
TotalTokens: tokenUsage.Prompt + tokenUsage.Completion,
|
||||
},
|
||||
}
|
||||
|
||||
responses <- resp
|
||||
|
||||
default:
|
||||
for i, ss := range results {
|
||||
name, args := ss.name, ss.arguments
|
||||
|
||||
initialMessage := schema.OpenAIResponse{
|
||||
ID: id,
|
||||
Created: created,
|
||||
Model: req.Model, // we have to return what the user sent here, due to OpenAI spec.
|
||||
Choices: []schema.Choice{{
|
||||
Delta: &schema.Message{
|
||||
Role: "assistant",
|
||||
ToolCalls: []schema.ToolCall{
|
||||
{
|
||||
Index: i,
|
||||
ID: id,
|
||||
Type: "function",
|
||||
FunctionCall: schema.FunctionCall{
|
||||
Name: name,
|
||||
},
|
||||
},
|
||||
},
|
||||
}}},
|
||||
Object: "chat.completion.chunk",
|
||||
}
|
||||
responses <- initialMessage
|
||||
|
||||
responses <- schema.OpenAIResponse{
|
||||
ID: id,
|
||||
Created: created,
|
||||
Model: req.Model, // we have to return what the user sent here, due to OpenAI spec.
|
||||
Choices: []schema.Choice{{
|
||||
Delta: &schema.Message{
|
||||
Role: "assistant",
|
||||
ToolCalls: []schema.ToolCall{
|
||||
{
|
||||
Index: i,
|
||||
ID: id,
|
||||
Type: "function",
|
||||
FunctionCall: schema.FunctionCall{
|
||||
Arguments: args,
|
||||
},
|
||||
},
|
||||
},
|
||||
}}},
|
||||
Object: "chat.completion.chunk",
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
close(responses)
|
||||
}
|
||||
|
||||
return func(c *fiber.Ctx) error {
|
||||
processFunctions := false
|
||||
funcs := grammar.Functions{}
|
||||
modelFile, input, err := readRequest(c, ml, startupOptions, true)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed reading parameters from request:%w", err)
|
||||
}
|
||||
|
||||
config, input, err := mergeRequestWithConfig(modelFile, input, cl, ml, startupOptions.Debug, startupOptions.Threads, startupOptions.ContextSize, startupOptions.F16)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed reading parameters from request:%w", err)
|
||||
}
|
||||
log.Debug().Msgf("Configuration read: %+v", config)
|
||||
|
||||
// Allow the user to set custom actions via config file
|
||||
// to be "embedded" in each model
|
||||
noActionName := "answer"
|
||||
noActionDescription := "use this action to answer without performing any action"
|
||||
|
||||
if config.FunctionsConfig.NoActionFunctionName != "" {
|
||||
noActionName = config.FunctionsConfig.NoActionFunctionName
|
||||
}
|
||||
if config.FunctionsConfig.NoActionDescriptionName != "" {
|
||||
noActionDescription = config.FunctionsConfig.NoActionDescriptionName
|
||||
}
|
||||
|
||||
if input.ResponseFormat.Type == "json_object" {
|
||||
input.Grammar = grammar.JSONBNF
|
||||
}
|
||||
|
||||
// process functions if we have any defined or if we have a function call string
|
||||
if len(input.Functions) > 0 && config.ShouldUseFunctions() {
|
||||
log.Debug().Msgf("Response needs to process functions")
|
||||
|
||||
processFunctions = true
|
||||
|
||||
noActionGrammar := grammar.Function{
|
||||
Name: noActionName,
|
||||
Description: noActionDescription,
|
||||
Parameters: map[string]interface{}{
|
||||
"properties": map[string]interface{}{
|
||||
"message": map[string]interface{}{
|
||||
"type": "string",
|
||||
"description": "The message to reply the user with",
|
||||
}},
|
||||
},
|
||||
}
|
||||
|
||||
// Append the no action function
|
||||
funcs = append(funcs, input.Functions...)
|
||||
if !config.FunctionsConfig.DisableNoAction {
|
||||
funcs = append(funcs, noActionGrammar)
|
||||
}
|
||||
|
||||
// Force picking one of the functions by the request
|
||||
if config.FunctionToCall() != "" {
|
||||
funcs = funcs.Select(config.FunctionToCall())
|
||||
}
|
||||
|
||||
// Update input grammar
|
||||
jsStruct := funcs.ToJSONStructure()
|
||||
config.Grammar = jsStruct.Grammar("", config.FunctionsConfig.ParallelCalls)
|
||||
} else if input.JSONFunctionGrammarObject != nil {
|
||||
config.Grammar = input.JSONFunctionGrammarObject.Grammar("", config.FunctionsConfig.ParallelCalls)
|
||||
}
|
||||
|
||||
// functions are not supported in stream mode (yet?)
|
||||
toStream := input.Stream
|
||||
|
||||
log.Debug().Msgf("Parameters: %+v", config)
|
||||
|
||||
var predInput string
|
||||
|
||||
suppressConfigSystemPrompt := false
|
||||
mess := []string{}
|
||||
for messageIndex, i := range input.Messages {
|
||||
var content string
|
||||
role := i.Role
|
||||
|
||||
// if this is a function call, we may want to customize the role so we can better display that the "assistant called a json action"
|
||||
// if an "assistant_function_call" role is defined, we use it, otherwise we use the role that is passed in the request
|
||||
if i.FunctionCall != nil && i.Role == "assistant" {
|
||||
roleFn := "assistant_function_call"
|
||||
r := config.Roles[roleFn]
|
||||
if r != "" {
|
||||
role = roleFn
|
||||
}
|
||||
}
|
||||
r := config.Roles[role]
|
||||
contentExists := i.Content != nil && i.StringContent != ""
|
||||
|
||||
// First attempt to populate content via a chat message specific template
|
||||
if config.TemplateConfig.ChatMessage != "" {
|
||||
chatMessageData := model.ChatMessageTemplateData{
|
||||
SystemPrompt: config.SystemPrompt,
|
||||
Role: r,
|
||||
RoleName: role,
|
||||
Content: i.StringContent,
|
||||
FunctionName: i.Name,
|
||||
MessageIndex: messageIndex,
|
||||
}
|
||||
templatedChatMessage, err := ml.EvaluateTemplateForChatMessage(config.TemplateConfig.ChatMessage, chatMessageData)
|
||||
if err != nil {
|
||||
log.Error().Msgf("error processing message %+v using template \"%s\": %v. Skipping!", chatMessageData, config.TemplateConfig.ChatMessage, err)
|
||||
} else {
|
||||
if templatedChatMessage == "" {
|
||||
log.Warn().Msgf("template \"%s\" produced blank output for %+v. Skipping!", config.TemplateConfig.ChatMessage, chatMessageData)
|
||||
continue // TODO: This continue is here intentionally to skip over the line `mess = append(mess, content)` below, and to prevent the sprintf
|
||||
}
|
||||
log.Debug().Msgf("templated message for chat: %s", templatedChatMessage)
|
||||
content = templatedChatMessage
|
||||
}
|
||||
}
|
||||
// If this model doesn't have such a template, or if that template fails to return a value, template at the message level.
|
||||
if content == "" {
|
||||
if r != "" {
|
||||
if contentExists {
|
||||
content = fmt.Sprint(r, i.StringContent)
|
||||
}
|
||||
if i.FunctionCall != nil {
|
||||
j, err := json.Marshal(i.FunctionCall)
|
||||
if err == nil {
|
||||
if contentExists {
|
||||
content += "\n" + fmt.Sprint(r, " ", string(j))
|
||||
} else {
|
||||
content = fmt.Sprint(r, " ", string(j))
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if contentExists {
|
||||
content = fmt.Sprint(i.StringContent)
|
||||
}
|
||||
if i.FunctionCall != nil {
|
||||
j, err := json.Marshal(i.FunctionCall)
|
||||
if err == nil {
|
||||
if contentExists {
|
||||
content += "\n" + string(j)
|
||||
} else {
|
||||
content = string(j)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// Special Handling: System. We care if it was printed at all, not the r branch, so check separately
|
||||
if contentExists && role == "system" {
|
||||
suppressConfigSystemPrompt = true
|
||||
}
|
||||
}
|
||||
|
||||
mess = append(mess, content)
|
||||
}
|
||||
|
||||
predInput = strings.Join(mess, "\n")
|
||||
log.Debug().Msgf("Prompt (before templating): %s", predInput)
|
||||
|
||||
if toStream {
|
||||
log.Debug().Msgf("Stream request received")
|
||||
c.Context().SetContentType("text/event-stream")
|
||||
//c.Response().Header.SetContentType(fiber.MIMETextHTMLCharsetUTF8)
|
||||
// c.Set("Content-Type", "text/event-stream")
|
||||
c.Set("Cache-Control", "no-cache")
|
||||
c.Set("Connection", "keep-alive")
|
||||
c.Set("Transfer-Encoding", "chunked")
|
||||
}
|
||||
|
||||
templateFile := ""
|
||||
|
||||
// A model can have a "file.bin.tmpl" file associated with a prompt template prefix
|
||||
if ml.ExistsInModelPath(fmt.Sprintf("%s.tmpl", config.Model)) {
|
||||
templateFile = config.Model
|
||||
}
|
||||
|
||||
if config.TemplateConfig.Chat != "" && !processFunctions {
|
||||
templateFile = config.TemplateConfig.Chat
|
||||
}
|
||||
|
||||
if config.TemplateConfig.Functions != "" && processFunctions {
|
||||
templateFile = config.TemplateConfig.Functions
|
||||
}
|
||||
|
||||
if templateFile != "" {
|
||||
templatedInput, err := ml.EvaluateTemplateForPrompt(model.ChatPromptTemplate, templateFile, model.PromptTemplateData{
|
||||
SystemPrompt: config.SystemPrompt,
|
||||
SuppressSystemPrompt: suppressConfigSystemPrompt,
|
||||
Input: predInput,
|
||||
Functions: funcs,
|
||||
})
|
||||
if err == nil {
|
||||
predInput = templatedInput
|
||||
log.Debug().Msgf("Template found, input modified to: %s", predInput)
|
||||
} else {
|
||||
log.Debug().Msgf("Template failed loading: %s", err.Error())
|
||||
}
|
||||
}
|
||||
|
||||
log.Debug().Msgf("Prompt (after templating): %s", predInput)
|
||||
if processFunctions {
|
||||
log.Debug().Msgf("Grammar: %+v", config.Grammar)
|
||||
}
|
||||
|
||||
switch {
|
||||
case toStream:
|
||||
responses := make(chan schema.OpenAIResponse)
|
||||
|
||||
if !processFunctions {
|
||||
go process(predInput, input, config, ml, responses)
|
||||
} else {
|
||||
go processTools(noActionName, predInput, input, config, ml, responses)
|
||||
}
|
||||
|
||||
c.Context().SetBodyStreamWriter(fasthttp.StreamWriter(func(w *bufio.Writer) {
|
||||
usage := &schema.OpenAIUsage{}
|
||||
toolsCalled := false
|
||||
for ev := range responses {
|
||||
usage = &ev.Usage // Copy a pointer to the latest usage chunk so that the stop message can reference it
|
||||
if len(ev.Choices[0].Delta.ToolCalls) > 0 {
|
||||
toolsCalled = true
|
||||
}
|
||||
var buf bytes.Buffer
|
||||
enc := json.NewEncoder(&buf)
|
||||
enc.Encode(ev)
|
||||
log.Debug().Msgf("Sending chunk: %s", buf.String())
|
||||
_, err := fmt.Fprintf(w, "data: %v\n", buf.String())
|
||||
if err != nil {
|
||||
log.Debug().Msgf("Sending chunk failed: %v", err)
|
||||
input.Cancel()
|
||||
break
|
||||
}
|
||||
w.Flush()
|
||||
}
|
||||
|
||||
finishReason := "stop"
|
||||
if toolsCalled {
|
||||
finishReason = "tool_calls"
|
||||
} else if toolsCalled && len(input.Tools) == 0 {
|
||||
finishReason = "function_call"
|
||||
}
|
||||
|
||||
resp := &schema.OpenAIResponse{
|
||||
ID: id,
|
||||
Created: created,
|
||||
Model: input.Model, // we have to return what the user sent here, due to OpenAI spec.
|
||||
Choices: []schema.Choice{
|
||||
{
|
||||
FinishReason: finishReason,
|
||||
Index: 0,
|
||||
Delta: &schema.Message{Content: &emptyMessage},
|
||||
}},
|
||||
Object: "chat.completion.chunk",
|
||||
Usage: *usage,
|
||||
}
|
||||
respData, _ := json.Marshal(resp)
|
||||
|
||||
w.WriteString(fmt.Sprintf("data: %s\n\n", respData))
|
||||
w.WriteString("data: [DONE]\n\n")
|
||||
w.Flush()
|
||||
}))
|
||||
return nil
|
||||
|
||||
// no streaming mode
|
||||
default:
|
||||
result, tokenUsage, err := ComputeChoices(input, predInput, config, startupOptions, ml, func(s string, c *[]schema.Choice) {
|
||||
if !processFunctions {
|
||||
// no function is called, just reply and use stop as finish reason
|
||||
*c = append(*c, schema.Choice{FinishReason: "stop", Index: 0, Message: &schema.Message{Role: "assistant", Content: &s}})
|
||||
return
|
||||
}
|
||||
|
||||
results := parseFunctionCall(s, config.FunctionsConfig.ParallelCalls)
|
||||
noActionsToRun := len(results) > 0 && results[0].name == noActionName
|
||||
|
||||
switch {
|
||||
case noActionsToRun:
|
||||
result, err := handleQuestion(config, input, ml, startupOptions, results[0].arguments, predInput)
|
||||
if err != nil {
|
||||
log.Error().Msgf("error handling question: %s", err.Error())
|
||||
return
|
||||
}
|
||||
*c = append(*c, schema.Choice{
|
||||
Message: &schema.Message{Role: "assistant", Content: &result}})
|
||||
default:
|
||||
toolChoice := schema.Choice{
|
||||
Message: &schema.Message{
|
||||
Role: "assistant",
|
||||
},
|
||||
}
|
||||
|
||||
if len(input.Tools) > 0 {
|
||||
toolChoice.FinishReason = "tool_calls"
|
||||
}
|
||||
|
||||
for _, ss := range results {
|
||||
name, args := ss.name, ss.arguments
|
||||
if len(input.Tools) > 0 {
|
||||
// If we are using tools, we condense the function calls into
|
||||
// a single response choice with all the tools
|
||||
toolChoice.Message.ToolCalls = append(toolChoice.Message.ToolCalls,
|
||||
schema.ToolCall{
|
||||
ID: id,
|
||||
Type: "function",
|
||||
FunctionCall: schema.FunctionCall{
|
||||
Name: name,
|
||||
Arguments: args,
|
||||
},
|
||||
},
|
||||
)
|
||||
} else {
|
||||
// otherwise we return more choices directly
|
||||
*c = append(*c, schema.Choice{
|
||||
FinishReason: "function_call",
|
||||
Message: &schema.Message{
|
||||
Role: "assistant",
|
||||
FunctionCall: map[string]interface{}{
|
||||
"name": name,
|
||||
"arguments": args,
|
||||
},
|
||||
},
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
if len(input.Tools) > 0 {
|
||||
// we need to append our result if we are using tools
|
||||
*c = append(*c, toolChoice)
|
||||
}
|
||||
}
|
||||
|
||||
}, nil)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
resp := &schema.OpenAIResponse{
|
||||
ID: id,
|
||||
Created: created,
|
||||
Model: input.Model, // we have to return what the user sent here, due to OpenAI spec.
|
||||
Choices: result,
|
||||
Object: "chat.completion",
|
||||
Usage: schema.OpenAIUsage{
|
||||
PromptTokens: tokenUsage.Prompt,
|
||||
CompletionTokens: tokenUsage.Completion,
|
||||
TotalTokens: tokenUsage.Prompt + tokenUsage.Completion,
|
||||
},
|
||||
}
|
||||
respData, _ := json.Marshal(resp)
|
||||
log.Debug().Msgf("Response: %s", respData)
|
||||
|
||||
// Return the prediction in the response body
|
||||
return c.JSON(resp)
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
func handleQuestion(config *config.BackendConfig, input *schema.OpenAIRequest, ml *model.ModelLoader, o *config.ApplicationConfig, args, prompt string) (string, error) {
|
||||
log.Debug().Msgf("nothing to do, computing a reply")
|
||||
|
||||
// If there is a message that the LLM already sends as part of the JSON reply, use it
|
||||
arguments := map[string]interface{}{}
|
||||
json.Unmarshal([]byte(args), &arguments)
|
||||
m, exists := arguments["message"]
|
||||
if exists {
|
||||
switch message := m.(type) {
|
||||
case string:
|
||||
if message != "" {
|
||||
log.Debug().Msgf("Reply received from LLM: %s", message)
|
||||
message = backend.Finetune(*config, prompt, message)
|
||||
log.Debug().Msgf("Reply received from LLM(finetuned): %s", message)
|
||||
|
||||
return message, nil
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
log.Debug().Msgf("No action received from LLM, without a message, computing a reply")
|
||||
// Otherwise ask the LLM to understand the JSON output and the context, and return a message
|
||||
// Note: This costs (in term of CPU/GPU) another computation
|
||||
config.Grammar = ""
|
||||
images := []string{}
|
||||
for _, m := range input.Messages {
|
||||
images = append(images, m.StringImages...)
|
||||
}
|
||||
|
||||
predFunc, err := backend.ModelInference(input.Context, prompt, images, ml, *config, o, nil)
|
||||
if err != nil {
|
||||
log.Error().Msgf("inference error: %s", err.Error())
|
||||
return "", err
|
||||
}
|
||||
|
||||
prediction, err := predFunc()
|
||||
if err != nil {
|
||||
log.Error().Msgf("inference error: %s", err.Error())
|
||||
return "", err
|
||||
}
|
||||
return backend.Finetune(*config, prompt, prediction.Response), nil
|
||||
}
|
||||
|
||||
type funcCallResults struct {
|
||||
name string
|
||||
arguments string
|
||||
}
|
||||
|
||||
func parseFunctionCall(llmresult string, multipleResults bool) []funcCallResults {
|
||||
results := []funcCallResults{}
|
||||
|
||||
// TODO: use generics to avoid this code duplication
|
||||
if multipleResults {
|
||||
ss := []map[string]interface{}{}
|
||||
s := utils.EscapeNewLines(llmresult)
|
||||
json.Unmarshal([]byte(s), &ss)
|
||||
log.Debug().Msgf("Function return: %s %+v", s, ss)
|
||||
|
||||
for _, s := range ss {
|
||||
func_name, ok := s["function"]
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
args, ok := s["arguments"]
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
d, _ := json.Marshal(args)
|
||||
funcName, ok := func_name.(string)
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
results = append(results, funcCallResults{name: funcName, arguments: string(d)})
|
||||
}
|
||||
} else {
|
||||
// As we have to change the result before processing, we can't stream the answer token-by-token (yet?)
|
||||
ss := map[string]interface{}{}
|
||||
// This prevents newlines from breaking JSON parsing for clients
|
||||
s := utils.EscapeNewLines(llmresult)
|
||||
json.Unmarshal([]byte(s), &ss)
|
||||
log.Debug().Msgf("Function return: %s %+v", s, ss)
|
||||
|
||||
// The grammar defines the function name as "function", while OpenAI returns "name"
|
||||
func_name, ok := ss["function"]
|
||||
if !ok {
|
||||
return results
|
||||
}
|
||||
// Similarly, while here arguments is a map[string]interface{}, OpenAI actually wants a stringified object
|
||||
args, ok := ss["arguments"] // arguments needs to be a string, but we return an object from the grammar result (TODO: fix)
|
||||
if !ok {
|
||||
return results
|
||||
}
|
||||
d, _ := json.Marshal(args)
|
||||
funcName, ok := func_name.(string)
|
||||
if !ok {
|
||||
return results
|
||||
}
|
||||
results = append(results, funcCallResults{name: funcName, arguments: string(d)})
|
||||
}
|
||||
|
||||
return results
|
||||
}
|
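As a concrete illustration of the shapes parseFunctionCall works with: the grammar-constrained model output is expected to be a JSON object keyed by "function" and "arguments" (or an array of such objects when parallel calls are enabled), which the helper reshapes into name/arguments pairs. The sample strings below are invented for illustration and would only compile inside this package, since the helper and its result type are unexported.

package openai

import "fmt"

// exampleFunctionCallShapes shows the inputs the single-call and parallel-call
// branches of parseFunctionCall above accept, and what they yield.
func exampleFunctionCallShapes() {
	single := `{"function": "get_weather", "arguments": {"city": "Rome"}}`
	parallel := `[{"function": "get_weather", "arguments": {"city": "Rome"}}, {"function": "get_time", "arguments": {"tz": "CET"}}]`

	for _, r := range parseFunctionCall(single, false) {
		fmt.Println(r.name, r.arguments) // get_weather {"city":"Rome"}
	}
	for _, r := range parseFunctionCall(parallel, true) {
		fmt.Println(r.name, r.arguments) // both calls, in order
	}
}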
199 core/http/endpoints/openai/completion.go Normal file
|
@ -0,0 +1,199 @@
|
|||
package openai
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"bytes"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"time"
|
||||
|
||||
"github.com/go-skynet/LocalAI/core/backend"
|
||||
"github.com/go-skynet/LocalAI/core/config"
|
||||
|
||||
"github.com/go-skynet/LocalAI/core/schema"
|
||||
"github.com/go-skynet/LocalAI/pkg/grammar"
|
||||
model "github.com/go-skynet/LocalAI/pkg/model"
|
||||
"github.com/gofiber/fiber/v2"
|
||||
"github.com/google/uuid"
|
||||
"github.com/rs/zerolog/log"
|
||||
"github.com/valyala/fasthttp"
|
||||
)
|
||||
|
||||
// https://platform.openai.com/docs/api-reference/completions
|
||||
func CompletionEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
|
||||
id := uuid.New().String()
|
||||
created := int(time.Now().Unix())
|
||||
|
||||
process := func(s string, req *schema.OpenAIRequest, config *config.BackendConfig, loader *model.ModelLoader, responses chan schema.OpenAIResponse) {
|
||||
ComputeChoices(req, s, config, appConfig, loader, func(s string, c *[]schema.Choice) {}, func(s string, usage backend.TokenUsage) bool {
|
||||
resp := schema.OpenAIResponse{
|
||||
ID: id,
|
||||
Created: created,
|
||||
Model: req.Model, // we have to return what the user sent here, due to OpenAI spec.
|
||||
Choices: []schema.Choice{
|
||||
{
|
||||
Index: 0,
|
||||
Text: s,
|
||||
},
|
||||
},
|
||||
Object: "text_completion",
|
||||
Usage: schema.OpenAIUsage{
|
||||
PromptTokens: usage.Prompt,
|
||||
CompletionTokens: usage.Completion,
|
||||
TotalTokens: usage.Prompt + usage.Completion,
|
||||
},
|
||||
}
|
||||
log.Debug().Msgf("Sending goroutine: %s", s)
|
||||
|
||||
responses <- resp
|
||||
return true
|
||||
})
|
||||
close(responses)
|
||||
}
|
||||
|
||||
return func(c *fiber.Ctx) error {
|
||||
modelFile, input, err := readRequest(c, ml, appConfig, true)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed reading parameters from request:%w", err)
|
||||
}
|
||||
|
||||
log.Debug().Msgf("`input`: %+v", input)
|
||||
|
||||
config, input, err := mergeRequestWithConfig(modelFile, input, cl, ml, appConfig.Debug, appConfig.Threads, appConfig.ContextSize, appConfig.F16)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed reading parameters from request:%w", err)
|
||||
}
|
||||
|
||||
if input.ResponseFormat.Type == "json_object" {
|
||||
input.Grammar = grammar.JSONBNF
|
||||
}
|
||||
|
||||
log.Debug().Msgf("Parameter Config: %+v", config)
|
||||
|
||||
if input.Stream {
|
||||
log.Debug().Msgf("Stream request received")
|
||||
c.Context().SetContentType("text/event-stream")
|
||||
//c.Response().Header.SetContentType(fiber.MIMETextHTMLCharsetUTF8)
|
||||
//c.Set("Content-Type", "text/event-stream")
|
||||
c.Set("Cache-Control", "no-cache")
|
||||
c.Set("Connection", "keep-alive")
|
||||
c.Set("Transfer-Encoding", "chunked")
|
||||
}
|
||||
|
||||
templateFile := ""
|
||||
|
||||
// A model can have a "file.bin.tmpl" file associated with a prompt template prefix
|
||||
if ml.ExistsInModelPath(fmt.Sprintf("%s.tmpl", config.Model)) {
|
||||
templateFile = config.Model
|
||||
}
|
||||
|
||||
if config.TemplateConfig.Completion != "" {
|
||||
templateFile = config.TemplateConfig.Completion
|
||||
}
|
||||
|
||||
if input.Stream {
|
||||
if len(config.PromptStrings) > 1 {
|
||||
return errors.New("cannot handle more than 1 `PromptStrings` when Streaming")
|
||||
}
|
||||
|
||||
predInput := config.PromptStrings[0]
|
||||
|
||||
if templateFile != "" {
|
||||
templatedInput, err := ml.EvaluateTemplateForPrompt(model.CompletionPromptTemplate, templateFile, model.PromptTemplateData{
|
||||
Input: predInput,
|
||||
})
|
||||
if err == nil {
|
||||
predInput = templatedInput
|
||||
log.Debug().Msgf("Template found, input modified to: %s", predInput)
|
||||
}
|
||||
}
|
||||
|
||||
responses := make(chan schema.OpenAIResponse)
|
||||
|
||||
go process(predInput, input, config, ml, responses)
|
||||
|
||||
c.Context().SetBodyStreamWriter(fasthttp.StreamWriter(func(w *bufio.Writer) {
|
||||
|
||||
for ev := range responses {
|
||||
var buf bytes.Buffer
|
||||
enc := json.NewEncoder(&buf)
|
||||
enc.Encode(ev)
|
||||
|
||||
log.Debug().Msgf("Sending chunk: %s", buf.String())
|
||||
fmt.Fprintf(w, "data: %v\n", buf.String())
|
||||
w.Flush()
|
||||
}
|
||||
|
||||
resp := &schema.OpenAIResponse{
|
||||
ID: id,
|
||||
Created: created,
|
||||
Model: input.Model, // we have to return what the user sent here, due to OpenAI spec.
|
||||
Choices: []schema.Choice{
|
||||
{
|
||||
Index: 0,
|
||||
FinishReason: "stop",
|
||||
},
|
||||
},
|
||||
Object: "text_completion",
|
||||
}
|
||||
respData, _ := json.Marshal(resp)
|
||||
|
||||
w.WriteString(fmt.Sprintf("data: %s\n\n", respData))
|
||||
w.WriteString("data: [DONE]\n\n")
|
||||
w.Flush()
|
||||
}))
|
||||
return nil
|
||||
}
|
||||
|
||||
var result []schema.Choice
|
||||
|
||||
totalTokenUsage := backend.TokenUsage{}
|
||||
|
||||
for k, i := range config.PromptStrings {
|
||||
if templateFile != "" {
|
||||
// A model can have a "file.bin.tmpl" file associated with a prompt template prefix
|
||||
templatedInput, err := ml.EvaluateTemplateForPrompt(model.CompletionPromptTemplate, templateFile, model.PromptTemplateData{
|
||||
SystemPrompt: config.SystemPrompt,
|
||||
Input: i,
|
||||
})
|
||||
if err == nil {
|
||||
i = templatedInput
|
||||
log.Debug().Msgf("Template found, input modified to: %s", i)
|
||||
}
|
||||
}
|
||||
|
||||
r, tokenUsage, err := ComputeChoices(
|
||||
input, i, config, appConfig, ml, func(s string, c *[]schema.Choice) {
|
||||
*c = append(*c, schema.Choice{Text: s, FinishReason: "stop", Index: k})
|
||||
}, nil)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
totalTokenUsage.Prompt += tokenUsage.Prompt
|
||||
totalTokenUsage.Completion += tokenUsage.Completion
|
||||
|
||||
result = append(result, r...)
|
||||
}
|
||||
|
||||
resp := &schema.OpenAIResponse{
|
||||
ID: id,
|
||||
Created: created,
|
||||
Model: input.Model, // we have to return what the user sent here, due to OpenAI spec.
|
||||
Choices: result,
|
||||
Object: "text_completion",
|
||||
Usage: schema.OpenAIUsage{
|
||||
PromptTokens: totalTokenUsage.Prompt,
|
||||
CompletionTokens: totalTokenUsage.Completion,
|
||||
TotalTokens: totalTokenUsage.Prompt + totalTokenUsage.Completion,
|
||||
},
|
||||
}
|
||||
|
||||
jsonResult, _ := json.Marshal(resp)
|
||||
log.Debug().Msgf("Response: %s", jsonResult)
|
||||
|
||||
// Return the prediction in the response body
|
||||
return c.JSON(resp)
|
||||
}
|
||||
}
|
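When stream is set, the handler above writes each chunk as a "data: ..." line followed by a final "data: [DONE]" marker. A sketch of consuming that stream from Go; the /v1/completions route is the usual OpenAI-compatible path and is assumed here rather than taken from this file.

package examples

import (
	"bufio"
	"bytes"
	"encoding/json"
	"fmt"
	"net/http"
	"strings"
)

// StreamCompletion reads the "data: ..." chunks emitted by the streaming branch above.
func StreamCompletion(base, model, prompt string) error {
	payload, err := json.Marshal(map[string]interface{}{
		"model":  model,
		"prompt": prompt,
		"stream": true,
	})
	if err != nil {
		return err
	}
	resp, err := http.Post(base+"/v1/completions", "application/json", bytes.NewReader(payload))
	if err != nil {
		return err
	}
	defer resp.Body.Close()

	scanner := bufio.NewScanner(resp.Body)
	for scanner.Scan() {
		line := scanner.Text()
		if !strings.HasPrefix(line, "data: ") {
			continue // skip blank separator lines
		}
		chunk := strings.TrimPrefix(line, "data: ")
		if chunk == "[DONE]" {
			break
		}
		fmt.Println("chunk:", chunk) // each chunk is a schema.OpenAIResponse JSON object
	}
	return scanner.Err()
}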
94 core/http/endpoints/openai/edit.go Normal file
|
@ -0,0 +1,94 @@
|
|||
package openai
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"time"
|
||||
|
||||
"github.com/go-skynet/LocalAI/core/backend"
|
||||
"github.com/go-skynet/LocalAI/core/config"
|
||||
|
||||
"github.com/go-skynet/LocalAI/core/schema"
|
||||
model "github.com/go-skynet/LocalAI/pkg/model"
|
||||
"github.com/gofiber/fiber/v2"
|
||||
"github.com/google/uuid"
|
||||
|
||||
"github.com/rs/zerolog/log"
|
||||
)
|
||||
|
||||
func EditEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
|
||||
return func(c *fiber.Ctx) error {
|
||||
modelFile, input, err := readRequest(c, ml, appConfig, true)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed reading parameters from request:%w", err)
|
||||
}
|
||||
|
||||
config, input, err := mergeRequestWithConfig(modelFile, input, cl, ml, appConfig.Debug, appConfig.Threads, appConfig.ContextSize, appConfig.F16)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed reading parameters from request:%w", err)
|
||||
}
|
||||
|
||||
log.Debug().Msgf("Parameter Config: %+v", config)
|
||||
|
||||
templateFile := ""
|
||||
|
||||
// A model can have a "file.bin.tmpl" file associated with a prompt template prefix
|
||||
if ml.ExistsInModelPath(fmt.Sprintf("%s.tmpl", config.Model)) {
|
||||
templateFile = config.Model
|
||||
}
|
||||
|
||||
if config.TemplateConfig.Edit != "" {
|
||||
templateFile = config.TemplateConfig.Edit
|
||||
}
|
||||
|
||||
var result []schema.Choice
|
||||
totalTokenUsage := backend.TokenUsage{}
|
||||
|
||||
for _, i := range config.InputStrings {
|
||||
if templateFile != "" {
|
||||
templatedInput, err := ml.EvaluateTemplateForPrompt(model.EditPromptTemplate, templateFile, model.PromptTemplateData{
|
||||
Input: i,
|
||||
Instruction: input.Instruction,
|
||||
SystemPrompt: config.SystemPrompt,
|
||||
})
|
||||
if err == nil {
|
||||
i = templatedInput
|
||||
log.Debug().Msgf("Template found, input modified to: %s", i)
|
||||
}
|
||||
}
|
||||
|
||||
r, tokenUsage, err := ComputeChoices(input, i, config, appConfig, ml, func(s string, c *[]schema.Choice) {
|
||||
*c = append(*c, schema.Choice{Text: s})
|
||||
}, nil)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
totalTokenUsage.Prompt += tokenUsage.Prompt
|
||||
totalTokenUsage.Completion += tokenUsage.Completion
|
||||
|
||||
result = append(result, r...)
|
||||
}
|
||||
|
||||
id := uuid.New().String()
|
||||
created := int(time.Now().Unix())
|
||||
resp := &schema.OpenAIResponse{
|
||||
ID: id,
|
||||
Created: created,
|
||||
Model: input.Model, // we have to return what the user sent here, due to OpenAI spec.
|
||||
Choices: result,
|
||||
Object: "edit",
|
||||
Usage: schema.OpenAIUsage{
|
||||
PromptTokens: totalTokenUsage.Prompt,
|
||||
CompletionTokens: totalTokenUsage.Completion,
|
||||
TotalTokens: totalTokenUsage.Prompt + totalTokenUsage.Completion,
|
||||
},
|
||||
}
|
||||
|
||||
jsonResult, _ := json.Marshal(resp)
|
||||
log.Debug().Msgf("Response: %s", jsonResult)
|
||||
|
||||
// Return the prediction in the response body
|
||||
return c.JSON(resp)
|
||||
}
|
||||
}
|
79 core/http/endpoints/openai/embeddings.go Normal file
|
@ -0,0 +1,79 @@
|
|||
package openai
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"time"
|
||||
|
||||
"github.com/go-skynet/LocalAI/core/backend"
|
||||
"github.com/go-skynet/LocalAI/core/config"
|
||||
"github.com/go-skynet/LocalAI/pkg/model"
|
||||
|
||||
"github.com/go-skynet/LocalAI/core/schema"
|
||||
"github.com/google/uuid"
|
||||
|
||||
"github.com/gofiber/fiber/v2"
|
||||
"github.com/rs/zerolog/log"
|
||||
)
|
||||
|
||||
// https://platform.openai.com/docs/api-reference/embeddings
|
||||
func EmbeddingsEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
|
||||
return func(c *fiber.Ctx) error {
|
||||
model, input, err := readRequest(c, ml, appConfig, true)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed reading parameters from request:%w", err)
|
||||
}
|
||||
|
||||
config, input, err := mergeRequestWithConfig(model, input, cl, ml, appConfig.Debug, appConfig.Threads, appConfig.ContextSize, appConfig.F16)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed reading parameters from request:%w", err)
|
||||
}
|
||||
|
||||
log.Debug().Msgf("Parameter Config: %+v", config)
|
||||
items := []schema.Item{}
|
||||
|
||||
for i, s := range config.InputToken {
|
||||
// get the model function to call for the result
|
||||
embedFn, err := backend.ModelEmbedding("", s, ml, *config, appConfig)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
embeddings, err := embedFn()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
items = append(items, schema.Item{Embedding: embeddings, Index: i, Object: "embedding"})
|
||||
}
|
||||
|
||||
for i, s := range config.InputStrings {
|
||||
// get the model function to call for the result
|
||||
embedFn, err := backend.ModelEmbedding(s, []int{}, ml, *config, appConfig)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
embeddings, err := embedFn()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
items = append(items, schema.Item{Embedding: embeddings, Index: i, Object: "embedding"})
|
||||
}
|
||||
|
||||
id := uuid.New().String()
|
||||
created := int(time.Now().Unix())
|
||||
resp := &schema.OpenAIResponse{
|
||||
ID: id,
|
||||
Created: created,
|
||||
Model: input.Model, // we have to return what the user sent here, due to OpenAI spec.
|
||||
Data: items,
|
||||
Object: "list",
|
||||
}
|
||||
|
||||
jsonResult, _ := json.Marshal(resp)
|
||||
log.Debug().Msgf("Response: %s", jsonResult)
|
||||
|
||||
// Return the prediction in the response body
|
||||
return c.JSON(resp)
|
||||
}
|
||||
}
|
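The endpoint above accepts either pre-tokenized input (InputToken) or plain strings (InputStrings) and returns one schema.Item per input, following the OpenAI embeddings response shape. A raw-HTTP sketch of calling it; the /v1/embeddings path and the JSON field names follow the OpenAI wire format and are assumptions here.

package examples

import (
	"bytes"
	"encoding/json"
	"fmt"
	"net/http"
)

// Embed posts one input string and prints the dimensionality of each returned vector.
func Embed(base, model, text string) error {
	payload, err := json.Marshal(map[string]interface{}{"model": model, "input": text})
	if err != nil {
		return err
	}
	resp, err := http.Post(base+"/v1/embeddings", "application/json", bytes.NewReader(payload))
	if err != nil {
		return err
	}
	defer resp.Body.Close()

	var out struct {
		Data []struct {
			Embedding []float32 `json:"embedding"`
			Index     int       `json:"index"`
		} `json:"data"`
	}
	if err := json.NewDecoder(resp.Body).Decode(&out); err != nil {
		return err
	}
	for _, item := range out.Data {
		fmt.Printf("item %d: %d dimensions\n", item.Index, len(item.Embedding))
	}
	return nil
}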
218 core/http/endpoints/openai/files.go Normal file
|
@ -0,0 +1,218 @@
|
|||
package openai
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"time"
|
||||
|
||||
"github.com/go-skynet/LocalAI/core/config"
|
||||
|
||||
"github.com/go-skynet/LocalAI/pkg/utils"
|
||||
"github.com/gofiber/fiber/v2"
|
||||
"github.com/rs/zerolog/log"
|
||||
)
|
||||
|
||||
var uploadedFiles []File
|
||||
|
||||
const uploadedFilesFile = "uploadedFiles.json"
|
||||
|
||||
// File represents the structure of a file object from the OpenAI API.
|
||||
type File struct {
|
||||
ID string `json:"id"` // Unique identifier for the file
|
||||
Object string `json:"object"` // Type of the object (e.g., "file")
|
||||
Bytes int `json:"bytes"` // Size of the file in bytes
|
||||
CreatedAt time.Time `json:"created_at"` // The time at which the file was created
|
||||
Filename string `json:"filename"` // The name of the file
|
||||
Purpose string `json:"purpose"` // The purpose of the file (e.g., "fine-tune", "classifications", etc.)
|
||||
}
|
||||
|
||||
func saveUploadConfig(uploadDir string) {
|
||||
file, err := json.MarshalIndent(uploadedFiles, "", " ")
|
||||
if err != nil {
|
||||
log.Error().Msgf("Failed to JSON marshal the uploadedFiles: %s", err)
|
||||
}
|
||||
|
||||
err = os.WriteFile(filepath.Join(uploadDir, uploadedFilesFile), file, 0644)
|
||||
if err != nil {
|
||||
log.Error().Msgf("Failed to save uploadedFiles to file: %s", err)
|
||||
}
|
||||
}
|
||||
|
||||
func LoadUploadConfig(uploadPath string) {
|
||||
uploadFilePath := filepath.Join(uploadPath, uploadedFilesFile)
|
||||
|
||||
_, err := os.Stat(uploadFilePath)
|
||||
if os.IsNotExist(err) {
|
||||
log.Debug().Msgf("No uploadedFiles file found at %s", uploadFilePath)
|
||||
return
|
||||
}
|
||||
|
||||
file, err := os.ReadFile(uploadFilePath)
|
||||
if err != nil {
|
||||
log.Error().Msgf("Failed to read file: %s", err)
|
||||
} else {
|
||||
err = json.Unmarshal(file, &uploadedFiles)
|
||||
if err != nil {
|
||||
log.Error().Msgf("Failed to JSON unmarshal the file into uploadedFiles: %s", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// UploadFilesEndpoint https://platform.openai.com/docs/api-reference/files/create
|
||||
func UploadFilesEndpoint(cm *config.BackendConfigLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
|
||||
return func(c *fiber.Ctx) error {
|
||||
file, err := c.FormFile("file")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Check the file size
|
||||
if file.Size > int64(appConfig.UploadLimitMB*1024*1024) {
|
||||
return c.Status(fiber.StatusBadRequest).SendString(fmt.Sprintf("File size %d exceeds upload limit %d", file.Size, appConfig.UploadLimitMB))
|
||||
}
|
||||
|
||||
purpose := c.FormValue("purpose", "") //TODO put in purpose dirs
|
||||
if purpose == "" {
|
||||
return c.Status(fiber.StatusBadRequest).SendString("Purpose is not defined")
|
||||
}
|
||||
|
||||
// Sanitize the filename to prevent directory traversal
|
||||
filename := utils.SanitizeFileName(file.Filename)
|
||||
|
||||
savePath := filepath.Join(appConfig.UploadDir, filename)
|
||||
|
||||
// Check if file already exists
|
||||
if _, err := os.Stat(savePath); !os.IsNotExist(err) {
|
||||
return c.Status(fiber.StatusBadRequest).SendString("File already exists")
|
||||
}
|
||||
|
||||
err = c.SaveFile(file, savePath)
|
||||
if err != nil {
|
||||
return c.Status(fiber.StatusInternalServerError).SendString("Failed to save file: " + err.Error())
|
||||
}
|
||||
|
||||
f := File{
|
||||
ID: fmt.Sprintf("file-%d", time.Now().Unix()),
|
||||
Object: "file",
|
||||
Bytes: int(file.Size),
|
||||
CreatedAt: time.Now(),
|
||||
Filename: file.Filename,
|
||||
Purpose: purpose,
|
||||
}
|
||||
|
||||
uploadedFiles = append(uploadedFiles, f)
|
||||
saveUploadConfig(appConfig.UploadDir)
|
||||
return c.Status(fiber.StatusOK).JSON(f)
|
||||
}
|
||||
}
|
||||
|
||||
// ListFilesEndpoint https://platform.openai.com/docs/api-reference/files/list
|
||||
func ListFilesEndpoint(cm *config.BackendConfigLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
|
||||
type ListFiles struct {
|
||||
Data []File
|
||||
Object string
|
||||
}
|
||||
|
||||
return func(c *fiber.Ctx) error {
|
||||
var listFiles ListFiles
|
||||
|
||||
purpose := c.Query("purpose")
|
||||
if purpose == "" {
|
||||
listFiles.Data = uploadedFiles
|
||||
} else {
|
||||
for _, f := range uploadedFiles {
|
||||
if purpose == f.Purpose {
|
||||
listFiles.Data = append(listFiles.Data, f)
|
||||
}
|
||||
}
|
||||
}
|
||||
listFiles.Object = "list"
|
||||
return c.Status(fiber.StatusOK).JSON(listFiles)
|
||||
}
|
||||
}
|
||||
|
||||
func getFileFromRequest(c *fiber.Ctx) (*File, error) {
|
||||
id := c.Params("file_id")
|
||||
if id == "" {
|
||||
return nil, fmt.Errorf("file_id parameter is required")
|
||||
}
|
||||
|
||||
for _, f := range uploadedFiles {
|
||||
if id == f.ID {
|
||||
return &f, nil
|
||||
}
|
||||
}
|
||||
|
||||
return nil, fmt.Errorf("unable to find file id %s", id)
|
||||
}
|
||||
|
||||
// GetFilesEndpoint https://platform.openai.com/docs/api-reference/files/retrieve
|
||||
func GetFilesEndpoint(cm *config.BackendConfigLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
|
||||
return func(c *fiber.Ctx) error {
|
||||
file, err := getFileFromRequest(c)
|
||||
if err != nil {
|
||||
return c.Status(fiber.StatusInternalServerError).SendString(err.Error())
|
||||
}
|
||||
|
||||
return c.JSON(file)
|
||||
}
|
||||
}
|
||||
|
||||
// DeleteFilesEndpoint https://platform.openai.com/docs/api-reference/files/delete
|
||||
func DeleteFilesEndpoint(cm *config.BackendConfigLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
|
||||
type DeleteStatus struct {
|
||||
Id string
|
||||
Object string
|
||||
Deleted bool
|
||||
}
|
||||
|
||||
return func(c *fiber.Ctx) error {
|
||||
file, err := getFileFromRequest(c)
|
||||
if err != nil {
|
||||
return c.Status(fiber.StatusInternalServerError).SendString(err.Error())
|
||||
}
|
||||
|
||||
err = os.Remove(filepath.Join(appConfig.UploadDir, file.Filename))
|
||||
if err != nil {
|
||||
// If the file doesn't exist on disk, just continue and remove it from the list
|
||||
if !errors.Is(err, os.ErrNotExist) {
|
||||
return c.Status(fiber.StatusInternalServerError).SendString(fmt.Sprintf("Unable to delete file: %s, %v", file.Filename, err))
|
||||
}
|
||||
}
|
||||
|
||||
// Remove upload from list
|
||||
for i, f := range uploadedFiles {
|
||||
if f.ID == file.ID {
|
||||
uploadedFiles = append(uploadedFiles[:i], uploadedFiles[i+1:]...)
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
saveUploadConfig(appConfig.UploadDir)
|
||||
return c.JSON(DeleteStatus{
|
||||
Id: file.ID,
|
||||
Object: "file",
|
||||
Deleted: true,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// GetFilesContentsEndpoint https://platform.openai.com/docs/api-reference/files/retrieve-contents
|
||||
func GetFilesContentsEndpoint(cm *config.BackendConfigLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
|
||||
return func(c *fiber.Ctx) error {
|
||||
file, err := getFileFromRequest(c)
|
||||
if err != nil {
|
||||
return c.Status(fiber.StatusInternalServerError).SendString(err.Error())
|
||||
}
|
||||
|
||||
fileContents, err := os.ReadFile(filepath.Join(appConfig.UploadDir, file.Filename))
|
||||
if err != nil {
|
||||
return c.Status(fiber.StatusInternalServerError).SendString(err.Error())
|
||||
}
|
||||
|
||||
return c.Send(fileContents)
|
||||
}
|
||||
}
|
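For reference, the files endpoints above follow the OpenAI files API shape: a multipart upload with a "file" part and a "purpose" form field, the same field names the tests below exercise. A minimal client-side sketch; the base URL and the /v1/files route are assumptions, since routing is not part of this file:

package main

import (
	"bytes"
	"fmt"
	"io"
	"mime/multipart"
	"net/http"
	"os"
)

func main() {
	body := &bytes.Buffer{}
	writer := multipart.NewWriter(body)

	// "file" is the multipart field name the upload tests use.
	part, err := writer.CreateFormFile("file", "dataset.jsonl")
	if err != nil {
		panic(err)
	}
	src, err := os.Open("dataset.jsonl")
	if err != nil {
		panic(err)
	}
	defer src.Close()
	if _, err := io.Copy(part, src); err != nil {
		panic(err)
	}
	// An empty "purpose" is rejected with 400 by the handler.
	_ = writer.WriteField("purpose", "fine-tune")
	writer.Close()

	resp, err := http.Post("http://localhost:8080/v1/files", writer.FormDataContentType(), body)
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()
	out, _ := io.ReadAll(resp.Body)
	fmt.Println(resp.Status, string(out))
}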
287
core/http/endpoints/openai/files_test.go
Normal file
|
@ -0,0 +1,287 @@
|
|||
package openai
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"mime/multipart"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
|
||||
"github.com/go-skynet/LocalAI/core/config"
|
||||
|
||||
utils2 "github.com/go-skynet/LocalAI/pkg/utils"
|
||||
"github.com/gofiber/fiber/v2"
|
||||
"github.com/stretchr/testify/assert"
|
||||
|
||||
"testing"
|
||||
)
|
||||
|
||||
type ListFiles struct {
|
||||
Data []File
|
||||
Object string
|
||||
}
|
||||
|
||||
func startUpApp() (app *fiber.App, option *config.ApplicationConfig, loader *config.BackendConfigLoader) {
|
||||
// Preparing the mocked objects
|
||||
loader = &config.BackendConfigLoader{}
|
||||
|
||||
option = &config.ApplicationConfig{
|
||||
UploadLimitMB: 10,
|
||||
UploadDir: "test_dir",
|
||||
}
|
||||
|
||||
_ = os.RemoveAll(option.UploadDir)
|
||||
|
||||
app = fiber.New(fiber.Config{
|
||||
BodyLimit: 20 * 1024 * 1024, // sets the limit to 20MB.
|
||||
})
|
||||
|
||||
// Create a Test Server
|
||||
app.Post("/files", UploadFilesEndpoint(loader, option))
|
||||
app.Get("/files", ListFilesEndpoint(loader, option))
|
||||
app.Get("/files/:file_id", GetFilesEndpoint(loader, option))
|
||||
app.Delete("/files/:file_id", DeleteFilesEndpoint(loader, option))
|
||||
app.Get("/files/:file_id/content", GetFilesContentsEndpoint(loader, option))
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
func TestUploadFileExceedSizeLimit(t *testing.T) {
|
||||
// Preparing the mocked objects
|
||||
loader := &config.BackendConfigLoader{}
|
||||
|
||||
option := &config.ApplicationConfig{
|
||||
UploadLimitMB: 10,
|
||||
UploadDir: "test_dir",
|
||||
}
|
||||
|
||||
_ = os.RemoveAll(option.UploadDir)
|
||||
|
||||
app := fiber.New(fiber.Config{
|
||||
BodyLimit: 20 * 1024 * 1024, // sets the limit to 20MB.
|
||||
})
|
||||
|
||||
// Create a Test Server
|
||||
app.Post("/files", UploadFilesEndpoint(loader, option))
|
||||
app.Get("/files", ListFilesEndpoint(loader, option))
|
||||
app.Get("/files/:file_id", GetFilesEndpoint(loader, option))
|
||||
app.Delete("/files/:file_id", DeleteFilesEndpoint(loader, option))
|
||||
app.Get("/files/:file_id/content", GetFilesContentsEndpoint(loader, option))
|
||||
|
||||
t.Run("UploadFilesEndpoint file size exceeds limit", func(t *testing.T) {
|
||||
resp, err := CallFilesUploadEndpoint(t, app, "foo.txt", "file", "fine-tune", 11, option)
|
||||
assert.NoError(t, err)
|
||||
|
||||
assert.Equal(t, fiber.StatusBadRequest, resp.StatusCode)
|
||||
assert.Contains(t, bodyToString(resp, t), "exceeds upload limit")
|
||||
})
|
||||
t.Run("UploadFilesEndpoint purpose not defined", func(t *testing.T) {
|
||||
resp, _ := CallFilesUploadEndpoint(t, app, "foo.txt", "file", "", 5, option)
|
||||
|
||||
assert.Equal(t, fiber.StatusBadRequest, resp.StatusCode)
|
||||
assert.Contains(t, bodyToString(resp, t), "Purpose is not defined")
|
||||
})
|
||||
t.Run("UploadFilesEndpoint file already exists", func(t *testing.T) {
|
||||
f1 := CallFilesUploadEndpointWithCleanup(t, app, "foo.txt", "file", "fine-tune", 5, option)
|
||||
|
||||
resp, err := CallFilesUploadEndpoint(t, app, "foo.txt", "file", "fine-tune", 5, option)
|
||||
fmt.Println(f1)
|
||||
fmt.Printf("ERror: %v", err)
|
||||
|
||||
assert.Equal(t, fiber.StatusBadRequest, resp.StatusCode)
|
||||
assert.Contains(t, bodyToString(resp, t), "File already exists")
|
||||
})
|
||||
t.Run("UploadFilesEndpoint file uploaded successfully", func(t *testing.T) {
|
||||
file := CallFilesUploadEndpointWithCleanup(t, app, "test.txt", "file", "fine-tune", 5, option)
|
||||
|
||||
// Check if file exists in the disk
|
||||
filePath := filepath.Join(option.UploadDir, utils2.SanitizeFileName("test.txt"))
|
||||
_, err := os.Stat(filePath)
|
||||
|
||||
assert.False(t, os.IsNotExist(err))
|
||||
assert.Equal(t, file.Bytes, 5242880)
|
||||
assert.NotEmpty(t, file.CreatedAt)
|
||||
assert.Equal(t, file.Filename, "test.txt")
|
||||
assert.Equal(t, file.Purpose, "fine-tune")
|
||||
})
|
||||
t.Run("ListFilesEndpoint without purpose parameter", func(t *testing.T) {
|
||||
resp, err := CallListFilesEndpoint(t, app, "")
|
||||
assert.NoError(t, err)
|
||||
|
||||
assert.Equal(t, 200, resp.StatusCode)
|
||||
|
||||
listFiles := responseToListFile(t, resp)
|
||||
if len(listFiles.Data) != len(uploadedFiles) {
|
||||
t.Errorf("Expected %v files, got %v files", len(uploadedFiles), len(listFiles.Data))
|
||||
}
|
||||
})
|
||||
t.Run("ListFilesEndpoint with valid purpose parameter", func(t *testing.T) {
|
||||
_ = CallFilesUploadEndpointWithCleanup(t, app, "test.txt", "file", "fine-tune", 5, option)
|
||||
|
||||
resp, err := CallListFilesEndpoint(t, app, "fine-tune")
|
||||
assert.NoError(t, err)
|
||||
|
||||
listFiles := responseToListFile(t, resp)
|
||||
if len(listFiles.Data) != 1 {
|
||||
t.Errorf("Expected 1 file, got %v files", len(listFiles.Data))
|
||||
}
|
||||
})
|
||||
t.Run("ListFilesEndpoint with invalid query parameter", func(t *testing.T) {
|
||||
resp, err := CallListFilesEndpoint(t, app, "not-so-fine-tune")
|
||||
assert.NoError(t, err)
|
||||
assert.Equal(t, 200, resp.StatusCode)
|
||||
|
||||
listFiles := responseToListFile(t, resp)
|
||||
|
||||
if len(listFiles.Data) != 0 {
|
||||
t.Errorf("Expected 0 file, got %v files", len(listFiles.Data))
|
||||
}
|
||||
})
|
||||
t.Run("GetFilesContentsEndpoint get file content", func(t *testing.T) {
|
||||
req := httptest.NewRequest("GET", "/files", nil)
|
||||
resp, _ := app.Test(req)
|
||||
assert.Equal(t, 200, resp.StatusCode)
|
||||
|
||||
var listFiles ListFiles
|
||||
if err := json.Unmarshal(bodyToByteArray(resp, t), &listFiles); err != nil {
|
||||
t.Errorf("Failed to decode response: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
if len(listFiles.Data) != 0 {
|
||||
t.Errorf("Expected 0 file, got %v files", len(listFiles.Data))
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
func CallListFilesEndpoint(t *testing.T, app *fiber.App, purpose string) (*http.Response, error) {
|
||||
var target string
|
||||
if purpose != "" {
|
||||
target = fmt.Sprintf("/files?purpose=%s", purpose)
|
||||
} else {
|
||||
target = "/files"
|
||||
}
|
||||
req := httptest.NewRequest("GET", target, nil)
|
||||
return app.Test(req)
|
||||
}
|
||||
|
||||
func CallFilesContentEndpoint(t *testing.T, app *fiber.App, fileId string) (*http.Response, error) {
|
||||
request := httptest.NewRequest("GET", "/files?file_id="+fileId, nil)
|
||||
return app.Test(request)
|
||||
}
|
||||
|
||||
func CallFilesUploadEndpoint(t *testing.T, app *fiber.App, fileName, tag, purpose string, fileSize int, appConfig *config.ApplicationConfig) (*http.Response, error) {
|
||||
// Create a test file of the requested size
|
||||
file := createTestFile(t, fileName, fileSize, appConfig)
|
||||
|
||||
// Creating a new HTTP Request
|
||||
body, writer := newMultipartFile(file.Name(), tag, purpose)
|
||||
|
||||
req := httptest.NewRequest(http.MethodPost, "/files", body)
|
||||
req.Header.Set(fiber.HeaderContentType, writer.FormDataContentType())
|
||||
return app.Test(req)
|
||||
}
|
||||
|
||||
func CallFilesUploadEndpointWithCleanup(t *testing.T, app *fiber.App, fileName, tag, purpose string, fileSize int, appConfig *config.ApplicationConfig) File {
|
||||
// Create a test file of the requested size
|
||||
file := createTestFile(t, fileName, fileSize, appConfig)
|
||||
|
||||
// Creating a new HTTP Request
|
||||
body, writer := newMultipartFile(file.Name(), tag, purpose)
|
||||
|
||||
req := httptest.NewRequest(http.MethodPost, "/files", body)
|
||||
req.Header.Set(fiber.HeaderContentType, writer.FormDataContentType())
|
||||
resp, err := app.Test(req)
|
||||
assert.NoError(t, err)
|
||||
f := responseToFile(t, resp)
|
||||
|
||||
id := f.ID
|
||||
t.Cleanup(func() {
|
||||
_, err := CallFilesDeleteEndpoint(t, app, id)
|
||||
assert.NoError(t, err)
|
||||
})
|
||||
|
||||
return f
|
||||
|
||||
}
|
||||
|
||||
func CallFilesDeleteEndpoint(t *testing.T, app *fiber.App, fileId string) (*http.Response, error) {
|
||||
target := fmt.Sprintf("/files/%s", fileId)
|
||||
req := httptest.NewRequest(http.MethodDelete, target, nil)
|
||||
return app.Test(req)
|
||||
}
|
||||
|
||||
// Helper to create multi-part file
|
||||
func newMultipartFile(filePath, tag, purpose string) (*strings.Reader, *multipart.Writer) {
|
||||
body := new(strings.Builder)
|
||||
writer := multipart.NewWriter(body)
|
||||
file, _ := os.Open(filePath)
|
||||
defer file.Close()
|
||||
part, _ := writer.CreateFormFile(tag, filepath.Base(filePath))
|
||||
io.Copy(part, file)
|
||||
|
||||
if purpose != "" {
|
||||
_ = writer.WriteField("purpose", purpose)
|
||||
}
|
||||
|
||||
writer.Close()
|
||||
return strings.NewReader(body.String()), writer
|
||||
}
|
||||
|
||||
// Helper to create test files
|
||||
func createTestFile(t *testing.T, name string, sizeMB int, option *config.ApplicationConfig) *os.File {
|
||||
err := os.MkdirAll(option.UploadDir, 0755)
|
||||
if err != nil {
|
||||
|
||||
t.Fatalf("Error MKDIR: %v", err)
|
||||
}
|
||||
|
||||
file, _ := os.Create(name)
|
||||
file.WriteString(strings.Repeat("a", sizeMB*1024*1024)) // sizeMB MB File
|
||||
|
||||
t.Cleanup(func() {
|
||||
os.Remove(name)
|
||||
os.RemoveAll(option.UploadDir)
|
||||
})
|
||||
return file
|
||||
}
|
||||
|
||||
func bodyToString(resp *http.Response, t *testing.T) string {
|
||||
return string(bodyToByteArray(resp, t))
|
||||
}
|
||||
|
||||
func bodyToByteArray(resp *http.Response, t *testing.T) []byte {
|
||||
bodyBytes, err := io.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
return bodyBytes
|
||||
}
|
||||
|
||||
func responseToFile(t *testing.T, resp *http.Response) File {
|
||||
var file File
|
||||
responseToString := bodyToString(resp, t)
|
||||
|
||||
err := json.NewDecoder(strings.NewReader(responseToString)).Decode(&file)
|
||||
if err != nil {
|
||||
t.Errorf("Failed to decode response: %s", err)
|
||||
}
|
||||
|
||||
return file
|
||||
}
|
||||
|
||||
func responseToListFile(t *testing.T, resp *http.Response) ListFiles {
|
||||
var listFiles ListFiles
|
||||
responseToString := bodyToString(resp, t)
|
||||
|
||||
err := json.NewDecoder(strings.NewReader(responseToString)).Decode(&listFiles)
|
||||
if err != nil {
|
||||
fmt.Printf("Failed to decode response: %s", err)
|
||||
}
|
||||
|
||||
return listFiles
|
||||
}
|
239
core/http/endpoints/openai/image.go
Normal file
|
@ -0,0 +1,239 @@
|
|||
package openai
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"encoding/base64"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/go-skynet/LocalAI/core/config"
|
||||
"github.com/go-skynet/LocalAI/core/schema"
|
||||
"github.com/google/uuid"
|
||||
|
||||
"github.com/go-skynet/LocalAI/core/backend"
|
||||
|
||||
model "github.com/go-skynet/LocalAI/pkg/model"
|
||||
"github.com/gofiber/fiber/v2"
|
||||
"github.com/rs/zerolog/log"
|
||||
)
|
||||
|
||||
func downloadFile(url string) (string, error) {
|
||||
// Get the data
|
||||
resp, err := http.Get(url)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
// Create the file
|
||||
out, err := os.CreateTemp("", "image")
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
defer out.Close()
|
||||
|
||||
// Write the body to file
|
||||
_, err = io.Copy(out, resp.Body)
|
||||
return out.Name(), err
|
||||
}
|
||||
|
||||
// https://platform.openai.com/docs/api-reference/images/create
|
||||
|
||||
/*
|
||||
*
|
||||
|
||||
curl http://localhost:8080/v1/images/generations \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{
|
||||
"prompt": "A cute baby sea otter",
|
||||
"n": 1,
|
||||
"size": "512x512"
|
||||
}'
|
||||
|
||||
*
|
||||
*/
|
||||
func ImageEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
|
||||
return func(c *fiber.Ctx) error {
|
||||
m, input, err := readRequest(c, ml, appConfig, false)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed reading parameters from request:%w", err)
|
||||
}
|
||||
|
||||
if m == "" {
|
||||
m = model.StableDiffusionBackend
|
||||
}
|
||||
log.Debug().Msgf("Loading model: %+v", m)
|
||||
|
||||
config, input, err := mergeRequestWithConfig(m, input, cl, ml, appConfig.Debug, 0, 0, false)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed reading parameters from request:%w", err)
|
||||
}
|
||||
|
||||
src := ""
|
||||
if input.File != "" {
|
||||
|
||||
fileData := []byte{}
|
||||
// check if input.File is a URL; if so, download it and save it
|
||||
// to a temporary file
|
||||
if strings.HasPrefix(input.File, "http://") || strings.HasPrefix(input.File, "https://") {
|
||||
out, err := downloadFile(input.File)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed downloading file:%w", err)
|
||||
}
|
||||
defer os.RemoveAll(out)
|
||||
|
||||
fileData, err = os.ReadFile(out)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed reading file:%w", err)
|
||||
}
|
||||
|
||||
} else {
|
||||
// base 64 decode the file and write it somewhere
|
||||
// that we will cleanup
|
||||
fileData, err = base64.StdEncoding.DecodeString(input.File)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
// Create a temporary file
|
||||
outputFile, err := os.CreateTemp(appConfig.ImageDir, "b64")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
// write the base64 result
|
||||
writer := bufio.NewWriter(outputFile)
|
||||
_, err = writer.Write(fileData)
|
||||
if err != nil {
|
||||
outputFile.Close()
|
||||
return err
|
||||
}
|
||||
outputFile.Close()
|
||||
src = outputFile.Name()
|
||||
defer os.RemoveAll(src)
|
||||
}
|
||||
|
||||
log.Debug().Msgf("Parameter Config: %+v", config)
|
||||
|
||||
switch config.Backend {
|
||||
case "stablediffusion":
|
||||
config.Backend = model.StableDiffusionBackend
|
||||
case "tinydream":
|
||||
config.Backend = model.TinyDreamBackend
|
||||
case "":
|
||||
config.Backend = model.StableDiffusionBackend
|
||||
}
|
||||
|
||||
sizeParts := strings.Split(input.Size, "x")
|
||||
if len(sizeParts) != 2 {
|
||||
return fmt.Errorf("invalid value for 'size'")
|
||||
}
|
||||
width, err := strconv.Atoi(sizeParts[0])
|
||||
if err != nil {
|
||||
return fmt.Errorf("invalid value for 'size'")
|
||||
}
|
||||
height, err := strconv.Atoi(sizeParts[1])
|
||||
if err != nil {
|
||||
return fmt.Errorf("invalid value for 'size'")
|
||||
}
|
||||
|
||||
b64JSON := false
|
||||
if input.ResponseFormat.Type == "b64_json" {
|
||||
b64JSON = true
|
||||
}
|
||||
// src and clip_skip
|
||||
var result []schema.Item
|
||||
for _, i := range config.PromptStrings {
|
||||
n := input.N
|
||||
if input.N == 0 {
|
||||
n = 1
|
||||
}
|
||||
for j := 0; j < n; j++ {
|
||||
prompts := strings.Split(i, "|")
|
||||
positive_prompt := prompts[0]
|
||||
negative_prompt := ""
|
||||
if len(prompts) > 1 {
|
||||
negative_prompt = prompts[1]
|
||||
}
|
||||
|
||||
mode := 0
|
||||
step := config.Step
|
||||
if step == 0 {
|
||||
step = 15
|
||||
}
|
||||
|
||||
if input.Mode != 0 {
|
||||
mode = input.Mode
|
||||
}
|
||||
|
||||
if input.Step != 0 {
|
||||
step = input.Step
|
||||
}
|
||||
|
||||
tempDir := ""
|
||||
if !b64JSON {
|
||||
tempDir = appConfig.ImageDir
|
||||
}
|
||||
// Create a temporary file
|
||||
outputFile, err := os.CreateTemp(tempDir, "b64")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
outputFile.Close()
|
||||
output := outputFile.Name() + ".png"
|
||||
// Rename the temporary file
|
||||
err = os.Rename(outputFile.Name(), output)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
baseURL := c.BaseURL()
|
||||
|
||||
fn, err := backend.ImageGeneration(height, width, mode, step, input.Seed, positive_prompt, negative_prompt, src, output, ml, *config, appConfig)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if err := fn(); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
item := &schema.Item{}
|
||||
|
||||
if b64JSON {
|
||||
defer os.RemoveAll(output)
|
||||
data, err := os.ReadFile(output)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
item.B64JSON = base64.StdEncoding.EncodeToString(data)
|
||||
} else {
|
||||
base := filepath.Base(output)
|
||||
item.URL = baseURL + "/generated-images/" + base
|
||||
}
|
||||
|
||||
result = append(result, *item)
|
||||
}
|
||||
}
|
||||
|
||||
id := uuid.New().String()
|
||||
created := int(time.Now().Unix())
|
||||
resp := &schema.OpenAIResponse{
|
||||
ID: id,
|
||||
Created: created,
|
||||
Data: result,
|
||||
}
|
||||
|
||||
jsonResult, _ := json.Marshal(resp)
|
||||
log.Debug().Msgf("Response: %s", jsonResult)
|
||||
|
||||
// Return the prediction in the response body
|
||||
return c.JSON(resp)
|
||||
}
|
||||
}
|
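Beyond the curl example embedded above, two behaviours of ImageEndpoint are easy to miss: the prompt is split on "|" into a positive and a negative part, and "size" must be "<width>x<height>". A small request sketch; the host, port and /v1/images/generations route are assumptions, as routing is not defined in this file:

package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"io"
	"net/http"
)

func main() {
	// Everything after "|" is used as the negative prompt.
	payload, _ := json.Marshal(map[string]interface{}{
		"prompt": "A cute baby sea otter|blurry, deformed",
		"n":      1,
		"size":   "512x512", // parsed into width and height
	})
	resp, err := http.Post("http://localhost:8080/v1/images/generations", "application/json", bytes.NewReader(payload))
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()
	out, _ := io.ReadAll(resp.Body)
	fmt.Println(string(out)) // data[].url points under /generated-images/ unless b64_json was requested
}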
55
core/http/endpoints/openai/inference.go
Normal file
|
@ -0,0 +1,55 @@
|
|||
package openai
|
||||
|
||||
import (
|
||||
"github.com/go-skynet/LocalAI/core/backend"
|
||||
"github.com/go-skynet/LocalAI/core/config"
|
||||
|
||||
"github.com/go-skynet/LocalAI/core/schema"
|
||||
model "github.com/go-skynet/LocalAI/pkg/model"
|
||||
)
|
||||
|
||||
func ComputeChoices(
|
||||
req *schema.OpenAIRequest,
|
||||
predInput string,
|
||||
config *config.BackendConfig,
|
||||
o *config.ApplicationConfig,
|
||||
loader *model.ModelLoader,
|
||||
cb func(string, *[]schema.Choice),
|
||||
tokenCallback func(string, backend.TokenUsage) bool) ([]schema.Choice, backend.TokenUsage, error) {
|
||||
n := req.N // number of completions to return
|
||||
result := []schema.Choice{}
|
||||
|
||||
if n == 0 {
|
||||
n = 1
|
||||
}
|
||||
|
||||
images := []string{}
|
||||
for _, m := range req.Messages {
|
||||
images = append(images, m.StringImages...)
|
||||
}
|
||||
|
||||
// get the model function to call for the result
|
||||
predFunc, err := backend.ModelInference(req.Context, predInput, images, loader, *config, o, tokenCallback)
|
||||
if err != nil {
|
||||
return result, backend.TokenUsage{}, err
|
||||
}
|
||||
|
||||
tokenUsage := backend.TokenUsage{}
|
||||
|
||||
for i := 0; i < n; i++ {
|
||||
prediction, err := predFunc()
|
||||
if err != nil {
|
||||
return result, backend.TokenUsage{}, err
|
||||
}
|
||||
|
||||
tokenUsage.Prompt += prediction.Usage.Prompt
|
||||
tokenUsage.Completion += prediction.Usage.Completion
|
||||
|
||||
finetunedResponse := backend.Finetune(*config, predInput, prediction.Response)
|
||||
cb(finetunedResponse, &result)
|
||||
|
||||
//result = append(result, Choice{Text: prediction})
|
||||
|
||||
}
|
||||
return result, tokenUsage, err
|
||||
}
|
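ComputeChoices runs the prediction function once per requested completion (req.N, defaulting to 1), accumulates prompt and completion token usage, and hands every fine-tuned response to the cb callback so the caller decides how it becomes a choice. A sketch of the intended call pattern, assuming it sits in this package next to the imports above; the helper name and prompt are illustrative only:

// completionChoices is a hypothetical helper showing how an endpoint can consume ComputeChoices.
func completionChoices(
	req *schema.OpenAIRequest,
	cfg *config.BackendConfig,
	appConfig *config.ApplicationConfig,
	ml *model.ModelLoader,
) ([]schema.Choice, backend.TokenUsage, error) {
	return ComputeChoices(req, "Once upon a time", cfg, appConfig, ml,
		// Each prediction is appended to the result as a plain text choice.
		func(s string, choices *[]schema.Choice) {
			*choices = append(*choices, schema.Choice{Text: s})
		},
		// No per-token callback: non-streaming usage.
		nil,
	)
}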
69
core/http/endpoints/openai/list.go
Normal file
|
@ -0,0 +1,69 @@
|
|||
package openai
|
||||
|
||||
import (
|
||||
"regexp"
|
||||
|
||||
"github.com/go-skynet/LocalAI/core/config"
|
||||
"github.com/go-skynet/LocalAI/core/schema"
|
||||
model "github.com/go-skynet/LocalAI/pkg/model"
|
||||
"github.com/gofiber/fiber/v2"
|
||||
)
|
||||
|
||||
func ListModelsEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader) func(ctx *fiber.Ctx) error {
|
||||
return func(c *fiber.Ctx) error {
|
||||
models, err := ml.ListModels()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
var mm map[string]interface{} = map[string]interface{}{}
|
||||
|
||||
dataModels := []schema.OpenAIModel{}
|
||||
|
||||
var filterFn func(name string) bool
|
||||
filter := c.Query("filter")
|
||||
|
||||
// If filter is not specified, do not filter the list by model name
|
||||
if filter == "" {
|
||||
filterFn = func(_ string) bool { return true }
|
||||
} else {
|
||||
// If filter _IS_ specified, we compile it to a regex which is used to create the filterFn
|
||||
rxp, err := regexp.Compile(filter)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
filterFn = func(name string) bool {
|
||||
return rxp.MatchString(name)
|
||||
}
|
||||
}
|
||||
|
||||
// By default, exclude any loose files that are already referenced by a configuration file.
|
||||
excludeConfigured := c.QueryBool("excludeConfigured", true)
|
||||
|
||||
// Start with the known configurations
|
||||
for _, c := range cl.GetAllBackendConfigs() {
|
||||
if excludeConfigured {
|
||||
mm[c.Model] = nil
|
||||
}
|
||||
|
||||
if filterFn(c.Name) {
|
||||
dataModels = append(dataModels, schema.OpenAIModel{ID: c.Name, Object: "model"})
|
||||
}
|
||||
}
|
||||
|
||||
// Then iterate through the loose files:
|
||||
for _, m := range models {
|
||||
// And only adds them if they shouldn't be skipped.
|
||||
if _, exists := mm[m]; !exists && filterFn(m) {
|
||||
dataModels = append(dataModels, schema.OpenAIModel{ID: m, Object: "model"})
|
||||
}
|
||||
}
|
||||
|
||||
return c.JSON(struct {
|
||||
Object string `json:"object"`
|
||||
Data []schema.OpenAIModel `json:"data"`
|
||||
}{
|
||||
Object: "list",
|
||||
Data: dataModels,
|
||||
})
|
||||
}
|
||||
}
|
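The filter query parameter is compiled into a regular expression, and excludeConfigured (true by default) hides loose model files that are already referenced by a configuration. A usage sketch; the base URL and the /v1/models route are assumptions:

package main

import (
	"fmt"
	"io"
	"net/http"
)

func main() {
	// Return only models whose name matches "llama", and also list loose files
	// that already have a configuration entry.
	resp, err := http.Get("http://localhost:8080/v1/models?filter=llama&excludeConfigured=false")
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()
	out, _ := io.ReadAll(resp.Body)
	fmt.Println(string(out)) // {"object":"list","data":[{"id":"...","object":"model"},...]}
}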
281
core/http/endpoints/openai/request.go
Normal file
|
@ -0,0 +1,281 @@
|
|||
package openai
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/base64"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"strings"
|
||||
|
||||
"github.com/go-skynet/LocalAI/core/config"
|
||||
fiberContext "github.com/go-skynet/LocalAI/core/http/ctx"
|
||||
"github.com/go-skynet/LocalAI/core/schema"
|
||||
"github.com/go-skynet/LocalAI/pkg/grammar"
|
||||
model "github.com/go-skynet/LocalAI/pkg/model"
|
||||
"github.com/gofiber/fiber/v2"
|
||||
"github.com/rs/zerolog/log"
|
||||
)
|
||||
|
||||
func readRequest(c *fiber.Ctx, ml *model.ModelLoader, o *config.ApplicationConfig, firstModel bool) (string, *schema.OpenAIRequest, error) {
|
||||
input := new(schema.OpenAIRequest)
|
||||
|
||||
// Get input data from the request body
|
||||
if err := c.BodyParser(input); err != nil {
|
||||
return "", nil, fmt.Errorf("failed parsing request body: %w", err)
|
||||
}
|
||||
|
||||
received, _ := json.Marshal(input)
|
||||
|
||||
ctx, cancel := context.WithCancel(o.Context)
|
||||
input.Context = ctx
|
||||
input.Cancel = cancel
|
||||
|
||||
log.Debug().Msgf("Request received: %s", string(received))
|
||||
|
||||
modelFile, err := fiberContext.ModelFromContext(c, ml, input.Model, firstModel)
|
||||
|
||||
return modelFile, input, err
|
||||
}
|
||||
|
||||
// getBase64Image checks whether the string is a URL; if it is, it downloads the image into memory,
|
||||
// encodes it in base64 and returns the base64 string
|
||||
func getBase64Image(s string) (string, error) {
|
||||
if strings.HasPrefix(s, "http") {
|
||||
// download the image
|
||||
resp, err := http.Get(s)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
// read the image data into memory
|
||||
data, err := io.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
// encode the image data in base64
|
||||
encoded := base64.StdEncoding.EncodeToString(data)
|
||||
|
||||
// return the base64 string
|
||||
return encoded, nil
|
||||
}
|
||||
|
||||
// if the string instead is prefixed with "data:image/jpeg;base64,", drop it
|
||||
if strings.HasPrefix(s, "data:image/jpeg;base64,") {
|
||||
return strings.ReplaceAll(s, "data:image/jpeg;base64,", ""), nil
|
||||
}
|
||||
return "", fmt.Errorf("not valid string")
|
||||
}
|
||||
|
||||
func updateRequestConfig(config *config.BackendConfig, input *schema.OpenAIRequest) {
|
||||
if input.Echo {
|
||||
config.Echo = input.Echo
|
||||
}
|
||||
if input.TopK != 0 {
|
||||
config.TopK = input.TopK
|
||||
}
|
||||
if input.TopP != 0 {
|
||||
config.TopP = input.TopP
|
||||
}
|
||||
|
||||
if input.Backend != "" {
|
||||
config.Backend = input.Backend
|
||||
}
|
||||
|
||||
if input.ClipSkip != 0 {
|
||||
config.Diffusers.ClipSkip = input.ClipSkip
|
||||
}
|
||||
|
||||
if input.ModelBaseName != "" {
|
||||
config.AutoGPTQ.ModelBaseName = input.ModelBaseName
|
||||
}
|
||||
|
||||
if input.NegativePromptScale != 0 {
|
||||
config.NegativePromptScale = input.NegativePromptScale
|
||||
}
|
||||
|
||||
if input.UseFastTokenizer {
|
||||
config.UseFastTokenizer = input.UseFastTokenizer
|
||||
}
|
||||
|
||||
if input.NegativePrompt != "" {
|
||||
config.NegativePrompt = input.NegativePrompt
|
||||
}
|
||||
|
||||
if input.RopeFreqBase != 0 {
|
||||
config.RopeFreqBase = input.RopeFreqBase
|
||||
}
|
||||
|
||||
if input.RopeFreqScale != 0 {
|
||||
config.RopeFreqScale = input.RopeFreqScale
|
||||
}
|
||||
|
||||
if input.Grammar != "" {
|
||||
config.Grammar = input.Grammar
|
||||
}
|
||||
|
||||
if input.Temperature != 0 {
|
||||
config.Temperature = input.Temperature
|
||||
}
|
||||
|
||||
if input.Maxtokens != 0 {
|
||||
config.Maxtokens = input.Maxtokens
|
||||
}
|
||||
|
||||
switch stop := input.Stop.(type) {
|
||||
case string:
|
||||
if stop != "" {
|
||||
config.StopWords = append(config.StopWords, stop)
|
||||
}
|
||||
case []interface{}:
|
||||
for _, pp := range stop {
|
||||
if s, ok := pp.(string); ok {
|
||||
config.StopWords = append(config.StopWords, s)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if len(input.Tools) > 0 {
|
||||
for _, tool := range input.Tools {
|
||||
input.Functions = append(input.Functions, tool.Function)
|
||||
}
|
||||
}
|
||||
|
||||
if input.ToolsChoice != nil {
|
||||
var toolChoice grammar.Tool
|
||||
json.Unmarshal([]byte(input.ToolsChoice.(string)), &toolChoice)
|
||||
input.FunctionCall = map[string]interface{}{
|
||||
"name": toolChoice.Function.Name,
|
||||
}
|
||||
}
|
||||
|
||||
// Decode each request's message content
|
||||
index := 0
|
||||
for i, m := range input.Messages {
|
||||
switch content := m.Content.(type) {
|
||||
case string:
|
||||
input.Messages[i].StringContent = content
|
||||
case []interface{}:
|
||||
dat, _ := json.Marshal(content)
|
||||
c := []schema.Content{}
|
||||
json.Unmarshal(dat, &c)
|
||||
for _, pp := range c {
|
||||
if pp.Type == "text" {
|
||||
input.Messages[i].StringContent = pp.Text
|
||||
} else if pp.Type == "image_url" {
|
||||
// Detect if pp.ImageURL is a URL; if it is, download the image and encode it in base64:
|
||||
base64, err := getBase64Image(pp.ImageURL.URL)
|
||||
if err == nil {
|
||||
input.Messages[i].StringImages = append(input.Messages[i].StringImages, base64) // TODO: make sure that we only return base64 stuff
|
||||
// set a placeholder for each image
|
||||
input.Messages[i].StringContent = fmt.Sprintf("[img-%d]", index) + input.Messages[i].StringContent
|
||||
index++
|
||||
} else {
|
||||
fmt.Print("Failed encoding image", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if input.RepeatPenalty != 0 {
|
||||
config.RepeatPenalty = input.RepeatPenalty
|
||||
}
|
||||
|
||||
if input.Keep != 0 {
|
||||
config.Keep = input.Keep
|
||||
}
|
||||
|
||||
if input.Batch != 0 {
|
||||
config.Batch = input.Batch
|
||||
}
|
||||
|
||||
if input.F16 {
|
||||
config.F16 = input.F16
|
||||
}
|
||||
|
||||
if input.IgnoreEOS {
|
||||
config.IgnoreEOS = input.IgnoreEOS
|
||||
}
|
||||
|
||||
if input.Seed != 0 {
|
||||
config.Seed = input.Seed
|
||||
}
|
||||
|
||||
if input.Mirostat != 0 {
|
||||
config.LLMConfig.Mirostat = input.Mirostat
|
||||
}
|
||||
|
||||
if input.MirostatETA != 0 {
|
||||
config.LLMConfig.MirostatETA = input.MirostatETA
|
||||
}
|
||||
|
||||
if input.MirostatTAU != 0 {
|
||||
config.LLMConfig.MirostatTAU = input.MirostatTAU
|
||||
}
|
||||
|
||||
if input.TypicalP != 0 {
|
||||
config.TypicalP = input.TypicalP
|
||||
}
|
||||
|
||||
switch inputs := input.Input.(type) {
|
||||
case string:
|
||||
if inputs != "" {
|
||||
config.InputStrings = append(config.InputStrings, inputs)
|
||||
}
|
||||
case []interface{}:
|
||||
for _, pp := range inputs {
|
||||
switch i := pp.(type) {
|
||||
case string:
|
||||
config.InputStrings = append(config.InputStrings, i)
|
||||
case []interface{}:
|
||||
tokens := []int{}
|
||||
for _, ii := range i {
|
||||
tokens = append(tokens, int(ii.(float64)))
|
||||
}
|
||||
config.InputToken = append(config.InputToken, tokens)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Can be either a string or an object
|
||||
switch fnc := input.FunctionCall.(type) {
|
||||
case string:
|
||||
if fnc != "" {
|
||||
config.SetFunctionCallString(fnc)
|
||||
}
|
||||
case map[string]interface{}:
|
||||
var name string
|
||||
n, exists := fnc["name"]
|
||||
if exists {
|
||||
nn, e := n.(string)
|
||||
if e {
|
||||
name = nn
|
||||
}
|
||||
}
|
||||
config.SetFunctionCallNameString(name)
|
||||
}
|
||||
|
||||
switch p := input.Prompt.(type) {
|
||||
case string:
|
||||
config.PromptStrings = append(config.PromptStrings, p)
|
||||
case []interface{}:
|
||||
for _, pp := range p {
|
||||
if s, ok := pp.(string); ok {
|
||||
config.PromptStrings = append(config.PromptStrings, s)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func mergeRequestWithConfig(modelFile string, input *schema.OpenAIRequest, cm *config.BackendConfigLoader, loader *model.ModelLoader, debug bool, threads, ctx int, f16 bool) (*config.BackendConfig, *schema.OpenAIRequest, error) {
|
||||
cfg, err := config.LoadBackendConfigFileByName(modelFile, loader.ModelPath, cm, debug, threads, ctx, f16)
|
||||
|
||||
// Set the parameters for the language model prediction
|
||||
updateRequestConfig(cfg, input)
|
||||
|
||||
return cfg, input, err
|
||||
}
|
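mergeRequestWithConfig loads the model's backend config and then lets per-request fields override it through updateRequestConfig. A minimal sketch of that override behaviour; which of these fields sit directly on OpenAIRequest and which come from the embedded PredictionOptions is not shown in this diff, so treat the field access as an assumption:

// exampleOverride is a hypothetical illustration of updateRequestConfig.
func exampleOverride(cfg *config.BackendConfig) {
	req := &schema.OpenAIRequest{}
	req.Temperature = 0.2
	req.TopP = 0.9
	req.Stop = "###" // a string or a []interface{} of strings, both handled above
	req.Prompt = "Write a haiku about the sea"

	updateRequestConfig(cfg, req)
	// cfg.Temperature == 0.2, cfg.TopP == 0.9,
	// cfg.StopWords gains "###", cfg.PromptStrings gains the prompt.
}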
71
core/http/endpoints/openai/transcription.go
Normal file
|
@ -0,0 +1,71 @@
|
|||
package openai
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"os"
|
||||
"path"
|
||||
"path/filepath"
|
||||
|
||||
"github.com/go-skynet/LocalAI/core/backend"
|
||||
"github.com/go-skynet/LocalAI/core/config"
|
||||
model "github.com/go-skynet/LocalAI/pkg/model"
|
||||
|
||||
"github.com/gofiber/fiber/v2"
|
||||
"github.com/rs/zerolog/log"
|
||||
)
|
||||
|
||||
// https://platform.openai.com/docs/api-reference/audio/create
|
||||
func TranscriptEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
|
||||
return func(c *fiber.Ctx) error {
|
||||
m, input, err := readRequest(c, ml, appConfig, false)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed reading parameters from request:%w", err)
|
||||
}
|
||||
|
||||
config, input, err := mergeRequestWithConfig(m, input, cl, ml, appConfig.Debug, appConfig.Threads, appConfig.ContextSize, appConfig.F16)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed reading parameters from request:%w", err)
|
||||
}
|
||||
// retrieve the file data from the request
|
||||
file, err := c.FormFile("file")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
f, err := file.Open()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
dir, err := os.MkdirTemp("", "whisper")
|
||||
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer os.RemoveAll(dir)
|
||||
|
||||
dst := filepath.Join(dir, path.Base(file.Filename))
|
||||
dstFile, err := os.Create(dst)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if _, err := io.Copy(dstFile, f); err != nil {
|
||||
log.Debug().Msgf("Audio file copying error %+v - %+v - err %+v", file.Filename, dst, err)
|
||||
return err
|
||||
}
|
||||
|
||||
log.Debug().Msgf("Audio file copied to: %+v", dst)
|
||||
|
||||
tr, err := backend.ModelTranscription(dst, input.Language, ml, *config, appConfig)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
log.Debug().Msgf("Trascribed: %+v", tr)
|
||||
// TODO: handle different outputs here
|
||||
return c.Status(http.StatusOK).JSON(tr)
|
||||
}
|
||||
}
|
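TranscriptEndpoint reads the audio from a multipart "file" field and copies it to a temporary directory before calling the whisper backend. A client-side sketch; the /v1/audio/transcriptions route and the model name are assumptions:

package main

import (
	"bytes"
	"fmt"
	"io"
	"mime/multipart"
	"net/http"
	"os"
)

func main() {
	buf := &bytes.Buffer{}
	w := multipart.NewWriter(buf)

	// "file" matches c.FormFile("file") in the handler.
	part, err := w.CreateFormFile("file", "audio.wav")
	if err != nil {
		panic(err)
	}
	src, err := os.Open("audio.wav")
	if err != nil {
		panic(err)
	}
	defer src.Close()
	if _, err := io.Copy(part, src); err != nil {
		panic(err)
	}
	_ = w.WriteField("model", "whisper-1") // illustrative model name
	w.Close()

	resp, err := http.Post("http://localhost:8080/v1/audio/transcriptions", w.FormDataContentType(), buf)
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()
	out, _ := io.ReadAll(resp.Body)
	fmt.Println(string(out))
}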
21
core/schema/localai.go
Normal file
|
@ -0,0 +1,21 @@
|
|||
package schema
|
||||
|
||||
import (
|
||||
gopsutil "github.com/shirou/gopsutil/v3/process"
|
||||
)
|
||||
|
||||
type BackendMonitorRequest struct {
|
||||
Model string `json:"model" yaml:"model"`
|
||||
}
|
||||
|
||||
type BackendMonitorResponse struct {
|
||||
MemoryInfo *gopsutil.MemoryInfoStat
|
||||
MemoryPercent float32
|
||||
CPUPercent float64
|
||||
}
|
||||
|
||||
type TTSRequest struct {
|
||||
Model string `json:"model" yaml:"model"`
|
||||
Input string `json:"input" yaml:"input"`
|
||||
Backend string `json:"backend" yaml:"backend"`
|
||||
}
|
|
@ -3,8 +3,6 @@ package schema
|
|||
import (
|
||||
"context"
|
||||
|
||||
config "github.com/go-skynet/LocalAI/core/config"
|
||||
|
||||
"github.com/go-skynet/LocalAI/pkg/grammar"
|
||||
)
|
||||
|
||||
|
@ -108,10 +106,10 @@ type ChatCompletionResponseFormat struct {
|
|||
}
|
||||
|
||||
type OpenAIRequest struct {
|
||||
config.PredictionOptions
|
||||
PredictionOptions
|
||||
|
||||
Context context.Context
|
||||
Cancel context.CancelFunc
|
||||
Context context.Context `json:"-"`
|
||||
Cancel context.CancelFunc `json:"-"`
|
||||
|
||||
// whisper
|
||||
File string `json:"file" validate:"required"`
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
package config
|
||||
package schema
|
||||
|
||||
type PredictionOptions struct {
|
||||
|
140
core/services/backend_monitor.go
Normal file
|
@ -0,0 +1,140 @@
|
|||
package services
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
"github.com/go-skynet/LocalAI/core/config"
|
||||
"github.com/go-skynet/LocalAI/core/schema"
|
||||
"github.com/go-skynet/LocalAI/pkg/grpc/proto"
|
||||
"github.com/go-skynet/LocalAI/pkg/model"
|
||||
|
||||
"github.com/rs/zerolog/log"
|
||||
|
||||
gopsutil "github.com/shirou/gopsutil/v3/process"
|
||||
)
|
||||
|
||||
type BackendMonitor struct {
|
||||
configLoader *config.BackendConfigLoader
|
||||
modelLoader *model.ModelLoader
|
||||
options *config.ApplicationConfig // Taking options in case we need to inspect ExternalGRPCBackends, though that's out of scope for now, hence the name.
|
||||
}
|
||||
|
||||
func NewBackendMonitor(configLoader *config.BackendConfigLoader, modelLoader *model.ModelLoader, appConfig *config.ApplicationConfig) BackendMonitor {
|
||||
return BackendMonitor{
|
||||
configLoader: configLoader,
|
||||
modelLoader: modelLoader,
|
||||
options: appConfig,
|
||||
}
|
||||
}
|
||||
|
||||
func (bm BackendMonitor) getModelLoaderIDFromModelName(modelName string) (string, error) {
|
||||
config, exists := bm.configLoader.GetBackendConfig(modelName)
|
||||
var backendId string
|
||||
if exists {
|
||||
backendId = config.Model
|
||||
} else {
|
||||
// Last ditch effort: use it raw, see if a backend happens to match.
|
||||
backendId = modelName
|
||||
}
|
||||
|
||||
if !strings.HasSuffix(backendId, ".bin") {
|
||||
backendId = fmt.Sprintf("%s.bin", backendId)
|
||||
}
|
||||
|
||||
return backendId, nil
|
||||
}
|
||||
|
||||
func (bm *BackendMonitor) SampleLocalBackendProcess(model string) (*schema.BackendMonitorResponse, error) {
|
||||
config, exists := bm.configLoader.GetBackendConfig(model)
|
||||
var backend string
|
||||
if exists {
|
||||
backend = config.Model
|
||||
} else {
|
||||
// Last ditch effort: use it raw, see if a backend happens to match.
|
||||
backend = model
|
||||
}
|
||||
|
||||
if !strings.HasSuffix(backend, ".bin") {
|
||||
backend = fmt.Sprintf("%s.bin", backend)
|
||||
}
|
||||
|
||||
pid, err := bm.modelLoader.GetGRPCPID(backend)
|
||||
|
||||
if err != nil {
|
||||
log.Error().Msgf("model %s : failed to find pid %+v", model, err)
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Name is slightly frightening but this does _not_ create a new process, rather it looks up an existing process by PID.
|
||||
backendProcess, err := gopsutil.NewProcess(int32(pid))
|
||||
|
||||
if err != nil {
|
||||
log.Error().Msgf("model %s [PID %d] : error getting process info %+v", model, pid, err)
|
||||
return nil, err
|
||||
}
|
||||
|
||||
memInfo, err := backendProcess.MemoryInfo()
|
||||
|
||||
if err != nil {
|
||||
log.Error().Msgf("model %s [PID %d] : error getting memory info %+v", model, pid, err)
|
||||
return nil, err
|
||||
}
|
||||
|
||||
memPercent, err := backendProcess.MemoryPercent()
|
||||
if err != nil {
|
||||
log.Error().Msgf("model %s [PID %d] : error getting memory percent %+v", model, pid, err)
|
||||
return nil, err
|
||||
}
|
||||
|
||||
cpuPercent, err := backendProcess.CPUPercent()
|
||||
if err != nil {
|
||||
log.Error().Msgf("model %s [PID %d] : error getting cpu percent %+v", model, pid, err)
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return &schema.BackendMonitorResponse{
|
||||
MemoryInfo: memInfo,
|
||||
MemoryPercent: memPercent,
|
||||
CPUPercent: cpuPercent,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (bm BackendMonitor) CheckAndSample(modelName string) (*proto.StatusResponse, error) {
|
||||
backendId, err := bm.getModelLoaderIDFromModelName(modelName)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
modelAddr := bm.modelLoader.CheckIsLoaded(backendId)
|
||||
if modelAddr == "" {
|
||||
return nil, fmt.Errorf("backend %s is not currently loaded", backendId)
|
||||
}
|
||||
|
||||
status, rpcErr := modelAddr.GRPC(false, nil).Status(context.TODO())
|
||||
if rpcErr != nil {
|
||||
log.Warn().Msgf("backend %s experienced an error retrieving status info: %s", backendId, rpcErr.Error())
|
||||
val, slbErr := bm.SampleLocalBackendProcess(backendId)
|
||||
if slbErr != nil {
|
||||
return nil, fmt.Errorf("backend %s experienced an error retrieving status info via rpc: %s, then failed local node process sample: %s", backendId, rpcErr.Error(), slbErr.Error())
|
||||
}
|
||||
return &proto.StatusResponse{
|
||||
State: proto.StatusResponse_ERROR,
|
||||
Memory: &proto.MemoryUsageData{
|
||||
Total: val.MemoryInfo.VMS,
|
||||
Breakdown: map[string]uint64{
|
||||
"gopsutil-RSS": val.MemoryInfo.RSS,
|
||||
},
|
||||
},
|
||||
}, nil
|
||||
}
|
||||
return status, nil
|
||||
}
|
||||
|
||||
func (bm BackendMonitor) ShutdownModel(modelName string) error {
|
||||
backendId, err := bm.getModelLoaderIDFromModelName(modelName)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return bm.modelLoader.ShutdownModel(backendId)
|
||||
}
|
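CheckAndSample first asks the loaded backend for its status over gRPC and, if that call fails, falls back to sampling the local process via gopsutil. A wiring sketch, assuming it sits in this package and that the loaders come from the startup code later in this diff; the model name is illustrative:

// monitorExample is a hypothetical caller of the monitor above.
func monitorExample(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) {
	bm := NewBackendMonitor(cl, ml, appConfig)

	status, err := bm.CheckAndSample("llama-2-7b")
	if err != nil {
		log.Error().Msgf("status check failed: %v", err)
		return
	}
	if status.Memory != nil {
		log.Info().Msgf("backend state: %v, memory total: %d bytes", status.State, status.Memory.Total)
	}
}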
167
core/services/gallery.go
Normal file
|
@ -0,0 +1,167 @@
|
|||
package services
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"os"
|
||||
"strings"
|
||||
"sync"
|
||||
|
||||
"github.com/go-skynet/LocalAI/core/config"
|
||||
"github.com/go-skynet/LocalAI/pkg/gallery"
|
||||
"github.com/go-skynet/LocalAI/pkg/utils"
|
||||
"gopkg.in/yaml.v2"
|
||||
)
|
||||
|
||||
type GalleryService struct {
|
||||
modelPath string
|
||||
sync.Mutex
|
||||
C chan gallery.GalleryOp
|
||||
statuses map[string]*gallery.GalleryOpStatus
|
||||
}
|
||||
|
||||
func NewGalleryService(modelPath string) *GalleryService {
|
||||
return &GalleryService{
|
||||
modelPath: modelPath,
|
||||
C: make(chan gallery.GalleryOp),
|
||||
statuses: make(map[string]*gallery.GalleryOpStatus),
|
||||
}
|
||||
}
|
||||
|
||||
func prepareModel(modelPath string, req gallery.GalleryModel, cl *config.BackendConfigLoader, downloadStatus func(string, string, string, float64)) error {
|
||||
|
||||
config, err := gallery.GetGalleryConfigFromURL(req.URL)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
config.Files = append(config.Files, req.AdditionalFiles...)
|
||||
|
||||
return gallery.InstallModel(modelPath, req.Name, &config, req.Overrides, downloadStatus)
|
||||
}
|
||||
|
||||
func (g *GalleryService) UpdateStatus(s string, op *gallery.GalleryOpStatus) {
|
||||
g.Lock()
|
||||
defer g.Unlock()
|
||||
g.statuses[s] = op
|
||||
}
|
||||
|
||||
func (g *GalleryService) GetStatus(s string) *gallery.GalleryOpStatus {
|
||||
g.Lock()
|
||||
defer g.Unlock()
|
||||
|
||||
return g.statuses[s]
|
||||
}
|
||||
|
||||
func (g *GalleryService) GetAllStatus() map[string]*gallery.GalleryOpStatus {
|
||||
g.Lock()
|
||||
defer g.Unlock()
|
||||
|
||||
return g.statuses
|
||||
}
|
||||
|
||||
func (g *GalleryService) Start(c context.Context, cl *config.BackendConfigLoader) {
|
||||
go func() {
|
||||
for {
|
||||
select {
|
||||
case <-c.Done():
|
||||
return
|
||||
case op := <-g.C:
|
||||
utils.ResetDownloadTimers()
|
||||
|
||||
g.UpdateStatus(op.Id, &gallery.GalleryOpStatus{Message: "processing", Progress: 0})
|
||||
|
||||
// updates the status with an error
|
||||
updateError := func(e error) {
|
||||
g.UpdateStatus(op.Id, &gallery.GalleryOpStatus{Error: e, Processed: true, Message: "error: " + e.Error()})
|
||||
}
|
||||
|
||||
// displayDownload displays the download progress
|
||||
progressCallback := func(fileName string, current string, total string, percentage float64) {
|
||||
g.UpdateStatus(op.Id, &gallery.GalleryOpStatus{Message: "processing", FileName: fileName, Progress: percentage, TotalFileSize: total, DownloadedFileSize: current})
|
||||
utils.DisplayDownloadFunction(fileName, current, total, percentage)
|
||||
}
|
||||
|
||||
var err error
|
||||
// if the request contains a gallery name, we apply the gallery from the gallery list
|
||||
if op.GalleryName != "" {
|
||||
if strings.Contains(op.GalleryName, "@") {
|
||||
err = gallery.InstallModelFromGallery(op.Galleries, op.GalleryName, g.modelPath, op.Req, progressCallback)
|
||||
} else {
|
||||
err = gallery.InstallModelFromGalleryByName(op.Galleries, op.GalleryName, g.modelPath, op.Req, progressCallback)
|
||||
}
|
||||
} else {
|
||||
err = prepareModel(g.modelPath, op.Req, cl, progressCallback)
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
updateError(err)
|
||||
continue
|
||||
}
|
||||
|
||||
// Reload models
|
||||
err = cl.LoadBackendConfigsFromPath(g.modelPath)
|
||||
if err != nil {
|
||||
updateError(err)
|
||||
continue
|
||||
}
|
||||
|
||||
err = cl.Preload(g.modelPath)
|
||||
if err != nil {
|
||||
updateError(err)
|
||||
continue
|
||||
}
|
||||
|
||||
g.UpdateStatus(op.Id, &gallery.GalleryOpStatus{Processed: true, Message: "completed", Progress: 100})
|
||||
}
|
||||
}
|
||||
}()
|
||||
}
|
||||
|
||||
type galleryModel struct {
|
||||
gallery.GalleryModel `yaml:",inline"` // https://github.com/go-yaml/yaml/issues/63
|
||||
ID string `json:"id"`
|
||||
}
|
||||
|
||||
func processRequests(modelPath, s string, cm *config.BackendConfigLoader, galleries []gallery.Gallery, requests []galleryModel) error {
|
||||
var err error
|
||||
for _, r := range requests {
|
||||
utils.ResetDownloadTimers()
|
||||
if r.ID == "" {
|
||||
err = prepareModel(modelPath, r.GalleryModel, cm, utils.DisplayDownloadFunction)
|
||||
} else {
|
||||
if strings.Contains(r.ID, "@") {
|
||||
err = gallery.InstallModelFromGallery(
|
||||
galleries, r.ID, modelPath, r.GalleryModel, utils.DisplayDownloadFunction)
|
||||
} else {
|
||||
err = gallery.InstallModelFromGalleryByName(
|
||||
galleries, r.ID, modelPath, r.GalleryModel, utils.DisplayDownloadFunction)
|
||||
}
|
||||
}
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
func ApplyGalleryFromFile(modelPath, s string, cl *config.BackendConfigLoader, galleries []gallery.Gallery) error {
|
||||
dat, err := os.ReadFile(s)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
var requests []galleryModel
|
||||
|
||||
if err := yaml.Unmarshal(dat, &requests); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return processRequests(modelPath, s, cl, galleries, requests)
|
||||
}
|
||||
|
||||
func ApplyGalleryFromString(modelPath, s string, cl *config.BackendConfigLoader, galleries []gallery.Gallery) error {
|
||||
var requests []galleryModel
|
||||
err := json.Unmarshal([]byte(s), &requests)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return processRequests(modelPath, s, cl, galleries, requests)
|
||||
}
|
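ApplyGalleryFromString, used for preloading at startup, expects a JSON array where each entry either points at a gallery entry by id or describes a model inline through the embedded gallery.GalleryModel fields. A sketch with illustrative ids and URLs only:

// preloadExample is a hypothetical caller; cl would come from config.NewBackendConfigLoader().
func preloadExample(modelPath string, cl *config.BackendConfigLoader, galleries []gallery.Gallery) error {
	request := `[
	  {"id": "model-gallery@bert-embeddings"},
	  {"url": "github:go-skynet/model-gallery/gpt4all-j.yaml", "name": "gpt4all-j"}
	]`
	return ApplyGalleryFromString(modelPath, request, cl, galleries)
}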
54
core/services/metrics.go
Normal file
|
@ -0,0 +1,54 @@
|
|||
package services
|
||||
|
||||
import (
|
||||
"context"
|
||||
|
||||
"github.com/rs/zerolog/log"
|
||||
"go.opentelemetry.io/otel/attribute"
|
||||
"go.opentelemetry.io/otel/exporters/prometheus"
|
||||
"go.opentelemetry.io/otel/metric"
|
||||
metricApi "go.opentelemetry.io/otel/sdk/metric"
|
||||
)
|
||||
|
||||
type LocalAIMetricsService struct {
|
||||
Meter metric.Meter
|
||||
ApiTimeMetric metric.Float64Histogram
|
||||
}
|
||||
|
||||
func (m *LocalAIMetricsService) ObserveAPICall(method string, path string, duration float64) {
|
||||
opts := metric.WithAttributes(
|
||||
attribute.String("method", method),
|
||||
attribute.String("path", path),
|
||||
)
|
||||
m.ApiTimeMetric.Record(context.Background(), duration, opts)
|
||||
}
|
||||
|
||||
// NewLocalAIMetricsService bootstraps the OpenTelemetry pipeline.
|
||||
// If it does not return an error, make sure to call shutdown for proper cleanup.
|
||||
func NewLocalAIMetricsService() (*LocalAIMetricsService, error) {
|
||||
exporter, err := prometheus.New()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
provider := metricApi.NewMeterProvider(metricApi.WithReader(exporter))
|
||||
meter := provider.Meter("github.com/go-skynet/LocalAI")
|
||||
|
||||
apiTimeMetric, err := meter.Float64Histogram("api_call", metric.WithDescription("api calls"))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return &LocalAIMetricsService{
|
||||
Meter: meter,
|
||||
ApiTimeMetric: apiTimeMetric,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (lams LocalAIMetricsService) Shutdown() error {
|
||||
// TODO: Not sure how to actually do this:
|
||||
//// setupOTelSDK bootstraps the OpenTelemetry pipeline.
|
||||
//// If it does not return an error, make sure to call shutdown for proper cleanup.
|
||||
|
||||
log.Warn().Msgf("LocalAIMetricsService Shutdown called, but OTelSDK proper shutdown not yet implemented?")
|
||||
return nil
|
||||
}
|
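The metrics service records one histogram sample per API call, labelled with method and path, and exposes it through the Prometheus exporter. A short usage sketch; the duration unit is whatever the caller measures, milliseconds here:

// metricsExample is a hypothetical caller, e.g. from an HTTP middleware.
func metricsExample() error {
	metrics, err := NewLocalAIMetricsService()
	if err != nil {
		return err
	}
	defer func() { _ = metrics.Shutdown() }()

	// ... serve a request, measure how long it took ...
	metrics.ObserveAPICall("GET", "/v1/models", 12.5)
	return nil
}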
100
core/startup/config_file_watcher.go
Normal file
|
@ -0,0 +1,100 @@
|
|||
package startup
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"os"
|
||||
"path"
|
||||
|
||||
"github.com/fsnotify/fsnotify"
|
||||
"github.com/go-skynet/LocalAI/core/config"
|
||||
"github.com/imdario/mergo"
|
||||
"github.com/rs/zerolog/log"
|
||||
)
|
||||
|
||||
type WatchConfigDirectoryCloser func() error
|
||||
|
||||
func ReadApiKeysJson(configDir string, appConfig *config.ApplicationConfig) error {
|
||||
fileContent, err := os.ReadFile(path.Join(configDir, "api_keys.json"))
|
||||
if err == nil {
|
||||
// Parse JSON content from the file
|
||||
var fileKeys []string
|
||||
err := json.Unmarshal(fileContent, &fileKeys)
|
||||
if err == nil {
|
||||
appConfig.ApiKeys = append(appConfig.ApiKeys, fileKeys...)
|
||||
return nil
|
||||
}
|
||||
return err
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
func ReadExternalBackendsJson(configDir string, appConfig *config.ApplicationConfig) error {
|
||||
fileContent, err := os.ReadFile(path.Join(configDir, "external_backends.json"))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
// Parse JSON content from the file
|
||||
var fileBackends map[string]string
|
||||
err = json.Unmarshal(fileContent, &fileBackends)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
err = mergo.Merge(&appConfig.ExternalGRPCBackends, fileBackends)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
var CONFIG_FILE_UPDATES = map[string]func(configDir string, appConfig *config.ApplicationConfig) error{
|
||||
"api_keys.json": ReadApiKeysJson,
|
||||
"external_backends.json": ReadExternalBackendsJson,
|
||||
}
|
||||
|
||||
func WatchConfigDirectory(configDir string, appConfig *config.ApplicationConfig) (WatchConfigDirectoryCloser, error) {
|
||||
if len(configDir) == 0 {
|
||||
return nil, fmt.Errorf("configDir blank")
|
||||
}
|
||||
configWatcher, err := fsnotify.NewWatcher()
|
||||
if err != nil {
|
||||
log.Fatal().Msgf("Unable to create a watcher for the LocalAI Configuration Directory: %+v", err)
|
||||
}
|
||||
ret := func() error {
|
||||
configWatcher.Close()
|
||||
return nil
|
||||
}
|
||||
|
||||
// Start listening for events.
|
||||
go func() {
|
||||
for {
|
||||
select {
|
||||
case event, ok := <-configWatcher.Events:
|
||||
if !ok {
|
||||
return
|
||||
}
|
||||
if event.Has(fsnotify.Write) {
|
||||
for targetName, watchFn := range CONFIG_FILE_UPDATES {
|
||||
if event.Name == targetName {
|
||||
err := watchFn(configDir, appConfig)
|
||||
log.Warn().Msgf("WatchConfigDirectory goroutine for %s: failed to update options: %+v", targetName, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
case werr, ok := <-configWatcher.Errors:
|
||||
if !ok {
|
||||
return
|
||||
}
|
||||
log.Error().Msgf("WatchConfigDirectory goroutine error: %+v", err)
|
||||
}
|
||||
}
|
||||
}()
|
||||
|
||||
// Add a path.
|
||||
err = configWatcher.Add(configDir)
|
||||
if err != nil {
|
||||
return ret, fmt.Errorf("unable to establish watch on the LocalAI Configuration Directory: %+v", err)
|
||||
}
|
||||
|
||||
return ret, nil
|
||||
}
|
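The watcher reacts to writes on two well-known files in the dynamic configuration directory: api_keys.json is a plain JSON array of accepted keys, and external_backends.json maps a backend name to a gRPC address. A sketch that writes illustrative contents for both; the directory name and values are placeholders:

package main

import (
	"os"
	"path/filepath"
)

func main() {
	configDir := "configuration" // hypothetical LocalAI config directory

	// api_keys.json: appended to ApplicationConfig.ApiKeys by ReadApiKeysJson.
	_ = os.WriteFile(filepath.Join(configDir, "api_keys.json"),
		[]byte(`["sk-local-example-key"]`), 0600)

	// external_backends.json: merged into ExternalGRPCBackends by ReadExternalBackendsJson.
	_ = os.WriteFile(filepath.Join(configDir, "external_backends.json"),
		[]byte(`{"huggingface": "127.0.0.1:50051"}`), 0600)
}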
128
core/startup/startup.go
Normal file
|
@ -0,0 +1,128 @@
|
|||
package startup
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
|
||||
"github.com/go-skynet/LocalAI/core/config"
|
||||
"github.com/go-skynet/LocalAI/core/services"
|
||||
"github.com/go-skynet/LocalAI/internal"
|
||||
"github.com/go-skynet/LocalAI/pkg/assets"
|
||||
"github.com/go-skynet/LocalAI/pkg/model"
|
||||
pkgStartup "github.com/go-skynet/LocalAI/pkg/startup"
|
||||
"github.com/rs/zerolog"
|
||||
"github.com/rs/zerolog/log"
|
||||
)
|
||||
|
||||
func Startup(opts ...config.AppOption) (*config.BackendConfigLoader, *model.ModelLoader, *config.ApplicationConfig, error) {
|
||||
options := config.NewApplicationConfig(opts...)
|
||||
|
||||
zerolog.SetGlobalLevel(zerolog.InfoLevel)
|
||||
if options.Debug {
|
||||
zerolog.SetGlobalLevel(zerolog.DebugLevel)
|
||||
}
|
||||
|
||||
log.Info().Msgf("Starting LocalAI using %d threads, with models path: %s", options.Threads, options.ModelPath)
|
||||
log.Info().Msgf("LocalAI version: %s", internal.PrintableVersion())
|
||||
|
||||
// Make sure directories exists
|
||||
if options.ModelPath == "" {
|
||||
return nil, nil, nil, fmt.Errorf("options.ModelPath cannot be empty")
|
||||
}
|
||||
err := os.MkdirAll(options.ModelPath, 0755)
|
||||
if err != nil {
|
||||
return nil, nil, nil, fmt.Errorf("unable to create ModelPath: %q", err)
|
||||
}
|
||||
if options.ImageDir != "" {
|
||||
err := os.MkdirAll(options.ImageDir, 0755)
|
||||
if err != nil {
|
||||
return nil, nil, nil, fmt.Errorf("unable to create ImageDir: %q", err)
|
||||
}
|
||||
}
|
||||
if options.AudioDir != "" {
|
||||
err := os.MkdirAll(options.AudioDir, 0755)
|
||||
if err != nil {
|
||||
return nil, nil, nil, fmt.Errorf("unable to create AudioDir: %q", err)
|
||||
}
|
||||
}
|
||||
if options.UploadDir != "" {
|
||||
err := os.MkdirAll(options.UploadDir, 0755)
|
||||
if err != nil {
|
||||
return nil, nil, nil, fmt.Errorf("unable to create UploadDir: %q", err)
|
||||
}
|
||||
}
|
||||
|
||||
//
|
||||
pkgStartup.PreloadModelsConfigurations(options.ModelLibraryURL, options.ModelPath, options.ModelsURL...)
|
||||
|
||||
cl := config.NewBackendConfigLoader()
|
||||
ml := model.NewModelLoader(options.ModelPath)
|
||||
|
||||
if err := cl.LoadBackendConfigsFromPath(options.ModelPath); err != nil {
|
||||
log.Error().Msgf("error loading config files: %s", err.Error())
|
||||
}
|
||||
|
||||
if options.ConfigFile != "" {
|
||||
if err := cl.LoadBackendConfigFile(options.ConfigFile); err != nil {
|
||||
log.Error().Msgf("error loading config file: %s", err.Error())
|
||||
}
|
||||
}
|
||||
|
||||
if err := cl.Preload(options.ModelPath); err != nil {
|
||||
log.Error().Msgf("error downloading models: %s", err.Error())
|
||||
}
|
||||
|
||||
if options.PreloadJSONModels != "" {
|
||||
if err := services.ApplyGalleryFromString(options.ModelPath, options.PreloadJSONModels, cl, options.Galleries); err != nil {
|
||||
return nil, nil, nil, err
|
||||
}
|
||||
}
|
||||
|
||||
if options.PreloadModelsFromPath != "" {
|
||||
if err := services.ApplyGalleryFromFile(options.ModelPath, options.PreloadModelsFromPath, cl, options.Galleries); err != nil {
|
||||
return nil, nil, nil, err
|
||||
}
|
||||
}
|
||||
|
||||
if options.Debug {
|
||||
for _, v := range cl.ListBackendConfigs() {
|
||||
cfg, _ := cl.GetBackendConfig(v)
|
||||
log.Debug().Msgf("Model: %s (config: %+v)", v, cfg)
|
||||
}
|
||||
}
|
||||
|
||||
if options.AssetsDestination != "" {
|
||||
// Extract files from the embedded FS
|
||||
err := assets.ExtractFiles(options.BackendAssets, options.AssetsDestination)
|
||||
log.Debug().Msgf("Extracting backend assets files to %s", options.AssetsDestination)
|
||||
if err != nil {
|
||||
log.Warn().Msgf("Failed extracting backend assets files: %s (might be required for some backends to work properly, like gpt4all)", err)
|
||||
}
|
||||
}
|
||||
|
||||
// turn off any process that was started by GRPC if the context is canceled
|
||||
go func() {
|
||||
<-options.Context.Done()
|
||||
log.Debug().Msgf("Context canceled, shutting down")
|
||||
ml.StopAllGRPC()
|
||||
}()
|
||||
|
||||
if options.WatchDog {
|
||||
wd := model.NewWatchDog(
|
||||
ml,
|
||||
options.WatchDogBusyTimeout,
|
||||
options.WatchDogIdleTimeout,
|
||||
options.WatchDogBusy,
|
||||
options.WatchDogIdle)
|
||||
ml.SetWatchDog(wd)
|
||||
go wd.Run()
|
||||
go func() {
|
||||
<-options.Context.Done()
|
||||
log.Debug().Msgf("Context canceled, shutting down")
|
||||
wd.Shutdown()
|
||||
}()
|
||||
}
|
||||
|
||||
log.Info().Msg("core/startup process completed!")
|
||||
return cl, ml, options, nil
|
||||
}
|
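Startup wires the whole core together: it creates the application config from the supplied options, prepares directories, preloads models, and returns the config loader, the model loader and the resolved application config. A hypothetical entrypoint sketch; the With* option helpers and the context import are not part of this diff, so their names here are assumptions for illustration only:

// runExample shows the intended call shape of Startup; the option helper names are assumptions.
func runExample(ctx context.Context) error {
	cl, ml, appConfig, err := Startup(
		config.WithContext(ctx),          // assumed helper setting ApplicationConfig.Context
		config.WithModelPath("./models"), // assumed helper setting ApplicationConfig.ModelPath
	)
	if err != nil {
		return err
	}
	log.Info().Msgf("loaded %d model configs from %s", len(cl.ListBackendConfigs()), appConfig.ModelPath)
	_ = ml // handed to the HTTP layer in the real application
	return nil
}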