Revert "[Refactor]: Core/API Split" (#1550)

Revert "[Refactor]: Core/API Split (#1506)" This reverts commit ab7b4d5ee9.
2025-05-20 10:35:01 +00:00 · 2024-01-05 12:04:46 -05:00 · 2024-01-05 12:04:46 -05:00 · db926896bd
commit db926896bd
parent ab7b4d5ee9
77 changed files with 3132 additions and 3456 deletions
--- a/core/backend/embeddings.go
+++ b/core/backend/embeddings.go
@ -1,144 +0,0 @@
-package backend
-
-import (
-	"fmt"
-	"time"
-
-	"github.com/go-skynet/LocalAI/core/services"
-	"github.com/go-skynet/LocalAI/pkg/grpc"
-	"github.com/go-skynet/LocalAI/pkg/model"
-	"github.com/go-skynet/LocalAI/pkg/schema"
-	"github.com/google/uuid"
-	"github.com/rs/zerolog/log"
-)
-
-func ModelEmbedding(s string, tokens []int, loader *model.ModelLoader, c schema.Config, o *schema.StartupOptions) (func() ([]float32, error), error) {
-	if !c.Embeddings {
-		return nil, fmt.Errorf("endpoint disabled for this model by API configuration")
-	}
-
-	modelFile := c.Model
-
-	grpcOpts := gRPCModelOpts(c)
-
-	var inferenceModel interface{}
-	var err error
-
-	opts := modelOpts(c, o, []model.Option{
-		model.WithLoadGRPCLoadModelOpts(grpcOpts),
-		model.WithThreads(uint32(c.Threads)),
-		model.WithAssetDir(o.AssetsDestination),
-		model.WithModel(modelFile),
-		model.WithContext(o.Context),
-		model.WithExternalBackends(o.ExternalGRPCBackends, false),
-	})
-
-	if c.Backend == "" {
-		inferenceModel, err = loader.GreedyLoader(opts...)
-	} else {
-		opts = append(opts, model.WithBackendString(c.Backend))
-		inferenceModel, err = loader.BackendLoader(opts...)
-	}
-	if err != nil {
-		return nil, err
-	}
-
-	var fn func() ([]float32, error)
-	switch model := inferenceModel.(type) {
-	case *grpc.Client:
-		fn = func() ([]float32, error) {
-			predictOptions := gRPCPredictOpts(c, loader.ModelPath)
-			if len(tokens) > 0 {
-				embeds := []int32{}
-
-				for _, t := range tokens {
-					embeds = append(embeds, int32(t))
-				}
-				predictOptions.EmbeddingTokens = embeds
-
-				res, err := model.Embeddings(o.Context, predictOptions)
-				if err != nil {
-					return nil, err
-				}
-
-				return res.Embeddings, nil
-			}
-			predictOptions.Embeddings = s
-
-			res, err := model.Embeddings(o.Context, predictOptions)
-			if err != nil {
-				return nil, err
-			}
-
-			return res.Embeddings, nil
-		}
-	default:
-		fn = func() ([]float32, error) {
-			return nil, fmt.Errorf("embeddings not supported by the backend")
-		}
-	}
-
-	return func() ([]float32, error) {
-		embeds, err := fn()
-		if err != nil {
-			return embeds, err
-		}
-		// Remove trailing 0s
-		for i := len(embeds) - 1; i >= 0; i-- {
-			if embeds[i] == 0.0 {
-				embeds = embeds[:i]
-			} else {
-				break
-			}
-		}
-		return embeds, nil
-	}, nil
-}
-
-func EmbeddingOpenAIRequest(modelName string, input *schema.OpenAIRequest, cl *services.ConfigLoader, ml *model.ModelLoader, startupOptions *schema.StartupOptions) (*schema.OpenAIResponse, error) {
-	config, input, err := ReadConfigFromFileAndCombineWithOpenAIRequest(modelName, input, cl, startupOptions)
-	if err != nil {
-		return nil, fmt.Errorf("failed reading parameters from request:%w", err)
-	}
-
-	log.Debug().Msgf("Parameter Config: %+v", config)
-	items := []schema.Item{}
-
-	for i, s := range config.InputToken {
-		// get the model function to call for the result
-		embedFn, err := ModelEmbedding("", s, ml, *config, startupOptions)
-		if err != nil {
-			return nil, err
-		}
-
-		embeddings, err := embedFn()
-		if err != nil {
-			return nil, err
-		}
-		items = append(items, schema.Item{Embedding: embeddings, Index: i, Object: "embedding"})
-	}
-
-	for i, s := range config.InputStrings {
-		// get the model function to call for the result
-		embedFn, err := ModelEmbedding(s, []int{}, ml, *config, startupOptions)
-		if err != nil {
-			return nil, err
-		}
-
-		embeddings, err := embedFn()
-		if err != nil {
-			return nil, err
-		}
-		items = append(items, schema.Item{Embedding: embeddings, Index: i, Object: "embedding"})
-	}
-
-	id := uuid.New().String()
-	created := int(time.Now().Unix())
-	return &schema.OpenAIResponse{
-		ID:      id,
-		Created: created,
-		Model:   input.Model, // we have to return what the user sent here, due to OpenAI spec.
-		Data:    items,
-		Object:  "list",
-	}, nil
-}
--- a/core/backend/image.go
+++ b/core/backend/image.go
@ -1,210 +0,0 @@
-package backend
-
-import (
-	"encoding/base64"
-	"fmt"
-	"os"
-	"path"
-	"path/filepath"
-	"strconv"
-	"strings"
-	"time"
-
-	"github.com/go-skynet/LocalAI/core/services"
-	"github.com/go-skynet/LocalAI/pkg/grpc/proto"
-	"github.com/go-skynet/LocalAI/pkg/model"
-	"github.com/go-skynet/LocalAI/pkg/schema"
-	"github.com/go-skynet/LocalAI/pkg/utils"
-	"github.com/google/uuid"
-	"github.com/rs/zerolog/log"
-)
-
-func ImageGeneration(height, width, mode, step, seed int, positive_prompt, negative_prompt, src, dst string, loader *model.ModelLoader, c schema.Config, o *schema.StartupOptions) (func() error, error) {
-
-	opts := modelOpts(c, o, []model.Option{
-		model.WithBackendString(c.Backend),
-		model.WithAssetDir(o.AssetsDestination),
-		model.WithThreads(uint32(c.Threads)),
-		model.WithContext(o.Context),
-		model.WithModel(c.Model),
-		model.WithLoadGRPCLoadModelOpts(&proto.ModelOptions{
-			CUDA:          c.CUDA || c.Diffusers.CUDA,
-			SchedulerType: c.Diffusers.SchedulerType,
-			PipelineType:  c.Diffusers.PipelineType,
-			CFGScale:      c.Diffusers.CFGScale,
-			LoraAdapter:   c.LoraAdapter,
-			LoraScale:     c.LoraScale,
-			LoraBase:      c.LoraBase,
-			IMG2IMG:       c.Diffusers.IMG2IMG,
-			CLIPModel:     c.Diffusers.ClipModel,
-			CLIPSubfolder: c.Diffusers.ClipSubFolder,
-			CLIPSkip:      int32(c.Diffusers.ClipSkip),
-			ControlNet:    c.Diffusers.ControlNet,
-		}),
-		model.WithExternalBackends(o.ExternalGRPCBackends, false),
-	})
-
-	inferenceModel, err := loader.BackendLoader(
-		opts...,
-	)
-	if err != nil {
-		return nil, err
-	}
-
-	fn := func() error {
-		_, err := inferenceModel.GenerateImage(
-			o.Context,
-			&proto.GenerateImageRequest{
-				Height:           int32(height),
-				Width:            int32(width),
-				Mode:             int32(mode),
-				Step:             int32(step),
-				Seed:             int32(seed),
-				CLIPSkip:         int32(c.Diffusers.ClipSkip),
-				PositivePrompt:   positive_prompt,
-				NegativePrompt:   negative_prompt,
-				Dst:              dst,
-				Src:              src,
-				EnableParameters: c.Diffusers.EnableParameters,
-			})
-		return err
-	}
-
-	return fn, nil
-}
-
-func ImageGenerationOpenAIRequest(modelName string, input *schema.OpenAIRequest, cl *services.ConfigLoader, ml *model.ModelLoader, startupOptions *schema.StartupOptions) (*schema.OpenAIResponse, error) {
-	id := uuid.New().String()
-	created := int(time.Now().Unix())
-
-	if modelName == "" {
-		modelName = model.StableDiffusionBackend
-	}
-	log.Debug().Msgf("Loading model: %+v", modelName)
-
-	config, input, err := ReadConfigFromFileAndCombineWithOpenAIRequest(modelName, input, cl, startupOptions)
-	if err != nil {
-		return nil, fmt.Errorf("failed reading parameters from request: %w", err)
-	}
-
-	src := ""
-	if input.File != "" {
-		if strings.HasPrefix(input.File, "http://") || strings.HasPrefix(input.File, "https://") {
-			src, err = utils.CreateTempFileFromUrl(input.File, "", "image-src")
-			if err != nil {
-				return nil, fmt.Errorf("failed downloading file:%w", err)
-			}
-		} else {
-			src, err = utils.CreateTempFileFromBase64(input.File, "", "base64-image-src")
-			if err != nil {
-				return nil, fmt.Errorf("error creating temporary image source file: %w", err)
-			}
-		}
-	}
-
-	log.Debug().Msgf("Parameter Config: %+v", config)
-
-	switch config.Backend {
-	case "stablediffusion":
-		config.Backend = model.StableDiffusionBackend
-	case "tinydream":
-		config.Backend = model.TinyDreamBackend
-	case "":
-		config.Backend = model.StableDiffusionBackend
-	}
-
-	sizeParts := strings.Split(input.Size, "x")
-	if len(sizeParts) != 2 {
-		return nil, fmt.Errorf("invalid value for 'size'")
-	}
-	width, err := strconv.Atoi(sizeParts[0])
-	if err != nil {
-		return nil, fmt.Errorf("invalid value for 'size'")
-	}
-	height, err := strconv.Atoi(sizeParts[1])
-	if err != nil {
-		return nil, fmt.Errorf("invalid value for 'size'")
-	}
-
-	b64JSON := false
-	if input.ResponseFormat.Type == "b64_json" {
-		b64JSON = true
-	}
-	// src and clip_skip
-	var result []schema.Item
-	for _, i := range config.PromptStrings {
-		n := input.N
-		if input.N == 0 {
-			n = 1
-		}
-		for j := 0; j < n; j++ {
-			prompts := strings.Split(i, "|")
-			positive_prompt := prompts[0]
-			negative_prompt := ""
-			if len(prompts) > 1 {
-				negative_prompt = prompts[1]
-			}
-
-			mode := 0
-			step := config.Step
-			if step == 0 {
-				step = 15
-			}
-
-			if input.Mode != 0 {
-				mode = input.Mode
-			}
-
-			if input.Step != 0 {
-				step = input.Step
-			}
-
-			tempDir := ""
-			if !b64JSON {
-				tempDir = startupOptions.ImageDir
-			}
-			// Create a temporary file
-			outputFile, err := os.CreateTemp(tempDir, "b64")
-			if err != nil {
-				return nil, err
-			}
-			outputFile.Close()
-			output := outputFile.Name() + ".png"
-			// Rename the temporary file
-			err = os.Rename(outputFile.Name(), output)
-			if err != nil {
-				return nil, err
-			}
-
-			fn, err := ImageGeneration(height, width, mode, step, input.Seed, positive_prompt, negative_prompt, src, output, ml, *config, startupOptions)
-			if err != nil {
-				return nil, err
-			}
-			if err := fn(); err != nil {
-				return nil, err
-			}
-
-			item := &schema.Item{}
-
-			if b64JSON {
-				defer os.RemoveAll(output)
-				data, err := os.ReadFile(output)
-				if err != nil {
-					return nil, err
-				}
-				item.B64JSON = base64.StdEncoding.EncodeToString(data)
-			} else {
-				base := filepath.Base(output)
-				item.URL = path.Join(startupOptions.ImageDir, base)
-			}
-
-			result = append(result, *item)
-		}
-	}
-
-	return &schema.OpenAIResponse{
-		ID:      id,
-		Created: created,
-		Data:    result,
-	}, nil
-}
--- a/core/backend/llm.go
+++ b/core/backend/llm.go
@ -1,861 +0,0 @@
-package backend
-
-import (
-	"context"
-	"encoding/json"
-	"errors"
-	"fmt"
-	"os"
-	"path/filepath"
-	"regexp"
-	"strings"
-	"sync"
-	"time"
-	"unicode/utf8"
-
-	"github.com/go-skynet/LocalAI/core/services"
-	"github.com/go-skynet/LocalAI/pkg/gallery"
-	"github.com/go-skynet/LocalAI/pkg/grammar"
-	"github.com/go-skynet/LocalAI/pkg/grpc"
-	"github.com/go-skynet/LocalAI/pkg/model"
-	"github.com/go-skynet/LocalAI/pkg/schema"
-	"github.com/go-skynet/LocalAI/pkg/utils"
-	"github.com/google/uuid"
-	"github.com/rs/zerolog/log"
-)
-
-////////// TYPES //////////////
-
-type LLMResponse struct {
-	Response string // should this be []byte?
-	Usage    TokenUsage
-}
-
-// TODO: Test removing this and using the variant in pkg/schema someday?
-type TokenUsage struct {
-	Prompt     int
-	Completion int
-}
-
-type TemplateConfigBindingFn func(*schema.Config) *string
-
-// type LLMStreamProcessor func(s string, req *schema.OpenAIRequest, config *schema.Config, loader *model.ModelLoader, responses chan schema.OpenAIResponse)
-
-/////// CONSTS ///////////
-
-const DEFAULT_NO_ACTION_NAME = "answer"
-const DEFAULT_NO_ACTION_DESCRIPTION = "use this action to answer without performing any action"
-
-////// INFERENCE /////////
-
-func ModelInference(ctx context.Context, s string, images []string, loader *model.ModelLoader, c schema.Config, o *schema.StartupOptions, tokenCallback func(string, TokenUsage) bool) (func() (LLMResponse, error), error) {
-	modelFile := c.Model
-
-	grpcOpts := gRPCModelOpts(c)
-
-	var inferenceModel *grpc.Client
-	var err error
-
-	opts := modelOpts(c, o, []model.Option{
-		model.WithLoadGRPCLoadModelOpts(grpcOpts),
-		model.WithThreads(uint32(c.Threads)), // some models uses this to allocate threads during startup
-		model.WithAssetDir(o.AssetsDestination),
-		model.WithModel(modelFile),
-		model.WithContext(o.Context),
-		model.WithExternalBackends(o.ExternalGRPCBackends, false),
-	})
-
-	if c.Backend != "" {
-		opts = append(opts, model.WithBackendString(c.Backend))
-	}
-
-	// Check if the modelFile exists, if it doesn't try to load it from the gallery
-	if o.AutoloadGalleries { // experimental
-		if _, err := os.Stat(modelFile); os.IsNotExist(err) {
-			utils.ResetDownloadTimers()
-			// if we failed to load the model, we try to download it
-			err := gallery.InstallModelFromGalleryByName(o.Galleries, modelFile, loader.ModelPath, gallery.GalleryModel{}, utils.DisplayDownloadFunction)
-			if err != nil {
-				return nil, err
-			}
-		}
-	}
-
-	if c.Backend == "" {
-		inferenceModel, err = loader.GreedyLoader(opts...)
-	} else {
-		inferenceModel, err = loader.BackendLoader(opts...)
-	}
-
-	if err != nil {
-		return nil, err
-	}
-
-	// in GRPC, the backend is supposed to answer to 1 single token if stream is not supported
-	fn := func() (LLMResponse, error) {
-		opts := gRPCPredictOpts(c, loader.ModelPath)
-		opts.Prompt = s
-		opts.Images = images
-
-		tokenUsage := TokenUsage{}
-
-		// check the per-model feature flag for usage, since tokenCallback may have a cost.
-		// Defaults to off as for now it is still experimental
-		if c.FeatureFlag.Enabled("usage") {
-			userTokenCallback := tokenCallback
-			if userTokenCallback == nil {
-				userTokenCallback = func(token string, usage TokenUsage) bool {
-					return true
-				}
-			}
-
-			promptInfo, pErr := inferenceModel.TokenizeString(ctx, opts)
-			if pErr == nil && promptInfo.Length > 0 {
-				tokenUsage.Prompt = int(promptInfo.Length)
-			}
-
-			tokenCallback = func(token string, usage TokenUsage) bool {
-				tokenUsage.Completion++
-				return userTokenCallback(token, tokenUsage)
-			}
-		}
-
-		if tokenCallback != nil {
-			ss := ""
-
-			var partialRune []byte
-			err := inferenceModel.PredictStream(ctx, opts, func(chars []byte) {
-				partialRune = append(partialRune, chars...)
-
-				for len(partialRune) > 0 {
-					r, size := utf8.DecodeRune(partialRune)
-					if r == utf8.RuneError {
-						// incomplete rune, wait for more bytes
-						break
-					}
-
-					tokenCallback(string(r), tokenUsage)
-					ss += string(r)
-
-					partialRune = partialRune[size:]
-				}
-			})
-			return LLMResponse{
-				Response: ss,
-				Usage:    tokenUsage,
-			}, err
-		} else {
-			// TODO: Is the chicken bit the only way to get here? is that acceptable?
-			reply, err := inferenceModel.Predict(ctx, opts)
-			if err != nil {
-				return LLMResponse{}, err
-			}
-			return LLMResponse{
-				Response: string(reply.Message),
-				Usage:    tokenUsage,
-			}, err
-		}
-	}
-
-	return fn, nil
-}
-
-var cutstrings map[string]*regexp.Regexp = make(map[string]*regexp.Regexp)
-var mu sync.Mutex = sync.Mutex{}
-
-func Finetune(config schema.Config, input, prediction string) string {
-	if config.Echo {
-		prediction = input + prediction
-	}
-
-	for _, c := range config.Cutstrings {
-		mu.Lock()
-		reg, ok := cutstrings[c]
-		if !ok {
-			cutstrings[c] = regexp.MustCompile(c)
-			reg = cutstrings[c]
-		}
-		mu.Unlock()
-		prediction = reg.ReplaceAllString(prediction, "")
-	}
-
-	for _, c := range config.TrimSpace {
-		prediction = strings.TrimSpace(strings.TrimPrefix(prediction, c))
-	}
-
-	for _, c := range config.TrimSuffix {
-		prediction = strings.TrimSpace(strings.TrimSuffix(prediction, c))
-	}
-	return prediction
-
-}
-
-////// CONFIG AND REQUEST HANDLING ///////////////
-
-func ReadConfigFromFileAndCombineWithOpenAIRequest(modelFile string, input *schema.OpenAIRequest, cm *services.ConfigLoader, startupOptions *schema.StartupOptions) (*schema.Config, *schema.OpenAIRequest, error) {
-	// Load a config file if present after the model name
-	modelConfig := filepath.Join(startupOptions.ModelPath, modelFile+".yaml")
-
-	var cfg *schema.Config
-
-	defaults := func() {
-		cfg = schema.DefaultConfig(modelFile)
-		cfg.ContextSize = startupOptions.ContextSize
-		cfg.Threads = startupOptions.Threads
-		cfg.F16 = startupOptions.F16
-		cfg.Debug = startupOptions.Debug
-	}
-
-	cfgExisting, exists := cm.GetConfig(modelFile)
-	if !exists {
-		if _, err := os.Stat(modelConfig); err == nil {
-			if err := cm.LoadConfig(modelConfig); err != nil {
-				return nil, nil, fmt.Errorf("failed loading model config (%s) %s", modelConfig, err.Error())
-			}
-			cfgExisting, exists = cm.GetConfig(modelFile)
-			if exists {
-				cfg = &cfgExisting
-			} else {
-				defaults()
-			}
-		} else {
-			defaults()
-		}
-	} else {
-		cfg = &cfgExisting
-	}
-
-	// Set the parameters for the language model prediction
-	schema.UpdateConfigFromOpenAIRequest(cfg, input)
-
-	// Don't allow 0 as setting
-	if cfg.Threads == 0 {
-		if startupOptions.Threads != 0 {
-			cfg.Threads = startupOptions.Threads
-		} else {
-			cfg.Threads = 4
-		}
-	}
-
-	// Enforce debug flag if passed from CLI
-	if startupOptions.Debug {
-		cfg.Debug = true
-	}
-
-	return cfg, input, nil
-}
-
-func ComputeChoices(
-	req *schema.OpenAIRequest,
-	predInput string,
-	config *schema.Config,
-	o *schema.StartupOptions,
-	loader *model.ModelLoader,
-	cb func(string, *[]schema.Choice),
-	tokenCallback func(string, TokenUsage) bool) ([]schema.Choice, TokenUsage, error) {
-	n := req.N // number of completions to return
-	result := []schema.Choice{}
-
-	if n == 0 {
-		n = 1
-	}
-
-	images := []string{}
-	for _, m := range req.Messages {
-		images = append(images, m.StringImages...)
-	}
-
-	// get the model function to call for the result
-	predFunc, err := ModelInference(req.Context, predInput, images, loader, *config, o, tokenCallback)
-	if err != nil {
-		return result, TokenUsage{}, err
-	}
-
-	tokenUsage := TokenUsage{}
-
-	for i := 0; i < n; i++ {
-		prediction, err := predFunc()
-		if err != nil {
-			return result, TokenUsage{}, err
-		}
-
-		tokenUsage.Prompt += prediction.Usage.Prompt
-		tokenUsage.Completion += prediction.Usage.Completion
-
-		finetunedResponse := Finetune(*config, predInput, prediction.Response)
-		cb(finetunedResponse, &result)
-
-		//result = append(result, Choice{Text: prediction})
-
-	}
-	return result, tokenUsage, err
-}
-
-// TODO: No functions???? Commonize with prepareChatGenerationOpenAIRequest below?
-func prepareGenerationOpenAIRequest(bindingFn TemplateConfigBindingFn, modelName string, input *schema.OpenAIRequest, cl *services.ConfigLoader, ml *model.ModelLoader, startupOptions *schema.StartupOptions) (*schema.Config, error) {
-	config, input, err := ReadConfigFromFileAndCombineWithOpenAIRequest(modelName, input, cl, startupOptions)
-	if err != nil {
-		return nil, fmt.Errorf("failed reading parameters from request:%w", err)
-	}
-
-	if input.ResponseFormat.Type == "json_object" {
-		input.Grammar = grammar.JSONBNF
-	}
-
-	log.Debug().Msgf("Parameter Config: %+v", config)
-
-	configTemplate := bindingFn(config)
-
-	// A model can have a "file.bin.tmpl" file associated with a prompt template prefix
-	if (*configTemplate == "") && (ml.ExistsInModelPath(fmt.Sprintf("%s.tmpl", config.Model))) {
-		*configTemplate = config.Model
-	}
-	if *configTemplate == "" {
-		return nil, fmt.Errorf(("failed to find templateConfig"))
-	}
-
-	return config, nil
-}
-
-////////// SPECIFIC REQUESTS //////////////
-// TODO: For round one of the refactor, give each of the three primary text endpoints their own function?
-// SEMITODO: During a merge, edit/completion were semi-combined - but remain nominally split
-// Can cleanup into a common form later if possible easier if they are all here for now
-// If they remain different, extract each of these named segments to a seperate file
-
-func prepareChatGenerationOpenAIRequest(modelName string, input *schema.OpenAIRequest, cl *services.ConfigLoader, ml *model.ModelLoader, startupOptions *schema.StartupOptions) (*schema.Config, string, bool, error) {
-
-	// IMPORTANT DEFS
-	funcs := grammar.Functions{}
-
-	// The Basic Begining
-
-	config, input, err := ReadConfigFromFileAndCombineWithOpenAIRequest(modelName, input, cl, startupOptions)
-	if err != nil {
-		return nil, "", false, fmt.Errorf("failed reading parameters from request:%w", err)
-	}
-	log.Debug().Msgf("Configuration read: %+v", config)
-
-	// Special Input/Config Handling
-
-	// Allow the user to set custom actions via config file
-	// to be "embedded" in each model - but if they are missing, use defaults.
-	if config.FunctionsConfig.NoActionFunctionName == "" {
-		config.FunctionsConfig.NoActionFunctionName = DEFAULT_NO_ACTION_NAME
-	}
-	if config.FunctionsConfig.NoActionDescriptionName == "" {
-		config.FunctionsConfig.NoActionDescriptionName = DEFAULT_NO_ACTION_DESCRIPTION
-	}
-
-	if input.ResponseFormat.Type == "json_object" {
-		input.Grammar = grammar.JSONBNF
-	}
-
-	processFunctions := len(input.Functions) > 0 && config.ShouldUseFunctions()
-
-	if processFunctions {
-		log.Debug().Msgf("Response needs to process functions")
-
-		noActionGrammar := grammar.Function{
-			Name:        config.FunctionsConfig.NoActionFunctionName,
-			Description: config.FunctionsConfig.NoActionDescriptionName,
-			Parameters: map[string]interface{}{
-				"properties": map[string]interface{}{
-					"message": map[string]interface{}{
-						"type":        "string",
-						"description": "The message to reply the user with",
-					}},
-			},
-		}
-
-		// Append the no action function
-		funcs = append(funcs, input.Functions...)
-		if !config.FunctionsConfig.DisableNoAction {
-			funcs = append(funcs, noActionGrammar)
-		}
-
-		// Force picking one of the functions by the request
-		if config.FunctionToCall() != "" {
-			funcs = funcs.Select(config.FunctionToCall())
-		}
-
-		// Update input grammar
-		jsStruct := funcs.ToJSONStructure()
-		config.Grammar = jsStruct.Grammar("")
-	} else if input.JSONFunctionGrammarObject != nil {
-		config.Grammar = input.JSONFunctionGrammarObject.Grammar("")
-	}
-
-	log.Debug().Msgf("Parameters: %+v", config)
-
-	var predInput string
-
-	suppressConfigSystemPrompt := false
-	mess := []string{}
-	for messageIndex, i := range input.Messages {
-		var content string
-		role := i.Role
-
-		// if function call, we might want to customize the role so we can display better that the "assistant called a json action"
-		// if an "assistant_function_call" role is defined, we use it, otherwise we use the role that is passed by in the request
-		if i.FunctionCall != nil && i.Role == "assistant" {
-			roleFn := "assistant_function_call"
-			r := config.Roles[roleFn]
-			if r != "" {
-				role = roleFn
-			}
-		}
-		r := config.Roles[role]
-		contentExists := i.Content != nil && i.StringContent != ""
-		// First attempt to populate content via a chat message specific template
-		if config.TemplateConfig.ChatMessage != "" {
-			chatMessageData := model.ChatMessageTemplateData{
-				SystemPrompt: config.SystemPrompt,
-				Role:         r,
-				RoleName:     role,
-				Content:      i.StringContent,
-				MessageIndex: messageIndex,
-			}
-			templatedChatMessage, err := ml.EvaluateTemplateForChatMessage(config.TemplateConfig.ChatMessage, chatMessageData)
-			if err != nil {
-				log.Error().Msgf("error processing message %+v using template \"%s\": %v. Skipping!", chatMessageData, config.TemplateConfig.ChatMessage, err)
-			} else {
-				if templatedChatMessage == "" {
-					log.Warn().Msgf("template \"%s\" produced blank output for %+v. Skipping!", config.TemplateConfig.ChatMessage, chatMessageData)
-					continue // TODO: This continue is here intentionally to skip over the line `mess = append(mess, content)` below, and to prevent the sprintf
-				}
-				log.Debug().Msgf("templated message for chat: %s", templatedChatMessage)
-				content = templatedChatMessage
-			}
-		}
-		// If this model doesn't have such a template, or if that template fails to return a value, template at the message level.
-		if content == "" {
-			if r != "" {
-				if contentExists {
-					content = fmt.Sprint(r, i.StringContent)
-				}
-				if i.FunctionCall != nil {
-					j, err := json.Marshal(i.FunctionCall)
-					if err == nil {
-						if contentExists {
-							content += "\n" + fmt.Sprint(r, " ", string(j))
-						} else {
-							content = fmt.Sprint(r, " ", string(j))
-						}
-					}
-				}
-			} else {
-				if contentExists {
-					content = fmt.Sprint(i.StringContent)
-				}
-				if i.FunctionCall != nil {
-					j, err := json.Marshal(i.FunctionCall)
-					if err == nil {
-						if contentExists {
-							content += "\n" + string(j)
-						} else {
-							content = string(j)
-						}
-					}
-				}
-			}
-			// Special Handling: System. We care if it was printed at all, not the r branch, so check seperately
-			if contentExists && role == "system" {
-				suppressConfigSystemPrompt = true
-			}
-		}
-
-		mess = append(mess, content)
-	}
-
-	predInput = strings.Join(mess, "\n")
-	log.Debug().Msgf("Prompt (before templating): %s", predInput)
-
-	templateFile := ""
-
-	// A model can have a "file.bin.tmpl" file associated with a prompt template prefix
-	if ml.ExistsInModelPath(fmt.Sprintf("%s.tmpl", config.Model)) {
-		templateFile = config.Model
-	}
-
-	if config.TemplateConfig.Chat != "" && !processFunctions {
-		templateFile = config.TemplateConfig.Chat
-	}
-
-	if config.TemplateConfig.Functions != "" && processFunctions {
-		templateFile = config.TemplateConfig.Functions
-	}
-
-	if templateFile != "" {
-		templatedInput, err := ml.EvaluateTemplateForPrompt(model.ChatPromptTemplate, templateFile, model.PromptTemplateData{
-			SystemPrompt:         config.SystemPrompt,
-			SuppressSystemPrompt: suppressConfigSystemPrompt,
-			Input:                predInput,
-			Functions:            funcs,
-		})
-		if err == nil {
-			predInput = templatedInput
-			log.Debug().Msgf("Template found, input modified to: %s", predInput)
-		} else {
-			log.Debug().Msgf("Template failed loading: %s", err.Error())
-		}
-	}
-
-	log.Debug().Msgf("Prompt (after templating): %s", predInput)
-	if processFunctions {
-		log.Debug().Msgf("Grammar: %+v", config.Grammar)
-	}
-
-	return config, predInput, processFunctions, nil
-
-}
-
-func EditGenerationOpenAIRequest(modelName string, input *schema.OpenAIRequest, cl *services.ConfigLoader, ml *model.ModelLoader, startupOptions *schema.StartupOptions) (*schema.OpenAIResponse, error) {
-	id := uuid.New().String()
-	created := int(time.Now().Unix())
-
-	binding := func(config *schema.Config) *string {
-		return &config.TemplateConfig.Edit
-	}
-
-	config, err := prepareGenerationOpenAIRequest(binding, modelName, input, cl, ml, startupOptions)
-	if err != nil {
-		return nil, err
-	}
-
-	var result []schema.Choice
-	totalTokenUsage := TokenUsage{}
-
-	for _, i := range config.InputStrings {
-		// A model can have a "file.bin.tmpl" file associated with a prompt template prefix
-		templatedInput, err := ml.EvaluateTemplateForPrompt(model.EditPromptTemplate, config.TemplateConfig.Edit, model.PromptTemplateData{
-			Input:        i,
-			Instruction:  input.Instruction,
-			SystemPrompt: config.SystemPrompt,
-		})
-		if err == nil {
-			i = templatedInput
-			log.Debug().Msgf("Template found, input modified to: %s", i)
-		}
-
-		r, tokenUsage, err := ComputeChoices(input, i, config, startupOptions, ml, func(s string, c *[]schema.Choice) {
-			*c = append(*c, schema.Choice{Text: s})
-		}, nil)
-		if err != nil {
-			return nil, err
-		}
-
-		totalTokenUsage.Prompt += tokenUsage.Prompt
-		totalTokenUsage.Completion += tokenUsage.Completion
-
-		result = append(result, r...)
-	}
-
-	return &schema.OpenAIResponse{
-		ID:      id,
-		Created: created,
-		Model:   input.Model, // we have to return what the user sent here, due to OpenAI spec.
-		Choices: result,
-		Object:  "edit",
-		Usage: schema.OpenAIUsage{
-			PromptTokens:     totalTokenUsage.Prompt,
-			CompletionTokens: totalTokenUsage.Completion,
-			TotalTokens:      totalTokenUsage.Prompt + totalTokenUsage.Completion,
-		},
-	}, nil
-}
-
-func ChatGenerationOpenAIRequest(modelName string, input *schema.OpenAIRequest, cl *services.ConfigLoader, ml *model.ModelLoader, startupOptions *schema.StartupOptions) (*schema.OpenAIResponse, error) {
-
-	// DEFS
-	id := uuid.New().String()
-	created := int(time.Now().Unix())
-
-	// Prepare
-	config, predInput, processFunctions, err := prepareChatGenerationOpenAIRequest(modelName, input, cl, ml, startupOptions)
-	if err != nil {
-		return nil, err
-	}
-
-	result, tokenUsage, err := ComputeChoices(input, predInput, config, startupOptions, ml, func(s string, c *[]schema.Choice) {
-		if processFunctions {
-			// As we have to change the result before processing, we can't stream the answer (yet?)
-			ss := map[string]interface{}{}
-			// This prevent newlines to break JSON parsing for clients
-			s = utils.EscapeNewLines(s)
-			json.Unmarshal([]byte(s), &ss)
-			log.Debug().Msgf("Function return: %s %+v", s, ss)
-
-			// The grammar defines the function name as "function", while OpenAI returns "name"
-			func_name := ss["function"]
-			// Similarly, while here arguments is a map[string]interface{}, OpenAI actually want a stringified object
-			args := ss["arguments"] // arguments needs to be a string, but we return an object from the grammar result (TODO: fix)
-			d, _ := json.Marshal(args)
-
-			ss["arguments"] = string(d)
-			ss["name"] = func_name
-
-			// if do nothing, reply with a message
-			if func_name == config.FunctionsConfig.NoActionFunctionName {
-				log.Debug().Msgf("nothing to do, computing a reply")
-
-				// If there is a message that the LLM already sends as part of the JSON reply, use it
-				arguments := map[string]interface{}{}
-				json.Unmarshal([]byte(d), &arguments)
-				m, exists := arguments["message"]
-				if exists {
-					switch message := m.(type) {
-					case string:
-						if message != "" {
-							log.Debug().Msgf("Reply received from LLM: %s", message)
-							message = Finetune(*config, predInput, message)
-							log.Debug().Msgf("Reply received from LLM(finetuned): %s", message)
-
-							*c = append(*c, schema.Choice{Message: &schema.Message{Role: "assistant", Content: &message}})
-							return
-						}
-					}
-				}
-
-				log.Debug().Msgf("No action received from LLM, without a message, computing a reply")
-				// Otherwise ask the LLM to understand the JSON output and the context, and return a message
-				// Note: This costs (in term of CPU) another computation
-				config.Grammar = ""
-				images := []string{}
-				for _, m := range input.Messages {
-					images = append(images, m.StringImages...)
-				}
-				predFunc, err := ModelInference(input.Context, predInput, images, ml, *config, startupOptions, nil)
-				if err != nil {
-					log.Error().Msgf("inference error: %s", err.Error())
-					return
-				}
-
-				prediction, err := predFunc()
-				if err != nil {
-					log.Error().Msgf("inference error: %s", err.Error())
-					return
-				}
-
-				fineTunedResponse := Finetune(*config, predInput, prediction.Response)
-				*c = append(*c, schema.Choice{Message: &schema.Message{Role: "assistant", Content: &fineTunedResponse}})
-			} else {
-				// otherwise reply with the function call
-				*c = append(*c, schema.Choice{
-					FinishReason: "function_call",
-					Message:      &schema.Message{Role: "assistant", FunctionCall: ss},
-				})
-			}
-
-			return
-		}
-		*c = append(*c, schema.Choice{FinishReason: "stop", Index: 0, Message: &schema.Message{Role: "assistant", Content: &s}})
-	}, nil)
-	if err != nil {
-		return nil, err
-	}
-
-	return &schema.OpenAIResponse{
-		ID:      id,
-		Created: created,
-		Model:   input.Model, // we have to return what the user sent here, due to OpenAI spec.
-		Choices: result,
-		Object:  "chat.completion",
-		Usage: schema.OpenAIUsage{
-			PromptTokens:     tokenUsage.Prompt,
-			CompletionTokens: tokenUsage.Completion,
-			TotalTokens:      tokenUsage.Prompt + tokenUsage.Completion,
-		},
-	}, nil
-
-}
-
-// CompletionGenerationOpenAIRequest runs a non-streaming text completion for
-// every prompt string of the prepared request and merges the outcome into a
-// single "text_completion" response.
-//
-// Each prompt is replaced by its templated form when the completion prompt
-// template evaluates without error; otherwise the prompt is used unchanged.
-// Token usage is summed over all prompts. An error from request preparation
-// or from ComputeChoices aborts the whole request and is returned as-is.
-func CompletionGenerationOpenAIRequest(modelName string, input *schema.OpenAIRequest, cl *services.ConfigLoader, ml *model.ModelLoader, startupOptions *schema.StartupOptions) (*schema.OpenAIResponse, error) {
-	// The identifier and timestamp are taken before any work is done.
-	responseID := uuid.New().String()
-	createdAt := int(time.Now().Unix())
-
-	// Handed to prepareGenerationOpenAIRequest; yields a pointer to the
-	// completion template field of the configuration.
-	completionTemplate := func(config *schema.Config) *string {
-		return &config.TemplateConfig.Completion
-	}
-
-	config, err := prepareGenerationOpenAIRequest(completionTemplate, modelName, input, cl, ml, startupOptions)
-	if err != nil {
-		return nil, err
-	}
-
-	var (
-		choices []schema.Choice
-		usage   TokenUsage
-	)
-
-	for idx, prompt := range config.PromptStrings {
-		// A model can have a "file.bin.tmpl" file associated with a prompt template prefix
-		templateData := model.PromptTemplateData{
-			SystemPrompt: config.SystemPrompt,
-			Input:        prompt,
-		}
-		if templated, tmplErr := ml.EvaluateTemplateForPrompt(model.CompletionPromptTemplate, config.TemplateConfig.Completion, templateData); tmplErr == nil {
-			prompt = templated
-			log.Debug().Msgf("Template found, input modified to: %s", prompt)
-		}
-
-		// Every choice produced for this prompt carries the prompt's index.
-		collect := func(s string, c *[]schema.Choice) {
-			*c = append(*c, schema.Choice{Text: s, FinishReason: "stop", Index: idx})
-		}
-
-		promptChoices, promptUsage, err := ComputeChoices(input, prompt, config, startupOptions, ml, collect, nil)
-		if err != nil {
-			return nil, err
-		}
-
-		usage.Prompt += promptUsage.Prompt
-		usage.Completion += promptUsage.Completion
-		choices = append(choices, promptChoices...)
-	}
-
-	response := &schema.OpenAIResponse{
-		ID:      responseID,
-		Created: createdAt,
-		Model:   input.Model, // we have to return what the user sent here, due to OpenAI spec.
-		Choices: choices,
-		Object:  "text_completion",
-		Usage: schema.OpenAIUsage{
-			PromptTokens:     usage.Prompt,
-			CompletionTokens: usage.Completion,
-			TotalTokens:      usage.Prompt + usage.Completion,
-		},
-	}
-	return response, nil
-}
-
-// StreamingChatGenerationOpenAIRequest starts a streaming chat completion and
-// returns the channel on which the response chunks are delivered.
-//
-// Request preparation runs synchronously and its error is returned directly.
-// Generation then runs in a goroutine which first sends an initial chunk
-// carrying the assistant role with empty content, then one
-// "chat.completion.chunk" for every invocation of the token callback handed
-// to ComputeChoices, and finally closes the channel.
-//
-// The channel is unbuffered: the caller must keep receiving until it is
-// closed, otherwise the goroutine stays blocked on a send. An inference error
-// cannot be returned to the caller once streaming has begun, so it is logged
-// and the channel is closed.
-func StreamingChatGenerationOpenAIRequest(modelName string, input *schema.OpenAIRequest, cl *services.ConfigLoader, ml *model.ModelLoader, startupOptions *schema.StartupOptions) (chan schema.OpenAIResponse, error) {
-
-	// DEFS
-	emptyMessage := ""
-	id := uuid.New().String()
-	created := int(time.Now().Unix())
-
-	// Prepare
-	config, predInput, processFunctions, err := prepareChatGenerationOpenAIRequest(modelName, input, cl, ml, startupOptions)
-	if err != nil {
-		return nil, err
-	}
-
-	if processFunctions {
-		// TODO: unused variable means I did something wrong. investigate once stable
-		log.Debug().Msgf("StreamingChatGenerationOpenAIRequest with processFunctions=true for %s?", config.Name)
-	}
-
-	processor := func(s string, req *schema.OpenAIRequest, config *schema.Config, loader *model.ModelLoader, responses chan schema.OpenAIResponse) {
-		// Deferred so the consumer's receive loop terminates on every exit path.
-		defer close(responses)
-
-		initialMessage := schema.OpenAIResponse{
-			ID:      id,
-			Created: created,
-			Model:   req.Model, // we have to return what the user sent here, due to OpenAI spec.
-			Choices: []schema.Choice{{Delta: &schema.Message{Role: "assistant", Content: &emptyMessage}}},
-			Object:  "chat.completion.chunk",
-		}
-		responses <- initialMessage
-
-		_, _, err := ComputeChoices(req, s, config, startupOptions, loader, func(s string, c *[]schema.Choice) {}, func(s string, usage TokenUsage) bool {
-			resp := schema.OpenAIResponse{
-				ID:      id,
-				Created: created,
-				Model:   req.Model, // we have to return what the user sent here, due to OpenAI spec.
-				Choices: []schema.Choice{{Delta: &schema.Message{Content: &s}, Index: 0}},
-				Object:  "chat.completion.chunk",
-				Usage: schema.OpenAIUsage{
-					PromptTokens:     usage.Prompt,
-					CompletionTokens: usage.Completion,
-					TotalTokens:      usage.Prompt + usage.Completion,
-				},
-			}
-
-			responses <- resp
-			return true
-		})
-		if err != nil {
-			// Previously dropped silently; surface it so a truncated stream can be diagnosed.
-			log.Error().Msgf("inference error: %s", err.Error())
-		}
-	}
-	log.Trace().Msg("StreamingChatGenerationOpenAIRequest :: About to create response channel")
-
-	responses := make(chan schema.OpenAIResponse)
-
-	log.Trace().Msg("StreamingChatGenerationOpenAIRequest :: About to start processor goroutine")
-
-	go processor(predInput, input, config, ml, responses)
-
-	log.Trace().Msg("StreamingChatGenerationOpenAIRequest :: DONE! successfully returning to caller!")
-
-	return responses, nil
-
-}
-
-// StreamingCompletionGenerationOpenAIRequest starts a streaming text
-// completion and returns the channel on which the response chunks are
-// delivered.
-//
-// Streaming supports exactly one prompt string: an error is returned when the
-// prepared configuration holds none or more than one. The prompt is replaced
-// by its templated form when the completion prompt template evaluates without
-// error. Generation runs in a goroutine which sends one "text_completion"
-// chunk for every invocation of the token callback handed to ComputeChoices
-// and then closes the channel.
-//
-// The channel is unbuffered: the caller must keep receiving until it is
-// closed, otherwise the goroutine stays blocked on a send. An inference error
-// cannot be returned to the caller once streaming has begun, so it is logged
-// and the channel is closed.
-func StreamingCompletionGenerationOpenAIRequest(modelName string, input *schema.OpenAIRequest, cl *services.ConfigLoader, ml *model.ModelLoader, startupOptions *schema.StartupOptions) (chan schema.OpenAIResponse, error) {
-	// DEFS
-	id := uuid.New().String()
-	created := int(time.Now().Unix())
-
-	binding := func(config *schema.Config) *string {
-		return &config.TemplateConfig.Completion
-	}
-
-	// Prepare
-
-	config, err := prepareGenerationOpenAIRequest(binding, modelName, input, cl, ml, startupOptions)
-	if err != nil {
-		return nil, err
-	}
-
-	// Validate the prompt count before indexing: an empty slice used to panic
-	// on PromptStrings[0].
-	switch {
-	case len(config.PromptStrings) == 0:
-		return nil, errors.New("no `PromptStrings` provided for Streaming")
-	case len(config.PromptStrings) > 1:
-		return nil, errors.New("cannot handle more than 1 `PromptStrings` when Streaming")
-	}
-
-	processor := func(s string, req *schema.OpenAIRequest, config *schema.Config, loader *model.ModelLoader, responses chan schema.OpenAIResponse) {
-		// Deferred so the consumer's receive loop terminates on every exit path.
-		defer close(responses)
-
-		_, _, err := ComputeChoices(req, s, config, startupOptions, loader, func(s string, c *[]schema.Choice) {}, func(s string, usage TokenUsage) bool {
-			resp := schema.OpenAIResponse{
-				ID:      id,
-				Created: created,
-				Model:   req.Model, // we have to return what the user sent here, due to OpenAI spec.
-				Choices: []schema.Choice{
-					{
-						Index: 0,
-						Text:  s,
-					},
-				},
-				Object: "text_completion",
-				Usage: schema.OpenAIUsage{
-					PromptTokens:     usage.Prompt,
-					CompletionTokens: usage.Completion,
-					TotalTokens:      usage.Prompt + usage.Completion,
-				},
-			}
-			log.Debug().Msgf("Sending goroutine: %s", s)
-
-			responses <- resp
-			return true
-		})
-		if err != nil {
-			// Previously dropped silently; surface it so a truncated stream can be diagnosed.
-			log.Error().Msgf("inference error: %s", err.Error())
-		}
-	}
-
-	predInput := config.PromptStrings[0]
-
-	//A model can have a "file.bin.tmpl" file associated with a prompt template prefix
-	// NOTE(review): unlike CompletionGenerationOpenAIRequest, no SystemPrompt is
-	// passed to the template here; confirm whether that is intentional.
-	templatedInput, err := ml.EvaluateTemplateForPrompt(model.CompletionPromptTemplate, config.TemplateConfig.Completion, model.PromptTemplateData{
-		Input: predInput,
-	})
-	if err == nil {
-		predInput = templatedInput
-		log.Debug().Msgf("Template found, input modified to: %s", predInput)
-	}
-
-	log.Trace().Msg("StreamingCompletionGenerationOpenAIRequest :: About to create response channel")
-
-	responses := make(chan schema.OpenAIResponse)
-
-	log.Trace().Msg("StreamingCompletionGenerationOpenAIRequest :: About to start processor goroutine")
-
-	go processor(predInput, input, config, ml, responses)
-
-	log.Trace().Msg("StreamingCompletionGenerationOpenAIRequest :: DONE! successfully returning to caller!")
-
-	return responses, nil
-}
--- a/core/backend/options.go
+++ b/core/backend/options.go
@ -1,125 +0,0 @@
-package backend
-
-import (
-	"os"
-	"path/filepath"
-
-	pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
-	"github.com/go-skynet/LocalAI/pkg/model"
-	"github.com/go-skynet/LocalAI/pkg/schema"
-)
-
-// modelOpts appends to opts the loader options implied by the startup options
-// o and the model configuration c, and returns the extended slice.
-//
-// Options are added only for flags that are set and for gRPC retry values that
-// are non-zero; one option is added per entry of o.ExternalGRPCBackends.
-func modelOpts(c schema.Config, o *schema.StartupOptions, opts []model.Option) []model.Option {
-	add := func(extra model.Option) {
-		opts = append(opts, extra)
-	}
-
-	if o.SingleBackend {
-		add(model.WithSingleActiveBackend())
-	}
-	if o.ParallelBackendRequests {
-		add(model.EnableParallelRequests)
-	}
-
-	if attempts := c.GRPC.Attempts; attempts != 0 {
-		add(model.WithGRPCAttempts(attempts))
-	}
-	if delay := c.GRPC.AttemptsSleepTime; delay != 0 {
-		add(model.WithGRPCAttemptsDelay(delay))
-	}
-
-	for key, val := range o.ExternalGRPCBackends {
-		add(model.WithExternalBackend(key, val))
-	}
-
-	return opts
-}
-
-// gRPCModelOpts translates the model configuration c into the ModelOptions
-// message sent to a gRPC backend. NBatch falls back to 512 when c.Batch is 0;
-// every other field is copied from c, with integer fields converted to int32.
-func gRPCModelOpts(c schema.Config) *pb.ModelOptions {
-	batch := c.Batch
-	if batch == 0 {
-		batch = 512
-	}
-
-	return &pb.ModelOptions{
-		ContextSize:  int32(c.ContextSize),
-		Seed:         int32(c.Seed),
-		NBatch:       int32(batch),
-		Threads:      int32(c.Threads),
-		Embeddings:   c.Embeddings,
-		Quantization: c.Quantization,
-		DraftModel:   c.DraftModel,
-		MMProj:       c.MMProj,
-		AudioPath:    c.VallE.AudioPath,
-		NGQA:         c.NGQA,
-		RMSNormEps:   c.RMSNormEps,
-		NoMulMatQ:    c.NoMulMatQ,
-		CUDA:         c.CUDA, // diffusers, transformers
-		// Memory and GPU placement
-		F16Memory:   c.F16,
-		MLock:       c.MMlock,
-		MMap:        c.MMap,
-		NUMA:        c.NUMA,
-		LowVRAM:     c.LowVRAM,
-		NGPULayers:  int32(c.NGPULayers),
-		MainGPU:     c.MainGPU,
-		TensorSplit: c.TensorSplit,
-		// Rope
-		RopeFreqBase:  c.RopeFreqBase,
-		RopeFreqScale: c.RopeFreqScale,
-		// Yarn
-		YarnExtFactor:  c.YarnExtFactor,
-		YarnAttnFactor: c.YarnAttnFactor,
-		YarnBetaFast:   c.YarnBetaFast,
-		YarnBetaSlow:   c.YarnBetaSlow,
-		// Lora
-		LoraAdapter: c.LoraAdapter,
-		LoraBase:    c.LoraBase,
-		LoraScale:   c.LoraScale,
-		// AutoGPTQ
-		ModelBaseName:    c.AutoGPTQ.ModelBaseName,
-		Device:           c.AutoGPTQ.Device,
-		UseTriton:        c.AutoGPTQ.Triton,
-		UseFastTokenizer: c.AutoGPTQ.UseFastTokenizer,
-		// RWKV
-		Tokenizer: c.Tokenizer,
-	}
-}
-
-// gRPCPredictOpts translates the model configuration c into the PredictOptions
-// message sent to a gRPC backend.
-//
-// When c.PromptCachePath is set it is joined onto modelPath and the parent
-// directory of the result is created; otherwise PromptCachePath stays empty.
-func gRPCPredictOpts(c schema.Config, modelPath string) *pb.PredictOptions {
-	var cachePath string
-	if c.PromptCachePath != "" {
-		cachePath = filepath.Join(modelPath, c.PromptCachePath)
-		// NOTE(review): the MkdirAll error is discarded; this function has no
-		// error return, so a failure here is not reported to the caller.
-		_ = os.MkdirAll(filepath.Dir(cachePath), 0755)
-	}
-
-	return &pb.PredictOptions{
-		Tokens:            int32(c.Maxtokens),
-		Threads:           int32(c.Threads),
-		Batch:             int32(c.Batch),
-		NKeep:             int32(c.Keep),
-		NDraft:            c.NDraft,
-		Seed:              int32(c.Seed),
-		Temperature:       float32(c.Temperature),
-		TopP:              float32(c.TopP),
-		TopK:              int32(c.TopK),
-		TypicalP:          float32(c.TypicalP),
-		TailFreeSamplingZ: float32(c.TFZ),
-		FrequencyPenalty:  float32(c.FrequencyPenalty),
-		Repeat:            int32(c.RepeatPenalty),
-		Grammar:           c.Grammar,
-		StopPrompts:       c.StopWords,
-		IgnoreEOS:         c.IgnoreEOS,
-		F16KV:             c.F16,
-		MLock:             c.MMlock,
-		MMap:              c.MMap,
-		MainGPU:           c.MainGPU,
-		TensorSplit:       c.TensorSplit,
-		DebugMode:         c.Debug,
-		Debug:             c.Debug,
-		// Prompt cache
-		PromptCacheAll:  c.PromptCacheAll,
-		PromptCacheRO:   c.PromptCacheRO,
-		PromptCachePath: cachePath,
-		// Negative prompt
-		NegativePrompt:      c.NegativePrompt,
-		NegativePromptScale: c.NegativePromptScale,
-		// Rope
-		RopeFreqBase:  c.RopeFreqBase,
-		RopeFreqScale: c.RopeFreqScale,
-		// Mirostat
-		Mirostat:    int32(c.LLMConfig.Mirostat),
-		MirostatETA: float32(c.LLMConfig.MirostatETA),
-		MirostatTAU: float32(c.LLMConfig.MirostatTAU),
-	}
-}
--- a/core/backend/transcription.go
+++ b/core/backend/transcription.go
@ -1,52 +0,0 @@
-package backend
-
-import (
-	"context"
-	"fmt"
-
-	"github.com/go-skynet/LocalAI/core/services"
-	"github.com/go-skynet/LocalAI/pkg/grpc/proto"
-	"github.com/go-skynet/LocalAI/pkg/model"
-	"github.com/go-skynet/LocalAI/pkg/schema"
-)
-
-// ModelTranscription loads the model named by c.Model through the whisper
-// backend and asks it to transcribe the audio file at path audio in the given
-// language.
-//
-// It returns the loader's error when loading fails, a dedicated error when the
-// loader yields a nil model, and otherwise whatever AudioTranscription
-// returns. The transcription call uses a background context.
-func ModelTranscription(audio, language string, loader *model.ModelLoader, c schema.Config, o *schema.StartupOptions) (*schema.WhisperResult, error) {
-	threads := uint32(c.Threads)
-
-	loadOpts := modelOpts(c, o, []model.Option{
-		model.WithBackendString(model.WhisperBackend),
-		model.WithModel(c.Model),
-		model.WithContext(o.Context),
-		model.WithThreads(threads),
-		model.WithAssetDir(o.AssetsDestination),
-		model.WithExternalBackends(o.ExternalGRPCBackends, false),
-	})
-
-	whisper, err := loader.BackendLoader(loadOpts...)
-	switch {
-	case err != nil:
-		return nil, err
-	case whisper == nil:
-		return nil, fmt.Errorf("could not load whisper model")
-	}
-
-	request := &proto.TranscriptRequest{
-		Dst:      audio,
-		Language: language,
-		Threads:  threads,
-	}
-	return whisper.AudioTranscription(context.Background(), request)
-}
-
-// TranscriptionOpenAIRequest resolves the configuration for modelName,
-// combines it with the request, and transcribes the audio file at
-// audioFilePath using the language of the combined request.
-//
-// A configuration error is wrapped with context; a transcription error is
-// returned unchanged with a nil result.
-func TranscriptionOpenAIRequest(modelName string, input *schema.OpenAIRequest, audioFilePath string, cl *services.ConfigLoader, ml *model.ModelLoader, startupOptions *schema.StartupOptions) (*schema.WhisperResult, error) {
-	cfg, request, cfgErr := ReadConfigFromFileAndCombineWithOpenAIRequest(modelName, input, cl, startupOptions)
-	if cfgErr != nil {
-		return nil, fmt.Errorf("failed reading parameters from request:%w", cfgErr)
-	}
-
-	transcript, trErr := ModelTranscription(audioFilePath, request.Language, ml, *cfg, startupOptions)
-	if trErr != nil {
-		return nil, trErr
-	}
-	return transcript, nil
-}
--- a/core/backend/tts.go
+++ b/core/backend/tts.go
@ -1,79 +0,0 @@
-package backend
-
-import (
-	"context"
-	"fmt"
-	"os"
-	"path/filepath"
-
-	"github.com/go-skynet/LocalAI/pkg/grpc/proto"
-	"github.com/go-skynet/LocalAI/pkg/model"
-	"github.com/go-skynet/LocalAI/pkg/schema"
-	"github.com/go-skynet/LocalAI/pkg/utils"
-)
-
-// generateUniqueFileName returns a file name of the form baseName+ext that
-// does not currently exist in dir. When that name is taken it tries
-// baseName_2+ext, baseName_3+ext, and so on (the numbering starts at 2).
-//
-// Only a successful os.Stat counts as "name taken". Any Stat error ends the
-// search and returns the current candidate: not-exist means the name is free,
-// and other errors (permission denied, invalid path) would otherwise make the
-// loop spin forever, so they are left for the caller to hit when it writes.
-//
-// The check is not atomic: another process may create the file between this
-// call and the caller's write.
-func generateUniqueFileName(dir, baseName, ext string) string {
-	fileName := baseName + ext
-
-	for counter := 2; ; counter++ {
-		if _, err := os.Stat(filepath.Join(dir, fileName)); err != nil {
-			return fileName
-		}
-
-		fileName = fmt.Sprintf("%s_%d%s", baseName, counter, ext)
-	}
-}
-
-// ModelTTS synthesizes text to a WAV file in o.AudioDir using the given
-// backend, defaulting to model.PiperBackend when backend is empty.
-//
-// It returns the path of the destination file, the backend's result and an
-// error. When modelFile is non-empty it is joined onto o.ModelPath and checked
-// with utils.VerifyPath, except for model.TransformersMusicGen, where
-// modelFile is passed through unchanged. The check runs before the backend is
-// loaded or the audio directory is created, so a rejected path has no side
-// effects.
-func ModelTTS(backend, text, modelFile string, loader *model.ModelLoader, o *schema.StartupOptions) (string, *proto.Result, error) {
-	bb := backend
-	if bb == "" {
-		bb = model.PiperBackend
-	}
-
-	// If the model file is not empty, we pass it joined with the model path
-	modelPath := ""
-	if modelFile != "" {
-		if bb != model.TransformersMusicGen {
-			modelPath = filepath.Join(o.ModelPath, modelFile)
-			// presumably rejects paths that resolve outside o.ModelPath; see utils.VerifyPath
-			if err := utils.VerifyPath(modelPath, o.ModelPath); err != nil {
-				return "", nil, err
-			}
-		} else {
-			modelPath = modelFile
-		}
-	}
-
-	opts := modelOpts(schema.Config{}, o, []model.Option{
-		model.WithBackendString(bb),
-		model.WithModel(modelFile),
-		model.WithContext(o.Context),
-		model.WithAssetDir(o.AssetsDestination),
-		model.WithExternalBackends(o.ExternalGRPCBackends, false),
-	})
-	piperModel, err := loader.BackendLoader(opts...)
-	if err != nil {
-		return "", nil, err
-	}
-
-	if piperModel == nil {
-		return "", nil, fmt.Errorf("could not load piper model")
-	}
-
-	if err := os.MkdirAll(o.AudioDir, 0755); err != nil {
-		// %w keeps the underlying error reachable through errors.Is / errors.As.
-		return "", nil, fmt.Errorf("failed creating audio directory: %w", err)
-	}
-
-	fileName := generateUniqueFileName(o.AudioDir, "piper", ".wav")
-	filePath := filepath.Join(o.AudioDir, fileName)
-
-	res, err := piperModel.TTS(context.Background(), &proto.TTSRequest{
-		Text:  text,
-		Model: modelPath,
-		Dst:   filePath,
-	})
-
-	return filePath, res, err
-}