[Refactor]: Core/API Split (#1506)

Refactors api folder to core, creates firm split between backend code and api frontend.
2025-05-31 07:54:59 +00:00 · 2024-01-05 09:34:56 -05:00 · 2024-01-05 09:34:56 -05:00 · ab7b4d5ee9
commit ab7b4d5ee9
parent bcf02449b3
77 changed files with 3441 additions and 3117 deletions
--- a/core/http/endpoints/localai/backend_monitor.go
+++ b/core/http/endpoints/localai/backend_monitor.go
@ -0,0 +1,34 @@
+package localai
+
+import (
+	"github.com/go-skynet/LocalAI/core/services"
+	"github.com/go-skynet/LocalAI/pkg/schema"
+	"github.com/gofiber/fiber/v2"
+)
+
+// BackendMonitorEndpoint builds a fiber handler that decodes a
+// schema.BackendMonitorRequest from the request body, asks bm to check and
+// sample the model it names, and writes the result back as JSON.
+// Errors from body parsing or from CheckAndSample are returned unchanged.
+func BackendMonitorEndpoint(bm *services.BackendMonitor) func(c *fiber.Ctx) error {
+	handler := func(c *fiber.Ctx) error {
+		var req schema.BackendMonitorRequest
+		// Decode the request body into the monitor request.
+		if parseErr := c.BodyParser(&req); parseErr != nil {
+			return parseErr
+		}
+
+		sample, sampleErr := bm.CheckAndSample(req.Model)
+		if sampleErr != nil {
+			return sampleErr
+		}
+		return c.JSON(sample)
+	}
+	return handler
+}
+
+// BackendShutdownEndpoint builds a fiber handler that decodes a
+// schema.BackendMonitorRequest from the request body and asks bm to shut down
+// the model it names. The handler returns whatever ShutdownModel returns.
+func BackendShutdownEndpoint(bm *services.BackendMonitor) func(c *fiber.Ctx) error {
+	handler := func(c *fiber.Ctx) error {
+		var req schema.BackendMonitorRequest
+		// Decode the request body into the monitor request.
+		if parseErr := c.BodyParser(&req); parseErr != nil {
+			return parseErr
+		}
+		return bm.ShutdownModel(req.Model)
+	}
+	return handler
+}
--- a/core/http/endpoints/localai/gallery.go
+++ b/core/http/endpoints/localai/gallery.go
@ -0,0 +1,148 @@
+package localai
+
+import (
+	"encoding/json"
+	"fmt"
+	"slices"
+
+	"github.com/go-skynet/LocalAI/core/services"
+	"github.com/go-skynet/LocalAI/pkg/gallery"
+	"github.com/gofiber/fiber/v2"
+	"github.com/google/uuid"
+	"github.com/rs/zerolog/log"
+)
+
+/// Endpoint Service
+
+// ModelGalleryEndpointService bundles the state shared by the model gallery
+// HTTP handlers in this file:
+//   - galleries is the gallery list that the handlers list, add to and remove from;
+//   - modelPath is passed to gallery.AvailableGalleryModels when listing models;
+//   - galleryApplier receives apply operations on its C channel and is
+//     queried for job status.
+type ModelGalleryEndpointService struct {
+	galleries      []gallery.Gallery
+	modelPath      string
+	galleryApplier *services.GalleryApplier
+}
+
+// GalleryModel is the request body decoded by ApplyModelGalleryEndpoint: the
+// embedded gallery.GalleryModel plus an "id" field, which that handler
+// forwards as the GalleryName of the queued operation.
+type GalleryModel struct {
+	ID string `json:"id"`
+	gallery.GalleryModel
+}
+
+// CreateModelGalleryEndpointService returns a ModelGalleryEndpointService
+// holding the given gallery list, model path and gallery applier.
+func CreateModelGalleryEndpointService(galleries []gallery.Gallery, modelPath string, galleryApplier *services.GalleryApplier) ModelGalleryEndpointService {
+	var svc ModelGalleryEndpointService
+	svc.galleries = galleries
+	svc.modelPath = modelPath
+	svc.galleryApplier = galleryApplier
+	return svc
+}
+
+// GetOpStatusEndpoint builds a fiber handler that looks up the status for the
+// "uuid" route parameter on the gallery applier and returns it as JSON.
+// When the applier has no status for that ID the handler returns an error.
+func (mgs *ModelGalleryEndpointService) GetOpStatusEndpoint() func(c *fiber.Ctx) error {
+	return func(c *fiber.Ctx) error {
+		jobID := c.Params("uuid")
+		if st := mgs.galleryApplier.GetStatus(jobID); st != nil {
+			return c.JSON(st)
+		}
+		return fmt.Errorf("could not find any status for ID")
+	}
+}
+
+// GetAllStatusEndpoint builds a fiber handler that returns, as JSON, whatever
+// the gallery applier reports from GetAllStatus.
+func (mgs *ModelGalleryEndpointService) GetAllStatusEndpoint() func(c *fiber.Ctx) error {
+	return func(c *fiber.Ctx) error {
+		all := mgs.galleryApplier.GetAllStatus()
+		return c.JSON(all)
+	}
+}
+
+// ApplyModelGalleryEndpoint builds a fiber handler that decodes a GalleryModel
+// from the request body, generates a job UUID, sends a gallery.GalleryOp for
+// it on the gallery applier's C channel, and replies with the job UUID and
+// the URL under which its status can be queried.
+func (mgs *ModelGalleryEndpointService) ApplyModelGalleryEndpoint() func(c *fiber.Ctx) error {
+	// applyResponse is the JSON body sent back to the caller.
+	type applyResponse struct {
+		ID        string `json:"uuid"`
+		StatusURL string `json:"status"`
+	}
+
+	return func(c *fiber.Ctx) error {
+		var req GalleryModel
+		// Decode the request body.
+		if parseErr := c.BodyParser(&req); parseErr != nil {
+			return parseErr
+		}
+
+		jobUUID, genErr := uuid.NewUUID()
+		if genErr != nil {
+			return genErr
+		}
+		jobID := jobUUID.String()
+
+		// Hand the operation to the applier before answering the client.
+		op := gallery.GalleryOp{
+			Req:         req.GalleryModel,
+			Id:          jobID,
+			GalleryName: req.ID,
+			Galleries:   mgs.galleries,
+		}
+		mgs.galleryApplier.C <- op
+
+		return c.JSON(applyResponse{
+			ID:        jobID,
+			StatusURL: c.BaseURL() + "/models/jobs/" + jobID,
+		})
+	}
+}
+
+// ListModelFromGalleryEndpoint builds a fiber handler that asks
+// gallery.AvailableGalleryModels for the models of all configured galleries
+// and sends them back as a JSON-encoded body. The galleries and every model
+// found are written to the debug log.
+func (mgs *ModelGalleryEndpointService) ListModelFromGalleryEndpoint() func(c *fiber.Ctx) error {
+	return func(c *fiber.Ctx) error {
+		log.Debug().Msgf("Listing models from galleries: %+v", mgs.galleries)
+
+		found, listErr := gallery.AvailableGalleryModels(mgs.galleries, mgs.modelPath)
+		if listErr != nil {
+			return listErr
+		}
+
+		log.Debug().Msgf("Models found from galleries: %+v", found)
+		for i := range found {
+			log.Debug().Msgf("Model found from galleries: %+v", found[i])
+		}
+
+		payload, encErr := json.Marshal(found)
+		if encErr != nil {
+			return encErr
+		}
+		return c.Send(payload)
+	}
+}
+
+// ListModelGalleriesEndpoint builds a fiber handler that sends the currently
+// registered galleries as a JSON-encoded body.
+//
+// NOTE: unlike ListModelFromGalleryEndpoint this only lists the galleries
+// themselves, not the models they contain.
+func (mgs *ModelGalleryEndpointService) ListModelGalleriesEndpoint() func(c *fiber.Ctx) error {
+	return func(c *fiber.Ctx) error {
+		log.Debug().Msgf("Listing model galleries %+v", mgs.galleries)
+
+		payload, encErr := json.Marshal(mgs.galleries)
+		if encErr != nil {
+			return encErr
+		}
+		return c.Send(payload)
+	}
+}
+
+// AddModelGalleryEndpoint builds a fiber handler that decodes a
+// gallery.Gallery from the request body and registers it.
+//
+// The handler returns an error when a gallery with the same name is already
+// registered. On success it replies with the JSON-encoded gallery list,
+// including the gallery that was just added.
+//
+// NOTE(review): mgs.galleries is read and replaced here without any
+// synchronisation; confirm whether concurrent requests need guarding.
+func (mgs *ModelGalleryEndpointService) AddModelGalleryEndpoint() func(c *fiber.Ctx) error {
+	return func(c *fiber.Ctx) error {
+		input := new(gallery.Gallery)
+		// Get input data from the request body
+		if err := c.BodyParser(input); err != nil {
+			return err
+		}
+		if slices.ContainsFunc(mgs.galleries, func(g gallery.Gallery) bool {
+			return g.Name == input.Name
+		}) {
+			return fmt.Errorf("%s already exists", input.Name)
+		}
+
+		log.Debug().Msgf("Adding %+v to gallery list", *input)
+		// Build the updated list first and serialise that, so the response
+		// reflects the addition and the stored list only changes once the
+		// payload is known to be encodable.
+		updated := append(slices.Clone(mgs.galleries), *input)
+		dat, err := json.Marshal(updated)
+		if err != nil {
+			return err
+		}
+		mgs.galleries = updated
+		return c.Send(dat)
+	}
+}
+
+// RemoveModelGalleryEndpoint builds a fiber handler that decodes a
+// gallery.Gallery from the request body and removes every registered gallery
+// with the same name. It returns an error when no gallery with that name is
+// registered, and an empty body on success.
+func (mgs *ModelGalleryEndpointService) RemoveModelGalleryEndpoint() func(c *fiber.Ctx) error {
+	return func(c *fiber.Ctx) error {
+		var target gallery.Gallery
+		// Decode the request body.
+		if parseErr := c.BodyParser(&target); parseErr != nil {
+			return parseErr
+		}
+
+		// sameName reports whether a registered gallery matches the request.
+		sameName := func(g gallery.Gallery) bool {
+			return g.Name == target.Name
+		}
+
+		registered := slices.ContainsFunc(mgs.galleries, sameName)
+		if !registered {
+			return fmt.Errorf("%s is not currently registered", target.Name)
+		}
+
+		mgs.galleries = slices.DeleteFunc(mgs.galleries, sameName)
+		return c.Send(nil)
+	}
+}
--- a/core/http/endpoints/localai/metrics.go
+++ b/core/http/endpoints/localai/metrics.go
@ -0,0 +1,42 @@
+package localai
+
+import (
+	"time"
+
+	"github.com/go-skynet/LocalAI/pkg/schema"
+	"github.com/gofiber/fiber/v2"
+	"github.com/gofiber/fiber/v2/middleware/adaptor"
+	"github.com/prometheus/client_golang/prometheus/promhttp"
+)
+
+// MetricsHandler returns the Prometheus HTTP handler from promhttp, adapted
+// to a fiber.Handler.
+func MetricsHandler() fiber.Handler {
+	promHandler := promhttp.Handler()
+	return adaptor.HTTPHandler(promHandler)
+}
+
+// apiMiddlewareConfig holds the settings used by MetricsAPIMiddleware.
+// Filter, when non-nil and returning true for a request, makes the middleware
+// pass the request on without recording it; metrics is the object on which
+// observed API calls are reported.
+type apiMiddlewareConfig struct {
+	Filter  func(c *fiber.Ctx) bool
+	metrics *schema.LocalAIMetrics
+}
+
+// MetricsAPIMiddleware returns a fiber middleware that times every request
+// and reports method, path and elapsed seconds through
+// metrics.ObserveAPICall. Requests whose path is exactly "/metrics" are
+// passed through without being recorded.
+func MetricsAPIMiddleware(metrics *schema.LocalAIMetrics) fiber.Handler {
+	// isMetricsPath selects the requests that must not be measured.
+	isMetricsPath := func(c *fiber.Ctx) bool {
+		return c.Path() == "/metrics"
+	}
+	cfg := apiMiddlewareConfig{Filter: isMetricsPath, metrics: metrics}
+
+	return func(c *fiber.Ctx) error {
+		skip := cfg.Filter != nil && cfg.Filter(c)
+		if skip {
+			return c.Next()
+		}
+
+		// Capture the request identity before the downstream handlers run.
+		reqPath, reqMethod := c.Path(), c.Method()
+
+		begin := time.Now()
+		nextErr := c.Next()
+		took := time.Since(begin)
+
+		seconds := float64(took) / float64(time.Second)
+		cfg.metrics.ObserveAPICall(reqMethod, reqPath, seconds)
+		return nextErr
+	}
+}
--- a/core/http/endpoints/localai/tts.go
+++ b/core/http/endpoints/localai/tts.go
@ -0,0 +1,25 @@
+package localai
+
+import (
+	"github.com/go-skynet/LocalAI/core/backend"
+	"github.com/go-skynet/LocalAI/core/services"
+	"github.com/go-skynet/LocalAI/pkg/model"
+	"github.com/go-skynet/LocalAI/pkg/schema"
+	"github.com/gofiber/fiber/v2"
+)
+
+// TTSEndpoint builds a fiber handler that decodes a schema.TTSRequest from
+// the request body, passes its backend, input text and model to
+// backend.ModelTTS, and sends the resulting file to the client as a download.
+// cl is accepted but not used by the handler.
+func TTSEndpoint(cl *services.ConfigLoader, ml *model.ModelLoader, so *schema.StartupOptions) func(c *fiber.Ctx) error {
+	handler := func(c *fiber.Ctx) error {
+		var req schema.TTSRequest
+		// Decode the request body.
+		if parseErr := c.BodyParser(&req); parseErr != nil {
+			return parseErr
+		}
+
+		audioPath, _, ttsErr := backend.ModelTTS(req.Backend, req.Input, req.Model, ml, so)
+		if ttsErr != nil {
+			return ttsErr
+		}
+		return c.Download(audioPath)
+	}
+	return handler
+}
--- a/core/http/endpoints/openai/chat.go
+++ b/core/http/endpoints/openai/chat.go
@ -0,0 +1,97 @@
+package openai
+
+import (
+	"bufio"
+	"bytes"
+	"encoding/json"
+	"fmt"
+
+	"github.com/go-skynet/LocalAI/core/backend"
+	"github.com/go-skynet/LocalAI/core/services"
+	"github.com/go-skynet/LocalAI/pkg/model"
+	"github.com/go-skynet/LocalAI/pkg/schema"
+	"github.com/gofiber/fiber/v2"
+	"github.com/rs/zerolog/log"
+	"github.com/valyala/fasthttp"
+)
+
+// ChatEndpoint builds the fiber handler for chat completion requests.
+//
+// The handler reads the request through readInput. When input.Stream is set
+// the response is sent as server-sent events: each chunk received from
+// backend.StreamingChatGenerationOpenAIRequest is JSON-encoded into a "data:"
+// event, followed by a final chunk with finish reason "stop" and a
+// "data: [DONE]" marker. Otherwise the result of
+// backend.ChatGenerationOpenAIRequest is returned as a single JSON document.
+func ChatEndpoint(cl *services.ConfigLoader, ml *model.ModelLoader, startupOptions *schema.StartupOptions) func(c *fiber.Ctx) error {
+
+	emptyMessage := ""
+
+	return func(c *fiber.Ctx) error {
+		modelName, input, err := readInput(c, startupOptions, ml, true)
+		if err != nil {
+			return fmt.Errorf("failed reading parameters from request:%w", err)
+		}
+
+		// NOTE: functions are not supported in stream mode (yet?)
+		if input.Stream {
+			log.Debug().Msgf("Stream request received")
+			c.Context().SetContentType("text/event-stream")
+			c.Set("Cache-Control", "no-cache")
+			c.Set("Connection", "keep-alive")
+			c.Set("Transfer-Encoding", "chunked")
+
+			responses, err := backend.StreamingChatGenerationOpenAIRequest(modelName, input, cl, ml, startupOptions)
+			if err != nil {
+				return fmt.Errorf("failed establishing streaming chat request :%w", err)
+			}
+			c.Context().SetBodyStreamWriter(fasthttp.StreamWriter(func(w *bufio.Writer) {
+				// Remember the usage, ID and creation time of the latest chunk
+				// so that the closing "stop" message can repeat them. The usage
+				// is copied by value rather than kept as a pointer into the
+				// loop variable.
+				var usage schema.OpenAIUsage
+				id := ""
+				created := 0
+				for ev := range responses {
+					usage = ev.Usage
+					id = ev.ID
+					created = ev.Created
+
+					var buf bytes.Buffer
+					if err := json.NewEncoder(&buf).Encode(ev); err != nil {
+						// Skip the chunk instead of emitting an empty "data:" event.
+						log.Debug().Msgf("Encoding chunk failed: %v", err)
+						continue
+					}
+					log.Debug().Msgf("Sending chunk: %s", buf.String())
+					if _, err := fmt.Fprintf(w, "data: %v\n", buf.String()); err != nil {
+						log.Debug().Msgf("Sending chunk failed: %v", err)
+						input.Cancel()
+						break
+					}
+					// The writer is buffered, so a failed write to the client
+					// usually only surfaces here; treat it like a failed write.
+					if err := w.Flush(); err != nil {
+						log.Debug().Msgf("Sending chunk failed: %v", err)
+						input.Cancel()
+						break
+					}
+				}
+
+				resp := &schema.OpenAIResponse{
+					ID:      id,
+					Created: created,
+					Model:   input.Model, // we have to return what the user sent here, due to OpenAI spec.
+					Choices: []schema.Choice{
+						{
+							FinishReason: "stop",
+							Index:        0,
+							Delta:        &schema.Message{Content: &emptyMessage},
+						}},
+					Object: "chat.completion.chunk",
+					Usage:  usage,
+				}
+				respData, _ := json.Marshal(resp)
+
+				w.WriteString(fmt.Sprintf("data: %s\n\n", respData))
+				w.WriteString("data: [DONE]\n\n")
+				w.Flush()
+			}))
+			return nil
+		}
+
+		// Non-streaming request: generate the whole response and return it at once.
+		resp, err := backend.ChatGenerationOpenAIRequest(modelName, input, cl, ml, startupOptions)
+		if err != nil {
+			return fmt.Errorf("error generating chat request: +%w", err)
+		}
+		respData, _ := json.Marshal(resp) // TODO this is only used for the debug log and costs performance. monitor this?
+		log.Debug().Msgf("Response: %s", respData)
+		return c.JSON(resp)
+	}
+}
--- a/core/http/endpoints/openai/completion.go
+++ b/core/http/endpoints/openai/completion.go
@ -0,0 +1,91 @@
+package openai
+
+import (
+	"bufio"
+	"bytes"
+	"encoding/json"
+	"fmt"
+	"time"
+
+	"github.com/go-skynet/LocalAI/core/backend"
+	"github.com/go-skynet/LocalAI/core/services"
+	"github.com/go-skynet/LocalAI/pkg/model"
+	"github.com/go-skynet/LocalAI/pkg/schema"
+	"github.com/gofiber/fiber/v2"
+	"github.com/google/uuid"
+	"github.com/rs/zerolog/log"
+	"github.com/valyala/fasthttp"
+)
+
+// CompletionEndpoint builds the fiber handler for text completion requests.
+// https://platform.openai.com/docs/api-reference/completions
+//
+// The handler reads the request through readInput. When input.Stream is set
+// the response is sent as server-sent events: each chunk received from
+// backend.StreamingCompletionGenerationOpenAIRequest is JSON-encoded into a
+// "data:" event, followed by a final chunk with finish reason "stop" and a
+// "data: [DONE]" marker. Otherwise the result of
+// backend.CompletionGenerationOpenAIRequest is returned as a single JSON
+// document.
+func CompletionEndpoint(cl *services.ConfigLoader, ml *model.ModelLoader, so *schema.StartupOptions) func(c *fiber.Ctx) error {
+	return func(c *fiber.Ctx) error {
+		// The ID and creation time belong to the individual request, so they
+		// are generated per call rather than once when the endpoint is built.
+		id := uuid.New().String()
+		created := int(time.Now().Unix())
+
+		modelName, input, err := readInput(c, so, ml, true)
+		if err != nil {
+			return fmt.Errorf("failed reading parameters from request:%w", err)
+		}
+
+		log.Debug().Msgf("`input`: %+v", input)
+
+		if input.Stream {
+			log.Debug().Msgf("Stream request received")
+			c.Context().SetContentType("text/event-stream")
+			c.Set("Cache-Control", "no-cache")
+			c.Set("Connection", "keep-alive")
+			c.Set("Transfer-Encoding", "chunked")
+
+			responses, err := backend.StreamingCompletionGenerationOpenAIRequest(modelName, input, cl, ml, so)
+			if err != nil {
+				return fmt.Errorf("failed establishing streaming completion request :%w", err)
+			}
+			c.Context().SetBodyStreamWriter(fasthttp.StreamWriter(func(w *bufio.Writer) {
+
+				for ev := range responses {
+					var buf bytes.Buffer
+					if err := json.NewEncoder(&buf).Encode(ev); err != nil {
+						// Skip the chunk instead of emitting an empty "data:" event.
+						log.Debug().Msgf("Encoding chunk failed: %v", err)
+						continue
+					}
+
+					log.Debug().Msgf("Sending chunk: %s", buf.String())
+					if _, err := fmt.Fprintf(w, "data: %v\n", buf.String()); err != nil {
+						// Same handling as the chat endpoint: stop generating
+						// once the client can no longer be written to.
+						log.Debug().Msgf("Sending chunk failed: %v", err)
+						input.Cancel()
+						break
+					}
+					if err := w.Flush(); err != nil {
+						log.Debug().Msgf("Sending chunk failed: %v", err)
+						input.Cancel()
+						break
+					}
+				}
+
+				resp := &schema.OpenAIResponse{
+					ID:      id,
+					Created: created,
+					Model:   input.Model, // we have to return what the user sent here, due to OpenAI spec.
+					Choices: []schema.Choice{
+						{
+							Index:        0,
+							FinishReason: "stop",
+						},
+					},
+					Object: "text_completion",
+				}
+				respData, _ := json.Marshal(resp)
+
+				w.WriteString(fmt.Sprintf("data: %s\n\n", respData))
+				w.WriteString("data: [DONE]\n\n")
+				w.Flush()
+			}))
+			return nil
+		}
+
+		// Non-streaming request: generate the whole response and return it at once.
+		resp, err := backend.CompletionGenerationOpenAIRequest(modelName, input, cl, ml, so)
+		if err != nil {
+			return fmt.Errorf("error generating completion request: +%w", err)
+		}
+		// Serialised only for the debug log; a marshal failure is not fatal here.
+		jsonResult, _ := json.Marshal(resp)
+		log.Debug().Msgf("Response: %s", jsonResult)
+
+		// Return the prediction in the response body
+		return c.JSON(resp)
+	}
+}
--- a/core/http/endpoints/openai/edit.go
+++ b/core/http/endpoints/openai/edit.go
@ -0,0 +1,34 @@
+package openai
+
+import (
+	"encoding/json"
+	"fmt"
+
+	"github.com/go-skynet/LocalAI/core/backend"
+	"github.com/go-skynet/LocalAI/core/services"
+	"github.com/go-skynet/LocalAI/pkg/model"
+	"github.com/go-skynet/LocalAI/pkg/schema"
+	"github.com/gofiber/fiber/v2"
+
+	"github.com/rs/zerolog/log"
+)
+
+// EditEndpoint builds the fiber handler for edit requests: it reads the
+// request through readInput, passes it to backend.EditGenerationOpenAIRequest
+// and returns the result as JSON. The result is also written to the debug log.
+func EditEndpoint(cl *services.ConfigLoader, ml *model.ModelLoader, so *schema.StartupOptions) func(c *fiber.Ctx) error {
+	handler := func(c *fiber.Ctx) error {
+		modelFile, input, readErr := readInput(c, so, ml, true)
+		if readErr != nil {
+			return fmt.Errorf("failed reading parameters from request:%w", readErr)
+		}
+
+		result, genErr := backend.EditGenerationOpenAIRequest(modelFile, input, cl, ml, so)
+		if genErr != nil {
+			return genErr
+		}
+
+		// Serialised only for the debug log line below.
+		encoded, _ := json.Marshal(result)
+		log.Debug().Msgf("Response: %s", encoded)
+
+		// Send the prediction as the response body.
+		return c.JSON(result)
+	}
+	return handler
+}
--- a/core/http/endpoints/openai/embeddings.go
+++ b/core/http/endpoints/openai/embeddings.go
@ -0,0 +1,35 @@
+package openai
+
+import (
+	"encoding/json"
+	"fmt"
+
+	"github.com/go-skynet/LocalAI/core/backend"
+	"github.com/go-skynet/LocalAI/core/services"
+	"github.com/go-skynet/LocalAI/pkg/model"
+	"github.com/go-skynet/LocalAI/pkg/schema"
+
+	"github.com/gofiber/fiber/v2"
+	"github.com/rs/zerolog/log"
+)
+
+// EmbeddingsEndpoint builds the fiber handler for embedding requests
+// (https://platform.openai.com/docs/api-reference/embeddings): it reads the
+// request through readInput, passes it to backend.EmbeddingOpenAIRequest and
+// returns the result as JSON. The result is also written to the debug log.
+func EmbeddingsEndpoint(cl *services.ConfigLoader, ml *model.ModelLoader, so *schema.StartupOptions) func(c *fiber.Ctx) error {
+	handler := func(c *fiber.Ctx) error {
+		modelFile, input, readErr := readInput(c, so, ml, true)
+		if readErr != nil {
+			return fmt.Errorf("failed reading parameters from request:%w", readErr)
+		}
+
+		result, embedErr := backend.EmbeddingOpenAIRequest(modelFile, input, cl, ml, so)
+		if embedErr != nil {
+			return embedErr
+		}
+
+		// Serialised only for the debug log line below.
+		encoded, _ := json.Marshal(result)
+		log.Debug().Msgf("Response: %s", encoded)
+
+		// Send the embeddings as the response body.
+		return c.JSON(result)
+	}
+	return handler
+}
--- a/core/http/endpoints/openai/image.go
+++ b/core/http/endpoints/openai/image.go
@ -0,0 +1,48 @@
+package openai
+
+import (
+	"encoding/json"
+	"fmt"
+
+	"github.com/go-skynet/LocalAI/core/backend"
+	"github.com/go-skynet/LocalAI/core/services"
+	"github.com/go-skynet/LocalAI/pkg/model"
+	"github.com/go-skynet/LocalAI/pkg/schema"
+	"github.com/gofiber/fiber/v2"
+	"github.com/rs/zerolog/log"
+)
+
+// ImageEndpoint builds the fiber handler for image generation requests
+// (https://platform.openai.com/docs/api-reference/images/create): it reads
+// the request through readInput, passes it to
+// backend.ImageGenerationOpenAIRequest and returns the result as JSON.
+//
+// Example request:
+//
+//	curl http://localhost:8080/v1/images/generations \
+//	  -H "Content-Type: application/json" \
+//	  -d '{
+//	    "prompt": "A cute baby sea otter",
+//	    "n": 1,
+//	    "size": "512x512"
+//	  }'
+func ImageEndpoint(cl *services.ConfigLoader, ml *model.ModelLoader, so *schema.StartupOptions) func(c *fiber.Ctx) error {
+	handler := func(c *fiber.Ctx) error {
+		modelName, input, readErr := readInput(c, so, ml, true)
+		if readErr != nil {
+			return fmt.Errorf("failed reading parameters from request:%w", readErr)
+		}
+
+		result, genErr := backend.ImageGenerationOpenAIRequest(modelName, input, cl, ml, so)
+		if genErr != nil {
+			return fmt.Errorf("error generating image request: +%w", genErr)
+		}
+
+		// Serialised only for the debug log line below.
+		encoded, _ := json.Marshal(result)
+		log.Debug().Msgf("Response: %s", encoded)
+
+		// Send the generated images as the response body.
+		return c.JSON(result)
+	}
+	return handler
+}
--- a/core/http/endpoints/openai/list.go
+++ b/core/http/endpoints/openai/list.go
@ -0,0 +1,69 @@
+package openai
+
+import (
+	"regexp"
+
+	"github.com/go-skynet/LocalAI/core/services"
+	"github.com/go-skynet/LocalAI/pkg/model"
+	"github.com/go-skynet/LocalAI/pkg/schema"
+	"github.com/gofiber/fiber/v2"
+)
+
+// ListModelsEndpoint builds a fiber handler that lists the available models
+// as {"object": "list", "data": [...]}.
+//
+// Configured models (from cl) are listed by their configuration name, followed
+// by the loose model files reported by ml. Two query parameters tune the list:
+//   - "filter": a regular expression; only matching names are listed.
+//   - "excludeConfigured" (default true): when true, loose files that are
+//     referenced as the model of a configuration are left out.
+func ListModelsEndpoint(cl *services.ConfigLoader, ml *model.ModelLoader) func(ctx *fiber.Ctx) error {
+	return func(c *fiber.Ctx) error {
+		looseFiles, err := ml.ListModels()
+		if err != nil {
+			return err
+		}
+
+		// Without a filter every name is accepted; with one, names must match
+		// the compiled regular expression.
+		accept := func(string) bool { return true }
+		if pattern := c.Query("filter"); pattern != "" {
+			rxp, err := regexp.Compile(pattern)
+			if err != nil {
+				return err
+			}
+			accept = rxp.MatchString
+		}
+
+		// By default, exclude any loose files that are already referenced by a configuration file.
+		excludeConfigured := c.QueryBool("excludeConfigured", true)
+
+		// referenced collects the model files claimed by a configuration.
+		referenced := map[string]struct{}{}
+		listed := []schema.OpenAIModel{}
+
+		// Known configurations come first.
+		for _, cfg := range cl.GetAllConfigs() {
+			if excludeConfigured {
+				referenced[cfg.Model] = struct{}{}
+			}
+			if !accept(cfg.Name) {
+				continue
+			}
+			listed = append(listed, schema.OpenAIModel{ID: cfg.Name, Object: "model"})
+		}
+
+		// Then the loose files, unless a configuration already covers them.
+		for _, file := range looseFiles {
+			if _, covered := referenced[file]; covered {
+				continue
+			}
+			if accept(file) {
+				listed = append(listed, schema.OpenAIModel{ID: file, Object: "model"})
+			}
+		}
+
+		type modelList struct {
+			Object string               `json:"object"`
+			Data   []schema.OpenAIModel `json:"data"`
+		}
+		return c.JSON(modelList{Object: "list", Data: listed})
+	}
+}
--- a/core/http/endpoints/openai/request.go
+++ b/core/http/endpoints/openai/request.go
@ -0,0 +1,57 @@
+package openai
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"strings"
+
+	"github.com/go-skynet/LocalAI/pkg/model"
+	"github.com/go-skynet/LocalAI/pkg/schema"
+	"github.com/gofiber/fiber/v2"
+	"github.com/rs/zerolog/log"
+)
+
+// readInput parses the request body into a schema.OpenAIRequest and resolves
+// the model the request should run against.
+//
+// Model resolution, in increasing priority:
+//  1. the "model" field of the request body;
+//  2. the "model" route parameter, when non-empty;
+//  3. when neither is set, no usable bearer token is present and randomModel
+//     is true: the first entry of ml.ListModels (an error if there is none);
+//  4. the bearer token of the Authorization header, when
+//     ml.ExistsInModelPath reports true for it.
+//
+// The returned request carries a context derived from o.Context together with
+// its cancel function. On error the context is cancelled and a nil request is
+// returned.
+func readInput(c *fiber.Ctx, o *schema.StartupOptions, ml *model.ModelLoader, randomModel bool) (string, *schema.OpenAIRequest, error) {
+	input := new(schema.OpenAIRequest)
+	ctx, cancel := context.WithCancel(o.Context)
+	input.Context = ctx
+	input.Cancel = cancel
+	// Get input data from the request body
+	if err := c.BodyParser(input); err != nil {
+		// The request is dropped, so release the derived context.
+		cancel()
+		return "", nil, fmt.Errorf("failed parsing request body: %w", err)
+	}
+
+	modelFile := input.Model
+
+	if c.Params("model") != "" {
+		modelFile = c.Params("model")
+	}
+
+	// Serialised only for the debug log line below.
+	received, _ := json.Marshal(input)
+
+	log.Debug().Msgf("Request received: %s", string(received))
+
+	// Set model from bearer token, if available.
+	// TrimPrefix removes the literal "Bearer " scheme only; TrimLeft would
+	// treat the argument as a character set and also strip leading
+	// 'B', 'e', 'a', 'r' and ' ' characters from the token itself.
+	bearer := strings.TrimPrefix(c.Get("authorization"), "Bearer ")
+	bearerExists := bearer != "" && ml.ExistsInModelPath(bearer)
+
+	// If no model was specified, take the first available
+	if modelFile == "" && !bearerExists && randomModel {
+		models, _ := ml.ListModels()
+		if len(models) == 0 {
+			log.Debug().Msgf("No model specified, returning error")
+			cancel()
+			return "", nil, fmt.Errorf("no model specified")
+		}
+		modelFile = models[0]
+		log.Debug().Msgf("No model specified, using: %s", modelFile)
+	}
+
+	// If a model is found in bearer token takes precedence
+	if bearerExists {
+		log.Debug().Msgf("Using model from bearer token: %s", bearer)
+		modelFile = bearer
+	}
+	return modelFile, input, nil
+}
--- a/core/http/endpoints/openai/transcription.go
+++ b/core/http/endpoints/openai/transcription.go
@ -0,0 +1,49 @@
+package openai
+
+import (
+	"fmt"
+	"net/http"
+	"os"
+	"path"
+
+	"github.com/go-skynet/LocalAI/core/backend"
+	"github.com/go-skynet/LocalAI/core/services"
+	"github.com/go-skynet/LocalAI/pkg/model"
+	"github.com/go-skynet/LocalAI/pkg/schema"
+	"github.com/go-skynet/LocalAI/pkg/utils"
+
+	"github.com/gofiber/fiber/v2"
+	"github.com/rs/zerolog/log"
+)
+
+// TranscriptEndpoint builds the fiber handler for audio transcription
+// requests (https://platform.openai.com/docs/api-reference/audio/create).
+//
+// The handler reads the request through readInput, copies the uploaded "file"
+// form field to a temporary file, passes that file to
+// backend.TranscriptionOpenAIRequest and returns the result as JSON with
+// status 200. The directory containing the temporary file is removed when the
+// handler returns.
+func TranscriptEndpoint(cl *services.ConfigLoader, ml *model.ModelLoader, so *schema.StartupOptions) func(c *fiber.Ctx) error {
+	return func(c *fiber.Ctx) error {
+		modelName, input, readErr := readInput(c, so, ml, true)
+		if readErr != nil {
+			return fmt.Errorf("failed reading parameters from request:%w", readErr)
+		}
+
+		// Fetch the uploaded audio from the multipart form.
+		upload, formErr := c.FormFile("file")
+		if formErr != nil {
+			return formErr
+		}
+
+		audioPath, tmpErr := utils.CreateTempFileFromMultipartFile(upload, "", "transcription") // 3rd param formerly whisper
+		if tmpErr != nil {
+			return tmpErr
+		}
+		// NOTE(review): this removes the whole parent directory of the temp
+		// file; presumably the helper creates a dedicated directory — confirm.
+		defer os.RemoveAll(path.Dir(audioPath))
+		log.Debug().Msgf("Audio file copied to: %+v", audioPath)
+
+		transcript, trErr := backend.TranscriptionOpenAIRequest(modelName, input, audioPath, cl, ml, so)
+		if trErr != nil {
+			return fmt.Errorf("error generating transcription request: +%w", trErr)
+		}
+		log.Debug().Msgf("Trascribed: %+v", transcript)
+		// TODO: handle different outputs here
+		return c.Status(http.StatusOK).JSON(transcript)
+	}
+}