wire things up

2025-06-29 22:20:43 +00:00 · 2023-04-29 08:23:45 +02:00 · 2023-04-29 08:23:45 +02:00 · 44c0f341a2
commit 44c0f341a2
parent b08a1701dd
3 changed files with 129 additions and 159 deletions
--- a/api/api.go
+++ b/api/api.go
@ -61,14 +61,14 @@ func App(configFile string, loader *model.ModelLoader, threads, ctxSize int, f16
 	app.Use(cors.New())
 	// openAI compatible API endpoint
-	app.Post("/v1/chat/completions", openAIEndpoint(cm, ChatEndpoint, debug, loader, threads, ctxSize, f16))
+	app.Post("/v1/chat/completions", chatEndpoint(cm, debug, loader, threads, ctxSize, f16))
-	app.Post("/chat/completions", openAIEndpoint(cm, ChatEndpoint, debug, loader, threads, ctxSize, f16))
+	app.Post("/chat/completions", chatEndpoint(cm, debug, loader, threads, ctxSize, f16))
-	app.Post("/v1/edits", openAIEndpoint(cm, EditEndpoint, debug, loader, threads, ctxSize, f16))
+	app.Post("/v1/edits", editEndpoint(cm, debug, loader, threads, ctxSize, f16))
-	app.Post("/edits", openAIEndpoint(cm, EditEndpoint, debug, loader, threads, ctxSize, f16))
+	app.Post("/edits", editEndpoint(cm, debug, loader, threads, ctxSize, f16))
-	app.Post("/v1/completions", openAIEndpoint(cm, CompletionEndpoint, debug, loader, threads, ctxSize, f16))
+	app.Post("/v1/completions", completionEndpoint(cm, debug, loader, threads, ctxSize, f16))
-	app.Post("/completions", openAIEndpoint(cm, CompletionEndpoint, debug, loader, threads, ctxSize, f16))
+	app.Post("/completions", completionEndpoint(cm, debug, loader, threads, ctxSize, f16))
 	app.Get("/v1/models", listModels(loader, cm))
 	app.Get("/models", listModels(loader, cm))
--- a/api/openai.go
+++ b/api/openai.go
@ -6,7 +6,6 @@ import (
 	"fmt"
 	"os"
 	"path/filepath"
 	"regexp"
 	"strings"
 	model "github.com/go-skynet/LocalAI/pkg/model"
@ -146,19 +145,11 @@ func updateConfig(config *Config, input *OpenAIRequest) {
 	}
 }
 // EndpointType identifies which OpenAI-compatible endpoint a request targets.
 type EndpointType uint8
 // Endpoint kinds, numbered consecutively from zero in declaration order.
 const (
 	// ChatEndpoint selects the chat completions behaviour.
 	ChatEndpoint EndpointType = iota
 	// CompletionEndpoint selects the plain text completion behaviour.
 	CompletionEndpoint
 	// EditEndpoint selects the edit behaviour.
 	EditEndpoint
 )
 func readConfig(cm ConfigMerger, c *fiber.Ctx, loader *model.ModelLoader, debug bool, threads, ctx int, f16 bool) (*Config, *OpenAIRequest, error) {
 	input := new(OpenAIRequest)
 	// Get input data from the request body
 	if err := c.BodyParser(input); err != nil {
-		return err
+		return nil, nil, err
 	}
 	modelFile := input.Model
@ -178,7 +169,7 @@ func readConfig(cm ConfigMerger, c *fiber.Ctx, loader *model.ModelLoader, debug
 			log.Debug().Msgf("No model specified, using: %s", modelFile)
 		} else {
 			log.Debug().Msgf("No model specified, returning error")
-			return nil, fmt.Errorf("no model specified")
+			return nil, nil, fmt.Errorf("no model specified")
 		}
 	}
@ -192,7 +183,7 @@ func readConfig(cm ConfigMerger, c *fiber.Ctx, loader *model.ModelLoader, debug
 	modelConfig := filepath.Join(loader.ModelPath, modelFile+".yaml")
 	if _, err := os.Stat(modelConfig); err == nil {
 		if err := cm.LoadConfig(modelConfig); err != nil {
-			return nil, fmt.Errorf("failed loading model config (%s) %s", modelConfig, err.Error())
+			return nil, nil, fmt.Errorf("failed loading model config (%s) %s", modelConfig, err.Error())
 		}
 	}
@ -226,6 +217,7 @@ func readConfig(cm ConfigMerger, c *fiber.Ctx, loader *model.ModelLoader, debug
 	return config, input, nil
 }
 // https://platform.openai.com/docs/api-reference/completions
 func completionEndpoint(cm ConfigMerger, debug bool, loader *model.ModelLoader, threads, ctx int, f16 bool) func(c *fiber.Ctx) error {
 	return func(c *fiber.Ctx) error {
 		config, input, err := readConfig(cm, c, loader, debug, threads, ctx, f16)
@ -251,32 +243,13 @@ func completionEndpoint(cm ConfigMerger, debug bool, loader *model.ModelLoader,
 			log.Debug().Msgf("Template found, input modified to: %s", predInput)
 		}
-		result := []Choice{}
+		result, err := ComputeChoices(predInput, input, config, loader, func(s string, c *[]Choice) {
-
+			*c = append(*c, Choice{Text: s})
-		n := input.N
+		})
 		if input.N == 0 {
 			n = 1
 		}
 		// get the model function to call for the result
 		predFunc, err := ModelInference(predInput, loader, *config)
 		if err != nil {
 			return err
 		}
 		for i := 0; i < n; i++ {
 			prediction, err := predFunc()
 			if err != nil {
 				return err
 			}
 			prediction = Finetune(*config, predInput, prediction)
 			result = append(result, Choice{Text: prediction})
 		}
 		resp := &OpenAIResponse{
 			Model:   input.Model, // we have to return what the user sent here, due to OpenAI spec.
 			Choices: result,
@ -291,18 +264,29 @@ func completionEndpoint(cm ConfigMerger, debug bool, loader *model.ModelLoader,
 	}
 }
-// https://platform.openai.com/docs/api-reference/completions
+func chatEndpoint(cm ConfigMerger, debug bool, loader *model.ModelLoader, threads, ctx int, f16 bool) func(c *fiber.Ctx) error {
 func openAIEndpoint(cm ConfigMerger, endpointType EndpointType, debug bool, loader *model.ModelLoader, threads, ctx int, f16 bool) func(c *fiber.Ctx) error {
 	return func(c *fiber.Ctx) error {
 		config, input, err := readConfig(cm, c, loader, debug, threads, ctx, f16)
 		if err != nil {
 			return fmt.Errorf("failed reading parameters from request:%w", err)
 		}
-		chat := endpointType == ChatEndpoint
+		log.Debug().Msgf("Parameter Config: %+v", config)
-		completion := endpointType == CompletionEndpoint
+
-		edit := endpointType == EditEndpoint
+		predInput := input.Prompt
 		mess := []string{}
 		for _, i := range input.Messages {
 			r := config.Roles[i.Role]
 			if r == "" {
 				r = i.Role
 			}
 			content := fmt.Sprint(r, " ", i.Content)
 			mess = append(mess, content)
 		}
 		predInput = strings.Join(mess, "\n")
 		if input.Stream {
 			log.Debug().Msgf("Stream request received")
@ -313,133 +297,42 @@ func openAIEndpoint(cm ConfigMerger, endpointType EndpointType, debug bool, load
 			c.Set("Transfer-Encoding", "chunked")
 		}
 		log.Debug().Msgf("Parameter Config: %+v", config)
 		predInput := input.Prompt
 		if chat {
 			mess := []string{}
 			for _, i := range input.Messages {
 				r := config.Roles[i.Role]
 				if r == "" {
 					r = i.Role
 				}
 				content := fmt.Sprint(r, " ", i.Content)
 				mess = append(mess, content)
 			}
 			predInput = strings.Join(mess, "\n")
 		}
 		templateFile := config.Model
-		switch {
+		if config.TemplateConfig.Chat != "" {
 		case config.TemplateConfig.Chat != "" && chat:
 			templateFile = config.TemplateConfig.Chat
 		case config.TemplateConfig.Completion != "" && completion:
 			templateFile = config.TemplateConfig.Completion
 		case config.TemplateConfig.Edit != "" && edit:
 			templateFile = config.TemplateConfig.Edit
 		}
-		if edit {
+		// A model can have a "file.bin.tmpl" file associated with a prompt template prefix
-			e := ""
+		templatedInput, err := loader.TemplatePrefix(templateFile, struct {
-			if config.TemplateConfig.Edit == "" {
+			Input string
-				e = ".edit"
+		}{Input: predInput})
-			}
+		if err == nil {
-			// A model can have a "file.bin.tmpl" file associated with a prompt template prefix
+			predInput = templatedInput
-			templatedInput, err := loader.TemplatePrefix(templateFile+e, struct {
+			log.Debug().Msgf("Template found, input modified to: %s", predInput)
 				Input       string
 				Instruction string
 			}{Input: input.Input, Instruction: input.Instruction})
 			if err == nil {
 				predInput = templatedInput
 				log.Debug().Msgf("Template found, input modified to: %s", predInput)
 			}
 		} else {
 			// A model can have a "file.bin.tmpl" file associated with a prompt template prefix
 			templatedInput, err := loader.TemplatePrefix(templateFile, struct {
 				Input string
 			}{Input: predInput})
 			if err == nil {
 				predInput = templatedInput
 				log.Debug().Msgf("Template found, input modified to: %s", predInput)
 			}
 		}
-		result := []Choice{}
+		result, err := ComputeChoices(predInput, input, config, loader, func(s string, c *[]Choice) {
-
+			if input.Stream {
-		n := input.N
+				*c = append(*c, Choice{Delta: &Message{Role: "assistant", Content: s}})
-
+			} else {
-		if input.N == 0 {
+				*c = append(*c, Choice{Message: &Message{Role: "assistant", Content: s}})
-			n = 1
+			}
-		}
+		})
 		// get the model function to call for the result
 		predFunc, err := ModelInference(predInput, loader, *config)
 		if err != nil {
 			return err
 		}
 		finetunePrediction := func(prediction string) string {
 			if config.Echo {
 				prediction = predInput + prediction
 			}
 			for _, c := range config.Cutstrings {
 				mu.Lock()
 				reg, ok := cutstrings[c]
 				if !ok {
 					cutstrings[c] = regexp.MustCompile(c)
 					reg = cutstrings[c]
 				}
 				mu.Unlock()
 				prediction = reg.ReplaceAllString(prediction, "")
 			}
 			for _, c := range config.TrimSpace {
 				prediction = strings.TrimSpace(strings.TrimPrefix(prediction, c))
 			}
 			return prediction
 		}
 		for i := 0; i < n; i++ {
 			prediction, err := predFunc()
 			if err != nil {
 				return err
 			}
 			prediction = finetunePrediction(prediction)
 			if chat {
 				if input.Stream {
 					result = append(result, Choice{Delta: &Message{Role: "assistant", Content: prediction}})
 				} else {
 					result = append(result, Choice{Message: &Message{Role: "assistant", Content: prediction}})
 				}
 			} else {
 				result = append(result, Choice{Text: prediction})
 			}
 		}
 		resp := &OpenAIResponse{
 			Model:   input.Model, // we have to return what the user sent here, due to OpenAI spec.
 			Choices: result,
 			Object:  "chat.completion",
 		}
 		if input.Stream && chat {
 			resp.Object = "chat.completion.chunk"
 		} else if chat {
 			resp.Object = "chat.completion"
 		} else if edit {
 			resp.Object = "edit"
 		} else {
 			resp.Object = "text_completion"
 		}
 		jsonResult, _ := json.Marshal(resp)
 		log.Debug().Msgf("Response: %s", jsonResult)
 		if input.Stream {
 			resp.Object = "chat.completion.chunk"
 			jsonResult, _ := json.Marshal(resp)
 			log.Debug().Msgf("Response: %s", jsonResult)
 			log.Debug().Msgf("Handling stream request")
 			c.Context().SetBodyStreamWriter(fasthttp.StreamWriter(func(w *bufio.Writer) {
 				fmt.Fprintf(w, "event: data\n")
@ -464,10 +357,57 @@ func openAIEndpoint(cm ConfigMerger, endpointType EndpointType, debug bool, load
 				//		w.Flush()
 			}))
 			return nil
 		} else {
 			// Return the prediction in the response body
 			return c.JSON(resp)
 		}
 		// Return the prediction in the response body
 		return c.JSON(resp)
 	}
 }
 // editEndpoint returns the fiber handler serving the edit routes.
 //
 // The handler reads the request and model configuration through readConfig,
 // renders the edit prompt template when one can be loaded (otherwise the raw
 // input is used unchanged), asks ComputeChoices for the predictions and
 // replies with an OpenAIResponse whose Object is "edit".
 func editEndpoint(cm ConfigMerger, debug bool, loader *model.ModelLoader, threads, ctx int, f16 bool) func(c *fiber.Ctx) error {
 	return func(c *fiber.Ctx) error {
 		config, input, err := readConfig(cm, c, loader, debug, threads, ctx, f16)
 		if err != nil {
 			return fmt.Errorf("failed reading parameters from request:%w", err)
 		}
 		log.Debug().Msgf("Parameter Config: %+v", config)
 		// The template defaults to the model name unless an edit template is configured.
 		tmplName := config.Model
 		if config.TemplateConfig.Edit != "" {
 			tmplName = config.TemplateConfig.Edit
 		}
 		prompt := input.Input
 		tmplData := struct {
 			Input       string
 			Instruction string
 		}{Input: prompt, Instruction: input.Instruction}
 		// A model can have a "file.bin.tmpl" file associated with a prompt template prefix.
 		// A template error is not fatal: the untemplated input is used instead.
 		if rendered, tmplErr := loader.TemplatePrefix(tmplName, tmplData); tmplErr == nil {
 			prompt = rendered
 			log.Debug().Msgf("Template found, input modified to: %s", prompt)
 		}
 		appendText := func(text string, out *[]Choice) {
 			*out = append(*out, Choice{Text: text})
 		}
 		choices, err := ComputeChoices(prompt, input, config, loader, appendText)
 		if err != nil {
 			return err
 		}
 		resp := &OpenAIResponse{
 			Model:   input.Model, // we have to return what the user sent here, due to OpenAI spec.
 			Choices: choices,
 			Object:  "edit",
 		}
 		// The marshalled copy is only used for the debug log line below.
 		encoded, _ := json.Marshal(resp)
 		log.Debug().Msgf("Response: %s", encoded)
 		// Return the prediction in the response body
 		return c.JSON(resp)
 	}
 }
--- a/api/prediction.go
+++ b/api/prediction.go
@ -189,6 +189,36 @@ func ModelInference(s string, loader *model.ModelLoader, c Config) (func() (stri
 	}, nil
 }
 // ComputeChoices runs inference on predInput once per requested choice.
 //
 // The number of runs is input.N, with zero treated as one. Each prediction
 // is passed through Finetune and then handed to cb together with a pointer to
 // the result slice, so the caller decides how a prediction becomes a Choice.
 // On failure the choices gathered so far are returned along with the error.
 func ComputeChoices(predInput string, input *OpenAIRequest, config *Config, loader *model.ModelLoader, cb func(string, *[]Choice)) ([]Choice, error) {
 	choices := []Choice{}
 	count := input.N
 	if count == 0 {
 		count = 1
 	}
 	// get the model function to call for the result
 	infer, err := ModelInference(predInput, loader, *config)
 	if err != nil {
 		return choices, err
 	}
 	for remaining := count; remaining > 0; remaining-- {
 		raw, err := infer()
 		if err != nil {
 			return choices, err
 		}
 		cb(Finetune(*config, predInput, raw), &choices)
 	}
 	return choices, nil
 }
 var (
 	// cutstrings caches compiled regular expressions keyed by their pattern text.
 	cutstrings = make(map[string]*regexp.Regexp)
 	// mu serialises access to cutstrings.
 	mu sync.Mutex
 )