feat(template): read jinja templates from gguf files (#4332)

* Read jinja templates as fallback Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Move templating out of model loader Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Test TemplateMessages Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Set role and content from transformers Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Tests: be more flexible Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * More jinja Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Small refactoring and adaptations Signed-off-by: Ettore Di Giacinto <mudler@localai.io> --------- Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-05-28 14:35:00 +00:00 · 2024-12-08 13:50:33 +01:00 · 2024-12-08 13:50:33 +01:00 · cea5a0ea42
commit cea5a0ea42
parent f5e1527a5a
23 changed files with 971 additions and 785 deletions
--- a/core/http/endpoints/openai/chat.go
+++ b/core/http/endpoints/openai/chat.go
@ -14,6 +14,8 @@ import (
 	"github.com/mudler/LocalAI/core/config"
 	"github.com/mudler/LocalAI/core/schema"
 	"github.com/mudler/LocalAI/pkg/functions"
+	"github.com/mudler/LocalAI/pkg/templates"
+
 	model "github.com/mudler/LocalAI/pkg/model"
 	"github.com/rs/zerolog/log"
 	"github.com/valyala/fasthttp"
@ -24,7 +26,7 @@ import (
 // @Param request body schema.OpenAIRequest true "query params"
 // @Success 200 {object} schema.OpenAIResponse "Response"
 // @Router /v1/chat/completions [post]
-func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startupOptions *config.ApplicationConfig) func(c *fiber.Ctx) error {
+func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, evaluator *templates.Evaluator, startupOptions *config.ApplicationConfig) func(c *fiber.Ctx) error {
 	var id, textContentToReturn string
 	var created int

@ -298,148 +300,10 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup
 		// If we are using the tokenizer template, we don't need to process the messages
 		// unless we are processing functions
 		if !config.TemplateConfig.UseTokenizerTemplate || shouldUseFn {
-			suppressConfigSystemPrompt := false
-			mess := []string{}
-			for messageIndex, i := range input.Messages {
-				var content string
-				role := i.Role
-
-				// if function call, we might want to customize the role so we can display better that the "assistant called a json action"
-				// if an "assistant_function_call" role is defined, we use it, otherwise we use the role that is passed by in the request
-				if (i.FunctionCall != nil || i.ToolCalls != nil) && i.Role == "assistant" {
-					roleFn := "assistant_function_call"
-					r := config.Roles[roleFn]
-					if r != "" {
-						role = roleFn
-					}
-				}
-				r := config.Roles[role]
-				contentExists := i.Content != nil && i.StringContent != ""
-
-				fcall := i.FunctionCall
-				if len(i.ToolCalls) > 0 {
-					fcall = i.ToolCalls
-				}
-
-				// First attempt to populate content via a chat message specific template
-				if config.TemplateConfig.ChatMessage != "" {
-					chatMessageData := model.ChatMessageTemplateData{
-						SystemPrompt: config.SystemPrompt,
-						Role:         r,
-						RoleName:     role,
-						Content:      i.StringContent,
-						FunctionCall: fcall,
-						FunctionName: i.Name,
-						LastMessage:  messageIndex == (len(input.Messages) - 1),
-						Function:     config.Grammar != "" && (messageIndex == (len(input.Messages) - 1)),
-						MessageIndex: messageIndex,
-					}
-					templatedChatMessage, err := ml.EvaluateTemplateForChatMessage(config.TemplateConfig.ChatMessage, chatMessageData)
-					if err != nil {
-						log.Error().Err(err).Interface("message", chatMessageData).Str("template", config.TemplateConfig.ChatMessage).Msg("error processing message with template, skipping")
-					} else {
-						if templatedChatMessage == "" {
-							log.Warn().Msgf("template \"%s\" produced blank output for %+v. Skipping!", config.TemplateConfig.ChatMessage, chatMessageData)
-							continue // TODO: This continue is here intentionally to skip over the line `mess = append(mess, content)` below, and to prevent the sprintf
-						}
-						log.Debug().Msgf("templated message for chat: %s", templatedChatMessage)
-						content = templatedChatMessage
-					}
-				}
-
-				marshalAnyRole := func(f any) {
-					j, err := json.Marshal(f)
-					if err == nil {
-						if contentExists {
-							content += "\n" + fmt.Sprint(r, " ", string(j))
-						} else {
-							content = fmt.Sprint(r, " ", string(j))
-						}
-					}
-				}
-				marshalAny := func(f any) {
-					j, err := json.Marshal(f)
-					if err == nil {
-						if contentExists {
-							content += "\n" + string(j)
-						} else {
-							content = string(j)
-						}
-					}
-				}
-				// If this model doesn't have such a template, or if that template fails to return a value, template at the message level.
-				if content == "" {
-					if r != "" {
-						if contentExists {
-							content = fmt.Sprint(r, i.StringContent)
-						}
-
-						if i.FunctionCall != nil {
-							marshalAnyRole(i.FunctionCall)
-						}
-						if i.ToolCalls != nil {
-							marshalAnyRole(i.ToolCalls)
-						}
-					} else {
-						if contentExists {
-							content = fmt.Sprint(i.StringContent)
-						}
-						if i.FunctionCall != nil {
-							marshalAny(i.FunctionCall)
-						}
-						if i.ToolCalls != nil {
-							marshalAny(i.ToolCalls)
-						}
-					}
-					// Special Handling: System. We care if it was printed at all, not the r branch, so check seperately
-					if contentExists && role == "system" {
-						suppressConfigSystemPrompt = true
-					}
-				}
-
-				mess = append(mess, content)
-			}
-
-			joinCharacter := "\n"
-			if config.TemplateConfig.JoinChatMessagesByCharacter != nil {
-				joinCharacter = *config.TemplateConfig.JoinChatMessagesByCharacter
-			}
-
-			predInput = strings.Join(mess, joinCharacter)
-			log.Debug().Msgf("Prompt (before templating): %s", predInput)
-
-			templateFile := ""
-
-			// A model can have a "file.bin.tmpl" file associated with a prompt template prefix
-			if ml.ExistsInModelPath(fmt.Sprintf("%s.tmpl", config.Model)) {
-				templateFile = config.Model
-			}
-
-			if config.TemplateConfig.Chat != "" && !shouldUseFn {
-				templateFile = config.TemplateConfig.Chat
-			}
-
-			if config.TemplateConfig.Functions != "" && shouldUseFn {
-				templateFile = config.TemplateConfig.Functions
-			}
-
-			if templateFile != "" {
-				templatedInput, err := ml.EvaluateTemplateForPrompt(model.ChatPromptTemplate, templateFile, model.PromptTemplateData{
-					SystemPrompt:         config.SystemPrompt,
-					SuppressSystemPrompt: suppressConfigSystemPrompt,
-					Input:                predInput,
-					Functions:            funcs,
-				})
-				if err == nil {
-					predInput = templatedInput
-					log.Debug().Msgf("Template found, input modified to: %s", predInput)
-				} else {
-					log.Debug().Msgf("Template failed loading: %s", err.Error())
-				}
-			}
+			predInput = evaluator.TemplateMessages(input.Messages, config, funcs, shouldUseFn)

 			log.Debug().Msgf("Prompt (after templating): %s", predInput)
-			if shouldUseFn && config.Grammar != "" {
+			if config.Grammar != "" {
 				log.Debug().Msgf("Grammar: %+v", config.Grammar)
 			}
 		}
--- a/core/http/endpoints/openai/completion.go
+++ b/core/http/endpoints/openai/completion.go
@ -16,6 +16,7 @@ import (
 	"github.com/mudler/LocalAI/core/schema"
 	"github.com/mudler/LocalAI/pkg/functions"
 	model "github.com/mudler/LocalAI/pkg/model"
+	"github.com/mudler/LocalAI/pkg/templates"
 	"github.com/rs/zerolog/log"
 	"github.com/valyala/fasthttp"
 )
@ -25,7 +26,7 @@ import (
 // @Param request body schema.OpenAIRequest true "query params"
 // @Success 200 {object} schema.OpenAIResponse "Response"
 // @Router /v1/completions [post]
-func CompletionEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
+func CompletionEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, evaluator *templates.Evaluator, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
 	id := uuid.New().String()
 	created := int(time.Now().Unix())

@ -94,17 +95,6 @@ func CompletionEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, a
 			c.Set("Transfer-Encoding", "chunked")
 		}

-		templateFile := ""
-
-		// A model can have a "file.bin.tmpl" file associated with a prompt template prefix
-		if ml.ExistsInModelPath(fmt.Sprintf("%s.tmpl", config.Model)) {
-			templateFile = config.Model
-		}
-
-		if config.TemplateConfig.Completion != "" {
-			templateFile = config.TemplateConfig.Completion
-		}
-
 		if input.Stream {
 			if len(config.PromptStrings) > 1 {
 				return errors.New("cannot handle more than 1 `PromptStrings` when Streaming")
@ -112,15 +102,13 @@ func CompletionEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, a

 			predInput := config.PromptStrings[0]

-			if templateFile != "" {
-				templatedInput, err := ml.EvaluateTemplateForPrompt(model.CompletionPromptTemplate, templateFile, model.PromptTemplateData{
-					Input:        predInput,
-					SystemPrompt: config.SystemPrompt,
-				})
-				if err == nil {
-					predInput = templatedInput
-					log.Debug().Msgf("Template found, input modified to: %s", predInput)
-				}
+			templatedInput, err := evaluator.EvaluateTemplateForPrompt(templates.CompletionPromptTemplate, *config, templates.PromptTemplateData{
+				Input:        predInput,
+				SystemPrompt: config.SystemPrompt,
+			})
+			if err == nil {
+				predInput = templatedInput
+				log.Debug().Msgf("Template found, input modified to: %s", predInput)
 			}

 			responses := make(chan schema.OpenAIResponse)
@ -165,16 +153,13 @@ func CompletionEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, a
 		totalTokenUsage := backend.TokenUsage{}

 		for k, i := range config.PromptStrings {
-			if templateFile != "" {
-				// A model can have a "file.bin.tmpl" file associated with a prompt template prefix
-				templatedInput, err := ml.EvaluateTemplateForPrompt(model.CompletionPromptTemplate, templateFile, model.PromptTemplateData{
-					SystemPrompt: config.SystemPrompt,
-					Input:        i,
-				})
-				if err == nil {
-					i = templatedInput
-					log.Debug().Msgf("Template found, input modified to: %s", i)
-				}
+			templatedInput, err := evaluator.EvaluateTemplateForPrompt(templates.CompletionPromptTemplate, *config, templates.PromptTemplateData{
+				SystemPrompt: config.SystemPrompt,
+				Input:        i,
+			})
+			if err == nil {
+				i = templatedInput
+				log.Debug().Msgf("Template found, input modified to: %s", i)
 			}

 			r, tokenUsage, err := ComputeChoices(
--- a/core/http/endpoints/openai/edit.go
+++ b/core/http/endpoints/openai/edit.go
@ -12,6 +12,7 @@ import (
 	"github.com/google/uuid"
 	"github.com/mudler/LocalAI/core/schema"
 	model "github.com/mudler/LocalAI/pkg/model"
+	"github.com/mudler/LocalAI/pkg/templates"

 	"github.com/rs/zerolog/log"
 )
@ -21,7 +22,8 @@ import (
 // @Param request body schema.OpenAIRequest true "query params"
 // @Success 200 {object} schema.OpenAIResponse "Response"
 // @Router /v1/edits [post]
-func EditEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
+func EditEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, evaluator *templates.Evaluator, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
+
 	return func(c *fiber.Ctx) error {
 		modelFile, input, err := readRequest(c, cl, ml, appConfig, true)
 		if err != nil {
@ -35,31 +37,18 @@ func EditEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConf

 		log.Debug().Msgf("Parameter Config: %+v", config)

-		templateFile := ""
-
-		// A model can have a "file.bin.tmpl" file associated with a prompt template prefix
-		if ml.ExistsInModelPath(fmt.Sprintf("%s.tmpl", config.Model)) {
-			templateFile = config.Model
-		}
-
-		if config.TemplateConfig.Edit != "" {
-			templateFile = config.TemplateConfig.Edit
-		}
-
 		var result []schema.Choice
 		totalTokenUsage := backend.TokenUsage{}

 		for _, i := range config.InputStrings {
-			if templateFile != "" {
-				templatedInput, err := ml.EvaluateTemplateForPrompt(model.EditPromptTemplate, templateFile, model.PromptTemplateData{
-					Input:        i,
-					Instruction:  input.Instruction,
-					SystemPrompt: config.SystemPrompt,
-				})
-				if err == nil {
-					i = templatedInput
-					log.Debug().Msgf("Template found, input modified to: %s", i)
-				}
+			templatedInput, err := evaluator.EvaluateTemplateForPrompt(templates.EditPromptTemplate, *config, templates.PromptTemplateData{
+				Input:        i,
+				Instruction:  input.Instruction,
+				SystemPrompt: config.SystemPrompt,
+			})
+			if err == nil {
+				i = templatedInput
+				log.Debug().Msgf("Template found, input modified to: %s", i)
 			}

 			r, tokenUsage, err := ComputeChoices(input, i, config, appConfig, ml, func(s string, c *[]schema.Choice) {