feat(llama2): add template for chat messages (#782)

Co-authored-by: Aman Karmani <aman@tmm1.net> Lays some of the groundwork for LLAMA2 compatibility, as well as for other future models with complex prompting schemes. Starts a small refactoring of template loading in pkg/model/loader.go: template loading is currently still a part of ModelLoader, but it should now be easy to add template loading for situations other than overall prompt templates and the new chat-specific per-message templates. Adds support for new chat-endpoint-specific, per-message templates as an alternative to the existing "Role: XYZ" sprintf method. Includes a temporary prompt template as an example, since I have a few questions before we merge in the model-gallery side changes. Also includes minor debug logging changes.
2025-05-20 10:35:01 +00:00 · 2023-07-22 11:31:39 -04:00 · 2023-07-22 11:31:39 -04:00 · c6bf67f446
commit c6bf67f446
parent 5ee186b8e5
8 changed files with 237 additions and 123 deletions
--- a/pkg/model/loader.go
+++ b/pkg/model/loader.go
@@ -4,43 +4,81 @@ import (
 	"bytes"
 	"context"
 	"fmt"
-	"io/ioutil"
 	"os"
 	"path/filepath"
 	"strings"
 	"sync"
 	"text/template"

+	grammar "github.com/go-skynet/LocalAI/pkg/grammar"
 	"github.com/go-skynet/LocalAI/pkg/grpc"
 	process "github.com/mudler/go-processmanager"
 	"github.com/rs/zerolog/log"
 )

+// PromptTemplateData is passed to prompt templates instead of a bare interface{}.
+// It defines all of the variables LocalAI currently populates for use in a prompt template file.
+// Please note: not all of these are populated on every endpoint - your template should either be tested for each endpoint you map it to, or be tolerant of zero values.
+type PromptTemplateData struct {
+	Input        string
+	Instruction  string
+	Functions    []grammar.Function
+	MessageIndex int
+}
+
+// TODO: Ask mudler about FunctionCall stuff being useful at the message level?
+type ChatMessageTemplateData struct {
+	SystemPrompt string
+	Role         string
+	RoleName     string
+	Content      string
+	MessageIndex int
+}
+
+// Keep this in sync with config.TemplateConfig. Is there a more idiomatic way to accomplish this in Go?
+// Technically, order doesn't _really_ matter, but the count must stay in sync, see tests/integration/reflect_test.go
+type TemplateType int
+
+const (
+	ChatPromptTemplate TemplateType = iota
+	ChatMessageTemplate
+	CompletionPromptTemplate
+	EditPromptTemplate
+	FunctionsPromptTemplate
+
+	// The following TemplateType is **NOT** a valid value and MUST be last. It exists to make the sanity integration tests simpler!
+	IntegrationTestTemplate
+)
+
+// new idea: what if we declare a struct of these here, and use a loop to check?
+
+// TODO: Split ModelLoader and TemplateLoader? Just to keep things more organized. Left together to share a mutex until I look into that. Would split if we separate directories for .bin/.yaml and .tmpl
 type ModelLoader struct {
 	ModelPath string
 	mu        sync.Mutex
 	// TODO: this needs generics
-	models           map[string]*grpc.Client
-	grpcProcesses    map[string]*process.Process
-	promptsTemplates map[string]*template.Template
+	models        map[string]*grpc.Client
+	grpcProcesses map[string]*process.Process
+	templates     map[TemplateType]map[string]*template.Template
 }

 func NewModelLoader(modelPath string) *ModelLoader {
-	return &ModelLoader{
-		ModelPath:        modelPath,
-		models:           make(map[string]*grpc.Client),
-		promptsTemplates: make(map[string]*template.Template),
-		grpcProcesses:    make(map[string]*process.Process),
+	nml := &ModelLoader{
+		ModelPath:     modelPath,
+		models:        make(map[string]*grpc.Client),
+		templates:     make(map[TemplateType]map[string]*template.Template),
+		grpcProcesses: make(map[string]*process.Process),
 	}
+	nml.initializeTemplateMap()
+	return nml
 }

 func (ml *ModelLoader) ExistsInModelPath(s string) bool {
-	_, err := os.Stat(filepath.Join(ml.ModelPath, s))
-	return err == nil
+	return existsInPath(ml.ModelPath, s)
 }

 func (ml *ModelLoader) ListModels() ([]string, error) {
-	files, err := ioutil.ReadDir(ml.ModelPath)
+	files, err := os.ReadDir(ml.ModelPath)
 	if err != nil {
 		return []string{}, err
 	}
@@ -58,63 +96,6 @@ func (ml *ModelLoader) ListModels() ([]string, error) {
 	return models, nil
 }

-func (ml *ModelLoader) TemplatePrefix(modelName string, in interface{}) (string, error) {
-	ml.mu.Lock()
-	defer ml.mu.Unlock()
-
-	m, ok := ml.promptsTemplates[modelName]
-	if !ok {
-		modelFile := filepath.Join(ml.ModelPath, modelName)
-		if err := ml.loadTemplateIfExists(modelName, modelFile); err != nil {
-			return "", err
-		}
-
-		t, exists := ml.promptsTemplates[modelName]
-		if exists {
-			m = t
-		}
-	}
-	if m == nil {
-		return "", fmt.Errorf("failed loading any template")
-	}
-
-	var buf bytes.Buffer
-
-	if err := m.Execute(&buf, in); err != nil {
-		return "", err
-	}
-	return buf.String(), nil
-}
-
-func (ml *ModelLoader) loadTemplateIfExists(modelName, modelFile string) error {
-	// Check if the template was already loaded
-	if _, ok := ml.promptsTemplates[modelName]; ok {
-		return nil
-	}
-
-	// Check if the model path exists
-	// skip any error here - we run anyway if a template does not exist
-	modelTemplateFile := fmt.Sprintf("%s.tmpl", modelName)
-
-	if !ml.ExistsInModelPath(modelTemplateFile) {
-		return nil
-	}
-
-	dat, err := os.ReadFile(filepath.Join(ml.ModelPath, modelTemplateFile))
-	if err != nil {
-		return err
-	}
-
-	// Parse the template
-	tmpl, err := template.New("prompt").Parse(string(dat))
-	if err != nil {
-		return err
-	}
-	ml.promptsTemplates[modelName] = tmpl
-
-	return nil
-}
-
 func (ml *ModelLoader) LoadModel(modelName string, loader func(string) (*grpc.Client, error)) (*grpc.Client, error) {
 	ml.mu.Lock()
 	defer ml.mu.Unlock()
@@ -134,10 +115,13 @@ func (ml *ModelLoader) LoadModel(modelName string, loader func(string) (*grpc.Cl
 		return nil, err
 	}

-	// If there is a prompt template, load it
-	if err := ml.loadTemplateIfExists(modelName, modelFile); err != nil {
-		return nil, err
-	}
+	// TODO: Add a helper method to iterate all prompt templates associated with a config if and only if it's YAML?
+	// Minor perf loss here until this is fixed, but we initialize on first request
+
+	// // If there is a prompt template, load it
+	// if err := ml.loadTemplateIfExists(modelName); err != nil {
+	// 	return nil, err
+	// }

 	ml.models[modelName] = model
 	return model, nil
@@ -148,9 +132,9 @@ func (ml *ModelLoader) checkIsLoaded(s string) *grpc.Client {
 		log.Debug().Msgf("Model already loaded in memory: %s", s)

 		if !m.HealthCheck(context.Background()) {
-			log.Debug().Msgf("GRPC Model not responding", s)
+			log.Debug().Msgf("GRPC Model not responding: %s", s)
 			if !ml.grpcProcesses[s].IsAlive() {
-				log.Debug().Msgf("GRPC Process is not responding", s)
+				log.Debug().Msgf("GRPC Process is not responding: %s", s)
 				// stop and delete the process, this forces to re-load the model and re-create again the service
 				ml.grpcProcesses[s].Stop()
 				delete(ml.grpcProcesses, s)
@@ -164,3 +148,81 @@ func (ml *ModelLoader) checkIsLoaded(s string) *grpc.Client {

 	return nil
 }
+
+func (ml *ModelLoader) EvaluateTemplateForPrompt(templateType TemplateType, templateName string, in PromptTemplateData) (string, error) {
+	// TODO: should this check be improved?
+	if templateType == ChatMessageTemplate {
+		return "", fmt.Errorf("invalid templateType: ChatMessage")
+	}
+	return ml.evaluateTemplate(templateType, templateName, in)
+}
+
+func (ml *ModelLoader) EvaluateTemplateForChatMessage(templateName string, messageData ChatMessageTemplateData) (string, error) {
+	return ml.evaluateTemplate(ChatMessageTemplate, templateName, messageData)
+}
+
+func existsInPath(path string, s string) bool {
+	_, err := os.Stat(filepath.Join(path, s))
+	return err == nil
+}
+
+func (ml *ModelLoader) initializeTemplateMap() {
+	// This is somewhat clunky: the loop bound is IntegrationTestTemplate, the sentinel marking the end of the valid TemplateType values, but it works.
+	for tt := TemplateType(0); tt < IntegrationTestTemplate; tt++ {
+		ml.templates[tt] = make(map[string]*template.Template)
+	}
+}
+
+func (ml *ModelLoader) evaluateTemplate(templateType TemplateType, templateName string, in interface{}) (string, error) {
+	ml.mu.Lock()
+	defer ml.mu.Unlock()
+
+	m, ok := ml.templates[templateType][templateName]
+	if !ok {
+		// return "", fmt.Errorf("template not loaded: %s", templateName)
+		loadErr := ml.loadTemplateIfExists(templateType, templateName)
+		if loadErr != nil {
+			return "", loadErr
+		}
+		m = ml.templates[templateType][templateName] // ok is not important since we check m on the next line, and we already checked
+	}
+	if m == nil {
+		return "", fmt.Errorf("failed loading a template for %s", templateName)
+	}
+
+	var buf bytes.Buffer
+
+	if err := m.Execute(&buf, in); err != nil {
+		return "", err
+	}
+	return buf.String(), nil
+}
+
+func (ml *ModelLoader) loadTemplateIfExists(templateType TemplateType, templateName string) error {
+	// Check if the template was already loaded
+	if _, ok := ml.templates[templateType][templateName]; ok {
+		return nil
+	}
+
+	// Check if the model path exists
+	// skip any error here - we run anyway if a template does not exist
+	modelTemplateFile := fmt.Sprintf("%s.tmpl", templateName)
+
+	if !ml.ExistsInModelPath(modelTemplateFile) {
+		return nil
+	}
+
+	dat, err := os.ReadFile(filepath.Join(ml.ModelPath, modelTemplateFile))
+	if err != nil {
+		return err
+	}
+
+	// Parse the template
+	tmpl, err := template.New("prompt").Parse(string(dat))
+	if err != nil {
+		return err
+	}
+	ml.templates[templateType][templateName] = tmpl
+
+	return nil
+}