Major API enhancements (#44)

2025-05-25 21:15:00 +00:00 · 2023-04-20 18:33:02 +02:00 · 2023-04-20 18:33:02 +02:00 · d517a54e28
commit d517a54e28
parent c905512bb0
8 changed files with 172 additions and 105 deletions
--- a/pkg/model/loader.go
+++ b/pkg/model/loader.go
@ -10,6 +10,8 @@ import (
 	"sync"
 	"text/template"

+	"github.com/rs/zerolog/log"
+
 	gptj "github.com/go-skynet/go-gpt4all-j.cpp"
 	llama "github.com/go-skynet/go-llama.cpp"
 )
@ -26,6 +28,11 @@ func NewModelLoader(modelPath string) *ModelLoader {
 	return &ModelLoader{modelPath: modelPath, gptmodels: make(map[string]*gptj.GPTJ), models: make(map[string]*llama.LLama), promptsTemplates: make(map[string]*template.Template)}
 }

+func (ml *ModelLoader) ExistsInModelPath(s string) bool {
+	_, err := os.Stat(filepath.Join(ml.modelPath, s))
+	return err == nil
+}
+
 func (ml *ModelLoader) ListModels() ([]string, error) {
 	files, err := ioutil.ReadDir(ml.modelPath)
 	if err != nil {
@ -34,9 +41,12 @@ func (ml *ModelLoader) ListModels() ([]string, error) {

 	models := []string{}
 	for _, file := range files {
-		if strings.HasSuffix(file.Name(), ".bin") {
-			models = append(models, strings.TrimRight(file.Name(), ".bin"))
+		// Skip templates, YAML and .keep files
+		if strings.HasSuffix(file.Name(), ".tmpl") || strings.HasSuffix(file.Name(), ".keep") || strings.HasSuffix(file.Name(), ".yaml") || strings.HasSuffix(file.Name(), ".yml") {
+			continue
 		}
+
+		models = append(models, file.Name())
 	}

 	return models, nil
@ -48,12 +58,7 @@ func (ml *ModelLoader) TemplatePrefix(modelName string, in interface{}) (string,

 	m, ok := ml.promptsTemplates[modelName]
 	if !ok {
-		// try to find a s.bin
-		modelBin := fmt.Sprintf("%s.bin", modelName)
-		m, ok = ml.promptsTemplates[modelBin]
-		if !ok {
-			return "", fmt.Errorf("no prompt template available")
-		}
+		return "", fmt.Errorf("no prompt template available")
 	}

 	var buf bytes.Buffer
@ -64,15 +69,21 @@ func (ml *ModelLoader) TemplatePrefix(modelName string, in interface{}) (string,
 	return buf.String(), nil
 }

-func (ml *ModelLoader) loadTemplate(modelName, modelFile string) error {
-	modelTemplateFile := fmt.Sprintf("%s.tmpl", modelFile)
-
-	// Check if the model path exists
-	if _, err := os.Stat(modelTemplateFile); err != nil {
+func (ml *ModelLoader) loadTemplateIfExists(modelName, modelFile string) error {
+	// Check if the template was already loaded
+	if _, ok := ml.promptsTemplates[modelName]; ok {
 		return nil
 	}

-	dat, err := os.ReadFile(modelTemplateFile)
+	// Check if the model path exists
+	// skip any error here - we run anyway if a template is not exist
+	modelTemplateFile := fmt.Sprintf("%s.tmpl", modelName)
+
+	if !ml.ExistsInModelPath(modelTemplateFile) {
+		return nil
+	}
+
+	dat, err := os.ReadFile(filepath.Join(ml.modelPath, modelTemplateFile))
 	if err != nil {
 		return err
 	}
@ -92,36 +103,30 @@ func (ml *ModelLoader) LoadGPTJModel(modelName string) (*gptj.GPTJ, error) {
 	defer ml.mu.Unlock()

 	// Check if we already have a loaded model
-	modelFile := filepath.Join(ml.modelPath, modelName)
+	if !ml.ExistsInModelPath(modelName) {
+		return nil, fmt.Errorf("model does not exist")
+	}

-	if m, ok := ml.gptmodels[modelFile]; ok {
+	if m, ok := ml.gptmodels[modelName]; ok {
+		log.Debug().Msgf("Model already loaded in memory: %s", modelName)
 		return m, nil
 	}

-	// Check if the model path exists
-	if _, err := os.Stat(modelFile); os.IsNotExist(err) {
-		// try to find a s.bin
-		modelBin := fmt.Sprintf("%s.bin", modelFile)
-		if _, err := os.Stat(modelBin); os.IsNotExist(err) {
-			return nil, err
-		} else {
-			modelName = fmt.Sprintf("%s.bin", modelName)
-			modelFile = modelBin
-		}
-	}
-
 	// Load the model and keep it in memory for later use
+	modelFile := filepath.Join(ml.modelPath, modelName)
+	log.Debug().Msgf("Loading model in memory from file: %s", modelFile)
+
 	model, err := gptj.New(modelFile)
 	if err != nil {
 		return nil, err
 	}

 	// If there is a prompt template, load it
-	if err := ml.loadTemplate(modelName, modelFile); err != nil {
+	if err := ml.loadTemplateIfExists(modelName, modelFile); err != nil {
 		return nil, err
 	}

-	ml.gptmodels[modelFile] = model
+	ml.gptmodels[modelName] = model
 	return model, err
 }

@ -129,40 +134,39 @@ func (ml *ModelLoader) LoadLLaMAModel(modelName string, opts ...llama.ModelOptio
 	ml.mu.Lock()
 	defer ml.mu.Unlock()

+	log.Debug().Msgf("Loading model name: %s", modelName)
+
 	// Check if we already have a loaded model
-	modelFile := filepath.Join(ml.modelPath, modelName)
-	if m, ok := ml.models[modelFile]; ok {
+	if !ml.ExistsInModelPath(modelName) {
+		return nil, fmt.Errorf("model does not exist")
+	}
+
+	if m, ok := ml.models[modelName]; ok {
+		log.Debug().Msgf("Model already loaded in memory: %s", modelName)
 		return m, nil
 	}
+
 	// TODO: This needs refactoring, it's really bad to have it in here
-	// Check if we have a GPTJ model loaded instead
-	if _, ok := ml.gptmodels[modelFile]; ok {
+	// Check if we have a GPTJ model loaded instead - if we do we return an error so the API tries with GPTJ
+	if _, ok := ml.gptmodels[modelName]; ok {
+		log.Debug().Msgf("Model is GPTJ: %s", modelName)
 		return nil, fmt.Errorf("this model is a GPTJ one")
 	}

-	// Check if the model path exists
-	if _, err := os.Stat(modelFile); os.IsNotExist(err) {
-		// try to find a s.bin
-		modelBin := fmt.Sprintf("%s.bin", modelFile)
-		if _, err := os.Stat(modelBin); os.IsNotExist(err) {
-			return nil, err
-		} else {
-			modelName = fmt.Sprintf("%s.bin", modelName)
-			modelFile = modelBin
-		}
-	}
-
 	// Load the model and keep it in memory for later use
+	modelFile := filepath.Join(ml.modelPath, modelName)
+	log.Debug().Msgf("Loading model in memory from file: %s", modelFile)
+
 	model, err := llama.New(modelFile, opts...)
 	if err != nil {
 		return nil, err
 	}

 	// If there is a prompt template, load it
-	if err := ml.loadTemplate(modelName, modelFile); err != nil {
+	if err := ml.loadTemplateIfExists(modelName, modelFile); err != nil {
 		return nil, err
 	}

-	ml.models[modelFile] = model
+	ml.models[modelName] = model
 	return model, err
 }