Mirror of https://github.com/mudler/LocalAI.git (synced 2025-06-29 22:20:43 +00:00)
Stash progress for the night. Loading GPTJ fails; hacked in PredictTEMP as a kludge.
This commit is contained in:
  parent 8fc4b6cded
  commit 0b910e0595

14 changed files with 518 additions and 193 deletions.
apiv2/engine.go (new file, 205 lines)
@@ -0,0 +1,205 @@
package apiv2

import (
    "fmt"
    "regexp"
    "strings"
    "sync"

    model "github.com/go-skynet/LocalAI/pkg/model"
    transformers "github.com/go-skynet/go-ggml-transformers.cpp"
    llama "github.com/go-skynet/go-llama.cpp"
    "github.com/mitchellh/mapstructure"
    gpt4all "github.com/nomic-ai/gpt4all/gpt4all-bindings/golang"
)
type LocalAIEngine struct {
    loader *model.ModelLoader
    // mutexMapMutex guards the mutexes map below.
    mutexMapMutex sync.Mutex
    // One mutex per model registration: predictions against the same model
    // must be serialized.
    mutexes map[ConfigRegistration]*sync.Mutex
    // Cache of compiled cutstring regexes, keyed by registration.
    cutstrings     map[ConfigRegistration]map[string]*regexp.Regexp
    cutstringMutex sync.Mutex
}
func NewLocalAIEngine(loader *model.ModelLoader) LocalAIEngine {
    return LocalAIEngine{
        loader:     loader,
        mutexes:    make(map[ConfigRegistration]*sync.Mutex),
        cutstrings: make(map[ConfigRegistration]map[string]*regexp.Regexp),
    }
}
// TODO model interface? Currently scheduled for phase 3 lol
func (e *LocalAIEngine) LoadModel(config Config) (interface{}, error) {
    ls := config.GetLocalSettings()
    fmt.Printf("LocalAIEngine.LoadModel => %+v\n\n", config)
    return e.loader.BackendLoader(ls.Backend, ls.ModelPath, config.ToModelOptions(), uint32(ls.Threads))
}
func (e *LocalAIEngine) GetModelPredictionFunction(config Config, tokenCallback func(string) bool) (func() ([]string, error), error) {
    fmt.Printf("LocalAIEngine.GetModelPredictionFunction => %+v\n\n", config)

    supportStreams := false
    var predictOnce func(p Prompt) (string, error)

    inferenceModel, err := e.LoadModel(config)
    if err != nil {
        fmt.Printf("ERROR LOADING MODEL: %s\n", err.Error())
        return nil, err
    }

    prompts, err := config.GetPrompts()
    if err != nil {
        fmt.Printf("ERROR GetPrompts: %s\n", err.Error())
        return nil, err
    }
    switch localModel := inferenceModel.(type) {
    case *llama.LLama:
        fmt.Println("setting predictOnce for llama")
        supportStreams = true
        predictOnce = func(p Prompt) (string, error) {
            if tokenCallback != nil {
                localModel.SetTokenCallback(tokenCallback)
            }

            // TODO: AsTokens? I think that would need to be exposed from llama and the others.
            str, er := localModel.Predict(
                p.AsString(),
                config.ToPredictOptions()...,
            )
            // If we don't free the callback explicitly we leave functions registered that may
            // try to send on closed channels; otherwise the API returns
            // {"error":{"code":500,"message":"send on closed channel","type":""}}
            // after a stream event has occurred.
            localModel.SetTokenCallback(nil)
            return str, er
        }
    case *gpt4all.Model:
        fmt.Println("setting predictOnce for gpt4all")
        supportStreams = true
        predictOnce = func(p Prompt) (string, error) {
            if tokenCallback != nil {
                localModel.SetTokenCallback(tokenCallback)
            }

            mappedPredictOptions := gpt4all.PredictOptions{}
            if err := mapstructure.Decode(config.ToPredictOptions(), &mappedPredictOptions); err != nil {
                return "", err
            }

            str, err := localModel.PredictTEMP(
                p.AsString(),
                mappedPredictOptions,
            )
            // As in the llama case: clear the callback so stale functions don't
            // send on closed channels after a stream event.
            localModel.SetTokenCallback(nil)
            return str, err
        }
    case *transformers.GPTJ:
        fmt.Println("setting predictOnce for GPTJ")
        supportStreams = false // EXP
        predictOnce = func(p Prompt) (string, error) {
            mappedPredictOptions := transformers.PredictOptions{}
            if err := mapstructure.Decode(config.ToPredictOptions(), &mappedPredictOptions); err != nil {
                return "", err
            }

            fmt.Printf("MAPPED OPTIONS: %+v\n", mappedPredictOptions)

            str, err := localModel.PredictTEMP(
                p.AsString(),
                mappedPredictOptions,
            )
            return str, err
        }
    }
    if predictOnce == nil {
        fmt.Printf("Failed to find a predictOnce for %T\n", inferenceModel)
        return nil, fmt.Errorf("failed to find a predictOnce for %T", inferenceModel)
    }
    req := config.GetRequestDefaults()

    return func() ([]string, error) {
        // This serialization is still needed: the backend cannot service concurrent
        // predictions against the same model.
        // See: https://github.com/ggerganov/llama.cpp/discussions/784
        e.mutexMapMutex.Lock()
        r := config.GetRegistration()
        l, ok := e.mutexes[r]
        if !ok {
            l = &sync.Mutex{}
            e.mutexes[r] = l
        }
        e.mutexMapMutex.Unlock()
        l.Lock()
        defer l.Unlock()

        results := []string{}

        n, err := config.GetN()
        if err != nil {
            // TODO live to regret this, but for now...
            n = 1
        }

        for p_i, prompt := range prompts {
            for n_i := 0; n_i < n; n_i++ {
                res, err := predictOnce(prompt)
                if err != nil {
                    fmt.Printf("ERROR DURING GetModelPredictionFunction -> PredictionFunction for %T with p_i: %d/n_i: %d\n%s\n", config, p_i, n_i, err.Error())
                    return nil, err
                }

                // TODO: this used to be a part of finetune. For... questionable parameter
                // reasons I've moved it up here. Revisit this if it's smelly in the future.
                if ccr, ok := req.(CreateCompletionRequest); ok && ccr.Echo != nil && *ccr.Echo {
                    res = prompt.AsString() + res
                }

                res = e.Finetune(config, res)

                if tokenCallback != nil && !supportStreams {
                    tokenCallback(res)
                }
                results = append(results, res)
            }
        }

        return results, nil
    }, nil
}
func (e *LocalAIEngine) Finetune(config Config, prediction string) string {
    reg := config.GetRegistration()
    switch req := config.GetRequestDefaults().(type) {
    case *CreateChatCompletionRequest:
        // Nothing chat-specific here yet.
    case *CreateCompletionRequest:
        ext := req.XLocalaiExtensions
        if ext != nil {
            if ext.Cutstrings != nil {
                for _, c := range *ext.Cutstrings {
                    e.cutstringMutex.Lock()
                    // Create the inner map lazily; writing to a nil map would panic.
                    if e.cutstrings[reg] == nil {
                        e.cutstrings[reg] = make(map[string]*regexp.Regexp)
                    }
                    regex, ok := e.cutstrings[reg][c]
                    if !ok {
                        regex = regexp.MustCompile(c)
                        e.cutstrings[reg][c] = regex
                    }
                    e.cutstringMutex.Unlock()
                    prediction = regex.ReplaceAllString(prediction, "")
                }
            }

            if ext.Trimstrings != nil {
                for _, c := range *ext.Trimstrings {
                    prediction = strings.TrimSpace(strings.TrimPrefix(prediction, c))
                }
            }
        }
    }

    return prediction
}
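For orientation, here is a minimal sketch of how a caller might drive this engine. It is illustrative only: it assumes a NewModelLoader constructor in pkg/model, and the Config value is a placeholder since its construction lives elsewhere in apiv2.

// Hypothetical wiring, not part of this commit.
func exampleEngineUsage() {
    loader := model.NewModelLoader("/path/to/models") // assumed pkg/model constructor
    engine := NewLocalAIEngine(loader)

    var cfg Config // placeholder: obtained from the apiv2 config machinery in practice

    predict, err := engine.GetModelPredictionFunction(cfg, func(token string) bool {
        fmt.Print(token) // receive tokens as they stream
        return true
    })
    if err != nil {
        panic(err)
    }

    results, err := predict() // runs every prompt n times under the per-model mutex
    if err != nil {
        panic(err)
    }
    fmt.Println(results)
}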
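The gpt4all and GPTJ branches copy prediction options between differently shaped structs with mapstructure. A self-contained sketch of that technique, with invented field names standing in for the real option types:

package main

import (
    "fmt"

    "github.com/mitchellh/mapstructure"
)

// Two option structs with overlapping fields, as a stand-in for the
// llama/gpt4all/transformers option types.
type SourceOptions struct {
    Tokens      int
    Temperature float64
    TopK        int
}

type TargetOptions struct {
    Tokens      int
    Temperature float64
    // TopK is absent here; mapstructure simply skips fields the target lacks.
}

func main() {
    src := SourceOptions{Tokens: 128, Temperature: 0.7, TopK: 40}
    dst := TargetOptions{}
    // Decode matches fields by name (case-insensitively by default).
    if err := mapstructure.Decode(src, &dst); err != nil {
        panic(err)
    }
    fmt.Printf("%+v\n", dst) // {Tokens:128 Temperature:0.7}
}

Note that this only works when the source is a struct or map. The llama branch passes config.ToPredictOptions() variadically, which suggests functional options; whether decoding that same return value into a struct behaves as intended is presumably part of what the commit message flags as a kludge.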
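The returned closure serializes predictions with one lazily created mutex per model registration. Here is that pattern in isolation, with illustrative names that are not from the codebase (assumes import "sync"):

// keyedLocker hands out one mutex per key, creating it on first use.
// This mirrors the mutexMapMutex/mutexes pair in LocalAIEngine.
type keyedLocker struct {
    mu    sync.Mutex
    locks map[string]*sync.Mutex
}

func newKeyedLocker() *keyedLocker {
    return &keyedLocker{locks: make(map[string]*sync.Mutex)}
}

func (k *keyedLocker) get(key string) *sync.Mutex {
    k.mu.Lock()
    defer k.mu.Unlock()
    l, ok := k.locks[key]
    if !ok {
        l = &sync.Mutex{}
        k.locks[key] = l
    }
    return l
}

Usage: lock around each prediction for a given model key.

l := newKeyedLocker().get("some-model")
l.Lock()
// ... run the prediction ...
l.Unlock()

A sync.Map of mutexes would also work, but a plain map guarded by one mutex is the simplest correct version and matches what the engine does.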