cleanup

parent 115766205c
commit d4c6407bf4

6 changed files with 71 additions and 109 deletions
@@ -5,6 +5,7 @@ import (
    "strings"

    llama "github.com/go-skynet/go-llama.cpp"
    "github.com/rs/zerolog/log"
)

type ConfigRegistration struct {
@@ -34,11 +35,11 @@ type Config interface {
    GetLocalSettings() ConfigLocalSettings
    GetRegistration() ConfigRegistration

    // TODO: Test these. I am not sure.
    // Go People: Is this good design?
    ToPredictOptions() []llama.PredictOption
    ToModelOptions() []llama.ModelOption

    // TODO also dubious? Technically some requests lack prompts, but it's pretty general and may just be worth sticking here.
    // Go People: Also curious about these two. Even more sketchy!
    GetPrompts() ([]Prompt, error)
    GetN() (int, error)
}
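ToPredictOptions and ToModelOptions follow go-llama.cpp's functional-options style: the config is flattened into a slice of option setters that the backend applies when constructing a model or running a prediction. A minimal, self-contained sketch of that pattern — placeholder option type and fields, not the real go-llama.cpp API:

package main

import "fmt"

// Placeholder stand-ins for llama.ModelOption and its settings struct.
type modelSettings struct {
    ContextSize int
    Threads     int
}

type ModelOption func(*modelSettings)

func WithContextSize(n int) ModelOption { return func(s *modelSettings) { s.ContextSize = n } }
func WithThreads(n int) ModelOption     { return func(s *modelSettings) { s.Threads = n } }

// A config turns its fields into a slice of options...
type exampleConfig struct {
    ContextSize int
    Threads     int
}

func (c exampleConfig) ToModelOptions() []ModelOption {
    return []ModelOption{WithContextSize(c.ContextSize), WithThreads(c.Threads)}
}

// ...and the loader applies them on top of its defaults when building the model.
func newModel(opts ...ModelOption) modelSettings {
    s := modelSettings{ContextSize: 512, Threads: 4} // defaults
    for _, opt := range opts {
        opt(&s)
    }
    return s
}

func main() {
    cfg := exampleConfig{ContextSize: 2048, Threads: 8}
    fmt.Printf("%+v\n", newModel(cfg.ToModelOptions()...))
}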
@@ -275,9 +276,6 @@ func (sc SpecificConfig[RequestModel]) GetPrompts() ([]Prompt, error) {
            return prompts, nil
        }
    case CreateChatCompletionRequest:

        fmt.Printf("🥳 %+v\n\n\n", req.XLocalaiExtensions.Roles)

        for _, message := range req.Messages {
            var content string
            var role string
@@ -297,7 +295,7 @@ func (sc SpecificConfig[RequestModel]) GetPrompts() ([]Prompt, error) {
                    role = *r
                }
            default:
                fmt.Printf("Unrecognized message role: %s\n", message.Role)
                log.Error().Msgf("Unrecognized message role: %s", message.Role)
                role = ""
            }
        }

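The recurring pattern in this cleanup commit is swapping ad-hoc fmt.Printf debugging for structured zerolog calls, as in the hunk above. A standalone sketch of that substitution with github.com/rs/zerolog (not code from this repository):

package main

import (
    "github.com/rs/zerolog"
    "github.com/rs/zerolog/log"
)

func main() {
    // Configure a human-readable writer and a global level once, at startup.
    log.Logger = log.Output(zerolog.NewConsoleWriter())
    zerolog.SetGlobalLevel(zerolog.DebugLevel)

    role := "narrator"
    // Instead of: fmt.Printf("Unrecognized message role: %s\n", role)
    log.Error().Msgf("Unrecognized message role: %s", role)
    log.Debug().Str("role", role).Msg("structured fields are also available")
}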
@@ -8,6 +8,7 @@ import (
    "sync"

    "github.com/mitchellh/mapstructure"
    "github.com/rs/zerolog/log"
    "gopkg.in/yaml.v2"
)

@@ -24,7 +25,6 @@ func NewConfigManager() *ConfigManager {

// Private helper method doesn't enforce the mutex. This is because loading at the directory level keeps the lock up the whole time, and I like that.
func (cm *ConfigManager) loadConfigFile(path string) (*Config, error) {
    fmt.Printf("INTERNAL loadConfigFile for %s\n", path)
    stub := ConfigStub{}
    f, err := os.ReadFile(path)
    if err != nil {
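The comment above describes a common Go locking layout: exported entry points take the mutex, while unexported helpers assume the caller already holds it, so a directory-level load can keep a single lock across many files. A minimal sketch of the pattern, with a hypothetical registry rather than this repository's ConfigManager:

package main

import (
    "fmt"
    "sync"
)

type Registry struct {
    sync.Mutex
    items map[string]string
}

// add assumes the caller holds the mutex; it never locks itself.
func (r *Registry) add(key, value string) {
    r.items[key] = value
}

// Add is the exported single-item entry point: lock, delegate, unlock.
func (r *Registry) Add(key, value string) {
    r.Lock()
    defer r.Unlock()
    r.add(key, value)
}

// AddAll holds the lock once for the whole batch instead of once per item.
func (r *Registry) AddAll(kv map[string]string) {
    r.Lock()
    defer r.Unlock()
    for k, v := range kv {
        r.add(k, v)
    }
}

func main() {
    r := &Registry{items: map[string]string{}}
    r.Add("a", "1")
    r.AddAll(map[string]string{"b": "2", "c": "3"})
    fmt.Println(len(r.items))
}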
@@ -33,62 +33,42 @@ func (cm *ConfigManager) loadConfigFile(path string) (*Config, error) {
    if err := yaml.Unmarshal(f, &stub); err != nil {
        return nil, fmt.Errorf("cannot unmarshal config file: %w", err)
    }
    fmt.Printf("RAW STUB: %+v\n", stub)

    endpoint := stub.Registration.Endpoint

    // EndpointConfigMap is generated over in localai.gen.go
    // It's a map that translates a string endpoint function name to an empty SpecificConfig[T], with the type parameter for that request.
    // We then dump the raw YAML configuration of that request into a map[string]interface{}
    // mapstructure then copies the fields into our specific SpecificConfig[T]
    if structType, ok := EndpointConfigMap[endpoint]; ok {
        fmt.Printf("~~ EndpointConfigMap[%s]: %+v\n", endpoint, structType)
        tmpUnmarshal := map[string]interface{}{}
        if err := yaml.Unmarshal(f, &tmpUnmarshal); err != nil {
            if e, ok := err.(*yaml.TypeError); ok {
                fmt.Println("\n!!!!!Type error:", e)
                log.Error().Msgf("[ConfigManager::loadConfigFile] Type error: %s", e.Error())
            }
            return nil, fmt.Errorf("cannot unmarshal config file for %s: %w", endpoint, err)
        }
        fmt.Printf("$$$ tmpUnmarshal: %+v\n", tmpUnmarshal)
        mapstructure.Decode(tmpUnmarshal, &structType)

        fmt.Printf("AFTER UNMARSHAL %T\n%+v\n=======\n", structType, structType)

        // rawConfig.RequestDefaults = structType.GetRequestDefaults()

        cm.configs[structType.GetRegistration()] = structType
        // fmt.Printf("\n\n\n!!!!!HIT BOTTOM!!!!!!")
        return &structType, nil
        // fmt.Printf("\n\n\n!!!!!\n\n\nBIG MISS!\n\n%+v\n\n%T\n%T=====", specificStruct, specificStruct, structType)
    }

    // for i, ts := range EndpointToRequestBodyMap {
    // fmt.Printf("%s: %+v\n", i, ts)
    // }

    return nil, fmt.Errorf("failed to parse config for endpoint %s", endpoint)
}

func (cm *ConfigManager) LoadConfigFile(path string) (*Config, error) {
    fmt.Printf("LoadConfigFile TOP for %s", path)

    cm.Lock()
    fmt.Println("cm.Lock done")

    defer cm.Unlock()
    fmt.Println("cm.Unlock done")

    return cm.loadConfigFile(path)
}

func (cm *ConfigManager) LoadConfigDirectory(path string) ([]ConfigRegistration, error) {
    fmt.Printf("LoadConfigDirectory TOP for %s\n", path)
    cm.Lock()
    defer cm.Unlock()
    files, err := os.ReadDir(path)
    if err != nil {
        return []ConfigRegistration{}, err
    }
    fmt.Printf("os.ReadDir done, found %d files\n", len(files))
    log.Debug().Msgf("[ConfigManager::LoadConfigDirectory] os.ReadDir done, found %d files\n", len(files))

    for _, file := range files {
        // Skip anything that isn't yaml
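The comments in the hunk above describe a two-step decode: the YAML file is first unmarshalled into a generic map[string]interface{}, then mapstructure copies matching fields into the typed struct looked up in EndpointConfigMap. A minimal sketch of that flow with gopkg.in/yaml.v2 and github.com/mitchellh/mapstructure, using a hypothetical config type rather than the generated SpecificConfig[T]:

package main

import (
    "fmt"

    "github.com/mitchellh/mapstructure"
    "gopkg.in/yaml.v2"
)

// Hypothetical typed config, standing in for a SpecificConfig[T].
type chatConfig struct {
    Model       string  `mapstructure:"model"`
    Temperature float64 `mapstructure:"temperature"`
}

func main() {
    raw := []byte("endpoint: createChatCompletion\nmodel: ggml-gpt4all-j\ntemperature: 0.7\n")

    // Step 1: dump the whole document into a generic map.
    tmp := map[string]interface{}{}
    if err := yaml.Unmarshal(raw, &tmp); err != nil {
        panic(err)
    }

    // Step 2: copy matching keys into the typed struct chosen for this endpoint.
    cfg := chatConfig{}
    if err := mapstructure.Decode(tmp, &cfg); err != nil {
        panic(err)
    }
    fmt.Printf("%+v\n", cfg)
}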
@@ -100,9 +80,6 @@ func (cm *ConfigManager) LoadConfigDirectory(path string) ([]ConfigRegistration,
            return []ConfigRegistration{}, err
        }
    }

    fmt.Printf("LoadConfigDirectory DONE %d", len(cm.configs))

    return cm.listConfigs(), nil
}

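For the "Skip anything that isn't yaml" step referenced above, the usual Go idiom is to filter directory entries by extension before handing each one to the loader. A small sketch (the loader call is hypothetical):

package main

import (
    "fmt"
    "os"
    "path/filepath"
)

func loadConfigDirectory(dir string) error {
    files, err := os.ReadDir(dir)
    if err != nil {
        return err
    }
    for _, file := range files {
        ext := filepath.Ext(file.Name())
        // Skip anything that isn't yaml.
        if file.IsDir() || (ext != ".yaml" && ext != ".yml") {
            continue
        }
        fmt.Printf("would load %s\n", filepath.Join(dir, file.Name()))
    }
    return nil
}

func main() {
    if err := loadConfigDirectory("."); err != nil {
        fmt.Println(err)
    }
}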
@@ -13,6 +13,7 @@ import (
    llama "github.com/go-skynet/go-llama.cpp"
    "github.com/mitchellh/mapstructure"
    gpt4all "github.com/nomic-ai/gpt4all/gpt4all-bindings/golang"
    "github.com/rs/zerolog/log"
)

type LocalAIEngine struct {
@@ -28,7 +29,7 @@ func NewLocalAIEngine(loader *model.ModelLoader) LocalAIEngine {
    // TODO CLEANUP: Perform evil magic, we only need to do once, and api should NOT be removed yet.
    gpt4alldir := filepath.Join(".", "backend-assets", "gpt4all")
    os.Setenv("GPT4ALL_IMPLEMENTATIONS_PATH", gpt4alldir)
    fmt.Printf("[*HAX*] GPT4ALL_IMPLEMENTATIONS_PATH: %s\n", gpt4alldir)
    log.Debug().Msgf("[*HAX*] GPT4ALL_IMPLEMENTATIONS_PATH: %s", gpt4alldir)

    return LocalAIEngine{
        loader: loader,
@@ -40,32 +41,29 @@ func NewLocalAIEngine(loader *model.ModelLoader) LocalAIEngine {
// TODO model interface? Currently scheduled for phase 3 lol
func (e *LocalAIEngine) LoadModel(config Config) (interface{}, error) {
    ls := config.GetLocalSettings()
    fmt.Printf("LocalAIEngine.LoadModel => %+v\n\n", config)
    log.Debug().Msgf("[LocalAIEngine::LoadModel] LocalAIEngine.LoadModel => %+v", config)
    return e.loader.BackendLoader(ls.Backend, ls.ModelPath, config.ToModelOptions(), uint32(ls.Threads))
}

func (e *LocalAIEngine) GetModelPredictionFunction(config Config, tokenCallback func(string) bool) (func() ([]string, error), error) {

    fmt.Printf("LocalAIEngine.GetModelPredictionFunction => %+v\n\n", config)
    log.Debug().Msgf("[LocalAIEngine::GetModelPredictionFunction] called for configuration:\n%+v", config)

    supportStreams := false
    var predictOnce func(p Prompt) (string, error) = nil

    inferenceModel, err := e.LoadModel(config)
    if err != nil {
        fmt.Printf("ERROR LOADING MODEL: %s\n", err.Error())
        return nil, err
        return nil, fmt.Errorf("error loading model: %w", err)
    }

    prompts, err := config.GetPrompts()
    if err != nil {
        fmt.Printf("ERROR GetPrompts: %s\n", err.Error())
        return nil, err
        return nil, fmt.Errorf("error calling GetPrompts(): %w", err)
    }

    switch localModel := inferenceModel.(type) {
    case *llama.LLama:
        fmt.Println("setting predictOnce for llama")
        supportStreams = true
        predictOnce = func(p Prompt) (string, error) {

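GetModelPredictionFunction switches on the concrete backend type, installs a matching predictOnce closure, and then wraps it in the outer prediction loop that is returned to the caller. A stripped-down sketch of that dispatch shape, with toy backend types instead of the real bindings:

package main

import "fmt"

type llamaModel struct{}
type gptjModel struct{}

// Pick a per-backend predictOnce closure based on the loaded model's concrete type.
func predictionFunction(inferenceModel interface{}, prompts []string) (func() ([]string, error), error) {
    var predictOnce func(p string) (string, error)

    switch m := inferenceModel.(type) {
    case *llamaModel:
        predictOnce = func(p string) (string, error) { return fmt.Sprintf("llama %T: %s", m, p), nil }
    case *gptjModel:
        predictOnce = func(p string) (string, error) { return fmt.Sprintf("gptj %T: %s", m, p), nil }
    }
    if predictOnce == nil {
        return nil, fmt.Errorf("failed to find a predictOnce for %T", inferenceModel)
    }

    // The returned closure runs every prompt through the selected backend.
    return func() ([]string, error) {
        var results []string
        for _, p := range prompts {
            res, err := predictOnce(p)
            if err != nil {
                return nil, fmt.Errorf("prediction failed: %w", err)
            }
            results = append(results, res)
        }
        return results, nil
    }, nil
}

func main() {
    predict, err := predictionFunction(&llamaModel{}, []string{"hello"})
    if err != nil {
        panic(err)
    }
    out, _ := predict()
    fmt.Println(out)
}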
@@ -85,7 +83,6 @@ func (e *LocalAIEngine) GetModelPredictionFunction(config Config, tokenCallback
            return str, er
        }
    case *gpt4all.Model:
        fmt.Println("setting predictOnce for gpt4all")
        supportStreams = true

        predictOnce = func(p Prompt) (string, error) {
@@ -111,13 +108,13 @@ func (e *LocalAIEngine) GetModelPredictionFunction(config Config, tokenCallback
            return str, err
        }
    case *transformers.GPTJ:
        fmt.Println("setting predictOnce for GPTJ")
        supportStreams = false // EXP
        predictOnce = func(p Prompt) (string, error) {
            mappedPredictOptions := transformers.PredictOptions{}

            mapstructure.Decode(config.ToPredictOptions(), &mappedPredictOptions)

            // TODO Leave this for testing phase 1
            fmt.Printf("MAPPED OPTIONS: %+v\n", mappedPredictOptions)

            // str, err := localModel.PredictTEMP(
@@ -131,7 +128,6 @@ func (e *LocalAIEngine) GetModelPredictionFunction(config Config, tokenCallback
    }

    if predictOnce == nil {
        fmt.Printf("Failed to find a predictOnce for %T", inferenceModel)
        return nil, fmt.Errorf("failed to find a predictOnce for %T", inferenceModel)
    }

@@ -160,21 +156,18 @@ func (e *LocalAIEngine) GetModelPredictionFunction(config Config, tokenCallback
            n = 1
        }

        for p_i, prompt := range prompts {
        for _, prompt := range prompts {
            for n_i := 0; n_i < n; n_i++ {
                res, err := predictOnce(prompt)

                if err != nil {
                    fmt.Printf("ERROR DURING GetModelPredictionFunction -> PredictionFunction for %T with p_i: %d/n_i: %d\n%s", config, p_i, n_i, err.Error())
                    return nil, err
                }

                fmt.Printf("\n\n🤯 raw res: %s\n\n", res)

                // TODO: this used to be a part of finetune. For.... questionable parameter reasons I've moved it up here. Revisit this if it's smelly in the future.
                ccr, is_ccr := req.(CreateCompletionRequest)
                if is_ccr {
                    if *ccr.Echo {
                    if ccr.Echo != nil && *ccr.Echo { // 🥲
                        res = prompt.AsString() + res
                    }
                }

@@ -184,6 +177,9 @@ func (e *LocalAIEngine) GetModelPredictionFunction(config Config, tokenCallback
                if tokenCallback != nil && !supportStreams {
                    tokenCallback(res)
                }

                log.Debug().Msgf("[%s - %s] prediction: %s", r.Model, r.Endpoint, res)

                results = append(results, res)
            }
        }

@@ -7,6 +7,7 @@ import (

    model "github.com/go-skynet/LocalAI/pkg/model"
    "github.com/mitchellh/mapstructure"
    "github.com/rs/zerolog/log"
)

type LocalAIServer struct {
@@ -26,11 +27,9 @@ func combineRequestAndConfig[RequestType any](configManager *ConfigManager, mode
    config, exists := configManager.GetConfig(lookup)

    if !exists {
        return nil, fmt.Errorf("Config not found for %+v", lookup)
        return nil, fmt.Errorf("config not found for %+v", lookup)
    }

    // fmt.Printf("Model: %s\nConfig: %+v\nrequestFromInput: %+v\n", model, config, requestFromInput)

    request, ok := config.GetRequestDefaults().(RequestType)

    if !ok {
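combineRequestAndConfig is generic over the request body type: it looks up the per-model config, then type-asserts the stored request defaults back to the concrete RequestType before merging in the caller's input. A small sketch of that lookup-and-assert shape, with a hypothetical defaults store and request types:

package main

import "fmt"

// Hypothetical request types.
type chatRequest struct{ Model string }
type completionRequest struct{ Model string }

// A store that only knows it holds "some request defaults" per model name.
var defaults = map[string]interface{}{
    "ggml-chat":       chatRequest{Model: "ggml-chat"},
    "ggml-completion": completionRequest{Model: "ggml-completion"},
}

func defaultsFor[RequestType any](model string) (*RequestType, error) {
    raw, exists := defaults[model]
    if !exists {
        return nil, fmt.Errorf("config not found for %q", model)
    }
    // Assert the stored interface{} back to the concrete type the caller asked for.
    request, ok := raw.(RequestType)
    if !ok {
        return nil, fmt.Errorf("config for %q is %T, not the requested type", model, raw)
    }
    return &request, nil
}

func main() {
    chat, err := defaultsFor[chatRequest]("ggml-chat")
    fmt.Println(chat, err)

    _, err = defaultsFor[chatRequest]("ggml-completion") // wrong type: returns an error
    fmt.Println(err)
}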
@@ -53,8 +52,6 @@ func combineRequestAndConfig[RequestType any](configManager *ConfigManager, mode
        return nil, decodeErr
    }

    fmt.Printf("AFTER rD: %T\n%+v\n\n", request, request)

    return &SpecificConfig[RequestType]{
        ConfigStub: ConfigStub{
            Registration: config.GetRegistration(),
@@ -64,10 +61,6 @@ func combineRequestAndConfig[RequestType any](configManager *ConfigManager, mode
    }, nil
}

// func (las *LocalAIServer) loadModel(configStub ConfigStub) {

// }

// CancelFineTune implements StrictServerInterface
func (*LocalAIServer) CancelFineTune(ctx context.Context, request CancelFineTuneRequestObject) (CancelFineTuneResponseObject, error) {
    panic("unimplemented")
@@ -79,51 +72,44 @@ func (las *LocalAIServer) CreateChatCompletion(ctx context.Context, request Crea
    chatRequestConfig, err := combineRequestAndConfig(las.configManager, request.Body.Model, request.Body)

    if err != nil {
        fmt.Printf("CreateChatCompletion ERROR combining config and input!\n%s\n", err.Error())
        return nil, err
        return nil, fmt.Errorf("errpr during CreateChatCompletion, failed to combineRequestAndConfig: %w", err)
    }

    chatRequest := chatRequestConfig.RequestDefaults

    fmt.Printf("\n===CreateChatCompletion===\n%+v\n", chatRequest)

    fmt.Printf("\n\n!! TYPED CreateChatCompletion !!\ntemperature %f\n top_p %f \n %d\n", *chatRequest.Temperature, *chatRequest.TopP, *chatRequest.XLocalaiExtensions.TopK)

    fmt.Printf("chatRequest: %+v\nlen(messages): %d", chatRequest, len(chatRequest.Messages))
    for i, m := range chatRequest.Messages {
        fmt.Printf("message #%d: %+v", i, m)
    }

    fmt.Println("Dodgy Stuff Below")

    predict, err := las.engine.GetModelPredictionFunction(chatRequestConfig, nil)
    if err != nil {
        fmt.Printf("!!!!!!!!!! Error obtaining predict fn %s\n", err.Error())
        return nil, err
        return nil, fmt.Errorf("failed to GetModelPredictionFunction: %w", err)
    }

    fmt.Println("About to call predict()")
    predictions, err := predict()
    if err != nil {
        fmt.Printf("!!!!!!!!!! Error INSIDE predict fn %s\n", err.Error())
        return nil, err
        return nil, fmt.Errorf("error during CreateChatCompletion calling model prediction function: %w", err)
    }

    resp := CreateChatCompletion200JSONResponse{}

    // People who know golang better: is there a cleaner way to do this kind of nil-safe init?
    var responseRole ChatCompletionResponseMessageRole = "asssistant" // Fallback on a reasonable guess
    ext := chatRequestConfig.GetRequest().XLocalaiExtensions
    if ext != nil {
        extr := ext.Roles
        if extr != nil {
            if extr.Assistant != nil {
                responseRole = ChatCompletionResponseMessageRole(*extr.Assistant) // Call for help here too - this really seems dirty. How should this be expressed?
            }
        }
    }

    for i, prediction := range predictions {
        resp.Choices = append(resp.Choices, CreateChatCompletionResponseChoice{
            Message: &ChatCompletionResponseMessage{
                Content: prediction,
                Role: "asssistant", // TODO FIX
                Role: responseRole,
            },
            Index: &i,
        })
    }

    return resp, nil

    // panic("unimplemented")
}

// CreateCompletion implements StrictServerInterface
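The code above asks whether there is a cleaner way to do this kind of nil-safe initialisation from a chain of optional pointers. One common answer is a tiny generic deref-with-default helper, sketched below; the helper name and types are illustrative, not part of LocalAI:

package main

import "fmt"

// valueOr returns *p when p is non-nil, otherwise the fallback.
func valueOr[T any](p *T, fallback T) T {
    if p == nil {
        return fallback
    }
    return *p
}

type roles struct {
    Assistant *string // optional override from the request extensions
}

func main() {
    var ext *roles // pretend this came from XLocalaiExtensions and may be nil

    responseRole := "assistant" // fallback on a reasonable guess
    if ext != nil {
        responseRole = valueOr(ext.Assistant, responseRole)
    }
    fmt.Println(responseRole)
}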
@@ -134,40 +120,35 @@ func (las *LocalAIServer) CreateCompletion(ctx context.Context, request CreateCo
    config, err := combineRequestAndConfig(las.configManager, modelName, request.Body)

    if err != nil {
        fmt.Printf("CreateCompletion ERROR combining config and input!\n%s\n", err.Error())
        return nil, err
        return nil, fmt.Errorf("[CreateCompletion] error in combineRequestAndConfig %w", err)
    }

    req := config.GetRequest()
    predict, err := las.engine.GetModelPredictionFunction(config, nil)
    if err != nil {
        return nil, fmt.Errorf("failed to GetModelPredictionFunction: %w", err)
    }

    fmt.Printf("\n===CreateCompletion===\n%+v\n", req)
    predictions, err := predict()
    if err != nil {
        return nil, fmt.Errorf("error during CreateChatCompletion calling model prediction function: %w", err)
    }

    log.Debug().Msgf("[CreateCompletion] predict() completed, %d", len(predictions))

    var choices []CreateCompletionResponseChoice

    prompts, err := req.Prompt.AsCreateCompletionRequestPrompt1()

    if err != nil {
        tokenPrompt, err := req.Prompt.AsCreateCompletionRequestPrompt2()
        if err == nil {
            fmt.Printf("Scary token array length %d\n", len(tokenPrompt))
            panic("Token array is scary and phase 2")
        }
        singlePrompt, err := req.Prompt.AsCreateCompletionRequestPrompt0()
        if err != nil {
            return nil, err
        }
        prompts = []string{singlePrompt}
    }

    // model := las.loader.LoadModel(modelName, )

    for _, v := range prompts {
        fmt.Printf("[prompt] %s\n", v)
    for i, prediction := range predictions {
        log.Debug().Msgf("[CreateCompletion]%d: %s", i, prediction)
        choices = append(choices, CreateCompletionResponseChoice{
            Index: &i,
            Text: &prediction,
            // TODO more?
        })
    }

    return CreateCompletion200JSONResponse{
        Model: modelName,
        Choices: choices,
        // Usage need to be fixed in yaml
    }, nil
}

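One detail worth flagging in the loop above: Index: &i and Text: &prediction take the addresses of the loop variables, and before Go 1.22 those variables are reused across iterations, so every choice can end up pointing at the final values. Copying into per-iteration locals avoids that; a minimal sketch:

package main

import "fmt"

type choice struct {
    Index *int
    Text  *string
}

func main() {
    predictions := []string{"alpha", "beta"}

    var choices []choice
    for i, prediction := range predictions {
        i, prediction := i, prediction // fresh copies per iteration (implicit in Go 1.22+)
        choices = append(choices, choice{Index: &i, Text: &prediction})
    }

    for _, c := range choices {
        fmt.Println(*c.Index, *c.Text) // prints 0 alpha, then 1 beta, as intended
    }
}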