Dave Lee 2023-06-12 17:55:03 -04:00
parent 115766205c
commit d4c6407bf4
6 changed files with 71 additions and 109 deletions

View file

@@ -5,6 +5,7 @@ import (
"strings"
llama "github.com/go-skynet/go-llama.cpp"
"github.com/rs/zerolog/log"
)
type ConfigRegistration struct {
@@ -34,11 +35,11 @@ type Config interface {
GetLocalSettings() ConfigLocalSettings
GetRegistration() ConfigRegistration
// TODO: Test these. I am not sure.
// Go People: Is this good design?
ToPredictOptions() []llama.PredictOption
ToModelOptions() []llama.ModelOption
// TODO also dubious? Technically some requests lack prompts, but it's pretty general and may just be worth sticking here.
// Go People: Also curious about these two. Even more sketchy!
GetPrompts() ([]Prompt, error)
GetN() (int, error)
}
@@ -275,9 +276,6 @@ func (sc SpecificConfig[RequestModel]) GetPrompts() ([]Prompt, error) {
return prompts, nil
}
case CreateChatCompletionRequest:
fmt.Printf("🥳 %+v\n\n\n", req.XLocalaiExtensions.Roles)
for _, message := range req.Messages {
var content string
var role string
@@ -297,7 +295,7 @@ func (sc SpecificConfig[RequestModel]) GetPrompts() ([]Prompt, error) {
role = *r
}
default:
fmt.Printf("Unrecognized message role: %s\n", message.Role)
log.Error().Msgf("Unrecognized message role: %s", message.Role)
role = ""
}
}

View file

@@ -8,6 +8,7 @@ import (
"sync"
"github.com/mitchellh/mapstructure"
"github.com/rs/zerolog/log"
"gopkg.in/yaml.v2"
)
@@ -24,7 +25,6 @@ func NewConfigManager() *ConfigManager {
// Private helper method that doesn't take the mutex itself: loading at the directory level holds the lock for the entire scan, which is intentional (see the sketch after LoadConfigDirectory below).
func (cm *ConfigManager) loadConfigFile(path string) (*Config, error) {
fmt.Printf("INTERNAL loadConfigFile for %s\n", path)
stub := ConfigStub{}
f, err := os.ReadFile(path)
if err != nil {
@@ -33,62 +33,42 @@ func (cm *ConfigManager) loadConfigFile(path string) (*Config, error) {
if err := yaml.Unmarshal(f, &stub); err != nil {
return nil, fmt.Errorf("cannot unmarshal config file: %w", err)
}
fmt.Printf("RAW STUB: %+v\n", stub)
endpoint := stub.Registration.Endpoint
// EndpointConfigMap is generated over in localai.gen.go
// It's a map that translates a string endpoint function name to an empty SpecificConfig[T], with the type parameter for that request.
// We then dump the raw YAML configuration of that request into a map[string]interface{}
// mapstructure then copies the fields into our specific SpecificConfig[T]
if structType, ok := EndpointConfigMap[endpoint]; ok {
fmt.Printf("~~ EndpointConfigMap[%s]: %+v\n", endpoint, structType)
tmpUnmarshal := map[string]interface{}{}
if err := yaml.Unmarshal(f, &tmpUnmarshal); err != nil {
if e, ok := err.(*yaml.TypeError); ok {
fmt.Println("\n!!!!!Type error:", e)
log.Error().Msgf("[ConfigManager::loadConfigFile] Type error: %s", e.Error())
}
return nil, fmt.Errorf("cannot unmarshal config file for %s: %w", endpoint, err)
}
fmt.Printf("$$$ tmpUnmarshal: %+v\n", tmpUnmarshal)
mapstructure.Decode(tmpUnmarshal, &structType)
fmt.Printf("AFTER UNMARSHAL %T\n%+v\n=======\n", structType, structType)
// rawConfig.RequestDefaults = structType.GetRequestDefaults()
cm.configs[structType.GetRegistration()] = structType
// fmt.Printf("\n\n\n!!!!!HIT BOTTOM!!!!!!")
return &structType, nil
// fmt.Printf("\n\n\n!!!!!\n\n\nBIG MISS!\n\n%+v\n\n%T\n%T=====", specificStruct, specificStruct, structType)
}
// for i, ts := range EndpointToRequestBodyMap {
// fmt.Printf("%s: %+v\n", i, ts)
// }
return nil, fmt.Errorf("failed to parse config for endpoint %s", endpoint)
}
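The comments in loadConfigFile above describe a two-pass decode: the raw YAML is unmarshalled into an untyped map, and mapstructure then copies matching fields into the typed SpecificConfig[T] selected from EndpointConfigMap. Below is a minimal, self-contained sketch of just that decode pattern (the endpoint lookup is omitted); the exampleSettings type and the inline YAML are hypothetical and not part of this changeset.

package main

import (
	"fmt"

	"github.com/mitchellh/mapstructure"
	"gopkg.in/yaml.v2"
)

type exampleSettings struct {
	Backend string `mapstructure:"backend"`
	Threads int    `mapstructure:"threads"`
}

func main() {
	raw := []byte("backend: llama\nthreads: 4\n")

	// First pass: unmarshal the raw YAML into an untyped map, mirroring tmpUnmarshal above.
	intermediate := map[string]interface{}{}
	if err := yaml.Unmarshal(raw, &intermediate); err != nil {
		panic(err)
	}

	// Second pass: mapstructure copies matching keys into the typed struct.
	settings := exampleSettings{}
	if err := mapstructure.Decode(intermediate, &settings); err != nil {
		panic(err)
	}

	fmt.Printf("%+v\n", settings) // {Backend:llama Threads:4}
}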
func (cm *ConfigManager) LoadConfigFile(path string) (*Config, error) {
fmt.Printf("LoadConfigFile TOP for %s", path)
cm.Lock()
fmt.Println("cm.Lock done")
defer cm.Unlock()
fmt.Println("cm.Unlock done")
return cm.loadConfigFile(path)
}
func (cm *ConfigManager) LoadConfigDirectory(path string) ([]ConfigRegistration, error) {
fmt.Printf("LoadConfigDirectory TOP for %s\n", path)
cm.Lock()
defer cm.Unlock()
files, err := os.ReadDir(path)
if err != nil {
return []ConfigRegistration{}, err
}
fmt.Printf("os.ReadDir done, found %d files\n", len(files))
log.Debug().Msgf("[ConfigManager::LoadConfigDirectory] os.ReadDir done, found %d files", len(files))
for _, file := range files {
// Skip anything that isn't yaml
@@ -100,9 +80,6 @@ func (cm *ConfigManager) LoadConfigDirectory(path string) ([]ConfigRegistration,
return []ConfigRegistration{}, err
}
}
fmt.Printf("LoadConfigDirectory DONE %d", len(cm.configs))
return cm.listConfigs(), nil
}
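As noted in the loadConfigFile comment above, the locking convention is that exported methods take the mutex while unexported helpers assume it is already held, so a directory-level load locks once for the whole scan. A minimal sketch of that convention, assuming a hypothetical registry type standing in for ConfigManager:

package main

import (
	"fmt"
	"sync"
)

type registry struct {
	sync.Mutex
	items map[string]string
}

// addLocked assumes the caller already holds the mutex.
func (r *registry) addLocked(key, value string) {
	r.items[key] = value
}

// Add is the exported entry point for a single item.
func (r *registry) Add(key, value string) {
	r.Lock()
	defer r.Unlock()
	r.addLocked(key, value)
}

// AddAll holds the lock once for the whole batch, mirroring LoadConfigDirectory.
func (r *registry) AddAll(kv map[string]string) {
	r.Lock()
	defer r.Unlock()
	for k, v := range kv {
		r.addLocked(k, v)
	}
}

func main() {
	r := &registry{items: map[string]string{}}
	r.AddAll(map[string]string{"a": "1", "b": "2"})
	r.Add("c", "3")
	fmt.Println(len(r.items)) // 3
}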

View file

@@ -13,6 +13,7 @@ import (
llama "github.com/go-skynet/go-llama.cpp"
"github.com/mitchellh/mapstructure"
gpt4all "github.com/nomic-ai/gpt4all/gpt4all-bindings/golang"
"github.com/rs/zerolog/log"
)
type LocalAIEngine struct {
@@ -28,7 +29,7 @@ func NewLocalAIEngine(loader *model.ModelLoader) LocalAIEngine {
// TODO CLEANUP: Perform evil magic; we only need to do this once, and api should NOT be removed yet.
gpt4alldir := filepath.Join(".", "backend-assets", "gpt4all")
os.Setenv("GPT4ALL_IMPLEMENTATIONS_PATH", gpt4alldir)
fmt.Printf("[*HAX*] GPT4ALL_IMPLEMENTATIONS_PATH: %s\n", gpt4alldir)
log.Debug().Msgf("[*HAX*] GPT4ALL_IMPLEMENTATIONS_PATH: %s", gpt4alldir)
return LocalAIEngine{
loader: loader,
@@ -40,32 +41,29 @@ func NewLocalAIEngine(loader *model.ModelLoader) LocalAIEngine {
// TODO model interface? Currently scheduled for phase 3 lol
func (e *LocalAIEngine) LoadModel(config Config) (interface{}, error) {
ls := config.GetLocalSettings()
fmt.Printf("LocalAIEngine.LoadModel => %+v\n\n", config)
log.Debug().Msgf("[LocalAIEngine::LoadModel] config: %+v", config)
return e.loader.BackendLoader(ls.Backend, ls.ModelPath, config.ToModelOptions(), uint32(ls.Threads))
}
func (e *LocalAIEngine) GetModelPredictionFunction(config Config, tokenCallback func(string) bool) (func() ([]string, error), error) {
fmt.Printf("LocalAIEngine.GetModelPredictionFunction => %+v\n\n", config)
log.Debug().Msgf("[LocalAIEngine::GetModelPredictionFunction] called for configuration:\n%+v", config)
supportStreams := false
var predictOnce func(p Prompt) (string, error) = nil
inferenceModel, err := e.LoadModel(config)
if err != nil {
fmt.Printf("ERROR LOADING MODEL: %s\n", err.Error())
return nil, err
return nil, fmt.Errorf("error loading model: %w", err)
}
prompts, err := config.GetPrompts()
if err != nil {
fmt.Printf("ERROR GetPrompts: %s\n", err.Error())
return nil, err
return nil, fmt.Errorf("error calling GetPrompts(): %w", err)
}
switch localModel := inferenceModel.(type) {
case *llama.LLama:
fmt.Println("setting predictOnce for llama")
supportStreams = true
predictOnce = func(p Prompt) (string, error) {
@@ -85,7 +83,6 @@ func (e *LocalAIEngine) GetModelPredictionFunction(config Config, tokenCallback
return str, er
}
case *gpt4all.Model:
fmt.Println("setting predictOnce for gpt4all")
supportStreams = true
predictOnce = func(p Prompt) (string, error) {
@@ -111,13 +108,13 @@ func (e *LocalAIEngine) GetModelPredictionFunction(config Config, tokenCallback
return str, err
}
case *transformers.GPTJ:
fmt.Println("setting predictOnce for GPTJ")
supportStreams = false // EXP
predictOnce = func(p Prompt) (string, error) {
mappedPredictOptions := transformers.PredictOptions{}
mapstructure.Decode(config.ToPredictOptions(), &mappedPredictOptions)
// TODO Leave this for testing phase 1
fmt.Printf("MAPPED OPTIONS: %+v\n", mappedPredictOptions)
// str, err := localModel.PredictTEMP(
@@ -131,7 +128,6 @@ func (e *LocalAIEngine) GetModelPredictionFunction(config Config, tokenCallback
}
if predictOnce == nil {
fmt.Printf("Failed to find a predictOnce for %T", inferenceModel)
return nil, fmt.Errorf("failed to find a predictOnce for %T", inferenceModel)
}
@@ -160,21 +156,18 @@ func (e *LocalAIEngine) GetModelPredictionFunction(config Config, tokenCallback
n = 1
}
for p_i, prompt := range prompts {
for _, prompt := range prompts {
for n_i := 0; n_i < n; n_i++ {
res, err := predictOnce(prompt)
if err != nil {
fmt.Printf("ERROR DURING GetModelPredictionFunction -> PredictionFunction for %T with p_i: %d/n_i: %d\n%s", config, p_i, n_i, err.Error())
return nil, err
}
fmt.Printf("\n\n🤯 raw res: %s\n\n", res)
// TODO: this used to be part of finetune. For questionable parameter-passing reasons I've moved it up here. Revisit this if it's smelly in the future.
ccr, is_ccr := req.(CreateCompletionRequest)
if is_ccr {
if *ccr.Echo {
if ccr.Echo != nil && *ccr.Echo { // 🥲
res = prompt.AsString() + res
}
}
@@ -184,6 +177,9 @@ func (e *LocalAIEngine) GetModelPredictionFunction(config Config, tokenCallback
if tokenCallback != nil && !supportStreams {
tokenCallback(res)
}
log.Debug().Msgf("[%s - %s] prediction: %s", r.Model, r.Endpoint, res)
results = append(results, res)
}
}
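For orientation, GetModelPredictionFunction above selects a per-backend predictOnce via a type switch over the loaded model and returns a closure that runs it for every prompt. The following is a simplified, self-contained sketch of that shape only; fakeModelA and fakeModelB are hypothetical stand-ins for the real llama/gpt4all backends, and the n/streaming/echo handling is condensed away.

package main

import "fmt"

type fakeModelA struct{}
type fakeModelB struct{}

func buildPredictionFunction(model interface{}, prompts []string) (func() ([]string, error), error) {
	var predictOnce func(p string) (string, error)

	// Pick the per-backend prediction primitive based on the concrete model type.
	switch m := model.(type) {
	case *fakeModelA:
		predictOnce = func(p string) (string, error) { return fmt.Sprintf("A(%s)", p), nil }
	case *fakeModelB:
		predictOnce = func(p string) (string, error) { return fmt.Sprintf("B(%s)", p), nil }
	default:
		return nil, fmt.Errorf("failed to find a predictOnce for %T", m)
	}

	// Return a closure that applies predictOnce to every prompt.
	return func() ([]string, error) {
		results := []string{}
		for _, prompt := range prompts {
			res, err := predictOnce(prompt)
			if err != nil {
				return nil, err
			}
			results = append(results, res)
		}
		return results, nil
	}, nil
}

func main() {
	predict, err := buildPredictionFunction(&fakeModelA{}, []string{"hello", "world"})
	if err != nil {
		panic(err)
	}
	out, _ := predict()
	fmt.Println(out) // [A(hello) A(world)]
}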

View file

@@ -7,6 +7,7 @@ import (
model "github.com/go-skynet/LocalAI/pkg/model"
"github.com/mitchellh/mapstructure"
"github.com/rs/zerolog/log"
)
type LocalAIServer struct {
@@ -26,11 +27,9 @@ func combineRequestAndConfig[RequestType any](configManager *ConfigManager, mode
config, exists := configManager.GetConfig(lookup)
if !exists {
return nil, fmt.Errorf("Config not found for %+v", lookup)
return nil, fmt.Errorf("config not found for %+v", lookup)
}
// fmt.Printf("Model: %s\nConfig: %+v\nrequestFromInput: %+v\n", model, config, requestFromInput)
request, ok := config.GetRequestDefaults().(RequestType)
if !ok {
@@ -53,8 +52,6 @@ func combineRequestAndConfig[RequestType any](configManager *ConfigManager, mode
return nil, decodeErr
}
fmt.Printf("AFTER rD: %T\n%+v\n\n", request, request)
return &SpecificConfig[RequestType]{
ConfigStub: ConfigStub{
Registration: config.GetRegistration(),
@@ -64,10 +61,6 @@ func combineRequestAndConfig[RequestType any](configManager *ConfigManager, mode
}, nil
}
// func (las *LocalAIServer) loadModel(configStub ConfigStub) {
// }
// CancelFineTune implements StrictServerInterface
func (*LocalAIServer) CancelFineTune(ctx context.Context, request CancelFineTuneRequestObject) (CancelFineTuneResponseObject, error) {
panic("unimplemented")
@@ -79,51 +72,44 @@ func (las *LocalAIServer) CreateChatCompletion(ctx context.Context, request Crea
chatRequestConfig, err := combineRequestAndConfig(las.configManager, request.Body.Model, request.Body)
if err != nil {
fmt.Printf("CreateChatCompletion ERROR combining config and input!\n%s\n", err.Error())
return nil, err
return nil, fmt.Errorf("error during CreateChatCompletion, failed to combineRequestAndConfig: %w", err)
}
chatRequest := chatRequestConfig.RequestDefaults
fmt.Printf("\n===CreateChatCompletion===\n%+v\n", chatRequest)
fmt.Printf("\n\n!! TYPED CreateChatCompletion !!\ntemperature %f\n top_p %f \n %d\n", *chatRequest.Temperature, *chatRequest.TopP, *chatRequest.XLocalaiExtensions.TopK)
fmt.Printf("chatRequest: %+v\nlen(messages): %d", chatRequest, len(chatRequest.Messages))
for i, m := range chatRequest.Messages {
fmt.Printf("message #%d: %+v", i, m)
}
fmt.Println("Dodgy Stuff Below")
predict, err := las.engine.GetModelPredictionFunction(chatRequestConfig, nil)
if err != nil {
fmt.Printf("!!!!!!!!!! Error obtaining predict fn %s\n", err.Error())
return nil, err
return nil, fmt.Errorf("failed to GetModelPredictionFunction: %w", err)
}
fmt.Println("About to call predict()")
predictions, err := predict()
if err != nil {
fmt.Printf("!!!!!!!!!! Error INSIDE predict fn %s\n", err.Error())
return nil, err
return nil, fmt.Errorf("error during CreateChatCompletion calling model prediction function: %w", err)
}
resp := CreateChatCompletion200JSONResponse{}
// People who know golang better: is there a cleaner way to do this kind of nil-safe init? (one possible helper is sketched after this function)
var responseRole ChatCompletionResponseMessageRole = "assistant" // Fallback on a reasonable guess
ext := chatRequestConfig.GetRequest().XLocalaiExtensions
if ext != nil {
extr := ext.Roles
if extr != nil {
if extr.Assistant != nil {
responseRole = ChatCompletionResponseMessageRole(*extr.Assistant) // Call for help here too - this really seems dirty. How should this be expressed?
}
}
}
for i, prediction := range predictions {
i := i // shadow the loop variable so &i below is a distinct pointer per iteration (pre-Go 1.22)
resp.Choices = append(resp.Choices, CreateChatCompletionResponseChoice{
Message: &ChatCompletionResponseMessage{
Content: prediction,
Role: "asssistant", // TODO FIX
Role: responseRole,
},
Index: &i,
})
}
return resp, nil
// panic("unimplemented")
}
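On the nil-safe init question flagged above: one possible answer (a sketch, not part of this changeset) is a small generic deref-with-fallback helper. It only shortens the leaf dereference; the intermediate ext/Roles nil checks would still be needed. The derefOr name is hypothetical.

package main

import "fmt"

// derefOr returns *p when p is non-nil, otherwise the supplied fallback.
func derefOr[T any](p *T, fallback T) T {
	if p == nil {
		return fallback
	}
	return *p
}

func main() {
	var assistant *string // e.g. a possibly-nil extension field
	fmt.Println(derefOr(assistant, "assistant")) // assistant

	custom := "bot"
	fmt.Println(derefOr(&custom, "assistant")) // bot
}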
// CreateCompletion implements StrictServerInterface
@@ -134,40 +120,35 @@ func (las *LocalAIServer) CreateCompletion(ctx context.Context, request CreateCo
config, err := combineRequestAndConfig(las.configManager, modelName, request.Body)
if err != nil {
fmt.Printf("CreateCompletion ERROR combining config and input!\n%s\n", err.Error())
return nil, err
return nil, fmt.Errorf("[CreateCompletion] error in combineRequestAndConfig: %w", err)
}
req := config.GetRequest()
predict, err := las.engine.GetModelPredictionFunction(config, nil)
if err != nil {
return nil, fmt.Errorf("failed to GetModelPredictionFunction: %w", err)
}
fmt.Printf("\n===CreateCompletion===\n%+v\n", req)
predictions, err := predict()
if err != nil {
return nil, fmt.Errorf("error during CreateCompletion calling model prediction function: %w", err)
}
log.Debug().Msgf("[CreateCompletion] predict() returned %d predictions", len(predictions))
var choices []CreateCompletionResponseChoice
prompts, err := req.Prompt.AsCreateCompletionRequestPrompt1()
if err != nil {
tokenPrompt, err := req.Prompt.AsCreateCompletionRequestPrompt2()
if err == nil {
fmt.Printf("Scary token array length %d\n", len(tokenPrompt))
panic("Token array is scary and phase 2")
}
singlePrompt, err := req.Prompt.AsCreateCompletionRequestPrompt0()
if err != nil {
return nil, err
}
prompts = []string{singlePrompt}
}
// model := las.loader.LoadModel(modelName, )
for _, v := range prompts {
fmt.Printf("[prompt] %s\n", v)
for i, prediction := range predictions {
i, prediction := i, prediction // shadow the loop variables so &i and &prediction below are distinct per iteration (pre-Go 1.22)
log.Debug().Msgf("[CreateCompletion] %d: %s", i, prediction)
choices = append(choices, CreateCompletionResponseChoice{
Index: &i,
Text: &prediction,
// TODO more?
})
}
return CreateCompletion200JSONResponse{
Model: modelName,
Choices: choices,
// Usage needs to be fixed in yaml
}, nil
}