From 0b910e059504d20ca2bae3450abc117b23486a9e Mon Sep 17 00:00:00 2001 From: Dave Lee Date: Thu, 8 Jun 2023 03:11:52 -0400 Subject: [PATCH] stash progress for the night. loading GPTJ fails, hacked in PredictTEMP as kludge --- Makefile | 6 +- apiv2/config.go | 252 ++++++++++------------ apiv2/config_manager.go | 139 ++++++++++++ apiv2/engine.go | 205 ++++++++++++++++++ apiv2/localai.go | 29 ++- apiv2/localai_nethttp.go | 2 + config/gpt-3.5-turbo-chat.yaml | 2 + config/gpt-3.5-turbo-completion.yaml | 2 + go.mod | 4 +- go.sum | 12 +- openai-openapi/chi-interface.tmpl | 19 -- openai-openapi/config.yaml | 1 - openai-openapi/localai_model_patches.yaml | 8 + openai-openapi/test_segment.yml | 30 --- 14 files changed, 518 insertions(+), 193 deletions(-) create mode 100644 apiv2/config_manager.go create mode 100644 apiv2/engine.go delete mode 100644 openai-openapi/chi-interface.tmpl delete mode 100644 openai-openapi/test_segment.yml diff --git a/Makefile b/Makefile index d2b419e2..ddecf145 100644 --- a/Makefile +++ b/Makefile @@ -78,9 +78,9 @@ openai-openapi/transformed: openai-openapi/spec apiv2/localai.gen.go: prepare-sources echo "go mod download done, running YTT" - cp ./openai-openapi/transformed/localai.yaml ./openai-openapi/transformed/localai.orig.yaml - $(GOCMD) run github.com/vmware-tanzu/carvel-ytt/cmd/ytt --output-files ./openai-openapi/transformed -f ./openai-openapi/transformed/localai.yaml -f ./openai-openapi/localai_model_patches.yaml - # -f ./openai-openapi/remove_depreciated_openapi.yaml + # cp ./openai-openapi/transformed/localai.yaml ./openai-openapi/transformed/localai.orig.yaml + $(GOCMD) run github.com/vmware-tanzu/carvel-ytt/cmd/ytt --output-files ./openai-openapi/transformed -f ./openai-openapi/transformed/localai.yaml -f ./openai-openapi/localai_model_patches.yaml + cp ./openai-openapi/transformed/localai.yaml ./openai-openapi/transformed/localai.mid.yaml echo "YTT Done, generating code..." $(GOCMD) run github.com/deepmap/oapi-codegen/cmd/oapi-codegen --config=./openai-openapi/config.yaml ./openai-openapi/transformed/localai.yaml diff --git a/apiv2/config.go b/apiv2/config.go index e40957ec..844ceac5 100644 --- a/apiv2/config.go +++ b/apiv2/config.go @@ -2,14 +2,8 @@ package apiv2 import ( "fmt" - "os" - "path/filepath" - "strings" - "sync" llama "github.com/go-skynet/go-llama.cpp" - "github.com/mitchellh/mapstructure" - "gopkg.in/yaml.v2" ) type ConfigRegistration struct { @@ -38,6 +32,33 @@ type Config interface { GetRequestDefaults() interface{} GetLocalSettings() ConfigLocalSettings GetRegistration() ConfigRegistration + + // TODO: Test these. I am not sure. + ToPredictOptions() []llama.PredictOption + ToModelOptions() []llama.ModelOption + + // TODO also dubious? Technically some requests lack prompts, but it's pretty general and may just be worth sticking here. + GetPrompts() ([]Prompt, error) + GetN() (int, error) +} + +type Prompt interface { + AsString() string //, bool) + AsTokens() []int +} + +// How do Go people name these? Should I just ditch the interface entirely? 
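+// Convention-wise Go code usually keeps the interface name short and gives the concrete type a
+// descriptive name (stringPrompt / tokenPrompt) rather than an Impl suffix. The interface still
+// seems worth keeping, though: a completion prompt can arrive as text or as pre-tokenized ints,
+// and the engine wants to range over both uniformly. Illustrative values only:
+//
+//	PromptImpl{sVal: "Once upon a time"}    // text prompt
+//	PromptImpl{tVal: []int{9906, 11, 1917}} // token prompt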
+type PromptImpl struct { + sVal string + tVal []int +} + +func (p PromptImpl) AsString() string { + return p.sVal +} + +func (p PromptImpl) AsTokens() []int { + return p.tVal } func (cs ConfigStub) GetRequestDefaults() interface{} { @@ -52,6 +73,23 @@ func (cs ConfigStub) GetRegistration() ConfigRegistration { return cs.Registration } +func (cs ConfigStub) ToPredictOptions() []llama.PredictOption { + return []llama.PredictOption{} +} + +func (cs ConfigStub) ToModelOptions() []llama.ModelOption { + return []llama.ModelOption{} +} + +func (cs ConfigStub) GetPrompts() ([]Prompt, error) { + // Does this make sense? + return nil, fmt.Errorf("unsupported operation GetPrompts for %T", cs) +} + +func (cs ConfigStub) GetN() (int, error) { + return 0, fmt.Errorf("unsupported operation GetN for %T", cs) +} + func (sc SpecificConfig[RequestModel]) GetRequestDefaults() interface{} { return sc.RequestDefaults } @@ -68,133 +106,6 @@ func (sc SpecificConfig[RequestModel]) GetRegistration() ConfigRegistration { return sc.Registration } -type ConfigManager struct { - configs map[ConfigRegistration]Config - sync.Mutex -} - -func NewConfigManager() *ConfigManager { - return &ConfigManager{ - configs: make(map[ConfigRegistration]Config), - } -} - -// Private helper method doesn't enforce the mutex. This is because loading at the directory level keeps the lock up the whole time, and I like that. -func (cm *ConfigManager) loadConfigFile(path string) (*Config, error) { - fmt.Printf("INTERNAL loadConfigFile for %s\n", path) - stub := ConfigStub{} - f, err := os.ReadFile(path) - if err != nil { - return nil, fmt.Errorf("cannot read config file: %w", err) - } - if err := yaml.Unmarshal(f, &stub); err != nil { - return nil, fmt.Errorf("cannot unmarshal config file: %w", err) - } - fmt.Printf("RAW STUB: %+v\n", stub) - - endpoint := stub.Registration.Endpoint - - // EndpointConfigMap is generated over in localai.gen.go - // It's a map that translates a string endpoint function name to an empty SpecificConfig[T], with the type parameter for that request. 
- if structType, ok := EndpointConfigMap[endpoint]; ok { - fmt.Printf("~~ EndpointConfigMap[%s]: %+v\n", endpoint, structType) - tmpUnmarshal := map[string]interface{}{} - if err := yaml.Unmarshal(f, &tmpUnmarshal); err != nil { - if e, ok := err.(*yaml.TypeError); ok { - fmt.Println("\n!!!!!Type error:", e) - } - return nil, fmt.Errorf("cannot unmarshal config file for %s: %w", endpoint, err) - } - fmt.Printf("$$$ tmpUnmarshal: %+v\n", tmpUnmarshal) - mapstructure.Decode(tmpUnmarshal, &structType) - - fmt.Printf("AFTER UNMARSHAL %T\n%+v\n=======\n", structType, structType) - - // rawConfig.RequestDefaults = structType.GetRequestDefaults() - - cm.configs[structType.GetRegistration()] = structType - // fmt.Printf("\n\n\n!!!!!HIT BOTTOM!!!!!!") - return &structType, nil - // fmt.Printf("\n\n\n!!!!!\n\n\nBIG MISS!\n\n%+v\n\n%T\n%T=====", specificStruct, specificStruct, structType) - } - - // for i, ts := range EndpointToRequestBodyMap { - // fmt.Printf("%s: %+v\n", i, ts) - // } - - return nil, fmt.Errorf("failed to parse config for endpoint %s", endpoint) -} - -func (cm *ConfigManager) LoadConfigFile(path string) (*Config, error) { - fmt.Printf("LoadConfigFile TOP for %s", path) - - cm.Lock() - fmt.Println("cm.Lock done") - - defer cm.Unlock() - fmt.Println("cm.Unlock done") - - return cm.loadConfigFile(path) -} - -func (cm *ConfigManager) LoadConfigDirectory(path string) ([]ConfigRegistration, error) { - fmt.Printf("LoadConfigDirectory TOP for %s\n", path) - cm.Lock() - defer cm.Unlock() - files, err := os.ReadDir(path) - if err != nil { - return []ConfigRegistration{}, err - } - fmt.Printf("os.ReadDir done, found %d files\n", len(files)) - - for _, file := range files { - // Skip anything that isn't yaml - if !strings.Contains(file.Name(), ".yaml") { - continue - } - _, err := cm.loadConfigFile(filepath.Join(path, file.Name())) - if err != nil { - return []ConfigRegistration{}, err - } - } - - fmt.Printf("LoadConfigDirectory DONE %d", len(cm.configs)) - - return cm.listConfigs(), nil -} - -func (cm *ConfigManager) GetConfig(r ConfigRegistration) (Config, bool) { - cm.Lock() - defer cm.Unlock() - v, exists := cm.configs[r] - return v, exists -} - -// This is a convience function for endpoint functions to use. -// The advantage is it avoids errors in the endpoint string -// Not a clue what the performance cost of this is. -func (cm *ConfigManager) GetConfigForThisEndpoint(m string) (Config, bool) { - endpoint := printCurrentFunctionName(2) - return cm.GetConfig(ConfigRegistration{ - Model: m, - Endpoint: endpoint, - }) -} - -func (cm *ConfigManager) listConfigs() []ConfigRegistration { - var res []ConfigRegistration - for k := range cm.configs { - res = append(res, k) - } - return res -} - -func (cm *ConfigManager) ListConfigs() []ConfigRegistration { - cm.Lock() - defer cm.Unlock() - return cm.listConfigs() -} - // These functions I'm a bit dubious about. I think there's a better refactoring down in pkg/model // But to get a minimal test up and running, here we go! // TODO: non text completion @@ -328,3 +239,78 @@ func (sc SpecificConfig[RequestModel]) ToPredictOptions() []llama.PredictOption return llamaOpts } + +// It's unclear if this code belongs here or somewhere else, but I'm jamming it here for now. 
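+// The generated CreateCompletionRequestPrompt is a union, so the AsCreateCompletionRequestPrompt0..3
+// accessors below are assumed to decode to string, []string, []int and [][]int respectively; whichever
+// variant parses is flattened into a []Prompt so callers can just range over it. Rough expected usage
+// from an endpoint handler (hypothetical, nothing calls it this way yet):
+//
+//	prompts, err := cfg.GetPrompts()
+//	if err != nil {
+//		return nil, err
+//	}
+//	for _, p := range prompts {
+//		_ = p.AsString() // AsTokens() isn't consumed anywhere yet
+//	}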
+func (sc SpecificConfig[RequestModel]) GetPrompts() ([]Prompt, error) { + prompts := []Prompt{} + + switch req := sc.GetRequestDefaults().(type) { + case CreateCompletionRequest: + p0, err := req.Prompt.AsCreateCompletionRequestPrompt0() + if err == nil { + p := PromptImpl{sVal: p0} + return []Prompt{p}, nil + } + p1, err := req.Prompt.AsCreateCompletionRequestPrompt1() + if err == nil { + for _, m := range p1 { + prompts = append(prompts, PromptImpl{sVal: m}) + } + return prompts, nil + } + p2, err := req.Prompt.AsCreateCompletionRequestPrompt2() + if err == nil { + p := PromptImpl{tVal: p2} + return []Prompt{p}, nil + } + p3, err := req.Prompt.AsCreateCompletionRequestPrompt3() + if err == nil { + for _, t := range p3 { + prompts = append(prompts, PromptImpl{tVal: t}) + } + return prompts, nil + } + case CreateChatCompletionRequest: + + for _, message := range req.Messages { + + prompts = append(prompts, PromptImpl{sVal: message.Content}) + + // TODO Deal with ROLES + // var content string + // r := req.Roles[message.Role] + // if r != "" { + // content = fmt.Sprint(r, " ", message.Content) + // } else { + // content = message.Content + // } + + // if content != "" { + // prompt = prompt + content + // } + + } + return prompts, nil + } + + return nil, fmt.Errorf("string prompt not found for %T", sc.GetRequestDefaults()) +} + +func (sc SpecificConfig[RequestModel]) GetN() (int, error) { + switch req := sc.GetRequestDefaults().(type) { + + case CreateChatCompletionRequest: + case CreateCompletionRequest: + case CreateEditRequest: + case CreateImageRequest: + // TODO I AM SORRY FOR THIS DIRTY HACK. + // YTT is currently mangling the n property and renaming it to false. + // This needs to be fixed before merging. However for testing..... + return *req.False, nil + } + + return 0, fmt.Errorf("unsupported operation GetN for %T", sc) +} + +// TODO: Not even using this, but illustration of difficulty: should this be integrated to make GetPrompts(), returning an interface of {Tokens []int, String string} +// func (sc SpecificConfig[RequestModel]) GetTokenPrompts() ([]int, error) {} diff --git a/apiv2/config_manager.go b/apiv2/config_manager.go new file mode 100644 index 00000000..f1629dc2 --- /dev/null +++ b/apiv2/config_manager.go @@ -0,0 +1,139 @@ +package apiv2 + +import ( + "fmt" + "os" + "path/filepath" + "strings" + "sync" + + "github.com/mitchellh/mapstructure" + "gopkg.in/yaml.v2" +) + +type ConfigManager struct { + configs map[ConfigRegistration]Config + sync.Mutex +} + +func NewConfigManager() *ConfigManager { + return &ConfigManager{ + configs: make(map[ConfigRegistration]Config), + } +} + +// Private helper method doesn't enforce the mutex. This is because loading at the directory level keeps the lock up the whole time, and I like that. +func (cm *ConfigManager) loadConfigFile(path string) (*Config, error) { + fmt.Printf("INTERNAL loadConfigFile for %s\n", path) + stub := ConfigStub{} + f, err := os.ReadFile(path) + if err != nil { + return nil, fmt.Errorf("cannot read config file: %w", err) + } + if err := yaml.Unmarshal(f, &stub); err != nil { + return nil, fmt.Errorf("cannot unmarshal config file: %w", err) + } + fmt.Printf("RAW STUB: %+v\n", stub) + + endpoint := stub.Registration.Endpoint + + // EndpointConfigMap is generated over in localai.gen.go + // It's a map that translates a string endpoint function name to an empty SpecificConfig[T], with the type parameter for that request. 
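+	// An entry is assumed to look roughly like "CreateChatCompletion": SpecificConfig[CreateChatCompletionRequest]{}.
+	// The file is deliberately decoded twice: once into the ConfigStub above just to learn the endpoint,
+	// then into a generic map that mapstructure copies into the typed SpecificConfig so the
+	// request_defaults fields keep their concrete types.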
+ if structType, ok := EndpointConfigMap[endpoint]; ok { + fmt.Printf("~~ EndpointConfigMap[%s]: %+v\n", endpoint, structType) + tmpUnmarshal := map[string]interface{}{} + if err := yaml.Unmarshal(f, &tmpUnmarshal); err != nil { + if e, ok := err.(*yaml.TypeError); ok { + fmt.Println("\n!!!!!Type error:", e) + } + return nil, fmt.Errorf("cannot unmarshal config file for %s: %w", endpoint, err) + } + fmt.Printf("$$$ tmpUnmarshal: %+v\n", tmpUnmarshal) + mapstructure.Decode(tmpUnmarshal, &structType) + + fmt.Printf("AFTER UNMARSHAL %T\n%+v\n=======\n", structType, structType) + + // rawConfig.RequestDefaults = structType.GetRequestDefaults() + + cm.configs[structType.GetRegistration()] = structType + // fmt.Printf("\n\n\n!!!!!HIT BOTTOM!!!!!!") + return &structType, nil + // fmt.Printf("\n\n\n!!!!!\n\n\nBIG MISS!\n\n%+v\n\n%T\n%T=====", specificStruct, specificStruct, structType) + } + + // for i, ts := range EndpointToRequestBodyMap { + // fmt.Printf("%s: %+v\n", i, ts) + // } + + return nil, fmt.Errorf("failed to parse config for endpoint %s", endpoint) +} + +func (cm *ConfigManager) LoadConfigFile(path string) (*Config, error) { + fmt.Printf("LoadConfigFile TOP for %s", path) + + cm.Lock() + fmt.Println("cm.Lock done") + + defer cm.Unlock() + fmt.Println("cm.Unlock done") + + return cm.loadConfigFile(path) +} + +func (cm *ConfigManager) LoadConfigDirectory(path string) ([]ConfigRegistration, error) { + fmt.Printf("LoadConfigDirectory TOP for %s\n", path) + cm.Lock() + defer cm.Unlock() + files, err := os.ReadDir(path) + if err != nil { + return []ConfigRegistration{}, err + } + fmt.Printf("os.ReadDir done, found %d files\n", len(files)) + + for _, file := range files { + // Skip anything that isn't yaml + if !strings.Contains(file.Name(), ".yaml") { + continue + } + _, err := cm.loadConfigFile(filepath.Join(path, file.Name())) + if err != nil { + return []ConfigRegistration{}, err + } + } + + fmt.Printf("LoadConfigDirectory DONE %d", len(cm.configs)) + + return cm.listConfigs(), nil +} + +func (cm *ConfigManager) GetConfig(r ConfigRegistration) (Config, bool) { + cm.Lock() + defer cm.Unlock() + v, exists := cm.configs[r] + return v, exists +} + +// This is a convience function for endpoint functions to use. +// The advantage is it avoids errors in the endpoint string +// Not a clue what the performance cost of this is. 
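+// printCurrentFunctionName(2) presumably walks runtime.Caller up to the calling frame, so the lookup
+// key becomes the endpoint method's own name (e.g. "CreateChatCompletion") with no hand-typed strings.
+// A single caller lookup should be negligible next to inference. Hypothetical call site in a handler:
+//
+//	cfg, ok := las.configManager.GetConfigForThisEndpoint(request.Model)
+//	if !ok {
+//		return nil, fmt.Errorf("no config registered for model %q", request.Model)
+//	}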
+func (cm *ConfigManager) GetConfigForThisEndpoint(m string) (Config, bool) { + endpoint := printCurrentFunctionName(2) + return cm.GetConfig(ConfigRegistration{ + Model: m, + Endpoint: endpoint, + }) +} + +func (cm *ConfigManager) listConfigs() []ConfigRegistration { + var res []ConfigRegistration + for k := range cm.configs { + res = append(res, k) + } + return res +} + +func (cm *ConfigManager) ListConfigs() []ConfigRegistration { + cm.Lock() + defer cm.Unlock() + return cm.listConfigs() +} diff --git a/apiv2/engine.go b/apiv2/engine.go new file mode 100644 index 00000000..326004df --- /dev/null +++ b/apiv2/engine.go @@ -0,0 +1,205 @@ +package apiv2 + +import ( + "fmt" + "regexp" + "strings" + "sync" + + model "github.com/go-skynet/LocalAI/pkg/model" + transformers "github.com/go-skynet/go-ggml-transformers.cpp" + llama "github.com/go-skynet/go-llama.cpp" + "github.com/mitchellh/mapstructure" + gpt4all "github.com/nomic-ai/gpt4all/gpt4all-bindings/golang" +) + +type LocalAIEngine struct { + loader *model.ModelLoader + mutexMapMutex sync.Mutex + mutexes map[ConfigRegistration]*sync.Mutex + cutstrings map[ConfigRegistration]map[string]*regexp.Regexp + cutstringMutex sync.Mutex +} + +func NewLocalAIEngine(loader *model.ModelLoader) LocalAIEngine { + return LocalAIEngine{ + loader: loader, + mutexes: make(map[ConfigRegistration]*sync.Mutex), + cutstrings: make(map[ConfigRegistration]map[string]*regexp.Regexp), + } +} + +// TODO model interface? Currently scheduled for phase 3 lol +func (e *LocalAIEngine) LoadModel(config Config) (interface{}, error) { + ls := config.GetLocalSettings() + fmt.Printf("LocalAIEngine.LoadModel => %+v\n\n", config) + return e.loader.BackendLoader(ls.Backend, ls.ModelPath, config.ToModelOptions(), uint32(ls.Threads)) +} + +func (e *LocalAIEngine) GetModelPredictionFunction(config Config, tokenCallback func(string) bool) (func() ([]string, error), error) { + + fmt.Printf("LocalAIEngine.GetModelPredictionFunction => %+v\n\n", config) + + supportStreams := false + var predictOnce func(p Prompt) (string, error) = nil + + inferenceModel, err := e.LoadModel(config) + if err != nil { + fmt.Printf("ERROR LOADING MODEL: %s\n", err.Error()) + return nil, err + } + + prompts, err := config.GetPrompts() + if err != nil { + fmt.Printf("ERROR GetPrompts: %s\n", err.Error()) + return nil, err + } + + switch localModel := inferenceModel.(type) { + case *llama.LLama: + fmt.Println("setting predictOnce for llama") + supportStreams = true + predictOnce = func(p Prompt) (string, error) { + + if tokenCallback != nil { + localModel.SetTokenCallback(tokenCallback) + } + + // TODO: AsTokens? I think that would need to be exposed from llama and the others. 
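+			// Every prompt is reduced to its string form for now; feeding AsTokens() straight through would
+			// need a Predict variant in the binding that takes []int, which doesn't appear to be exposed yet,
+			// so a token-only prompt currently predicts on an empty string.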
+ str, er := localModel.Predict( + p.AsString(), + config.ToPredictOptions()..., + ) + // Seems that if we don't free the callback explicitly we leave functions registered (that might try to send on closed channels) + // For instance otherwise the API returns: {"error":{"code":500,"message":"send on closed channel","type":""}} + // after a stream event has occurred + localModel.SetTokenCallback(nil) + return str, er + } + case *gpt4all.Model: + fmt.Println("setting predictOnce for gpt4all") + supportStreams = true + predictOnce = func(p Prompt) (string, error) { + if tokenCallback != nil { + localModel.SetTokenCallback(tokenCallback) + } + + mappedPredictOptions := gpt4all.PredictOptions{} + + mapstructure.Decode(config.ToPredictOptions(), &mappedPredictOptions) + + str, err := localModel.PredictTEMP( + p.AsString(), + mappedPredictOptions, + ) + // Seems that if we don't free the callback explicitly we leave functions registered (that might try to send on closed channels) + // For instance otherwise the API returns: {"error":{"code":500,"message":"send on closed channel","type":""}} + // after a stream event has occurred + localModel.SetTokenCallback(nil) + return str, err + } + case *transformers.GPTJ: + fmt.Println("setting predictOnce for GPTJ") + supportStreams = false // EXP + predictOnce = func(p Prompt) (string, error) { + mappedPredictOptions := transformers.PredictOptions{} + + mapstructure.Decode(config.ToPredictOptions(), &mappedPredictOptions) + + fmt.Printf("MAPPED OPTIONS: %+v\n", mappedPredictOptions) + + str, err := localModel.PredictTEMP( + p.AsString(), + mappedPredictOptions, + ) + return str, err + } + } + + if predictOnce == nil { + fmt.Printf("Failed to find a predictOnce for %T", inferenceModel) + return nil, fmt.Errorf("failed to find a predictOnce for %T", inferenceModel) + } + + req := config.GetRequestDefaults() + + return func() ([]string, error) { + // This is still needed, see: https://github.com/ggerganov/llama.cpp/discussions/784 + e.mutexMapMutex.Lock() + r := config.GetRegistration() + l, ok := e.mutexes[r] + if !ok { + m := &sync.Mutex{} + e.mutexes[r] = m + l = m + } + e.mutexMapMutex.Unlock() + l.Lock() + defer l.Unlock() + + results := []string{} + + n, err := config.GetN() + + if err != nil { + // TODO live to regret this, but for now... + n = 1 + } + + for p_i, prompt := range prompts { + for n_i := 0; n_i < n; n_i++ { + res, err := predictOnce(prompt) + + // TODO: this used to be a part of finetune. For.... questionable parameter reasons I've moved it up here. Revisit this if it's smelly in the future. 
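+				// TODO: two more things to revisit here: Echo is a pointer in the generated types, so the
+				// dereference below panics if a completion request leaves echo unset, and the err from
+				// predictOnce only gets checked after res has already been echoed and finetuned.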
+ ccr, is_ccr := req.(CreateCompletionRequest) + if is_ccr { + if *ccr.Echo { + res = prompt.AsString() + res + } + } + + res = e.Finetune(config, res) + + if err != nil { + fmt.Printf("ERROR DURING GetModelPredictionFunction -> PredictionFunction for %T with p_i: %d/n_i: %d\n%s", config, p_i, n_i, err.Error()) + return nil, err + } + if tokenCallback != nil && !supportStreams { + tokenCallback(res) + } + results = append(results, res) + } + } + + return results, nil + + }, nil +} + +func (e *LocalAIEngine) Finetune(config Config, prediction string) string { + + reg := config.GetRegistration() + switch req := config.GetRequestDefaults().(type) { + case *CreateChatCompletionRequest: + case *CreateCompletionRequest: + ext := req.XLocalaiExtensions + if ext != nil { + for _, c := range *ext.Cutstrings { + e.cutstringMutex.Lock() + regex, ok := e.cutstrings[reg][c] + if !ok { + e.cutstrings[reg][c] = regexp.MustCompile(c) + regex = e.cutstrings[reg][c] + } + e.cutstringMutex.Unlock() + prediction = regex.ReplaceAllString(prediction, "") + } + + for _, c := range *ext.Trimstrings { + prediction = strings.TrimSpace(strings.TrimPrefix(prediction, c)) + } + } + } + + return prediction +} diff --git a/apiv2/localai.go b/apiv2/localai.go index 29c9131e..395f401c 100644 --- a/apiv2/localai.go +++ b/apiv2/localai.go @@ -12,6 +12,7 @@ import ( type LocalAIServer struct { configManager *ConfigManager loader *model.ModelLoader + engine *LocalAIEngine } func combineRequestAndConfig[RequestType any](configManager *ConfigManager, model string, requestFromInput *RequestType) (*SpecificConfig[RequestType], error) { @@ -93,7 +94,33 @@ func (las *LocalAIServer) CreateChatCompletion(ctx context.Context, request Crea fmt.Printf("message #%d: %+v", i, m) } - return CreateChatCompletion200JSONResponse{}, nil + fmt.Println("Dodgy Stuff Below") + + predict, err := las.engine.GetModelPredictionFunction(chatRequestConfig, nil) + if err != nil { + fmt.Printf("!!!!!!!!!! Error obtaining predict fn %s\n", err.Error()) + return nil, err + } + + predictions, err := predict() + if err != nil { + fmt.Printf("!!!!!!!!!! 
Error INSIDE predict fn %s\n", err.Error()) + return nil, err + } + + resp := CreateChatCompletion200JSONResponse{} + + for i, prediction := range predictions { + resp.Choices = append(resp.Choices, CreateChatCompletionResponseChoice{ + Message: &ChatCompletionResponseMessage{ + Content: prediction, + Role: "asssistant", // TODO FIX + }, + Index: &i, + }) + } + + return resp, nil // panic("unimplemented") } diff --git a/apiv2/localai_nethttp.go b/apiv2/localai_nethttp.go index ca571f50..1eae95cf 100644 --- a/apiv2/localai_nethttp.go +++ b/apiv2/localai_nethttp.go @@ -7,9 +7,11 @@ import ( ) func NewLocalAINetHTTPServer(configManager *ConfigManager, loader *model.ModelLoader, address string) *LocalAIServer { + engine := NewLocalAIEngine(loader) localAI := LocalAIServer{ configManager: configManager, loader: loader, + engine: &engine, } var middlewares []StrictMiddlewareFunc diff --git a/config/gpt-3.5-turbo-chat.yaml b/config/gpt-3.5-turbo-chat.yaml index ee71495d..b92f3c7d 100644 --- a/config/gpt-3.5-turbo-chat.yaml +++ b/config/gpt-3.5-turbo-chat.yaml @@ -4,6 +4,8 @@ registration: local_paths: model: ggml-gpt4all-j template: chat-gpt4all + backend: gptj + threads: 4 request_defaults: top_p: 0.7 temperature: 0.2 diff --git a/config/gpt-3.5-turbo-completion.yaml b/config/gpt-3.5-turbo-completion.yaml index f890d09e..21d01dc0 100644 --- a/config/gpt-3.5-turbo-completion.yaml +++ b/config/gpt-3.5-turbo-completion.yaml @@ -4,6 +4,8 @@ registration: local_paths: model: ggml-gpt4all-j template: chat-gpt4all + backend: gptj + threads: 4 request_defaults: top_p: 0.7 temperature: 0.2 diff --git a/go.mod b/go.mod index 501611db..00cbb854 100644 --- a/go.mod +++ b/go.mod @@ -20,7 +20,7 @@ require ( github.com/mudler/go-stable-diffusion v0.0.0-20230605122230-d89260f598af github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230605194130-266f13aee9d8 github.com/onsi/ginkgo/v2 v2.9.7 - github.com/onsi/gomega v1.27.7 + github.com/onsi/gomega v1.27.8 github.com/otiai10/openaigo v1.1.0 github.com/rs/zerolog v1.29.1 github.com/sashabaranov/go-openai v1.10.0 @@ -28,7 +28,7 @@ require ( github.com/tmc/langchaingo v0.0.0-20230605114752-4afed6d7be4a github.com/urfave/cli/v2 v2.25.5 github.com/valyala/fasthttp v1.47.0 - github.com/vmware-tanzu/carvel-ytt v0.45.1 + github.com/vmware-tanzu/carvel-ytt v0.45.2 gopkg.in/yaml.v2 v2.4.0 gopkg.in/yaml.v3 v3.0.1 ) diff --git a/go.sum b/go.sum index 40b8f8a6..32084b0d 100644 --- a/go.sum +++ b/go.sum @@ -38,12 +38,12 @@ github.com/donomii/go-rwkv.cpp v0.0.0-20230531084548-c43cdf5fc5bf h1:upCz8WYdzMe github.com/donomii/go-rwkv.cpp v0.0.0-20230531084548-c43cdf5fc5bf/go.mod h1:gWy7FIWioqYmYxkaoFyBnaKApeZVrUkHhv9EV9pz4dM= github.com/donomii/go-rwkv.cpp v0.0.0-20230601111443-3b28b09469fc h1:RCGGh/zw+K09sjCIYHUV7lFenxONml+LS02RdN+AkwI= github.com/donomii/go-rwkv.cpp v0.0.0-20230601111443-3b28b09469fc/go.mod h1:gWy7FIWioqYmYxkaoFyBnaKApeZVrUkHhv9EV9pz4dM= +github.com/donomii/go-rwkv.cpp v0.0.0-20230604202420-1e18b2490e7e h1:Qne1BO0ltmyJcsizxZ61SV+uwuD1F8NztsfBDHOd0LI= +github.com/donomii/go-rwkv.cpp v0.0.0-20230604202420-1e18b2490e7e/go.mod h1:gWy7FIWioqYmYxkaoFyBnaKApeZVrUkHhv9EV9pz4dM= github.com/gabriel-vasile/mimetype v1.4.2 h1:w5qFW6JKBz9Y393Y4q372O9A7cUSequkh1Q7OhCmWKU= github.com/gabriel-vasile/mimetype v1.4.2/go.mod h1:zApsH/mKG4w07erKIaJPFiX0Tsq9BFQgN3qGY5GnNgA= github.com/getkin/kin-openapi v0.117.0 h1:QT2DyGujAL09F4NrKDHJGsUoIprlIcFVHWDVDcUFE8A= github.com/getkin/kin-openapi v0.117.0/go.mod h1:l5e9PaFUo9fyLJCPGQeXI2ML8c3P8BHOEV2VaAVf/pc= 
-github.com/donomii/go-rwkv.cpp v0.0.0-20230604202420-1e18b2490e7e h1:Qne1BO0ltmyJcsizxZ61SV+uwuD1F8NztsfBDHOd0LI= -github.com/donomii/go-rwkv.cpp v0.0.0-20230604202420-1e18b2490e7e/go.mod h1:gWy7FIWioqYmYxkaoFyBnaKApeZVrUkHhv9EV9pz4dM= github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230520182345-041be06d5881 h1:dafqVivljYk51VLFnnpTXJnfWDe637EobWZ1l8PyEf8= github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230520182345-041be06d5881/go.mod h1:QIjZ9OktHFG7p+/m3sMvrAJKKdWrr1fZIK0rM6HZlyo= github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230523110439-77eab3fbfe5e h1:4PMorQuoUGAXmIzCtnNOHaasyLokXdgd8jUWwsraFTo= @@ -60,14 +60,14 @@ github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230601065548-3f7436e8a096 github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230601065548-3f7436e8a096/go.mod h1:QIjZ9OktHFG7p+/m3sMvrAJKKdWrr1fZIK0rM6HZlyo= github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230601124500-5b9e59bc07dd h1:os3FeYEIB4j5m5QlbFC3HkVcaAmLxNXz48uIfQAexm0= github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230601124500-5b9e59bc07dd/go.mod h1:QIjZ9OktHFG7p+/m3sMvrAJKKdWrr1fZIK0rM6HZlyo= +github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230606002726-57543c169e27 h1:boeMTUUBtnLU8JElZJHXrsUzROJar9/t6vGOFjkrhhI= +github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230606002726-57543c169e27/go.mod h1:QIjZ9OktHFG7p+/m3sMvrAJKKdWrr1fZIK0rM6HZlyo= github.com/gin-contrib/sse v0.1.0 h1:Y/yl/+YNO8GZSjAhjMsSuLt29uWRFHdHYUb5lYOV9qE= github.com/gin-contrib/sse v0.1.0/go.mod h1:RHrZQHXnP2xjPF+u1gW/2HnVO7nvIa9PG3Gm+fLHvGI= github.com/gin-gonic/gin v1.9.0 h1:OjyFBKICoexlu99ctXNR2gg+c5pKrKMuyjgARg9qeY8= github.com/gin-gonic/gin v1.9.0/go.mod h1:W1Me9+hsUSyj3CePGrd1/QrKJMSJ1Tu/0hFEH89961k= github.com/gin-gonic/gin v1.9.1 h1:4idEAncQnU5cB7BeOkPtxjfCSye0AAm1R0RVIqJ+Jmg= github.com/gin-gonic/gin v1.9.1/go.mod h1:hPrL7YrpYKXt5YId3A/Tnip5kqbEAP+KLuI3SUcPTeU= -github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230606002726-57543c169e27 h1:boeMTUUBtnLU8JElZJHXrsUzROJar9/t6vGOFjkrhhI= -github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230606002726-57543c169e27/go.mod h1:QIjZ9OktHFG7p+/m3sMvrAJKKdWrr1fZIK0rM6HZlyo= github.com/go-audio/audio v1.0.0 h1:zS9vebldgbQqktK4H0lUqWrG8P0NxCJVqcj7ZpNnwd4= github.com/go-audio/audio v1.0.0/go.mod h1:6uAu0+H2lHkwdGsAY+j2wHPNPpPoeg5AaEFh9FlA+Zs= github.com/go-audio/riff v1.0.0 h1:d8iCGbDvox9BfLagY94fBynxSPHO80LmZCaOsmKxokA= @@ -280,6 +280,7 @@ github.com/onsi/ginkgo/v2 v2.9.7 h1:06xGQy5www2oN160RtEZoTvnP2sPhEfePYmCDc2szss= github.com/onsi/ginkgo/v2 v2.9.7/go.mod h1:cxrmXWykAwTwhQsJOPfdIDiJ+l2RYq7U8hFU+M/1uw0= github.com/onsi/gomega v1.27.7 h1:fVih9JD6ogIiHUN6ePK7HJidyEDpWGVB5mzM7cWNXoU= github.com/onsi/gomega v1.27.7/go.mod h1:1p8OOlwo2iUUDsHnOrjE5UKYJ+e3W8eQ3qSlRahPmr4= +github.com/onsi/gomega v1.27.8/go.mod h1:2J8vzI/s+2shY9XHRApDkdgPo1TKT7P2u6fXeJKFnNQ= github.com/otiai10/mint v1.5.1 h1:XaPLeE+9vGbuyEHem1JNk3bYc7KKqyI/na0/mLd/Kks= github.com/otiai10/mint v1.5.1/go.mod h1:MJm72SBthJjz8qhefc4z1PYEieWmy8Bku7CjcAqyUSM= github.com/otiai10/openaigo v1.1.0 h1:zRvGBqZUW5PCMgdkJNsPVTBd8tOLCMTipXE5wD2pdTg= @@ -330,6 +331,7 @@ github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o github.com/stretchr/testify v1.8.2/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= github.com/stretchr/testify v1.8.3 h1:RP3t2pwF7cMEbC1dqtB6poj3niw/9gnV4Cjg5oW5gtY= github.com/stretchr/testify v1.8.3/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= +github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk= 
github.com/swaggo/swag v1.16.1/go.mod h1:9/LMvHycG3NFHfR6LwvikHv5iFvmPADQ359cKikGxto= github.com/tinylib/msgp v1.1.6/go.mod h1:75BAfg2hauQhs3qedfdDZmWAPcFMAvJE5b9rGOMufyw= github.com/tinylib/msgp v1.1.8 h1:FCXC1xanKO4I8plpHGH2P7koL/RzZs12l/+r7vakfm0= @@ -359,6 +361,8 @@ github.com/valyala/tcplisten v1.0.0 h1:rBHj/Xf+E1tRGZyWIWwJDiRY0zc1Js+CV5DqwacVS github.com/valyala/tcplisten v1.0.0/go.mod h1:T0xQ8SeCZGxckz9qRXTfG43PvQ/mcWh7FwZEA7Ioqkc= github.com/vmware-tanzu/carvel-ytt v0.45.1 h1:zjiOnV7WiKJbkLHkJCRxlmABOMIL4WhKKyahfCzFoIk= github.com/vmware-tanzu/carvel-ytt v0.45.1/go.mod h1:+r+ZVZLsETAYlRsgINztFdUdUufj2OwrTXCfOzYB4fY= +github.com/vmware-tanzu/carvel-ytt v0.45.2 h1:0+aECp3BcMXkD8L/VXVn4KOh1jD3COAt3DOXZ76iZu0= +github.com/vmware-tanzu/carvel-ytt v0.45.2/go.mod h1:oHqFBnn/JvqaUjcQo9T/a/WPUP1ituKjUpFPH+BTzfc= github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673 h1:bAn7/zixMGCfxrRTfdpNzjtPYqr8smhKouy9mxVdGPU= github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673/go.mod h1:N3UwUGtsrSj3ccvlPHLoLsHnpR27oXr4ZE984MbSER8= github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= diff --git a/openai-openapi/chi-interface.tmpl b/openai-openapi/chi-interface.tmpl deleted file mode 100644 index 5fa41189..00000000 --- a/openai-openapi/chi-interface.tmpl +++ /dev/null @@ -1,19 +0,0 @@ -// NOT ACTUALLY USING THIS CURRENTLY??? - -// ServerInterface represents all server handlers. -type ServerInterface interface { -{{range .}}{{.SummaryAsComment }} -// ({{.Method}} {{.Path}}) -{{.OperationId}}(w http.ResponseWriter, r *http.Request{{genParamArgs .PathParams}}{{if .RequiresParamObject}}, params {{.OperationId}}Params{{end}}) -{{end}} -} - -// TypedServerInterface is used to give each endpoint a fully typed method signature for cases where we're able to route automatically -type TypedServerInterface interface { -{{range .}}{{.SummaryAsComment }} -// ({{.Method}} {{.Path}}) -{{$reqBody := genDefaultRequestBodyType . -}} -{{- if ne $reqBody "" }}{{$reqBody = printf ", body %s" $reqBody}}{{end -}} -{{.OperationId}}(w http.ResponseWriter{{genParamArgs .PathParams}}{{if .RequiresParamObject}}, params {{.OperationId}}Params{{end}}{{$reqBody}}) -{{end}} -} \ No newline at end of file diff --git a/openai-openapi/config.yaml b/openai-openapi/config.yaml index 2547f33a..07be6edf 100644 --- a/openai-openapi/config.yaml +++ b/openai-openapi/config.yaml @@ -11,7 +11,6 @@ output-options: - mapstructure user-templates: endpoint-body-mapping.tmpl: ./openai-openapi/endpoint-body-mapping.tmpl -# chi/chi-interface.tmpl: ./openai-openapi/chi-interface.tmpl # union.tmpl: "// SKIP" # union-and-additional-properties.tmpl: "// SKIP" # additional-properties.tmpl: "// SKIP" \ No newline at end of file diff --git a/openai-openapi/localai_model_patches.yaml b/openai-openapi/localai_model_patches.yaml index 6d8771b2..abe2f8ae 100644 --- a/openai-openapi/localai_model_patches.yaml +++ b/openai-openapi/localai_model_patches.yaml @@ -54,6 +54,14 @@ components: type: number nullable: true x-go-type: float64 + cutstrings: + type: array + items: + type: string + trimstrings: + type: array + items: + type: string #@overlay/match missing_ok=True LocalAIImageRequestExtension: allOf: diff --git a/openai-openapi/test_segment.yml b/openai-openapi/test_segment.yml deleted file mode 100644 index 20d98a2f..00000000 --- a/openai-openapi/test_segment.yml +++ /dev/null @@ -1,30 +0,0 @@ -#! This file is just for my reference during development and will be removed. 
-components: - schemas: - CreateChatCompletionRequest: - type: object - properties: - model: - description: ID of the model to use. Currently, only `gpt-3.5-turbo` and `gpt-3.5-turbo-0301` are supported. - type: string - messages: - description: The messages to generate chat completions for, in the [chat format](/docs/guides/chat/introduction). - type: array - minItems: 1 - items: - $ref: '#/components/schemas/ChatCompletionRequestMessage' - temperature: - type: number - minimum: 0 - maximum: 2 - default: 1 - example: 1 - nullable: true - description: *completions_temperature_description - CreateImageRequest: - type: object - properties: - prompt: - description: A text description of the desired image(s). The maximum length is 1000 characters. - type: string - example: "A cute baby sea otter" \ No newline at end of file