Stash progress for the night. Loading GPTJ fails; hacked in PredictTEMP as a kludge.

Dave Lee 2023-06-08 03:11:52 -04:00
parent 8fc4b6cded
commit 0b910e0595
14 changed files with 518 additions and 193 deletions


@@ -2,14 +2,8 @@ package apiv2
import (
"fmt"
"os"
"path/filepath"
"strings"
"sync"
llama "github.com/go-skynet/go-llama.cpp"
"github.com/mitchellh/mapstructure"
"gopkg.in/yaml.v2"
)
type ConfigRegistration struct {
@@ -38,6 +32,33 @@ type Config interface {
GetRequestDefaults() interface{}
GetLocalSettings() ConfigLocalSettings
GetRegistration() ConfigRegistration
// TODO: Test these. I am not sure.
ToPredictOptions() []llama.PredictOption
ToModelOptions() []llama.ModelOption
// TODO: also dubious? Some requests technically lack prompts, but this is general enough that it may be worth keeping here.
GetPrompts() ([]Prompt, error)
GetN() (int, error)
}
type Prompt interface {
AsString() string //, bool)
AsTokens() []int
}
// How do Go people name these? Should I just ditch the interface entirely?
type PromptImpl struct {
sVal string
tVal []int
}
func (p PromptImpl) AsString() string {
return p.sVal
}
func (p PromptImpl) AsTokens() []int {
return p.tVal
}
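// For example (illustrative values, not from this commit): PromptImpl{sVal: "tell me a story"}
// carries a plain string prompt, while PromptImpl{tVal: []int{1, 2, 3}} carries an already-tokenized one.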
func (cs ConfigStub) GetRequestDefaults() interface{} {
@@ -52,6 +73,23 @@ func (cs ConfigStub) GetRegistration() ConfigRegistration {
return cs.Registration
}
func (cs ConfigStub) ToPredictOptions() []llama.PredictOption {
return []llama.PredictOption{}
}
func (cs ConfigStub) ToModelOptions() []llama.ModelOption {
return []llama.ModelOption{}
}
func (cs ConfigStub) GetPrompts() ([]Prompt, error) {
// Does this make sense?
return nil, fmt.Errorf("unsupported operation GetPrompts for %T", cs)
}
func (cs ConfigStub) GetN() (int, error) {
return 0, fmt.Errorf("unsupported operation GetN for %T", cs)
}
func (sc SpecificConfig[RequestModel]) GetRequestDefaults() interface{} {
return sc.RequestDefaults
}
@@ -68,133 +106,6 @@ func (sc SpecificConfig[RequestModel]) GetRegistration() ConfigRegistration {
return sc.Registration
}
type ConfigManager struct {
configs map[ConfigRegistration]Config
sync.Mutex
}
func NewConfigManager() *ConfigManager {
return &ConfigManager{
configs: make(map[ConfigRegistration]Config),
}
}
// Private helper that does not take the mutex itself; directory-level loading holds the lock for the whole pass, and I like that.
func (cm *ConfigManager) loadConfigFile(path string) (*Config, error) {
fmt.Printf("INTERNAL loadConfigFile for %s\n", path)
stub := ConfigStub{}
f, err := os.ReadFile(path)
if err != nil {
return nil, fmt.Errorf("cannot read config file: %w", err)
}
if err := yaml.Unmarshal(f, &stub); err != nil {
return nil, fmt.Errorf("cannot unmarshal config file: %w", err)
}
fmt.Printf("RAW STUB: %+v\n", stub)
endpoint := stub.Registration.Endpoint
// EndpointConfigMap is generated over in localai.gen.go
// It's a map that translates a string endpoint function name to an empty SpecificConfig[T], with the type parameter for that request.
if structType, ok := EndpointConfigMap[endpoint]; ok {
fmt.Printf("~~ EndpointConfigMap[%s]: %+v\n", endpoint, structType)
tmpUnmarshal := map[string]interface{}{}
if err := yaml.Unmarshal(f, &tmpUnmarshal); err != nil {
if e, ok := err.(*yaml.TypeError); ok {
fmt.Println("\n!!!!!Type error:", e)
}
return nil, fmt.Errorf("cannot unmarshal config file for %s: %w", endpoint, err)
}
fmt.Printf("$$$ tmpUnmarshal: %+v\n", tmpUnmarshal)
mapstructure.Decode(tmpUnmarshal, &structType)
fmt.Printf("AFTER UNMARSHAL %T\n%+v\n=======\n", structType, structType)
// rawConfig.RequestDefaults = structType.GetRequestDefaults()
cm.configs[structType.GetRegistration()] = structType
// fmt.Printf("\n\n\n!!!!!HIT BOTTOM!!!!!!")
return &structType, nil
// fmt.Printf("\n\n\n!!!!!\n\n\nBIG MISS!\n\n%+v\n\n%T\n%T=====", specificStruct, specificStruct, structType)
}
// for i, ts := range EndpointToRequestBodyMap {
// fmt.Printf("%s: %+v\n", i, ts)
// }
return nil, fmt.Errorf("failed to parse config for endpoint %s", endpoint)
}
func (cm *ConfigManager) LoadConfigFile(path string) (*Config, error) {
fmt.Printf("LoadConfigFile TOP for %s", path)
cm.Lock()
fmt.Println("cm.Lock done")
defer cm.Unlock()
fmt.Println("cm.Unlock done")
return cm.loadConfigFile(path)
}
func (cm *ConfigManager) LoadConfigDirectory(path string) ([]ConfigRegistration, error) {
fmt.Printf("LoadConfigDirectory TOP for %s\n", path)
cm.Lock()
defer cm.Unlock()
files, err := os.ReadDir(path)
if err != nil {
return []ConfigRegistration{}, err
}
fmt.Printf("os.ReadDir done, found %d files\n", len(files))
for _, file := range files {
// Skip anything that isn't yaml
if !strings.Contains(file.Name(), ".yaml") {
continue
}
_, err := cm.loadConfigFile(filepath.Join(path, file.Name()))
if err != nil {
return []ConfigRegistration{}, err
}
}
fmt.Printf("LoadConfigDirectory DONE %d", len(cm.configs))
return cm.listConfigs(), nil
}
func (cm *ConfigManager) GetConfig(r ConfigRegistration) (Config, bool) {
cm.Lock()
defer cm.Unlock()
v, exists := cm.configs[r]
return v, exists
}
// This is a convenience function for endpoint functions to use.
// The advantage is that it avoids mistakes in the endpoint string.
// No idea what the performance cost of this is.
func (cm *ConfigManager) GetConfigForThisEndpoint(m string) (Config, bool) {
endpoint := printCurrentFunctionName(2)
return cm.GetConfig(ConfigRegistration{
Model: m,
Endpoint: endpoint,
})
}
func (cm *ConfigManager) listConfigs() []ConfigRegistration {
var res []ConfigRegistration
for k := range cm.configs {
res = append(res, k)
}
return res
}
func (cm *ConfigManager) ListConfigs() []ConfigRegistration {
cm.Lock()
defer cm.Unlock()
return cm.listConfigs()
}
// I'm a bit dubious about these functions; there's probably a better refactoring down in pkg/model.
// But to get a minimal test up and running, here we go!
// TODO: non-text completion
@@ -328,3 +239,78 @@ func (sc SpecificConfig[RequestModel]) ToPredictOptions() []llama.PredictOption
return llamaOpts
}
// It's unclear if this code belongs here or somewhere else, but I'm jamming it here for now.
func (sc SpecificConfig[RequestModel]) GetPrompts() ([]Prompt, error) {
prompts := []Prompt{}
switch req := sc.GetRequestDefaults().(type) {
case CreateCompletionRequest:
p0, err := req.Prompt.AsCreateCompletionRequestPrompt0()
if err == nil {
p := PromptImpl{sVal: p0}
return []Prompt{p}, nil
}
p1, err := req.Prompt.AsCreateCompletionRequestPrompt1()
if err == nil {
for _, m := range p1 {
prompts = append(prompts, PromptImpl{sVal: m})
}
return prompts, nil
}
p2, err := req.Prompt.AsCreateCompletionRequestPrompt2()
if err == nil {
p := PromptImpl{tVal: p2}
return []Prompt{p}, nil
}
p3, err := req.Prompt.AsCreateCompletionRequestPrompt3()
if err == nil {
for _, t := range p3 {
prompts = append(prompts, PromptImpl{tVal: t})
}
return prompts, nil
}
case CreateChatCompletionRequest:
for _, message := range req.Messages {
prompts = append(prompts, PromptImpl{sVal: message.Content})
// TODO Deal with ROLES
// var content string
// r := req.Roles[message.Role]
// if r != "" {
// content = fmt.Sprint(r, " ", message.Content)
// } else {
// content = message.Content
// }
// if content != "" {
// prompt = prompt + content
// }
}
return prompts, nil
}
return nil, fmt.Errorf("string prompt not found for %T", sc.GetRequestDefaults())
}
func (sc SpecificConfig[RequestModel]) GetN() (int, error) {
switch req := sc.GetRequestDefaults().(type) {
case CreateChatCompletionRequest:
case CreateCompletionRequest:
case CreateEditRequest:
case CreateImageRequest:
// TODO I AM SORRY FOR THIS DIRTY HACK.
// YTT is currently mangling the n property and renaming it to false.
// This needs to be fixed before merging. However for testing.....
return *req.False, nil
}
return 0, fmt.Errorf("unsupported operation GetN for %T", sc)
}
// TODO: Not even using this, but illustration of difficulty: should this be integrated to make GetPrompts(), returning an interface of {Tokens []int, String string}
// func (sc SpecificConfig[RequestModel]) GetTokenPrompts() ([]int, error) {}
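As orientation, this is roughly how the new accessors are meant to be consumed (engine.go below does essentially this); the function name and the placement of the n = 1 fallback are illustrative, not part of the commit:
func exampleConsumePrompts(cfg Config) ([]string, error) {
	prompts, err := cfg.GetPrompts()
	if err != nil {
		return nil, err
	}
	n, err := cfg.GetN()
	if err != nil {
		n = 1 // mirror the engine's current fallback when n is unavailable
	}
	inputs := []string{}
	for _, p := range prompts {
		for i := 0; i < n; i++ {
			inputs = append(inputs, p.AsString())
		}
	}
	return inputs, nil
}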

apiv2/config_manager.go (new file, 139 lines)

@@ -0,0 +1,139 @@
package apiv2
import (
"fmt"
"os"
"path/filepath"
"strings"
"sync"
"github.com/mitchellh/mapstructure"
"gopkg.in/yaml.v2"
)
type ConfigManager struct {
configs map[ConfigRegistration]Config
sync.Mutex
}
func NewConfigManager() *ConfigManager {
return &ConfigManager{
configs: make(map[ConfigRegistration]Config),
}
}
// Private helper that does not take the mutex itself; directory-level loading holds the lock for the whole pass, and I like that.
func (cm *ConfigManager) loadConfigFile(path string) (*Config, error) {
fmt.Printf("INTERNAL loadConfigFile for %s\n", path)
stub := ConfigStub{}
f, err := os.ReadFile(path)
if err != nil {
return nil, fmt.Errorf("cannot read config file: %w", err)
}
if err := yaml.Unmarshal(f, &stub); err != nil {
return nil, fmt.Errorf("cannot unmarshal config file: %w", err)
}
fmt.Printf("RAW STUB: %+v\n", stub)
endpoint := stub.Registration.Endpoint
// EndpointConfigMap is generated over in localai.gen.go
// It's a map that translates a string endpoint function name to an empty SpecificConfig[T], with the type parameter for that request.
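// Illustrative only (not part of this commit): the generated map is assumed to have roughly
// this shape, so the lookup below yields an empty, correctly typed config to decode the rest
// of the YAML into:
//
//	var EndpointConfigMap = map[string]Config{
//		"CreateCompletion":     SpecificConfig[CreateCompletionRequest]{},
//		"CreateChatCompletion": SpecificConfig[CreateChatCompletionRequest]{},
//	}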
if structType, ok := EndpointConfigMap[endpoint]; ok {
fmt.Printf("~~ EndpointConfigMap[%s]: %+v\n", endpoint, structType)
tmpUnmarshal := map[string]interface{}{}
if err := yaml.Unmarshal(f, &tmpUnmarshal); err != nil {
if e, ok := err.(*yaml.TypeError); ok {
fmt.Println("\n!!!!!Type error:", e)
}
return nil, fmt.Errorf("cannot unmarshal config file for %s: %w", endpoint, err)
}
fmt.Printf("$$$ tmpUnmarshal: %+v\n", tmpUnmarshal)
mapstructure.Decode(tmpUnmarshal, &structType)
fmt.Printf("AFTER UNMARSHAL %T\n%+v\n=======\n", structType, structType)
// rawConfig.RequestDefaults = structType.GetRequestDefaults()
cm.configs[structType.GetRegistration()] = structType
// fmt.Printf("\n\n\n!!!!!HIT BOTTOM!!!!!!")
return &structType, nil
// fmt.Printf("\n\n\n!!!!!\n\n\nBIG MISS!\n\n%+v\n\n%T\n%T=====", specificStruct, specificStruct, structType)
}
// for i, ts := range EndpointToRequestBodyMap {
// fmt.Printf("%s: %+v\n", i, ts)
// }
return nil, fmt.Errorf("failed to parse config for endpoint %s", endpoint)
}
func (cm *ConfigManager) LoadConfigFile(path string) (*Config, error) {
fmt.Printf("LoadConfigFile TOP for %s", path)
cm.Lock()
fmt.Println("cm.Lock done")
defer cm.Unlock()
fmt.Println("cm.Unlock done")
return cm.loadConfigFile(path)
}
func (cm *ConfigManager) LoadConfigDirectory(path string) ([]ConfigRegistration, error) {
fmt.Printf("LoadConfigDirectory TOP for %s\n", path)
cm.Lock()
defer cm.Unlock()
files, err := os.ReadDir(path)
if err != nil {
return []ConfigRegistration{}, err
}
fmt.Printf("os.ReadDir done, found %d files\n", len(files))
for _, file := range files {
// Skip anything that isn't yaml
if !strings.Contains(file.Name(), ".yaml") {
continue
}
_, err := cm.loadConfigFile(filepath.Join(path, file.Name()))
if err != nil {
return []ConfigRegistration{}, err
}
}
fmt.Printf("LoadConfigDirectory DONE %d", len(cm.configs))
return cm.listConfigs(), nil
}
func (cm *ConfigManager) GetConfig(r ConfigRegistration) (Config, bool) {
cm.Lock()
defer cm.Unlock()
v, exists := cm.configs[r]
return v, exists
}
// This is a convenience function for endpoint functions to use.
// The advantage is that it avoids mistakes in the endpoint string.
// No idea what the performance cost of this is.
func (cm *ConfigManager) GetConfigForThisEndpoint(m string) (Config, bool) {
endpoint := printCurrentFunctionName(2)
return cm.GetConfig(ConfigRegistration{
Model: m,
Endpoint: endpoint,
})
}
func (cm *ConfigManager) listConfigs() []ConfigRegistration {
var res []ConfigRegistration
for k := range cm.configs {
res = append(res, k)
}
return res
}
func (cm *ConfigManager) ListConfigs() []ConfigRegistration {
cm.Lock()
defer cm.Unlock()
return cm.listConfigs()
}
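A minimal usage sketch of the new ConfigManager, assuming it runs inside package apiv2; the "./configs" path and the print format are illustrative only:
func exampleLoadConfigs() error {
	cm := NewConfigManager()
	regs, err := cm.LoadConfigDirectory("./configs") // hypothetical directory
	if err != nil {
		return err
	}
	for _, reg := range regs {
		if cfg, ok := cm.GetConfig(reg); ok {
			fmt.Printf("registered %s / %s => %T\n", reg.Model, reg.Endpoint, cfg)
		}
	}
	return nil
}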

apiv2/engine.go (new file, 205 lines)

@@ -0,0 +1,205 @@
package apiv2
import (
"fmt"
"regexp"
"strings"
"sync"
model "github.com/go-skynet/LocalAI/pkg/model"
transformers "github.com/go-skynet/go-ggml-transformers.cpp"
llama "github.com/go-skynet/go-llama.cpp"
"github.com/mitchellh/mapstructure"
gpt4all "github.com/nomic-ai/gpt4all/gpt4all-bindings/golang"
)
type LocalAIEngine struct {
loader *model.ModelLoader
mutexMapMutex sync.Mutex
mutexes map[ConfigRegistration]*sync.Mutex
cutstrings map[ConfigRegistration]map[string]*regexp.Regexp
cutstringMutex sync.Mutex
}
func NewLocalAIEngine(loader *model.ModelLoader) LocalAIEngine {
return LocalAIEngine{
loader: loader,
mutexes: make(map[ConfigRegistration]*sync.Mutex),
cutstrings: make(map[ConfigRegistration]map[string]*regexp.Regexp),
}
}
// TODO model interface? Currently scheduled for phase 3 lol
func (e *LocalAIEngine) LoadModel(config Config) (interface{}, error) {
ls := config.GetLocalSettings()
fmt.Printf("LocalAIEngine.LoadModel => %+v\n\n", config)
return e.loader.BackendLoader(ls.Backend, ls.ModelPath, config.ToModelOptions(), uint32(ls.Threads))
}
func (e *LocalAIEngine) GetModelPredictionFunction(config Config, tokenCallback func(string) bool) (func() ([]string, error), error) {
fmt.Printf("LocalAIEngine.GetModelPredictionFunction => %+v\n\n", config)
supportStreams := false
var predictOnce func(p Prompt) (string, error) = nil
inferenceModel, err := e.LoadModel(config)
if err != nil {
fmt.Printf("ERROR LOADING MODEL: %s\n", err.Error())
return nil, err
}
prompts, err := config.GetPrompts()
if err != nil {
fmt.Printf("ERROR GetPrompts: %s\n", err.Error())
return nil, err
}
switch localModel := inferenceModel.(type) {
case *llama.LLama:
fmt.Println("setting predictOnce for llama")
supportStreams = true
predictOnce = func(p Prompt) (string, error) {
if tokenCallback != nil {
localModel.SetTokenCallback(tokenCallback)
}
// TODO: AsTokens? I think that would need to be exposed from llama and the others.
str, er := localModel.Predict(
p.AsString(),
config.ToPredictOptions()...,
)
// Seems that if we don't free the callback explicitly we leave functions registered (that might try to send on closed channels)
// For instance otherwise the API returns: {"error":{"code":500,"message":"send on closed channel","type":""}}
// after a stream event has occurred
localModel.SetTokenCallback(nil)
return str, er
}
case *gpt4all.Model:
fmt.Println("setting predictOnce for gpt4all")
supportStreams = true
predictOnce = func(p Prompt) (string, error) {
if tokenCallback != nil {
localModel.SetTokenCallback(tokenCallback)
}
mappedPredictOptions := gpt4all.PredictOptions{}
mapstructure.Decode(config.ToPredictOptions(), &mappedPredictOptions)
str, err := localModel.PredictTEMP(
p.AsString(),
mappedPredictOptions,
)
// Seems that if we don't free the callback explicitly we leave functions registered (that might try to send on closed channels)
// For instance otherwise the API returns: {"error":{"code":500,"message":"send on closed channel","type":""}}
// after a stream event has occurred
localModel.SetTokenCallback(nil)
return str, err
}
case *transformers.GPTJ:
fmt.Println("setting predictOnce for GPTJ")
supportStreams = false // EXP
predictOnce = func(p Prompt) (string, error) {
mappedPredictOptions := transformers.PredictOptions{}
mapstructure.Decode(config.ToPredictOptions(), &mappedPredictOptions)
fmt.Printf("MAPPED OPTIONS: %+v\n", mappedPredictOptions)
str, err := localModel.PredictTEMP(
p.AsString(),
mappedPredictOptions,
)
return str, err
}
}
if predictOnce == nil {
fmt.Printf("Failed to find a predictOnce for %T", inferenceModel)
return nil, fmt.Errorf("failed to find a predictOnce for %T", inferenceModel)
}
req := config.GetRequestDefaults()
return func() ([]string, error) {
// This is still needed, see: https://github.com/ggerganov/llama.cpp/discussions/784
e.mutexMapMutex.Lock()
r := config.GetRegistration()
l, ok := e.mutexes[r]
if !ok {
m := &sync.Mutex{}
e.mutexes[r] = m
l = m
}
e.mutexMapMutex.Unlock()
l.Lock()
defer l.Unlock()
results := []string{}
n, err := config.GetN()
if err != nil {
// TODO live to regret this, but for now...
n = 1
}
for p_i, prompt := range prompts {
	for n_i := 0; n_i < n; n_i++ {
		res, err := predictOnce(prompt)
		if err != nil {
			fmt.Printf("ERROR DURING GetModelPredictionFunction -> PredictionFunction for %T with p_i: %d/n_i: %d\n%s", config, p_i, n_i, err.Error())
			return nil, err
		}
		// TODO: this used to be part of finetune. For questionable parameter-plumbing reasons it has moved up here; revisit if it gets smelly.
		if ccr, isCCR := req.(CreateCompletionRequest); isCCR {
			if ccr.Echo != nil && *ccr.Echo {
				res = prompt.AsString() + res
			}
		}
		res = e.Finetune(config, res)
		if tokenCallback != nil && !supportStreams {
			tokenCallback(res)
		}
		results = append(results, res)
	}
}
return results, nil
}, nil
}
func (e *LocalAIEngine) Finetune(config Config, prediction string) string {
reg := config.GetRegistration()
switch req := config.GetRequestDefaults().(type) {
case *CreateChatCompletionRequest:
case *CreateCompletionRequest:
ext := req.XLocalaiExtensions
if ext != nil {
	if ext.Cutstrings != nil {
		for _, c := range *ext.Cutstrings {
			e.cutstringMutex.Lock()
			// Lazily create the per-registration cache; writing to a nil inner map would panic.
			if e.cutstrings[reg] == nil {
				e.cutstrings[reg] = make(map[string]*regexp.Regexp)
			}
			regex, ok := e.cutstrings[reg][c]
			if !ok {
				regex = regexp.MustCompile(c)
				e.cutstrings[reg][c] = regex
			}
			e.cutstringMutex.Unlock()
			prediction = regex.ReplaceAllString(prediction, "")
		}
	}
	if ext.Trimstrings != nil {
		for _, c := range *ext.Trimstrings {
			prediction = strings.TrimSpace(strings.TrimPrefix(prediction, c))
		}
	}
}
}
return prediction
}
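A sketch of how the engine is expected to be driven end to end, again assuming package apiv2; the model name and the "CreateCompletion" endpoint string are placeholders, not values taken from this commit:
func examplePredict(loader *model.ModelLoader, cm *ConfigManager) ([]string, error) {
	engine := NewLocalAIEngine(loader)
	cfg, ok := cm.GetConfig(ConfigRegistration{Model: "example-model", Endpoint: "CreateCompletion"})
	if !ok {
		return nil, fmt.Errorf("no config registered for that model/endpoint pair")
	}
	// nil token callback: no streaming, results come back all at once.
	predict, err := engine.GetModelPredictionFunction(cfg, nil)
	if err != nil {
		return nil, err
	}
	return predict()
}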


@@ -12,6 +12,7 @@ import (
type LocalAIServer struct {
configManager *ConfigManager
loader *model.ModelLoader
engine *LocalAIEngine
}
func combineRequestAndConfig[RequestType any](configManager *ConfigManager, model string, requestFromInput *RequestType) (*SpecificConfig[RequestType], error) {
@@ -93,7 +94,33 @@ func (las *LocalAIServer) CreateChatCompletion(ctx context.Context, request Crea
fmt.Printf("message #%d: %+v", i, m)
}
return CreateChatCompletion200JSONResponse{}, nil
fmt.Println("Dodgy Stuff Below")
predict, err := las.engine.GetModelPredictionFunction(chatRequestConfig, nil)
if err != nil {
fmt.Printf("!!!!!!!!!! Error obtaining predict fn %s\n", err.Error())
return nil, err
}
predictions, err := predict()
if err != nil {
fmt.Printf("!!!!!!!!!! Error INSIDE predict fn %s\n", err.Error())
return nil, err
}
resp := CreateChatCompletion200JSONResponse{}
for i, prediction := range predictions {
	index := i // copy before taking the address: &i would alias the loop variable across iterations
	resp.Choices = append(resp.Choices, CreateChatCompletionResponseChoice{
		Message: &ChatCompletionResponseMessage{
			Content: prediction,
			Role:    "assistant", // TODO: don't hardcode the role
		},
		Index: &index,
	})
}
return resp, nil
// panic("unimplemented")
}


@@ -7,9 +7,11 @@ import (
)
func NewLocalAINetHTTPServer(configManager *ConfigManager, loader *model.ModelLoader, address string) *LocalAIServer {
engine := NewLocalAIEngine(loader)
localAI := LocalAIServer{
configManager: configManager,
loader: loader,
engine: &engine,
}
var middlewares []StrictMiddlewareFunc