mirror of
https://github.com/mudler/LocalAI.git
synced 2025-06-29 06:05:00 +00:00
stash progress for the night. loading GPTJ fails, hacked in PredictTEMP as kludge
This commit is contained in:
parent
8fc4b6cded
commit
0b910e0595
14 changed files with 518 additions and 193 deletions
6
Makefile
6
Makefile
|
@ -78,9 +78,9 @@ openai-openapi/transformed: openai-openapi/spec
|
|||
|
||||
apiv2/localai.gen.go: prepare-sources
|
||||
echo "go mod download done, running YTT"
|
||||
cp ./openai-openapi/transformed/localai.yaml ./openai-openapi/transformed/localai.orig.yaml
|
||||
$(GOCMD) run github.com/vmware-tanzu/carvel-ytt/cmd/ytt --output-files ./openai-openapi/transformed -f ./openai-openapi/transformed/localai.yaml -f ./openai-openapi/localai_model_patches.yaml
|
||||
# -f ./openai-openapi/remove_depreciated_openapi.yaml
|
||||
# cp ./openai-openapi/transformed/localai.yaml ./openai-openapi/transformed/localai.orig.yaml
|
||||
$(GOCMD) run github.com/vmware-tanzu/carvel-ytt/cmd/ytt --output-files ./openai-openapi/transformed -f ./openai-openapi/transformed/localai.yaml -f ./openai-openapi/localai_model_patches.yaml
|
||||
cp ./openai-openapi/transformed/localai.yaml ./openai-openapi/transformed/localai.mid.yaml
|
||||
echo "YTT Done, generating code..."
|
||||
$(GOCMD) run github.com/deepmap/oapi-codegen/cmd/oapi-codegen --config=./openai-openapi/config.yaml ./openai-openapi/transformed/localai.yaml
|
||||
|
||||
|
|
252
apiv2/config.go
252
apiv2/config.go
|
@ -2,14 +2,8 @@ package apiv2
|
|||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"sync"
|
||||
|
||||
llama "github.com/go-skynet/go-llama.cpp"
|
||||
"github.com/mitchellh/mapstructure"
|
||||
"gopkg.in/yaml.v2"
|
||||
)
|
||||
|
||||
type ConfigRegistration struct {
|
||||
|
@ -38,6 +32,33 @@ type Config interface {
|
|||
GetRequestDefaults() interface{}
|
||||
GetLocalSettings() ConfigLocalSettings
|
||||
GetRegistration() ConfigRegistration
|
||||
|
||||
// TODO: Test these. I am not sure.
|
||||
ToPredictOptions() []llama.PredictOption
|
||||
ToModelOptions() []llama.ModelOption
|
||||
|
||||
// TODO also dubious? Technically some requests lack prompts, but it's pretty general and may just be worth sticking here.
|
||||
GetPrompts() ([]Prompt, error)
|
||||
GetN() (int, error)
|
||||
}
|
||||
|
||||
// Prompt is one model input that can be read either as text or as
// token IDs.
type Prompt interface {
	// AsTokens returns the prompt in tokenized form.
	AsTokens() []int
	// AsString returns the prompt in textual form.
	AsString() string
}
|
||||
|
||||
// PromptImpl is the concrete Prompt used by this package. It simply stores
// both representations; whichever one was not supplied stays at its zero
// value.
//
// NOTE(review): the naming, and whether the Prompt interface is needed at
// all, is still an open design question.
type PromptImpl struct {
	sVal string // textual form of the prompt
	tVal []int  // tokenized form of the prompt
}

// AsString returns the stored text ("" when none was set).
func (p PromptImpl) AsString() string {
	text := p.sVal
	return text
}

// AsTokens returns the stored token IDs (nil when none were set).
func (p PromptImpl) AsTokens() []int {
	tokens := p.tVal
	return tokens
}
|
||||
|
||||
func (cs ConfigStub) GetRequestDefaults() interface{} {
|
||||
|
@ -52,6 +73,23 @@ func (cs ConfigStub) GetRegistration() ConfigRegistration {
|
|||
return cs.Registration
|
||||
}
|
||||
|
||||
func (cs ConfigStub) ToPredictOptions() []llama.PredictOption {
|
||||
return []llama.PredictOption{}
|
||||
}
|
||||
|
||||
func (cs ConfigStub) ToModelOptions() []llama.ModelOption {
|
||||
return []llama.ModelOption{}
|
||||
}
|
||||
|
||||
func (cs ConfigStub) GetPrompts() ([]Prompt, error) {
|
||||
// Does this make sense?
|
||||
return nil, fmt.Errorf("unsupported operation GetPrompts for %T", cs)
|
||||
}
|
||||
|
||||
func (cs ConfigStub) GetN() (int, error) {
|
||||
return 0, fmt.Errorf("unsupported operation GetN for %T", cs)
|
||||
}
|
||||
|
||||
func (sc SpecificConfig[RequestModel]) GetRequestDefaults() interface{} {
|
||||
return sc.RequestDefaults
|
||||
}
|
||||
|
@ -68,133 +106,6 @@ func (sc SpecificConfig[RequestModel]) GetRegistration() ConfigRegistration {
|
|||
return sc.Registration
|
||||
}
|
||||
|
||||
type ConfigManager struct {
|
||||
configs map[ConfigRegistration]Config
|
||||
sync.Mutex
|
||||
}
|
||||
|
||||
func NewConfigManager() *ConfigManager {
|
||||
return &ConfigManager{
|
||||
configs: make(map[ConfigRegistration]Config),
|
||||
}
|
||||
}
|
||||
|
||||
// Private helper method doesn't enforce the mutex. This is because loading at the directory level keeps the lock up the whole time, and I like that.
|
||||
func (cm *ConfigManager) loadConfigFile(path string) (*Config, error) {
|
||||
fmt.Printf("INTERNAL loadConfigFile for %s\n", path)
|
||||
stub := ConfigStub{}
|
||||
f, err := os.ReadFile(path)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot read config file: %w", err)
|
||||
}
|
||||
if err := yaml.Unmarshal(f, &stub); err != nil {
|
||||
return nil, fmt.Errorf("cannot unmarshal config file: %w", err)
|
||||
}
|
||||
fmt.Printf("RAW STUB: %+v\n", stub)
|
||||
|
||||
endpoint := stub.Registration.Endpoint
|
||||
|
||||
// EndpointConfigMap is generated over in localai.gen.go
|
||||
// It's a map that translates a string endpoint function name to an empty SpecificConfig[T], with the type parameter for that request.
|
||||
if structType, ok := EndpointConfigMap[endpoint]; ok {
|
||||
fmt.Printf("~~ EndpointConfigMap[%s]: %+v\n", endpoint, structType)
|
||||
tmpUnmarshal := map[string]interface{}{}
|
||||
if err := yaml.Unmarshal(f, &tmpUnmarshal); err != nil {
|
||||
if e, ok := err.(*yaml.TypeError); ok {
|
||||
fmt.Println("\n!!!!!Type error:", e)
|
||||
}
|
||||
return nil, fmt.Errorf("cannot unmarshal config file for %s: %w", endpoint, err)
|
||||
}
|
||||
fmt.Printf("$$$ tmpUnmarshal: %+v\n", tmpUnmarshal)
|
||||
mapstructure.Decode(tmpUnmarshal, &structType)
|
||||
|
||||
fmt.Printf("AFTER UNMARSHAL %T\n%+v\n=======\n", structType, structType)
|
||||
|
||||
// rawConfig.RequestDefaults = structType.GetRequestDefaults()
|
||||
|
||||
cm.configs[structType.GetRegistration()] = structType
|
||||
// fmt.Printf("\n\n\n!!!!!HIT BOTTOM!!!!!!")
|
||||
return &structType, nil
|
||||
// fmt.Printf("\n\n\n!!!!!\n\n\nBIG MISS!\n\n%+v\n\n%T\n%T=====", specificStruct, specificStruct, structType)
|
||||
}
|
||||
|
||||
// for i, ts := range EndpointToRequestBodyMap {
|
||||
// fmt.Printf("%s: %+v\n", i, ts)
|
||||
// }
|
||||
|
||||
return nil, fmt.Errorf("failed to parse config for endpoint %s", endpoint)
|
||||
}
|
||||
|
||||
func (cm *ConfigManager) LoadConfigFile(path string) (*Config, error) {
|
||||
fmt.Printf("LoadConfigFile TOP for %s", path)
|
||||
|
||||
cm.Lock()
|
||||
fmt.Println("cm.Lock done")
|
||||
|
||||
defer cm.Unlock()
|
||||
fmt.Println("cm.Unlock done")
|
||||
|
||||
return cm.loadConfigFile(path)
|
||||
}
|
||||
|
||||
func (cm *ConfigManager) LoadConfigDirectory(path string) ([]ConfigRegistration, error) {
|
||||
fmt.Printf("LoadConfigDirectory TOP for %s\n", path)
|
||||
cm.Lock()
|
||||
defer cm.Unlock()
|
||||
files, err := os.ReadDir(path)
|
||||
if err != nil {
|
||||
return []ConfigRegistration{}, err
|
||||
}
|
||||
fmt.Printf("os.ReadDir done, found %d files\n", len(files))
|
||||
|
||||
for _, file := range files {
|
||||
// Skip anything that isn't yaml
|
||||
if !strings.Contains(file.Name(), ".yaml") {
|
||||
continue
|
||||
}
|
||||
_, err := cm.loadConfigFile(filepath.Join(path, file.Name()))
|
||||
if err != nil {
|
||||
return []ConfigRegistration{}, err
|
||||
}
|
||||
}
|
||||
|
||||
fmt.Printf("LoadConfigDirectory DONE %d", len(cm.configs))
|
||||
|
||||
return cm.listConfigs(), nil
|
||||
}
|
||||
|
||||
func (cm *ConfigManager) GetConfig(r ConfigRegistration) (Config, bool) {
|
||||
cm.Lock()
|
||||
defer cm.Unlock()
|
||||
v, exists := cm.configs[r]
|
||||
return v, exists
|
||||
}
|
||||
|
||||
// This is a convience function for endpoint functions to use.
|
||||
// The advantage is it avoids errors in the endpoint string
|
||||
// Not a clue what the performance cost of this is.
|
||||
func (cm *ConfigManager) GetConfigForThisEndpoint(m string) (Config, bool) {
|
||||
endpoint := printCurrentFunctionName(2)
|
||||
return cm.GetConfig(ConfigRegistration{
|
||||
Model: m,
|
||||
Endpoint: endpoint,
|
||||
})
|
||||
}
|
||||
|
||||
func (cm *ConfigManager) listConfigs() []ConfigRegistration {
|
||||
var res []ConfigRegistration
|
||||
for k := range cm.configs {
|
||||
res = append(res, k)
|
||||
}
|
||||
return res
|
||||
}
|
||||
|
||||
func (cm *ConfigManager) ListConfigs() []ConfigRegistration {
|
||||
cm.Lock()
|
||||
defer cm.Unlock()
|
||||
return cm.listConfigs()
|
||||
}
|
||||
|
||||
// These functions I'm a bit dubious about. I think there's a better refactoring down in pkg/model
|
||||
// But to get a minimal test up and running, here we go!
|
||||
// TODO: non text completion
|
||||
|
@ -328,3 +239,78 @@ func (sc SpecificConfig[RequestModel]) ToPredictOptions() []llama.PredictOption
|
|||
|
||||
return llamaOpts
|
||||
}
|
||||
|
||||
// It's unclear if this code belongs here or somewhere else, but I'm jamming it here for now.
|
||||
func (sc SpecificConfig[RequestModel]) GetPrompts() ([]Prompt, error) {
|
||||
prompts := []Prompt{}
|
||||
|
||||
switch req := sc.GetRequestDefaults().(type) {
|
||||
case CreateCompletionRequest:
|
||||
p0, err := req.Prompt.AsCreateCompletionRequestPrompt0()
|
||||
if err == nil {
|
||||
p := PromptImpl{sVal: p0}
|
||||
return []Prompt{p}, nil
|
||||
}
|
||||
p1, err := req.Prompt.AsCreateCompletionRequestPrompt1()
|
||||
if err == nil {
|
||||
for _, m := range p1 {
|
||||
prompts = append(prompts, PromptImpl{sVal: m})
|
||||
}
|
||||
return prompts, nil
|
||||
}
|
||||
p2, err := req.Prompt.AsCreateCompletionRequestPrompt2()
|
||||
if err == nil {
|
||||
p := PromptImpl{tVal: p2}
|
||||
return []Prompt{p}, nil
|
||||
}
|
||||
p3, err := req.Prompt.AsCreateCompletionRequestPrompt3()
|
||||
if err == nil {
|
||||
for _, t := range p3 {
|
||||
prompts = append(prompts, PromptImpl{tVal: t})
|
||||
}
|
||||
return prompts, nil
|
||||
}
|
||||
case CreateChatCompletionRequest:
|
||||
|
||||
for _, message := range req.Messages {
|
||||
|
||||
prompts = append(prompts, PromptImpl{sVal: message.Content})
|
||||
|
||||
// TODO Deal with ROLES
|
||||
// var content string
|
||||
// r := req.Roles[message.Role]
|
||||
// if r != "" {
|
||||
// content = fmt.Sprint(r, " ", message.Content)
|
||||
// } else {
|
||||
// content = message.Content
|
||||
// }
|
||||
|
||||
// if content != "" {
|
||||
// prompt = prompt + content
|
||||
// }
|
||||
|
||||
}
|
||||
return prompts, nil
|
||||
}
|
||||
|
||||
return nil, fmt.Errorf("string prompt not found for %T", sc.GetRequestDefaults())
|
||||
}
|
||||
|
||||
func (sc SpecificConfig[RequestModel]) GetN() (int, error) {
|
||||
switch req := sc.GetRequestDefaults().(type) {
|
||||
|
||||
case CreateChatCompletionRequest:
|
||||
case CreateCompletionRequest:
|
||||
case CreateEditRequest:
|
||||
case CreateImageRequest:
|
||||
// TODO I AM SORRY FOR THIS DIRTY HACK.
|
||||
// YTT is currently mangling the n property and renaming it to false.
|
||||
// This needs to be fixed before merging. However for testing.....
|
||||
return *req.False, nil
|
||||
}
|
||||
|
||||
return 0, fmt.Errorf("unsupported operation GetN for %T", sc)
|
||||
}
|
||||
|
||||
// TODO: Not even using this, but illustration of difficulty: should this be integrated to make GetPrompts(), returning an interface of {Tokens []int, String string}
|
||||
// func (sc SpecificConfig[RequestModel]) GetTokenPrompts() ([]int, error) {}
|
||||
|
|
139
apiv2/config_manager.go
Normal file
139
apiv2/config_manager.go
Normal file
|
@ -0,0 +1,139 @@
|
|||
package apiv2
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"sync"
|
||||
|
||||
"github.com/mitchellh/mapstructure"
|
||||
"gopkg.in/yaml.v2"
|
||||
)
|
||||
|
||||
type ConfigManager struct {
|
||||
configs map[ConfigRegistration]Config
|
||||
sync.Mutex
|
||||
}
|
||||
|
||||
func NewConfigManager() *ConfigManager {
|
||||
return &ConfigManager{
|
||||
configs: make(map[ConfigRegistration]Config),
|
||||
}
|
||||
}
|
||||
|
||||
// Private helper method doesn't enforce the mutex. This is because loading at the directory level keeps the lock up the whole time, and I like that.
|
||||
func (cm *ConfigManager) loadConfigFile(path string) (*Config, error) {
|
||||
fmt.Printf("INTERNAL loadConfigFile for %s\n", path)
|
||||
stub := ConfigStub{}
|
||||
f, err := os.ReadFile(path)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot read config file: %w", err)
|
||||
}
|
||||
if err := yaml.Unmarshal(f, &stub); err != nil {
|
||||
return nil, fmt.Errorf("cannot unmarshal config file: %w", err)
|
||||
}
|
||||
fmt.Printf("RAW STUB: %+v\n", stub)
|
||||
|
||||
endpoint := stub.Registration.Endpoint
|
||||
|
||||
// EndpointConfigMap is generated over in localai.gen.go
|
||||
// It's a map that translates a string endpoint function name to an empty SpecificConfig[T], with the type parameter for that request.
|
||||
if structType, ok := EndpointConfigMap[endpoint]; ok {
|
||||
fmt.Printf("~~ EndpointConfigMap[%s]: %+v\n", endpoint, structType)
|
||||
tmpUnmarshal := map[string]interface{}{}
|
||||
if err := yaml.Unmarshal(f, &tmpUnmarshal); err != nil {
|
||||
if e, ok := err.(*yaml.TypeError); ok {
|
||||
fmt.Println("\n!!!!!Type error:", e)
|
||||
}
|
||||
return nil, fmt.Errorf("cannot unmarshal config file for %s: %w", endpoint, err)
|
||||
}
|
||||
fmt.Printf("$$$ tmpUnmarshal: %+v\n", tmpUnmarshal)
|
||||
mapstructure.Decode(tmpUnmarshal, &structType)
|
||||
|
||||
fmt.Printf("AFTER UNMARSHAL %T\n%+v\n=======\n", structType, structType)
|
||||
|
||||
// rawConfig.RequestDefaults = structType.GetRequestDefaults()
|
||||
|
||||
cm.configs[structType.GetRegistration()] = structType
|
||||
// fmt.Printf("\n\n\n!!!!!HIT BOTTOM!!!!!!")
|
||||
return &structType, nil
|
||||
// fmt.Printf("\n\n\n!!!!!\n\n\nBIG MISS!\n\n%+v\n\n%T\n%T=====", specificStruct, specificStruct, structType)
|
||||
}
|
||||
|
||||
// for i, ts := range EndpointToRequestBodyMap {
|
||||
// fmt.Printf("%s: %+v\n", i, ts)
|
||||
// }
|
||||
|
||||
return nil, fmt.Errorf("failed to parse config for endpoint %s", endpoint)
|
||||
}
|
||||
|
||||
func (cm *ConfigManager) LoadConfigFile(path string) (*Config, error) {
|
||||
fmt.Printf("LoadConfigFile TOP for %s", path)
|
||||
|
||||
cm.Lock()
|
||||
fmt.Println("cm.Lock done")
|
||||
|
||||
defer cm.Unlock()
|
||||
fmt.Println("cm.Unlock done")
|
||||
|
||||
return cm.loadConfigFile(path)
|
||||
}
|
||||
|
||||
func (cm *ConfigManager) LoadConfigDirectory(path string) ([]ConfigRegistration, error) {
|
||||
fmt.Printf("LoadConfigDirectory TOP for %s\n", path)
|
||||
cm.Lock()
|
||||
defer cm.Unlock()
|
||||
files, err := os.ReadDir(path)
|
||||
if err != nil {
|
||||
return []ConfigRegistration{}, err
|
||||
}
|
||||
fmt.Printf("os.ReadDir done, found %d files\n", len(files))
|
||||
|
||||
for _, file := range files {
|
||||
// Skip anything that isn't yaml
|
||||
if !strings.Contains(file.Name(), ".yaml") {
|
||||
continue
|
||||
}
|
||||
_, err := cm.loadConfigFile(filepath.Join(path, file.Name()))
|
||||
if err != nil {
|
||||
return []ConfigRegistration{}, err
|
||||
}
|
||||
}
|
||||
|
||||
fmt.Printf("LoadConfigDirectory DONE %d", len(cm.configs))
|
||||
|
||||
return cm.listConfigs(), nil
|
||||
}
|
||||
|
||||
func (cm *ConfigManager) GetConfig(r ConfigRegistration) (Config, bool) {
|
||||
cm.Lock()
|
||||
defer cm.Unlock()
|
||||
v, exists := cm.configs[r]
|
||||
return v, exists
|
||||
}
|
||||
|
||||
// This is a convience function for endpoint functions to use.
|
||||
// The advantage is it avoids errors in the endpoint string
|
||||
// Not a clue what the performance cost of this is.
|
||||
func (cm *ConfigManager) GetConfigForThisEndpoint(m string) (Config, bool) {
|
||||
endpoint := printCurrentFunctionName(2)
|
||||
return cm.GetConfig(ConfigRegistration{
|
||||
Model: m,
|
||||
Endpoint: endpoint,
|
||||
})
|
||||
}
|
||||
|
||||
func (cm *ConfigManager) listConfigs() []ConfigRegistration {
|
||||
var res []ConfigRegistration
|
||||
for k := range cm.configs {
|
||||
res = append(res, k)
|
||||
}
|
||||
return res
|
||||
}
|
||||
|
||||
func (cm *ConfigManager) ListConfigs() []ConfigRegistration {
|
||||
cm.Lock()
|
||||
defer cm.Unlock()
|
||||
return cm.listConfigs()
|
||||
}
|
205
apiv2/engine.go
Normal file
205
apiv2/engine.go
Normal file
|
@ -0,0 +1,205 @@
|
|||
package apiv2
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"regexp"
|
||||
"strings"
|
||||
"sync"
|
||||
|
||||
model "github.com/go-skynet/LocalAI/pkg/model"
|
||||
transformers "github.com/go-skynet/go-ggml-transformers.cpp"
|
||||
llama "github.com/go-skynet/go-llama.cpp"
|
||||
"github.com/mitchellh/mapstructure"
|
||||
gpt4all "github.com/nomic-ai/gpt4all/gpt4all-bindings/golang"
|
||||
)
|
||||
|
||||
type LocalAIEngine struct {
|
||||
loader *model.ModelLoader
|
||||
mutexMapMutex sync.Mutex
|
||||
mutexes map[ConfigRegistration]*sync.Mutex
|
||||
cutstrings map[ConfigRegistration]map[string]*regexp.Regexp
|
||||
cutstringMutex sync.Mutex
|
||||
}
|
||||
|
||||
func NewLocalAIEngine(loader *model.ModelLoader) LocalAIEngine {
|
||||
return LocalAIEngine{
|
||||
loader: loader,
|
||||
mutexes: make(map[ConfigRegistration]*sync.Mutex),
|
||||
cutstrings: make(map[ConfigRegistration]map[string]*regexp.Regexp),
|
||||
}
|
||||
}
|
||||
|
||||
// TODO model interface? Currently scheduled for phase 3 lol
|
||||
func (e *LocalAIEngine) LoadModel(config Config) (interface{}, error) {
|
||||
ls := config.GetLocalSettings()
|
||||
fmt.Printf("LocalAIEngine.LoadModel => %+v\n\n", config)
|
||||
return e.loader.BackendLoader(ls.Backend, ls.ModelPath, config.ToModelOptions(), uint32(ls.Threads))
|
||||
}
|
||||
|
||||
func (e *LocalAIEngine) GetModelPredictionFunction(config Config, tokenCallback func(string) bool) (func() ([]string, error), error) {
|
||||
|
||||
fmt.Printf("LocalAIEngine.GetModelPredictionFunction => %+v\n\n", config)
|
||||
|
||||
supportStreams := false
|
||||
var predictOnce func(p Prompt) (string, error) = nil
|
||||
|
||||
inferenceModel, err := e.LoadModel(config)
|
||||
if err != nil {
|
||||
fmt.Printf("ERROR LOADING MODEL: %s\n", err.Error())
|
||||
return nil, err
|
||||
}
|
||||
|
||||
prompts, err := config.GetPrompts()
|
||||
if err != nil {
|
||||
fmt.Printf("ERROR GetPrompts: %s\n", err.Error())
|
||||
return nil, err
|
||||
}
|
||||
|
||||
switch localModel := inferenceModel.(type) {
|
||||
case *llama.LLama:
|
||||
fmt.Println("setting predictOnce for llama")
|
||||
supportStreams = true
|
||||
predictOnce = func(p Prompt) (string, error) {
|
||||
|
||||
if tokenCallback != nil {
|
||||
localModel.SetTokenCallback(tokenCallback)
|
||||
}
|
||||
|
||||
// TODO: AsTokens? I think that would need to be exposed from llama and the others.
|
||||
str, er := localModel.Predict(
|
||||
p.AsString(),
|
||||
config.ToPredictOptions()...,
|
||||
)
|
||||
// Seems that if we don't free the callback explicitly we leave functions registered (that might try to send on closed channels)
|
||||
// For instance otherwise the API returns: {"error":{"code":500,"message":"send on closed channel","type":""}}
|
||||
// after a stream event has occurred
|
||||
localModel.SetTokenCallback(nil)
|
||||
return str, er
|
||||
}
|
||||
case *gpt4all.Model:
|
||||
fmt.Println("setting predictOnce for gpt4all")
|
||||
supportStreams = true
|
||||
predictOnce = func(p Prompt) (string, error) {
|
||||
if tokenCallback != nil {
|
||||
localModel.SetTokenCallback(tokenCallback)
|
||||
}
|
||||
|
||||
mappedPredictOptions := gpt4all.PredictOptions{}
|
||||
|
||||
mapstructure.Decode(config.ToPredictOptions(), &mappedPredictOptions)
|
||||
|
||||
str, err := localModel.PredictTEMP(
|
||||
p.AsString(),
|
||||
mappedPredictOptions,
|
||||
)
|
||||
// Seems that if we don't free the callback explicitly we leave functions registered (that might try to send on closed channels)
|
||||
// For instance otherwise the API returns: {"error":{"code":500,"message":"send on closed channel","type":""}}
|
||||
// after a stream event has occurred
|
||||
localModel.SetTokenCallback(nil)
|
||||
return str, err
|
||||
}
|
||||
case *transformers.GPTJ:
|
||||
fmt.Println("setting predictOnce for GPTJ")
|
||||
supportStreams = false // EXP
|
||||
predictOnce = func(p Prompt) (string, error) {
|
||||
mappedPredictOptions := transformers.PredictOptions{}
|
||||
|
||||
mapstructure.Decode(config.ToPredictOptions(), &mappedPredictOptions)
|
||||
|
||||
fmt.Printf("MAPPED OPTIONS: %+v\n", mappedPredictOptions)
|
||||
|
||||
str, err := localModel.PredictTEMP(
|
||||
p.AsString(),
|
||||
mappedPredictOptions,
|
||||
)
|
||||
return str, err
|
||||
}
|
||||
}
|
||||
|
||||
if predictOnce == nil {
|
||||
fmt.Printf("Failed to find a predictOnce for %T", inferenceModel)
|
||||
return nil, fmt.Errorf("failed to find a predictOnce for %T", inferenceModel)
|
||||
}
|
||||
|
||||
req := config.GetRequestDefaults()
|
||||
|
||||
return func() ([]string, error) {
|
||||
// This is still needed, see: https://github.com/ggerganov/llama.cpp/discussions/784
|
||||
e.mutexMapMutex.Lock()
|
||||
r := config.GetRegistration()
|
||||
l, ok := e.mutexes[r]
|
||||
if !ok {
|
||||
m := &sync.Mutex{}
|
||||
e.mutexes[r] = m
|
||||
l = m
|
||||
}
|
||||
e.mutexMapMutex.Unlock()
|
||||
l.Lock()
|
||||
defer l.Unlock()
|
||||
|
||||
results := []string{}
|
||||
|
||||
n, err := config.GetN()
|
||||
|
||||
if err != nil {
|
||||
// TODO live to regret this, but for now...
|
||||
n = 1
|
||||
}
|
||||
|
||||
for p_i, prompt := range prompts {
|
||||
for n_i := 0; n_i < n; n_i++ {
|
||||
res, err := predictOnce(prompt)
|
||||
|
||||
// TODO: this used to be a part of finetune. For.... questionable parameter reasons I've moved it up here. Revisit this if it's smelly in the future.
|
||||
ccr, is_ccr := req.(CreateCompletionRequest)
|
||||
if is_ccr {
|
||||
if *ccr.Echo {
|
||||
res = prompt.AsString() + res
|
||||
}
|
||||
}
|
||||
|
||||
res = e.Finetune(config, res)
|
||||
|
||||
if err != nil {
|
||||
fmt.Printf("ERROR DURING GetModelPredictionFunction -> PredictionFunction for %T with p_i: %d/n_i: %d\n%s", config, p_i, n_i, err.Error())
|
||||
return nil, err
|
||||
}
|
||||
if tokenCallback != nil && !supportStreams {
|
||||
tokenCallback(res)
|
||||
}
|
||||
results = append(results, res)
|
||||
}
|
||||
}
|
||||
|
||||
return results, nil
|
||||
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (e *LocalAIEngine) Finetune(config Config, prediction string) string {
|
||||
|
||||
reg := config.GetRegistration()
|
||||
switch req := config.GetRequestDefaults().(type) {
|
||||
case *CreateChatCompletionRequest:
|
||||
case *CreateCompletionRequest:
|
||||
ext := req.XLocalaiExtensions
|
||||
if ext != nil {
|
||||
for _, c := range *ext.Cutstrings {
|
||||
e.cutstringMutex.Lock()
|
||||
regex, ok := e.cutstrings[reg][c]
|
||||
if !ok {
|
||||
e.cutstrings[reg][c] = regexp.MustCompile(c)
|
||||
regex = e.cutstrings[reg][c]
|
||||
}
|
||||
e.cutstringMutex.Unlock()
|
||||
prediction = regex.ReplaceAllString(prediction, "")
|
||||
}
|
||||
|
||||
for _, c := range *ext.Trimstrings {
|
||||
prediction = strings.TrimSpace(strings.TrimPrefix(prediction, c))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return prediction
|
||||
}
|
|
@ -12,6 +12,7 @@ import (
|
|||
type LocalAIServer struct {
|
||||
configManager *ConfigManager
|
||||
loader *model.ModelLoader
|
||||
engine *LocalAIEngine
|
||||
}
|
||||
|
||||
func combineRequestAndConfig[RequestType any](configManager *ConfigManager, model string, requestFromInput *RequestType) (*SpecificConfig[RequestType], error) {
|
||||
|
@ -93,7 +94,33 @@ func (las *LocalAIServer) CreateChatCompletion(ctx context.Context, request Crea
|
|||
fmt.Printf("message #%d: %+v", i, m)
|
||||
}
|
||||
|
||||
return CreateChatCompletion200JSONResponse{}, nil
|
||||
fmt.Println("Dodgy Stuff Below")
|
||||
|
||||
predict, err := las.engine.GetModelPredictionFunction(chatRequestConfig, nil)
|
||||
if err != nil {
|
||||
fmt.Printf("!!!!!!!!!! Error obtaining predict fn %s\n", err.Error())
|
||||
return nil, err
|
||||
}
|
||||
|
||||
predictions, err := predict()
|
||||
if err != nil {
|
||||
fmt.Printf("!!!!!!!!!! Error INSIDE predict fn %s\n", err.Error())
|
||||
return nil, err
|
||||
}
|
||||
|
||||
resp := CreateChatCompletion200JSONResponse{}
|
||||
|
||||
for i, prediction := range predictions {
|
||||
resp.Choices = append(resp.Choices, CreateChatCompletionResponseChoice{
|
||||
Message: &ChatCompletionResponseMessage{
|
||||
Content: prediction,
|
||||
Role: "asssistant", // TODO FIX
|
||||
},
|
||||
Index: &i,
|
||||
})
|
||||
}
|
||||
|
||||
return resp, nil
|
||||
|
||||
// panic("unimplemented")
|
||||
}
|
||||
|
|
|
@ -7,9 +7,11 @@ import (
|
|||
)
|
||||
|
||||
func NewLocalAINetHTTPServer(configManager *ConfigManager, loader *model.ModelLoader, address string) *LocalAIServer {
|
||||
engine := NewLocalAIEngine(loader)
|
||||
localAI := LocalAIServer{
|
||||
configManager: configManager,
|
||||
loader: loader,
|
||||
engine: &engine,
|
||||
}
|
||||
|
||||
var middlewares []StrictMiddlewareFunc
|
||||
|
|
|
@ -4,6 +4,8 @@ registration:
|
|||
local_paths:
|
||||
model: ggml-gpt4all-j
|
||||
template: chat-gpt4all
|
||||
backend: gptj
|
||||
threads: 4
|
||||
request_defaults:
|
||||
top_p: 0.7
|
||||
temperature: 0.2
|
||||
|
|
|
@ -4,6 +4,8 @@ registration:
|
|||
local_paths:
|
||||
model: ggml-gpt4all-j
|
||||
template: chat-gpt4all
|
||||
backend: gptj
|
||||
threads: 4
|
||||
request_defaults:
|
||||
top_p: 0.7
|
||||
temperature: 0.2
|
||||
|
|
4
go.mod
4
go.mod
|
@ -20,7 +20,7 @@ require (
|
|||
github.com/mudler/go-stable-diffusion v0.0.0-20230605122230-d89260f598af
|
||||
github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230605194130-266f13aee9d8
|
||||
github.com/onsi/ginkgo/v2 v2.9.7
|
||||
github.com/onsi/gomega v1.27.7
|
||||
github.com/onsi/gomega v1.27.8
|
||||
github.com/otiai10/openaigo v1.1.0
|
||||
github.com/rs/zerolog v1.29.1
|
||||
github.com/sashabaranov/go-openai v1.10.0
|
||||
|
@ -28,7 +28,7 @@ require (
|
|||
github.com/tmc/langchaingo v0.0.0-20230605114752-4afed6d7be4a
|
||||
github.com/urfave/cli/v2 v2.25.5
|
||||
github.com/valyala/fasthttp v1.47.0
|
||||
github.com/vmware-tanzu/carvel-ytt v0.45.1
|
||||
github.com/vmware-tanzu/carvel-ytt v0.45.2
|
||||
gopkg.in/yaml.v2 v2.4.0
|
||||
gopkg.in/yaml.v3 v3.0.1
|
||||
)
|
||||
|
|
12
go.sum
12
go.sum
|
@ -38,12 +38,12 @@ github.com/donomii/go-rwkv.cpp v0.0.0-20230531084548-c43cdf5fc5bf h1:upCz8WYdzMe
|
|||
github.com/donomii/go-rwkv.cpp v0.0.0-20230531084548-c43cdf5fc5bf/go.mod h1:gWy7FIWioqYmYxkaoFyBnaKApeZVrUkHhv9EV9pz4dM=
|
||||
github.com/donomii/go-rwkv.cpp v0.0.0-20230601111443-3b28b09469fc h1:RCGGh/zw+K09sjCIYHUV7lFenxONml+LS02RdN+AkwI=
|
||||
github.com/donomii/go-rwkv.cpp v0.0.0-20230601111443-3b28b09469fc/go.mod h1:gWy7FIWioqYmYxkaoFyBnaKApeZVrUkHhv9EV9pz4dM=
|
||||
github.com/donomii/go-rwkv.cpp v0.0.0-20230604202420-1e18b2490e7e h1:Qne1BO0ltmyJcsizxZ61SV+uwuD1F8NztsfBDHOd0LI=
|
||||
github.com/donomii/go-rwkv.cpp v0.0.0-20230604202420-1e18b2490e7e/go.mod h1:gWy7FIWioqYmYxkaoFyBnaKApeZVrUkHhv9EV9pz4dM=
|
||||
github.com/gabriel-vasile/mimetype v1.4.2 h1:w5qFW6JKBz9Y393Y4q372O9A7cUSequkh1Q7OhCmWKU=
|
||||
github.com/gabriel-vasile/mimetype v1.4.2/go.mod h1:zApsH/mKG4w07erKIaJPFiX0Tsq9BFQgN3qGY5GnNgA=
|
||||
github.com/getkin/kin-openapi v0.117.0 h1:QT2DyGujAL09F4NrKDHJGsUoIprlIcFVHWDVDcUFE8A=
|
||||
github.com/getkin/kin-openapi v0.117.0/go.mod h1:l5e9PaFUo9fyLJCPGQeXI2ML8c3P8BHOEV2VaAVf/pc=
|
||||
github.com/donomii/go-rwkv.cpp v0.0.0-20230604202420-1e18b2490e7e h1:Qne1BO0ltmyJcsizxZ61SV+uwuD1F8NztsfBDHOd0LI=
|
||||
github.com/donomii/go-rwkv.cpp v0.0.0-20230604202420-1e18b2490e7e/go.mod h1:gWy7FIWioqYmYxkaoFyBnaKApeZVrUkHhv9EV9pz4dM=
|
||||
github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230520182345-041be06d5881 h1:dafqVivljYk51VLFnnpTXJnfWDe637EobWZ1l8PyEf8=
|
||||
github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230520182345-041be06d5881/go.mod h1:QIjZ9OktHFG7p+/m3sMvrAJKKdWrr1fZIK0rM6HZlyo=
|
||||
github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230523110439-77eab3fbfe5e h1:4PMorQuoUGAXmIzCtnNOHaasyLokXdgd8jUWwsraFTo=
|
||||
|
@ -60,14 +60,14 @@ github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230601065548-3f7436e8a096
|
|||
github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230601065548-3f7436e8a096/go.mod h1:QIjZ9OktHFG7p+/m3sMvrAJKKdWrr1fZIK0rM6HZlyo=
|
||||
github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230601124500-5b9e59bc07dd h1:os3FeYEIB4j5m5QlbFC3HkVcaAmLxNXz48uIfQAexm0=
|
||||
github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230601124500-5b9e59bc07dd/go.mod h1:QIjZ9OktHFG7p+/m3sMvrAJKKdWrr1fZIK0rM6HZlyo=
|
||||
github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230606002726-57543c169e27 h1:boeMTUUBtnLU8JElZJHXrsUzROJar9/t6vGOFjkrhhI=
|
||||
github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230606002726-57543c169e27/go.mod h1:QIjZ9OktHFG7p+/m3sMvrAJKKdWrr1fZIK0rM6HZlyo=
|
||||
github.com/gin-contrib/sse v0.1.0 h1:Y/yl/+YNO8GZSjAhjMsSuLt29uWRFHdHYUb5lYOV9qE=
|
||||
github.com/gin-contrib/sse v0.1.0/go.mod h1:RHrZQHXnP2xjPF+u1gW/2HnVO7nvIa9PG3Gm+fLHvGI=
|
||||
github.com/gin-gonic/gin v1.9.0 h1:OjyFBKICoexlu99ctXNR2gg+c5pKrKMuyjgARg9qeY8=
|
||||
github.com/gin-gonic/gin v1.9.0/go.mod h1:W1Me9+hsUSyj3CePGrd1/QrKJMSJ1Tu/0hFEH89961k=
|
||||
github.com/gin-gonic/gin v1.9.1 h1:4idEAncQnU5cB7BeOkPtxjfCSye0AAm1R0RVIqJ+Jmg=
|
||||
github.com/gin-gonic/gin v1.9.1/go.mod h1:hPrL7YrpYKXt5YId3A/Tnip5kqbEAP+KLuI3SUcPTeU=
|
||||
github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230606002726-57543c169e27 h1:boeMTUUBtnLU8JElZJHXrsUzROJar9/t6vGOFjkrhhI=
|
||||
github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230606002726-57543c169e27/go.mod h1:QIjZ9OktHFG7p+/m3sMvrAJKKdWrr1fZIK0rM6HZlyo=
|
||||
github.com/go-audio/audio v1.0.0 h1:zS9vebldgbQqktK4H0lUqWrG8P0NxCJVqcj7ZpNnwd4=
|
||||
github.com/go-audio/audio v1.0.0/go.mod h1:6uAu0+H2lHkwdGsAY+j2wHPNPpPoeg5AaEFh9FlA+Zs=
|
||||
github.com/go-audio/riff v1.0.0 h1:d8iCGbDvox9BfLagY94fBynxSPHO80LmZCaOsmKxokA=
|
||||
|
@ -280,6 +280,7 @@ github.com/onsi/ginkgo/v2 v2.9.7 h1:06xGQy5www2oN160RtEZoTvnP2sPhEfePYmCDc2szss=
|
|||
github.com/onsi/ginkgo/v2 v2.9.7/go.mod h1:cxrmXWykAwTwhQsJOPfdIDiJ+l2RYq7U8hFU+M/1uw0=
|
||||
github.com/onsi/gomega v1.27.7 h1:fVih9JD6ogIiHUN6ePK7HJidyEDpWGVB5mzM7cWNXoU=
|
||||
github.com/onsi/gomega v1.27.7/go.mod h1:1p8OOlwo2iUUDsHnOrjE5UKYJ+e3W8eQ3qSlRahPmr4=
|
||||
github.com/onsi/gomega v1.27.8/go.mod h1:2J8vzI/s+2shY9XHRApDkdgPo1TKT7P2u6fXeJKFnNQ=
|
||||
github.com/otiai10/mint v1.5.1 h1:XaPLeE+9vGbuyEHem1JNk3bYc7KKqyI/na0/mLd/Kks=
|
||||
github.com/otiai10/mint v1.5.1/go.mod h1:MJm72SBthJjz8qhefc4z1PYEieWmy8Bku7CjcAqyUSM=
|
||||
github.com/otiai10/openaigo v1.1.0 h1:zRvGBqZUW5PCMgdkJNsPVTBd8tOLCMTipXE5wD2pdTg=
|
||||
|
@ -330,6 +331,7 @@ github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o
|
|||
github.com/stretchr/testify v1.8.2/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
|
||||
github.com/stretchr/testify v1.8.3 h1:RP3t2pwF7cMEbC1dqtB6poj3niw/9gnV4Cjg5oW5gtY=
|
||||
github.com/stretchr/testify v1.8.3/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
|
||||
github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk=
|
||||
github.com/swaggo/swag v1.16.1/go.mod h1:9/LMvHycG3NFHfR6LwvikHv5iFvmPADQ359cKikGxto=
|
||||
github.com/tinylib/msgp v1.1.6/go.mod h1:75BAfg2hauQhs3qedfdDZmWAPcFMAvJE5b9rGOMufyw=
|
||||
github.com/tinylib/msgp v1.1.8 h1:FCXC1xanKO4I8plpHGH2P7koL/RzZs12l/+r7vakfm0=
|
||||
|
@ -359,6 +361,8 @@ github.com/valyala/tcplisten v1.0.0 h1:rBHj/Xf+E1tRGZyWIWwJDiRY0zc1Js+CV5DqwacVS
|
|||
github.com/valyala/tcplisten v1.0.0/go.mod h1:T0xQ8SeCZGxckz9qRXTfG43PvQ/mcWh7FwZEA7Ioqkc=
|
||||
github.com/vmware-tanzu/carvel-ytt v0.45.1 h1:zjiOnV7WiKJbkLHkJCRxlmABOMIL4WhKKyahfCzFoIk=
|
||||
github.com/vmware-tanzu/carvel-ytt v0.45.1/go.mod h1:+r+ZVZLsETAYlRsgINztFdUdUufj2OwrTXCfOzYB4fY=
|
||||
github.com/vmware-tanzu/carvel-ytt v0.45.2 h1:0+aECp3BcMXkD8L/VXVn4KOh1jD3COAt3DOXZ76iZu0=
|
||||
github.com/vmware-tanzu/carvel-ytt v0.45.2/go.mod h1:oHqFBnn/JvqaUjcQo9T/a/WPUP1ituKjUpFPH+BTzfc=
|
||||
github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673 h1:bAn7/zixMGCfxrRTfdpNzjtPYqr8smhKouy9mxVdGPU=
|
||||
github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673/go.mod h1:N3UwUGtsrSj3ccvlPHLoLsHnpR27oXr4ZE984MbSER8=
|
||||
github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
|
||||
|
|
|
@ -1,19 +0,0 @@
|
|||
// NOT ACTUALLY USING THIS CURRENTLY???
|
||||
|
||||
// ServerInterface represents all server handlers.
|
||||
type ServerInterface interface {
|
||||
{{range .}}{{.SummaryAsComment }}
|
||||
// ({{.Method}} {{.Path}})
|
||||
{{.OperationId}}(w http.ResponseWriter, r *http.Request{{genParamArgs .PathParams}}{{if .RequiresParamObject}}, params {{.OperationId}}Params{{end}})
|
||||
{{end}}
|
||||
}
|
||||
|
||||
// TypedServerInterface is used to give each endpoint a fully typed method signature for cases where we're able to route automatically
|
||||
type TypedServerInterface interface {
|
||||
{{range .}}{{.SummaryAsComment }}
|
||||
// ({{.Method}} {{.Path}})
|
||||
{{$reqBody := genDefaultRequestBodyType . -}}
|
||||
{{- if ne $reqBody "" }}{{$reqBody = printf ", body %s" $reqBody}}{{end -}}
|
||||
{{.OperationId}}(w http.ResponseWriter{{genParamArgs .PathParams}}{{if .RequiresParamObject}}, params {{.OperationId}}Params{{end}}{{$reqBody}})
|
||||
{{end}}
|
||||
}
|
|
@ -11,7 +11,6 @@ output-options:
|
|||
- mapstructure
|
||||
user-templates:
|
||||
endpoint-body-mapping.tmpl: ./openai-openapi/endpoint-body-mapping.tmpl
|
||||
# chi/chi-interface.tmpl: ./openai-openapi/chi-interface.tmpl
|
||||
# union.tmpl: "// SKIP"
|
||||
# union-and-additional-properties.tmpl: "// SKIP"
|
||||
# additional-properties.tmpl: "// SKIP"
|
|
@ -54,6 +54,14 @@ components:
|
|||
type: number
|
||||
nullable: true
|
||||
x-go-type: float64
|
||||
cutstrings:
|
||||
type: array
|
||||
items:
|
||||
type: string
|
||||
trimstrings:
|
||||
type: array
|
||||
items:
|
||||
type: string
|
||||
#@overlay/match missing_ok=True
|
||||
LocalAIImageRequestExtension:
|
||||
allOf:
|
||||
|
|
|
@ -1,30 +0,0 @@
|
|||
#! This file is just for my reference during development and will be removed.
|
||||
components:
|
||||
schemas:
|
||||
CreateChatCompletionRequest:
|
||||
type: object
|
||||
properties:
|
||||
model:
|
||||
description: ID of the model to use. Currently, only `gpt-3.5-turbo` and `gpt-3.5-turbo-0301` are supported.
|
||||
type: string
|
||||
messages:
|
||||
description: The messages to generate chat completions for, in the [chat format](/docs/guides/chat/introduction).
|
||||
type: array
|
||||
minItems: 1
|
||||
items:
|
||||
$ref: '#/components/schemas/ChatCompletionRequestMessage'
|
||||
temperature:
|
||||
type: number
|
||||
minimum: 0
|
||||
maximum: 2
|
||||
default: 1
|
||||
example: 1
|
||||
nullable: true
|
||||
description: *completions_temperature_description
|
||||
CreateImageRequest:
|
||||
type: object
|
||||
properties:
|
||||
prompt:
|
||||
description: A text description of the desired image(s). The maximum length is 1000 characters.
|
||||
type: string
|
||||
example: "A cute baby sea otter"
|
Loading…
Add table
Add a link
Reference in a new issue