package apiv2

import (
	"fmt"

	llama "github.com/go-skynet/go-llama.cpp"
)
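// ConfigRegistration identifies a configuration entry: the API endpoint it is
// registered against and the model name it serves.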
type ConfigRegistration struct {
	Endpoint string `yaml:"endpoint" json:"endpoint" mapstructure:"endpoint"`
	Model    string `yaml:"model" json:"model" mapstructure:"model"`
}
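// ConfigLocalSettings holds the local backend settings for a model: filesystem
// paths for the model and prompt template, the backend to load it with, and
// the thread count to use.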
type ConfigLocalSettings struct {
	ModelPath    string `yaml:"model" mapstructure:"model"`
	TemplatePath string `yaml:"template" mapstructure:"template"`
	Backend      string `yaml:"backend" mapstructure:"backend"`
	Threads      int    `yaml:"threads" mapstructure:"threads"`
}
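// ConfigStub is the request-agnostic portion of a configuration: the
// registration plus the local settings, without any request defaults.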
type ConfigStub struct {
	Registration  ConfigRegistration  `yaml:"registration" mapstructure:"registration"`
	LocalSettings ConfigLocalSettings `yaml:"local_paths" mapstructure:"local_paths"`
}
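// SpecificConfig embeds ConfigStub and adds typed request defaults for a
// particular generated request model.
//
// An illustrative config stanza, going by the yaml tags above (the key layout
// follows the tags; the values themselves are hypothetical):
//
//	registration:
//	  endpoint: v1/chat/completions
//	  model: example-model
//	local_paths:
//	  model: /models/example.bin
//	  template: /models/example.tmpl
//	  backend: llama
//	  threads: 4
//	request_defaults:
//	  temperature: 0.7
//	  max_tokens: 256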
type SpecificConfig[RequestModel any] struct {
	ConfigStub      `mapstructure:",squash"`
	RequestDefaults RequestModel `yaml:"request_defaults" mapstructure:"request_defaults"`
}
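// Config is the interface the rest of apiv2 works against: it exposes the
// registration and local settings, and converts request defaults into
// go-llama.cpp model and predict options.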
type Config interface {
	GetRequestDefaults() interface{}
	GetLocalSettings() ConfigLocalSettings
	GetRegistration() ConfigRegistration

	// TODO: Test these. I am not sure.
	ToPredictOptions() []llama.PredictOption
	ToModelOptions() []llama.ModelOption

	// TODO also dubious? Technically some requests lack prompts, but it's pretty general and may just be worth sticking here.
	GetPrompts() ([]Prompt, error)
	GetN() (int, error)
}
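// Prompt abstracts over the two prompt encodings a request can carry: a plain
// string and a pre-tokenized []int.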
type Prompt interface {
	AsString() string //, bool)
	AsTokens() []int
}

// How do Go people name these? Should I just ditch the interface entirely?
type PromptImpl struct {
	sVal string
	tVal []int
}

func (p PromptImpl) AsString() string {
	return p.sVal
}

func (p PromptImpl) AsTokens() []int {
	return p.tVal
}
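// ConfigStub provides placeholder implementations of Config: the option
// builders return empty slices and the prompt/N accessors return errors.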
func (cs ConfigStub) GetRequestDefaults() interface{} {
	return nil
}

func (cs ConfigStub) GetLocalSettings() ConfigLocalSettings {
	return cs.LocalSettings
}

func (cs ConfigStub) GetRegistration() ConfigRegistration {
	return cs.Registration
}

func (cs ConfigStub) ToPredictOptions() []llama.PredictOption {
	return []llama.PredictOption{}
}

func (cs ConfigStub) ToModelOptions() []llama.ModelOption {
	return []llama.ModelOption{}
}

func (cs ConfigStub) GetPrompts() ([]Prompt, error) {
	// Does this make sense?
	return nil, fmt.Errorf("unsupported operation GetPrompts for %T", cs)
}

func (cs ConfigStub) GetN() (int, error) {
	return 0, fmt.Errorf("unsupported operation GetN for %T", cs)
}

func (sc SpecificConfig[RequestModel]) GetRequestDefaults() interface{} {
	return sc.RequestDefaults
}

func (sc SpecificConfig[RequestModel]) GetRequest() RequestModel {
	return sc.RequestDefaults
}

func (sc SpecificConfig[RequestModel]) GetLocalSettings() ConfigLocalSettings {
	return sc.LocalSettings
}

func (sc SpecificConfig[RequestModel]) GetRegistration() ConfigRegistration {
	return sc.Registration
}
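// An illustrative way to wire these pieces together (shown as a comment: the
// field layout of the generated CreateChatCompletionRequest, with its pointer
// fields and optional extensions, is defined elsewhere, and the values here
// are hypothetical):
//
//	cfg := SpecificConfig[CreateChatCompletionRequest]{
//		ConfigStub: ConfigStub{
//			Registration:  ConfigRegistration{Endpoint: "v1/chat/completions", Model: "example-model"},
//			LocalSettings: ConfigLocalSettings{ModelPath: "/models/example.bin", Backend: "llama", Threads: 4},
//		},
//		RequestDefaults: CreateChatCompletionRequest{ /* defaults decoded from YAML */ },
//	}
//	modelOpts := cfg.ToModelOptions()     // -> []llama.ModelOption
//	predictOpts := cfg.ToPredictOptions() // -> []llama.PredictOption
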
// These functions I'm a bit dubious about. I think there's a better refactoring down in pkg/model,
// but to get a minimal test up and running, here we go!

// TODO: non text completion
func (sc SpecificConfig[RequestModel]) ToModelOptions() []llama.ModelOption {
	llamaOpts := []llama.ModelOption{}

	switch req := sc.GetRequestDefaults().(type) {
	case CreateCompletionRequest:
	case CreateChatCompletionRequest:
		// XLocalaiExtensions is optional, so check it before dereferencing (as in ToPredictOptions below).
		if req.XLocalaiExtensions != nil && req.XLocalaiExtensions.F16 != nil && *req.XLocalaiExtensions.F16 {
			llamaOpts = append(llamaOpts, llama.EnableF16Memory)
		}

		if req.MaxTokens != nil && *req.MaxTokens > 0 {
			llamaOpts = append(llamaOpts, llama.SetContext(*req.MaxTokens)) // todo is this right?
		}

		// TODO DO MORE!
	}

	// Code to Port:
	// if c.Embeddings {
	// 	llamaOpts = append(llamaOpts, llama.EnableEmbeddings)
	// }

	// if c.NGPULayers != 0 {
	// 	llamaOpts = append(llamaOpts, llama.SetGPULayers(c.NGPULayers))
	// }

	return llamaOpts
}
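// ToPredictOptions maps the per-request sampling fields (temperature, top_p,
// max_tokens, stop words, and the x-localai extensions) onto go-llama.cpp
// predict options, always including the configured thread count.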
func (sc SpecificConfig[RequestModel]) ToPredictOptions() []llama.PredictOption {
	llamaOpts := []llama.PredictOption{
		llama.SetThreads(sc.GetLocalSettings().Threads),
	}

	switch req := sc.GetRequestDefaults().(type) {
	// TODO Refactor this when we get to p2 and add image / audio
	// I expect that it'll be worth pulling out the base case first, and doing fancy fallthrough things.
	// Text Requests:
	case CreateCompletionRequest:
	case CreateChatCompletionRequest:
		if req.Temperature != nil {
			llamaOpts = append(llamaOpts, llama.SetTemperature(float64(*req.Temperature))) // Oh boy. TODO Investigate. This is why I'm doing this.
		}

		if req.TopP != nil {
			llamaOpts = append(llamaOpts, llama.SetTopP(float64(*req.TopP))) // CAST
		}

		if req.MaxTokens != nil {
			llamaOpts = append(llamaOpts, llama.SetTokens(*req.MaxTokens))
		}

		if req.FrequencyPenalty != nil {
			llamaOpts = append(llamaOpts, llama.SetPenalty(float64(*req.FrequencyPenalty))) // CAST
		}

		if req.Stop != nil {
			if stop0, err := req.Stop.AsCreateChatCompletionRequestStop0(); err == nil {
				llamaOpts = append(llamaOpts, llama.SetStopWords(stop0))
			}

			if stop1, err := req.Stop.AsCreateChatCompletionRequestStop1(); err == nil && len(stop1) > 0 {
				llamaOpts = append(llamaOpts, llama.SetStopWords(stop1...))
			}
		}

		if req.XLocalaiExtensions != nil {
			if req.XLocalaiExtensions.TopK != nil {
				llamaOpts = append(llamaOpts, llama.SetTopK(*req.XLocalaiExtensions.TopK))
			}

			if req.XLocalaiExtensions.F16 != nil && *(req.XLocalaiExtensions.F16) {
				llamaOpts = append(llamaOpts, llama.EnableF16KV)
			}

			if req.XLocalaiExtensions.Seed != nil {
				llamaOpts = append(llamaOpts, llama.SetSeed(*req.XLocalaiExtensions.Seed))
			}

			if req.XLocalaiExtensions.IgnoreEos != nil && *(req.XLocalaiExtensions.IgnoreEos) {
				llamaOpts = append(llamaOpts, llama.IgnoreEOS)
			}

			if req.XLocalaiExtensions.Debug != nil && *(req.XLocalaiExtensions.Debug) {
				llamaOpts = append(llamaOpts, llama.Debug)
			}

			if req.XLocalaiExtensions.Mirostat != nil {
				llamaOpts = append(llamaOpts, llama.SetMirostat(*req.XLocalaiExtensions.Mirostat))
			}

			if req.XLocalaiExtensions.MirostatEta != nil {
				llamaOpts = append(llamaOpts, llama.SetMirostatETA(*req.XLocalaiExtensions.MirostatEta))
			}

			if req.XLocalaiExtensions.MirostatTau != nil {
				llamaOpts = append(llamaOpts, llama.SetMirostatTAU(*req.XLocalaiExtensions.MirostatTau))
			}

			if req.XLocalaiExtensions.Keep != nil {
				llamaOpts = append(llamaOpts, llama.SetNKeep(*req.XLocalaiExtensions.Keep))
			}

			if req.XLocalaiExtensions.Batch != nil && *(req.XLocalaiExtensions.Batch) != 0 {
				llamaOpts = append(llamaOpts, llama.SetBatch(*req.XLocalaiExtensions.Batch))
			}
		}
	}

	// CODE TO PORT
	// SKIPPING PROMPT CACHE FOR PASS ONE, TODO READ ABOUT IT

	// if c.PromptCacheAll {
	// 	predictOptions = append(predictOptions, llama.EnablePromptCacheAll)
	// }

	// if c.PromptCachePath != "" {
	// 	// Create parent directory
	// 	p := filepath.Join(modelPath, c.PromptCachePath)
	// 	os.MkdirAll(filepath.Dir(p), 0755)
	// 	predictOptions = append(predictOptions, llama.SetPathPromptCache(p))
	// }

	return llamaOpts
}
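// GetPrompts normalizes the request into a flat []Prompt: completion requests
// may carry a single string, a list of strings, one token slice, or several,
// while chat requests contribute one prompt per message.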
// It's unclear if this code belongs here or somewhere else, but I'm jamming it here for now.
func (sc SpecificConfig[RequestModel]) GetPrompts() ([]Prompt, error) {
	prompts := []Prompt{}

	switch req := sc.GetRequestDefaults().(type) {
	case CreateCompletionRequest:
		p0, err := req.Prompt.AsCreateCompletionRequestPrompt0()
		if err == nil {
			p := PromptImpl{sVal: p0}
			return []Prompt{p}, nil
		}
		p1, err := req.Prompt.AsCreateCompletionRequestPrompt1()
		if err == nil {
			for _, m := range p1 {
				prompts = append(prompts, PromptImpl{sVal: m})
			}
			return prompts, nil
		}
		p2, err := req.Prompt.AsCreateCompletionRequestPrompt2()
		if err == nil {
			p := PromptImpl{tVal: p2}
			return []Prompt{p}, nil
		}
		p3, err := req.Prompt.AsCreateCompletionRequestPrompt3()
		if err == nil {
			for _, t := range p3 {
				prompts = append(prompts, PromptImpl{tVal: t})
			}
			return prompts, nil
		}
	case CreateChatCompletionRequest:
		for _, message := range req.Messages {
			prompts = append(prompts, PromptImpl{sVal: message.Content})

			// TODO Deal with ROLES
			// var content string
			// r := req.Roles[message.Role]
			// if r != "" {
			// 	content = fmt.Sprint(r, " ", message.Content)
			// } else {
			// 	content = message.Content
			// }

			// if content != "" {
			// 	prompt = prompt + content
			// }
		}
		return prompts, nil
	}

	return nil, fmt.Errorf("no prompt found for %T", sc.GetRequestDefaults())
}
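// GetN reports how many results the request asked for (the OpenAI "n"
// property); see the generator workaround noted below.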
func (sc SpecificConfig[RequestModel]) GetN() (int, error) {
	switch req := sc.GetRequestDefaults().(type) {
	case CreateChatCompletionRequest:
	case CreateCompletionRequest:
	case CreateEditRequest:
	case CreateImageRequest:
		// TODO I AM SORRY FOR THIS DIRTY HACK.
		// YTT is currently mangling the n property and renaming it to False.
		// This needs to be fixed before merging. However, for testing.....
		if req.False != nil {
			return *req.False, nil
		}
	}

	return 0, fmt.Errorf("unsupported operation GetN for %T", sc)
}

// TODO: Not even using this, but it illustrates the difficulty: should this be integrated into GetPrompts(), returning an interface of {Tokens []int, String string}?
// func (sc SpecificConfig[RequestModel]) GetTokenPrompts() ([]int, error) {}