Mirror of https://github.com/mudler/LocalAI.git (synced 2025-05-31 07:54:59 +00:00)
feat: extend model configuration for llama.cpp (#536)
This commit is contained in:
parent 694dd4ad9e, commit 5abbb134d9
6 changed files with 69 additions and 150 deletions
@@ -16,27 +16,33 @@ import (
 )

 type Config struct {
-    OpenAIRequest         `yaml:"parameters"`
-    Name                  string            `yaml:"name"`
-    StopWords             []string          `yaml:"stopwords"`
-    Cutstrings            []string          `yaml:"cutstrings"`
-    TrimSpace             []string          `yaml:"trimspace"`
-    ContextSize           int               `yaml:"context_size"`
-    F16                   bool              `yaml:"f16"`
-    Threads               int               `yaml:"threads"`
-    Debug                 bool              `yaml:"debug"`
-    Roles                 map[string]string `yaml:"roles"`
-    Embeddings            bool              `yaml:"embeddings"`
-    Backend               string            `yaml:"backend"`
-    TemplateConfig        TemplateConfig    `yaml:"template"`
-    MirostatETA           float64           `yaml:"mirostat_eta"`
-    MirostatTAU           float64           `yaml:"mirostat_tau"`
-    Mirostat              int               `yaml:"mirostat"`
-    NGPULayers            int               `yaml:"gpu_layers"`
-    ImageGenerationAssets string            `yaml:"asset_dir"`
+    OpenAIRequest         `yaml:"parameters"`
+    Name                  string            `yaml:"name"`
+    StopWords             []string          `yaml:"stopwords"`
+    Cutstrings            []string          `yaml:"cutstrings"`
+    TrimSpace             []string          `yaml:"trimspace"`
+    ContextSize           int               `yaml:"context_size"`
+    F16                   bool              `yaml:"f16"`
+    Threads               int               `yaml:"threads"`
+    Debug                 bool              `yaml:"debug"`
+    Roles                 map[string]string `yaml:"roles"`
+    Embeddings            bool              `yaml:"embeddings"`
+    Backend               string            `yaml:"backend"`
+    TemplateConfig        TemplateConfig    `yaml:"template"`
+    MirostatETA           float64           `yaml:"mirostat_eta"`
+    MirostatTAU           float64           `yaml:"mirostat_tau"`
+    Mirostat              int               `yaml:"mirostat"`
+    NGPULayers            int               `yaml:"gpu_layers"`
+    MMap                  bool              `yaml:"mmap"`
+    MMlock                bool              `yaml:"mmlock"`
+
+    TensorSplit           string            `yaml:"tensor_split"`
+    MainGPU               string            `yaml:"main_gpu"`
+    ImageGenerationAssets string            `yaml:"asset_dir"`
+
     PromptCachePath       string            `yaml:"prompt_cache_path"`
     PromptCacheAll        bool              `yaml:"prompt_cache_all"`
     PromptCacheRO         bool              `yaml:"prompt_cache_ro"`

     PromptStrings, InputStrings []string
     InputToken                  [][]int
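Note: as a minimal sketch (not part of this commit), the new llama.cpp-related keys could be declared in a model YAML file and decoded with gopkg.in/yaml.v2 as below. ConfigSubset is a trimmed stand-in for the Config struct above, limited to the fields this change adds, and the model name and option values are illustrative assumptions only.

package main

import (
    "fmt"

    "gopkg.in/yaml.v2"
)

// ConfigSubset mirrors only the fields this commit wires through to llama.cpp,
// plus name/backend for context. It is not the full Config type.
type ConfigSubset struct {
    Name        string `yaml:"name"`
    Backend     string `yaml:"backend"`
    MMap        bool   `yaml:"mmap"`
    MMlock      bool   `yaml:"mmlock"`
    TensorSplit string `yaml:"tensor_split"`
    MainGPU     string `yaml:"main_gpu"`
}

// modelYAML is a hypothetical model definition; all values are placeholders.
const modelYAML = `
name: my-llama-model
backend: llama
mmap: true
mmlock: false
tensor_split: "0.7,0.3"
main_gpu: "0"
`

func main() {
    var c ConfigSubset
    if err := yaml.Unmarshal([]byte(modelYAML), &c); err != nil {
        panic(err)
    }
    fmt.Printf("%+v\n", c)
}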
@@ -53,6 +59,12 @@ type ConfigMerger struct {
     sync.Mutex
 }

+func defaultConfig(modelFile string) *Config {
+    return &Config{
+        OpenAIRequest: defaultRequest(modelFile),
+    }
+}
+
 func NewConfigMerger() *ConfigMerger {
     return &ConfigMerger{
         configs: make(map[string]Config),
@@ -308,13 +320,11 @@ func readConfig(modelFile string, input *OpenAIRequest, cm *ConfigMerger, loader
     var config *Config
     cfg, exists := cm.GetConfig(modelFile)
     if !exists {
-        config = &Config{
-            OpenAIRequest: defaultRequest(modelFile),
-            ContextSize:   ctx,
-            Threads:       threads,
-            F16:           f16,
-            Debug:         debug,
-        }
+        config = defaultConfig(modelFile)
+        config.ContextSize = ctx
+        config.Threads = threads
+        config.F16 = f16
+        config.Debug = debug
     } else {
         config = &cfg
     }
@@ -4,8 +4,8 @@ import (
     "bufio"
     "bytes"
     "encoding/base64"
-    "errors"
+    "encoding/json"
+    "errors"
     "fmt"
     "io"
     "io/ioutil"
@@ -125,6 +125,9 @@ type OpenAIRequest struct {
     MirostatTAU float64 `json:"mirostat_tau" yaml:"mirostat_tau"`
     Mirostat    int     `json:"mirostat" yaml:"mirostat"`

+    FrequencyPenalty float64 `json:"frequency_penalty" yaml:"frequency_penalty"`
+    TFZ              float64 `json:"tfz" yaml:"tfz"`
+
     Seed int `json:"seed" yaml:"seed"`

     // Image (not supported by OpenAI)
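Note: a hedged usage sketch (not from the commit) of how the request fields added above could be sent to the OpenAI-compatible completions endpoint; the model name, listen address, and parameter values are assumptions for illustration.

package main

import (
    "bytes"
    "fmt"
    "io"
    "net/http"
)

func main() {
    // Completion request exercising the new sampling fields:
    // frequency_penalty, tfz, and seed.
    body := []byte(`{
        "model": "my-llama-model",
        "prompt": "Once upon a time",
        "frequency_penalty": 0.8,
        "tfz": 1.0,
        "seed": 42
    }`)

    // Assumes a LocalAI instance listening on localhost:8080.
    resp, err := http.Post("http://localhost:8080/v1/completions", "application/json", bytes.NewReader(body))
    if err != nil {
        panic(err)
    }
    defer resp.Body.Close()

    out, _ := io.ReadAll(resp.Body)
    fmt.Println(string(out))
}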
@@ -191,7 +194,7 @@ func completionEndpoint(cm *ConfigMerger, o *Option) func(c *fiber.Ctx) error {
     }

     if input.Stream {
-        if (len(config.PromptStrings) > 1) {
+        if len(config.PromptStrings) > 1 {
             return errors.New("cannot handle more than 1 `PromptStrings` when `Stream`ing")
         }
@@ -39,6 +39,13 @@ func defaultLLamaOpts(c Config) []llama.ModelOption
         llamaOpts = append(llamaOpts, llama.SetGPULayers(c.NGPULayers))
     }

+    llamaOpts = append(llamaOpts, llama.SetMMap(c.MMap))
+    llamaOpts = append(llamaOpts, llama.SetMainGPU(c.MainGPU))
+    llamaOpts = append(llamaOpts, llama.SetTensorSplit(c.TensorSplit))
+    if c.Batch != 0 {
+        llamaOpts = append(llamaOpts, llama.SetNBatch(c.Batch))
+    }
+
     return llamaOpts
 }
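Note: a sketch (under assumptions) of loading a model directly with github.com/go-skynet/go-llama.cpp using the same model options defaultLLamaOpts now forwards; llama.New is assumed from that binding's API, and the model path and option values are placeholders.

package main

import (
    "fmt"

    llama "github.com/go-skynet/go-llama.cpp"
)

func main() {
    // These setters are the ones defaultLLamaOpts appends from the YAML config:
    // gpu_layers, mmap, main_gpu, tensor_split and batch.
    opts := []llama.ModelOption{
        llama.SetGPULayers(32),
        llama.SetMMap(true),
        llama.SetMainGPU("0"),
        llama.SetTensorSplit("0.7,0.3"),
        llama.SetNBatch(512),
    }

    // llama.New is assumed to take a model path plus ModelOptions.
    model, err := llama.New("/models/ggml-model.bin", opts...)
    if err != nil {
        fmt.Println("load failed:", err)
        return
    }
    _ = model
}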
@@ -168,6 +175,10 @@ func buildLLamaPredictOptions(c Config, modelPath string) []llama.PredictOption
         predictOptions = append(predictOptions, llama.EnablePromptCacheAll)
     }

+    if c.PromptCacheRO {
+        predictOptions = append(predictOptions, llama.EnablePromptCacheRO)
+    }
+
     if c.PromptCachePath != "" {
         // Create parent directory
         p := filepath.Join(modelPath, c.PromptCachePath)
@@ -217,6 +228,15 @@
         predictOptions = append(predictOptions, llama.SetSeed(c.Seed))
     }

+    //predictOptions = append(predictOptions, llama.SetLogitBias(c.Seed))
+
+    predictOptions = append(predictOptions, llama.SetFrequencyPenalty(c.FrequencyPenalty))
+    predictOptions = append(predictOptions, llama.SetMlock(c.MMlock))
+    predictOptions = append(predictOptions, llama.SetMemoryMap(c.MMap))
+    predictOptions = append(predictOptions, llama.SetPredictionMainGPU(c.MainGPU))
+    predictOptions = append(predictOptions, llama.SetPredictionTensorSplit(c.TensorSplit))
+    predictOptions = append(predictOptions, llama.SetTailFreeSamplingZ(c.TFZ))
+
     return predictOptions
 }
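Note: a companion sketch (under the same assumptions as above, not part of the commit) showing the newly forwarded predict options applied in a direct go-llama.cpp call; llama.New and Predict are assumed from the binding's API, and the model path, prompt, and values are placeholders.

package main

import (
    "fmt"

    llama "github.com/go-skynet/go-llama.cpp"
)

func main() {
    // llama.New is assumed to accept a model path plus ModelOptions.
    model, err := llama.New("/models/ggml-model.bin", llama.SetMMap(true))
    if err != nil {
        panic(err)
    }

    // These predict options mirror the ones buildLLamaPredictOptions now sets
    // from the per-model config: seed, frequency_penalty, mmlock, mmap,
    // main_gpu, tensor_split and tfz. Predict is assumed to take a prompt
    // plus PredictOptions and return the generated text.
    text, err := model.Predict("The capital of France is",
        llama.SetSeed(42),
        llama.SetFrequencyPenalty(0.8),
        llama.SetMlock(false),
        llama.SetMemoryMap(true),
        llama.SetPredictionMainGPU("0"),
        llama.SetPredictionTensorSplit("0.7,0.3"),
        llama.SetTailFreeSamplingZ(1.0),
    )
    if err != nil {
        panic(err)
    }
    fmt.Println(text)
}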