feat: allow to set a prompt cache path and enable saving state (#395)
Signed-off-by: mudler <mudler@mocaccino.org>
parent 76c881043e
commit 217dbb448e

3 changed files with 39 additions and 22 deletions
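For orientation, the diff below reads two new fields from the API Config struct. A minimal sketch of those fields follows; the struct itself is defined in another of the changed files, and the yaml tags shown are an assumption rather than something visible in this diff.

// Sketch of the Config fields consulted by buildLLamaPredictOptions below.
// Field names are taken from the diff; the yaml tags are assumed.
type Config struct {
	PromptCacheAll  bool   `yaml:"prompt_cache_all"`  // cache the state of the whole prompt
	PromptCachePath string `yaml:"prompt_cache_path"` // cache file location, relative to the model directory
	// ... existing generation settings (Temperature, Threads, Mirostat, ...)
}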
@@ -2,6 +2,8 @@ package api
 import (
 	"fmt"
+	"os"
+	"path/filepath"
 	"regexp"
 	"strings"
 	"sync"
@@ -102,7 +104,7 @@ func ModelEmbedding(s string, tokens []int, loader *model.ModelLoader, c Config)
 	switch model := inferenceModel.(type) {
 	case *llama.LLama:
 		fn = func() ([]float32, error) {
-			predictOptions := buildLLamaPredictOptions(c)
+			predictOptions := buildLLamaPredictOptions(c, loader.ModelPath)
 			if len(tokens) > 0 {
 				return model.TokenEmbeddings(tokens, predictOptions...)
 			}
@@ -151,7 +153,7 @@ func ModelEmbedding(s string, tokens []int, loader *model.ModelLoader, c Config)
 	}, nil
 }
 
-func buildLLamaPredictOptions(c Config) []llama.PredictOption {
+func buildLLamaPredictOptions(c Config, modelPath string) []llama.PredictOption {
 	// Generate the prediction using the language model
 	predictOptions := []llama.PredictOption{
 		llama.SetTemperature(c.Temperature),
@@ -161,6 +163,17 @@ func buildLLamaPredictOptions(c Config) []llama.PredictOption {
 		llama.SetThreads(c.Threads),
 	}
 
+	if c.PromptCacheAll {
+		predictOptions = append(predictOptions, llama.EnablePromptCacheAll)
+	}
+
+	if c.PromptCachePath != "" {
+		// Create parent directory
+		p := filepath.Join(modelPath, c.PromptCachePath)
+		os.MkdirAll(filepath.Dir(p), 0755)
+		predictOptions = append(predictOptions, llama.SetPathPromptCache(p))
+	}
+
 	if c.Mirostat != 0 {
 		predictOptions = append(predictOptions, llama.SetMirostat(c.Mirostat))
 	}
@@ -469,7 +482,7 @@ func ModelInference(s string, loader *model.ModelLoader, c Config, tokenCallback
 		model.SetTokenCallback(tokenCallback)
 	}
 
-	predictOptions := buildLLamaPredictOptions(c)
+	predictOptions := buildLLamaPredictOptions(c, loader.ModelPath)
 
 	str, er := model.Predict(
 		s,
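The notable behavior is the PromptCachePath branch: the configured path is joined onto the model directory and the parent directory is created up front so llama.cpp can write its state file there. Below is a standalone sketch of that path handling; resolvePromptCache is a hypothetical helper introduced for illustration and, unlike the patch, it surfaces the MkdirAll error instead of ignoring it.

package main

import (
	"fmt"
	"log"
	"os"
	"path/filepath"
)

// resolvePromptCache mirrors the patch's path handling: anchor the configured
// cache path under the model directory and create the parent directory so
// the prompt cache file can be written on the first request.
func resolvePromptCache(modelPath, promptCachePath string) (string, error) {
	p := filepath.Join(modelPath, promptCachePath)
	if err := os.MkdirAll(filepath.Dir(p), 0755); err != nil {
		return "", err
	}
	return p, nil
}

func main() {
	p, err := resolvePromptCache("/models", "cache/my-model-state.bin")
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println(p) // prints /models/cache/my-model-state.bin
}

One side effect of filepath.Join worth noting: even an absolute PromptCachePath is cleaned and appended under the model directory, so the cache file always ends up below loader.ModelPath.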