feat: update llama, enable NUMA (#684)

2025-05-28 06:25:00 +00:00 · 2023-06-27 09:00:10 +02:00 · 2023-06-27 09:00:10 +02:00 · 3593cb0c87
commit 3593cb0c87
parent e130b208ab
3 changed files with 6 additions and 1 deletion
--- a/api/config.go
+++ b/api/config.go
@@ -23,6 +23,7 @@ type Config struct {
 	TrimSpace      []string          `yaml:"trimspace"`
 	ContextSize    int               `yaml:"context_size"`
 	F16            bool              `yaml:"f16"`
+	NUMA           bool              `yaml:"numa"`
 	Threads        int               `yaml:"threads"`
 	Debug          bool              `yaml:"debug"`
 	Roles          map[string]string `yaml:"roles"`
--- a/api/prediction.go
+++ b/api/prediction.go
@@ -48,6 +48,10 @@ func defaultLLamaOpts(c Config) []llama.ModelOption {
 		llamaOpts = append(llamaOpts, llama.SetNBatch(512))
 	}

+	if c.NUMA {
+		llamaOpts = append(llamaOpts, llama.EnableNUMA)
+	}
+
 	if c.LowVRAM {
 		llamaOpts = append(llamaOpts, llama.EnabelLowVRAM)
 	}