feat(llama.cpp): add flash_attention and no_kv_offloading (#2310)
feat(llama.cpp): add flash_attn and no_kv_offload

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
This commit is contained in:
parent 7123d07456
commit e49ea0123b
4 changed files with 11 additions and 0 deletions
@@ -132,6 +132,9 @@ type LLMConfig struct {
 	TensorParallelSize int `yaml:"tensor_parallel_size"` // vLLM
 	MMProj string `yaml:"mmproj"`
 
+	FlashAttention bool `yaml:"flash_attention"`
+	NoKVOffloading bool `yaml:"no_kv_offloading"`
+
 	RopeScaling string `yaml:"rope_scaling"`
 	ModelType string `yaml:"type"`
 
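The two new fields are plain yaml-tagged booleans, so they can be enabled per model through the model's YAML definition. Below is a minimal, hypothetical sketch of how the keys map onto the struct: the trimmed-down llmConfig type and the gopkg.in/yaml.v2 import are illustration-only assumptions; only the flash_attention and no_kv_offloading keys come from this diff.

package main

import (
	"fmt"

	"gopkg.in/yaml.v2"
)

// Trimmed-down stand-in for the relevant LLMConfig fields; the real
// struct in LocalAI carries many more options than shown here.
type llmConfig struct {
	FlashAttention bool `yaml:"flash_attention"`
	NoKVOffloading bool `yaml:"no_kv_offloading"`
}

func main() {
	// Hypothetical model YAML enabling the two options added by this commit.
	data := []byte("flash_attention: true\nno_kv_offloading: true\n")

	var cfg llmConfig
	if err := yaml.Unmarshal(data, &cfg); err != nil {
		panic(err)
	}
	fmt.Printf("flash_attention=%v no_kv_offloading=%v\n", cfg.FlashAttention, cfg.NoKVOffloading)
}

Leaving either key unset keeps the previous behaviour, since the zero value of a Go bool is false, so both features stay disabled unless explicitly turned on.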