feat(llama.cpp): add flash_attention and no_kv_offloading (#2310)

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
Author: Ettore Di Giacinto
Date: 2024-05-13 19:07:51 +02:00 (committed via GitHub)
Commit: e49ea0123b (parent: 7123d07456)
4 changed files with 11 additions and 0 deletions

@@ -212,6 +212,9 @@ message ModelOptions {
   float YarnBetaSlow = 47;
   string Type = 49;
 
+  bool FlashAttention = 56;
+  bool NoKVOffload = 57;
+
 }
 
 message Result {
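
For context, a minimal sketch of how a backend client might set the two new fields when building a ModelOptions message. This is not part of the commit; the generated-package import path is an assumption and may differ in your checkout.

    package main

    import (
    	"fmt"

    	// Assumed import path for the Go code generated from backend.proto;
    	// adjust to match where the protoc output lives in your tree.
    	pb "github.com/mudler/LocalAI/pkg/grpc/proto"
    )

    func main() {
    	// The two fields added in this commit, intended to be wired through
    	// to llama.cpp's flash_attn and no_kv_offload options.
    	opts := &pb.ModelOptions{
    		FlashAttention: true, // enable flash attention kernels
    		NoKVOffload:    true, // keep the KV cache from being offloaded
    	}
    	fmt.Printf("flash_attention=%v no_kv_offloading=%v\n",
    		opts.FlashAttention, opts.NoKVOffload)
    }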