Mirror of https://github.com/mudler/LocalAI.git (synced 2025-05-20 10:35:01 +00:00)
feat(llama.cpp): expose cache_type_k and cache_type_v for quant of kv cache (#4329)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
parent 88737e1d76
commit d4c1746c7d
4 changed files with 15 additions and 2 deletions
@@ -151,6 +151,8 @@ func grpcModelOpts(c config.BackendConfig) *pb.ModelOptions {
 		TensorParallelSize: int32(c.TensorParallelSize),
 		MMProj:             c.MMProj,
 		FlashAttention:     c.FlashAttention,
+		CacheTypeKey:       c.CacheTypeK,
+		CacheTypeValue:     c.CacheTypeV,
 		NoKVOffload:        c.NoKVOffloading,
 		YarnExtFactor:      c.YarnExtFactor,
 		YarnAttnFactor:     c.YarnAttnFactor,
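For context, a minimal, self-contained Go sketch of the wiring this hunk performs: the new KV-cache quantization settings (cache_type_k / cache_type_v) on the backend config are copied into the gRPC model options handed to the llama.cpp backend. The struct shapes below are illustrative assumptions, not the actual LocalAI types (the real code uses config.BackendConfig and the generated pb.ModelOptions).

package main

import "fmt"

// BackendConfig is an assumed, simplified shape of LocalAI's backend config;
// only the two KV-cache fields added by this commit are shown.
type BackendConfig struct {
	CacheTypeK string // quantization type for KV-cache keys, e.g. "f16" or "q8_0" (assumed values)
	CacheTypeV string // quantization type for KV-cache values, e.g. "f16" or "q8_0" (assumed values)
}

// ModelOptions is an assumed, simplified stand-in for the generated
// pb.ModelOptions message; only the fields touched by the diff are shown.
type ModelOptions struct {
	CacheTypeKey   string
	CacheTypeValue string
}

// grpcModelOpts mirrors the mapping visible in the diff: the config-level
// cache type settings are forwarded to the backend via the model options.
func grpcModelOpts(c BackendConfig) *ModelOptions {
	return &ModelOptions{
		CacheTypeKey:   c.CacheTypeK,
		CacheTypeValue: c.CacheTypeV,
	}
}

func main() {
	opts := grpcModelOpts(BackendConfig{CacheTypeK: "q8_0", CacheTypeV: "q8_0"})
	fmt.Println("cache_type_k:", opts.CacheTypeKey, "cache_type_v:", opts.CacheTypeValue)
}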