feat(llama.cpp): expose cache_type_k and cache_type_v for quant of kv cache (#4329)

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-05-20 10:35:01 +00:00 · 2024-12-06 10:23:59 +01:00 · 2024-12-06 10:23:59 +01:00 · d4c1746c7d
commit d4c1746c7d
parent 88737e1d76
4 changed files with 15 additions and 2 deletions
--- a/backend/backend.proto
+++ b/backend/backend.proto
@@ -242,6 +242,9 @@ message ModelOptions {
  repeated float LoraScales = 61;

  repeated string Options = 62;
+
+  string CacheTypeKey = 63;
+  string CacheTypeValue = 64;
 }

 message Result {