From 31b280f8942c794486f2b837f65c7188e998d530 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto <mudler@localai.io>
Date: Fri, 16 May 2025 18:25:45 +0200
Subject: [PATCH] llama grpc-server: stop resizing slot.cache_tokens to n_past
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This seems to be broken; see the upstream llama.cpp change:
https://github.com/ggml-org/llama.cpp/commit/360a9c98e13d35f322b4c5b1309aab0cc90ed82b#diff-a18a8e64e12a01167d8e98fc[…]cccf0d4eed09d76d879L2998-L3207

Comment out the slot.cache_tokens.resize(slot.n_past) call in
backend/cpp/llama/grpc-server.cpp for now.

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
---
 backend/cpp/llama/grpc-server.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/backend/cpp/llama/grpc-server.cpp b/backend/cpp/llama/grpc-server.cpp
index c0b0c732..1053573d 100644
--- a/backend/cpp/llama/grpc-server.cpp
+++ b/backend/cpp/llama/grpc-server.cpp
@@ -3217,7 +3217,7 @@ struct server_context {
                     SLT_INF(slot, "kv cache rm [%d, end)\n", slot.n_past);
 
                     // remove the non-common part from the cache
-                    slot.cache_tokens.resize(slot.n_past);
+                    // FIXME: disabled, seems broken (see llama.cpp commit 360a9c98e13d): slot.cache_tokens.resize(slot.n_past);
 
                     // check if we should process the image
                     if (slot.n_past < slot.n_prompt_tokens