From 31b280f8942c794486f2b837f65c7188e998d530 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Fri, 16 May 2025 18:25:45 +0200 Subject: [PATCH] =?UTF-8?q?This=20seems=20to=20be=20broken=20-=20https://g?= =?UTF-8?q?ithub.com/ggml-org/llama.cpp/commit/360a9c98e13d35f322b4c5b1309?= =?UTF-8?q?aab0cc90ed82b#diff-a18a8e64e12a01167d8e98fc[=E2=80=A6]cccf0d4ee?= =?UTF-8?q?d09d76d879L2998-L3207?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Ettore Di Giacinto --- backend/cpp/llama/grpc-server.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/cpp/llama/grpc-server.cpp b/backend/cpp/llama/grpc-server.cpp index c0b0c732..1053573d 100644 --- a/backend/cpp/llama/grpc-server.cpp +++ b/backend/cpp/llama/grpc-server.cpp @@ -3217,7 +3217,7 @@ struct server_context { SLT_INF(slot, "kv cache rm [%d, end)\n", slot.n_past); // remove the non-common part from the cache - slot.cache_tokens.resize(slot.n_past); + //slot.cache_tokens.resize(slot.n_past); // check if we should process the image if (slot.n_past < slot.n_prompt_tokens