chore(deps): bump llama.cpp to 'e434e69183fd9e1031f4445002083178c331a28b' (#5665)

chore(deps): bump llama.cpp to 'e434e69183fd9e1031f4445002083178c331a28b'

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-06-21 02:04:59 +00:00 · 2025-06-17 17:00:10 +02:00 · 2025-06-17 17:00:10 +02:00 · d68660bd5a
commit d68660bd5a
parent 30ceee2dec
2 changed files with 6 additions and 4 deletions
--- a/2
+++ b/2
@@ -6,7 +6,7 @@ BINARY_NAME=local-ai
 DETECT_LIBS?=true

 # llama.cpp versions
-CPPLLAMA_VERSION?=30e5b01de2a0bcddc7c063c8ef0802703a958417
+CPPLLAMA_VERSION?=e434e69183fd9e1031f4445002083178c331a28b

 # whisper.cpp version
 WHISPER_REPO?=https://github.com/ggml-org/whisper.cpp
--- a/backend/cpp/llama/grpc-server.cpp
+++ b/backend/cpp/llama/grpc-server.cpp
@@ -300,8 +300,10 @@ static void params_parse(const backend::ModelOptions* request,
    params.no_kv_offload = request->nokvoffload();
    params.ctx_shift = false; // We control context-shifting in any case (and we disable it as it could just lead to infinite loops)

-    params.embedding = request->embeddings();
-    params.reranking = request->reranking();
+    params.embedding = request->embeddings() || request->reranking();
+    if (request->reranking()) {
+        params.pooling_type = LLAMA_POOLING_TYPE_RANK;
+    }

    if (request->ropescaling() == "none")   { params.rope_scaling_type = LLAMA_ROPE_SCALING_TYPE_NONE; }
    else if (request->ropescaling() == "yarn")   { params.rope_scaling_type = LLAMA_ROPE_SCALING_TYPE_YARN; }
@@ -823,7 +825,7 @@ public:
    }

    grpc::Status Rerank(ServerContext* context, const backend::RerankRequest* request, backend::RerankResult* rerankResult) {
-        if (!ctx_server.params_base.reranking || ctx_server.params_base.embedding) {
+        if (!ctx_server.params_base.embedding || ctx_server.params_base.pooling_type != LLAMA_POOLING_TYPE_RANK) {
            return grpc::Status(grpc::StatusCode::UNIMPLEMENTED, "This server does not support reranking. Start it with `--reranking` and without `--embedding`");
        }