From d68660bd5a0fd1e4a0cf9dcebeceafd410f463ca Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Tue, 17 Jun 2025 17:00:10 +0200 Subject: [PATCH] chore(deps): bump llama.cpp to 'e434e69183fd9e1031f4445002083178c331a28b' (#5665) chore(deps): bump llama.cpp to 'e434e69183fd9e1031f4445002083178c331a28b' Signed-off-by: Ettore Di Giacinto --- Makefile | 2 +- backend/cpp/llama/grpc-server.cpp | 8 +++++--- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/Makefile b/Makefile index dc44a573..da27c889 100644 --- a/Makefile +++ b/Makefile @@ -6,7 +6,7 @@ BINARY_NAME=local-ai DETECT_LIBS?=true # llama.cpp versions -CPPLLAMA_VERSION?=30e5b01de2a0bcddc7c063c8ef0802703a958417 +CPPLLAMA_VERSION?=e434e69183fd9e1031f4445002083178c331a28b # whisper.cpp version WHISPER_REPO?=https://github.com/ggml-org/whisper.cpp diff --git a/backend/cpp/llama/grpc-server.cpp b/backend/cpp/llama/grpc-server.cpp index a8b21ef3..1b56e9fe 100644 --- a/backend/cpp/llama/grpc-server.cpp +++ b/backend/cpp/llama/grpc-server.cpp @@ -300,8 +300,10 @@ static void params_parse(const backend::ModelOptions* request, params.no_kv_offload = request->nokvoffload(); params.ctx_shift = false; // We control context-shifting in any case (and we disable it as it could just lead to infinite loops) - params.embedding = request->embeddings(); - params.reranking = request->reranking(); + params.embedding = request->embeddings() || request->reranking(); + if (request->reranking()) { + params.pooling_type = LLAMA_POOLING_TYPE_RANK; + } if (request->ropescaling() == "none") { params.rope_scaling_type = LLAMA_ROPE_SCALING_TYPE_NONE; } else if (request->ropescaling() == "yarn") { params.rope_scaling_type = LLAMA_ROPE_SCALING_TYPE_YARN; } @@ -823,7 +825,7 @@ public: } grpc::Status Rerank(ServerContext* context, const backend::RerankRequest* request, backend::RerankResult* rerankResult) { - if (!ctx_server.params_base.reranking || ctx_server.params_base.embedding) { + if (!ctx_server.params_base.embedding || 
ctx_server.params_base.pooling_type != LLAMA_POOLING_TYPE_RANK) { return grpc::Status(grpc::StatusCode::UNIMPLEMENTED, "This server does not support reranking. Start it with `--reranking` and without `--embedding`"); }