chore(deps): bump llama.cpp to 'e434e69183fd9e1031f4445002083178c331a28b (#5665)

chore(deps): bump llama.cpp to 'e434e69183fd9e1031f4445002083178c331a28b'

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
This commit is contained in:
Ettore Di Giacinto 2025-06-17 17:00:10 +02:00 committed by GitHub
parent 30ceee2dec
commit d68660bd5a
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 6 additions and 4 deletions

View file

@ -6,7 +6,7 @@ BINARY_NAME=local-ai
DETECT_LIBS?=true
# llama.cpp versions
CPPLLAMA_VERSION?=30e5b01de2a0bcddc7c063c8ef0802703a958417
CPPLLAMA_VERSION?=e434e69183fd9e1031f4445002083178c331a28b
# whisper.cpp version
WHISPER_REPO?=https://github.com/ggml-org/whisper.cpp

View file

@ -300,8 +300,10 @@ static void params_parse(const backend::ModelOptions* request,
params.no_kv_offload = request->nokvoffload();
params.ctx_shift = false; // We control context-shifting in any case (and we disable it as it could just lead to infinite loops)
params.embedding = request->embeddings();
params.reranking = request->reranking();
params.embedding = request->embeddings() || request->reranking();
if (request->reranking()) {
params.pooling_type = LLAMA_POOLING_TYPE_RANK;
}
if (request->ropescaling() == "none") { params.rope_scaling_type = LLAMA_ROPE_SCALING_TYPE_NONE; }
else if (request->ropescaling() == "yarn") { params.rope_scaling_type = LLAMA_ROPE_SCALING_TYPE_YARN; }
@ -823,7 +825,7 @@ public:
}
grpc::Status Rerank(ServerContext* context, const backend::RerankRequest* request, backend::RerankResult* rerankResult) {
if (!ctx_server.params_base.reranking || ctx_server.params_base.embedding) {
if (!ctx_server.params_base.embedding || ctx_server.params_base.pooling_type != LLAMA_POOLING_TYPE_RANK) {
return grpc::Status(grpc::StatusCode::UNIMPLEMENTED, "This server does not support reranking. Start it with `--reranking` and without `--embedding`");
}