From 67f7bffd18fdceef6084c6c258dd5f73430762be Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Thu, 6 Mar 2025 00:40:58 +0100 Subject: [PATCH] chore(deps): update llama.cpp and sync with upstream changes (#4950) Signed-off-by: Ettore Di Giacinto --- Makefile | 2 +- backend/backend.proto | 1 - backend/cpp/llama/grpc-server.cpp | 14 +++++++------- core/backend/options.go | 1 - pkg/functions/parse.go | 1 - 5 files changed, 8 insertions(+), 11 deletions(-) diff --git a/Makefile b/Makefile index a7d25c7a..5c809853 100644 --- a/Makefile +++ b/Makefile @@ -6,7 +6,7 @@ BINARY_NAME=local-ai DETECT_LIBS?=true # llama.cpp versions -CPPLLAMA_VERSION?=5bbe6a9fe9a8796a9389c85accec89dbc4d91e39 +CPPLLAMA_VERSION?=5e43f104cca1a14874e980326a506b44fde022b8 # whisper.cpp version WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp diff --git a/backend/backend.proto b/backend/backend.proto index aa34687a..cbb81c66 100644 --- a/backend/backend.proto +++ b/backend/backend.proto @@ -165,7 +165,6 @@ message Reply { message GrammarTrigger { string word = 1; - bool at_start = 2; } message ModelOptions { diff --git a/backend/cpp/llama/grpc-server.cpp b/backend/cpp/llama/grpc-server.cpp index 69a09c74..0f3b927a 100644 --- a/backend/cpp/llama/grpc-server.cpp +++ b/backend/cpp/llama/grpc-server.cpp @@ -469,7 +469,7 @@ struct llama_server_context bool has_eos_token = true; bool grammar_lazy = false; - std::vector grammar_trigger_words; + std::vector grammar_triggers; int32_t n_ctx; // total context for all clients / slots @@ -709,7 +709,7 @@ struct llama_server_context slot->sparams.grammar = json_value(data, "grammar", default_sparams.grammar); slot->sparams.n_probs = json_value(data, "n_probs", default_sparams.n_probs); slot->sparams.min_keep = json_value(data, "min_keep", default_sparams.min_keep); - slot->sparams.grammar_trigger_words = grammar_trigger_words; + slot->sparams.grammar_triggers = grammar_triggers; slot->sparams.grammar_lazy = grammar_lazy; if (slot->n_predict > 0 && slot->params.n_predict > slot->n_predict) { @@ -2393,12 +2393,12 @@ static void params_parse(const backend::ModelOptions* request, llama.grammar_lazy = true; for (int i = 0; i < request->grammartriggers_size(); i++) { common_grammar_trigger trigger; - trigger.word = request->grammartriggers(i).word(); - trigger.at_start = request->grammartriggers(i).at_start(); - llama.grammar_trigger_words.push_back(trigger); + trigger.type = COMMON_GRAMMAR_TRIGGER_TYPE_WORD; + trigger.value = request->grammartriggers(i).word(); + // trigger.at_start = request->grammartriggers(i).at_start(); + llama.grammar_triggers.push_back(trigger); LOG_INFO("grammar trigger", { - { "word", trigger.word }, - { "at_start", trigger.at_start } + { "word", trigger.value }, }); } } diff --git a/core/backend/options.go b/core/backend/options.go index c807e4e8..d98e136c 100644 --- a/core/backend/options.go +++ b/core/backend/options.go @@ -122,7 +122,6 @@ func grpcModelOpts(c config.BackendConfig) *pb.ModelOptions { for _, t := range c.FunctionsConfig.GrammarConfig.GrammarTriggers { triggers = append(triggers, &pb.GrammarTrigger{ Word: t.Word, - AtStart: t.AtStart, }) } diff --git a/pkg/functions/parse.go b/pkg/functions/parse.go index 30338ffd..a9d7b8bd 100644 --- a/pkg/functions/parse.go +++ b/pkg/functions/parse.go @@ -54,7 +54,6 @@ type GrammarConfig struct { type GrammarTrigger struct { // Trigger is the string that triggers the grammar Word string `yaml:"word"` - AtStart bool `yaml:"at_start"` } // FunctionsConfig is the configuration for the tool/function call.