Mirror of https://github.com/mudler/LocalAI.git, synced 2025-05-20 10:35:01 +00:00
fix(clip): do not imply GPU offload by default (#5010)
* fix(clip): do not imply GPUs by default

  Until a better solution is found upstream, be conservative and default to CPU.

  https://github.com/ggml-org/llama.cpp/pull/12322
  https://github.com/ggml-org/llama.cpp/pull/12322#issuecomment-2720970695

  Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* allow to override gpu via backend options

  Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

---------

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
parent 12568c7d6d
commit 423514a5a5
1 changed file with 21 additions and 3 deletions
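The second change lets callers opt back into GPU offload through the backend options list. Below is a minimal client-side sketch of what that looks like over the gRPC API, assuming `backend::ModelOptions` exposes the repeated string `options` field that the diff reads via `request->options(i)`; the `set_model` call and the generated header name are likewise assumptions:

#include "backend.pb.h"  // generated from LocalAI's backend.proto (assumed path)

// Build a LoadModel request that opts the CLIP/mmproj model back into GPU
// offload. Boolean backend options are passed as the bare option name;
// key/value options use the "optname:optval" form parsed in params_parse.
backend::ModelOptions make_request(const std::string &model) {
    backend::ModelOptions request;
    request.set_model(model);    // assumed field name
    request.add_options("gpu");  // parsed below: sets llama.has_gpu = true
    return request;
}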
@@ -467,6 +467,7 @@ struct llama_server_context
     bool all_slots_are_idle = false;
     bool add_bos_token = true;
     bool has_eos_token = true;
+    bool has_gpu = false;

     bool grammar_lazy = false;
     std::vector<common_grammar_trigger> grammar_triggers;
@@ -511,7 +512,10 @@ struct llama_server_context
        if (!params.mmproj.empty()) {
            multimodal = true;
            LOG_INFO("Multi Modal Mode Enabled", {});
-           clp_ctx = clip_model_load(params.mmproj.c_str(), /*verbosity=*/ 1);
+           clp_ctx = clip_init(params.mmproj.c_str(), clip_context_params {
+               /* use_gpu */ has_gpu,
+               /*verbosity=*/ 1,
+           });
            if(clp_ctx == nullptr) {
                LOG_ERR("unable to load clip model: %s", params.mmproj.c_str());
                return false;
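Note that the replacement call aggregate-initializes `clip_context_params`, so it depends on the member order of the upstream struct (`use_gpu` first, then `verbosity`). A sketch of the shape the call site assumes; the exact member types here are assumptions, and ggml-org/llama.cpp PR 12322 holds the authoritative definition:

// Assumed shape of the upstream struct, inferred from the call site above.
struct clip_context_params {
    bool use_gpu;   // now defaults to false via llama_server_context::has_gpu
    int  verbosity;
};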
@@ -2314,7 +2318,7 @@ static std::string get_all_kv_cache_types() {
 }

 static void params_parse(const backend::ModelOptions* request,
-                               common_params & params) {
+                               common_params & params, llama_server_context &llama) {

     // this is comparable to: https://github.com/ggerganov/llama.cpp/blob/d9b33fe95bd257b36c84ee5769cc048230067d6f/examples/server/server.cpp#L1809

@@ -2352,6 +2356,20 @@ static void params_parse(const backend::ModelOptions* request,
         add_rpc_devices(std::string(llama_grpc_servers));
     }

+    // decode options. Options are in form optname:optvale, or if booleans only optname.
+    for (int i = 0; i < request->options_size(); i++) {
+        std::string opt = request->options(i);
+        char *optname = strtok(&opt[0], ":");
+        char *optval = strtok(NULL, ":");
+        if (optval == NULL) {
+            optval = "true";
+        }
+
+        if (!strcmp(optname, "gpu")) {
+            llama.has_gpu = true;
+        }
+    }
+
     // TODO: Add yarn

     if (!request->tensorsplit().empty()) {
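A standalone sketch of the decoding loop above, runnable in isolation (the `context_size` option is hypothetical, for illustration only). One caveat of this scheme: `strtok` mutates the string copy in place and splits on every ':', so values that themselves contain ':' would be truncated at the second separator:

#include <cstdio>
#include <cstring>
#include <string>

int main() {
    std::string opts[] = { "gpu", "context_size:4096" };  // hypothetical inputs
    for (std::string &opt : opts) {
        const char *optname = strtok(&opt[0], ":");  // mutates the local copy
        const char *optval  = strtok(nullptr, ":");
        if (optval == nullptr) {
            optval = "true";  // bare option name => boolean flag
        }
        std::printf("%s = %s\n", optname, optval);
    }
    return 0;
}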
@@ -2445,7 +2463,7 @@ public:
    grpc::Status LoadModel(ServerContext* context, const backend::ModelOptions* request, backend::Result* result) {
        // Implement LoadModel RPC
        common_params params;
-       params_parse(request, params);
+       params_parse(request, params, llama);

        llama_backend_init();
        llama_numa_init(params.numa);
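Taken together: LoadModel now threads the server context into params_parse, so a `gpu` entry in the request's options flips llama.has_gpu before the model is loaded; without that option, the later clip_init call receives use_gpu = false and the CLIP/mmproj weights stay on the CPU.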