feat(llama.cpp): support lora with scale and yarn (#1277)

* feat(llama.cpp): support lora with scale

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* feat(llama.cpp): support yarn

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

---------

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
This commit is contained in:
Ettore Di Giacinto 2023-11-11 18:40:48 +01:00 committed by GitHub
parent bde87d00b9
commit 803a0ac02a
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
13 changed files with 476 additions and 364 deletions

View file

@ -2017,12 +2017,41 @@ static void params_parse(const backend::ModelOptions* request,
if (!request->maingpu().empty()) {
params.main_gpu = std::stoi(request->maingpu());
}
// TODO: lora needs also a scale factor
//params.lora_adapter = request->loraadapter();
//params.lora_base = request->lorabase();
if (!request->loraadapter().empty() && !request->lorabase().empty()) {
float scale_factor = 1.0f;
if (request->lorascale() != 0.0f) {
scale_factor = request->lorascale();
}
// get the directory of modelfile
std::string model_dir = params.model.substr(0, params.model.find_last_of("/\\"));
params.lora_adapter.push_back(std::make_tuple(model_dir + "/"+request->loraadapter(), scale_factor));
params.lora_base = model_dir + "/"+request->lorabase();
}
params.use_mlock = request->mlock();
params.use_mmap = request->mmap();
params.embedding = request->embeddings();
if (request->ropescaling() == "none") { params.rope_scaling_type = LLAMA_ROPE_SCALING_NONE; }
else if (request->ropescaling() == "yarn") { params.rope_scaling_type = LLAMA_ROPE_SCALING_YARN; }
else { params.rope_scaling_type = LLAMA_ROPE_SCALING_LINEAR; }
if ( request->yarnextfactor() != 0.0f ) {
params.yarn_ext_factor = request->yarnextfactor();
}
if ( request->yarnattnfactor() != 0.0f ) {
params.yarn_attn_factor = request->yarnattnfactor();
}
if ( request->yarnbetafast() != 0.0f ) {
params.yarn_beta_fast = request->yarnbetafast();
}
if ( request->yarnbetaslow() != 0.0f ) {
params.yarn_beta_slow = request->yarnbetaslow();
}
if ( request->ropefreqbase() != 0.0f ) {
params.rope_freq_base = request->ropefreqbase();
}
if ( request->ropefreqscale() != 0.0f ) {
params.rope_freq_scale = request->ropefreqscale();
}
}