Mirror of https://github.com/mudler/LocalAI.git (synced 2025-06-30 06:30:43 +00:00)
Merge branch 'master' into fix-pr-folder-tasks
Signed-off-by: Dave Lee <dave@gray101.com>
Commit 522f2e5e0a: 3 changed files with 100 additions and 31 deletions
Makefile (4 changed lines)
@@ -8,7 +8,7 @@ DETECT_LIBS?=true
 # llama.cpp versions
 GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp
 GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
-CPPLLAMA_VERSION?=feff4aa8461da7c432d144c11da4802e41fef3cf
+CPPLLAMA_VERSION?=6262d13e0b2da91f230129a93a996609a2f5a2f2

 # go-rwkv version
 RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp
@@ -16,7 +16,7 @@ RWKV_VERSION?=661e7ae26d442f5cfebd2a0881b44e8c55949ec6

 # whisper.cpp version
 WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp
-WHISPER_CPP_VERSION?=a551933542d956ae84634937acd2942eb40efaaf
+WHISPER_CPP_VERSION?=049b3a0e53c8a8e4c4576c06a1a4fccf0063a73f

 # bert.cpp version
 BERT_REPO?=https://github.com/go-skynet/go-bert.cpp
@@ -13,6 +13,7 @@
 #include <getopt.h>
 #include "clip.h"
 #include "llava.h"
+#include "log.h"
 #include "stb_image.h"
 #include "common.h"
 #include "json.hpp"
@@ -448,7 +449,7 @@ struct llama_server_context
 LOG_INFO("Multi Modal Mode Enabled", {});
 clp_ctx = clip_model_load(params.mmproj.c_str(), /*verbosity=*/ 1);
 if(clp_ctx == nullptr) {
-LOG_ERROR("unable to load clip model", {{"model", params.mmproj}});
+LOG_ERR("unable to load clip model: %s", params.mmproj.c_str());
 return false;
 }

@@ -462,7 +463,7 @@ struct llama_server_context
 ctx = llama_init.context;
 if (model == nullptr)
 {
-LOG_ERROR("unable to load model", {{"model", params.model}});
+LOG_ERR("unable to load model: %s", params.model.c_str());
 return false;
 }

@@ -470,7 +471,7 @@ struct llama_server_context
 const int n_embd_clip = clip_n_mmproj_embd(clp_ctx);
 const int n_embd_llm = llama_n_embd(model);
 if (n_embd_clip != n_embd_llm) {
-LOG_TEE("%s: embedding dim of the multimodal projector (%d) is not equal to that of LLaMA (%d). Make sure that you use the correct mmproj file.\n", __func__, n_embd_clip, n_embd_llm);
+LOG("%s: embedding dim of the multimodal projector (%d) is not equal to that of LLaMA (%d). Make sure that you use the correct mmproj file.\n", __func__, n_embd_clip, n_embd_llm);
 llama_free(ctx);
 llama_free_model(model);
 return false;
@@ -489,7 +490,7 @@ struct llama_server_context
 std::vector<char> buf(1);
 int res = llama_chat_apply_template(model, nullptr, chat, 1, true, buf.data(), buf.size());
 if (res < 0) {
-LOG_ERROR("The chat template comes with this model is not yet supported, falling back to chatml. This may cause the model to output suboptimal responses", {});
+LOG_ERR("The chat template comes with this model is not yet supported, falling back to chatml. This may cause the model to output suboptimal responses", __func__);
 sparams.chat_template = "<|im_start|>"; // llama_chat_apply_template only checks if <|im_start|> exist in the template
 }
 }
@@ -812,10 +813,11 @@ struct llama_server_context
 img_sl.img_data = clip_image_u8_init();
 if (!clip_image_load_from_bytes(image_buffer.data(), image_buffer.size(), img_sl.img_data))
 {
-LOG_ERROR("failed to load image", {
-    {"slot_id", slot->id},
-    {"img_sl_id", img_sl.id}
-});
+LOG_ERR("%s: failed to load image, slot_id: %d, img_sl_id: %d",
+    __func__,
+    slot->id,
+    img_sl.id
+);
 return false;
 }
 LOG_VERBOSE("image loaded", {
@@ -853,12 +855,12 @@ struct llama_server_context
 }
 }
 if (!found) {
-LOG_TEE("ERROR: Image with id: %i, not found.\n", img_id);
+LOG("ERROR: Image with id: %i, not found.\n", img_id);
 slot->images.clear();
 return false;
 }
 } catch (const std::invalid_argument& e) {
-LOG_TEE("Invalid image number id in prompt\n");
+LOG("Invalid image number id in prompt\n");
 slot->images.clear();
 return false;
 }
@@ -886,7 +888,7 @@ struct llama_server_context
 {"task_id", slot->task_id},
 });

-// LOG_TEE("sampling: \n%s\n", llama_sampling_print(slot->sparams).c_str());
+// LOG("sampling: \n%s\n", llama_sampling_print(slot->sparams).c_str());

 return true;
 }
@@ -926,7 +928,7 @@ struct llama_server_context
 };
 if (llama_decode(ctx, batch_view) != 0)
 {
-LOG_TEE("%s: llama_decode() failed\n", __func__);
+LOG("%s: llama_decode() failed\n", __func__);
 return;
 }
 }
@@ -938,7 +940,7 @@ struct llama_server_context
 }
 }

-LOG_TEE("system prompt updated\n");
+LOG("system prompt updated\n");
 system_need_update = false;
 }

@@ -1120,7 +1122,7 @@ struct llama_server_context
 }

 if (!llava_image_embed_make_with_clip_img(clp_ctx, params.cpuparams.n_threads, img.img_data, &img.image_embedding, &img.image_tokens)) {
-LOG_TEE("Error processing the given image");
+LOG("Error processing the given image");
 return false;
 }

@@ -1132,7 +1134,7 @@ struct llama_server_context

 void send_error(task_server& task, const std::string &error)
 {
-LOG_TEE("task %i - error: %s\n", task.id, error.c_str());
+LOG("task %i - error: %s\n", task.id, error.c_str());
 task_result res;
 res.id = task.id;
 res.multitask_id = task.multitask_id;
@@ -1371,7 +1373,7 @@ struct llama_server_context
 };
 if (llama_decode(ctx, batch_view))
 {
-LOG_TEE("%s : failed to eval\n", __func__);
+LOG("%s : failed to eval\n", __func__);
 return false;
 }
 }
@@ -1389,7 +1391,7 @@ struct llama_server_context
 llama_batch batch_img = { n_eval, nullptr, (img.image_embedding + i * n_embd), nullptr, nullptr, nullptr, nullptr, slot.n_past, 1, 0, };
 if (llama_decode(ctx, batch_img))
 {
-LOG_TEE("%s : failed to eval image\n", __func__);
+LOG("%s : failed to eval image\n", __func__);
 return false;
 }
 slot.n_past += n_eval;
@@ -1572,7 +1574,7 @@ struct llama_server_context
 slot.n_past = 0;
 slot.truncated = false;
 slot.has_next_token = true;
-LOG_TEE("Context exhausted. Slot %d released (%d tokens in cache)\n", slot.id, (int) slot.cache_tokens.size());
+LOG("Context exhausted. Slot %d released (%d tokens in cache)\n", slot.id, (int) slot.cache_tokens.size());

 continue;
 // END LOCALAI changes
@@ -1820,10 +1822,11 @@ struct llama_server_context

 if (has_images && !ingest_images(slot, n_batch))
 {
-LOG_ERROR("failed processing images", {
-    "slot_id", slot.id,
-    "task_id", slot.task_id,
-});
+LOG_ERR("%s: failed processing images Slot id : %d, Task id: %d",
+    __func__,
+    slot.id,
+    slot.task_id
+);
 // FIXME @phymbert: to be properly tested
 // early returning without changing the slot state will block the slot for ever
 // no one at the moment is checking the return value
@@ -1863,10 +1866,10 @@ struct llama_server_context
 const int bd = (slot.ga_w / slot.ga_n) * (slot.ga_n - 1);
 const int dd = (slot.ga_w / slot.ga_n) - ib * bd - slot.ga_w;

-LOG_TEE("\n");
-LOG_TEE("shift: [%6d, %6d] + %6d -> [%6d, %6d]\n", slot.ga_i, slot.n_past_se, ib * bd, slot.ga_i + ib * bd, slot.n_past_se + ib * bd);
-LOG_TEE("div: [%6d, %6d] / %6d -> [%6d, %6d]\n", slot.ga_i + ib * bd, slot.ga_i + ib * bd + slot.ga_w, slot.ga_n, (slot.ga_i + ib * bd) / slot.ga_n, (slot.ga_i + ib * bd + slot.ga_w) / slot.ga_n);
-LOG_TEE("shift: [%6d, %6d] + %6d -> [%6d, %6d]\n", slot.ga_i + ib * bd + slot.ga_w, slot.n_past_se + ib * bd, dd, slot.ga_i + ib * bd + slot.ga_w + dd, slot.n_past_se + ib * bd + dd);
+LOG("\n");
+LOG("shift: [%6d, %6d] + %6d -> [%6d, %6d]\n", slot.ga_i, slot.n_past_se, ib * bd, slot.ga_i + ib * bd, slot.n_past_se + ib * bd);
+LOG("div: [%6d, %6d] / %6d -> [%6d, %6d]\n", slot.ga_i + ib * bd, slot.ga_i + ib * bd + slot.ga_w, slot.ga_n, (slot.ga_i + ib * bd) / slot.ga_n, (slot.ga_i + ib * bd + slot.ga_w) / slot.ga_n);
+LOG("shift: [%6d, %6d] + %6d -> [%6d, %6d]\n", slot.ga_i + ib * bd + slot.ga_w, slot.n_past_se + ib * bd, dd, slot.ga_i + ib * bd + slot.ga_w + dd, slot.n_past_se + ib * bd + dd);

 llama_kv_cache_seq_add(ctx, slot.id, slot.ga_i, slot.n_past_se, ib * bd);
 llama_kv_cache_seq_div(ctx, slot.id, slot.ga_i + ib * bd, slot.ga_i + ib * bd + slot.ga_w,slot.ga_n);
@@ -1876,7 +1879,7 @@ struct llama_server_context

 slot.ga_i += slot.ga_w / slot.ga_n;

-LOG_TEE("\nn_past_old = %d, n_past = %d, ga_i = %d\n\n", slot.n_past_se + bd, slot.n_past_se, slot.ga_i);
+LOG("\nn_past_old = %d, n_past = %d, ga_i = %d\n\n", slot.n_past_se + bd, slot.n_past_se, slot.ga_i);
 }
 slot.n_past_se += n_tokens;
 }
@@ -1901,11 +1904,11 @@ struct llama_server_context
 if (n_batch == 1 || ret < 0)
 {
 // if you get here, it means the KV cache is full - try increasing it via the context size
-LOG_TEE("%s : failed to decode the batch, n_batch = %d, ret = %d\n", __func__, n_batch, ret);
+LOG("%s : failed to decode the batch, n_batch = %d, ret = %d\n", __func__, n_batch, ret);
 return false;
 }

-LOG_TEE("%s : failed to find free space in the KV cache, retrying with smaller n_batch = %d\n", __func__, n_batch / 2);
+LOG("%s : failed to find free space in the KV cache, retrying with smaller n_batch = %d\n", __func__, n_batch / 2);

 // retry with half the batch size to try to find a free slot in the KV cache
 n_batch /= 2;
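The hunks above consistently replace the old structured logging calls (LOG_ERROR with {{"key", value}} pairs, and LOG_TEE) with the printf-style LOG and LOG_ERR macros pulled in through the newly added log.h include. The snippet below is a minimal, self-contained sketch of that call pattern only: the two macro definitions are stand-ins for illustration rather than the real log.h ones, and load_clip_model is a hypothetical helper, not a function from this diff.

// Stand-in macros that only mimic the printf-style call shape used above;
// the real definitions come from the log.h header added in this change.
#include <cstdio>
#include <string>

#define LOG(...)     std::fprintf(stdout, __VA_ARGS__)
#define LOG_ERR(...) std::fprintf(stderr, __VA_ARGS__)

// Hypothetical helper showing the error-reporting pattern from the diff:
// a plain format string plus C arguments instead of key/value pairs.
static bool load_clip_model(const std::string &mmproj) {
    const bool loaded = !mmproj.empty();   // placeholder for the real clip_model_load(...) call
    if (!loaded) {
        LOG_ERR("unable to load clip model: %s\n", mmproj.c_str());
        return false;
    }
    LOG("%s: multi modal mode enabled\n", __func__);
    return true;
}

int main() {
    return load_clip_model("mmproj-model-f16.gguf") ? 0 : 1;
}

Compiled with any C++11 compiler, this prints the error path to stderr and the informational path to stdout, mirroring how the rewritten calls above are shaped.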
@@ -675,6 +675,72 @@
     - filename: Azure_Dusk-v0.2-Q4_K_M-imat.gguf
       sha256: c03a670c00976d14c267a0322374ed488b2a5f4790eb509136ca4e75cbc10cf4
       uri: huggingface://Lewdiculous/Azure_Dusk-v0.2-GGUF-IQ-Imatrix/Azure_Dusk-v0.2-Q4_K_M-imat.gguf
+- !!merge <<: *llama31
+  name: "l3.1-8b-niitama-v1.1-iq-imatrix"
+  icon: https://cdn-uploads.huggingface.co/production/uploads/65d4cf2693a0a3744a27536c/2Q5ky8TvP0vLS1ulMXnrn.png
+  urls:
+    - https://huggingface.co/Sao10K/L3.1-8B-Niitama-v1.1
+    - https://huggingface.co/Lewdiculous/L3.1-8B-Niitama-v1.1-GGUF-IQ-Imatrix
+  description: |
+    GGUF-IQ-Imatrix quants for Sao10K/L3.1-8B-Niitama-v1.1
+    Here's the subjectively superior L3 version: L3-8B-Niitama-v1
+    An experimental model using experimental methods.
+
+    More detail on it:
+
+    Tamamo and Niitama are made from the same data. Literally. The only thing that's changed is how theyre shuffled and formatted. Yet, I get wildly different results.
+
+    Interesting, eh? Feels kinda not as good compared to the l3 version, but it's aight.
+  overrides:
+    parameters:
+      model: L3.1-8B-Niitama-v1.1-Q4_K_M-imat.gguf
+  files:
+    - filename: L3.1-8B-Niitama-v1.1-Q4_K_M-imat.gguf
+      sha256: 524163bd0f1d43c9284b09118abcc192f3250b13dd3bb79d60c28321108b6748
+      uri: huggingface://Lewdiculous/L3.1-8B-Niitama-v1.1-GGUF-IQ-Imatrix/L3.1-8B-Niitama-v1.1-Q4_K_M-imat.gguf
+- !!merge <<: *llama31
+  name: "llama-3.1-8b-stheno-v3.4-iq-imatrix"
+  icon: https://huggingface.co/Sao10K/Llama-3.1-8B-Stheno-v3.4/resolve/main/meneno.jpg
+  urls:
+    - https://huggingface.co/Sao10K/Llama-3.1-8B-Stheno-v3.4
+    - https://huggingface.co/Lewdiculous/Llama-3.1-8B-Stheno-v3.4-GGUF-IQ-Imatrix
+  description: |
+    This model has went through a multi-stage finetuning process.
+
+    - 1st, over a multi-turn Conversational-Instruct
+    - 2nd, over a Creative Writing / Roleplay along with some Creative-based Instruct Datasets.
+    - - Dataset consists of a mixture of Human and Claude Data.
+
+    Prompting Format:
+
+    - Use the L3 Instruct Formatting - Euryale 2.1 Preset Works Well
+    - Temperature + min_p as per usual, I recommend 1.4 Temp + 0.2 min_p.
+    - Has a different vibe to previous versions. Tinker around.
+
+    Changes since previous Stheno Datasets:
+
+    - Included Multi-turn Conversation-based Instruct Datasets to boost multi-turn coherency. # This is a seperate set, not the ones made by Kalomaze and Nopm, that are used in Magnum. They're completely different data.
+    - Replaced Single-Turn Instruct with Better Prompts and Answers by Claude 3.5 Sonnet and Claude 3 Opus.
+    - Removed c2 Samples -> Underway of re-filtering and masking to use with custom prefills. TBD
+    - Included 55% more Roleplaying Examples based of [Gryphe's](https://huggingface.co/datasets/Gryphe/Sonnet3.5-Charcard-Roleplay) Charcard RP Sets. Further filtered and cleaned on.
+    - Included 40% More Creative Writing Examples.
+    - Included Datasets Targeting System Prompt Adherence.
+    - Included Datasets targeting Reasoning / Spatial Awareness.
+    - Filtered for the usual errors, slop and stuff at the end. Some may have slipped through, but I removed nearly all of it.
+
+    Personal Opinions:
+
+    - Llama3.1 was more disappointing, in the Instruct Tune? It felt overbaked, atleast. Likely due to the DPO being done after their SFT Stage.
+    - Tuning on L3.1 base did not give good results, unlike when I tested with Nemo base. unfortunate.
+    - Still though, I think I did an okay job. It does feel a bit more distinctive.
+    - It took a lot of tinkering, like a LOT to wrangle this.
+  overrides:
+    parameters:
+      model: Llama-3.1-8B-Stheno-v3.4-Q4_K_M-imat.gguf
+  files:
+    - filename: Llama-3.1-8B-Stheno-v3.4-Q4_K_M-imat.gguf
+      sha256: 830d4858aa11a654f82f69fa40dee819edf9ecf54213057648304eb84b8dd5eb
+      uri: huggingface://Lewdiculous/Llama-3.1-8B-Stheno-v3.4-GGUF-IQ-Imatrix/Llama-3.1-8B-Stheno-v3.4-Q4_K_M-imat.gguf
 - &deepseek
   ## Deepseek
   url: "github:mudler/LocalAI/gallery/deepseek.yaml@master"