From 4a079f893ca28217c86b9659a70d7ad33b8bf6fa Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Wed, 4 Dec 2024 23:19:35 +0100 Subject: [PATCH 001/849] chore: :arrow_up: Update ggerganov/llama.cpp to `59f4db10883a4f3e855cffbf2c3ab68430e95272` (#4319) :arrow_up: Update ggerganov/llama.cpp Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index dad02937..3a4a2d3e 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ DETECT_LIBS?=true # llama.cpp versions GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be -CPPLLAMA_VERSION?=cc98896db858df7aa40d0e16a505883ef196a482 +CPPLLAMA_VERSION?=59f4db10883a4f3e855cffbf2c3ab68430e95272 # whisper.cpp version WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp From c2261495038617fecc3b9cd8431a3478ff2bc9b8 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Thu, 5 Dec 2024 09:09:11 +0100 Subject: [PATCH 002/849] chore: :arrow_up: Update leejet/stable-diffusion.cpp to `9578fdcc4632dc3de5565f28e2fb16b7c18f8d48` (#4320) :arrow_up: Update leejet/stable-diffusion.cpp Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 3a4a2d3e..c1a6cbcd 100644 --- a/Makefile +++ b/Makefile @@ -32,7 +32,7 @@ BARKCPP_VERSION?=v1.0.0 # stablediffusion.cpp (ggml) STABLEDIFFUSION_GGML_REPO?=https://github.com/leejet/stable-diffusion.cpp -STABLEDIFFUSION_GGML_VERSION?=4570715727f35e5a07a76796d823824c8f42206c +STABLEDIFFUSION_GGML_VERSION?=9578fdcc4632dc3de5565f28e2fb16b7c18f8d48 ONNX_VERSION?=1.20.0 ONNX_ARCH?=x64 From ab0f8648a33aa0c4453d67d989b530dab6fae477 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Thu, 5 Dec 2024 10:01:49 +0100 Subject: [PATCH 003/849] chore(model gallery): add rp-naughty-v1.0c-8b (#4322) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index 02a58d22..b1269741 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -7891,6 +7891,26 @@ - filename: Meta-Llama-3-8B-Instruct-exp5-11-Q4_K_M.gguf sha256: 5dd81b8b809667d10036499affdd1461cf95af50b405cbc9f800b421a4b60e98 uri: huggingface://DavidAU/Meta-Llama-3-Instruct-8.9B-BRAINSTORM-5x-FORM-11-GGUF/Meta-Llama-3-8B-Instruct-exp5-11-Q4_K_M.gguf +- !!merge <<: *llama3 + name: "rp-naughty-v1.0c-8b" + urls: + - https://huggingface.co/QuantFactory/RP-Naughty-v1.0c-8b-GGUF + description: | + This model was merged using the Model Stock merge method using aifeifei798/llama3-8B-DarkIdol-2.2-Uncensored-1048K as a base. 
+ The following models were included in the merge: + + underwoods/adventure-8b + Khetterman/Multilingual-SaigaSuzume-8B + underwoods/writer-8b + Khetterman/Kosmos-8B-v1 + Khetterman/CursedMatrix-8B-v9 + overrides: + parameters: + model: RP-Naughty-v1.0c-8b.Q4_K_M.gguf + files: + - filename: RP-Naughty-v1.0c-8b.Q4_K_M.gguf + sha256: c344564d26d0c3d244d31cfeb103666eab37f9dee6678a2dbaf5bfcf4109d789 + uri: huggingface://QuantFactory/RP-Naughty-v1.0c-8b-GGUF/RP-Naughty-v1.0c-8b.Q4_K_M.gguf - &command-R ### START Command-r url: "github:mudler/LocalAI/gallery/command-r.yaml@master" From be907d993f95f1144271d2847e3c176314d00c68 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Thu, 5 Dec 2024 10:02:02 +0100 Subject: [PATCH 004/849] chore(model gallery): add loki-v2.6-8b-1024k (#4321) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 133 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 133 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index b1269741..cc40048c 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -3420,6 +3420,139 @@ - filename: Sparse-Llama-3.1-8B-2of4.Q4_K_M.gguf sha256: c481e7089ffaedd5ae8c74dccc7fb45f6509640b661fa086ae979f6fefc3fdba uri: huggingface://QuantFactory/Sparse-Llama-3.1-8B-2of4-GGUF/Sparse-Llama-3.1-8B-2of4.Q4_K_M.gguf +- !!merge <<: *llama31 + name: "loki-v2.6-8b-1024k" + icon: https://cdn-uploads.huggingface.co/production/uploads/6472de046facfb01d8b1fb9d/uQPITKRS8XLTLyaiGwgh_.jpeg + urls: + - https://huggingface.co/QuantFactory/Loki-v2.6-8b-1024k-GGUF + description: | + The following models were included in the merge: + MrRobotoAI/Epic_Fiction-8b + MrRobotoAI/Unaligned-RP-Base-8b-1024k + MrRobotoAI/Loki-.Epic_Fiction.-8b + Casual-Autopsy/L3-Luna-8B + Casual-Autopsy/L3-Super-Nova-RP-8B + Casual-Autopsy/L3-Umbral-Mind-RP-v3.0-8B + Casual-Autopsy/Halu-L3-Stheno-BlackOasis-8B + Undi95/Llama-3-LewdPlay-8B + Undi95/Llama-3-LewdPlay-8B-evo + Undi95/Llama-3-Unholy-8B + ChaoticNeutrals/Hathor_Tahsin-L3-8B-v0.9 + ChaoticNeutrals/Hathor_RP-v.01-L3-8B + ChaoticNeutrals/Domain-Fusion-L3-8B + ChaoticNeutrals/T-900-8B + ChaoticNeutrals/Poppy_Porpoise-1.4-L3-8B + ChaoticNeutrals/Templar_v1_8B + ChaoticNeutrals/Hathor_Respawn-L3-8B-v0.8 + ChaoticNeutrals/Sekhmet_Gimmel-L3.1-8B-v0.3 + zeroblu3/LewdPoppy-8B-RP + tohur/natsumura-storytelling-rp-1.0-llama-3.1-8b + jeiku/Chaos_RP_l3_8B + tannedbum/L3-Nymeria-Maid-8B + Nekochu/Luminia-8B-RP + vicgalle/Humanish-Roleplay-Llama-3.1-8B + saishf/SOVLish-Maid-L3-8B + Dogge/llama-3-8B-instruct-Bluemoon-Freedom-RP + MrRobotoAI/Epic_Fiction-8b-v4 + maldv/badger-lambda-0-llama-3-8b + maldv/llama-3-fantasy-writer-8b + maldv/badger-kappa-llama-3-8b + maldv/badger-mu-llama-3-8b + maldv/badger-lambda-llama-3-8b + maldv/badger-iota-llama-3-8b + maldv/badger-writer-llama-3-8b + Magpie-Align/MagpieLM-8B-Chat-v0.1 + nbeerbower/llama-3-gutenberg-8B + nothingiisreal/L3-8B-Stheno-Horny-v3.3-32K + nbeerbower/llama-3-spicy-abliterated-stella-8B + Magpie-Align/MagpieLM-8B-SFT-v0.1 + NeverSleep/Llama-3-Lumimaid-8B-v0.1 + mlabonne/NeuralDaredevil-8B-abliterated + mlabonne/Daredevil-8B-abliterated + NeverSleep/Llama-3-Lumimaid-8B-v0.1-OAS + nothingiisreal/L3-8B-Instruct-Abliterated-DWP + openchat/openchat-3.6-8b-20240522 + turboderp/llama3-turbcat-instruct-8b + UCLA-AGI/Llama-3-Instruct-8B-SPPO-Iter3 + Undi95/Llama-3-LewdPlay-8B + TIGER-Lab/MAmmoTH2-8B-Plus + OwenArli/Awanllm-Llama-3-8B-Cumulus-v1.0 + refuelai/Llama-3-Refueled + SicariusSicariiStuff/LLAMA-3_8B_Unaligned_Alpha + 
NousResearch/Hermes-2-Theta-Llama-3-8B + ResplendentAI/Nymph_8B + grimjim/Llama-3-Oasis-v1-OAS-8B + flammenai/Mahou-1.3b-llama3-8B + lemon07r/Llama-3-RedMagic4-8B + grimjim/Llama-3.1-SuperNova-Lite-lorabilterated-8B + grimjim/Llama-Nephilim-Metamorphosis-v2-8B + lemon07r/Lllama-3-RedElixir-8B + grimjim/Llama-3-Perky-Pat-Instruct-8B + ChaoticNeutrals/Hathor_RP-v.01-L3-8B + grimjim/llama-3-Nephilim-v2.1-8B + ChaoticNeutrals/Hathor_Respawn-L3-8B-v0.8 + migtissera/Llama-3-8B-Synthia-v3.5 + Locutusque/Llama-3-Hercules-5.0-8B + WhiteRabbitNeo/Llama-3-WhiteRabbitNeo-8B-v2.0 + VAGOsolutions/Llama-3-SauerkrautLM-8b-Instruct + iRyanBell/ARC1-II + HPAI-BSC/Llama3-Aloe-8B-Alpha + HaitameLaf/Llama-3-8B-StoryGenerator + failspy/Meta-Llama-3-8B-Instruct-abliterated-v3 + Undi95/Llama-3-Unholy-8B + ajibawa-2023/Uncensored-Frank-Llama-3-8B + ajibawa-2023/SlimOrca-Llama-3-8B + ChaoticNeutrals/Templar_v1_8B + aifeifei798/llama3-8B-DarkIdol-2.2-Uncensored-1048K + ChaoticNeutrals/Hathor_Tahsin-L3-8B-v0.9 + Blackroot/Llama-3-Gamma-Twist + FPHam/L3-8B-Everything-COT + Blackroot/Llama-3-LongStory + ChaoticNeutrals/Sekhmet_Gimmel-L3.1-8B-v0.3 + abacusai/Llama-3-Smaug-8B + Khetterman/CursedMatrix-8B-v9 + ajibawa-2023/Scarlett-Llama-3-8B-v1.0 + MrRobotoAI/Unaligned-RP-Base-8b-1024k + surya-narayanan/physics_non_masked + MrRobotoAI/Unaligned-RP-Base-8b-1024k + surya-narayanan/electrical_engineering + MrRobotoAI/Unaligned-RP-Base-8b-1024k + surya-narayanan/college_chemistry + MrRobotoAI/Unaligned-RP-Base-8b-1024k + surya-narayanan/philosophy_non_masked + MrRobotoAI/Unaligned-RP-Base-8b-1024k + surya-narayanan/college_physics + MrRobotoAI/Unaligned-RP-Base-8b-1024k + surya-narayanan/philosophy + MrRobotoAI/Unaligned-RP-Base-8b-1024k + surya-narayanan/formal_logic + MrRobotoAI/Unaligned-RP-Base-8b-1024k + surya-narayanan/philosophy_100 + MrRobotoAI/Unaligned-RP-Base-8b-1024k + surya-narayanan/conceptual_physics + MrRobotoAI/Unaligned-RP-Base-8b-1024k + surya-narayanan/college_computer_science + MrRobotoAI/Unaligned-RP-Base-8b-1024k + surya-narayanan/psychology_non_masked + MrRobotoAI/Unaligned-RP-Base-8b-1024k + surya-narayanan/psychology + MrRobotoAI/Unaligned-RP-Base-8b-1024k + Blackroot/Llama3-RP-Lora + MrRobotoAI/Unaligned-RP-Base-8b-1024k + Azazelle/Llama-3-LimaRP-Instruct-LoRA-8B + MrRobotoAI/Unaligned-RP-Base-8b-1024k + nothingiisreal/llama3-8B-DWP-lora + MrRobotoAI/Unaligned-RP-Base-8b-1024k + surya-narayanan/world_religions + MrRobotoAI/Unaligned-RP-Base-8b-1024k + surya-narayanan/high_school_european_history + MrRobotoAI/Unaligned-RP-Base-8b-1024k + surya-narayanan/electrical_engineering + MrRobotoAI/Unaligned-RP-Base-8b-1024k + Azazelle/Llama-3-8B-Abomination-LORA + MrRobotoAI/Unaligned-RP-Base-8b-1024k + Azazelle/Llama-3-LongStory-LORA + MrRobotoAI/Unaligned-RP-Base-8b-1024k + surya-narayanan/human_sexuality + MrRobotoAI/Unaligned-RP-Base-8b-1024k + surya-narayanan/sociology + MrRobotoAI/Unaligned-RP-Base-8b-1024k + ResplendentAI/Theory_of_Mind_Llama3 + MrRobotoAI/Unaligned-RP-Base-8b-1024k + Azazelle/Smarts_Llama3 + MrRobotoAI/Unaligned-RP-Base-8b-1024k + Azazelle/Llama-3-LongStory-LORA + MrRobotoAI/Unaligned-RP-Base-8b-1024k + Azazelle/Nimue-8B + MrRobotoAI/Unaligned-RP-Base-8b-1024k + vincentyandex/lora_llama3_chunked_novel_bs128 + MrRobotoAI/Unaligned-RP-Base-8b-1024k + ResplendentAI/Aura_Llama3 + MrRobotoAI/Unaligned-RP-Base-8b-1024k + Azazelle/L3-Daybreak-8b-lora + MrRobotoAI/Unaligned-RP-Base-8b-1024k + ResplendentAI/Luna_Llama3 + MrRobotoAI/Unaligned-RP-Base-8b-1024k + nicce/story-mixtral-8x7b-lora + 
MrRobotoAI/Unaligned-RP-Base-8b-1024k + Blackroot/Llama-3-LongStory-LORA + MrRobotoAI/Unaligned-RP-Base-8b-1024k + ResplendentAI/NoWarning_Llama3 + MrRobotoAI/Unaligned-RP-Base-8b-1024k + ResplendentAI/BlueMoon_Llama3 + overrides: + parameters: + model: Loki-v2.6-8b-1024k.Q4_K_M.gguf + files: + - filename: Loki-v2.6-8b-1024k.Q4_K_M.gguf + sha256: 9b15c1fee0a0e6d6ed97df3d1b6fc8f774e6e1bd388328599e731c62e0f19d81 + uri: huggingface://QuantFactory/Loki-v2.6-8b-1024k-GGUF/Loki-v2.6-8b-1024k.Q4_K_M.gguf - &deepseek ## Deepseek url: "github:mudler/LocalAI/gallery/deepseek.yaml@master" From b86a3e4fa69513cc876dac327e676740306f16dc Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Thu, 5 Dec 2024 10:05:35 +0100 Subject: [PATCH 005/849] chore(model gallery): add math-iio-7b-instruct (#4323) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index cc40048c..e8e9c9fc 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -1791,6 +1791,30 @@ - filename: HomerCreativeAnvita-Mix-Qw7B.Q4_K_M.gguf sha256: a356f279a104bff0bbc2ef7ec136c1e774153de8893bf988083e96fb7f4bc053 uri: huggingface://QuantFactory/HomerCreativeAnvita-Mix-Qw7B-GGUF/HomerCreativeAnvita-Mix-Qw7B.Q4_K_M.gguf +- !!merge <<: *qwen25 + name: "math-iio-7b-instruct" + icon: https://cdn-uploads.huggingface.co/production/uploads/65bb837dbfb878f46c77de4c/faLfR-doaWP_BLUkOQrbq.png + urls: + - https://huggingface.co/prithivMLmods/Math-IIO-7B-Instruct + - https://huggingface.co/QuantFactory/Math-IIO-7B-Instruct-GGUF + description: | + The Math IIO 7B Instruct is a fine-tuned language model based on the robust Qwen2.5-7B-Instruct architecture. This model has been specifically trained to excel in single-shot mathematical reasoning and instruction-based tasks, making it a reliable choice for educational, analytical, and problem-solving applications. + Key Features: + Math-Optimized Capabilities: + The model is designed to handle complex mathematical problems, step-by-step calculations, and reasoning tasks. + + Instruction-Tuned: + Fine-tuned for better adherence to structured queries and task-oriented prompts, enabling clear and concise outputs. + + Large Vocabulary: + Equipped with an extensive tokenizer configuration and custom tokens to ensure precise mathematical notation support. 
+ overrides: + parameters: + model: Math-IIO-7B-Instruct.Q4_K_M.gguf + files: + - filename: Math-IIO-7B-Instruct.Q4_K_M.gguf + sha256: 8ffda0b6a43eb9997dfd7db48fe3bd0970fd1b9b86fb68f082c38622a48b58f4 + uri: huggingface://QuantFactory/Math-IIO-7B-Instruct-GGUF/Math-IIO-7B-Instruct.Q4_K_M.gguf - &archfunct license: apache-2.0 tags: From b90d78d9f6438cd8c90fd901b45539a4b410c264 Mon Sep 17 00:00:00 2001 From: PetrFlegr Date: Thu, 5 Dec 2024 16:06:51 +0100 Subject: [PATCH 006/849] Updated links of yamls (#4324) Updated links Links to deplyment*.yaml was changed Signed-off-by: PetrFlegr --- docs/content/docs/getting-started/kubernetes.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/content/docs/getting-started/kubernetes.md b/docs/content/docs/getting-started/kubernetes.md index fb08b046..aea28f3e 100644 --- a/docs/content/docs/getting-started/kubernetes.md +++ b/docs/content/docs/getting-started/kubernetes.md @@ -10,13 +10,13 @@ ico = "rocket_launch" For installing LocalAI in Kubernetes, the deployment file from the `examples` can be used and customized as prefered: ``` -kubectl apply -f https://raw.githubusercontent.com/mudler/LocalAI/master/examples/kubernetes/deployment.yaml +kubectl apply -f https://raw.githubusercontent.com/mudler/LocalAI-examples/refs/heads/main/kubernetes/deployment.yaml ``` For Nvidia GPUs: ``` -kubectl apply -f https://raw.githubusercontent.com/mudler/LocalAI/master/examples/kubernetes/deployment-nvidia.yaml +kubectl apply -f https://raw.githubusercontent.com/mudler/LocalAI-examples/refs/heads/main/kubernetes/deployment-nvidia.yaml ``` Alternatively, the [helm chart](https://github.com/go-skynet/helm-charts) can be used as well: From 3127cd135279f40926d4375bdf3e940b789e2734 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Thu, 5 Dec 2024 16:57:56 +0100 Subject: [PATCH 007/849] chore(docs): update available backends (#4325) Signed-off-by: Ettore Di Giacinto --- .../docs/reference/compatibility-table.md | 24 ++++++++----------- 1 file changed, 10 insertions(+), 14 deletions(-) diff --git a/docs/content/docs/reference/compatibility-table.md b/docs/content/docs/reference/compatibility-table.md index f76ad85d..c3bf2660 100644 --- a/docs/content/docs/reference/compatibility-table.md +++ b/docs/content/docs/reference/compatibility-table.md @@ -6,7 +6,7 @@ weight = 24 url = "/model-compatibility/" +++ -Besides llama based models, LocalAI is compatible also with other architectures. The table below lists all the compatible models families and the associated binding repository. +Besides llama based models, LocalAI is compatible also with other architectures. The table below lists all the backends, compatible models families and the associated repository. 
{{% alert note %}}

LocalAI will
attempt to automatically load models which are not explicitly confi | `diffusers` | SD,... | no | Image generation | no | no | N/A | | `vall-e-x` | Vall-E | no | Audio generation and Voice cloning | no | no | CPU/CUDA | | `vllm` | Various GPTs and quantization formats | yes | GPT | no | no | CPU/CUDA | +| `mamba` | Mamba models architecture | yes | GPT | no | no | CPU/CUDA | | `exllama2` | GPTQ | yes | GPT only | no | no | N/A | | `transformers-musicgen` | | no | Audio generation | no | no | N/A | | [tinydream](https://github.com/symisc/tiny-dream#tiny-dreaman-embedded-header-only-stable-diffusion-inference-c-librarypixlabiotiny-dream) | stablediffusion | no | Image | no | no | N/A | | `coqui` | Coqui | no | Audio generation and Voice cloning | no | no | CPU/CUDA | +| `openvoice` | Open voice | no | Audio generation and Voice cloning | no | no | CPU/CUDA | +| `parler-tts` | Open voice | no | Audio generation and Voice cloning | no | no | CPU/CUDA | +| [rerankers](https://github.com/AnswerDotAI/rerankers) | Reranking API | no | Reranking | no | no | CPU/CUDA | | `transformers` | Various GPTs and quantization formats | yes | GPT, embeddings | yes | yes**** | CPU/CUDA/XPU | +| [bark-cpp](https://github.com/PABannier/bark.cpp) | bark | no | Audio-Only | no | no | yes | +| [stablediffusion-cpp](https://github.com/leejet/stable-diffusion.cpp) | stablediffusion-1, stablediffusion-2, stablediffusion-3, flux, PhotoMaker | no | Image | no | no | N/A | +| [silero-vad](https://github.com/snakers4/silero-vad) with [Golang bindings](https://github.com/streamer45/silero-vad-go) | Silero VAD | no | Voice Activity Detection | no | no | CPU | Note: any backend name listed above can be used in the `backend` field of the model configuration file (See [the advanced section]({{%relref "docs/advanced" %}})). 
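The Note above can be made concrete: to pin a model to one of the listed backends instead of relying on automatic detection, set the `backend` field in that model's YAML configuration file. A minimal sketch follows — the file name, model name, and weights file are illustrative, not taken from these patches:

```yaml
# models/whisper-base.yaml — hypothetical model config pinning a backend.
# "backend" accepts any backend name from the table above.
name: whisper-base         # name the model is exposed under by the API
backend: whisper           # force this backend, skipping auto-detection
parameters:
  model: ggml-base.en.bin  # weights file, resolved against the models path
```

With a config like this in place, requests referencing `whisper-base` are always routed to the pinned backend rather than whichever backend auto-detection would pick.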
From ba225f660b532e9f51366660b17405913392466a Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Thu, 5 Dec 2024 22:54:00 +0100 Subject: [PATCH 008/849] docs: :arrow_up: update docs version mudler/LocalAI (#4327) :arrow_up: Update docs version mudler/LocalAI Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- docs/data/version.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/data/version.json b/docs/data/version.json index 20611657..bb7517a1 100644 --- a/docs/data/version.json +++ b/docs/data/version.json @@ -1,3 +1,3 @@ { - "version": "v2.23.0" + "version": "v2.24.0" } From 88737e1d760e5d6466f98d1d692d6589dcd1ca7a Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Fri, 6 Dec 2024 09:15:21 +0100 Subject: [PATCH 009/849] chore: :arrow_up: Update ggerganov/llama.cpp to `c9c6e01daedac542b174c235872569fce5385982` (#4328) :arrow_up: Update ggerganov/llama.cpp Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index c1a6cbcd..225189ad 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ DETECT_LIBS?=true # llama.cpp versions GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be -CPPLLAMA_VERSION?=59f4db10883a4f3e855cffbf2c3ab68430e95272 +CPPLLAMA_VERSION?=c9c6e01daedac542b174c235872569fce5385982 # whisper.cpp version WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp From d4c1746c7db3d13ba97bb9d8a8b698d8a366a0a7 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Fri, 6 Dec 2024 10:23:59 +0100 Subject: [PATCH 010/849] feat(llama.cpp): expose cache_type_k and cache_type_v for quant of kv cache (#4329) Signed-off-by: Ettore Di Giacinto --- backend/backend.proto | 3 +++ backend/cpp/llama/grpc-server.cpp | 6 ++++++ core/backend/options.go | 2 ++ core/config/backend_config.go | 6 ++++-- 4 files changed, 15 insertions(+), 2 deletions(-) diff --git a/backend/backend.proto b/backend/backend.proto index 48b0101b..0a341ca2 100644 --- a/backend/backend.proto +++ b/backend/backend.proto @@ -242,6 +242,9 @@ message ModelOptions { repeated float LoraScales = 61; repeated string Options = 62; + + string CacheTypeKey = 63; + string CacheTypeValue = 64; } message Result { diff --git a/backend/cpp/llama/grpc-server.cpp b/backend/cpp/llama/grpc-server.cpp index 0fde74cb..ea5c4e34 100644 --- a/backend/cpp/llama/grpc-server.cpp +++ b/backend/cpp/llama/grpc-server.cpp @@ -2241,6 +2241,12 @@ static void params_parse(const backend::ModelOptions* request, } // params.model_alias ?? 
params.model_alias = request->modelfile(); + if (!request->cachetypekey().empty()) { + params.cache_type_k = request->cachetypekey(); + } + if (!request->cachetypevalue().empty()) { + params.cache_type_v = request->cachetypevalue(); + } params.n_ctx = request->contextsize(); //params.memory_f16 = request->f16memory(); params.cpuparams.n_threads = request->threads(); diff --git a/core/backend/options.go b/core/backend/options.go index 1f88122f..f6247c60 100644 --- a/core/backend/options.go +++ b/core/backend/options.go @@ -151,6 +151,8 @@ func grpcModelOpts(c config.BackendConfig) *pb.ModelOptions { TensorParallelSize: int32(c.TensorParallelSize), MMProj: c.MMProj, FlashAttention: c.FlashAttention, + CacheTypeKey: c.CacheTypeK, + CacheTypeValue: c.CacheTypeV, NoKVOffload: c.NoKVOffloading, YarnExtFactor: c.YarnExtFactor, YarnAttnFactor: c.YarnAttnFactor, diff --git a/core/config/backend_config.go b/core/config/backend_config.go index 1de540f9..0ff34769 100644 --- a/core/config/backend_config.go +++ b/core/config/backend_config.go @@ -155,8 +155,10 @@ type LLMConfig struct { TensorParallelSize int `yaml:"tensor_parallel_size"` // vLLM MMProj string `yaml:"mmproj"` - FlashAttention bool `yaml:"flash_attention"` - NoKVOffloading bool `yaml:"no_kv_offloading"` + FlashAttention bool `yaml:"flash_attention"` + NoKVOffloading bool `yaml:"no_kv_offloading"` + CacheTypeK string `yaml:"cache_type_k"` + CacheTypeV string `yaml:"cache_type_v"` RopeScaling string `yaml:"rope_scaling"` ModelType string `yaml:"type"` From 5592f5e8206ba0bc8c2a00f760cde1f7b1da2c08 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Fri, 6 Dec 2024 22:46:51 +0100 Subject: [PATCH 011/849] chore: :arrow_up: Update ggerganov/llama.cpp to `c5ede3849fc021174862f9c0bf8273808d8f0d39` (#4330) :arrow_up: Update ggerganov/llama.cpp Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 225189ad..1ab621cd 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ DETECT_LIBS?=true # llama.cpp versions GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be -CPPLLAMA_VERSION?=c9c6e01daedac542b174c235872569fce5385982 +CPPLLAMA_VERSION?=c5ede3849fc021174862f9c0bf8273808d8f0d39 # whisper.cpp version WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp From 7184ca546fc553874441e789ff466de69b4e2b93 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sat, 7 Dec 2024 10:39:20 +0100 Subject: [PATCH 012/849] chore(model gallery): add llama-3.3-70b-instruct (#4333) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index e8e9c9fc..c94358b6 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -1,4 +1,27 @@ --- +- &llama33 + url: "github:mudler/LocalAI/gallery/llama3.1-instruct.yaml@master" + icon: https://cdn-uploads.huggingface.co/production/uploads/642cc1c253e76b4c2286c58e/aJJxKus1wP5N-euvHEUq7.png + license: llama3.3 + description: | + The Meta Llama 3.3 multilingual large language model (LLM) is a pretrained and instruction tuned generative model in 70B (text in/text out). 
The Llama 3.3 instruction tuned text only model is optimized for multilingual dialogue use cases and outperform many of the available open source and closed chat models on common industry benchmarks. + tags: + - llm + - gguf + - gpu + - cpu + - llama3.3 + name: "llama-3.3-70b-instruct" + urls: + - https://huggingface.co/meta-llama/Llama-3.3-70B-Instruct + - https://huggingface.co/MaziyarPanahi/Llama-3.3-70B-Instruct-GGUF + overrides: + parameters: + model: Llama-3.3-70B-Instruct.Q4_K_M.gguf + files: + - filename: Llama-3.3-70B-Instruct.Q4_K_M.gguf + sha256: 4f3b04ecae278bdb0fd545b47c210bc5edf823e5ebf7d41e0b526c81d54b1ff3 + uri: huggingface://MaziyarPanahi/Llama-3.3-70B-Instruct-GGUF/Llama-3.3-70B-Instruct.Q4_K_M.gguf - &rwkv url: "github:mudler/LocalAI/gallery/rwkv.yaml@master" name: "rwkv-6-world-7b" From f5e1527a5accbab3af6a69a0cbf085ff5e61a8c6 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Sat, 7 Dec 2024 22:51:45 +0100 Subject: [PATCH 013/849] chore: :arrow_up: Update ggerganov/llama.cpp to `3573fa8e7b7f0865638b52b4e9b4d2006f0558a2` (#4335) :arrow_up: Update ggerganov/llama.cpp Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 1ab621cd..786de811 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ DETECT_LIBS?=true # llama.cpp versions GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be -CPPLLAMA_VERSION?=c5ede3849fc021174862f9c0bf8273808d8f0d39 +CPPLLAMA_VERSION?=3573fa8e7b7f0865638b52b4e9b4d2006f0558a2 # whisper.cpp version WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp From cea5a0ea42348f64b982ef7fb64796a86d2bd70e Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sun, 8 Dec 2024 13:50:33 +0100 Subject: [PATCH 014/849] feat(template): read jinja templates from gguf files (#4332) * Read jinja templates as fallback Signed-off-by: Ettore Di Giacinto * Move templating out of model loader Signed-off-by: Ettore Di Giacinto * Test TemplateMessages Signed-off-by: Ettore Di Giacinto * Set role and content from transformers Signed-off-by: Ettore Di Giacinto * Tests: be more flexible Signed-off-by: Ettore Di Giacinto * More jinja Signed-off-by: Ettore Di Giacinto * Small refactoring and adaptations Signed-off-by: Ettore Di Giacinto --------- Signed-off-by: Ettore Di Giacinto --- core/application.go | 38 --- core/application/application.go | 39 +++ .../config_file_watcher.go | 4 +- core/{startup => application}/startup.go | 77 ++--- core/cli/run.go | 8 +- core/config/backend_config.go | 2 + core/config/guesser.go | 16 +- core/http/app.go | 73 +++-- core/http/app_test.go | 24 +- core/http/endpoints/openai/chat.go | 146 +-------- core/http/endpoints/openai/completion.go | 47 +-- core/http/endpoints/openai/edit.go | 33 +- core/http/routes/localai.go | 48 +-- core/http/routes/openai.go | 154 ++++++--- go.mod | 5 + go.sum | 12 + pkg/model/loader.go | 4 - pkg/model/template.go | 52 --- pkg/model/template_test.go | 197 ------------ pkg/templates/cache.go | 156 ++++++--- pkg/templates/cache_test.go | 73 ----- pkg/templates/evaluator.go | 295 ++++++++++++++++++ pkg/templates/evaluator_test.go | 253 +++++++++++++++ 23 files changed, 971 insertions(+), 785 deletions(-) delete mode 100644 core/application.go create mode 100644 core/application/application.go rename 
core/{startup => application}/config_file_watcher.go (96%) rename core/{startup => application}/startup.go (62%) delete mode 100644 pkg/model/template.go delete mode 100644 pkg/model/template_test.go delete mode 100644 pkg/templates/cache_test.go create mode 100644 pkg/templates/evaluator.go create mode 100644 pkg/templates/evaluator_test.go diff --git a/core/application.go b/core/application.go deleted file mode 100644 index e4efbdd0..00000000 --- a/core/application.go +++ /dev/null @@ -1,38 +0,0 @@ -package core - -import ( - "github.com/mudler/LocalAI/core/config" - "github.com/mudler/LocalAI/core/services" - "github.com/mudler/LocalAI/pkg/model" -) - -// The purpose of this structure is to hold pointers to all initialized services, to make plumbing easy -// Perhaps a proper DI system is worth it in the future, but for now keep things simple. -type Application struct { - - // Application-Level Config - ApplicationConfig *config.ApplicationConfig - // ApplicationState *ApplicationState - - // Core Low-Level Services - BackendConfigLoader *config.BackendConfigLoader - ModelLoader *model.ModelLoader - - // Backend Services - // EmbeddingsBackendService *backend.EmbeddingsBackendService - // ImageGenerationBackendService *backend.ImageGenerationBackendService - // LLMBackendService *backend.LLMBackendService - // TranscriptionBackendService *backend.TranscriptionBackendService - // TextToSpeechBackendService *backend.TextToSpeechBackendService - - // LocalAI System Services - BackendMonitorService *services.BackendMonitorService - GalleryService *services.GalleryService - LocalAIMetricsService *services.LocalAIMetricsService - // OpenAIService *services.OpenAIService -} - -// TODO [NEXT PR?]: Break up ApplicationConfig. -// Migrate over stuff that is not set via config at all - especially runtime stuff -type ApplicationState struct { -} diff --git a/core/application/application.go b/core/application/application.go new file mode 100644 index 00000000..6e8d6204 --- /dev/null +++ b/core/application/application.go @@ -0,0 +1,39 @@ +package application + +import ( + "github.com/mudler/LocalAI/core/config" + "github.com/mudler/LocalAI/pkg/model" + "github.com/mudler/LocalAI/pkg/templates" +) + +type Application struct { + backendLoader *config.BackendConfigLoader + modelLoader *model.ModelLoader + applicationConfig *config.ApplicationConfig + templatesEvaluator *templates.Evaluator +} + +func newApplication(appConfig *config.ApplicationConfig) *Application { + return &Application{ + backendLoader: config.NewBackendConfigLoader(appConfig.ModelPath), + modelLoader: model.NewModelLoader(appConfig.ModelPath), + applicationConfig: appConfig, + templatesEvaluator: templates.NewEvaluator(appConfig.ModelPath), + } +} + +func (a *Application) BackendLoader() *config.BackendConfigLoader { + return a.backendLoader +} + +func (a *Application) ModelLoader() *model.ModelLoader { + return a.modelLoader +} + +func (a *Application) ApplicationConfig() *config.ApplicationConfig { + return a.applicationConfig +} + +func (a *Application) TemplatesEvaluator() *templates.Evaluator { + return a.templatesEvaluator +} diff --git a/core/startup/config_file_watcher.go b/core/application/config_file_watcher.go similarity index 96% rename from core/startup/config_file_watcher.go rename to core/application/config_file_watcher.go index df72483f..46f29b10 100644 --- a/core/startup/config_file_watcher.go +++ b/core/application/config_file_watcher.go @@ -1,4 +1,4 @@ -package startup +package application import ( "encoding/json" 
@@ -8,8 +8,8 @@ import ( "path/filepath" "time" - "github.com/fsnotify/fsnotify" "dario.cat/mergo" + "github.com/fsnotify/fsnotify" "github.com/mudler/LocalAI/core/config" "github.com/rs/zerolog/log" ) diff --git a/core/startup/startup.go b/core/application/startup.go similarity index 62% rename from core/startup/startup.go rename to core/application/startup.go index 0eb5fa58..cd52d37a 100644 --- a/core/startup/startup.go +++ b/core/application/startup.go @@ -1,15 +1,15 @@ -package startup +package application import ( "fmt" "os" - "github.com/mudler/LocalAI/core" "github.com/mudler/LocalAI/core/backend" "github.com/mudler/LocalAI/core/config" "github.com/mudler/LocalAI/core/services" "github.com/mudler/LocalAI/internal" "github.com/mudler/LocalAI/pkg/assets" + "github.com/mudler/LocalAI/pkg/library" "github.com/mudler/LocalAI/pkg/model" pkgStartup "github.com/mudler/LocalAI/pkg/startup" @@ -17,8 +17,9 @@ import ( "github.com/rs/zerolog/log" ) -func Startup(opts ...config.AppOption) (*config.BackendConfigLoader, *model.ModelLoader, *config.ApplicationConfig, error) { +func New(opts ...config.AppOption) (*Application, error) { options := config.NewApplicationConfig(opts...) + application := newApplication(options) log.Info().Msgf("Starting LocalAI using %d threads, with models path: %s", options.Threads, options.ModelPath) log.Info().Msgf("LocalAI version: %s", internal.PrintableVersion()) @@ -36,28 +37,28 @@ func Startup(opts ...config.AppOption) (*config.BackendConfigLoader, *model.Mode // Make sure directories exists if options.ModelPath == "" { - return nil, nil, nil, fmt.Errorf("options.ModelPath cannot be empty") + return nil, fmt.Errorf("options.ModelPath cannot be empty") } err = os.MkdirAll(options.ModelPath, 0750) if err != nil { - return nil, nil, nil, fmt.Errorf("unable to create ModelPath: %q", err) + return nil, fmt.Errorf("unable to create ModelPath: %q", err) } if options.ImageDir != "" { err := os.MkdirAll(options.ImageDir, 0750) if err != nil { - return nil, nil, nil, fmt.Errorf("unable to create ImageDir: %q", err) + return nil, fmt.Errorf("unable to create ImageDir: %q", err) } } if options.AudioDir != "" { err := os.MkdirAll(options.AudioDir, 0750) if err != nil { - return nil, nil, nil, fmt.Errorf("unable to create AudioDir: %q", err) + return nil, fmt.Errorf("unable to create AudioDir: %q", err) } } if options.UploadDir != "" { err := os.MkdirAll(options.UploadDir, 0750) if err != nil { - return nil, nil, nil, fmt.Errorf("unable to create UploadDir: %q", err) + return nil, fmt.Errorf("unable to create UploadDir: %q", err) } } @@ -65,39 +66,36 @@ func Startup(opts ...config.AppOption) (*config.BackendConfigLoader, *model.Mode log.Error().Err(err).Msg("error installing models") } - cl := config.NewBackendConfigLoader(options.ModelPath) - ml := model.NewModelLoader(options.ModelPath) - configLoaderOpts := options.ToConfigLoaderOptions() - if err := cl.LoadBackendConfigsFromPath(options.ModelPath, configLoaderOpts...); err != nil { + if err := application.BackendLoader().LoadBackendConfigsFromPath(options.ModelPath, configLoaderOpts...); err != nil { log.Error().Err(err).Msg("error loading config files") } if options.ConfigFile != "" { - if err := cl.LoadMultipleBackendConfigsSingleFile(options.ConfigFile, configLoaderOpts...); err != nil { + if err := application.BackendLoader().LoadMultipleBackendConfigsSingleFile(options.ConfigFile, configLoaderOpts...); err != nil { log.Error().Err(err).Msg("error loading config file") } } - if err := cl.Preload(options.ModelPath); 
err != nil { + if err := application.BackendLoader().Preload(options.ModelPath); err != nil { log.Error().Err(err).Msg("error downloading models") } if options.PreloadJSONModels != "" { if err := services.ApplyGalleryFromString(options.ModelPath, options.PreloadJSONModels, options.EnforcePredownloadScans, options.Galleries); err != nil { - return nil, nil, nil, err + return nil, err } } if options.PreloadModelsFromPath != "" { if err := services.ApplyGalleryFromFile(options.ModelPath, options.PreloadModelsFromPath, options.EnforcePredownloadScans, options.Galleries); err != nil { - return nil, nil, nil, err + return nil, err } } if options.Debug { - for _, v := range cl.GetAllBackendConfigs() { + for _, v := range application.BackendLoader().GetAllBackendConfigs() { log.Debug().Msgf("Model: %s (config: %+v)", v.Name, v) } } @@ -123,7 +121,7 @@ func Startup(opts ...config.AppOption) (*config.BackendConfigLoader, *model.Mode go func() { <-options.Context.Done() log.Debug().Msgf("Context canceled, shutting down") - err := ml.StopAllGRPC() + err := application.ModelLoader().StopAllGRPC() if err != nil { log.Error().Err(err).Msg("error while stopping all grpc backends") } @@ -131,12 +129,12 @@ func Startup(opts ...config.AppOption) (*config.BackendConfigLoader, *model.Mode if options.WatchDog { wd := model.NewWatchDog( - ml, + application.ModelLoader(), options.WatchDogBusyTimeout, options.WatchDogIdleTimeout, options.WatchDogBusy, options.WatchDogIdle) - ml.SetWatchDog(wd) + application.ModelLoader().SetWatchDog(wd) go wd.Run() go func() { <-options.Context.Done() @@ -147,7 +145,7 @@ func Startup(opts ...config.AppOption) (*config.BackendConfigLoader, *model.Mode if options.LoadToMemory != nil { for _, m := range options.LoadToMemory { - cfg, err := cl.LoadBackendConfigFileByName(m, options.ModelPath, + cfg, err := application.BackendLoader().LoadBackendConfigFileByName(m, options.ModelPath, config.LoadOptionDebug(options.Debug), config.LoadOptionThreads(options.Threads), config.LoadOptionContextSize(options.ContextSize), @@ -155,7 +153,7 @@ func Startup(opts ...config.AppOption) (*config.BackendConfigLoader, *model.Mode config.ModelPath(options.ModelPath), ) if err != nil { - return nil, nil, nil, err + return nil, err } log.Debug().Msgf("Auto loading model %s into memory from file: %s", m, cfg.Model) @@ -163,9 +161,9 @@ func Startup(opts ...config.AppOption) (*config.BackendConfigLoader, *model.Mode o := backend.ModelOptions(*cfg, options) var backendErr error - _, backendErr = ml.Load(o...) + _, backendErr = application.ModelLoader().Load(o...) if backendErr != nil { - return nil, nil, nil, err + return nil, err } } } @@ -174,7 +172,7 @@ func Startup(opts ...config.AppOption) (*config.BackendConfigLoader, *model.Mode startWatcher(options) log.Info().Msg("core/startup process completed!") - return cl, ml, options, nil + return application, nil } func startWatcher(options *config.ApplicationConfig) { @@ -201,32 +199,3 @@ func startWatcher(options *config.ApplicationConfig) { log.Error().Err(err).Msg("failed creating watcher") } } - -// In Lieu of a proper DI framework, this function wires up the Application manually. -// This is in core/startup rather than core/state.go to keep package references clean! 
-func createApplication(appConfig *config.ApplicationConfig) *core.Application { - app := &core.Application{ - ApplicationConfig: appConfig, - BackendConfigLoader: config.NewBackendConfigLoader(appConfig.ModelPath), - ModelLoader: model.NewModelLoader(appConfig.ModelPath), - } - - var err error - - // app.EmbeddingsBackendService = backend.NewEmbeddingsBackendService(app.ModelLoader, app.BackendConfigLoader, app.ApplicationConfig) - // app.ImageGenerationBackendService = backend.NewImageGenerationBackendService(app.ModelLoader, app.BackendConfigLoader, app.ApplicationConfig) - // app.LLMBackendService = backend.NewLLMBackendService(app.ModelLoader, app.BackendConfigLoader, app.ApplicationConfig) - // app.TranscriptionBackendService = backend.NewTranscriptionBackendService(app.ModelLoader, app.BackendConfigLoader, app.ApplicationConfig) - // app.TextToSpeechBackendService = backend.NewTextToSpeechBackendService(app.ModelLoader, app.BackendConfigLoader, app.ApplicationConfig) - - app.BackendMonitorService = services.NewBackendMonitorService(app.ModelLoader, app.BackendConfigLoader, app.ApplicationConfig) - app.GalleryService = services.NewGalleryService(app.ApplicationConfig) - // app.OpenAIService = services.NewOpenAIService(app.ModelLoader, app.BackendConfigLoader, app.ApplicationConfig, app.LLMBackendService) - - app.LocalAIMetricsService, err = services.NewLocalAIMetricsService() - if err != nil { - log.Error().Err(err).Msg("encountered an error initializing metrics service, startup will continue but metrics will not be tracked.") - } - - return app -} diff --git a/core/cli/run.go b/core/cli/run.go index b2d439a0..a0e16155 100644 --- a/core/cli/run.go +++ b/core/cli/run.go @@ -6,12 +6,12 @@ import ( "strings" "time" + "github.com/mudler/LocalAI/core/application" cli_api "github.com/mudler/LocalAI/core/cli/api" cliContext "github.com/mudler/LocalAI/core/cli/context" "github.com/mudler/LocalAI/core/config" "github.com/mudler/LocalAI/core/http" "github.com/mudler/LocalAI/core/p2p" - "github.com/mudler/LocalAI/core/startup" "github.com/rs/zerolog" "github.com/rs/zerolog/log" ) @@ -186,16 +186,16 @@ func (r *RunCMD) Run(ctx *cliContext.Context) error { } if r.PreloadBackendOnly { - _, _, _, err := startup.Startup(opts...) + _, err := application.New(opts...) return err } - cl, ml, options, err := startup.Startup(opts...) + app, err := application.New(opts...) 
if err != nil { return fmt.Errorf("failed basic startup tasks with error %s", err.Error()) } - appHTTP, err := http.App(cl, ml, options) + appHTTP, err := http.API(app) if err != nil { log.Error().Err(err).Msg("error during HTTP App construction") return err diff --git a/core/config/backend_config.go b/core/config/backend_config.go index 0ff34769..f07ec3d3 100644 --- a/core/config/backend_config.go +++ b/core/config/backend_config.go @@ -206,6 +206,8 @@ type TemplateConfig struct { JoinChatMessagesByCharacter *string `yaml:"join_chat_messages_by_character"` Multimodal string `yaml:"multimodal"` + + JinjaTemplate bool `yaml:"jinja_template"` } func (c *BackendConfig) UnmarshalYAML(value *yaml.Node) error { diff --git a/core/config/guesser.go b/core/config/guesser.go index b63dd051..f5627461 100644 --- a/core/config/guesser.go +++ b/core/config/guesser.go @@ -26,14 +26,14 @@ const ( type settingsConfig struct { StopWords []string TemplateConfig TemplateConfig - RepeatPenalty float64 + RepeatPenalty float64 } // default settings to adopt with a given model family var defaultsSettings map[familyType]settingsConfig = map[familyType]settingsConfig{ Gemma: { RepeatPenalty: 1.0, - StopWords: []string{"<|im_end|>", "", ""}, + StopWords: []string{"<|im_end|>", "", ""}, TemplateConfig: TemplateConfig{ Chat: "{{.Input }}\nmodel\n", ChatMessage: "{{if eq .RoleName \"assistant\" }}model{{else}}{{ .RoleName }}{{end}}\n{{ if .Content -}}\n{{.Content -}}\n{{ end -}}", @@ -200,6 +200,18 @@ func guessDefaultsFromFile(cfg *BackendConfig, modelPath string) { } else { log.Debug().Any("family", family).Msgf("guessDefaultsFromFile: no template found for family") } + + if cfg.HasTemplate() { + return + } + + // identify from well known templates first, otherwise use the raw jinja template + chatTemplate, found := f.Header.MetadataKV.Get("tokenizer.chat_template") + if found { + // try to use the jinja template + cfg.TemplateConfig.JinjaTemplate = true + cfg.TemplateConfig.ChatMessage = chatTemplate.ValueString() + } } func identifyFamily(f *gguf.GGUFFile) familyType { diff --git a/core/http/app.go b/core/http/app.go index 2ba2c2b9..a2d8b87a 100644 --- a/core/http/app.go +++ b/core/http/app.go @@ -14,10 +14,9 @@ import ( "github.com/mudler/LocalAI/core/http/middleware" "github.com/mudler/LocalAI/core/http/routes" - "github.com/mudler/LocalAI/core/config" + "github.com/mudler/LocalAI/core/application" "github.com/mudler/LocalAI/core/schema" "github.com/mudler/LocalAI/core/services" - "github.com/mudler/LocalAI/pkg/model" "github.com/gofiber/contrib/fiberzerolog" "github.com/gofiber/fiber/v2" @@ -49,18 +48,18 @@ var embedDirStatic embed.FS // @in header // @name Authorization -func App(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) (*fiber.App, error) { +func API(application *application.Application) (*fiber.App, error) { fiberCfg := fiber.Config{ Views: renderEngine(), - BodyLimit: appConfig.UploadLimitMB * 1024 * 1024, // this is the default limit of 4MB + BodyLimit: application.ApplicationConfig().UploadLimitMB * 1024 * 1024, // this is the default limit of 4MB // We disable the Fiber startup message as it does not conform to structured logging. 
// We register a startup log line with connection information in the OnListen hook to keep things user friendly though DisableStartupMessage: true, // Override default error handler } - if !appConfig.OpaqueErrors { + if !application.ApplicationConfig().OpaqueErrors { // Normally, return errors as JSON responses fiberCfg.ErrorHandler = func(ctx *fiber.Ctx, err error) error { // Status code defaults to 500 @@ -86,9 +85,9 @@ func App(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *confi } } - app := fiber.New(fiberCfg) + router := fiber.New(fiberCfg) - app.Hooks().OnListen(func(listenData fiber.ListenData) error { + router.Hooks().OnListen(func(listenData fiber.ListenData) error { scheme := "http" if listenData.TLS { scheme = "https" @@ -99,82 +98,82 @@ func App(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *confi // Have Fiber use zerolog like the rest of the application rather than it's built-in logger logger := log.Logger - app.Use(fiberzerolog.New(fiberzerolog.Config{ + router.Use(fiberzerolog.New(fiberzerolog.Config{ Logger: &logger, })) // Default middleware config - if !appConfig.Debug { - app.Use(recover.New()) + if !application.ApplicationConfig().Debug { + router.Use(recover.New()) } - if !appConfig.DisableMetrics { + if !application.ApplicationConfig().DisableMetrics { metricsService, err := services.NewLocalAIMetricsService() if err != nil { return nil, err } if metricsService != nil { - app.Use(localai.LocalAIMetricsAPIMiddleware(metricsService)) - app.Hooks().OnShutdown(func() error { + router.Use(localai.LocalAIMetricsAPIMiddleware(metricsService)) + router.Hooks().OnShutdown(func() error { return metricsService.Shutdown() }) } } // Health Checks should always be exempt from auth, so register these first - routes.HealthRoutes(app) + routes.HealthRoutes(router) - kaConfig, err := middleware.GetKeyAuthConfig(appConfig) + kaConfig, err := middleware.GetKeyAuthConfig(application.ApplicationConfig()) if err != nil || kaConfig == nil { return nil, fmt.Errorf("failed to create key auth config: %w", err) } // Auth is applied to _all_ endpoints. No exceptions. Filtering out endpoints to bypass is the role of the Filter property of the KeyAuth Configuration - app.Use(v2keyauth.New(*kaConfig)) + router.Use(v2keyauth.New(*kaConfig)) - if appConfig.CORS { + if application.ApplicationConfig().CORS { var c func(ctx *fiber.Ctx) error - if appConfig.CORSAllowOrigins == "" { + if application.ApplicationConfig().CORSAllowOrigins == "" { c = cors.New() } else { - c = cors.New(cors.Config{AllowOrigins: appConfig.CORSAllowOrigins}) + c = cors.New(cors.Config{AllowOrigins: application.ApplicationConfig().CORSAllowOrigins}) } - app.Use(c) + router.Use(c) } - if appConfig.CSRF { + if application.ApplicationConfig().CSRF { log.Debug().Msg("Enabling CSRF middleware. 
Tokens are now required for state-modifying requests") - app.Use(csrf.New()) + router.Use(csrf.New()) } // Load config jsons - utils.LoadConfig(appConfig.UploadDir, openai.UploadedFilesFile, &openai.UploadedFiles) - utils.LoadConfig(appConfig.ConfigsDir, openai.AssistantsConfigFile, &openai.Assistants) - utils.LoadConfig(appConfig.ConfigsDir, openai.AssistantsFileConfigFile, &openai.AssistantFiles) + utils.LoadConfig(application.ApplicationConfig().UploadDir, openai.UploadedFilesFile, &openai.UploadedFiles) + utils.LoadConfig(application.ApplicationConfig().ConfigsDir, openai.AssistantsConfigFile, &openai.Assistants) + utils.LoadConfig(application.ApplicationConfig().ConfigsDir, openai.AssistantsFileConfigFile, &openai.AssistantFiles) - galleryService := services.NewGalleryService(appConfig) - galleryService.Start(appConfig.Context, cl) + galleryService := services.NewGalleryService(application.ApplicationConfig()) + galleryService.Start(application.ApplicationConfig().Context, application.BackendLoader()) - routes.RegisterElevenLabsRoutes(app, cl, ml, appConfig) - routes.RegisterLocalAIRoutes(app, cl, ml, appConfig, galleryService) - routes.RegisterOpenAIRoutes(app, cl, ml, appConfig) - if !appConfig.DisableWebUI { - routes.RegisterUIRoutes(app, cl, ml, appConfig, galleryService) + routes.RegisterElevenLabsRoutes(router, application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()) + routes.RegisterLocalAIRoutes(router, application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig(), galleryService) + routes.RegisterOpenAIRoutes(router, application) + if !application.ApplicationConfig().DisableWebUI { + routes.RegisterUIRoutes(router, application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig(), galleryService) } - routes.RegisterJINARoutes(app, cl, ml, appConfig) + routes.RegisterJINARoutes(router, application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()) httpFS := http.FS(embedDirStatic) - app.Use(favicon.New(favicon.Config{ + router.Use(favicon.New(favicon.Config{ URL: "/favicon.ico", FileSystem: httpFS, File: "static/favicon.ico", })) - app.Use("/static", filesystem.New(filesystem.Config{ + router.Use("/static", filesystem.New(filesystem.Config{ Root: httpFS, PathPrefix: "static", Browse: true, @@ -182,7 +181,7 @@ func App(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *confi // Define a custom 404 handler // Note: keep this at the bottom! - app.Use(notFoundHandler) + router.Use(notFoundHandler) - return app, nil + return router, nil } diff --git a/core/http/app_test.go b/core/http/app_test.go index 83fb0e73..34ebacf7 100644 --- a/core/http/app_test.go +++ b/core/http/app_test.go @@ -12,15 +12,14 @@ import ( "path/filepath" "runtime" + "github.com/mudler/LocalAI/core/application" "github.com/mudler/LocalAI/core/config" . "github.com/mudler/LocalAI/core/http" "github.com/mudler/LocalAI/core/schema" - "github.com/mudler/LocalAI/core/startup" "github.com/gofiber/fiber/v2" "github.com/mudler/LocalAI/core/gallery" "github.com/mudler/LocalAI/pkg/downloader" - "github.com/mudler/LocalAI/pkg/model" . "github.com/onsi/ginkgo/v2" . 
"github.com/onsi/gomega" "gopkg.in/yaml.v3" @@ -252,9 +251,6 @@ var _ = Describe("API test", func() { var cancel context.CancelFunc var tmpdir string var modelDir string - var bcl *config.BackendConfigLoader - var ml *model.ModelLoader - var applicationConfig *config.ApplicationConfig commonOpts := []config.AppOption{ config.WithDebug(true), @@ -300,7 +296,7 @@ var _ = Describe("API test", func() { }, } - bcl, ml, applicationConfig, err = startup.Startup( + application, err := application.New( append(commonOpts, config.WithContext(c), config.WithGalleries(galleries), @@ -310,7 +306,7 @@ var _ = Describe("API test", func() { config.WithBackendAssetsOutput(backendAssetsDir))...) Expect(err).ToNot(HaveOccurred()) - app, err = App(bcl, ml, applicationConfig) + app, err = API(application) Expect(err).ToNot(HaveOccurred()) go app.Listen("127.0.0.1:9090") @@ -539,7 +535,7 @@ var _ = Describe("API test", func() { var res map[string]string err = json.Unmarshal([]byte(resp2.Choices[0].Message.FunctionCall.Arguments), &res) Expect(err).ToNot(HaveOccurred()) - Expect(res["location"]).To(Equal("San Francisco"), fmt.Sprint(res)) + Expect(res["location"]).To(ContainSubstring("San Francisco"), fmt.Sprint(res)) Expect(res["unit"]).To(Equal("celcius"), fmt.Sprint(res)) Expect(string(resp2.Choices[0].FinishReason)).To(Equal("function_call"), fmt.Sprint(resp2.Choices[0].FinishReason)) @@ -641,7 +637,7 @@ var _ = Describe("API test", func() { }, } - bcl, ml, applicationConfig, err = startup.Startup( + application, err := application.New( append(commonOpts, config.WithContext(c), config.WithAudioDir(tmpdir), @@ -652,7 +648,7 @@ var _ = Describe("API test", func() { config.WithBackendAssetsOutput(tmpdir))..., ) Expect(err).ToNot(HaveOccurred()) - app, err = App(bcl, ml, applicationConfig) + app, err = API(application) Expect(err).ToNot(HaveOccurred()) go app.Listen("127.0.0.1:9090") @@ -772,14 +768,14 @@ var _ = Describe("API test", func() { var err error - bcl, ml, applicationConfig, err = startup.Startup( + application, err := application.New( append(commonOpts, config.WithExternalBackend("huggingface", os.Getenv("HUGGINGFACE_GRPC")), config.WithContext(c), config.WithModelPath(modelPath), )...) 
Expect(err).ToNot(HaveOccurred()) - app, err = App(bcl, ml, applicationConfig) + app, err = API(application) Expect(err).ToNot(HaveOccurred()) go app.Listen("127.0.0.1:9090") @@ -990,14 +986,14 @@ var _ = Describe("API test", func() { c, cancel = context.WithCancel(context.Background()) var err error - bcl, ml, applicationConfig, err = startup.Startup( + application, err := application.New( append(commonOpts, config.WithContext(c), config.WithModelPath(modelPath), config.WithConfigFile(os.Getenv("CONFIG_FILE")))..., ) Expect(err).ToNot(HaveOccurred()) - app, err = App(bcl, ml, applicationConfig) + app, err = API(application) Expect(err).ToNot(HaveOccurred()) go app.Listen("127.0.0.1:9090") diff --git a/core/http/endpoints/openai/chat.go b/core/http/endpoints/openai/chat.go index b03b18bd..21e71d35 100644 --- a/core/http/endpoints/openai/chat.go +++ b/core/http/endpoints/openai/chat.go @@ -14,6 +14,8 @@ import ( "github.com/mudler/LocalAI/core/config" "github.com/mudler/LocalAI/core/schema" "github.com/mudler/LocalAI/pkg/functions" + "github.com/mudler/LocalAI/pkg/templates" + model "github.com/mudler/LocalAI/pkg/model" "github.com/rs/zerolog/log" "github.com/valyala/fasthttp" @@ -24,7 +26,7 @@ import ( // @Param request body schema.OpenAIRequest true "query params" // @Success 200 {object} schema.OpenAIResponse "Response" // @Router /v1/chat/completions [post] -func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startupOptions *config.ApplicationConfig) func(c *fiber.Ctx) error { +func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, evaluator *templates.Evaluator, startupOptions *config.ApplicationConfig) func(c *fiber.Ctx) error { var id, textContentToReturn string var created int @@ -298,148 +300,10 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup // If we are using the tokenizer template, we don't need to process the messages // unless we are processing functions if !config.TemplateConfig.UseTokenizerTemplate || shouldUseFn { - suppressConfigSystemPrompt := false - mess := []string{} - for messageIndex, i := range input.Messages { - var content string - role := i.Role - - // if function call, we might want to customize the role so we can display better that the "assistant called a json action" - // if an "assistant_function_call" role is defined, we use it, otherwise we use the role that is passed by in the request - if (i.FunctionCall != nil || i.ToolCalls != nil) && i.Role == "assistant" { - roleFn := "assistant_function_call" - r := config.Roles[roleFn] - if r != "" { - role = roleFn - } - } - r := config.Roles[role] - contentExists := i.Content != nil && i.StringContent != "" - - fcall := i.FunctionCall - if len(i.ToolCalls) > 0 { - fcall = i.ToolCalls - } - - // First attempt to populate content via a chat message specific template - if config.TemplateConfig.ChatMessage != "" { - chatMessageData := model.ChatMessageTemplateData{ - SystemPrompt: config.SystemPrompt, - Role: r, - RoleName: role, - Content: i.StringContent, - FunctionCall: fcall, - FunctionName: i.Name, - LastMessage: messageIndex == (len(input.Messages) - 1), - Function: config.Grammar != "" && (messageIndex == (len(input.Messages) - 1)), - MessageIndex: messageIndex, - } - templatedChatMessage, err := ml.EvaluateTemplateForChatMessage(config.TemplateConfig.ChatMessage, chatMessageData) - if err != nil { - log.Error().Err(err).Interface("message", chatMessageData).Str("template", config.TemplateConfig.ChatMessage).Msg("error processing 
message with template, skipping") - } else { - if templatedChatMessage == "" { - log.Warn().Msgf("template \"%s\" produced blank output for %+v. Skipping!", config.TemplateConfig.ChatMessage, chatMessageData) - continue // TODO: This continue is here intentionally to skip over the line `mess = append(mess, content)` below, and to prevent the sprintf - } - log.Debug().Msgf("templated message for chat: %s", templatedChatMessage) - content = templatedChatMessage - } - } - - marshalAnyRole := func(f any) { - j, err := json.Marshal(f) - if err == nil { - if contentExists { - content += "\n" + fmt.Sprint(r, " ", string(j)) - } else { - content = fmt.Sprint(r, " ", string(j)) - } - } - } - marshalAny := func(f any) { - j, err := json.Marshal(f) - if err == nil { - if contentExists { - content += "\n" + string(j) - } else { - content = string(j) - } - } - } - // If this model doesn't have such a template, or if that template fails to return a value, template at the message level. - if content == "" { - if r != "" { - if contentExists { - content = fmt.Sprint(r, i.StringContent) - } - - if i.FunctionCall != nil { - marshalAnyRole(i.FunctionCall) - } - if i.ToolCalls != nil { - marshalAnyRole(i.ToolCalls) - } - } else { - if contentExists { - content = fmt.Sprint(i.StringContent) - } - if i.FunctionCall != nil { - marshalAny(i.FunctionCall) - } - if i.ToolCalls != nil { - marshalAny(i.ToolCalls) - } - } - // Special Handling: System. We care if it was printed at all, not the r branch, so check seperately - if contentExists && role == "system" { - suppressConfigSystemPrompt = true - } - } - - mess = append(mess, content) - } - - joinCharacter := "\n" - if config.TemplateConfig.JoinChatMessagesByCharacter != nil { - joinCharacter = *config.TemplateConfig.JoinChatMessagesByCharacter - } - - predInput = strings.Join(mess, joinCharacter) - log.Debug().Msgf("Prompt (before templating): %s", predInput) - - templateFile := "" - - // A model can have a "file.bin.tmpl" file associated with a prompt template prefix - if ml.ExistsInModelPath(fmt.Sprintf("%s.tmpl", config.Model)) { - templateFile = config.Model - } - - if config.TemplateConfig.Chat != "" && !shouldUseFn { - templateFile = config.TemplateConfig.Chat - } - - if config.TemplateConfig.Functions != "" && shouldUseFn { - templateFile = config.TemplateConfig.Functions - } - - if templateFile != "" { - templatedInput, err := ml.EvaluateTemplateForPrompt(model.ChatPromptTemplate, templateFile, model.PromptTemplateData{ - SystemPrompt: config.SystemPrompt, - SuppressSystemPrompt: suppressConfigSystemPrompt, - Input: predInput, - Functions: funcs, - }) - if err == nil { - predInput = templatedInput - log.Debug().Msgf("Template found, input modified to: %s", predInput) - } else { - log.Debug().Msgf("Template failed loading: %s", err.Error()) - } - } + predInput = evaluator.TemplateMessages(input.Messages, config, funcs, shouldUseFn) log.Debug().Msgf("Prompt (after templating): %s", predInput) - if shouldUseFn && config.Grammar != "" { + if config.Grammar != "" { log.Debug().Msgf("Grammar: %+v", config.Grammar) } } diff --git a/core/http/endpoints/openai/completion.go b/core/http/endpoints/openai/completion.go index e5de1b3f..04ebc847 100644 --- a/core/http/endpoints/openai/completion.go +++ b/core/http/endpoints/openai/completion.go @@ -16,6 +16,7 @@ import ( "github.com/mudler/LocalAI/core/schema" "github.com/mudler/LocalAI/pkg/functions" model "github.com/mudler/LocalAI/pkg/model" + "github.com/mudler/LocalAI/pkg/templates" "github.com/rs/zerolog/log" 
"github.com/valyala/fasthttp" ) @@ -25,7 +26,7 @@ import ( // @Param request body schema.OpenAIRequest true "query params" // @Success 200 {object} schema.OpenAIResponse "Response" // @Router /v1/completions [post] -func CompletionEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error { +func CompletionEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, evaluator *templates.Evaluator, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error { id := uuid.New().String() created := int(time.Now().Unix()) @@ -94,17 +95,6 @@ func CompletionEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, a c.Set("Transfer-Encoding", "chunked") } - templateFile := "" - - // A model can have a "file.bin.tmpl" file associated with a prompt template prefix - if ml.ExistsInModelPath(fmt.Sprintf("%s.tmpl", config.Model)) { - templateFile = config.Model - } - - if config.TemplateConfig.Completion != "" { - templateFile = config.TemplateConfig.Completion - } - if input.Stream { if len(config.PromptStrings) > 1 { return errors.New("cannot handle more than 1 `PromptStrings` when Streaming") @@ -112,15 +102,13 @@ func CompletionEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, a predInput := config.PromptStrings[0] - if templateFile != "" { - templatedInput, err := ml.EvaluateTemplateForPrompt(model.CompletionPromptTemplate, templateFile, model.PromptTemplateData{ - Input: predInput, - SystemPrompt: config.SystemPrompt, - }) - if err == nil { - predInput = templatedInput - log.Debug().Msgf("Template found, input modified to: %s", predInput) - } + templatedInput, err := evaluator.EvaluateTemplateForPrompt(templates.CompletionPromptTemplate, *config, templates.PromptTemplateData{ + Input: predInput, + SystemPrompt: config.SystemPrompt, + }) + if err == nil { + predInput = templatedInput + log.Debug().Msgf("Template found, input modified to: %s", predInput) } responses := make(chan schema.OpenAIResponse) @@ -165,16 +153,13 @@ func CompletionEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, a totalTokenUsage := backend.TokenUsage{} for k, i := range config.PromptStrings { - if templateFile != "" { - // A model can have a "file.bin.tmpl" file associated with a prompt template prefix - templatedInput, err := ml.EvaluateTemplateForPrompt(model.CompletionPromptTemplate, templateFile, model.PromptTemplateData{ - SystemPrompt: config.SystemPrompt, - Input: i, - }) - if err == nil { - i = templatedInput - log.Debug().Msgf("Template found, input modified to: %s", i) - } + templatedInput, err := evaluator.EvaluateTemplateForPrompt(templates.CompletionPromptTemplate, *config, templates.PromptTemplateData{ + SystemPrompt: config.SystemPrompt, + Input: i, + }) + if err == nil { + i = templatedInput + log.Debug().Msgf("Template found, input modified to: %s", i) } r, tokenUsage, err := ComputeChoices( diff --git a/core/http/endpoints/openai/edit.go b/core/http/endpoints/openai/edit.go index 12fb4035..a6d609fb 100644 --- a/core/http/endpoints/openai/edit.go +++ b/core/http/endpoints/openai/edit.go @@ -12,6 +12,7 @@ import ( "github.com/google/uuid" "github.com/mudler/LocalAI/core/schema" model "github.com/mudler/LocalAI/pkg/model" + "github.com/mudler/LocalAI/pkg/templates" "github.com/rs/zerolog/log" ) @@ -21,7 +22,8 @@ import ( // @Param request body schema.OpenAIRequest true "query params" // @Success 200 {object} schema.OpenAIResponse "Response" // @Router /v1/edits [post] -func EditEndpoint(cl 
*config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error { +func EditEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, evaluator *templates.Evaluator, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error { + return func(c *fiber.Ctx) error { modelFile, input, err := readRequest(c, cl, ml, appConfig, true) if err != nil { @@ -35,31 +37,18 @@ func EditEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConf log.Debug().Msgf("Parameter Config: %+v", config) - templateFile := "" - - // A model can have a "file.bin.tmpl" file associated with a prompt template prefix - if ml.ExistsInModelPath(fmt.Sprintf("%s.tmpl", config.Model)) { - templateFile = config.Model - } - - if config.TemplateConfig.Edit != "" { - templateFile = config.TemplateConfig.Edit - } - var result []schema.Choice totalTokenUsage := backend.TokenUsage{} for _, i := range config.InputStrings { - if templateFile != "" { - templatedInput, err := ml.EvaluateTemplateForPrompt(model.EditPromptTemplate, templateFile, model.PromptTemplateData{ - Input: i, - Instruction: input.Instruction, - SystemPrompt: config.SystemPrompt, - }) - if err == nil { - i = templatedInput - log.Debug().Msgf("Template found, input modified to: %s", i) - } + templatedInput, err := evaluator.EvaluateTemplateForPrompt(templates.EditPromptTemplate, *config, templates.PromptTemplateData{ + Input: i, + Instruction: input.Instruction, + SystemPrompt: config.SystemPrompt, + }) + if err == nil { + i = templatedInput + log.Debug().Msgf("Template found, input modified to: %s", i) } r, tokenUsage, err := ComputeChoices(input, i, config, appConfig, ml, func(s string, c *[]schema.Choice) { diff --git a/core/http/routes/localai.go b/core/http/routes/localai.go index e7097741..2ea9896a 100644 --- a/core/http/routes/localai.go +++ b/core/http/routes/localai.go @@ -11,62 +11,62 @@ import ( "github.com/mudler/LocalAI/pkg/model" ) -func RegisterLocalAIRoutes(app *fiber.App, +func RegisterLocalAIRoutes(router *fiber.App, cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig, galleryService *services.GalleryService) { - app.Get("/swagger/*", swagger.HandlerDefault) // default + router.Get("/swagger/*", swagger.HandlerDefault) // default // LocalAI API endpoints if !appConfig.DisableGalleryEndpoint { modelGalleryEndpointService := localai.CreateModelGalleryEndpointService(appConfig.Galleries, appConfig.ModelPath, galleryService) - app.Post("/models/apply", modelGalleryEndpointService.ApplyModelGalleryEndpoint()) - app.Post("/models/delete/:name", modelGalleryEndpointService.DeleteModelGalleryEndpoint()) + router.Post("/models/apply", modelGalleryEndpointService.ApplyModelGalleryEndpoint()) + router.Post("/models/delete/:name", modelGalleryEndpointService.DeleteModelGalleryEndpoint()) - app.Get("/models/available", modelGalleryEndpointService.ListModelFromGalleryEndpoint()) - app.Get("/models/galleries", modelGalleryEndpointService.ListModelGalleriesEndpoint()) - app.Post("/models/galleries", modelGalleryEndpointService.AddModelGalleryEndpoint()) - app.Delete("/models/galleries", modelGalleryEndpointService.RemoveModelGalleryEndpoint()) - app.Get("/models/jobs/:uuid", modelGalleryEndpointService.GetOpStatusEndpoint()) - app.Get("/models/jobs", modelGalleryEndpointService.GetAllStatusEndpoint()) + router.Get("/models/available", modelGalleryEndpointService.ListModelFromGalleryEndpoint()) + router.Get("/models/galleries", 
modelGalleryEndpointService.ListModelGalleriesEndpoint()) + router.Post("/models/galleries", modelGalleryEndpointService.AddModelGalleryEndpoint()) + router.Delete("/models/galleries", modelGalleryEndpointService.RemoveModelGalleryEndpoint()) + router.Get("/models/jobs/:uuid", modelGalleryEndpointService.GetOpStatusEndpoint()) + router.Get("/models/jobs", modelGalleryEndpointService.GetAllStatusEndpoint()) } - app.Post("/tts", localai.TTSEndpoint(cl, ml, appConfig)) - app.Post("/vad", localai.VADEndpoint(cl, ml, appConfig)) + router.Post("/tts", localai.TTSEndpoint(cl, ml, appConfig)) + router.Post("/vad", localai.VADEndpoint(cl, ml, appConfig)) // Stores sl := model.NewModelLoader("") - app.Post("/stores/set", localai.StoresSetEndpoint(sl, appConfig)) - app.Post("/stores/delete", localai.StoresDeleteEndpoint(sl, appConfig)) - app.Post("/stores/get", localai.StoresGetEndpoint(sl, appConfig)) - app.Post("/stores/find", localai.StoresFindEndpoint(sl, appConfig)) + router.Post("/stores/set", localai.StoresSetEndpoint(sl, appConfig)) + router.Post("/stores/delete", localai.StoresDeleteEndpoint(sl, appConfig)) + router.Post("/stores/get", localai.StoresGetEndpoint(sl, appConfig)) + router.Post("/stores/find", localai.StoresFindEndpoint(sl, appConfig)) if !appConfig.DisableMetrics { - app.Get("/metrics", localai.LocalAIMetricsEndpoint()) + router.Get("/metrics", localai.LocalAIMetricsEndpoint()) } // Experimental Backend Statistics Module backendMonitorService := services.NewBackendMonitorService(ml, cl, appConfig) // Split out for now - app.Get("/backend/monitor", localai.BackendMonitorEndpoint(backendMonitorService)) - app.Post("/backend/shutdown", localai.BackendShutdownEndpoint(backendMonitorService)) + router.Get("/backend/monitor", localai.BackendMonitorEndpoint(backendMonitorService)) + router.Post("/backend/shutdown", localai.BackendShutdownEndpoint(backendMonitorService)) // p2p if p2p.IsP2PEnabled() { - app.Get("/api/p2p", localai.ShowP2PNodes(appConfig)) - app.Get("/api/p2p/token", localai.ShowP2PToken(appConfig)) + router.Get("/api/p2p", localai.ShowP2PNodes(appConfig)) + router.Get("/api/p2p/token", localai.ShowP2PToken(appConfig)) } - app.Get("/version", func(c *fiber.Ctx) error { + router.Get("/version", func(c *fiber.Ctx) error { return c.JSON(struct { Version string `json:"version"` }{Version: internal.PrintableVersion()}) }) - app.Get("/system", localai.SystemInformations(ml, appConfig)) + router.Get("/system", localai.SystemInformations(ml, appConfig)) // misc - app.Post("/v1/tokenize", localai.TokenizeEndpoint(cl, ml, appConfig)) + router.Post("/v1/tokenize", localai.TokenizeEndpoint(cl, ml, appConfig)) } diff --git a/core/http/routes/openai.go b/core/http/routes/openai.go index 081daf70..5ff301b6 100644 --- a/core/http/routes/openai.go +++ b/core/http/routes/openai.go @@ -2,84 +2,134 @@ package routes import ( "github.com/gofiber/fiber/v2" - "github.com/mudler/LocalAI/core/config" + "github.com/mudler/LocalAI/core/application" "github.com/mudler/LocalAI/core/http/endpoints/localai" "github.com/mudler/LocalAI/core/http/endpoints/openai" - "github.com/mudler/LocalAI/pkg/model" ) func RegisterOpenAIRoutes(app *fiber.App, - cl *config.BackendConfigLoader, - ml *model.ModelLoader, - appConfig *config.ApplicationConfig) { + application *application.Application) { // openAI compatible API endpoint // chat - app.Post("/v1/chat/completions", openai.ChatEndpoint(cl, ml, appConfig)) - app.Post("/chat/completions", openai.ChatEndpoint(cl, ml, appConfig)) + 
app.Post("/v1/chat/completions", + openai.ChatEndpoint( + application.BackendLoader(), + application.ModelLoader(), + application.TemplatesEvaluator(), + application.ApplicationConfig(), + ), + ) + + app.Post("/chat/completions", + openai.ChatEndpoint( + application.BackendLoader(), + application.ModelLoader(), + application.TemplatesEvaluator(), + application.ApplicationConfig(), + ), + ) // edit - app.Post("/v1/edits", openai.EditEndpoint(cl, ml, appConfig)) - app.Post("/edits", openai.EditEndpoint(cl, ml, appConfig)) + app.Post("/v1/edits", + openai.EditEndpoint( + application.BackendLoader(), + application.ModelLoader(), + application.TemplatesEvaluator(), + application.ApplicationConfig(), + ), + ) + + app.Post("/edits", + openai.EditEndpoint( + application.BackendLoader(), + application.ModelLoader(), + application.TemplatesEvaluator(), + application.ApplicationConfig(), + ), + ) // assistant - app.Get("/v1/assistants", openai.ListAssistantsEndpoint(cl, ml, appConfig)) - app.Get("/assistants", openai.ListAssistantsEndpoint(cl, ml, appConfig)) - app.Post("/v1/assistants", openai.CreateAssistantEndpoint(cl, ml, appConfig)) - app.Post("/assistants", openai.CreateAssistantEndpoint(cl, ml, appConfig)) - app.Delete("/v1/assistants/:assistant_id", openai.DeleteAssistantEndpoint(cl, ml, appConfig)) - app.Delete("/assistants/:assistant_id", openai.DeleteAssistantEndpoint(cl, ml, appConfig)) - app.Get("/v1/assistants/:assistant_id", openai.GetAssistantEndpoint(cl, ml, appConfig)) - app.Get("/assistants/:assistant_id", openai.GetAssistantEndpoint(cl, ml, appConfig)) - app.Post("/v1/assistants/:assistant_id", openai.ModifyAssistantEndpoint(cl, ml, appConfig)) - app.Post("/assistants/:assistant_id", openai.ModifyAssistantEndpoint(cl, ml, appConfig)) - app.Get("/v1/assistants/:assistant_id/files", openai.ListAssistantFilesEndpoint(cl, ml, appConfig)) - app.Get("/assistants/:assistant_id/files", openai.ListAssistantFilesEndpoint(cl, ml, appConfig)) - app.Post("/v1/assistants/:assistant_id/files", openai.CreateAssistantFileEndpoint(cl, ml, appConfig)) - app.Post("/assistants/:assistant_id/files", openai.CreateAssistantFileEndpoint(cl, ml, appConfig)) - app.Delete("/v1/assistants/:assistant_id/files/:file_id", openai.DeleteAssistantFileEndpoint(cl, ml, appConfig)) - app.Delete("/assistants/:assistant_id/files/:file_id", openai.DeleteAssistantFileEndpoint(cl, ml, appConfig)) - app.Get("/v1/assistants/:assistant_id/files/:file_id", openai.GetAssistantFileEndpoint(cl, ml, appConfig)) - app.Get("/assistants/:assistant_id/files/:file_id", openai.GetAssistantFileEndpoint(cl, ml, appConfig)) + app.Get("/v1/assistants", openai.ListAssistantsEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig())) + app.Get("/assistants", openai.ListAssistantsEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig())) + app.Post("/v1/assistants", openai.CreateAssistantEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig())) + app.Post("/assistants", openai.CreateAssistantEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig())) + app.Delete("/v1/assistants/:assistant_id", openai.DeleteAssistantEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig())) + app.Delete("/assistants/:assistant_id", openai.DeleteAssistantEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig())) + 
app.Get("/v1/assistants/:assistant_id", openai.GetAssistantEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig())) + app.Get("/assistants/:assistant_id", openai.GetAssistantEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig())) + app.Post("/v1/assistants/:assistant_id", openai.ModifyAssistantEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig())) + app.Post("/assistants/:assistant_id", openai.ModifyAssistantEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig())) + app.Get("/v1/assistants/:assistant_id/files", openai.ListAssistantFilesEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig())) + app.Get("/assistants/:assistant_id/files", openai.ListAssistantFilesEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig())) + app.Post("/v1/assistants/:assistant_id/files", openai.CreateAssistantFileEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig())) + app.Post("/assistants/:assistant_id/files", openai.CreateAssistantFileEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig())) + app.Delete("/v1/assistants/:assistant_id/files/:file_id", openai.DeleteAssistantFileEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig())) + app.Delete("/assistants/:assistant_id/files/:file_id", openai.DeleteAssistantFileEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig())) + app.Get("/v1/assistants/:assistant_id/files/:file_id", openai.GetAssistantFileEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig())) + app.Get("/assistants/:assistant_id/files/:file_id", openai.GetAssistantFileEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig())) // files - app.Post("/v1/files", openai.UploadFilesEndpoint(cl, appConfig)) - app.Post("/files", openai.UploadFilesEndpoint(cl, appConfig)) - app.Get("/v1/files", openai.ListFilesEndpoint(cl, appConfig)) - app.Get("/files", openai.ListFilesEndpoint(cl, appConfig)) - app.Get("/v1/files/:file_id", openai.GetFilesEndpoint(cl, appConfig)) - app.Get("/files/:file_id", openai.GetFilesEndpoint(cl, appConfig)) - app.Delete("/v1/files/:file_id", openai.DeleteFilesEndpoint(cl, appConfig)) - app.Delete("/files/:file_id", openai.DeleteFilesEndpoint(cl, appConfig)) - app.Get("/v1/files/:file_id/content", openai.GetFilesContentsEndpoint(cl, appConfig)) - app.Get("/files/:file_id/content", openai.GetFilesContentsEndpoint(cl, appConfig)) + app.Post("/v1/files", openai.UploadFilesEndpoint(application.BackendLoader(), application.ApplicationConfig())) + app.Post("/files", openai.UploadFilesEndpoint(application.BackendLoader(), application.ApplicationConfig())) + app.Get("/v1/files", openai.ListFilesEndpoint(application.BackendLoader(), application.ApplicationConfig())) + app.Get("/files", openai.ListFilesEndpoint(application.BackendLoader(), application.ApplicationConfig())) + app.Get("/v1/files/:file_id", openai.GetFilesEndpoint(application.BackendLoader(), application.ApplicationConfig())) + app.Get("/files/:file_id", openai.GetFilesEndpoint(application.BackendLoader(), application.ApplicationConfig())) + app.Delete("/v1/files/:file_id", openai.DeleteFilesEndpoint(application.BackendLoader(), 
application.ApplicationConfig())) + app.Delete("/files/:file_id", openai.DeleteFilesEndpoint(application.BackendLoader(), application.ApplicationConfig())) + app.Get("/v1/files/:file_id/content", openai.GetFilesContentsEndpoint(application.BackendLoader(), application.ApplicationConfig())) + app.Get("/files/:file_id/content", openai.GetFilesContentsEndpoint(application.BackendLoader(), application.ApplicationConfig())) // completion - app.Post("/v1/completions", openai.CompletionEndpoint(cl, ml, appConfig)) - app.Post("/completions", openai.CompletionEndpoint(cl, ml, appConfig)) - app.Post("/v1/engines/:model/completions", openai.CompletionEndpoint(cl, ml, appConfig)) + app.Post("/v1/completions", + openai.CompletionEndpoint( + application.BackendLoader(), + application.ModelLoader(), + application.TemplatesEvaluator(), + application.ApplicationConfig(), + ), + ) + + app.Post("/completions", + openai.CompletionEndpoint( + application.BackendLoader(), + application.ModelLoader(), + application.TemplatesEvaluator(), + application.ApplicationConfig(), + ), + ) + + app.Post("/v1/engines/:model/completions", + openai.CompletionEndpoint( + application.BackendLoader(), + application.ModelLoader(), + application.TemplatesEvaluator(), + application.ApplicationConfig(), + ), + ) // embeddings - app.Post("/v1/embeddings", openai.EmbeddingsEndpoint(cl, ml, appConfig)) - app.Post("/embeddings", openai.EmbeddingsEndpoint(cl, ml, appConfig)) - app.Post("/v1/engines/:model/embeddings", openai.EmbeddingsEndpoint(cl, ml, appConfig)) + app.Post("/v1/embeddings", openai.EmbeddingsEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig())) + app.Post("/embeddings", openai.EmbeddingsEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig())) + app.Post("/v1/engines/:model/embeddings", openai.EmbeddingsEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig())) // audio - app.Post("/v1/audio/transcriptions", openai.TranscriptEndpoint(cl, ml, appConfig)) - app.Post("/v1/audio/speech", localai.TTSEndpoint(cl, ml, appConfig)) + app.Post("/v1/audio/transcriptions", openai.TranscriptEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig())) + app.Post("/v1/audio/speech", localai.TTSEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig())) // images - app.Post("/v1/images/generations", openai.ImageEndpoint(cl, ml, appConfig)) + app.Post("/v1/images/generations", openai.ImageEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig())) - if appConfig.ImageDir != "" { - app.Static("/generated-images", appConfig.ImageDir) + if application.ApplicationConfig().ImageDir != "" { + app.Static("/generated-images", application.ApplicationConfig().ImageDir) } - if appConfig.AudioDir != "" { - app.Static("/generated-audio", appConfig.AudioDir) + if application.ApplicationConfig().AudioDir != "" { + app.Static("/generated-audio", application.ApplicationConfig().AudioDir) } // List models - app.Get("/v1/models", openai.ListModelsEndpoint(cl, ml)) - app.Get("/models", openai.ListModelsEndpoint(cl, ml)) + app.Get("/v1/models", openai.ListModelsEndpoint(application.BackendLoader(), application.ModelLoader())) + app.Get("/models", openai.ListModelsEndpoint(application.BackendLoader(), application.ModelLoader())) } diff --git a/go.mod b/go.mod index 3bc625ac..e9bcf3ec 100644 --- a/go.mod +++ b/go.mod @@ 
-76,6 +76,7 @@ require ( cloud.google.com/go/auth v0.4.1 // indirect cloud.google.com/go/auth/oauth2adapt v0.2.2 // indirect cloud.google.com/go/compute/metadata v0.3.0 // indirect + github.com/dustin/go-humanize v1.0.1 // indirect github.com/envoyproxy/protoc-gen-validate v1.0.4 // indirect github.com/fasthttp/websocket v1.5.3 // indirect github.com/felixge/httpsnoop v1.0.4 // indirect @@ -84,8 +85,12 @@ require ( github.com/google/s2a-go v0.1.7 // indirect github.com/googleapis/enterprise-certificate-proxy v0.3.2 // indirect github.com/googleapis/gax-go/v2 v2.12.4 // indirect + github.com/json-iterator/go v1.1.12 // indirect github.com/moby/docker-image-spec v1.3.1 // indirect + github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect + github.com/modern-go/reflect2 v1.0.2 // indirect github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect + github.com/nikolalohinski/gonja/v2 v2.3.2 // indirect github.com/pion/datachannel v1.5.8 // indirect github.com/pion/dtls/v2 v2.2.12 // indirect github.com/pion/ice/v2 v2.3.34 // indirect diff --git a/go.sum b/go.sum index 11b87fa9..f1628f7a 100644 --- a/go.sum +++ b/go.sum @@ -140,6 +140,8 @@ github.com/dsnet/compress v0.0.2-0.20210315054119-f66993602bf5 h1:iFaUwBSo5Svw6L github.com/dsnet/compress v0.0.2-0.20210315054119-f66993602bf5/go.mod h1:qssHWj60/X5sZFNxpG4HBPDHVqxNm4DfnCKgrbZOT+s= github.com/dsnet/golib v0.0.0-20171103203638-1ea166775780/go.mod h1:Lj+Z9rebOhdfkVLjJ8T6VcRQv3SXugXy999NBtR9aFY= github.com/dustin/go-humanize v1.0.0/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk= +github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY= +github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto= github.com/elastic/gosigar v0.12.0/go.mod h1:iXRIGg2tLnu7LBdpqzyQfGDEidKCfWcCMS0WKyPWoMs= github.com/elastic/gosigar v0.14.3 h1:xwkKwPia+hSfg9GqrCUKYdId102m9qTJIIr7egmK/uo= github.com/elastic/gosigar v0.14.3/go.mod h1:iXRIGg2tLnu7LBdpqzyQfGDEidKCfWcCMS0WKyPWoMs= @@ -268,6 +270,7 @@ github.com/google/go-containerregistry v0.19.2 h1:TannFKE1QSajsP6hPWb5oJNgKe1IKj github.com/google/go-containerregistry v0.19.2/go.mod h1:YCMFNQeeXeLF+dnhhWkqDItx/JSkH01j1Kis4PsjzFI= github.com/google/go-github v17.0.0+incompatible/go.mod h1:zLgOLi98H3fifZn+44m+umXrS52loVEgC2AApnigrVQ= github.com/google/go-querystring v1.0.0/go.mod h1:odCYkC5MyYFN7vkCjXpyrEuKhc/BUO6wN/zVPAxq5ck= +github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= github.com/google/gopacket v1.1.19 h1:ves8RnFZPGiFnTS0uPQStjwru6uO6h+nlr9j6fL7kF8= github.com/google/gopacket v1.1.19/go.mod h1:iJ8V8n6KS+z2U1A8pUwu8bW5SyEMkXJB8Yo/Vo+TKTo= github.com/google/martian v2.1.0+incompatible/go.mod h1:9I4somxYTbIHy5NJKHRl3wXiIaQGbYVAs8BPL6v8lEs= @@ -353,6 +356,8 @@ github.com/joho/godotenv v1.5.1/go.mod h1:f4LDr5Voq0i2e/R5DDNOoa2zzDfwtkZa6DnEwA github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY= github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y= github.com/json-iterator/go v1.1.6/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU= +github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= +github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= github.com/jstemmer/go-junit-report v0.0.0-20190106144839-af01ea7f8024/go.mod h1:6v2b51hI/fHJwM22ozAgKL4VKDeJcHhJFhtBdhmNjmU= github.com/jtolds/gls v4.20.0+incompatible 
h1:xdiiI2gbIgH/gLH7ADydsJ1uDOEzR8yvV7C0MuV77Wo= github.com/jtolds/gls v4.20.0+incompatible/go.mod h1:QJZ7F/aHp+rZTRtaJ1ow/lLfFfVYBRgL+9YlvaHOwJU= @@ -474,8 +479,12 @@ github.com/moby/sys/sequential v0.5.0 h1:OPvI35Lzn9K04PBbCLW0g4LcFAJgHsvXsRyewg5 github.com/moby/sys/sequential v0.5.0/go.mod h1:tH2cOOs5V9MlPiXcQzRC+eEyab644PWKGRYaaV5ZZlo= github.com/moby/term v0.5.0 h1:xt8Q1nalod/v7BqbG21f8mQPqH+xAaC9C3N3wfWbVP0= github.com/moby/term v0.5.0/go.mod h1:8FzsFHVUBGZdbDsJw/ot+X+d5HLUbvklYLJ9uGfcI3Y= +github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= +github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg= github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= github.com/modern-go/reflect2 v1.0.1/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0= +github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M= +github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= github.com/mr-tron/base58 v1.1.2/go.mod h1:BinMc/sQntlIE1frQmRFPUoPA1Zkr8VRgBdjWI2mNwc= github.com/mr-tron/base58 v1.2.0 h1:T/HDJBh4ZCPbU39/+c3rRvE0uKBQlU27+QI8LJ4t64o= github.com/mr-tron/base58 v1.2.0/go.mod h1:BinMc/sQntlIE1frQmRFPUoPA1Zkr8VRgBdjWI2mNwc= @@ -519,6 +528,9 @@ github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= github.com/neelance/astrewrite v0.0.0-20160511093645-99348263ae86/go.mod h1:kHJEU3ofeGjhHklVoIGuVj85JJwZ6kWPaJwCIxgnFmo= github.com/neelance/sourcemap v0.0.0-20151028013722-8c68805598ab/go.mod h1:Qr6/a/Q4r9LP1IltGz7tA7iOK1WonHEYhu1HRBA7ZiM= +github.com/nikolalohinski/gonja v1.5.3 h1:GsA+EEaZDZPGJ8JtpeGN78jidhOlxeJROpqMT9fTj9c= +github.com/nikolalohinski/gonja/v2 v2.3.2 h1:UgLFfqi7L9XfX0PEcE4eUpvGojVQL5KhBfJJaBp7ZxY= +github.com/nikolalohinski/gonja/v2 v2.3.2/go.mod h1:1Wcc/5huTu6y36e0sOFR1XQoFlylw3c3H3L5WOz0RDg= github.com/nwaples/rardecode v1.1.0 h1:vSxaY8vQhOcVr4mm5e8XllHWTiM4JF507A0Katqw7MQ= github.com/nwaples/rardecode v1.1.0/go.mod h1:5DzqNKiOdpKKBH87u8VlvAnPZMXcGRhxWkRpHbbfGS0= github.com/nxadm/tail v1.4.11 h1:8feyoE3OzPrcshW5/MJ4sGESc5cqmGkGCWlco4l0bqY= diff --git a/pkg/model/loader.go b/pkg/model/loader.go index b32e3745..d62f52b2 100644 --- a/pkg/model/loader.go +++ b/pkg/model/loader.go @@ -9,8 +9,6 @@ import ( "sync" "time" - "github.com/mudler/LocalAI/pkg/templates" - "github.com/mudler/LocalAI/pkg/utils" "github.com/rs/zerolog/log" @@ -23,7 +21,6 @@ type ModelLoader struct { ModelPath string mu sync.Mutex models map[string]*Model - templates *templates.TemplateCache wd *WatchDog } @@ -31,7 +28,6 @@ func NewModelLoader(modelPath string) *ModelLoader { nml := &ModelLoader{ ModelPath: modelPath, models: make(map[string]*Model), - templates: templates.NewTemplateCache(modelPath), } return nml diff --git a/pkg/model/template.go b/pkg/model/template.go deleted file mode 100644 index 3dc850cf..00000000 --- a/pkg/model/template.go +++ /dev/null @@ -1,52 +0,0 @@ -package model - -import ( - "fmt" - - "github.com/mudler/LocalAI/pkg/functions" - "github.com/mudler/LocalAI/pkg/templates" -) - -// Rather than pass an interface{} to the prompt template: -// These are the definitions of all possible variables LocalAI will currently populate for use in a prompt template file -// Please note: Not all of these are 
populated on every endpoint - your template should either be tested for each endpoint you map it to, or tolerant of zero values. -type PromptTemplateData struct { - SystemPrompt string - SuppressSystemPrompt bool // used by chat specifically to indicate that SystemPrompt above should be _ignored_ - Input string - Instruction string - Functions []functions.Function - MessageIndex int -} - -type ChatMessageTemplateData struct { - SystemPrompt string - Role string - RoleName string - FunctionName string - Content string - MessageIndex int - Function bool - FunctionCall interface{} - LastMessage bool -} - -const ( - ChatPromptTemplate templates.TemplateType = iota - ChatMessageTemplate - CompletionPromptTemplate - EditPromptTemplate - FunctionsPromptTemplate -) - -func (ml *ModelLoader) EvaluateTemplateForPrompt(templateType templates.TemplateType, templateName string, in PromptTemplateData) (string, error) { - // TODO: should this check be improved? - if templateType == ChatMessageTemplate { - return "", fmt.Errorf("invalid templateType: ChatMessage") - } - return ml.templates.EvaluateTemplate(templateType, templateName, in) -} - -func (ml *ModelLoader) EvaluateTemplateForChatMessage(templateName string, messageData ChatMessageTemplateData) (string, error) { - return ml.templates.EvaluateTemplate(ChatMessageTemplate, templateName, messageData) -} diff --git a/pkg/model/template_test.go b/pkg/model/template_test.go deleted file mode 100644 index 1142ed0c..00000000 --- a/pkg/model/template_test.go +++ /dev/null @@ -1,197 +0,0 @@ -package model_test - -import ( - . "github.com/mudler/LocalAI/pkg/model" - - . "github.com/onsi/ginkgo/v2" - . "github.com/onsi/gomega" -) - -const chatML = `<|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}} -{{- if .FunctionCall }} - -{{- else if eq .RoleName "tool" }} - -{{- end }} -{{- if .Content}} -{{.Content }} -{{- end }} -{{- if .FunctionCall}} -{{toJson .FunctionCall}} -{{- end }} -{{- if .FunctionCall }} - -{{- else if eq .RoleName "tool" }} - -{{- end }}<|im_end|>` - -const llama3 = `<|start_header_id|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}<|end_header_id|> - -{{ if .FunctionCall -}} -Function call: -{{ else if eq .RoleName "tool" -}} -Function response: -{{ end -}} -{{ if .Content -}} -{{.Content -}} -{{ else if .FunctionCall -}} -{{ toJson .FunctionCall -}} -{{ end -}} -<|eot_id|>` - -var llama3TestMatch map[string]map[string]interface{} = map[string]map[string]interface{}{ - "user": { - "template": llama3, - "expected": "<|start_header_id|>user<|end_header_id|>\n\nA long time ago in a galaxy far, far away...<|eot_id|>", - "data": ChatMessageTemplateData{ - SystemPrompt: "", - Role: "user", - RoleName: "user", - Content: "A long time ago in a galaxy far, far away...", - FunctionCall: nil, - FunctionName: "", - LastMessage: false, - Function: false, - MessageIndex: 0, - }, - }, - "assistant": { - "template": llama3, - "expected": "<|start_header_id|>assistant<|end_header_id|>\n\nA long time ago in a galaxy far, far away...<|eot_id|>", - "data": ChatMessageTemplateData{ - SystemPrompt: "", - Role: "assistant", - RoleName: "assistant", - Content: "A long time ago in a galaxy far, far away...", - FunctionCall: nil, - FunctionName: "", - LastMessage: false, - Function: false, - MessageIndex: 0, - }, - }, - "function_call": { - 
"template": llama3, - "expected": "<|start_header_id|>assistant<|end_header_id|>\n\nFunction call:\n{\"function\":\"test\"}<|eot_id|>", - "data": ChatMessageTemplateData{ - SystemPrompt: "", - Role: "assistant", - RoleName: "assistant", - Content: "", - FunctionCall: map[string]string{"function": "test"}, - FunctionName: "", - LastMessage: false, - Function: false, - MessageIndex: 0, - }, - }, - "function_response": { - "template": llama3, - "expected": "<|start_header_id|>tool<|end_header_id|>\n\nFunction response:\nResponse from tool<|eot_id|>", - "data": ChatMessageTemplateData{ - SystemPrompt: "", - Role: "tool", - RoleName: "tool", - Content: "Response from tool", - FunctionCall: nil, - FunctionName: "", - LastMessage: false, - Function: false, - MessageIndex: 0, - }, - }, -} - -var chatMLTestMatch map[string]map[string]interface{} = map[string]map[string]interface{}{ - "user": { - "template": chatML, - "expected": "<|im_start|>user\nA long time ago in a galaxy far, far away...<|im_end|>", - "data": ChatMessageTemplateData{ - SystemPrompt: "", - Role: "user", - RoleName: "user", - Content: "A long time ago in a galaxy far, far away...", - FunctionCall: nil, - FunctionName: "", - LastMessage: false, - Function: false, - MessageIndex: 0, - }, - }, - "assistant": { - "template": chatML, - "expected": "<|im_start|>assistant\nA long time ago in a galaxy far, far away...<|im_end|>", - "data": ChatMessageTemplateData{ - SystemPrompt: "", - Role: "assistant", - RoleName: "assistant", - Content: "A long time ago in a galaxy far, far away...", - FunctionCall: nil, - FunctionName: "", - LastMessage: false, - Function: false, - MessageIndex: 0, - }, - }, - "function_call": { - "template": chatML, - "expected": "<|im_start|>assistant\n\n{\"function\":\"test\"}\n<|im_end|>", - "data": ChatMessageTemplateData{ - SystemPrompt: "", - Role: "assistant", - RoleName: "assistant", - Content: "", - FunctionCall: map[string]string{"function": "test"}, - FunctionName: "", - LastMessage: false, - Function: false, - MessageIndex: 0, - }, - }, - "function_response": { - "template": chatML, - "expected": "<|im_start|>tool\n\nResponse from tool\n<|im_end|>", - "data": ChatMessageTemplateData{ - SystemPrompt: "", - Role: "tool", - RoleName: "tool", - Content: "Response from tool", - FunctionCall: nil, - FunctionName: "", - LastMessage: false, - Function: false, - MessageIndex: 0, - }, - }, -} - -var _ = Describe("Templates", func() { - Context("chat message ChatML", func() { - var modelLoader *ModelLoader - BeforeEach(func() { - modelLoader = NewModelLoader("") - }) - for key := range chatMLTestMatch { - foo := chatMLTestMatch[key] - It("renders correctly `"+key+"`", func() { - templated, err := modelLoader.EvaluateTemplateForChatMessage(foo["template"].(string), foo["data"].(ChatMessageTemplateData)) - Expect(err).ToNot(HaveOccurred()) - Expect(templated).To(Equal(foo["expected"]), templated) - }) - } - }) - Context("chat message llama3", func() { - var modelLoader *ModelLoader - BeforeEach(func() { - modelLoader = NewModelLoader("") - }) - for key := range llama3TestMatch { - foo := llama3TestMatch[key] - It("renders correctly `"+key+"`", func() { - templated, err := modelLoader.EvaluateTemplateForChatMessage(foo["template"].(string), foo["data"].(ChatMessageTemplateData)) - Expect(err).ToNot(HaveOccurred()) - Expect(templated).To(Equal(foo["expected"]), templated) - }) - } - }) -}) diff --git a/pkg/templates/cache.go b/pkg/templates/cache.go index e4801946..1efce660 100644 --- a/pkg/templates/cache.go +++ 
b/pkg/templates/cache.go @@ -11,59 +11,41 @@ import ( "github.com/mudler/LocalAI/pkg/utils" "github.com/Masterminds/sprig/v3" + + "github.com/nikolalohinski/gonja/v2" + "github.com/nikolalohinski/gonja/v2/exec" ) // Keep this in sync with config.TemplateConfig. Is there a more idiomatic way to accomplish this in go? // Technically, order doesn't _really_ matter, but the count must stay in sync, see tests/integration/reflect_test.go type TemplateType int -type TemplateCache struct { - mu sync.Mutex - templatesPath string - templates map[TemplateType]map[string]*template.Template +type templateCache struct { + mu sync.Mutex + templatesPath string + templates map[TemplateType]map[string]*template.Template + jinjaTemplates map[TemplateType]map[string]*exec.Template } -func NewTemplateCache(templatesPath string) *TemplateCache { - tc := &TemplateCache{ - templatesPath: templatesPath, - templates: make(map[TemplateType]map[string]*template.Template), +func newTemplateCache(templatesPath string) *templateCache { + tc := &templateCache{ + templatesPath: templatesPath, + templates: make(map[TemplateType]map[string]*template.Template), + jinjaTemplates: make(map[TemplateType]map[string]*exec.Template), } return tc } -func (tc *TemplateCache) initializeTemplateMapKey(tt TemplateType) { +func (tc *templateCache) initializeTemplateMapKey(tt TemplateType) { if _, ok := tc.templates[tt]; !ok { tc.templates[tt] = make(map[string]*template.Template) } } -func (tc *TemplateCache) EvaluateTemplate(templateType TemplateType, templateName string, in interface{}) (string, error) { - tc.mu.Lock() - defer tc.mu.Unlock() - - tc.initializeTemplateMapKey(templateType) - m, ok := tc.templates[templateType][templateName] - if !ok { - // return "", fmt.Errorf("template not loaded: %s", templateName) - loadErr := tc.loadTemplateIfExists(templateType, templateName) - if loadErr != nil { - return "", loadErr - } - m = tc.templates[templateType][templateName] // ok is not important since we check m on the next line, and wealready checked - } - if m == nil { - return "", fmt.Errorf("failed loading a template for %s", templateName) - } - - var buf bytes.Buffer - - if err := m.Execute(&buf, in); err != nil { - return "", err - } - return buf.String(), nil +func (tc *templateCache) existsInModelPath(s string) bool { + return utils.ExistsInPath(tc.templatesPath, s) } - -func (tc *TemplateCache) loadTemplateIfExists(templateType TemplateType, templateName string) error { +func (tc *templateCache) loadTemplateIfExists(templateType TemplateType, templateName string) error { // Check if the template was already loaded if _, ok := tc.templates[templateType][templateName]; ok { @@ -82,6 +64,51 @@ func (tc *TemplateCache) loadTemplateIfExists(templateType TemplateType, templat return fmt.Errorf("template file outside path: %s", file) } + // can either be a file in the system or a string with the template + if tc.existsInModelPath(modelTemplateFile) { + d, err := os.ReadFile(file) + if err != nil { + return err + } + dat = string(d) + } else { + dat = templateName + } + + // Parse the template + tmpl, err := template.New("prompt").Funcs(sprig.FuncMap()).Parse(dat) + if err != nil { + return err + } + tc.templates[templateType][templateName] = tmpl + + return nil +} + +func (tc *templateCache) initializeJinjaTemplateMapKey(tt TemplateType) { + if _, ok := tc.jinjaTemplates[tt]; !ok { + tc.jinjaTemplates[tt] = make(map[string]*exec.Template) + } +} + +func (tc *templateCache) loadJinjaTemplateIfExists(templateType TemplateType, 
templateName string) error {
+	// Check if the template was already loaded
+	if _, ok := tc.jinjaTemplates[templateType][templateName]; ok {
+		return nil
+	}
+
+	// Check if the model path exists
+	// skip any error here - we run anyway if a template does not exist
+	modelTemplateFile := fmt.Sprintf("%s.tmpl", templateName)
+
+	dat := ""
+	file := filepath.Join(tc.templatesPath, modelTemplateFile)
+
+	// Security check
+	if err := utils.VerifyPath(modelTemplateFile, tc.templatesPath); err != nil {
+		return fmt.Errorf("template file outside path: %s", file)
+	}
+
 	// can either be a file in the system or a string with the template
 	if utils.ExistsInPath(tc.templatesPath, modelTemplateFile) {
 		d, err := os.ReadFile(file)
@@ -93,12 +120,65 @@ func (tc *TemplateCache) loadTemplateIfExists(templateType TemplateType, templat
 		dat = templateName
 	}
 
-	// Parse the template
-	tmpl, err := template.New("prompt").Funcs(sprig.FuncMap()).Parse(dat)
+	tmpl, err := gonja.FromString(dat)
 	if err != nil {
 		return err
 	}
-	tc.templates[templateType][templateName] = tmpl
+	tc.jinjaTemplates[templateType][templateName] = tmpl
 
 	return nil
 }
+
+func (tc *templateCache) evaluateJinjaTemplate(templateType TemplateType, templateNameOrContent string, in map[string]interface{}) (string, error) {
+	tc.mu.Lock()
+	defer tc.mu.Unlock()
+
+	tc.initializeJinjaTemplateMapKey(templateType)
+	m, ok := tc.jinjaTemplates[templateType][templateNameOrContent]
+	if !ok {
+		// return "", fmt.Errorf("template not loaded: %s", templateName)
+		loadErr := tc.loadJinjaTemplateIfExists(templateType, templateNameOrContent)
+		if loadErr != nil {
+			return "", loadErr
+		}
+		m = tc.jinjaTemplates[templateType][templateNameOrContent] // ok is not important since we check m on the next line, and we already checked
+	}
+	if m == nil {
+		return "", fmt.Errorf("failed loading a template for %s", templateNameOrContent)
+	}
+
+	var buf bytes.Buffer
+
+	data := exec.NewContext(in)
+
+	if err := m.Execute(&buf, data); err != nil {
+		return "", err
+	}
+	return buf.String(), nil
+}
+
+func (tc *templateCache) evaluateTemplate(templateType TemplateType, templateNameOrContent string, in interface{}) (string, error) {
+	tc.mu.Lock()
+	defer tc.mu.Unlock()
+
+	tc.initializeTemplateMapKey(templateType)
+	m, ok := tc.templates[templateType][templateNameOrContent]
+	if !ok {
+		// return "", fmt.Errorf("template not loaded: %s", templateName)
+		loadErr := tc.loadTemplateIfExists(templateType, templateNameOrContent)
+		if loadErr != nil {
+			return "", loadErr
+		}
+		m = tc.templates[templateType][templateNameOrContent] // ok is not important since we check m on the next line, and we already checked
+	}
+	if m == nil {
+		return "", fmt.Errorf("failed loading a template for %s", templateNameOrContent)
+	}
+
+	var buf bytes.Buffer
+
+	if err := m.Execute(&buf, in); err != nil {
+		return "", err
+	}
+	return buf.String(), nil
+}
diff --git a/pkg/templates/cache_test.go b/pkg/templates/cache_test.go
deleted file mode 100644
index 8bb50766..00000000
--- a/pkg/templates/cache_test.go
+++ /dev/null
@@ -1,73 +0,0 @@
-package templates_test
-
-import (
-	"os"
-	"path/filepath"
-
-	"github.com/mudler/LocalAI/pkg/templates" // Update with your module path
-	. "github.com/onsi/ginkgo/v2"
-	. 
"github.com/onsi/gomega" -) - -var _ = Describe("TemplateCache", func() { - var ( - templateCache *templates.TemplateCache - tempDir string - ) - - BeforeEach(func() { - var err error - tempDir, err = os.MkdirTemp("", "templates") - Expect(err).NotTo(HaveOccurred()) - - // Writing example template files - err = os.WriteFile(filepath.Join(tempDir, "example.tmpl"), []byte("Hello, {{.Name}}!"), 0600) - Expect(err).NotTo(HaveOccurred()) - err = os.WriteFile(filepath.Join(tempDir, "empty.tmpl"), []byte(""), 0600) - Expect(err).NotTo(HaveOccurred()) - - templateCache = templates.NewTemplateCache(tempDir) - }) - - AfterEach(func() { - os.RemoveAll(tempDir) // Clean up - }) - - Describe("EvaluateTemplate", func() { - Context("when template is loaded successfully", func() { - It("should evaluate the template correctly", func() { - result, err := templateCache.EvaluateTemplate(1, "example", map[string]string{"Name": "Gopher"}) - Expect(err).NotTo(HaveOccurred()) - Expect(result).To(Equal("Hello, Gopher!")) - }) - }) - - Context("when template isn't a file", func() { - It("should parse from string", func() { - result, err := templateCache.EvaluateTemplate(1, "{{.Name}}", map[string]string{"Name": "Gopher"}) - Expect(err).ToNot(HaveOccurred()) - Expect(result).To(Equal("Gopher")) - }) - }) - - Context("when template is empty", func() { - It("should return an empty string", func() { - result, err := templateCache.EvaluateTemplate(1, "empty", nil) - Expect(err).NotTo(HaveOccurred()) - Expect(result).To(Equal("")) - }) - }) - }) - - Describe("concurrency", func() { - It("should handle multiple concurrent accesses", func(done Done) { - go func() { - _, _ = templateCache.EvaluateTemplate(1, "example", map[string]string{"Name": "Gopher"}) - }() - go func() { - _, _ = templateCache.EvaluateTemplate(1, "example", map[string]string{"Name": "Gopher"}) - }() - close(done) - }, 0.1) // timeout in seconds - }) -}) diff --git a/pkg/templates/evaluator.go b/pkg/templates/evaluator.go new file mode 100644 index 00000000..aedf7b41 --- /dev/null +++ b/pkg/templates/evaluator.go @@ -0,0 +1,295 @@ +package templates + +import ( + "encoding/json" + "fmt" + "strings" + + "github.com/mudler/LocalAI/core/config" + "github.com/mudler/LocalAI/core/schema" + "github.com/mudler/LocalAI/pkg/functions" + "github.com/rs/zerolog/log" +) + +// Rather than pass an interface{} to the prompt template: +// These are the definitions of all possible variables LocalAI will currently populate for use in a prompt template file +// Please note: Not all of these are populated on every endpoint - your template should either be tested for each endpoint you map it to, or tolerant of zero values. 
+type PromptTemplateData struct {
+	SystemPrompt         string
+	SuppressSystemPrompt bool // used by chat specifically to indicate that SystemPrompt above should be _ignored_
+	Input                string
+	Instruction          string
+	Functions            []functions.Function
+	MessageIndex         int
+}
+
+type ChatMessageTemplateData struct {
+	SystemPrompt string
+	Role         string
+	RoleName     string
+	FunctionName string
+	Content      string
+	MessageIndex int
+	Function     bool
+	FunctionCall interface{}
+	LastMessage  bool
+}
+
+const (
+	ChatPromptTemplate TemplateType = iota
+	ChatMessageTemplate
+	CompletionPromptTemplate
+	EditPromptTemplate
+	FunctionsPromptTemplate
+)
+
+type Evaluator struct {
+	cache *templateCache
+}
+
+func NewEvaluator(modelPath string) *Evaluator {
+	return &Evaluator{
+		cache: newTemplateCache(modelPath),
+	}
+}
+
+func (e *Evaluator) EvaluateTemplateForPrompt(templateType TemplateType, config config.BackendConfig, in PromptTemplateData) (string, error) {
+	template := ""
+
+	// A model can have a "file.bin.tmpl" file associated with a prompt template prefix
+	if e.cache.existsInModelPath(fmt.Sprintf("%s.tmpl", config.Model)) {
+		template = config.Model
+	}
+
+	switch templateType {
+	case CompletionPromptTemplate:
+		if config.TemplateConfig.Completion != "" {
+			template = config.TemplateConfig.Completion
+		}
+	case EditPromptTemplate:
+		if config.TemplateConfig.Edit != "" {
+			template = config.TemplateConfig.Edit
+		}
+	case ChatPromptTemplate:
+		if config.TemplateConfig.Chat != "" {
+			template = config.TemplateConfig.Chat
+		}
+	case FunctionsPromptTemplate:
+		if config.TemplateConfig.Functions != "" {
+			template = config.TemplateConfig.Functions
+		}
+	}
+
+	if template == "" {
+		return in.Input, nil
+	}
+
+	if config.TemplateConfig.JinjaTemplate {
+		return e.evaluateJinjaTemplateForPrompt(templateType, template, in)
+	}
+
+	return e.cache.evaluateTemplate(templateType, template, in)
+}
+
+func (e *Evaluator) evaluateTemplateForChatMessage(templateName string, messageData ChatMessageTemplateData) (string, error) {
+	return e.cache.evaluateTemplate(ChatMessageTemplate, templateName, messageData)
+}
+
+func (e *Evaluator) templateJinjaChat(templateName string, messageData []ChatMessageTemplateData, funcs []functions.Function) (string, error) {
+
+	conversation := make(map[string]interface{})
+	// length 0 with capacity len(messageData), so the appends below don't leave nil leading entries
+	messages := make([]map[string]interface{}, 0, len(messageData))
+
+	// convert from ChatMessageTemplateData to what the jinja template expects
+
+	for _, message := range messageData {
+		// TODO: this seems to cover minimum text templates. 
Can be expanded to cover more complex interactions + var data []byte + data, _ = json.Marshal(message.FunctionCall) + messages = append(messages, map[string]interface{}{ + "role": message.RoleName, + "content": message.Content, + "tool_call": string(data), + }) + } + + conversation["messages"] = messages + + // if tools are detected, add these + if len(funcs) > 0 { + conversation["tools"] = funcs + } + + return e.cache.evaluateJinjaTemplate(ChatMessageTemplate, templateName, conversation) +} + +func (e *Evaluator) evaluateJinjaTemplateForPrompt(templateType TemplateType, templateName string, in PromptTemplateData) (string, error) { + + conversation := make(map[string]interface{}) + + conversation["system_prompt"] = in.SystemPrompt + conversation["content"] = in.Input + + return e.cache.evaluateJinjaTemplate(templateType, templateName, conversation) +} + +func (e *Evaluator) TemplateMessages(messages []schema.Message, config *config.BackendConfig, funcs []functions.Function, shouldUseFn bool) string { + + if config.TemplateConfig.JinjaTemplate { + var messageData []ChatMessageTemplateData + for messageIndex, i := range messages { + fcall := i.FunctionCall + if len(i.ToolCalls) > 0 { + fcall = i.ToolCalls + } + messageData = append(messageData, ChatMessageTemplateData{ + SystemPrompt: config.SystemPrompt, + Role: config.Roles[i.Role], + RoleName: i.Role, + Content: i.StringContent, + FunctionCall: fcall, + FunctionName: i.Name, + LastMessage: messageIndex == (len(messages) - 1), + Function: config.Grammar != "" && (messageIndex == (len(messages) - 1)), + MessageIndex: messageIndex, + }) + } + + templatedInput, err := e.templateJinjaChat(config.TemplateConfig.ChatMessage, messageData, funcs) + if err == nil { + return templatedInput + } + } + + var predInput string + suppressConfigSystemPrompt := false + mess := []string{} + for messageIndex, i := range messages { + var content string + role := i.Role + + // if function call, we might want to customize the role so we can display better that the "assistant called a json action" + // if an "assistant_function_call" role is defined, we use it, otherwise we use the role that is passed by in the request + if (i.FunctionCall != nil || i.ToolCalls != nil) && i.Role == "assistant" { + roleFn := "assistant_function_call" + r := config.Roles[roleFn] + if r != "" { + role = roleFn + } + } + r := config.Roles[role] + contentExists := i.Content != nil && i.StringContent != "" + + fcall := i.FunctionCall + if len(i.ToolCalls) > 0 { + fcall = i.ToolCalls + } + + // First attempt to populate content via a chat message specific template + if config.TemplateConfig.ChatMessage != "" { + chatMessageData := ChatMessageTemplateData{ + SystemPrompt: config.SystemPrompt, + Role: r, + RoleName: role, + Content: i.StringContent, + FunctionCall: fcall, + FunctionName: i.Name, + LastMessage: messageIndex == (len(messages) - 1), + Function: config.Grammar != "" && (messageIndex == (len(messages) - 1)), + MessageIndex: messageIndex, + } + templatedChatMessage, err := e.evaluateTemplateForChatMessage(config.TemplateConfig.ChatMessage, chatMessageData) + if err != nil { + log.Error().Err(err).Interface("message", chatMessageData).Str("template", config.TemplateConfig.ChatMessage).Msg("error processing message with template, skipping") + } else { + if templatedChatMessage == "" { + log.Warn().Msgf("template \"%s\" produced blank output for %+v. 
Skipping!", config.TemplateConfig.ChatMessage, chatMessageData) + continue // TODO: This continue is here intentionally to skip over the line `mess = append(mess, content)` below, and to prevent the sprintf + } + log.Debug().Msgf("templated message for chat: %s", templatedChatMessage) + content = templatedChatMessage + } + } + + marshalAnyRole := func(f any) { + j, err := json.Marshal(f) + if err == nil { + if contentExists { + content += "\n" + fmt.Sprint(r, " ", string(j)) + } else { + content = fmt.Sprint(r, " ", string(j)) + } + } + } + marshalAny := func(f any) { + j, err := json.Marshal(f) + if err == nil { + if contentExists { + content += "\n" + string(j) + } else { + content = string(j) + } + } + } + // If this model doesn't have such a template, or if that template fails to return a value, template at the message level. + if content == "" { + if r != "" { + if contentExists { + content = fmt.Sprint(r, i.StringContent) + } + + if i.FunctionCall != nil { + marshalAnyRole(i.FunctionCall) + } + if i.ToolCalls != nil { + marshalAnyRole(i.ToolCalls) + } + } else { + if contentExists { + content = fmt.Sprint(i.StringContent) + } + if i.FunctionCall != nil { + marshalAny(i.FunctionCall) + } + if i.ToolCalls != nil { + marshalAny(i.ToolCalls) + } + } + // Special Handling: System. We care if it was printed at all, not the r branch, so check seperately + if contentExists && role == "system" { + suppressConfigSystemPrompt = true + } + } + + mess = append(mess, content) + } + + joinCharacter := "\n" + if config.TemplateConfig.JoinChatMessagesByCharacter != nil { + joinCharacter = *config.TemplateConfig.JoinChatMessagesByCharacter + } + + predInput = strings.Join(mess, joinCharacter) + log.Debug().Msgf("Prompt (before templating): %s", predInput) + + promptTemplate := ChatPromptTemplate + + if config.TemplateConfig.Functions != "" && shouldUseFn { + promptTemplate = FunctionsPromptTemplate + } + + templatedInput, err := e.EvaluateTemplateForPrompt(promptTemplate, *config, PromptTemplateData{ + SystemPrompt: config.SystemPrompt, + SuppressSystemPrompt: suppressConfigSystemPrompt, + Input: predInput, + Functions: funcs, + }) + if err == nil { + predInput = templatedInput + log.Debug().Msgf("Template found, input modified to: %s", predInput) + } else { + log.Debug().Msgf("Template failed loading: %s", err.Error()) + } + + return predInput +} diff --git a/pkg/templates/evaluator_test.go b/pkg/templates/evaluator_test.go new file mode 100644 index 00000000..b58dd40b --- /dev/null +++ b/pkg/templates/evaluator_test.go @@ -0,0 +1,253 @@ +package templates_test + +import ( + "github.com/mudler/LocalAI/core/config" + "github.com/mudler/LocalAI/core/schema" + "github.com/mudler/LocalAI/pkg/functions" + . "github.com/mudler/LocalAI/pkg/templates" + + . "github.com/onsi/ginkgo/v2" + . 
"github.com/onsi/gomega" +) + +const toolCallJinja = `{{ '<|begin_of_text|>' }}{% if messages[0]['role'] == 'system' %}{% set system_message = messages[0]['content'] %}{% endif %}{% if system_message is defined %}{{ '<|start_header_id|>system<|end_header_id|> + +' + system_message + '<|eot_id|>' }}{% endif %}{% for message in messages %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ '<|start_header_id|>user<|end_header_id|> + +' + content + '<|eot_id|><|start_header_id|>assistant<|end_header_id|> + +' }}{% elif message['role'] == 'assistant' %}{{ content + '<|eot_id|>' }}{% endif %}{% endfor %}` + +const chatML = `<|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}} +{{- if .FunctionCall }} + +{{- else if eq .RoleName "tool" }} + +{{- end }} +{{- if .Content}} +{{.Content }} +{{- end }} +{{- if .FunctionCall}} +{{toJson .FunctionCall}} +{{- end }} +{{- if .FunctionCall }} + +{{- else if eq .RoleName "tool" }} + +{{- end }}<|im_end|>` + +const llama3 = `<|start_header_id|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}<|end_header_id|> + +{{ if .FunctionCall -}} +Function call: +{{ else if eq .RoleName "tool" -}} +Function response: +{{ end -}} +{{ if .Content -}} +{{.Content -}} +{{ else if .FunctionCall -}} +{{ toJson .FunctionCall -}} +{{ end -}} +<|eot_id|>` + +var llama3TestMatch map[string]map[string]interface{} = map[string]map[string]interface{}{ + "user": { + "expected": "<|start_header_id|>user<|end_header_id|>\n\nA long time ago in a galaxy far, far away...<|eot_id|>", + "config": &config.BackendConfig{ + TemplateConfig: config.TemplateConfig{ + ChatMessage: llama3, + }, + }, + "functions": []functions.Function{}, + "shouldUseFn": false, + "messages": []schema.Message{ + { + Role: "user", + StringContent: "A long time ago in a galaxy far, far away...", + }, + }, + }, + "assistant": { + "expected": "<|start_header_id|>assistant<|end_header_id|>\n\nA long time ago in a galaxy far, far away...<|eot_id|>", + "config": &config.BackendConfig{ + TemplateConfig: config.TemplateConfig{ + ChatMessage: llama3, + }, + }, + "functions": []functions.Function{}, + "messages": []schema.Message{ + { + Role: "assistant", + StringContent: "A long time ago in a galaxy far, far away...", + }, + }, + "shouldUseFn": false, + }, + "function_call": { + + "expected": "<|start_header_id|>assistant<|end_header_id|>\n\nFunction call:\n{\"function\":\"test\"}<|eot_id|>", + "config": &config.BackendConfig{ + TemplateConfig: config.TemplateConfig{ + ChatMessage: llama3, + }, + }, + "functions": []functions.Function{}, + "messages": []schema.Message{ + { + Role: "assistant", + FunctionCall: map[string]string{"function": "test"}, + }, + }, + "shouldUseFn": false, + }, + "function_response": { + "expected": "<|start_header_id|>tool<|end_header_id|>\n\nFunction response:\nResponse from tool<|eot_id|>", + "config": &config.BackendConfig{ + TemplateConfig: config.TemplateConfig{ + ChatMessage: llama3, + }, + }, + "functions": []functions.Function{}, + "messages": []schema.Message{ + { + Role: "tool", + StringContent: "Response from tool", + }, + }, + "shouldUseFn": false, + }, +} + +var chatMLTestMatch map[string]map[string]interface{} = map[string]map[string]interface{}{ + "user": { + "expected": "<|im_start|>user\nA long time ago in a galaxy far, far 
away...<|im_end|>", + "config": &config.BackendConfig{ + TemplateConfig: config.TemplateConfig{ + ChatMessage: chatML, + }, + }, + "functions": []functions.Function{}, + "shouldUseFn": false, + "messages": []schema.Message{ + { + Role: "user", + StringContent: "A long time ago in a galaxy far, far away...", + }, + }, + }, + "assistant": { + "expected": "<|im_start|>assistant\nA long time ago in a galaxy far, far away...<|im_end|>", + "config": &config.BackendConfig{ + TemplateConfig: config.TemplateConfig{ + ChatMessage: chatML, + }, + }, + "functions": []functions.Function{}, + "messages": []schema.Message{ + { + Role: "assistant", + StringContent: "A long time ago in a galaxy far, far away...", + }, + }, + "shouldUseFn": false, + }, + "function_call": { + "expected": "<|im_start|>assistant\n\n{\"function\":\"test\"}\n<|im_end|>", + "config": &config.BackendConfig{ + TemplateConfig: config.TemplateConfig{ + ChatMessage: chatML, + }, + }, + "functions": []functions.Function{ + { + Name: "test", + Description: "test", + Parameters: nil, + }, + }, + "shouldUseFn": true, + "messages": []schema.Message{ + { + Role: "assistant", + FunctionCall: map[string]string{"function": "test"}, + }, + }, + }, + "function_response": { + "expected": "<|im_start|>tool\n\nResponse from tool\n<|im_end|>", + "config": &config.BackendConfig{ + TemplateConfig: config.TemplateConfig{ + ChatMessage: chatML, + }, + }, + "functions": []functions.Function{}, + "shouldUseFn": false, + "messages": []schema.Message{ + { + Role: "tool", + StringContent: "Response from tool", + }, + }, + }, +} + +var jinjaTest map[string]map[string]interface{} = map[string]map[string]interface{}{ + "user": { + "expected": "<|begin_of_text|><|start_header_id|>user<|end_header_id|>\n\nA long time ago in a galaxy far, far away...<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n", + "config": &config.BackendConfig{ + TemplateConfig: config.TemplateConfig{ + ChatMessage: toolCallJinja, + JinjaTemplate: true, + }, + }, + "functions": []functions.Function{}, + "shouldUseFn": false, + "messages": []schema.Message{ + { + Role: "user", + StringContent: "A long time ago in a galaxy far, far away...", + }, + }, + }, +} +var _ = Describe("Templates", func() { + Context("chat message ChatML", func() { + var evaluator *Evaluator + BeforeEach(func() { + evaluator = NewEvaluator("") + }) + for key := range chatMLTestMatch { + foo := chatMLTestMatch[key] + It("renders correctly `"+key+"`", func() { + templated := evaluator.TemplateMessages(foo["messages"].([]schema.Message), foo["config"].(*config.BackendConfig), foo["functions"].([]functions.Function), foo["shouldUseFn"].(bool)) + Expect(templated).To(Equal(foo["expected"]), templated) + }) + } + }) + Context("chat message llama3", func() { + var evaluator *Evaluator + BeforeEach(func() { + evaluator = NewEvaluator("") + }) + for key := range llama3TestMatch { + foo := llama3TestMatch[key] + It("renders correctly `"+key+"`", func() { + templated := evaluator.TemplateMessages(foo["messages"].([]schema.Message), foo["config"].(*config.BackendConfig), foo["functions"].([]functions.Function), foo["shouldUseFn"].(bool)) + Expect(templated).To(Equal(foo["expected"]), templated) + }) + } + }) + Context("chat message jinja", func() { + var evaluator *Evaluator + BeforeEach(func() { + evaluator = NewEvaluator("") + }) + for key := range jinjaTest { + foo := jinjaTest[key] + It("renders correctly `"+key+"`", func() { + templated := evaluator.TemplateMessages(foo["messages"].([]schema.Message), 
foo["config"].(*config.BackendConfig), foo["functions"].([]functions.Function), foo["shouldUseFn"].(bool)) + Expect(templated).To(Equal(foo["expected"]), templated) + }) + } + }) +}) From f943c4b803b99efc587ff126d3766a6ada19db20 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sun, 8 Dec 2024 17:53:36 +0100 Subject: [PATCH 015/849] Revert "feat: include tokens usage for streamed output" (#4336) Revert "feat: include tokens usage for streamed output (#4282)" This reverts commit 0d6c3a7d57101428aec4100d0f7bca765ee684a7. --- core/backend/llm.go | 12 ++---------- core/http/endpoints/openai/chat.go | 9 +-------- pkg/grpc/backend.go | 2 +- pkg/grpc/client.go | 6 +++--- pkg/grpc/embed.go | 6 +++--- 5 files changed, 10 insertions(+), 25 deletions(-) diff --git a/core/backend/llm.go b/core/backend/llm.go index 9e121f79..4491a191 100644 --- a/core/backend/llm.go +++ b/core/backend/llm.go @@ -117,12 +117,8 @@ func ModelInference(ctx context.Context, s string, messages []schema.Message, im ss := "" var partialRune []byte - err := inferenceModel.PredictStream(ctx, opts, func(reply *proto.Reply) { - msg := reply.GetMessage() - partialRune = append(partialRune, msg...) - - tokenUsage.Prompt = int(reply.PromptTokens) - tokenUsage.Completion = int(reply.Tokens) + err := inferenceModel.PredictStream(ctx, opts, func(chars []byte) { + partialRune = append(partialRune, chars...) for len(partialRune) > 0 { r, size := utf8.DecodeRune(partialRune) @@ -136,10 +132,6 @@ func ModelInference(ctx context.Context, s string, messages []schema.Message, im partialRune = partialRune[size:] } - - if len(msg) == 0 { - tokenCallback("", tokenUsage) - } }) return LLMResponse{ Response: ss, diff --git a/core/http/endpoints/openai/chat.go b/core/http/endpoints/openai/chat.go index 21e71d35..c2b201bd 100644 --- a/core/http/endpoints/openai/chat.go +++ b/core/http/endpoints/openai/chat.go @@ -41,15 +41,11 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, evaluat responses <- initialMessage ComputeChoices(req, s, config, startupOptions, loader, func(s string, c *[]schema.Choice) {}, func(s string, usage backend.TokenUsage) bool { - choices := []schema.Choice{} - if s != "" { - choices = append(choices, schema.Choice{Delta: &schema.Message{Content: &s}, Index: 0}) - } resp := schema.OpenAIResponse{ ID: id, Created: created, Model: req.Model, // we have to return what the user sent here, due to OpenAI spec. 
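 // (descriptive note: the id, created and model fields above are repeated verbatim in every streamed chunk; only Choices and Usage vary between chunks)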
- Choices: choices, + Choices: []schema.Choice{{Delta: &schema.Message{Content: &s}, Index: 0}}, Object: "chat.completion.chunk", Usage: schema.OpenAIUsage{ PromptTokens: usage.Prompt, @@ -333,9 +329,6 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, evaluat toolsCalled := false for ev := range responses { usage = &ev.Usage // Copy a pointer to the latest usage chunk so that the stop message can reference it - if len(ev.Choices) == 0 { - break - } if len(ev.Choices[0].Delta.ToolCalls) > 0 { toolsCalled = true } diff --git a/pkg/grpc/backend.go b/pkg/grpc/backend.go index fabc0268..21435891 100644 --- a/pkg/grpc/backend.go +++ b/pkg/grpc/backend.go @@ -37,7 +37,7 @@ type Backend interface { Embeddings(ctx context.Context, in *pb.PredictOptions, opts ...grpc.CallOption) (*pb.EmbeddingResult, error) Predict(ctx context.Context, in *pb.PredictOptions, opts ...grpc.CallOption) (*pb.Reply, error) LoadModel(ctx context.Context, in *pb.ModelOptions, opts ...grpc.CallOption) (*pb.Result, error) - PredictStream(ctx context.Context, in *pb.PredictOptions, f func(reply *pb.Reply), opts ...grpc.CallOption) error + PredictStream(ctx context.Context, in *pb.PredictOptions, f func(s []byte), opts ...grpc.CallOption) error GenerateImage(ctx context.Context, in *pb.GenerateImageRequest, opts ...grpc.CallOption) (*pb.Result, error) TTS(ctx context.Context, in *pb.TTSRequest, opts ...grpc.CallOption) (*pb.Result, error) SoundGeneration(ctx context.Context, in *pb.SoundGenerationRequest, opts ...grpc.CallOption) (*pb.Result, error) diff --git a/pkg/grpc/client.go b/pkg/grpc/client.go index ca207c3f..9c8b302e 100644 --- a/pkg/grpc/client.go +++ b/pkg/grpc/client.go @@ -136,7 +136,7 @@ func (c *Client) LoadModel(ctx context.Context, in *pb.ModelOptions, opts ...grp return client.LoadModel(ctx, in, opts...) 
} -func (c *Client) PredictStream(ctx context.Context, in *pb.PredictOptions, f func(reply *pb.Reply), opts ...grpc.CallOption) error { +func (c *Client) PredictStream(ctx context.Context, in *pb.PredictOptions, f func(s []byte), opts ...grpc.CallOption) error { if !c.parallel { c.opMutex.Lock() defer c.opMutex.Unlock() @@ -158,7 +158,7 @@ func (c *Client) PredictStream(ctx context.Context, in *pb.PredictOptions, f fun } for { - reply, err := stream.Recv() + feature, err := stream.Recv() if err == io.EOF { break } @@ -167,7 +167,7 @@ func (c *Client) PredictStream(ctx context.Context, in *pb.PredictOptions, f fun return err } - f(reply) + f(feature.GetMessage()) } return nil diff --git a/pkg/grpc/embed.go b/pkg/grpc/embed.go index 79648c5a..a5828a5f 100644 --- a/pkg/grpc/embed.go +++ b/pkg/grpc/embed.go @@ -35,7 +35,7 @@ func (e *embedBackend) LoadModel(ctx context.Context, in *pb.ModelOptions, opts return e.s.LoadModel(ctx, in) } -func (e *embedBackend) PredictStream(ctx context.Context, in *pb.PredictOptions, f func(reply *pb.Reply), opts ...grpc.CallOption) error { +func (e *embedBackend) PredictStream(ctx context.Context, in *pb.PredictOptions, f func(s []byte), opts ...grpc.CallOption) error { bs := &embedBackendServerStream{ ctx: ctx, fn: f, @@ -97,11 +97,11 @@ func (e *embedBackend) GetTokenMetrics(ctx context.Context, in *pb.MetricsReques type embedBackendServerStream struct { ctx context.Context - fn func(reply *pb.Reply) + fn func(s []byte) } func (e *embedBackendServerStream) Send(reply *pb.Reply) error { - e.fn(reply) + e.fn(reply.GetMessage()) return nil } From a0fe05005586353844e7704c2b87c6f55a7240c8 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sun, 8 Dec 2024 18:01:16 +0100 Subject: [PATCH 016/849] chore(model gallery): add mn-chunky-lotus-12b (#4337) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 24 ++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index c94358b6..43f34430 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -4410,6 +4410,30 @@ - filename: Chatty-Harry_V3.0.Q4_K_M.gguf sha256: 54b63bb74498576ca77b801ed096657a93cc2f6b71d707c3605fdb394bd3e622 uri: huggingface://QuantFactory/Chatty-Harry_V3.0-GGUF/Chatty-Harry_V3.0.Q4_K_M.gguf +- !!merge <<: *mistral03 + url: "github:mudler/LocalAI/gallery/chatml.yaml@master" + name: "mn-chunky-lotus-12b" + icon: https://huggingface.co/FallenMerick/MN-Chunky-Lotus-12B/resolve/main/chunky-lotus.jpg + urls: + - https://huggingface.co/QuantFactory/MN-Chunky-Lotus-12B-GGUF + description: | + I had originally planned to use this model for future/further merges, but decided to go ahead and release it since it scored rather high on my local EQ Bench testing (79.58 w/ 100% parsed @ 8-bit). + Bear in mind that most models tend to score a bit higher on my own local tests as compared to their posted scores. Still, it's the highest score I've personally seen from all the models I've tested. + It's a decent model, with great emotional intelligence and acceptable adherence to various character personalities. It does a good job at roleplaying despite being a bit bland at times. + + Overall, I like the way it writes, but it has a few formatting issues that show up from time to time, and it has an uncommon tendency to paste walls of character feelings/intentions at the end of some outputs without any prompting. This is something I hope to correct with future iterations. + This is a merge of pre-trained language models created using mergekit. 
+ The following models were included in the merge: + Epiculous/Violet_Twilight-v0.2 + nbeerbower/mistral-nemo-gutenberg-12B-v4 + flammenai/Mahou-1.5-mistral-nemo-12B + overrides: + parameters: + model: MN-Chunky-Lotus-12B.Q4_K_M.gguf + files: + - filename: MN-Chunky-Lotus-12B.Q4_K_M.gguf + sha256: 363defe0a769fdb715dab75517966a0a80bcdd981a610d4c759099b6c8ff143a + uri: huggingface://QuantFactory/MN-Chunky-Lotus-12B-GGUF/MN-Chunky-Lotus-12B.Q4_K_M.gguf - &mudler ### START mudler's LocalAI specific-models url: "github:mudler/LocalAI/gallery/mudler.yaml@master" From 61839efed2d15c3c223f5b7a5802f55a28ced45b Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sun, 8 Dec 2024 18:01:25 +0100 Subject: [PATCH 017/849] chore(model gallery): add virtuoso-small (#4338) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index 43f34430..4a307b88 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -1838,6 +1838,20 @@ - filename: Math-IIO-7B-Instruct.Q4_K_M.gguf sha256: 8ffda0b6a43eb9997dfd7db48fe3bd0970fd1b9b86fb68f082c38622a48b58f4 uri: huggingface://QuantFactory/Math-IIO-7B-Instruct-GGUF/Math-IIO-7B-Instruct.Q4_K_M.gguf +- !!merge <<: *qwen25 + name: "virtuoso-small" + icon: https://i.ibb.co/pXD6Bcv/SW2-U-g-QQLSH1-ZAbxhs-Iu-A.webp + urls: + - https://huggingface.co/arcee-ai/Virtuoso-Small-GGUF + description: | + Virtuoso-Small is the debut public release of the Virtuoso series of models by Arcee.ai, designed to bring cutting-edge generative AI capabilities to organizations and developers in a compact, efficient form. With 14 billion parameters, Virtuoso-Small is an accessible entry point for high-quality instruction-following, complex reasoning, and business-oriented generative AI tasks. + overrides: + parameters: + model: Virtuoso-Small-Q4_K_M.gguf + files: + - filename: Virtuoso-Small-Q4_K_M.gguf + sha256: 07db215cdfcb05036567017fe20e50e60cb2da28d1f9a8251cc4f18c8caa247f + uri: huggingface://arcee-ai/Virtuoso-Small-GGUF/Virtuoso-Small-Q4_K_M.gguf - &archfunct license: apache-2.0 tags: From e147f1bd3eddbe4f8a24fec1a0b293fff5db2ad4 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sun, 8 Dec 2024 18:43:26 +0100 Subject: [PATCH 018/849] chore(model gallery): add bio-medical-llama-3-8b (#4339) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index 4a307b88..b3ed3f90 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -8129,6 +8129,23 @@ - filename: RP-Naughty-v1.0c-8b.Q4_K_M.gguf sha256: c344564d26d0c3d244d31cfeb103666eab37f9dee6678a2dbaf5bfcf4109d789 uri: huggingface://QuantFactory/RP-Naughty-v1.0c-8b-GGUF/RP-Naughty-v1.0c-8b.Q4_K_M.gguf +- !!merge <<: *llama3 + name: "bio-medical-llama-3-8b" + icon: https://cdn-uploads.huggingface.co/production/uploads/653f5b93cd52f288490edc83/zPMUugzfOiwTiRw88jm7T.jpeg + urls: + - https://huggingface.co/ContactDoctor/Bio-Medical-Llama-3-8B + - https://huggingface.co/QuantFactory/Bio-Medical-Llama-3-8B-GGUF + description: | + Bio-Medical-Llama-3-8B model is a specialized large language model designed for biomedical applications. It is finetuned from the meta-llama/Meta-Llama-3-8B-Instruct model using a custom dataset containing over 500,000 diverse entries. These entries include a mix of synthetic and manually curated data, ensuring high quality and broad coverage of biomedical topics. 
+ + The model is trained to understand and generate text related to various biomedical fields, making it a valuable tool for researchers, clinicians, and other professionals in the biomedical domain. + overrides: + parameters: + model: Bio-Medical-Llama-3-8B.Q4_K_M.gguf + files: + - filename: Bio-Medical-Llama-3-8B.Q4_K_M.gguf + sha256: 672939e0487d02c55734132c25a59f26e4deaac7cd49445a7028f2291139edcc + uri: huggingface://QuantFactory/Bio-Medical-Llama-3-8B-GGUF/Bio-Medical-Llama-3-8B.Q4_K_M.gguf - &command-R ### START Command-r url: "github:mudler/LocalAI/gallery/command-r.yaml@master" From b5a21202ed81cf90dd59c7fee18b656173557148 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Sun, 8 Dec 2024 23:54:06 +0100 Subject: [PATCH 019/849] chore: :arrow_up: Update ggerganov/llama.cpp to `e52522b8694ae73abf12feb18d29168674aa1c1b` (#4342) :arrow_up: Update ggerganov/llama.cpp Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 786de811..c499119a 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ DETECT_LIBS?=true # llama.cpp versions GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be -CPPLLAMA_VERSION?=3573fa8e7b7f0865638b52b4e9b4d2006f0558a2 +CPPLLAMA_VERSION?=e52522b8694ae73abf12feb18d29168674aa1c1b # whisper.cpp version WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp From fb17e737f0dc4d176a1d7e6845453cb6ecd4e95c Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Mon, 9 Dec 2024 09:19:25 +0100 Subject: [PATCH 020/849] docs: :arrow_up: update docs version mudler/LocalAI (#4341) :arrow_up: Update docs version mudler/LocalAI Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- docs/data/version.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/data/version.json b/docs/data/version.json index bb7517a1..f6462f81 100644 --- a/docs/data/version.json +++ b/docs/data/version.json @@ -1,3 +1,3 @@ { - "version": "v2.24.0" + "version": "v2.24.1" } From a9c0dd3a1e12841ed08b722f1c7e739f967afffa Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Mon, 9 Dec 2024 10:24:15 +0100 Subject: [PATCH 021/849] chore(model gallery): add qwen2.5-7b-homeranvita-nerdmix (#4343) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index b3ed3f90..e163f72d 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -1852,6 +1852,20 @@ - filename: Virtuoso-Small-Q4_K_M.gguf sha256: 07db215cdfcb05036567017fe20e50e60cb2da28d1f9a8251cc4f18c8caa247f uri: huggingface://arcee-ai/Virtuoso-Small-GGUF/Virtuoso-Small-Q4_K_M.gguf +- !!merge <<: *qwen25 + name: "qwen2.5-7b-homeranvita-nerdmix" + urls: + - https://huggingface.co/ZeroXClem/Qwen2.5-7B-HomerAnvita-NerdMix + - https://huggingface.co/QuantFactory/Qwen2.5-7B-HomerAnvita-NerdMix-GGUF + description: | + ZeroXClem/Qwen2.5-7B-HomerAnvita-NerdMix is an advanced language model meticulously crafted by merging five pre-trained models using the powerful mergekit framework. 
This fusion leverages the Model Stock merge method to combine the creative prowess of Qandora, the instructive capabilities of Qwen-Instruct-Fusion, the sophisticated blending of HomerSlerp1, the mathematical precision of Cybertron-MGS, and the uncensored expertise of Qwen-Nerd. The resulting model excels in creative text generation, contextual understanding, technical reasoning, and dynamic conversational interactions. + overrides: + parameters: + model: Qwen2.5-7B-HomerAnvita-NerdMix.Q4_K_M.gguf + files: + - filename: Qwen2.5-7B-HomerAnvita-NerdMix.Q4_K_M.gguf + sha256: 73db2ca3ab50e8627352078988cd173e7447c5e8199a7db9e554602da1362e5f + uri: huggingface://QuantFactory/Qwen2.5-7B-HomerAnvita-NerdMix-GGUF/Qwen2.5-7B-HomerAnvita-NerdMix.Q4_K_M.gguf - &archfunct license: apache-2.0 tags: From 5eceb5f67ced52e51dd485c72c21eeb8cc8b43b6 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Mon, 9 Dec 2024 10:24:30 +0100 Subject: [PATCH 022/849] chore(model gallery): add impish_mind_8b (#4344) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index e163f72d..2f2f4c1b 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -3628,6 +3628,25 @@ - filename: Loki-v2.6-8b-1024k.Q4_K_M.gguf sha256: 9b15c1fee0a0e6d6ed97df3d1b6fc8f774e6e1bd388328599e731c62e0f19d81 uri: huggingface://QuantFactory/Loki-v2.6-8b-1024k-GGUF/Loki-v2.6-8b-1024k.Q4_K_M.gguf +- !!merge <<: *llama31 + name: "impish_mind_8b" + icon: https://huggingface.co/SicariusSicariiStuff/Impish_Mind_8B/resolve/main/Images/Impish_Mind.png + urls: + - https://huggingface.co/SicariusSicariiStuff/Impish_Mind_8B + - https://huggingface.co/bartowski/Impish_Mind_8B-GGUF + description: | + This model was trained with new data and a new approach (compared to my other models). While it may be a bit more censored, it is expected to be significantly smarter. The data used is quite unique, and it also features long and complex markdown datasets. + + Regarding censorship: Whether uncensoring or enforcing strict censorship, the model tends to lose some of its intelligence. The use of toxic data was kept to a minimum with this model. + + Consequently, the model is likely to refuse some requests; this is easily avoidable with a basic system prompt, or assistant impersonation ("Sure thing!..."). Unlike many RP models, this one is designed to excel at general assistant tasks as well. 
+ overrides: + parameters: + model: Impish_Mind_8B-Q4_K_M.gguf + files: + - filename: Impish_Mind_8B-Q4_K_M.gguf + sha256: 918f82bcb893c75fa2e846156df7bd3ce359464b960e32ae9171035ee14e7c51 + uri: huggingface://bartowski/Impish_Mind_8B-GGUF/Impish_Mind_8B-Q4_K_M.gguf From f45d6c746ad5012cb5406c977608d9ad081c35b4 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Mon, 9 Dec 2024 15:58:29 +0100 Subject: [PATCH 023/849] chore(model gallery): add tulu-3.1-8b-supernova-smart (#4347) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index 2f2f4c1b..f57b80c6 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -3647,6 +3647,20 @@ - filename: Impish_Mind_8B-Q4_K_M.gguf sha256: 918f82bcb893c75fa2e846156df7bd3ce359464b960e32ae9171035ee14e7c51 uri: huggingface://bartowski/Impish_Mind_8B-GGUF/Impish_Mind_8B-Q4_K_M.gguf +- !!merge <<: *llama31 + name: "tulu-3.1-8b-supernova-smart" + urls: + - https://huggingface.co/bunnycore/Tulu-3.1-8B-SuperNova-Smart + - https://huggingface.co/QuantFactory/Tulu-3.1-8B-SuperNova-Smart-GGUF + description: | + This model was merged using the passthrough merge method, with bunnycore/Tulu-3.1-8B-SuperNova + bunnycore/Llama-3.1-8b-smart-lora as a base. + overrides: + parameters: + model: Tulu-3.1-8B-SuperNova-Smart.Q4_K_M.gguf + files: + - filename: Tulu-3.1-8B-SuperNova-Smart.Q4_K_M.gguf + sha256: 4b8ba9e64f0667199eee2dcc769f1a90aa9c7730165d42f440fdf107c7585c63 + uri: huggingface://QuantFactory/Tulu-3.1-8B-SuperNova-Smart-GGUF/Tulu-3.1-8B-SuperNova-Smart.Q4_K_M.gguf - &deepseek ## Deepseek url: "github:mudler/LocalAI/gallery/deepseek.yaml@master" From a03a9b9e51ef95e598b9108bc6da593d1619ab5f Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 9 Dec 2024 20:09:26 +0000 Subject: [PATCH 024/849] chore(deps): Bump docs/themes/hugo-theme-relearn from `be85052` to `bd1f3d3` (#4348) chore(deps): Bump docs/themes/hugo-theme-relearn Bumps [docs/themes/hugo-theme-relearn](https://github.com/McShelby/hugo-theme-relearn) from `be85052` to `bd1f3d3`. - [Release notes](https://github.com/McShelby/hugo-theme-relearn/releases) - [Commits](https://github.com/McShelby/hugo-theme-relearn/compare/be85052efea3a0aaef45ecb0126d390c1bbac760...bd1f3d3432632c61bb12e7ec0f7673fed0289f19) --- updated-dependencies: - dependency-name: docs/themes/hugo-theme-relearn dependency-type: direct:production ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- docs/themes/hugo-theme-relearn | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/themes/hugo-theme-relearn b/docs/themes/hugo-theme-relearn index be85052e..bd1f3d34 160000 --- a/docs/themes/hugo-theme-relearn +++ b/docs/themes/hugo-theme-relearn @@ -1 +1 @@ -Subproject commit be85052efea3a0aaef45ecb0126d390c1bbac760 +Subproject commit bd1f3d3432632c61bb12e7ec0f7673fed0289f19 From 885118e863e24253b88bb3751f1963e8c34043de Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Tue, 10 Dec 2024 09:10:58 +0100 Subject: [PATCH 025/849] chore: :arrow_up: Update ggerganov/llama.cpp to `26a8406ba9198eb6fdd8329fa717555b4f77f05f` (#4353) :arrow_up: Update ggerganov/llama.cpp Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index c499119a..f9a1a2db 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ DETECT_LIBS?=true # llama.cpp versions GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be -CPPLLAMA_VERSION?=e52522b8694ae73abf12feb18d29168674aa1c1b +CPPLLAMA_VERSION?=26a8406ba9198eb6fdd8329fa717555b4f77f05f # whisper.cpp version WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp From 3aff87a5cfae23f1c1f40b162f1745fe018b98b1 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Tue, 10 Dec 2024 09:42:24 +0100 Subject: [PATCH 026/849] chore(model gallery): add qwen2.5-math-14b-instruct (#4355) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index f57b80c6..08ef8bcb 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -1866,6 +1866,21 @@ - filename: Qwen2.5-7B-HomerAnvita-NerdMix.Q4_K_M.gguf sha256: 73db2ca3ab50e8627352078988cd173e7447c5e8199a7db9e554602da1362e5f uri: huggingface://QuantFactory/Qwen2.5-7B-HomerAnvita-NerdMix-GGUF/Qwen2.5-7B-HomerAnvita-NerdMix.Q4_K_M.gguf +- !!merge <<: *qwen25 + name: "qwen2.5-math-14b-instruct" + urls: + - https://huggingface.co/qingy2024/Qwen2.5-Math-14B-Instruct-Preview + - https://huggingface.co/QuantFactory/Qwen2.5-Math-14B-Instruct-GGUF + description: | + This Qwen 2.5 model was trained 2x faster with Unsloth and Huggingface's TRL library. + It was fine-tuned for 400 steps on garage-bAInd/Open-Platypus with a batch size of 3. 
+ overrides: + parameters: + model: Qwen2.5-Math-14B-Instruct.Q4_K_M.gguf + files: + - filename: Qwen2.5-Math-14B-Instruct.Q4_K_M.gguf + sha256: 14e672394738a7d9f14a6cb16fd9a649b113a19a8b4934f9c18299fc4e286ab6 + uri: huggingface://QuantFactory/Qwen2.5-Math-14B-Instruct-GGUF/Qwen2.5-Math-14B-Instruct.Q4_K_M.gguf - &archfunct license: apache-2.0 tags: From 272763f625a6db7e064504074677d77676d7e941 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Tue, 10 Dec 2024 09:42:37 +0100 Subject: [PATCH 027/849] chore(model gallery): add intellect-1-instruct (#4356) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 25 +++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index 08ef8bcb..a73500d7 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -1,4 +1,29 @@ --- +- &intellect1 + name: "intellect-1-instruct" + url: "github:mudler/LocalAI/gallery/llama3.1-instruct.yaml@master" + icon: https://huggingface.co/PrimeIntellect/INTELLECT-1-Instruct/resolve/main/intellect-1-map.png + urls: + - https://huggingface.co/PrimeIntellect/INTELLECT-1-Instruct + - https://huggingface.co/bartowski/INTELLECT-1-Instruct-GGUF + tags: + - llm + - gguf + - gpu + - cpu + - intellect + license: apache-2.0 + description: | + INTELLECT-1 is the first collaboratively trained 10 billion parameter language model trained from scratch on 1 trillion tokens of English text and code. + This is an instruct model. The base model associated with it is INTELLECT-1. + INTELLECT-1 was trained on up to 14 concurrent nodes distributed across 3 continents, with contributions from 30 independent community contributors providing compute. The training code utilizes the prime framework, a scalable distributed training framework designed for fault-tolerant, dynamically scaling, high-performance training on unreliable, globally distributed workers. The key abstraction that allows dynamic scaling is the ElasticDeviceMesh, which manages dynamic global process groups for fault-tolerant communication across the internet and local process groups for communication within a node. The model was trained using the DiLoCo algorithm with 100 inner steps. The global all-reduce was done with custom int8 all-reduce kernels to reduce the communication payload required, greatly reducing the communication overhead by a factor of 400. 
+ overrides: + parameters: + model: INTELLECT-1-Instruct-Q4_K_M.gguf + files: + - filename: INTELLECT-1-Instruct-Q4_K_M.gguf + sha256: 5df236fe570e5998d07fb3207788eac811ef3b77dd2a0ad04a2ef5c6361f3030 + uri: huggingface://bartowski/INTELLECT-1-Instruct-GGUF/INTELLECT-1-Instruct-Q4_K_M.gguf - &llama33 url: "github:mudler/LocalAI/gallery/llama3.1-instruct.yaml@master" icon: https://cdn-uploads.huggingface.co/production/uploads/642cc1c253e76b4c2286c58e/aJJxKus1wP5N-euvHEUq7.png From de1ddb8ba69ed6c55ba01d06f15572e1423dd8f7 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Tue, 10 Dec 2024 09:42:47 +0100 Subject: [PATCH 028/849] chore(model gallery): add b-nimita-l3-8b-v0.02 (#4357) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index a73500d7..b8eedc53 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -3701,6 +3701,20 @@ - filename: Tulu-3.1-8B-SuperNova-Smart.Q4_K_M.gguf sha256: 4b8ba9e64f0667199eee2dcc769f1a90aa9c7730165d42f440fdf107c7585c63 uri: huggingface://QuantFactory/Tulu-3.1-8B-SuperNova-Smart-GGUF/Tulu-3.1-8B-SuperNova-Smart.Q4_K_M.gguf +- !!merge <<: *llama31 + name: "b-nimita-l3-8b-v0.02" + urls: + - https://huggingface.co/Arkana08/B-NIMITA-L3-8B-v0.02 + - https://huggingface.co/QuantFactory/B-NIMITA-L3-8B-v0.02-GGUF + description: | + B-NIMITA is an AI model designed to bring role-playing scenarios to life with emotional depth and rich storytelling. At its core is NIHAPPY, providing a solid narrative foundation and contextual consistency. This is enhanced by Mythorica, which adds vivid emotional arcs and expressive dialogue, and V-Blackroot, ensuring character consistency and subtle adaptability. This combination allows B-NIMITA to deliver dynamic, engaging interactions that feel natural and immersive. 
+ overrides: + parameters: + model: B-NIMITA-L3-8B-v0.02.Q4_K_M.gguf + files: + - filename: B-NIMITA-L3-8B-v0.02.Q4_K_M.gguf + sha256: 625a54848dcd3f23bc06b639a7dfecae14142b5d177dd45acfe7724816bab4cd + uri: huggingface://QuantFactory/B-NIMITA-L3-8B-v0.02-GGUF/B-NIMITA-L3-8B-v0.02.Q4_K_M.gguf - &deepseek ## Deepseek url: "github:mudler/LocalAI/gallery/deepseek.yaml@master" From b74a936178a97d6944d5fff73cd193b691b6c06e Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Tue, 10 Dec 2024 22:45:42 +0100 Subject: [PATCH 029/849] chore: :arrow_up: Update ggerganov/llama.cpp to `dafae66cc242eb766797194d3c85c5e502625623` (#4360) :arrow_up: Update ggerganov/llama.cpp Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index f9a1a2db..36c7be21 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ DETECT_LIBS?=true # llama.cpp versions GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be -CPPLLAMA_VERSION?=26a8406ba9198eb6fdd8329fa717555b4f77f05f +CPPLLAMA_VERSION?=dafae66cc242eb766797194d3c85c5e502625623 # whisper.cpp version WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp From ec239a0cd0cd5ce321d8e49c28a2bf1a46597331 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Wed, 11 Dec 2024 10:04:16 +0100 Subject: [PATCH 030/849] docs: :arrow_up: update docs version mudler/LocalAI (#4359) :arrow_up: Update docs version mudler/LocalAI Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- docs/data/version.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/data/version.json b/docs/data/version.json index f6462f81..bf065426 100644 --- a/docs/data/version.json +++ b/docs/data/version.json @@ -1,3 +1,3 @@ { - "version": "v2.24.1" + "version": "v2.24.2" } From 1918efdfdd08d96f732a8f7e7d42060b56d8c2e5 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Wed, 11 Dec 2024 10:32:18 +0100 Subject: [PATCH 031/849] chore(model gallery): add sailor2-1b-chat (#4363) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index b8eedc53..b63520c6 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -1906,6 +1906,22 @@ - filename: Qwen2.5-Math-14B-Instruct.Q4_K_M.gguf sha256: 14e672394738a7d9f14a6cb16fd9a649b113a19a8b4934f9c18299fc4e286ab6 uri: huggingface://QuantFactory/Qwen2.5-Math-14B-Instruct-GGUF/Qwen2.5-Math-14B-Instruct.Q4_K_M.gguf +- !!merge <<: *qwen25 + name: "sailor2-1b-chat" + icon: https://huggingface.co/sail/Sailor2-1B-Chat/resolve/main/sailor2_banner.jpg + urls: + - https://huggingface.co/sail/Sailor2-1B-Chat + - https://huggingface.co/bartowski/Sailor2-1B-Chat-GGUF + description: | + Sailor2 is a community-driven initiative that brings cutting-edge multilingual language models to South-East Asia (SEA). Our research highlights a strong demand for models in the 8B and 20B parameter range for production use, alongside 1B models for specialized applications, such as speculative decoding and research purposes. 
These models, released under the Apache 2.0 license, provide enhanced accessibility to advanced language technologies across the region. + Sailor2 builds upon the foundation of the awesome multilingual model Qwen 2.5 and is continuously pre-trained on 500B tokens to support 15 languages better with a unified model. These languages include English, Chinese, Burmese, Cebuano, Ilocano, Indonesian, Javanese, Khmer, Lao, Malay, Sundanese, Tagalog, Thai, Vietnamese, and Waray. By addressing the growing demand for diverse, robust, and accessible language models, Sailor2 seeks to serve the underserved in SEA areas with open, inclusive, and accessible multilingual LLMs. The Sailor2 model comes in three sizes, 1B, 8B, and 20B, which are expanded from the Qwen2.5 base models of 0.5B, 7B, and 14B, respectively. + overrides: + parameters: + model: Sailor2-1B-Chat-Q4_K_M.gguf + files: + - filename: Sailor2-1B-Chat-Q4_K_M.gguf + sha256: 782e8abed13d51a2083eadfb2f6d94c2cd77940532f612a99e6f6bec9b3501d4 + uri: huggingface://bartowski/Sailor2-1B-Chat-GGUF/Sailor2-1B-Chat-Q4_K_M.gguf - &archfunct license: apache-2.0 tags: From 75b283d83c3acdda5156e90281e5cfadabb1b39c Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Wed, 11 Dec 2024 10:51:39 +0100 Subject: [PATCH 032/849] chore(model gallery): add sailor2-8b-chat (#4364) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index b63520c6..40394f6a 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -1922,6 +1922,21 @@ - filename: Sailor2-1B-Chat-Q4_K_M.gguf sha256: 782e8abed13d51a2083eadfb2f6d94c2cd77940532f612a99e6f6bec9b3501d4 uri: huggingface://bartowski/Sailor2-1B-Chat-GGUF/Sailor2-1B-Chat-Q4_K_M.gguf +- !!merge <<: *qwen25 + icon: https://huggingface.co/sail/Sailor2-1B-Chat/resolve/main/sailor2_banner.jpg + name: "sailor2-8b-chat" + urls: + - https://huggingface.co/bartowski/Sailor2-8B-Chat-GGUF + description: | + Sailor2 is a community-driven initiative that brings cutting-edge multilingual language models to South-East Asia (SEA). Our research highlights a strong demand for models in the 8B and 20B parameter range for production use, alongside 1B models for specialized applications, such as speculative decoding and research purposes. These models, released under the Apache 2.0 license, provide enhanced accessibility to advanced language technologies across the region. + Sailor2 builds upon the foundation of the awesome multilingual model Qwen 2.5 and is continuously pre-trained on 500B tokens to support 15 languages better with a unified model. These languages include English, Chinese, Burmese, Cebuano, Ilocano, Indonesian, Javanese, Khmer, Lao, Malay, Sundanese, Tagalog, Thai, Vietnamese, and Waray. By addressing the growing demand for diverse, robust, and accessible language models, Sailor2 seeks to serve the underserved in SEA areas with open, inclusive, and accessible multilingual LLMs. The Sailor2 model comes in three sizes, 1B, 8B, and 20B, which are expanded from the Qwen2.5 base models of 0.5B, 7B, and 14B, respectively. 
+ overrides: + parameters: + model: Sailor2-8B-Chat-Q4_K_M.gguf + files: + - filename: Sailor2-8B-Chat-Q4_K_M.gguf + sha256: 1a6aaadd6f6ef9c2290d66b348ebcbd6fdec542834cde622498fbd467d966103 + uri: huggingface://bartowski/Sailor2-8B-Chat-GGUF/Sailor2-8B-Chat-Q4_K_M.gguf - &archfunct license: apache-2.0 tags: From c85f46a71dbf184c4a391456ab87af104ef5dab9 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Wed, 11 Dec 2024 10:55:04 +0100 Subject: [PATCH 033/849] chore(model gallery): add sailor2-20b-chat (#4365) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index 40394f6a..37664dd8 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -1937,6 +1937,21 @@ - filename: Sailor2-8B-Chat-Q4_K_M.gguf sha256: 1a6aaadd6f6ef9c2290d66b348ebcbd6fdec542834cde622498fbd467d966103 uri: huggingface://bartowski/Sailor2-8B-Chat-GGUF/Sailor2-8B-Chat-Q4_K_M.gguf +- !!merge <<: *qwen25 + name: "sailor2-20b-chat" + icon: https://huggingface.co/sail/Sailor2-1B-Chat/resolve/main/sailor2_banner.jpg + urls: + - https://huggingface.co/bartowski/Sailor2-20B-Chat-GGUF + description: | + Sailor2 is a community-driven initiative that brings cutting-edge multilingual language models to South-East Asia (SEA). Our research highlights a strong demand for models in the 8B and 20B parameter range for production use, alongside 1B models for specialized applications, such as speculative decoding and research purposes. These models, released under the Apache 2.0 license, provide enhanced accessibility to advanced language technologies across the region. + Sailor2 builds upon the foundation of the awesome multilingual model Qwen 2.5 and is continuously pre-trained on 500B tokens to support 15 languages better with a unified model. These languages include English, Chinese, Burmese, Cebuano, Ilocano, Indonesian, Javanese, Khmer, Lao, Malay, Sundanese, Tagalog, Thai, Vietnamese, and Waray. By addressing the growing demand for diverse, robust, and accessible language models, Sailor2 seeks to serve the underserved in SEA areas with open, inclusive, and accessible multilingual LLMs. The Sailor2 model comes in three sizes, 1B, 8B, and 20B, which are expanded from the Qwen2.5 base models of 0.5B, 7B, and 14B, respectively. 
+ overrides: + parameters: + model: Sailor2-20B-Chat-Q4_K_M.gguf + files: + - filename: Sailor2-20B-Chat-Q4_K_M.gguf + sha256: 0cf8fcd367accee19702ef15ee964bddd5035bde034afddd838f818e7655534a + uri: huggingface://bartowski/Sailor2-20B-Chat-GGUF/Sailor2-20B-Chat-Q4_K_M.gguf From f2cb261797d587bc63d33defd17a4394fa4a0361 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Thu, 12 Dec 2024 09:23:36 +0100 Subject: [PATCH 034/849] chore: :arrow_up: Update ggerganov/llama.cpp to `235f6e14bf0ed0211c51aeff14139038ae1000aa` (#4366) :arrow_up: Update ggerganov/llama.cpp Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 36c7be21..1dd5d18e 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ DETECT_LIBS?=true # llama.cpp versions GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be -CPPLLAMA_VERSION?=dafae66cc242eb766797194d3c85c5e502625623 +CPPLLAMA_VERSION?=235f6e14bf0ed0211c51aeff14139038ae1000aa # whisper.cpp version WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp From 3ab83e91df2678cab49d254559fec41fc8794706 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Thu, 12 Dec 2024 12:07:41 +0100 Subject: [PATCH 035/849] chore(model gallery): add 72b-qwen2.5-kunou-v1 (#4369) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index 37664dd8..75c987c0 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -1952,6 +1952,27 @@ - filename: Sailor2-20B-Chat-Q4_K_M.gguf sha256: 0cf8fcd367accee19702ef15ee964bddd5035bde034afddd838f818e7655534a uri: huggingface://bartowski/Sailor2-20B-Chat-GGUF/Sailor2-20B-Chat-Q4_K_M.gguf +- !!merge <<: *qwen25 + name: "72b-qwen2.5-kunou-v1" + icon: https://huggingface.co/Sao10K/72B-Qwen2.5-Kunou-v1/resolve/main/knn.png + urls: + - https://huggingface.co/Sao10K/72B-Qwen2.5-Kunou-v1 + - https://huggingface.co/bartowski/72B-Qwen2.5-Kunou-v1-GGUF + description: | + I do not really have anything planned for this model other than it being a generalist and roleplay model? It was just something made and planned in minutes. + Same with the 14B and 32B versions. + Kunou's the name of an OC I worked on for a couple of years, for a... fanfic. mmm... + + A kind-of successor to L3-70B-Euryale-v2.2 in all but name? I'm keeping Stheno/Euryale lineage to Llama series for now. + I had a version made on top of Nemotron, a supposed Euryale 2.4, but that flopped hard; it was not my cup of tea. + This version is basically built on a better, more cleaned-up version of the dataset used on Euryale and Stheno. 
+ overrides: + parameters: + model: 72B-Qwen2.5-Kunou-v1-Q4_K_M.gguf + files: + - filename: 72B-Qwen2.5-Kunou-v1-Q4_K_M.gguf + sha256: 91907f29746625a62885793475956220b81d8a5a34b53686a1acd1d03fd403ea + uri: huggingface://bartowski/72B-Qwen2.5-Kunou-v1-GGUF/72B-Qwen2.5-Kunou-v1-Q4_K_M.gguf - &archfunct license: apache-2.0 tags: From b8824f2ad928ee518e35a633df9a085d2648d926 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Thu, 12 Dec 2024 12:07:57 +0100 Subject: [PATCH 036/849] chore(model gallery): add deepthought-8b-llama-v0.01-alpha (#4370) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index 75c987c0..cb104908 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -3782,6 +3782,20 @@ - filename: B-NIMITA-L3-8B-v0.02.Q4_K_M.gguf sha256: 625a54848dcd3f23bc06b639a7dfecae14142b5d177dd45acfe7724816bab4cd uri: huggingface://QuantFactory/B-NIMITA-L3-8B-v0.02-GGUF/B-NIMITA-L3-8B-v0.02.Q4_K_M.gguf +- !!merge <<: *llama31 + name: "deepthought-8b-llama-v0.01-alpha" + urls: + - https://huggingface.co/ruliad/deepthought-8b-llama-v0.01-alpha + - https://huggingface.co/bartowski/deepthought-8b-llama-v0.01-alpha-GGUF + description: | + Deepthought-8B is a small and capable reasoning model built on LLaMA-3.1 8B, designed to make AI reasoning more transparent and controllable. Despite its relatively small size, it achieves sophisticated reasoning capabilities that rival much larger models. + overrides: + parameters: + model: deepthought-8b-llama-v0.01-alpha-Q4_K_M.gguf + files: + - filename: deepthought-8b-llama-v0.01-alpha-Q4_K_M.gguf + sha256: 33195ba7b898ef8b2997d095e8be42adf1d0e1f6e8291cf07e026fc8e45903fd + uri: huggingface://bartowski/deepthought-8b-llama-v0.01-alpha-GGUF/deepthought-8b-llama-v0.01-alpha-Q4_K_M.gguf - &deepseek ## Deepseek url: "github:mudler/LocalAI/gallery/deepseek.yaml@master" From 1854b8c612bac4c2c04a64632158c66d3818945c Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Thu, 12 Dec 2024 12:22:48 +0100 Subject: [PATCH 037/849] chore(model gallery): add l3.3-70b-euryale-v2.3 (#4371) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index cb104908..11408635 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -47,6 +47,21 @@ - filename: Llama-3.3-70B-Instruct.Q4_K_M.gguf sha256: 4f3b04ecae278bdb0fd545b47c210bc5edf823e5ebf7d41e0b526c81d54b1ff3 uri: huggingface://MaziyarPanahi/Llama-3.3-70B-Instruct-GGUF/Llama-3.3-70B-Instruct.Q4_K_M.gguf +- !!merge <<: *llama33 + name: "l3.3-70b-euryale-v2.3" + icon: https://huggingface.co/Sao10K/L3.3-70B-Euryale-v2.3/resolve/main/Eury.png + urls: + - https://huggingface.co/Sao10K/L3.3-70B-Euryale-v2.3 + - https://huggingface.co/bartowski/L3.3-70B-Euryale-v2.3-GGUF + description: | + A direct replacement / successor to Euryale v2.2, not Hanami-x1, though it is slightly better than them in my opinion. 
+ overrides: + parameters: + model: L3.3-70B-Euryale-v2.3-Q4_K_M.gguf + files: + - filename: L3.3-70B-Euryale-v2.3-Q4_K_M.gguf + sha256: 4e78bb0e65886bfcff89b829f6d38aa6f6846988bb8291857e387e3f60b3217b + uri: huggingface://bartowski/L3.3-70B-Euryale-v2.3-GGUF/L3.3-70B-Euryale-v2.3-Q4_K_M.gguf From 044570fa85f99a21769de1f2ee9a56db0aa5ca53 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Fri, 13 Dec 2024 09:50:41 +0100 Subject: [PATCH 039/849] chore(model gallery): add l3.3-ms-evayale-70b (#4374) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index 11408635..cccf1138 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -62,6 +62,21 @@ - filename: L3.3-70B-Euryale-v2.3-Q4_K_M.gguf sha256: 4e78bb0e65886bfcff89b829f6d38aa6f6846988bb8291857e387e3f60b3217b uri: huggingface://bartowski/L3.3-70B-Euryale-v2.3-GGUF/L3.3-70B-Euryale-v2.3-Q4_K_M.gguf +- !!merge <<: *llama33 + name: "l3.3-ms-evayale-70b" + icon: https://cdn-uploads.huggingface.co/production/uploads/64545af5ec40bbbd01242ca6/HFCaVzRpiE05Y46p41qRy.webp + urls: + - https://huggingface.co/Steelskull/L3.3-MS-Evayale-70B + - https://huggingface.co/bartowski/L3.3-MS-Evayale-70B-GGUF + description: | + This model was created as I liked the storytelling of EVA but the prose and details of scenes from EURYALE; my goal is to merge the robust storytelling of both models while attempting to maintain the positives of each. 
+ overrides: + parameters: + model: L3.3-MS-Evayale-70B-Q4_K_M.gguf + files: + - filename: L3.3-MS-Evayale-70B-Q4_K_M.gguf + sha256: f941d88870fec8343946517a1802d159d23f3971eeea50b6cf12295330bd29cc + uri: huggingface://bartowski/L3.3-MS-Evayale-70B-GGUF/L3.3-MS-Evayale-70B-Q4_K_M.gguf - &rwkv url: "github:mudler/LocalAI/gallery/rwkv.yaml@master" name: "rwkv-6-world-7b" From 73f1f25b9a4cdd19b107d892df84a7a24f4937f3 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Fri, 13 Dec 2024 09:51:13 +0100 Subject: [PATCH 040/849] chore(model gallery): add evathene-v1.3 (#4375) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index cccf1138..f3e428b7 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -2003,6 +2003,21 @@ - filename: 72B-Qwen2.5-Kunou-v1-Q4_K_M.gguf sha256: 91907f29746625a62885793475956220b81d8a5a34b53686a1acd1d03fd403ea uri: huggingface://bartowski/72B-Qwen2.5-Kunou-v1-GGUF/72B-Qwen2.5-Kunou-v1-Q4_K_M.gguf +- !!merge <<: *qwen25 + icon: https://i.imgur.com/OxX2Usi.png + name: "evathene-v1.3" + urls: + - https://huggingface.co/sophosympatheia/Evathene-v1.3 + - https://huggingface.co/bartowski/Evathene-v1.3-GGUF + description: | + This 72B parameter model is a merge of sophosympatheia/Evathene-v1.1 and sophosympatheia/Evathene-v1.2. See the merge recipe below for details. + overrides: + parameters: + model: Evathene-v1.3-Q4_K_M.gguf + files: + - filename: Evathene-v1.3-Q4_K_M.gguf + sha256: 0f54909b3ddca514994ee16417da8750f56e7bd59581b46ac47625c230e29d1f + uri: huggingface://bartowski/Evathene-v1.3-GGUF/Evathene-v1.3-Q4_K_M.gguf - &archfunct license: apache-2.0 tags: From 0429e007463b8ee37b7ac642a417b911c3365b71 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Fri, 13 Dec 2024 09:51:26 +0100 Subject: [PATCH 041/849] chore(model gallery): add hermes-3-llama-3.2-3b (#4376) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index f3e428b7..2997230e 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -3074,6 +3074,22 @@ - filename: hermes-3-llama-3.1-8b-lorablated.Q4_K_M.gguf sha256: 8cff9d399a0583616fe1f290da6daa091ab5c5493d0e173a8fffb45202d79417 uri: huggingface://mlabonne/Hermes-3-Llama-3.1-8B-lorablated-GGUF/hermes-3-llama-3.1-8b-lorablated.Q4_K_M.gguf +- !!merge <<: *llama32 + name: "hermes-3-llama-3.2-3b" + icon: https://cdn-uploads.huggingface.co/production/uploads/6317aade83d8d2fd903192d9/-kj_KflXsdpcZoTQsvx7W.jpeg + urls: + - https://huggingface.co/NousResearch/Hermes-3-Llama-3.2-3B + - https://huggingface.co/bartowski/Hermes-3-Llama-3.2-3B-GGUF + description: | + Hermes 3 3B is a small but mighty new addition to the Hermes series of LLMs by Nous Research, and is Nous's first fine-tune in this parameter class. + Hermes 3 is a generalist language model with many improvements over Hermes 2, including advanced agentic capabilities, much better roleplaying, reasoning, multi-turn conversation, long context coherence, and improvements across the board. 
+ overrides: + parameters: + model: Hermes-3-Llama-3.2-3B-Q4_K_M.gguf + files: + - filename: Hermes-3-Llama-3.2-3B-Q4_K_M.gguf + sha256: 2e220a14ba4328fee38cf36c2c068261560f999fadb5725ce5c6d977cb5126b5 + uri: huggingface://bartowski/Hermes-3-Llama-3.2-3B-GGUF/Hermes-3-Llama-3.2-3B-Q4_K_M.gguf - !!merge <<: *llama31 name: "doctoraifinetune-3.1-8b-i1" urls: From fc4a714992e44b68c81b1270e8723a72de97f06e Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sat, 14 Dec 2024 00:30:52 +0100 Subject: [PATCH 042/849] feat(llama.cpp): bump and adapt to upstream changes (#4378) Signed-off-by: Ettore Di Giacinto --- Makefile | 2 +- backend/cpp/llama/grpc-server.cpp | 33 +++++++++++++++++++++++++++++-- 2 files changed, 32 insertions(+), 3 deletions(-) diff --git a/Makefile b/Makefile index 2c7d0259..2645ddd0 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ DETECT_LIBS?=true # llama.cpp versions GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be -CPPLLAMA_VERSION?=274ec65af6e54039eb95cb44904af5c945dca1fa +CPPLLAMA_VERSION?=c27ac678dd393af0da9b8acf10266e760c8a0912 # whisper.cpp version WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp diff --git a/backend/cpp/llama/grpc-server.cpp b/backend/cpp/llama/grpc-server.cpp index ea5c4e34..d553d35d 100644 --- a/backend/cpp/llama/grpc-server.cpp +++ b/backend/cpp/llama/grpc-server.cpp @@ -2228,6 +2228,35 @@ json parse_options(bool streaming, const backend::PredictOptions* predict, llama // } // } +const std::vector<ggml_type> kv_cache_types = { + GGML_TYPE_F32, + GGML_TYPE_F16, + GGML_TYPE_BF16, + GGML_TYPE_Q8_0, + GGML_TYPE_Q4_0, + GGML_TYPE_Q4_1, + GGML_TYPE_IQ4_NL, + GGML_TYPE_Q5_0, + GGML_TYPE_Q5_1, +}; + +static ggml_type kv_cache_type_from_str(const std::string & s) { + for (const auto & type : kv_cache_types) { + if (ggml_type_name(type) == s) { + return type; + } + } + throw std::runtime_error("Unsupported cache type: " + s); +} + +static std::string get_all_kv_cache_types() { + std::ostringstream msg; + for (const auto & type : kv_cache_types) { + msg << ggml_type_name(type) << (&type == &kv_cache_types.back() ? "" : ", "); + } + return msg.str(); +} + static void params_parse(const backend::ModelOptions* request, common_params & params) { @@ -2242,10 +2271,10 @@ static void params_parse(const backend::ModelOptions* request, // params.model_alias ?? 
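 // Descriptive note: the cache type strings parsed a few lines below now go through kv_cache_type_from_str() (defined above), which throws std::runtime_error for any name outside kv_cache_types; the accepted names follow ggml_type_name() output (e.g. "f16" or "q8_0" — examples assumed from ggml's naming), and get_all_kv_cache_types() can be used to print the full set.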
params.model_alias = request->modelfile(); if (!request->cachetypekey().empty()) { - params.cache_type_k = request->cachetypekey(); + params.cache_type_k = kv_cache_type_from_str(request->cachetypekey()); } if (!request->cachetypevalue().empty()) { - params.cache_type_v = request->cachetypevalue(); + params.cache_type_v = kv_cache_type_from_str(request->cachetypevalue()); } params.n_ctx = request->contextsize(); //params.memory_f16 = request->f16memory(); From 5051074845880c36bdaf1a5e3a6b9bb33223b6ae Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sat, 14 Dec 2024 11:26:40 +0100 Subject: [PATCH 043/849] chore(model gallery): add fusechat-gemma-2-9b-instruct (#4379) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index 2997230e..90564eae 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -5479,6 +5479,21 @@ - filename: BgGPT-Gemma-2-2.6B-IT-v1.0.Q4_K_M.gguf sha256: 1e92fe80ccad80e97076ee26b002c2280f075dfe2507d534b46a4391a077f319 uri: huggingface://QuantFactory/BgGPT-Gemma-2-2.6B-IT-v1.0-GGUF/BgGPT-Gemma-2-2.6B-IT-v1.0.Q4_K_M.gguf +- !!merge <<: *gemma + name: "fusechat-gemma-2-9b-instruct" + icon: "https://huggingface.co/FuseAI/FuseChat-Gemma-2-9B-Instruct/resolve/main/FuseChat-3.0.png" + urls: + - https://huggingface.co/FuseAI/FuseChat-Gemma-2-9B-Instruct + - https://huggingface.co/bartowski/FuseChat-Gemma-2-9B-Instruct-GGUF + description: | + We present FuseChat-3.0, a series of models crafted to enhance performance by integrating the strengths of multiple source LLMs into more compact target LLMs. To achieve this fusion, we utilized four powerful source LLMs: Gemma-2-27B-It, Mistral-Large-Instruct-2407, Qwen-2.5-72B-Instruct, and Llama-3.1-70B-Instruct. For the target LLMs, we employed three widely-used smaller models—Llama-3.1-8B-Instruct, Gemma-2-9B-It, and Qwen-2.5-7B-Instruct—along with two even more compact models—Llama-3.2-3B-Instruct and Llama-3.2-1B-Instruct. The implicit model fusion process involves a two-stage training pipeline comprising Supervised Fine-Tuning (SFT) to mitigate distribution discrepancies between target and source LLMs, and Direct Preference Optimization (DPO) for learning preferences from multiple source LLMs. The resulting FuseChat-3.0 models demonstrated substantial improvements in tasks related to general conversation, instruction following, mathematics, and coding. Notably, when Llama-3.1-8B-Instruct served as the target LLM, our fusion approach achieved an average improvement of 6.8 points across 14 benchmarks. Moreover, it showed significant improvements of 37.1 and 30.1 points on instruction-following test sets AlpacaEval-2 and Arena-Hard respectively. We have released the FuseChat-3.0 models on Huggingface, stay tuned for the forthcoming dataset and code. 
+ overrides: + parameters: + model: FuseChat-Gemma-2-9B-Instruct-Q4_K_M.gguf + files: + - filename: FuseChat-Gemma-2-9B-Instruct-Q4_K_M.gguf + sha256: f5aef201be68f344bebff3433af87aac6428fd227adfd7e468c8bfbcf9660ece + uri: huggingface://bartowski/FuseChat-Gemma-2-9B-Instruct-GGUF/FuseChat-Gemma-2-9B-Instruct-Q4_K_M.gguf - &llama3 url: "github:mudler/LocalAI/gallery/llama3-instruct.yaml@master" icon: https://cdn-uploads.huggingface.co/production/uploads/642cc1c253e76b4c2286c58e/aJJxKus1wP5N-euvHEUq7.png From af33483687affc05cfaea69db2b8846efd63f6bc Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sat, 14 Dec 2024 11:27:11 +0100 Subject: [PATCH 044/849] chore(model gallery): add fusechat-qwen-2.5-7b-instruct (#4380) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index 90564eae..9b1f5ea6 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -2018,6 +2018,21 @@ - filename: Evathene-v1.3-Q4_K_M.gguf sha256: 0f54909b3ddca514994ee16417da8750f56e7bd59581b46ac47625c230e29d1f uri: huggingface://bartowski/Evathene-v1.3-GGUF/Evathene-v1.3-Q4_K_M.gguf +- !!merge <<: *qwen25 + name: "fusechat-qwen-2.5-7b-instruct" + icon: https://huggingface.co/FuseAI/FuseChat-Qwen-2.5-7B-Instruct/resolve/main/FuseChat-3.0.png + urls: + -https://huggingface.co/FuseAI/FuseChat-Qwen-2.5-7B-Instruct + - https://huggingface.co/bartowski/FuseChat-Qwen-2.5-7B-Instruct-GGUF + description: | + We present FuseChat-3.0, a series of models crafted to enhance performance by integrating the strengths of multiple source LLMs into more compact target LLMs. To achieve this fusion, we utilized four powerful source LLMs: Gemma-2-27B-It, Mistral-Large-Instruct-2407, Qwen-2.5-72B-Instruct, and Llama-3.1-70B-Instruct. For the target LLMs, we employed three widely-used smaller models—Llama-3.1-8B-Instruct, Gemma-2-9B-It, and Qwen-2.5-7B-Instruct—along with two even more compact models—Llama-3.2-3B-Instruct and Llama-3.2-1B-Instruct. The implicit model fusion process involves a two-stage training pipeline comprising Supervised Fine-Tuning (SFT) to mitigate distribution discrepancies between target and source LLMs, and Direct Preference Optimization (DPO) for learning preferences from multiple source LLMs. The resulting FuseChat-3.0 models demonstrated substantial improvements in tasks related to general conversation, instruction following, mathematics, and coding. Notably, when Llama-3.1-8B-Instruct served as the target LLM, our fusion approach achieved an average improvement of 6.8 points across 14 benchmarks. Moreover, it showed significant improvements of 37.1 and 30.1 points on instruction-following test sets AlpacaEval-2 and Arena-Hard respectively. We have released the FuseChat-3.0 models on Huggingface, stay tuned for the forthcoming dataset and code. 
+ overrides:
+ parameters:
+ model: FuseChat-Qwen-2.5-7B-Instruct-Q4_K_M.gguf
+ files:
+ - filename: FuseChat-Qwen-2.5-7B-Instruct-Q4_K_M.gguf
+ sha256: 8cd8c317769f03125ac753c836ac92c5a76ee0b35502811d0e65bcbb8df9d55c
+ uri: huggingface://bartowski/FuseChat-Qwen-2.5-7B-Instruct-GGUF/FuseChat-Qwen-2.5-7B-Instruct-Q4_K_M.gguf
- &archfunct
  license: apache-2.0
  tags:

From 432c31d90419230e2b80711a4a34a054a978fcca Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Sat, 14 Dec 2024 11:27:25 +0100
Subject: [PATCH 045/849] chore(model gallery): add chronos-gold-12b-1.0
 (#4381)

Signed-off-by: Ettore Di Giacinto
---
 gallery/index.yaml | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)

diff --git a/gallery/index.yaml b/gallery/index.yaml
index 9b1f5ea6..dcf8df58 100644
--- a/gallery/index.yaml
+++ b/gallery/index.yaml
@@ -4706,6 +4706,32 @@
   - filename: MN-Chunky-Lotus-12B.Q4_K_M.gguf
     sha256: 363defe0a769fdb715dab75517966a0a80bcdd981a610d4c759099b6c8ff143a
     uri: huggingface://QuantFactory/MN-Chunky-Lotus-12B-GGUF/MN-Chunky-Lotus-12B.Q4_K_M.gguf
+- !!merge <<: *mistral03
+  url: "github:mudler/LocalAI/gallery/chatml.yaml@master"
+  name: "chronos-gold-12b-1.0"
+  icon: https://cdn-uploads.huggingface.co/production/uploads/630417380907b9a115c6aa9f/3hc8zt8fzKdO3qHK1p1mW.webp
+  urls:
+    - https://huggingface.co/elinas/Chronos-Gold-12B-1.0
+    - https://huggingface.co/mradermacher/Chronos-Gold-12B-1.0-GGUF
+  description: |
+    Chronos Gold 12B 1.0 is a unique model that applies to domain areas such as general chatbot functionality, roleplay, and storywriting. The model has been observed to write up to 2250 tokens in a single sequence. The model was trained at a sequence length of 16384 (16k) and will still retain the apparent 128k context length from Mistral-Nemo, though it deteriorates over time like regular Nemo does, based on the RULER test.

    As a result, it is recommended to keep your maximum sequence length at 16384, or you will experience performance degradation.

    The base model is mistralai/Mistral-Nemo-Base-2407, which was heavily modified to produce a more coherent model, comparable to much larger models.

    Chronos Gold 12B-1.0 re-creates the uniqueness of the original Chronos with significantly enhanced prompt adherence (following), coherence, a modern dataset, and support for a majority of "character card" formats in applications like SillyTavern.

    It went through an iterative and objective merge process, as my previous models did, and was further finetuned on a dataset curated for it.

    The specifics of the model will not be disclosed at this time due to dataset ownership.
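#   [Editor's sketch: not part of the patch] To honour the 16k guidance in the
#   description above, a local model configuration could pin the context window
#   explicitly. LocalAI model configs take a context_size field (the falcon3.yaml
#   template later in this series sets context_size: 4096), so an override here
#   could read:
#
#     context_size: 16384
#
#   Treat the placement as an assumption; the patch itself only ships the
#   gallery entry below.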
+ overrides: + parameters: + model: Chronos-Gold-12B-1.0.Q4_K_M.gguf + files: + - filename: Chronos-Gold-12B-1.0.Q4_K_M.gguf + sha256: d75a6ed28781f0ea6fa6e58c0b25dfecdd160d4cab64aaf511ea156e99a1e1f3 + uri: huggingface://mradermacher/Chronos-Gold-12B-1.0-GGUF/Chronos-Gold-12B-1.0.Q4_K_M.gguf - &mudler ### START mudler's LocalAI specific-models url: "github:mudler/LocalAI/gallery/mudler.yaml@master" From 59cbf38b4b52a807097d9b88f9e570706f487f07 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sat, 14 Dec 2024 21:21:27 +0100 Subject: [PATCH 046/849] fix(gallery): correct syntax typo Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gallery/index.yaml b/gallery/index.yaml index dcf8df58..89569cc4 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -2022,7 +2022,7 @@ name: "fusechat-qwen-2.5-7b-instruct" icon: https://huggingface.co/FuseAI/FuseChat-Qwen-2.5-7B-Instruct/resolve/main/FuseChat-3.0.png urls: - -https://huggingface.co/FuseAI/FuseChat-Qwen-2.5-7B-Instruct + - https://huggingface.co/FuseAI/FuseChat-Qwen-2.5-7B-Instruct - https://huggingface.co/bartowski/FuseChat-Qwen-2.5-7B-Instruct-GGUF description: | We present FuseChat-3.0, a series of models crafted to enhance performance by integrating the strengths of multiple source LLMs into more compact target LLMs. To achieve this fusion, we utilized four powerful source LLMs: Gemma-2-27B-It, Mistral-Large-Instruct-2407, Qwen-2.5-72B-Instruct, and Llama-3.1-70B-Instruct. For the target LLMs, we employed three widely-used smaller models—Llama-3.1-8B-Instruct, Gemma-2-9B-It, and Qwen-2.5-7B-Instruct—along with two even more compact models—Llama-3.2-3B-Instruct and Llama-3.2-1B-Instruct. The implicit model fusion process involves a two-stage training pipeline comprising Supervised Fine-Tuning (SFT) to mitigate distribution discrepancies between target and source LLMs, and Direct Preference Optimization (DPO) for learning preferences from multiple source LLMs. The resulting FuseChat-3.0 models demonstrated substantial improvements in tasks related to general conversation, instruction following, mathematics, and coding. Notably, when Llama-3.1-8B-Instruct served as the target LLM, our fusion approach achieved an average improvement of 6.8 points across 14 benchmarks. Moreover, it showed significant improvements of 37.1 and 30.1 points on instruction-following test sets AlpacaEval-2 and Arena-Hard respectively. We have released the FuseChat-3.0 models on Huggingface, stay tuned for the forthcoming dataset and code. 
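[Editor's note] The one-character fix above is load-bearing: YAML only treats
"- " (dash plus space) as a sequence item, so "-https://..." parses as a plain
scalar and breaks the urls list. A minimal sketch of the anchor/merge
convention the gallery relies on throughout this series (all names here are
hypothetical, not from the patches):

    models:
      - &base                 # anchored entry holding shared defaults
        license: apache-2.0
        tags: ["llm", "gguf"]
      - !!merge <<: *base     # the merge key copies the anchored mapping
        name: "my-model"      # local keys then add to or override it
        urls:
          - https://example.com/my-model   # note the space after "-"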
From cca911f3e50ff36b0625cd43b6865198a3594a3d Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Sun, 15 Dec 2024 09:59:20 +0100 Subject: [PATCH 048/849] chore: :arrow_up: Update ggerganov/llama.cpp to `e52aba537a34d51a65cddec6bc6dafc9031edc63` (#4385) :arrow_up: Update ggerganov/llama.cpp Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> Co-authored-by: Ettore Di Giacinto --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 2645ddd0..255d6071 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ DETECT_LIBS?=true # llama.cpp versions GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be -CPPLLAMA_VERSION?=c27ac678dd393af0da9b8acf10266e760c8a0912 +CPPLLAMA_VERSION?=e52aba537a34d51a65cddec6bc6dafc9031edc63 # whisper.cpp version WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp From 8f2be8266700788acc92c9e8dccb7acc45daebfc Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sun, 15 Dec 2024 10:07:30 +0100 Subject: [PATCH 049/849] chore(model gallery): add fusechat-llama-3.2-3b-instruct (#4386) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index 89569cc4..2df138ff 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -807,6 +807,20 @@ - filename: Llama-SmolTalk-3.2-1B-Instruct.Q4_K_M.gguf sha256: 03d8d05e3821f4caa65defa82baaff658484d4405b66546431528153ceef4d9e uri: huggingface://mradermacher/Llama-SmolTalk-3.2-1B-Instruct-GGUF/Llama-SmolTalk-3.2-1B-Instruct.Q4_K_M.gguf +- !!merge <<: *llama32 + name: "fusechat-llama-3.2-3b-instruct" + urls: + - https://huggingface.co/FuseAI/FuseChat-Llama-3.2-3B-Instruct + - https://huggingface.co/bartowski/FuseChat-Llama-3.2-3B-Instruct-GGUF + description: | + We present FuseChat-3.0, a series of models crafted to enhance performance by integrating the strengths of multiple source LLMs into more compact target LLMs. To achieve this fusion, we utilized four powerful source LLMs: Gemma-2-27B-It, Mistral-Large-Instruct-2407, Qwen-2.5-72B-Instruct, and Llama-3.1-70B-Instruct. For the target LLMs, we employed three widely-used smaller models—Llama-3.1-8B-Instruct, Gemma-2-9B-It, and Qwen-2.5-7B-Instruct—along with two even more compact models—Llama-3.2-3B-Instruct and Llama-3.2-1B-Instruct. The implicit model fusion process involves a two-stage training pipeline comprising Supervised Fine-Tuning (SFT) to mitigate distribution discrepancies between target and source LLMs, and Direct Preference Optimization (DPO) for learning preferences from multiple source LLMs. The resulting FuseChat-3.0 models demonstrated substantial improvements in tasks related to general conversation, instruction following, mathematics, and coding. Notably, when Llama-3.1-8B-Instruct served as the target LLM, our fusion approach achieved an average improvement of 6.8 points across 14 benchmarks. Moreover, it showed significant improvements of 37.1 and 30.1 points on instruction-following test sets AlpacaEval-2 and Arena-Hard respectively. We have released the FuseChat-3.0 models on Huggingface, stay tuned for the forthcoming dataset and code. 
+ overrides:
+ parameters:
+ model: FuseChat-Llama-3.2-3B-Instruct-Q4_K_M.gguf
+ files:
+ - filename: FuseChat-Llama-3.2-3B-Instruct-Q4_K_M.gguf
+ sha256: a4f0e9a905b74886b79b72622c06a3219d6812818a564a53c39fc49032d7f842
+ uri: huggingface://bartowski/FuseChat-Llama-3.2-3B-Instruct-GGUF/FuseChat-Llama-3.2-3B-Instruct-Q4_K_M.gguf
- &qwen25
  ## Qwen2.5
  name: "qwen2.5-14b-instruct"

From 1d6d30137009cb4fb9c6d3971900c1919baea690 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Sun, 15 Dec 2024 10:07:42 +0100
Subject: [PATCH 050/849] chore(model gallery): add fusechat-llama-3.1-8b-instruct
 (#4387)

Signed-off-by: Ettore Di Giacinto
---
 gallery/index.yaml | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/gallery/index.yaml b/gallery/index.yaml
index 2df138ff..b729dfe2 100644
--- a/gallery/index.yaml
+++ b/gallery/index.yaml
@@ -3886,6 +3886,21 @@
   - filename: deepthought-8b-llama-v0.01-alpha-Q4_K_M.gguf
     sha256: 33195ba7b898ef8b2997d095e8be42adf1d0e1f6e8291cf07e026fc8e45903fd
     uri: huggingface://bartowski/deepthought-8b-llama-v0.01-alpha-GGUF/deepthought-8b-llama-v0.01-alpha-Q4_K_M.gguf
+- !!merge <<: *llama31
+  name: "fusechat-llama-3.1-8b-instruct"
+  icon: https://huggingface.co/FuseAI/FuseChat-Llama-3.1-8B-Instruct/resolve/main/FuseChat-3.0.png
+  urls:
+    - https://huggingface.co/FuseAI/FuseChat-Llama-3.1-8B-Instruct
+    - https://huggingface.co/bartowski/FuseChat-Llama-3.1-8B-Instruct-GGUF
+  description: |
+    We present FuseChat-3.0, a series of models crafted to enhance performance by integrating the strengths of multiple source LLMs into more compact target LLMs. To achieve this fusion, we utilized four powerful source LLMs: Gemma-2-27B-It, Mistral-Large-Instruct-2407, Qwen-2.5-72B-Instruct, and Llama-3.1-70B-Instruct. For the target LLMs, we employed three widely-used smaller models—Llama-3.1-8B-Instruct, Gemma-2-9B-It, and Qwen-2.5-7B-Instruct—along with two even more compact models—Llama-3.2-3B-Instruct and Llama-3.2-1B-Instruct. The implicit model fusion process involves a two-stage training pipeline comprising Supervised Fine-Tuning (SFT) to mitigate distribution discrepancies between target and source LLMs, and Direct Preference Optimization (DPO) for learning preferences from multiple source LLMs. The resulting FuseChat-3.0 models demonstrated substantial improvements in tasks related to general conversation, instruction following, mathematics, and coding. Notably, when Llama-3.1-8B-Instruct served as the target LLM, our fusion approach achieved an average improvement of 6.8 points across 14 benchmarks. Moreover, it showed significant improvements of 37.1 and 30.1 points on instruction-following test sets AlpacaEval-2 and Arena-Hard respectively. We have released the FuseChat-3.0 models on Huggingface, stay tuned for the forthcoming dataset and code.
+ overrides: + parameters: + model: FuseChat-Llama-3.1-8B-Instruct-Q4_K_M.gguf + files: + - filename: FuseChat-Llama-3.1-8B-Instruct-Q4_K_M.gguf + sha256: fe58c8c9b695e36e6b0ee5e4d81ff71ea0a4f1a11fa7bb16e8d6f1b35a58dff6 + uri: huggingface://bartowski/FuseChat-Llama-3.1-8B-Instruct-GGUF/FuseChat-Llama-3.1-8B-Instruct-Q4_K_M.gguf - &deepseek ## Deepseek url: "github:mudler/LocalAI/gallery/deepseek.yaml@master" From 9429a53db7162e798795b1d73dfdf4e055a8c899 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sun, 15 Dec 2024 10:07:56 +0100 Subject: [PATCH 051/849] chore(model gallery): add neumind-math-7b-instruct (#4388) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index b729dfe2..71a2d56f 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -2047,6 +2047,20 @@ - filename: FuseChat-Qwen-2.5-7B-Instruct-Q4_K_M.gguf sha256: 8cd8c317769f03125ac753c836ac92c5a76ee0b35502811d0e65bcbb8df9d55c uri: huggingface://bartowski/FuseChat-Qwen-2.5-7B-Instruct-GGUF/FuseChat-Qwen-2.5-7B-Instruct-Q4_K_M.gguf +- !!merge <<: *qwen25 + name: "neumind-math-7b-instruct" + urls: + - https://huggingface.co/prithivMLmods/Neumind-Math-7B-Instruct + - https://huggingface.co/QuantFactory/Neumind-Math-7B-Instruct-GGUF + description: | + The Neumind-Math-7B-Instruct is a fine-tuned model based on Qwen2.5-7B-Instruct, optimized for mathematical reasoning, step-by-step problem-solving, and instruction-based tasks in the mathematics domain. The model is designed for applications requiring structured reasoning, numerical computations, and mathematical proof generation. + overrides: + parameters: + model: Neumind-Math-7B-Instruct.Q4_K_M.gguf + files: + - filename: Neumind-Math-7B-Instruct.Q4_K_M.gguf + sha256: 3250abadeae4234e06dfaf7cf86fe871fe021e6c2dfcb4542c2a4f412d71e28c + uri: huggingface://QuantFactory/Neumind-Math-7B-Instruct-GGUF/Neumind-Math-7B-Instruct.Q4_K_M.gguf - &archfunct license: apache-2.0 tags: From 5d9c530eaa38a5c470f9a766e29e617b401432ce Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sun, 15 Dec 2024 18:43:39 +0100 Subject: [PATCH 052/849] fix(gallery): disable default embeddings Do not always enable embeddings on llama32, but let specific models settings Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/gallery/index.yaml b/gallery/index.yaml index 71a2d56f..35febd56 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -458,7 +458,6 @@ urls: - https://huggingface.co/hugging-quants/Llama-3.2-1B-Instruct-Q4_K_M-GGUF overrides: - embeddings: true parameters: model: llama-3.2-1b-instruct-q4_k_m.gguf files: @@ -9708,6 +9707,10 @@ llama3.2 embeddings model. 
Using as drop-in replacement for bert-embeddings tags: - embeddings + overrides: + embeddings: true + parameters: + model: llama-3.2-1b-instruct-q4_k_m.gguf ## Stable Diffusion - url: github:mudler/LocalAI/gallery/stablediffusion.yaml@master license: "BSD-3" From 6938618e30fbbc3858ad951cc9afc5a04f3d5415 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Sun, 15 Dec 2024 23:01:44 +0100 Subject: [PATCH 053/849] chore: :arrow_up: Update ggerganov/llama.cpp to `a0974156f334acf8af5858d7ede5ab7d7490d415` (#4391) :arrow_up: Update ggerganov/llama.cpp Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 255d6071..9310e264 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ DETECT_LIBS?=true # llama.cpp versions GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be -CPPLLAMA_VERSION?=e52aba537a34d51a65cddec6bc6dafc9031edc63 +CPPLLAMA_VERSION?=a0974156f334acf8af5858d7ede5ab7d7490d415 # whisper.cpp version WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp From b40d5d12b720f0ee8b4f6aa391b9dfff4d46e3b1 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Mon, 16 Dec 2024 09:47:49 +0100 Subject: [PATCH 054/849] chore(model gallery): add naturallm-7b-instruct (#4392) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index 35febd56..fbd419b3 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -4774,6 +4774,22 @@ - filename: Chronos-Gold-12B-1.0.Q4_K_M.gguf sha256: d75a6ed28781f0ea6fa6e58c0b25dfecdd160d4cab64aaf511ea156e99a1e1f3 uri: huggingface://mradermacher/Chronos-Gold-12B-1.0-GGUF/Chronos-Gold-12B-1.0.Q4_K_M.gguf +- !!merge <<: *mistral03 + name: "naturallm-7b-instruct" + url: "github:mudler/LocalAI/gallery/chatml.yaml@master" + urls: + - https://huggingface.co/qingy2024/NaturalLM-7B-Instruct + - https://huggingface.co/bartowski/NaturalLM-7B-Instruct-GGUF + description: | + This Mistral 7B fine-tune is trained (for 150 steps) to talk like a human, not a "helpful assistant"! + It's also very beta right now. The dataset (qingy2024/Natural-Text-ShareGPT) can definitely be improved. 
+ overrides: + parameters: + model: NaturalLM-7B-Instruct-Q4_K_M.gguf + files: + - filename: NaturalLM-7B-Instruct-Q4_K_M.gguf + sha256: 15b2f34116f690fea35790a9392b8a2190fe25827e370d426e88a2a543f4dcee + uri: huggingface://bartowski/NaturalLM-7B-Instruct-GGUF/NaturalLM-7B-Instruct-Q4_K_M.gguf - &mudler ### START mudler's LocalAI specific-models url: "github:mudler/LocalAI/gallery/mudler.yaml@master" From 472d11f8844e8f04d2a8cb6f8b1ed580f0ca3bc1 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Mon, 16 Dec 2024 09:48:23 +0100 Subject: [PATCH 055/849] chore(model gallery): add marco-o1-uncensored (#4393) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index fbd419b3..9d5696c2 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -4299,6 +4299,20 @@ - filename: Marco-o1.Q4_K_M.gguf sha256: 54dd9554cb54609bf0bf4b367dfba192fc982a2fc6b87a0f56fba5ea82762d0d uri: huggingface://QuantFactory/Marco-o1-GGUF/Marco-o1.Q4_K_M.gguf +- !!merge <<: *qwen2 + name: "marco-o1-uncensored" + urls: + - https://huggingface.co/thirdeyeai/marco-o1-uncensored + - https://huggingface.co/QuantFactory/marco-o1-uncensored-GGUF + description: | + Uncensored version of marco-o1 + overrides: + parameters: + model: marco-o1-uncensored.Q4_K_M.gguf + files: + - filename: marco-o1-uncensored.Q4_K_M.gguf + sha256: ad0440270a7254098f90779744d3e5b34fe49b7baf97c819909ba9c5648cc0d9 + uri: huggingface://QuantFactory/marco-o1-uncensored-GGUF/marco-o1-uncensored.Q4_K_M.gguf - &mistral03 ## START Mistral url: "github:mudler/LocalAI/gallery/mistral-0.3.yaml@master" From 037e8030bf8d2ee23e3f66072cdd93a884b8965b Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Mon, 16 Dec 2024 09:48:33 +0100 Subject: [PATCH 056/849] chore(model gallery): add qwen2-7b-multilingual-rp (#4394) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index 9d5696c2..99c0e9a3 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -2113,6 +2113,20 @@ - filename: Arch-Function-3B.Q4_K_M.gguf sha256: 9945cb8d070498d163e5df90c1987f591d35e4fd2222a6c51bcfff848c4b573b uri: huggingface://mradermacher/Arch-Function-3B-GGUF/Arch-Function-3B.Q4_K_M.gguf +- !!merge <<: *qwen25 + name: "qwen2-7b-multilingual-rp" + urls: + - https://huggingface.co/maywell/Qwen2-7B-Multilingual-RP + - https://huggingface.co/QuantFactory/Qwen2-7B-Multilingual-RP-GGUF + description: | + Multilingual Qwen2-7B model trained on Roleplaying. 
+ overrides: + parameters: + model: Qwen2-7B-Multilingual-RP.Q4_K_M.gguf + files: + - filename: Qwen2-7B-Multilingual-RP.Q4_K_M.gguf + sha256: 31756c58fd135f2deb59b2d9b142f39134dc8d1a6eaa02f388dda7491fc95ccc + uri: huggingface://QuantFactory/Qwen2-7B-Multilingual-RP-GGUF/Qwen2-7B-Multilingual-RP.Q4_K_M.gguf - &smollm ## SmolLM url: "github:mudler/LocalAI/gallery/chatml.yaml@master" From 7ca0e2d925b48eaedc945ef53f19de38fb43f049 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Mon, 16 Dec 2024 10:55:02 +0100 Subject: [PATCH 057/849] fix(python): remove pin to setuptools, pin python version (#4395) fix(setuptools): remove pin Signed-off-by: Ettore Di Giacinto --- backend/python/autogptq/requirements-intel.txt | 2 +- backend/python/bark/requirements-intel.txt | 2 +- backend/python/common/libbackend.sh | 5 ++++- backend/python/coqui/requirements-intel.txt | 2 +- backend/python/diffusers/requirements-intel.txt | 2 +- backend/python/openvoice/requirements.txt | 1 + backend/python/parler-tts/requirements-intel.txt | 1 - backend/python/parler-tts/requirements.txt | 1 + backend/python/rerankers/requirements-intel.txt | 2 +- backend/python/sentencetransformers/requirements-intel.txt | 2 +- backend/python/transformers-musicgen/requirements-intel.txt | 2 +- backend/python/transformers/requirements.txt | 2 +- backend/python/vall-e-x/requirements-intel.txt | 3 +-- backend/python/vall-e-x/requirements.txt | 3 ++- backend/python/vllm/requirements-intel.txt | 2 +- 15 files changed, 18 insertions(+), 14 deletions(-) diff --git a/backend/python/autogptq/requirements-intel.txt b/backend/python/autogptq/requirements-intel.txt index d5e0173e..cec8bff4 100644 --- a/backend/python/autogptq/requirements-intel.txt +++ b/backend/python/autogptq/requirements-intel.txt @@ -2,4 +2,4 @@ intel-extension-for-pytorch torch optimum[openvino] -setuptools==75.1.0 # https://github.com/mudler/LocalAI/issues/2406 \ No newline at end of file +setuptools \ No newline at end of file diff --git a/backend/python/bark/requirements-intel.txt b/backend/python/bark/requirements-intel.txt index c0e4dcaa..1f043bbf 100644 --- a/backend/python/bark/requirements-intel.txt +++ b/backend/python/bark/requirements-intel.txt @@ -3,6 +3,6 @@ intel-extension-for-pytorch torch torchaudio optimum[openvino] -setuptools==75.1.0 # https://github.com/mudler/LocalAI/issues/2406 +setuptools transformers accelerate \ No newline at end of file diff --git a/backend/python/common/libbackend.sh b/backend/python/common/libbackend.sh index 934b1fd3..6013cf76 100644 --- a/backend/python/common/libbackend.sh +++ b/backend/python/common/libbackend.sh @@ -17,6 +17,9 @@ # LIMIT_TARGETS="cublas12" # source $(dirname $0)/../common/libbackend.sh # + +PYTHON_VERSION="3.10" + function init() { # Name of the backend (directory name) BACKEND_NAME=${PWD##*/} @@ -88,7 +91,7 @@ function getBuildProfile() { # always result in an activated virtual environment function ensureVenv() { if [ ! 
-d "${EDIR}/venv" ]; then - uv venv ${EDIR}/venv + uv venv --python ${PYTHON_VERSION} ${EDIR}/venv echo "virtualenv created" fi diff --git a/backend/python/coqui/requirements-intel.txt b/backend/python/coqui/requirements-intel.txt index de3b4ee4..7ed2fb42 100644 --- a/backend/python/coqui/requirements-intel.txt +++ b/backend/python/coqui/requirements-intel.txt @@ -3,7 +3,7 @@ intel-extension-for-pytorch torch torchaudio optimum[openvino] -setuptools==75.1.0 # https://github.com/mudler/LocalAI/issues/2406 +setuptools transformers accelerate coqui-tts \ No newline at end of file diff --git a/backend/python/diffusers/requirements-intel.txt b/backend/python/diffusers/requirements-intel.txt index 566278a8..bd6632bf 100644 --- a/backend/python/diffusers/requirements-intel.txt +++ b/backend/python/diffusers/requirements-intel.txt @@ -3,7 +3,7 @@ intel-extension-for-pytorch torch torchvision optimum[openvino] -setuptools==75.1.0 # https://github.com/mudler/LocalAI/issues/2406 +setuptools diffusers opencv-python transformers diff --git a/backend/python/openvoice/requirements.txt b/backend/python/openvoice/requirements.txt index 6806d3e1..e6a1e5a5 100644 --- a/backend/python/openvoice/requirements.txt +++ b/backend/python/openvoice/requirements.txt @@ -18,3 +18,4 @@ jieba==0.42.1 gradio==3.48.0 langid==1.1.6 llvmlite==0.43.0 +setuptools \ No newline at end of file diff --git a/backend/python/parler-tts/requirements-intel.txt b/backend/python/parler-tts/requirements-intel.txt index c0e4dcaa..bcb8900e 100644 --- a/backend/python/parler-tts/requirements-intel.txt +++ b/backend/python/parler-tts/requirements-intel.txt @@ -3,6 +3,5 @@ intel-extension-for-pytorch torch torchaudio optimum[openvino] -setuptools==75.1.0 # https://github.com/mudler/LocalAI/issues/2406 transformers accelerate \ No newline at end of file diff --git a/backend/python/parler-tts/requirements.txt b/backend/python/parler-tts/requirements.txt index 75ea8a59..faf4ea3d 100644 --- a/backend/python/parler-tts/requirements.txt +++ b/backend/python/parler-tts/requirements.txt @@ -1,3 +1,4 @@ grpcio==1.68.1 certifi llvmlite==0.43.0 +setuptools \ No newline at end of file diff --git a/backend/python/rerankers/requirements-intel.txt b/backend/python/rerankers/requirements-intel.txt index e6bb4cc7..a3cc600c 100644 --- a/backend/python/rerankers/requirements-intel.txt +++ b/backend/python/rerankers/requirements-intel.txt @@ -5,4 +5,4 @@ accelerate torch rerankers[transformers] optimum[openvino] -setuptools==75.1.0 # https://github.com/mudler/LocalAI/issues/2406 \ No newline at end of file +setuptools \ No newline at end of file diff --git a/backend/python/sentencetransformers/requirements-intel.txt b/backend/python/sentencetransformers/requirements-intel.txt index 56e17446..23e0d5f2 100644 --- a/backend/python/sentencetransformers/requirements-intel.txt +++ b/backend/python/sentencetransformers/requirements-intel.txt @@ -2,7 +2,7 @@ intel-extension-for-pytorch torch optimum[openvino] -setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406 +setuptools accelerate sentence-transformers==3.3.1 transformers \ No newline at end of file diff --git a/backend/python/transformers-musicgen/requirements-intel.txt b/backend/python/transformers-musicgen/requirements-intel.txt index 608d6939..bb191163 100644 --- a/backend/python/transformers-musicgen/requirements-intel.txt +++ b/backend/python/transformers-musicgen/requirements-intel.txt @@ -4,4 +4,4 @@ transformers accelerate torch optimum[openvino] -setuptools==75.1.0 # 
https://github.com/mudler/LocalAI/issues/2406 \ No newline at end of file +setuptools \ No newline at end of file diff --git a/backend/python/transformers/requirements.txt b/backend/python/transformers/requirements.txt index b556b9f1..d981fd99 100644 --- a/backend/python/transformers/requirements.txt +++ b/backend/python/transformers/requirements.txt @@ -1,4 +1,4 @@ grpcio==1.68.1 protobuf certifi -setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406 \ No newline at end of file +setuptools \ No newline at end of file diff --git a/backend/python/vall-e-x/requirements-intel.txt b/backend/python/vall-e-x/requirements-intel.txt index adbabeac..284e7131 100644 --- a/backend/python/vall-e-x/requirements-intel.txt +++ b/backend/python/vall-e-x/requirements-intel.txt @@ -3,5 +3,4 @@ intel-extension-for-pytorch accelerate torch torchaudio -optimum[openvino] -setuptools==75.1.0 # https://github.com/mudler/LocalAI/issues/2406 \ No newline at end of file +optimum[openvino] \ No newline at end of file diff --git a/backend/python/vall-e-x/requirements.txt b/backend/python/vall-e-x/requirements.txt index 8e4eabf1..d981fd99 100644 --- a/backend/python/vall-e-x/requirements.txt +++ b/backend/python/vall-e-x/requirements.txt @@ -1,3 +1,4 @@ grpcio==1.68.1 protobuf -certifi \ No newline at end of file +certifi +setuptools \ No newline at end of file diff --git a/backend/python/vllm/requirements-intel.txt b/backend/python/vllm/requirements-intel.txt index 95443368..36326f95 100644 --- a/backend/python/vllm/requirements-intel.txt +++ b/backend/python/vllm/requirements-intel.txt @@ -4,5 +4,5 @@ accelerate torch transformers optimum[openvino] -setuptools==75.1.0 # https://github.com/mudler/LocalAI/issues/2406 +setuptools bitsandbytes \ No newline at end of file From 24abf568cbedfd438f48c9b7c5af6479473a6d1f Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Tue, 17 Dec 2024 00:46:48 +0100 Subject: [PATCH 058/849] chore(tests): stabilize tts test (#4417) chore(tests): stabilize test Signed-off-by: Ettore Di Giacinto --- core/http/app_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/http/app_test.go b/core/http/app_test.go index 34ebacf7..7c57ba21 100644 --- a/core/http/app_test.go +++ b/core/http/app_test.go @@ -704,7 +704,7 @@ var _ = Describe("API test", func() { Expect(err).ToNot(HaveOccurred(), fmt.Sprint(resp)) Expect(resp.StatusCode).To(Equal(200), fmt.Sprint(string(dat))) - Expect(resp.Header.Get("Content-Type")).To(Equal("audio/x-wav")) + Expect(resp.Header.Get("Content-Type")).To(Or(Equal("audio/x-wav"), Equal("audio/vnd.wave"))) }) It("installs and is capable to generate images", Label("stablediffusion"), func() { if runtime.GOOS != "linux" { From 708cba0c1bf5b8068a0eb4a18994b2c187136a2f Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Tue, 17 Dec 2024 00:47:52 +0100 Subject: [PATCH 059/849] chore(llama.cpp): bump, drop penalize_nl (#4418) deps(llama.cpp): bump, drop penalize_nl Signed-off-by: Ettore Di Giacinto --- Makefile | 2 +- backend/cpp/llama/grpc-server.cpp | 7 +------ 2 files changed, 2 insertions(+), 7 deletions(-) diff --git a/Makefile b/Makefile index 9310e264..4226c5d7 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ DETECT_LIBS?=true # llama.cpp versions GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be -CPPLLAMA_VERSION?=a0974156f334acf8af5858d7ede5ab7d7490d415 +CPPLLAMA_VERSION?=08ea539df211e46bb4d0dd275e541cb591d5ebc8 # whisper.cpp version 
WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp diff --git a/backend/cpp/llama/grpc-server.cpp b/backend/cpp/llama/grpc-server.cpp index d553d35d..98dd8fde 100644 --- a/backend/cpp/llama/grpc-server.cpp +++ b/backend/cpp/llama/grpc-server.cpp @@ -681,7 +681,6 @@ struct llama_server_context slot->sparams.mirostat = json_value(data, "mirostat", default_sparams.mirostat); slot->sparams.mirostat_tau = json_value(data, "mirostat_tau", default_sparams.mirostat_tau); slot->sparams.mirostat_eta = json_value(data, "mirostat_eta", default_sparams.mirostat_eta); - slot->sparams.penalize_nl = json_value(data, "penalize_nl", default_sparams.penalize_nl); slot->params.n_keep = json_value(data, "n_keep", slot->params.n_keep); slot->sparams.seed = json_value(data, "seed", default_sparams.seed); slot->sparams.grammar = json_value(data, "grammar", default_sparams.grammar); @@ -1213,13 +1212,12 @@ struct llama_server_context {"mirostat", slot.sparams.mirostat}, {"mirostat_tau", slot.sparams.mirostat_tau}, {"mirostat_eta", slot.sparams.mirostat_eta}, - {"penalize_nl", slot.sparams.penalize_nl}, {"stop", slot.params.antiprompt}, {"n_predict", slot.params.n_predict}, {"n_keep", params.n_keep}, {"ignore_eos", slot.sparams.ignore_eos}, {"stream", slot.params.stream}, - // {"logit_bias", slot.sparams.logit_bias}, + // {"logit_bias", slot.sparams.logit_bias}, {"n_probs", slot.sparams.n_probs}, {"min_keep", slot.sparams.min_keep}, {"grammar", slot.sparams.grammar}, @@ -2112,7 +2110,6 @@ json parse_options(bool streaming, const backend::PredictOptions* predict, llama // slot->sparams.mirostat = json_value(data, "mirostat", default_sparams.mirostat); // slot->sparams.mirostat_tau = json_value(data, "mirostat_tau", default_sparams.mirostat_tau); // slot->sparams.mirostat_eta = json_value(data, "mirostat_eta", default_sparams.mirostat_eta); - // slot->sparams.penalize_nl = json_value(data, "penalize_nl", default_sparams.penalize_nl); // slot->params.n_keep = json_value(data, "n_keep", slot->params.n_keep); // slot->params.seed = json_value(data, "seed", default_params.seed); // slot->sparams.grammar = json_value(data, "grammar", default_sparams.grammar); @@ -2135,7 +2132,6 @@ json parse_options(bool streaming, const backend::PredictOptions* predict, llama data["mirostat"] = predict->mirostat(); data["mirostat_tau"] = predict->mirostattau(); data["mirostat_eta"] = predict->mirostateta(); - data["penalize_nl"] = predict->penalizenl(); data["n_keep"] = predict->nkeep(); data["seed"] = predict->seed(); data["grammar"] = predict->grammar(); @@ -2181,7 +2177,6 @@ json parse_options(bool streaming, const backend::PredictOptions* predict, llama // llama.params.sparams.mirostat = predict->mirostat(); // llama.params.sparams.mirostat_tau = predict->mirostattau(); // llama.params.sparams.mirostat_eta = predict->mirostateta(); -// llama.params.sparams.penalize_nl = predict->penalizenl(); // llama.params.n_keep = predict->nkeep(); // llama.params.seed = predict->seed(); // llama.params.sparams.grammar = predict->grammar(); From fdb560b8e511cf4e29694e56be6d395b5b5c8799 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Tue, 17 Dec 2024 10:10:37 +0100 Subject: [PATCH 060/849] chore(model gallery): add qwq-lcot-7b-instruct (#4419) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index 99c0e9a3..ff4969ca 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -2127,6 +2127,20 @@ - filename: 
Qwen2-7B-Multilingual-RP.Q4_K_M.gguf sha256: 31756c58fd135f2deb59b2d9b142f39134dc8d1a6eaa02f388dda7491fc95ccc uri: huggingface://QuantFactory/Qwen2-7B-Multilingual-RP-GGUF/Qwen2-7B-Multilingual-RP.Q4_K_M.gguf +- !!merge <<: *qwen25 + name: "qwq-lcot-7b-instruct" + urls: + - https://huggingface.co/prithivMLmods/QwQ-LCoT-7B-Instruct + - https://huggingface.co/bartowski/QwQ-LCoT-7B-Instruct-GGUF + description: | + The QwQ-LCoT-7B-Instruct is a fine-tuned language model designed for advanced reasoning and instruction-following tasks. It leverages the Qwen2.5-7B base model and has been fine-tuned on the amphora/QwQ-LongCoT-130K dataset, focusing on chain-of-thought (CoT) reasoning. + overrides: + parameters: + model: QwQ-LCoT-7B-Instruct-Q4_K_M.gguf + files: + - filename: QwQ-LCoT-7B-Instruct-Q4_K_M.gguf + sha256: 1df2e4ff0093a9632687b73969153442776b0ffc1c3c68e7f559472f9cea1945 + uri: huggingface://bartowski/QwQ-LCoT-7B-Instruct-GGUF/QwQ-LCoT-7B-Instruct-Q4_K_M.gguf - &smollm ## SmolLM url: "github:mudler/LocalAI/gallery/chatml.yaml@master" From fc920cc58a6476d79f1f533322592202e0a6845b Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Tue, 17 Dec 2024 23:15:14 +0100 Subject: [PATCH 061/849] chore: :arrow_up: Update ggerganov/llama.cpp to `081b29bd2a3d91e7772e3910ce223dd63b8d7d26` (#4421) :arrow_up: Update ggerganov/llama.cpp Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 4226c5d7..5902c09e 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ DETECT_LIBS?=true # llama.cpp versions GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be -CPPLLAMA_VERSION?=08ea539df211e46bb4d0dd275e541cb591d5ebc8 +CPPLLAMA_VERSION?=081b29bd2a3d91e7772e3910ce223dd63b8d7d26 # whisper.cpp version WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp From 2bc4b56a79e375839e78e3f60e41aff9aa3ef542 Mon Sep 17 00:00:00 2001 From: mintyleaf Date: Wed, 18 Dec 2024 12:48:50 +0400 Subject: [PATCH 062/849] feat: stream tokens usage (#4415) * Use pb.Reply instead of []byte with Reply.GetMessage() in llama grpc to get the proper usage data in reply streaming mode at the last [DONE] frame * Fix 'hang' on empty message from the start Seems like that empty message marker trick was unnecessary --------- Co-authored-by: Ettore Di Giacinto --- core/backend/llm.go | 12 ++++++++++-- pkg/grpc/backend.go | 2 +- pkg/grpc/client.go | 6 +++--- pkg/grpc/embed.go | 6 +++--- 4 files changed, 17 insertions(+), 9 deletions(-) diff --git a/core/backend/llm.go b/core/backend/llm.go index 4491a191..9a4d0d46 100644 --- a/core/backend/llm.go +++ b/core/backend/llm.go @@ -117,8 +117,12 @@ func ModelInference(ctx context.Context, s string, messages []schema.Message, im ss := "" var partialRune []byte - err := inferenceModel.PredictStream(ctx, opts, func(chars []byte) { - partialRune = append(partialRune, chars...) + err := inferenceModel.PredictStream(ctx, opts, func(reply *proto.Reply) { + msg := reply.Message + partialRune = append(partialRune, msg...) 
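		// [Editor's note: illustrative, not part of the patch] Because the
		// callback now receives *proto.Reply instead of raw bytes, every
		// streamed frame carries the usage counters assigned just below, so
		// a caller could surface per-frame usage, e.g.:
		//
		//   err := inferenceModel.PredictStream(ctx, opts, func(reply *proto.Reply) {
		//       log.Printf("prompt=%d completion=%d", reply.PromptTokens, reply.Tokens)
		//   })
		//
		// Only the fields visible in this hunk are assumed; the logging is a sketch.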
+ + tokenUsage.Prompt = int(reply.PromptTokens) + tokenUsage.Completion = int(reply.Tokens) for len(partialRune) > 0 { r, size := utf8.DecodeRune(partialRune) @@ -132,6 +136,10 @@ func ModelInference(ctx context.Context, s string, messages []schema.Message, im partialRune = partialRune[size:] } + + if len(msg) == 0 { + tokenCallback("", tokenUsage) + } }) return LLMResponse{ Response: ss, diff --git a/pkg/grpc/backend.go b/pkg/grpc/backend.go index 21435891..fabc0268 100644 --- a/pkg/grpc/backend.go +++ b/pkg/grpc/backend.go @@ -37,7 +37,7 @@ type Backend interface { Embeddings(ctx context.Context, in *pb.PredictOptions, opts ...grpc.CallOption) (*pb.EmbeddingResult, error) Predict(ctx context.Context, in *pb.PredictOptions, opts ...grpc.CallOption) (*pb.Reply, error) LoadModel(ctx context.Context, in *pb.ModelOptions, opts ...grpc.CallOption) (*pb.Result, error) - PredictStream(ctx context.Context, in *pb.PredictOptions, f func(s []byte), opts ...grpc.CallOption) error + PredictStream(ctx context.Context, in *pb.PredictOptions, f func(reply *pb.Reply), opts ...grpc.CallOption) error GenerateImage(ctx context.Context, in *pb.GenerateImageRequest, opts ...grpc.CallOption) (*pb.Result, error) TTS(ctx context.Context, in *pb.TTSRequest, opts ...grpc.CallOption) (*pb.Result, error) SoundGeneration(ctx context.Context, in *pb.SoundGenerationRequest, opts ...grpc.CallOption) (*pb.Result, error) diff --git a/pkg/grpc/client.go b/pkg/grpc/client.go index 9c8b302e..ca207c3f 100644 --- a/pkg/grpc/client.go +++ b/pkg/grpc/client.go @@ -136,7 +136,7 @@ func (c *Client) LoadModel(ctx context.Context, in *pb.ModelOptions, opts ...grp return client.LoadModel(ctx, in, opts...) } -func (c *Client) PredictStream(ctx context.Context, in *pb.PredictOptions, f func(s []byte), opts ...grpc.CallOption) error { +func (c *Client) PredictStream(ctx context.Context, in *pb.PredictOptions, f func(reply *pb.Reply), opts ...grpc.CallOption) error { if !c.parallel { c.opMutex.Lock() defer c.opMutex.Unlock() @@ -158,7 +158,7 @@ func (c *Client) PredictStream(ctx context.Context, in *pb.PredictOptions, f fun } for { - feature, err := stream.Recv() + reply, err := stream.Recv() if err == io.EOF { break } @@ -167,7 +167,7 @@ func (c *Client) PredictStream(ctx context.Context, in *pb.PredictOptions, f fun return err } - f(feature.GetMessage()) + f(reply) } return nil diff --git a/pkg/grpc/embed.go b/pkg/grpc/embed.go index a5828a5f..79648c5a 100644 --- a/pkg/grpc/embed.go +++ b/pkg/grpc/embed.go @@ -35,7 +35,7 @@ func (e *embedBackend) LoadModel(ctx context.Context, in *pb.ModelOptions, opts return e.s.LoadModel(ctx, in) } -func (e *embedBackend) PredictStream(ctx context.Context, in *pb.PredictOptions, f func(s []byte), opts ...grpc.CallOption) error { +func (e *embedBackend) PredictStream(ctx context.Context, in *pb.PredictOptions, f func(reply *pb.Reply), opts ...grpc.CallOption) error { bs := &embedBackendServerStream{ ctx: ctx, fn: f, @@ -97,11 +97,11 @@ func (e *embedBackend) GetTokenMetrics(ctx context.Context, in *pb.MetricsReques type embedBackendServerStream struct { ctx context.Context - fn func(s []byte) + fn func(reply *pb.Reply) } func (e *embedBackendServerStream) Send(reply *pb.Reply) error { - e.fn(reply.GetMessage()) + e.fn(reply) return nil } From 0b4bb7a562be6df8e91a2d54551f9385aff9c481 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Wed, 18 Dec 2024 09:49:48 +0100 Subject: [PATCH 063/849] chore(model gallery): add llama-openreviewer-8b (#4422) Signed-off-by: Ettore Di Giacinto --- 
gallery/index.yaml | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index ff4969ca..f616b57b 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -3942,6 +3942,20 @@ - filename: FuseChat-Llama-3.1-8B-Instruct-Q4_K_M.gguf sha256: fe58c8c9b695e36e6b0ee5e4d81ff71ea0a4f1a11fa7bb16e8d6f1b35a58dff6 uri: huggingface://bartowski/FuseChat-Llama-3.1-8B-Instruct-GGUF/FuseChat-Llama-3.1-8B-Instruct-Q4_K_M.gguf +- !!merge <<: *llama31 + name: "llama-openreviewer-8b" + urls: + - https://huggingface.co/maxidl/Llama-OpenReviewer-8B + - https://huggingface.co/bartowski/Llama-OpenReviewer-8B-GGUF + description: | + Llama-OpenReviewer-8B is a large language model customized to generate high-quality reviews for machine learning and AI-related conference articles. We collected a dataset containing ~79k high-confidence reviews for ~32k individual papers from OpenReview. + overrides: + parameters: + model: Llama-OpenReviewer-8B-Q4_K_M.gguf + files: + - filename: Llama-OpenReviewer-8B-Q4_K_M.gguf + sha256: b48fd7eee01738de4adcb271fc3c7c5b306f8c75b9804794706dbfdf7a6835f0 + uri: huggingface://bartowski/Llama-OpenReviewer-8B-GGUF/Llama-OpenReviewer-8B-Q4_K_M.gguf - &deepseek ## Deepseek url: "github:mudler/LocalAI/gallery/deepseek.yaml@master" From f52c6e3a311d13fc743b151ccac32006cf356166 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Wed, 18 Dec 2024 10:12:06 +0100 Subject: [PATCH 064/849] chore(model gallery): add falcon3-1b-instruct (#4423) Signed-off-by: Ettore Di Giacinto --- gallery/falcon3.yaml | 40 ++++++++++++++++++++++++++++++++++++++++ gallery/index.yaml | 25 +++++++++++++++++++++++++ 2 files changed, 65 insertions(+) create mode 100644 gallery/falcon3.yaml diff --git a/gallery/falcon3.yaml b/gallery/falcon3.yaml new file mode 100644 index 00000000..b6593f4b --- /dev/null +++ b/gallery/falcon3.yaml @@ -0,0 +1,40 @@ +--- +name: "falcon3" + +config_file: | + mmap: true + template: + chat_message: | + <|{{ .RoleName }}|> + {{ if .FunctionCall -}} + Function call: + {{ else if eq .RoleName "tool" -}} + Function response: + {{ end -}} + {{ if .Content -}} + {{.Content }} + {{ end -}} + {{ if .FunctionCall -}} + {{toJson .FunctionCall}} + {{ end -}} + {{ if eq .RoleName "assistant" }}<|endoftext|>{{ end }} + function: | + <|system|> + You are a function calling AI model. You are provided with functions to execute. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. 
Here are the available tools: + {{range .Functions}} + {'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }} + {{end}} + For each function call return a json object with function name and arguments + {{.Input }} + <|im_start|>assistant + chat: | + {{.Input }} + <|im_start|>assistant + completion: | + {{.Input}} + context_size: 4096 + f16: true + stopwords: + - '<|endoftext|>' + - '' + - '' diff --git a/gallery/index.yaml b/gallery/index.yaml index f616b57b..5cb02f55 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -1,4 +1,29 @@ --- +- &falcon3 + name: "falcon3-1b-instruct" + url: "github:mudler/LocalAI/gallery/falcon3.yaml@master" + icon: https://huggingface.co/datasets/tiiuae/documentation-images/resolve/main/general/falco3-logo.png + urls: + - https://huggingface.co/tiiuae/Falcon3-1B-Instruct + - https://huggingface.co/bartowski/Falcon3-1B-Instruct-GGUF + description: | + Falcon3 family of Open Foundation Models is a set of pretrained and instruct LLMs ranging from 1B to 10B parameters. + + This repository contains the Falcon3-1B-Instruct. It achieves strong results on reasoning, language understanding, instruction following, code and mathematics tasks. Falcon3-1B-Instruct supports 4 languages (English, French, Spanish, Portuguese) and a context length of up to 8K. + overrides: + parameters: + model: Falcon3-1B-Instruct-Q4_K_M.gguf + files: + - filename: Falcon3-1B-Instruct-Q4_K_M.gguf + sha256: d351a6506b7d21221f3858b04d98c8b1b7b108b85acde2b13b69d9cb06e2a7e9 + uri: huggingface://bartowski/Falcon3-1B-Instruct-GGUF/Falcon3-1B-Instruct-Q4_K_M.gguf + tags: + - llm + - gguf + - gpu + - cpu + - falcon + license: falcon-llm - &intellect1 name: "intellect-1-instruct" url: "github:mudler/LocalAI/gallery/llama3.1-instruct.yaml@master" From 5f804aa6e8da229deba3770c20093aa795d140e1 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Wed, 18 Dec 2024 10:32:31 +0100 Subject: [PATCH 065/849] chore(model gallery): add falcon3-3b-instruct (#4424) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index 5cb02f55..a23b57ab 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -24,6 +24,18 @@ - cpu - falcon license: falcon-llm +- !!merge <<: *falcon3 + name: "falcon3-3b-instruct" + urls: + - https://huggingface.co/tiiuae/Falcon3-3B-Instruct + - https://huggingface.co/bartowski/Falcon3-3B-Instruct-GGUF + overrides: + parameters: + model: Falcon3-3B-Instruct-Q4_K_M.gguf + files: + - filename: Falcon3-3B-Instruct-Q4_K_M.gguf + sha256: e6d81653ee28c6944e4f9ab626882faabb69db8019ddcf87f2732d05f3d9158a + uri: huggingface://bartowski/Falcon3-3B-Instruct-GGUF/Falcon3-3B-Instruct-Q4_K_M.gguf - &intellect1 name: "intellect-1-instruct" url: "github:mudler/LocalAI/gallery/llama3.1-instruct.yaml@master" From 4623728cd74022e625ac49c403b89f7d40c77681 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Wed, 18 Dec 2024 10:35:49 +0100 Subject: [PATCH 066/849] chore(model gallery): add qwen2-vl-72b-instruct (#4425) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index a23b57ab..8125f422 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -2097,6 +2097,28 @@ - filename: Neumind-Math-7B-Instruct.Q4_K_M.gguf sha256: 3250abadeae4234e06dfaf7cf86fe871fe021e6c2dfcb4542c2a4f412d71e28c uri: 
huggingface://QuantFactory/Neumind-Math-7B-Instruct-GGUF/Neumind-Math-7B-Instruct.Q4_K_M.gguf +- !!merge <<: *qwen25 + name: "qwen2-vl-72b-instruct" + urls: + - https://huggingface.co/Qwen/Qwen2-VL-72B-Instruct + - https://huggingface.co/bartowski/Qwen2-VL-72B-Instruct-GGUF + description: | + We're excited to unveil Qwen2-VL, the latest iteration of our Qwen-VL model, representing nearly a year of innovation. + Key Enhancements: + SoTA understanding of images of various resolution & ratio: Qwen2-VL achieves state-of-the-art performance on visual understanding benchmarks, including MathVista, DocVQA, RealWorldQA, MTVQA, etc. + + Understanding videos of 20min+: Qwen2-VL can understand videos over 20 minutes for high-quality video-based question answering, dialog, content creation, etc. + + Agent that can operate your mobiles, robots, etc.: with the abilities of complex reasoning and decision making, Qwen2-VL can be integrated with devices like mobile phones, robots, etc., for automatic operation based on visual environment and text instructions. + + Multilingual Support: to serve global users, besides English and Chinese, Qwen2-VL now supports the understanding of texts in different languages inside images, including most European languages, Japanese, Korean, Arabic, Vietnamese, etc. + overrides: + parameters: + model: Qwen2-VL-72B-Instruct-Q4_K_M.gguf + files: + - filename: Qwen2-VL-72B-Instruct-Q4_K_M.gguf + sha256: 0def10ee892a4d4c72ba3807d150de2e1f600edd981d15d402e3d25753cf168d + uri: huggingface://bartowski/Qwen2-VL-72B-Instruct-GGUF/Qwen2-VL-72B-Instruct-Q4_K_M.gguf - &archfunct license: apache-2.0 tags: From 8c67f38ef6901cbeb851b8be7223b858ef35578e Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Wed, 18 Dec 2024 10:36:41 +0100 Subject: [PATCH 067/849] chore(model gallery): add falcon3-10b-instruct (#4426) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index 8125f422..5050f50b 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -36,6 +36,18 @@ - filename: Falcon3-3B-Instruct-Q4_K_M.gguf sha256: e6d81653ee28c6944e4f9ab626882faabb69db8019ddcf87f2732d05f3d9158a uri: huggingface://bartowski/Falcon3-3B-Instruct-GGUF/Falcon3-3B-Instruct-Q4_K_M.gguf +- !!merge <<: *falcon3 + name: "falcon3-10b-instruct" + urls: + - https://huggingface.co/tiiuae/Falcon3-10B-Instruct + - https://huggingface.co/bartowski/Falcon3-10B-Instruct-GGUF + overrides: + parameters: + model: Falcon3-10B-Instruct-Q4_K_M.gguf + files: + - filename: Falcon3-10B-Instruct-Q4_K_M.gguf + sha256: 6d54a35d740a616061d6c7d7740d64f4339410e58aaba985aa9e1ea79c7e882a + uri: huggingface://bartowski/Falcon3-10B-Instruct-GGUF/Falcon3-10B-Instruct-Q4_K_M.gguf - &intellect1 name: "intellect-1-instruct" url: "github:mudler/LocalAI/gallery/llama3.1-instruct.yaml@master" From b656d105562ea924755b54e0c8f875db16780d8d Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Thu, 19 Dec 2024 09:48:33 +0100 Subject: [PATCH 068/849] chore(model gallery): add llama-song-stream-3b-instruct (#4431) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index 5050f50b..4a076bad 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -869,6 +869,20 @@ - filename: FuseChat-Llama-3.2-3B-Instruct-Q4_K_M.gguf sha256: a4f0e9a905b74886b79b72622c06a3219d6812818a564a53c39fc49032d7f842 uri: 
huggingface://bartowski/FuseChat-Llama-3.2-3B-Instruct-GGUF/FuseChat-Llama-3.2-3B-Instruct-Q4_K_M.gguf +- !!merge <<: *llama32 + name: "llama-song-stream-3b-instruct" + urls: + - https://huggingface.co/prithivMLmods/Llama-Song-Stream-3B-Instruct + - https://huggingface.co/bartowski/Llama-Song-Stream-3B-Instruct-GGUF + description: | + The Llama-Song-Stream-3B-Instruct is a fine-tuned language model specializing in generating music-related text, such as song lyrics, compositions, and musical thoughts. Built upon the meta-llama/Llama-3.2-3B-Instruct base, it has been trained with a custom dataset focused on song lyrics and music compositions to produce context-aware, creative, and stylized music output. + overrides: + parameters: + model: Llama-Song-Stream-3B-Instruct-Q4_K_M.gguf + files: + - filename: Llama-Song-Stream-3B-Instruct-Q4_K_M.gguf + sha256: 1a0f0aef16e5be46be827c430cbe5ce4b2915b2f4a1dd60b98792004d39b9f52 + uri: huggingface://bartowski/Llama-Song-Stream-3B-Instruct-GGUF/Llama-Song-Stream-3B-Instruct-Q4_K_M.gguf - &qwen25 ## Qwen2.5 name: "qwen2.5-14b-instruct" From 3d3bd2d10fe8ebdaa805945cf486e2250af60b0f Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Thu, 19 Dec 2024 09:53:49 +0100 Subject: [PATCH 069/849] chore: :arrow_up: Update ggerganov/llama.cpp to `0bf2d10c5514ff61b99897a4a5054f846e384e1e` (#4429) :arrow_up: Update ggerganov/llama.cpp Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 5902c09e..f44d2abb 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ DETECT_LIBS?=true # llama.cpp versions GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be -CPPLLAMA_VERSION?=081b29bd2a3d91e7772e3910ce223dd63b8d7d26 +CPPLLAMA_VERSION?=0bf2d10c5514ff61b99897a4a5054f846e384e1e # whisper.cpp version WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp From d12660a2864dd0d05e4129c93aa66dbb30837ff7 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Thu, 19 Dec 2024 09:56:19 +0100 Subject: [PATCH 070/849] chore(model gallery): add llama-chat-summary-3.2-3b (#4432) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index 4a076bad..92a2f8a6 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -883,6 +883,20 @@ - filename: Llama-Song-Stream-3B-Instruct-Q4_K_M.gguf sha256: 1a0f0aef16e5be46be827c430cbe5ce4b2915b2f4a1dd60b98792004d39b9f52 uri: huggingface://bartowski/Llama-Song-Stream-3B-Instruct-GGUF/Llama-Song-Stream-3B-Instruct-Q4_K_M.gguf +- !!merge <<: *llama32 + name: "llama-chat-summary-3.2-3b" + urls: + - https://huggingface.co/prithivMLmods/Llama-Chat-Summary-3.2-3B + - https://huggingface.co/bartowski/Llama-Chat-Summary-3.2-3B-GGUF + description: | + Llama-Chat-Summary-3.2-3B is a fine-tuned model designed for generating context-aware summaries of long conversational or text-based inputs. Built on the meta-llama/Llama-3.2-3B-Instruct foundation, this model is optimized to process structured and unstructured conversational data for summarization tasks. 
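#   [Editor's sketch: not part of the patch] Once installed, the entry is served
#   through LocalAI's OpenAI-compatible API, so a summarization request could
#   look like the following (endpoint and port per LocalAI defaults; the payload
#   shape is an assumption):
#
#     curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" \
#       -d '{"model": "llama-chat-summary-3.2-3b",
#            "messages": [{"role": "user", "content": "Summarize this conversation: ..."}]}'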
+ overrides: + parameters: + model: Llama-Chat-Summary-3.2-3B-Q4_K_M.gguf + files: + - filename: Llama-Chat-Summary-3.2-3B-Q4_K_M.gguf + sha256: ed1be20d2374aa6db9940923f41fa229bd7ebe13d41b1ff1ff18a6f87e99df79 + uri: huggingface://bartowski/Llama-Chat-Summary-3.2-3B-GGUF/Llama-Chat-Summary-3.2-3B-Q4_K_M.gguf - &qwen25 ## Qwen2.5 name: "qwen2.5-14b-instruct" From 9ac62b589f1e5799c2f32284630f2df17763159c Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Thu, 19 Dec 2024 13:15:30 +0100 Subject: [PATCH 071/849] chore: :arrow_up: Update ggerganov/llama.cpp to `cd920d0ac38ec243605a5a57c50941140a193f9e` (#4433) :arrow_up: Update ggerganov/llama.cpp Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index f44d2abb..eca69917 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ DETECT_LIBS?=true # llama.cpp versions GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be -CPPLLAMA_VERSION?=0bf2d10c5514ff61b99897a4a5054f846e384e1e +CPPLLAMA_VERSION?=cd920d0ac38ec243605a5a57c50941140a193f9e # whisper.cpp version WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp From ae9855a39ea8cc44349dbbb05239e8976593796a Mon Sep 17 00:00:00 2001 From: jtwolfe Date: Fri, 20 Dec 2024 00:19:31 +1000 Subject: [PATCH 072/849] chore(docs): patch p2p detail in env and docs (#4434) * Update distributed_inferencing.md Signed-off-by: jtwolfe * Update .env Signed-off-by: jtwolfe * Update distributed_inferencing.md whoops Signed-off-by: jtwolfe --------- Signed-off-by: jtwolfe Co-authored-by: Ettore Di Giacinto --- .env | 9 +++++++++ docs/content/docs/features/distributed_inferencing.md | 3 +++ 2 files changed, 12 insertions(+) diff --git a/.env b/.env index 9e5dbd79..e92f7f3b 100644 --- a/.env +++ b/.env @@ -82,6 +82,15 @@ # Enable to allow p2p mode # LOCALAI_P2P=true +# Enable to use federated mode +# LOCALAI_FEDERATED=true + +# Enable to start federation server +# FEDERATED_SERVER=true + +# Define to use federation token +# TOKEN="" + ### Watchdog settings ### # Enables watchdog to kill backends that are inactive for too much time diff --git a/docs/content/docs/features/distributed_inferencing.md b/docs/content/docs/features/distributed_inferencing.md index 254de092..71d29f39 100644 --- a/docs/content/docs/features/distributed_inferencing.md +++ b/docs/content/docs/features/distributed_inferencing.md @@ -129,6 +129,9 @@ There are options that can be tweaked or parameters that can be set using enviro | Environment Variable | Description | |----------------------|-------------| +| **LOCALAI_P2P** | Set to "true" to enable p2p | +| **LOCALAI_FEDERATED** | Set to "true" to enable federated mode | +| **FEDERATED_SERVER** | Set to "true" to enable federated server | | **LOCALAI_P2P_DISABLE_DHT** | Set to "true" to disable DHT and enable p2p layer to be local only (mDNS) | | **LOCALAI_P2P_ENABLE_LIMITS** | Set to "true" to enable connection limits and resources management (useful when running with poor connectivity or want to limit resources consumption) | | **LOCALAI_P2P_LISTEN_MADDRS** | Set to comma separated list of multiaddresses to override default libp2p 0.0.0.0 multiaddresses | From b325807c60c91771ba499f127d004b2ebd8770d0 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Thu, 19 Dec 2024 15:39:32 +0100 
Subject: [PATCH 073/849] fix(intel): pin torch and intel-extensions (#4435) * fix(intel): pin torch version Signed-off-by: Ettore Di Giacinto * fix(intel): pin intel packages version Signed-off-by: Ettore Di Giacinto --------- Signed-off-by: Ettore Di Giacinto --- backend/python/autogptq/requirements-intel.txt | 5 +++-- backend/python/bark/requirements-intel.txt | 7 ++++--- backend/python/common/template/requirements-intel.txt | 5 +++-- backend/python/coqui/requirements-intel.txt | 7 ++++--- backend/python/diffusers/requirements-intel.txt | 7 ++++--- backend/python/openvoice/requirements-intel.txt | 5 +++-- backend/python/parler-tts/requirements-intel.txt | 7 ++++--- backend/python/rerankers/requirements-intel.txt | 5 +++-- .../python/sentencetransformers/requirements-intel.txt | 5 +++-- .../python/transformers-musicgen/requirements-intel.txt | 5 +++-- backend/python/transformers/requirements-intel.txt | 5 +++-- backend/python/vall-e-x/requirements-intel.txt | 9 +++++---- backend/python/vllm/requirements-intel.txt | 7 ++++--- 13 files changed, 46 insertions(+), 33 deletions(-) diff --git a/backend/python/autogptq/requirements-intel.txt b/backend/python/autogptq/requirements-intel.txt index cec8bff4..07b502eb 100644 --- a/backend/python/autogptq/requirements-intel.txt +++ b/backend/python/autogptq/requirements-intel.txt @@ -1,5 +1,6 @@ --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/ -intel-extension-for-pytorch -torch +intel-extension-for-pytorch==2.3.110+xpu +torch==2.3.1+cxx11.abi +oneccl_bind_pt==2.3.100+xpu optimum[openvino] setuptools \ No newline at end of file diff --git a/backend/python/bark/requirements-intel.txt b/backend/python/bark/requirements-intel.txt index 1f043bbf..f24bd166 100644 --- a/backend/python/bark/requirements-intel.txt +++ b/backend/python/bark/requirements-intel.txt @@ -1,7 +1,8 @@ --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/ -intel-extension-for-pytorch -torch -torchaudio +intel-extension-for-pytorch==2.3.110+xpu +torch==2.3.1+cxx11.abi +torchaudio==2.3.1+cxx11.abi +oneccl_bind_pt==2.3.100+xpu optimum[openvino] setuptools transformers diff --git a/backend/python/common/template/requirements-intel.txt b/backend/python/common/template/requirements-intel.txt index 6dc25a10..b5318a13 100644 --- a/backend/python/common/template/requirements-intel.txt +++ b/backend/python/common/template/requirements-intel.txt @@ -1,4 +1,5 @@ --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/ -intel-extension-for-pytorch -torch +intel-extension-for-pytorch==2.3.110+xpu +torch==2.3.1+cxx11.abi +oneccl_bind_pt==2.3.100+xpu optimum[openvino] \ No newline at end of file diff --git a/backend/python/coqui/requirements-intel.txt b/backend/python/coqui/requirements-intel.txt index 7ed2fb42..202dd4ad 100644 --- a/backend/python/coqui/requirements-intel.txt +++ b/backend/python/coqui/requirements-intel.txt @@ -1,7 +1,8 @@ --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/ -intel-extension-for-pytorch -torch -torchaudio +intel-extension-for-pytorch==2.3.110+xpu +torch==2.3.1+cxx11.abi +torchaudio==2.3.1+cxx11.abi +oneccl_bind_pt==2.3.100+xpu optimum[openvino] setuptools transformers diff --git a/backend/python/diffusers/requirements-intel.txt b/backend/python/diffusers/requirements-intel.txt index bd6632bf..eb7448b0 100644 --- a/backend/python/diffusers/requirements-intel.txt +++ b/backend/python/diffusers/requirements-intel.txt @@ -1,7 +1,8 @@ --extra-index-url 
https://pytorch-extension.intel.com/release-whl/stable/xpu/us/ -intel-extension-for-pytorch -torch -torchvision +intel-extension-for-pytorch==2.3.110+xpu +torch==2.3.1+cxx11.abi +torchvision==0.18.1+cxx11.abi +oneccl_bind_pt==2.3.100+xpu optimum[openvino] setuptools diffusers diff --git a/backend/python/openvoice/requirements-intel.txt b/backend/python/openvoice/requirements-intel.txt index 7908a889..e3f75e71 100644 --- a/backend/python/openvoice/requirements-intel.txt +++ b/backend/python/openvoice/requirements-intel.txt @@ -1,6 +1,7 @@ --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/ -intel-extension-for-pytorch -torch +intel-extension-for-pytorch==2.3.110+xpu +torch==2.3.1+cxx11.abi +oneccl_bind_pt==2.3.100+xpu optimum[openvino] grpcio==1.68.1 protobuf diff --git a/backend/python/parler-tts/requirements-intel.txt b/backend/python/parler-tts/requirements-intel.txt index bcb8900e..f6814bd9 100644 --- a/backend/python/parler-tts/requirements-intel.txt +++ b/backend/python/parler-tts/requirements-intel.txt @@ -1,7 +1,8 @@ --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/ -intel-extension-for-pytorch -torch -torchaudio +intel-extension-for-pytorch==2.3.110+xpu +torch==2.3.1+cxx11.abi +torchaudio==2.3.1+cxx11.abi +oneccl_bind_pt==2.3.100+xpu optimum[openvino] transformers accelerate \ No newline at end of file diff --git a/backend/python/rerankers/requirements-intel.txt b/backend/python/rerankers/requirements-intel.txt index a3cc600c..c071e8fb 100644 --- a/backend/python/rerankers/requirements-intel.txt +++ b/backend/python/rerankers/requirements-intel.txt @@ -1,8 +1,9 @@ --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/ -intel-extension-for-pytorch +intel-extension-for-pytorch==2.3.110+xpu transformers accelerate -torch +torch==2.3.1+cxx11.abi +oneccl_bind_pt==2.3.100+xpu rerankers[transformers] optimum[openvino] setuptools \ No newline at end of file diff --git a/backend/python/sentencetransformers/requirements-intel.txt b/backend/python/sentencetransformers/requirements-intel.txt index 23e0d5f2..e9b72aab 100644 --- a/backend/python/sentencetransformers/requirements-intel.txt +++ b/backend/python/sentencetransformers/requirements-intel.txt @@ -1,6 +1,7 @@ --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/ -intel-extension-for-pytorch -torch +intel-extension-for-pytorch==2.3.110+xpu +torch==2.3.1+cxx11.abi +oneccl_bind_pt==2.3.100+xpu optimum[openvino] setuptools accelerate diff --git a/backend/python/transformers-musicgen/requirements-intel.txt b/backend/python/transformers-musicgen/requirements-intel.txt index bb191163..ac2feb42 100644 --- a/backend/python/transformers-musicgen/requirements-intel.txt +++ b/backend/python/transformers-musicgen/requirements-intel.txt @@ -1,7 +1,8 @@ --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/ -intel-extension-for-pytorch +intel-extension-for-pytorch==2.3.110+xpu transformers +oneccl_bind_pt==2.3.100+xpu accelerate -torch +torch==2.3.1+cxx11.abi optimum[openvino] setuptools \ No newline at end of file diff --git a/backend/python/transformers/requirements-intel.txt b/backend/python/transformers/requirements-intel.txt index 5d9efb71..dd683cd9 100644 --- a/backend/python/transformers/requirements-intel.txt +++ b/backend/python/transformers/requirements-intel.txt @@ -1,6 +1,7 @@ --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/ -intel-extension-for-pytorch -torch 
+intel-extension-for-pytorch==2.3.110+xpu +torch==2.3.1+cxx11.abi +oneccl_bind_pt==2.3.100+xpu optimum[openvino] intel-extension-for-transformers bitsandbytes \ No newline at end of file diff --git a/backend/python/vall-e-x/requirements-intel.txt b/backend/python/vall-e-x/requirements-intel.txt index 284e7131..efcf885a 100644 --- a/backend/python/vall-e-x/requirements-intel.txt +++ b/backend/python/vall-e-x/requirements-intel.txt @@ -1,6 +1,7 @@ --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/ -intel-extension-for-pytorch +intel-extension-for-pytorch==2.3.110+xpu accelerate -torch -torchaudio -optimum[openvino] \ No newline at end of file +torch==2.3.1+cxx11.abi +torchaudio==2.3.1+cxx11.abi +optimum[openvino] +oneccl_bind_pt==2.3.100+xpu \ No newline at end of file diff --git a/backend/python/vllm/requirements-intel.txt b/backend/python/vllm/requirements-intel.txt index 36326f95..8955165a 100644 --- a/backend/python/vllm/requirements-intel.txt +++ b/backend/python/vllm/requirements-intel.txt @@ -1,8 +1,9 @@ --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/ -intel-extension-for-pytorch +intel-extension-for-pytorch==2.3.110+xpu accelerate -torch +torch==2.3.1+cxx11.abi transformers optimum[openvino] setuptools -bitsandbytes \ No newline at end of file +bitsandbytes +oneccl_bind_pt==2.3.100+xpu \ No newline at end of file From 3be9a08fc9b6485d934e3840ecfcbba2e545410a Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Thu, 19 Dec 2024 18:24:33 +0100 Subject: [PATCH 074/849] fix(deps): pin openvoice pytorch/torchaudio (#4436) Signed-off-by: Ettore Di Giacinto --- backend/python/openvoice/requirements-cpu.txt | 7 ++++++- backend/python/openvoice/requirements-cublas11.txt | 7 ++++++- backend/python/openvoice/requirements-cublas12.txt | 7 ++++++- backend/python/openvoice/requirements-hipblas.txt | 7 ++++++- backend/python/openvoice/requirements-intel.txt | 1 + backend/python/openvoice/requirements.txt | 5 ----- 6 files changed, 25 insertions(+), 9 deletions(-) diff --git a/backend/python/openvoice/requirements-cpu.txt b/backend/python/openvoice/requirements-cpu.txt index c5368563..c47e22b5 100644 --- a/backend/python/openvoice/requirements-cpu.txt +++ b/backend/python/openvoice/requirements-cpu.txt @@ -1,3 +1,8 @@ torch==2.4.1 git+https://github.com/myshell-ai/MeloTTS.git -git+https://github.com/myshell-ai/OpenVoice.git \ No newline at end of file +git+https://github.com/myshell-ai/OpenVoice.git +whisper-timestamped +numpy==1.22.0 +pydub==0.25.1 +wavmark==0.0.3 +eng_to_ipa==0.0.2 \ No newline at end of file diff --git a/backend/python/openvoice/requirements-cublas11.txt b/backend/python/openvoice/requirements-cublas11.txt index 3828e652..02b8b050 100644 --- a/backend/python/openvoice/requirements-cublas11.txt +++ b/backend/python/openvoice/requirements-cublas11.txt @@ -1,4 +1,9 @@ --extra-index-url https://download.pytorch.org/whl/cu118 torch==2.4.1+cu118 git+https://github.com/myshell-ai/MeloTTS.git -git+https://github.com/myshell-ai/OpenVoice.git \ No newline at end of file +git+https://github.com/myshell-ai/OpenVoice.git +whisper-timestamped +numpy==1.22.0 +pydub==0.25.1 +wavmark==0.0.3 +eng_to_ipa==0.0.2 \ No newline at end of file diff --git a/backend/python/openvoice/requirements-cublas12.txt b/backend/python/openvoice/requirements-cublas12.txt index c5368563..c47e22b5 100644 --- a/backend/python/openvoice/requirements-cublas12.txt +++ b/backend/python/openvoice/requirements-cublas12.txt @@ -1,3 +1,8 @@ torch==2.4.1 
git+https://github.com/myshell-ai/MeloTTS.git -git+https://github.com/myshell-ai/OpenVoice.git \ No newline at end of file +git+https://github.com/myshell-ai/OpenVoice.git +whisper-timestamped +numpy==1.22.0 +pydub==0.25.1 +wavmark==0.0.3 +eng_to_ipa==0.0.2 \ No newline at end of file diff --git a/backend/python/openvoice/requirements-hipblas.txt b/backend/python/openvoice/requirements-hipblas.txt index 453ce542..a42d98da 100644 --- a/backend/python/openvoice/requirements-hipblas.txt +++ b/backend/python/openvoice/requirements-hipblas.txt @@ -1,4 +1,9 @@ --extra-index-url https://download.pytorch.org/whl/rocm6.0 torch==2.4.1+rocm6.0 git+https://github.com/myshell-ai/MeloTTS.git -git+https://github.com/myshell-ai/OpenVoice.git \ No newline at end of file +git+https://github.com/myshell-ai/OpenVoice.git +whisper-timestamped +numpy==1.22.0 +pydub==0.25.1 +wavmark==0.0.3 +eng_to_ipa==0.0.2 \ No newline at end of file diff --git a/backend/python/openvoice/requirements-intel.txt b/backend/python/openvoice/requirements-intel.txt index e3f75e71..2b9cce82 100644 --- a/backend/python/openvoice/requirements-intel.txt +++ b/backend/python/openvoice/requirements-intel.txt @@ -1,6 +1,7 @@ --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/ intel-extension-for-pytorch==2.3.110+xpu torch==2.3.1+cxx11.abi +torchaudio==2.3.1+cxx11.abi oneccl_bind_pt==2.3.100+xpu optimum[openvino] grpcio==1.68.1 diff --git a/backend/python/openvoice/requirements.txt b/backend/python/openvoice/requirements.txt index e6a1e5a5..98be8590 100644 --- a/backend/python/openvoice/requirements.txt +++ b/backend/python/openvoice/requirements.txt @@ -2,13 +2,8 @@ grpcio==1.68.1 protobuf librosa faster-whisper -pydub==0.25.1 -wavmark==0.0.3 -numpy==1.22.0 -eng_to_ipa==0.0.2 inflect unidecode -whisper-timestamped openai python-dotenv pypinyin From f2f387e1dd432fd6216341bd4bd99bb6a00120f9 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Thu, 19 Dec 2024 21:30:43 +0100 Subject: [PATCH 075/849] fix(openvoice): do not pin numpy (#4438) Signed-off-by: Ettore Di Giacinto --- backend/python/openvoice/requirements-cpu.txt | 2 +- backend/python/openvoice/requirements-cublas11.txt | 2 +- backend/python/openvoice/requirements-cublas12.txt | 2 +- backend/python/openvoice/requirements-intel.txt | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/backend/python/openvoice/requirements-cpu.txt b/backend/python/openvoice/requirements-cpu.txt index c47e22b5..ad679f24 100644 --- a/backend/python/openvoice/requirements-cpu.txt +++ b/backend/python/openvoice/requirements-cpu.txt @@ -2,7 +2,7 @@ torch==2.4.1 git+https://github.com/myshell-ai/MeloTTS.git git+https://github.com/myshell-ai/OpenVoice.git whisper-timestamped -numpy==1.22.0 +numpy pydub==0.25.1 wavmark==0.0.3 eng_to_ipa==0.0.2 \ No newline at end of file diff --git a/backend/python/openvoice/requirements-cublas11.txt b/backend/python/openvoice/requirements-cublas11.txt index 02b8b050..4edd72ba 100644 --- a/backend/python/openvoice/requirements-cublas11.txt +++ b/backend/python/openvoice/requirements-cublas11.txt @@ -3,7 +3,7 @@ torch==2.4.1+cu118 git+https://github.com/myshell-ai/MeloTTS.git git+https://github.com/myshell-ai/OpenVoice.git whisper-timestamped -numpy==1.22.0 +numpy pydub==0.25.1 wavmark==0.0.3 eng_to_ipa==0.0.2 \ No newline at end of file diff --git a/backend/python/openvoice/requirements-cublas12.txt b/backend/python/openvoice/requirements-cublas12.txt index c47e22b5..ad679f24 100644 --- 
a/backend/python/openvoice/requirements-cublas12.txt +++ b/backend/python/openvoice/requirements-cublas12.txt @@ -2,7 +2,7 @@ torch==2.4.1 git+https://github.com/myshell-ai/MeloTTS.git git+https://github.com/myshell-ai/OpenVoice.git whisper-timestamped -numpy==1.22.0 +numpy pydub==0.25.1 wavmark==0.0.3 eng_to_ipa==0.0.2 \ No newline at end of file diff --git a/backend/python/openvoice/requirements-intel.txt b/backend/python/openvoice/requirements-intel.txt index 2b9cce82..e8d07008 100644 --- a/backend/python/openvoice/requirements-intel.txt +++ b/backend/python/openvoice/requirements-intel.txt @@ -10,7 +10,7 @@ librosa==0.9.1 faster-whisper==0.9.0 pydub==0.25.1 wavmark==0.0.3 -numpy==1.22.0 +numpy eng_to_ipa==0.0.2 inflect==7.0.0 unidecode==1.3.7 From 61e486dbf591b9e0dd6a294a0a134de543da9364 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Fri, 20 Dec 2024 00:03:47 +0100 Subject: [PATCH 076/849] chore: :arrow_up: Update ggerganov/llama.cpp to `d408bb9268a988c5a60a5746d3a6430386e7604d` (#4437) :arrow_up: Update ggerganov/llama.cpp Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> Co-authored-by: Ettore Di Giacinto --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index eca69917..a054c575 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ DETECT_LIBS?=true # llama.cpp versions GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be -CPPLLAMA_VERSION?=cd920d0ac38ec243605a5a57c50941140a193f9e +CPPLLAMA_VERSION?=d408bb9268a988c5a60a5746d3a6430386e7604d # whisper.cpp version WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp From 8a08e9ec67d5cfe47ff684f84c618e862e9d1fdd Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Fri, 20 Dec 2024 10:34:23 +0100 Subject: [PATCH 077/849] fix(openvoice): pin numpy before installing torch (#4439) Signed-off-by: Ettore Di Giacinto --- backend/python/openvoice/requirements-cpu.txt | 1 - backend/python/openvoice/requirements-cublas11.txt | 1 - backend/python/openvoice/requirements-cublas12.txt | 1 - backend/python/openvoice/requirements-hipblas.txt | 1 - backend/python/openvoice/requirements-intel.txt | 1 - backend/python/openvoice/requirements.txt | 1 + 6 files changed, 1 insertion(+), 5 deletions(-) diff --git a/backend/python/openvoice/requirements-cpu.txt b/backend/python/openvoice/requirements-cpu.txt index ad679f24..dd2eb221 100644 --- a/backend/python/openvoice/requirements-cpu.txt +++ b/backend/python/openvoice/requirements-cpu.txt @@ -2,7 +2,6 @@ torch==2.4.1 git+https://github.com/myshell-ai/MeloTTS.git git+https://github.com/myshell-ai/OpenVoice.git whisper-timestamped -numpy pydub==0.25.1 wavmark==0.0.3 eng_to_ipa==0.0.2 \ No newline at end of file diff --git a/backend/python/openvoice/requirements-cublas11.txt b/backend/python/openvoice/requirements-cublas11.txt index 4edd72ba..84ecc344 100644 --- a/backend/python/openvoice/requirements-cublas11.txt +++ b/backend/python/openvoice/requirements-cublas11.txt @@ -3,7 +3,6 @@ torch==2.4.1+cu118 git+https://github.com/myshell-ai/MeloTTS.git git+https://github.com/myshell-ai/OpenVoice.git whisper-timestamped -numpy pydub==0.25.1 wavmark==0.0.3 eng_to_ipa==0.0.2 \ No newline at end of file diff --git a/backend/python/openvoice/requirements-cublas12.txt b/backend/python/openvoice/requirements-cublas12.txt index ad679f24..dd2eb221 100644 
--- a/backend/python/openvoice/requirements-cublas12.txt +++ b/backend/python/openvoice/requirements-cublas12.txt @@ -2,7 +2,6 @@ torch==2.4.1 git+https://github.com/myshell-ai/MeloTTS.git git+https://github.com/myshell-ai/OpenVoice.git whisper-timestamped -numpy pydub==0.25.1 wavmark==0.0.3 eng_to_ipa==0.0.2 \ No newline at end of file diff --git a/backend/python/openvoice/requirements-hipblas.txt b/backend/python/openvoice/requirements-hipblas.txt index a42d98da..4c2d6649 100644 --- a/backend/python/openvoice/requirements-hipblas.txt +++ b/backend/python/openvoice/requirements-hipblas.txt @@ -3,7 +3,6 @@ torch==2.4.1+rocm6.0 git+https://github.com/myshell-ai/MeloTTS.git git+https://github.com/myshell-ai/OpenVoice.git whisper-timestamped -numpy==1.22.0 pydub==0.25.1 wavmark==0.0.3 eng_to_ipa==0.0.2 \ No newline at end of file diff --git a/backend/python/openvoice/requirements-intel.txt b/backend/python/openvoice/requirements-intel.txt index e8d07008..43fad1ad 100644 --- a/backend/python/openvoice/requirements-intel.txt +++ b/backend/python/openvoice/requirements-intel.txt @@ -10,7 +10,6 @@ librosa==0.9.1 faster-whisper==0.9.0 pydub==0.25.1 wavmark==0.0.3 -numpy eng_to_ipa==0.0.2 inflect==7.0.0 unidecode==1.3.7 diff --git a/backend/python/openvoice/requirements.txt b/backend/python/openvoice/requirements.txt index 98be8590..cee412d5 100644 --- a/backend/python/openvoice/requirements.txt +++ b/backend/python/openvoice/requirements.txt @@ -8,6 +8,7 @@ openai python-dotenv pypinyin cn2an==0.5.22 +numpy==1.22.0 networkx==2.8.8 jieba==0.42.1 gradio==3.48.0 From c4bbecc4d6922e7942878143021d945af1080e7a Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Fri, 20 Dec 2024 16:08:23 +0100 Subject: [PATCH 078/849] chore(model gallery): add tq2.5-14b-aletheia-v1 (#4440) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index 92a2f8a6..e24b6543 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -2159,6 +2159,27 @@ - filename: Qwen2-VL-72B-Instruct-Q4_K_M.gguf sha256: 0def10ee892a4d4c72ba3807d150de2e1f600edd981d15d402e3d25753cf168d uri: huggingface://bartowski/Qwen2-VL-72B-Instruct-GGUF/Qwen2-VL-72B-Instruct-Q4_K_M.gguf +- !!merge <<: *qwen25 + name: "tq2.5-14b-aletheia-v1" + icon: https://huggingface.co/allura-org/TQ2.5-14B-Aletheia-v1/resolve/main/aletheia.png + urls: + - https://huggingface.co/allura-org/TQ2.5-14B-Aletheia-v1 + - https://huggingface.co/bartowski/TQ2.5-14B-Aletheia-v1-GGUF + description: | + RP/Story hybrid model, merge of Sugarquill and Neon. As with the Gemma version, I wanted to preserve Sugarquill's creative spark, while making the model more steerable for RP. It proved to be more difficult this time, but I quite like the result regardless, even if the model is still somewhat temperamental. + + Should work for both RP and storywriting, either on raw completion or with back-and-forth cowriting in chat mode. Seems to be quite sensitive to low-depth instructions and samplers. + + Thanks to Toasty and Fizz for testing and giving feedback. + + Model was created by Auri.
+ overrides: + parameters: + model: TQ2.5-14B-Aletheia-v1-Q4_K_M.gguf + files: + - filename: TQ2.5-14B-Aletheia-v1-Q4_K_M.gguf + sha256: 8739a9575520f8460e83905f3e085883dd71ef2c9fa40d36d4e0a3fff003440c + uri: huggingface://bartowski/TQ2.5-14B-Aletheia-v1-GGUF/TQ2.5-14B-Aletheia-v1-Q4_K_M.gguf - &archfunct license: apache-2.0 tags: From badbd212f7ab069e84f493c2b79fa3a00b4c71a5 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Fri, 20 Dec 2024 16:11:16 +0100 Subject: [PATCH 079/849] chore(model gallery): add tq2.5-14b-neon-v1 (#4441) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index e24b6543..7d69878b 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -2180,6 +2180,21 @@ - filename: TQ2.5-14B-Aletheia-v1-Q4_K_M.gguf sha256: 8739a9575520f8460e83905f3e085883dd71ef2c9fa40d36d4e0a3fff003440c uri: huggingface://bartowski/TQ2.5-14B-Aletheia-v1-GGUF/TQ2.5-14B-Aletheia-v1-Q4_K_M.gguf +- !!merge <<: *qwen25 + name: "tq2.5-14b-neon-v1" + icon: https://huggingface.co/allura-org/TQ2.5-14B-Neon-v1/resolve/main/neon.png + urls: + - https://huggingface.co/allura-org/TQ2.5-14B-Neon-v1 + - https://huggingface.co/bartowski/TQ2.5-14B-Neon-v1-GGUF + description: | + RP finetune of Supernova-Medius. Turned out surprisingly nice on its own; I honestly made it only as merge fuel, but it impressed me and Prodeus enough to release it separately (history repeats I guess, Sugarquill also started out this way). Quite interesting prose, definitely quite distinct from Supernova or EVA for that matter. Instruction following is decent as well. Not really much to say about this one, just a decent RP model, tbh. Euryale-inspired I guess. + overrides: + parameters: + model: TQ2.5-14B-Neon-v1-Q4_K_M.gguf + files: + - filename: TQ2.5-14B-Neon-v1-Q4_K_M.gguf + sha256: cefc7409b21e03e4fcd64940e30f6a0c17c5a4a89e0ba0811f1b9720825d2309 + uri: huggingface://bartowski/TQ2.5-14B-Neon-v1-GGUF/TQ2.5-14B-Neon-v1-Q4_K_M.gguf - &archfunct license: apache-2.0 tags: From 801a87c3a660c867b38e5f303b4122a7f6338477 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Fri, 20 Dec 2024 22:48:03 +0100 Subject: [PATCH 080/849] chore: :arrow_up: Update ggerganov/llama.cpp to `eb5c3dc64bd967f2e23c87d9dec195f45468de60` (#4442) :arrow_up: Update ggerganov/llama.cpp Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index a054c575..79e7b6a5 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ DETECT_LIBS?=true # llama.cpp versions GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be -CPPLLAMA_VERSION?=d408bb9268a988c5a60a5746d3a6430386e7604d +CPPLLAMA_VERSION?=eb5c3dc64bd967f2e23c87d9dec195f45468de60 # whisper.cpp version WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp From 5b5fb9c22a0a0e68abdc10dad2489043432a3209 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sat, 21 Dec 2024 10:39:58 +0100 Subject: [PATCH 081/849] chore(model gallery): add orca_mini_v8_1_70b (#4444) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index 7d69878b..6f23a9db 100644 --- a/gallery/index.yaml +++
b/gallery/index.yaml @@ -4091,6 +4091,21 @@ - filename: Llama-OpenReviewer-8B-Q4_K_M.gguf sha256: b48fd7eee01738de4adcb271fc3c7c5b306f8c75b9804794706dbfdf7a6835f0 uri: huggingface://bartowski/Llama-OpenReviewer-8B-GGUF/Llama-OpenReviewer-8B-Q4_K_M.gguf +- !!merge <<: *llama31 + name: "orca_mini_v8_1_70b" + icon: https://huggingface.co/pankajmathur/orca_mini_v5_8b/resolve/main/orca_minis_small.jpeg + urls: + - https://huggingface.co/pankajmathur/orca_mini_v8_1_70b + - https://huggingface.co/bartowski/orca_mini_v8_1_70b-GGUF + description: | + Orca_Mini_v8_1_Llama-3.3-70B-Instruct is trained with various SFT Datasets on Llama-3.3-70B-Instruct. + overrides: + parameters: + model: orca_mini_v8_1_70b-Q4_K_M.gguf + files: + - filename: orca_mini_v8_1_70b-Q4_K_M.gguf + sha256: 97627730b028d4d7a349ae0b8e219207163ec425e4e1c057e445b2a66b61fdfa + uri: huggingface://bartowski/orca_mini_v8_1_70b-GGUF/orca_mini_v8_1_70b-Q4_K_M.gguf - &deepseek ## Deepseek url: "github:mudler/LocalAI/gallery/deepseek.yaml@master" From a429ec1b3f3f8af521ac906bff2a6cebbe5781da Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Sat, 21 Dec 2024 22:44:44 +0100 Subject: [PATCH 082/849] chore: :arrow_up: Update ggerganov/llama.cpp to `5cd85b5e008de2ec398d6596e240187d627561e3` (#4445) :arrow_up: Update ggerganov/llama.cpp Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 79e7b6a5..5fdc39a8 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ DETECT_LIBS?=true # llama.cpp versions GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be -CPPLLAMA_VERSION?=eb5c3dc64bd967f2e23c87d9dec195f45468de60 +CPPLLAMA_VERSION?=5cd85b5e008de2ec398d6596e240187d627561e3 # whisper.cpp version WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp From 396fb88e332a2f283a61a171a9fb667e81088b27 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sun, 22 Dec 2024 08:33:35 +0100 Subject: [PATCH 083/849] chore(model gallery): add anubis-70b-v1 (#4446) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index 6f23a9db..867c9ad5 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -126,6 +126,21 @@ - filename: L3.3-MS-Evayale-70B-Q4_K_M.gguf sha256: f941d88870fec8343946517a1802d159d23f3971eeea50b6cf12295330bd29cc uri: huggingface://bartowski/L3.3-MS-Evayale-70B-GGUF/L3.3-MS-Evayale-70B-Q4_K_M.gguf +- !!merge <<: *llama33 + name: "anubis-70b-v1" + icon: https://cdn-uploads.huggingface.co/production/uploads/65f2fd1c25b848bd061b5c2e/qQbZvnrWYvH8dMZORLBJn.webp + urls: + - https://huggingface.co/TheDrummer/Anubis-70B-v1 + - https://huggingface.co/bartowski/Anubis-70B-v1-GGUF + description: | + It's a very balanced model between the L3.3 tunes. It's very creative, able to come up with new and interesting scenarios on its own that will thoroughly surprise you in ways that remind me of a 123B model. It has some of the most natural-sounding dialogue and prose that can come out of any model I've tried with the right swipe, in a way that truly brings your characters and RP to life and makes you feel like you're talking to a human writer instead of an AI - a quality that reminds me of Character AI in its prime.
This model loves a great prompt and thrives off instructions. + overrides: + parameters: + model: Anubis-70B-v1-Q4_K_M.gguf + files: + - filename: Anubis-70B-v1-Q4_K_M.gguf + sha256: 9135f7090c675726469bd3a108cfbdddaa18638bad8e513928410de4b8bfd4d4 + uri: huggingface://bartowski/Anubis-70B-v1-GGUF/Anubis-70B-v1-Q4_K_M.gguf - &rwkv url: "github:mudler/LocalAI/gallery/rwkv.yaml@master" name: "rwkv-6-world-7b" From d45477b0038e4bdb72972d7df11d6f95b12ed905 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sun, 22 Dec 2024 08:38:47 +0100 Subject: [PATCH 084/849] chore(model gallery): add llama-3.3-70b-instruct-ablated (#4448) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index 867c9ad5..0855ae7a 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -141,6 +141,25 @@ - filename: Anubis-70B-v1-Q4_K_M.gguf sha256: 9135f7090c675726469bd3a108cfbdddaa18638bad8e513928410de4b8bfd4d4 uri: huggingface://bartowski/Anubis-70B-v1-GGUF/Anubis-70B-v1-Q4_K_M.gguf +- !!merge <<: *llama33 + name: "llama-3.3-70b-instruct-ablated" + icon: https://cdn-uploads.huggingface.co/production/uploads/6587d8dd1b44d0e694104fbf/0dkt6EhZYwXVBxvSWXdaM.png + urls: + - https://huggingface.co/NaniDAO/Llama-3.3-70B-Instruct-ablated + - https://huggingface.co/bartowski/Llama-3.3-70B-Instruct-ablated-GGUF + description: | + Llama 3.3 instruct 70B 128k context with ablation technique applied for a more helpful (and based) assistant. + + This means it will refuse less of your valid requests for an uncensored UX. Use responsibly and use common sense. + + We do not take any responsibility for how you apply this intelligence, just as we do not for how you apply your own. 
+ overrides: + parameters: + model: Llama-3.3-70B-Instruct-ablated-Q4_K_M.gguf + files: + - filename: Llama-3.3-70B-Instruct-ablated-Q4_K_M.gguf + sha256: 090b2288810c5f6f680ff5cb4bc97665393d115c011fcd54dca6aec02e74a983 + uri: huggingface://bartowski/Llama-3.3-70B-Instruct-ablated-GGUF/Llama-3.3-70B-Instruct-ablated-Q4_K_M.gguf - &rwkv url: "github:mudler/LocalAI/gallery/rwkv.yaml@master" name: "rwkv-6-world-7b" From 478014ca18a372ddf8f3a5fdc7adfa6970661f28 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sun, 22 Dec 2024 21:28:38 +0100 Subject: [PATCH 085/849] feat(Dockerfile): allow to skip driver installation (#4447) Signed-off-by: Ettore Di Giacinto --- Dockerfile | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/Dockerfile b/Dockerfile index 01e5f6ac..42c1c1fc 100644 --- a/Dockerfile +++ b/Dockerfile @@ -115,12 +115,13 @@ FROM requirements-${IMAGE_TYPE} AS requirements-drivers ARG BUILD_TYPE ARG CUDA_MAJOR_VERSION=12 ARG CUDA_MINOR_VERSION=0 +ARG SKIP_DRIVERS=false ENV BUILD_TYPE=${BUILD_TYPE} # Vulkan requirements RUN < Date: Sun, 22 Dec 2024 21:29:33 +0100 Subject: [PATCH 086/849] chore(nvidia-l4t): add l4t arm64 images (#4449) chore(nvidia-l4t): add nvidia-l4t arm64 images Signed-off-by: Ettore Di Giacinto --- .github/workflows/image.yml | 20 ++++++++++++++++++++ .github/workflows/image_build.yml | 6 ++++++ 2 files changed, 26 insertions(+) diff --git a/.github/workflows/image.yml b/.github/workflows/image.yml index 6db8bb07..400bcdc7 100644 --- a/.github/workflows/image.yml +++ b/.github/workflows/image.yml @@ -280,6 +280,7 @@ jobs: makeflags: ${{ matrix.makeflags }} latest-image: ${{ matrix.latest-image }} latest-image-aio: ${{ matrix.latest-image-aio }} + skip-drivers: ${{ matrix.skip-drivers }} secrets: dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }} dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }} @@ -301,6 +302,7 @@ jobs: latest-image: 'latest-cpu' latest-image-aio: 'latest-aio-cpu' makeflags: "--jobs=4 --output-sync=target" + skip-drivers: 'false' - build-type: 'cublas' cuda-major-version: "11" cuda-minor-version: "7" @@ -312,6 +314,7 @@ jobs: base-image: "ubuntu:22.04" runs-on: 'arc-runner-set' makeflags: "--jobs=4 --output-sync=target" + skip-drivers: 'false' - build-type: 'cublas' cuda-major-version: "12" cuda-minor-version: "0" @@ -323,6 +326,7 @@ jobs: base-image: "ubuntu:22.04" runs-on: 'arc-runner-set' makeflags: "--jobs=4 --output-sync=target" + skip-drivers: 'false' - build-type: 'cublas' cuda-major-version: "11" cuda-minor-version: "7" @@ -334,6 +338,7 @@ jobs: runs-on: 'arc-runner-set' base-image: "ubuntu:22.04" makeflags: "--jobs=4 --output-sync=target" + skip-drivers: 'false' - build-type: 'cublas' cuda-major-version: "12" cuda-minor-version: "0" @@ -344,6 +349,7 @@ jobs: image-type: 'core' runs-on: 'arc-runner-set' base-image: "ubuntu:22.04" + skip-drivers: 'false' makeflags: "--jobs=4 --output-sync=target" - build-type: 'vulkan' platforms: 'linux/amd64' @@ -354,4 +360,18 @@ jobs: image-type: 'core' runs-on: 'arc-runner-set' base-image: "ubuntu:22.04" + skip-drivers: 'false' makeflags: "--jobs=4 --output-sync=target" + - build-type: 'cublas' + cuda-major-version: "12" + cuda-minor-version: "0" + platforms: 'linux/arm64' + tag-latest: 'false' + tag-suffix: '-nvidia-l4t-arm64-core' + latest-image: 'latest-nvidia-l4t-arm64-core' + ffmpeg: 'true' + image-type: 'core' + base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0" + runs-on: 'arc-runner-set' + makeflags: "--jobs=4 --output-sync=target" + skip-drivers: 'true' \ No newline at end 
of file diff --git a/.github/workflows/image_build.yml b/.github/workflows/image_build.yml index 4a5735e5..9ad612b6 100644 --- a/.github/workflows/image_build.yml +++ b/.github/workflows/image_build.yml @@ -49,6 +49,10 @@ on: description: 'FFMPEG' default: '' type: string + skip-drivers: + description: 'Skip drivers by default' + default: 'false' + type: string image-type: description: 'Image type' default: '' @@ -234,6 +238,7 @@ jobs: GRPC_MAKEFLAGS=--jobs=4 --output-sync=target GRPC_VERSION=v1.65.0 MAKEFLAGS=${{ inputs.makeflags }} + SKIP_DRIVERS=${{ inputs.skip-drivers }} context: . file: ./Dockerfile cache-from: type=gha @@ -262,6 +267,7 @@ jobs: GRPC_MAKEFLAGS=--jobs=4 --output-sync=target GRPC_VERSION=v1.65.0 MAKEFLAGS=${{ inputs.makeflags }} + SKIP_DRIVERS=${{ inputs.skip-drivers }} context: . file: ./Dockerfile cache-from: type=gha From 23499ddc8a53c6b1c911d34126290fb0bec91679 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Sun, 22 Dec 2024 23:56:41 +0100 Subject: [PATCH 087/849] chore: :arrow_up: Update ggerganov/llama.cpp to `ebdee9478ca7ba65497b9b96f7457698c6ee5115` (#4451) :arrow_up: Update ggerganov/llama.cpp Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 5fdc39a8..c4e7b892 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ DETECT_LIBS?=true # llama.cpp versions GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be -CPPLLAMA_VERSION?=5cd85b5e008de2ec398d6596e240187d627561e3 +CPPLLAMA_VERSION?=ebdee9478ca7ba65497b9b96f7457698c6ee5115 # whisper.cpp version WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp From 4dd9ac39b079b3b22a8b37f89e3a1780ba4a2616 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Mon, 23 Dec 2024 12:34:39 +0100 Subject: [PATCH 088/849] chore(ci): comment arm64 job until we find a native CI runner (#4452) Signed-off-by: Ettore Di Giacinto --- .github/workflows/image.yml | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/.github/workflows/image.yml b/.github/workflows/image.yml index 400bcdc7..e806f123 100644 --- a/.github/workflows/image.yml +++ b/.github/workflows/image.yml @@ -362,16 +362,16 @@ jobs: base-image: "ubuntu:22.04" skip-drivers: 'false' makeflags: "--jobs=4 --output-sync=target" - - build-type: 'cublas' - cuda-major-version: "12" - cuda-minor-version: "0" - platforms: 'linux/arm64' - tag-latest: 'false' - tag-suffix: '-nvidia-l4t-arm64-core' - latest-image: 'latest-nvidia-l4t-arm64-core' - ffmpeg: 'true' - image-type: 'core' - base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0" - runs-on: 'arc-runner-set' - makeflags: "--jobs=4 --output-sync=target" - skip-drivers: 'true' \ No newline at end of file + # - build-type: 'cublas' + # cuda-major-version: "12" + # cuda-minor-version: "0" + # platforms: 'linux/arm64' + # tag-latest: 'false' + # tag-suffix: '-nvidia-l4t-arm64-core' + # latest-image: 'latest-nvidia-l4t-arm64-core' + # ffmpeg: 'true' + # image-type: 'core' + # base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0" + # runs-on: 'arc-runner-set' + # makeflags: "--jobs=4 --output-sync=target" + # skip-drivers: 'true' \ No newline at end of file From 138cd97ce7f599cab64331f0d1a743b38b32f587 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Mon, 23 Dec 2024 15:37:57 
+0100 Subject: [PATCH 089/849] chore(ci): try to add CirrusCI to build arm64 images natively Signed-off-by: Ettore Di Giacinto --- .cirrus.yml | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) create mode 100644 .cirrus.yml diff --git a/.cirrus.yml b/.cirrus.yml new file mode 100644 index 00000000..595bd386 --- /dev/null +++ b/.cirrus.yml @@ -0,0 +1,24 @@ +## This builds the docker image for L4T with the arm64 architecture +docker_builder: + only_if: $CIRRUS_TAG != '' || $CIRRUS_BRANCH != '' + env: + CIRRUS_ARCH: arm64 + IMAGE_NAME: localai/localai + DOCKER_USERNAME: ENCRYPTED[!944ba150d3a7a27f3a9b7d3cc0f481989907ddeea9391a345ec751b70de1620f064cce952491a2fe2b434b1b9e0270b2!] + DOCKER_PASSWORD: ENCRYPTED[!2c9b4746de765859c11df043adafe1247876c7d535edc439eac66d8c1e88029b96e67a8023c36dfec30ee78ca5109089!] + build_script: | + BUILD_ARGS="--build-arg SKIP_DRIVERS=true --build-arg BUILD_TYPE=cublas --build-arg BASE_IMAGE=nvcr.io/nvidia/l4t-jetpack:r36.4.0 --build-arg IMAGE_TYPE=core" + if [ -n "$CIRRUS_TAG" ]; then + docker build $BUILD_ARGS --tag $IMAGE_NAME:$CIRRUS_TAG-nvidia-l4t-arm64-core . + docker tag $IMAGE_NAME:$CIRRUS_TAG-nvidia-l4t-arm64-core $IMAGE_NAME:latest-nvidia-l4t-arm64-core + else + docker build $BUILD_ARGS --tag $IMAGE_NAME:$CIRRUS_BRANCH-nvidia-l4t-arm64-core . + fi + login_script: echo $DOCKER_PASSWORD | docker login -u $DOCKER_USERNAME --password-stdin + push_script: | + if [ -n "$CIRRUS_TAG" ]; then + docker push $IMAGE_NAME:$CIRRUS_TAG-nvidia-l4t-arm64-core + docker push $IMAGE_NAME:latest-nvidia-l4t-arm64-core + else + docker push $IMAGE_NAME:$CIRRUS_BRANCH-nvidia-l4t-arm64-core + fi \ No newline at end of file From 6477913e8f7d9e3c83184bcd80a32dc1ce2bd8e4 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Mon, 23 Dec 2024 16:43:32 +0100 Subject: [PATCH 090/849] chore(ci): increase task timeout Signed-off-by: Ettore Di Giacinto --- .cirrus.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.cirrus.yml b/.cirrus.yml index 595bd386..e9fe2db2 100644 --- a/.cirrus.yml +++ b/.cirrus.yml @@ -1,6 +1,7 @@ ## This builds the docker image for L4T with the arm64 architecture docker_builder: only_if: $CIRRUS_TAG != '' || $CIRRUS_BRANCH != '' + timeout_in: 120m env: CIRRUS_ARCH: arm64 IMAGE_NAME: localai/localai @@ -21,4 +22,4 @@ docker_builder: docker push $IMAGE_NAME:latest-nvidia-l4t-arm64-core else docker push $IMAGE_NAME:$CIRRUS_BRANCH-nvidia-l4t-arm64-core - fi \ No newline at end of file + fi From a3b675b09ed47ec2dd0f3f373e6dab9d03f9ab99 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Mon, 23 Dec 2024 18:31:50 +0100 Subject: [PATCH 091/849] Delete .cirrus.yml Signed-off-by: Ettore Di Giacinto --- .cirrus.yml | 25 ------------------------- 1 file changed, 25 deletions(-) delete mode 100644 .cirrus.yml diff --git a/.cirrus.yml b/.cirrus.yml deleted file mode 100644 index e9fe2db2..00000000 --- a/.cirrus.yml +++ /dev/null @@ -1,25 +0,0 @@ -## This builds the docker image for L4T with the arm64 architecture -docker_builder: - only_if: $CIRRUS_TAG != '' || $CIRRUS_BRANCH != '' - timeout_in: 120m - env: - CIRRUS_ARCH: arm64 - IMAGE_NAME: localai/localai - DOCKER_USERNAME: ENCRYPTED[!944ba150d3a7a27f3a9b7d3cc0f481989907ddeea9391a345ec751b70de1620f064cce952491a2fe2b434b1b9e0270b2!] - DOCKER_PASSWORD: ENCRYPTED[!2c9b4746de765859c11df043adafe1247876c7d535edc439eac66d8c1e88029b96e67a8023c36dfec30ee78ca5109089!] 
- build_script: | - BUILD_ARGS="--build-arg SKIP_DRIVERS=true --build-arg BUILD_TYPE=cublas --build-arg BASE_IMAGE=nvcr.io/nvidia/l4t-jetpack:r36.4.0 --build-arg IMAGE_TYPE=core" - if [ -n "$CIRRUS_TAG" ]; then - docker build $BUILD_ARGS --tag $IMAGE_NAME:$CIRRUS_TAG-nvidia-l4t-arm64-core . - docker tag $IMAGE_NAME:$CIRRUS_TAG-nvidia-l4t-arm64-core $IMAGE_NAME:latest-nvidia-l4t-arm64-core - else - docker build $BUILD_ARGS --tag $IMAGE_NAME:$CIRRUS_BRANCH-nvidia-l4t-arm64-core . - fi - login_script: echo $DOCKER_PASSWORD | docker login -u $DOCKER_USERNAME --password-stdin - push_script: | - if [ -n "$CIRRUS_TAG" ]; then - docker push $IMAGE_NAME:$CIRRUS_TAG-nvidia-l4t-arm64-core - docker push $IMAGE_NAME:latest-nvidia-l4t-arm64-core - else - docker push $IMAGE_NAME:$CIRRUS_BRANCH-nvidia-l4t-arm64-core - fi From cab9f88ca4acf42be989fd808ace6fdec12c820d Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Mon, 23 Dec 2024 18:59:33 +0100 Subject: [PATCH 092/849] chore(docs): add nvidia l4t instructions (#4454) Signed-off-by: Ettore Di Giacinto --- docs/content/docs/reference/nvidia-l4t.md | 35 +++++++++++++++++++++++ 1 file changed, 35 insertions(+) create mode 100644 docs/content/docs/reference/nvidia-l4t.md diff --git a/docs/content/docs/reference/nvidia-l4t.md b/docs/content/docs/reference/nvidia-l4t.md new file mode 100644 index 00000000..028ee531 --- /dev/null +++ b/docs/content/docs/reference/nvidia-l4t.md @@ -0,0 +1,35 @@ + ++++ +disableToc = false +title = "Running on Nvidia ARM64" +weight = 27 ++++ + +LocalAI can be run on Nvidia ARM64 devices, such as the Jetson Nano, Jetson Xavier NX, and Jetson AGX Xavier. The following instructions will guide you through building the LocalAI container for Nvidia ARM64 devices. + +## Prerequisites + +- Docker engine installed (https://docs.docker.com/engine/install/ubuntu/) +- Nvidia container toolkit installed (https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html#installing-with-ap) + +## Build the container + +Build the LocalAI container for Nvidia ARM64 devices using the following command: + +```bash +git clone https://github.com/mudler/LocalAI + +cd LocalAI + +docker build --build-arg SKIP_DRIVERS=true --build-arg BUILD_TYPE=cublas --build-arg BASE_IMAGE=nvcr.io/nvidia/l4t-jetpack:r36.4.0 --build-arg IMAGE_TYPE=core -t localai-orin . +``` + +## Usage + +Run the LocalAI container on Nvidia ARM64 devices using the following command, where `/data/models` is the directory containing the models: + +```bash +docker run -e DEBUG=true -p 8080:8080 -v /data/models:/build/models -ti --restart=always --name local-ai --runtime nvidia --gpus all localai-orin +``` + +Note: `/data/models` is the directory containing the models. You can replace it with the directory containing your models. 
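Once the container is running, a quick smoke test confirms the API is reachable. This is a minimal sketch, assuming the `docker run` command above succeeded, the API is listening on the default port 8080, and at least one GGUF model file is present in `/data/models`; the model name below is a placeholder, not a real file name.

```bash
# List the models the server discovered under /build/models
# (the container path that /data/models is mapped to above)
curl http://localhost:8080/v1/models

# Minimal chat completion; replace "my-model.gguf" (a placeholder)
# with one of the names returned by the previous call
curl http://localhost:8080/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{"model": "my-model.gguf", "messages": [{"role": "user", "content": "Hello"}]}'
```

If the first call returns an empty model list, the volume mount is the usual suspect: the host directory must be mapped to `/build/models` exactly as in the run command above.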
From 0eb2911aad47d5e5c2c68c02f12abfe7456f3027 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Mon, 23 Dec 2024 19:11:31 +0100 Subject: [PATCH 093/849] chore(llava): update clip.patch (#4453) Signed-off-by: Ettore Di Giacinto --- backend/cpp/llama/patches/01-llava.patch | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/backend/cpp/llama/patches/01-llava.patch b/backend/cpp/llama/patches/01-llava.patch index fa122da2..77124628 100644 --- a/backend/cpp/llama/patches/01-llava.patch +++ b/backend/cpp/llama/patches/01-llava.patch @@ -1,13 +1,13 @@ diff --git a/examples/llava/clip.cpp b/examples/llava/clip.cpp -index 342042ff..224db9b5 100644 +index 3cd0d2fa..6c5e811a 100644 --- a/examples/llava/clip.cpp +++ b/examples/llava/clip.cpp -@@ -2419,7 +2419,7 @@ bool clip_image_batch_encode(clip_ctx * ctx, const int n_threads, const clip_ima - struct ggml_tensor * patches = ggml_graph_get_tensor(gf, "patches"); - int* patches_data = (int*)malloc(ggml_nbytes(patches)); - for (int i = 0; i < num_patches; i++) { -- patches_data[i] = i + 1; -+ patches_data[i] = i; - } - ggml_backend_tensor_set(patches, patches_data, 0, ggml_nbytes(patches)); - free(patches_data); \ No newline at end of file +@@ -2608,7 +2608,7 @@ bool clip_image_batch_encode(clip_ctx * ctx, const int n_threads, const clip_ima + struct ggml_tensor * patches = ggml_graph_get_tensor(gf, "patches"); + int* patches_data = (int*)malloc(ggml_nbytes(patches)); + for (int i = 0; i < num_patches; i++) { +- patches_data[i] = i + 1; ++ patches_data[i] = i; + } + ggml_backend_tensor_set(patches, patches_data, 0, ggml_nbytes(patches)); + free(patches_data); \ No newline at end of file From 159a7f6df2140305261dccb499e0e87690ae4d18 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 23 Dec 2024 22:15:38 +0000 Subject: [PATCH 094/849] chore(deps): Bump docs/themes/hugo-theme-relearn from `bd1f3d3` to `ec88e24` (#4460) chore(deps): Bump docs/themes/hugo-theme-relearn Bumps [docs/themes/hugo-theme-relearn](https://github.com/McShelby/hugo-theme-relearn) from `bd1f3d3` to `ec88e24`. - [Release notes](https://github.com/McShelby/hugo-theme-relearn/releases) - [Commits](https://github.com/McShelby/hugo-theme-relearn/compare/bd1f3d3432632c61bb12e7ec0f7673fed0289f19...ec88e24f46955bcf1aa3f38ac143982eff08d8a6) --- updated-dependencies: - dependency-name: docs/themes/hugo-theme-relearn dependency-type: direct:production ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- docs/themes/hugo-theme-relearn | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/themes/hugo-theme-relearn b/docs/themes/hugo-theme-relearn index bd1f3d34..ec88e24f 160000 --- a/docs/themes/hugo-theme-relearn +++ b/docs/themes/hugo-theme-relearn @@ -1 +1 @@ -Subproject commit bd1f3d3432632c61bb12e7ec0f7673fed0289f19 +Subproject commit ec88e24f46955bcf1aa3f38ac143982eff08d8a6 From d0adbee75d4df4b67b2208c7024fc94d6a194d4c Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Tue, 24 Dec 2024 10:55:30 +0100 Subject: [PATCH 095/849] chore: :arrow_up: Update ggerganov/llama.cpp to `32d6ee6385b3fc908b283f509b845f757a6e7206` (#4486) :arrow_up: Update ggerganov/llama.cpp Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index c4e7b892..682d7e63 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ DETECT_LIBS?=true # llama.cpp versions GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be -CPPLLAMA_VERSION?=ebdee9478ca7ba65497b9b96f7457698c6ee5115 +CPPLLAMA_VERSION?=32d6ee6385b3fc908b283f509b845f757a6e7206 # whisper.cpp version WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp From c7c275c7c83aecaefc91754627e8aedf29e9814f Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Tue, 24 Dec 2024 10:56:02 +0100 Subject: [PATCH 096/849] chore(model-gallery): :arrow_up: update checksum (#4487) :arrow_up: Checksum updates in gallery/index.yaml Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- gallery/index.yaml | 24 ++++++------------------ 1 file changed, 6 insertions(+), 18 deletions(-) diff --git a/gallery/index.yaml b/gallery/index.yaml index 0855ae7a..4984f783 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -15,8 +15,8 @@ model: Falcon3-1B-Instruct-Q4_K_M.gguf files: - filename: Falcon3-1B-Instruct-Q4_K_M.gguf - sha256: d351a6506b7d21221f3858b04d98c8b1b7b108b85acde2b13b69d9cb06e2a7e9 uri: huggingface://bartowski/Falcon3-1B-Instruct-GGUF/Falcon3-1B-Instruct-Q4_K_M.gguf + sha256: 1c92013dac1ab6e703e787f3e0829ca03cc95311e4c113a77950d15ff6dea7b3 tags: - llm - gguf @@ -34,8 +34,8 @@ model: Falcon3-3B-Instruct-Q4_K_M.gguf files: - filename: Falcon3-3B-Instruct-Q4_K_M.gguf - sha256: e6d81653ee28c6944e4f9ab626882faabb69db8019ddcf87f2732d05f3d9158a uri: huggingface://bartowski/Falcon3-3B-Instruct-GGUF/Falcon3-3B-Instruct-Q4_K_M.gguf + sha256: 6ea6cecba144fe5b711ca07ae4263ccdf6ee6419807a46220419189da8446557 - !!merge <<: *falcon3 name: "falcon3-10b-instruct" urls: @@ -46,8 +46,8 @@ model: Falcon3-10B-Instruct-Q4_K_M.gguf files: - filename: Falcon3-10B-Instruct-Q4_K_M.gguf - sha256: 6d54a35d740a616061d6c7d7740d64f4339410e58aaba985aa9e1ea79c7e882a uri: huggingface://bartowski/Falcon3-10B-Instruct-GGUF/Falcon3-10B-Instruct-Q4_K_M.gguf + sha256: 0a33327bd71e1788a8e9f17889824a17a65efd3f96a4b2a5e2bc6ff2f39b8241 - &intellect1 name: "intellect-1-instruct" url: "github:mudler/LocalAI/gallery/llama3.1-instruct.yaml@master" @@ -915,8 +915,8 @@ model: Llama-Song-Stream-3B-Instruct-Q4_K_M.gguf files: - 
filename: Llama-Song-Stream-3B-Instruct-Q4_K_M.gguf - sha256: 1a0f0aef16e5be46be827c430cbe5ce4b2915b2f4a1dd60b98792004d39b9f52 uri: huggingface://bartowski/Llama-Song-Stream-3B-Instruct-GGUF/Llama-Song-Stream-3B-Instruct-Q4_K_M.gguf + sha256: 62e4a79eb7a0f80184dc37ab01a5490708e600dad5f074de8bcda6ec5a77cca8 - !!merge <<: *llama32 name: "llama-chat-summary-3.2-3b" urls: @@ -1932,19 +1932,7 @@ urls: - https://huggingface.co/ZeroXClem/Qwen2.5-7B-HomerCreative-Mix - https://huggingface.co/QuantFactory/Qwen2.5-7B-HomerCreative-Mix-GGUF - description: | - ZeroXClem/Qwen2.5-7B-HomerCreative-Mix is an advanced language model meticulously crafted by merging four pre-trained models using the powerful mergekit framework. This fusion leverages the Model Stock merge method to combine the creative prowess of Qandora, the instructive capabilities of Qwen-Instruct-Fusion, the sophisticated blending of HomerSlerp1, and the foundational conversational strengths of Homer-v0.5-Qwen2.5-7B. The resulting model excels in creative text generation, contextual understanding, and dynamic conversational interactions. - 🚀 Merged Models - - This model merge incorporates the following: - - bunnycore/Qandora-2.5-7B-Creative: Specializes in creative text generation, enhancing the model's ability to produce imaginative and diverse content. - - bunnycore/Qwen2.5-7B-Instruct-Fusion: Focuses on instruction-following capabilities, improving the model's performance in understanding and executing user commands. - - allknowingroger/HomerSlerp1-7B: Utilizes spherical linear interpolation (SLERP) to blend model weights smoothly, ensuring a harmonious integration of different model attributes. - - newsbang/Homer-v0.5-Qwen2.5-7B: Acts as the foundational conversational model, providing robust language comprehension and generation capabilities. + description: "ZeroXClem/Qwen2.5-7B-HomerCreative-Mix is an advanced language model meticulously crafted by merging four pre-trained models using the powerful mergekit framework. This fusion leverages the Model Stock merge method to combine the creative prowess of Qandora, the instructive capabilities of Qwen-Instruct-Fusion, the sophisticated blending of HomerSlerp1, and the foundational conversational strengths of Homer-v0.5-Qwen2.5-7B. The resulting model excels in creative text generation, contextual understanding, and dynamic conversational interactions.\n\U0001F680 Merged Models\n\nThis model merge incorporates the following:\n\n bunnycore/Qandora-2.5-7B-Creative: Specializes in creative text generation, enhancing the model's ability to produce imaginative and diverse content.\n\n bunnycore/Qwen2.5-7B-Instruct-Fusion: Focuses on instruction-following capabilities, improving the model's performance in understanding and executing user commands.\n\n allknowingroger/HomerSlerp1-7B: Utilizes spherical linear interpolation (SLERP) to blend model weights smoothly, ensuring a harmonious integration of different model attributes.\n\n newsbang/Homer-v0.5-Qwen2.5-7B: Acts as the foundational conversational model, providing robust language comprehension and generation capabilities.\n" overrides: parameters: model: Qwen2.5-7B-HomerCreative-Mix.Q4_K_M.gguf @@ -9960,7 +9948,7 @@ - !!merge <<: *llama32 name: "bert-embeddings" description: | - llama3.2 embeddings model. Using as drop-in replacement for bert-embeddings + llama3.2 embeddings model. 
Using as drop-in replacement for bert-embeddings tags: - embeddings overrides: From 6c716982996fcccb5f7f12aa72ecbf72cfea4a46 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Tue, 24 Dec 2024 10:59:36 +0100 Subject: [PATCH 097/849] chore(model gallery): add l3.3-ms-evalebis-70b (#4488) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index 4984f783..7c8b1ca3 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -160,6 +160,21 @@ - filename: Llama-3.3-70B-Instruct-ablated-Q4_K_M.gguf sha256: 090b2288810c5f6f680ff5cb4bc97665393d115c011fcd54dca6aec02e74a983 uri: huggingface://bartowski/Llama-3.3-70B-Instruct-ablated-GGUF/Llama-3.3-70B-Instruct-ablated-Q4_K_M.gguf +- !!merge <<: *llama33 + name: "l3.3-ms-evalebis-70b" + icon: https://cdn-uploads.huggingface.co/production/uploads/64545af5ec40bbbd01242ca6/e49ykknqXee3Ihr-3BIl_.png + urls: + - https://huggingface.co/Steelskull/L3.3-MS-Evalebis-70b + - https://huggingface.co/bartowski/L3.3-MS-Evalebis-70b-GGUF + description: | + This model was created as I liked the storytelling of EVA and the prose and details of scenes from EURYALE and Anubis; my goal is to merge the robust storytelling of all three models while attempting to maintain the positives of the models. + overrides: + parameters: + model: L3.3-MS-Evalebis-70b-Q4_K_M.gguf + files: + - filename: L3.3-MS-Evalebis-70b-Q4_K_M.gguf + sha256: 5515110ab6a583f6eb360533e3c5b3dda6d402af407c0b0f2b34a2a57b5224d5 + uri: huggingface://bartowski/L3.3-MS-Evalebis-70b-GGUF/L3.3-MS-Evalebis-70b-Q4_K_M.gguf - &rwkv url: "github:mudler/LocalAI/gallery/rwkv.yaml@master" name: "rwkv-6-world-7b" From d65c10cee7120a3706502c6990b287273cf31240 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Tue, 24 Dec 2024 11:04:19 +0100 Subject: [PATCH 098/849] chore(model gallery): add tqwendo-36b (#4489) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index 7c8b1ca3..8191fd27 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -2313,6 +2313,23 @@ - filename: QwQ-LCoT-7B-Instruct-Q4_K_M.gguf sha256: 1df2e4ff0093a9632687b73969153442776b0ffc1c3c68e7f559472f9cea1945 uri: huggingface://bartowski/QwQ-LCoT-7B-Instruct-GGUF/QwQ-LCoT-7B-Instruct-Q4_K_M.gguf +- !!merge <<: *qwen25 + name: "tqwendo-36b" + icon: "https://cdn-uploads.huggingface.co/production/uploads/6379683a81c1783a4a2ddba8/DI7Yw8Fs8eukluzKTHjEH.png" + urls: + - https://huggingface.co/nisten/tqwendo-36b + - https://huggingface.co/bartowski/tqwendo-36b-GGUF + description: | + There is a draft model to go with this one for speculative decoding and chain-of-thought reasoning: https://huggingface.co/nisten/qwen2.5-coder-7b-abliterated-128k-AWQ + + Using the above 4bit 7b in conjunction with the 36b is meant to set up a chain-of-thought reasoner and evaluator, similar to what O1-O3 is probably doing. This way the 7b 4bit only uses up an extra 4-6Gb on the GPU, but greatly speeds up both speculative decoding and chain-of-thought evals.
+ overrides: + parameters: + model: tqwendo-36b-Q4_K_M.gguf + files: + - filename: tqwendo-36b-Q4_K_M.gguf + sha256: 890ff05fb717c67848d5c02ad62b2c26fdcdd20f7cc94ade8095869784c0cc82 + uri: huggingface://bartowski/tqwendo-36b-GGUF/tqwendo-36b-Q4_K_M.gguf - &smollm ## SmolLM url: "github:mudler/LocalAI/gallery/chatml.yaml@master" From 60bf7c9dd7c15c8b9083cec6cf96a31187e8f8c8 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Tue, 24 Dec 2024 11:04:31 +0100 Subject: [PATCH 099/849] chore(model gallery): add rombos-llm-70b-llama-3.3 (#4490) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index 8191fd27..c0a48f12 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -198,6 +198,24 @@ - filename: rwkv-6-world-7b-Q4_K_M.gguf sha256: f74574186fa4584f405e92198605680db6ad00fd77974ffa14bf02073bb90273 uri: huggingface://bartowski/rwkv-6-world-7b-GGUF/rwkv-6-world-7b-Q4_K_M.gguf +- !!merge <<: *llama33 + name: "rombos-llm-70b-llama-3.3" + icon: "https://cdn-uploads.huggingface.co/production/uploads/642cc1c253e76b4c2286c58e/QErypCEKD5OZLxUcSmYaR.jpeg" + urls: + - https://huggingface.co/rombodawg/Rombos-LLM-70b-Llama-3.3 + - https://huggingface.co/bartowski/Rombos-LLM-70b-Llama-3.3-GGUF + - https://docs.google.com/document/d/1OjbjU5AOz4Ftn9xHQrX3oFQGhQ6RDUuXQipnQ9gn6tU/edit?usp=sharing + description: | + You know the drill by now. + Here is the paper. Have fun. + https://docs.google.com/document/d/1OjbjU5AOz4Ftn9xHQrX3oFQGhQ6RDUuXQipnQ9gn6tU/edit?usp=sharing + overrides: + parameters: + model: Rombos-LLM-70b-Llama-3.3-Q4_K_M.gguf + files: + - filename: Rombos-LLM-70b-Llama-3.3-Q4_K_M.gguf + sha256: 5f735b704cccf4db030b8b0f118eae6e7718bd6b50c53272dabcf48c89a37577 + uri: huggingface://bartowski/Rombos-LLM-70b-Llama-3.3-GGUF/Rombos-LLM-70b-Llama-3.3-Q4_K_M.gguf - &qwen25coder name: "qwen2.5-coder-14b" url: "github:mudler/LocalAI/gallery/chatml.yaml@master" From b9690537010c150e19a711e176c98feda656102a Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Tue, 24 Dec 2024 11:10:56 +0100 Subject: [PATCH 100/849] chore(gallery): re-order Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/gallery/index.yaml b/gallery/index.yaml index c0a48f12..60333a70 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -175,6 +175,24 @@ - filename: L3.3-MS-Evalebis-70b-Q4_K_M.gguf sha256: 5515110ab6a583f6eb360533e3c5b3dda6d402af407c0b0f2b34a2a57b5224d5 uri: huggingface://bartowski/L3.3-MS-Evalebis-70b-GGUF/L3.3-MS-Evalebis-70b-Q4_K_M.gguf +- !!merge <<: *llama33 + name: "rombos-llm-70b-llama-3.3" + icon: "https://cdn-uploads.huggingface.co/production/uploads/642cc1c253e76b4c2286c58e/QErypCEKD5OZLxUcSmYaR.jpeg" + urls: + - https://huggingface.co/rombodawg/Rombos-LLM-70b-Llama-3.3 + - https://huggingface.co/bartowski/Rombos-LLM-70b-Llama-3.3-GGUF + - https://docs.google.com/document/d/1OjbjU5AOz4Ftn9xHQrX3oFQGhQ6RDUuXQipnQ9gn6tU/edit?usp=sharing + description: | + You know the drill by now. + Here is the paper. Have fun. 
+ https://docs.google.com/document/d/1OjbjU5AOz4Ftn9xHQrX3oFQGhQ6RDUuXQipnQ9gn6tU/edit?usp=sharing + overrides: + parameters: + model: Rombos-LLM-70b-Llama-3.3-Q4_K_M.gguf + files: + - filename: Rombos-LLM-70b-Llama-3.3-Q4_K_M.gguf + sha256: 5f735b704cccf4db030b8b0f118eae6e7718bd6b50c53272dabcf48c89a37577 + uri: huggingface://bartowski/Rombos-LLM-70b-Llama-3.3-GGUF/Rombos-LLM-70b-Llama-3.3-Q4_K_M.gguf - &rwkv url: "github:mudler/LocalAI/gallery/rwkv.yaml@master" name: "rwkv-6-world-7b" @@ -198,24 +216,6 @@ - filename: rwkv-6-world-7b-Q4_K_M.gguf sha256: f74574186fa4584f405e92198605680db6ad00fd77974ffa14bf02073bb90273 uri: huggingface://bartowski/rwkv-6-world-7b-GGUF/rwkv-6-world-7b-Q4_K_M.gguf -- !!merge <<: *llama33 - name: "rombos-llm-70b-llama-3.3" - icon: "https://cdn-uploads.huggingface.co/production/uploads/642cc1c253e76b4c2286c58e/QErypCEKD5OZLxUcSmYaR.jpeg" - urls: - - https://huggingface.co/rombodawg/Rombos-LLM-70b-Llama-3.3 - - https://huggingface.co/bartowski/Rombos-LLM-70b-Llama-3.3-GGUF - - https://docs.google.com/document/d/1OjbjU5AOz4Ftn9xHQrX3oFQGhQ6RDUuXQipnQ9gn6tU/edit?usp=sharing - description: | - You know the drill by now. - Here is the paper. Have fun. - https://docs.google.com/document/d/1OjbjU5AOz4Ftn9xHQrX3oFQGhQ6RDUuXQipnQ9gn6tU/edit?usp=sharing - overrides: - parameters: - model: Rombos-LLM-70b-Llama-3.3-Q4_K_M.gguf - files: - - filename: Rombos-LLM-70b-Llama-3.3-Q4_K_M.gguf - sha256: 5f735b704cccf4db030b8b0f118eae6e7718bd6b50c53272dabcf48c89a37577 - uri: huggingface://bartowski/Rombos-LLM-70b-Llama-3.3-GGUF/Rombos-LLM-70b-Llama-3.3-Q4_K_M.gguf - &qwen25coder name: "qwen2.5-coder-14b" url: "github:mudler/LocalAI/gallery/chatml.yaml@master" From bc8dd3ad14a64464567d8a4fc1e766fdfb1cbd99 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Tue, 24 Dec 2024 22:44:11 +0100 Subject: [PATCH 101/849] chore: :arrow_up: Update ggerganov/llama.cpp to `2cd43f4900ba0e34124fdcbf02a7f9df25a10a3d` (#4491) :arrow_up: Update ggerganov/llama.cpp Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 682d7e63..14228094 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ DETECT_LIBS?=true # llama.cpp versions GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be -CPPLLAMA_VERSION?=32d6ee6385b3fc908b283f509b845f757a6e7206 +CPPLLAMA_VERSION?=2cd43f4900ba0e34124fdcbf02a7f9df25a10a3d # whisper.cpp version WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp From 1d630e41859f392a9d98149ec16b5d153bd3b9da Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Tue, 24 Dec 2024 23:04:50 +0100 Subject: [PATCH 102/849] chore(model-gallery): :arrow_up: update checksum (#4492) :arrow_up: Checksum updates in gallery/index.yaml Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- gallery/index.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gallery/index.yaml b/gallery/index.yaml index 60333a70..d1a055b3 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -191,8 +191,8 @@ model: Rombos-LLM-70b-Llama-3.3-Q4_K_M.gguf files: - filename: Rombos-LLM-70b-Llama-3.3-Q4_K_M.gguf - sha256: 
5f735b704cccf4db030b8b0f118eae6e7718bd6b50c53272dabcf48c89a37577 uri: huggingface://bartowski/Rombos-LLM-70b-Llama-3.3-GGUF/Rombos-LLM-70b-Llama-3.3-Q4_K_M.gguf + sha256: 613008b960f6fff346b5dec71a87cd7ecdaff205bfea6332bd8fe2bb46177352 - &rwkv url: "github:mudler/LocalAI/gallery/rwkv.yaml@master" name: "rwkv-6-world-7b" From 816ae7a53a00b881c9d8fc68efe2b60d5dd8b82d Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Wed, 25 Dec 2024 11:47:11 +0100 Subject: [PATCH 103/849] chore(model gallery): add fastllama-3.2-1b-instruct (#4493) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index d1a055b3..78df4904 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -964,6 +964,21 @@ - filename: Llama-Chat-Summary-3.2-3B-Q4_K_M.gguf sha256: ed1be20d2374aa6db9940923f41fa229bd7ebe13d41b1ff1ff18a6f87e99df79 uri: huggingface://bartowski/Llama-Chat-Summary-3.2-3B-GGUF/Llama-Chat-Summary-3.2-3B-Q4_K_M.gguf +- !!merge <<: *llama32 + name: "fastllama-3.2-1b-instruct" + icon: https://huggingface.co/suayptalha/FastLlama-3.2-1B-Instruct/resolve/main/FastLlama.png + urls: + - https://huggingface.co/suayptalha/FastLlama-3.2-1B-Instruct + - https://huggingface.co/bartowski/FastLlama-3.2-1B-Instruct-GGUF + description: | + FastLlama is a highly optimized version of the Llama-3.2-1B-Instruct model. Designed for superior performance in constrained environments, it combines speed, compactness, and high accuracy. This version has been fine-tuned using the MetaMathQA-50k section of the HuggingFaceTB/smoltalk dataset to enhance its mathematical reasoning and problem-solving abilities. + overrides: + parameters: + model: FastLlama-3.2-1B-Instruct-Q4_K_M.gguf + files: + - filename: FastLlama-3.2-1B-Instruct-Q4_K_M.gguf + sha256: 3c0303e9560c441a9abdcd0e4c04c47e7f6b21277c1e8c00eed94fc656da0be9 + uri: huggingface://bartowski/FastLlama-3.2-1B-Instruct-GGUF/FastLlama-3.2-1B-Instruct-Q4_K_M.gguf - &qwen25 ## Qwen2.5 name: "qwen2.5-14b-instruct" From 8f6332ab23472b4591248a2717c263ea5b389482 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Wed, 25 Dec 2024 11:47:22 +0100 Subject: [PATCH 104/849] chore(model gallery): add dans-personalityengine-v1.1.0-12b (#4494) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index 78df4904..0eb95b7b 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -5083,6 +5083,21 @@ - filename: NaturalLM-7B-Instruct-Q4_K_M.gguf sha256: 15b2f34116f690fea35790a9392b8a2190fe25827e370d426e88a2a543f4dcee uri: huggingface://bartowski/NaturalLM-7B-Instruct-GGUF/NaturalLM-7B-Instruct-Q4_K_M.gguf +- !!merge <<: *mistral03 + name: "dans-personalityengine-v1.1.0-12b" + url: "github:mudler/LocalAI/gallery/chatml.yaml@master" + urls: + - https://huggingface.co/PocketDoc/Dans-PersonalityEngine-V1.1.0-12b + - https://huggingface.co/bartowski/Dans-PersonalityEngine-V1.1.0-12b-GGUF + description: | + This model series is intended to be multifarious in its capabilities and should be quite capable at both co-writing and roleplay as well as find itself quite at home performing sentiment analysis or summarization as part of a pipeline. It has been trained on a wide array of one shot instructions, multi turn instructions, tool use, role playing scenarios, text adventure games, co-writing, and much more. 
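Since the entry above pitches the model for pipeline work such as summarization or sentiment analysis, a minimal sketch of calling it through LocalAI's OpenAI-compatible chat endpoint follows; the host, port, timeout, and prompt are assumptions, and the model name is just the gallery name from this entry.

    # One summarization call against a locally running LocalAI instance.
    import requests

    resp = requests.post(
        "http://localhost:8080/v1/chat/completions",  # default LocalAI port assumed
        json={
            "model": "dans-personalityengine-v1.1.0-12b",
            "messages": [
                {"role": "system", "content": "Summarize the user's text in one sentence."},
                {"role": "user", "content": "Paste the text to summarize here."},
            ],
            "temperature": 0.2,
        },
        timeout=120,
    )
    resp.raise_for_status()
    print(resp.json()["choices"][0]["message"]["content"])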
+ overrides: + parameters: + model: Dans-PersonalityEngine-V1.1.0-12b-Q4_K_M.gguf + files: + - filename: Dans-PersonalityEngine-V1.1.0-12b-Q4_K_M.gguf + sha256: a1afb9fddfa3f2847ed710cc374b4f17e63a75f7e10d8871cf83983c2f5415ab + uri: huggingface://bartowski/Dans-PersonalityEngine-V1.1.0-12b-GGUF/Dans-PersonalityEngine-V1.1.0-12b-Q4_K_M.gguf - &mudler ### START mudler's LocalAI specific-models url: "github:mudler/LocalAI/gallery/mudler.yaml@master" From 1a74af1492709173ed347cbde9ad53698b9922ac Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Wed, 25 Dec 2024 11:47:33 +0100 Subject: [PATCH 105/849] chore(model gallery): add llama-3.1-8b-open-sft (#4495) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index 0eb95b7b..2a765c37 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -4193,6 +4193,20 @@ - filename: orca_mini_v8_1_70b-Q4_K_M.gguf sha256: 97627730b028d4d7a349ae0b8e219207163ec425e4e1c057e445b2a66b61fdfa uri: huggingface://bartowski/orca_mini_v8_1_70b-GGUF/orca_mini_v8_1_70b-Q4_K_M.gguf +- !!merge <<: *llama31 + name: "llama-3.1-8b-open-sft" + urls: + - https://huggingface.co/prithivMLmods/Llama-3.1-8B-Open-SFT + - https://huggingface.co/bartowski/Llama-3.1-8B-Open-SFT-GGUF + description: | + The Llama-3.1-8B-Open-SFT model is a fine-tuned version of meta-llama/Llama-3.1-8B-Instruct, designed for advanced text generation tasks, including conversational interactions, question answering, and chain-of-thought reasoning. This model leverages Supervised Fine-Tuning (SFT) using the O1-OPEN/OpenO1-SFT dataset to provide enhanced performance in context-sensitive and instruction-following tasks. + overrides: + parameters: + model: Llama-3.1-8B-Open-SFT-Q4_K_M.gguf + files: + - filename: Llama-3.1-8B-Open-SFT-Q4_K_M.gguf + sha256: ce75152763c48c5386fe59652cc921aae456da36ab82af3d9e2080f603f45132 + uri: huggingface://bartowski/Llama-3.1-8B-Open-SFT-GGUF/Llama-3.1-8B-Open-SFT-Q4_K_M.gguf - &deepseek ## Deepseek url: "github:mudler/LocalAI/gallery/deepseek.yaml@master" From 5c29e0cd4d1c79da4b6e0416f16384478c1f5f44 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Wed, 25 Dec 2024 22:43:06 +0100 Subject: [PATCH 106/849] chore: :arrow_up: Update ggerganov/llama.cpp to `9ba399dfa7f115effc63d48e6860a94c9faa31b2` (#4496) :arrow_up: Update ggerganov/llama.cpp Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 14228094..16f7a6b0 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ DETECT_LIBS?=true # llama.cpp versions GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be -CPPLLAMA_VERSION?=2cd43f4900ba0e34124fdcbf02a7f9df25a10a3d +CPPLLAMA_VERSION?=9ba399dfa7f115effc63d48e6860a94c9faa31b2 # whisper.cpp version WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp From 1a14c7d45a770ea1a4a0dc9af37a61ae4f48e373 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Thu, 26 Dec 2024 10:47:54 +0100 Subject: [PATCH 107/849] chore(model gallery): add qvq-72b-preview (#4498) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index 2a765c37..df4f0366 
100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -2363,6 +2363,25 @@ - filename: tqwendo-36b-Q4_K_M.gguf sha256: 890ff05fb717c67848d5c02ad62b2c26fdcdd20f7cc94ade8095869784c0cc82 uri: huggingface://bartowski/tqwendo-36b-GGUF/tqwendo-36b-Q4_K_M.gguf +- !!merge <<: *qwen25 + name: "qvq-72b-preview" + urls: + - https://huggingface.co/Qwen/QVQ-72B-Preview + - https://huggingface.co/bartowski/QVQ-72B-Preview-GGUF + description: | + QVQ-72B-Preview is an experimental research model developed by the Qwen team, focusing on enhancing visual reasoning capabilities. + QVQ-72B-Preview has achieved remarkable performance on various benchmarks. It scored a remarkable 70.3% on the Multimodal Massive Multi-task Understanding (MMMU) benchmark, showcasing QVQ's powerful ability in multidisciplinary understanding and reasoning. Furthermore, the significant improvements on MathVision highlight the model's progress in mathematical reasoning tasks. OlympiadBench also demonstrates the model's enhanced ability to tackle challenging problems. + overrides: + mmproj: mmproj-QVQ-72B-Preview-f16.gguf + parameters: + model: QVQ-72B-Preview-Q4_K_M.gguf + files: + - filename: QVQ-72B-Preview-Q4_K_M.gguf + sha256: 0fab6809995614c19e4b4c23e3191824944a04999f742486278f0d9929dc82ae + uri: huggingface://bartowski/QVQ-72B-Preview-GGUF/QVQ-72B-Preview-Q4_K_M.gguf + - filename: mmproj-QVQ-72B-Preview-f16.gguf + sha256: 85110223f39aa1aad887052d269074afbd52a49ae02c53b66753b033662cc8e6 + uri: huggingface://bartowski/QVQ-72B-Preview-GGUF/mmproj-QVQ-72B-Preview-f16.gguf - &smollm ## SmolLM url: "github:mudler/LocalAI/gallery/chatml.yaml@master" From 9572f0577b8f12166dd82d54d5a839661b8a3928 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Thu, 26 Dec 2024 10:52:57 +0100 Subject: [PATCH 108/849] chore(model gallery): add teleut-7b-rp (#4499) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index df4f0366..b636abed 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -2382,6 +2382,22 @@ - filename: mmproj-QVQ-72B-Preview-f16.gguf sha256: 85110223f39aa1aad887052d269074afbd52a49ae02c53b66753b033662cc8e6 uri: huggingface://bartowski/QVQ-72B-Preview-GGUF/mmproj-QVQ-72B-Preview-f16.gguf +- !!merge <<: *qwen25 + name: "teleut-7b-rp" + icon: https://cdn-uploads.huggingface.co/production/uploads/634262af8d8089ebaefd410e/2y6PHgWe4ewoMFlgn-p3d.png + urls: + - https://huggingface.co/allura-org/Teleut-7b-RP + - https://huggingface.co/bartowski/Teleut-7b-RP-GGUF + description: | + A roleplay-focused LoRA finetune of Teleut 7b. Methodology and hyperparams inspired by SorcererLM and Slush. + Dataset: The worst mix of data you've ever seen. Like, seriously, you do not want to see the things that went into this model. It's bad. 
+ overrides: + parameters: + model: Teleut-7b-RP-Q4_K_M.gguf + files: + - filename: Teleut-7b-RP-Q4_K_M.gguf + sha256: 74d9a0974c48f16677da8891ac76ed89ed04f246275b9ca8316d25e1e86ce89f + uri: huggingface://bartowski/Teleut-7b-RP-GGUF/Teleut-7b-RP-Q4_K_M.gguf - &smollm ## SmolLM url: "github:mudler/LocalAI/gallery/chatml.yaml@master" From 9cbf168dc0a00b178b45c2047987811bd698f531 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Thu, 26 Dec 2024 22:44:36 +0100 Subject: [PATCH 109/849] chore: :arrow_up: Update ggerganov/llama.cpp to `d79d8f39b4da6deca4aea8bf130c6034c482b320` (#4500) :arrow_up: Update ggerganov/llama.cpp Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 16f7a6b0..2aa34b7c 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ DETECT_LIBS?=true # llama.cpp versions GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be -CPPLLAMA_VERSION?=9ba399dfa7f115effc63d48e6860a94c9faa31b2 +CPPLLAMA_VERSION?=d79d8f39b4da6deca4aea8bf130c6034c482b320 # whisper.cpp version WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp From f3bbdef77dbbd92dba5fd4ff3ae6617ae8e5831c Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Fri, 27 Dec 2024 11:15:56 +0100 Subject: [PATCH 110/849] chore(model gallery): add falcon3-1b-instruct-abliterated (#4501) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index b636abed..1446b343 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -48,6 +48,21 @@ - filename: Falcon3-10B-Instruct-Q4_K_M.gguf uri: huggingface://bartowski/Falcon3-10B-Instruct-GGUF/Falcon3-10B-Instruct-Q4_K_M.gguf sha256: 0a33327bd71e1788a8e9f17889824a17a65efd3f96a4b2a5e2bc6ff2f39b8241 +- !!merge <<: *falcon3 + name: "falcon3-1b-instruct-abliterated" + urls: + - https://huggingface.co/huihui-ai/Falcon3-1B-Instruct-abliterated + - https://huggingface.co/bartowski/Falcon3-1B-Instruct-abliterated-GGUF + description: | + This is an uncensored version of tiiuae/Falcon3-1B-Instruct created with abliteration (see remove-refusals-with-transformers to know more about it). + This is a crude, proof-of-concept implementation to remove refusals from an LLM model without using TransformerLens. 
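For readers wondering what the abliteration mentioned in these entries does: roughly, a mean "refusal direction" is estimated from paired activations and projected out of the weights so a layer can no longer write along it. A loose sketch with random stand-in tensors follows; real implementations, like the linked remove-refusals-with-transformers, pick specific layers and prompt sets.

    # Sketch of refusal-direction removal; random tensors stand in for the
    # layer activations a real run would collect from the two prompt sets.
    import torch

    d = 64
    harmful = torch.randn(128, d)   # activations on refusal-triggering prompts
    harmless = torch.randn(128, d)  # activations on benign prompts

    direction = harmful.mean(0) - harmless.mean(0)
    direction = direction / direction.norm()

    W = torch.randn(d, d)  # a weight matrix writing into the residual stream
    # Project the direction out of the outputs: W' = (I - d d^T) W
    W_abliterated = W - torch.outer(direction, direction @ W)

    # Every output of the edited matrix is now orthogonal to the direction:
    print((direction @ W_abliterated).abs().max())  # ~0 up to float error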
+ overrides: + parameters: + model: Falcon3-1B-Instruct-abliterated-Q4_K_M.gguf + files: + - filename: Falcon3-1B-Instruct-abliterated-Q4_K_M.gguf + sha256: 416d15ce58334b7956818befb088d46c1e3e7153ebf2da2fb9769a5b1ff934a1 + uri: huggingface://bartowski/Falcon3-1B-Instruct-abliterated-GGUF/Falcon3-1B-Instruct-abliterated-Q4_K_M.gguf - &intellect1 name: "intellect-1-instruct" url: "github:mudler/LocalAI/gallery/llama3.1-instruct.yaml@master" From 7f51e2dddf1e0347d9d5e4719bbca95262190677 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Fri, 27 Dec 2024 11:20:10 +0100 Subject: [PATCH 111/849] chore(model gallery): add falcon3-3b-instruct-abliterated (#4502) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index 1446b343..d93cb1e1 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -63,6 +63,21 @@ - filename: Falcon3-1B-Instruct-abliterated-Q4_K_M.gguf sha256: 416d15ce58334b7956818befb088d46c1e3e7153ebf2da2fb9769a5b1ff934a1 uri: huggingface://bartowski/Falcon3-1B-Instruct-abliterated-GGUF/Falcon3-1B-Instruct-abliterated-Q4_K_M.gguf +- !!merge <<: *falcon3 + name: "falcon3-3b-instruct-abliterated" + urls: + - https://huggingface.co/huihui-ai/Falcon3-3B-Instruct-abliterated + - https://huggingface.co/bartowski/Falcon3-3B-Instruct-abliterated-GGUF + description: | + This is an uncensored version of tiiuae/Falcon3-3B-Instruct created with abliteration (see remove-refusals-with-transformers to know more about it). + This is a crude, proof-of-concept implementation to remove refusals from an LLM model without using TransformerLens. + overrides: + parameters: + model: Falcon3-3B-Instruct-abliterated-Q4_K_M.gguf + files: + - filename: Falcon3-3B-Instruct-abliterated-Q4_K_M.gguf + sha256: 83773b77b0e34ef115f8a6508192e9f1d3426a61456744493f65cfe1e7f90aa9 + uri: huggingface://bartowski/Falcon3-3B-Instruct-abliterated-GGUF/Falcon3-3B-Instruct-abliterated-Q4_K_M.gguf - &intellect1 name: "intellect-1-instruct" url: "github:mudler/LocalAI/gallery/llama3.1-instruct.yaml@master" From 0093985e7c09acf71d3eb012a2e42eac380b9cea Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Fri, 27 Dec 2024 11:24:13 +0100 Subject: [PATCH 112/849] chore(model gallery): add falcon3-10b-instruct-abliterated (#4503) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index d93cb1e1..4f00f89c 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -78,6 +78,21 @@ - filename: Falcon3-3B-Instruct-abliterated-Q4_K_M.gguf sha256: 83773b77b0e34ef115f8a6508192e9f1d3426a61456744493f65cfe1e7f90aa9 uri: huggingface://bartowski/Falcon3-3B-Instruct-abliterated-GGUF/Falcon3-3B-Instruct-abliterated-Q4_K_M.gguf +- !!merge <<: *falcon3 + name: "falcon3-10b-instruct-abliterated" + urls: + - https://huggingface.co/huihui-ai/Falcon3-10B-Instruct-abliterated + - https://huggingface.co/bartowski/Falcon3-10B-Instruct-abliterated-GGUF + description: | + This is an uncensored version of tiiuae/Falcon3-10B-Instruct created with abliteration (see remove-refusals-with-transformers to know more about it). + This is a crude, proof-of-concept implementation to remove refusals from an LLM model without using TransformerLens. 
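The sha256 fields carried by these gallery entries can be checked against a manually downloaded GGUF before use; a small sketch, where the filename is taken from one of the entries above:

    # Hash a downloaded GGUF in chunks and compare with the gallery's sha256.
    import hashlib

    h = hashlib.sha256()
    with open("Falcon3-3B-Instruct-abliterated-Q4_K_M.gguf", "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):  # 1 MiB at a time
            h.update(chunk)
    print(h.hexdigest())  # should match the sha256 field of the entry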
+ overrides: + parameters: + model: Falcon3-10B-Instruct-abliterated-Q4_K_M.gguf + files: + - filename: Falcon3-10B-Instruct-abliterated-Q4_K_M.gguf + sha256: 5940df2ff88e5be93dbe0766b2a9683d7e73c204a69a1348a37f835cf2b5f767 + uri: huggingface://bartowski/Falcon3-10B-Instruct-abliterated-GGUF/Falcon3-10B-Instruct-abliterated-Q4_K_M.gguf - &intellect1 name: "intellect-1-instruct" url: "github:mudler/LocalAI/gallery/llama3.1-instruct.yaml@master" From 2a7222c6aac7b962582a98bc60746e81393b045b Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Fri, 27 Dec 2024 11:29:34 +0100 Subject: [PATCH 113/849] chore(model gallery): add falcon3-7b-instruct-abliterated (#4504) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index 4f00f89c..121b363e 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -93,6 +93,21 @@ - filename: Falcon3-10B-Instruct-abliterated-Q4_K_M.gguf sha256: 5940df2ff88e5be93dbe0766b2a9683d7e73c204a69a1348a37f835cf2b5f767 uri: huggingface://bartowski/Falcon3-10B-Instruct-abliterated-GGUF/Falcon3-10B-Instruct-abliterated-Q4_K_M.gguf +- !!merge <<: *falcon3 + name: "falcon3-7b-instruct-abliterated" + urls: + - https://huggingface.co/huihui-ai/Falcon3-7B-Instruct-abliterated + - https://huggingface.co/bartowski/Falcon3-7B-Instruct-abliterated-GGUF + description: | + This is an uncensored version of tiiuae/Falcon3-7B-Instruct created with abliteration (see remove-refusals-with-transformers to know more about it). + This is a crude, proof-of-concept implementation to remove refusals from an LLM model without using TransformerLens. + overrides: + parameters: + model: Falcon3-7B-Instruct-abliterated-Q4_K_M.gguf + files: + - filename: Falcon3-7B-Instruct-abliterated-Q4_K_M.gguf + sha256: 68e10e638668acaa49fb7919224c7d8bcf1798126c7a499c4d9ec3b81313f8c8 + uri: huggingface://bartowski/Falcon3-7B-Instruct-abliterated-GGUF/Falcon3-7B-Instruct-abliterated-Q4_K_M.gguf - &intellect1 name: "intellect-1-instruct" url: "github:mudler/LocalAI/gallery/llama3.1-instruct.yaml@master" From 58524d40c9067aabc613e7c064490dd2149bd21a Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Fri, 27 Dec 2024 15:13:06 +0100 Subject: [PATCH 114/849] Update README.md Signed-off-by: Ettore Di Giacinto --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index ef950bf1..8fd0f4e7 100644 --- a/README.md +++ b/README.md @@ -126,10 +126,10 @@ If you want to help and contribute, issues up for grabs: https://github.com/mudl ## 🚀 [Features](https://localai.io/features/) -- 📖 [Text generation with GPTs](https://localai.io/features/text-generation/) (`llama.cpp`, `gpt4all.cpp`, ... [:book: and more](https://localai.io/model-compatibility/index.html#model-compatibility-table)) +- 📖 [Text generation with GPTs](https://localai.io/features/text-generation/) (`llama.cpp`, `transformers`, `vllm` ... 
[:book: and more](https://localai.io/model-compatibility/index.html#model-compatibility-table)) - 🗣 [Text to Audio](https://localai.io/features/text-to-audio/) - 🔈 [Audio to Text](https://localai.io/features/audio-to-text/) (Audio transcription with `whisper.cpp`) -- 🎨 [Image generation with stable diffusion](https://localai.io/features/image-generation) +- 🎨 [Image generation](https://localai.io/features/image-generation) - 🔥 [OpenAI-alike tools API](https://localai.io/features/openai-functions/) - 🧠 [Embeddings generation for vector databases](https://localai.io/features/embeddings/) - ✍️ [Constrained grammars](https://localai.io/features/constrained_grammars/) From afd0af987d6c6af79fd38b3220320b9901092ce9 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Fri, 27 Dec 2024 15:17:02 +0100 Subject: [PATCH 115/849] Update README.md Signed-off-by: Ettore Di Giacinto --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 8fd0f4e7..9cf758f8 100644 --- a/README.md +++ b/README.md @@ -137,6 +137,7 @@ If you want to help and contribute, issues up for grabs: https://github.com/mudl - 🥽 [Vision API](https://localai.io/features/gpt-vision/) - 📈 [Reranker API](https://localai.io/features/reranker/) - 🆕🖧 [P2P Inferencing](https://localai.io/features/distribute/) +- 🔊 Voice activity detection (Silero-VAD support) - 🌍 Integrated WebUI! ## 💻 Usage From 9b0983d027a564287ccaee8cae5e7bf817238fa3 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sat, 28 Dec 2024 10:49:53 +0100 Subject: [PATCH 116/849] chore(model gallery): add control-nanuq-8b (#4506) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index 121b363e..4991238b 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -4302,6 +4302,21 @@ - filename: Llama-3.1-8B-Open-SFT-Q4_K_M.gguf sha256: ce75152763c48c5386fe59652cc921aae456da36ab82af3d9e2080f603f45132 uri: huggingface://bartowski/Llama-3.1-8B-Open-SFT-GGUF/Llama-3.1-8B-Open-SFT-Q4_K_M.gguf +- !!merge <<: *llama31 + name: "control-nanuq-8b" + icon: https://cdn-uploads.huggingface.co/production/uploads/66c26b6fb01b19d8c3c2467b/6L-SXxQZ2nxYwvIjnlzN8.png + urls: + - https://huggingface.co/Delta-Vector/Control-Nanuq-8B + - https://huggingface.co/QuantFactory/Control-Nanuq-8B-GGUF + description: | + The model is a fine-tuned version of LLaMA 3.1 8B Supernova, designed to be "short and sweet" by minimizing narration and lengthy responses. It was fine-tuned over 4 epochs using OpenCAI and RP logs, with DPO applied to enhance coherence. Finally, KTO reinforcement learning was implemented on version 1.1, significantly improving the model's prose and creativity. 
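For orientation, here is a self-contained sketch of the DPO objective the Control-Nanuq description mentions; the scalar log-probabilities are dummies standing in for per-response scores from the policy and a frozen reference, so this shows the textbook loss, not the model's actual training code.

    # DPO on one preference pair: widen the policy's chosen/rejected gap
    # relative to a frozen reference model.
    import torch
    import torch.nn.functional as F

    beta = 0.1  # how strongly the policy is tethered to the reference

    policy_chosen, policy_rejected = torch.tensor(-12.3), torch.tensor(-15.9)
    ref_chosen, ref_rejected = torch.tensor(-13.0), torch.tensor(-14.8)

    margin = (policy_chosen - ref_chosen) - (policy_rejected - ref_rejected)
    loss = -F.logsigmoid(beta * margin)
    print(loss)  # falls as the policy prefers "chosen" more than the reference does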
+ overrides: + parameters: + model: Control-Nanuq-8B.Q4_K_M.gguf + files: + - filename: Control-Nanuq-8B.Q4_K_M.gguf + sha256: 5aa3b929cbcaf62709fef58d6f630c2df1185d774d0074c7e750cb03c53b744e + uri: huggingface://QuantFactory/Control-Nanuq-8B-GGUF/Control-Nanuq-8B.Q4_K_M.gguf - &deepseek ## Deepseek url: "github:mudler/LocalAI/gallery/deepseek.yaml@master" From 270d33504b269c0401e9a0ca85fbbd113f6022cf Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sat, 28 Dec 2024 10:54:47 +0100 Subject: [PATCH 117/849] chore(model gallery): add miscii-14b-1028 (#4507) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index 4991238b..4686ad34 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -2325,6 +2325,21 @@ - filename: TQ2.5-14B-Neon-v1-Q4_K_M.gguf sha256: cefc7409b21e03e4fcd64940e30f6a0c17c5a4a89e0ba0811f1b9720825d2309 uri: huggingface://bartowski/TQ2.5-14B-Neon-v1-GGUF/TQ2.5-14B-Neon-v1-Q4_K_M.gguf +- !!merge <<: *qwen25 + name: "miscii-14b-1028" + icon: https://i.imgur.com/hkiubT4.jpeg + urls: + - https://huggingface.co/sthenno-com/miscii-14b-1028 + - https://huggingface.co/QuantFactory/miscii-14b-1028-GGUF + description: | + miscii-14b-1028 is a 14-billion parameter language model based on the Qwen2.5-14B-Instruct model. It is designed for chat and conversational AI tasks, with a focus on role-based instructions. + overrides: + parameters: + model: miscii-14b-1028.Q4_K_M.gguf + files: + - filename: miscii-14b-1028.Q4_K_M.gguf + sha256: 0e57bc628c79a1033a6bb92837fba1e52a9e5dbccc5107720c95b89cd9cf92a9 + uri: huggingface://QuantFactory/miscii-14b-1028-GGUF/miscii-14b-1028.Q4_K_M.gguf - &archfunct license: apache-2.0 tags: From d1762e098e862e190631acdc85e9939c66c34864 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sat, 28 Dec 2024 10:56:59 +0100 Subject: [PATCH 118/849] chore(model gallery): add miscii-14b-1225 (#4508) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index 4686ad34..da468ae1 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -2340,6 +2340,23 @@ - filename: miscii-14b-1028.Q4_K_M.gguf sha256: 0e57bc628c79a1033a6bb92837fba1e52a9e5dbccc5107720c95b89cd9cf92a9 uri: huggingface://QuantFactory/miscii-14b-1028-GGUF/miscii-14b-1028.Q4_K_M.gguf +- !!merge <<: *qwen25 + name: "miscii-14b-1225" + icon: https://huggingface.co/sthenno-com/miscii-14b-1225/resolve/main/Rrharil.png + urls: + - https://huggingface.co/sthenno-com/miscii-14b-1225 + - https://huggingface.co/mradermacher/miscii-14b-1225-GGUF + description: | + The following models were included in the merge: + sthenno/exp-002 + sthenno/miscii-1218 + overrides: + parameters: + model: miscii-14b-1225.Q4_K_M.gguf + files: + - filename: miscii-14b-1225.Q4_K_M.gguf + sha256: f21fe73450be394055aeb87b7619e98a09e5c190b48f145bdebef4e12df871fe + uri: huggingface://mradermacher/miscii-14b-1225-GGUF/miscii-14b-1225.Q4_K_M.gguf - &archfunct license: apache-2.0 tags: From 671381267a5d0733e1cf6fdf300e4175c7062e24 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Sat, 28 Dec 2024 22:43:05 +0100 Subject: [PATCH 119/849] chore: :arrow_up: Update ggerganov/llama.cpp to `f865ea149d71ef883e3780fced8a20a1464eccf4` (#4510) :arrow_up: Update ggerganov/llama.cpp Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> 
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 2aa34b7c..11ed132e 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ DETECT_LIBS?=true # llama.cpp versions GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be -CPPLLAMA_VERSION?=d79d8f39b4da6deca4aea8bf130c6034c482b320 +CPPLLAMA_VERSION?=f865ea149d71ef883e3780fced8a20a1464eccf4 # whisper.cpp version WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp From 7122c7472e8109b087f334745cc9b569ff0e10ca Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Sun, 29 Dec 2024 22:42:38 +0100 Subject: [PATCH 120/849] chore: :arrow_up: Update ggerganov/llama.cpp to `a813badbbdf0d38705f249df7a0c99af5cdee678` (#4512) :arrow_up: Update ggerganov/llama.cpp Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 11ed132e..de92ee64 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ DETECT_LIBS?=true # llama.cpp versions GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be -CPPLLAMA_VERSION?=f865ea149d71ef883e3780fced8a20a1464eccf4 +CPPLLAMA_VERSION?=a813badbbdf0d38705f249df7a0c99af5cdee678 # whisper.cpp version WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp From 998ff9fa22b2eece6099695495a4db99ce4a3aad Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 30 Dec 2024 20:05:45 +0000 Subject: [PATCH 121/849] chore(deps): Bump gradio from 3.48.0 to 5.9.1 in /backend/python/openvoice (#4514) chore(deps): Bump gradio in /backend/python/openvoice Bumps [gradio](https://github.com/gradio-app/gradio) from 3.48.0 to 5.9.1. - [Release notes](https://github.com/gradio-app/gradio/releases) - [Changelog](https://github.com/gradio-app/gradio/blob/main/CHANGELOG.md) - [Commits](https://github.com/gradio-app/gradio/compare/gradio@3.48.0...gradio@5.9.1) --- updated-dependencies: - dependency-name: gradio dependency-type: direct:production update-type: version-update:semver-major ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- backend/python/openvoice/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/python/openvoice/requirements.txt b/backend/python/openvoice/requirements.txt index cee412d5..a105da94 100644 --- a/backend/python/openvoice/requirements.txt +++ b/backend/python/openvoice/requirements.txt @@ -11,7 +11,7 @@ cn2an==0.5.22 numpy==1.22.0 networkx==2.8.8 jieba==0.42.1 -gradio==3.48.0 +gradio==5.9.1 langid==1.1.6 llvmlite==0.43.0 setuptools \ No newline at end of file From 639526d207911bea0b8f8446f9ed001c3056a409 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Mon, 30 Dec 2024 22:44:53 +0100 Subject: [PATCH 122/849] chore: :arrow_up: Update leejet/stable-diffusion.cpp to `dcf91f9e0f2cbf9da472ee2a556751ed4bab2d2a` (#4509) :arrow_up: Update leejet/stable-diffusion.cpp Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index de92ee64..cf3cad4b 100644 --- a/Makefile +++ b/Makefile @@ -32,7 +32,7 @@ BARKCPP_VERSION?=v1.0.0 # stablediffusion.cpp (ggml) STABLEDIFFUSION_GGML_REPO?=https://github.com/leejet/stable-diffusion.cpp -STABLEDIFFUSION_GGML_VERSION?=9578fdcc4632dc3de5565f28e2fb16b7c18f8d48 +STABLEDIFFUSION_GGML_VERSION?=dcf91f9e0f2cbf9da472ee2a556751ed4bab2d2a ONNX_VERSION?=1.20.0 ONNX_ARCH?=x64 From e044970a5b737a80e7b41bafe2a2817dba0eac9c Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Tue, 31 Dec 2024 11:17:29 +0100 Subject: [PATCH 123/849] chore(model gallery): add qwen2.5-32b-rp-ink (#4517) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index da468ae1..407cb201 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -2490,6 +2490,22 @@ - filename: Teleut-7b-RP-Q4_K_M.gguf sha256: 74d9a0974c48f16677da8891ac76ed89ed04f246275b9ca8316d25e1e86ce89f uri: huggingface://bartowski/Teleut-7b-RP-GGUF/Teleut-7b-RP-Q4_K_M.gguf +- !!merge <<: *qwen25 + name: "qwen2.5-32b-rp-ink" + icon: https://cdn-uploads.huggingface.co/production/uploads/634262af8d8089ebaefd410e/1_Zt_OvEW183lmrgidQw8.png + urls: + - https://huggingface.co/allura-org/Qwen2.5-32b-RP-Ink + - https://huggingface.co/bartowski/Qwen2.5-32b-RP-Ink-GGUF + description: | + A roleplay-focused LoRA finetune of Qwen 2.5 32b Instruct. Methodology and hyperparams inspired by SorcererLM and Slush. 
+ Yet another model in the Ink series, following in the footsteps of the Nemo one + overrides: + parameters: + model: Qwen2.5-32b-RP-Ink-Q4_K_M.gguf + files: + - filename: Qwen2.5-32b-RP-Ink-Q4_K_M.gguf + sha256: 7a0693d50aa40ba4fd43b4988851e67443e758ae34881f448e2812e5fcc25468 + uri: huggingface://bartowski/Qwen2.5-32b-RP-Ink-GGUF/Qwen2.5-32b-RP-Ink-Q4_K_M.gguf - &smollm ## SmolLM url: "github:mudler/LocalAI/gallery/chatml.yaml@master" From 7674c80bb6eea6b2a5099f31c531575755c25b27 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Tue, 31 Dec 2024 11:18:33 +0100 Subject: [PATCH 124/849] chore: :arrow_up: Update ggerganov/llama.cpp to `716bd6dec3e044e5c325386b5b0483392b24cefe` (#4516) :arrow_up: Update ggerganov/llama.cpp Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index cf3cad4b..5a35771a 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ DETECT_LIBS?=true # llama.cpp versions GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be -CPPLLAMA_VERSION?=a813badbbdf0d38705f249df7a0c99af5cdee678 +CPPLLAMA_VERSION?=716bd6dec3e044e5c325386b5b0483392b24cefe # whisper.cpp version WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp From 25acb0cbbcfecb0344076ce1c5b2785d8aa7cf5f Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 31 Dec 2024 11:18:51 +0100 Subject: [PATCH 125/849] chore(deps): Bump docs/themes/hugo-theme-relearn from `ec88e24` to `d25f856` (#4515) chore(deps): Bump docs/themes/hugo-theme-relearn Bumps [docs/themes/hugo-theme-relearn](https://github.com/McShelby/hugo-theme-relearn) from `ec88e24` to `d25f856`. - [Release notes](https://github.com/McShelby/hugo-theme-relearn/releases) - [Commits](https://github.com/McShelby/hugo-theme-relearn/compare/ec88e24f46955bcf1aa3f38ac143982eff08d8a6...d25f856477223170b0de0b284252aa54b3e6255b) --- updated-dependencies: - dependency-name: docs/themes/hugo-theme-relearn dependency-type: direct:production ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- docs/themes/hugo-theme-relearn | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/themes/hugo-theme-relearn b/docs/themes/hugo-theme-relearn index ec88e24f..d25f8564 160000 --- a/docs/themes/hugo-theme-relearn +++ b/docs/themes/hugo-theme-relearn @@ -1 +1 @@ -Subproject commit ec88e24f46955bcf1aa3f38ac143982eff08d8a6 +Subproject commit d25f856477223170b0de0b284252aa54b3e6255b From e1dd78bcea83fd35d72bf57ae558e02f67be1f8e Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Tue, 31 Dec 2024 11:23:07 +0100 Subject: [PATCH 126/849] chore(model gallery): add huatuogpt-o1-8b (#4518) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index 407cb201..f67bb0e8 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -4365,6 +4365,21 @@ - filename: Control-Nanuq-8B.Q4_K_M.gguf sha256: 5aa3b929cbcaf62709fef58d6f630c2df1185d774d0074c7e750cb03c53b744e uri: huggingface://QuantFactory/Control-Nanuq-8B-GGUF/Control-Nanuq-8B.Q4_K_M.gguf +- !!merge <<: *llama31 + name: "huatuogpt-o1-8b" + urls: + - https://huggingface.co/FreedomIntelligence/HuatuoGPT-o1-8B + - https://huggingface.co/bartowski/HuatuoGPT-o1-8B-GGUF + description: | + HuatuoGPT-o1 is a medical LLM designed for advanced medical reasoning. It generates a complex thought process, reflecting and refining its reasoning, before providing a final response. + For more information, visit our GitHub repository: https://github.com/FreedomIntelligence/HuatuoGPT-o1. + overrides: + parameters: + model: HuatuoGPT-o1-8B-Q4_K_M.gguf + files: + - filename: HuatuoGPT-o1-8B-Q4_K_M.gguf + sha256: 3e1ef35fc230182d96ae2d6c7436a2e8250c21a4278e798e1aa45790ba82006b + uri: huggingface://bartowski/HuatuoGPT-o1-8B-GGUF/HuatuoGPT-o1-8B-Q4_K_M.gguf From f16c7cef92979f3a53a40cd3cdf723645f720ba1 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Tue, 31 Dec 2024 11:23:29 +0100 Subject: [PATCH 127/849] chore(model gallery): add q2.5-veltha-14b-0.5 (#4519) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index f67bb0e8..4bb08df5 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -2506,6 +2506,24 @@ - filename: Qwen2.5-32b-RP-Ink-Q4_K_M.gguf sha256: 7a0693d50aa40ba4fd43b4988851e67443e758ae34881f448e2812e5fcc25468 uri: huggingface://bartowski/Qwen2.5-32b-RP-Ink-GGUF/Qwen2.5-32b-RP-Ink-Q4_K_M.gguf +- !!merge <<: *qwen25 + name: "q2.5-veltha-14b-0.5" + urls: + - https://huggingface.co/djuna/Q2.5-Veltha-14B-0.5 + - https://huggingface.co/bartowski/Q2.5-Veltha-14B-0.5-GGUF + description: | + The following models were included in the merge: + huihui-ai/Qwen2.5-14B-Instruct-abliterated-v2 + allura-org/TQ2.5-14B-Aletheia-v1 + EVA-UNIT-01/EVA-Qwen2.5-14B-v0.2 + v000000/Qwen2.5-Lumen-14B + overrides: + parameters: + model: Q2.5-Veltha-14B-0.5-Q4_K_M.gguf + files: + - filename: Q2.5-Veltha-14B-0.5-Q4_K_M.gguf + sha256: f75b8cbceab555ebcab6fcb3b51d398b7ef79671aa05c21c288edd75c9f217bd + uri: huggingface://bartowski/Q2.5-Veltha-14B-0.5-GGUF/Q2.5-Veltha-14B-0.5-Q4_K_M.gguf - &smollm ## SmolLM url: "github:mudler/LocalAI/gallery/chatml.yaml@master" From c30ecdd5353c76f024495d54e4ef632e19f5a323 Mon Sep 17 00:00:00 2001 From:
"LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Tue, 31 Dec 2024 22:43:29 +0100 Subject: [PATCH 128/849] chore: :arrow_up: Update ggerganov/llama.cpp to `0827b2c1da299805288abbd556d869318f2b121e` (#4520) :arrow_up: Update ggerganov/llama.cpp Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 5a35771a..a4f62d3f 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ DETECT_LIBS?=true # llama.cpp versions GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be -CPPLLAMA_VERSION?=716bd6dec3e044e5c325386b5b0483392b24cefe +CPPLLAMA_VERSION?=0827b2c1da299805288abbd556d869318f2b121e # whisper.cpp version WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp From ae80a2bd2428acf422236497ba8cf446824a414a Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Wed, 1 Jan 2025 13:26:48 +0100 Subject: [PATCH 129/849] chore(model gallery): add smallthinker-3b-preview (#4521) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index 4bb08df5..b407ab9d 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -2524,6 +2524,22 @@ - filename: Q2.5-Veltha-14B-0.5-Q4_K_M.gguf sha256: f75b8cbceab555ebcab6fcb3b51d398b7ef79671aa05c21c288edd75c9f217bd uri: huggingface://bartowski/Q2.5-Veltha-14B-0.5-GGUF/Q2.5-Veltha-14B-0.5-Q4_K_M.gguf +- !!merge <<: *qwen25 + name: "smallthinker-3b-preview" + urls: + - https://huggingface.co/PowerInfer/SmallThinker-3B-Preview + - https://huggingface.co/bartowski/SmallThinker-3B-Preview-GGUF + description: | + SmallThinker is designed for the following use cases: + Edge Deployment: Its small size makes it ideal for deployment on resource-constrained devices. + Draft Model for QwQ-32B-Preview: SmallThinker can serve as a fast and efficient draft model for the larger QwQ-32B-Preview model. From my test, in llama.cpp we can get 70% speedup (from 40 tokens/s to 70 tokens/s). 
+ overrides: + parameters: + model: SmallThinker-3B-Preview-Q4_K_M.gguf + files: + - filename: SmallThinker-3B-Preview-Q4_K_M.gguf + sha256: ac04f82a09ee6a2748437c3bb774b638a54099dc7d5d6ef7549893fae22ab055 + uri: huggingface://bartowski/SmallThinker-3B-Preview-GGUF/SmallThinker-3B-Preview-Q4_K_M.gguf - &smollm ## SmolLM url: "github:mudler/LocalAI/gallery/chatml.yaml@master" From 1a2a7a57b3974eaefc2ef7a2761da8528d199296 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Wed, 1 Jan 2025 13:27:13 +0100 Subject: [PATCH 130/849] chore(model gallery): add mn-12b-mag-mell-r1-iq-arm-imatrix (#4522) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index b407ab9d..137603f1 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -5319,6 +5319,38 @@ - filename: Dans-PersonalityEngine-V1.1.0-12b-Q4_K_M.gguf sha256: a1afb9fddfa3f2847ed710cc374b4f17e63a75f7e10d8871cf83983c2f5415ab uri: huggingface://bartowski/Dans-PersonalityEngine-V1.1.0-12b-GGUF/Dans-PersonalityEngine-V1.1.0-12b-Q4_K_M.gguf +- !!merge <<: *mistral03 + name: "mn-12b-mag-mell-r1-iq-arm-imatrix" + url: "github:mudler/LocalAI/gallery/chatml.yaml@master" + icon: "https://i.imgur.com/wjyAaTO.png" + urls: + - https://huggingface.co/inflatebot/MN-12B-Mag-Mell-R1 + - https://huggingface.co/Lewdiculous/MN-12B-Mag-Mell-R1-GGUF-IQ-ARM-Imatrix + description: | + This is a merge of pre-trained language models created using mergekit. Mag Mell is a multi-stage merge, inspired by hyper-merges like Tiefighter and Umbral Mind. Intended to be a general purpose "Best of Nemo" model for any fictional, creative use case. + 6 models were chosen based on 3 categories; they were then paired up and merged via layer-weighted SLERP to create intermediate "specialists" which are then evaluated in their domain. The specialists were then merged into the base via DARE-TIES, with hyperparameters chosen to reduce interference caused by the overlap of the three domains. The idea with this approach is to extract the best qualities of each component part, and produce models whose task vectors represent more than the sum of their parts. + + The three specialists are as follows: + Hero (RP, kink/trope coverage): Chronos Gold, Sunrose. + Monk (Intelligence, groundedness): Bophades, Wissenschaft. + Deity (Prose, flair): Gutenberg v4, Magnum 2.5 KTO. + I've been dreaming about this merge since Nemo tunes started coming out in earnest. From our testing, Mag Mell demonstrates worldbuilding capabilities unlike any model in its class, comparable to old adventuring models like Tiefighter, and prose that exhibits minimal "slop" (not bad for no finetuning), frequently devising electrifying metaphors that left us consistently astonished. + + I don't want to toot my own bugle though; I'm really proud of how this came out, but please leave your feedback, good or bad. Special thanks as usual to Toaster for his feedback and Fizz for helping fund compute, as well as the KoboldAI Discord for their resources.
The following models were included in the merge: + IntervitensInc/Mistral-Nemo-Base-2407-chatml + nbeerbower/mistral-nemo-bophades-12B + nbeerbower/mistral-nemo-wissenschaft-12B + elinas/Chronos-Gold-12B-1.0 + Fizzarolli/MN-12b-Sunrose + nbeerbower/mistral-nemo-gutenberg-12B-v4 + anthracite-org/magnum-12b-v2.5-kto + overrides: + parameters: + model: MN-12B-Mag-Mell-R1-Q4_K_M-imat.gguf + files: + - filename: MN-12B-Mag-Mell-R1-Q4_K_M-imat.gguf + sha256: ba0c9e64222b35f8c3828b7295e173ee54d83fd2e457ba67f6561a4a6d98481e + uri: huggingface://Lewdiculous/MN-12B-Mag-Mell-R1-GGUF-IQ-ARM-Imatrix/MN-12B-Mag-Mell-R1-Q4_K_M-imat.gguf - &mudler ### START mudler's LocalAI specific-models url: "github:mudler/LocalAI/gallery/mudler.yaml@master" From f345f7a7958834123036640891b76c92e8ffc17b Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Wed, 1 Jan 2025 13:33:39 +0100 Subject: [PATCH 131/849] chore(model gallery): add captain-eris-diogenes_twilight-v0.420-12b (#4523) chore(model gallery): add captain-eris-diogenes_twilight-v0.420-12b-arm-imatrix Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index 137603f1..75723250 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -5351,6 +5351,24 @@ - filename: MN-12B-Mag-Mell-R1-Q4_K_M-imat.gguf sha256: ba0c9e64222b35f8c3828b7295e173ee54d83fd2e457ba67f6561a4a6d98481e uri: huggingface://Lewdiculous/MN-12B-Mag-Mell-R1-GGUF-IQ-ARM-Imatrix/MN-12B-Mag-Mell-R1-Q4_K_M-imat.gguf +- !!merge <<: *mistral03 + url: "github:mudler/LocalAI/gallery/chatml.yaml@master" + name: "captain-eris-diogenes_twilight-v0.420-12b-arm-imatrix" + icon: https://cdn-uploads.huggingface.co/production/uploads/642265bc01c62c1e4102dc36/n0HUz-yRPkwQzt3dFrjW9.png + urls: + - https://huggingface.co/Nitral-AI/Captain-Eris-Diogenes_Twilight-V0.420-12B + - https://huggingface.co/Lewdiculous/Captain-Eris-Diogenes_Twilight-V0.420-12B-GGUF-ARM-Imatrix + description: | + The following models were included in the merge: + Nitral-AI/Captain-Eris_Twilight-V0.420-12B + Nitral-AI/Diogenes-12B-ChatMLified + overrides: + parameters: + model: Captain-Eris-Diogenes_Twighlight-V0.420-12B-Q4_K_M-imat.gguf + files: + - filename: Captain-Eris-Diogenes_Twighlight-V0.420-12B-Q4_K_M-imat.gguf + sha256: e70b26114108c41e3ca0aefc0c7b8f5f69452ab461ffe7155e6b75ede24ec1b5 + uri: huggingface://Lewdiculous/Captain-Eris-Diogenes_Twilight-V0.420-12B-GGUF-ARM-Imatrix/Captain-Eris-Diogenes_Twighlight-V0.420-12B-Q4_K_M-imat.gguf - &mudler ### START mudler's LocalAI specific-models url: "github:mudler/LocalAI/gallery/mudler.yaml@master" From f1082f3c6d8862ea03a1e16cda4cb3d4492c82b8 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Wed, 1 Jan 2025 14:41:48 +0100 Subject: [PATCH 132/849] chore(model gallery): add violet_twilight-v0.2 (#4524) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index 75723250..116baff2 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -5369,6 +5369,22 @@ - filename: Captain-Eris-Diogenes_Twighlight-V0.420-12B-Q4_K_M-imat.gguf sha256: e70b26114108c41e3ca0aefc0c7b8f5f69452ab461ffe7155e6b75ede24ec1b5 uri: huggingface://Lewdiculous/Captain-Eris-Diogenes_Twilight-V0.420-12B-GGUF-ARM-Imatrix/Captain-Eris-Diogenes_Twighlight-V0.420-12B-Q4_K_M-imat.gguf +- !!merge <<: *mistral03 + name: "violet_twilight-v0.2" + url: 
"github:mudler/LocalAI/gallery/chatml.yaml@master" + icon: https://cdn-uploads.huggingface.co/production/uploads/64adfd277b5ff762771e4571/P962FQhRG4I8nbU_DJolY.png + urls: + - https://huggingface.co/Epiculous/Violet_Twilight-v0.2 + - https://huggingface.co/Epiculous/Violet_Twilight-v0.2-GGUF + description: | + Now for something a bit different, Violet_Twilight-v0.2! This model is a SLERP merge of Azure_Dusk-v0.2 and Crimson_Dawn-v0.2! + overrides: + parameters: + model: Violet_Twilight-v0.2.Q4_K_M.gguf + files: + - filename: Violet_Twilight-v0.2.Q4_K_M.gguf + sha256: b63f07cc441146af9c98cd3c3d4390d7c39bfef11c1d168dc7c6244ca2ba6b12 + uri: huggingface://Epiculous/Violet_Twilight-v0.2-GGUF/Violet_Twilight-v0.2.Q4_K_M.gguf - &mudler ### START mudler's LocalAI specific-models url: "github:mudler/LocalAI/gallery/mudler.yaml@master" From 3415e6ae740f8434eb943581c753ae2c0fd5a39c Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Thu, 2 Jan 2025 10:45:52 +0100 Subject: [PATCH 133/849] chore(model gallery): add qwenwify2.5-32b-v4.5 (#4525) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index 116baff2..658736f4 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -2540,6 +2540,26 @@ - filename: SmallThinker-3B-Preview-Q4_K_M.gguf sha256: ac04f82a09ee6a2748437c3bb774b638a54099dc7d5d6ef7549893fae22ab055 uri: huggingface://bartowski/SmallThinker-3B-Preview-GGUF/SmallThinker-3B-Preview-Q4_K_M.gguf +- !!merge <<: *qwen25 + name: "qwenwify2.5-32b-v4.5" + urls: + - https://huggingface.co/Kaoeiri/Qwenwify2.5-32B-v4.5 + - https://huggingface.co/mradermacher/Qwenwify2.5-32B-v4.5-GGUF + description: | + The following models were included in the merge: + Kaoeiri/Qwenwify-32B-v3 + allura-org/Qwen2.5-32b-RP-Ink + Dans-DiscountModels/Qwen2.5-32B-ChatML + Saxo/Linkbricks-Horizon-AI-Japanese-Base-32B + OpenBuddy/openbuddy-qwq-32b-v24.2-200k + Sao10K/32B-Qwen2.5-Kunou-v1 + overrides: + parameters: + model: Qwenwify2.5-32B-v4.5.Q4_K_M.gguf + files: + - filename: Qwenwify2.5-32B-v4.5.Q4_K_M.gguf + sha256: 52670acdc285356c01259f45b1953860f34deb4f80345ca63b60acc19165280c + uri: huggingface://mradermacher/Qwenwify2.5-32B-v4.5-GGUF/Qwenwify2.5-32B-v4.5.Q4_K_M.gguf - &smollm ## SmolLM url: "github:mudler/LocalAI/gallery/chatml.yaml@master" From 930280ecacd00de948b3678e684d6925410ad526 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Thu, 2 Jan 2025 10:46:01 +0100 Subject: [PATCH 134/849] chore(model gallery): add sainemo-remix (#4526) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index 658736f4..4a5be373 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -5405,6 +5405,24 @@ - filename: Violet_Twilight-v0.2.Q4_K_M.gguf sha256: b63f07cc441146af9c98cd3c3d4390d7c39bfef11c1d168dc7c6244ca2ba6b12 uri: huggingface://Epiculous/Violet_Twilight-v0.2-GGUF/Violet_Twilight-v0.2.Q4_K_M.gguf +- !!merge <<: *mistral03 + name: "sainemo-remix" + icon: https://huggingface.co/Moraliane/SAINEMO-reMIX/resolve/main/remixwife.webp + urls: + - https://huggingface.co/Moraliane/SAINEMO-reMIX + - https://huggingface.co/QuantFactory/SAINEMO-reMIX-GGUF + description: | + The following models were included in the merge: + elinas_Chronos-Gold-12B-1.0 + Vikhrmodels_Vikhr-Nemo-12B-Instruct-R-21-09-24 + MarinaraSpaghetti_NemoMix-Unleashed-12B + overrides: + parameters: + model: 
SAINEMO-reMIX.Q4_K_M.gguf + files: + - filename: SAINEMO-reMIX.Q4_K_M.gguf + sha256: 91c81623542df97462d93bed8014af4830940182786948fc395d8958a5add994 + uri: huggingface://QuantFactory/SAINEMO-reMIX-GGUF/SAINEMO-reMIX.Q4_K_M.gguf - &mudler ### START mudler's LocalAI specific-models url: "github:mudler/LocalAI/gallery/mudler.yaml@master" From d9facbcee93fdc61521201264a4c1861e3ab9427 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Thu, 2 Jan 2025 10:46:11 +0100 Subject: [PATCH 135/849] chore(model gallery): add l3.1-purosani-2-8b (#4527) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index 4a5be373..f9e2731b 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -4434,6 +4434,25 @@ - filename: HuatuoGPT-o1-8B-Q4_K_M.gguf sha256: 3e1ef35fc230182d96ae2d6c7436a2e8250c21a4278e798e1aa45790ba82006b uri: huggingface://bartowski/HuatuoGPT-o1-8B-GGUF/HuatuoGPT-o1-8B-Q4_K_M.gguf +- !!merge <<: *llama31 + name: "l3.1-purosani-2-8b" + urls: + - https://huggingface.co/djuna/L3.1-Purosani-2-8B + - https://huggingface.co/QuantFactory/L3.1-Purosani-2-8B-GGUF + description: | + The following models were included in the merge: + hf-100/Llama-3-Spellbound-Instruct-8B-0.3 + arcee-ai/Llama-3.1-SuperNova-Lite + grimjim/Llama-3-Instruct-abliteration-LoRA-8B + THUDM/LongWriter-llama3.1-8b + ResplendentAI/Smarts_Llama3 + djuna/L3.1-Suze-Vume-2-calc + djuna/L3.1-ForStHS + Blackroot/Llama-3-8B-Abomination-LORA + overrides: + parameters: + model: L3.1-Purosani-2-8B.Q4_K_M.gguf + files: + - filename: L3.1-Purosani-2-8B.Q4_K_M.gguf + sha256: e3eb8038a72b6e85b7a43c7806c32f01208f4644d54bf94d77ecad6286cf609f + uri: huggingface://QuantFactory/L3.1-Purosani-2-8B-GGUF/L3.1-Purosani-2-8B.Q4_K_M.gguf - &deepseek ## Deepseek url: "github:mudler/LocalAI/gallery/deepseek.yaml@master" From 3c21c8789a68c7a5baf830dd7b6711c5b9a99ded Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Thu, 2 Jan 2025 22:43:37 +0100 Subject: [PATCH 136/849] chore: :arrow_up: Update ggerganov/llama.cpp to `2f0ee84b9b02d2a98742308026f060ebdc2423f1` (#4528) :arrow_up: Update ggerganov/llama.cpp Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index a4f62d3f..11904d1b 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ DETECT_LIBS?=true # llama.cpp versions GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be -CPPLLAMA_VERSION?=0827b2c1da299805288abbd556d869318f2b121e +CPPLLAMA_VERSION?=2f0ee84b9b02d2a98742308026f060ebdc2423f1 # whisper.cpp version WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp From 36e4c0fcf096c353fbadb51894d464538c6cd71a Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Fri, 3 Jan 2025 09:10:03 +0100 Subject: [PATCH 137/849] chore(model gallery): add nera_noctis-12b (#4530) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index f9e2731b..be624e9f 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -5442,6 +5442,22 @@ - filename: SAINEMO-reMIX.Q4_K_M.gguf sha256: 91c81623542df97462d93bed8014af4830940182786948fc395d8958a5add994 uri: 
huggingface://QuantFactory/SAINEMO-reMIX-GGUF/SAINEMO-reMIX.Q4_K_M.gguf +- !!merge <<: *mistral03 + name: "nera_noctis-12b" + url: "github:mudler/LocalAI/gallery/chatml.yaml@master" + icon: https://cdn-uploads.huggingface.co/production/uploads/642265bc01c62c1e4102dc36/89XJnlNNSsEfBjI1oHCVt.jpeg + urls: + - https://huggingface.co/Nitral-AI/Nera_Noctis-12B + - https://huggingface.co/bartowski/Nera_Noctis-12B-GGUF + description: | + Sometimes, the brightest gems are found in the darkest places. For it is in the shadows where we learn to really see the light. + overrides: + parameters: + model: Nera_Noctis-12B-Q4_K_M.gguf + files: + - filename: Nera_Noctis-12B-Q4_K_M.gguf + sha256: 0662a9a847adde046e6255c15d5a677ebf09ab00841547c8963668d14baf00ff + uri: huggingface://bartowski/Nera_Noctis-12B-GGUF/Nera_Noctis-12B-Q4_K_M.gguf - &mudler ### START mudler's LocalAI specific-models url: "github:mudler/LocalAI/gallery/mudler.yaml@master" From 286dc32fe0417d079ab8ac1cc1308e331d7f08b2 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Fri, 3 Jan 2025 19:18:18 +0100 Subject: [PATCH 138/849] ci(arm64): try building on self-hosted Signed-off-by: Ettore Di Giacinto --- .github/workflows/image.yml | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/.github/workflows/image.yml b/.github/workflows/image.yml index e806f123..422070b8 100644 --- a/.github/workflows/image.yml +++ b/.github/workflows/image.yml @@ -362,16 +362,16 @@ jobs: base-image: "ubuntu:22.04" skip-drivers: 'false' makeflags: "--jobs=4 --output-sync=target" - # - build-type: 'cublas' - # cuda-major-version: "12" - # cuda-minor-version: "0" - # platforms: 'linux/arm64' - # tag-latest: 'false' - # tag-suffix: '-nvidia-l4t-arm64-core' - # latest-image: 'latest-nvidia-l4t-arm64-core' - # ffmpeg: 'true' - # image-type: 'core' - # base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0" - # runs-on: 'arc-runner-set' - # makeflags: "--jobs=4 --output-sync=target" - # skip-drivers: 'true' \ No newline at end of file + - build-type: 'cublas' + cuda-major-version: "12" + cuda-minor-version: "0" + platforms: 'linux/arm64' + tag-latest: 'false' + tag-suffix: '-nvidia-l4t-arm64-core' + latest-image: 'latest-nvidia-l4t-arm64-core' + ffmpeg: 'true' + image-type: 'core' + base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0" + runs-on: 'self-hosted' + makeflags: "--jobs=4 --output-sync=target" + skip-drivers: 'true' From baee4f7bd5021591c684849c1b645a75a6eaade0 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Fri, 3 Jan 2025 19:23:05 +0100 Subject: [PATCH 139/849] ci: split jobs Signed-off-by: Ettore Di Giacinto --- .github/workflows/image.yml | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/.github/workflows/image.yml b/.github/workflows/image.yml index 422070b8..16419bc7 100644 --- a/.github/workflows/image.yml +++ b/.github/workflows/image.yml @@ -362,6 +362,33 @@ jobs: base-image: "ubuntu:22.04" skip-drivers: 'false' makeflags: "--jobs=4 --output-sync=target" + parallel-builds: + uses: ./.github/workflows/image_build.yml + with: + tag-latest: ${{ matrix.tag-latest }} + tag-suffix: ${{ matrix.tag-suffix }} + ffmpeg: ${{ matrix.ffmpeg }} + image-type: ${{ matrix.image-type }} + build-type: ${{ matrix.build-type }} + cuda-major-version: ${{ matrix.cuda-major-version }} + cuda-minor-version: ${{ matrix.cuda-minor-version }} + platforms: ${{ matrix.platforms }} + runs-on: ${{ matrix.runs-on }} + aio: ${{ matrix.aio }} + base-image: ${{ matrix.base-image }} + grpc-base-image: ${{ 
matrix.grpc-base-image }} + makeflags: ${{ matrix.makeflags }} + latest-image: ${{ matrix.latest-image }} + latest-image-aio: ${{ matrix.latest-image-aio }} + skip-drivers: ${{ matrix.skip-drivers }} + secrets: + dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }} + dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }} + quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }} + quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }} + strategy: + matrix: + include: - build-type: 'cublas' cuda-major-version: "12" cuda-minor-version: "0" From 9bcfda171b4ddd5eab8c9b864529654040f89df6 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Fri, 3 Jan 2025 20:48:23 +0100 Subject: [PATCH 140/849] ci: lower concurrent jobs Signed-off-by: Ettore Di Giacinto --- .github/workflows/image.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/image.yml b/.github/workflows/image.yml index 16419bc7..6b06b4b6 100644 --- a/.github/workflows/image.yml +++ b/.github/workflows/image.yml @@ -400,5 +400,5 @@ jobs: image-type: 'core' base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0" runs-on: 'self-hosted' - makeflags: "--jobs=4 --output-sync=target" + makeflags: "--jobs=1 --output-sync=target" skip-drivers: 'true' From 1006e8a2ede2d61502273ec4628a2ce6c1cec2f3 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Fri, 3 Jan 2025 21:58:04 +0100 Subject: [PATCH 141/849] ci: disable arm jobs Signed-off-by: Ettore Di Giacinto --- .github/workflows/image.yml | 80 ++++++++++++++++++------------------- 1 file changed, 40 insertions(+), 40 deletions(-) diff --git a/.github/workflows/image.yml b/.github/workflows/image.yml index 6b06b4b6..68727ebe 100644 --- a/.github/workflows/image.yml +++ b/.github/workflows/image.yml @@ -362,43 +362,43 @@ jobs: base-image: "ubuntu:22.04" skip-drivers: 'false' makeflags: "--jobs=4 --output-sync=target" - parallel-builds: - uses: ./.github/workflows/image_build.yml - with: - tag-latest: ${{ matrix.tag-latest }} - tag-suffix: ${{ matrix.tag-suffix }} - ffmpeg: ${{ matrix.ffmpeg }} - image-type: ${{ matrix.image-type }} - build-type: ${{ matrix.build-type }} - cuda-major-version: ${{ matrix.cuda-major-version }} - cuda-minor-version: ${{ matrix.cuda-minor-version }} - platforms: ${{ matrix.platforms }} - runs-on: ${{ matrix.runs-on }} - aio: ${{ matrix.aio }} - base-image: ${{ matrix.base-image }} - grpc-base-image: ${{ matrix.grpc-base-image }} - makeflags: ${{ matrix.makeflags }} - latest-image: ${{ matrix.latest-image }} - latest-image-aio: ${{ matrix.latest-image-aio }} - skip-drivers: ${{ matrix.skip-drivers }} - secrets: - dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }} - dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }} - quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }} - quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }} - strategy: - matrix: - include: - - build-type: 'cublas' - cuda-major-version: "12" - cuda-minor-version: "0" - platforms: 'linux/arm64' - tag-latest: 'false' - tag-suffix: '-nvidia-l4t-arm64-core' - latest-image: 'latest-nvidia-l4t-arm64-core' - ffmpeg: 'true' - image-type: 'core' - base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0" - runs-on: 'self-hosted' - makeflags: "--jobs=1 --output-sync=target" - skip-drivers: 'true' +# parallel-builds: +# uses: ./.github/workflows/image_build.yml +# with: +# tag-latest: ${{ matrix.tag-latest }} +# tag-suffix: ${{ matrix.tag-suffix }} +# ffmpeg: ${{ matrix.ffmpeg }} +# image-type: ${{ matrix.image-type }} +# build-type: ${{ matrix.build-type }} +# cuda-major-version: ${{ 
matrix.cuda-major-version }} +# cuda-minor-version: ${{ matrix.cuda-minor-version }} +# platforms: ${{ matrix.platforms }} +# runs-on: ${{ matrix.runs-on }} +# aio: ${{ matrix.aio }} +# base-image: ${{ matrix.base-image }} +# grpc-base-image: ${{ matrix.grpc-base-image }} +# makeflags: ${{ matrix.makeflags }} +# latest-image: ${{ matrix.latest-image }} +# latest-image-aio: ${{ matrix.latest-image-aio }} +# skip-drivers: ${{ matrix.skip-drivers }} +# secrets: +# dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }} +# dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }} +# quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }} +# quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }} +# strategy: +# matrix: +# include: +# - build-type: 'cublas' +# cuda-major-version: "12" +# cuda-minor-version: "0" +# platforms: 'linux/arm64' +# tag-latest: 'false' +# tag-suffix: '-nvidia-l4t-arm64-core' +# latest-image: 'latest-nvidia-l4t-arm64-core' +# ffmpeg: 'true' +# image-type: 'core' +# base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0" +# runs-on: 'self-hosted' +# makeflags: "--jobs=4 --output-sync=target" +# skip-drivers: 'true' From c553d73748d2ef66d86a9901b0739f1ffc9ea852 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sat, 4 Jan 2025 09:40:08 +0100 Subject: [PATCH 142/849] chore(deps): bump llama.cpp to 4b0c638b9 (#4532) deps(llama.cpp): bump to 4b0c638b9 Signed-off-by: Ettore Di Giacinto --- Makefile | 2 +- backend/cpp/llama/grpc-server.cpp | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Makefile b/Makefile index 11904d1b..fd9c7627 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ DETECT_LIBS?=true # llama.cpp versions GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be -CPPLLAMA_VERSION?=2f0ee84b9b02d2a98742308026f060ebdc2423f1 +CPPLLAMA_VERSION?=4b0c638b9a68f577cb2066b638c9f622d91ee661 # whisper.cpp version WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp diff --git a/backend/cpp/llama/grpc-server.cpp b/backend/cpp/llama/grpc-server.cpp index 98dd8fde..7632aebc 100644 --- a/backend/cpp/llama/grpc-server.cpp +++ b/backend/cpp/llama/grpc-server.cpp @@ -492,8 +492,8 @@ struct llama_server_context } common_init_result common_init = common_init_from_params(params); - model = common_init.model; - ctx = common_init.context; + model = common_init.model.release(); + ctx = common_init.context.release(); if (model == nullptr) { LOG_ERR("unable to load model: %s", params.model.c_str()); From 05841c24354519555b3a0f5db4970b954eb07c52 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sat, 4 Jan 2025 09:44:14 +0100 Subject: [PATCH 143/849] chore(model gallery): add drt-o1-7b (#4533) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index be624e9f..3251397c 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -2560,6 +2560,26 @@ - filename: Qwenwify2.5-32B-v4.5.Q4_K_M.gguf sha256: 52670acdc285356c01259f45b1953860f34deb4f80345ca63b60acc19165280c uri: huggingface://mradermacher/Qwenwify2.5-32B-v4.5-GGUF/Qwenwify2.5-32B-v4.5.Q4_K_M.gguf +- !!merge <<: *qwen25 + name: "drt-o1-7b" + urls: + - https://huggingface.co/Krystalan/DRT-o1-7B + - https://huggingface.co/QuantFactory/DRT-o1-7B-GGUF + description: | + In this work, we introduce DRT-o1, an attempt to bring the success of long thought reasoning to neural machine translation (MT). 
To this end, + + 🌟 We mine English sentences with similes or metaphors from existing literature books, which are suitable for translation via long thought. + 🌟 We propose a designed multi-agent framework with three agents (i.e., a translator, an advisor and an evaluator) to synthesize the MT samples with long thought. There are 22,264 synthesized samples in total. + 🌟 We train DRT-o1-8B, DRT-o1-7B and DRT-o1-14B using Llama-3.1-8B-Instruct, Qwen2.5-7B-Instruct and Qwen2.5-14B-Instruct as backbones. + + Our goal is not to achieve competitive performance with OpenAI’s O1 in neural machine translation (MT). Instead, we explore technical routes to bring the success of long thought to MT. To this end, we introduce DRT-o1, a byproduct of our exploration, and we hope it could facilitate the corresponding research in this direction. + overrides: + parameters: + model: DRT-o1-7B.Q4_K_M.gguf + files: + - filename: DRT-o1-7B.Q4_K_M.gguf + sha256: f592a2523f92ae29630b45fbb501bba7f2fbd99355975cd05fa989faf8d3597d + uri: huggingface://QuantFactory/DRT-o1-7B-GGUF/DRT-o1-7B.Q4_K_M.gguf - &smollm ## SmolLM url: "github:mudler/LocalAI/gallery/chatml.yaml@master" From ec66f7e3b1246e1e417fe472203bc95aea34515f Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sat, 4 Jan 2025 09:45:07 +0100 Subject: [PATCH 144/849] chore(model gallery): add codepy-deepthink-3b (#4534) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index 3251397c..f04f4e40 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -1039,6 +1039,22 @@ - filename: FastLlama-3.2-1B-Instruct-Q4_K_M.gguf sha256: 3c0303e9560c441a9abdcd0e4c04c47e7f6b21277c1e8c00eed94fc656da0be9 uri: huggingface://bartowski/FastLlama-3.2-1B-Instruct-GGUF/FastLlama-3.2-1B-Instruct-Q4_K_M.gguf +- !!merge <<: *llama32 + name: "codepy-deepthink-3b" + urls: + - https://huggingface.co/prithivMLmods/Codepy-Deepthink-3B + - https://huggingface.co/QuantFactory/Codepy-Deepthink-3B-GGUF + description: | + The Codepy 3B Deep Think Model is a fine-tuned version of the meta-llama/Llama-3.2-3B-Instruct base model, designed for text generation tasks that require deep reasoning, logical structuring, and problem-solving. This model leverages its optimized architecture to provide accurate and contextually relevant outputs for complex queries, making it ideal for applications in education, programming, and creative writing. + + With its robust natural language processing capabilities, Codepy 3B Deep Think excels in generating step-by-step solutions, creative content, and logical analyses. Its architecture integrates advanced understanding of both structured and unstructured data, ensuring precise text generation aligned with user inputs. 
+
+  overrides:
+    parameters:
+      model: Codepy-Deepthink-3B.Q4_K_M.gguf
+  files:
+    - filename: Codepy-Deepthink-3B.Q4_K_M.gguf
+      sha256: 6202976de1a1b23bb09448dd6f188b849e10f3f99366f829415533ea4445e853
+      uri: huggingface://QuantFactory/Codepy-Deepthink-3B-GGUF/Codepy-Deepthink-3B.Q4_K_M.gguf
 - &qwen25
   ## Qwen2.5
   name: "qwen2.5-14b-instruct"

From a8b3b3d6f4f8e82c9e8f45873024da9fe9b60355 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Sat, 4 Jan 2025 09:48:34 +0100
Subject: [PATCH 145/849] chore(model gallery): add llama3.1-8b-prm-deepseek-data (#4535)

Signed-off-by: Ettore Di Giacinto
---
 gallery/index.yaml | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/gallery/index.yaml b/gallery/index.yaml
index f04f4e40..0242b5ff 100644
--- a/gallery/index.yaml
+++ b/gallery/index.yaml
@@ -4489,6 +4489,22 @@
       - filename: L3.1-Purosani-2-8B.Q4_K_M.gguf
         sha256: e3eb8038a72b6e85b7a43c7806c32f01208f4644d54bf94d77ecad6286cf609f
         uri: huggingface://QuantFactory/L3.1-Purosani-2-8B-GGUF/L3.1-Purosani-2-8B.Q4_K_M.gguf
+- !!merge <<: *llama31
+  name: "llama3.1-8b-prm-deepseek-data"
+  urls:
+    - https://huggingface.co/RLHFlow/Llama3.1-8B-PRM-Deepseek-Data
+    - https://huggingface.co/QuantFactory/Llama3.1-8B-PRM-Deepseek-Data-GGUF
+  description: |
+    This is a process-supervised reward model (PRM) trained on Mistral-generated data from the project RLHFlow/RLHF-Reward-Modeling.
+
+    The model is trained from meta-llama/Llama-3.1-8B-Instruct on RLHFlow/Deepseek-PRM-Data for 1 epoch. We use a global batch size of 32 and a learning rate of 2e-6, and we pack the samples and split them into chunks of 8192 tokens. See more training details at https://github.com/RLHFlow/Online-RLHF/blob/main/math/llama-3.1-prm.yaml.
+  overrides:
+    parameters:
+      model: Llama3.1-8B-PRM-Deepseek-Data.Q4_K_M.gguf
+  files:
+    - filename: Llama3.1-8B-PRM-Deepseek-Data.Q4_K_M.gguf
+      sha256: 254c7ccc4ea3818fe5f6e3ffd5500c779b02058b98f9ce9a3856e54106d008e3
+      uri: huggingface://QuantFactory/Llama3.1-8B-PRM-Deepseek-Data-GGUF/Llama3.1-8B-PRM-Deepseek-Data.Q4_K_M.gguf
 - &deepseek
   ## Deepseek
   url: "github:mudler/LocalAI/gallery/deepseek.yaml@master"

From 6c6d840e6b273e43eacd0ce05dbffa9932b6b3f5 Mon Sep 17 00:00:00 2001
From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com>
Date: Sat, 4 Jan 2025 22:43:08 +0100
Subject: [PATCH 146/849] chore: :arrow_up: Update ggerganov/llama.cpp to
 `9394bbd484f802ce80d2858033583af3ef700d25` (#4536)

:arrow_up: Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index fd9c7627..80458a29 100644
--- a/Makefile
+++ b/Makefile
@@ -8,7 +8,7 @@ DETECT_LIBS?=true
 # llama.cpp versions
 GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp
 GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
-CPPLLAMA_VERSION?=4b0c638b9a68f577cb2066b638c9f622d91ee661
+CPPLLAMA_VERSION?=9394bbd484f802ce80d2858033583af3ef700d25

 # whisper.cpp version
 WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp

From a10033e8a4ca429a30d51004c229bb9a5f11e892 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Sun, 5 Jan 2025 09:12:12 +0100
Subject: [PATCH 147/849] chore(model gallery): add experimental-lwd-mirau-rp-14b-iq-imatrix (#4539)

Signed-off-by: Ettore Di Giacinto
---
 gallery/index.yaml | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)

diff --git a/gallery/index.yaml 
b/gallery/index.yaml index 0242b5ff..3126e8fb 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -2596,6 +2596,33 @@ - filename: DRT-o1-7B.Q4_K_M.gguf sha256: f592a2523f92ae29630b45fbb501bba7f2fbd99355975cd05fa989faf8d3597d uri: huggingface://QuantFactory/DRT-o1-7B-GGUF/DRT-o1-7B.Q4_K_M.gguf +- !!merge <<: *qwen25 + name: "experimental-lwd-mirau-rp-14b-iq-imatrix" + icon: https://cdn-uploads.huggingface.co/production/uploads/65d4cf2693a0a3744a27536c/99YhsFSeaGDYCq7XVcTcq.png + urls: + - https://huggingface.co/AetherArchitectural/lwd-Mirau-RP-14B + - https://huggingface.co/Lewdiculous/experimental-lwd-Mirau-RP-14B-GGUF-IQ-Imatrix + description: | + This model is designed to improve the controllability and consistency of current roleplaying models. We developed a story flow thought chain approach that makes the system prompts combined with the entire user-BOT dialogue read like a first-person narrative told by the BOT. We found this design greatly enhances the model's consistency and expressiveness. + + Additionally, we allow users to play two roles simultaneously: one as the director of the entire plot (see Special Designs), and another as an actor dialoguing with the BOT. Users can be viewed as writers who need to draft outlines and plot summaries, while the BOT helps complete story details, requiring users to have powerful control over the BOT. + + The model's output is divided into two parts: the model's inner monologue (which it believes is invisible to users) and the final response. + + Overall, mirau features: + + Superior character consistency + + Powerful long-context memory capability + + Transparent thinking with hidden thought chains + overrides: + parameters: + model: lwd-Mirau-RP-Q4_K_M-imat.gguf + files: + - filename: lwd-Mirau-RP-Q4_K_M-imat.gguf + sha256: 22ff461e9034b9ebded07b2a9d3d88c2f75359d5c069ebb3ee4e9c6ec5c45cf8 + uri: huggingface://Lewdiculous/experimental-lwd-Mirau-RP-14B-GGUF-IQ-Imatrix/lwd-Mirau-RP-Q4_K_M-imat.gguf - &smollm ## SmolLM url: "github:mudler/LocalAI/gallery/chatml.yaml@master" From e845cc040181983fcf79e4c17b786c137bf53eee Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sun, 5 Jan 2025 09:19:05 +0100 Subject: [PATCH 148/849] chore(model gallery): add llama-deepsync-3b (#4540) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index 3126e8fb..89a4118f 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -1055,6 +1055,20 @@ - filename: Codepy-Deepthink-3B.Q4_K_M.gguf sha256: 6202976de1a1b23bb09448dd6f188b849e10f3f99366f829415533ea4445e853 uri: huggingface://QuantFactory/Codepy-Deepthink-3B-GGUF/Codepy-Deepthink-3B.Q4_K_M.gguf +- !!merge <<: *llama32 + name: "llama-deepsync-3b" + urls: + - https://huggingface.co/prithivMLmods/Llama-Deepsync-3B + - https://huggingface.co/prithivMLmods/Llama-Deepsync-3B-GGUF + description: | + The Llama-Deepsync-3B-GGUF is a fine-tuned version of the Llama-3.2-3B-Instruct base model, designed for text generation tasks that require deep reasoning, logical structuring, and problem-solving. This model leverages its optimized architecture to provide accurate and contextually relevant outputs for complex queries, making it ideal for applications in education, programming, and creative writing. 
+
+  overrides:
+    parameters:
+      model: Llama-Deepsync-3B.Q4_K_M.gguf
+  files:
+    - filename: Llama-Deepsync-3B.Q4_K_M.gguf
+      sha256: f11c4d9b10a732845d8e64dc9badfcbb7d94053bc5fe11f89bb8e99ed557f711
+      uri: huggingface://prithivMLmods/Llama-Deepsync-3B-GGUF/Llama-Deepsync-3B.Q4_K_M.gguf
 - &qwen25
   ## Qwen2.5
   name: "qwen2.5-14b-instruct"

From b5992255ac62927960072096820131222c56e13a Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Sun, 5 Jan 2025 09:22:00 +0100
Subject: [PATCH 149/849] chore(model gallery): add qwentile2.5-32b-instruct (#4541)

Signed-off-by: Ettore Di Giacinto
---
 gallery/index.yaml | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

diff --git a/gallery/index.yaml b/gallery/index.yaml
index 89a4118f..1a194c9d 100644
--- a/gallery/index.yaml
+++ b/gallery/index.yaml
@@ -2387,6 +2387,31 @@
       - filename: miscii-14b-1225.Q4_K_M.gguf
         sha256: f21fe73450be394055aeb87b7619e98a09e5c190b48f145bdebef4e12df871fe
         uri: huggingface://mradermacher/miscii-14b-1225-GGUF/miscii-14b-1225.Q4_K_M.gguf
+- !!merge <<: *qwen25
+  name: "qwentile2.5-32b-instruct"
+  icon: https://cdn-uploads.huggingface.co/production/uploads/65b19c1b098c85365af5a83e/sF7RDZA7lFYOmGy4bGy1s.png
+  urls:
+    - https://huggingface.co/maldv/Qwentile2.5-32B-Instruct
+    - https://huggingface.co/bartowski/Qwentile2.5-32B-Instruct-GGUF
+  description: |
+    Qwentile 2.5 32B Instruct is a normalized denoised fourier interpolation of the following models:
+    - { "model": "AiCloser/Qwen2.5-32B-AGI", "base": "Qwen/Qwen2.5-32B", "alpha": 0.3 }
+    - { "model": "EVA-UNIT-01/EVA-Qwen2.5-32B-v0.2", "base": "Qwen/Qwen2.5-32B", "alpha": 0.7 }
+    - { "model": "fblgit/TheBeagle-v2beta-32B-MGS", "base": "Qwen/Qwen2.5-32B", "alpha": 0.6 }
+    - { "model": "huihui-ai/Qwen2.5-32B-Instruct-abliterated", "base": "Qwen/Qwen2.5-32B-Instruct", "alpha": 1.0 }
+    - { "model": "huihui-ai/QwQ-32B-Preview-abliterated", "base": "Qwen/Qwen2.5-32B", "alpha": 1.0 }
+    - { "model": "Qwen/QwQ-32B-Preview", "base": "Qwen/Qwen2.5-32B", "alpha": 0.8, "is_input": true }
+    - { "model": "rombodawg/Rombos-LLM-V2.5-Qwen-32b", "base": "Qwen/Qwen2.5-32B", "alpha": 1.0, "is_output": true }
+    - { "model": "nbeerbower/Qwen2.5-Gutenberg-Doppel-32B", "base": "Qwen/Qwen2.5-32B-Instruct", "alpha": 0.4 }
+    I started my experiment because QwQ is a really nifty model, but it was giving me problems with XML output - which is what I use for my thought tokens. So, I thought... let's just merge it in!
+    The first model worked pretty well, but I got a sense that the balances could be tweaked. Why not throw in some other models as well for fun and see if I can't run out of disk space in the process? 
+ overrides: + parameters: + model: Qwentile2.5-32B-Instruct-Q4_K_M.gguf + files: + - filename: Qwentile2.5-32B-Instruct-Q4_K_M.gguf + sha256: e476d6e3c15c78fc3f986d7ae8fa35c16116843827f2e6243c05767cef2f3615 + uri: huggingface://bartowski/Qwentile2.5-32B-Instruct-GGUF/Qwentile2.5-32B-Instruct-Q4_K_M.gguf - &archfunct license: apache-2.0 tags: From 0d7550ad5451c4ca1ccc2794d1d5e598a830208e Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sun, 5 Jan 2025 15:01:49 +0100 Subject: [PATCH 150/849] chore(deps): bump grpcio to 1.69.0 (#4543) Signed-off-by: Ettore Di Giacinto --- backend/python/autogptq/requirements.txt | 2 +- backend/python/bark/requirements.txt | 2 +- backend/python/common/template/requirements.txt | 2 +- backend/python/coqui/requirements.txt | 2 +- backend/python/diffusers/requirements.txt | 2 +- backend/python/exllama2/requirements.txt | 2 +- backend/python/mamba/requirements.txt | 2 +- backend/python/openvoice/requirements-intel.txt | 2 +- backend/python/openvoice/requirements.txt | 2 +- backend/python/parler-tts/requirements.txt | 2 +- backend/python/rerankers/requirements.txt | 2 +- backend/python/sentencetransformers/requirements.txt | 2 +- backend/python/transformers-musicgen/requirements.txt | 2 +- backend/python/transformers/requirements.txt | 2 +- backend/python/vall-e-x/requirements.txt | 2 +- backend/python/vllm/requirements.txt | 2 +- 16 files changed, 16 insertions(+), 16 deletions(-) diff --git a/backend/python/autogptq/requirements.txt b/backend/python/autogptq/requirements.txt index 22408f47..c857a867 100644 --- a/backend/python/autogptq/requirements.txt +++ b/backend/python/autogptq/requirements.txt @@ -1,6 +1,6 @@ accelerate auto-gptq==0.7.1 -grpcio==1.68.1 +grpcio==1.69.0 protobuf certifi transformers \ No newline at end of file diff --git a/backend/python/bark/requirements.txt b/backend/python/bark/requirements.txt index 3fca1de5..81c1273d 100644 --- a/backend/python/bark/requirements.txt +++ b/backend/python/bark/requirements.txt @@ -1,4 +1,4 @@ bark==0.1.5 -grpcio==1.68.1 +grpcio==1.69.0 protobuf certifi \ No newline at end of file diff --git a/backend/python/common/template/requirements.txt b/backend/python/common/template/requirements.txt index 893dc812..0f43df10 100644 --- a/backend/python/common/template/requirements.txt +++ b/backend/python/common/template/requirements.txt @@ -1,3 +1,3 @@ -grpcio==1.68.1 +grpcio==1.69.0 protobuf grpcio-tools \ No newline at end of file diff --git a/backend/python/coqui/requirements.txt b/backend/python/coqui/requirements.txt index 57638588..76c9ba4b 100644 --- a/backend/python/coqui/requirements.txt +++ b/backend/python/coqui/requirements.txt @@ -1,4 +1,4 @@ -grpcio==1.68.1 +grpcio==1.69.0 protobuf certifi packaging==24.1 \ No newline at end of file diff --git a/backend/python/diffusers/requirements.txt b/backend/python/diffusers/requirements.txt index 71832ead..d49155ed 100644 --- a/backend/python/diffusers/requirements.txt +++ b/backend/python/diffusers/requirements.txt @@ -1,5 +1,5 @@ setuptools -grpcio==1.68.1 +grpcio==1.69.0 pillow protobuf certifi diff --git a/backend/python/exllama2/requirements.txt b/backend/python/exllama2/requirements.txt index 408eb318..77464406 100644 --- a/backend/python/exllama2/requirements.txt +++ b/backend/python/exllama2/requirements.txt @@ -1,4 +1,4 @@ -grpcio==1.68.1 +grpcio==1.69.0 protobuf certifi wheel diff --git a/backend/python/mamba/requirements.txt b/backend/python/mamba/requirements.txt index 8e4eabf1..afc8b2a9 100644 --- a/backend/python/mamba/requirements.txt +++ 
b/backend/python/mamba/requirements.txt @@ -1,3 +1,3 @@ -grpcio==1.68.1 +grpcio==1.69.0 protobuf certifi \ No newline at end of file diff --git a/backend/python/openvoice/requirements-intel.txt b/backend/python/openvoice/requirements-intel.txt index 43fad1ad..39b2b8b0 100644 --- a/backend/python/openvoice/requirements-intel.txt +++ b/backend/python/openvoice/requirements-intel.txt @@ -4,7 +4,7 @@ torch==2.3.1+cxx11.abi torchaudio==2.3.1+cxx11.abi oneccl_bind_pt==2.3.100+xpu optimum[openvino] -grpcio==1.68.1 +grpcio==1.69.0 protobuf librosa==0.9.1 faster-whisper==0.9.0 diff --git a/backend/python/openvoice/requirements.txt b/backend/python/openvoice/requirements.txt index a105da94..62b886bb 100644 --- a/backend/python/openvoice/requirements.txt +++ b/backend/python/openvoice/requirements.txt @@ -1,4 +1,4 @@ -grpcio==1.68.1 +grpcio==1.69.0 protobuf librosa faster-whisper diff --git a/backend/python/parler-tts/requirements.txt b/backend/python/parler-tts/requirements.txt index faf4ea3d..e6ba016b 100644 --- a/backend/python/parler-tts/requirements.txt +++ b/backend/python/parler-tts/requirements.txt @@ -1,4 +1,4 @@ -grpcio==1.68.1 +grpcio==1.69.0 certifi llvmlite==0.43.0 setuptools \ No newline at end of file diff --git a/backend/python/rerankers/requirements.txt b/backend/python/rerankers/requirements.txt index 8e4eabf1..afc8b2a9 100644 --- a/backend/python/rerankers/requirements.txt +++ b/backend/python/rerankers/requirements.txt @@ -1,3 +1,3 @@ -grpcio==1.68.1 +grpcio==1.69.0 protobuf certifi \ No newline at end of file diff --git a/backend/python/sentencetransformers/requirements.txt b/backend/python/sentencetransformers/requirements.txt index b9dacf9b..6e03c63f 100644 --- a/backend/python/sentencetransformers/requirements.txt +++ b/backend/python/sentencetransformers/requirements.txt @@ -1,4 +1,4 @@ -grpcio==1.68.1 +grpcio==1.69.0 protobuf certifi datasets diff --git a/backend/python/transformers-musicgen/requirements.txt b/backend/python/transformers-musicgen/requirements.txt index 2e46b08f..f58e1e80 100644 --- a/backend/python/transformers-musicgen/requirements.txt +++ b/backend/python/transformers-musicgen/requirements.txt @@ -1,4 +1,4 @@ -grpcio==1.68.1 +grpcio==1.69.0 protobuf scipy==1.14.0 certifi \ No newline at end of file diff --git a/backend/python/transformers/requirements.txt b/backend/python/transformers/requirements.txt index d981fd99..a1eea776 100644 --- a/backend/python/transformers/requirements.txt +++ b/backend/python/transformers/requirements.txt @@ -1,4 +1,4 @@ -grpcio==1.68.1 +grpcio==1.69.0 protobuf certifi setuptools \ No newline at end of file diff --git a/backend/python/vall-e-x/requirements.txt b/backend/python/vall-e-x/requirements.txt index d981fd99..a1eea776 100644 --- a/backend/python/vall-e-x/requirements.txt +++ b/backend/python/vall-e-x/requirements.txt @@ -1,4 +1,4 @@ -grpcio==1.68.1 +grpcio==1.69.0 protobuf certifi setuptools \ No newline at end of file diff --git a/backend/python/vllm/requirements.txt b/backend/python/vllm/requirements.txt index d981fd99..a1eea776 100644 --- a/backend/python/vllm/requirements.txt +++ b/backend/python/vllm/requirements.txt @@ -1,4 +1,4 @@ -grpcio==1.68.1 +grpcio==1.69.0 protobuf certifi setuptools \ No newline at end of file From e55a1bed5993f8ed91939ba922cfef1494ff479f Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Sun, 5 Jan 2025 22:43:06 +0100 Subject: [PATCH 151/849] chore: :arrow_up: Update ggerganov/llama.cpp to `b56f079e28fda692f11a8b59200ceb815b05d419` 
(#4544) :arrow_up: Update ggerganov/llama.cpp Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 80458a29..2963a32c 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ DETECT_LIBS?=true # llama.cpp versions GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be -CPPLLAMA_VERSION?=9394bbd484f802ce80d2858033583af3ef700d25 +CPPLLAMA_VERSION?=b56f079e28fda692f11a8b59200ceb815b05d419 # whisper.cpp version WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp From 78b34505ab7cf3b543edd8da053e33973a7ebcf5 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Mon, 6 Jan 2025 10:08:35 +0100 Subject: [PATCH 152/849] chore(model gallery): add 32b-qwen2.5-kunou-v1 (#4545) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index 1a194c9d..27f45850 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -2662,6 +2662,26 @@ - filename: lwd-Mirau-RP-Q4_K_M-imat.gguf sha256: 22ff461e9034b9ebded07b2a9d3d88c2f75359d5c069ebb3ee4e9c6ec5c45cf8 uri: huggingface://Lewdiculous/experimental-lwd-Mirau-RP-14B-GGUF-IQ-Imatrix/lwd-Mirau-RP-Q4_K_M-imat.gguf +- !!merge <<: *qwen25 + name: "32b-qwen2.5-kunou-v1" + icon: https://huggingface.co/Sao10K/72B-Qwen2.5-Kunou-v1/resolve/main/knn.png + urls: + - https://huggingface.co/Sao10K/32B-Qwen2.5-Kunou-v1 + - https://huggingface.co/bartowski/32B-Qwen2.5-Kunou-v1-GGUF + description: | + I do not really have anything planned for this model other than it being a generalist, and Roleplay Model? It was just something made and planned in minutes. + Same with the 14B and 72B version. + Kunou's the name of an OC I worked on for a couple of years, for a... fanfic. mmm... + A kind-of successor to L3-70B-Euryale-v2.2 in all but name? I'm keeping Stheno/Euryale lineage to Llama series for now. + I had a version made on top of Nemotron, a supposed Euryale 2.4 but that flopped hard, it was not my cup of tea. + This version is basically a better, more cleaned up Dataset used on Euryale and Stheno. 
+ overrides: + parameters: + model: 32B-Qwen2.5-Kunou-v1-Q4_K_M.gguf + files: + - filename: 32B-Qwen2.5-Kunou-v1-Q4_K_M.gguf + sha256: b8910172b74d03c3463ac301589f54b96e54f61c67531fb6b523ecfe923aaffb + uri: huggingface://bartowski/32B-Qwen2.5-Kunou-v1-GGUF/32B-Qwen2.5-Kunou-v1-Q4_K_M.gguf - &smollm ## SmolLM url: "github:mudler/LocalAI/gallery/chatml.yaml@master" From d43adc020587fa684cb99542c8c37c7a2667377d Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Mon, 6 Jan 2025 10:42:45 +0100 Subject: [PATCH 153/849] chore(model gallery): add triangulum-10b (#4546) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index 27f45850..67138dda 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -9309,6 +9309,29 @@ - filename: Bio-Medical-Llama-3-8B.Q4_K_M.gguf sha256: 672939e0487d02c55734132c25a59f26e4deaac7cd49445a7028f2291139edcc uri: huggingface://QuantFactory/Bio-Medical-Llama-3-8B-GGUF/Bio-Medical-Llama-3-8B.Q4_K_M.gguf +- !!merge <<: *llama3 + name: "triangulum-10b" + icon: https://cdn-uploads.huggingface.co/production/uploads/65bb837dbfb878f46c77de4c/By0OJ1lMvP5ZvVvfEGvz5.png + urls: + - https://huggingface.co/prithivMLmods/Triangulum-10B + - https://huggingface.co/mradermacher/Triangulum-10B-GGUF + description: | + Triangulum 10B is a collection of pretrained and instruction-tuned generative models, designed for multilingual applications. These models are trained using synthetic datasets based on long chains of thought, enabling them to perform complex reasoning tasks effectively. + Key Features + Foundation Model: Built upon LLaMA's autoregressive language model, leveraging an optimized transformer architecture for enhanced performance. + Instruction Tuning: Includes supervised fine-tuning (SFT) and reinforcement learning with human feedback (RLHF) to align model outputs with human preferences for helpfulness and safety. + Multilingual Support: Designed to handle multiple languages, ensuring broad applicability across diverse linguistic contexts. + Training Approach + Synthetic Datasets: Utilizes long chain-of-thought synthetic data to enhance reasoning capabilities. + Supervised Fine-Tuning (SFT): Aligns the model to specific tasks through curated datasets. + Reinforcement Learning with Human Feedback (RLHF): Ensures the model adheres to human values and safety guidelines through iterative training processes. 
+ overrides: + parameters: + model: Triangulum-10B.Q4_K_M.gguf + files: + - filename: Triangulum-10B.Q4_K_M.gguf + sha256: dd071f99edf6b166044bf229cdeec19419c4c348e3fc3d6587cfcc55e6fb85fa + uri: huggingface://mradermacher/Triangulum-10B-GGUF/Triangulum-10B.Q4_K_M.gguf - &command-R ### START Command-r url: "github:mudler/LocalAI/gallery/command-r.yaml@master" From 2ffa89b8b99984bc5174ce40914d31ed757c863e Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Mon, 6 Jan 2025 10:43:09 +0100 Subject: [PATCH 154/849] chore(model gallery): add 14b-qwen2.5-kunou-v1 (#4547) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index 67138dda..445da93a 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -2682,6 +2682,25 @@ - filename: 32B-Qwen2.5-Kunou-v1-Q4_K_M.gguf sha256: b8910172b74d03c3463ac301589f54b96e54f61c67531fb6b523ecfe923aaffb uri: huggingface://bartowski/32B-Qwen2.5-Kunou-v1-GGUF/32B-Qwen2.5-Kunou-v1-Q4_K_M.gguf +- !!merge <<: *qwen25 + name: "14b-qwen2.5-kunou-v1" + urls: + - https://huggingface.co/Sao10K/14B-Qwen2.5-Kunou-v1 + - https://huggingface.co/DevQuasar/Sao10K.14B-Qwen2.5-Kunou-v1-GGUF + description: | + I do not really have anything planned for this model other than it being a generalist, and Roleplay Model? It was just something made and planned in minutes. + This is the little sister variant, the small 14B version. + Kunou's the name of an OC I worked on for a couple of years, for a... fanfic. mmm... + + A kind-of successor to my smaller model series. It works pretty nicely I think? + This version is basically a better, more cleaned up Dataset used on Euryale and Stheno. + overrides: + parameters: + model: Sao10K.14B-Qwen2.5-Kunou-v1.Q4_K_M.gguf + files: + - filename: Sao10K.14B-Qwen2.5-Kunou-v1.Q4_K_M.gguf + sha256: 7b7af50076e15c305a2a1bed7ad766dc6deb61eef3c2e6a40d4c94ad45623845 + uri: huggingface://DevQuasar/Sao10K.14B-Qwen2.5-Kunou-v1-GGUF/Sao10K.14B-Qwen2.5-Kunou-v1.Q4_K_M.gguf - &smollm ## SmolLM url: "github:mudler/LocalAI/gallery/chatml.yaml@master" From 3f0850b58b4173ac3eab8ae7b2b8ae5d362e14da Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 6 Jan 2025 20:20:34 +0000 Subject: [PATCH 155/849] chore(deps): Bump docs/themes/hugo-theme-relearn from `d25f856` to `80e448e` (#4549) chore(deps): Bump docs/themes/hugo-theme-relearn Bumps [docs/themes/hugo-theme-relearn](https://github.com/McShelby/hugo-theme-relearn) from `d25f856` to `80e448e`. - [Release notes](https://github.com/McShelby/hugo-theme-relearn/releases) - [Commits](https://github.com/McShelby/hugo-theme-relearn/compare/d25f856477223170b0de0b284252aa54b3e6255b...80e448e5bdaa92c87ee0d0d86f1125c8606ebf5f) --- updated-dependencies: - dependency-name: docs/themes/hugo-theme-relearn dependency-type: direct:production ... 
Signed-off-by: dependabot[bot]
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 docs/themes/hugo-theme-relearn | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/themes/hugo-theme-relearn b/docs/themes/hugo-theme-relearn
index d25f8564..80e448e5 160000
--- a/docs/themes/hugo-theme-relearn
+++ b/docs/themes/hugo-theme-relearn
@@ -1 +1 @@
-Subproject commit d25f856477223170b0de0b284252aa54b3e6255b
+Subproject commit 80e448e5bdaa92c87ee0d0d86f1125c8606ebf5f

From bf37eebecb64b4bc5d307a6ad9bb6f659b6a9449 Mon Sep 17 00:00:00 2001
From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com>
Date: Tue, 7 Jan 2025 10:14:07 +0100
Subject: [PATCH 156/849] chore: :arrow_up: Update ggerganov/llama.cpp to
 `ecebbd292d741ac084cf248146b2cfb17002aa1d` (#4552)

:arrow_up: Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 2963a32c..08d5729d 100644
--- a/Makefile
+++ b/Makefile
@@ -8,7 +8,7 @@ DETECT_LIBS?=true
 # llama.cpp versions
 GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp
 GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
-CPPLLAMA_VERSION?=b56f079e28fda692f11a8b59200ceb815b05d419
+CPPLLAMA_VERSION?=ecebbd292d741ac084cf248146b2cfb17002aa1d

 # whisper.cpp version
 WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp

From 8cc2d01caad64938dda49685178a9f25c3b2d058 Mon Sep 17 00:00:00 2001
From: Max Goltzsche
Date: Tue, 7 Jan 2025 17:18:21 +0100
Subject: [PATCH 157/849] feat(ui): path prefix support via HTTP header (#4497)

Makes the web app honour the `X-Forwarded-Prefix` HTTP request header
that may be sent by a reverse-proxy in order to inform the app that its
public routes contain a path prefix. For instance, this allows serving
the webapp via a reverse-proxy/ingress controller under a path
prefix/sub path such as e.g. `/localai/` while still being able to use
the regular LocalAI routes/paths without a prefix when connecting to
the LocalAI server directly.

Changes:
* Add a new `StripPathPrefix` middleware to strip the path prefix
  (provided with the `X-Forwarded-Prefix` HTTP request header) from the
  request path prior to matching the HTTP route.
* Add a `BaseURL` utility function to build the base URL, honouring the
  `X-Forwarded-Prefix` HTTP request header.
* Generate the derived base URL into the HTML (`head.html` template) as
  a `<base/>` tag.
* Make all webapp-internal URLs (within HTML+JS) relative in order to
  make the browser resolve them against the `<base/>` URL specified
  within each HTML page's header.
* Make font URLs within the CSS files relative to the CSS file.
* Generate redirect location URLs using the new `BaseURL` function.
* Use the new `BaseURL` function to generate absolute URLs within
  gallery JSON responses.

Closes #3095

TL;DR: The header-based approach moves the path prefix configuration
concern entirely to the reverse-proxy/ingress, as opposed to having to
align the path prefix configuration between LocalAI, the reverse-proxy
and potentially other internal LocalAI clients.
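For illustration, here is a minimal sketch of the proxy-side contract
this change assumes; the prefix value, port and route below are
hypothetical and not part of this patch. The proxy forwards the
original, prefixed path unchanged and announces the prefix via the
`X-Forwarded-Prefix` header, and LocalAI strips it again before route
matching:

    // Hypothetical sketch: what a reverse-proxied request to LocalAI
    // looks like, written as a plain Go client standing in for the proxy.
    package main

    import (
        "fmt"
        "net/http"
    )

    func main() {
        // The browser requested e.g. https://example.org/localai/v1/models;
        // the proxy forwards the prefixed path and sets X-Forwarded-Prefix.
        req, err := http.NewRequest(http.MethodGet, "http://127.0.0.1:8080/localai/v1/models", nil)
        if err != nil {
            panic(err)
        }
        req.Header.Set("X-Forwarded-Prefix", "/localai/")

        // The StripPathPrefix middleware rewrites the request path to
        // /v1/models before the router matches it, so the regular handler runs.
        resp, err := http.DefaultClient.Do(req)
        if err != nil {
            panic(err)
        }
        defer resp.Body.Close()
        fmt.Println(resp.Status)
    }

Requests without the header keep their path as-is, so direct
(non-proxied) access continues to work under the regular routes, while
`BaseURL` uses the same header to render matching links and redirect
targets.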
The gofiber swagger handler already supports path prefixes this way, see https://github.com/gofiber/swagger/blob/e2d9e9916d8809e8b23c4365f8acfbbd8a71c4cd/swagger.go#L79 Signed-off-by: Max Goltzsche --- core/http/app.go | 2 + core/http/app_test.go | 52 ++++++++ core/http/elements/buttons.go | 6 +- core/http/elements/gallery.go | 2 +- core/http/elements/progressbar.go | 4 +- core/http/endpoints/explorer/dashboard.go | 2 + core/http/endpoints/localai/gallery.go | 6 +- core/http/endpoints/localai/welcome.go | 2 + core/http/explorer.go | 2 + core/http/middleware/auth.go | 5 +- core/http/middleware/strippathprefix.go | 36 ++++++ core/http/middleware/strippathprefix_test.go | 121 ++++++++++++++++++ core/http/render.go | 5 +- core/http/routes/ui.go | 22 +++- core/http/static/assets/font1.css | 10 +- core/http/static/assets/font2.css | 10 +- core/http/static/chat.js | 2 +- core/http/static/image.js | 2 +- core/http/static/talk.js | 6 +- core/http/static/tts.js | 2 +- core/http/utils/baseurl.go | 24 ++++ core/http/utils/baseurl_test.go | 48 +++++++ core/http/views/404.html | 2 +- core/http/views/chat.html | 8 +- core/http/views/explorer.html | 2 +- core/http/views/index.html | 6 +- core/http/views/login.html | 2 + core/http/views/models.html | 18 +-- core/http/views/p2p.html | 10 +- core/http/views/partials/footer.html | 2 +- core/http/views/partials/head.html | 32 ++--- core/http/views/partials/inprogress.html | 6 +- core/http/views/partials/navbar.html | 36 +++--- core/http/views/partials/navbar_explorer.html | 8 +- core/http/views/talk.html | 2 +- core/http/views/text2image.html | 8 +- core/http/views/tts.html | 8 +- 37 files changed, 416 insertions(+), 105 deletions(-) create mode 100644 core/http/middleware/strippathprefix.go create mode 100644 core/http/middleware/strippathprefix_test.go create mode 100644 core/http/utils/baseurl.go create mode 100644 core/http/utils/baseurl_test.go diff --git a/core/http/app.go b/core/http/app.go index a2d8b87a..47d89a10 100644 --- a/core/http/app.go +++ b/core/http/app.go @@ -87,6 +87,8 @@ func API(application *application.Application) (*fiber.App, error) { router := fiber.New(fiberCfg) + router.Use(middleware.StripPathPrefix()) + router.Hooks().OnListen(func(listenData fiber.ListenData) error { scheme := "http" if listenData.TLS { diff --git a/core/http/app_test.go b/core/http/app_test.go index 7c57ba21..6bf1806b 100644 --- a/core/http/app_test.go +++ b/core/http/app_test.go @@ -237,6 +237,31 @@ func postInvalidRequest(url string) (error, int) { return nil, resp.StatusCode } +func getRequest(url string, header http.Header) (error, int, []byte) { + + req, err := http.NewRequest("GET", url, nil) + if err != nil { + return err, -1, nil + } + + req.Header = header + + client := &http.Client{} + resp, err := client.Do(req) + if err != nil { + return err, -1, nil + } + + defer resp.Body.Close() + + body, err := io.ReadAll(resp.Body) + if err != nil { + return err, -1, nil + } + + return nil, resp.StatusCode, body +} + const bertEmbeddingsURL = `https://gist.githubusercontent.com/mudler/0a080b166b87640e8644b09c2aee6e3b/raw/f0e8c26bb72edc16d9fbafbfd6638072126ff225/bert-embeddings-gallery.yaml` //go:embed backend-assets/* @@ -345,6 +370,33 @@ var _ = Describe("API test", func() { }) }) + Context("URL routing Tests", func() { + It("Should support reverse-proxy when unauthenticated", func() { + + err, sc, body := getRequest("http://127.0.0.1:9090/myprefix/", http.Header{ + "X-Forwarded-Proto": {"https"}, + "X-Forwarded-Host": {"example.org"}, + "X-Forwarded-Prefix": 
{"/myprefix/"}, + }) + Expect(err).To(BeNil(), "error") + Expect(sc).To(Equal(401), "status code") + Expect(string(body)).To(ContainSubstring(``), "body") + }) + + It("Should support reverse-proxy when authenticated", func() { + + err, sc, body := getRequest("http://127.0.0.1:9090/myprefix/", http.Header{ + "Authorization": {bearerKey}, + "X-Forwarded-Proto": {"https"}, + "X-Forwarded-Host": {"example.org"}, + "X-Forwarded-Prefix": {"/myprefix/"}, + }) + Expect(err).To(BeNil(), "error") + Expect(sc).To(Equal(200), "status code") + Expect(string(body)).To(ContainSubstring(``), "body") + }) + }) + Context("Applying models", func() { It("applies models from a gallery", func() { diff --git a/core/http/elements/buttons.go b/core/http/elements/buttons.go index 7cfe968f..2364a0b3 100644 --- a/core/http/elements/buttons.go +++ b/core/http/elements/buttons.go @@ -16,7 +16,7 @@ func installButton(galleryName string) elem.Node { "class": "float-right inline-block rounded bg-primary px-6 pb-2.5 mb-3 pt-2.5 text-xs font-medium uppercase leading-normal text-white shadow-primary-3 transition duration-150 ease-in-out hover:bg-primary-accent-300 hover:shadow-primary-2 focus:bg-primary-accent-300 focus:shadow-primary-2 focus:outline-none focus:ring-0 active:bg-primary-600 active:shadow-primary-2 dark:shadow-black/30 dark:hover:shadow-dark-strong dark:focus:shadow-dark-strong dark:active:shadow-dark-strong", "hx-swap": "outerHTML", // post the Model ID as param - "hx-post": "/browse/install/model/" + galleryName, + "hx-post": "browse/install/model/" + galleryName, }, elem.I( attrs.Props{ @@ -36,7 +36,7 @@ func reInstallButton(galleryName string) elem.Node { "hx-target": "#action-div-" + dropBadChars(galleryName), "hx-swap": "outerHTML", // post the Model ID as param - "hx-post": "/browse/install/model/" + galleryName, + "hx-post": "browse/install/model/" + galleryName, }, elem.I( attrs.Props{ @@ -80,7 +80,7 @@ func deleteButton(galleryID string) elem.Node { "hx-target": "#action-div-" + dropBadChars(galleryID), "hx-swap": "outerHTML", // post the Model ID as param - "hx-post": "/browse/delete/model/" + galleryID, + "hx-post": "browse/delete/model/" + galleryID, }, elem.I( attrs.Props{ diff --git a/core/http/elements/gallery.go b/core/http/elements/gallery.go index c9d7a1cb..5ab68508 100644 --- a/core/http/elements/gallery.go +++ b/core/http/elements/gallery.go @@ -47,7 +47,7 @@ func searchableElement(text, icon string) elem.Node { // "value": text, //"class": "inline-block bg-gray-200 rounded-full px-3 py-1 text-sm font-semibold text-gray-700 mr-2 mb-2", "href": "#!", - "hx-post": "/browse/search/models", + "hx-post": "browse/search/models", "hx-target": "#search-results", // TODO: this doesn't work // "hx-vals": `{ \"search\": \"` + text + `\" }`, diff --git a/core/http/elements/progressbar.go b/core/http/elements/progressbar.go index c9af98d9..7dc340b2 100644 --- a/core/http/elements/progressbar.go +++ b/core/http/elements/progressbar.go @@ -64,7 +64,7 @@ func StartProgressBar(uid, progress, text string) string { return elem.Div( attrs.Props{ "hx-trigger": "done", - "hx-get": "/browse/job/" + uid, + "hx-get": "browse/job/" + uid, "hx-swap": "outerHTML", "hx-target": "this", }, @@ -77,7 +77,7 @@ func StartProgressBar(uid, progress, text string) string { }, elem.Text(bluemonday.StrictPolicy().Sanitize(text)), //Perhaps overly defensive elem.Div(attrs.Props{ - "hx-get": "/browse/job/progress/" + uid, + "hx-get": "browse/job/progress/" + uid, "hx-trigger": "every 600ms", "hx-target": "this", "hx-swap": 
"innerHTML", diff --git a/core/http/endpoints/explorer/dashboard.go b/core/http/endpoints/explorer/dashboard.go index 9c731d9a..3c896681 100644 --- a/core/http/endpoints/explorer/dashboard.go +++ b/core/http/endpoints/explorer/dashboard.go @@ -6,6 +6,7 @@ import ( "github.com/gofiber/fiber/v2" "github.com/mudler/LocalAI/core/explorer" + "github.com/mudler/LocalAI/core/http/utils" "github.com/mudler/LocalAI/internal" ) @@ -14,6 +15,7 @@ func Dashboard() func(*fiber.Ctx) error { summary := fiber.Map{ "Title": "LocalAI API - " + internal.PrintableVersion(), "Version": internal.PrintableVersion(), + "BaseURL": utils.BaseURL(c), } if string(c.Context().Request.Header.ContentType()) == "application/json" || len(c.Accepts("html")) == 0 { diff --git a/core/http/endpoints/localai/gallery.go b/core/http/endpoints/localai/gallery.go index 23c5d4b8..5b2968f4 100644 --- a/core/http/endpoints/localai/gallery.go +++ b/core/http/endpoints/localai/gallery.go @@ -9,6 +9,7 @@ import ( "github.com/google/uuid" "github.com/mudler/LocalAI/core/config" "github.com/mudler/LocalAI/core/gallery" + "github.com/mudler/LocalAI/core/http/utils" "github.com/mudler/LocalAI/core/schema" "github.com/mudler/LocalAI/core/services" "github.com/rs/zerolog/log" @@ -82,7 +83,8 @@ func (mgs *ModelGalleryEndpointService) ApplyModelGalleryEndpoint() func(c *fibe Galleries: mgs.galleries, ConfigURL: input.ConfigURL, } - return c.JSON(schema.GalleryResponse{ID: uuid.String(), StatusURL: c.BaseURL() + "/models/jobs/" + uuid.String()}) + + return c.JSON(schema.GalleryResponse{ID: uuid.String(), StatusURL: fmt.Sprintf("%smodels/jobs/%s", utils.BaseURL(c), uuid.String())}) } } @@ -105,7 +107,7 @@ func (mgs *ModelGalleryEndpointService) DeleteModelGalleryEndpoint() func(c *fib return err } - return c.JSON(schema.GalleryResponse{ID: uuid.String(), StatusURL: c.BaseURL() + "/models/jobs/" + uuid.String()}) + return c.JSON(schema.GalleryResponse{ID: uuid.String(), StatusURL: fmt.Sprintf("%smodels/jobs/%s", utils.BaseURL(c), uuid.String())}) } } diff --git a/core/http/endpoints/localai/welcome.go b/core/http/endpoints/localai/welcome.go index a1476886..57cf8809 100644 --- a/core/http/endpoints/localai/welcome.go +++ b/core/http/endpoints/localai/welcome.go @@ -4,6 +4,7 @@ import ( "github.com/gofiber/fiber/v2" "github.com/mudler/LocalAI/core/config" "github.com/mudler/LocalAI/core/gallery" + "github.com/mudler/LocalAI/core/http/utils" "github.com/mudler/LocalAI/core/p2p" "github.com/mudler/LocalAI/core/services" "github.com/mudler/LocalAI/internal" @@ -32,6 +33,7 @@ func WelcomeEndpoint(appConfig *config.ApplicationConfig, summary := fiber.Map{ "Title": "LocalAI API - " + internal.PrintableVersion(), "Version": internal.PrintableVersion(), + "BaseURL": utils.BaseURL(c), "Models": modelsWithoutConfig, "ModelsConfig": backendConfigs, "GalleryConfig": galleryConfigs, diff --git a/core/http/explorer.go b/core/http/explorer.go index bdcb93b1..36609add 100644 --- a/core/http/explorer.go +++ b/core/http/explorer.go @@ -7,6 +7,7 @@ import ( "github.com/gofiber/fiber/v2/middleware/favicon" "github.com/gofiber/fiber/v2/middleware/filesystem" "github.com/mudler/LocalAI/core/explorer" + "github.com/mudler/LocalAI/core/http/middleware" "github.com/mudler/LocalAI/core/http/routes" ) @@ -22,6 +23,7 @@ func Explorer(db *explorer.Database) *fiber.App { app := fiber.New(fiberCfg) + app.Use(middleware.StripPathPrefix()) routes.RegisterExplorerRoutes(app, db) httpFS := http.FS(embedDirStatic) diff --git a/core/http/middleware/auth.go 
b/core/http/middleware/auth.go index 18e7bc3c..23141d4c 100644 --- a/core/http/middleware/auth.go +++ b/core/http/middleware/auth.go @@ -8,6 +8,7 @@ import ( "github.com/gofiber/fiber/v2" "github.com/gofiber/fiber/v2/middleware/keyauth" "github.com/mudler/LocalAI/core/config" + "github.com/mudler/LocalAI/core/http/utils" ) // This file contains the configuration generators and handler functions that are used along with the fiber/keyauth middleware @@ -39,7 +40,9 @@ func getApiKeyErrorHandler(applicationConfig *config.ApplicationConfig) fiber.Er if applicationConfig.OpaqueErrors { return ctx.SendStatus(401) } - return ctx.Status(401).Render("views/login", nil) + return ctx.Status(401).Render("views/login", fiber.Map{ + "BaseURL": utils.BaseURL(ctx), + }) } if applicationConfig.OpaqueErrors { return ctx.SendStatus(500) diff --git a/core/http/middleware/strippathprefix.go b/core/http/middleware/strippathprefix.go new file mode 100644 index 00000000..5c45d55d --- /dev/null +++ b/core/http/middleware/strippathprefix.go @@ -0,0 +1,36 @@ +package middleware + +import ( + "strings" + + "github.com/gofiber/fiber/v2" +) + +// StripPathPrefix returns a middleware that strips a path prefix from the request path. +// The path prefix is obtained from the X-Forwarded-Prefix HTTP request header. +func StripPathPrefix() fiber.Handler { + return func(c *fiber.Ctx) error { + for _, prefix := range c.GetReqHeaders()["X-Forwarded-Prefix"] { + if prefix != "" { + path := c.Path() + pos := len(prefix) + + if prefix[pos-1] == '/' { + pos-- + } else { + prefix += "/" + } + + if strings.HasPrefix(path, prefix) { + c.Path(path[pos:]) + break + } else if prefix[:pos] == path { + c.Redirect(prefix) + return nil + } + } + } + + return c.Next() + } +} diff --git a/core/http/middleware/strippathprefix_test.go b/core/http/middleware/strippathprefix_test.go new file mode 100644 index 00000000..529f815f --- /dev/null +++ b/core/http/middleware/strippathprefix_test.go @@ -0,0 +1,121 @@ +package middleware + +import ( + "net/http/httptest" + "testing" + + "github.com/gofiber/fiber/v2" + "github.com/stretchr/testify/require" +) + +func TestStripPathPrefix(t *testing.T) { + var actualPath string + + app := fiber.New() + + app.Use(StripPathPrefix()) + + app.Get("/hello/world", func(c *fiber.Ctx) error { + actualPath = c.Path() + return nil + }) + + app.Get("/", func(c *fiber.Ctx) error { + actualPath = c.Path() + return nil + }) + + for _, tc := range []struct { + name string + path string + prefixHeader []string + expectStatus int + expectPath string + }{ + { + name: "without prefix and header", + path: "/hello/world", + expectStatus: 200, + expectPath: "/hello/world", + }, + { + name: "without prefix and headers on root path", + path: "/", + expectStatus: 200, + expectPath: "/", + }, + { + name: "without prefix but header", + path: "/hello/world", + prefixHeader: []string{"/otherprefix/"}, + expectStatus: 200, + expectPath: "/hello/world", + }, + { + name: "with prefix but non-matching header", + path: "/prefix/hello/world", + prefixHeader: []string{"/otherprefix/"}, + expectStatus: 404, + }, + { + name: "with prefix and matching header", + path: "/myprefix/hello/world", + prefixHeader: []string{"/myprefix/"}, + expectStatus: 200, + expectPath: "/hello/world", + }, + { + name: "with prefix and 1st header matching", + path: "/myprefix/hello/world", + prefixHeader: []string{"/myprefix/", "/otherprefix/"}, + expectStatus: 200, + expectPath: "/hello/world", + }, + { + name: "with prefix and 2nd header matching", + path: 
"/myprefix/hello/world", + prefixHeader: []string{"/otherprefix/", "/myprefix/"}, + expectStatus: 200, + expectPath: "/hello/world", + }, + { + name: "with prefix and header not ending with slash", + path: "/myprefix/hello/world", + prefixHeader: []string{"/myprefix"}, + expectStatus: 200, + expectPath: "/hello/world", + }, + { + name: "with prefix and non-matching header not ending with slash", + path: "/myprefix-suffix/hello/world", + prefixHeader: []string{"/myprefix"}, + expectStatus: 404, + }, + { + name: "redirect when prefix does not end with a slash", + path: "/myprefix", + prefixHeader: []string{"/myprefix"}, + expectStatus: 302, + expectPath: "/myprefix/", + }, + } { + t.Run(tc.name, func(t *testing.T) { + actualPath = "" + req := httptest.NewRequest("GET", tc.path, nil) + if tc.prefixHeader != nil { + req.Header["X-Forwarded-Prefix"] = tc.prefixHeader + } + + resp, err := app.Test(req, -1) + + require.NoError(t, err) + require.Equal(t, tc.expectStatus, resp.StatusCode, "response status code") + + if tc.expectStatus == 200 { + require.Equal(t, tc.expectPath, actualPath, "rewritten path") + } else if tc.expectStatus == 302 { + require.Equal(t, tc.expectPath, resp.Header.Get("Location"), "redirect location") + } + }) + } +} diff --git a/core/http/render.go b/core/http/render.go index 205f7ca3..2f889f57 100644 --- a/core/http/render.go +++ b/core/http/render.go @@ -10,6 +10,7 @@ import ( "github.com/gofiber/fiber/v2" fiberhtml "github.com/gofiber/template/html/v2" "github.com/microcosm-cc/bluemonday" + "github.com/mudler/LocalAI/core/http/utils" "github.com/mudler/LocalAI/core/schema" "github.com/russross/blackfriday" ) @@ -26,7 +27,9 @@ func notFoundHandler(c *fiber.Ctx) error { }) } else { // The client expects an HTML response - return c.Status(fiber.StatusNotFound).Render("views/404", fiber.Map{}) + return c.Status(fiber.StatusNotFound).Render("views/404", fiber.Map{ + "BaseURL": utils.BaseURL(c), + }) } } diff --git a/core/http/routes/ui.go b/core/http/routes/ui.go index 6ea38f35..92d20544 100644 --- a/core/http/routes/ui.go +++ b/core/http/routes/ui.go @@ -6,20 +6,21 @@ import ( "sort" "strings" - "github.com/microcosm-cc/bluemonday" "github.com/mudler/LocalAI/core/config" "github.com/mudler/LocalAI/core/gallery" "github.com/mudler/LocalAI/core/http/elements" "github.com/mudler/LocalAI/core/http/endpoints/localai" + "github.com/mudler/LocalAI/core/http/utils" "github.com/mudler/LocalAI/core/p2p" "github.com/mudler/LocalAI/core/services" "github.com/mudler/LocalAI/internal" "github.com/mudler/LocalAI/pkg/model" "github.com/mudler/LocalAI/pkg/xsync" - "github.com/rs/zerolog/log" "github.com/gofiber/fiber/v2" "github.com/google/uuid" + "github.com/microcosm-cc/bluemonday" + "github.com/rs/zerolog/log" ) type modelOpCache struct { @@ -91,6 +92,7 @@ func RegisterUIRoutes(app *fiber.App, app.Get("/p2p", func(c *fiber.Ctx) error { summary := fiber.Map{ "Title": "LocalAI - P2P dashboard", + "BaseURL": utils.BaseURL(c), "Version": internal.PrintableVersion(), //"Nodes": p2p.GetAvailableNodes(""), //"FederatedNodes": p2p.GetAvailableNodes(p2p.FederatedID), @@ -149,6 +151,7 @@ func RegisterUIRoutes(app *fiber.App, summary := fiber.Map{ "Title": "LocalAI - Models", + "BaseURL": utils.BaseURL(c), "Version": internal.PrintableVersion(), "Models": template.HTML(elements.ListModels(models, processingModels, galleryService)), "Repositories": appConfig.Galleries, @@ -308,6 +311,7 @@ func RegisterUIRoutes(app *fiber.App, summary := fiber.Map{ "Title": "LocalAI - Chat with " + c.Params("model"), 
+			"BaseURL":      utils.BaseURL(c),
 			"ModelsConfig": backendConfigs,
 			"Model":        c.Params("model"),
 			"Version":      internal.PrintableVersion(),
@@ -323,11 +327,12 @@ func RegisterUIRoutes(app *fiber.App,
 
 		if len(backendConfigs) == 0 {
 			// If no model is available redirect to the index which suggests how to install models
-			return c.Redirect("/")
+			return c.Redirect(utils.BaseURL(c))
 		}
 
 		summary := fiber.Map{
 			"Title":        "LocalAI - Talk",
+			"BaseURL":      utils.BaseURL(c),
 			"ModelsConfig": backendConfigs,
 			"Model":        backendConfigs[0],
 			"IsP2PEnabled": p2p.IsP2PEnabled(),
@@ -344,11 +349,12 @@ func RegisterUIRoutes(app *fiber.App,
 
 		if len(backendConfigs) == 0 {
 			// If no model is available redirect to the index which suggests how to install models
-			return c.Redirect("/")
+			return c.Redirect(utils.BaseURL(c))
 		}
 
 		summary := fiber.Map{
 			"Title":        "LocalAI - Chat with " + backendConfigs[0],
+			"BaseURL":      utils.BaseURL(c),
 			"ModelsConfig": backendConfigs,
 			"Model":        backendConfigs[0],
 			"Version":      internal.PrintableVersion(),
@@ -364,6 +370,7 @@
 
 		summary := fiber.Map{
 			"Title":        "LocalAI - Generate images with " + c.Params("model"),
+			"BaseURL":      utils.BaseURL(c),
 			"ModelsConfig": backendConfigs,
 			"Model":        c.Params("model"),
 			"Version":      internal.PrintableVersion(),
@@ -380,11 +387,12 @@
 
 		if len(backendConfigs) == 0 {
 			// If no model is available redirect to the index which suggests how to install models
-			return c.Redirect("/")
+			return c.Redirect(utils.BaseURL(c))
 		}
 
 		summary := fiber.Map{
 			"Title":        "LocalAI - Generate images with " + backendConfigs[0].Name,
+			"BaseURL":      utils.BaseURL(c),
 			"ModelsConfig": backendConfigs,
 			"Model":        backendConfigs[0].Name,
 			"Version":      internal.PrintableVersion(),
@@ -400,6 +408,7 @@
 
 		summary := fiber.Map{
 			"Title":        "LocalAI - Generate images with " + c.Params("model"),
+			"BaseURL":      utils.BaseURL(c),
 			"ModelsConfig": backendConfigs,
 			"Model":        c.Params("model"),
 			"Version":      internal.PrintableVersion(),
@@ -416,11 +425,12 @@
 
 		if len(backendConfigs) == 0 {
 			// If no model is available redirect to the index which suggests how to install models
-			return c.Redirect("/")
+			return c.Redirect(utils.BaseURL(c))
 		}
 
 		summary := fiber.Map{
 			"Title":        "LocalAI - Generate audio with " + backendConfigs[0].Name,
+			"BaseURL":      utils.BaseURL(c),
 			"ModelsConfig": backendConfigs,
 			"Model":        backendConfigs[0].Name,
 			"IsP2PEnabled": p2p.IsP2PEnabled(),
diff --git a/core/http/static/assets/font1.css b/core/http/static/assets/font1.css
index f46cc3ff..c640d54f 100644
--- a/core/http/static/assets/font1.css
+++ b/core/http/static/assets/font1.css
@@ -7,33 +7,33 @@ https://fonts.googleapis.com/css2?family=Inter:wght@400;600;700&family=Roboto:wg
   font-style: normal;
   font-weight: 400;
   font-display: swap;
-  src: url(/static/assets/UcCO3FwrK3iLTeHuS_fvQtMwCp50KnMw2boKoduKmMEVuLyfMZg.ttf) format('truetype');
+  src: url(./UcCO3FwrK3iLTeHuS_fvQtMwCp50KnMw2boKoduKmMEVuLyfMZg.ttf) format('truetype');
 }
 @font-face {
   font-family: 'Inter';
   font-style: normal;
   font-weight: 600;
   font-display: swap;
-  src: url(/static/assets/UcCO3FwrK3iLTeHuS_fvQtMwCp50KnMw2boKoduKmMEVuGKYMZg.ttf) format('truetype');
+  src: url(./UcCO3FwrK3iLTeHuS_fvQtMwCp50KnMw2boKoduKmMEVuGKYMZg.ttf) format('truetype');
 }
 @font-face {
   font-family: 'Inter';
   font-style: normal;
   font-weight: 700;
   font-display: swap;
-  src: url(/static/assets/UcCO3FwrK3iLTeHuS_fvQtMwCp50KnMw2boKoduKmMEVuFuYMZg.ttf) format('truetype');
+  src: url(./UcCO3FwrK3iLTeHuS_fvQtMwCp50KnMw2boKoduKmMEVuFuYMZg.ttf) format('truetype');
 }
 @font-face {
   font-family: 'Roboto';
   font-style: normal;
   font-weight: 400;
   font-display: swap;
-  src: url(/static/assets/KFOmCnqEu92Fr1Me5Q.ttf) format('truetype');
+  src: url(./KFOmCnqEu92Fr1Me5Q.ttf) format('truetype');
 }
 @font-face {
   font-family: 'Roboto';
   font-style: normal;
   font-weight: 500;
   font-display: swap;
-  src: url(/static/assets/KFOlCnqEu92Fr1MmEU9vAw.ttf) format('truetype');
+  src: url(./KFOlCnqEu92Fr1MmEU9vAw.ttf) format('truetype');
 }
diff --git a/core/http/static/assets/font2.css b/core/http/static/assets/font2.css
index f2f47e74..387b61d9 100644
--- a/core/http/static/assets/font2.css
+++ b/core/http/static/assets/font2.css
@@ -7,33 +7,33 @@ https://fonts.googleapis.com/css?family=Roboto:300,400,500,700,900&display=swap
   font-style: normal;
   font-weight: 300;
   font-display: swap;
-  src: url(/static/assets//KFOlCnqEu92Fr1MmSU5fBBc9.ttf) format('truetype');
+  src: url(./KFOlCnqEu92Fr1MmSU5fBBc9.ttf) format('truetype');
 }
 @font-face {
   font-family: 'Roboto';
   font-style: normal;
   font-weight: 400;
   font-display: swap;
-  src: url(/static/assets//KFOmCnqEu92Fr1Mu4mxP.ttf) format('truetype');
+  src: url(./KFOmCnqEu92Fr1Mu4mxP.ttf) format('truetype');
 }
 @font-face {
   font-family: 'Roboto';
   font-style: normal;
   font-weight: 500;
   font-display: swap;
-  src: url(/static/assets//KFOlCnqEu92Fr1MmEU9fBBc9.ttf) format('truetype');
+  src: url(./KFOlCnqEu92Fr1MmEU9fBBc9.ttf) format('truetype');
 }
 @font-face {
   font-family: 'Roboto';
   font-style: normal;
   font-weight: 700;
   font-display: swap;
-  src: url(/static/assets//KFOlCnqEu92Fr1MmWUlfBBc9.ttf) format('truetype');
+  src: url(./KFOlCnqEu92Fr1MmWUlfBBc9.ttf) format('truetype');
 }
 @font-face {
   font-family: 'Roboto';
   font-style: normal;
   font-weight: 900;
   font-display: swap;
-  src: url(/static/assets//KFOlCnqEu92Fr1MmYUtfBBc9.ttf) format('truetype');
+  src: url(./KFOlCnqEu92Fr1MmYUtfBBc9.ttf) format('truetype');
 }
diff --git a/core/http/static/chat.js b/core/http/static/chat.js
index ef15f838..67e0bb60 100644
--- a/core/http/static/chat.js
+++ b/core/http/static/chat.js
@@ -143,7 +143,7 @@ function readInputImage() {
 	// }
 	// Source: https://stackoverflow.com/a/75751803/11386095
 
-	const response = await fetch("/v1/chat/completions", {
+	const response = await fetch("v1/chat/completions", {
 		method: "POST",
 		headers: {
 			Authorization: `Bearer ${key}`,
diff --git a/core/http/static/image.js b/core/http/static/image.js
index 315bdda0..079c9dc0 100644
--- a/core/http/static/image.js
+++ b/core/http/static/image.js
@@ -48,7 +48,7 @@ async function promptDallE(key, input) {
 	document.getElementById("input").disabled = true;
 	const model = document.getElementById("image-model").value;
 
-	const response = await fetch("/v1/images/generations", {
+	const response = await fetch("v1/images/generations", {
 		method: "POST",
 		headers: {
 			Authorization: `Bearer ${key}`,
diff --git a/core/http/static/talk.js b/core/http/static/talk.js
index 3072da84..ecaa0f0b 100644
--- a/core/http/static/talk.js
+++ b/core/http/static/talk.js
@@ -122,7 +122,7 @@ async function sendAudioToWhisper(audioBlob) {
 	formData.append('model', getWhisperModel());
 	API_KEY = localStorage.getItem("key");
 
-	const response = await fetch('/v1/audio/transcriptions', {
+	const response = await fetch('v1/audio/transcriptions', {
 		method: 'POST',
 		headers: {
 			'Authorization': `Bearer ${API_KEY}`
@@ -139,7 +139,7 @@ async function sendTextToChatGPT(text) {
 	conversationHistory.push({ role: "user", content: text });
 	API_KEY = localStorage.getItem("key");
 
localStorage.getItem("key"); - const response = await fetch('/v1/chat/completions', { + const response = await fetch('v1/chat/completions', { method: 'POST', headers: { 'Authorization': `Bearer ${API_KEY}`, @@ -163,7 +163,7 @@ async function sendTextToChatGPT(text) { async function getTextToSpeechAudio(text) { API_KEY = localStorage.getItem("key"); - const response = await fetch('/v1/audio/speech', { + const response = await fetch('v1/audio/speech', { method: 'POST', headers: { diff --git a/core/http/static/tts.js b/core/http/static/tts.js index 7fc74729..daead3a8 100644 --- a/core/http/static/tts.js +++ b/core/http/static/tts.js @@ -19,7 +19,7 @@ async function tts(key, input) { document.getElementById("input").disabled = true; const model = document.getElementById("tts-model").value; - const response = await fetch("/tts", { + const response = await fetch("tts", { method: "POST", headers: { Authorization: `Bearer ${key}`, diff --git a/core/http/utils/baseurl.go b/core/http/utils/baseurl.go new file mode 100644 index 00000000..9fe20f44 --- /dev/null +++ b/core/http/utils/baseurl.go @@ -0,0 +1,24 @@ +package utils + +import ( + "strings" + + "github.com/gofiber/fiber/v2" +) + +// BaseURL returns the base URL for the given HTTP request context. +// It takes into account that the app may be exposed by a reverse-proxy under a different protocol, host and path. +// The returned URL is guaranteed to end with `/`. +// The method should be used in conjunction with the StripPathPrefix middleware. +func BaseURL(c *fiber.Ctx) string { + path := c.Path() + origPath := c.OriginalURL() + + if path != origPath && strings.HasSuffix(origPath, path) { + pathPrefix := origPath[:len(origPath)-len(path)+1] + + return c.BaseURL() + pathPrefix + } + + return c.BaseURL() + "/" +} diff --git a/core/http/utils/baseurl_test.go b/core/http/utils/baseurl_test.go new file mode 100644 index 00000000..1750285c --- /dev/null +++ b/core/http/utils/baseurl_test.go @@ -0,0 +1,48 @@ +package utils + +import ( + "net/http/httptest" + "testing" + + "github.com/gofiber/fiber/v2" + "github.com/stretchr/testify/require" +) + +func TestBaseURL(t *testing.T) { + for _, tc := range []struct { + name string + prefix string + expectURL string + }{ + { + name: "without prefix", + prefix: "/", + expectURL: "http://example.com/", + }, + { + name: "with prefix", + prefix: "/myprefix/", + expectURL: "http://example.com/myprefix/", + }, + } { + t.Run(tc.name, func(t *testing.T) { + app := fiber.New() + actualURL := "" + + app.Get(tc.prefix+"hello/world", func(c *fiber.Ctx) error { + if tc.prefix != "/" { + c.Path("/hello/world") + } + actualURL = BaseURL(c) + return nil + }) + + req := httptest.NewRequest("GET", tc.prefix+"hello/world", nil) + resp, err := app.Test(req, -1) + + require.NoError(t, err) + require.Equal(t, 200, resp.StatusCode, "response status code") + require.Equal(t, tc.expectURL, actualURL, "base URL") + }) + } +} diff --git a/core/http/views/404.html b/core/http/views/404.html index 359d8505..2f5a4386 100644 --- a/core/http/views/404.html +++ b/core/http/views/404.html @@ -12,7 +12,7 @@

Welcome to your LocalAI instance!

- diff --git a/core/http/views/chat.html b/core/http/views/chat.html index 67d40bfd..b0f11281 100644 --- a/core/http/views/chat.html +++ b/core/http/views/chat.html @@ -28,7 +28,7 @@ SOFTWARE. {{template "views/partials/head" .}} - + - -
+ + {{template "views/partials/navbar" .}} -
- -
-
+ +
-

Chat {{ if .Model }} with {{.Model}} {{ end }} - -

- + +
+ + +
+ +
+ + +
- data-twe-ripple-init - data-twe-ripple-color="light" - class="m-2 float-right inline-block rounded bg-primary px-6 pb-2.5 mb-3 pt-2.5 text-xs font-medium uppercase leading-normal text-white shadow-primary-3 transition duration-150 ease-in-out hover:bg-primary-accent-300 hover:shadow-primary-2 focus:bg-primary-accent-300 focus:shadow-primary-2 focus:outline-none focus:ring-0 active:bg-primary-600 active:shadow-primary-2 dark:shadow-black/30 dark:hover:shadow-dark-strong dark:focus:shadow-dark-strong dark:active:shadow-dark-strong" + + - - -
- - -
- - -
-
- -
-

- Start chatting with the AI by typing a prompt in the input field below and pressing Enter. - For models that support images, you can upload an image by clicking the paperclip icon. -

-
- +
+ + +
+

+ Start chatting with the AI by typing a prompt in the input field below and pressing Enter. + For models that support images, you can upload an image by clicking the paperclip + icon. +

+
+ +
+
+ + +
+ + + +
+
+ + + + +
+
-
- - - -
-
- - - - -
-
-
+ -
-
+
\ No newline at end of file
diff --git a/core/http/views/partials/head.html b/core/http/views/partials/head.html
index 57be385d..66afb22d 100644
--- a/core/http/views/partials/head.html
+++ b/core/http/views/partials/head.html
@@ -1,36 +1,20 @@ - - - {{.Title}} - - - + + + {{.Title}} + + + - - - - + + + - - - + + + + + - + + - - - - - +
\ No newline at end of file

From c87870b18e4c1ece8be123dcd69786b0ce985806 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto <mudler@localai.io>
Date: Wed, 26 Feb 2025 18:27:18 +0100
Subject: [PATCH 442/849] feat(ui): improve chat interface (#4910)

* feat(ui): show more information in the chat view, minor adjustments to
  the model gallery

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* fix(ui): UI improvements

Visual improvements and bugfixes, including:
- disable pagination during search
- fix scrolling on new message

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

---------

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
---
 core/gallery/gallery.go     |   2 +
 core/http/routes/ui.go      |  21 ++
 core/http/static/chat.js    |   7 +-
 core/http/views/chat.html   | 540 +++++++++++++++++++++++-------------
 core/http/views/models.html |  55 +++-
 5 files changed, 421 insertions(+), 204 deletions(-)

diff --git a/core/gallery/gallery.go b/core/gallery/gallery.go
index a3a1d909..0c540052 100644
--- a/core/gallery/gallery.go
+++ b/core/gallery/gallery.go
@@ -29,6 +29,8 @@ func InstallModelFromGallery(galleries []config.Gallery, name string, basePath s
 		if err != nil {
 			return err
 		}
+		config.Description = model.Description
+		config.License = model.License
 	} else if len(model.ConfigFile) > 0 {
 		// TODO: is this worse than using the override method with a blank cfg yaml?
 		reYamlConfig, err := yaml.Marshal(model.ConfigFile)
diff --git a/core/http/routes/ui.go b/core/http/routes/ui.go
index 40919a83..65d1b09c 100644
--- a/core/http/routes/ui.go
+++ b/core/http/routes/ui.go
@@ -404,6 +404,15 @@ func RegisterUIRoutes(app *fiber.App,
 			return c.Redirect(utils.BaseURL(c))
 		}
 		modelThatCanBeUsed := ""
+		galleryConfigs := map[string]*gallery.Config{}
+
+		for _, m := range backendConfigs {
+			cfg, err := gallery.GetLocalModelConfiguration(ml.ModelPath, m.Name)
+			if err != nil {
+				continue
+			}
+			galleryConfigs[m.Name] = cfg
+		}
 
 		title := "LocalAI - Chat"
 
@@ -419,6 +428,7 @@
 			"Title":               title,
 			"BaseURL":             utils.BaseURL(c),
 			"ModelsWithoutConfig": modelsWithoutConfig,
+			"GalleryConfig":       galleryConfigs,
 			"ModelsConfig":        backendConfigs,
 			"Model":               modelThatCanBeUsed,
 			"Version":             internal.PrintableVersion(),
@@ -434,10 +444,21 @@ func RegisterUIRoutes(app *fiber.App,
 		backendConfigs := cl.GetAllBackendConfigs()
 		modelsWithoutConfig, _ := services.ListModels(cl, ml, config.NoFilterFn, services.LOOSE_ONLY)
 
+		galleryConfigs := map[string]*gallery.Config{}
+
+		for _, m := range backendConfigs {
+			cfg, err := gallery.GetLocalModelConfiguration(ml.ModelPath, m.Name)
+			if err != nil {
+				continue
+			}
+			galleryConfigs[m.Name] = cfg
+		}
+
 		summary := fiber.Map{
 			"Title":               "LocalAI - Chat with " + c.Params("model"),
 			"BaseURL":             utils.BaseURL(c),
 			"ModelsConfig":        backendConfigs,
+			"GalleryConfig":       galleryConfigs,
 			"ModelsWithoutConfig": modelsWithoutConfig,
 			"Model":               c.Params("model"),
 			"Version":             internal.PrintableVersion(),
diff --git a/core/http/static/chat.js b/core/http/static/chat.js
index 67e0bb60..ac1e0ba8 100644
--- a/core/http/static/chat.js
+++ b/core/http/static/chat.js
@@ -49,7 +49,7 @@ function submitPrompt(event) {
 	document.getElementById("input").value = "";
 	const key = localStorage.getItem("key");
 	const systemPrompt = localStorage.getItem("system_prompt");
localStorage.getItem("system_prompt"); - + Alpine.nextTick(() => { document.getElementById('messages').scrollIntoView(false); }); promptGPT(systemPrompt, key, input); } @@ -74,7 +74,6 @@ function readInputImage() { // Make the "loader" visible document.getElementById("loader").style.display = "block"; document.getElementById("input").disabled = true; - document.getElementById('messages').scrollIntoView(false) messages = Alpine.store("chat").messages(); @@ -181,8 +180,8 @@ function readInputImage() { const chatStore = Alpine.store("chat"); chatStore.add("assistant", token); // Efficiently scroll into view without triggering multiple reflows - const messages = document.getElementById('messages'); - messages.scrollTop = messages.scrollHeight; + // const messages = document.getElementById('messages'); + // messages.scrollTop = messages.scrollHeight; }; let buffer = ""; diff --git a/core/http/views/chat.html b/core/http/views/chat.html index 71e9b8d6..59414fe4 100644 --- a/core/http/views/chat.html +++ b/core/http/views/chat.html @@ -4,7 +4,7 @@ Part of this page is based on the OpenAI Chatbot example by David Härer: https://github.com/david-haerer/chatapi MIT License Copyright (c) 2023 David Härer - Copyright (c) 2024 Ettore Di Giacinto + Copyright (c) 2024-2025 Ettore Di Giacinto Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -29,205 +29,355 @@ SOFTWARE. {{template "views/partials/head" .}} - - - + {{ $allGalleryConfigs:=.GalleryConfig }} + {{ $model:=.Model}} + {{template "views/partials/navbar" .}} - -
+ +
+ + + + +
+ + +
+ + + +
+ + {{ if $model }} + {{ $galleryConfig:= index $allGalleryConfigs $model}} + + {{ end }} +

+ Chat {{ if .Model }} with {{.Model}} {{ end }} +

+
+
+ + +
+

+ Start chatting with the AI by typing a prompt in the input field below and pressing Enter. + For models that support images, you can upload an image by clicking the paperclip + icon. +

+
+ +
+
+ + + +
+ + + +
+
+ + + + +
+
+
+ + + {{ if $model }} + {{ $galleryConfig:= index $allGalleryConfigs $model}} + {{ if $galleryConfig }} + + {{ end }} + {{ end }} diff --git a/core/http/views/models.html b/core/http/views/models.html index 9d1e8578..c3910bdc 100644 --- a/core/http/views/models.html +++ b/core/http/views/models.html @@ -6,6 +6,7 @@
{{template "views/partials/navbar" .}} + {{ $numModelsPerPage := 21 }}
@@ -20,38 +21,45 @@ class="text-white-500 inline-block bg-blue-200 rounded-full px-3 py-1 text-sm font-semibold text-gray-700 mr-2 mb-2 hover:bg-gray-300 hover:shadow-gray-2" hx-target="#search-results" hx-vals='{"search": "tts"}' - hx-indicator=".htmx-indicator" >TTS + onclick="hidePagination()" + hx-indicator=".htmx-indicator" >TTS + onclick="hidePagination()" + hx-indicator=".htmx-indicator" >Image generation + onclick="hidePagination()" + hx-indicator=".htmx-indicator" >Text generation + onclick="hidePagination()" + hx-indicator=".htmx-indicator" >Multimodal + onclick="hidePagination()" + hx-indicator=".htmx-indicator" >Embeddings + onclick="hidePagination()" + hx-indicator=".htmx-indicator" >Rerankers + onclick="hidePagination()" + hx-indicator=".htmx-indicator" >Audio transcription
@@ -59,6 +67,7 @@ {{ range .AllTags }} {{ end }}
@@ -71,17 +80,21 @@ name="search" placeholder="Begin Typing To Search models..." hx-post="browse/search/models" hx-trigger="input changed delay:500ms, search" - hx-target="#search-results" + hx-target="#search-results" + oninput="hidePagination()" + onchange="hidePagination()" + onsearch="hidePagination()" hx-indicator=".htmx-indicator">
{{.Models}}
+ {{ if gt .AvailableModels $numModelsPerPage }} -
+
- - +
+ {{ end }}
{{template "views/partials/footer" .}}
+ + - +
\ No newline at end of file

From 054860539a0f343f8e5a0a0f039a4981a3b6a8cd Mon Sep 17 00:00:00 2001
From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com>
Date: Wed, 26 Feb 2025 23:43:49 +0100
Subject: [PATCH 443/849] chore: :arrow_up: Update ggml-org/llama.cpp to
 `a800ae46da2ed7dac236aa6bf2b595da6b6294b5` (#4911)

:arrow_up: Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 0e8b3753..412e93f9 100644
--- a/Makefile
+++ b/Makefile
@@ -6,7 +6,7 @@ BINARY_NAME=local-ai
 DETECT_LIBS?=true
 
 # llama.cpp versions
-CPPLLAMA_VERSION?=d7cfe1ffe0f435d0048a6058d529daf76e072d9c
+CPPLLAMA_VERSION?=a800ae46da2ed7dac236aa6bf2b595da6b6294b5
 
 # whisper.cpp version
 WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp

From 1461fd877752ecbd65f4f45a98ffa4c18d4aea94 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto <mudler@localai.io>
Date: Thu, 27 Feb 2025 10:02:44 +0100
Subject: [PATCH 444/849] chore(model gallery): add
 locutusque_thespis-llama-3.1-8b (#4912)

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
---
 gallery/index.yaml | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/gallery/index.yaml b/gallery/index.yaml
index 200449c0..6d1c991f 100644
--- a/gallery/index.yaml
+++ b/gallery/index.yaml
@@ -6188,6 +6188,20 @@
     - filename: l3.1-8b-rp-ink-q4_k_m.gguf
       sha256: 0e8d44a92153cda0c6a5d6b0d9af44d4806104b39d3232f9097cfcc384a78152
       uri: huggingface://Triangle104/L3.1-8b-RP-Ink-Q4_K_M-GGUF/l3.1-8b-rp-ink-q4_k_m.gguf
+- !!merge <<: *llama31
+  name: "locutusque_thespis-llama-3.1-8b"
+  urls:
+    - https://huggingface.co/Locutusque/Thespis-Llama-3.1-8B
+    - https://huggingface.co/bartowski/Locutusque_Thespis-Llama-3.1-8B-GGUF
+  description: |
+    The Thespis family of language models is designed to enhance roleplaying performance through reasoning inspired by the Theory of Mind. Thespis-Llama-3.1-8B is a fine-tuned version of an abliterated Llama-3.1-8B model, optimized using Group Relative Policy Optimization (GRPO). The model is specifically rewarded for minimizing "slop" and repetition in its outputs, aiming to produce coherent and engaging text that maintains character consistency and avoids low-quality responses. This version represents an initial release; future iterations will incorporate a more rigorous fine-tuning process.
+  overrides:
+    parameters:
+      model: Locutusque_Thespis-Llama-3.1-8B-Q4_K_M.gguf
+  files:
+    - filename: Locutusque_Thespis-Llama-3.1-8B-Q4_K_M.gguf
+      sha256: 94138f3774f496e28c2e76bb6df7a073c6087f8c074216a24b3cbcdc58ec7853
+      uri: huggingface://bartowski/Locutusque_Thespis-Llama-3.1-8B-GGUF/Locutusque_Thespis-Llama-3.1-8B-Q4_K_M.gguf
 - &deepseek
   url: "github:mudler/LocalAI/gallery/deepseek.yaml@master" ## Deepseek
   name: "deepseek-coder-v2-lite-instruct"

From 3bf2e9d065d5b155b0255453935ebafbc6cbf2e9 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto <mudler@localai.io>
Date: Thu, 27 Feb 2025 10:52:19 +0100
Subject: [PATCH 445/849] fix(ui): not all models have an Icon (#4913)

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
---
 core/http/views/chat.html | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/core/http/views/chat.html b/core/http/views/chat.html
index 59414fe4..b9a40687 100644
--- a/core/http/views/chat.html
+++ b/core/http/views/chat.html
@@ -94,7 +94,7 @@
- + {{ if $galleryConfig.Icon }}{{end}}

{{ $model }}