From c30ecdd5353c76f024495d54e4ef632e19f5a323 Mon Sep 17 00:00:00 2001
From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com>
Date: Tue, 31 Dec 2024 22:43:29 +0100
Subject: [PATCH 01/18] chore: :arrow_up: Update ggerganov/llama.cpp to `0827b2c1da299805288abbd556d869318f2b121e` (#4520)

:arrow_up: Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 5a35771a..a4f62d3f 100644
--- a/Makefile
+++ b/Makefile
@@ -8,7 +8,7 @@ DETECT_LIBS?=true
 # llama.cpp versions
 GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp
 GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
-CPPLLAMA_VERSION?=716bd6dec3e044e5c325386b5b0483392b24cefe
+CPPLLAMA_VERSION?=0827b2c1da299805288abbd556d869318f2b121e

 # whisper.cpp version
 WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp

From ae80a2bd2428acf422236497ba8cf446824a414a Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Wed, 1 Jan 2025 13:26:48 +0100
Subject: [PATCH 02/18] chore(model gallery): add smallthinker-3b-preview (#4521)

Signed-off-by: Ettore Di Giacinto
---
 gallery/index.yaml | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/gallery/index.yaml b/gallery/index.yaml
index 4bb08df5..b407ab9d 100644
--- a/gallery/index.yaml
+++ b/gallery/index.yaml
@@ -2524,6 +2524,22 @@
     - filename: Q2.5-Veltha-14B-0.5-Q4_K_M.gguf
       sha256: f75b8cbceab555ebcab6fcb3b51d398b7ef79671aa05c21c288edd75c9f217bd
       uri: huggingface://bartowski/Q2.5-Veltha-14B-0.5-GGUF/Q2.5-Veltha-14B-0.5-Q4_K_M.gguf
+- !!merge <<: *qwen25
+  name: "smallthinker-3b-preview"
+  urls:
+    - https://huggingface.co/PowerInfer/SmallThinker-3B-Preview
+    - https://huggingface.co/bartowski/SmallThinker-3B-Preview-GGUF
+  description: |
+    SmallThinker is designed for the following use cases:
+    Edge Deployment: Its small size makes it ideal for deployment on resource-constrained devices.
+    Draft Model for QwQ-32B-Preview: SmallThinker can serve as a fast and efficient draft model for the larger QwQ-32B-Preview model. In my tests with llama.cpp, this gives a 70% speedup (from 40 tokens/s to 70 tokens/s).
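A note on the draft-model claim in the description above: in LocalAI terms, that pairing would be expressed in a model config rather than on the llama.cpp command line. A minimal sketch only, assuming the llama.cpp backend's speculative-decoding options (`draft_model`, `n_draft`) from LocalAI's advanced model configuration; the backend name and filenames are illustrative, not taken from this patch series:

```yaml
# Sketch: pair a large target model with SmallThinker as draft model for
# speculative decoding. Assumes the llama.cpp backend exposes
# draft_model/n_draft; filenames are illustrative placeholders.
name: qwq-32b-with-draft
backend: llama-cpp
parameters:
  model: QwQ-32B-Preview-Q4_K_M.gguf        # large target model
draft_model: SmallThinker-3B-Preview-Q4_K_M.gguf  # small model proposes tokens
n_draft: 16   # candidate tokens drafted per step; accepted ones skip the big model
```
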
+  overrides:
+    parameters:
+      model: SmallThinker-3B-Preview-Q4_K_M.gguf
+  files:
+    - filename: SmallThinker-3B-Preview-Q4_K_M.gguf
+      sha256: ac04f82a09ee6a2748437c3bb774b638a54099dc7d5d6ef7549893fae22ab055
+      uri: huggingface://bartowski/SmallThinker-3B-Preview-GGUF/SmallThinker-3B-Preview-Q4_K_M.gguf
 - &smollm
   ## SmolLM
   url: "github:mudler/LocalAI/gallery/chatml.yaml@master"

From 1a2a7a57b3974eaefc2ef7a2761da8528d199296 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Wed, 1 Jan 2025 13:27:13 +0100
Subject: [PATCH 03/18] chore(model gallery): add mn-12b-mag-mell-r1-iq-arm-imatrix (#4522)

Signed-off-by: Ettore Di Giacinto
---
 gallery/index.yaml | 32 ++++++++++++++++++++++++++++++++
 1 file changed, 32 insertions(+)

diff --git a/gallery/index.yaml b/gallery/index.yaml
index b407ab9d..137603f1 100644
--- a/gallery/index.yaml
+++ b/gallery/index.yaml
@@ -5319,6 +5319,38 @@
     - filename: Dans-PersonalityEngine-V1.1.0-12b-Q4_K_M.gguf
       sha256: a1afb9fddfa3f2847ed710cc374b4f17e63a75f7e10d8871cf83983c2f5415ab
       uri: huggingface://bartowski/Dans-PersonalityEngine-V1.1.0-12b-GGUF/Dans-PersonalityEngine-V1.1.0-12b-Q4_K_M.gguf
+- !!merge <<: *mistral03
+  name: "mn-12b-mag-mell-r1-iq-arm-imatrix"
+  url: "github:mudler/LocalAI/gallery/chatml.yaml@master"
+  icon: "https://i.imgur.com/wjyAaTO.png"
+  urls:
+    - https://huggingface.co/inflatebot/MN-12B-Mag-Mell-R1
+    - https://huggingface.co/Lewdiculous/MN-12B-Mag-Mell-R1-GGUF-IQ-ARM-Imatrix
+  description: |
+    This is a merge of pre-trained language models created using mergekit. Mag Mell is a multi-stage merge, inspired by hyper-merges like Tiefighter and Umbral Mind, and intended to be a general-purpose "Best of Nemo" model for any fictional, creative use case.
+    Six models were chosen based on three categories; they were then paired up and merged via layer-weighted SLERP to create intermediate "specialists", which were then evaluated in their domains. The specialists were then merged into the base via DARE-TIES, with hyperparameters chosen to reduce interference caused by the overlap of the three domains. The idea with this approach is to extract the best qualities of each component part, and produce models whose task vectors represent more than the sum of their parts.
+    The three specialists are as follows:
+    Hero (RP, kink/trope coverage): Chronos Gold, Sunrose.
+    Monk (Intelligence, groundedness): Bophades, Wissenschaft.
+    Deity (Prose, flair): Gutenberg v4, Magnum 2.5 KTO.
+    I've been dreaming about this merge since Nemo tunes started coming out in earnest. From our testing, Mag Mell demonstrates worldbuilding capabilities unlike any model in its class, comparable to old adventuring models like Tiefighter, and prose that exhibits minimal "slop" (not bad for no finetuning), frequently devising electrifying metaphors that left us consistently astonished.
+    I don't want to toot my own bugle, though; I'm really proud of how this came out, but please leave your feedback, good or bad. Special thanks as usual to Toaster for his feedback and Fizz for helping fund compute, as well as the KoboldAI Discord for their resources.
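The layer-weighted SLERP stage described in this entry maps onto mergekit's config format roughly as follows. A sketch only: the model pairing mirrors the "Monk" specialist named above, but the layer ranges and interpolation weights are invented for illustration and are not the actual Mag Mell recipe:

```yaml
# Illustrative mergekit config for one SLERP "specialist" pair.
# Pairing taken from the description ("Monk": Bophades + Wissenschaft);
# layer_range and t values are invented placeholders.
merge_method: slerp
base_model: nbeerbower/mistral-nemo-bophades-12B
slices:
  - sources:
      - model: nbeerbower/mistral-nemo-bophades-12B
        layer_range: [0, 40]
      - model: nbeerbower/mistral-nemo-wissenschaft-12B
        layer_range: [0, 40]
parameters:
  t:
    - filter: self_attn
      value: [0.0, 0.3, 0.5, 0.7, 1.0]  # per-layer interpolation curve
    - value: 0.5                        # default weight for other tensors
dtype: bfloat16
```

The DARE-TIES stage that folds the specialists back into the base is a separate mergekit run with `merge_method: dare_ties`.
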
+    The following models were included in the merge:
+    IntervitensInc/Mistral-Nemo-Base-2407-chatml
+    nbeerbower/mistral-nemo-bophades-12B
+    nbeerbower/mistral-nemo-wissenschaft-12B
+    elinas/Chronos-Gold-12B-1.0
+    Fizzarolli/MN-12b-Sunrose
+    nbeerbower/mistral-nemo-gutenberg-12B-v4
+    anthracite-org/magnum-12b-v2.5-kto
+  overrides:
+    parameters:
+      model: MN-12B-Mag-Mell-R1-Q4_K_M-imat.gguf
+  files:
+    - filename: MN-12B-Mag-Mell-R1-Q4_K_M-imat.gguf
+      sha256: ba0c9e64222b35f8c3828b7295e173ee54d83fd2e457ba67f6561a4a6d98481e
+      uri: huggingface://Lewdiculous/MN-12B-Mag-Mell-R1-GGUF-IQ-ARM-Imatrix/MN-12B-Mag-Mell-R1-Q4_K_M-imat.gguf
 - &mudler
   ### START mudler's LocalAI specific-models
   url: "github:mudler/LocalAI/gallery/mudler.yaml@master"

From f345f7a7958834123036640891b76c92e8ffc17b Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Wed, 1 Jan 2025 13:33:39 +0100
Subject: [PATCH 04/18] chore(model gallery): add captain-eris-diogenes_twilight-v0.420-12b (#4523)

chore(model gallery): add captain-eris-diogenes_twilight-v0.420-12b-arm-imatrix

Signed-off-by: Ettore Di Giacinto
---
 gallery/index.yaml | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/gallery/index.yaml b/gallery/index.yaml
index 137603f1..75723250 100644
--- a/gallery/index.yaml
+++ b/gallery/index.yaml
@@ -5351,6 +5351,24 @@
     - filename: MN-12B-Mag-Mell-R1-Q4_K_M-imat.gguf
       sha256: ba0c9e64222b35f8c3828b7295e173ee54d83fd2e457ba67f6561a4a6d98481e
       uri: huggingface://Lewdiculous/MN-12B-Mag-Mell-R1-GGUF-IQ-ARM-Imatrix/MN-12B-Mag-Mell-R1-Q4_K_M-imat.gguf
+- !!merge <<: *mistral03
+  url: "github:mudler/LocalAI/gallery/chatml.yaml@master"
+  name: "captain-eris-diogenes_twilight-v0.420-12b-arm-imatrix"
+  icon: https://cdn-uploads.huggingface.co/production/uploads/642265bc01c62c1e4102dc36/n0HUz-yRPkwQzt3dFrjW9.png
+  urls:
+    - https://huggingface.co/Nitral-AI/Captain-Eris-Diogenes_Twilight-V0.420-12B
+    - https://huggingface.co/Lewdiculous/Captain-Eris-Diogenes_Twilight-V0.420-12B-GGUF-ARM-Imatrix
+  description: |
+    The following models were included in the merge:
+    Nitral-AI/Captain-Eris_Twilight-V0.420-12B
+    Nitral-AI/Diogenes-12B-ChatMLified
+  overrides:
+    parameters:
+      model: Captain-Eris-Diogenes_Twighlight-V0.420-12B-Q4_K_M-imat.gguf
+  files:
+    - filename: Captain-Eris-Diogenes_Twighlight-V0.420-12B-Q4_K_M-imat.gguf
+      sha256: e70b26114108c41e3ca0aefc0c7b8f5f69452ab461ffe7155e6b75ede24ec1b5
+      uri: huggingface://Lewdiculous/Captain-Eris-Diogenes_Twilight-V0.420-12B-GGUF-ARM-Imatrix/Captain-Eris-Diogenes_Twighlight-V0.420-12B-Q4_K_M-imat.gguf
 - &mudler
   ### START mudler's LocalAI specific-models
   url: "github:mudler/LocalAI/gallery/mudler.yaml@master"

From f1082f3c6d8862ea03a1e16cda4cb3d4492c82b8 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Wed, 1 Jan 2025 14:41:48 +0100
Subject: [PATCH 05/18] chore(model gallery): add violet_twilight-v0.2 (#4524)

Signed-off-by: Ettore Di Giacinto
---
 gallery/index.yaml | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/gallery/index.yaml b/gallery/index.yaml
index 75723250..116baff2 100644
--- a/gallery/index.yaml
+++ b/gallery/index.yaml
@@ -5369,6 +5369,22 @@
     - filename: Captain-Eris-Diogenes_Twighlight-V0.420-12B-Q4_K_M-imat.gguf
       sha256: e70b26114108c41e3ca0aefc0c7b8f5f69452ab461ffe7155e6b75ede24ec1b5
       uri: huggingface://Lewdiculous/Captain-Eris-Diogenes_Twilight-V0.420-12B-GGUF-ARM-Imatrix/Captain-Eris-Diogenes_Twighlight-V0.420-12B-Q4_K_M-imat.gguf
+- !!merge <<: *mistral03
+  name: "violet_twilight-v0.2"
"github:mudler/LocalAI/gallery/chatml.yaml@master" + icon: https://cdn-uploads.huggingface.co/production/uploads/64adfd277b5ff762771e4571/P962FQhRG4I8nbU_DJolY.png + urls: + - https://huggingface.co/Epiculous/Violet_Twilight-v0.2 + - https://huggingface.co/Epiculous/Violet_Twilight-v0.2-GGUF + description: | + Now for something a bit different, Violet_Twilight-v0.2! This model is a SLERP merge of Azure_Dusk-v0.2 and Crimson_Dawn-v0.2! + overrides: + parameters: + model: Violet_Twilight-v0.2.Q4_K_M.gguf + files: + - filename: Violet_Twilight-v0.2.Q4_K_M.gguf + sha256: b63f07cc441146af9c98cd3c3d4390d7c39bfef11c1d168dc7c6244ca2ba6b12 + uri: huggingface://Epiculous/Violet_Twilight-v0.2-GGUF/Violet_Twilight-v0.2.Q4_K_M.gguf - &mudler ### START mudler's LocalAI specific-models url: "github:mudler/LocalAI/gallery/mudler.yaml@master" From 3415e6ae740f8434eb943581c753ae2c0fd5a39c Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Thu, 2 Jan 2025 10:45:52 +0100 Subject: [PATCH 06/18] chore(model gallery): add qwenwify2.5-32b-v4.5 (#4525) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index 116baff2..658736f4 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -2540,6 +2540,26 @@ - filename: SmallThinker-3B-Preview-Q4_K_M.gguf sha256: ac04f82a09ee6a2748437c3bb774b638a54099dc7d5d6ef7549893fae22ab055 uri: huggingface://bartowski/SmallThinker-3B-Preview-GGUF/SmallThinker-3B-Preview-Q4_K_M.gguf +- !!merge <<: *qwen25 + name: "qwenwify2.5-32b-v4.5" + urls: + - https://huggingface.co/Kaoeiri/Qwenwify2.5-32B-v4.5 + - https://huggingface.co/mradermacher/Qwenwify2.5-32B-v4.5-GGUF + description: | + The following models were included in the merge: + Kaoeiri/Qwenwify-32B-v3 + allura-org/Qwen2.5-32b-RP-Ink + Dans-DiscountModels/Qwen2.5-32B-ChatML + Saxo/Linkbricks-Horizon-AI-Japanese-Base-32B + OpenBuddy/openbuddy-qwq-32b-v24.2-200k + Sao10K/32B-Qwen2.5-Kunou-v1 + overrides: + parameters: + model: Qwenwify2.5-32B-v4.5.Q4_K_M.gguf + files: + - filename: Qwenwify2.5-32B-v4.5.Q4_K_M.gguf + sha256: 52670acdc285356c01259f45b1953860f34deb4f80345ca63b60acc19165280c + uri: huggingface://mradermacher/Qwenwify2.5-32B-v4.5-GGUF/Qwenwify2.5-32B-v4.5.Q4_K_M.gguf - &smollm ## SmolLM url: "github:mudler/LocalAI/gallery/chatml.yaml@master" From 930280ecacd00de948b3678e684d6925410ad526 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Thu, 2 Jan 2025 10:46:01 +0100 Subject: [PATCH 07/18] chore(model gallery): add sainemo-remix (#4526) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index 658736f4..4a5be373 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -5405,6 +5405,24 @@ - filename: Violet_Twilight-v0.2.Q4_K_M.gguf sha256: b63f07cc441146af9c98cd3c3d4390d7c39bfef11c1d168dc7c6244ca2ba6b12 uri: huggingface://Epiculous/Violet_Twilight-v0.2-GGUF/Violet_Twilight-v0.2.Q4_K_M.gguf +- !!merge <<: *mistral03 + name: "sainemo-remix" + icon: https://huggingface.co/Moraliane/SAINEMO-reMIX/resolve/main/remixwife.webp + urls: + - https://huggingface.co/Moraliane/SAINEMO-reMIX + - https://huggingface.co/QuantFactory/SAINEMO-reMIX-GGUF + description: | + The following models were included in the merge: + elinas_Chronos-Gold-12B-1.0 + Vikhrmodels_Vikhr-Nemo-12B-Instruct-R-21-09-24 + MarinaraSpaghetti_NemoMix-Unleashed-12B + overrides: + parameters: + model: 
From 930280ecacd00de948b3678e684d6925410ad526 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Thu, 2 Jan 2025 10:46:01 +0100
Subject: [PATCH 07/18] chore(model gallery): add sainemo-remix (#4526)

Signed-off-by: Ettore Di Giacinto
---
 gallery/index.yaml | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/gallery/index.yaml b/gallery/index.yaml
index 658736f4..4a5be373 100644
--- a/gallery/index.yaml
+++ b/gallery/index.yaml
@@ -5405,6 +5405,24 @@
     - filename: Violet_Twilight-v0.2.Q4_K_M.gguf
       sha256: b63f07cc441146af9c98cd3c3d4390d7c39bfef11c1d168dc7c6244ca2ba6b12
       uri: huggingface://Epiculous/Violet_Twilight-v0.2-GGUF/Violet_Twilight-v0.2.Q4_K_M.gguf
+- !!merge <<: *mistral03
+  name: "sainemo-remix"
+  icon: https://huggingface.co/Moraliane/SAINEMO-reMIX/resolve/main/remixwife.webp
+  urls:
+    - https://huggingface.co/Moraliane/SAINEMO-reMIX
+    - https://huggingface.co/QuantFactory/SAINEMO-reMIX-GGUF
+  description: |
+    The following models were included in the merge:
+    elinas_Chronos-Gold-12B-1.0
+    Vikhrmodels_Vikhr-Nemo-12B-Instruct-R-21-09-24
+    MarinaraSpaghetti_NemoMix-Unleashed-12B
+  overrides:
+    parameters:
+      model: SAINEMO-reMIX.Q4_K_M.gguf
+  files:
+    - filename: SAINEMO-reMIX.Q4_K_M.gguf
+      sha256: 91c81623542df97462d93bed8014af4830940182786948fc395d8958a5add994
+      uri: huggingface://QuantFactory/SAINEMO-reMIX-GGUF/SAINEMO-reMIX.Q4_K_M.gguf
 - &mudler
   ### START mudler's LocalAI specific-models
   url: "github:mudler/LocalAI/gallery/mudler.yaml@master"

From d9facbcee93fdc61521201264a4c1861e3ab9427 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Thu, 2 Jan 2025 10:46:11 +0100
Subject: [PATCH 08/18] chore(model gallery): add l3.1-purosani-2-8b (#4527)

Signed-off-by: Ettore Di Giacinto
---
 gallery/index.yaml | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/gallery/index.yaml b/gallery/index.yaml
index 4a5be373..f9e2731b 100644
--- a/gallery/index.yaml
+++ b/gallery/index.yaml
@@ -4434,6 +4434,25 @@
     - filename: HuatuoGPT-o1-8B-Q4_K_M.gguf
       sha256: 3e1ef35fc230182d96ae2d6c7436a2e8250c21a4278e798e1aa45790ba82006b
       uri: huggingface://bartowski/HuatuoGPT-o1-8B-GGUF/HuatuoGPT-o1-8B-Q4_K_M.gguf
+- !!merge <<: *llama31
+  name: "l3.1-purosani-2-8b"
+  urls:
+    - https://huggingface.co/djuna/L3.1-Purosani-2-8B
+    - https://huggingface.co/QuantFactory/L3.1-Purosani-2-8B-GGUF
+  description: |
+    The following models were included in the merge:
+    hf-100/Llama-3-Spellbound-Instruct-8B-0.3
+    arcee-ai/Llama-3.1-SuperNova-Lite
+    grimjim/Llama-3-Instruct-abliteration-LoRA-8B
+    THUDM/LongWriter-llama3.1-8b
+    ResplendentAI/Smarts_Llama3
+    djuna/L3.1-Suze-Vume-2-calc
+    djuna/L3.1-ForStHS
+    Blackroot/Llama-3-8B-Abomination-LORA
+  overrides:
+    parameters:
+      model: L3.1-Purosani-2-8B.Q4_K_M.gguf
+  files:
+    - filename: L3.1-Purosani-2-8B.Q4_K_M.gguf
+      sha256: e3eb8038a72b6e85b7a43c7806c32f01208f4644d54bf94d77ecad6286cf609f
+      uri: huggingface://QuantFactory/L3.1-Purosani-2-8B-GGUF/L3.1-Purosani-2-8B.Q4_K_M.gguf
 - &deepseek
   ## Deepseek
   url: "github:mudler/LocalAI/gallery/deepseek.yaml@master"

From 3c21c8789a68c7a5baf830dd7b6711c5b9a99ded Mon Sep 17 00:00:00 2001
From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com>
Date: Thu, 2 Jan 2025 22:43:37 +0100
Subject: [PATCH 09/18] chore: :arrow_up: Update ggerganov/llama.cpp to `2f0ee84b9b02d2a98742308026f060ebdc2423f1` (#4528)

:arrow_up: Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index a4f62d3f..11904d1b 100644
--- a/Makefile
+++ b/Makefile
@@ -8,7 +8,7 @@ DETECT_LIBS?=true
 # llama.cpp versions
 GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp
 GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
-CPPLLAMA_VERSION?=0827b2c1da299805288abbd556d869318f2b121e
+CPPLLAMA_VERSION?=2f0ee84b9b02d2a98742308026f060ebdc2423f1

 # whisper.cpp version
 WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp
From 36e4c0fcf096c353fbadb51894d464538c6cd71a Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Fri, 3 Jan 2025 09:10:03 +0100
Subject: [PATCH 10/18] chore(model gallery): add nera_noctis-12b (#4530)

Signed-off-by: Ettore Di Giacinto
---
 gallery/index.yaml | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/gallery/index.yaml b/gallery/index.yaml
index f9e2731b..be624e9f 100644
--- a/gallery/index.yaml
+++ b/gallery/index.yaml
@@ -5442,6 +5442,22 @@
     - filename: SAINEMO-reMIX.Q4_K_M.gguf
       sha256: 91c81623542df97462d93bed8014af4830940182786948fc395d8958a5add994
       uri: huggingface://QuantFactory/SAINEMO-reMIX-GGUF/SAINEMO-reMIX.Q4_K_M.gguf
+- !!merge <<: *mistral03
+  name: "nera_noctis-12b"
+  url: "github:mudler/LocalAI/gallery/chatml.yaml@master"
+  icon: https://cdn-uploads.huggingface.co/production/uploads/642265bc01c62c1e4102dc36/89XJnlNNSsEfBjI1oHCVt.jpeg
+  urls:
+    - https://huggingface.co/Nitral-AI/Nera_Noctis-12B
+    - https://huggingface.co/bartowski/Nera_Noctis-12B-GGUF
+  description: |
+    Sometimes, the brightest gems are found in the darkest places. For it is in the shadows where we learn to really see the light.
+  overrides:
+    parameters:
+      model: Nera_Noctis-12B-Q4_K_M.gguf
+  files:
+    - filename: Nera_Noctis-12B-Q4_K_M.gguf
+      sha256: 0662a9a847adde046e6255c15d5a677ebf09ab00841547c8963668d14baf00ff
+      uri: huggingface://bartowski/Nera_Noctis-12B-GGUF/Nera_Noctis-12B-Q4_K_M.gguf
 - &mudler
   ### START mudler's LocalAI specific-models
   url: "github:mudler/LocalAI/gallery/mudler.yaml@master"

From 286dc32fe0417d079ab8ac1cc1308e331d7f08b2 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Fri, 3 Jan 2025 19:18:18 +0100
Subject: [PATCH 11/18] ci(arm64): try building on self-hosted

Signed-off-by: Ettore Di Giacinto
---
 .github/workflows/image.yml | 26 +++++++++++++-------------
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/.github/workflows/image.yml b/.github/workflows/image.yml
index e806f123..422070b8 100644
--- a/.github/workflows/image.yml
+++ b/.github/workflows/image.yml
@@ -362,16 +362,16 @@ jobs:
           base-image: "ubuntu:22.04"
           skip-drivers: 'false'
           makeflags: "--jobs=4 --output-sync=target"
-        # - build-type: 'cublas'
-        #   cuda-major-version: "12"
-        #   cuda-minor-version: "0"
-        #   platforms: 'linux/arm64'
-        #   tag-latest: 'false'
-        #   tag-suffix: '-nvidia-l4t-arm64-core'
-        #   latest-image: 'latest-nvidia-l4t-arm64-core'
-        #   ffmpeg: 'true'
-        #   image-type: 'core'
-        #   base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0"
-        #   runs-on: 'arc-runner-set'
-        #   makeflags: "--jobs=4 --output-sync=target"
-        #   skip-drivers: 'true'
\ No newline at end of file
+        - build-type: 'cublas'
+          cuda-major-version: "12"
+          cuda-minor-version: "0"
+          platforms: 'linux/arm64'
+          tag-latest: 'false'
+          tag-suffix: '-nvidia-l4t-arm64-core'
+          latest-image: 'latest-nvidia-l4t-arm64-core'
+          ffmpeg: 'true'
+          image-type: 'core'
+          base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0"
+          runs-on: 'self-hosted'
+          makeflags: "--jobs=4 --output-sync=target"
+          skip-drivers: 'true'

From baee4f7bd5021591c684849c1b645a75a6eaade0 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Fri, 3 Jan 2025 19:23:05 +0100
Subject: [PATCH 12/18] ci: split jobs

Signed-off-by: Ettore Di Giacinto
---
 .github/workflows/image.yml | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)

diff --git a/.github/workflows/image.yml b/.github/workflows/image.yml
index 422070b8..16419bc7 100644
--- a/.github/workflows/image.yml
+++ b/.github/workflows/image.yml
@@ -362,6 +362,33 @@ jobs:
           base-image: "ubuntu:22.04"
           skip-drivers: 'false'
           makeflags: "--jobs=4 --output-sync=target"
+  parallel-builds:
+    uses: ./.github/workflows/image_build.yml
+    with:
+      tag-latest: ${{ matrix.tag-latest }}
+      tag-suffix: ${{ matrix.tag-suffix }}
+      ffmpeg: ${{ matrix.ffmpeg }}
+      image-type: ${{ matrix.image-type }}
+      build-type: ${{ matrix.build-type }}
+      cuda-major-version: ${{ matrix.cuda-major-version }}
+      cuda-minor-version: ${{ matrix.cuda-minor-version }}
+      platforms: ${{ matrix.platforms }}
+      runs-on: ${{ matrix.runs-on }}
+      aio: ${{ matrix.aio }}
+      base-image: ${{ matrix.base-image }}
+      grpc-base-image: ${{ matrix.grpc-base-image }}
+      makeflags: ${{ matrix.makeflags }}
+      latest-image: ${{ matrix.latest-image }}
+      latest-image-aio: ${{ matrix.latest-image-aio }}
+      skip-drivers: ${{ matrix.skip-drivers }}
+    secrets:
+      dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
+      dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
+      quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
+      quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
+    strategy:
+      matrix:
+        include:
         - build-type: 'cublas'
           cuda-major-version: "12"
           cuda-minor-version: "0"
           platforms: 'linux/arm64'
           tag-latest: 'false'
           tag-suffix: '-nvidia-l4t-arm64-core'
           latest-image: 'latest-nvidia-l4t-arm64-core'
           ffmpeg: 'true'
           image-type: 'core'
           base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0"
           runs-on: 'self-hosted'
           makeflags: "--jobs=4 --output-sync=target"
           skip-drivers: 'true'
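For context on the `uses: ./.github/workflows/image_build.yml` call above: a reusable workflow can only receive these inputs and secrets if it declares them under a `workflow_call` trigger. A sketch of what the callee side in image_build.yml would need to expose; the names mirror the call site in this patch, but the types and defaults shown are assumptions, not taken from the repository:

```yaml
# Hypothetical sketch of the workflow_call interface in image_build.yml.
# Input/secret names mirror the caller above; types and defaults are assumed.
on:
  workflow_call:
    inputs:
      build-type:
        type: string
        default: ''
      tag-suffix:
        type: string
        required: true
      runs-on:
        type: string
        default: 'ubuntu-latest'
    secrets:
      dockerUsername:
        required: true
      dockerPassword:
        required: true
```

Splitting the matrix across jobs this way lets the arm64/L4T build run on its own runner without holding up the rest of the image matrix.
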
From 9bcfda171b4ddd5eab8c9b864529654040f89df6 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Fri, 3 Jan 2025 20:48:23 +0100
Subject: [PATCH 13/18] ci: lower concurrent jobs

Signed-off-by: Ettore Di Giacinto
---
 .github/workflows/image.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/image.yml b/.github/workflows/image.yml
index 16419bc7..6b06b4b6 100644
--- a/.github/workflows/image.yml
+++ b/.github/workflows/image.yml
@@ -400,5 +400,5 @@ jobs:
           image-type: 'core'
           base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0"
           runs-on: 'self-hosted'
-          makeflags: "--jobs=4 --output-sync=target"
+          makeflags: "--jobs=1 --output-sync=target"
           skip-drivers: 'true'

From 1006e8a2ede2d61502273ec4628a2ce6c1cec2f3 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Fri, 3 Jan 2025 21:58:04 +0100
Subject: [PATCH 14/18] ci: disable arm jobs

Signed-off-by: Ettore Di Giacinto
---
 .github/workflows/image.yml | 80 ++++++++++++++++++------------------
 1 file changed, 40 insertions(+), 40 deletions(-)

diff --git a/.github/workflows/image.yml b/.github/workflows/image.yml
index 6b06b4b6..68727ebe 100644
--- a/.github/workflows/image.yml
+++ b/.github/workflows/image.yml
@@ -362,43 +362,43 @@ jobs:
           base-image: "ubuntu:22.04"
           skip-drivers: 'false'
           makeflags: "--jobs=4 --output-sync=target"
-  parallel-builds:
-    uses: ./.github/workflows/image_build.yml
-    with:
-      tag-latest: ${{ matrix.tag-latest }}
-      tag-suffix: ${{ matrix.tag-suffix }}
-      ffmpeg: ${{ matrix.ffmpeg }}
-      image-type: ${{ matrix.image-type }}
-      build-type: ${{ matrix.build-type }}
-      cuda-major-version: ${{ matrix.cuda-major-version }}
-      cuda-minor-version: ${{ matrix.cuda-minor-version }}
-      platforms: ${{ matrix.platforms }}
-      runs-on: ${{ matrix.runs-on }}
-      aio: ${{ matrix.aio }}
-      base-image: ${{ matrix.base-image }}
-      grpc-base-image: ${{ matrix.grpc-base-image }}
-      makeflags: ${{ matrix.makeflags }}
-      latest-image: ${{ matrix.latest-image }}
-      latest-image-aio: ${{ matrix.latest-image-aio }}
-      skip-drivers: ${{ matrix.skip-drivers }}
-    secrets:
-      dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
-      dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
-      quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
-      quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
-    strategy:
-      matrix:
-        include:
-        - build-type: 'cublas'
-          cuda-major-version: "12"
-          cuda-minor-version: "0"
-          platforms: 'linux/arm64'
-          tag-latest: 'false'
-          tag-suffix: '-nvidia-l4t-arm64-core'
-          latest-image: 'latest-nvidia-l4t-arm64-core'
-          ffmpeg: 'true'
-          image-type: 'core'
-          base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0"
-          runs-on: 'self-hosted'
-          makeflags: "--jobs=1 --output-sync=target"
-          skip-drivers: 'true'
+# parallel-builds:
+#   uses: ./.github/workflows/image_build.yml
+#   with:
+#     tag-latest: ${{ matrix.tag-latest }}
+#     tag-suffix: ${{ matrix.tag-suffix }}
+#     ffmpeg: ${{ matrix.ffmpeg }}
+#     image-type: ${{ matrix.image-type }}
+#     build-type: ${{ matrix.build-type }}
+#     cuda-major-version: ${{ matrix.cuda-major-version }}
+#     cuda-minor-version: ${{ matrix.cuda-minor-version }}
+#     platforms: ${{ matrix.platforms }}
+#     runs-on: ${{ matrix.runs-on }}
+#     aio: ${{ matrix.aio }}
+#     base-image: ${{ matrix.base-image }}
+#     grpc-base-image: ${{ matrix.grpc-base-image }}
+#     makeflags: ${{ matrix.makeflags }}
+#     latest-image: ${{ matrix.latest-image }}
+#     latest-image-aio: ${{ matrix.latest-image-aio }}
+#     skip-drivers: ${{ matrix.skip-drivers }}
+#   secrets:
+#     dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
+#     dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
+#     quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
+#     quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
+#   strategy:
+#     matrix:
+#       include:
+#       - build-type: 'cublas'
+#         cuda-major-version: "12"
+#         cuda-minor-version: "0"
+#         platforms: 'linux/arm64'
+#         tag-latest: 'false'
+#         tag-suffix: '-nvidia-l4t-arm64-core'
+#         latest-image: 'latest-nvidia-l4t-arm64-core'
+#         ffmpeg: 'true'
+#         image-type: 'core'
+#         base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0"
+#         runs-on: 'self-hosted'
+#         makeflags: "--jobs=4 --output-sync=target"
+#         skip-drivers: 'true'

From c553d73748d2ef66d86a9901b0739f1ffc9ea852 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Sat, 4 Jan 2025 09:40:08 +0100
Subject: [PATCH 15/18] chore(deps): bump llama.cpp to 4b0c638b9 (#4532)

deps(llama.cpp): bump to 4b0c638b9

Signed-off-by: Ettore Di Giacinto
---
 Makefile                          | 2 +-
 backend/cpp/llama/grpc-server.cpp | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/Makefile b/Makefile
index 11904d1b..fd9c7627 100644
--- a/Makefile
+++ b/Makefile
@@ -8,7 +8,7 @@ DETECT_LIBS?=true
 # llama.cpp versions
 GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp
 GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
-CPPLLAMA_VERSION?=2f0ee84b9b02d2a98742308026f060ebdc2423f1
+CPPLLAMA_VERSION?=4b0c638b9a68f577cb2066b638c9f622d91ee661

 # whisper.cpp version
 WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp

diff --git a/backend/cpp/llama/grpc-server.cpp b/backend/cpp/llama/grpc-server.cpp
index 98dd8fde..7632aebc 100644
--- a/backend/cpp/llama/grpc-server.cpp
+++ b/backend/cpp/llama/grpc-server.cpp
@@ -492,8 +492,8 @@ struct llama_server_context
         }

         common_init_result common_init = common_init_from_params(params);
-        model = common_init.model;
-        ctx = common_init.context;
+        model = common_init.model.release();
+        ctx = common_init.context.release();

         if (model == nullptr) {
             LOG_ERR("unable to load model: %s", params.model.c_str());

From 05841c24354519555b3a0f5db4970b954eb07c52 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Sat, 4 Jan 2025 09:44:14 +0100
Subject: [PATCH 16/18] chore(model gallery): add drt-o1-7b (#4533)

Signed-off-by: Ettore Di Giacinto
---
 gallery/index.yaml | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/gallery/index.yaml b/gallery/index.yaml
index be624e9f..3251397c 100644
--- a/gallery/index.yaml
+++ b/gallery/index.yaml
@@ -2560,6 +2560,26 @@
     - filename: Qwenwify2.5-32B-v4.5.Q4_K_M.gguf
       sha256: 52670acdc285356c01259f45b1953860f34deb4f80345ca63b60acc19165280c
       uri: huggingface://mradermacher/Qwenwify2.5-32B-v4.5-GGUF/Qwenwify2.5-32B-v4.5.Q4_K_M.gguf
+- !!merge <<: *qwen25
+  name: "drt-o1-7b"
+  urls:
+    - https://huggingface.co/Krystalan/DRT-o1-7B
+    - https://huggingface.co/QuantFactory/DRT-o1-7B-GGUF
+  description: |
+    In this work, we introduce DRT-o1, an attempt to bring the success of long thought reasoning to neural machine translation (MT).
+    To this end,
+
+    🌟 We mine English sentences with similes or metaphors from existing literature books, which are suitable for translation via long thought.
+    🌟 We propose a multi-agent framework with three agents (i.e., a translator, an advisor and an evaluator) to synthesize the MT samples with long thought. There are 22,264 synthesized samples in total.
+    🌟 We train DRT-o1-8B, DRT-o1-7B and DRT-o1-14B using Llama-3.1-8B-Instruct, Qwen2.5-7B-Instruct and Qwen2.5-14B-Instruct as backbones.
+
+    Our goal is not to achieve competitive performance with OpenAI's O1 in neural machine translation (MT). Instead, we explore technical routes to bring the success of long thought to MT. To this end, we introduce DRT-o1, a byproduct of our exploration, and we hope it could facilitate the corresponding research in this direction.
+  overrides:
+    parameters:
+      model: DRT-o1-7B.Q4_K_M.gguf
+  files:
+    - filename: DRT-o1-7B.Q4_K_M.gguf
+      sha256: f592a2523f92ae29630b45fbb501bba7f2fbd99355975cd05fa989faf8d3597d
+      uri: huggingface://QuantFactory/DRT-o1-7B-GGUF/DRT-o1-7B.Q4_K_M.gguf
 - &smollm
   ## SmolLM
   url: "github:mudler/LocalAI/gallery/chatml.yaml@master"

From ec66f7e3b1246e1e417fe472203bc95aea34515f Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Sat, 4 Jan 2025 09:45:07 +0100
Subject: [PATCH 17/18] chore(model gallery): add codepy-deepthink-3b (#4534)

Signed-off-by: Ettore Di Giacinto
---
 gallery/index.yaml | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/gallery/index.yaml b/gallery/index.yaml
index 3251397c..f04f4e40 100644
--- a/gallery/index.yaml
+++ b/gallery/index.yaml
@@ -1039,6 +1039,22 @@
     - filename: FastLlama-3.2-1B-Instruct-Q4_K_M.gguf
       sha256: 3c0303e9560c441a9abdcd0e4c04c47e7f6b21277c1e8c00eed94fc656da0be9
       uri: huggingface://bartowski/FastLlama-3.2-1B-Instruct-GGUF/FastLlama-3.2-1B-Instruct-Q4_K_M.gguf
+- !!merge <<: *llama32
+  name: "codepy-deepthink-3b"
+  urls:
+    - https://huggingface.co/prithivMLmods/Codepy-Deepthink-3B
+    - https://huggingface.co/QuantFactory/Codepy-Deepthink-3B-GGUF
+  description: |
+    The Codepy 3B Deep Think Model is a fine-tuned version of the meta-llama/Llama-3.2-3B-Instruct base model, designed for text generation tasks that require deep reasoning, logical structuring, and problem-solving. This model leverages its optimized architecture to provide accurate and contextually relevant outputs for complex queries, making it ideal for applications in education, programming, and creative writing.
+
+    With its robust natural language processing capabilities, Codepy 3B Deep Think excels in generating step-by-step solutions, creative content, and logical analyses. Its architecture integrates advanced understanding of both structured and unstructured data, ensuring precise text generation aligned with user inputs.
+  overrides:
+    parameters:
+      model: Codepy-Deepthink-3B.Q4_K_M.gguf
+  files:
+    - filename: Codepy-Deepthink-3B.Q4_K_M.gguf
+      sha256: 6202976de1a1b23bb09448dd6f188b849e10f3f99366f829415533ea4445e853
+      uri: huggingface://QuantFactory/Codepy-Deepthink-3B-GGUF/Codepy-Deepthink-3B.Q4_K_M.gguf
 - &qwen25
   ## Qwen2.5
   name: "qwen2.5-14b-instruct"

From a8b3b3d6f4f8e82c9e8f45873024da9fe9b60355 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Sat, 4 Jan 2025 09:48:34 +0100
Subject: [PATCH 18/18] chore(model gallery): add llama3.1-8b-prm-deepseek-data (#4535)

Signed-off-by: Ettore Di Giacinto
---
 gallery/index.yaml | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/gallery/index.yaml b/gallery/index.yaml
index f04f4e40..0242b5ff 100644
--- a/gallery/index.yaml
+++ b/gallery/index.yaml
@@ -4489,6 +4489,22 @@
     - filename: L3.1-Purosani-2-8B.Q4_K_M.gguf
       sha256: e3eb8038a72b6e85b7a43c7806c32f01208f4644d54bf94d77ecad6286cf609f
       uri: huggingface://QuantFactory/L3.1-Purosani-2-8B-GGUF/L3.1-Purosani-2-8B.Q4_K_M.gguf
+- !!merge <<: *llama31
+  name: "llama3.1-8b-prm-deepseek-data"
+  urls:
+    - https://huggingface.co/RLHFlow/Llama3.1-8B-PRM-Deepseek-Data
+    - https://huggingface.co/QuantFactory/Llama3.1-8B-PRM-Deepseek-Data-GGUF
+  description: |
+    This is a process-supervised reward model (PRM) trained on Mistral-generated data from the project RLHFlow/RLHF-Reward-Modeling.
+
+    The model is trained from meta-llama/Llama-3.1-8B-Instruct on RLHFlow/Deepseek-PRM-Data for 1 epoch. We use a global batch size of 32 and a learning rate of 2e-6, where we pack the samples and split them into chunks of 8192 tokens. See more training details at https://github.com/RLHFlow/Online-RLHF/blob/main/math/llama-3.1-prm.yaml.
+  overrides:
+    parameters:
+      model: Llama3.1-8B-PRM-Deepseek-Data.Q4_K_M.gguf
+  files:
+    - filename: Llama3.1-8B-PRM-Deepseek-Data.Q4_K_M.gguf
+      sha256: 254c7ccc4ea3818fe5f6e3ffd5500c779b02058b98f9ce9a3856e54106d008e3
+      uri: huggingface://QuantFactory/Llama3.1-8B-PRM-Deepseek-Data-GGUF/Llama3.1-8B-PRM-Deepseek-Data.Q4_K_M.gguf
 - &deepseek
   ## Deepseek
   url: "github:mudler/LocalAI/gallery/deepseek.yaml@master"
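Taken together, the gallery additions in this series all reduce to one recurring shape. A skeleton of that shape for anyone adding a similar entry; field names mirror the real entries above, while every value is a placeholder:

```yaml
# Skeleton of a model-gallery entry as added throughout this series.
# All values are placeholders; only the field names are real.
- !!merge <<: *llama31                 # inherit a model family's defaults
  name: "my-model-8b"
  urls:
    - https://huggingface.co/org/My-Model-8B        # original weights
    - https://huggingface.co/org/My-Model-8B-GGUF   # quantized GGUF repo
  description: |
    One-paragraph summary of what the model is and what it is for.
  overrides:
    parameters:
      model: My-Model-8B.Q4_K_M.gguf   # file the backend loads at runtime
  files:
    - filename: My-Model-8B.Q4_K_M.gguf
      sha256: 0000000000000000000000000000000000000000000000000000000000000000  # verified on download
      uri: huggingface://org/My-Model-8B-GGUF/My-Model-8B.Q4_K_M.gguf
```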