From 296b97925fab0246184ac582621045565ce9a075 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Sat, 18 Jan 2025 23:21:27 +0100 Subject: [PATCH 01/29] chore: :arrow_up: Update leejet/stable-diffusion.cpp to `5eb15ef4d022bef4a391de4f5f6556e81fbb5024` (#4636) :arrow_up: Update leejet/stable-diffusion.cpp Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 944cad37..fc4eddf4 100644 --- a/Makefile +++ b/Makefile @@ -28,7 +28,7 @@ BARKCPP_VERSION?=v1.0.0 # stablediffusion.cpp (ggml) STABLEDIFFUSION_GGML_REPO?=https://github.com/leejet/stable-diffusion.cpp -STABLEDIFFUSION_GGML_VERSION?=dcf91f9e0f2cbf9da472ee2a556751ed4bab2d2a +STABLEDIFFUSION_GGML_VERSION?=5eb15ef4d022bef4a391de4f5f6556e81fbb5024 ONNX_VERSION?=1.20.0 ONNX_ARCH?=x64 From a752183fb58de465daa35688c93fbe7d4ed324e9 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Sun, 19 Jan 2025 08:38:33 +0100 Subject: [PATCH 02/29] chore: :arrow_up: Update ggerganov/llama.cpp to `a1649cc13f89946322358f92ea268ae1b7b5096c` (#4635) :arrow_up: Update ggerganov/llama.cpp Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index fc4eddf4..dfa91a15 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ DETECT_LIBS?=true # llama.cpp versions GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be -CPPLLAMA_VERSION?=3edfa7d3753c29e44b964c0ff424d2ea8d5fdee6 +CPPLLAMA_VERSION?=a1649cc13f89946322358f92ea268ae1b7b5096c # whisper.cpp version 
WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp From f496d0113b722847aaf4775394ccfd814255fef9 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sun, 19 Jan 2025 09:07:56 +0100 Subject: [PATCH 03/29] chore(deps): pin numba Signed-off-by: Ettore Di Giacinto --- backend/python/transformers/requirements-cpu.txt | 3 ++- backend/python/transformers/requirements-cublas11.txt | 1 + backend/python/transformers/requirements-cublas12.txt | 1 + backend/python/transformers/requirements-hipblas.txt | 1 + backend/python/transformers/requirements-intel.txt | 1 + backend/python/transformers/requirements.txt | 3 +-- 6 files changed, 7 insertions(+), 3 deletions(-) diff --git a/backend/python/transformers/requirements-cpu.txt b/backend/python/transformers/requirements-cpu.txt index 421c4b80..c88508e3 100644 --- a/backend/python/transformers/requirements-cpu.txt +++ b/backend/python/transformers/requirements-cpu.txt @@ -1,7 +1,8 @@ torch==2.4.1 llvmlite==0.43.0 +numba==0.60.0 accelerate transformers bitsandbytes outetts -sentence-transformers==3.3.1 +sentence-transformers==3.3.1 \ No newline at end of file diff --git a/backend/python/transformers/requirements-cublas11.txt b/backend/python/transformers/requirements-cublas11.txt index c5d18d09..0faa9cec 100644 --- a/backend/python/transformers/requirements-cublas11.txt +++ b/backend/python/transformers/requirements-cublas11.txt @@ -1,6 +1,7 @@ --extra-index-url https://download.pytorch.org/whl/cu118 torch==2.4.1+cu118 llvmlite==0.43.0 +numba==0.60.0 accelerate transformers bitsandbytes diff --git a/backend/python/transformers/requirements-cublas12.txt b/backend/python/transformers/requirements-cublas12.txt index c0bcfc87..1e22312f 100644 --- a/backend/python/transformers/requirements-cublas12.txt +++ b/backend/python/transformers/requirements-cublas12.txt @@ -1,6 +1,7 @@ torch==2.4.1 accelerate llvmlite==0.43.0 +numba==0.60.0 transformers bitsandbytes outetts diff --git a/backend/python/transformers/requirements-hipblas.txt 
b/backend/python/transformers/requirements-hipblas.txt index e7f53860..47aa88db 100644 --- a/backend/python/transformers/requirements-hipblas.txt +++ b/backend/python/transformers/requirements-hipblas.txt @@ -3,6 +3,7 @@ torch==2.4.1+rocm6.0 accelerate transformers llvmlite==0.43.0 +numba==0.60.0 bitsandbytes outetts bitsandbytes diff --git a/backend/python/transformers/requirements-intel.txt b/backend/python/transformers/requirements-intel.txt index aada6e00..708b0516 100644 --- a/backend/python/transformers/requirements-intel.txt +++ b/backend/python/transformers/requirements-intel.txt @@ -4,6 +4,7 @@ torch==2.3.1+cxx11.abi oneccl_bind_pt==2.3.100+xpu optimum[openvino] llvmlite==0.43.0 +numba==0.60.0 intel-extension-for-transformers bitsandbytes outetts diff --git a/backend/python/transformers/requirements.txt b/backend/python/transformers/requirements.txt index d353e4d0..db41b928 100644 --- a/backend/python/transformers/requirements.txt +++ b/backend/python/transformers/requirements.txt @@ -3,5 +3,4 @@ protobuf certifi setuptools scipy==1.15.1 -numpy>=2.0.0 -numba==0.60.0 \ No newline at end of file +numpy>=2.0.0 \ No newline at end of file From 83e2dd5dff7b36d8cc9528d63ed0468145ef79df Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Sun, 19 Jan 2025 23:34:32 +0100 Subject: [PATCH 04/29] chore: :arrow_up: Update ggerganov/llama.cpp to `92bc493917d43b83e592349e138b54c90b1c3ea7` (#4640) :arrow_up: Update ggerganov/llama.cpp Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index dfa91a15..7aaad492 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ DETECT_LIBS?=true # llama.cpp versions GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be 
-CPPLLAMA_VERSION?=a1649cc13f89946322358f92ea268ae1b7b5096c +CPPLLAMA_VERSION?=92bc493917d43b83e592349e138b54c90b1c3ea7 # whisper.cpp version WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp From 30739d94a41139fe5c8cf68239cc7353d102c4fe Mon Sep 17 00:00:00 2001 From: Gianluca Boiano <491117+M0Rf30@users.noreply.github.com> Date: Mon, 20 Jan 2025 10:34:19 +0100 Subject: [PATCH 05/29] chore(model gallery): add InternLM3-8b-Q4_K_M (#4637) chore(model gallery): add InternLM3-8b-Q4_K_M Signed-off-by: Gianluca Boiano --- gallery/index.yaml | 27 ++++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/gallery/index.yaml b/gallery/index.yaml index edd52725..61ecd107 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -10100,7 +10100,7 @@ urls: - https://huggingface.co/internlm/internlm2_5-7b-chat-1m - https://huggingface.co/bartowski/internlm2_5-7b-chat-1m-GGUF - icon: https://github.com/InternLM/InternLM/assets/22529082/b9788105-8892-4398-8b47-b513a292378e + icon: https://avatars.githubusercontent.com/u/135356492 tags: - internlm2 - gguf @@ -10121,6 +10121,31 @@ - filename: internlm2_5-7b-chat-1m-Q4_K_M.gguf uri: huggingface://bartowski/internlm2_5-7b-chat-1m-GGUF/internlm2_5-7b-chat-1m-Q4_K_M.gguf sha256: 10d5e18a4125f9d4d74a9284a21e0c820b150af06dee48665e54ff6e1be3a564 +### Internlm3 +- name: "internlm3-8b-instruct" + url: "github:mudler/LocalAI/gallery/chatml.yaml@master" + urls: + - https://huggingface.co/internlm/internlm3-8b-instruct + - https://huggingface.co/bartowski/internlm3-8b-instruct-GGUF + icon: https://avatars.githubusercontent.com/u/135356492 + tags: + - internlm3 + - gguf + - cpu + - gpu + description: | + InternLM3 has open-sourced an 8-billion parameter instruction model, InternLM3-8B-Instruct, designed for general-purpose usage and advanced reasoning. 
The model has the following characteristics: + + Enhanced performance at reduced cost: State-of-the-art performance on reasoning and knowledge-intensive tasks surpass models like Llama3.1-8B and Qwen2.5-7B. + + Deep thinking capability: InternLM3 supports both the deep thinking mode for solving complicated reasoning tasks via the long chain-of-thought and the normal response mode for fluent user interactions. + overrides: + parameters: + model: internlm3-8b-instruct-Q4_K_M.gguf + files: + - filename: internlm3-8b-instruct-Q4_K_M.gguf + uri: huggingface://bartowski/internlm3-8b-instruct-GGUF/internlm3-8b-instruct-Q4_K_M.gguf + sha256: 2a9644687318e8659c9cf9b40730d5cc2f5af06f786a50439c7c51359b23896e - &phi-3 ### START Phi-3 url: "github:mudler/LocalAI/gallery/phi-3-chat.yaml@master" From 390bb3f58bb5d878c852c71e473ae0754a8d817d Mon Sep 17 00:00:00 2001 From: Gianluca Boiano <491117+M0Rf30@users.noreply.github.com> Date: Mon, 20 Jan 2025 10:35:05 +0100 Subject: [PATCH 06/29] fix(model gallery): minicpm-v-2.6 is based on qwen2 (#4638) Signed-off-by: Gianluca Boiano --- gallery/index.yaml | 54 +++++++++++++++++++++++----------------------- 1 file changed, 27 insertions(+), 27 deletions(-) diff --git a/gallery/index.yaml b/gallery/index.yaml index 61ecd107..1c170f99 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -5583,6 +5583,33 @@ - filename: marco-o1-uncensored.Q4_K_M.gguf sha256: ad0440270a7254098f90779744d3e5b34fe49b7baf97c819909ba9c5648cc0d9 uri: huggingface://QuantFactory/marco-o1-uncensored-GGUF/marco-o1-uncensored.Q4_K_M.gguf +- !!merge <<: *qwen2 + name: "minicpm-v-2_6" + license: apache-2.0 + icon: https://raw.githubusercontent.com/OpenBMB/MiniCPM/main/assets/minicpm_logo.png + urls: + - https://huggingface.co/openbmb/MiniCPM-V-2_6-gguf + - https://huggingface.co/openbmb/MiniCPM-V-2_6 + description: | + MiniCPM-V 2.6 is the latest and most capable model in the MiniCPM-V series. 
The model is built on SigLip-400M and Qwen2-7B with a total of 8B parameters + tags: + - llm + - multimodal + - gguf + - gpu + - qwen2 + - cpu + overrides: + mmproj: minicpm-v-2_6-mmproj-f16.gguf + parameters: + model: minicpm-v-2_6-Q4_K_M.gguf + files: + - filename: minicpm-v-2_6-Q4_K_M.gguf + sha256: 3a4078d53b46f22989adbf998ce5a3fd090b6541f112d7e936eb4204a04100b1 + uri: huggingface://openbmb/MiniCPM-V-2_6-gguf/ggml-model-Q4_K_M.gguf + - filename: minicpm-v-2_6-mmproj-f16.gguf + sha256: f8a805e9e62085805c69c427287acefc284932eb4abfe6e1b1ce431d27e2f4e0 + uri: huggingface://openbmb/MiniCPM-V-2_6-gguf/mmproj-model-f16.gguf - &mistral03 ## START Mistral url: "github:mudler/LocalAI/gallery/mistral-0.3.yaml@master" @@ -9211,33 +9238,6 @@ - filename: minicpm-llama3-mmproj-f16.gguf sha256: 391d11736c3cd24a90417c47b0c88975e86918fcddb1b00494c4d715b08af13e uri: huggingface://openbmb/MiniCPM-Llama3-V-2_5-gguf/mmproj-model-f16.gguf -- !!merge <<: *llama3 - name: "minicpm-v-2_6" - license: apache-2.0 - icon: https://raw.githubusercontent.com/OpenBMB/MiniCPM/main/assets/minicpm_logo.png - urls: - - https://huggingface.co/openbmb/MiniCPM-V-2_6-gguf - - https://huggingface.co/openbmb/MiniCPM-V-2_6 - description: | - MiniCPM-V 2.6 is the latest and most capable model in the MiniCPM-V series. 
The model is built on SigLip-400M and Qwen2-7B with a total of 8B parameters - tags: - - llm - - multimodal - - gguf - - gpu - - llama3 - - cpu - overrides: - mmproj: minicpm-v-2_6-mmproj-f16.gguf - parameters: - model: minicpm-v-2_6-Q4_K_M.gguf - files: - - filename: minicpm-v-2_6-Q4_K_M.gguf - sha256: 3a4078d53b46f22989adbf998ce5a3fd090b6541f112d7e936eb4204a04100b1 - uri: huggingface://openbmb/MiniCPM-V-2_6-gguf/ggml-model-Q4_K_M.gguf - - filename: minicpm-v-2_6-mmproj-f16.gguf - sha256: f8a805e9e62085805c69c427287acefc284932eb4abfe6e1b1ce431d27e2f4e0 - uri: huggingface://openbmb/MiniCPM-V-2_6-gguf/mmproj-model-f16.gguf - !!merge <<: *llama3 name: "llama-3-cursedstock-v1.8-8b-iq-imatrix" urls: From 0c0e015b3893816a984f59cd5a6cfb25f5cf90c1 Mon Sep 17 00:00:00 2001 From: Gianluca Boiano <491117+M0Rf30@users.noreply.github.com> Date: Mon, 20 Jan 2025 10:40:46 +0100 Subject: [PATCH 07/29] chore(model gallery): update icons and add missing ones (#4639) * chore(model gallery): uniform github URLs for icons Signed-off-by: Gianluca Boiano * chore(model gallery): add icons to phi models Signed-off-by: Gianluca Boiano * chore(model gallery): add icons to QwenLM models Signed-off-by: Gianluca Boiano * chore(model gallery): update icon for Arcee org Signed-off-by: Gianluca Boiano * chore(model gallery): update icon for Meta org Signed-off-by: Gianluca Boiano * chore(model gallery): update icon url for OpenCoder org Signed-off-by: Gianluca Boiano * chore(model gallery): add icon for RWKV org Signed-off-by: Gianluca Boiano * chore(model gallery): add icon for IBM-granite org Signed-off-by: Gianluca Boiano * chore(model gallery): add icon for OpenBMB org Signed-off-by: Gianluca Boiano * chore(model gallery): add icon for KatanemoLabs org Signed-off-by: Gianluca Boiano * chore(model gallery): update icon for Meta-Llama-3.1-8B-Instruct-abliterated Signed-off-by: Gianluca Boiano * chore(model gallery): update icon for hermes-3-llama-3.1-8b-lorablated Signed-off-by: Gianluca Boiano 
* chore(model gallery): add icon for Google org Signed-off-by: Gianluca Boiano --------- Signed-off-by: Gianluca Boiano Signed-off-by: Ettore Di Giacinto Co-authored-by: Ettore Di Giacinto --- gallery/index.yaml | 53 +++++++++++++++++++++++++++------------------- 1 file changed, 31 insertions(+), 22 deletions(-) diff --git a/gallery/index.yaml b/gallery/index.yaml index 1c170f99..fb5476f9 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -2,6 +2,7 @@ - &phi4 url: "github:mudler/LocalAI/gallery/phi-4-chat.yaml@master" name: "phi-4" + icon: https://avatars.githubusercontent.com/u/6154722 license: mit tags: - llm @@ -224,7 +225,7 @@ uri: huggingface://bartowski/INTELLECT-1-Instruct-GGUF/INTELLECT-1-Instruct-Q4_K_M.gguf - &llama33 url: "github:mudler/LocalAI/gallery/llama3.1-instruct.yaml@master" - icon: https://cdn-uploads.huggingface.co/production/uploads/642cc1c253e76b4c2286c58e/aJJxKus1wP5N-euvHEUq7.png + icon: https://avatars.githubusercontent.com/u/153379578 license: llama3.3 description: | The Meta Llama 3.3 multilingual large language model (LLM) is a pretrained and instruction tuned generative model in 70B (text in/text out). The Llama 3.3 instruction tuned text only model is optimized for multilingual dialogue use cases and outperform many of the available open source and closed chat models on common industry benchmarks. 
@@ -421,6 +422,7 @@ - &rwkv url: "github:mudler/LocalAI/gallery/rwkv.yaml@master" name: "rwkv-6-world-7b" + icon: https://avatars.githubusercontent.com/u/132652788 license: apache-2.0 urls: - https://huggingface.co/RWKV/rwkv-6-world-7b @@ -443,6 +445,7 @@ uri: huggingface://bartowski/rwkv-6-world-7b-GGUF/rwkv-6-world-7b-Q4_K_M.gguf - &qwen25coder name: "qwen2.5-coder-14b" + icon: https://avatars.githubusercontent.com/u/141221163 url: "github:mudler/LocalAI/gallery/chatml.yaml@master" license: apache-2.0 tags: @@ -628,7 +631,7 @@ uri: huggingface://mradermacher/Qwen2.5-Coder-32B-Instruct-Uncensored-i1-GGUF/Qwen2.5-Coder-32B-Instruct-Uncensored.i1-Q4_K_M.gguf - &opencoder name: "opencoder-8b-base" - icon: https://github.com/OpenCoder-llm/opencoder-llm.github.io/blob/main/static/images/opencoder_icon.jpg?raw=true + icon: https://avatars.githubusercontent.com/u/186387526 url: "github:mudler/LocalAI/gallery/codellama.yaml@master" urls: - https://huggingface.co/infly/OpenCoder-8B-Base @@ -694,6 +697,7 @@ uri: huggingface://QuantFactory/OpenCoder-1.5B-Instruct-GGUF/OpenCoder-1.5B-Instruct.Q4_K_M.gguf - &granite3 name: "granite-3.0-1b-a400m-instruct" + icon: https://avatars.githubusercontent.com/u/167822367 urls: - https://huggingface.co/ibm-granite/granite-3.0-1b-a400m-instruct - https://huggingface.co/QuantFactory/granite-3.0-1b-a400m-instruct-GGUF @@ -781,7 +785,7 @@ - &llama32 ## llama3.2 url: "github:mudler/LocalAI/gallery/llama3.1-instruct.yaml@master" - icon: https://cdn-uploads.huggingface.co/production/uploads/642cc1c253e76b4c2286c58e/aJJxKus1wP5N-euvHEUq7.png + icon: https://avatars.githubusercontent.com/u/153379578 license: llama3.2 description: | The Meta Llama 3.2 collection of multilingual large language models (LLMs) is a collection of pretrained and instruction-tuned generative models in 1B and 3B sizes (text in/text out). 
The Llama 3.2 instruction-tuned text only models are optimized for multilingual dialogue use cases, including agentic retrieval and summarization tasks. They outperform many of the available open source and closed chat models on common industry benchmarks. @@ -950,7 +954,6 @@ uri: huggingface://mradermacher/Llama-3.2-3B-Reasoning-Time-GGUF/Llama-3.2-3B-Reasoning-Time.Q4_K_M.gguf - !!merge <<: *llama32 name: "llama-3.2-sun-2.5b-chat" - icon: https://i.ibb.co/PF0TdMJ/imagine-image-9a56cee7-0f4f-4cc2-b265-a5b8d04f266b.png urls: - https://huggingface.co/meditsolutions/Llama-3.2-SUN-2.5B-chat - https://huggingface.co/mradermacher/Llama-3.2-SUN-2.5B-chat-GGUF @@ -982,7 +985,6 @@ uri: huggingface://mradermacher/Llama-3.2-SUN-2.5B-chat-GGUF/Llama-3.2-SUN-2.5B-chat.Q4_K_M.gguf - !!merge <<: *llama32 name: "llama-3.2-3b-instruct-uncensored" - icon: https://i.imgur.com/JOePyAN.png urls: - https://huggingface.co/bartowski/Llama-3.2-3B-Instruct-uncensored-GGUF - https://huggingface.co/chuanli11/Llama-3.2-3B-Instruct-uncensored @@ -1319,6 +1321,7 @@ - &qwen25 ## Qwen2.5 name: "qwen2.5-14b-instruct" + icon: https://avatars.githubusercontent.com/u/141221163 url: "github:mudler/LocalAI/gallery/chatml.yaml@master" license: apache-2.0 description: | @@ -1608,6 +1611,7 @@ uri: huggingface://bartowski/qwen2.5-7b-ins-v3-GGUF/qwen2.5-7b-ins-v3-Q4_K_M.gguf - !!merge <<: *qwen25 name: "supernova-medius" + icon: https://avatars.githubusercontent.com/u/126496414 urls: - https://huggingface.co/arcee-ai/SuperNova-Medius-GGUF description: | @@ -1762,7 +1766,7 @@ uri: huggingface://bartowski/TheBeagle-v2beta-32B-MGS-GGUF/TheBeagle-v2beta-32B-MGS-Q4_K_M.gguf - !!merge <<: *qwen25 name: "meraj-mini" - icon: https://i.ibb.co/CmPSSpq/Screenshot-2024-10-06-at-9-45-06-PM.png + icon: https://avatars.githubusercontent.com/u/126496414 urls: - https://huggingface.co/arcee-ai/Meraj-Mini - https://huggingface.co/QuantFactory/Meraj-Mini-GGUF @@ -2392,7 +2396,7 @@ uri: 
huggingface://QuantFactory/Math-IIO-7B-Instruct-GGUF/Math-IIO-7B-Instruct.Q4_K_M.gguf - !!merge <<: *qwen25 name: "virtuoso-small" - icon: https://i.ibb.co/pXD6Bcv/SW2-U-g-QQLSH1-ZAbxhs-Iu-A.webp + icon: https://avatars.githubusercontent.com/u/126496414 urls: - https://huggingface.co/arcee-ai/Virtuoso-Small-GGUF description: | @@ -2670,6 +2674,7 @@ - cpu - function-calling name: "arch-function-1.5b" + icon: https://avatars.githubusercontent.com/u/112724757 uri: "github:mudler/LocalAI/gallery/arch-function.yaml@master" urls: - https://huggingface.co/katanemolabs/Arch-Function-1.5B @@ -3109,7 +3114,7 @@ uri: huggingface://bartowski/Rombos-Qwen2.5-Writer-32b-GGUF/Rombos-Qwen2.5-Writer-32b-Q4_K_M.gguf - !!merge <<: *qwen25 name: "sky-t1-32b-preview" - icon: https://raw.githubusercontent.com/NovaSky-AI/novasky-ai.github.io/main/assets/images/blue-bird-wider.jpeg + icon: https://github.com/NovaSky-AI/novasky-ai.github.io/raw/main/assets/images/blue-bird-wider.jpeg urls: - https://huggingface.co/NovaSky-AI/Sky-T1-32B-Preview - https://huggingface.co/bartowski/Sky-T1-32B-Preview-GGUF @@ -3298,7 +3303,7 @@ - &llama31 ## LLama3.1 url: "github:mudler/LocalAI/gallery/llama3.1-instruct.yaml@master" - icon: https://cdn-uploads.huggingface.co/production/uploads/642cc1c253e76b4c2286c58e/aJJxKus1wP5N-euvHEUq7.png + icon: https://avatars.githubusercontent.com/u/153379578 name: "meta-llama-3.1-8b-instruct" license: llama3.1 description: | @@ -3387,7 +3392,7 @@ sha256: 6d175432f66d10dfed9737f73a5073d513d18e1ee7bd4b9cf2a59deb359f36ff - !!merge <<: *llama31 name: "meta-llama-3.1-8b-instruct-abliterated" - icon: https://i.imgur.com/KhorYYG.png + icon: https://cdn-uploads.huggingface.co/production/uploads/61b8e2ba285851687028d395/AsTgL8VCgMHgobq4cr46b.png urls: - https://huggingface.co/mlabonne/Meta-Llama-3.1-8B-Instruct-abliterated - https://huggingface.co/mlabonne/Meta-Llama-3.1-8B-Instruct-abliterated-GGUF @@ -3416,7 +3421,7 @@ uri: 
huggingface://mmnga/Llama-3.1-70B-Japanese-Instruct-2407-gguf/Llama-3.1-70B-Japanese-Instruct-2407-Q4_K_M.gguf - !!merge <<: *llama31 name: "openbuddy-llama3.1-8b-v22.1-131k" - icon: https://raw.githubusercontent.com/OpenBuddy/OpenBuddy/main/media/demo.png + icon: https://github.com/OpenBuddy/OpenBuddy/raw/main/media/demo.png urls: - https://huggingface.co/sunnyyy/openbuddy-llama3.1-8b-v22.1-131k-Q4_K_M-GGUF description: | @@ -3592,7 +3597,7 @@ sha256: 6557c5d5091f2507d19ab1f8bfb9ceb4e1536a755ab70f148b18aeb33741580f uri: huggingface://mradermacher/Llama-3.1-Techne-RP-8b-v1-GGUF/Llama-3.1-Techne-RP-8b-v1.Q4_K_M.gguf - !!merge <<: *llama31 - icon: https://i.ibb.co/9hwFrvL/BLMs-Wkx-NQf-W-46-FZDg-ILhg.jpg + icon: https://avatars.githubusercontent.com/u/126496414 name: "llama-spark" urls: - https://huggingface.co/arcee-ai/Llama-Spark @@ -3710,7 +3715,6 @@ - !!merge <<: *llama31 name: "llama-3.1-supernova-lite-reflection-v1.0-i1" url: "github:mudler/LocalAI/gallery/llama3.1-reflective.yaml@master" - icon: https://i.ibb.co/r072p7j/eopi-ZVu-SQ0-G-Cav78-Byq-Tg.png urls: - https://huggingface.co/SE6446/Llama-3.1-SuperNova-Lite-Reflection-V1.0 - https://huggingface.co/mradermacher/Llama-3.1-SuperNova-Lite-Reflection-V1.0-i1-GGUF @@ -3725,7 +3729,7 @@ uri: huggingface://mradermacher/Llama-3.1-SuperNova-Lite-Reflection-V1.0-i1-GGUF/Llama-3.1-SuperNova-Lite-Reflection-V1.0.i1-Q4_K_M.gguf - !!merge <<: *llama31 name: "llama-3.1-supernova-lite" - icon: https://i.ibb.co/r072p7j/eopi-ZVu-SQ0-G-Cav78-Byq-Tg.png + icon: https://avatars.githubusercontent.com/u/126496414 urls: - https://huggingface.co/arcee-ai/Llama-3.1-SuperNova-Lite - https://huggingface.co/arcee-ai/Llama-3.1-SuperNova-Lite-GGUF @@ -4239,6 +4243,7 @@ uri: huggingface://mradermacher/Hermes-3-Llama-3.1-70B-lorablated-GGUF/Hermes-3-Llama-3.1-70B-lorablated.Q4_K_M.gguf - !!merge <<: *llama31 name: "hermes-3-llama-3.1-8b-lorablated" + icon: 
https://cdn-uploads.huggingface.co/production/uploads/61b8e2ba285851687028d395/4Hbw5n68jKUSBQeTqQIeT.png urls: - https://huggingface.co/mlabonne/Hermes-3-Llama-3.1-8B-lorablated-GGUF description: | @@ -5254,6 +5259,7 @@ ## Start QWEN2 url: "github:mudler/LocalAI/gallery/chatml.yaml@master" name: "qwen2-7b-instruct" + icon: https://avatars.githubusercontent.com/u/141221163 license: apache-2.0 description: | Qwen2 is the new series of Qwen large language models. For Qwen2, we release a number of base language models and instruction-tuned language models ranging from 0.5 to 72 billion parameters, including a Mixture-of-Experts model. This repo contains the instruction-tuned 7B Qwen2 model. @@ -5360,7 +5366,7 @@ uri: huggingface://bartowski/Einstein-v7-Qwen2-7B-GGUF/Einstein-v7-Qwen2-7B-Q4_K_M.gguf - !!merge <<: *qwen2 name: "arcee-spark" - icon: https://i.ibb.co/80ssNWS/o-Vdk-Qx-ARNmzr-Pi1h-Efj-SA.webp + icon: https://avatars.githubusercontent.com/u/126496414 description: | Arcee Spark is a powerful 7B parameter language model that punches well above its weight class. Initialized from Qwen2, this model underwent a sophisticated training process: @@ -5398,7 +5404,7 @@ uri: huggingface://Hercules-5.0-Qwen2-7B-Q4_K_M.gguf/Hercules-5.0-Qwen2-7B-Q4_K_M.gguf - !!merge <<: *qwen2 name: "arcee-agent" - icon: https://i.ibb.co/CBHmTDn/136719a5-6d8a-4654-a618-46eabc788953.jpg + icon: https://avatars.githubusercontent.com/u/126496414 description: | Arcee Agent is a cutting-edge 7B parameter language model specifically designed for function calling and tool use. Initialized from Qwen2-7B, it rivals the performance of much larger models while maintaining efficiency and speed. This model is particularly suited for developers, researchers, and businesses looking to implement sophisticated AI-driven solutions without the computational overhead of larger language models. Compute for training Arcee-Agent was provided by CrusoeAI. Arcee-Agent was trained using Spectrum. 
urls: @@ -5586,7 +5592,7 @@ - !!merge <<: *qwen2 name: "minicpm-v-2_6" license: apache-2.0 - icon: https://raw.githubusercontent.com/OpenBMB/MiniCPM/main/assets/minicpm_logo.png + icon: https://avatars.githubusercontent.com/u/89920203 urls: - https://huggingface.co/openbmb/MiniCPM-V-2_6-gguf - https://huggingface.co/openbmb/MiniCPM-V-2_6 @@ -6321,6 +6327,7 @@ - &gemma url: "github:mudler/LocalAI/gallery/gemma.yaml@master" name: "gemma-2b" + icon: https://avatars.githubusercontent.com/u/1342004 license: gemma urls: - https://ai.google.dev/gemma/docs @@ -7036,7 +7043,7 @@ uri: huggingface://bartowski/GWQ-9B-Preview2-GGUF/GWQ-9B-Preview2-Q4_K_M.gguf - &llama3 url: "github:mudler/LocalAI/gallery/llama3-instruct.yaml@master" - icon: https://cdn-uploads.huggingface.co/production/uploads/642cc1c253e76b4c2286c58e/aJJxKus1wP5N-euvHEUq7.png + icon: https://avatars.githubusercontent.com/u/153379578 name: "llama3-8b-instruct" license: llama3 description: | @@ -8503,7 +8510,7 @@ urls: - https://huggingface.co/arcee-ai/Llama-3-SEC-Chat-GGUF - https://huggingface.co/arcee-ai/Llama-3-SEC-Chat - icon: https://i.ibb.co/kHtBmDN/w8m6-X4-HCQRa-IR86ar-Cm5gg.webp + icon: https://avatars.githubusercontent.com/u/126496414 tags: - llama3 - gguf @@ -8536,7 +8543,7 @@ - &yi-chat ### Start Yi url: "github:mudler/LocalAI/gallery/chatml.yaml@master" - icon: "https://raw.githubusercontent.com/01-ai/Yi/main/assets/img/Yi_logo_icon_light.svg" + icon: "https://github.com/01-ai/Yi/raw/main/assets/img/Yi_logo_icon_light.svg" name: "yi-1.5-9b-chat" license: apache-2.0 urls: @@ -9165,7 +9172,7 @@ urls: - https://huggingface.co/BAAI/Bunny-Llama-3-8B-V-gguf description: | - Bunny is a family of lightweight but powerful multimodal models. It offers multiple plug-and-play vision encoders, like EVA-CLIP, SigLIP and language backbones, including Llama-3-8B, Phi-1.5, StableLM-2, Qwen1.5, MiniCPM and Phi-2. 
To compensate for the decrease in model size, we construct more informative training data by curated selection from a broader data source. + Bunny is a family of lightweight but powerful multimodal models. It offers multiple plug-and-play vision encoders, like EVA-CLIP, SigLIP and language backbones, including Llama-3-8B, Phi-1.5, StableLM-2, Qwen1.5, and Phi-2. To compensate for the decrease in model size, we construct more informative training data by curated selection from a broader data source. We provide Bunny-Llama-3-8B-V, which is built upon SigLIP and Llama-3-8B-Instruct. More details about this model can be found in GitHub. icon: https://huggingface.co/BAAI/Bunny-Llama-3-8B-V-gguf/resolve/main/icon.png @@ -9214,7 +9221,7 @@ uri: huggingface://xtuner/llava-llama-3-8b-v1_1-gguf/llava-llama-3-8b-v1_1-mmproj-f16.gguf - !!merge <<: *llama3 name: "minicpm-llama3-v-2_5" - icon: https://raw.githubusercontent.com/OpenBMB/MiniCPM/main/assets/minicpm_logo.png + icon: https://avatars.githubusercontent.com/u/89920203 urls: - https://huggingface.co/openbmb/MiniCPM-Llama3-V-2_5-gguf - https://huggingface.co/openbmb/MiniCPM-Llama3-V-2_5 @@ -10054,6 +10061,7 @@ - llama2 - cpu name: "phi-2-chat:Q8_0" + icon: https://avatars.githubusercontent.com/u/6154722 overrides: parameters: model: phi-2-layla-v1-chatml-Q8_0.gguf @@ -10150,6 +10158,7 @@ ### START Phi-3 url: "github:mudler/LocalAI/gallery/phi-3-chat.yaml@master" name: "phi-3-mini-4k-instruct" + icon: https://avatars.githubusercontent.com/u/6154722 license: mit description: | The Phi-3-Mini-4K-Instruct is a 3.8B parameters, lightweight, state-of-the-art open model trained with the Phi-3 datasets that includes both synthetic data and the filtered publicly available websites data with a focus on high-quality and reasoning dense properties. The model belongs to the Phi-3 family with the Mini version in two variants 4K and 128K which is the context length (in tokens) it can support. 
The model has underwent a post-training process that incorporates both supervised fine-tuning and direct preference optimization to ensure precise instruction adherence and robust safety measures. When assessed against benchmarks testing common sense, language understanding, math, code, long context and logical reasoning, Phi-3 Mini-4K-Instruct showcased a robust and state-of-the-art performance among models with less than 13 billion parameters. From adebd557ce8446edbe097b3eeb54c524e6638e78 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Mon, 20 Jan 2025 10:45:10 +0100 Subject: [PATCH 08/29] chore(model gallery): add wayfarer-12b (#4641) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 105 +++++++++++++++++++++++---------------------- 1 file changed, 54 insertions(+), 51 deletions(-) diff --git a/gallery/index.yaml b/gallery/index.yaml index fb5476f9..0397bd75 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -190,7 +190,7 @@ - https://huggingface.co/Nitral-AI/NightWing3-10B-v0.1 - https://huggingface.co/bartowski/NightWing3-10B-v0.1-GGUF description: | - Base model: (Falcon3-10B) + Base model: (Falcon3-10B) overrides: parameters: model: NightWing3-10B-v0.1-Q4_K_M.gguf @@ -782,8 +782,7 @@ - filename: salamandra-7b-instruct.Q4_K_M-f32.gguf sha256: bac8e8c1d1d9d53cbdb148b8ff9ad378ddb392429207099e85b5aae3a43bff3d uri: huggingface://cstr/salamandra-7b-instruct-GGUF/salamandra-7b-instruct.Q4_K_M-f32.gguf -- &llama32 - ## llama3.2 +- &llama32 ## llama3.2 url: "github:mudler/LocalAI/gallery/llama3.1-instruct.yaml@master" icon: https://avatars.githubusercontent.com/u/153379578 license: llama3.2 @@ -1318,8 +1317,7 @@ - filename: FineMath-Llama-3B-Q4_K_M.gguf sha256: 16c73b5cf2a417a7e1608bcc9469f1461fc3e759ce04a3a337f48df977dc158c uri: huggingface://bartowski/FineMath-Llama-3B-GGUF/FineMath-Llama-3B-Q4_K_M.gguf -- &qwen25 - ## Qwen2.5 +- &qwen25 ## Qwen2.5 name: "qwen2.5-14b-instruct" icon: https://avatars.githubusercontent.com/u/141221163 url: 
"github:mudler/LocalAI/gallery/chatml.yaml@master" @@ -3241,8 +3239,7 @@ - filename: DRT-o1-14B-Q4_K_M.gguf sha256: 9619ca984cf4ce8e4f69bcde831de17b2ce05dd89536e3130608877521e3d328 uri: huggingface://bartowski/DRT-o1-14B-GGUF/DRT-o1-14B-Q4_K_M.gguf -- &smollm - ## SmolLM +- &smollm ## SmolLM url: "github:mudler/LocalAI/gallery/chatml.yaml@master" name: "smollm-1.7b-instruct" icon: https://huggingface.co/datasets/HuggingFaceTB/images/resolve/main/banner_smol.png @@ -3300,8 +3297,7 @@ - filename: Vikhr-Qwen-2.5-1.5B-Instruct.Q4_K_M.gguf sha256: eaeac314e30b461413bc1cc819cdc0cd6a79265711fd0b8268702960a082c7bd uri: huggingface://QuantFactory/Vikhr-Qwen-2.5-1.5B-Instruct-GGUF/Vikhr-Qwen-2.5-1.5B-Instruct.Q4_K_M.gguf -- &llama31 - ## LLama3.1 +- &llama31 ## LLama3.1 url: "github:mudler/LocalAI/gallery/llama3.1-instruct.yaml@master" icon: https://avatars.githubusercontent.com/u/153379578 name: "meta-llama-3.1-8b-instruct" @@ -5189,8 +5185,7 @@ - filename: Dolphin3.0-Llama3.1-8B-Q4_K_M.gguf sha256: 268390e07edd407ad93ea21a868b7ae995b5950e01cad0db9e1802ae5049d405 uri: huggingface://bartowski/Dolphin3.0-Llama3.1-8B-GGUF/Dolphin3.0-Llama3.1-8B-Q4_K_M.gguf -- &deepseek - ## Deepseek +- &deepseek ## Deepseek url: "github:mudler/LocalAI/gallery/deepseek.yaml@master" name: "deepseek-coder-v2-lite-instruct" icon: "https://github.com/deepseek-ai/DeepSeek-V2/blob/main/figures/logo.svg?raw=true" @@ -5255,8 +5250,7 @@ - filename: archangel_sft_pythia2-8b.Q4_K_M.gguf sha256: a47782c55ef2b39b19644213720a599d9849511a73c9ebb0c1de749383c0a0f8 uri: huggingface://RichardErkhov/ContextualAI_-_archangel_sft_pythia2-8b-gguf/archangel_sft_pythia2-8b.Q4_K_M.gguf -- &qwen2 - ## Start QWEN2 +- &qwen2 ## Start QWEN2 url: "github:mudler/LocalAI/gallery/chatml.yaml@master" name: "qwen2-7b-instruct" icon: https://avatars.githubusercontent.com/u/141221163 @@ -5616,8 +5610,7 @@ - filename: minicpm-v-2_6-mmproj-f16.gguf sha256: f8a805e9e62085805c69c427287acefc284932eb4abfe6e1b1ce431d27e2f4e0 uri: 
huggingface://openbmb/MiniCPM-V-2_6-gguf/mmproj-model-f16.gguf -- &mistral03 - ## START Mistral +- &mistral03 ## START Mistral url: "github:mudler/LocalAI/gallery/mistral-0.3.yaml@master" name: "mistral-7b-instruct-v0.3" icon: https://cdn-avatars.huggingface.co/v1/production/uploads/62dac1c7a8ead43d20e3e17a/wrLf5yaGC6ng4XME70w6Z.png @@ -6222,8 +6215,35 @@ - filename: Nera_Noctis-12B-Q4_K_M.gguf sha256: 0662a9a847adde046e6255c15d5a677ebf09ab00841547c8963668d14baf00ff uri: huggingface://bartowski/Nera_Noctis-12B-GGUF/Nera_Noctis-12B-Q4_K_M.gguf -- &mudler - ### START mudler's LocalAI specific-models +- !!merge <<: *mistral03 + name: "wayfarer-12b" + url: "github:mudler/LocalAI/gallery/chatml.yaml@master" + icon: https://huggingface.co/LatitudeGames/Wayfarer-12B/resolve/main/wayfarer.jpg + urls: + - https://huggingface.co/LatitudeGames/Wayfarer-12B + - https://huggingface.co/bartowski/Wayfarer-12B-GGUF + description: | + We’ve heard over and over from AI Dungeon players that modern AI models are too nice, never letting them fail or die. While it may be good for a chatbot to be nice and helpful, great stories and games aren’t all rainbows and unicorns. They have conflict, tension, and even death. These create real stakes and consequences for characters and the journeys they go on. + + Similarly, great games need opposition. You must be able to fail, die, and may even have to start over. This makes games more fun! + + However, the vast majority of AI models, through alignment RLHF, have been trained away from darkness, violence, or conflict, preventing them from fulfilling this role. To give our players better options, we decided to train our own model to fix these issues. + + Wayfarer is an adventure role-play model specifically trained to give players a challenging and dangerous experience. We thought they would like it, but since releasing it on AI Dungeon, players have reacted even more positively than we expected. 
+ + Because they loved it so much, we’ve decided to open-source the model so anyone can experience unforgivingly brutal AI adventures! Anyone can download the model to run locally. + + Or if you want to easily try this model for free, you can do so at https://aidungeon.com. + + We plan to continue improving and open-sourcing similar models, so please share any and all feedback on how we can improve model behavior. Below we share more details on how Wayfarer was created. + overrides: + parameters: + model: Wayfarer-12B-Q4_K_M.gguf + files: + - filename: Wayfarer-12B-Q4_K_M.gguf + sha256: 6cd9f290c820c64854fcdcfd312b066447acc2f63abe2e2e71af9bc4f1946c08 + uri: huggingface://bartowski/Wayfarer-12B-GGUF/Wayfarer-12B-Q4_K_M.gguf +- &mudler ### START mudler's LocalAI specific-models url: "github:mudler/LocalAI/gallery/mudler.yaml@master" name: "LocalAI-llama3-8b-function-call-v0.2" icon: "https://cdn-uploads.huggingface.co/production/uploads/647374aa7ff32a81ac6d35d4/us5JKi9z046p8K-cn_M0w.webp" @@ -6268,8 +6288,7 @@ - filename: Mirai-Nova-Llama3-LocalAI-8B-v0.1-q4_k_m.bin sha256: 579cbb229f9c11d0330759ff4733102d2491615a4c61289e26c09d1b3a583fec uri: huggingface://mudler/Mirai-Nova-Llama3-LocalAI-8B-v0.1-GGUF/Mirai-Nova-Llama3-LocalAI-8B-v0.1-q4_k_m.bin -- &parler-tts - ### START parler-tts +- &parler-tts ### START parler-tts url: "github:mudler/LocalAI/gallery/parler-tts.yaml@master" name: parler-tts-mini-v0.1 overrides: @@ -6286,8 +6305,7 @@ - cpu - text-to-speech - python -- &rerankers - ### START rerankers +- &rerankers ### START rerankers url: "github:mudler/LocalAI/gallery/rerankers.yaml@master" name: cross-encoder parameters: @@ -8540,8 +8558,7 @@ - filename: Copus-2x8B.i1-Q4_K_M.gguf sha256: 685da1ba49e203e8f491105585143d76044286d4b4687bed37d325f6b55501e5 uri: huggingface://mradermacher/Copus-2x8B-i1-GGUF/Copus-2x8B.i1-Q4_K_M.gguf -- &yi-chat - ### Start Yi +- &yi-chat ### Start Yi url: "github:mudler/LocalAI/gallery/chatml.yaml@master" icon: 
"https://github.com/01-ai/Yi/raw/main/assets/img/Yi_logo_icon_light.svg" name: "yi-1.5-9b-chat" @@ -8752,8 +8769,7 @@ - filename: Fimbulvetr-11B-v2-Q4_K_M-imat.gguf sha256: 3f309b59508342536a70edd6c4be6cf4f2cb97f2e32cbc79ad2ab3f4c02933a4 uri: huggingface://Lewdiculous/Fimbulvetr-11B-v2-GGUF-IQ-Imatrix/Fimbulvetr-11B-v2-Q4_K_M-imat.gguf -- &noromaid - ### Start noromaid +- &noromaid ### Start noromaid url: "github:mudler/LocalAI/gallery/noromaid.yaml@master" name: "noromaid-13b-0.4-DPO" icon: https://cdn-uploads.huggingface.co/production/uploads/630dfb008df86f1e5becadc3/VKX2Z2yjZX5J8kXzgeCYO.png @@ -8773,8 +8789,7 @@ - filename: Noromaid-13B-0.4-DPO.q4_k_m.gguf sha256: cb28e878d034fae3d0b43326c5fc1cfb4ab583b17c56e41d6ce023caec03c1c1 uri: huggingface://NeverSleep/Noromaid-13B-0.4-DPO-GGUF/Noromaid-13B-0.4-DPO.q4_k_m.gguf -- &wizardlm2 - ### START Vicuna based +- &wizardlm2 ### START Vicuna based url: "github:mudler/LocalAI/gallery/wizardlm2.yaml@master" name: "wizardlm2-7b" description: | @@ -8829,8 +8844,7 @@ - filename: moondream2-mmproj-f16.gguf sha256: 4cc1cb3660d87ff56432ebeb7884ad35d67c48c7b9f6b2856f305e39c38eed8f uri: huggingface://moondream/moondream2-gguf/moondream2-mmproj-f16.gguf -- &llava - ### START LLaVa +- &llava ### START LLaVa url: "github:mudler/LocalAI/gallery/llava.yaml@master" license: apache-2.0 description: | @@ -9688,8 +9702,7 @@ - filename: Freyja-v4.95-maldv-7b-NON-FICTION.i1-Q4_K_M.gguf sha256: cdc0f4de6df2ba120835fbd25c2a0ae2af8548f46d2c40c7a018c51c3d19e0c0 uri: huggingface://mradermacher/Freyja-v4.95-maldv-7b-NON-FICTION-i1-GGUF/Freyja-v4.95-maldv-7b-NON-FICTION.i1-Q4_K_M.gguf -- &chatml - ### ChatML +- &chatml ### ChatML url: "github:mudler/LocalAI/gallery/chatml.yaml@master" name: "una-thepitbull-21.4b-v2" license: afl-3.0 @@ -9975,8 +9988,7 @@ - filename: Triangulum-10B.Q4_K_M.gguf sha256: dd071f99edf6b166044bf229cdeec19419c4c348e3fc3d6587cfcc55e6fb85fa uri: huggingface://mradermacher/Triangulum-10B-GGUF/Triangulum-10B.Q4_K_M.gguf -- 
&command-R - ### START Command-r +- &command-R ### START Command-r url: "github:mudler/LocalAI/gallery/command-r.yaml@master" name: "command-r-v01:q1_s" license: "cc-by-nc-4.0" @@ -10031,8 +10043,7 @@ - filename: "aya-23-35B-Q4_K_M.gguf" sha256: "57824768c1a945e21e028c8e9a29b39adb4838d489f5865c82601ab9ad98065d" uri: "huggingface://bartowski/aya-23-35B-GGUF/aya-23-35B-Q4_K_M.gguf" -- &phi-2-chat - ### START Phi-2 +- &phi-2-chat ### START Phi-2 url: "github:mudler/LocalAI/gallery/phi-2-chat.yaml@master" license: mit description: | @@ -10154,8 +10165,7 @@ - filename: internlm3-8b-instruct-Q4_K_M.gguf uri: huggingface://bartowski/internlm3-8b-instruct-GGUF/internlm3-8b-instruct-Q4_K_M.gguf sha256: 2a9644687318e8659c9cf9b40730d5cc2f5af06f786a50439c7c51359b23896e -- &phi-3 - ### START Phi-3 +- &phi-3 ### START Phi-3 url: "github:mudler/LocalAI/gallery/phi-3-chat.yaml@master" name: "phi-3-mini-4k-instruct" icon: https://avatars.githubusercontent.com/u/6154722 @@ -10355,8 +10365,7 @@ - filename: Phi-3.5-MoE-instruct-Q4_K_M.gguf sha256: 43e91bb720869bd8a92d8eb86bc3c74a52c49cf61642ca709b3d7bb89644df36 uri: huggingface://bartowski/Phi-3.5-MoE-instruct-GGUF/Phi-3.5-MoE-instruct-Q4_K_M.gguf -- &hermes-2-pro-mistral - ### START Hermes +- &hermes-2-pro-mistral ### START Hermes url: "github:mudler/LocalAI/gallery/hermes-2-pro-mistral.yaml@master" name: "hermes-2-pro-mistral" icon: https://cdn-uploads.huggingface.co/production/uploads/6317aade83d8d2fd903192d9/ggO2sBDJ8Bhc6w-zwTx5j.png @@ -10692,8 +10701,7 @@ - filename: "galatolo-Q4_K.gguf" sha256: "ca0cfd5a9ad40dc16416aa3a277015d0299b62c0803b67f5709580042202c172" uri: "huggingface://galatolo/cerbero-7b-gguf/ggml-model-Q4_K.gguf" -- &codellama - ### START Codellama +- &codellama ### START Codellama url: "github:mudler/LocalAI/gallery/codellama.yaml@master" name: "codellama-7b" license: llama2 @@ -10824,8 +10832,7 @@ - filename: "llm-compiler-7b-ftd.Q4_K.gguf" uri: 
"huggingface://legraphista/llm-compiler-7b-ftd-IMat-GGUF/llm-compiler-7b-ftd.Q4_K.gguf" sha256: d862dd18ed335413787d0ad196522a9902a3c10a6456afdab8721822cb0ddde8 -- &openvino - ### START OpenVINO +- &openvino ### START OpenVINO url: "github:mudler/LocalAI/gallery/openvino.yaml@master" name: "openvino-llama-3-8b-instruct-ov-int8" license: llama3 @@ -10939,8 +10946,7 @@ - gpu - embedding - cpu -- &sentencentransformers - ### START Embeddings +- &sentencentransformers ### START Embeddings description: | This framework provides an easy method to compute dense vector representations for sentences, paragraphs, and images. The models are based on transformer networks like BERT / RoBERTa / XLM-RoBERTa etc. and achieve state-of-the-art performance in various tasks. Text is embedded in vector space such that similar text are closer and can efficiently be found using cosine similarity. urls: @@ -10955,8 +10961,7 @@ overrides: parameters: model: all-MiniLM-L6-v2 -- &dreamshaper - ### START Image generation +- &dreamshaper ### START Image generation name: dreamshaper icon: https://image.civitai.com/xG1nkqKTMzGDvpLrqFT7WA/dd9b038c-bd15-43ab-86ab-66e145ad7ff2/width=450/26072158-132340247-8k%20portrait%20of%20beautiful%20cyborg%20with%20brown%20hair,%20intricate,%20elegant,%20highly%20detailed,%20majestic,%20digital%20photography,%20art%20by%20artg_ed.jpeg license: other @@ -11068,8 +11073,7 @@ - filename: t5xxl_fp16.safetensors sha256: 6e480b09fae049a72d2a8c5fbccb8d3e92febeb233bbe9dfe7256958a9167635 uri: https://huggingface.co/comfyanonymous/flux_text_encoders/resolve/main/t5xxl_fp16.safetensors -- &whisper - ## Whisper +- &whisper ## Whisper url: "github:mudler/LocalAI/gallery/whisper-base.yaml@master" name: "whisper-1" license: "MIT" @@ -11249,8 +11253,7 @@ description: | Stable Diffusion in NCNN with c++, supported txt2img and img2img name: stablediffusion-cpp -- &piper - ## Piper TTS +- &piper ## Piper TTS url: github:mudler/LocalAI/gallery/piper.yaml@master name: 
voice-en-us-kathleen-low icon: https://github.com/rhasspy/piper/raw/master/etc/logo.png From 83a8d90c52816832bd3362d6455501d479ce16ab Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Mon, 20 Jan 2025 10:50:29 +0100 Subject: [PATCH 09/29] chore(model gallery): add l3.3-70b-magnum-v4-se (#4642) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index 0397bd75..d10cd32e 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -419,6 +419,22 @@ - filename: L3.3-MS-Nevoria-70b-Q4_K_M.gguf sha256: e8b0763f263089a19d4b112b7ed5085cc5f1ed9ca49c5085baa8d51f4ded1f94 uri: huggingface://bartowski/L3.3-MS-Nevoria-70b-GGUF/L3.3-MS-Nevoria-70b-Q4_K_M.gguf +- !!merge <<: *llama33 + name: "l3.3-70b-magnum-v4-se" + urls: + - https://huggingface.co/Doctor-Shotgun/L3.3-70B-Magnum-v4-SE + - https://huggingface.co/bartowski/L3.3-70B-Magnum-v4-SE-GGUF + description: | + The Magnum v4 series is complete, but here's something a little extra I wanted to tack on as I wasn't entirely satisfied with the results of v4 72B. "SE" for Special Edition - this model is finetuned from meta-llama/Llama-3.3-70B-Instruct as an rsLoRA adapter. The dataset is a slightly revised variant of the v4 data with some elements of the v2 data re-introduced. + + The objective, as with the other Magnum models, is to emulate the prose style and quality of the Claude 3 Sonnet/Opus series of models on a local scale, so don't be surprised to see "Claude-isms" in its output. 
+ overrides: + parameters: + model: L3.3-70B-Magnum-v4-SE-Q4_K_M.gguf + files: + - filename: L3.3-70B-Magnum-v4-SE-Q4_K_M.gguf + sha256: 9724a6364a42caa3d5a1687258eb329c9af6cbb2ce01c8dd556c1a222a2e0352 + uri: huggingface://bartowski/L3.3-70B-Magnum-v4-SE-GGUF/L3.3-70B-Magnum-v4-SE-Q4_K_M.gguf - &rwkv url: "github:mudler/LocalAI/gallery/rwkv.yaml@master" name: "rwkv-6-world-7b" From aeb1dca52ef940ec23f3ffddc7af2cc9afac69a7 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Mon, 20 Jan 2025 11:03:35 +0100 Subject: [PATCH 10/29] chore(model gallery): add l3.3-prikol-70b-v0.2 (#4643) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index d10cd32e..679ab002 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -435,6 +435,27 @@ - filename: L3.3-70B-Magnum-v4-SE-Q4_K_M.gguf sha256: 9724a6364a42caa3d5a1687258eb329c9af6cbb2ce01c8dd556c1a222a2e0352 uri: huggingface://bartowski/L3.3-70B-Magnum-v4-SE-GGUF/L3.3-70B-Magnum-v4-SE-Q4_K_M.gguf +- !!merge <<: *llama33 + name: "l3.3-prikol-70b-v0.2" + icon: https://files.catbox.moe/x9t3zo.png + urls: + - https://huggingface.co/Nohobby/L3.3-Prikol-70B-v0.2 + - https://huggingface.co/bartowski/L3.3-Prikol-70B-v0.2-GGUF + description: | + A merge of some Llama 3.3 models because um uh yeah + + Went extra schizo on the recipe, hoping for an extra fun result, and... Well, I guess it's an overall improvement over the previous revision. It's a tiny bit smarter, has even more distinct swipes and nice dialogues, but for some reason it's damn sloppy. + + I've published the second step of this merge as a separate model, and I'd say the results are more interesting, but not as usable as this one. https://huggingface.co/Nohobby/AbominationSnowPig + + Prompt format: Llama3 OR Llama3 Context and ChatML Instruct. 
It actually works a bit better this way + overrides: + parameters: + model: L3.3-Prikol-70B-v0.2-Q4_K_M.gguf + files: + - filename: L3.3-Prikol-70B-v0.2-Q4_K_M.gguf + sha256: fc0ff514efbc0b67981c2bf1423d5a2e1b8801e4266ba0c653ea148414fe5ffc + uri: huggingface://bartowski/L3.3-Prikol-70B-v0.2-GGUF/L3.3-Prikol-70B-v0.2-Q4_K_M.gguf - &rwkv url: "github:mudler/LocalAI/gallery/rwkv.yaml@master" name: "rwkv-6-world-7b" From a396040886fb5e2e13dee72811605956c7506ebc Mon Sep 17 00:00:00 2001 From: Gianluca Boiano <491117+M0Rf30@users.noreply.github.com> Date: Mon, 20 Jan 2025 16:13:19 +0100 Subject: [PATCH 11/29] chore(model gallery): remove dead icons and update LLAVA and DeepSeek ones (#4645) * chore(model gallery): update icons and add LLAVA ones Signed-off-by: Gianluca Boiano * chore(model gallery): fix all complains related to yamllint Signed-off-by: Gianluca Boiano --------- Signed-off-by: Gianluca Boiano --- gallery/index.yaml | 69 +++++++++++++++++++++------------------------- 1 file changed, 31 insertions(+), 38 deletions(-) diff --git a/gallery/index.yaml b/gallery/index.yaml index 679ab002..30687062 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -819,7 +819,7 @@ - filename: salamandra-7b-instruct.Q4_K_M-f32.gguf sha256: bac8e8c1d1d9d53cbdb148b8ff9ad378ddb392429207099e85b5aae3a43bff3d uri: huggingface://cstr/salamandra-7b-instruct-GGUF/salamandra-7b-instruct.Q4_K_M-f32.gguf -- &llama32 ## llama3.2 +- &llama32 ## llama3.2 url: "github:mudler/LocalAI/gallery/llama3.1-instruct.yaml@master" icon: https://avatars.githubusercontent.com/u/153379578 license: llama3.2 @@ -1354,7 +1354,7 @@ - filename: FineMath-Llama-3B-Q4_K_M.gguf sha256: 16c73b5cf2a417a7e1608bcc9469f1461fc3e759ce04a3a337f48df977dc158c uri: huggingface://bartowski/FineMath-Llama-3B-GGUF/FineMath-Llama-3B-Q4_K_M.gguf -- &qwen25 ## Qwen2.5 +- &qwen25 ## Qwen2.5 name: "qwen2.5-14b-instruct" icon: https://avatars.githubusercontent.com/u/141221163 url: 
"github:mudler/LocalAI/gallery/chatml.yaml@master" @@ -2181,7 +2181,6 @@ sha256: 42cf7a96784dc8f25c61c2404620c3e6548a024caa8dff6e435d7c86400d7ab8 uri: huggingface://mradermacher/Qwen2.5-7B-nerd-uncensored-v1.7-GGUF/Qwen2.5-7B-nerd-uncensored-v1.7.Q4_K_M.gguf - !!merge <<: *qwen25 - icon: https://i.imgur.com/OxX2Usi.png name: "evathene-v1.0" urls: - https://huggingface.co/sophosympatheia/Evathene-v1.0 @@ -2540,7 +2539,6 @@ sha256: 91907f29746625a62885793475956220b81d8a5a34b53686a1acd1d03fd403ea uri: huggingface://bartowski/72B-Qwen2.5-Kunou-v1-GGUF/72B-Qwen2.5-Kunou-v1-Q4_K_M.gguf - !!merge <<: *qwen25 - icon: https://i.imgur.com/OxX2Usi.png name: "evathene-v1.3" urls: - https://huggingface.co/sophosympatheia/Evathene-v1.3 @@ -3276,7 +3274,7 @@ - filename: DRT-o1-14B-Q4_K_M.gguf sha256: 9619ca984cf4ce8e4f69bcde831de17b2ce05dd89536e3130608877521e3d328 uri: huggingface://bartowski/DRT-o1-14B-GGUF/DRT-o1-14B-Q4_K_M.gguf -- &smollm ## SmolLM +- &smollm ## SmolLM url: "github:mudler/LocalAI/gallery/chatml.yaml@master" name: "smollm-1.7b-instruct" icon: https://huggingface.co/datasets/HuggingFaceTB/images/resolve/main/banner_smol.png @@ -3334,7 +3332,7 @@ - filename: Vikhr-Qwen-2.5-1.5B-Instruct.Q4_K_M.gguf sha256: eaeac314e30b461413bc1cc819cdc0cd6a79265711fd0b8268702960a082c7bd uri: huggingface://QuantFactory/Vikhr-Qwen-2.5-1.5B-Instruct-GGUF/Vikhr-Qwen-2.5-1.5B-Instruct.Q4_K_M.gguf -- &llama31 ## LLama3.1 +- &llama31 ## LLama3.1 url: "github:mudler/LocalAI/gallery/llama3.1-instruct.yaml@master" icon: https://avatars.githubusercontent.com/u/153379578 name: "meta-llama-3.1-8b-instruct" @@ -4485,7 +4483,6 @@ sha256: 27b10c3ca4507e8bf7d305d60e5313b54ef5fffdb43a03f36223d19d906e39f3 uri: huggingface://mradermacher/L3.1-70Blivion-v0.1-rc1-70B-i1-GGUF/L3.1-70Blivion-v0.1-rc1-70B.i1-Q4_K_M.gguf - !!merge <<: *llama31 - icon: https://i.imgur.com/sdN0Aqg.jpeg name: "llama-3.1-hawkish-8b" urls: - https://huggingface.co/mukaj/Llama-3.1-Hawkish-8B @@ -5222,10 +5219,10 @@ - filename: 
Dolphin3.0-Llama3.1-8B-Q4_K_M.gguf sha256: 268390e07edd407ad93ea21a868b7ae995b5950e01cad0db9e1802ae5049d405 uri: huggingface://bartowski/Dolphin3.0-Llama3.1-8B-GGUF/Dolphin3.0-Llama3.1-8B-Q4_K_M.gguf -- &deepseek ## Deepseek +- &deepseek ## Deepseek url: "github:mudler/LocalAI/gallery/deepseek.yaml@master" name: "deepseek-coder-v2-lite-instruct" - icon: "https://github.com/deepseek-ai/DeepSeek-V2/blob/main/figures/logo.svg?raw=true" + icon: "https://avatars.githubusercontent.com/u/148330874" license: deepseek description: | DeepSeek-Coder-V2, an open-source Mixture-of-Experts (MoE) code language model that achieves performance comparable to GPT4-Turbo in code-specific tasks. Specifically, DeepSeek-Coder-V2 is further pre-trained from DeepSeek-Coder-V2-Base with 6 trillion tokens sourced from a high-quality and multi-source corpus. Through this continued pre-training, DeepSeek-Coder-V2 substantially enhances the coding and mathematical reasoning capabilities of DeepSeek-Coder-V2-Base, while maintaining comparable performance in general language tasks. Compared to DeepSeek-Coder, DeepSeek-Coder-V2 demonstrates significant advancements in various aspects of code-related tasks, as well as reasoning and general capabilities. Additionally, DeepSeek-Coder-V2 expands its support for programming languages from 86 to 338, while extending the context length from 16K to 128K. 
@@ -5287,7 +5284,7 @@ - filename: archangel_sft_pythia2-8b.Q4_K_M.gguf sha256: a47782c55ef2b39b19644213720a599d9849511a73c9ebb0c1de749383c0a0f8 uri: huggingface://RichardErkhov/ContextualAI_-_archangel_sft_pythia2-8b-gguf/archangel_sft_pythia2-8b.Q4_K_M.gguf -- &qwen2 ## Start QWEN2 +- &qwen2 ## Start QWEN2 url: "github:mudler/LocalAI/gallery/chatml.yaml@master" name: "qwen2-7b-instruct" icon: https://avatars.githubusercontent.com/u/141221163 @@ -5647,7 +5644,7 @@ - filename: minicpm-v-2_6-mmproj-f16.gguf sha256: f8a805e9e62085805c69c427287acefc284932eb4abfe6e1b1ce431d27e2f4e0 uri: huggingface://openbmb/MiniCPM-V-2_6-gguf/mmproj-model-f16.gguf -- &mistral03 ## START Mistral +- &mistral03 ## START Mistral url: "github:mudler/LocalAI/gallery/mistral-0.3.yaml@master" name: "mistral-7b-instruct-v0.3" icon: https://cdn-avatars.huggingface.co/v1/production/uploads/62dac1c7a8ead43d20e3e17a/wrLf5yaGC6ng4XME70w6Z.png @@ -6155,7 +6152,6 @@ - !!merge <<: *mistral03 name: "mn-12b-mag-mell-r1-iq-arm-imatrix" url: "github:mudler/LocalAI/gallery/chatml.yaml@master" - icon: "https://i.imgur.com/wjyAaTO.png" urls: - https://huggingface.co/inflatebot/MN-12B-Mag-Mell-R1 - https://huggingface.co/Lewdiculous/MN-12B-Mag-Mell-R1-GGUF-IQ-ARM-Imatrix @@ -6280,7 +6276,7 @@ - filename: Wayfarer-12B-Q4_K_M.gguf sha256: 6cd9f290c820c64854fcdcfd312b066447acc2f63abe2e2e71af9bc4f1946c08 uri: huggingface://bartowski/Wayfarer-12B-GGUF/Wayfarer-12B-Q4_K_M.gguf -- &mudler ### START mudler's LocalAI specific-models +- &mudler ### START mudler's LocalAI specific-models url: "github:mudler/LocalAI/gallery/mudler.yaml@master" name: "LocalAI-llama3-8b-function-call-v0.2" icon: "https://cdn-uploads.huggingface.co/production/uploads/647374aa7ff32a81ac6d35d4/us5JKi9z046p8K-cn_M0w.webp" @@ -6325,7 +6321,7 @@ - filename: Mirai-Nova-Llama3-LocalAI-8B-v0.1-q4_k_m.bin sha256: 579cbb229f9c11d0330759ff4733102d2491615a4c61289e26c09d1b3a583fec uri: 
huggingface://mudler/Mirai-Nova-Llama3-LocalAI-8B-v0.1-GGUF/Mirai-Nova-Llama3-LocalAI-8B-v0.1-q4_k_m.bin -- &parler-tts ### START parler-tts +- &parler-tts ### START parler-tts url: "github:mudler/LocalAI/gallery/parler-tts.yaml@master" name: parler-tts-mini-v0.1 overrides: @@ -6342,7 +6338,7 @@ - cpu - text-to-speech - python -- &rerankers ### START rerankers +- &rerankers ### START rerankers url: "github:mudler/LocalAI/gallery/rerankers.yaml@master" name: cross-encoder parameters: @@ -7265,10 +7261,9 @@ name: "l3-8b-stheno-v3.1" urls: - https://huggingface.co/Sao10K/L3-8B-Stheno-v3.1 - icon: https://w.forfun.com/fetch/cb/cba2205390e517bea1ea60ca0b491af4.jpeg description: | - A model made for 1-on-1 Roleplay ideally, but one that is able to handle scenarios, RPGs and storywriting fine. - - Uncensored during actual roleplay scenarios. # I do not care for zero-shot prompting like what some people do. It is uncensored enough in actual usecases. + - Uncensored during actual roleplay scenarios. # I do not care for zero-shot prompting like what some people do. It is uncensored enough in actual usecases. - I quite like the prose and style for this model. overrides: parameters: @@ -8059,7 +8054,6 @@ urls: - https://huggingface.co/bartowski/New-Dawn-Llama-3-70B-32K-v1.0-GGUF - https://huggingface.co/sophosympatheia/New-Dawn-Llama-3-70B-32K-v1.0 - icon: https://imgur.com/tKzncGo.png description: | This model is a multi-level SLERP merge of several Llama 3 70B variants. See the merge recipe below for details. I extended the context window for this model out to 32K by snagging some layers from abacusai/Smaug-Llama-3-70B-Instruct-32K using a technique similar to what I used for Midnight Miqu, which was further honed by jukofyork. This model is uncensored. You are responsible for whatever you do with it. 
@@ -8411,7 +8405,8 @@ - filename: dolphin-2.9.2-Phi-3-Medium-abliterated-Q4_K_M.gguf sha256: 566331c2efe87725310aacb709ca15088a0063fa0ddc14a345bf20d69982156b uri: huggingface://bartowski/dolphin-2.9.2-Phi-3-Medium-abliterated-GGUF/dolphin-2.9.2-Phi-3-Medium-abliterated-Q4_K_M.gguf -- url: "github:mudler/LocalAI/gallery/chatml.yaml@master" +- !!merge <<: *llama3 + url: "github:mudler/LocalAI/gallery/chatml.yaml@master" name: "llama-3-8b-instruct-dpo-v0.3-32k" license: llama3 urls: @@ -8595,7 +8590,7 @@ - filename: Copus-2x8B.i1-Q4_K_M.gguf sha256: 685da1ba49e203e8f491105585143d76044286d4b4687bed37d325f6b55501e5 uri: huggingface://mradermacher/Copus-2x8B-i1-GGUF/Copus-2x8B.i1-Q4_K_M.gguf -- &yi-chat ### Start Yi +- &yi-chat ### Start Yi url: "github:mudler/LocalAI/gallery/chatml.yaml@master" icon: "https://github.com/01-ai/Yi/raw/main/assets/img/Yi_logo_icon_light.svg" name: "yi-1.5-9b-chat" @@ -8806,7 +8801,7 @@ - filename: Fimbulvetr-11B-v2-Q4_K_M-imat.gguf sha256: 3f309b59508342536a70edd6c4be6cf4f2cb97f2e32cbc79ad2ab3f4c02933a4 uri: huggingface://Lewdiculous/Fimbulvetr-11B-v2-GGUF-IQ-Imatrix/Fimbulvetr-11B-v2-Q4_K_M-imat.gguf -- &noromaid ### Start noromaid +- &noromaid ### Start noromaid url: "github:mudler/LocalAI/gallery/noromaid.yaml@master" name: "noromaid-13b-0.4-DPO" icon: https://cdn-uploads.huggingface.co/production/uploads/630dfb008df86f1e5becadc3/VKX2Z2yjZX5J8kXzgeCYO.png @@ -8826,7 +8821,7 @@ - filename: Noromaid-13B-0.4-DPO.q4_k_m.gguf sha256: cb28e878d034fae3d0b43326c5fc1cfb4ab583b17c56e41d6ce023caec03c1c1 uri: huggingface://NeverSleep/Noromaid-13B-0.4-DPO-GGUF/Noromaid-13B-0.4-DPO.q4_k_m.gguf -- &wizardlm2 ### START Vicuna based +- &wizardlm2 ### START Vicuna based url: "github:mudler/LocalAI/gallery/wizardlm2.yaml@master" name: "wizardlm2-7b" description: | @@ -8881,7 +8876,9 @@ - filename: moondream2-mmproj-f16.gguf sha256: 4cc1cb3660d87ff56432ebeb7884ad35d67c48c7b9f6b2856f305e39c38eed8f uri: 
huggingface://moondream/moondream2-gguf/moondream2-mmproj-f16.gguf -- &llava ### START LLaVa +- &llava ### START LLaVa + name: "llava-1.6-vicuna" + icon: https://github.com/lobehub/lobe-icons/raw/master/packages/static-png/dark/llava-color.png url: "github:mudler/LocalAI/gallery/llava.yaml@master" license: apache-2.0 description: | @@ -8895,7 +8892,6 @@ - gpu - llama2 - cpu - name: "llava-1.6-vicuna" overrides: mmproj: mmproj-vicuna7b-f16.gguf parameters: @@ -9363,7 +9359,6 @@ June 18, 2024 Update, After extensive testing of the intermediate checkpoints, significant progress has been made. The model is slowly — I mean, really slowly — unlearning its alignment. By significantly lowering the learning rate, I was able to visibly observe deep behavioral changes, this process is taking longer than anticipated, but it's going to be worth it. Estimated time to completion: 4 more days.. I'm pleased to report that in several tests, the model not only maintained its intelligence but actually showed a slight improvement, especially in terms of common sense. An intermediate checkpoint of this model was used to create invisietch/EtherealRainbow-v0.3-rc7, with promising results. Currently, it seems like I'm on the right track. I hope this model will serve as a solid foundation for further merges, whether for role-playing (RP) or for uncensoring. This approach also allows us to save on actual fine-tuning, thereby reducing our carbon footprint. The merge process takes just a few minutes of CPU time, instead of days of GPU work. June 20, 2024 Update, Unaligning was partially successful, and the results are decent, but I am not fully satisfied. I decided to bite the bullet, and do a full finetune, god have mercy on my GPUs. I am also releasing the intermediate checkpoint of this model. 
- icon: https://i.imgur.com/Kpk1PgZ.png overrides: parameters: model: LLAMA-3_8B_Unaligned_Alpha-Q4_K_M.gguf @@ -9389,7 +9384,6 @@ uri: huggingface://bartowski/L3-8B-Lunaris-v1-GGUF/L3-8B-Lunaris-v1-Q4_K_M.gguf - !!merge <<: *llama3 name: "llama-3_8b_unaligned_alpha_rp_soup-i1" - icon: https://i.imgur.com/pXcjpoV.png urls: - https://huggingface.co/SicariusSicariiStuff/LLAMA-3_8B_Unaligned_Alpha_RP_Soup - https://huggingface.co/mradermacher/LLAMA-3_8B_Unaligned_Alpha_RP_Soup-i1-GGUF @@ -9739,7 +9733,7 @@ - filename: Freyja-v4.95-maldv-7b-NON-FICTION.i1-Q4_K_M.gguf sha256: cdc0f4de6df2ba120835fbd25c2a0ae2af8548f46d2c40c7a018c51c3d19e0c0 uri: huggingface://mradermacher/Freyja-v4.95-maldv-7b-NON-FICTION-i1-GGUF/Freyja-v4.95-maldv-7b-NON-FICTION.i1-Q4_K_M.gguf -- &chatml ### ChatML +- &chatml ### ChatML url: "github:mudler/LocalAI/gallery/chatml.yaml@master" name: "una-thepitbull-21.4b-v2" license: afl-3.0 @@ -9787,7 +9781,6 @@ sha256: 9c90f3a65332a03a6cbb563eee19c7586d9544f646ff9f33f7f1904b3d415ae2 uri: huggingface://nold/HelpingAI-9B-GGUF/HelpingAI-9B_Q4_K_M.gguf - url: "github:mudler/LocalAI/gallery/chatml-hercules.yaml@master" - icon: "https://tse3.mm.bing.net/th/id/OIG1.vnrl3xpEcypR3McLW63q?pid=ImgGn" urls: - https://huggingface.co/Locutusque/Llama-3-Hercules-5.0-8B - https://huggingface.co/bartowski/Llama-3-Hercules-5.0-8B-GGUF @@ -10025,7 +10018,7 @@ - filename: Triangulum-10B.Q4_K_M.gguf sha256: dd071f99edf6b166044bf229cdeec19419c4c348e3fc3d6587cfcc55e6fb85fa uri: huggingface://mradermacher/Triangulum-10B-GGUF/Triangulum-10B.Q4_K_M.gguf -- &command-R ### START Command-r +- &command-R ### START Command-r url: "github:mudler/LocalAI/gallery/command-r.yaml@master" name: "command-r-v01:q1_s" license: "cc-by-nc-4.0" @@ -10080,7 +10073,7 @@ - filename: "aya-23-35B-Q4_K_M.gguf" sha256: "57824768c1a945e21e028c8e9a29b39adb4838d489f5865c82601ab9ad98065d" uri: "huggingface://bartowski/aya-23-35B-GGUF/aya-23-35B-Q4_K_M.gguf" -- &phi-2-chat ### START Phi-2 +- &phi-2-chat ### 
START Phi-2 url: "github:mudler/LocalAI/gallery/phi-2-chat.yaml@master" license: mit description: | @@ -10202,7 +10195,7 @@ - filename: internlm3-8b-instruct-Q4_K_M.gguf uri: huggingface://bartowski/internlm3-8b-instruct-GGUF/internlm3-8b-instruct-Q4_K_M.gguf sha256: 2a9644687318e8659c9cf9b40730d5cc2f5af06f786a50439c7c51359b23896e -- &phi-3 ### START Phi-3 +- &phi-3 ### START Phi-3 url: "github:mudler/LocalAI/gallery/phi-3-chat.yaml@master" name: "phi-3-mini-4k-instruct" icon: https://avatars.githubusercontent.com/u/6154722 @@ -10402,7 +10395,7 @@ - filename: Phi-3.5-MoE-instruct-Q4_K_M.gguf sha256: 43e91bb720869bd8a92d8eb86bc3c74a52c49cf61642ca709b3d7bb89644df36 uri: huggingface://bartowski/Phi-3.5-MoE-instruct-GGUF/Phi-3.5-MoE-instruct-Q4_K_M.gguf -- &hermes-2-pro-mistral ### START Hermes +- &hermes-2-pro-mistral ### START Hermes url: "github:mudler/LocalAI/gallery/hermes-2-pro-mistral.yaml@master" name: "hermes-2-pro-mistral" icon: https://cdn-uploads.huggingface.co/production/uploads/6317aade83d8d2fd903192d9/ggO2sBDJ8Bhc6w-zwTx5j.png @@ -10738,7 +10731,7 @@ - filename: "galatolo-Q4_K.gguf" sha256: "ca0cfd5a9ad40dc16416aa3a277015d0299b62c0803b67f5709580042202c172" uri: "huggingface://galatolo/cerbero-7b-gguf/ggml-model-Q4_K.gguf" -- &codellama ### START Codellama +- &codellama ### START Codellama url: "github:mudler/LocalAI/gallery/codellama.yaml@master" name: "codellama-7b" license: llama2 @@ -10869,7 +10862,7 @@ - filename: "llm-compiler-7b-ftd.Q4_K.gguf" uri: "huggingface://legraphista/llm-compiler-7b-ftd-IMat-GGUF/llm-compiler-7b-ftd.Q4_K.gguf" sha256: d862dd18ed335413787d0ad196522a9902a3c10a6456afdab8721822cb0ddde8 -- &openvino ### START OpenVINO +- &openvino ### START OpenVINO url: "github:mudler/LocalAI/gallery/openvino.yaml@master" name: "openvino-llama-3-8b-instruct-ov-int8" license: llama3 @@ -10983,7 +10976,7 @@ - gpu - embedding - cpu -- &sentencentransformers ### START Embeddings +- &sentencentransformers ### START Embeddings description: | This 
framework provides an easy method to compute dense vector representations for sentences, paragraphs, and images. The models are based on transformer networks like BERT / RoBERTa / XLM-RoBERTa etc. and achieve state-of-the-art performance in various tasks. Text is embedded in vector space such that similar text are closer and can efficiently be found using cosine similarity. urls: @@ -10998,7 +10991,7 @@ overrides: parameters: model: all-MiniLM-L6-v2 -- &dreamshaper ### START Image generation +- &dreamshaper ### START Image generation name: dreamshaper icon: https://image.civitai.com/xG1nkqKTMzGDvpLrqFT7WA/dd9b038c-bd15-43ab-86ab-66e145ad7ff2/width=450/26072158-132340247-8k%20portrait%20of%20beautiful%20cyborg%20with%20brown%20hair,%20intricate,%20elegant,%20highly%20detailed,%20majestic,%20digital%20photography,%20art%20by%20artg_ed.jpeg license: other @@ -11110,7 +11103,7 @@ - filename: t5xxl_fp16.safetensors sha256: 6e480b09fae049a72d2a8c5fbccb8d3e92febeb233bbe9dfe7256958a9167635 uri: https://huggingface.co/comfyanonymous/flux_text_encoders/resolve/main/t5xxl_fp16.safetensors -- &whisper ## Whisper +- &whisper ## Whisper url: "github:mudler/LocalAI/gallery/whisper-base.yaml@master" name: "whisper-1" license: "MIT" @@ -11290,7 +11283,7 @@ description: | Stable Diffusion in NCNN with c++, supported txt2img and img2img name: stablediffusion-cpp -- &piper ## Piper TTS +- &piper ## Piper TTS url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-en-us-kathleen-low icon: https://github.com/rhasspy/piper/raw/master/etc/logo.png From 2f09aa1b850535d2cb820a49c19c9159867c1f0b Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Mon, 20 Jan 2025 19:04:23 +0100 Subject: [PATCH 12/29] chore(model gallery): add sd-3.5-large-ggml (#4647) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 30 ++++++++++++++++++++++++++++++ gallery/sd-ggml.yaml | 12 ++++++++++++ 2 files changed, 42 insertions(+) create mode 100644 gallery/sd-ggml.yaml diff --git 
a/gallery/index.yaml b/gallery/index.yaml index 30687062..bcb7866a 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -11028,6 +11028,36 @@ - sd-3 - gpu url: "github:mudler/LocalAI/gallery/stablediffusion3.yaml@master" +- name: sd-3.5-large-ggml + license: stabilityai-ai-community + url: "github:mudler/LocalAI/gallery/sd-ggml.yaml@master" + description: | + Stable Diffusion 3.5 Large is a Multimodal Diffusion Transformer (MMDiT) text-to-image model that features improved performance in image quality, typography, complex prompt understanding, and resource-efficiency. + urls: + - https://huggingface.co/stabilityai/stable-diffusion-3.5-large + - https://huggingface.co/second-state/stable-diffusion-3.5-large-GGUF + tags: + - text-to-image + - flux + - gpu + - cpu + icon: https://huggingface.co/stabilityai/stable-diffusion-3.5-large/media/main/sd3.5_large_demo.png + overrides: + parameters: + model: sd3.5_large-Q4_0.gguf + files: + - filename: "sd3.5_large-Q4_0.gguf" + sha256: "c79ed6cdaa7decaca6b05ccc636b956b37c47de9b104c56315ca8ed086347b00" + uri: "huggingface://second-state/stable-diffusion-3.5-large-GGUF/sd3.5_large-Q4_0.gguf" + - filename: clip_g.safetensors + sha256: ec310df2af79c318e24d20511b601a591ca8cd4f1fce1d8dff822a356bcdb1f4 + uri: huggingface://second-state/stable-diffusion-3.5-large-GGUF/clip_g.safetensors + - filename: clip_l.safetensors + sha256: 660c6f5b1abae9dc498ac2d21e1347d2abdb0cf6c0c0c8576cd796491d9a6cdd + uri: huggingface://second-state/stable-diffusion-3.5-large-GGUF/clip_l.safetensors + - filename: t5xxl-Q5_0.gguf + sha256: f4df16c641a05c4a6ca717068ba3ee312875000f6fac0efbd152915553b5fc3e + uri: huggingface://second-state/stable-diffusion-3.5-large-GGUF/t5xxl-Q5_0.gguf - &flux name: flux.1-dev license: flux-1-dev-non-commercial-license diff --git a/gallery/sd-ggml.yaml b/gallery/sd-ggml.yaml new file mode 100644 index 00000000..d819eba8 --- /dev/null +++ b/gallery/sd-ggml.yaml @@ -0,0 +1,12 @@ +--- +name: "sd-ggml" + +config_file: | + 
backend: stablediffusion-ggml + step: 25 + cfg_scale: 4.5 + options: + - "clip_l_path:clip_l.safetensors" + - "clip_g_path:clip_g.safetensors" + - "t5xxl_path:t5xxl-Q5_0.gguf" + - "sampler:euler" From 14a1e02f4478cef20d723f9fa91f0645c856b7c8 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 20 Jan 2025 23:33:40 +0000 Subject: [PATCH 13/29] chore(deps): Bump docs/themes/hugo-theme-relearn from `80e448e` to `8dad5ee` (#4656) chore(deps): Bump docs/themes/hugo-theme-relearn Bumps [docs/themes/hugo-theme-relearn](https://github.com/McShelby/hugo-theme-relearn) from `80e448e` to `8dad5ee`. - [Release notes](https://github.com/McShelby/hugo-theme-relearn/releases) - [Commits](https://github.com/McShelby/hugo-theme-relearn/compare/80e448e5bdaa92c87ee0d0d86f1125c8606ebf5f...8dad5ee419e5bb2a0b380aa72d7a7389af4945f6) --- updated-dependencies: - dependency-name: docs/themes/hugo-theme-relearn dependency-type: direct:production ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- docs/themes/hugo-theme-relearn | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/themes/hugo-theme-relearn b/docs/themes/hugo-theme-relearn index 80e448e5..8dad5ee4 160000 --- a/docs/themes/hugo-theme-relearn +++ b/docs/themes/hugo-theme-relearn @@ -1 +1 @@ -Subproject commit 80e448e5bdaa92c87ee0d0d86f1125c8606ebf5f +Subproject commit 8dad5ee419e5bb2a0b380aa72d7a7389af4945f6 From 1a08948e63ce48dd32524cf4f7df88e6b69e639d Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Tue, 21 Jan 2025 08:37:13 +0100 Subject: [PATCH 14/29] chore: :arrow_up: Update ggerganov/llama.cpp to `aea8ddd5165d525a449e2fc3839db77a71f4a318` (#4657) :arrow_up: Update ggerganov/llama.cpp Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler 
<2420543+mudler@users.noreply.github.com> --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 7aaad492..53e5af7e 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ DETECT_LIBS?=true # llama.cpp versions GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be -CPPLLAMA_VERSION?=92bc493917d43b83e592349e138b54c90b1c3ea7 +CPPLLAMA_VERSION?=aea8ddd5165d525a449e2fc3839db77a71f4a318 # whisper.cpp version WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp From b264a91b3f24ed8b2ec4c3161a8405be4e7019ad Mon Sep 17 00:00:00 2001 From: Gianluca Boiano <491117+M0Rf30@users.noreply.github.com> Date: Tue, 21 Jan 2025 10:37:05 +0100 Subject: [PATCH 15/29] chore(model gallery): add Deepseek-R1-Distill models (#4646) * chore(model gallery): add Deepseek-R1-Distill-Llama-8b Signed-off-by: Gianluca Boiano * chore(model gallery): add Deepseek-R1-Distill-Qwen-1.5b Signed-off-by: Gianluca Boiano --------- Signed-off-by: Gianluca Boiano --- gallery/index.yaml | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index bcb7866a..126bd14a 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -2696,6 +2696,23 @@ - filename: Qwentile2.5-32B-Instruct-Q4_K_M.gguf sha256: e476d6e3c15c78fc3f986d7ae8fa35c16116843827f2e6243c05767cef2f3615 uri: huggingface://bartowski/Qwentile2.5-32B-Instruct-GGUF/Qwentile2.5-32B-Instruct-Q4_K_M.gguf +- !!merge <<: *qwen25 + name: "deepseek-r1-distill-qwen-1.5b" + icon: "https://avatars.githubusercontent.com/u/148330874" + urls: + - https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5b + - https://huggingface.co/unsloth/DeepSeek-R1-Distill-Qwen-1.5B-GGUF + description: | + DeepSeek-R1 is our advanced first-generation reasoning model designed to enhance performance in reasoning tasks. 
+ Building on the foundation laid by its predecessor, DeepSeek-R1-Zero, which was trained using large-scale reinforcement learning (RL) without supervised fine-tuning, DeepSeek-R1 addresses the challenges faced by R1-Zero, such as endless repetition, poor readability, and language mixing. + By incorporating cold-start data prior to the RL phase, DeepSeek-R1 significantly improves reasoning capabilities and achieves performance levels comparable to OpenAI-o1 across a variety of domains, including mathematics, coding, and complex reasoning tasks. + overrides: + parameters: + model: deepseek-r1-distill-qwen-1.5b-Q4_K_M.gguf + files: + - filename: deepseek-r1-distill-qwen-1.5b-Q4_K_M.gguf + sha256: c2c43b6018cf7700ce0ddee8807deb1a9a26758ef878232f3a142d16df81f0fe + uri: huggingface://unsloth/DeepSeek-R1-Distill-Qwen-1.5B-GGUF/DeepSeek-R1-Distill-Qwen-1.5B-Q4_K_M.gguf - &archfunct license: apache-2.0 tags: @@ -5219,6 +5236,23 @@ - filename: Dolphin3.0-Llama3.1-8B-Q4_K_M.gguf sha256: 268390e07edd407ad93ea21a868b7ae995b5950e01cad0db9e1802ae5049d405 uri: huggingface://bartowski/Dolphin3.0-Llama3.1-8B-GGUF/Dolphin3.0-Llama3.1-8B-Q4_K_M.gguf +- !!merge <<: *llama31 + name: "deepseek-r1-distill-llama-8b" + icon: "https://avatars.githubusercontent.com/u/148330874" + urls: + - https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Llama-8B + - https://huggingface.co/unsloth/DeepSeek-R1-Distill-Llama-8B-GGUF + description: | + DeepSeek-R1 is our advanced first-generation reasoning model designed to enhance performance in reasoning tasks. + Building on the foundation laid by its predecessor, DeepSeek-R1-Zero, which was trained using large-scale reinforcement learning (RL) without supervised fine-tuning, DeepSeek-R1 addresses the challenges faced by R1-Zero, such as endless repetition, poor readability, and language mixing.
+ By incorporating cold-start data prior to the RL phase, DeepSeek-R1 significantly improves reasoning capabilities and achieves performance levels comparable to OpenAI-o1 across a variety of domains, including mathematics, coding, and complex reasoning tasks. + overrides: + parameters: + model: deepseek-r1-distill-llama-8b-Q4_K_M.gguf + files: + - filename: deepseek-r1-distill-llama-8b-Q4_K_M.gguf + sha256: f8eba201522ab44b79bc54166126bfaf836111ff4cbf2d13c59c3b57da10573b + uri: huggingface://unsloth/DeepSeek-R1-Distill-Llama-8B-GGUF/DeepSeek-R1-Distill-Llama-8B-Q4_K_M.gguf - &deepseek ## Deepseek url: "github:mudler/LocalAI/gallery/deepseek.yaml@master" name: "deepseek-coder-v2-lite-instruct" From 6831719e1e74f5ed0f58c40999bce9a8f4066959 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Tue, 21 Jan 2025 15:09:36 +0100 Subject: [PATCH 16/29] chore(model gallery): add deepseek-r1-distill-qwen-7b (#4660) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index 126bd14a..c56e37b1 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -2713,6 +2713,22 @@ - filename: deepseek-r1-distill-qwen-1.5b-Q4_K_M.gguf sha256: c2c43b6018cf7700ce0ddee8807deb1a9a26758ef878232f3a142d16df81f0fe uri: huggingface://unsloth/DeepSeek-R1-Distill-Qwen-1.5B-GGUF/DeepSeek-R1-Distill-Qwen-1.5B-Q4_K_M.gguf +- !!merge <<: *qwen25 + name: "deepseek-r1-distill-qwen-7b" + urls: + - https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B + - https://huggingface.co/bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF + description: | + DeepSeek-R1 is our advanced first-generation reasoning model designed to enhance performance in reasoning tasks.
+ Building on the foundation laid by its predecessor, DeepSeek-R1-Zero, which was trained using large-scale reinforcement learning (RL) without supervised fine-tuning, DeepSeek-R1 addresses the challenges faced by R1-Zero, such as endless repetition, poor readability, and language mixing. + By incorporating cold-start data prior to the RL phase, DeepSeek-R1 significantly improves reasoning capabilities and achieves performance levels comparable to OpenAI-o1 across a variety of domains, including mathematics, coding, and complex reasoning tasks. + overrides: + parameters: + model: DeepSeek-R1-Distill-Qwen-7B-Q4_K_M.gguf + files: + - filename: DeepSeek-R1-Distill-Qwen-7B-Q4_K_M.gguf + sha256: 731ece8d06dc7eda6f6572997feb9ee1258db0784827e642909d9b565641937b + uri: huggingface://bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF/DeepSeek-R1-Distill-Qwen-7B-Q4_K_M.gguf - &archfunct license: apache-2.0 tags: From e81ceff6812c43c401c110eafbcc140747266ea2 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Tue, 21 Jan 2025 23:04:29 +0100 Subject: [PATCH 17/29] chore: :arrow_up: Update ggerganov/llama.cpp to `6171c9d25820ccf676b243c172868819d882848f` (#4661) :arrow_up: Update ggerganov/llama.cpp Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 53e5af7e..44959fd3 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ DETECT_LIBS?=true # llama.cpp versions GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be -CPPLLAMA_VERSION?=aea8ddd5165d525a449e2fc3839db77a71f4a318 +CPPLLAMA_VERSION?=6171c9d25820ccf676b243c172868819d882848f # whisper.cpp version WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp From 0ec25b8b0743416a7ddd6f66f09dc1d1dd7fe07f Mon Sep 17 00:00:00
2001 From: Ettore Di Giacinto Date: Wed, 22 Jan 2025 16:37:20 +0100 Subject: [PATCH 18/29] chore(model gallery): add sd-1.5-ggml and sd-3.5-medium-ggml (#4664) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 58 +++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 57 insertions(+), 1 deletion(-) diff --git a/gallery/index.yaml b/gallery/index.yaml index c56e37b1..4ce19bb4 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -11078,6 +11078,62 @@ - sd-3 - gpu url: "github:mudler/LocalAI/gallery/stablediffusion3.yaml@master" +- name: sd-1.5-ggml + license: creativeml-openrail-m + url: "github:mudler/LocalAI/gallery/sd-ggml.yaml@master" + description: | + Stable Diffusion 1.5 + urls: + - https://huggingface.co/second-state/stable-diffusion-v1-5-GGUF + tags: + - text-to-image + - stablediffusion + - gpu + - cpu + overrides: + options: + - "sampler:euler" + parameters: + model: stable-diffusion-v1-5-pruned-emaonly-Q4_0.gguf + files: + - filename: "stable-diffusion-v1-5-pruned-emaonly-Q4_0.gguf" + sha256: "b8944e9fe0b69b36ae1b5bb0185b3a7b8ef14347fe0fa9af6c64c4829022261f" + uri: "huggingface://second-state/stable-diffusion-v1-5-GGUF/stable-diffusion-v1-5-pruned-emaonly-Q4_0.gguf" +- name: sd-3.5-medium-ggml + license: stabilityai-ai-community + url: "github:mudler/LocalAI/gallery/sd-ggml.yaml@master" + description: | + Stable Diffusion 3.5 Medium is a Multimodal Diffusion Transformer (MMDiT) text-to-image model that features improved performance in image quality, typography, complex prompt understanding, and resource-efficiency. 
+ urls: + - https://huggingface.co/stabilityai/stable-diffusion-3.5-medium + - https://huggingface.co/second-state/stable-diffusion-3.5-medium-GGUF + tags: + - text-to-image + - stablediffusion + - gpu + - cpu + icon: https://huggingface.co/stabilityai/stable-diffusion-3.5-medium/media/main/sd3.5_medium_demo.jpg + overrides: + options: + - "clip_l_path:clip_l-Q4_0.gguf" + - "clip_g_path:clip_g-Q4_0.gguf" + - "t5xxl_path:t5xxl-Q4_0.gguf" + - "sampler:euler" + parameters: + model: sd3.5_medium-Q4_0.gguf + files: + - filename: "sd3.5_medium-Q4_0.gguf" + sha256: "3bb8c5e9ab0a841117089ed4ed81d885bb85161df2a766b812f829bc55b31adf" + uri: "huggingface://second-state/stable-diffusion-3.5-medium-GGUF/sd3.5_medium-Q4_0.gguf" + - filename: clip_g-Q4_0.gguf + sha256: c142411147e16b7c4b9cc1f5d977cbe596104435d76fde47172d3d35c5e58bb8 + uri: huggingface://second-state/stable-diffusion-3.5-medium-GGUF/clip_g-Q4_0.gguf + - filename: clip_l-Q4_0.gguf + sha256: f5ad88ae2ac924eb4ac0298b77afa304b5e6014fc0c4128f0e3df40fdfcc0f8a + uri: huggingface://second-state/stable-diffusion-3.5-medium-GGUF/clip_l-Q4_0.gguf + - filename: t5xxl-Q4_0.gguf + sha256: 987ba47c158b890c274f78fd35324419f50941e846a49789f0977e9fe9d97ab7 + uri: huggingface://second-state/stable-diffusion-3.5-medium-GGUF/t5xxl-Q4_0.gguf - name: sd-3.5-large-ggml license: stabilityai-ai-community url: "github:mudler/LocalAI/gallery/sd-ggml.yaml@master" @@ -11088,7 +11144,7 @@ - https://huggingface.co/second-state/stable-diffusion-3.5-large-GGUF tags: - text-to-image - - flux + - stablediffusion - gpu - cpu icon: https://huggingface.co/stabilityai/stable-diffusion-3.5-large/media/main/sd3.5_large_demo.png From 10675ac28e80e990832c650174efec0e0d006838 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Wed, 22 Jan 2025 18:07:30 +0100 Subject: [PATCH 19/29] Update README.md Signed-off-by: Ettore Di Giacinto --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 
4d415d16..78267e04 100644 --- a/README.md +++ b/README.md @@ -39,7 +39,7 @@

-go-skynet%2FLocalAI | Trendshift +mudler%2FLocalAI | Trendshift

> :bulb: Get help - [❓FAQ](https://localai.io/faq/) [💭Discussions](https://github.com/go-skynet/LocalAI/discussions) [:speech_balloon: Discord](https://discord.gg/uJAeKSAGDy) [:book: Documentation website](https://localai.io/) From e15d29aba2982d07cb2bfec9267c076d73eab2b5 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Wed, 22 Jan 2025 19:34:16 +0100 Subject: [PATCH 20/29] chore(stablediffusion-ncn): drop in favor of ggml implementation (#4652) * chore(stablediffusion-ncn): drop in favor of ggml implementation Signed-off-by: Ettore Di Giacinto * chore(ci): drop stablediffusion build Signed-off-by: Ettore Di Giacinto * chore(tests): add Signed-off-by: Ettore Di Giacinto * chore(tests): try to fixup current tests Signed-off-by: Ettore Di Giacinto * Try to fix tests Signed-off-by: Ettore Di Giacinto * Tests improvements Signed-off-by: Ettore Di Giacinto * chore(tests): use quality to specify step Signed-off-by: Ettore Di Giacinto * chore(tests): switch to sd-1.5 also increase prep time for downloading models Signed-off-by: Ettore Di Giacinto --------- Signed-off-by: Ettore Di Giacinto --- .devcontainer/docker-compose-devcontainer.yml | 2 +- .env | 6 +- .github/workflows/release.yaml | 35 +---------- .github/workflows/test.yml | 6 +- .vscode/launch.json | 2 +- Dockerfile | 38 +----------- Makefile | 36 +---------- aio/cpu/image-gen.yaml | 59 +++--------------- backend/go/image/stablediffusion/main.go | 21 ------- .../image/stablediffusion/stablediffusion.go | 33 ---------- core/config/backend_config.go | 2 +- core/config/config_test.go | 61 +++++++++++++++++++ core/http/app_test.go | 17 +++--- core/http/endpoints/openai/image.go | 6 +- core/http/endpoints/openai/request.go | 9 +++ core/schema/openai.go | 5 +- pkg/model/initializers.go | 9 +-- pkg/stablediffusion/generate.go | 35 ----------- pkg/stablediffusion/generate_unsupported.go | 10 --- pkg/stablediffusion/stablediffusion.go | 20 ------ tests/e2e-aio/e2e_suite_test.go | 2 +- tests/e2e-aio/e2e_test.go | 11 
++-- 22 files changed, 123 insertions(+), 302 deletions(-) delete mode 100644 backend/go/image/stablediffusion/main.go delete mode 100644 backend/go/image/stablediffusion/stablediffusion.go delete mode 100644 pkg/stablediffusion/generate.go delete mode 100644 pkg/stablediffusion/generate_unsupported.go delete mode 100644 pkg/stablediffusion/stablediffusion.go diff --git a/.devcontainer/docker-compose-devcontainer.yml b/.devcontainer/docker-compose-devcontainer.yml index 8795d64d..7ef22099 100644 --- a/.devcontainer/docker-compose-devcontainer.yml +++ b/.devcontainer/docker-compose-devcontainer.yml @@ -7,7 +7,7 @@ services: args: - FFMPEG=true - IMAGE_TYPE=extras - - GO_TAGS=stablediffusion p2p tts + - GO_TAGS=p2p tts env_file: - ../.env ports: diff --git a/.env b/.env index e92f7f3b..ee8db74e 100644 --- a/.env +++ b/.env @@ -38,12 +38,12 @@ ## Uncomment and set to true to enable rebuilding from source # REBUILD=true -## Enable go tags, available: stablediffusion, tts -## stablediffusion: image generation with stablediffusion +## Enable go tags, available: p2p, tts +## p2p: enable distributed inferencing ## tts: enables text-to-speech with go-piper ## (requires REBUILD=true) # -# GO_TAGS=stablediffusion +# GO_TAGS=p2p ## Path where to store generated images # LOCALAI_IMAGE_PATH=/tmp/generated/images diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index 47a69b0f..e133ecb6 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -237,40 +237,7 @@ jobs: detached: true connect-timeout-seconds: 180 limit-access-to-actor: true - build-stablediffusion: - runs-on: ubuntu-latest - steps: - - name: Clone - uses: actions/checkout@v4 - with: - submodules: true - - uses: actions/setup-go@v5 - with: - go-version: '1.21.x' - cache: false - - name: Dependencies - run: | - sudo apt-get update - sudo apt-get install -y --no-install-recommends libopencv-dev protobuf-compiler ccache upx-ucl - go install 
google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af - go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2 - - name: Build stablediffusion - run: | - export PATH=$PATH:$GOPATH/bin - make backend-assets/grpc/stablediffusion - mkdir -p release && cp backend-assets/grpc/stablediffusion release - env: - GO_TAGS: stablediffusion - - uses: actions/upload-artifact@v4 - with: - name: stablediffusion - path: release/ - - name: Release - uses: softprops/action-gh-release@v2 - if: startsWith(github.ref, 'refs/tags/') - with: - files: | - release/* + build-macOS-x86_64: runs-on: macos-13 diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 0ee93afa..444c89fb 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -105,9 +105,7 @@ jobs: # Pre-build piper before we start tests in order to have shared libraries in place make sources/go-piper && \ GO_TAGS="tts" make -C sources/go-piper piper.o && \ - sudo cp -rfv sources/go-piper/piper-phonemize/pi/lib/. /usr/lib/ && \ - # Pre-build stable diffusion before we install a newer version of abseil (not compatible with stablediffusion-ncn) - PATH="$PATH:/root/go/bin" GO_TAGS="stablediffusion tts" GRPC_BACKENDS=backend-assets/grpc/stablediffusion make build + sudo cp -rfv sources/go-piper/piper-phonemize/pi/lib/. 
/usr/lib/ env: CUDA_VERSION: 12-4 - name: Cache grpc @@ -129,7 +127,7 @@ jobs: cd grpc && cd cmake/build && sudo make --jobs 5 install - name: Test run: | - PATH="$PATH:/root/go/bin" GO_TAGS="stablediffusion tts" make --jobs 5 --output-sync=target test + PATH="$PATH:/root/go/bin" GO_TAGS="tts" make --jobs 5 --output-sync=target test - name: Setup tmate session if tests fail if: ${{ failure() }} uses: mxschmitt/action-tmate@v3.19 diff --git a/.vscode/launch.json b/.vscode/launch.json index 50493421..f5e91508 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -26,7 +26,7 @@ "LOCALAI_P2P": "true", "LOCALAI_FEDERATED": "true" }, - "buildFlags": ["-tags", "stablediffusion p2p tts", "-v"], + "buildFlags": ["-tags", "p2p tts", "-v"], "envFile": "${workspaceFolder}/.env", "cwd": "${workspaceRoot}" } diff --git a/Dockerfile b/Dockerfile index 4ddc921d..8594c2a1 100644 --- a/Dockerfile +++ b/Dockerfile @@ -69,14 +69,10 @@ ENV PATH=/opt/rocm/bin:${PATH} # OpenBLAS requirements and stable diffusion RUN apt-get update && \ apt-get install -y --no-install-recommends \ - libopenblas-dev \ - libopencv-dev && \ + libopenblas-dev && \ apt-get clean && \ rm -rf /var/lib/apt/lists/* -# Set up OpenCV -RUN ln -s /usr/include/opencv4/opencv2 /usr/include/opencv2 - WORKDIR /build ################################### @@ -251,7 +247,7 @@ RUN git clone --recurse-submodules --jobs 4 -b ${GRPC_VERSION} --depth 1 --shall FROM requirements-drivers AS builder-base -ARG GO_TAGS="stablediffusion tts p2p" +ARG GO_TAGS="tts p2p" ARG GRPC_BACKENDS ARG MAKEFLAGS ARG LD_FLAGS="-s -w" @@ -285,35 +281,12 @@ RUN <