From 844c0c422d4cbe2dd7b3f9b4667e6c239c9e33f6 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Sat, 11 Jan 2025 22:10:45 +0100 Subject: [PATCH 001/679] docs: :arrow_up: update docs version mudler/LocalAI (#4578) :arrow_up: Update docs version mudler/LocalAI Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- docs/data/version.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/data/version.json b/docs/data/version.json index bf065426..0044f3a2 100644 --- a/docs/data/version.json +++ b/docs/data/version.json @@ -1,3 +1,3 @@ { - "version": "v2.24.2" + "version": "v2.25.0" } From 80dc23fab9073e4f2446b1ef9023536ef7413b2f Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Sat, 11 Jan 2025 22:23:10 +0100 Subject: [PATCH 002/679] chore(model-gallery): :arrow_up: update checksum (#4580) :arrow_up: Checksum updates in gallery/index.yaml Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- gallery/index.yaml | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/gallery/index.yaml b/gallery/index.yaml index f20be17e..4cb6ccbd 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -14,15 +14,15 @@ - https://huggingface.co/microsoft/phi-4 - https://huggingface.co/bartowski/phi-4-GGUF description: | - phi-4 is a state-of-the-art open model built upon a blend of synthetic datasets, data from filtered public domain websites, and acquired academic books and Q&A datasets. The goal of this approach was to ensure that small capable models were trained with data focused on high quality and advanced reasoning. - phi-4 underwent a rigorous enhancement and alignment process, incorporating both supervised fine-tuning and direct preference optimization to ensure precise instruction adherence and robust safety measures. Phi-4 is a 14B parameters, dense decoder-only Transformer model. + phi-4 is a state-of-the-art open model built upon a blend of synthetic datasets, data from filtered public domain websites, and acquired academic books and Q&A datasets. The goal of this approach was to ensure that small capable models were trained with data focused on high quality and advanced reasoning. + phi-4 underwent a rigorous enhancement and alignment process, incorporating both supervised fine-tuning and direct preference optimization to ensure precise instruction adherence and robust safety measures. Phi-4 is a 14B parameters, dense decoder-only Transformer model. overrides: parameters: model: phi-4-Q4_K_M.gguf files: - filename: phi-4-Q4_K_M.gguf - sha256: e38bd5fa5f1c03d51ebc34a8d7b284e0da089c8af05e7f409a0079a9c831a10b uri: huggingface://bartowski/phi-4-GGUF/phi-4-Q4_K_M.gguf + sha256: 009aba717c09d4a35890c7d35eb59d54e1dba884c7c526e7197d9c13ab5911d9 - &falcon3 name: "falcon3-1b-instruct" url: "github:mudler/LocalAI/gallery/falcon3.yaml@master" @@ -2726,14 +2726,7 @@ urls: - https://huggingface.co/Krystalan/DRT-o1-7B - https://huggingface.co/QuantFactory/DRT-o1-7B-GGUF - description: | - In this work, we introduce DRT-o1, an attempt to bring the success of long thought reasoning to neural machine translation (MT). 
To this end, - - 🌟 We mine English sentences with similes or metaphors from existing literature books, which are suitable for translation via long thought. - 🌟 We propose a designed multi-agent framework with three agents (i.e., a translator, an advisor and an evaluator) to synthesize the MT samples with long thought. There are 22,264 synthesized samples in total. - 🌟 We train DRT-o1-8B, DRT-o1-7B and DRT-o1-14B using Llama-3.1-8B-Instruct, Qwen2.5-7B-Instruct and Qwen2.5-14B-Instruct as backbones. - - Our goal is not to achieve competitive performance with OpenAI’s O1 in neural machine translation (MT). Instead, we explore technical routes to bring the success of long thought to MT. To this end, we introduce DRT-o1, a byproduct of our exploration, and we hope it could facilitate the corresponding research in this direction. + description: "In this work, we introduce DRT-o1, an attempt to bring the success of long thought reasoning to neural machine translation (MT). To this end,\n\n\U0001F31F We mine English sentences with similes or metaphors from existing literature books, which are suitable for translation via long thought.\n\U0001F31F We propose a designed multi-agent framework with three agents (i.e., a translator, an advisor and an evaluator) to synthesize the MT samples with long thought. There are 22,264 synthesized samples in total.\n\U0001F31F We train DRT-o1-8B, DRT-o1-7B and DRT-o1-14B using Llama-3.1-8B-Instruct, Qwen2.5-7B-Instruct and Qwen2.5-14B-Instruct as backbones.\n\nOur goal is not to achieve competitive performance with OpenAI’s O1 in neural machine translation (MT). Instead, we explore technical routes to bring the success of long thought to MT. To this end, we introduce DRT-o1, a byproduct of our exploration, and we hope it could facilitate the corresponding research in this direction.\n" overrides: parameters: model: DRT-o1-7B.Q4_K_M.gguf @@ -5874,7 +5867,7 @@ - https://huggingface.co/Nitral-AI/Nera_Noctis-12B - https://huggingface.co/bartowski/Nera_Noctis-12B-GGUF description: | - Sometimes, the brightest gems are found in the darkest places. For it is in the shadows where we learn to really see the light. + Sometimes, the brightest gems are found in the darkest places. For it is in the shadows where we learn to really see the light. 
overrides: parameters: model: Nera_Noctis-12B-Q4_K_M.gguf From b206eab80f6bad968ec307cef18a1d5b39982be9 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sat, 11 Jan 2025 22:41:30 +0100 Subject: [PATCH 003/679] chore(model gallery): add nightwing3-10b-v0.1 (#4582) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index 4cb6ccbd..82cd1dc5 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -132,6 +132,22 @@ - filename: Falcon3-7B-Instruct-abliterated-Q4_K_M.gguf sha256: 68e10e638668acaa49fb7919224c7d8bcf1798126c7a499c4d9ec3b81313f8c8 uri: huggingface://bartowski/Falcon3-7B-Instruct-abliterated-GGUF/Falcon3-7B-Instruct-abliterated-Q4_K_M.gguf +- !!merge <<: *falcon3 + name: "nightwing3-10b-v0.1" + url: "github:mudler/LocalAI/gallery/chatml.yaml@master" + icon: https://cdn-uploads.huggingface.co/production/uploads/642265bc01c62c1e4102dc36/C6gY9vxCl3_SFzQLpLG0S.png + urls: + - https://huggingface.co/Nitral-AI/NightWing3-10B-v0.1 + - https://huggingface.co/bartowski/NightWing3-10B-v0.1-GGUF + description: | + Base model: (Falcon3-10B) + overrides: + parameters: + model: NightWing3-10B-v0.1-Q4_K_M.gguf + files: + - filename: NightWing3-10B-v0.1-Q4_K_M.gguf + sha256: 2e87671542d22fe1ef9a68e43f2fdab7c2759479ad531946d9f0bdeffa6f5747 + uri: huggingface://bartowski/NightWing3-10B-v0.1-GGUF/NightWing3-10B-v0.1-Q4_K_M.gguf - &intellect1 name: "intellect-1-instruct" url: "github:mudler/LocalAI/gallery/llama3.1-instruct.yaml@master" From cb8bf79adab6cc658b547c79e29ecc3a221beba9 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sat, 11 Jan 2025 22:45:37 +0100 Subject: [PATCH 004/679] chore(model gallery): add qwq-32b-preview-ideawhiz-v1 (#4583) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index 82cd1dc5..c6a9b624 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -2952,6 +2952,21 @@ - filename: Chuluun-Qwen2.5-72B-v0.01-Q4_K_M.gguf sha256: 901d9d10aad42de3188e721accdc4eb0efec96cbca48563f802793dceaf551f5 uri: huggingface://bartowski/Chuluun-Qwen2.5-72B-v0.01-GGUF/Chuluun-Qwen2.5-72B-v0.01-Q4_K_M.gguf +- !!merge <<: *qwen25 + name: "qwq-32b-preview-ideawhiz-v1" + icon: https://cdn-uploads.huggingface.co/production/uploads/6205fefd3f1dc8a642d70b10/JEZgA_xV6oF8AIsya9dop.jpeg + urls: + - https://huggingface.co/6cf/QwQ-32B-Preview-IdeaWhiz-v1 + - https://huggingface.co/bartowski/QwQ-32B-Preview-IdeaWhiz-v1-GGUF + description: | + IdeaWhiz is a fine-tuned version of QwQ-32B-Preview, specifically optimized for scientific creativity and step-by-step reasoning. The model leverages the LiveIdeaBench dataset to enhance its capabilities in generating novel scientific ideas and hypotheses. 
+ overrides: + parameters: + model: QwQ-32B-Preview-IdeaWhiz-v1-Q4_K_M.gguf + files: + - filename: QwQ-32B-Preview-IdeaWhiz-v1-Q4_K_M.gguf + sha256: 1648e13d9974b10d08ee45f48fd3ebd15cf67745fe20d602f9306fe0253b6a96 + uri: huggingface://bartowski/QwQ-32B-Preview-IdeaWhiz-v1-GGUF/QwQ-32B-Preview-IdeaWhiz-v1-Q4_K_M.gguf - &smollm ## SmolLM url: "github:mudler/LocalAI/gallery/chatml.yaml@master" From cd480dbe5c04bb8e82da2f71586937916eb7a11f Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sat, 11 Jan 2025 23:24:55 +0100 Subject: [PATCH 005/679] chore(model gallery): add rombos-qwen2.5-writer-32b (#4584) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index c6a9b624..fb4de112 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -2967,6 +2967,21 @@ - filename: QwQ-32B-Preview-IdeaWhiz-v1-Q4_K_M.gguf sha256: 1648e13d9974b10d08ee45f48fd3ebd15cf67745fe20d602f9306fe0253b6a96 uri: huggingface://bartowski/QwQ-32B-Preview-IdeaWhiz-v1-GGUF/QwQ-32B-Preview-IdeaWhiz-v1-Q4_K_M.gguf +- !!merge <<: *qwen25 + name: "rombos-qwen2.5-writer-32b" + icon: https://huggingface.co/SubtleOne/Rombos-Qwen2.5-Writer-32b/blob/main/robot-creating-fantasy.jpg + urls: + - https://huggingface.co/SubtleOne/Rombos-Qwen2.5-Writer-32b + - https://huggingface.co/bartowski/Rombos-Qwen2.5-Writer-32b-GGUF + description: | + This model is a merge using Rombos's top-ranked 32b model, based on Qwen 2.5, and merging three creative writing finetunes. The creative content is a serious upgrade over the base it started with, and I enjoyed it in my DnD RPG campaign. + overrides: + parameters: + model: Rombos-Qwen2.5-Writer-32b-Q4_K_M.gguf + files: + - filename: Rombos-Qwen2.5-Writer-32b-Q4_K_M.gguf + sha256: cf0e48c6cb8b6f41834603900642b5395105980297709c85c4216bd44fac956a + uri: huggingface://bartowski/Rombos-Qwen2.5-Writer-32b-GGUF/Rombos-Qwen2.5-Writer-32b-Q4_K_M.gguf - &smollm ## SmolLM url: "github:mudler/LocalAI/gallery/chatml.yaml@master" From 7cd33d10c93485e2a01efc298d111c18b4d9fd8e Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Sat, 11 Jan 2025 23:25:09 +0100 Subject: [PATCH 006/679] chore: :arrow_up: Update ggerganov/llama.cpp to `c05e8c9934f94fde49bc1bc9dc51eed282605150` (#4579) :arrow_up: Update ggerganov/llama.cpp Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index e81ec442..261f2833 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ DETECT_LIBS?=true # llama.cpp versions GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be -CPPLLAMA_VERSION?=ba8a1f9c5b675459c55a83e3f97f10df3a66c788 +CPPLLAMA_VERSION?=c05e8c9934f94fde49bc1bc9dc51eed282605150 # whisper.cpp version WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp From b898cd49b58b2f930814fd4703065b9f92f4e3c1 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sun, 12 Jan 2025 10:33:29 +0100 Subject: [PATCH 007/679] chore(model gallery): add sky-t1-32b-preview (#4585) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index fb4de112..15706d4c 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -2982,6 
+2982,22 @@ - filename: Rombos-Qwen2.5-Writer-32b-Q4_K_M.gguf sha256: cf0e48c6cb8b6f41834603900642b5395105980297709c85c4216bd44fac956a uri: huggingface://bartowski/Rombos-Qwen2.5-Writer-32b-GGUF/Rombos-Qwen2.5-Writer-32b-Q4_K_M.gguf +- !!merge <<: *qwen25 + name: "sky-t1-32b-preview" + icon: https://raw.githubusercontent.com/NovaSky-AI/novasky-ai.github.io/main/assets/images/blue-bird-wider.jpeg + urls: + - https://huggingface.co/NovaSky-AI/Sky-T1-32B-Preview + - https://huggingface.co/bartowski/Sky-T1-32B-Preview-GGUF + - https://novasky-ai.github.io/posts/sky-t1/ + description: | + This is a 32B reasoning model trained from Qwen2.5-32B-Instruct with 17K data. The performance is on par with o1-preview model on both math and coding. Please see our blog post for more details. + overrides: + parameters: + model: Sky-T1-32B-Preview-Q4_K_M.gguf + files: + - filename: Sky-T1-32B-Preview-Q4_K_M.gguf + sha256: c735912a582f10e4769461586a02e5b98ef43c2895ec11923b8c4f157e7909e5 + uri: huggingface://bartowski/Sky-T1-32B-Preview-GGUF/Sky-T1-32B-Preview-Q4_K_M.gguf - &smollm ## SmolLM url: "github:mudler/LocalAI/gallery/chatml.yaml@master" From f8cffd05e5902a8452989e4ba66b4805a329b0ea Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sun, 12 Jan 2025 10:36:01 +0100 Subject: [PATCH 008/679] chore(model gallery): add negative_llama_70b (#4586) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index 15706d4c..35217913 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -311,6 +311,26 @@ - filename: 70B-L3.3-Cirrus-x1-Q4_K_M.gguf sha256: 07dd464dddba959df8eb2f937787c2210b4c51c2375bd7c7ab2abbe198142a19 uri: huggingface://bartowski/70B-L3.3-Cirrus-x1-GGUF/70B-L3.3-Cirrus-x1-Q4_K_M.gguf +- !!merge <<: *llama33 + name: "negative_llama_70b" + icon: https://huggingface.co/SicariusSicariiStuff/Negative_LLAMA_70B/resolve/main/Images/Negative_LLAMA_70B.png + urls: + - https://huggingface.co/SicariusSicariiStuff/Negative_LLAMA_70B + - https://huggingface.co/bartowski/Negative_LLAMA_70B-GGUF + description: | + - Strong Roleplay & Creative writing abilities. + - Less positivity bias. + - Very smart assistant with low refusals. + - Exceptionally good at following the character card. + - Characters feel more 'alive', and will occasionally initiate stuff on their own (without being prompted to, but fitting to their character). + - Strong ability to comprehend and roleplay uncommon physical and mental characteristics. 
+ overrides: + parameters: + model: Negative_LLAMA_70B-Q4_K_M.gguf + files: + - filename: Negative_LLAMA_70B-Q4_K_M.gguf + sha256: 023c6bd38f6a66178529e6bb77b6e76379ae3ee031adc6885531986aa12750d9 + uri: huggingface://bartowski/Negative_LLAMA_70B-GGUF/Negative_LLAMA_70B-Q4_K_M.gguf - &rwkv url: "github:mudler/LocalAI/gallery/rwkv.yaml@master" name: "rwkv-6-world-7b" From 1780ccadbccccb79de1c88bd734e3ed38f8fefa6 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sun, 12 Jan 2025 10:40:26 +0100 Subject: [PATCH 009/679] chore(model gallery): add finemath-llama-3b (#4587) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index 35217913..7d60167b 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -1211,6 +1211,24 @@ - filename: MiniThinky-v2-1B-Llama-3.2-Q4_K_M.gguf sha256: 086857b6364afd757a123eea0474bede09f25608783e7a6fcf2f88d8cb322ca1 uri: huggingface://bartowski/MiniThinky-v2-1B-Llama-3.2-GGUF/MiniThinky-v2-1B-Llama-3.2-Q4_K_M.gguf +- !!merge <<: *llama32 + icon: https://cdn-uploads.huggingface.co/production/uploads/61c141342aac764ce1654e43/HZ6KOc8IVXXOABrdv0dyK.png + name: "finemath-llama-3b" + urls: + - https://huggingface.co/HuggingFaceTB/FineMath-Llama-3B + - https://huggingface.co/bartowski/FineMath-Llama-3B-GGUF + description: | + This is a continual-pre-training of Llama-3.2-3B on a mix of šŸ“ FineMath (our new high quality math dataset) and FineWeb-Edu. + + The model demonstrates superior math performance compared to Llama 3.2 3B, while maintaining similar performance on knowledge, reasoning, and common sense benchmarks. + It was trained on 160B tokens using a mix of 40% FineWeb-Edu and 60% from FineMath (30% FineMath-4+ subset and 30% InfiWebMath-4+ subset). We use nanotron for the training, and you can find the training scripts in our SmolLM2 GitHub repo. + overrides: + parameters: + model: FineMath-Llama-3B-Q4_K_M.gguf + files: + - filename: FineMath-Llama-3B-Q4_K_M.gguf + sha256: 16c73b5cf2a417a7e1608bcc9469f1461fc3e759ce04a3a337f48df977dc158c + uri: huggingface://bartowski/FineMath-Llama-3B-GGUF/FineMath-Llama-3B-Q4_K_M.gguf - &qwen25 ## Qwen2.5 name: "qwen2.5-14b-instruct" From e8de7b52da29ec5ac4042f3bed71f1968fe2b973 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sun, 12 Jan 2025 11:26:42 +0100 Subject: [PATCH 010/679] chore(model gallery): add LocalAI-functioncall-phi-4-v0.1 (#4588) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 16 ++++++++++++++++ gallery/phi-4-chat-fcall.yaml | 27 +++++++++++++++++++++++++++ 2 files changed, 43 insertions(+) create mode 100644 gallery/phi-4-chat-fcall.yaml diff --git a/gallery/index.yaml b/gallery/index.yaml index 7d60167b..2b546c0b 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -23,6 +23,22 @@ - filename: phi-4-Q4_K_M.gguf uri: huggingface://bartowski/phi-4-GGUF/phi-4-Q4_K_M.gguf sha256: 009aba717c09d4a35890c7d35eb59d54e1dba884c7c526e7197d9c13ab5911d9 +- !!merge <<: *phi4 + url: "github:mudler/LocalAI/gallery/phi-4-fcall.yaml@master" + name: "LocalAI-functioncall-phi-4-v0.1" + icon: https://cdn-uploads.huggingface.co/production/uploads/647374aa7ff32a81ac6d35d4/Dzbdzn27KEc3K6zNNi070.png + description: | + A model tailored to be conversational and execute function calls with LocalAI. This model is based on phi-4. 
+ urls: + - https://huggingface.co/mudler/LocalAI-functioncall-phi-4-v0.1 + - https://huggingface.co/mudler/LocalAI-functioncall-phi-4-v0.1-Q4_K_M-GGUF + overrides: + parameters: + model: localai-functioncall-phi-4-v0.1-q4_k_m.gguf + files: + - filename: localai-functioncall-phi-4-v0.1-q4_k_m.gguf + uri: huggingface://mudler/LocalAI-functioncall-phi-4-v0.1-Q4_K_M-GGUF/localai-functioncall-phi-4-v0.1-q4_k_m.gguf + sha256: 0ae4e5e4ba89c16c1e810285c5c8b84416fa67f8ed7c175aa0b6fc0a103017aa - &falcon3 name: "falcon3-1b-instruct" url: "github:mudler/LocalAI/gallery/falcon3.yaml@master" diff --git a/gallery/phi-4-chat-fcall.yaml b/gallery/phi-4-chat-fcall.yaml new file mode 100644 index 00000000..a6fa261e --- /dev/null +++ b/gallery/phi-4-chat-fcall.yaml @@ -0,0 +1,27 @@ +--- +name: "phi-4-chat" + +config_file: | + mmap: true + template: + chat_message: | + <|im_start|>{{ .RoleName }}<|im_sep|> + {{.Content}}<|im_end|> + chat: | + {{.Input}} + <|im_start|>assistant<|im_sep|> + completion: | + {{.Input}} + function: | + <|im_start|>system<|im_sep|> + You are an AI assistant that executes function calls, and these are the tools at your disposal: + {{range .Functions}} + {'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }} + {{end}} + {{.Input}}<|im_end|> + context_size: 4096 + f16: true + stopwords: + - <|end|> + - <|endoftext|> + - <|im_end|> From 9ce71fe427e8ee3e1e3a8b00b7d00d2725270138 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sun, 12 Jan 2025 11:50:40 +0100 Subject: [PATCH 011/679] fix(gallery): correct UL typo Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gallery/index.yaml b/gallery/index.yaml index 2b546c0b..9fc6f077 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -24,7 +24,7 @@ uri: huggingface://bartowski/phi-4-GGUF/phi-4-Q4_K_M.gguf sha256: 009aba717c09d4a35890c7d35eb59d54e1dba884c7c526e7197d9c13ab5911d9 - !!merge <<: *phi4 - url: "github:mudler/LocalAI/gallery/phi-4-fcall.yaml@master" + url: "github:mudler/LocalAI/gallery/phi-4-chat-fcall.yaml@master" name: "LocalAI-functioncall-phi-4-v0.1" icon: https://cdn-uploads.huggingface.co/production/uploads/647374aa7ff32a81ac6d35d4/Dzbdzn27KEc3K6zNNi070.png description: | From 6a299c04a7e4a4e23188504bcb0e89488819ee1f Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sun, 12 Jan 2025 18:33:51 +0100 Subject: [PATCH 012/679] feat(stablediffusion-ggml): respect build type (#4581) * feat(stablediffusion-ggml): respect build type Signed-off-by: Ettore Di Giacinto * combine libraries Signed-off-by: Ettore Di Giacinto --------- Signed-off-by: Ettore Di Giacinto --- Makefile | 10 +-- .../go/image/stablediffusion-ggml/Makefile | 71 ++++++++++++++++++- backend/go/image/stablediffusion-ggml/gosd.go | 2 +- 3 files changed, 71 insertions(+), 12 deletions(-) diff --git a/Makefile b/Makefile index 261f2833..0ec85bc3 100644 --- a/Makefile +++ b/Makefile @@ -302,14 +302,8 @@ sources/stablediffusion-ggml.cpp: git checkout $(STABLEDIFFUSION_GGML_VERSION) && \ git submodule update --init --recursive --depth 1 --single-branch -sources/stablediffusion-ggml.cpp/build/libstable-diffusion.a: sources/stablediffusion-ggml.cpp - cd sources/stablediffusion-ggml.cpp && \ - mkdir -p build && \ - cd build && \ - cmake $(CMAKE_ARGS) .. && \ - cmake --build . 
--config Release - -backend/go/image/stablediffusion-ggml/libsd.a: sources/stablediffusion-ggml.cpp/build/libstable-diffusion.a +backend/go/image/stablediffusion-ggml/libsd.a: sources/stablediffusion-ggml.cpp + $(MAKE) -C backend/go/image/stablediffusion-ggml build/libstable-diffusion.a $(MAKE) -C backend/go/image/stablediffusion-ggml libsd.a backend-assets/grpc/stablediffusion-ggml: backend/go/image/stablediffusion-ggml/libsd.a backend-assets/grpc diff --git a/backend/go/image/stablediffusion-ggml/Makefile b/backend/go/image/stablediffusion-ggml/Makefile index cca9bf6e..7c6d9a17 100644 --- a/backend/go/image/stablediffusion-ggml/Makefile +++ b/backend/go/image/stablediffusion-ggml/Makefile @@ -2,20 +2,85 @@ INCLUDE_PATH := $(abspath ./) LIBRARY_PATH := $(abspath ./) AR?=ar - +CMAKE_ARGS?= BUILD_TYPE?= # keep standard at C11 and C++11 CXXFLAGS = -I. -I$(INCLUDE_PATH)/../../../../sources/stablediffusion-ggml.cpp/thirdparty -I$(INCLUDE_PATH)/../../../../sources/stablediffusion-ggml.cpp/ggml/include -I$(INCLUDE_PATH)/../../../../sources/stablediffusion-ggml.cpp -O3 -DNDEBUG -std=c++17 -fPIC +# Disable Shared libs as we are linking on static gRPC and we can't mix shared and static +CMAKE_ARGS+=-DBUILD_SHARED_LIBS=OFF + +# If build type is cublas, then we set -DGGML_CUDA=ON to CMAKE_ARGS automatically +ifeq ($(BUILD_TYPE),cublas) + CMAKE_ARGS+=-DGGML_CUDA=ON +# If build type is openblas then we set -DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS +# to CMAKE_ARGS automatically +else ifeq ($(BUILD_TYPE),openblas) + CMAKE_ARGS+=-DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS +# If build type is clblas (openCL) we set -DGGML_CLBLAST=ON -DCLBlast_DIR=/some/path +else ifeq ($(BUILD_TYPE),clblas) + CMAKE_ARGS+=-DGGML_CLBLAST=ON -DCLBlast_DIR=/some/path +# If it's hipblas we do have also to set CC=/opt/rocm/llvm/bin/clang CXX=/opt/rocm/llvm/bin/clang++ +else ifeq ($(BUILD_TYPE),hipblas) + CMAKE_ARGS+=-DGGML_HIP=ON +# If it's OSX, DO NOT embed the metal library - -DGGML_METAL_EMBED_LIBRARY=ON requires further investigation +# But if it's OSX without metal, disable it here +else ifeq ($(OS),Darwin) + ifneq ($(BUILD_TYPE),metal) + CMAKE_ARGS+=-DGGML_METAL=OFF + else + CMAKE_ARGS+=-DGGML_METAL=ON + CMAKE_ARGS+=-DGGML_METAL_EMBED_LIBRARY=ON + TARGET+=--target ggml-metal + endif +endif + +ifeq ($(BUILD_TYPE),sycl_f16) + CMAKE_ARGS+=-DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DGGML_SYCL_F16=ON +endif + +ifeq ($(BUILD_TYPE),sycl_f32) + CMAKE_ARGS+=-DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx +endif + # warnings CXXFLAGS += -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function +# Find all .a archives in ARCHIVE_DIR +# (ggml can have different backends cpu, cuda, etc., each backend generates a .a archive) +GGML_ARCHIVE_DIR := build/ggml/src/ +ALL_ARCHIVES := $(shell find $(GGML_ARCHIVE_DIR) -type f -name '*.a') + +# Name of the single merged library +COMBINED_LIB := libggmlall.a + +# Rule to merge all the .a files into one +$(COMBINED_LIB): $(ALL_ARCHIVES) + @echo "Merging all .a into $(COMBINED_LIB)" + rm -f $@ + mkdir -p merge-tmp + for a in $(ALL_ARCHIVES); do \ + ( cd merge-tmp && ar x ../$$a ); \ + done + ( cd merge-tmp && ar rcs ../$@ *.o ) + # Ensure we have a proper index + ranlib $@ + # Clean up + rm -rf merge-tmp + +build/libstable-diffusion.a: + mkdir -p build && \ + cd build && \ + cmake $(CMAKE_ARGS) ../../../../../sources/stablediffusion-ggml.cpp && \ + cmake --build . 
--config Release + $(MAKE) $(COMBINED_LIB) + gosd.o: $(CXX) $(CXXFLAGS) gosd.cpp -o gosd.o -c libsd.a: gosd.o - cp $(INCLUDE_PATH)/../../../../sources/stablediffusion-ggml.cpp/build/libstable-diffusion.a ./libsd.a + cp $(INCLUDE_PATH)/build/libstable-diffusion.a ./libsd.a $(AR) rcs libsd.a gosd.o clean: - rm -f gosd.o libsd.a \ No newline at end of file + rm -rf gosd.o libsd.a build $(COMBINED_LIB) \ No newline at end of file diff --git a/backend/go/image/stablediffusion-ggml/gosd.go b/backend/go/image/stablediffusion-ggml/gosd.go index 29d0033d..8c3bdb90 100644 --- a/backend/go/image/stablediffusion-ggml/gosd.go +++ b/backend/go/image/stablediffusion-ggml/gosd.go @@ -1,7 +1,7 @@ package main // #cgo CXXFLAGS: -I${SRCDIR}/../../../../sources/stablediffusion-ggml.cpp/thirdparty -I${SRCDIR}/../../../../sources/stablediffusion-ggml.cpp -I${SRCDIR}/../../../../sources/stablediffusion-ggml.cpp/ggml/include -// #cgo LDFLAGS: -L${SRCDIR}/ -L${SRCDIR}/../../../../sources/stablediffusion-ggml.cpp/build/ggml/src/ggml-cpu -L${SRCDIR}/../../../../sources/stablediffusion-ggml.cpp/build/ggml/src -lsd -lstdc++ -lm -lggml -lggml-base -lggml-cpu -lgomp +// #cgo LDFLAGS: -L${SRCDIR}/ -lsd -lstdc++ -lm -lggmlall -lgomp // #include // #include import "C" From 9fdb44323dd2d345886f15932586c4d178c2ba95 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sun, 12 Jan 2025 18:50:41 +0100 Subject: [PATCH 013/679] chore(model gallery): add LocalAI-functioncall-phi-4-v0.2 (#4589) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index 9fc6f077..7eb9d479 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -23,6 +23,23 @@ - filename: phi-4-Q4_K_M.gguf uri: huggingface://bartowski/phi-4-GGUF/phi-4-Q4_K_M.gguf sha256: 009aba717c09d4a35890c7d35eb59d54e1dba884c7c526e7197d9c13ab5911d9 +- !!merge <<: *phi4 + url: "github:mudler/LocalAI/gallery/phi-4-chat-fcall.yaml@master" + name: "LocalAI-functioncall-phi-4-v0.2" + icon: https://cdn-uploads.huggingface.co/production/uploads/647374aa7ff32a81ac6d35d4/Dzbdzn27KEc3K6zNNi070.png + description: | + A model tailored to be conversational and execute function calls with LocalAI. This model is based on phi-4. + This is the second iteration of https://huggingface.co/mudler/LocalAI-functioncall-phi-4-v0.1 with added CoT (o1) capabilities from the marco-o1 dataset. 
+ urls: + - https://huggingface.co/mudler/LocalAI-functioncall-phi-4-v0.2 + - https://huggingface.co/mudler/localai-functioncall-phi-4-v0.2-Q4_K_M-GGUF + overrides: + parameters: + model: localai-functioncall-phi-4-v0.2-q4_k_m.gguf + files: + - filename: localai-functioncall-phi-4-v0.2-q4_k_m.gguf + uri: huggingface://mudler/localai-functioncall-phi-4-v0.2-Q4_K_M-GGUF/localai-functioncall-phi-4-v0.2-q4_k_m.gguf + sha256: 681b5fb5070f23323a9cc8cbd1306b1c348c2f292041d3ba2335b26b071757b7 - !!merge <<: *phi4 url: "github:mudler/LocalAI/gallery/phi-4-chat-fcall.yaml@master" name: "LocalAI-functioncall-phi-4-v0.1" From aea71dd2c6e6cd7ddd4f9ccd3bb3ae0b714b6176 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sun, 12 Jan 2025 22:07:01 +0100 Subject: [PATCH 014/679] fix(stablediffusion-ggml): correctly enable sycl (#4591) Signed-off-by: Ettore Di Giacinto --- backend/go/image/stablediffusion-ggml/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/backend/go/image/stablediffusion-ggml/Makefile b/backend/go/image/stablediffusion-ggml/Makefile index 7c6d9a17..9d6b6597 100644 --- a/backend/go/image/stablediffusion-ggml/Makefile +++ b/backend/go/image/stablediffusion-ggml/Makefile @@ -36,11 +36,11 @@ else ifeq ($(OS),Darwin) endif ifeq ($(BUILD_TYPE),sycl_f16) - CMAKE_ARGS+=-DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DGGML_SYCL_F16=ON + CMAKE_ARGS+=-DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DGGML_SYCL_F16=ON -DSD_SYCL=ON -DGGML_SYCL_F16=ON endif ifeq ($(BUILD_TYPE),sycl_f32) - CMAKE_ARGS+=-DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx + CMAKE_ARGS+=-DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DSD_SYCL=ON endif # warnings From 8d82afb5958b590310b4edb8aeb1a9f72e202b2d Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Mon, 13 Jan 2025 10:11:48 +0100 Subject: [PATCH 015/679] fix(stablediffusion-ggml): enable oneapi before build (#4593) Signed-off-by: Ettore Di Giacinto --- backend/go/image/stablediffusion-ggml/Makefile | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/backend/go/image/stablediffusion-ggml/Makefile b/backend/go/image/stablediffusion-ggml/Makefile index 9d6b6597..259d4d38 100644 --- a/backend/go/image/stablediffusion-ggml/Makefile +++ b/backend/go/image/stablediffusion-ggml/Makefile @@ -4,6 +4,7 @@ LIBRARY_PATH := $(abspath ./) AR?=ar CMAKE_ARGS?= BUILD_TYPE?= +ONEAPI_VARS?=/opt/intel/oneapi/setvars.sh # keep standard at C11 and C++11 CXXFLAGS = -I. -I$(INCLUDE_PATH)/../../../../sources/stablediffusion-ggml.cpp/thirdparty -I$(INCLUDE_PATH)/../../../../sources/stablediffusion-ggml.cpp/ggml/include -I$(INCLUDE_PATH)/../../../../sources/stablediffusion-ggml.cpp -O3 -DNDEBUG -std=c++17 -fPIC @@ -69,10 +70,19 @@ $(COMBINED_LIB): $(ALL_ARCHIVES) rm -rf merge-tmp build/libstable-diffusion.a: + @echo "Building SD with $(BUILD_TYPE) build type and $(CMAKE_ARGS)" +ifneq (,$(findstring sycl,$(BUILD_TYPE))) + +bash -c "source $(ONEAPI_VARS); \ + mkdir -p build && \ + cd build && \ + cmake $(CMAKE_ARGS) ../../../../../sources/stablediffusion-ggml.cpp && \ + cmake --build . --config Release" +else mkdir -p build && \ cd build && \ cmake $(CMAKE_ARGS) ../../../../../sources/stablediffusion-ggml.cpp && \ cmake --build . 
--config Release +endif $(MAKE) $(COMBINED_LIB) gosd.o: From ab5adf40af1994ffe5bbae735252c7ea88755d0f Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Mon, 13 Jan 2025 17:33:06 +0100 Subject: [PATCH 016/679] =?UTF-8?q?chore(deps):=20bump=20llama.cpp=20to=20?= =?UTF-8?q?'924518e2e5726e81f3aeb2518fb85963a500e=E2=80=A6=20(#4592)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit chore(deps): bump llama.cpp to '924518e2e5726e81f3aeb2518fb85963a500e93a' Signed-off-by: Ettore Di Giacinto --- Makefile | 2 +- backend/cpp/llama/grpc-server.cpp | 42 +++++++++++++------------------ 2 files changed, 19 insertions(+), 25 deletions(-) diff --git a/Makefile b/Makefile index 0ec85bc3..4392980b 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ DETECT_LIBS?=true # llama.cpp versions GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be -CPPLLAMA_VERSION?=c05e8c9934f94fde49bc1bc9dc51eed282605150 +CPPLLAMA_VERSION?=924518e2e5726e81f3aeb2518fb85963a500e93a # whisper.cpp version WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp diff --git a/backend/cpp/llama/grpc-server.cpp b/backend/cpp/llama/grpc-server.cpp index 7632aebc..f0a16ffa 100644 --- a/backend/cpp/llama/grpc-server.cpp +++ b/backend/cpp/llama/grpc-server.cpp @@ -428,6 +428,7 @@ struct llama_server_context { llama_model *model = nullptr; llama_context *ctx = nullptr; + const llama_vocab * vocab = nullptr; clip_ctx *clp_ctx = nullptr; @@ -439,6 +440,7 @@ struct llama_server_context bool clean_kv_cache = true; bool all_slots_are_idle = false; bool add_bos_token = true; + bool has_eos_token = true; int32_t n_ctx; // total context for all clients / slots @@ -502,7 +504,7 @@ struct llama_server_context if (multimodal) { const int n_embd_clip = clip_n_mmproj_embd(clp_ctx); - const int n_embd_llm = llama_n_embd(model); + const int n_embd_llm = llama_model_n_embd(model); if (n_embd_clip != n_embd_llm) { LOG("%s: embedding dim of the multimodal projector (%d) is not equal to that of LLaMA (%d). Make sure that you use the correct mmproj file.\n", __func__, n_embd_clip, n_embd_llm); llama_free(ctx); @@ -511,23 +513,15 @@ struct llama_server_context } } + vocab = llama_model_get_vocab(model); n_ctx = llama_n_ctx(ctx); - add_bos_token = llama_add_bos_token(model); + add_bos_token = llama_vocab_get_add_bos(vocab); + has_eos_token = llama_vocab_eos(vocab) != LLAMA_TOKEN_NULL; return true; } - void validate_model_chat_template(server_params & sparams) { - llama_chat_message chat[] = {{"user", "test"}}; - std::vector buf(1); - int res = llama_chat_apply_template(model, nullptr, chat, 1, true, buf.data(), buf.size()); - if (res < 0) { - LOG_ERR("The chat template comes with this model is not yet supported, falling back to chatml. 
This may cause the model to output suboptimal responses", __func__); - sparams.chat_template = "<|im_start|>"; // llama_chat_apply_template only checks if <|im_start|> exist in the template - } - } - llama_client_slot* get_active_slot() { for (llama_client_slot& slot : slots) { // Check if the slot is currently processing @@ -725,8 +719,8 @@ struct llama_server_context slot->prompt = ""; } - if (json_value(data, "ignore_eos", false)) { - slot->sparams.logit_bias.push_back({llama_token_eos(model), -INFINITY}); + if (json_value(data, "ignore_eos", false) && has_eos_token) { + slot->sparams.logit_bias.push_back({llama_vocab_eos(vocab), -INFINITY}); } /* slot->sparams.penalty_prompt_tokens.clear(); @@ -765,13 +759,13 @@ struct llama_server_context } } */ - slot->sparams.logit_bias.clear(); const auto &logit_bias = data.find("logit_bias"); if (logit_bias != data.end() && logit_bias->is_array()) { - const int n_vocab = llama_n_vocab(model); + const llama_vocab * vocab = llama_model_get_vocab(model); + const int n_vocab = llama_vocab_n_tokens(vocab); for (const auto &el : *logit_bias) { if (el.is_array() && el.size() == 2) @@ -800,7 +794,7 @@ struct llama_server_context } else if (el[0].is_string()) { - auto toks = common_tokenize(model, el[0].get(), false); + auto toks = common_tokenize(vocab, el[0].get(), false); for (auto tok : toks) { slot->sparams.logit_bias.push_back({tok, bias}); @@ -1130,7 +1124,7 @@ struct llama_server_context slot.has_next_token = false; } - if (result.tok == llama_token_eos(model)) + if (result.tok == llama_vocab_eos(vocab) || llama_vocab_is_eog(vocab, result.tok)) { slot.stopped_eos = true; slot.has_next_token = false; @@ -1325,7 +1319,7 @@ struct llama_server_context res.error = false; res.stop = true; - const int n_embd = llama_n_embd(model); + const int n_embd = llama_model_n_embd(model); if (!params.embedding) { LOG_WARNING("embedding disabled", { @@ -1424,7 +1418,7 @@ struct llama_server_context n_eval = n_batch; } - const int n_embd = llama_n_embd(model); + const int n_embd = llama_model_n_embd(model); float * embd = img.image_embedding + i * n_embd; llava_embd_batch llava_batch = llava_embd_batch(embd, n_eval, slot.n_past, 0); if (llama_decode(ctx, llava_batch.batch)) @@ -1705,11 +1699,11 @@ struct llama_server_context suffix_tokens.erase(suffix_tokens.begin()); } - prefix_tokens.insert(prefix_tokens.begin(), llama_token_prefix(model)); - prefix_tokens.insert(prefix_tokens.begin(), llama_token_bos(model)); // always add BOS - prefix_tokens.insert(prefix_tokens.end(), llama_token_suffix(model)); + prefix_tokens.insert(prefix_tokens.begin(), llama_vocab_fim_pre(vocab)); + prefix_tokens.insert(prefix_tokens.begin(), llama_vocab_bos(vocab)); // always add BOS + prefix_tokens.insert(prefix_tokens.end(), llama_vocab_fim_suf(vocab)); prefix_tokens.insert(prefix_tokens.end(), suffix_tokens.begin(), suffix_tokens.end()); - prefix_tokens.push_back(llama_token_middle(model)); + prefix_tokens.push_back(llama_vocab_fim_mid(vocab)); prompt_tokens = prefix_tokens; } else From b0ead0bf12e8f08ea69065c0682d2a634795e932 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 13 Jan 2025 21:17:11 +0000 Subject: [PATCH 017/679] chore(deps): Bump securego/gosec from 2.21.4 to 2.22.0 (#4594) Bumps [securego/gosec](https://github.com/securego/gosec) from 2.21.4 to 2.22.0. 
- [Release notes](https://github.com/securego/gosec/releases) - [Changelog](https://github.com/securego/gosec/blob/master/.goreleaser.yml) - [Commits](https://github.com/securego/gosec/compare/v2.21.4...v2.22.0) --- updated-dependencies: - dependency-name: securego/gosec dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/secscan.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/secscan.yaml b/.github/workflows/secscan.yaml index 3fd808e1..228ac1d9 100644 --- a/.github/workflows/secscan.yaml +++ b/.github/workflows/secscan.yaml @@ -18,7 +18,7 @@ jobs: if: ${{ github.actor != 'dependabot[bot]' }} - name: Run Gosec Security Scanner if: ${{ github.actor != 'dependabot[bot]' }} - uses: securego/gosec@v2.21.4 + uses: securego/gosec@v2.22.0 with: # we let the report trigger content trigger a failure using the GitHub Security features. args: '-no-fail -fmt sarif -out results.sarif ./...' From 0c02512f159bfac0e04e5e7bfebfe1170e3bb505 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Tue, 14 Jan 2025 09:07:20 +0100 Subject: [PATCH 018/679] chore: :arrow_up: Update ggerganov/llama.cpp to `504af20ee4eae72080a56d59d744f6774f7901ce` (#4597) :arrow_up: Update ggerganov/llama.cpp Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 4392980b..fd05703e 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ DETECT_LIBS?=true # llama.cpp versions GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be -CPPLLAMA_VERSION?=924518e2e5726e81f3aeb2518fb85963a500e93a +CPPLLAMA_VERSION?=504af20ee4eae72080a56d59d744f6774f7901ce # whisper.cpp version WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp From 69c6e5b1924e9e6d7cbb13edb8dfab45ef729f12 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Tue, 14 Jan 2025 09:17:55 +0100 Subject: [PATCH 019/679] chore(stablediffusion-ggml): disable sycl optimizations (#4598) Signed-off-by: Ettore Di Giacinto --- backend/go/image/stablediffusion-ggml/Makefile | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/backend/go/image/stablediffusion-ggml/Makefile b/backend/go/image/stablediffusion-ggml/Makefile index 259d4d38..f92c3a77 100644 --- a/backend/go/image/stablediffusion-ggml/Makefile +++ b/backend/go/image/stablediffusion-ggml/Makefile @@ -36,13 +36,13 @@ else ifeq ($(OS),Darwin) endif endif -ifeq ($(BUILD_TYPE),sycl_f16) - CMAKE_ARGS+=-DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DGGML_SYCL_F16=ON -DSD_SYCL=ON -DGGML_SYCL_F16=ON -endif +# ifeq ($(BUILD_TYPE),sycl_f16) +# CMAKE_ARGS+=-DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DGGML_SYCL_F16=ON -DSD_SYCL=ON -DGGML_SYCL_F16=ON +# endif -ifeq ($(BUILD_TYPE),sycl_f32) - CMAKE_ARGS+=-DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DSD_SYCL=ON -endif +# ifeq ($(BUILD_TYPE),sycl_f32) +# CMAKE_ARGS+=-DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DSD_SYCL=ON +# endif # warnings CXXFLAGS += -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function From 1b3e89c89c1e82b98cdfd231d4c44ae491f3cd83 Mon Sep 17 00:00:00 2001 
From: Ettore Di Giacinto Date: Tue, 14 Jan 2025 09:27:18 +0100 Subject: [PATCH 020/679] chore(model gallery): add LocalAI-functioncall-phi-4-v0.3 (#4599) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 16 ++++++++++++++++ gallery/phi-4-chat-fcall.yaml | 10 ++++++++++ 2 files changed, 26 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index 7eb9d479..bb0339bb 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -23,6 +23,22 @@ - filename: phi-4-Q4_K_M.gguf uri: huggingface://bartowski/phi-4-GGUF/phi-4-Q4_K_M.gguf sha256: 009aba717c09d4a35890c7d35eb59d54e1dba884c7c526e7197d9c13ab5911d9 +- !!merge <<: *phi4 + url: "github:mudler/LocalAI/gallery/phi-4-chat-fcall.yaml@master" + name: "LocalAI-functioncall-phi-4-v0.3" + icon: https://cdn-uploads.huggingface.co/production/uploads/647374aa7ff32a81ac6d35d4/Dzbdzn27KEc3K6zNNi070.png + urls: + - https://huggingface.co/mudler/LocalAI-functioncall-phi-4-v0.3 + - https://huggingface.co/mudler/LocalAI-functioncall-phi-4-v0.3-Q4_K_M-GGUF + description: | + A model tailored to be conversational and execute function calls with LocalAI. This model is based on phi-4. + overrides: + parameters: + model: localai-functioncall-phi-4-v0.3-q4_k_m.gguf + files: + - filename: localai-functioncall-phi-4-v0.3-q4_k_m.gguf + sha256: 23fee048ded2a6e2e1a7b6bbefa6cbf83068f194caa9552aecbaa00fec8a16d5 + uri: huggingface://mudler/LocalAI-functioncall-phi-4-v0.3-Q4_K_M-GGUF/localai-functioncall-phi-4-v0.3-q4_k_m.gguf - !!merge <<: *phi4 url: "github:mudler/LocalAI/gallery/phi-4-chat-fcall.yaml@master" name: "LocalAI-functioncall-phi-4-v0.2" diff --git a/gallery/phi-4-chat-fcall.yaml b/gallery/phi-4-chat-fcall.yaml index a6fa261e..23c2e53d 100644 --- a/gallery/phi-4-chat-fcall.yaml +++ b/gallery/phi-4-chat-fcall.yaml @@ -3,6 +3,16 @@ name: "phi-4-chat" config_file: | mmap: true + function: + json_regex_match: + - "(?s)(.*?)" + capture_llm_results: + - (?s)(.*?) + replace_llm_results: + - key: (?s)(.*?) + value: "" + grammar: + properties_order: "name,arguments" template: chat_message: | <|im_start|>{{ .RoleName }}<|im_sep|> From 5414c294c4d2e57f1f0e09da14e341a5cd846e2b Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Tue, 14 Jan 2025 09:29:25 +0100 Subject: [PATCH 021/679] chore(model gallery): add negative-anubis-70b-v1 (#4600) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index bb0339bb..31468321 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -380,6 +380,27 @@ - filename: Negative_LLAMA_70B-Q4_K_M.gguf sha256: 023c6bd38f6a66178529e6bb77b6e76379ae3ee031adc6885531986aa12750d9 uri: huggingface://bartowski/Negative_LLAMA_70B-GGUF/Negative_LLAMA_70B-Q4_K_M.gguf +- !!merge <<: *llama33 + name: "negative-anubis-70b-v1" + icon: https://huggingface.co/knifeayumu/Negative-Anubis-70B-v1/resolve/main/Negative-Anubis.png + urls: + - https://huggingface.co/knifeayumu/Negative-Anubis-70B-v1 + - https://huggingface.co/bartowski/Negative-Anubis-70B-v1-GGUF + description: | + Enjoyed SicariusSicariiStuff/Negative_LLAMA_70B but the prose was too dry for my tastes. So I merged it with TheDrummer/Anubis-70B-v1 for verbosity. Anubis has positivity bias so Negative could balance things out. + + This is a merge of pre-trained language models created using mergekit. 
+ + The following models were included in the merge: + SicariusSicariiStuff/Negative_LLAMA_70B + TheDrummer/Anubis-70B-v1 + overrides: + parameters: + model: Negative-Anubis-70B-v1-Q4_K_M.gguf + files: + - filename: Negative-Anubis-70B-v1-Q4_K_M.gguf + sha256: ac088da9ca70fffaa70c876fbada9fc5a02e7d6049ef68f16b11a9c3256f2510 + uri: huggingface://bartowski/Negative-Anubis-70B-v1-GGUF/Negative-Anubis-70B-v1-Q4_K_M.gguf - &rwkv url: "github:mudler/LocalAI/gallery/rwkv.yaml@master" name: "rwkv-6-world-7b" From 62abe0d2c9c6492213039a7ccbbecaa40808791d Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Tue, 14 Jan 2025 09:33:19 +0100 Subject: [PATCH 022/679] chore(model gallery): add qwen2.5-72b-rp-ink (#4601) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index 31468321..a46d47d6 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -3106,6 +3106,22 @@ - filename: Sky-T1-32B-Preview-Q4_K_M.gguf sha256: c735912a582f10e4769461586a02e5b98ef43c2895ec11923b8c4f157e7909e5 uri: huggingface://bartowski/Sky-T1-32B-Preview-GGUF/Sky-T1-32B-Preview-Q4_K_M.gguf +- !!merge <<: *qwen25 + name: "qwen2.5-72b-rp-ink" + icon: https://cdn-uploads.huggingface.co/production/uploads/634262af8d8089ebaefd410e/M9KSL64gppBVatmTdoQnG.png + urls: + - https://huggingface.co/allura-org/Qwen2.5-72b-RP-Ink + - https://huggingface.co/bartowski/Qwen2.5-72b-RP-Ink-GGUF + description: | + A roleplay-focused LoRA finetune of Qwen 2.5 72b Instruct. Methodology and hyperparams inspired by SorcererLM and Slush. + Yet another model in the Ink series, following in the footsteps of the 32b one and the Nemo one + overrides: + parameters: + model: Qwen2.5-72b-RP-Ink-Q4_K_M.gguf + files: + - filename: Qwen2.5-72b-RP-Ink-Q4_K_M.gguf + sha256: 2c2bf785dc5798403e0ccf6c4f5f9d7d53fcfb0c0b28855c584e09be88f91517 + uri: huggingface://bartowski/Qwen2.5-72b-RP-Ink-GGUF/Qwen2.5-72b-RP-Ink-Q4_K_M.gguf - &smollm ## SmolLM url: "github:mudler/LocalAI/gallery/chatml.yaml@master" From b8d74e52b1e400a52a747a3a89ac3f6338c6ad4b Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Tue, 14 Jan 2025 09:41:30 +0100 Subject: [PATCH 023/679] chore(model gallery): add steiner-32b-preview (#4602) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index a46d47d6..258994e9 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -3122,6 +3122,22 @@ - filename: Qwen2.5-72b-RP-Ink-Q4_K_M.gguf sha256: 2c2bf785dc5798403e0ccf6c4f5f9d7d53fcfb0c0b28855c584e09be88f91517 uri: huggingface://bartowski/Qwen2.5-72b-RP-Ink-GGUF/Qwen2.5-72b-RP-Ink-Q4_K_M.gguf +- !!merge <<: *qwen25 + name: "steiner-32b-preview" + urls: + - https://huggingface.co/peakji/steiner-32b-preview + - https://huggingface.co/bartowski/steiner-32b-preview-GGUF + description: | + Steiner is a series of reasoning models trained on synthetic data using reinforcement learning. These models can explore multiple reasoning paths in an autoregressive manner during inference and autonomously verify or backtrack when necessary, enabling a linear traversal of the implicit search tree. + + Steiner is a personal interest project by Yichao 'Peak' Ji, inspired by OpenAI o1. The ultimate goal is to reproduce o1 and validate the inference-time scaling curves. The Steiner-preview model is currently a work-in-progress. 
The reason for open-sourcing it is that I’ve found automated evaluation methods, primarily based on multiple-choice questions, struggle to fully reflect the progress of reasoning models. In fact, the assumption that "the correct answer is always among the options" doesn’t align well with real-world reasoning scenarios, as it encourages models to perform substitution-based validation rather than open-ended exploration. For this reason, I’ve chosen to open-source these intermediate results and, when time permits, to build in public. This approach allows me to share knowledge while also gathering more evaluations and feedback from real human users. + overrides: + parameters: + model: steiner-32b-preview-Q4_K_M.gguf + files: + - filename: steiner-32b-preview-Q4_K_M.gguf + sha256: 1d7bf6d6dc8db8c81b3e71dc89756cd23417bb0a645b7dcdd1f9457781a88652 + uri: huggingface://bartowski/steiner-32b-preview-GGUF/steiner-32b-preview-Q4_K_M.gguf - &smollm ## SmolLM url: "github:mudler/LocalAI/gallery/chatml.yaml@master" From d7dee3a5ecd7d3e60ba699ed6f12bc8d75213ffd Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Tue, 14 Jan 2025 11:13:16 +0100 Subject: [PATCH 024/679] feat(diffusers): add support for Sana pipelines (#4603) Signed-off-by: Ettore Di Giacinto --- backend/python/diffusers/backend.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/backend/python/diffusers/backend.py b/backend/python/diffusers/backend.py index f1b447b4..c9aa02bc 100755 --- a/backend/python/diffusers/backend.py +++ b/backend/python/diffusers/backend.py @@ -17,7 +17,7 @@ import backend_pb2_grpc import grpc -from diffusers import StableDiffusion3Pipeline, StableDiffusionXLPipeline, StableDiffusionDepth2ImgPipeline, DPMSolverMultistepScheduler, StableDiffusionPipeline, DiffusionPipeline, \ +from diffusers import SanaPipeline, StableDiffusion3Pipeline, StableDiffusionXLPipeline, StableDiffusionDepth2ImgPipeline, DPMSolverMultistepScheduler, StableDiffusionPipeline, DiffusionPipeline, \ EulerAncestralDiscreteScheduler, FluxPipeline, FluxTransformer2DModel from diffusers import StableDiffusionImg2ImgPipeline, AutoPipelineForText2Image, ControlNetModel, StableVideoDiffusionPipeline from diffusers.pipelines.stable_diffusion import safety_checker @@ -275,6 +275,13 @@ class BackendServicer(backend_pb2_grpc.BackendServicer): if request.LowVRAM: self.pipe.enable_model_cpu_offload() + elif request.PipelineType == "SanaPipeline": + self.pipe = SanaPipeline.from_pretrained( + request.Model, + variant="bf16", + torch_dtype=torch.bfloat16) + self.pipe.vae.to(torch.bfloat16) + self.pipe.text_encoder.to(torch.bfloat16) if CLIPSKIP and request.CLIPSkip != 0: self.clip_skip = request.CLIPSkip From f053f7bde224b0b64d6d6daf7a3ffa7e2036d6db Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Tue, 14 Jan 2025 23:16:33 +0100 Subject: [PATCH 025/679] chore: :arrow_up: Update ggerganov/llama.cpp to `b4d92a59a20eea400d8dd30844a339b76210daa0` (#4606) :arrow_up: Update ggerganov/llama.cpp Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index fd05703e..4c01621d 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ DETECT_LIBS?=true # llama.cpp versions GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be 
-CPPLLAMA_VERSION?=504af20ee4eae72080a56d59d744f6774f7901ce +CPPLLAMA_VERSION?=b4d92a59a20eea400d8dd30844a339b76210daa0 # whisper.cpp version WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp From 792b866727454520c47bd04bc75975cd0caab876 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Wed, 15 Jan 2025 15:46:27 +0100 Subject: [PATCH 026/679] Update README.md Signed-off-by: Ettore Di Giacinto --- README.md | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index caf36bcf..ec4db188 100644 --- a/README.md +++ b/README.md @@ -92,19 +92,15 @@ local-ai run oci://localai/phi-2:latest ## šŸ“° Latest project news +- January 2025: SANA support in diffusers: https://github.com/mudler/LocalAI/pull/4603 - Dec 2024: stablediffusion.cpp backend (ggml) added ( https://github.com/mudler/LocalAI/pull/4289 ) - Nov 2024: Bark.cpp backend added ( https://github.com/mudler/LocalAI/pull/4287 ) - Nov 2024: Voice activity detection models (**VAD**) added to the API: https://github.com/mudler/LocalAI/pull/4204 - Oct 2024: examples moved to [LocalAI-examples](https://github.com/mudler/LocalAI-examples) - Aug 2024: šŸ†• FLUX-1, [P2P Explorer](https://explorer.localai.io) -- July 2024: šŸ”„šŸ”„ šŸ†• P2P Dashboard, LocalAI Federated mode and AI Swarms: https://github.com/mudler/LocalAI/pull/2723 -- June 2024: šŸ†• You can browse now the model gallery without LocalAI! Check out https://models.localai.io -- June 2024: Support for models from OCI registries: https://github.com/mudler/LocalAI/pull/2628 +- July 2024: šŸ”„šŸ”„ šŸ†• P2P Dashboard, LocalAI Federated mode and AI Swarms: https://github.com/mudler/LocalAI/pull/2723. P2P Global community pools: https://github.com/mudler/LocalAI/issues/3113 - May 2024: šŸ”„šŸ”„ Decentralized P2P llama.cpp: https://github.com/mudler/LocalAI/pull/2343 (peer2peer llama.cpp!) 
šŸ‘‰ Docs https://localai.io/features/distribute/ -- May 2024: šŸ”„šŸ”„ Openvoice: https://github.com/mudler/LocalAI/pull/2334 -- May 2024: šŸ†• Function calls without grammars and mixed mode: https://github.com/mudler/LocalAI/pull/2328 - May 2024: šŸ”„šŸ”„ Distributed inferencing: https://github.com/mudler/LocalAI/pull/2324 -- May 2024: Chat, TTS, and Image generation in the WebUI: https://github.com/mudler/LocalAI/pull/2222 - April 2024: Reranker API: https://github.com/mudler/LocalAI/pull/2121 Roadmap items: [List of issues](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap) @@ -113,12 +109,10 @@ Roadmap items: [List of issues](https://github.com/mudler/LocalAI/issues?q=is%3A - Multimodal with vLLM and Video understanding: https://github.com/mudler/LocalAI/pull/3729 - Realtime API https://github.com/mudler/LocalAI/issues/3714 -- šŸ”„šŸ”„ Distributed, P2P Global community pools: https://github.com/mudler/LocalAI/issues/3113 - WebUI improvements: https://github.com/mudler/LocalAI/issues/2156 - Backends v2: https://github.com/mudler/LocalAI/issues/1126 - Improving UX v2: https://github.com/mudler/LocalAI/issues/1373 - Assistant API: https://github.com/mudler/LocalAI/issues/1273 -- Moderation endpoint: https://github.com/mudler/LocalAI/issues/999 - Vulkan: https://github.com/mudler/LocalAI/issues/1647 - Anthropic API: https://github.com/mudler/LocalAI/issues/1808 From 5bba5edf451407e7c969940d4df4d9ce89c081b2 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Wed, 15 Jan 2025 15:46:45 +0100 Subject: [PATCH 027/679] chore(model gallery): add qwerus-7b (#4609) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index 258994e9..5ef8d2ce 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -3138,6 +3138,22 @@ - filename: steiner-32b-preview-Q4_K_M.gguf sha256: 1d7bf6d6dc8db8c81b3e71dc89756cd23417bb0a645b7dcdd1f9457781a88652 uri: huggingface://bartowski/steiner-32b-preview-GGUF/steiner-32b-preview-Q4_K_M.gguf +- !!merge <<: *qwen25 + name: "qwerus-7b" + urls: + - https://huggingface.co/mlabonne/Qwerus-7B + - https://huggingface.co/bartowski/Qwerus-7B-GGUF + description: | + Qwerus-7B is a merge of the following models using LazyMergekit: + PRIME-RL/Eurus-2-7B-PRIME + Qwen/Qwen2.5-7B-Instruct + overrides: + parameters: + model: Qwerus-7B-Q4_K_M.gguf + files: + - filename: Qwerus-7B-Q4_K_M.gguf + sha256: 3676629e8092a59f523393e6eb5072727f5213a9e03b7b81141f05a33743e20c + uri: huggingface://bartowski/Qwerus-7B-GGUF/Qwerus-7B-Q4_K_M.gguf - &smollm ## SmolLM url: "github:mudler/LocalAI/gallery/chatml.yaml@master" From 482c6b8be4382d0b91af8bc576b9ca5bd35eff8f Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Wed, 15 Jan 2025 15:51:50 +0100 Subject: [PATCH 028/679] chore(model gallery): add l3.3-ms-nevoria-70b (#4610) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index 5ef8d2ce..bed32d34 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -401,6 +401,23 @@ - filename: Negative-Anubis-70B-v1-Q4_K_M.gguf sha256: ac088da9ca70fffaa70c876fbada9fc5a02e7d6049ef68f16b11a9c3256f2510 uri: huggingface://bartowski/Negative-Anubis-70B-v1-GGUF/Negative-Anubis-70B-v1-Q4_K_M.gguf +- !!merge <<: *llama33 + name: "l3.3-ms-nevoria-70b" + icon: 
https://cdn-uploads.huggingface.co/production/uploads/64545af5ec40bbbd01242ca6/dtlCF4LbekmDD2y3LNpdH.jpeg + urls: + - https://huggingface.co/Steelskull/L3.3-MS-Nevoria-70b + - https://huggingface.co/bartowski/L3.3-MS-Nevoria-70b-GGUF + description: | + This model was created as I liked the storytelling of EVA, the prose and details of scenes from EURYALE and Anubis, enhanced with Negative_LLAMA to kill off the positive bias with a touch of nemotron sprinkeled in. + + The choice to use the lorablated model as a base was intentional - while it might seem counterintuitive, this approach creates unique interactions between the weights, similar to what was achieved in the original Astoria model and Astoria V2 model . Rather than simply removing refusals, this "weight twisting" effect that occurs when subtracting the lorablated base model from the other models during the merge process creates an interesting balance in the final model's behavior. While this approach differs from traditional sequential application of components, it was chosen for its unique characteristics in the model's responses. + overrides: + parameters: + model: L3.3-MS-Nevoria-70b-Q4_K_M.gguf + files: + - filename: L3.3-MS-Nevoria-70b-Q4_K_M.gguf + sha256: e8b0763f263089a19d4b112b7ed5085cc5f1ed9ca49c5085baa8d51f4ded1f94 + uri: huggingface://bartowski/L3.3-MS-Nevoria-70b-GGUF/L3.3-MS-Nevoria-70b-Q4_K_M.gguf - &rwkv url: "github:mudler/LocalAI/gallery/rwkv.yaml@master" name: "rwkv-6-world-7b" From 6d20497d45301d4ed7ecace3ecf81012cd0e5e4b Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Wed, 15 Jan 2025 15:54:12 +0100 Subject: [PATCH 029/679] chore(model gallery): add lb-reranker-0.5b-v1.0 (#4611) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index bed32d34..40dc85a4 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -3171,6 +3171,36 @@ - filename: Qwerus-7B-Q4_K_M.gguf sha256: 3676629e8092a59f523393e6eb5072727f5213a9e03b7b81141f05a33743e20c uri: huggingface://bartowski/Qwerus-7B-GGUF/Qwerus-7B-Q4_K_M.gguf +- !!merge <<: *qwen25 + name: "lb-reranker-0.5b-v1.0" + urls: + - https://huggingface.co/lightblue/lb-reranker-0.5B-v1.0 + - https://huggingface.co/bartowski/lb-reranker-0.5B-v1.0-GGUF + description: | + The LB Reranker has been trained to determine the relatedness of a given query to a piece of text, therefore allowing it to be used as a ranker or reranker in various retrieval-based tasks. + + This model is fine-tuned from a Qwen/Qwen2.5-0.5B-Instruct model checkpoint and was trained for roughly 5.5 hours using the 8 x L20 instance (ecs.gn8is-8x.32xlarge) on Alibaba Cloud. + + The training data for this model can be found at lightblue/reranker_continuous_filt_max7_train and the code for generating this data as well as running the training of the model can be found on our Github repo. + + Trained on data in over 95 languages, this model is applicable to a broad range of use cases. + + This model has three main benefits over comparable rerankers. + + It has shown slightly higher performance on evaluation benchmarks. + It has been trained on more languages than any previous model. + It is a simple Causal LM model trained to output a string between "1" and "7". + + This last point means that this model can be used natively with many widely available inference packages, including vLLM and LMDeploy. 
This in turns allows our reranker to benefit from improvements to inference as and when these packages release them. + + Update: We have also found that this model works pretty well as a code snippet reranker too (P@1 of 96%)! See our Colab for more details. + overrides: + parameters: + model: lb-reranker-0.5B-v1.0-Q4_K_M.gguf + files: + - filename: lb-reranker-0.5B-v1.0-Q4_K_M.gguf + sha256: 43568150de5136da15c996bbf4d1a78cc6580515c40f0ef9a8c90b0542228ab3 + uri: huggingface://bartowski/lb-reranker-0.5B-v1.0-GGUF/lb-reranker-0.5B-v1.0-Q4_K_M.gguf - &smollm ## SmolLM url: "github:mudler/LocalAI/gallery/chatml.yaml@master" From 26c3deb6739f9f933c9825228f4878c7cdfa1f64 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Thu, 16 Jan 2025 01:08:52 +0100 Subject: [PATCH 030/679] chore: :arrow_up: Update ggerganov/llama.cpp to `adc5dd92e8aea98f5e7ac84f6e1bc15de35130b5` (#4612) :arrow_up: Update ggerganov/llama.cpp Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 4c01621d..143b109b 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ DETECT_LIBS?=true # llama.cpp versions GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be -CPPLLAMA_VERSION?=b4d92a59a20eea400d8dd30844a339b76210daa0 +CPPLLAMA_VERSION?=adc5dd92e8aea98f5e7ac84f6e1bc15de35130b5 # whisper.cpp version WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp From 8131ddd87835362834432c3cd1b9500b072d83ed Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Thu, 16 Jan 2025 09:58:14 +0100 Subject: [PATCH 031/679] chore(model gallery): add uwu-7b-instruct (#4613) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index 40dc85a4..7c4e86b4 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -3201,6 +3201,20 @@ - filename: lb-reranker-0.5B-v1.0-Q4_K_M.gguf sha256: 43568150de5136da15c996bbf4d1a78cc6580515c40f0ef9a8c90b0542228ab3 uri: huggingface://bartowski/lb-reranker-0.5B-v1.0-GGUF/lb-reranker-0.5B-v1.0-Q4_K_M.gguf +- !!merge <<: *qwen25 + name: "uwu-7b-instruct" + urls: + - https://huggingface.co/qingy2024/UwU-7B-Instruct + - https://huggingface.co/bartowski/UwU-7B-Instruct-GGUF + description: | + Small QwQ, full-finetuned on FineQwQ-142K. Unlike my previous models, this one is a general-purpose reasoning machine! 
+ overrides: + parameters: + model: UwU-7B-Instruct-Q4_K_M.gguf + files: + - filename: UwU-7B-Instruct-Q4_K_M.gguf + sha256: 279b2ba20d51bb155c8dd497cf49e0c28407b1822c75de88cfd83d13fd14a59f + uri: huggingface://bartowski/UwU-7B-Instruct-GGUF/UwU-7B-Instruct-Q4_K_M.gguf - &smollm ## SmolLM url: "github:mudler/LocalAI/gallery/chatml.yaml@master" From 560ba6f25e19cabdb5defbeda2d57d14ed3700df Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Thu, 16 Jan 2025 10:04:44 +0100 Subject: [PATCH 032/679] chore(model gallery): add drt-o1-14b (#4614) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index 7c4e86b4..647bc942 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -3215,6 +3215,28 @@ - filename: UwU-7B-Instruct-Q4_K_M.gguf sha256: 279b2ba20d51bb155c8dd497cf49e0c28407b1822c75de88cfd83d13fd14a59f uri: huggingface://bartowski/UwU-7B-Instruct-GGUF/UwU-7B-Instruct-Q4_K_M.gguf + +- !!merge <<: *qwen25 + name: "drt-o1-14b" + urls: + - https://huggingface.co/Krystalan/DRT-o1-14B + - https://huggingface.co/bartowski/DRT-o1-14B-GGUF + description: | + This repository contains the resources for our paper "DRT-o1: Optimized Deep Reasoning Translation via Long Chain-of-Thought" + In this work, we introduce DRT-o1, an attempt to bring the success of long thought reasoning to neural machine translation (MT). To this end, + + 🌟 We mine English sentences with similes or metaphors from existing literature books, which are suitable for translation via long thought. + 🌟 We propose a designed multi-agent framework with three agents (i.e., a translator, an advisor and an evaluator) to synthesize the MT samples with long thought. There are 22,264 synthesized samples in total. + 🌟 We train DRT-o1-8B, DRT-o1-7B and DRT-o1-14B using Llama-3.1-8B-Instruct, Qwen2.5-7B-Instruct and Qwen2.5-14B-Instruct as backbones. + + Our goal is not to achieve competitive performance with OpenAI’s O1 in neural machine translation (MT). Instead, we explore technical routes to bring the success of long thought to MT. To this end, we introduce DRT-o1, a byproduct of our exploration, and we hope it could facilitate the corresponding research in this direction. 
+ overrides: + parameters: + model: DRT-o1-14B-Q4_K_M.gguf + files: + - filename: DRT-o1-14B-Q4_K_M.gguf + sha256: 9619ca984cf4ce8e4f69bcde831de17b2ce05dd89536e3130608877521e3d328 + uri: huggingface://bartowski/DRT-o1-14B-GGUF/DRT-o1-14B-Q4_K_M.gguf - &smollm ## SmolLM url: "github:mudler/LocalAI/gallery/chatml.yaml@master" From de4aa9fb1d48abc45577a96f7a4a4541c96226d4 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Thu, 16 Jan 2025 10:09:25 +0100 Subject: [PATCH 033/679] chore(model gallery): add vikhr-qwen-2.5-1.5b-instruct (#4615) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/gallery/index.yaml b/gallery/index.yaml index 647bc942..22d748d8 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -3215,7 +3215,6 @@ - filename: UwU-7B-Instruct-Q4_K_M.gguf sha256: 279b2ba20d51bb155c8dd497cf49e0c28407b1822c75de88cfd83d13fd14a59f uri: huggingface://bartowski/UwU-7B-Instruct-GGUF/UwU-7B-Instruct-Q4_K_M.gguf - - !!merge <<: *qwen25 name: "drt-o1-14b" urls: @@ -3282,6 +3281,20 @@ - filename: smollm2-1.7b-instruct-q4_k_m.gguf sha256: decd2598bc2c8ed08c19adc3c8fdd461ee19ed5708679d1c54ef54a5a30d4f33 uri: huggingface://HuggingFaceTB/SmolLM2-1.7B-Instruct-GGUF/smollm2-1.7b-instruct-q4_k_m.gguf +- !!merge <<: qwen25 + name: "vikhr-qwen-2.5-1.5b-instruct" + urls: + - https://huggingface.co/Vikhrmodels/Vikhr-Qwen-2.5-1.5B-Instruct + - https://huggingface.co/QuantFactory/Vikhr-Qwen-2.5-1.5B-Instruct-GGUF + description: | + Instructive model based on Qwen-2.5-1.5B-Instruct, trained on the Russian-language dataset GrandMaster-PRO-MAX. Designed for high-efficiency text processing in Russian and English, delivering precise responses and fast task execution. + overrides: + parameters: + model: Vikhr-Qwen-2.5-1.5B-Instruct.Q4_K_M.gguf + files: + - filename: Vikhr-Qwen-2.5-1.5B-Instruct.Q4_K_M.gguf + sha256: eaeac314e30b461413bc1cc819cdc0cd6a79265711fd0b8268702960a082c7bd + uri: huggingface://QuantFactory/Vikhr-Qwen-2.5-1.5B-Instruct-GGUF/Vikhr-Qwen-2.5-1.5B-Instruct.Q4_K_M.gguf - &llama31 ## LLama3.1 url: "github:mudler/LocalAI/gallery/llama3.1-instruct.yaml@master" From acb2eb23c8376f853fc109f59e93b318f5fb08c1 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Thu, 16 Jan 2025 22:23:09 +0100 Subject: [PATCH 034/679] feat(tts): Add Kokoro backend (#4616) * feat(kokoro): Add new TTS backend Signed-off-by: Ettore Di Giacinto * Add kokoro to images Signed-off-by: Ettore Di Giacinto * Support combined voices Signed-off-by: Ettore Di Giacinto * Ignore pt and onnx Signed-off-by: Ettore Di Giacinto * Add plbert and istfnet Signed-off-by: Ettore Di Giacinto --------- Signed-off-by: Ettore Di Giacinto --- Dockerfile | 9 +- Makefile | 13 +- backend/python/kokoro/Makefile | 20 + backend/python/kokoro/backend.py | 131 +++++ backend/python/kokoro/install.sh | 14 + backend/python/kokoro/istftnet.py | 524 ++++++++++++++++++ backend/python/kokoro/kokoro.py | 166 ++++++ backend/python/kokoro/models.py | 373 +++++++++++++ backend/python/kokoro/plbert.py | 16 + backend/python/kokoro/protogen.sh | 6 + backend/python/kokoro/requirements-cpu.txt | 2 + .../python/kokoro/requirements-cublas11.txt | 3 + .../python/kokoro/requirements-cublas12.txt | 2 + .../python/kokoro/requirements-hipblas.txt | 3 + backend/python/kokoro/requirements-intel.txt | 5 + backend/python/kokoro/requirements.txt | 7 + backend/python/kokoro/run.sh | 4 + backend/python/kokoro/test.sh | 6 + pkg/model/loader.go | 2 + 19 files changed, 1303 insertions(+), 
3 deletions(-) create mode 100644 backend/python/kokoro/Makefile create mode 100755 backend/python/kokoro/backend.py create mode 100755 backend/python/kokoro/install.sh create mode 100644 backend/python/kokoro/istftnet.py create mode 100644 backend/python/kokoro/kokoro.py create mode 100644 backend/python/kokoro/models.py create mode 100644 backend/python/kokoro/plbert.py create mode 100644 backend/python/kokoro/protogen.sh create mode 100644 backend/python/kokoro/requirements-cpu.txt create mode 100644 backend/python/kokoro/requirements-cublas11.txt create mode 100644 backend/python/kokoro/requirements-cublas12.txt create mode 100644 backend/python/kokoro/requirements-hipblas.txt create mode 100644 backend/python/kokoro/requirements-intel.txt create mode 100644 backend/python/kokoro/requirements.txt create mode 100755 backend/python/kokoro/run.sh create mode 100755 backend/python/kokoro/test.sh diff --git a/Dockerfile b/Dockerfile index 42c1c1fc..481edf90 100644 --- a/Dockerfile +++ b/Dockerfile @@ -15,7 +15,7 @@ ARG TARGETARCH ARG TARGETVARIANT ENV DEBIAN_FRONTEND=noninteractive -ENV EXTERNAL_GRPC_BACKENDS="coqui:/build/backend/python/coqui/run.sh,huggingface-embeddings:/build/backend/python/sentencetransformers/run.sh,transformers:/build/backend/python/transformers/run.sh,sentencetransformers:/build/backend/python/sentencetransformers/run.sh,rerankers:/build/backend/python/rerankers/run.sh,autogptq:/build/backend/python/autogptq/run.sh,bark:/build/backend/python/bark/run.sh,diffusers:/build/backend/python/diffusers/run.sh,openvoice:/build/backend/python/openvoice/run.sh,vall-e-x:/build/backend/python/vall-e-x/run.sh,vllm:/build/backend/python/vllm/run.sh,mamba:/build/backend/python/mamba/run.sh,exllama2:/build/backend/python/exllama2/run.sh,transformers-musicgen:/build/backend/python/transformers-musicgen/run.sh,parler-tts:/build/backend/python/parler-tts/run.sh" +ENV EXTERNAL_GRPC_BACKENDS="coqui:/build/backend/python/coqui/run.sh,huggingface-embeddings:/build/backend/python/sentencetransformers/run.sh,transformers:/build/backend/python/transformers/run.sh,sentencetransformers:/build/backend/python/sentencetransformers/run.sh,rerankers:/build/backend/python/rerankers/run.sh,autogptq:/build/backend/python/autogptq/run.sh,bark:/build/backend/python/bark/run.sh,diffusers:/build/backend/python/diffusers/run.sh,openvoice:/build/backend/python/openvoice/run.sh,kokoro:/build/backend/python/kokoro/run.sh,vall-e-x:/build/backend/python/vall-e-x/run.sh,vllm:/build/backend/python/vllm/run.sh,mamba:/build/backend/python/mamba/run.sh,exllama2:/build/backend/python/exllama2/run.sh,transformers-musicgen:/build/backend/python/transformers-musicgen/run.sh,parler-tts:/build/backend/python/parler-tts/run.sh" RUN apt-get update && \ @@ -436,6 +436,10 @@ SHELL ["/bin/bash", "-c"] # Splitting the backends into more groups with fewer items results in a larger image, but a smaller size for the largest layer # Splitting the backends into fewer groups with more items results in a smaller image, but a larger size for the largest layer +RUN if [[ ( "${IMAGE_TYPE}" == "extras ")]]; then \ + apt-get -qq -y install espeak-ng \ + ; fi + RUN if [[ ( "${EXTRA_BACKENDS}" =~ "coqui" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \ make -C backend/python/coqui \ ; fi && \ @@ -452,6 +456,9 @@ RUN if [[ ( "${EXTRA_BACKENDS}" =~ "coqui" || -z "${EXTRA_BACKENDS}" ) && "$IMAG RUN if [[ ( "${EXTRA_BACKENDS}" =~ "vall-e-x" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \ make -C 
backend/python/vall-e-x \ ; fi && \ + if [[ ( "${EXTRA_BACKENDS}" =~ "kokoro" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \ + make -C backend/python/kokoro \ + ; fi && \ if [[ ( "${EXTRA_BACKENDS}" =~ "openvoice" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \ make -C backend/python/openvoice \ ; fi && \ diff --git a/Makefile b/Makefile index 143b109b..49c81950 100644 --- a/Makefile +++ b/Makefile @@ -583,10 +583,10 @@ protogen-go-clean: $(RM) bin/* .PHONY: protogen-python -protogen-python: autogptq-protogen bark-protogen coqui-protogen diffusers-protogen exllama2-protogen mamba-protogen rerankers-protogen sentencetransformers-protogen transformers-protogen parler-tts-protogen transformers-musicgen-protogen vall-e-x-protogen vllm-protogen openvoice-protogen +protogen-python: autogptq-protogen bark-protogen coqui-protogen diffusers-protogen exllama2-protogen mamba-protogen rerankers-protogen sentencetransformers-protogen transformers-protogen parler-tts-protogen transformers-musicgen-protogen vall-e-x-protogen kokoro-protogen vllm-protogen openvoice-protogen .PHONY: protogen-python-clean -protogen-python-clean: autogptq-protogen-clean bark-protogen-clean coqui-protogen-clean diffusers-protogen-clean exllama2-protogen-clean mamba-protogen-clean sentencetransformers-protogen-clean rerankers-protogen-clean transformers-protogen-clean transformers-musicgen-protogen-clean parler-tts-protogen-clean vall-e-x-protogen-clean vllm-protogen-clean openvoice-protogen-clean +protogen-python-clean: autogptq-protogen-clean bark-protogen-clean coqui-protogen-clean diffusers-protogen-clean exllama2-protogen-clean mamba-protogen-clean sentencetransformers-protogen-clean rerankers-protogen-clean transformers-protogen-clean transformers-musicgen-protogen-clean parler-tts-protogen-clean vall-e-x-protogen-clean kokoro-protogen-clean vllm-protogen-clean openvoice-protogen-clean .PHONY: autogptq-protogen autogptq-protogen: @@ -684,6 +684,14 @@ vall-e-x-protogen: vall-e-x-protogen-clean: $(MAKE) -C backend/python/vall-e-x protogen-clean +.PHONY: kokoro-protogen +kokoro-protogen: + $(MAKE) -C backend/python/kokoro protogen + +.PHONY: kokoro-protogen-clean +kokoro-protogen-clean: + $(MAKE) -C backend/python/kokoro protogen-clean + .PHONY: openvoice-protogen openvoice-protogen: $(MAKE) -C backend/python/openvoice protogen @@ -715,6 +723,7 @@ prepare-extra-conda-environments: protogen-python $(MAKE) -C backend/python/transformers-musicgen $(MAKE) -C backend/python/parler-tts $(MAKE) -C backend/python/vall-e-x + $(MAKE) -C backend/python/kokoro $(MAKE) -C backend/python/openvoice $(MAKE) -C backend/python/exllama2 diff --git a/backend/python/kokoro/Makefile b/backend/python/kokoro/Makefile new file mode 100644 index 00000000..c0e5169f --- /dev/null +++ b/backend/python/kokoro/Makefile @@ -0,0 +1,20 @@ +.DEFAULT_GOAL := install + +.PHONY: install +install: + bash install.sh + $(MAKE) protogen + +.PHONY: protogen +protogen: backend_pb2_grpc.py backend_pb2.py + +.PHONY: protogen-clean +protogen-clean: + $(RM) backend_pb2_grpc.py backend_pb2.py + +backend_pb2_grpc.py backend_pb2.py: + bash protogen.sh + +.PHONY: clean +clean: protogen-clean + rm -rf venv __pycache__ \ No newline at end of file diff --git a/backend/python/kokoro/backend.py b/backend/python/kokoro/backend.py new file mode 100755 index 00000000..1fd1feb9 --- /dev/null +++ b/backend/python/kokoro/backend.py @@ -0,0 +1,131 @@ +#!/usr/bin/env python3 +""" +Extra gRPC server for Kokoro models. 
+""" +from concurrent import futures + +import argparse +import signal +import sys +import os +import time +import backend_pb2 +import backend_pb2_grpc +import soundfile as sf +import grpc + +from models import build_model +from kokoro import generate +import torch + +SAMPLE_RATE = 22050 +_ONE_DAY_IN_SECONDS = 60 * 60 * 24 + +# If MAX_WORKERS are specified in the environment use it, otherwise default to 1 +MAX_WORKERS = int(os.environ.get('PYTHON_GRPC_MAX_WORKERS', '1')) + +# Implement the BackendServicer class with the service methods +class BackendServicer(backend_pb2_grpc.BackendServicer): + """ + A gRPC servicer for the backend service. + + This class implements the gRPC methods for the backend service, including Health, LoadModel, and Embedding. + """ + def Health(self, request, context): + """ + A gRPC method that returns the health status of the backend service. + + Args: + request: A HealthRequest object that contains the request parameters. + context: A grpc.ServicerContext object that provides information about the RPC. + + Returns: + A Reply object that contains the health status of the backend service. + """ + return backend_pb2.Reply(message=bytes("OK", 'utf-8')) + + def LoadModel(self, request, context): + """ + A gRPC method that loads a model into memory. + + Args: + request: A LoadModelRequest object that contains the request parameters. + context: A grpc.ServicerContext object that provides information about the RPC. + + Returns: + A Result object that contains the result of the LoadModel operation. + """ + model_name = request.Model + try: + device = "cuda:0" if torch.cuda.is_available() else "cpu" + self.MODEL = build_model(request.ModelFile, device) + options = request.Options + # Find the voice from the options, options are a list of strings in this form optname:optvalue: + VOICE_NAME = None + for opt in options: + if opt.startswith("voice:"): + VOICE_NAME = opt.split(":")[1] + break + if VOICE_NAME is None: + return backend_pb2.Result(success=False, message=f"No voice specified in options") + MODELPATH = request.ModelPath + # If voice name contains a plus, split it and load the two models and combine them + if "+" in VOICE_NAME: + voice1, voice2 = VOICE_NAME.split("+") + voice1 = torch.load(f'{MODELPATH}/{voice1}.pt', weights_only=True).to(device) + voice2 = torch.load(f'{MODELPATH}/{voice2}.pt', weights_only=True).to(device) + self.VOICEPACK = torch.mean(torch.stack([voice1, voice2]), dim=0) + else: + self.VOICEPACK = torch.load(f'{MODELPATH}/{VOICE_NAME}.pt', weights_only=True).to(device) + + self.VOICE_NAME = VOICE_NAME + + print(f'Loaded voice: {VOICE_NAME}') + except Exception as err: + return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}") + + return backend_pb2.Result(message="Model loaded successfully", success=True) + + def TTS(self, request, context): + model_name = request.model + if model_name == "": + return backend_pb2.Result(success=False, message="request.model is required") + try: + audio, out_ps = generate(self.MODEL, request.text, self.VOICEPACK, lang=self.VOICE_NAME) + print(out_ps) + sf.write(request.dst, audio, SAMPLE_RATE) + except Exception as err: + return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}") + return backend_pb2.Result(success=True) + +def serve(address): + server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS)) + backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server) + server.add_insecure_port(address) + server.start() + 
print("[Kokoro] Server started. Listening on: " + address, file=sys.stderr) + + # Define the signal handler function + def signal_handler(sig, frame): + print("[Kokoro] Received termination signal. Shutting down...") + server.stop(0) + sys.exit(0) + + # Set the signal handlers for SIGINT and SIGTERM + signal.signal(signal.SIGINT, signal_handler) + signal.signal(signal.SIGTERM, signal_handler) + + try: + while True: + time.sleep(_ONE_DAY_IN_SECONDS) + except KeyboardInterrupt: + server.stop(0) + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="Run the gRPC server.") + parser.add_argument( + "--addr", default="localhost:50051", help="The address to bind the server to." + ) + args = parser.parse_args() + print(f"[Kokoro] startup: {args}", file=sys.stderr) + serve(args.addr) diff --git a/backend/python/kokoro/install.sh b/backend/python/kokoro/install.sh new file mode 100755 index 00000000..36443ef1 --- /dev/null +++ b/backend/python/kokoro/install.sh @@ -0,0 +1,14 @@ +#!/bin/bash +set -e + +source $(dirname $0)/../common/libbackend.sh + +# This is here because the Intel pip index is broken and returns 200 status codes for every package name, it just doesn't return any package links. +# This makes uv think that the package exists in the Intel pip index, and by default it stops looking at other pip indexes once it finds a match. +# We need uv to continue falling through to the pypi default index to find optimum[openvino] in the pypi index +# the --upgrade actually allows us to *downgrade* torch to the version provided in the Intel pip index +if [ "x${BUILD_PROFILE}" == "xintel" ]; then + EXTRA_PIP_INSTALL_FLAGS+=" --upgrade --index-strategy=unsafe-first-match" +fi + +installRequirements diff --git a/backend/python/kokoro/istftnet.py b/backend/python/kokoro/istftnet.py new file mode 100644 index 00000000..818fb912 --- /dev/null +++ b/backend/python/kokoro/istftnet.py @@ -0,0 +1,524 @@ +# https://huggingface.co/hexgrad/Kokoro-82M/blob/main/istftnet.py +# https://github.com/yl4579/StyleTTS2/blob/main/Modules/istftnet.py +from scipy.signal import get_window +from torch.nn import Conv1d, ConvTranspose1d +from torch.nn.utils import weight_norm, remove_weight_norm +import numpy as np +import torch +import torch.nn as nn +import torch.nn.functional as F + +# https://github.com/yl4579/StyleTTS2/blob/main/Modules/utils.py +def init_weights(m, mean=0.0, std=0.01): + classname = m.__class__.__name__ + if classname.find("Conv") != -1: + m.weight.data.normal_(mean, std) + +def get_padding(kernel_size, dilation=1): + return int((kernel_size*dilation - dilation)/2) + +LRELU_SLOPE = 0.1 + +class AdaIN1d(nn.Module): + def __init__(self, style_dim, num_features): + super().__init__() + self.norm = nn.InstanceNorm1d(num_features, affine=False) + self.fc = nn.Linear(style_dim, num_features*2) + + def forward(self, x, s): + h = self.fc(s) + h = h.view(h.size(0), h.size(1), 1) + gamma, beta = torch.chunk(h, chunks=2, dim=1) + return (1 + gamma) * self.norm(x) + beta + +class AdaINResBlock1(torch.nn.Module): + def __init__(self, channels, kernel_size=3, dilation=(1, 3, 5), style_dim=64): + super(AdaINResBlock1, self).__init__() + self.convs1 = nn.ModuleList([ + weight_norm(Conv1d(channels, channels, kernel_size, 1, dilation=dilation[0], + padding=get_padding(kernel_size, dilation[0]))), + weight_norm(Conv1d(channels, channels, kernel_size, 1, dilation=dilation[1], + padding=get_padding(kernel_size, dilation[1]))), + weight_norm(Conv1d(channels, channels, kernel_size, 1, dilation=dilation[2], 
+ padding=get_padding(kernel_size, dilation[2]))) + ]) + self.convs1.apply(init_weights) + + self.convs2 = nn.ModuleList([ + weight_norm(Conv1d(channels, channels, kernel_size, 1, dilation=1, + padding=get_padding(kernel_size, 1))), + weight_norm(Conv1d(channels, channels, kernel_size, 1, dilation=1, + padding=get_padding(kernel_size, 1))), + weight_norm(Conv1d(channels, channels, kernel_size, 1, dilation=1, + padding=get_padding(kernel_size, 1))) + ]) + self.convs2.apply(init_weights) + + self.adain1 = nn.ModuleList([ + AdaIN1d(style_dim, channels), + AdaIN1d(style_dim, channels), + AdaIN1d(style_dim, channels), + ]) + + self.adain2 = nn.ModuleList([ + AdaIN1d(style_dim, channels), + AdaIN1d(style_dim, channels), + AdaIN1d(style_dim, channels), + ]) + + self.alpha1 = nn.ParameterList([nn.Parameter(torch.ones(1, channels, 1)) for i in range(len(self.convs1))]) + self.alpha2 = nn.ParameterList([nn.Parameter(torch.ones(1, channels, 1)) for i in range(len(self.convs2))]) + + + def forward(self, x, s): + for c1, c2, n1, n2, a1, a2 in zip(self.convs1, self.convs2, self.adain1, self.adain2, self.alpha1, self.alpha2): + xt = n1(x, s) + xt = xt + (1 / a1) * (torch.sin(a1 * xt) ** 2) # Snake1D + xt = c1(xt) + xt = n2(xt, s) + xt = xt + (1 / a2) * (torch.sin(a2 * xt) ** 2) # Snake1D + xt = c2(xt) + x = xt + x + return x + + def remove_weight_norm(self): + for l in self.convs1: + remove_weight_norm(l) + for l in self.convs2: + remove_weight_norm(l) + +class TorchSTFT(torch.nn.Module): + def __init__(self, filter_length=800, hop_length=200, win_length=800, window='hann'): + super().__init__() + self.filter_length = filter_length + self.hop_length = hop_length + self.win_length = win_length + self.window = torch.from_numpy(get_window(window, win_length, fftbins=True).astype(np.float32)) + + def transform(self, input_data): + forward_transform = torch.stft( + input_data, + self.filter_length, self.hop_length, self.win_length, window=self.window.to(input_data.device), + return_complex=True) + + return torch.abs(forward_transform), torch.angle(forward_transform) + + def inverse(self, magnitude, phase): + inverse_transform = torch.istft( + magnitude * torch.exp(phase * 1j), + self.filter_length, self.hop_length, self.win_length, window=self.window.to(magnitude.device)) + + return inverse_transform.unsqueeze(-2) # unsqueeze to stay consistent with conv_transpose1d implementation + + def forward(self, input_data): + self.magnitude, self.phase = self.transform(input_data) + reconstruction = self.inverse(self.magnitude, self.phase) + return reconstruction + +class SineGen(torch.nn.Module): + """ Definition of sine generator + SineGen(samp_rate, harmonic_num = 0, + sine_amp = 0.1, noise_std = 0.003, + voiced_threshold = 0, + flag_for_pulse=False) + samp_rate: sampling rate in Hz + harmonic_num: number of harmonic overtones (default 0) + sine_amp: amplitude of sine-wavefrom (default 0.1) + noise_std: std of Gaussian noise (default 0.003) + voiced_thoreshold: F0 threshold for U/V classification (default 0) + flag_for_pulse: this SinGen is used inside PulseGen (default False) + Note: when flag_for_pulse is True, the first time step of a voiced + segment is always sin(np.pi) or cos(0) + """ + + def __init__(self, samp_rate, upsample_scale, harmonic_num=0, + sine_amp=0.1, noise_std=0.003, + voiced_threshold=0, + flag_for_pulse=False): + super(SineGen, self).__init__() + self.sine_amp = sine_amp + self.noise_std = noise_std + self.harmonic_num = harmonic_num + self.dim = self.harmonic_num + 1 + self.sampling_rate = 
samp_rate + self.voiced_threshold = voiced_threshold + self.flag_for_pulse = flag_for_pulse + self.upsample_scale = upsample_scale + + def _f02uv(self, f0): + # generate uv signal + uv = (f0 > self.voiced_threshold).type(torch.float32) + return uv + + def _f02sine(self, f0_values): + """ f0_values: (batchsize, length, dim) + where dim indicates fundamental tone and overtones + """ + # convert to F0 in rad. The interger part n can be ignored + # because 2 * np.pi * n doesn't affect phase + rad_values = (f0_values / self.sampling_rate) % 1 + + # initial phase noise (no noise for fundamental component) + rand_ini = torch.rand(f0_values.shape[0], f0_values.shape[2], \ + device=f0_values.device) + rand_ini[:, 0] = 0 + rad_values[:, 0, :] = rad_values[:, 0, :] + rand_ini + + # instantanouse phase sine[t] = sin(2*pi \sum_i=1 ^{t} rad) + if not self.flag_for_pulse: +# # for normal case + +# # To prevent torch.cumsum numerical overflow, +# # it is necessary to add -1 whenever \sum_k=1^n rad_value_k > 1. +# # Buffer tmp_over_one_idx indicates the time step to add -1. +# # This will not change F0 of sine because (x-1) * 2*pi = x * 2*pi +# tmp_over_one = torch.cumsum(rad_values, 1) % 1 +# tmp_over_one_idx = (padDiff(tmp_over_one)) < 0 +# cumsum_shift = torch.zeros_like(rad_values) +# cumsum_shift[:, 1:, :] = tmp_over_one_idx * -1.0 + +# phase = torch.cumsum(rad_values, dim=1) * 2 * np.pi + rad_values = torch.nn.functional.interpolate(rad_values.transpose(1, 2), + scale_factor=1/self.upsample_scale, + mode="linear").transpose(1, 2) + +# tmp_over_one = torch.cumsum(rad_values, 1) % 1 +# tmp_over_one_idx = (padDiff(tmp_over_one)) < 0 +# cumsum_shift = torch.zeros_like(rad_values) +# cumsum_shift[:, 1:, :] = tmp_over_one_idx * -1.0 + + phase = torch.cumsum(rad_values, dim=1) * 2 * np.pi + phase = torch.nn.functional.interpolate(phase.transpose(1, 2) * self.upsample_scale, + scale_factor=self.upsample_scale, mode="linear").transpose(1, 2) + sines = torch.sin(phase) + + else: + # If necessary, make sure that the first time step of every + # voiced segments is sin(pi) or cos(0) + # This is used for pulse-train generation + + # identify the last time step in unvoiced segments + uv = self._f02uv(f0_values) + uv_1 = torch.roll(uv, shifts=-1, dims=1) + uv_1[:, -1, :] = 1 + u_loc = (uv < 1) * (uv_1 > 0) + + # get the instantanouse phase + tmp_cumsum = torch.cumsum(rad_values, dim=1) + # different batch needs to be processed differently + for idx in range(f0_values.shape[0]): + temp_sum = tmp_cumsum[idx, u_loc[idx, :, 0], :] + temp_sum[1:, :] = temp_sum[1:, :] - temp_sum[0:-1, :] + # stores the accumulation of i.phase within + # each voiced segments + tmp_cumsum[idx, :, :] = 0 + tmp_cumsum[idx, u_loc[idx, :, 0], :] = temp_sum + + # rad_values - tmp_cumsum: remove the accumulation of i.phase + # within the previous voiced segment. 
+ i_phase = torch.cumsum(rad_values - tmp_cumsum, dim=1) + + # get the sines + sines = torch.cos(i_phase * 2 * np.pi) + return sines + + def forward(self, f0): + """ sine_tensor, uv = forward(f0) + input F0: tensor(batchsize=1, length, dim=1) + f0 for unvoiced steps should be 0 + output sine_tensor: tensor(batchsize=1, length, dim) + output uv: tensor(batchsize=1, length, 1) + """ + f0_buf = torch.zeros(f0.shape[0], f0.shape[1], self.dim, + device=f0.device) + # fundamental component + fn = torch.multiply(f0, torch.FloatTensor([[range(1, self.harmonic_num + 2)]]).to(f0.device)) + + # generate sine waveforms + sine_waves = self._f02sine(fn) * self.sine_amp + + # generate uv signal + # uv = torch.ones(f0.shape) + # uv = uv * (f0 > self.voiced_threshold) + uv = self._f02uv(f0) + + # noise: for unvoiced should be similar to sine_amp + # std = self.sine_amp/3 -> max value ~ self.sine_amp + # . for voiced regions is self.noise_std + noise_amp = uv * self.noise_std + (1 - uv) * self.sine_amp / 3 + noise = noise_amp * torch.randn_like(sine_waves) + + # first: set the unvoiced part to 0 by uv + # then: additive noise + sine_waves = sine_waves * uv + noise + return sine_waves, uv, noise + + +class SourceModuleHnNSF(torch.nn.Module): + """ SourceModule for hn-nsf + SourceModule(sampling_rate, harmonic_num=0, sine_amp=0.1, + add_noise_std=0.003, voiced_threshod=0) + sampling_rate: sampling_rate in Hz + harmonic_num: number of harmonic above F0 (default: 0) + sine_amp: amplitude of sine source signal (default: 0.1) + add_noise_std: std of additive Gaussian noise (default: 0.003) + note that amplitude of noise in unvoiced is decided + by sine_amp + voiced_threshold: threhold to set U/V given F0 (default: 0) + Sine_source, noise_source = SourceModuleHnNSF(F0_sampled) + F0_sampled (batchsize, length, 1) + Sine_source (batchsize, length, 1) + noise_source (batchsize, length 1) + uv (batchsize, length, 1) + """ + + def __init__(self, sampling_rate, upsample_scale, harmonic_num=0, sine_amp=0.1, + add_noise_std=0.003, voiced_threshod=0): + super(SourceModuleHnNSF, self).__init__() + + self.sine_amp = sine_amp + self.noise_std = add_noise_std + + # to produce sine waveforms + self.l_sin_gen = SineGen(sampling_rate, upsample_scale, harmonic_num, + sine_amp, add_noise_std, voiced_threshod) + + # to merge source harmonics into a single excitation + self.l_linear = torch.nn.Linear(harmonic_num + 1, 1) + self.l_tanh = torch.nn.Tanh() + + def forward(self, x): + """ + Sine_source, noise_source = SourceModuleHnNSF(F0_sampled) + F0_sampled (batchsize, length, 1) + Sine_source (batchsize, length, 1) + noise_source (batchsize, length 1) + """ + # source for harmonic branch + with torch.no_grad(): + sine_wavs, uv, _ = self.l_sin_gen(x) + sine_merge = self.l_tanh(self.l_linear(sine_wavs)) + + # source for noise branch, in the same shape as uv + noise = torch.randn_like(uv) * self.sine_amp / 3 + return sine_merge, noise, uv +def padDiff(x): + return F.pad(F.pad(x, (0,0,-1,1), 'constant', 0) - x, (0,0,0,-1), 'constant', 0) + + +class Generator(torch.nn.Module): + def __init__(self, style_dim, resblock_kernel_sizes, upsample_rates, upsample_initial_channel, resblock_dilation_sizes, upsample_kernel_sizes, gen_istft_n_fft, gen_istft_hop_size): + super(Generator, self).__init__() + + self.num_kernels = len(resblock_kernel_sizes) + self.num_upsamples = len(upsample_rates) + resblock = AdaINResBlock1 + + self.m_source = SourceModuleHnNSF( + sampling_rate=24000, + upsample_scale=np.prod(upsample_rates) * gen_istft_hop_size, + 
harmonic_num=8, voiced_threshod=10) + self.f0_upsamp = torch.nn.Upsample(scale_factor=np.prod(upsample_rates) * gen_istft_hop_size) + self.noise_convs = nn.ModuleList() + self.noise_res = nn.ModuleList() + + self.ups = nn.ModuleList() + for i, (u, k) in enumerate(zip(upsample_rates, upsample_kernel_sizes)): + self.ups.append(weight_norm( + ConvTranspose1d(upsample_initial_channel//(2**i), upsample_initial_channel//(2**(i+1)), + k, u, padding=(k-u)//2))) + + self.resblocks = nn.ModuleList() + for i in range(len(self.ups)): + ch = upsample_initial_channel//(2**(i+1)) + for j, (k, d) in enumerate(zip(resblock_kernel_sizes,resblock_dilation_sizes)): + self.resblocks.append(resblock(ch, k, d, style_dim)) + + c_cur = upsample_initial_channel // (2 ** (i + 1)) + + if i + 1 < len(upsample_rates): # + stride_f0 = np.prod(upsample_rates[i + 1:]) + self.noise_convs.append(Conv1d( + gen_istft_n_fft + 2, c_cur, kernel_size=stride_f0 * 2, stride=stride_f0, padding=(stride_f0+1) // 2)) + self.noise_res.append(resblock(c_cur, 7, [1,3,5], style_dim)) + else: + self.noise_convs.append(Conv1d(gen_istft_n_fft + 2, c_cur, kernel_size=1)) + self.noise_res.append(resblock(c_cur, 11, [1,3,5], style_dim)) + + + self.post_n_fft = gen_istft_n_fft + self.conv_post = weight_norm(Conv1d(ch, self.post_n_fft + 2, 7, 1, padding=3)) + self.ups.apply(init_weights) + self.conv_post.apply(init_weights) + self.reflection_pad = torch.nn.ReflectionPad1d((1, 0)) + self.stft = TorchSTFT(filter_length=gen_istft_n_fft, hop_length=gen_istft_hop_size, win_length=gen_istft_n_fft) + + + def forward(self, x, s, f0): + with torch.no_grad(): + f0 = self.f0_upsamp(f0[:, None]).transpose(1, 2) # bs,n,t + + har_source, noi_source, uv = self.m_source(f0) + har_source = har_source.transpose(1, 2).squeeze(1) + har_spec, har_phase = self.stft.transform(har_source) + har = torch.cat([har_spec, har_phase], dim=1) + + for i in range(self.num_upsamples): + x = F.leaky_relu(x, LRELU_SLOPE) + x_source = self.noise_convs[i](har) + x_source = self.noise_res[i](x_source, s) + + x = self.ups[i](x) + if i == self.num_upsamples - 1: + x = self.reflection_pad(x) + + x = x + x_source + xs = None + for j in range(self.num_kernels): + if xs is None: + xs = self.resblocks[i*self.num_kernels+j](x, s) + else: + xs += self.resblocks[i*self.num_kernels+j](x, s) + x = xs / self.num_kernels + x = F.leaky_relu(x) + x = self.conv_post(x) + spec = torch.exp(x[:,:self.post_n_fft // 2 + 1, :]) + phase = torch.sin(x[:, self.post_n_fft // 2 + 1:, :]) + return self.stft.inverse(spec, phase) + + def fw_phase(self, x, s): + for i in range(self.num_upsamples): + x = F.leaky_relu(x, LRELU_SLOPE) + x = self.ups[i](x) + xs = None + for j in range(self.num_kernels): + if xs is None: + xs = self.resblocks[i*self.num_kernels+j](x, s) + else: + xs += self.resblocks[i*self.num_kernels+j](x, s) + x = xs / self.num_kernels + x = F.leaky_relu(x) + x = self.reflection_pad(x) + x = self.conv_post(x) + spec = torch.exp(x[:,:self.post_n_fft // 2 + 1, :]) + phase = torch.sin(x[:, self.post_n_fft // 2 + 1:, :]) + return spec, phase + + def remove_weight_norm(self): + print('Removing weight norm...') + for l in self.ups: + remove_weight_norm(l) + for l in self.resblocks: + l.remove_weight_norm() + remove_weight_norm(self.conv_pre) + remove_weight_norm(self.conv_post) + + +class AdainResBlk1d(nn.Module): + def __init__(self, dim_in, dim_out, style_dim=64, actv=nn.LeakyReLU(0.2), + upsample='none', dropout_p=0.0): + super().__init__() + self.actv = actv + self.upsample_type = upsample + 
self.upsample = UpSample1d(upsample) + self.learned_sc = dim_in != dim_out + self._build_weights(dim_in, dim_out, style_dim) + self.dropout = nn.Dropout(dropout_p) + + if upsample == 'none': + self.pool = nn.Identity() + else: + self.pool = weight_norm(nn.ConvTranspose1d(dim_in, dim_in, kernel_size=3, stride=2, groups=dim_in, padding=1, output_padding=1)) + + + def _build_weights(self, dim_in, dim_out, style_dim): + self.conv1 = weight_norm(nn.Conv1d(dim_in, dim_out, 3, 1, 1)) + self.conv2 = weight_norm(nn.Conv1d(dim_out, dim_out, 3, 1, 1)) + self.norm1 = AdaIN1d(style_dim, dim_in) + self.norm2 = AdaIN1d(style_dim, dim_out) + if self.learned_sc: + self.conv1x1 = weight_norm(nn.Conv1d(dim_in, dim_out, 1, 1, 0, bias=False)) + + def _shortcut(self, x): + x = self.upsample(x) + if self.learned_sc: + x = self.conv1x1(x) + return x + + def _residual(self, x, s): + x = self.norm1(x, s) + x = self.actv(x) + x = self.pool(x) + x = self.conv1(self.dropout(x)) + x = self.norm2(x, s) + x = self.actv(x) + x = self.conv2(self.dropout(x)) + return x + + def forward(self, x, s): + out = self._residual(x, s) + out = (out + self._shortcut(x)) / np.sqrt(2) + return out + +class UpSample1d(nn.Module): + def __init__(self, layer_type): + super().__init__() + self.layer_type = layer_type + + def forward(self, x): + if self.layer_type == 'none': + return x + else: + return F.interpolate(x, scale_factor=2, mode='nearest') + +class Decoder(nn.Module): + def __init__(self, dim_in=512, F0_channel=512, style_dim=64, dim_out=80, + resblock_kernel_sizes = [3,7,11], + upsample_rates = [10, 6], + upsample_initial_channel=512, + resblock_dilation_sizes=[[1,3,5], [1,3,5], [1,3,5]], + upsample_kernel_sizes=[20, 12], + gen_istft_n_fft=20, gen_istft_hop_size=5): + super().__init__() + + self.decode = nn.ModuleList() + + self.encode = AdainResBlk1d(dim_in + 2, 1024, style_dim) + + self.decode.append(AdainResBlk1d(1024 + 2 + 64, 1024, style_dim)) + self.decode.append(AdainResBlk1d(1024 + 2 + 64, 1024, style_dim)) + self.decode.append(AdainResBlk1d(1024 + 2 + 64, 1024, style_dim)) + self.decode.append(AdainResBlk1d(1024 + 2 + 64, 512, style_dim, upsample=True)) + + self.F0_conv = weight_norm(nn.Conv1d(1, 1, kernel_size=3, stride=2, groups=1, padding=1)) + + self.N_conv = weight_norm(nn.Conv1d(1, 1, kernel_size=3, stride=2, groups=1, padding=1)) + + self.asr_res = nn.Sequential( + weight_norm(nn.Conv1d(512, 64, kernel_size=1)), + ) + + + self.generator = Generator(style_dim, resblock_kernel_sizes, upsample_rates, + upsample_initial_channel, resblock_dilation_sizes, + upsample_kernel_sizes, gen_istft_n_fft, gen_istft_hop_size) + + def forward(self, asr, F0_curve, N, s): + F0 = self.F0_conv(F0_curve.unsqueeze(1)) + N = self.N_conv(N.unsqueeze(1)) + + x = torch.cat([asr, F0, N], axis=1) + x = self.encode(x, s) + + asr_res = self.asr_res(asr) + + res = True + for block in self.decode: + if res: + x = torch.cat([x, asr_res, F0, N], axis=1) + x = block(x, s) + if block.upsample_type != "none": + res = False + + x = self.generator(x, s, F0_curve) + return x diff --git a/backend/python/kokoro/kokoro.py b/backend/python/kokoro/kokoro.py new file mode 100644 index 00000000..3a0df7f5 --- /dev/null +++ b/backend/python/kokoro/kokoro.py @@ -0,0 +1,166 @@ +# https://huggingface.co/hexgrad/Kokoro-82M/blob/main/kokoro.py +import phonemizer +import re +import torch +import numpy as np + +def split_num(num): + num = num.group() + if '.' 
in num: + return num + elif ':' in num: + h, m = [int(n) for n in num.split(':')] + if m == 0: + return f"{h} o'clock" + elif m < 10: + return f'{h} oh {m}' + return f'{h} {m}' + year = int(num[:4]) + if year < 1100 or year % 1000 < 10: + return num + left, right = num[:2], int(num[2:4]) + s = 's' if num.endswith('s') else '' + if 100 <= year % 1000 <= 999: + if right == 0: + return f'{left} hundred{s}' + elif right < 10: + return f'{left} oh {right}{s}' + return f'{left} {right}{s}' + +def flip_money(m): + m = m.group() + bill = 'dollar' if m[0] == '$' else 'pound' + if m[-1].isalpha(): + return f'{m[1:]} {bill}s' + elif '.' not in m: + s = '' if m[1:] == '1' else 's' + return f'{m[1:]} {bill}{s}' + b, c = m[1:].split('.') + s = '' if b == '1' else 's' + c = int(c.ljust(2, '0')) + coins = f"cent{'' if c == 1 else 's'}" if m[0] == '$' else ('penny' if c == 1 else 'pence') + return f'{b} {bill}{s} and {c} {coins}' + +def point_num(num): + a, b = num.group().split('.') + return ' point '.join([a, ' '.join(b)]) + +def normalize_text(text): + text = text.replace(chr(8216), "'").replace(chr(8217), "'") + text = text.replace('Ā«', chr(8220)).replace('Ā»', chr(8221)) + text = text.replace(chr(8220), '"').replace(chr(8221), '"') + text = text.replace('(', 'Ā«').replace(')', 'Ā»') + for a, b in zip('ć€ć€‚ļ¼ļ¼Œļ¼šļ¼›ļ¼Ÿ', ',.!,:;?'): + text = text.replace(a, b+' ') + text = re.sub(r'[^\S \n]', ' ', text) + text = re.sub(r' +', ' ', text) + text = re.sub(r'(?<=\n) +(?=\n)', '', text) + text = re.sub(r'\bD[Rr]\.(?= [A-Z])', 'Doctor', text) + text = re.sub(r'\b(?:Mr\.|MR\.(?= [A-Z]))', 'Mister', text) + text = re.sub(r'\b(?:Ms\.|MS\.(?= [A-Z]))', 'Miss', text) + text = re.sub(r'\b(?:Mrs\.|MRS\.(?= [A-Z]))', 'Mrs', text) + text = re.sub(r'\betc\.(?! [A-Z])', 'etc', text) + text = re.sub(r'(?i)\b(y)eah?\b', r"\1e'a", text) + text = re.sub(r'\d*\.\d+|\b\d{4}s?\b|(? 
510: + tokens = tokens[:510] + print('Truncated to 510 tokens') + ref_s = voicepack[len(tokens)] + out = forward(model, tokens, ref_s, speed) + ps = ''.join(next(k for k, v in VOCAB.items() if i == v) for i in tokens) + return out, ps + +def generate_full(model, text, voicepack, lang='a', speed=1, ps=None): + ps = ps or phonemize(text, lang) + tokens = tokenize(ps) + if not tokens: + return None + outs = [] + loop_count = len(tokens)//510 + (1 if len(tokens) % 510 != 0 else 0) + for i in range(loop_count): + ref_s = voicepack[len(tokens[i*510:(i+1)*510])] + out = forward(model, tokens[i*510:(i+1)*510], ref_s, speed) + outs.append(out) + outs = np.concatenate(outs) + ps = ''.join(next(k for k, v in VOCAB.items() if i == v) for i in tokens) + return outs, ps \ No newline at end of file diff --git a/backend/python/kokoro/models.py b/backend/python/kokoro/models.py new file mode 100644 index 00000000..cf358d9e --- /dev/null +++ b/backend/python/kokoro/models.py @@ -0,0 +1,373 @@ +# https://github.com/yl4579/StyleTTS2/blob/main/models.py +# https://huggingface.co/hexgrad/Kokoro-82M/blob/main/models.py +from istftnet import AdaIN1d, Decoder +from munch import Munch +from pathlib import Path +from plbert import load_plbert +from torch.nn.utils import weight_norm, spectral_norm +import json +import numpy as np +import os +import os.path as osp +import torch +import torch.nn as nn +import torch.nn.functional as F + +class LinearNorm(torch.nn.Module): + def __init__(self, in_dim, out_dim, bias=True, w_init_gain='linear'): + super(LinearNorm, self).__init__() + self.linear_layer = torch.nn.Linear(in_dim, out_dim, bias=bias) + + torch.nn.init.xavier_uniform_( + self.linear_layer.weight, + gain=torch.nn.init.calculate_gain(w_init_gain)) + + def forward(self, x): + return self.linear_layer(x) + +class LayerNorm(nn.Module): + def __init__(self, channels, eps=1e-5): + super().__init__() + self.channels = channels + self.eps = eps + + self.gamma = nn.Parameter(torch.ones(channels)) + self.beta = nn.Parameter(torch.zeros(channels)) + + def forward(self, x): + x = x.transpose(1, -1) + x = F.layer_norm(x, (self.channels,), self.gamma, self.beta, self.eps) + return x.transpose(1, -1) + +class TextEncoder(nn.Module): + def __init__(self, channels, kernel_size, depth, n_symbols, actv=nn.LeakyReLU(0.2)): + super().__init__() + self.embedding = nn.Embedding(n_symbols, channels) + + padding = (kernel_size - 1) // 2 + self.cnn = nn.ModuleList() + for _ in range(depth): + self.cnn.append(nn.Sequential( + weight_norm(nn.Conv1d(channels, channels, kernel_size=kernel_size, padding=padding)), + LayerNorm(channels), + actv, + nn.Dropout(0.2), + )) + # self.cnn = nn.Sequential(*self.cnn) + + self.lstm = nn.LSTM(channels, channels//2, 1, batch_first=True, bidirectional=True) + + def forward(self, x, input_lengths, m): + x = self.embedding(x) # [B, T, emb] + x = x.transpose(1, 2) # [B, emb, T] + m = m.to(input_lengths.device).unsqueeze(1) + x.masked_fill_(m, 0.0) + + for c in self.cnn: + x = c(x) + x.masked_fill_(m, 0.0) + + x = x.transpose(1, 2) # [B, T, chn] + + input_lengths = input_lengths.cpu().numpy() + x = nn.utils.rnn.pack_padded_sequence( + x, input_lengths, batch_first=True, enforce_sorted=False) + + self.lstm.flatten_parameters() + x, _ = self.lstm(x) + x, _ = nn.utils.rnn.pad_packed_sequence( + x, batch_first=True) + + x = x.transpose(-1, -2) + x_pad = torch.zeros([x.shape[0], x.shape[1], m.shape[-1]]) + + x_pad[:, :, :x.shape[-1]] = x + x = x_pad.to(x.device) + + x.masked_fill_(m, 0.0) + + return x + + def 
inference(self, x): + x = self.embedding(x) + x = x.transpose(1, 2) + x = self.cnn(x) + x = x.transpose(1, 2) + self.lstm.flatten_parameters() + x, _ = self.lstm(x) + return x + + def length_to_mask(self, lengths): + mask = torch.arange(lengths.max()).unsqueeze(0).expand(lengths.shape[0], -1).type_as(lengths) + mask = torch.gt(mask+1, lengths.unsqueeze(1)) + return mask + + +class UpSample1d(nn.Module): + def __init__(self, layer_type): + super().__init__() + self.layer_type = layer_type + + def forward(self, x): + if self.layer_type == 'none': + return x + else: + return F.interpolate(x, scale_factor=2, mode='nearest') + +class AdainResBlk1d(nn.Module): + def __init__(self, dim_in, dim_out, style_dim=64, actv=nn.LeakyReLU(0.2), + upsample='none', dropout_p=0.0): + super().__init__() + self.actv = actv + self.upsample_type = upsample + self.upsample = UpSample1d(upsample) + self.learned_sc = dim_in != dim_out + self._build_weights(dim_in, dim_out, style_dim) + self.dropout = nn.Dropout(dropout_p) + + if upsample == 'none': + self.pool = nn.Identity() + else: + self.pool = weight_norm(nn.ConvTranspose1d(dim_in, dim_in, kernel_size=3, stride=2, groups=dim_in, padding=1, output_padding=1)) + + + def _build_weights(self, dim_in, dim_out, style_dim): + self.conv1 = weight_norm(nn.Conv1d(dim_in, dim_out, 3, 1, 1)) + self.conv2 = weight_norm(nn.Conv1d(dim_out, dim_out, 3, 1, 1)) + self.norm1 = AdaIN1d(style_dim, dim_in) + self.norm2 = AdaIN1d(style_dim, dim_out) + if self.learned_sc: + self.conv1x1 = weight_norm(nn.Conv1d(dim_in, dim_out, 1, 1, 0, bias=False)) + + def _shortcut(self, x): + x = self.upsample(x) + if self.learned_sc: + x = self.conv1x1(x) + return x + + def _residual(self, x, s): + x = self.norm1(x, s) + x = self.actv(x) + x = self.pool(x) + x = self.conv1(self.dropout(x)) + x = self.norm2(x, s) + x = self.actv(x) + x = self.conv2(self.dropout(x)) + return x + + def forward(self, x, s): + out = self._residual(x, s) + out = (out + self._shortcut(x)) / np.sqrt(2) + return out + +class AdaLayerNorm(nn.Module): + def __init__(self, style_dim, channels, eps=1e-5): + super().__init__() + self.channels = channels + self.eps = eps + + self.fc = nn.Linear(style_dim, channels*2) + + def forward(self, x, s): + x = x.transpose(-1, -2) + x = x.transpose(1, -1) + + h = self.fc(s) + h = h.view(h.size(0), h.size(1), 1) + gamma, beta = torch.chunk(h, chunks=2, dim=1) + gamma, beta = gamma.transpose(1, -1), beta.transpose(1, -1) + + + x = F.layer_norm(x, (self.channels,), eps=self.eps) + x = (1 + gamma) * x + beta + return x.transpose(1, -1).transpose(-1, -2) + +class ProsodyPredictor(nn.Module): + + def __init__(self, style_dim, d_hid, nlayers, max_dur=50, dropout=0.1): + super().__init__() + + self.text_encoder = DurationEncoder(sty_dim=style_dim, + d_model=d_hid, + nlayers=nlayers, + dropout=dropout) + + self.lstm = nn.LSTM(d_hid + style_dim, d_hid // 2, 1, batch_first=True, bidirectional=True) + self.duration_proj = LinearNorm(d_hid, max_dur) + + self.shared = nn.LSTM(d_hid + style_dim, d_hid // 2, 1, batch_first=True, bidirectional=True) + self.F0 = nn.ModuleList() + self.F0.append(AdainResBlk1d(d_hid, d_hid, style_dim, dropout_p=dropout)) + self.F0.append(AdainResBlk1d(d_hid, d_hid // 2, style_dim, upsample=True, dropout_p=dropout)) + self.F0.append(AdainResBlk1d(d_hid // 2, d_hid // 2, style_dim, dropout_p=dropout)) + + self.N = nn.ModuleList() + self.N.append(AdainResBlk1d(d_hid, d_hid, style_dim, dropout_p=dropout)) + self.N.append(AdainResBlk1d(d_hid, d_hid // 2, style_dim, upsample=True, 
dropout_p=dropout)) + self.N.append(AdainResBlk1d(d_hid // 2, d_hid // 2, style_dim, dropout_p=dropout)) + + self.F0_proj = nn.Conv1d(d_hid // 2, 1, 1, 1, 0) + self.N_proj = nn.Conv1d(d_hid // 2, 1, 1, 1, 0) + + + def forward(self, texts, style, text_lengths, alignment, m): + d = self.text_encoder(texts, style, text_lengths, m) + + batch_size = d.shape[0] + text_size = d.shape[1] + + # predict duration + input_lengths = text_lengths.cpu().numpy() + x = nn.utils.rnn.pack_padded_sequence( + d, input_lengths, batch_first=True, enforce_sorted=False) + + m = m.to(text_lengths.device).unsqueeze(1) + + self.lstm.flatten_parameters() + x, _ = self.lstm(x) + x, _ = nn.utils.rnn.pad_packed_sequence( + x, batch_first=True) + + x_pad = torch.zeros([x.shape[0], m.shape[-1], x.shape[-1]]) + + x_pad[:, :x.shape[1], :] = x + x = x_pad.to(x.device) + + duration = self.duration_proj(nn.functional.dropout(x, 0.5, training=self.training)) + + en = (d.transpose(-1, -2) @ alignment) + + return duration.squeeze(-1), en + + def F0Ntrain(self, x, s): + x, _ = self.shared(x.transpose(-1, -2)) + + F0 = x.transpose(-1, -2) + for block in self.F0: + F0 = block(F0, s) + F0 = self.F0_proj(F0) + + N = x.transpose(-1, -2) + for block in self.N: + N = block(N, s) + N = self.N_proj(N) + + return F0.squeeze(1), N.squeeze(1) + + def length_to_mask(self, lengths): + mask = torch.arange(lengths.max()).unsqueeze(0).expand(lengths.shape[0], -1).type_as(lengths) + mask = torch.gt(mask+1, lengths.unsqueeze(1)) + return mask + +class DurationEncoder(nn.Module): + + def __init__(self, sty_dim, d_model, nlayers, dropout=0.1): + super().__init__() + self.lstms = nn.ModuleList() + for _ in range(nlayers): + self.lstms.append(nn.LSTM(d_model + sty_dim, + d_model // 2, + num_layers=1, + batch_first=True, + bidirectional=True, + dropout=dropout)) + self.lstms.append(AdaLayerNorm(sty_dim, d_model)) + + + self.dropout = dropout + self.d_model = d_model + self.sty_dim = sty_dim + + def forward(self, x, style, text_lengths, m): + masks = m.to(text_lengths.device) + + x = x.permute(2, 0, 1) + s = style.expand(x.shape[0], x.shape[1], -1) + x = torch.cat([x, s], axis=-1) + x.masked_fill_(masks.unsqueeze(-1).transpose(0, 1), 0.0) + + x = x.transpose(0, 1) + input_lengths = text_lengths.cpu().numpy() + x = x.transpose(-1, -2) + + for block in self.lstms: + if isinstance(block, AdaLayerNorm): + x = block(x.transpose(-1, -2), style).transpose(-1, -2) + x = torch.cat([x, s.permute(1, -1, 0)], axis=1) + x.masked_fill_(masks.unsqueeze(-1).transpose(-1, -2), 0.0) + else: + x = x.transpose(-1, -2) + x = nn.utils.rnn.pack_padded_sequence( + x, input_lengths, batch_first=True, enforce_sorted=False) + block.flatten_parameters() + x, _ = block(x) + x, _ = nn.utils.rnn.pad_packed_sequence( + x, batch_first=True) + x = F.dropout(x, p=self.dropout, training=self.training) + x = x.transpose(-1, -2) + + x_pad = torch.zeros([x.shape[0], x.shape[1], m.shape[-1]]) + + x_pad[:, :, :x.shape[-1]] = x + x = x_pad.to(x.device) + + return x.transpose(-1, -2) + + def inference(self, x, style): + x = self.embedding(x.transpose(-1, -2)) * np.sqrt(self.d_model) + style = style.expand(x.shape[0], x.shape[1], -1) + x = torch.cat([x, style], axis=-1) + src = self.pos_encoder(x) + output = self.transformer_encoder(src).transpose(0, 1) + return output + + def length_to_mask(self, lengths): + mask = torch.arange(lengths.max()).unsqueeze(0).expand(lengths.shape[0], -1).type_as(lengths) + mask = torch.gt(mask+1, lengths.unsqueeze(1)) + return mask + +# 
https://github.com/yl4579/StyleTTS2/blob/main/utils.py +def recursive_munch(d): + if isinstance(d, dict): + return Munch((k, recursive_munch(v)) for k, v in d.items()) + elif isinstance(d, list): + return [recursive_munch(v) for v in d] + else: + return d + +def build_model(path, device): + config = Path(__file__).parent / 'config.json' + assert config.exists(), f'Config path incorrect: config.json not found at {config}' + with open(config, 'r') as r: + args = recursive_munch(json.load(r)) + assert args.decoder.type == 'istftnet', f'Unknown decoder type: {args.decoder.type}' + decoder = Decoder(dim_in=args.hidden_dim, style_dim=args.style_dim, dim_out=args.n_mels, + resblock_kernel_sizes = args.decoder.resblock_kernel_sizes, + upsample_rates = args.decoder.upsample_rates, + upsample_initial_channel=args.decoder.upsample_initial_channel, + resblock_dilation_sizes=args.decoder.resblock_dilation_sizes, + upsample_kernel_sizes=args.decoder.upsample_kernel_sizes, + gen_istft_n_fft=args.decoder.gen_istft_n_fft, gen_istft_hop_size=args.decoder.gen_istft_hop_size) + text_encoder = TextEncoder(channels=args.hidden_dim, kernel_size=5, depth=args.n_layer, n_symbols=args.n_token) + predictor = ProsodyPredictor(style_dim=args.style_dim, d_hid=args.hidden_dim, nlayers=args.n_layer, max_dur=args.max_dur, dropout=args.dropout) + bert = load_plbert() + bert_encoder = nn.Linear(bert.config.hidden_size, args.hidden_dim) + for parent in [bert, bert_encoder, predictor, decoder, text_encoder]: + for child in parent.children(): + if isinstance(child, nn.RNNBase): + child.flatten_parameters() + model = Munch( + bert=bert.to(device).eval(), + bert_encoder=bert_encoder.to(device).eval(), + predictor=predictor.to(device).eval(), + decoder=decoder.to(device).eval(), + text_encoder=text_encoder.to(device).eval(), + ) + for key, state_dict in torch.load(path, map_location='cpu', weights_only=True)['net'].items(): + assert key in model, key + try: + model[key].load_state_dict(state_dict) + except: + state_dict = {k[7:]: v for k, v in state_dict.items()} + model[key].load_state_dict(state_dict, strict=False) + return model diff --git a/backend/python/kokoro/plbert.py b/backend/python/kokoro/plbert.py new file mode 100644 index 00000000..bf1dba5a --- /dev/null +++ b/backend/python/kokoro/plbert.py @@ -0,0 +1,16 @@ +# https://huggingface.co/hexgrad/Kokoro-82M/blob/main/plbert.py +# https://github.com/yl4579/StyleTTS2/blob/main/Utils/PLBERT/util.py +from transformers import AlbertConfig, AlbertModel + +class CustomAlbert(AlbertModel): + def forward(self, *args, **kwargs): + # Call the original forward method + outputs = super().forward(*args, **kwargs) + # Only return the last_hidden_state + return outputs.last_hidden_state + +def load_plbert(): + plbert_config = {'vocab_size': 178, 'hidden_size': 768, 'num_attention_heads': 12, 'intermediate_size': 2048, 'max_position_embeddings': 512, 'num_hidden_layers': 12, 'dropout': 0.1} + albert_base_configuration = AlbertConfig(**plbert_config) + bert = CustomAlbert(albert_base_configuration) + return bert diff --git a/backend/python/kokoro/protogen.sh b/backend/python/kokoro/protogen.sh new file mode 100644 index 00000000..32f39fbb --- /dev/null +++ b/backend/python/kokoro/protogen.sh @@ -0,0 +1,6 @@ +#!/bin/bash +set -e + +source $(dirname $0)/../common/libbackend.sh + +python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. 
backend.proto \ No newline at end of file diff --git a/backend/python/kokoro/requirements-cpu.txt b/backend/python/kokoro/requirements-cpu.txt new file mode 100644 index 00000000..b4f1261f --- /dev/null +++ b/backend/python/kokoro/requirements-cpu.txt @@ -0,0 +1,2 @@ +torch==2.4.1 +transformers \ No newline at end of file diff --git a/backend/python/kokoro/requirements-cublas11.txt b/backend/python/kokoro/requirements-cublas11.txt new file mode 100644 index 00000000..ed0d4df5 --- /dev/null +++ b/backend/python/kokoro/requirements-cublas11.txt @@ -0,0 +1,3 @@ +--extra-index-url https://download.pytorch.org/whl/cu118 +torch==2.4.1+cu118 +transformers \ No newline at end of file diff --git a/backend/python/kokoro/requirements-cublas12.txt b/backend/python/kokoro/requirements-cublas12.txt new file mode 100644 index 00000000..b4f1261f --- /dev/null +++ b/backend/python/kokoro/requirements-cublas12.txt @@ -0,0 +1,2 @@ +torch==2.4.1 +transformers \ No newline at end of file diff --git a/backend/python/kokoro/requirements-hipblas.txt b/backend/python/kokoro/requirements-hipblas.txt new file mode 100644 index 00000000..ec8d0306 --- /dev/null +++ b/backend/python/kokoro/requirements-hipblas.txt @@ -0,0 +1,3 @@ +--extra-index-url https://download.pytorch.org/whl/rocm6.0 +torch==2.4.1+rocm6.0 +transformers \ No newline at end of file diff --git a/backend/python/kokoro/requirements-intel.txt b/backend/python/kokoro/requirements-intel.txt new file mode 100644 index 00000000..b16448d3 --- /dev/null +++ b/backend/python/kokoro/requirements-intel.txt @@ -0,0 +1,5 @@ +--extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/ +intel-extension-for-pytorch==2.3.110+xpu +torch==2.3.1+cxx11.abi +oneccl_bind_pt==2.3.100+xpu +transformers \ No newline at end of file diff --git a/backend/python/kokoro/requirements.txt b/backend/python/kokoro/requirements.txt new file mode 100644 index 00000000..75d65ba1 --- /dev/null +++ b/backend/python/kokoro/requirements.txt @@ -0,0 +1,7 @@ +grpcio==1.69.0 +protobuf +phonemizer +scipy +munch +setuptools +soundfile \ No newline at end of file diff --git a/backend/python/kokoro/run.sh b/backend/python/kokoro/run.sh new file mode 100755 index 00000000..375c07e5 --- /dev/null +++ b/backend/python/kokoro/run.sh @@ -0,0 +1,4 @@ +#!/bin/bash +source $(dirname $0)/../common/libbackend.sh + +startBackend $@ \ No newline at end of file diff --git a/backend/python/kokoro/test.sh b/backend/python/kokoro/test.sh new file mode 100755 index 00000000..6940b066 --- /dev/null +++ b/backend/python/kokoro/test.sh @@ -0,0 +1,6 @@ +#!/bin/bash +set -e + +source $(dirname $0)/../common/libbackend.sh + +runUnittests diff --git a/pkg/model/loader.go b/pkg/model/loader.go index d62f52b2..bb9bdd8a 100644 --- a/pkg/model/loader.go +++ b/pkg/model/loader.go @@ -54,6 +54,8 @@ var knownModelsNameSuffixToSkip []string = []string{ ".yml", ".json", ".txt", + ".pt", + ".onnx", ".md", ".MD", ".DS_Store", From d08d97bebf9fd44010f5a38b3f7002edb29f2793 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Thu, 16 Jan 2025 22:26:55 +0100 Subject: [PATCH 035/679] chore(model gallery): fix typo Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gallery/index.yaml b/gallery/index.yaml index 22d748d8..349cd419 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -3281,7 +3281,7 @@ - filename: smollm2-1.7b-instruct-q4_k_m.gguf sha256: decd2598bc2c8ed08c19adc3c8fdd461ee19ed5708679d1c54ef54a5a30d4f33 uri: 
huggingface://HuggingFaceTB/SmolLM2-1.7B-Instruct-GGUF/smollm2-1.7b-instruct-q4_k_m.gguf -- !!merge <<: qwen25 +- !!merge <<: *qwen25 name: "vikhr-qwen-2.5-1.5b-instruct" urls: - https://huggingface.co/Vikhrmodels/Vikhr-Qwen-2.5-1.5B-Instruct From 7d0ac1ea3f5faf8047623f5cb92df23bdbd1f393 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Fri, 17 Jan 2025 09:35:10 +0100 Subject: [PATCH 036/679] chore(vall-e-x): Drop backend (#4619) There are many new architectures that are SOTA and replaces vall-e-x nowadays. Signed-off-by: Ettore Di Giacinto --- .github/dependabot.yml | 4 - .github/workflows/test-extra.yml | 20 --- Dockerfile | 7 +- Makefile | 13 +- backend/python/vall-e-x/.gitignore | 1 - backend/python/vall-e-x/Makefile | 33 ---- backend/python/vall-e-x/README.md | 5 - backend/python/vall-e-x/backend.py | 141 ------------------ backend/python/vall-e-x/install.sh | 22 --- backend/python/vall-e-x/requirements-cpu.txt | 3 - .../python/vall-e-x/requirements-cublas11.txt | 4 - .../python/vall-e-x/requirements-cublas12.txt | 3 - .../python/vall-e-x/requirements-hipblas.txt | 4 - .../python/vall-e-x/requirements-intel.txt | 7 - backend/python/vall-e-x/requirements.txt | 4 - backend/python/vall-e-x/run.sh | 6 - backend/python/vall-e-x/test.py | 81 ---------- backend/python/vall-e-x/test.sh | 7 - core/backend/options.go | 2 +- core/config/backend_config.go | 7 +- 20 files changed, 6 insertions(+), 368 deletions(-) delete mode 100644 backend/python/vall-e-x/.gitignore delete mode 100644 backend/python/vall-e-x/Makefile delete mode 100644 backend/python/vall-e-x/README.md delete mode 100644 backend/python/vall-e-x/backend.py delete mode 100755 backend/python/vall-e-x/install.sh delete mode 100644 backend/python/vall-e-x/requirements-cpu.txt delete mode 100644 backend/python/vall-e-x/requirements-cublas11.txt delete mode 100644 backend/python/vall-e-x/requirements-cublas12.txt delete mode 100644 backend/python/vall-e-x/requirements-hipblas.txt delete mode 100644 backend/python/vall-e-x/requirements-intel.txt delete mode 100644 backend/python/vall-e-x/requirements.txt delete mode 100755 backend/python/vall-e-x/run.sh delete mode 100644 backend/python/vall-e-x/test.py delete mode 100755 backend/python/vall-e-x/test.sh diff --git a/.github/dependabot.yml b/.github/dependabot.yml index fcd6c88c..8fa0cca5 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -85,10 +85,6 @@ updates: directory: "/backend/python/transformers-musicgen" schedule: interval: "weekly" - - package-ecosystem: "pip" - directory: "/backend/python/vall-e-x" - schedule: - interval: "weekly" - package-ecosystem: "pip" directory: "/backend/python/vllm" schedule: diff --git a/.github/workflows/test-extra.yml b/.github/workflows/test-extra.yml index a2c34872..3c2fee37 100644 --- a/.github/workflows/test-extra.yml +++ b/.github/workflows/test-extra.yml @@ -260,26 +260,6 @@ jobs: # run: | # make --jobs=5 --output-sync=target -C backend/python/vllm # make --jobs=5 --output-sync=target -C backend/python/vllm test - tests-vallex: - runs-on: ubuntu-latest - steps: - - name: Clone - uses: actions/checkout@v4 - with: - submodules: true - - name: Dependencies - run: | - sudo apt-get update - sudo apt-get install build-essential ffmpeg - # Install UV - curl -LsSf https://astral.sh/uv/install.sh | sh - sudo apt-get install -y ca-certificates cmake curl patch python3-pip - sudo apt-get install -y libopencv-dev - pip install --user --no-cache-dir grpcio-tools==1.64.1 - - name: Test vall-e-x - run: | - make --jobs=5 
--output-sync=target -C backend/python/vall-e-x - make --jobs=5 --output-sync=target -C backend/python/vall-e-x test tests-coqui: runs-on: ubuntu-latest diff --git a/Dockerfile b/Dockerfile index 481edf90..354ef298 100644 --- a/Dockerfile +++ b/Dockerfile @@ -15,7 +15,7 @@ ARG TARGETARCH ARG TARGETVARIANT ENV DEBIAN_FRONTEND=noninteractive -ENV EXTERNAL_GRPC_BACKENDS="coqui:/build/backend/python/coqui/run.sh,huggingface-embeddings:/build/backend/python/sentencetransformers/run.sh,transformers:/build/backend/python/transformers/run.sh,sentencetransformers:/build/backend/python/sentencetransformers/run.sh,rerankers:/build/backend/python/rerankers/run.sh,autogptq:/build/backend/python/autogptq/run.sh,bark:/build/backend/python/bark/run.sh,diffusers:/build/backend/python/diffusers/run.sh,openvoice:/build/backend/python/openvoice/run.sh,kokoro:/build/backend/python/kokoro/run.sh,vall-e-x:/build/backend/python/vall-e-x/run.sh,vllm:/build/backend/python/vllm/run.sh,mamba:/build/backend/python/mamba/run.sh,exllama2:/build/backend/python/exllama2/run.sh,transformers-musicgen:/build/backend/python/transformers-musicgen/run.sh,parler-tts:/build/backend/python/parler-tts/run.sh" +ENV EXTERNAL_GRPC_BACKENDS="coqui:/build/backend/python/coqui/run.sh,huggingface-embeddings:/build/backend/python/sentencetransformers/run.sh,transformers:/build/backend/python/transformers/run.sh,sentencetransformers:/build/backend/python/sentencetransformers/run.sh,rerankers:/build/backend/python/rerankers/run.sh,autogptq:/build/backend/python/autogptq/run.sh,bark:/build/backend/python/bark/run.sh,diffusers:/build/backend/python/diffusers/run.sh,openvoice:/build/backend/python/openvoice/run.sh,kokoro:/build/backend/python/kokoro/run.sh,vllm:/build/backend/python/vllm/run.sh,mamba:/build/backend/python/mamba/run.sh,exllama2:/build/backend/python/exllama2/run.sh,transformers-musicgen:/build/backend/python/transformers-musicgen/run.sh,parler-tts:/build/backend/python/parler-tts/run.sh" RUN apt-get update && \ @@ -453,10 +453,7 @@ RUN if [[ ( "${EXTRA_BACKENDS}" =~ "coqui" || -z "${EXTRA_BACKENDS}" ) && "$IMAG make -C backend/python/transformers-musicgen \ ; fi -RUN if [[ ( "${EXTRA_BACKENDS}" =~ "vall-e-x" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \ - make -C backend/python/vall-e-x \ - ; fi && \ - if [[ ( "${EXTRA_BACKENDS}" =~ "kokoro" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \ +RUN if [[ ( "${EXTRA_BACKENDS}" =~ "kokoro" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \ make -C backend/python/kokoro \ ; fi && \ if [[ ( "${EXTRA_BACKENDS}" =~ "openvoice" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \ diff --git a/Makefile b/Makefile index 49c81950..1983f568 100644 --- a/Makefile +++ b/Makefile @@ -583,10 +583,10 @@ protogen-go-clean: $(RM) bin/* .PHONY: protogen-python -protogen-python: autogptq-protogen bark-protogen coqui-protogen diffusers-protogen exllama2-protogen mamba-protogen rerankers-protogen sentencetransformers-protogen transformers-protogen parler-tts-protogen transformers-musicgen-protogen vall-e-x-protogen kokoro-protogen vllm-protogen openvoice-protogen +protogen-python: autogptq-protogen bark-protogen coqui-protogen diffusers-protogen exllama2-protogen mamba-protogen rerankers-protogen sentencetransformers-protogen transformers-protogen parler-tts-protogen transformers-musicgen-protogen kokoro-protogen vllm-protogen openvoice-protogen .PHONY: protogen-python-clean -protogen-python-clean: autogptq-protogen-clean 
bark-protogen-clean coqui-protogen-clean diffusers-protogen-clean exllama2-protogen-clean mamba-protogen-clean sentencetransformers-protogen-clean rerankers-protogen-clean transformers-protogen-clean transformers-musicgen-protogen-clean parler-tts-protogen-clean vall-e-x-protogen-clean kokoro-protogen-clean vllm-protogen-clean openvoice-protogen-clean +protogen-python-clean: autogptq-protogen-clean bark-protogen-clean coqui-protogen-clean diffusers-protogen-clean exllama2-protogen-clean mamba-protogen-clean sentencetransformers-protogen-clean rerankers-protogen-clean transformers-protogen-clean transformers-musicgen-protogen-clean parler-tts-protogen-clean kokoro-protogen-clean vllm-protogen-clean openvoice-protogen-clean .PHONY: autogptq-protogen autogptq-protogen: @@ -676,14 +676,6 @@ transformers-musicgen-protogen: transformers-musicgen-protogen-clean: $(MAKE) -C backend/python/transformers-musicgen protogen-clean -.PHONY: vall-e-x-protogen -vall-e-x-protogen: - $(MAKE) -C backend/python/vall-e-x protogen - -.PHONY: vall-e-x-protogen-clean -vall-e-x-protogen-clean: - $(MAKE) -C backend/python/vall-e-x protogen-clean - .PHONY: kokoro-protogen kokoro-protogen: $(MAKE) -C backend/python/kokoro protogen @@ -722,7 +714,6 @@ prepare-extra-conda-environments: protogen-python $(MAKE) -C backend/python/transformers $(MAKE) -C backend/python/transformers-musicgen $(MAKE) -C backend/python/parler-tts - $(MAKE) -C backend/python/vall-e-x $(MAKE) -C backend/python/kokoro $(MAKE) -C backend/python/openvoice $(MAKE) -C backend/python/exllama2 diff --git a/backend/python/vall-e-x/.gitignore b/backend/python/vall-e-x/.gitignore deleted file mode 100644 index 1d3a0654..00000000 --- a/backend/python/vall-e-x/.gitignore +++ /dev/null @@ -1 +0,0 @@ -source \ No newline at end of file diff --git a/backend/python/vall-e-x/Makefile b/backend/python/vall-e-x/Makefile deleted file mode 100644 index a3ca32a3..00000000 --- a/backend/python/vall-e-x/Makefile +++ /dev/null @@ -1,33 +0,0 @@ -ifneq (,$(findstring sycl,$(BUILD_TYPE))) -export SKIP_CONDA=1 -endif - -.PHONY: ttsvalle -ttsvalle: protogen - bash install.sh - -.PHONY: run -run: protogen - @echo "Running ttsvalle..." - bash run.sh - @echo "ttsvalle run." - -.PHONY: test -test: protogen - @echo "Testing valle..." - bash test.sh - @echo "valle tested." - -.PHONY: protogen -protogen: backend_pb2_grpc.py backend_pb2.py - -.PHONY: protogen-clean -protogen-clean: - $(RM) backend_pb2_grpc.py backend_pb2.py - -backend_pb2_grpc.py backend_pb2.py: - python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. 
backend.proto - -.PHONY: clean -clean: protogen-clean - rm -rf source venv __pycache__ \ No newline at end of file diff --git a/backend/python/vall-e-x/README.md b/backend/python/vall-e-x/README.md deleted file mode 100644 index a3a93361..00000000 --- a/backend/python/vall-e-x/README.md +++ /dev/null @@ -1,5 +0,0 @@ -# Creating a separate environment for the ttsvalle project - -``` -make ttsvalle -``` \ No newline at end of file diff --git a/backend/python/vall-e-x/backend.py b/backend/python/vall-e-x/backend.py deleted file mode 100644 index fc9d93bd..00000000 --- a/backend/python/vall-e-x/backend.py +++ /dev/null @@ -1,141 +0,0 @@ -#!/usr/bin/env python3 - -from concurrent import futures -import argparse -import signal -import sys -import os -import time -import backend_pb2 -import backend_pb2_grpc - -import grpc - -from utils.generation import SAMPLE_RATE, generate_audio, preload_models -from scipy.io.wavfile import write as write_wav -from utils.prompt_making import make_prompt - -_ONE_DAY_IN_SECONDS = 60 * 60 * 24 - -# If MAX_WORKERS are specified in the environment use it, otherwise default to 1 -MAX_WORKERS = int(os.environ.get('PYTHON_GRPC_MAX_WORKERS', '1')) - -# Implement the BackendServicer class with the service methods -class BackendServicer(backend_pb2_grpc.BackendServicer): - """ - gRPC servicer for backend services. - """ - def Health(self, request, context): - """ - Health check service. - - Args: - request: A backend_pb2.HealthRequest instance. - context: A grpc.ServicerContext instance. - - Returns: - A backend_pb2.Reply instance with message "OK". - """ - return backend_pb2.Reply(message=bytes("OK", 'utf-8')) - - def LoadModel(self, request, context): - """ - Load model service. - - Args: - request: A backend_pb2.LoadModelRequest instance. - context: A grpc.ServicerContext instance. - - Returns: - A backend_pb2.Result instance with message "Model loaded successfully" and success=True if successful. - A backend_pb2.Result instance with success=False and error message if unsuccessful. - """ - model_name = request.Model - try: - print("Preparing models, please wait", file=sys.stderr) - # download and load all models - preload_models() - self.clonedVoice = False - # Assume directory from request.ModelFile. - # Only if request.LoraAdapter it's not an absolute path - if request.AudioPath and request.ModelFile != "" and not os.path.isabs(request.AudioPath): - # get base path of modelFile - modelFileBase = os.path.dirname(request.ModelFile) - # modify LoraAdapter to be relative to modelFileBase - request.AudioPath = os.path.join(modelFileBase, request.AudioPath) - if request.AudioPath != "": - print("Generating model", file=sys.stderr) - make_prompt(name=model_name, audio_prompt_path=request.AudioPath) - self.clonedVoice = True - ### Use given transcript - ##make_prompt(name=model_name, audio_prompt_path="paimon_prompt.wav", - ## transcript="Just, what was that? Paimon thought we were gonna get eaten.") - except Exception as err: - return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}") - # Implement your logic here for the LoadModel service - # Replace this with your desired response - return backend_pb2.Result(message="Model loaded successfully", success=True) - - def TTS(self, request, context): - """ - Text-to-speech service. - - Args: - request: A backend_pb2.TTSRequest instance. - context: A grpc.ServicerContext instance. - - Returns: - A backend_pb2.Result instance with success=True if successful. 
- A backend_pb2.Result instance with success=False and error message if unsuccessful. - """ - model = request.model - print(request, file=sys.stderr) - try: - audio_array = None - if model != "": - if self.clonedVoice: - model = os.path.basename(request.model) - audio_array = generate_audio(request.text, prompt=model) - else: - audio_array = generate_audio(request.text) - print("saving to", request.dst, file=sys.stderr) - # save audio to disk - write_wav(request.dst, SAMPLE_RATE, audio_array) - print("saved to", request.dst, file=sys.stderr) - print("tts for", file=sys.stderr) - print(request, file=sys.stderr) - except Exception as err: - return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}") - return backend_pb2.Result(success=True) - -def serve(address): - server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS)) - backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server) - server.add_insecure_port(address) - server.start() - print("Server started. Listening on: " + address, file=sys.stderr) - - # Define the signal handler function - def signal_handler(sig, frame): - print("Received termination signal. Shutting down...") - server.stop(0) - sys.exit(0) - - # Set the signal handlers for SIGINT and SIGTERM - signal.signal(signal.SIGINT, signal_handler) - signal.signal(signal.SIGTERM, signal_handler) - - try: - while True: - time.sleep(_ONE_DAY_IN_SECONDS) - except KeyboardInterrupt: - server.stop(0) - -if __name__ == "__main__": - parser = argparse.ArgumentParser(description="Run the gRPC server.") - parser.add_argument( - "--addr", default="localhost:50051", help="The address to bind the server to." - ) - args = parser.parse_args() - - serve(args.addr) diff --git a/backend/python/vall-e-x/install.sh b/backend/python/vall-e-x/install.sh deleted file mode 100755 index c0cce96a..00000000 --- a/backend/python/vall-e-x/install.sh +++ /dev/null @@ -1,22 +0,0 @@ -#!/bin/bash -set -e - -VALL_E_X_VERSION=3faaf8ccadb154d63b38070caf518ce9309ea0f4 - -source $(dirname $0)/../common/libbackend.sh - -# This is here because the Intel pip index is broken and returns 200 status codes for every package name, it just doesn't return any package links. -# This makes uv think that the package exists in the Intel pip index, and by default it stops looking at other pip indexes once it finds a match. 
-# We need uv to continue falling through to the pypi default index to find optimum[openvino] in the pypi index -# the --upgrade actually allows us to *downgrade* torch to the version provided in the Intel pip index -if [ "x${BUILD_PROFILE}" == "xintel" ]; then - EXTRA_PIP_INSTALL_FLAGS+=" --upgrade --index-strategy=unsafe-first-match" -fi - -installRequirements - -git clone https://github.com/Plachtaa/VALL-E-X.git ${MY_DIR}/source -pushd ${MY_DIR}/source && git checkout -b build ${VALL_E_X_VERSION} && popd -uv pip install ${BUILD_ISOLATION_FLAG} --requirement ${MY_DIR}/source/requirements.txt - -cp -v ./*py $MY_DIR/source/ diff --git a/backend/python/vall-e-x/requirements-cpu.txt b/backend/python/vall-e-x/requirements-cpu.txt deleted file mode 100644 index 0aad8812..00000000 --- a/backend/python/vall-e-x/requirements-cpu.txt +++ /dev/null @@ -1,3 +0,0 @@ -accelerate -torch==2.4.1 -torchaudio==2.4.1 \ No newline at end of file diff --git a/backend/python/vall-e-x/requirements-cublas11.txt b/backend/python/vall-e-x/requirements-cublas11.txt deleted file mode 100644 index c45de5b7..00000000 --- a/backend/python/vall-e-x/requirements-cublas11.txt +++ /dev/null @@ -1,4 +0,0 @@ ---extra-index-url https://download.pytorch.org/whl/cu118 -accelerate -torch==2.4.1+cu118 -torchaudio==2.4.1+cu118 \ No newline at end of file diff --git a/backend/python/vall-e-x/requirements-cublas12.txt b/backend/python/vall-e-x/requirements-cublas12.txt deleted file mode 100644 index 0aad8812..00000000 --- a/backend/python/vall-e-x/requirements-cublas12.txt +++ /dev/null @@ -1,3 +0,0 @@ -accelerate -torch==2.4.1 -torchaudio==2.4.1 \ No newline at end of file diff --git a/backend/python/vall-e-x/requirements-hipblas.txt b/backend/python/vall-e-x/requirements-hipblas.txt deleted file mode 100644 index fc43790a..00000000 --- a/backend/python/vall-e-x/requirements-hipblas.txt +++ /dev/null @@ -1,4 +0,0 @@ ---extra-index-url https://download.pytorch.org/whl/rocm6.0 -accelerate -torch==2.3.0+rocm6.0 -torchaudio==2.3.0+rocm6.0 \ No newline at end of file diff --git a/backend/python/vall-e-x/requirements-intel.txt b/backend/python/vall-e-x/requirements-intel.txt deleted file mode 100644 index efcf885a..00000000 --- a/backend/python/vall-e-x/requirements-intel.txt +++ /dev/null @@ -1,7 +0,0 @@ ---extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/ -intel-extension-for-pytorch==2.3.110+xpu -accelerate -torch==2.3.1+cxx11.abi -torchaudio==2.3.1+cxx11.abi -optimum[openvino] -oneccl_bind_pt==2.3.100+xpu \ No newline at end of file diff --git a/backend/python/vall-e-x/requirements.txt b/backend/python/vall-e-x/requirements.txt deleted file mode 100644 index a1eea776..00000000 --- a/backend/python/vall-e-x/requirements.txt +++ /dev/null @@ -1,4 +0,0 @@ -grpcio==1.69.0 -protobuf -certifi -setuptools \ No newline at end of file diff --git a/backend/python/vall-e-x/run.sh b/backend/python/vall-e-x/run.sh deleted file mode 100755 index 4b0682ad..00000000 --- a/backend/python/vall-e-x/run.sh +++ /dev/null @@ -1,6 +0,0 @@ -#!/bin/bash -BACKEND_FILE="${MY_DIR}/source/backend.py" - -source $(dirname $0)/../common/libbackend.sh - -startBackend $@ \ No newline at end of file diff --git a/backend/python/vall-e-x/test.py b/backend/python/vall-e-x/test.py deleted file mode 100644 index f31a148c..00000000 --- a/backend/python/vall-e-x/test.py +++ /dev/null @@ -1,81 +0,0 @@ -""" -A test script to test the gRPC service -""" -import unittest -import subprocess -import time -import backend_pb2 -import backend_pb2_grpc - 
-import grpc - - -class TestBackendServicer(unittest.TestCase): - """ - TestBackendServicer is the class that tests the gRPC service - """ - def setUp(self): - """ - This method sets up the gRPC service by starting the server - """ - self.service = subprocess.Popen(["python3", "backend.py", "--addr", "localhost:50051"]) - time.sleep(10) - - def tearDown(self) -> None: - """ - This method tears down the gRPC service by terminating the server - """ - self.service.terminate() - self.service.wait() - - def test_server_startup(self): - """ - This method tests if the server starts up successfully - """ - try: - self.setUp() - with grpc.insecure_channel("localhost:50051") as channel: - stub = backend_pb2_grpc.BackendStub(channel) - response = stub.Health(backend_pb2.HealthMessage()) - self.assertEqual(response.message, b'OK') - except Exception as err: - print(err) - self.fail("Server failed to start") - finally: - self.tearDown() - - def test_load_model(self): - """ - This method tests if the model is loaded successfully - """ - try: - self.setUp() - with grpc.insecure_channel("localhost:50051") as channel: - stub = backend_pb2_grpc.BackendStub(channel) - response = stub.LoadModel(backend_pb2.ModelOptions(Model="dingzhen")) - self.assertTrue(response.success) - self.assertEqual(response.message, "Model loaded successfully") - except Exception as err: - print(err) - self.fail("LoadModel service failed") - finally: - self.tearDown() - - def test_tts(self): - """ - This method tests if the embeddings are generated successfully - """ - try: - self.setUp() - with grpc.insecure_channel("localhost:50051") as channel: - stub = backend_pb2_grpc.BackendStub(channel) - response = stub.LoadModel(backend_pb2.ModelOptions(Model="dingzhen")) - self.assertTrue(response.success) - tts_request = backend_pb2.TTSRequest(text="80s TV news production music hit for tonight's biggest story") - tts_response = stub.TTS(tts_request) - self.assertIsNotNone(tts_response) - except Exception as err: - print(err) - self.fail("TTS service failed") - finally: - self.tearDown() \ No newline at end of file diff --git a/backend/python/vall-e-x/test.sh b/backend/python/vall-e-x/test.sh deleted file mode 100755 index 57336b39..00000000 --- a/backend/python/vall-e-x/test.sh +++ /dev/null @@ -1,7 +0,0 @@ -#!/bin/bash -set -e -TEST_FILE="./source/test.py" - -source $(dirname $0)/../common/libbackend.sh - -runUnittests diff --git a/core/backend/options.go b/core/backend/options.go index f6247c60..92a42893 100644 --- a/core/backend/options.go +++ b/core/backend/options.go @@ -140,7 +140,7 @@ func grpcModelOpts(c config.BackendConfig) *pb.ModelOptions { NBatch: int32(b), NoMulMatQ: c.NoMulMatQ, DraftModel: c.DraftModel, - AudioPath: c.VallE.AudioPath, + AudioPath: c.AudioPath, Quantization: c.Quantization, LoadFormat: c.LoadFormat, GPUMemoryUtilization: c.GPUMemoryUtilization, diff --git a/core/config/backend_config.go b/core/config/backend_config.go index f07ec3d3..bb2fa643 100644 --- a/core/config/backend_config.go +++ b/core/config/backend_config.go @@ -21,8 +21,7 @@ type TTSConfig struct { // Voice wav path or id Voice string `yaml:"voice"` - // Vall-e-x - VallE VallE `yaml:"vall-e"` + AudioPath string `yaml:"audio_path"` } type BackendConfig struct { @@ -82,10 +81,6 @@ type File struct { URI downloader.URI `yaml:"uri" json:"uri"` } -type VallE struct { - AudioPath string `yaml:"audio_path"` -} - type FeatureFlag map[string]*bool func (ff FeatureFlag) Enabled(s string) bool { From b147ad059611f109e3e2a33494a6e8438b2939e8 Mon Sep 17 
00:00:00 2001 From: Ettore Di Giacinto Date: Fri, 17 Jan 2025 10:14:23 +0100 Subject: [PATCH 037/679] ci: try to build for arm64 Try to use the free arm64 runners from Github: https://github.blog/changelog/2025-01-16-linux-arm64-hosted-runners-now-available-for-free-in-public-repositories-public-preview/ Signed-off-by: Ettore Di Giacinto --- .github/workflows/image.yml | 54 ++++++++++--------------------------- 1 file changed, 14 insertions(+), 40 deletions(-) diff --git a/.github/workflows/image.yml b/.github/workflows/image.yml index 68727ebe..47bc507a 100644 --- a/.github/workflows/image.yml +++ b/.github/workflows/image.yml @@ -362,43 +362,17 @@ jobs: base-image: "ubuntu:22.04" skip-drivers: 'false' makeflags: "--jobs=4 --output-sync=target" -# parallel-builds: -# uses: ./.github/workflows/image_build.yml -# with: -# tag-latest: ${{ matrix.tag-latest }} -# tag-suffix: ${{ matrix.tag-suffix }} -# ffmpeg: ${{ matrix.ffmpeg }} -# image-type: ${{ matrix.image-type }} -# build-type: ${{ matrix.build-type }} -# cuda-major-version: ${{ matrix.cuda-major-version }} -# cuda-minor-version: ${{ matrix.cuda-minor-version }} -# platforms: ${{ matrix.platforms }} -# runs-on: ${{ matrix.runs-on }} -# aio: ${{ matrix.aio }} -# base-image: ${{ matrix.base-image }} -# grpc-base-image: ${{ matrix.grpc-base-image }} -# makeflags: ${{ matrix.makeflags }} -# latest-image: ${{ matrix.latest-image }} -# latest-image-aio: ${{ matrix.latest-image-aio }} -# skip-drivers: ${{ matrix.skip-drivers }} -# secrets: -# dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }} -# dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }} -# quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }} -# quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }} -# strategy: -# matrix: -# include: -# - build-type: 'cublas' -# cuda-major-version: "12" -# cuda-minor-version: "0" -# platforms: 'linux/arm64' -# tag-latest: 'false' -# tag-suffix: '-nvidia-l4t-arm64-core' -# latest-image: 'latest-nvidia-l4t-arm64-core' -# ffmpeg: 'true' -# image-type: 'core' -# base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0" -# runs-on: 'self-hosted' -# makeflags: "--jobs=4 --output-sync=target" -# skip-drivers: 'true' + # ARM64 + - build-type: 'cublas' + cuda-major-version: "12" + cuda-minor-version: "0" + platforms: 'linux/arm64' + tag-latest: 'false' + tag-suffix: '-nvidia-l4t-arm64-core' + latest-image: 'latest-nvidia-l4t-arm64-core' + ffmpeg: 'true' + image-type: 'core' + base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0" + runs-on: 'ubuntu-24.04-arm' + makeflags: "--jobs=4 --output-sync=target" + skip-drivers: 'true' From b5eeb5c5ab96721afc2daf600b32b49d70e9c2a2 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Fri, 17 Jan 2025 10:24:15 +0100 Subject: [PATCH 038/679] ci(arm64): run in parallel Signed-off-by: Ettore Di Giacinto --- .github/workflows/image.yml | 30 ++++++++++++++++++++++++++++-- 1 file changed, 28 insertions(+), 2 deletions(-) diff --git a/.github/workflows/image.yml b/.github/workflows/image.yml index 47bc507a..722d0f41 100644 --- a/.github/workflows/image.yml +++ b/.github/workflows/image.yml @@ -362,7 +362,33 @@ jobs: base-image: "ubuntu:22.04" skip-drivers: 'false' makeflags: "--jobs=4 --output-sync=target" - # ARM64 + gh-runner: + uses: ./.github/workflows/image_build.yml + with: + tag-latest: ${{ matrix.tag-latest }} + tag-suffix: ${{ matrix.tag-suffix }} + ffmpeg: ${{ matrix.ffmpeg }} + image-type: ${{ matrix.image-type }} + build-type: ${{ matrix.build-type }} + cuda-major-version: ${{ matrix.cuda-major-version }} + cuda-minor-version: 
${{ matrix.cuda-minor-version }} + platforms: ${{ matrix.platforms }} + runs-on: ${{ matrix.runs-on }} + aio: ${{ matrix.aio }} + base-image: ${{ matrix.base-image }} + grpc-base-image: ${{ matrix.grpc-base-image }} + makeflags: ${{ matrix.makeflags }} + latest-image: ${{ matrix.latest-image }} + latest-image-aio: ${{ matrix.latest-image-aio }} + skip-drivers: ${{ matrix.skip-drivers }} + secrets: + dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }} + dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }} + quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }} + quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }} + strategy: + matrix: + include: - build-type: 'cublas' cuda-major-version: "12" cuda-minor-version: "0" @@ -375,4 +401,4 @@ jobs: base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0" runs-on: 'ubuntu-24.04-arm' makeflags: "--jobs=4 --output-sync=target" - skip-drivers: 'true' + skip-drivers: 'true' \ No newline at end of file From 78533d7230bdb5e352e325c15d0d53f38428b08e Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Fri, 17 Jan 2025 10:25:04 +0100 Subject: [PATCH 039/679] chore: :arrow_up: Update ggerganov/llama.cpp to `4dbc8b9cb71876e005724f4e8f73a3544646bcf5` (#4618) :arrow_up: Update ggerganov/llama.cpp Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 1983f568..f08d1a9c 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ DETECT_LIBS?=true # llama.cpp versions GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be -CPPLLAMA_VERSION?=adc5dd92e8aea98f5e7ac84f6e1bc15de35130b5 +CPPLLAMA_VERSION?=4dbc8b9cb71876e005724f4e8f73a3544646bcf5 # whisper.cpp version WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp From 212c8e1a6da1503a7f45a2aeb4efc8f4b9faad7a Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Fri, 17 Jan 2025 15:11:10 +0100 Subject: [PATCH 040/679] Update README.md Signed-off-by: Ettore Di Giacinto --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index ec4db188..4d415d16 100644 --- a/README.md +++ b/README.md @@ -92,7 +92,7 @@ local-ai run oci://localai/phi-2:latest ## šŸ“° Latest project news -- January 2025: SANA support in diffusers: https://github.com/mudler/LocalAI/pull/4603 +- Jan 2025: LocalAI model release: https://huggingface.co/mudler/LocalAI-functioncall-phi-4-v0.3, SANA support in diffusers: https://github.com/mudler/LocalAI/pull/4603 - Dec 2024: stablediffusion.cpp backend (ggml) added ( https://github.com/mudler/LocalAI/pull/4289 ) - Nov 2024: Bark.cpp backend added ( https://github.com/mudler/LocalAI/pull/4287 ) - Nov 2024: Voice activity detection models (**VAD**) added to the API: https://github.com/mudler/LocalAI/pull/4204 From 8027fdf1c781696b3196f6f71fee8bfb63472cbf Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Fri, 17 Jan 2025 17:01:16 +0100 Subject: [PATCH 041/679] feat(transformers): merge musicgen functionalities to a single backend (#4620) * feat(transformers): merge musicgen functionalities to a single backend So we optimize space Signed-off-by: Ettore Di Giacinto * specify type in tests Signed-off-by: Ettore Di Giacinto * Some adaptations for the MusicgenForConditionalGeneration type Signed-off-by: Ettore Di Giacinto --------- Signed-off-by: Ettore Di 
Giacinto --- .bruno/LocalAI Test Requests/tts/musicgen.bru | 2 +- .github/dependabot.yml | 4 - .github/workflows/test-extra.yml | 40 ++-- Dockerfile | 5 +- Makefile | 13 +- backend/python/transformers-musicgen/Makefile | 29 --- .../python/transformers-musicgen/README.md | 5 - .../python/transformers-musicgen/backend.py | 176 ------------------ .../python/transformers-musicgen/install.sh | 14 -- .../requirements-cpu.txt | 3 - .../requirements-cublas11.txt | 4 - .../requirements-cublas12.txt | 3 - .../requirements-hipblas.txt | 4 - .../requirements-intel.txt | 8 - .../transformers-musicgen/requirements.txt | 4 - backend/python/transformers-musicgen/run.sh | 4 - backend/python/transformers-musicgen/test.py | 100 ---------- backend/python/transformers-musicgen/test.sh | 6 - backend/python/transformers/backend.py | 115 +++++++++++- backend/python/transformers/requirements.txt | 3 +- backend/python/transformers/test.py | 59 +++++- 21 files changed, 187 insertions(+), 414 deletions(-) delete mode 100644 backend/python/transformers-musicgen/Makefile delete mode 100644 backend/python/transformers-musicgen/README.md delete mode 100644 backend/python/transformers-musicgen/backend.py delete mode 100755 backend/python/transformers-musicgen/install.sh delete mode 100644 backend/python/transformers-musicgen/requirements-cpu.txt delete mode 100644 backend/python/transformers-musicgen/requirements-cublas11.txt delete mode 100644 backend/python/transformers-musicgen/requirements-cublas12.txt delete mode 100644 backend/python/transformers-musicgen/requirements-hipblas.txt delete mode 100644 backend/python/transformers-musicgen/requirements-intel.txt delete mode 100644 backend/python/transformers-musicgen/requirements.txt delete mode 100755 backend/python/transformers-musicgen/run.sh delete mode 100644 backend/python/transformers-musicgen/test.py delete mode 100755 backend/python/transformers-musicgen/test.sh diff --git a/.bruno/LocalAI Test Requests/tts/musicgen.bru b/.bruno/LocalAI Test Requests/tts/musicgen.bru index a720b8b1..900173eb 100644 --- a/.bruno/LocalAI Test Requests/tts/musicgen.bru +++ b/.bruno/LocalAI Test Requests/tts/musicgen.bru @@ -16,7 +16,7 @@ headers { body:json { { - "backend": "transformers-musicgen", + "backend": "transformers", "model": "facebook/musicgen-small", "input": "80s Synths playing Jazz" } diff --git a/.github/dependabot.yml b/.github/dependabot.yml index 8fa0cca5..570ac569 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -81,10 +81,6 @@ updates: directory: "/backend/python/transformers" schedule: interval: "weekly" - - package-ecosystem: "pip" - directory: "/backend/python/transformers-musicgen" - schedule: - interval: "weekly" - package-ecosystem: "pip" directory: "/backend/python/vllm" schedule: diff --git a/.github/workflows/test-extra.yml b/.github/workflows/test-extra.yml index 3c2fee37..eacd3ab0 100644 --- a/.github/workflows/test-extra.yml +++ b/.github/workflows/test-extra.yml @@ -153,27 +153,27 @@ jobs: make --jobs=5 --output-sync=target -C backend/python/openvoice make --jobs=5 --output-sync=target -C backend/python/openvoice test - tests-transformers-musicgen: - runs-on: ubuntu-latest - steps: - - name: Clone - uses: actions/checkout@v4 - with: - submodules: true - - name: Dependencies - run: | - sudo apt-get update - sudo apt-get install build-essential ffmpeg - # Install UV - curl -LsSf https://astral.sh/uv/install.sh | sh - sudo apt-get install -y ca-certificates cmake curl patch python3-pip - sudo apt-get install -y libopencv-dev - pip 
install --user --no-cache-dir grpcio-tools==1.64.1 + # tests-transformers-musicgen: + # runs-on: ubuntu-latest + # steps: + # - name: Clone + # uses: actions/checkout@v4 + # with: + # submodules: true + # - name: Dependencies + # run: | + # sudo apt-get update + # sudo apt-get install build-essential ffmpeg + # # Install UV + # curl -LsSf https://astral.sh/uv/install.sh | sh + # sudo apt-get install -y ca-certificates cmake curl patch python3-pip + # sudo apt-get install -y libopencv-dev + # pip install --user --no-cache-dir grpcio-tools==1.64.1 - - name: Test transformers-musicgen - run: | - make --jobs=5 --output-sync=target -C backend/python/transformers-musicgen - make --jobs=5 --output-sync=target -C backend/python/transformers-musicgen test + # - name: Test transformers-musicgen + # run: | + # make --jobs=5 --output-sync=target -C backend/python/transformers-musicgen + # make --jobs=5 --output-sync=target -C backend/python/transformers-musicgen test # tests-bark: # runs-on: ubuntu-latest diff --git a/Dockerfile b/Dockerfile index 354ef298..9fb07516 100644 --- a/Dockerfile +++ b/Dockerfile @@ -15,7 +15,7 @@ ARG TARGETARCH ARG TARGETVARIANT ENV DEBIAN_FRONTEND=noninteractive -ENV EXTERNAL_GRPC_BACKENDS="coqui:/build/backend/python/coqui/run.sh,huggingface-embeddings:/build/backend/python/sentencetransformers/run.sh,transformers:/build/backend/python/transformers/run.sh,sentencetransformers:/build/backend/python/sentencetransformers/run.sh,rerankers:/build/backend/python/rerankers/run.sh,autogptq:/build/backend/python/autogptq/run.sh,bark:/build/backend/python/bark/run.sh,diffusers:/build/backend/python/diffusers/run.sh,openvoice:/build/backend/python/openvoice/run.sh,kokoro:/build/backend/python/kokoro/run.sh,vllm:/build/backend/python/vllm/run.sh,mamba:/build/backend/python/mamba/run.sh,exllama2:/build/backend/python/exllama2/run.sh,transformers-musicgen:/build/backend/python/transformers-musicgen/run.sh,parler-tts:/build/backend/python/parler-tts/run.sh" +ENV EXTERNAL_GRPC_BACKENDS="coqui:/build/backend/python/coqui/run.sh,huggingface-embeddings:/build/backend/python/sentencetransformers/run.sh,transformers:/build/backend/python/transformers/run.sh,sentencetransformers:/build/backend/python/sentencetransformers/run.sh,rerankers:/build/backend/python/rerankers/run.sh,autogptq:/build/backend/python/autogptq/run.sh,bark:/build/backend/python/bark/run.sh,diffusers:/build/backend/python/diffusers/run.sh,openvoice:/build/backend/python/openvoice/run.sh,kokoro:/build/backend/python/kokoro/run.sh,vllm:/build/backend/python/vllm/run.sh,mamba:/build/backend/python/mamba/run.sh,exllama2:/build/backend/python/exllama2/run.sh,parler-tts:/build/backend/python/parler-tts/run.sh" RUN apt-get update && \ @@ -448,9 +448,6 @@ RUN if [[ ( "${EXTRA_BACKENDS}" =~ "coqui" || -z "${EXTRA_BACKENDS}" ) && "$IMAG ; fi && \ if [[ ( "${EXTRA_BACKENDS}" =~ "diffusers" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \ make -C backend/python/diffusers \ - ; fi && \ - if [[ ( "${EXTRA_BACKENDS}" =~ "transformers-musicgen" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \ - make -C backend/python/transformers-musicgen \ ; fi RUN if [[ ( "${EXTRA_BACKENDS}" =~ "kokoro" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \ diff --git a/Makefile b/Makefile index f08d1a9c..03468ffb 100644 --- a/Makefile +++ b/Makefile @@ -583,10 +583,10 @@ protogen-go-clean: $(RM) bin/* .PHONY: protogen-python -protogen-python: autogptq-protogen bark-protogen coqui-protogen 
diffusers-protogen exllama2-protogen mamba-protogen rerankers-protogen sentencetransformers-protogen transformers-protogen parler-tts-protogen transformers-musicgen-protogen kokoro-protogen vllm-protogen openvoice-protogen +protogen-python: autogptq-protogen bark-protogen coqui-protogen diffusers-protogen exllama2-protogen mamba-protogen rerankers-protogen sentencetransformers-protogen transformers-protogen parler-tts-protogen kokoro-protogen vllm-protogen openvoice-protogen .PHONY: protogen-python-clean -protogen-python-clean: autogptq-protogen-clean bark-protogen-clean coqui-protogen-clean diffusers-protogen-clean exllama2-protogen-clean mamba-protogen-clean sentencetransformers-protogen-clean rerankers-protogen-clean transformers-protogen-clean transformers-musicgen-protogen-clean parler-tts-protogen-clean kokoro-protogen-clean vllm-protogen-clean openvoice-protogen-clean +protogen-python-clean: autogptq-protogen-clean bark-protogen-clean coqui-protogen-clean diffusers-protogen-clean exllama2-protogen-clean mamba-protogen-clean sentencetransformers-protogen-clean rerankers-protogen-clean transformers-protogen-clean parler-tts-protogen-clean kokoro-protogen-clean vllm-protogen-clean openvoice-protogen-clean .PHONY: autogptq-protogen autogptq-protogen: @@ -668,14 +668,6 @@ parler-tts-protogen: parler-tts-protogen-clean: $(MAKE) -C backend/python/parler-tts protogen-clean -.PHONY: transformers-musicgen-protogen -transformers-musicgen-protogen: - $(MAKE) -C backend/python/transformers-musicgen protogen - -.PHONY: transformers-musicgen-protogen-clean -transformers-musicgen-protogen-clean: - $(MAKE) -C backend/python/transformers-musicgen protogen-clean - .PHONY: kokoro-protogen kokoro-protogen: $(MAKE) -C backend/python/kokoro protogen @@ -712,7 +704,6 @@ prepare-extra-conda-environments: protogen-python $(MAKE) -C backend/python/sentencetransformers $(MAKE) -C backend/python/rerankers $(MAKE) -C backend/python/transformers - $(MAKE) -C backend/python/transformers-musicgen $(MAKE) -C backend/python/parler-tts $(MAKE) -C backend/python/kokoro $(MAKE) -C backend/python/openvoice diff --git a/backend/python/transformers-musicgen/Makefile b/backend/python/transformers-musicgen/Makefile deleted file mode 100644 index 06badf6d..00000000 --- a/backend/python/transformers-musicgen/Makefile +++ /dev/null @@ -1,29 +0,0 @@ -.PHONY: transformers-musicgen -transformers-musicgen: protogen - bash install.sh - -.PHONY: run -run: protogen - @echo "Running transformers..." - bash run.sh - @echo "transformers run." - -.PHONY: test -test: protogen - @echo "Testing transformers..." - bash test.sh - @echo "transformers tested." - -.PHONY: protogen -protogen: backend_pb2_grpc.py backend_pb2.py - -.PHONY: protogen-clean -protogen-clean: - $(RM) backend_pb2_grpc.py backend_pb2.py - -backend_pb2_grpc.py backend_pb2.py: - python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. 
backend.proto - -.PHONY: clean -clean: protogen-clean - rm -rf venv __pycache__ \ No newline at end of file diff --git a/backend/python/transformers-musicgen/README.md b/backend/python/transformers-musicgen/README.md deleted file mode 100644 index bf7fef84..00000000 --- a/backend/python/transformers-musicgen/README.md +++ /dev/null @@ -1,5 +0,0 @@ -# Creating a separate environment for the transformers project - -``` -make transformers-musicgen -``` \ No newline at end of file diff --git a/backend/python/transformers-musicgen/backend.py b/backend/python/transformers-musicgen/backend.py deleted file mode 100644 index b9f1facf..00000000 --- a/backend/python/transformers-musicgen/backend.py +++ /dev/null @@ -1,176 +0,0 @@ -#!/usr/bin/env python3 -""" -Extra gRPC server for MusicgenForConditionalGeneration models. -""" -from concurrent import futures - -import argparse -import signal -import sys -import os - -import time -import backend_pb2 -import backend_pb2_grpc - -import grpc - -from scipy.io import wavfile -from transformers import AutoProcessor, MusicgenForConditionalGeneration - -_ONE_DAY_IN_SECONDS = 60 * 60 * 24 - -# If MAX_WORKERS are specified in the environment use it, otherwise default to 1 -MAX_WORKERS = int(os.environ.get('PYTHON_GRPC_MAX_WORKERS', '1')) - -# Implement the BackendServicer class with the service methods -class BackendServicer(backend_pb2_grpc.BackendServicer): - """ - A gRPC servicer for the backend service. - - This class implements the gRPC methods for the backend service, including Health, LoadModel, and Embedding. - """ - def Health(self, request, context): - """ - A gRPC method that returns the health status of the backend service. - - Args: - request: A HealthRequest object that contains the request parameters. - context: A grpc.ServicerContext object that provides information about the RPC. - - Returns: - A Reply object that contains the health status of the backend service. - """ - return backend_pb2.Reply(message=bytes("OK", 'utf-8')) - - def LoadModel(self, request, context): - """ - A gRPC method that loads a model into memory. - - Args: - request: A LoadModelRequest object that contains the request parameters. - context: A grpc.ServicerContext object that provides information about the RPC. - - Returns: - A Result object that contains the result of the LoadModel operation. - """ - model_name = request.Model - try: - self.processor = AutoProcessor.from_pretrained(model_name) - self.model = MusicgenForConditionalGeneration.from_pretrained(model_name) - except Exception as err: - return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}") - - return backend_pb2.Result(message="Model loaded successfully", success=True) - - def SoundGeneration(self, request, context): - model_name = request.model - if model_name == "": - return backend_pb2.Result(success=False, message="request.model is required") - try: - self.processor = AutoProcessor.from_pretrained(model_name) - self.model = MusicgenForConditionalGeneration.from_pretrained(model_name) - inputs = None - if request.text == "": - inputs = self.model.get_unconditional_inputs(num_samples=1) - elif request.HasField('src'): - # TODO SECURITY CODE GOES HERE LOL - # WHO KNOWS IF THIS WORKS??? 
- sample_rate, wsamples = wavfile.read('path_to_your_file.wav') - - if request.HasField('src_divisor'): - wsamples = wsamples[: len(wsamples) // request.src_divisor] - - inputs = self.processor( - audio=wsamples, - sampling_rate=sample_rate, - text=[request.text], - padding=True, - return_tensors="pt", - ) - else: - inputs = self.processor( - text=[request.text], - padding=True, - return_tensors="pt", - ) - - tokens = 256 - if request.HasField('duration'): - tokens = int(request.duration * 51.2) # 256 tokens = 5 seconds, therefore 51.2 tokens is one second - guidance = 3.0 - if request.HasField('temperature'): - guidance = request.temperature - dosample = True - if request.HasField('sample'): - dosample = request.sample - audio_values = self.model.generate(**inputs, do_sample=dosample, guidance_scale=guidance, max_new_tokens=tokens) - print("[transformers-musicgen] SoundGeneration generated!", file=sys.stderr) - sampling_rate = self.model.config.audio_encoder.sampling_rate - wavfile.write(request.dst, rate=sampling_rate, data=audio_values[0, 0].numpy()) - print("[transformers-musicgen] SoundGeneration saved to", request.dst, file=sys.stderr) - print("[transformers-musicgen] SoundGeneration for", file=sys.stderr) - print("[transformers-musicgen] SoundGeneration requested tokens", tokens, file=sys.stderr) - print(request, file=sys.stderr) - except Exception as err: - return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}") - return backend_pb2.Result(success=True) - - -# The TTS endpoint is older, and provides fewer features, but exists for compatibility reasons - def TTS(self, request, context): - model_name = request.model - if model_name == "": - return backend_pb2.Result(success=False, message="request.model is required") - try: - self.processor = AutoProcessor.from_pretrained(model_name) - self.model = MusicgenForConditionalGeneration.from_pretrained(model_name) - inputs = self.processor( - text=[request.text], - padding=True, - return_tensors="pt", - ) - tokens = 512 # No good place to set the "length" in TTS, so use 10s as a sane default - audio_values = self.model.generate(**inputs, max_new_tokens=tokens) - print("[transformers-musicgen] TTS generated!", file=sys.stderr) - sampling_rate = self.model.config.audio_encoder.sampling_rate - write_wav(request.dst, rate=sampling_rate, data=audio_values[0, 0].numpy()) - print("[transformers-musicgen] TTS saved to", request.dst, file=sys.stderr) - print("[transformers-musicgen] TTS for", file=sys.stderr) - print(request, file=sys.stderr) - except Exception as err: - return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}") - return backend_pb2.Result(success=True) - - -def serve(address): - server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS)) - backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server) - server.add_insecure_port(address) - server.start() - print("[transformers-musicgen] Server started. Listening on: " + address, file=sys.stderr) - - # Define the signal handler function - def signal_handler(sig, frame): - print("[transformers-musicgen] Received termination signal. 
Shutting down...") - server.stop(0) - sys.exit(0) - - # Set the signal handlers for SIGINT and SIGTERM - signal.signal(signal.SIGINT, signal_handler) - signal.signal(signal.SIGTERM, signal_handler) - - try: - while True: - time.sleep(_ONE_DAY_IN_SECONDS) - except KeyboardInterrupt: - server.stop(0) - -if __name__ == "__main__": - parser = argparse.ArgumentParser(description="Run the gRPC server.") - parser.add_argument( - "--addr", default="localhost:50051", help="The address to bind the server to." - ) - args = parser.parse_args() - print(f"[transformers-musicgen] startup: {args}", file=sys.stderr) - serve(args.addr) diff --git a/backend/python/transformers-musicgen/install.sh b/backend/python/transformers-musicgen/install.sh deleted file mode 100755 index 36443ef1..00000000 --- a/backend/python/transformers-musicgen/install.sh +++ /dev/null @@ -1,14 +0,0 @@ -#!/bin/bash -set -e - -source $(dirname $0)/../common/libbackend.sh - -# This is here because the Intel pip index is broken and returns 200 status codes for every package name, it just doesn't return any package links. -# This makes uv think that the package exists in the Intel pip index, and by default it stops looking at other pip indexes once it finds a match. -# We need uv to continue falling through to the pypi default index to find optimum[openvino] in the pypi index -# the --upgrade actually allows us to *downgrade* torch to the version provided in the Intel pip index -if [ "x${BUILD_PROFILE}" == "xintel" ]; then - EXTRA_PIP_INSTALL_FLAGS+=" --upgrade --index-strategy=unsafe-first-match" -fi - -installRequirements diff --git a/backend/python/transformers-musicgen/requirements-cpu.txt b/backend/python/transformers-musicgen/requirements-cpu.txt deleted file mode 100644 index 2021fc20..00000000 --- a/backend/python/transformers-musicgen/requirements-cpu.txt +++ /dev/null @@ -1,3 +0,0 @@ -transformers -accelerate -torch==2.4.1 \ No newline at end of file diff --git a/backend/python/transformers-musicgen/requirements-cublas11.txt b/backend/python/transformers-musicgen/requirements-cublas11.txt deleted file mode 100644 index cd2c9fdb..00000000 --- a/backend/python/transformers-musicgen/requirements-cublas11.txt +++ /dev/null @@ -1,4 +0,0 @@ ---extra-index-url https://download.pytorch.org/whl/cu118 -transformers -accelerate -torch==2.4.1+cu118 \ No newline at end of file diff --git a/backend/python/transformers-musicgen/requirements-cublas12.txt b/backend/python/transformers-musicgen/requirements-cublas12.txt deleted file mode 100644 index 2021fc20..00000000 --- a/backend/python/transformers-musicgen/requirements-cublas12.txt +++ /dev/null @@ -1,3 +0,0 @@ -transformers -accelerate -torch==2.4.1 \ No newline at end of file diff --git a/backend/python/transformers-musicgen/requirements-hipblas.txt b/backend/python/transformers-musicgen/requirements-hipblas.txt deleted file mode 100644 index 122b2032..00000000 --- a/backend/python/transformers-musicgen/requirements-hipblas.txt +++ /dev/null @@ -1,4 +0,0 @@ ---extra-index-url https://download.pytorch.org/whl/rocm6.0 -transformers -accelerate -torch==2.4.1+rocm6.0 \ No newline at end of file diff --git a/backend/python/transformers-musicgen/requirements-intel.txt b/backend/python/transformers-musicgen/requirements-intel.txt deleted file mode 100644 index ac2feb42..00000000 --- a/backend/python/transformers-musicgen/requirements-intel.txt +++ /dev/null @@ -1,8 +0,0 @@ ---extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/ -intel-extension-for-pytorch==2.3.110+xpu 
-transformers -oneccl_bind_pt==2.3.100+xpu -accelerate -torch==2.3.1+cxx11.abi -optimum[openvino] -setuptools \ No newline at end of file diff --git a/backend/python/transformers-musicgen/requirements.txt b/backend/python/transformers-musicgen/requirements.txt deleted file mode 100644 index f58e1e80..00000000 --- a/backend/python/transformers-musicgen/requirements.txt +++ /dev/null @@ -1,4 +0,0 @@ -grpcio==1.69.0 -protobuf -scipy==1.14.0 -certifi \ No newline at end of file diff --git a/backend/python/transformers-musicgen/run.sh b/backend/python/transformers-musicgen/run.sh deleted file mode 100755 index 375c07e5..00000000 --- a/backend/python/transformers-musicgen/run.sh +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -source $(dirname $0)/../common/libbackend.sh - -startBackend $@ \ No newline at end of file diff --git a/backend/python/transformers-musicgen/test.py b/backend/python/transformers-musicgen/test.py deleted file mode 100644 index 295de65e..00000000 --- a/backend/python/transformers-musicgen/test.py +++ /dev/null @@ -1,100 +0,0 @@ -""" -A test script to test the gRPC service -""" -import unittest -import subprocess -import time -import backend_pb2 -import backend_pb2_grpc - -import grpc - - -class TestBackendServicer(unittest.TestCase): - """ - TestBackendServicer is the class that tests the gRPC service - """ - def setUp(self): - """ - This method sets up the gRPC service by starting the server - """ - self.service = subprocess.Popen(["python3", "backend.py", "--addr", "localhost:50051"]) - time.sleep(10) - - def tearDown(self) -> None: - """ - This method tears down the gRPC service by terminating the server - """ - self.service.terminate() - self.service.wait() - - def test_server_startup(self): - """ - This method tests if the server starts up successfully - """ - try: - self.setUp() - with grpc.insecure_channel("localhost:50051") as channel: - stub = backend_pb2_grpc.BackendStub(channel) - response = stub.Health(backend_pb2.HealthMessage()) - self.assertEqual(response.message, b'OK') - except Exception as err: - print(err) - self.fail("Server failed to start") - finally: - self.tearDown() - - def test_load_model(self): - """ - This method tests if the model is loaded successfully - """ - try: - self.setUp() - with grpc.insecure_channel("localhost:50051") as channel: - stub = backend_pb2_grpc.BackendStub(channel) - response = stub.LoadModel(backend_pb2.ModelOptions(Model="facebook/musicgen-small")) - self.assertTrue(response.success) - self.assertEqual(response.message, "Model loaded successfully") - except Exception as err: - print(err) - self.fail("LoadModel service failed") - finally: - self.tearDown() - - def test_tts(self): - """ - This method tests if TTS is generated successfully - """ - try: - self.setUp() - with grpc.insecure_channel("localhost:50051") as channel: - stub = backend_pb2_grpc.BackendStub(channel) - response = stub.LoadModel(backend_pb2.ModelOptions(Model="facebook/musicgen-small")) - self.assertTrue(response.success) - tts_request = backend_pb2.TTSRequest(text="80s TV news production music hit for tonight's biggest story") - tts_response = stub.TTS(tts_request) - self.assertIsNotNone(tts_response) - except Exception as err: - print(err) - self.fail("TTS service failed") - finally: - self.tearDown() - - def test_sound_generation(self): - """ - This method tests if SoundGeneration is generated successfully - """ - try: - self.setUp() - with grpc.insecure_channel("localhost:50051") as channel: - stub = backend_pb2_grpc.BackendStub(channel) - response = 
stub.LoadModel(backend_pb2.ModelOptions(Model="facebook/musicgen-small")) - self.assertTrue(response.success) - sg_request = backend_pb2.SoundGenerationRequest(text="80s TV news production music hit for tonight's biggest story") - sg_response = stub.SoundGeneration(sg_request) - self.assertIsNotNone(sg_response) - except Exception as err: - print(err) - self.fail("SoundGeneration service failed") - finally: - self.tearDown() \ No newline at end of file diff --git a/backend/python/transformers-musicgen/test.sh b/backend/python/transformers-musicgen/test.sh deleted file mode 100755 index 6940b066..00000000 --- a/backend/python/transformers-musicgen/test.sh +++ /dev/null @@ -1,6 +0,0 @@ -#!/bin/bash -set -e - -source $(dirname $0)/../common/libbackend.sh - -runUnittests diff --git a/backend/python/transformers/backend.py b/backend/python/transformers/backend.py index 2075012e..3f6838ad 100644 --- a/backend/python/transformers/backend.py +++ b/backend/python/transformers/backend.py @@ -22,6 +22,8 @@ import torch.cuda XPU=os.environ.get("XPU", "0") == "1" from transformers import AutoTokenizer, AutoModel, set_seed, TextIteratorStreamer, StoppingCriteriaList, StopStringCriteria +from transformers import AutoProcessor, MusicgenForConditionalGeneration +from scipy.io import wavfile _ONE_DAY_IN_SECONDS = 60 * 60 * 24 @@ -191,6 +193,9 @@ class BackendServicer(backend_pb2_grpc.BackendServicer): export=True, device=device_map) self.OV = True + elif request.Type == "MusicgenForConditionalGeneration": + self.processor = AutoProcessor.from_pretrained(model_name) + self.model = MusicgenForConditionalGeneration.from_pretrained(model_name) else: print("Automodel", file=sys.stderr) self.model = AutoModel.from_pretrained(model_name, @@ -201,19 +206,22 @@ class BackendServicer(backend_pb2_grpc.BackendServicer): torch_dtype=compute) if request.ContextSize > 0: self.max_tokens = request.ContextSize - else: + elif request.Type != "MusicgenForConditionalGeneration": self.max_tokens = self.model.config.max_position_embeddings + else: + self.max_tokens = 512 - self.tokenizer = AutoTokenizer.from_pretrained(model_name, use_safetensors=True) - self.XPU = False + if request.Type != "MusicgenForConditionalGeneration": + self.tokenizer = AutoTokenizer.from_pretrained(model_name, use_safetensors=True) + self.XPU = False - if XPU and self.OV == False: - self.XPU = True - try: - print("Optimizing model", model_name, "to XPU.", file=sys.stderr) - self.model = ipex.optimize_transformers(self.model, inplace=True, dtype=torch.float16, device="xpu") - except Exception as err: - print("Not using XPU:", err, file=sys.stderr) + if XPU and self.OV == False: + self.XPU = True + try: + print("Optimizing model", model_name, "to XPU.", file=sys.stderr) + self.model = ipex.optimize_transformers(self.model, inplace=True, dtype=torch.float16, device="xpu") + except Exception as err: + print("Not using XPU:", err, file=sys.stderr) except Exception as err: print("Error:", err, file=sys.stderr) @@ -380,6 +388,93 @@ class BackendServicer(backend_pb2_grpc.BackendServicer): finally: await iterations.aclose() + def SoundGeneration(self, request, context): + model_name = request.model + try: + if self.processor is None: + if model_name == "": + return backend_pb2.Result(success=False, message="request.model is required") + self.processor = AutoProcessor.from_pretrained(model_name) + if self.model is None: + if model_name == "": + return backend_pb2.Result(success=False, message="request.model is required") + self.model = 
MusicgenForConditionalGeneration.from_pretrained(model_name) + inputs = None + if request.text == "": + inputs = self.model.get_unconditional_inputs(num_samples=1) + elif request.HasField('src'): + # TODO SECURITY CODE GOES HERE LOL + # WHO KNOWS IF THIS WORKS??? + sample_rate, wsamples = wavfile.read('path_to_your_file.wav') + + if request.HasField('src_divisor'): + wsamples = wsamples[: len(wsamples) // request.src_divisor] + + inputs = self.processor( + audio=wsamples, + sampling_rate=sample_rate, + text=[request.text], + padding=True, + return_tensors="pt", + ) + else: + inputs = self.processor( + text=[request.text], + padding=True, + return_tensors="pt", + ) + + tokens = 256 + if request.HasField('duration'): + tokens = int(request.duration * 51.2) # 256 tokens = 5 seconds, therefore 51.2 tokens is one second + guidance = 3.0 + if request.HasField('temperature'): + guidance = request.temperature + dosample = True + if request.HasField('sample'): + dosample = request.sample + audio_values = self.model.generate(**inputs, do_sample=dosample, guidance_scale=guidance, max_new_tokens=tokens) + print("[transformers-musicgen] SoundGeneration generated!", file=sys.stderr) + sampling_rate = self.model.config.audio_encoder.sampling_rate + wavfile.write(request.dst, rate=sampling_rate, data=audio_values[0, 0].numpy()) + print("[transformers-musicgen] SoundGeneration saved to", request.dst, file=sys.stderr) + print("[transformers-musicgen] SoundGeneration for", file=sys.stderr) + print("[transformers-musicgen] SoundGeneration requested tokens", tokens, file=sys.stderr) + print(request, file=sys.stderr) + except Exception as err: + return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}") + return backend_pb2.Result(success=True) + + +# The TTS endpoint is older, and provides fewer features, but exists for compatibility reasons + def TTS(self, request, context): + model_name = request.model + try: + if self.processor is None: + if model_name == "": + return backend_pb2.Result(success=False, message="request.model is required") + self.processor = AutoProcessor.from_pretrained(model_name) + if self.model is None: + if model_name == "": + return backend_pb2.Result(success=False, message="request.model is required") + self.model = MusicgenForConditionalGeneration.from_pretrained(model_name) + inputs = self.processor( + text=[request.text], + padding=True, + return_tensors="pt", + ) + tokens = 512 # No good place to set the "length" in TTS, so use 10s as a sane default + audio_values = self.model.generate(**inputs, max_new_tokens=tokens) + print("[transformers-musicgen] TTS generated!", file=sys.stderr) + sampling_rate = self.model.config.audio_encoder.sampling_rate + wavfile.write(request.dst, rate=sampling_rate, data=audio_values[0, 0].numpy()) + print("[transformers-musicgen] TTS saved to", request.dst, file=sys.stderr) + print("[transformers-musicgen] TTS for", file=sys.stderr) + print(request, file=sys.stderr) + except Exception as err: + return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}") + return backend_pb2.Result(success=True) + async def serve(address): # Start asyncio gRPC server server = grpc.aio.server(migration_thread_pool=futures.ThreadPoolExecutor(max_workers=MAX_WORKERS)) diff --git a/backend/python/transformers/requirements.txt b/backend/python/transformers/requirements.txt index a1eea776..262dd17a 100644 --- a/backend/python/transformers/requirements.txt +++ b/backend/python/transformers/requirements.txt @@ -1,4 +1,5 @@ 
grpcio==1.69.0 protobuf certifi -setuptools \ No newline at end of file +setuptools +scipy==1.14.0 \ No newline at end of file diff --git a/backend/python/transformers/test.py b/backend/python/transformers/test.py index aab3c05e..305b0a93 100644 --- a/backend/python/transformers/test.py +++ b/backend/python/transformers/test.py @@ -19,6 +19,7 @@ class TestBackendServicer(unittest.TestCase): This method sets up the gRPC service by starting the server """ self.service = subprocess.Popen(["python3", "backend.py", "--addr", "localhost:50051"]) + time.sleep(10) def tearDown(self) -> None: """ @@ -31,7 +32,6 @@ class TestBackendServicer(unittest.TestCase): """ This method tests if the server starts up successfully """ - time.sleep(10) try: self.setUp() with grpc.insecure_channel("localhost:50051") as channel: @@ -48,7 +48,6 @@ class TestBackendServicer(unittest.TestCase): """ This method tests if the model is loaded successfully """ - time.sleep(10) try: self.setUp() with grpc.insecure_channel("localhost:50051") as channel: @@ -66,7 +65,6 @@ class TestBackendServicer(unittest.TestCase): """ This method tests if the embeddings are generated successfully """ - time.sleep(10) try: self.setUp() with grpc.insecure_channel("localhost:50051") as channel: @@ -80,5 +78,60 @@ class TestBackendServicer(unittest.TestCase): except Exception as err: print(err) self.fail("Embedding service failed") + finally: + self.tearDown() + + def test_audio_load_model(self): + """ + This method tests if the model is loaded successfully + """ + try: + self.setUp() + with grpc.insecure_channel("localhost:50051") as channel: + stub = backend_pb2_grpc.BackendStub(channel) + response = stub.LoadModel(backend_pb2.ModelOptions(Model="facebook/musicgen-small",Type="MusicgenForConditionalGeneration")) + self.assertTrue(response.success) + self.assertEqual(response.message, "Model loaded successfully") + except Exception as err: + print(err) + self.fail("LoadModel service failed") + finally: + self.tearDown() + + def test_tts(self): + """ + This method tests if TTS is generated successfully + """ + try: + self.setUp() + with grpc.insecure_channel("localhost:50051") as channel: + stub = backend_pb2_grpc.BackendStub(channel) + response = stub.LoadModel(backend_pb2.ModelOptions(Model="facebook/musicgen-small",Type="MusicgenForConditionalGeneration")) + self.assertTrue(response.success) + tts_request = backend_pb2.TTSRequest(text="80s TV news production music hit for tonight's biggest story") + tts_response = stub.TTS(tts_request) + self.assertIsNotNone(tts_response) + except Exception as err: + print(err) + self.fail("TTS service failed") + finally: + self.tearDown() + + def test_sound_generation(self): + """ + This method tests if SoundGeneration is generated successfully + """ + try: + self.setUp() + with grpc.insecure_channel("localhost:50051") as channel: + stub = backend_pb2_grpc.BackendStub(channel) + response = stub.LoadModel(backend_pb2.ModelOptions(Model="facebook/musicgen-small",Type="MusicgenForConditionalGeneration")) + self.assertTrue(response.success) + sg_request = backend_pb2.SoundGenerationRequest(text="80s TV news production music hit for tonight's biggest story") + sg_response = stub.SoundGeneration(sg_request) + self.assertIsNotNone(sg_response) + except Exception as err: + print(err) + self.fail("SoundGeneration service failed") finally: self.tearDown() \ No newline at end of file From 96f8ec0402ff54e0bf7bdd7c8986497184a2b9f8 Mon Sep 17 00:00:00 2001 From: mintyleaf Date: Fri, 17 Jan 2025 20:05:58 +0400 Subject: 
[PATCH 042/679] feat: add machine tag and inference timings (#4577) * Add machine tag option, add extraUsage option, grpc-server -> proto -> endpoint extraUsage data is broken for now Signed-off-by: mintyleaf * remove redurant timing fields, fix not working timings output Signed-off-by: mintyleaf * use middleware for Machine-Tag only if tag is specified Signed-off-by: mintyleaf --------- Signed-off-by: mintyleaf --- backend/backend.proto | 4 +- backend/cpp/llama/grpc-server.cpp | 14 ++++++ core/backend/llm.go | 12 ++++- core/cli/run.go | 2 + core/config/application_config.go | 8 ++++ core/http/app.go | 8 ++++ core/http/endpoints/localai/tts.go | 1 - core/http/endpoints/localai/vad.go | 1 - core/http/endpoints/openai/chat.go | 59 ++++++++++++++++-------- core/http/endpoints/openai/completion.go | 44 ++++++++++++------ core/http/endpoints/openai/edit.go | 21 +++++++-- core/http/endpoints/openai/inference.go | 2 + core/http/endpoints/openai/list.go | 2 +- core/http/routes/openai.go | 4 +- core/schema/openai.go | 3 ++ 15 files changed, 137 insertions(+), 48 deletions(-) diff --git a/backend/backend.proto b/backend/backend.proto index 0a341ca2..fea4214f 100644 --- a/backend/backend.proto +++ b/backend/backend.proto @@ -159,6 +159,8 @@ message Reply { bytes message = 1; int32 tokens = 2; int32 prompt_tokens = 3; + double timing_prompt_processing = 4; + double timing_token_generation = 5; } message ModelOptions { @@ -348,4 +350,4 @@ message StatusResponse { message Message { string role = 1; string content = 2; -} \ No newline at end of file +} diff --git a/backend/cpp/llama/grpc-server.cpp b/backend/cpp/llama/grpc-server.cpp index f0a16ffa..4e75e7b0 100644 --- a/backend/cpp/llama/grpc-server.cpp +++ b/backend/cpp/llama/grpc-server.cpp @@ -2408,6 +2408,13 @@ public: int32_t tokens_evaluated = result.result_json.value("tokens_evaluated", 0); reply.set_prompt_tokens(tokens_evaluated); + if (result.result_json.contains("timings")) { + double timing_prompt_processing = result.result_json.at("timings").value("prompt_ms", 0.0); + reply.set_timing_prompt_processing(timing_prompt_processing); + double timing_token_generation = result.result_json.at("timings").value("predicted_ms", 0.0); + reply.set_timing_token_generation(timing_token_generation); + } + // Log Request Correlation Id LOG_VERBOSE("correlation:", { { "id", data["correlation_id"] } @@ -2448,6 +2455,13 @@ public: reply->set_prompt_tokens(tokens_evaluated); reply->set_tokens(tokens_predicted); reply->set_message(completion_text); + + if (result.result_json.contains("timings")) { + double timing_prompt_processing = result.result_json.at("timings").value("prompt_ms", 0.0); + reply->set_timing_prompt_processing(timing_prompt_processing); + double timing_token_generation = result.result_json.at("timings").value("predicted_ms", 0.0); + reply->set_timing_token_generation(timing_token_generation); + } } else { diff --git a/core/backend/llm.go b/core/backend/llm.go index 9a4d0d46..d91ded51 100644 --- a/core/backend/llm.go +++ b/core/backend/llm.go @@ -27,8 +27,10 @@ type LLMResponse struct { } type TokenUsage struct { - Prompt int - Completion int + Prompt int + Completion int + TimingPromptProcessing float64 + TimingTokenGeneration float64 } func ModelInference(ctx context.Context, s string, messages []schema.Message, images, videos, audios []string, loader *model.ModelLoader, c config.BackendConfig, o *config.ApplicationConfig, tokenCallback func(string, TokenUsage) bool) (func() (LLMResponse, error), error) { @@ -123,6 +125,8 @@ func 
ModelInference(ctx context.Context, s string, messages []schema.Message, im tokenUsage.Prompt = int(reply.PromptTokens) tokenUsage.Completion = int(reply.Tokens) + tokenUsage.TimingTokenGeneration = reply.TimingTokenGeneration + tokenUsage.TimingPromptProcessing = reply.TimingPromptProcessing for len(partialRune) > 0 { r, size := utf8.DecodeRune(partialRune) @@ -157,6 +161,10 @@ func ModelInference(ctx context.Context, s string, messages []schema.Message, im if tokenUsage.Completion == 0 { tokenUsage.Completion = int(reply.Tokens) } + + tokenUsage.TimingTokenGeneration = reply.TimingTokenGeneration + tokenUsage.TimingPromptProcessing = reply.TimingPromptProcessing + return LLMResponse{ Response: string(reply.Message), Usage: tokenUsage, diff --git a/core/cli/run.go b/core/cli/run.go index a0e16155..b86fe2a6 100644 --- a/core/cli/run.go +++ b/core/cli/run.go @@ -70,6 +70,7 @@ type RunCMD struct { WatchdogBusyTimeout string `env:"LOCALAI_WATCHDOG_BUSY_TIMEOUT,WATCHDOG_BUSY_TIMEOUT" default:"5m" help:"Threshold beyond which a busy backend should be stopped" group:"backends"` Federated bool `env:"LOCALAI_FEDERATED,FEDERATED" help:"Enable federated instance" group:"federated"` DisableGalleryEndpoint bool `env:"LOCALAI_DISABLE_GALLERY_ENDPOINT,DISABLE_GALLERY_ENDPOINT" help:"Disable the gallery endpoints" group:"api"` + MachineTag string `env:"LOCALAI_MACHINE_TAG" help:"Add Machine-Tag header to each response which is useful to track the machine in the P2P network" group:"api"` LoadToMemory []string `env:"LOCALAI_LOAD_TO_MEMORY,LOAD_TO_MEMORY" help:"A list of models to load into memory at startup" group:"models"` } @@ -107,6 +108,7 @@ func (r *RunCMD) Run(ctx *cliContext.Context) error { config.WithHttpGetExemptedEndpoints(r.HttpGetExemptedEndpoints), config.WithP2PNetworkID(r.Peer2PeerNetworkID), config.WithLoadToMemory(r.LoadToMemory), + config.WithMachineTag(r.MachineTag), } if r.DisableMetricsEndpoint { diff --git a/core/config/application_config.go b/core/config/application_config.go index 3f321e70..1ffcb297 100644 --- a/core/config/application_config.go +++ b/core/config/application_config.go @@ -65,6 +65,8 @@ type ApplicationConfig struct { ModelsURL []string WatchDogBusyTimeout, WatchDogIdleTimeout time.Duration + + MachineTag string } type AppOption func(*ApplicationConfig) @@ -94,6 +96,12 @@ func WithModelPath(path string) AppOption { } } +func WithMachineTag(tag string) AppOption { + return func(o *ApplicationConfig) { + o.MachineTag = tag + } +} + func WithCors(b bool) AppOption { return func(o *ApplicationConfig) { o.CORS = b diff --git a/core/http/app.go b/core/http/app.go index 47d89a10..d1e80f8d 100644 --- a/core/http/app.go +++ b/core/http/app.go @@ -89,6 +89,14 @@ func API(application *application.Application) (*fiber.App, error) { router.Use(middleware.StripPathPrefix()) + if application.ApplicationConfig().MachineTag != "" { + router.Use(func(c *fiber.Ctx) error { + c.Response().Header.Set("Machine-Tag", application.ApplicationConfig().MachineTag) + + return c.Next() + }) + } + router.Hooks().OnListen(func(listenData fiber.ListenData) error { scheme := "http" if listenData.TLS { diff --git a/core/http/endpoints/localai/tts.go b/core/http/endpoints/localai/tts.go index 7c73c633..9116f9fa 100644 --- a/core/http/endpoints/localai/tts.go +++ b/core/http/endpoints/localai/tts.go @@ -24,7 +24,6 @@ import ( // @Router /tts [post] func TTSEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error { return func(c 
*fiber.Ctx) error { - input := new(schema.TTSRequest) // Get input data from the request body diff --git a/core/http/endpoints/localai/vad.go b/core/http/endpoints/localai/vad.go index c5a5d929..2ed6125c 100644 --- a/core/http/endpoints/localai/vad.go +++ b/core/http/endpoints/localai/vad.go @@ -19,7 +19,6 @@ import ( // @Router /vad [post] func VADEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error { return func(c *fiber.Ctx) error { - input := new(schema.VADRequest) // Get input data from the request body diff --git a/core/http/endpoints/openai/chat.go b/core/http/endpoints/openai/chat.go index c2b201bd..cbce369a 100644 --- a/core/http/endpoints/openai/chat.go +++ b/core/http/endpoints/openai/chat.go @@ -30,7 +30,7 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, evaluat var id, textContentToReturn string var created int - process := func(s string, req *schema.OpenAIRequest, config *config.BackendConfig, loader *model.ModelLoader, responses chan schema.OpenAIResponse) { + process := func(s string, req *schema.OpenAIRequest, config *config.BackendConfig, loader *model.ModelLoader, responses chan schema.OpenAIResponse, extraUsage bool) { initialMessage := schema.OpenAIResponse{ ID: id, Created: created, @@ -40,18 +40,24 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, evaluat } responses <- initialMessage - ComputeChoices(req, s, config, startupOptions, loader, func(s string, c *[]schema.Choice) {}, func(s string, usage backend.TokenUsage) bool { + ComputeChoices(req, s, config, startupOptions, loader, func(s string, c *[]schema.Choice) {}, func(s string, tokenUsage backend.TokenUsage) bool { + usage := schema.OpenAIUsage{ + PromptTokens: tokenUsage.Prompt, + CompletionTokens: tokenUsage.Completion, + TotalTokens: tokenUsage.Prompt + tokenUsage.Completion, + } + if extraUsage { + usage.TimingTokenGeneration = tokenUsage.TimingTokenGeneration + usage.TimingPromptProcessing = tokenUsage.TimingPromptProcessing + } + resp := schema.OpenAIResponse{ ID: id, Created: created, Model: req.Model, // we have to return what the user sent here, due to OpenAI spec. 
Choices: []schema.Choice{{Delta: &schema.Message{Content: &s}, Index: 0}}, Object: "chat.completion.chunk", - Usage: schema.OpenAIUsage{ - PromptTokens: usage.Prompt, - CompletionTokens: usage.Completion, - TotalTokens: usage.Prompt + usage.Completion, - }, + Usage: usage, } responses <- resp @@ -59,7 +65,7 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, evaluat }) close(responses) } - processTools := func(noAction string, prompt string, req *schema.OpenAIRequest, config *config.BackendConfig, loader *model.ModelLoader, responses chan schema.OpenAIResponse) { + processTools := func(noAction string, prompt string, req *schema.OpenAIRequest, config *config.BackendConfig, loader *model.ModelLoader, responses chan schema.OpenAIResponse, extraUsage bool) { result := "" _, tokenUsage, _ := ComputeChoices(req, prompt, config, startupOptions, loader, func(s string, c *[]schema.Choice) {}, func(s string, usage backend.TokenUsage) bool { result += s @@ -90,6 +96,15 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, evaluat log.Error().Err(err).Msg("error handling question") return } + usage := schema.OpenAIUsage{ + PromptTokens: tokenUsage.Prompt, + CompletionTokens: tokenUsage.Completion, + TotalTokens: tokenUsage.Prompt + tokenUsage.Completion, + } + if extraUsage { + usage.TimingTokenGeneration = tokenUsage.TimingTokenGeneration + usage.TimingPromptProcessing = tokenUsage.TimingPromptProcessing + } resp := schema.OpenAIResponse{ ID: id, @@ -97,11 +112,7 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, evaluat Model: req.Model, // we have to return what the user sent here, due to OpenAI spec. Choices: []schema.Choice{{Delta: &schema.Message{Content: &result}, Index: 0}}, Object: "chat.completion.chunk", - Usage: schema.OpenAIUsage{ - PromptTokens: tokenUsage.Prompt, - CompletionTokens: tokenUsage.Completion, - TotalTokens: tokenUsage.Prompt + tokenUsage.Completion, - }, + Usage: usage, } responses <- resp @@ -170,6 +181,9 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, evaluat } c.Set("X-Correlation-ID", correlationID) + // Opt-in extra usage flag + extraUsage := c.Get("LocalAI-Extra-Usage", "") != "" + modelFile, input, err := readRequest(c, cl, ml, startupOptions, true) if err != nil { return fmt.Errorf("failed reading parameters from request:%w", err) @@ -319,9 +333,9 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, evaluat responses := make(chan schema.OpenAIResponse) if !shouldUseFn { - go process(predInput, input, config, ml, responses) + go process(predInput, input, config, ml, responses, extraUsage) } else { - go processTools(noActionName, predInput, input, config, ml, responses) + go processTools(noActionName, predInput, input, config, ml, responses, extraUsage) } c.Context().SetBodyStreamWriter(fasthttp.StreamWriter(func(w *bufio.Writer) { @@ -449,6 +463,15 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, evaluat if err != nil { return err } + usage := schema.OpenAIUsage{ + PromptTokens: tokenUsage.Prompt, + CompletionTokens: tokenUsage.Completion, + TotalTokens: tokenUsage.Prompt + tokenUsage.Completion, + } + if extraUsage { + usage.TimingTokenGeneration = tokenUsage.TimingTokenGeneration + usage.TimingPromptProcessing = tokenUsage.TimingPromptProcessing + } resp := &schema.OpenAIResponse{ ID: id, @@ -456,11 +479,7 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, evaluat Model: input.Model, // we 
have to return what the user sent here, due to OpenAI spec. Choices: result, Object: "chat.completion", - Usage: schema.OpenAIUsage{ - PromptTokens: tokenUsage.Prompt, - CompletionTokens: tokenUsage.Completion, - TotalTokens: tokenUsage.Prompt + tokenUsage.Completion, - }, + Usage: usage, } respData, _ := json.Marshal(resp) log.Debug().Msgf("Response: %s", respData) diff --git a/core/http/endpoints/openai/completion.go b/core/http/endpoints/openai/completion.go index 04ebc847..339e9bc2 100644 --- a/core/http/endpoints/openai/completion.go +++ b/core/http/endpoints/openai/completion.go @@ -30,8 +30,17 @@ func CompletionEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, e id := uuid.New().String() created := int(time.Now().Unix()) - process := func(s string, req *schema.OpenAIRequest, config *config.BackendConfig, loader *model.ModelLoader, responses chan schema.OpenAIResponse) { - ComputeChoices(req, s, config, appConfig, loader, func(s string, c *[]schema.Choice) {}, func(s string, usage backend.TokenUsage) bool { + process := func(s string, req *schema.OpenAIRequest, config *config.BackendConfig, loader *model.ModelLoader, responses chan schema.OpenAIResponse, extraUsage bool) { + ComputeChoices(req, s, config, appConfig, loader, func(s string, c *[]schema.Choice) {}, func(s string, tokenUsage backend.TokenUsage) bool { + usage := schema.OpenAIUsage{ + PromptTokens: tokenUsage.Prompt, + CompletionTokens: tokenUsage.Completion, + TotalTokens: tokenUsage.Prompt + tokenUsage.Completion, + } + if extraUsage { + usage.TimingTokenGeneration = tokenUsage.TimingTokenGeneration + usage.TimingPromptProcessing = tokenUsage.TimingPromptProcessing + } resp := schema.OpenAIResponse{ ID: id, Created: created, @@ -43,11 +52,7 @@ func CompletionEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, e }, }, Object: "text_completion", - Usage: schema.OpenAIUsage{ - PromptTokens: usage.Prompt, - CompletionTokens: usage.Completion, - TotalTokens: usage.Prompt + usage.Completion, - }, + Usage: usage, } log.Debug().Msgf("Sending goroutine: %s", s) @@ -60,6 +65,10 @@ func CompletionEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, e return func(c *fiber.Ctx) error { // Add Correlation c.Set("X-Correlation-ID", id) + + // Opt-in extra usage flag + extraUsage := c.Get("LocalAI-Extra-Usage", "") != "" + modelFile, input, err := readRequest(c, cl, ml, appConfig, true) if err != nil { return fmt.Errorf("failed reading parameters from request:%w", err) @@ -113,7 +122,7 @@ func CompletionEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, e responses := make(chan schema.OpenAIResponse) - go process(predInput, input, config, ml, responses) + go process(predInput, input, config, ml, responses, extraUsage) c.Context().SetBodyStreamWriter(fasthttp.StreamWriter(func(w *bufio.Writer) { @@ -170,11 +179,20 @@ func CompletionEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, e return err } - totalTokenUsage.Prompt += tokenUsage.Prompt - totalTokenUsage.Completion += tokenUsage.Completion + totalTokenUsage.TimingTokenGeneration += tokenUsage.TimingTokenGeneration + totalTokenUsage.TimingPromptProcessing += tokenUsage.TimingPromptProcessing result = append(result, r...) 
} + usage := schema.OpenAIUsage{ + PromptTokens: totalTokenUsage.Prompt, + CompletionTokens: totalTokenUsage.Completion, + TotalTokens: totalTokenUsage.Prompt + totalTokenUsage.Completion, + } + if extraUsage { + usage.TimingTokenGeneration = totalTokenUsage.TimingTokenGeneration + usage.TimingPromptProcessing = totalTokenUsage.TimingPromptProcessing + } resp := &schema.OpenAIResponse{ ID: id, @@ -182,11 +200,7 @@ func CompletionEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, e Model: input.Model, // we have to return what the user sent here, due to OpenAI spec. Choices: result, Object: "text_completion", - Usage: schema.OpenAIUsage{ - PromptTokens: totalTokenUsage.Prompt, - CompletionTokens: totalTokenUsage.Completion, - TotalTokens: totalTokenUsage.Prompt + totalTokenUsage.Completion, - }, + Usage: usage, } jsonResult, _ := json.Marshal(resp) diff --git a/core/http/endpoints/openai/edit.go b/core/http/endpoints/openai/edit.go index a6d609fb..e10a12d1 100644 --- a/core/http/endpoints/openai/edit.go +++ b/core/http/endpoints/openai/edit.go @@ -25,6 +25,9 @@ import ( func EditEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, evaluator *templates.Evaluator, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error { return func(c *fiber.Ctx) error { + // Opt-in extra usage flag + extraUsage := c.Get("LocalAI-Extra-Usage", "") != "" + modelFile, input, err := readRequest(c, cl, ml, appConfig, true) if err != nil { return fmt.Errorf("failed reading parameters from request:%w", err) @@ -61,8 +64,20 @@ func EditEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, evaluat totalTokenUsage.Prompt += tokenUsage.Prompt totalTokenUsage.Completion += tokenUsage.Completion + totalTokenUsage.TimingTokenGeneration += tokenUsage.TimingTokenGeneration + totalTokenUsage.TimingPromptProcessing += tokenUsage.TimingPromptProcessing + result = append(result, r...) } + usage := schema.OpenAIUsage{ + PromptTokens: totalTokenUsage.Prompt, + CompletionTokens: totalTokenUsage.Completion, + TotalTokens: totalTokenUsage.Prompt + totalTokenUsage.Completion, + } + if extraUsage { + usage.TimingTokenGeneration = totalTokenUsage.TimingTokenGeneration + usage.TimingPromptProcessing = totalTokenUsage.TimingPromptProcessing + } id := uuid.New().String() created := int(time.Now().Unix()) @@ -72,11 +87,7 @@ func EditEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, evaluat Model: input.Model, // we have to return what the user sent here, due to OpenAI spec. 
Choices: result, Object: "edit", - Usage: schema.OpenAIUsage{ - PromptTokens: totalTokenUsage.Prompt, - CompletionTokens: totalTokenUsage.Completion, - TotalTokens: totalTokenUsage.Prompt + totalTokenUsage.Completion, - }, + Usage: usage, } jsonResult, _ := json.Marshal(resp) diff --git a/core/http/endpoints/openai/inference.go b/core/http/endpoints/openai/inference.go index da75d3a1..f59e3b60 100644 --- a/core/http/endpoints/openai/inference.go +++ b/core/http/endpoints/openai/inference.go @@ -52,6 +52,8 @@ func ComputeChoices( tokenUsage.Prompt += prediction.Usage.Prompt tokenUsage.Completion += prediction.Usage.Completion + tokenUsage.TimingPromptProcessing += prediction.Usage.TimingPromptProcessing + tokenUsage.TimingTokenGeneration += prediction.Usage.TimingTokenGeneration finetunedResponse := backend.Finetune(*config, predInput, prediction.Response) cb(finetunedResponse, &result) diff --git a/core/http/endpoints/openai/list.go b/core/http/endpoints/openai/list.go index 80dcb3e4..9d21f8fe 100644 --- a/core/http/endpoints/openai/list.go +++ b/core/http/endpoints/openai/list.go @@ -12,7 +12,7 @@ import ( // @Summary List and describe the various models available in the API. // @Success 200 {object} schema.ModelsDataResponse "Response" // @Router /v1/models [get] -func ListModelsEndpoint(bcl *config.BackendConfigLoader, ml *model.ModelLoader) func(ctx *fiber.Ctx) error { +func ListModelsEndpoint(bcl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(ctx *fiber.Ctx) error { return func(c *fiber.Ctx) error { // If blank, no filter is applied. filter := c.Query("filter") diff --git a/core/http/routes/openai.go b/core/http/routes/openai.go index 5ff301b6..a48ced65 100644 --- a/core/http/routes/openai.go +++ b/core/http/routes/openai.go @@ -130,6 +130,6 @@ func RegisterOpenAIRoutes(app *fiber.App, } // List models - app.Get("/v1/models", openai.ListModelsEndpoint(application.BackendLoader(), application.ModelLoader())) - app.Get("/models", openai.ListModelsEndpoint(application.BackendLoader(), application.ModelLoader())) + app.Get("/v1/models", openai.ListModelsEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig())) + app.Get("/models", openai.ListModelsEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig())) } diff --git a/core/schema/openai.go b/core/schema/openai.go index 15bcd13d..b06120ae 100644 --- a/core/schema/openai.go +++ b/core/schema/openai.go @@ -23,6 +23,9 @@ type OpenAIUsage struct { PromptTokens int `json:"prompt_tokens"` CompletionTokens int `json:"completion_tokens"` TotalTokens int `json:"total_tokens"` + // Extra timing data, disabled by default as is't not a part of OpenAI specification + TimingPromptProcessing float64 `json:"timing_prompt_processing,omitempty"` + TimingTokenGeneration float64 `json:"timing_token_generation,omitempty"` } type Item struct { From a761e01944b261e2181e5568bb263324d41218c5 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Fri, 17 Jan 2025 18:16:17 +0100 Subject: [PATCH 043/679] chore: alias transformers-musicgen to transformers (#4623) chore: alias transformers-muscigen to transformers Signed-off-by: Ettore Di Giacinto --- pkg/model/initializers.go | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pkg/model/initializers.go b/pkg/model/initializers.go index 3d03514a..f4675050 100644 --- a/pkg/model/initializers.go +++ b/pkg/model/initializers.go @@ -26,6 +26,7 @@ var Aliases map[string]string = 
map[string]string{ "llama": LLamaCPP, "embedded-store": LocalStoreBackend, "langchain-huggingface": LCHuggingFaceBackend, + "transformers-musicgen": TransformersBackend, } var AutoDetect = os.Getenv("DISABLE_AUTODETECT") != "true" @@ -51,7 +52,8 @@ const ( PiperBackend = "piper" LCHuggingFaceBackend = "huggingface" - LocalStoreBackend = "local-store" + TransformersBackend = "transformers" + LocalStoreBackend = "local-store" ) func backendPath(assetDir, backend string) string { From ee7904f170786df1ef30e1ddca04f432fb5ac1e6 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Fri, 17 Jan 2025 19:33:25 +0100 Subject: [PATCH 044/679] feat(transformers): add support to OuteTTS (#4622) Signed-off-by: Ettore Di Giacinto --- backend/python/transformers/backend.py | 68 +++++++++++++++++-- .../python/transformers/requirements-cpu.txt | 4 +- .../transformers/requirements-cublas11.txt | 4 +- .../transformers/requirements-cublas12.txt | 4 +- .../transformers/requirements-hipblas.txt | 4 +- .../transformers/requirements-intel.txt | 4 +- backend/python/transformers/requirements.txt | 4 +- 7 files changed, 82 insertions(+), 10 deletions(-) diff --git a/backend/python/transformers/backend.py b/backend/python/transformers/backend.py index 3f6838ad..27257934 100644 --- a/backend/python/transformers/backend.py +++ b/backend/python/transformers/backend.py @@ -24,7 +24,7 @@ XPU=os.environ.get("XPU", "0") == "1" from transformers import AutoTokenizer, AutoModel, set_seed, TextIteratorStreamer, StoppingCriteriaList, StopStringCriteria from transformers import AutoProcessor, MusicgenForConditionalGeneration from scipy.io import wavfile - +import outetts _ONE_DAY_IN_SECONDS = 60 * 60 * 24 @@ -87,6 +87,7 @@ class BackendServicer(backend_pb2_grpc.BackendServicer): self.CUDA = torch.cuda.is_available() self.OV=False + self.OuteTTS=False device_map="cpu" @@ -195,7 +196,45 @@ class BackendServicer(backend_pb2_grpc.BackendServicer): self.OV = True elif request.Type == "MusicgenForConditionalGeneration": self.processor = AutoProcessor.from_pretrained(model_name) - self.model = MusicgenForConditionalGeneration.from_pretrained(model_name) + self.model = MusicgenForConditionalGeneration.from_pretrained(model_name) + elif request.Type == "OuteTTS": + options = request.Options + MODELNAME = "OuteAI/OuteTTS-0.3-1B" + TOKENIZER = "OuteAI/OuteTTS-0.3-1B" + VERSION = "0.3" + SPEAKER = "en_male_1" + for opt in options: + if opt.startswith("tokenizer:"): + TOKENIZER = opt.split(":")[1] + break + if opt.startswith("version:"): + VERSION = opt.split(":")[1] + break + if opt.startswith("speaker:"): + SPEAKER = opt.split(":")[1] + break + + if model_name != "": + MODELNAME = model_name + + # Configure the model + model_config = outetts.HFModelConfig_v2( + model_path=MODELNAME, + tokenizer_path=TOKENIZER + ) + # Initialize the interface + self.interface = outetts.InterfaceHF(model_version=VERSION, cfg=model_config) + self.OuteTTS = True + + self.interface.print_default_speakers() + if request.AudioPath: + if os.path.isabs(request.AudioPath): + self.AudioPath = request.AudioPath + else: + self.AudioPath = os.path.join(request.ModelPath, request.AudioPath) + self.speaker = self.interface.create_speaker(audio_path=self.AudioPath) + else: + self.speaker = self.interface.load_default_speaker(name=SPEAKER) else: print("Automodel", file=sys.stderr) self.model = AutoModel.from_pretrained(model_name, @@ -206,7 +245,7 @@ class BackendServicer(backend_pb2_grpc.BackendServicer): torch_dtype=compute) if request.ContextSize > 0: self.max_tokens = 
request.ContextSize - elif request.Type != "MusicgenForConditionalGeneration": + elif hasattr(self.model, 'config') and hasattr(self.model.config, 'max_position_embeddings'): self.max_tokens = self.model.config.max_position_embeddings else: self.max_tokens = 512 @@ -445,9 +484,30 @@ class BackendServicer(backend_pb2_grpc.BackendServicer): return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}") return backend_pb2.Result(success=True) + def OuteTTS(self, request, context): + try: + print("[OuteTTS] generating TTS", file=sys.stderr) + gen_cfg = outetts.GenerationConfig( + text="Speech synthesis is the artificial production of human speech.", + temperature=0.1, + repetition_penalty=1.1, + max_length=self.max_tokens, + speaker=self.speaker, + # voice_characteristics="upbeat enthusiasm, friendliness, clarity, professionalism, and trustworthiness" + ) + output = self.interface.generate(config=gen_cfg) + print("[OuteTTS] Generated TTS", file=sys.stderr) + output.save(request.dst) + print("[OuteTTS] TTS done", file=sys.stderr) + except Exception as err: + return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}") + return backend_pb2.Result(success=True) # The TTS endpoint is older, and provides fewer features, but exists for compatibility reasons def TTS(self, request, context): + if self.OuteTTS: + return self.OuteTTS(request, context) + model_name = request.model try: if self.processor is None: @@ -463,7 +523,7 @@ class BackendServicer(backend_pb2_grpc.BackendServicer): padding=True, return_tensors="pt", ) - tokens = 512 # No good place to set the "length" in TTS, so use 10s as a sane default + tokens = self.max_tokens # No good place to set the "length" in TTS, so use 10s as a sane default audio_values = self.model.generate(**inputs, max_new_tokens=tokens) print("[transformers-musicgen] TTS generated!", file=sys.stderr) sampling_rate = self.model.config.audio_encoder.sampling_rate diff --git a/backend/python/transformers/requirements-cpu.txt b/backend/python/transformers/requirements-cpu.txt index f99aa18f..56b77325 100644 --- a/backend/python/transformers/requirements-cpu.txt +++ b/backend/python/transformers/requirements-cpu.txt @@ -1,4 +1,6 @@ torch==2.4.1 +llvmlite==0.43.0 accelerate transformers -bitsandbytes \ No newline at end of file +bitsandbytes +outetts \ No newline at end of file diff --git a/backend/python/transformers/requirements-cublas11.txt b/backend/python/transformers/requirements-cublas11.txt index 2c1d0755..924b0086 100644 --- a/backend/python/transformers/requirements-cublas11.txt +++ b/backend/python/transformers/requirements-cublas11.txt @@ -1,5 +1,7 @@ --extra-index-url https://download.pytorch.org/whl/cu118 torch==2.4.1+cu118 +llvmlite==0.43.0 accelerate transformers -bitsandbytes \ No newline at end of file +bitsandbytes +outetts \ No newline at end of file diff --git a/backend/python/transformers/requirements-cublas12.txt b/backend/python/transformers/requirements-cublas12.txt index f99aa18f..0feb3d81 100644 --- a/backend/python/transformers/requirements-cublas12.txt +++ b/backend/python/transformers/requirements-cublas12.txt @@ -1,4 +1,6 @@ torch==2.4.1 accelerate +llvmlite==0.43.0 transformers -bitsandbytes \ No newline at end of file +bitsandbytes +outetts \ No newline at end of file diff --git a/backend/python/transformers/requirements-hipblas.txt b/backend/python/transformers/requirements-hipblas.txt index f9577fab..fa65fb8e 100644 --- a/backend/python/transformers/requirements-hipblas.txt +++ 
b/backend/python/transformers/requirements-hipblas.txt @@ -2,4 +2,6 @@ torch==2.4.1+rocm6.0 accelerate transformers -bitsandbytes \ No newline at end of file +llvmlite==0.43.0 +bitsandbytes +outetts \ No newline at end of file diff --git a/backend/python/transformers/requirements-intel.txt b/backend/python/transformers/requirements-intel.txt index dd683cd9..4a295599 100644 --- a/backend/python/transformers/requirements-intel.txt +++ b/backend/python/transformers/requirements-intel.txt @@ -3,5 +3,7 @@ intel-extension-for-pytorch==2.3.110+xpu torch==2.3.1+cxx11.abi oneccl_bind_pt==2.3.100+xpu optimum[openvino] +llvmlite==0.43.0 intel-extension-for-transformers -bitsandbytes \ No newline at end of file +bitsandbytes +outetts \ No newline at end of file diff --git a/backend/python/transformers/requirements.txt b/backend/python/transformers/requirements.txt index 262dd17a..ba1d88e7 100644 --- a/backend/python/transformers/requirements.txt +++ b/backend/python/transformers/requirements.txt @@ -2,4 +2,6 @@ grpcio==1.69.0 protobuf certifi setuptools -scipy==1.14.0 \ No newline at end of file +scipy==1.14.0 +numpy>=2.0.0 +numba==0.60.0 \ No newline at end of file From cbdbe59f164a06ad4e994444671b8dfdbcfc120f Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 17 Jan 2025 22:14:11 +0000 Subject: [PATCH 045/679] chore(deps): Bump scipy from 1.14.0 to 1.15.1 in /backend/python/transformers (#4621) chore(deps): Bump scipy in /backend/python/transformers Bumps [scipy](https://github.com/scipy/scipy) from 1.14.0 to 1.15.1. - [Release notes](https://github.com/scipy/scipy/releases) - [Commits](https://github.com/scipy/scipy/compare/v1.14.0...v1.15.1) --- updated-dependencies: - dependency-name: scipy dependency-type: direct:production update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- backend/python/transformers/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/python/transformers/requirements.txt b/backend/python/transformers/requirements.txt index ba1d88e7..d353e4d0 100644 --- a/backend/python/transformers/requirements.txt +++ b/backend/python/transformers/requirements.txt @@ -2,6 +2,6 @@ grpcio==1.69.0 protobuf certifi setuptools -scipy==1.14.0 +scipy==1.15.1 numpy>=2.0.0 numba==0.60.0 \ No newline at end of file From 895cd7c76aa83b84f64b07802682e910a54b0d42 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Sat, 18 Jan 2025 08:57:49 +0100 Subject: [PATCH 046/679] feat(swagger): update swagger (#4625) Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- swagger/docs.go | 7 +++++++ swagger/swagger.json | 7 +++++++ swagger/swagger.yaml | 6 ++++++ 3 files changed, 20 insertions(+) diff --git a/swagger/docs.go b/swagger/docs.go index 1a5943c4..13a3d3f3 100644 --- a/swagger/docs.go +++ b/swagger/docs.go @@ -1752,6 +1752,13 @@ const docTemplate = `{ "prompt_tokens": { "type": "integer" }, + "timing_prompt_processing": { + "description": "Extra timing data, disabled by default as is't not a part of OpenAI specification", + "type": "number" + }, + "timing_token_generation": { + "type": "number" + }, "total_tokens": { "type": "integer" } diff --git a/swagger/swagger.json b/swagger/swagger.json index dc902e11..1c38e9da 100644 --- a/swagger/swagger.json +++ b/swagger/swagger.json @@ -1745,6 +1745,13 @@ "prompt_tokens": { "type": "integer" }, + "timing_prompt_processing": { + "description": "Extra timing data, disabled by default as is't not a part of OpenAI specification", + "type": "number" + }, + "timing_token_generation": { + "type": "number" + }, "total_tokens": { "type": "integer" } diff --git a/swagger/swagger.yaml b/swagger/swagger.yaml index a447f7cc..1692f4bb 100644 --- a/swagger/swagger.yaml +++ b/swagger/swagger.yaml @@ -646,6 +646,12 @@ definitions: type: integer prompt_tokens: type: integer + timing_prompt_processing: + description: Extra timing data, disabled by default as is't not a part of + OpenAI specification + type: number + timing_token_generation: + type: number total_tokens: type: integer type: object From 96306a39a05894dee9ceb6a97f4215f45d359559 Mon Sep 17 00:00:00 2001 From: mintyleaf Date: Sat, 18 Jan 2025 11:58:38 +0400 Subject: [PATCH 047/679] chore(docs): extra-Usage and Machine-Tag docs (#4627) Rename LocalAI-Extra-Usage -> Extra-Usage, add MACHINE_TAG as cli flag option, add docs about extra-usage and machine-tag Signed-off-by: mintyleaf --- core/cli/run.go | 2 +- core/http/endpoints/openai/chat.go | 2 +- core/http/endpoints/openai/completion.go | 2 +- core/http/endpoints/openai/edit.go | 2 +- docs/content/docs/advanced/advanced-usage.md | 31 +++++++++++++++++++- 5 files changed, 34 insertions(+), 5 deletions(-) diff --git a/core/cli/run.go b/core/cli/run.go index b86fe2a6..279ff94b 100644 --- a/core/cli/run.go +++ b/core/cli/run.go @@ -70,7 +70,7 @@ type RunCMD struct { WatchdogBusyTimeout string `env:"LOCALAI_WATCHDOG_BUSY_TIMEOUT,WATCHDOG_BUSY_TIMEOUT" default:"5m" help:"Threshold beyond which a busy backend should be stopped" group:"backends"` Federated bool `env:"LOCALAI_FEDERATED,FEDERATED" help:"Enable federated instance" 
group:"federated"` DisableGalleryEndpoint bool `env:"LOCALAI_DISABLE_GALLERY_ENDPOINT,DISABLE_GALLERY_ENDPOINT" help:"Disable the gallery endpoints" group:"api"` - MachineTag string `env:"LOCALAI_MACHINE_TAG" help:"Add Machine-Tag header to each response which is useful to track the machine in the P2P network" group:"api"` + MachineTag string `env:"LOCALAI_MACHINE_TAG,MACHINE_TAG" help:"Add Machine-Tag header to each response which is useful to track the machine in the P2P network" group:"api"` LoadToMemory []string `env:"LOCALAI_LOAD_TO_MEMORY,LOAD_TO_MEMORY" help:"A list of models to load into memory at startup" group:"models"` } diff --git a/core/http/endpoints/openai/chat.go b/core/http/endpoints/openai/chat.go index cbce369a..3b8d3056 100644 --- a/core/http/endpoints/openai/chat.go +++ b/core/http/endpoints/openai/chat.go @@ -182,7 +182,7 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, evaluat c.Set("X-Correlation-ID", correlationID) // Opt-in extra usage flag - extraUsage := c.Get("LocalAI-Extra-Usage", "") != "" + extraUsage := c.Get("Extra-Usage", "") != "" modelFile, input, err := readRequest(c, cl, ml, startupOptions, true) if err != nil { diff --git a/core/http/endpoints/openai/completion.go b/core/http/endpoints/openai/completion.go index 339e9bc2..a353a0a1 100644 --- a/core/http/endpoints/openai/completion.go +++ b/core/http/endpoints/openai/completion.go @@ -67,7 +67,7 @@ func CompletionEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, e c.Set("X-Correlation-ID", id) // Opt-in extra usage flag - extraUsage := c.Get("LocalAI-Extra-Usage", "") != "" + extraUsage := c.Get("Extra-Usage", "") != "" modelFile, input, err := readRequest(c, cl, ml, appConfig, true) if err != nil { diff --git a/core/http/endpoints/openai/edit.go b/core/http/endpoints/openai/edit.go index e10a12d1..28a3597c 100644 --- a/core/http/endpoints/openai/edit.go +++ b/core/http/endpoints/openai/edit.go @@ -26,7 +26,7 @@ func EditEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, evaluat return func(c *fiber.Ctx) error { // Opt-in extra usage flag - extraUsage := c.Get("LocalAI-Extra-Usage", "") != "" + extraUsage := c.Get("Extra-Usage", "") != "" modelFile, input, err := readRequest(c, cl, ml, appConfig, true) if err != nil { diff --git a/docs/content/docs/advanced/advanced-usage.md b/docs/content/docs/advanced/advanced-usage.md index 35d3a2e4..dd9894ef 100644 --- a/docs/content/docs/advanced/advanced-usage.md +++ b/docs/content/docs/advanced/advanced-usage.md @@ -520,6 +520,7 @@ In the help text below, BASEPATH is the location that local-ai is being executed | --upload-limit | 15 | Default upload-limit in MB | $LOCALAI_UPLOAD_LIMIT | | --api-keys | API-KEYS,... | List of API Keys to enable API authentication. When this is set, all the requests must be authenticated with one of these API keys | $LOCALAI_API_KEY | | --disable-welcome | | Disable welcome pages | $LOCALAI_DISABLE_WELCOME | +| --machine-tag | | If not empty - put that string to Machine-Tag header in each response. 
Useful to track response from different machines using multiple P2P federated nodes | $LOCALAI_MACHINE_TAG | #### Backend Flags | Parameter | Default | Description | Environment Variable | @@ -553,6 +554,34 @@ LOCALAI_MODELS_PATH=/mnt/storage/localai/models LOCALAI_F16=true ``` +### Request headers + +You can use 'Extra-Usage' request header key presence ('Extra-Usage: true') to receive inference timings in milliseconds extending default OpenAI response model in the usage field: +``` +... +{ + "id": "...", + "created": ..., + "model": "...", + "choices": [ + { + ... + }, + ... + ], + "object": "...", + "usage": { + "prompt_tokens": ..., + "completion_tokens": ..., + "total_tokens": ..., + // Extra-Usage header key will include these two float fields: + "timing_prompt_processing: ..., + "timing_token_generation": ..., + }, +} +... +``` + ### Extra backends LocalAI can be extended with extra backends. The backends are implemented as `gRPC` services and can be written in any language. The container images that are built and published on [quay.io](https://quay.io/repository/go-skynet/local-ai?tab=tags) contain a set of images split in core and extra. By default Images bring all the dependencies and backends supported by LocalAI (we call those `extra` images). The `-core` images instead bring only the strictly necessary dependencies to run LocalAI without only a core set of backends. @@ -616,4 +645,4 @@ Note that, for llama.cpp you need to set accordingly `LLAMACPP_PARALLEL` to the LocalAI will automatically discover the CPU flagset available in your host and will use the most optimized version of the backends. -If you want to disable this behavior, you can set `DISABLE_AUTODETECT` to `true` in the environment variables. \ No newline at end of file +If you want to disable this behavior, you can set `DISABLE_AUTODETECT` to `true` in the environment variables. 
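[Editorial example] The patches above add the opt-in `Extra-Usage` request header (any non-empty value enables the timing fields in the `usage` object) and the optional `Machine-Tag` response header set when the server is started with `--machine-tag` / `LOCALAI_MACHINE_TAG`. The sketch below shows how a client might observe both; the base URL, port, and model name are illustrative assumptions, not values taken from the patches.

```python
# Minimal client sketch (not part of any patch above): exercises the opt-in
# "Extra-Usage" request header and the optional "Machine-Tag" response header.
# Base URL, port, and model name are assumptions for illustration.
import requests

resp = requests.post(
    "http://localhost:8080/v1/chat/completions",
    headers={"Extra-Usage": "true"},  # any non-empty value enables the timing fields
    json={
        "model": "phi-4",  # assumed model name; use one installed on your instance
        "messages": [{"role": "user", "content": "Hello"}],
    },
)
usage = resp.json().get("usage", {})
# Present only when the Extra-Usage header was sent (values are in milliseconds):
print(usage.get("timing_prompt_processing"), usage.get("timing_token_generation"))
# Present only when the server was started with --machine-tag / LOCALAI_MACHINE_TAG:
print(resp.headers.get("Machine-Tag"))
```

Because the timing fields are tagged `omitempty`, responses issued without the header remain identical to the standard OpenAI usage object.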
From 958f6eb722ca027699238543c10d443451745bb4 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sat, 18 Jan 2025 11:55:13 +0100 Subject: [PATCH 048/679] chore(llama.cpp): update dependency (#4628) Update to '3edfa7d3753c29e44b964c0ff424d2ea8d5fdee6' and adapt to upstream changes Signed-off-by: Ettore Di Giacinto --- Makefile | 2 +- backend/cpp/llama/grpc-server.cpp | 28 +++++++++++++++++++++++++++- 2 files changed, 28 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 03468ffb..1f1ffb3e 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ DETECT_LIBS?=true # llama.cpp versions GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be -CPPLLAMA_VERSION?=4dbc8b9cb71876e005724f4e8f73a3544646bcf5 +CPPLLAMA_VERSION?=3edfa7d3753c29e44b964c0ff424d2ea8d5fdee6 # whisper.cpp version WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp diff --git a/backend/cpp/llama/grpc-server.cpp b/backend/cpp/llama/grpc-server.cpp index 4e75e7b0..9aeb34db 100644 --- a/backend/cpp/llama/grpc-server.cpp +++ b/backend/cpp/llama/grpc-server.cpp @@ -134,6 +134,32 @@ static std::string tokens_to_output_formatted_string(const llama_context *ctx, c return out; } +// Adds an RPC server +// https://github.com/ggerganov/llama.cpp/compare/4dbc8b9cb71876e005724f4e8f73a3544646bcf5..3edfa7d3753c29e44b964c0ff424d2ea8d5fdee6 +static void add_rpc_devices(std::string servers) { + auto rpc_servers = string_split(servers, ','); + if (rpc_servers.empty()) { + throw std::invalid_argument("no RPC servers specified"); + } + ggml_backend_reg_t rpc_reg = ggml_backend_reg_by_name("RPC"); + if (!rpc_reg) { + throw std::invalid_argument("failed to find RPC backend"); + } + typedef ggml_backend_dev_t (*ggml_backend_rpc_add_device_t)(const char * endpoint); + ggml_backend_rpc_add_device_t ggml_backend_rpc_add_device_fn = (ggml_backend_rpc_add_device_t) ggml_backend_reg_get_proc_address(rpc_reg, "ggml_backend_rpc_add_device"); + if (!ggml_backend_rpc_add_device_fn) { + throw std::invalid_argument("failed to find RPC device add function"); + } + for (const auto & server : rpc_servers) { + ggml_backend_dev_t dev = ggml_backend_rpc_add_device_fn(server.c_str()); + if (dev) { + ggml_backend_device_register(dev); + } else { + throw std::invalid_argument("failed to register RPC device"); + } + } +} + // convert a vector of completion_token_output to json static json probs_vector_to_json(const llama_context *ctx, const std::vector &probs) { @@ -2282,7 +2308,7 @@ static void params_parse(const backend::ModelOptions* request, const char *llama_grpc_servers = std::getenv("LLAMACPP_GRPC_SERVERS"); if (llama_grpc_servers != NULL) { - params.rpc_servers = std::string(llama_grpc_servers); + add_rpc_devices(std::string(llama_grpc_servers)); } // TODO: Add yarn From 4bd8434ae02d934d2ceee56d0779dc149bbb8bc0 Mon Sep 17 00:00:00 2001 From: Gianluca Boiano <491117+M0Rf30@users.noreply.github.com> Date: Sat, 18 Jan 2025 15:47:49 +0100 Subject: [PATCH 049/679] fix(docs): add missing `-core` suffix to sycl images (#4630) Signed-off-by: Gianluca Boiano --- docs/content/docs/getting-started/container-images.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/content/docs/getting-started/container-images.md b/docs/content/docs/getting-started/container-images.md index 25385f23..967fc28b 100644 --- a/docs/content/docs/getting-started/container-images.md +++ b/docs/content/docs/getting-started/container-images.md @@ -197,7 +197,7 @@ Images with `core` in the tag are 
smaller and do not contain any python dependen | --- | --- |-------------------------------------------------------------| | Latest images from the branch (development) | `quay.io/go-skynet/local-ai:master-sycl-f16` | `localai/localai:master-sycl-f16` | | Latest tag | `quay.io/go-skynet/local-ai:latest-gpu-intel-f16` | `localai/localai:latest-gpu-intel-f16` | -| Versioned image | `quay.io/go-skynet/local-ai:{{< version >}}-sycl-f16` | `localai/localai:{{< version >}}-sycl-f16` | +| Versioned image | `quay.io/go-skynet/local-ai:{{< version >}}-sycl-f16-core` | `localai/localai:{{< version >}}-sycl-f16-core` | | Versioned image including FFMpeg| `quay.io/go-skynet/local-ai:{{< version >}}-sycl-f16-ffmpeg` | `localai/localai:{{< version >}}-sycl-f16-ffmpeg` | | Versioned image including FFMpeg, no python | `quay.io/go-skynet/local-ai:{{< version >}}-sycl-f16-ffmpeg-core` | `localai/localai:{{< version >}}-sycl-f16-ffmpeg-core` | @@ -209,7 +209,7 @@ Images with `core` in the tag are smaller and do not contain any python dependen | --- | --- |-------------------------------------------------------------| | Latest images from the branch (development) | `quay.io/go-skynet/local-ai:master-sycl-f32` | `localai/localai:master-sycl-f32` | | Latest tag | `quay.io/go-skynet/local-ai:latest-gpu-intel-f32` | `localai/localai:latest-gpu-intel-f32` | -| Versioned image | `quay.io/go-skynet/local-ai:{{< version >}}-sycl-f32` | `localai/localai:{{< version >}}-sycl-f32` | +| Versioned image | `quay.io/go-skynet/local-ai:{{< version >}}-sycl-f32-core` | `localai/localai:{{< version >}}-sycl-f32-core` | | Versioned image including FFMpeg| `quay.io/go-skynet/local-ai:{{< version >}}-sycl-f32-ffmpeg` | `localai/localai:{{< version >}}-sycl-f32-ffmpeg` | | Versioned image including FFMpeg, no python | `quay.io/go-skynet/local-ai:{{< version >}}-sycl-f32-ffmpeg-core` | `localai/localai:{{< version >}}-sycl-f32-ffmpeg-core` | From 1e9bf19c8d4dff99c6c2cbcbddc4d50962c58a07 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sat, 18 Jan 2025 18:30:30 +0100 Subject: [PATCH 050/679] feat(transformers): merge sentencetransformers backend (#4624) * merge sentencetransformers Signed-off-by: Ettore Di Giacinto * Add alias to silently redirect sentencetransformers to transformers Signed-off-by: Ettore Di Giacinto * Add alias also for transformers-musicgen Signed-off-by: Ettore Di Giacinto * Drop from makefile Signed-off-by: Ettore Di Giacinto * Move tests from sentencetransformers Signed-off-by: Ettore Di Giacinto * Remove sentencetransformers Signed-off-by: Ettore Di Giacinto * Remove tests from CI (part of transformers) Signed-off-by: Ettore Di Giacinto * Do not always try to load the tokenizer Signed-off-by: Ettore Di Giacinto * Adapt tests Signed-off-by: Ettore Di Giacinto * Fix typo Signed-off-by: Ettore Di Giacinto * Tiny adjustments Signed-off-by: Ettore Di Giacinto --------- Signed-off-by: Ettore Di Giacinto --- .github/workflows/test-extra.yml | 24 ---- .github/workflows/test.yml | 3 +- Dockerfile | 5 +- Makefile | 15 +-- backend/python/sentencetransformers/Makefile | 31 ----- backend/python/sentencetransformers/README.md | 5 - .../python/sentencetransformers/backend.py | 114 ------------------ .../python/sentencetransformers/install.sh | 14 --- .../sentencetransformers/requirements-cpu.txt | 6 - .../requirements-cublas11.txt | 5 - .../requirements-cublas12.txt | 4 - .../requirements-hipblas.txt | 5 - .../requirements-intel.txt | 9 -- .../sentencetransformers/requirements.txt | 5 - 
backend/python/sentencetransformers/run.sh | 4 - backend/python/sentencetransformers/test.py | 81 ------------- backend/python/sentencetransformers/test.sh | 6 - backend/python/transformers/backend.py | 38 ++++-- .../python/transformers/requirements-cpu.txt | 3 +- .../transformers/requirements-cublas11.txt | 3 +- .../transformers/requirements-cublas12.txt | 3 +- .../transformers/requirements-hipblas.txt | 4 +- .../transformers/requirements-intel.txt | 3 +- backend/python/transformers/test.py | 36 ++++++ core/http/app_test.go | 2 +- pkg/model/initializers.go | 28 ++++- tests/models_fixtures/grpc.yaml | 2 +- 27 files changed, 104 insertions(+), 354 deletions(-) delete mode 100644 backend/python/sentencetransformers/Makefile delete mode 100644 backend/python/sentencetransformers/README.md delete mode 100755 backend/python/sentencetransformers/backend.py delete mode 100755 backend/python/sentencetransformers/install.sh delete mode 100644 backend/python/sentencetransformers/requirements-cpu.txt delete mode 100644 backend/python/sentencetransformers/requirements-cublas11.txt delete mode 100644 backend/python/sentencetransformers/requirements-cublas12.txt delete mode 100644 backend/python/sentencetransformers/requirements-hipblas.txt delete mode 100644 backend/python/sentencetransformers/requirements-intel.txt delete mode 100644 backend/python/sentencetransformers/requirements.txt delete mode 100755 backend/python/sentencetransformers/run.sh delete mode 100644 backend/python/sentencetransformers/test.py delete mode 100755 backend/python/sentencetransformers/test.sh diff --git a/.github/workflows/test-extra.yml b/.github/workflows/test-extra.yml index eacd3ab0..e99ea516 100644 --- a/.github/workflows/test-extra.yml +++ b/.github/workflows/test-extra.yml @@ -35,30 +35,6 @@ jobs: run: | make --jobs=5 --output-sync=target -C backend/python/transformers make --jobs=5 --output-sync=target -C backend/python/transformers test - - tests-sentencetransformers: - runs-on: ubuntu-latest - steps: - - name: Clone - uses: actions/checkout@v4 - with: - submodules: true - - name: Dependencies - run: | - sudo apt-get update - sudo apt-get install build-essential ffmpeg - # Install UV - curl -LsSf https://astral.sh/uv/install.sh | sh - sudo apt-get install -y ca-certificates cmake curl patch python3-pip - sudo apt-get install -y libopencv-dev - pip install --user --no-cache-dir grpcio-tools==1.64.1 - - - name: Test sentencetransformers - run: | - make --jobs=5 --output-sync=target -C backend/python/sentencetransformers - make --jobs=5 --output-sync=target -C backend/python/sentencetransformers test - - tests-rerankers: runs-on: ubuntu-latest steps: diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index ecef0569..0ee93afa 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -100,8 +100,7 @@ jobs: # The python3-grpc-tools package in 22.04 is too old pip install --user grpcio-tools - sudo rm -rfv /usr/bin/conda || true - PATH=$PATH:/opt/conda/bin make -C backend/python/sentencetransformers + make -C backend/python/transformers # Pre-build piper before we start tests in order to have shared libraries in place make sources/go-piper && \ diff --git a/Dockerfile b/Dockerfile index 9fb07516..4ddc921d 100644 --- a/Dockerfile +++ b/Dockerfile @@ -15,7 +15,7 @@ ARG TARGETARCH ARG TARGETVARIANT ENV DEBIAN_FRONTEND=noninteractive -ENV 
EXTERNAL_GRPC_BACKENDS="coqui:/build/backend/python/coqui/run.sh,huggingface-embeddings:/build/backend/python/sentencetransformers/run.sh,transformers:/build/backend/python/transformers/run.sh,sentencetransformers:/build/backend/python/sentencetransformers/run.sh,rerankers:/build/backend/python/rerankers/run.sh,autogptq:/build/backend/python/autogptq/run.sh,bark:/build/backend/python/bark/run.sh,diffusers:/build/backend/python/diffusers/run.sh,openvoice:/build/backend/python/openvoice/run.sh,kokoro:/build/backend/python/kokoro/run.sh,vllm:/build/backend/python/vllm/run.sh,mamba:/build/backend/python/mamba/run.sh,exllama2:/build/backend/python/exllama2/run.sh,parler-tts:/build/backend/python/parler-tts/run.sh" +ENV EXTERNAL_GRPC_BACKENDS="coqui:/build/backend/python/coqui/run.sh,transformers:/build/backend/python/transformers/run.sh,rerankers:/build/backend/python/rerankers/run.sh,autogptq:/build/backend/python/autogptq/run.sh,bark:/build/backend/python/bark/run.sh,diffusers:/build/backend/python/diffusers/run.sh,openvoice:/build/backend/python/openvoice/run.sh,kokoro:/build/backend/python/kokoro/run.sh,vllm:/build/backend/python/vllm/run.sh,mamba:/build/backend/python/mamba/run.sh,exllama2:/build/backend/python/exllama2/run.sh,parler-tts:/build/backend/python/parler-tts/run.sh" RUN apt-get update && \ @@ -456,9 +456,6 @@ RUN if [[ ( "${EXTRA_BACKENDS}" =~ "kokoro" || -z "${EXTRA_BACKENDS}" ) && "$IMA if [[ ( "${EXTRA_BACKENDS}" =~ "openvoice" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \ make -C backend/python/openvoice \ ; fi && \ - if [[ ( "${EXTRA_BACKENDS}" =~ "sentencetransformers" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \ - make -C backend/python/sentencetransformers \ - ; fi && \ if [[ ( "${EXTRA_BACKENDS}" =~ "exllama2" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \ make -C backend/python/exllama2 \ ; fi && \ diff --git a/Makefile b/Makefile index 1f1ffb3e..faa82d6b 100644 --- a/Makefile +++ b/Makefile @@ -497,7 +497,7 @@ test: prepare test-models/testmodel.ggml grpcs @echo 'Running tests' export GO_TAGS="tts stablediffusion debug" $(MAKE) prepare-test - HUGGINGFACE_GRPC=$(abspath ./)/backend/python/sentencetransformers/run.sh TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \ + HUGGINGFACE_GRPC=$(abspath ./)/backend/python/transformers/run.sh TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \ $(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="!llama && !llama-gguf" --flake-attempts $(TEST_FLAKES) --fail-fast -v -r $(TEST_PATHS) $(MAKE) test-llama $(MAKE) test-llama-gguf @@ -583,10 +583,10 @@ protogen-go-clean: $(RM) bin/* .PHONY: protogen-python -protogen-python: autogptq-protogen bark-protogen coqui-protogen diffusers-protogen exllama2-protogen mamba-protogen rerankers-protogen sentencetransformers-protogen transformers-protogen parler-tts-protogen kokoro-protogen vllm-protogen openvoice-protogen +protogen-python: autogptq-protogen bark-protogen coqui-protogen diffusers-protogen exllama2-protogen mamba-protogen rerankers-protogen transformers-protogen parler-tts-protogen kokoro-protogen vllm-protogen openvoice-protogen .PHONY: protogen-python-clean -protogen-python-clean: autogptq-protogen-clean bark-protogen-clean coqui-protogen-clean diffusers-protogen-clean exllama2-protogen-clean 
mamba-protogen-clean sentencetransformers-protogen-clean rerankers-protogen-clean transformers-protogen-clean parler-tts-protogen-clean kokoro-protogen-clean vllm-protogen-clean openvoice-protogen-clean +protogen-python-clean: autogptq-protogen-clean bark-protogen-clean coqui-protogen-clean diffusers-protogen-clean exllama2-protogen-clean mamba-protogen-clean rerankers-protogen-clean transformers-protogen-clean parler-tts-protogen-clean kokoro-protogen-clean vllm-protogen-clean openvoice-protogen-clean .PHONY: autogptq-protogen autogptq-protogen: @@ -644,14 +644,6 @@ rerankers-protogen: rerankers-protogen-clean: $(MAKE) -C backend/python/rerankers protogen-clean -.PHONY: sentencetransformers-protogen -sentencetransformers-protogen: - $(MAKE) -C backend/python/sentencetransformers protogen - -.PHONY: sentencetransformers-protogen-clean -sentencetransformers-protogen-clean: - $(MAKE) -C backend/python/sentencetransformers protogen-clean - .PHONY: transformers-protogen transformers-protogen: $(MAKE) -C backend/python/transformers protogen @@ -701,7 +693,6 @@ prepare-extra-conda-environments: protogen-python $(MAKE) -C backend/python/diffusers $(MAKE) -C backend/python/vllm $(MAKE) -C backend/python/mamba - $(MAKE) -C backend/python/sentencetransformers $(MAKE) -C backend/python/rerankers $(MAKE) -C backend/python/transformers $(MAKE) -C backend/python/parler-tts diff --git a/backend/python/sentencetransformers/Makefile b/backend/python/sentencetransformers/Makefile deleted file mode 100644 index 8b18e943..00000000 --- a/backend/python/sentencetransformers/Makefile +++ /dev/null @@ -1,31 +0,0 @@ -.PHONY: sentencetransformers -sentencetransformers: protogen - bash ./install.sh - - -.PHONY: run -run: protogen - @echo "Running sentencetransformers..." - bash run.sh - @echo "sentencetransformers run." - -# It is not working well by using command line. It only6 works with IDE like VSCode. -.PHONY: test -test: protogen - @echo "Testing sentencetransformers..." - bash test.sh - @echo "sentencetransformers tested." - -.PHONY: protogen -protogen: backend_pb2_grpc.py backend_pb2.py - -.PHONY: protogen-clean -protogen-clean: - $(RM) backend_pb2_grpc.py backend_pb2.py - -backend_pb2_grpc.py backend_pb2.py: - python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto - -.PHONY: clean -clean: protogen-clean - rm -rf venv __pycache__ \ No newline at end of file diff --git a/backend/python/sentencetransformers/README.md b/backend/python/sentencetransformers/README.md deleted file mode 100644 index 829cf0d1..00000000 --- a/backend/python/sentencetransformers/README.md +++ /dev/null @@ -1,5 +0,0 @@ -# Creating a separate environment for the sentencetransformers project - -``` -make sentencetransformers -``` \ No newline at end of file diff --git a/backend/python/sentencetransformers/backend.py b/backend/python/sentencetransformers/backend.py deleted file mode 100755 index 2a20bf60..00000000 --- a/backend/python/sentencetransformers/backend.py +++ /dev/null @@ -1,114 +0,0 @@ -#!/usr/bin/env python3 -""" -Extra gRPC server for HuggingFace SentenceTransformer models. 
-""" -from concurrent import futures - -import argparse -import signal -import sys -import os - -import time -import backend_pb2 -import backend_pb2_grpc - -import grpc - -from sentence_transformers import SentenceTransformer - -_ONE_DAY_IN_SECONDS = 60 * 60 * 24 - -# If MAX_WORKERS are specified in the environment use it, otherwise default to 1 -MAX_WORKERS = int(os.environ.get('PYTHON_GRPC_MAX_WORKERS', '1')) - -# Implement the BackendServicer class with the service methods -class BackendServicer(backend_pb2_grpc.BackendServicer): - """ - A gRPC servicer for the backend service. - - This class implements the gRPC methods for the backend service, including Health, LoadModel, and Embedding. - """ - def Health(self, request, context): - """ - A gRPC method that returns the health status of the backend service. - - Args: - request: A HealthRequest object that contains the request parameters. - context: A grpc.ServicerContext object that provides information about the RPC. - - Returns: - A Reply object that contains the health status of the backend service. - """ - return backend_pb2.Reply(message=bytes("OK", 'utf-8')) - - def LoadModel(self, request, context): - """ - A gRPC method that loads a model into memory. - - Args: - request: A LoadModelRequest object that contains the request parameters. - context: A grpc.ServicerContext object that provides information about the RPC. - - Returns: - A Result object that contains the result of the LoadModel operation. - """ - model_name = request.Model - try: - self.model = SentenceTransformer(model_name, trust_remote_code=request.TrustRemoteCode) - except Exception as err: - return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}") - - # Implement your logic here for the LoadModel service - # Replace this with your desired response - return backend_pb2.Result(message="Model loaded successfully", success=True) - - def Embedding(self, request, context): - """ - A gRPC method that calculates embeddings for a given sentence. - - Args: - request: An EmbeddingRequest object that contains the request parameters. - context: A grpc.ServicerContext object that provides information about the RPC. - - Returns: - An EmbeddingResult object that contains the calculated embeddings. - """ - # Implement your logic here for the Embedding service - # Replace this with your desired response - print("Calculated embeddings for: " + request.Embeddings, file=sys.stderr) - sentence_embeddings = self.model.encode(request.Embeddings) - return backend_pb2.EmbeddingResult(embeddings=sentence_embeddings) - - -def serve(address): - server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS)) - backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server) - server.add_insecure_port(address) - server.start() - print("Server started. Listening on: " + address, file=sys.stderr) - - # Define the signal handler function - def signal_handler(sig, frame): - print("Received termination signal. Shutting down...") - server.stop(0) - sys.exit(0) - - # Set the signal handlers for SIGINT and SIGTERM - signal.signal(signal.SIGINT, signal_handler) - signal.signal(signal.SIGTERM, signal_handler) - - try: - while True: - time.sleep(_ONE_DAY_IN_SECONDS) - except KeyboardInterrupt: - server.stop(0) - -if __name__ == "__main__": - parser = argparse.ArgumentParser(description="Run the gRPC server.") - parser.add_argument( - "--addr", default="localhost:50051", help="The address to bind the server to." 
- ) - args = parser.parse_args() - - serve(args.addr) diff --git a/backend/python/sentencetransformers/install.sh b/backend/python/sentencetransformers/install.sh deleted file mode 100755 index 36443ef1..00000000 --- a/backend/python/sentencetransformers/install.sh +++ /dev/null @@ -1,14 +0,0 @@ -#!/bin/bash -set -e - -source $(dirname $0)/../common/libbackend.sh - -# This is here because the Intel pip index is broken and returns 200 status codes for every package name, it just doesn't return any package links. -# This makes uv think that the package exists in the Intel pip index, and by default it stops looking at other pip indexes once it finds a match. -# We need uv to continue falling through to the pypi default index to find optimum[openvino] in the pypi index -# the --upgrade actually allows us to *downgrade* torch to the version provided in the Intel pip index -if [ "x${BUILD_PROFILE}" == "xintel" ]; then - EXTRA_PIP_INSTALL_FLAGS+=" --upgrade --index-strategy=unsafe-first-match" -fi - -installRequirements diff --git a/backend/python/sentencetransformers/requirements-cpu.txt b/backend/python/sentencetransformers/requirements-cpu.txt deleted file mode 100644 index 1e23f68c..00000000 --- a/backend/python/sentencetransformers/requirements-cpu.txt +++ /dev/null @@ -1,6 +0,0 @@ -torch==2.4.1 -accelerate -transformers -bitsandbytes -sentence-transformers==3.3.1 -transformers \ No newline at end of file diff --git a/backend/python/sentencetransformers/requirements-cublas11.txt b/backend/python/sentencetransformers/requirements-cublas11.txt deleted file mode 100644 index 3900aba9..00000000 --- a/backend/python/sentencetransformers/requirements-cublas11.txt +++ /dev/null @@ -1,5 +0,0 @@ ---extra-index-url https://download.pytorch.org/whl/cu118 -torch==2.4.1+cu118 -accelerate -sentence-transformers==3.3.1 -transformers \ No newline at end of file diff --git a/backend/python/sentencetransformers/requirements-cublas12.txt b/backend/python/sentencetransformers/requirements-cublas12.txt deleted file mode 100644 index 2afd0520..00000000 --- a/backend/python/sentencetransformers/requirements-cublas12.txt +++ /dev/null @@ -1,4 +0,0 @@ -torch==2.4.1 -accelerate -sentence-transformers==3.3.1 -transformers \ No newline at end of file diff --git a/backend/python/sentencetransformers/requirements-hipblas.txt b/backend/python/sentencetransformers/requirements-hipblas.txt deleted file mode 100644 index b472d371..00000000 --- a/backend/python/sentencetransformers/requirements-hipblas.txt +++ /dev/null @@ -1,5 +0,0 @@ ---extra-index-url https://download.pytorch.org/whl/rocm6.0 -torch==2.4.1+rocm6.0 -accelerate -sentence-transformers==3.3.1 -transformers \ No newline at end of file diff --git a/backend/python/sentencetransformers/requirements-intel.txt b/backend/python/sentencetransformers/requirements-intel.txt deleted file mode 100644 index e9b72aab..00000000 --- a/backend/python/sentencetransformers/requirements-intel.txt +++ /dev/null @@ -1,9 +0,0 @@ ---extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/ -intel-extension-for-pytorch==2.3.110+xpu -torch==2.3.1+cxx11.abi -oneccl_bind_pt==2.3.100+xpu -optimum[openvino] -setuptools -accelerate -sentence-transformers==3.3.1 -transformers \ No newline at end of file diff --git a/backend/python/sentencetransformers/requirements.txt b/backend/python/sentencetransformers/requirements.txt deleted file mode 100644 index 6e03c63f..00000000 --- a/backend/python/sentencetransformers/requirements.txt +++ /dev/null @@ -1,5 +0,0 @@ 
-grpcio==1.69.0 -protobuf -certifi -datasets -einops \ No newline at end of file diff --git a/backend/python/sentencetransformers/run.sh b/backend/python/sentencetransformers/run.sh deleted file mode 100755 index 375c07e5..00000000 --- a/backend/python/sentencetransformers/run.sh +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -source $(dirname $0)/../common/libbackend.sh - -startBackend $@ \ No newline at end of file diff --git a/backend/python/sentencetransformers/test.py b/backend/python/sentencetransformers/test.py deleted file mode 100644 index 9df52b14..00000000 --- a/backend/python/sentencetransformers/test.py +++ /dev/null @@ -1,81 +0,0 @@ -""" -A test script to test the gRPC service -""" -import unittest -import subprocess -import time -import backend_pb2 -import backend_pb2_grpc - -import grpc - - -class TestBackendServicer(unittest.TestCase): - """ - TestBackendServicer is the class that tests the gRPC service - """ - def setUp(self): - """ - This method sets up the gRPC service by starting the server - """ - self.service = subprocess.Popen(["python3", "backend.py", "--addr", "localhost:50051"]) - time.sleep(10) - - def tearDown(self) -> None: - """ - This method tears down the gRPC service by terminating the server - """ - self.service.kill() - self.service.wait() - - def test_server_startup(self): - """ - This method tests if the server starts up successfully - """ - try: - self.setUp() - with grpc.insecure_channel("localhost:50051") as channel: - stub = backend_pb2_grpc.BackendStub(channel) - response = stub.Health(backend_pb2.HealthMessage()) - self.assertEqual(response.message, b'OK') - except Exception as err: - print(err) - self.fail("Server failed to start") - finally: - self.tearDown() - - def test_load_model(self): - """ - This method tests if the model is loaded successfully - """ - try: - self.setUp() - with grpc.insecure_channel("localhost:50051") as channel: - stub = backend_pb2_grpc.BackendStub(channel) - response = stub.LoadModel(backend_pb2.ModelOptions(Model="bert-base-nli-mean-tokens")) - self.assertTrue(response.success) - self.assertEqual(response.message, "Model loaded successfully") - except Exception as err: - print(err) - self.fail("LoadModel service failed") - finally: - self.tearDown() - - def test_embedding(self): - """ - This method tests if the embeddings are generated successfully - """ - try: - self.setUp() - with grpc.insecure_channel("localhost:50051") as channel: - stub = backend_pb2_grpc.BackendStub(channel) - response = stub.LoadModel(backend_pb2.ModelOptions(Model="bert-base-nli-mean-tokens")) - self.assertTrue(response.success) - embedding_request = backend_pb2.PredictOptions(Embeddings="This is a test sentence.") - embedding_response = stub.Embedding(embedding_request) - self.assertIsNotNone(embedding_response.embeddings) - except Exception as err: - print(err) - self.fail("Embedding service failed") - finally: - self.tearDown() \ No newline at end of file diff --git a/backend/python/sentencetransformers/test.sh b/backend/python/sentencetransformers/test.sh deleted file mode 100755 index 6940b066..00000000 --- a/backend/python/sentencetransformers/test.sh +++ /dev/null @@ -1,6 +0,0 @@ -#!/bin/bash -set -e - -source $(dirname $0)/../common/libbackend.sh - -runUnittests diff --git a/backend/python/transformers/backend.py b/backend/python/transformers/backend.py index 27257934..9b65c6db 100644 --- a/backend/python/transformers/backend.py +++ b/backend/python/transformers/backend.py @@ -25,6 +25,8 @@ from transformers import AutoTokenizer, 
AutoModel, set_seed, TextIteratorStreame from transformers import AutoProcessor, MusicgenForConditionalGeneration from scipy.io import wavfile import outetts +from sentence_transformers import SentenceTransformer + _ONE_DAY_IN_SECONDS = 60 * 60 * 24 @@ -88,10 +90,12 @@ class BackendServicer(backend_pb2_grpc.BackendServicer): self.CUDA = torch.cuda.is_available() self.OV=False self.OuteTTS=False + self.SentenceTransformer = False device_map="cpu" quantization = None + autoTokenizer = True if self.CUDA: from transformers import BitsAndBytesConfig, AutoModelForCausalLM @@ -195,9 +199,11 @@ class BackendServicer(backend_pb2_grpc.BackendServicer): device=device_map) self.OV = True elif request.Type == "MusicgenForConditionalGeneration": + autoTokenizer = False self.processor = AutoProcessor.from_pretrained(model_name) self.model = MusicgenForConditionalGeneration.from_pretrained(model_name) elif request.Type == "OuteTTS": + autoTokenizer = False options = request.Options MODELNAME = "OuteAI/OuteTTS-0.3-1B" TOKENIZER = "OuteAI/OuteTTS-0.3-1B" @@ -235,6 +241,10 @@ class BackendServicer(backend_pb2_grpc.BackendServicer): self.speaker = self.interface.create_speaker(audio_path=self.AudioPath) else: self.speaker = self.interface.load_default_speaker(name=SPEAKER) + elif request.Type == "SentenceTransformer": + autoTokenizer = False + self.model = SentenceTransformer(model_name, trust_remote_code=request.TrustRemoteCode) + self.SentenceTransformer = True else: print("Automodel", file=sys.stderr) self.model = AutoModel.from_pretrained(model_name, @@ -250,7 +260,7 @@ class BackendServicer(backend_pb2_grpc.BackendServicer): else: self.max_tokens = 512 - if request.Type != "MusicgenForConditionalGeneration": + if autoTokenizer: self.tokenizer = AutoTokenizer.from_pretrained(model_name, use_safetensors=True) self.XPU = False @@ -286,18 +296,26 @@ class BackendServicer(backend_pb2_grpc.BackendServicer): max_length = 512 if request.Tokens != 0: max_length = request.Tokens - encoded_input = self.tokenizer(request.Embeddings, padding=True, truncation=True, max_length=max_length, return_tensors="pt") - # Create word embeddings - if self.CUDA: - encoded_input = encoded_input.to("cuda") + embeds = None - with torch.no_grad(): - model_output = self.model(**encoded_input) + if self.SentenceTransformer: + print("Calculated embeddings for: " + request.Embeddings, file=sys.stderr) + embeds = self.model.encode(request.Embeddings) + else: + encoded_input = self.tokenizer(request.Embeddings, padding=True, truncation=True, max_length=max_length, return_tensors="pt") - # Pool to get sentence embeddings; i.e. generate one 1024 vector for the entire sentence - sentence_embeddings = mean_pooling(model_output, encoded_input['attention_mask']) - return backend_pb2.EmbeddingResult(embeddings=sentence_embeddings[0]) + # Create word embeddings + if self.CUDA: + encoded_input = encoded_input.to("cuda") + + with torch.no_grad(): + model_output = self.model(**encoded_input) + + # Pool to get sentence embeddings; i.e. 
generate one 1024 vector for the entire sentence + sentence_embeddings = mean_pooling(model_output, encoded_input['attention_mask']) + embeds = sentence_embeddings[0] + return backend_pb2.EmbeddingResult(embeddings=embeds) async def _predict(self, request, context, streaming=False): set_seed(request.Seed) diff --git a/backend/python/transformers/requirements-cpu.txt b/backend/python/transformers/requirements-cpu.txt index 56b77325..421c4b80 100644 --- a/backend/python/transformers/requirements-cpu.txt +++ b/backend/python/transformers/requirements-cpu.txt @@ -3,4 +3,5 @@ llvmlite==0.43.0 accelerate transformers bitsandbytes -outetts \ No newline at end of file +outetts +sentence-transformers==3.3.1 diff --git a/backend/python/transformers/requirements-cublas11.txt b/backend/python/transformers/requirements-cublas11.txt index 924b0086..c5d18d09 100644 --- a/backend/python/transformers/requirements-cublas11.txt +++ b/backend/python/transformers/requirements-cublas11.txt @@ -4,4 +4,5 @@ llvmlite==0.43.0 accelerate transformers bitsandbytes -outetts \ No newline at end of file +outetts +sentence-transformers==3.3.1 diff --git a/backend/python/transformers/requirements-cublas12.txt b/backend/python/transformers/requirements-cublas12.txt index 0feb3d81..c0bcfc87 100644 --- a/backend/python/transformers/requirements-cublas12.txt +++ b/backend/python/transformers/requirements-cublas12.txt @@ -3,4 +3,5 @@ accelerate llvmlite==0.43.0 transformers bitsandbytes -outetts \ No newline at end of file +outetts +sentence-transformers==3.3.1 diff --git a/backend/python/transformers/requirements-hipblas.txt b/backend/python/transformers/requirements-hipblas.txt index fa65fb8e..e7f53860 100644 --- a/backend/python/transformers/requirements-hipblas.txt +++ b/backend/python/transformers/requirements-hipblas.txt @@ -4,4 +4,6 @@ accelerate transformers llvmlite==0.43.0 bitsandbytes -outetts \ No newline at end of file +outetts +bitsandbytes +sentence-transformers==3.3.1 diff --git a/backend/python/transformers/requirements-intel.txt b/backend/python/transformers/requirements-intel.txt index 4a295599..aada6e00 100644 --- a/backend/python/transformers/requirements-intel.txt +++ b/backend/python/transformers/requirements-intel.txt @@ -6,4 +6,5 @@ optimum[openvino] llvmlite==0.43.0 intel-extension-for-transformers bitsandbytes -outetts \ No newline at end of file +outetts +sentence-transformers==3.3.1 diff --git a/backend/python/transformers/test.py b/backend/python/transformers/test.py index 305b0a93..14efa6a7 100644 --- a/backend/python/transformers/test.py +++ b/backend/python/transformers/test.py @@ -133,5 +133,41 @@ class TestBackendServicer(unittest.TestCase): except Exception as err: print(err) self.fail("SoundGeneration service failed") + finally: + self.tearDown() + + def test_embed_load_model(self): + """ + This method tests if the model is loaded successfully + """ + try: + self.setUp() + with grpc.insecure_channel("localhost:50051") as channel: + stub = backend_pb2_grpc.BackendStub(channel) + response = stub.LoadModel(backend_pb2.ModelOptions(Model="bert-base-nli-mean-tokens",Type="SentenceTransformer")) + self.assertTrue(response.success) + self.assertEqual(response.message, "Model loaded successfully") + except Exception as err: + print(err) + self.fail("LoadModel service failed") + finally: + self.tearDown() + + def test_sentencetransformers_embedding(self): + """ + This method tests if the embeddings are generated successfully + """ + try: + self.setUp() + with grpc.insecure_channel("localhost:50051") 
as channel: + stub = backend_pb2_grpc.BackendStub(channel) + response = stub.LoadModel(backend_pb2.ModelOptions(Model="bert-base-nli-mean-tokens",Type="SentenceTransformer")) + self.assertTrue(response.success) + embedding_request = backend_pb2.PredictOptions(Embeddings="This is a test sentence.") + embedding_response = stub.Embedding(embedding_request) + self.assertIsNotNone(embedding_response.embeddings) + except Exception as err: + print(err) + self.fail("Embedding service failed") finally: self.tearDown() \ No newline at end of file diff --git a/core/http/app_test.go b/core/http/app_test.go index 6bf1806b..a2e2f758 100644 --- a/core/http/app_test.go +++ b/core/http/app_test.go @@ -822,7 +822,7 @@ var _ = Describe("API test", func() { application, err := application.New( append(commonOpts, - config.WithExternalBackend("huggingface", os.Getenv("HUGGINGFACE_GRPC")), + config.WithExternalBackend("transformers", os.Getenv("HUGGINGFACE_GRPC")), config.WithContext(c), config.WithModelPath(modelPath), )...) diff --git a/pkg/model/initializers.go b/pkg/model/initializers.go index f4675050..eb3e4fdf 100644 --- a/pkg/model/initializers.go +++ b/pkg/model/initializers.go @@ -22,11 +22,19 @@ import ( ) var Aliases map[string]string = map[string]string{ - "go-llama": LLamaCPP, - "llama": LLamaCPP, - "embedded-store": LocalStoreBackend, - "langchain-huggingface": LCHuggingFaceBackend, - "transformers-musicgen": TransformersBackend, + "go-llama": LLamaCPP, + "llama": LLamaCPP, + "embedded-store": LocalStoreBackend, + "huggingface-embeddings": TransformersBackend, + "langchain-huggingface": LCHuggingFaceBackend, + "transformers-musicgen": TransformersBackend, + "sentencetransformers": TransformersBackend, +} + +var TypeAlias map[string]string = map[string]string{ + "sentencetransformers": "SentenceTransformer", + "huggingface-embeddings": "SentenceTransformer", + "transformers-musicgen": "MusicgenForConditionalGeneration", } var AutoDetect = os.Getenv("DISABLE_AUTODETECT") != "true" @@ -396,6 +404,7 @@ func (ml *ModelLoader) grpcModel(backend string, autodetect bool, o *Options) fu } log.Debug().Msgf("Wait for the service to start up") + log.Debug().Msgf("Options: %+v", o.gRPCOptions) // Wait for the service to start up ready := false @@ -460,8 +469,15 @@ func (ml *ModelLoader) backendLoader(opts ...Option) (client grpc.Backend, err e backend := strings.ToLower(o.backendString) if realBackend, exists := Aliases[backend]; exists { + typeAlias, exists := TypeAlias[backend] + if exists { + log.Debug().Msgf("'%s' is a type alias of '%s' (%s)", backend, realBackend, typeAlias) + o.gRPCOptions.Type = typeAlias + } else { + log.Debug().Msgf("'%s' is an alias of '%s'", backend, realBackend) + } + backend = realBackend - log.Debug().Msgf("%s is an alias of %s", backend, realBackend) } ml.stopActiveBackends(o.modelID, o.singleActiveBackend) diff --git a/tests/models_fixtures/grpc.yaml b/tests/models_fixtures/grpc.yaml index 31c406ab..8c519920 100644 --- a/tests/models_fixtures/grpc.yaml +++ b/tests/models_fixtures/grpc.yaml @@ -1,5 +1,5 @@ name: code-search-ada-code-001 -backend: huggingface +backend: sentencetransformers embeddings: true parameters: model: all-MiniLM-L6-v2 \ No newline at end of file From 032a33de49b3dbe2c3acfd684b6855a7ce0e36f7 Mon Sep 17 00:00:00 2001 From: Gianluca Boiano <491117+M0Rf30@users.noreply.github.com> Date: Sat, 18 Jan 2025 18:35:30 +0100 Subject: [PATCH 051/679] chore: remove deprecated tinydream backend (#4631) Signed-off-by: Gianluca Boiano --- Makefile | 34 +---------------- 
backend/go/image/tinydream/main.go | 21 ----------- backend/go/image/tinydream/tinydream.go | 32 ---------------- core/config/backend_config.go | 2 +- core/http/endpoints/openai/image.go | 2 - docs/content/docs/getting-started/build.md | 4 +- .../docs/getting-started/container-images.md | 2 +- .../docs/reference/compatibility-table.md | 2 +- gallery/index.yaml | 9 ----- gallery/tinydream.yaml | 37 ------------------- go.mod | 1 - go.sum | 2 - pkg/model/initializers.go | 1 - pkg/tinydream/generate.go | 36 ------------------ pkg/tinydream/generate_unsupported.go | 10 ----- pkg/tinydream/tinydream.go | 20 ---------- 16 files changed, 6 insertions(+), 209 deletions(-) delete mode 100644 backend/go/image/tinydream/main.go delete mode 100644 backend/go/image/tinydream/tinydream.go delete mode 100644 gallery/tinydream.yaml delete mode 100644 pkg/tinydream/generate.go delete mode 100644 pkg/tinydream/generate_unsupported.go delete mode 100644 pkg/tinydream/tinydream.go diff --git a/Makefile b/Makefile index faa82d6b..944cad37 100644 --- a/Makefile +++ b/Makefile @@ -22,10 +22,6 @@ PIPER_VERSION?=e10ca041a885d4a8f3871d52924b47792d5e5aa0 STABLEDIFFUSION_REPO?=https://github.com/mudler/go-stable-diffusion STABLEDIFFUSION_VERSION?=4a3cd6aeae6f66ee57eae9a0075f8c58c3a6a38f -# tinydream version -TINYDREAM_REPO?=https://github.com/M0Rf30/go-tiny-dream -TINYDREAM_VERSION?=c04fa463ace9d9a6464313aa5f9cd0f953b6c057 - # bark.cpp BARKCPP_REPO?=https://github.com/PABannier/bark.cpp.git BARKCPP_VERSION?=v1.0.0 @@ -188,11 +184,6 @@ ifeq ($(findstring stablediffusion,$(GO_TAGS)),stablediffusion) OPTIONAL_GRPC+=backend-assets/grpc/stablediffusion endif -ifeq ($(findstring tinydream,$(GO_TAGS)),tinydream) -# OPTIONAL_TARGETS+=go-tiny-dream/libtinydream.a - OPTIONAL_GRPC+=backend-assets/grpc/tinydream -endif - ifeq ($(findstring tts,$(GO_TAGS)),tts) # OPTIONAL_TARGETS+=go-piper/libpiper_binding.a # OPTIONAL_TARGETS+=backend-assets/espeak-ng-data @@ -327,19 +318,6 @@ else mv backend-assets/lib/libonnxruntime.so.$(ONNX_VERSION) backend-assets/lib/libonnxruntime.so.1 endif -## tiny-dream -sources/go-tiny-dream: - mkdir -p sources/go-tiny-dream - cd sources/go-tiny-dream && \ - git init && \ - git remote add origin $(TINYDREAM_REPO) && \ - git fetch origin && \ - git checkout $(TINYDREAM_VERSION) && \ - git submodule update --init --recursive --depth 1 --single-branch - -sources/go-tiny-dream/libtinydream.a: sources/go-tiny-dream - $(MAKE) -C sources/go-tiny-dream libtinydream.a - ## whisper sources/whisper.cpp: mkdir -p sources/whisper.cpp @@ -353,12 +331,11 @@ sources/whisper.cpp: sources/whisper.cpp/libwhisper.a: sources/whisper.cpp cd sources/whisper.cpp && $(MAKE) libwhisper.a libggml.a -get-sources: sources/go-llama.cpp sources/go-piper sources/stablediffusion-ggml.cpp sources/bark.cpp sources/whisper.cpp sources/go-stable-diffusion sources/go-tiny-dream backend/cpp/llama/llama.cpp +get-sources: sources/go-llama.cpp sources/go-piper sources/stablediffusion-ggml.cpp sources/bark.cpp sources/whisper.cpp sources/go-stable-diffusion backend/cpp/llama/llama.cpp replace: $(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp=$(CURDIR)/sources/whisper.cpp $(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp/bindings/go=$(CURDIR)/sources/whisper.cpp/bindings/go - $(GOCMD) mod edit -replace github.com/M0Rf30/go-tiny-dream=$(CURDIR)/sources/go-tiny-dream $(GOCMD) mod edit -replace github.com/mudler/go-piper=$(CURDIR)/sources/go-piper $(GOCMD) mod edit -replace 
github.com/mudler/go-stable-diffusion=$(CURDIR)/sources/go-stable-diffusion $(GOCMD) mod edit -replace github.com/go-skynet/go-llama.cpp=$(CURDIR)/sources/go-llama.cpp @@ -366,7 +343,6 @@ replace: dropreplace: $(GOCMD) mod edit -dropreplace github.com/ggerganov/whisper.cpp $(GOCMD) mod edit -dropreplace github.com/ggerganov/whisper.cpp/bindings/go - $(GOCMD) mod edit -dropreplace github.com/M0Rf30/go-tiny-dream $(GOCMD) mod edit -dropreplace github.com/mudler/go-piper $(GOCMD) mod edit -dropreplace github.com/mudler/go-stable-diffusion $(GOCMD) mod edit -dropreplace github.com/go-skynet/go-llama.cpp @@ -381,7 +357,6 @@ rebuild: ## Rebuilds the project $(MAKE) -C sources/whisper.cpp clean $(MAKE) -C sources/go-stable-diffusion clean $(MAKE) -C sources/go-piper clean - $(MAKE) -C sources/go-tiny-dream clean $(MAKE) build prepare: prepare-sources $(OPTIONAL_TARGETS) @@ -855,13 +830,6 @@ ifneq ($(UPX),) $(UPX) backend-assets/grpc/silero-vad endif -backend-assets/grpc/tinydream: sources/go-tiny-dream sources/go-tiny-dream/libtinydream.a backend-assets/grpc - CGO_LDFLAGS="$(CGO_LDFLAGS)" LIBRARY_PATH=$(CURDIR)/go-tiny-dream \ - $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/tinydream ./backend/go/image/tinydream -ifneq ($(UPX),) - $(UPX) backend-assets/grpc/tinydream -endif - backend-assets/grpc/whisper: sources/whisper.cpp sources/whisper.cpp/libwhisper.a backend-assets/grpc CGO_LDFLAGS="$(CGO_LDFLAGS) $(CGO_LDFLAGS_WHISPER)" C_INCLUDE_PATH="$(CURDIR)/sources/whisper.cpp/include:$(CURDIR)/sources/whisper.cpp/ggml/include" LIBRARY_PATH=$(CURDIR)/sources/whisper.cpp \ $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/whisper ./backend/go/transcribe/whisper diff --git a/backend/go/image/tinydream/main.go b/backend/go/image/tinydream/main.go deleted file mode 100644 index ae259fa7..00000000 --- a/backend/go/image/tinydream/main.go +++ /dev/null @@ -1,21 +0,0 @@ -package main - -// Note: this is started internally by LocalAI and a server is allocated for each model - -import ( - "flag" - - grpc "github.com/mudler/LocalAI/pkg/grpc" -) - -var ( - addr = flag.String("addr", "localhost:50051", "the address to connect to") -) - -func main() { - flag.Parse() - - if err := grpc.StartServer(*addr, &Image{}); err != nil { - panic(err) - } -} diff --git a/backend/go/image/tinydream/tinydream.go b/backend/go/image/tinydream/tinydream.go deleted file mode 100644 index ad364c47..00000000 --- a/backend/go/image/tinydream/tinydream.go +++ /dev/null @@ -1,32 +0,0 @@ -package main - -// This is a wrapper to statisfy the GRPC service interface -// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc) -import ( - "github.com/mudler/LocalAI/pkg/grpc/base" - pb "github.com/mudler/LocalAI/pkg/grpc/proto" - "github.com/mudler/LocalAI/pkg/tinydream" -) - -type Image struct { - base.SingleThread - tinydream *tinydream.TinyDream -} - -func (image *Image) Load(opts *pb.ModelOptions) error { - var err error - // Note: the Model here is a path to a directory containing the model files - image.tinydream, err = tinydream.New(opts.ModelFile) - return err -} - -func (image *Image) GenerateImage(opts *pb.GenerateImageRequest) error { - return image.tinydream.GenerateImage( - int(opts.Height), - int(opts.Width), - int(opts.Step), - int(opts.Seed), - opts.PositivePrompt, - opts.NegativePrompt, - opts.Dst) -} diff --git a/core/config/backend_config.go b/core/config/backend_config.go index bb2fa643..a488f2a0 100644 
--- a/core/config/backend_config.go +++ b/core/config/backend_config.go @@ -515,7 +515,7 @@ func (c *BackendConfig) GuessUsecases(u BackendConfigUsecases) bool { } } if (u & FLAG_IMAGE) == FLAG_IMAGE { - imageBackends := []string{"diffusers", "tinydream", "stablediffusion"} + imageBackends := []string{"diffusers", "stablediffusion"} if !slices.Contains(imageBackends, c.Backend) { return false } diff --git a/core/http/endpoints/openai/image.go b/core/http/endpoints/openai/image.go index 3fdb64d4..baaecd4e 100644 --- a/core/http/endpoints/openai/image.go +++ b/core/http/endpoints/openai/image.go @@ -130,8 +130,6 @@ func ImageEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appCon switch config.Backend { case "stablediffusion": config.Backend = model.StableDiffusionBackend - case "tinydream": - config.Backend = model.TinyDreamBackend case "": config.Backend = model.StableDiffusionBackend } diff --git a/docs/content/docs/getting-started/build.md b/docs/content/docs/getting-started/build.md index f21a5b48..9fff1989 100644 --- a/docs/content/docs/getting-started/build.md +++ b/docs/content/docs/getting-started/build.md @@ -88,7 +88,7 @@ Here is the list of the variables available that can be used to customize the bu | Variable | Default | Description | | ---------------------| ------- | ----------- | | `BUILD_TYPE` | None | Build type. Available: `cublas`, `openblas`, `clblas`, `metal`,`hipblas`, `sycl_f16`, `sycl_f32` | -| `GO_TAGS` | `tts stablediffusion` | Go tags. Available: `stablediffusion`, `tts`, `tinydream` | +| `GO_TAGS` | `tts stablediffusion` | Go tags. Available: `stablediffusion`, `tts` | | `CLBLAST_DIR` | | Specify a CLBlast directory | | `CUDA_LIBPATH` | | Specify a CUDA library path | | `BUILD_API_ONLY` | false | Set to true to build only the API (no backends will be built) | @@ -202,7 +202,7 @@ make build **Requirements**: OpenCV, Gomp -Image generation requires `GO_TAGS=stablediffusion` or `GO_TAGS=tinydream` to be set during build: +Image generation requires `GO_TAGS=stablediffusion` to be set during build: ``` make GO_TAGS=stablediffusion build diff --git a/docs/content/docs/getting-started/container-images.md b/docs/content/docs/getting-started/container-images.md index 967fc28b..64f6dbc9 100644 --- a/docs/content/docs/getting-started/container-images.md +++ b/docs/content/docs/getting-started/container-images.md @@ -16,7 +16,7 @@ For GPU Acceleration support for Nvidia video graphic cards, use the Nvidia/CUDA **Available Images Types**: -- Images ending with `-core` are smaller images without predownload python dependencies. Use these images if you plan to use `llama.cpp`, `stablediffusion-ncn`, `tinydream` or `rwkv` backends - if you are not sure which one to use, do **not** use these images. +- Images ending with `-core` are smaller images without predownload python dependencies. Use these images if you plan to use `llama.cpp`, `stablediffusion-ncn` or `rwkv` backends - if you are not sure which one to use, do **not** use these images. - Images containing the `aio` tag are all-in-one images with all the features enabled, and come with an opinionated set of configuration. - FFMpeg is **not** included in the default images due to [its licensing](https://www.ffmpeg.org/legal.html). If you need FFMpeg, use the images ending with `-ffmpeg`. Note that `ffmpeg` is needed in case of using `audio-to-text` LocalAI's features. 
- If using old and outdated CPUs and no GPUs you might need to set `REBUILD` to `true` as environment variable along with options to disable the flags which your CPU does not support, however note that inference will perform poorly and slow. See also [flagset compatibility]({{%relref "docs/getting-started/build#cpu-flagset-compatibility" %}}). diff --git a/docs/content/docs/reference/compatibility-table.md b/docs/content/docs/reference/compatibility-table.md index 7056f4a5..d2f4d8ac 100644 --- a/docs/content/docs/reference/compatibility-table.md +++ b/docs/content/docs/reference/compatibility-table.md @@ -32,7 +32,7 @@ LocalAI will attempt to automatically load models which are not explicitly confi | `mamba` | Mamba models architecture | yes | GPT | no | no | CPU/CUDA | | `exllama2` | GPTQ | yes | GPT only | no | no | N/A | | `transformers-musicgen` | | no | Audio generation | no | no | N/A | -| [tinydream](https://github.com/symisc/tiny-dream#tiny-dreaman-embedded-header-only-stable-diffusion-inference-c-librarypixlabiotiny-dream) | stablediffusion | no | Image | no | no | N/A | +| stablediffusion | no | Image | no | no | N/A | | `coqui` | Coqui | no | Audio generation and Voice cloning | no | no | CPU/CUDA | | `openvoice` | Open voice | no | Audio generation and Voice cloning | no | no | CPU/CUDA | | `parler-tts` | Open voice | no | Audio generation and Voice cloning | no | no | CPU/CUDA | diff --git a/gallery/index.yaml b/gallery/index.yaml index 349cd419..35fac331 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -11187,15 +11187,6 @@ description: | Stable Diffusion in NCNN with c++, supported txt2img and img2img name: stablediffusion-cpp -## Tiny Dream -- url: github:mudler/LocalAI/gallery/tinydream.yaml@master - name: tinydream - license: "BSD-3" - urls: - - https://github.com/symisc/tiny-dream - - https://github.com/symisc/tiny-dream/blob/main/LICENSE - description: | - An embedded, Header Only, Stable Diffusion C++ implementation - &piper ## Piper TTS url: github:mudler/LocalAI/gallery/piper.yaml@master diff --git a/gallery/tinydream.yaml b/gallery/tinydream.yaml deleted file mode 100644 index e4a79ad7..00000000 --- a/gallery/tinydream.yaml +++ /dev/null @@ -1,37 +0,0 @@ ---- -name: "tinydream" - -config_file: | - name: tinydream - backend: tinydream - parameters: - model: tinydream_assets - -files: - - filename: "tinydream_assets/AutoencoderKL-fp16.bin" - sha256: "f02e71f80e70252734724bbfaed5c4ddd3a8ed7e61bb2175ff5f53099f0e35dd" - uri: "https://github.com/M0Rf30/tiny-dream-bins/releases/download/1.0/AutoencoderKL-fp16.bin" - - filename: "tinydream_assets/AutoencoderKL-fp16.param" - sha256: "0254a056dce61b0c27dc9ec1b78b53bcf55315c540f55f051eb841aa992701ba" - uri: "https://github.com/M0Rf30/tiny-dream-bins/releases/download/1.0/AutoencoderKL-fp16.param" - - filename: "tinydream_assets/FrozenCLIPEmbedder-fp16.bin" - sha256: "1c9a12f4e1dd1b295a388045f7f28a2352a4d70c3dc96a542189a3dd7051fdd6" - uri: "https://github.com/M0Rf30/tiny-dream-bins/releases/download/1.0/FrozenCLIPEmbedder-fp16.bin" - - filename: "tinydream_assets/FrozenCLIPEmbedder-fp16.param" - sha256: "471afbe678dd1fd3fe764ef9c6eccaccb0a7d7e601f27b462aa926b20eb368c9" - uri: "https://github.com/M0Rf30/tiny-dream-bins/releases/download/1.0/FrozenCLIPEmbedder-fp16.param" - - filename: "tinydream_assets/RealESRGAN_x4plus_anime.bin" - sha256: "fe01c269cfd10cdef8e018ab66ebe750cf79c7af4d1f9c16c737e1295229bacc" - uri: "https://github.com/M0Rf30/tiny-dream-bins/releases/download/1.0/RealESRGAN_x4plus_anime.bin" - - 
filename: "tinydream_assets/RealESRGAN_x4plus_anime.param" - sha256: "2b8fb6e0ae4d2d85704ca08c119a2f5ea40add4f2ecd512eb7f4cd44b6127ed4" - uri: "https://github.com/M0Rf30/tiny-dream-bins/releases/download/1.0/RealESRGAN_x4plus_anime.param" - - filename: "tinydream_assets/UNetModel-fp16.bin" - sha256: "d618918d011bfc1f644c0f2a33bf84931bd53b28a98492b0a8ed6f3a818852c3" - uri: "https://github.com/M0Rf30/tiny-dream-bins/releases/download/1.0/UNetModel-fp16.bin" - - filename: "tinydream_assets/UNetModel-fp16.param" - sha256: "696f6975de49f4325b53ce32aff81861a6d6c07cd9ce3f0aae2cc405350af38d" - uri: "https://github.com/M0Rf30/tiny-dream-bins/releases/download/1.0/UNetModel-fp16.param" - - filename: "tinydream_assets/vocab.txt" - sha256: "e30e57b6f1e47616982ef898d8922be24e535b4fa3d0110477b3a6f02ebbae7d" - uri: "https://github.com/M0Rf30/tiny-dream-bins/releases/download/1.0/vocab.txt" diff --git a/go.mod b/go.mod index 8aecf14d..adfa7357 100644 --- a/go.mod +++ b/go.mod @@ -6,7 +6,6 @@ toolchain go1.23.1 require ( dario.cat/mergo v1.0.1 - github.com/M0Rf30/go-tiny-dream v0.0.0-20240425104733-c04fa463ace9 github.com/Masterminds/sprig/v3 v3.3.0 github.com/alecthomas/kong v0.9.0 github.com/census-instrumentation/opencensus-proto v0.4.1 diff --git a/go.sum b/go.sum index a1a487b2..4a744ed8 100644 --- a/go.sum +++ b/go.sum @@ -27,8 +27,6 @@ github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03 github.com/BurntSushi/toml v1.2.1/go.mod h1:CxXYINrC8qIiEnFrOxCa7Jy5BFHlXnUU2pbicEuybxQ= github.com/KyleBanks/depth v1.2.1 h1:5h8fQADFrWtarTdtDudMmGsC7GPbOAu6RVB3ffsVFHc= github.com/KyleBanks/depth v1.2.1/go.mod h1:jzSb9d0L43HxTQfT+oSA1EEp2q+ne2uh6XgeJcm8brE= -github.com/M0Rf30/go-tiny-dream v0.0.0-20240425104733-c04fa463ace9 h1:ASsbvw7wQPldWpwKdmYRszJ2A8Cj3oJDr4zO0DiXvN4= -github.com/M0Rf30/go-tiny-dream v0.0.0-20240425104733-c04fa463ace9/go.mod h1:UOf2Mb/deUri5agct5OJ4SLWjhI+kZKbsUVUeRb24I0= github.com/Masterminds/goutils v1.1.1 h1:5nUrii3FMTL5diU80unEVvNevw1nH4+ZV4DSLVJLSYI= github.com/Masterminds/goutils v1.1.1/go.mod h1:8cTjp+g8YejhMuvIA5y2vz3BpJxksy863GQaJW2MFNU= github.com/Masterminds/semver/v3 v3.3.0 h1:B8LGeaivUe71a5qox1ICM/JLl0NqZSW5CHyL+hmvYS0= diff --git a/pkg/model/initializers.go b/pkg/model/initializers.go index eb3e4fdf..756deea7 100644 --- a/pkg/model/initializers.go +++ b/pkg/model/initializers.go @@ -56,7 +56,6 @@ const ( WhisperBackend = "whisper" StableDiffusionBackend = "stablediffusion" - TinyDreamBackend = "tinydream" PiperBackend = "piper" LCHuggingFaceBackend = "huggingface" diff --git a/pkg/tinydream/generate.go b/pkg/tinydream/generate.go deleted file mode 100644 index cfcd23cc..00000000 --- a/pkg/tinydream/generate.go +++ /dev/null @@ -1,36 +0,0 @@ -//go:build tinydream -// +build tinydream - -package tinydream - -import ( - "fmt" - "path/filepath" - - tinyDream "github.com/M0Rf30/go-tiny-dream" -) - -func GenerateImage(height, width, step, seed int, positive_prompt, negative_prompt, dst, asset_dir string) error { - fmt.Println(dst) - if height > 512 || width > 512 { - return tinyDream.GenerateImage( - 1, - step, - seed, - positive_prompt, - negative_prompt, - filepath.Dir(dst), - asset_dir, - ) - } - - return tinyDream.GenerateImage( - 0, - step, - seed, - positive_prompt, - negative_prompt, - filepath.Dir(dst), - asset_dir, - ) -} diff --git a/pkg/tinydream/generate_unsupported.go b/pkg/tinydream/generate_unsupported.go deleted file mode 100644 index 4ffd421a..00000000 --- a/pkg/tinydream/generate_unsupported.go +++ /dev/null @@ -1,10 +0,0 @@ -//go:build 
!tinydream -// +build !tinydream - -package tinydream - -import "fmt" - -func GenerateImage(height, width, step, seed int, positive_prompt, negative_prompt, dst, asset_dir string) error { - return fmt.Errorf("This version of LocalAI was built without the tinytts tag") -} diff --git a/pkg/tinydream/tinydream.go b/pkg/tinydream/tinydream.go deleted file mode 100644 index a316e641..00000000 --- a/pkg/tinydream/tinydream.go +++ /dev/null @@ -1,20 +0,0 @@ -package tinydream - -import "os" - -type TinyDream struct { - assetDir string -} - -func New(assetDir string) (*TinyDream, error) { - if _, err := os.Stat(assetDir); err != nil { - return nil, err - } - return &TinyDream{ - assetDir: assetDir, - }, nil -} - -func (td *TinyDream) GenerateImage(height, width, step, seed int, positive_prompt, negative_prompt, dst string) error { - return GenerateImage(height, width, step, seed, positive_prompt, negative_prompt, dst, td.assetDir) -} From d0cc3047dc424a9731f8c74b37aa3e45a58ce14a Mon Sep 17 00:00:00 2001 From: Gianluca Boiano <491117+M0Rf30@users.noreply.github.com> Date: Sat, 18 Jan 2025 18:36:05 +0100 Subject: [PATCH 052/679] chore(model gallery): add MiniCPM-V-2.6-8b-q4_K_M (#4633) Signed-off-by: Gianluca Boiano --- gallery/index.yaml | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index 35fac331..edd52725 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -9187,6 +9187,7 @@ uri: huggingface://xtuner/llava-llama-3-8b-v1_1-gguf/llava-llama-3-8b-v1_1-mmproj-f16.gguf - !!merge <<: *llama3 name: "minicpm-llama3-v-2_5" + icon: https://raw.githubusercontent.com/OpenBMB/MiniCPM/main/assets/minicpm_logo.png urls: - https://huggingface.co/openbmb/MiniCPM-Llama3-V-2_5-gguf - https://huggingface.co/openbmb/MiniCPM-Llama3-V-2_5 @@ -9210,6 +9211,33 @@ - filename: minicpm-llama3-mmproj-f16.gguf sha256: 391d11736c3cd24a90417c47b0c88975e86918fcddb1b00494c4d715b08af13e uri: huggingface://openbmb/MiniCPM-Llama3-V-2_5-gguf/mmproj-model-f16.gguf +- !!merge <<: *llama3 + name: "minicpm-v-2_6" + license: apache-2.0 + icon: https://raw.githubusercontent.com/OpenBMB/MiniCPM/main/assets/minicpm_logo.png + urls: + - https://huggingface.co/openbmb/MiniCPM-V-2_6-gguf + - https://huggingface.co/openbmb/MiniCPM-V-2_6 + description: | + MiniCPM-V 2.6 is the latest and most capable model in the MiniCPM-V series. 
The model is built on SigLip-400M and Qwen2-7B with a total of 8B parameters + tags: + - llm + - multimodal + - gguf + - gpu + - llama3 + - cpu + overrides: + mmproj: minicpm-v-2_6-mmproj-f16.gguf + parameters: + model: minicpm-v-2_6-Q4_K_M.gguf + files: + - filename: minicpm-v-2_6-Q4_K_M.gguf + sha256: 3a4078d53b46f22989adbf998ce5a3fd090b6541f112d7e936eb4204a04100b1 + uri: huggingface://openbmb/MiniCPM-V-2_6-gguf/ggml-model-Q4_K_M.gguf + - filename: minicpm-v-2_6-mmproj-f16.gguf + sha256: f8a805e9e62085805c69c427287acefc284932eb4abfe6e1b1ce431d27e2f4e0 + uri: huggingface://openbmb/MiniCPM-V-2_6-gguf/mmproj-model-f16.gguf - !!merge <<: *llama3 name: "llama-3-cursedstock-v1.8-8b-iq-imatrix" urls: From 296b97925fab0246184ac582621045565ce9a075 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Sat, 18 Jan 2025 23:21:27 +0100 Subject: [PATCH 053/679] chore: :arrow_up: Update leejet/stable-diffusion.cpp to `5eb15ef4d022bef4a391de4f5f6556e81fbb5024` (#4636) :arrow_up: Update leejet/stable-diffusion.cpp Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 944cad37..fc4eddf4 100644 --- a/Makefile +++ b/Makefile @@ -28,7 +28,7 @@ BARKCPP_VERSION?=v1.0.0 # stablediffusion.cpp (ggml) STABLEDIFFUSION_GGML_REPO?=https://github.com/leejet/stable-diffusion.cpp -STABLEDIFFUSION_GGML_VERSION?=dcf91f9e0f2cbf9da472ee2a556751ed4bab2d2a +STABLEDIFFUSION_GGML_VERSION?=5eb15ef4d022bef4a391de4f5f6556e81fbb5024 ONNX_VERSION?=1.20.0 ONNX_ARCH?=x64 From a752183fb58de465daa35688c93fbe7d4ed324e9 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Sun, 19 Jan 2025 08:38:33 +0100 Subject: [PATCH 054/679] chore: :arrow_up: Update ggerganov/llama.cpp to `a1649cc13f89946322358f92ea268ae1b7b5096c` (#4635) :arrow_up: Update ggerganov/llama.cpp Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index fc4eddf4..dfa91a15 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ DETECT_LIBS?=true # llama.cpp versions GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be -CPPLLAMA_VERSION?=3edfa7d3753c29e44b964c0ff424d2ea8d5fdee6 +CPPLLAMA_VERSION?=a1649cc13f89946322358f92ea268ae1b7b5096c # whisper.cpp version WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp From f496d0113b722847aaf4775394ccfd814255fef9 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sun, 19 Jan 2025 09:07:56 +0100 Subject: [PATCH 055/679] chore(deps): pin numba Signed-off-by: Ettore Di Giacinto --- backend/python/transformers/requirements-cpu.txt | 3 ++- backend/python/transformers/requirements-cublas11.txt | 1 + backend/python/transformers/requirements-cublas12.txt | 1 + backend/python/transformers/requirements-hipblas.txt | 1 + backend/python/transformers/requirements-intel.txt | 1 + backend/python/transformers/requirements.txt | 3 +-- 6 files changed, 7 insertions(+), 3 deletions(-) diff --git a/backend/python/transformers/requirements-cpu.txt b/backend/python/transformers/requirements-cpu.txt index 421c4b80..c88508e3 100644 --- a/backend/python/transformers/requirements-cpu.txt +++ 
b/backend/python/transformers/requirements-cpu.txt @@ -1,7 +1,8 @@ torch==2.4.1 llvmlite==0.43.0 +numba==0.60.0 accelerate transformers bitsandbytes outetts -sentence-transformers==3.3.1 +sentence-transformers==3.3.1 \ No newline at end of file diff --git a/backend/python/transformers/requirements-cublas11.txt b/backend/python/transformers/requirements-cublas11.txt index c5d18d09..0faa9cec 100644 --- a/backend/python/transformers/requirements-cublas11.txt +++ b/backend/python/transformers/requirements-cublas11.txt @@ -1,6 +1,7 @@ --extra-index-url https://download.pytorch.org/whl/cu118 torch==2.4.1+cu118 llvmlite==0.43.0 +numba==0.60.0 accelerate transformers bitsandbytes diff --git a/backend/python/transformers/requirements-cublas12.txt b/backend/python/transformers/requirements-cublas12.txt index c0bcfc87..1e22312f 100644 --- a/backend/python/transformers/requirements-cublas12.txt +++ b/backend/python/transformers/requirements-cublas12.txt @@ -1,6 +1,7 @@ torch==2.4.1 accelerate llvmlite==0.43.0 +numba==0.60.0 transformers bitsandbytes outetts diff --git a/backend/python/transformers/requirements-hipblas.txt b/backend/python/transformers/requirements-hipblas.txt index e7f53860..47aa88db 100644 --- a/backend/python/transformers/requirements-hipblas.txt +++ b/backend/python/transformers/requirements-hipblas.txt @@ -3,6 +3,7 @@ torch==2.4.1+rocm6.0 accelerate transformers llvmlite==0.43.0 +numba==0.60.0 bitsandbytes outetts bitsandbytes diff --git a/backend/python/transformers/requirements-intel.txt b/backend/python/transformers/requirements-intel.txt index aada6e00..708b0516 100644 --- a/backend/python/transformers/requirements-intel.txt +++ b/backend/python/transformers/requirements-intel.txt @@ -4,6 +4,7 @@ torch==2.3.1+cxx11.abi oneccl_bind_pt==2.3.100+xpu optimum[openvino] llvmlite==0.43.0 +numba==0.60.0 intel-extension-for-transformers bitsandbytes outetts diff --git a/backend/python/transformers/requirements.txt b/backend/python/transformers/requirements.txt index d353e4d0..db41b928 100644 --- a/backend/python/transformers/requirements.txt +++ b/backend/python/transformers/requirements.txt @@ -3,5 +3,4 @@ protobuf certifi setuptools scipy==1.15.1 -numpy>=2.0.0 -numba==0.60.0 \ No newline at end of file +numpy>=2.0.0 \ No newline at end of file From 83e2dd5dff7b36d8cc9528d63ed0468145ef79df Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Sun, 19 Jan 2025 23:34:32 +0100 Subject: [PATCH 056/679] chore: :arrow_up: Update ggerganov/llama.cpp to `92bc493917d43b83e592349e138b54c90b1c3ea7` (#4640) :arrow_up: Update ggerganov/llama.cpp Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index dfa91a15..7aaad492 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ DETECT_LIBS?=true # llama.cpp versions GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be -CPPLLAMA_VERSION?=a1649cc13f89946322358f92ea268ae1b7b5096c +CPPLLAMA_VERSION?=92bc493917d43b83e592349e138b54c90b1c3ea7 # whisper.cpp version WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp From 30739d94a41139fe5c8cf68239cc7353d102c4fe Mon Sep 17 00:00:00 2001 From: Gianluca Boiano <491117+M0Rf30@users.noreply.github.com> Date: Mon, 20 Jan 2025 10:34:19 +0100 Subject: [PATCH 057/679] chore(model gallery): add 
InternLM3-8b-Q4_K_M (#4637) chore(model gallery): add InternLM3-8b-Q4_K_M Signed-off-by: Gianluca Boiano --- gallery/index.yaml | 27 ++++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/gallery/index.yaml b/gallery/index.yaml index edd52725..61ecd107 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -10100,7 +10100,7 @@ urls: - https://huggingface.co/internlm/internlm2_5-7b-chat-1m - https://huggingface.co/bartowski/internlm2_5-7b-chat-1m-GGUF - icon: https://github.com/InternLM/InternLM/assets/22529082/b9788105-8892-4398-8b47-b513a292378e + icon: https://avatars.githubusercontent.com/u/135356492 tags: - internlm2 - gguf @@ -10121,6 +10121,31 @@ - filename: internlm2_5-7b-chat-1m-Q4_K_M.gguf uri: huggingface://bartowski/internlm2_5-7b-chat-1m-GGUF/internlm2_5-7b-chat-1m-Q4_K_M.gguf sha256: 10d5e18a4125f9d4d74a9284a21e0c820b150af06dee48665e54ff6e1be3a564 +### Internlm3 +- name: "internlm3-8b-instruct" + url: "github:mudler/LocalAI/gallery/chatml.yaml@master" + urls: + - https://huggingface.co/internlm/internlm3-8b-instruct + - https://huggingface.co/bartowski/internlm3-8b-instruct-GGUF + icon: https://avatars.githubusercontent.com/u/135356492 + tags: + - internlm3 + - gguf + - cpu + - gpu + description: | + InternLM3 has open-sourced an 8-billion parameter instruction model, InternLM3-8B-Instruct, designed for general-purpose usage and advanced reasoning. The model has the following characteristics: + + Enhanced performance at reduced cost: State-of-the-art performance on reasoning and knowledge-intensive tasks surpass models like Llama3.1-8B and Qwen2.5-7B. + + Deep thinking capability: InternLM3 supports both the deep thinking mode for solving complicated reasoning tasks via the long chain-of-thought and the normal response mode for fluent user interactions. + overrides: + parameters: + model: internlm3-8b-instruct-Q4_K_M.gguf + files: + - filename: internlm3-8b-instruct-Q4_K_M.gguf + uri: huggingface://bartowski/internlm3-8b-instruct-GGUF/internlm3-8b-instruct-Q4_K_M.gguf + sha256: 2a9644687318e8659c9cf9b40730d5cc2f5af06f786a50439c7c51359b23896e - &phi-3 ### START Phi-3 url: "github:mudler/LocalAI/gallery/phi-3-chat.yaml@master" From 390bb3f58bb5d878c852c71e473ae0754a8d817d Mon Sep 17 00:00:00 2001 From: Gianluca Boiano <491117+M0Rf30@users.noreply.github.com> Date: Mon, 20 Jan 2025 10:35:05 +0100 Subject: [PATCH 058/679] fix(model gallery): minicpm-v-2.6 is based on qwen2 (#4638) Signed-off-by: Gianluca Boiano --- gallery/index.yaml | 54 +++++++++++++++++++++++----------------------- 1 file changed, 27 insertions(+), 27 deletions(-) diff --git a/gallery/index.yaml b/gallery/index.yaml index 61ecd107..1c170f99 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -5583,6 +5583,33 @@ - filename: marco-o1-uncensored.Q4_K_M.gguf sha256: ad0440270a7254098f90779744d3e5b34fe49b7baf97c819909ba9c5648cc0d9 uri: huggingface://QuantFactory/marco-o1-uncensored-GGUF/marco-o1-uncensored.Q4_K_M.gguf +- !!merge <<: *qwen2 + name: "minicpm-v-2_6" + license: apache-2.0 + icon: https://raw.githubusercontent.com/OpenBMB/MiniCPM/main/assets/minicpm_logo.png + urls: + - https://huggingface.co/openbmb/MiniCPM-V-2_6-gguf + - https://huggingface.co/openbmb/MiniCPM-V-2_6 + description: | + MiniCPM-V 2.6 is the latest and most capable model in the MiniCPM-V series. 
The model is built on SigLip-400M and Qwen2-7B with a total of 8B parameters + tags: + - llm + - multimodal + - gguf + - gpu + - qwen2 + - cpu + overrides: + mmproj: minicpm-v-2_6-mmproj-f16.gguf + parameters: + model: minicpm-v-2_6-Q4_K_M.gguf + files: + - filename: minicpm-v-2_6-Q4_K_M.gguf + sha256: 3a4078d53b46f22989adbf998ce5a3fd090b6541f112d7e936eb4204a04100b1 + uri: huggingface://openbmb/MiniCPM-V-2_6-gguf/ggml-model-Q4_K_M.gguf + - filename: minicpm-v-2_6-mmproj-f16.gguf + sha256: f8a805e9e62085805c69c427287acefc284932eb4abfe6e1b1ce431d27e2f4e0 + uri: huggingface://openbmb/MiniCPM-V-2_6-gguf/mmproj-model-f16.gguf - &mistral03 ## START Mistral url: "github:mudler/LocalAI/gallery/mistral-0.3.yaml@master" @@ -9211,33 +9238,6 @@ - filename: minicpm-llama3-mmproj-f16.gguf sha256: 391d11736c3cd24a90417c47b0c88975e86918fcddb1b00494c4d715b08af13e uri: huggingface://openbmb/MiniCPM-Llama3-V-2_5-gguf/mmproj-model-f16.gguf -- !!merge <<: *llama3 - name: "minicpm-v-2_6" - license: apache-2.0 - icon: https://raw.githubusercontent.com/OpenBMB/MiniCPM/main/assets/minicpm_logo.png - urls: - - https://huggingface.co/openbmb/MiniCPM-V-2_6-gguf - - https://huggingface.co/openbmb/MiniCPM-V-2_6 - description: | - MiniCPM-V 2.6 is the latest and most capable model in the MiniCPM-V series. The model is built on SigLip-400M and Qwen2-7B with a total of 8B parameters - tags: - - llm - - multimodal - - gguf - - gpu - - llama3 - - cpu - overrides: - mmproj: minicpm-v-2_6-mmproj-f16.gguf - parameters: - model: minicpm-v-2_6-Q4_K_M.gguf - files: - - filename: minicpm-v-2_6-Q4_K_M.gguf - sha256: 3a4078d53b46f22989adbf998ce5a3fd090b6541f112d7e936eb4204a04100b1 - uri: huggingface://openbmb/MiniCPM-V-2_6-gguf/ggml-model-Q4_K_M.gguf - - filename: minicpm-v-2_6-mmproj-f16.gguf - sha256: f8a805e9e62085805c69c427287acefc284932eb4abfe6e1b1ce431d27e2f4e0 - uri: huggingface://openbmb/MiniCPM-V-2_6-gguf/mmproj-model-f16.gguf - !!merge <<: *llama3 name: "llama-3-cursedstock-v1.8-8b-iq-imatrix" urls: From 0c0e015b3893816a984f59cd5a6cfb25f5cf90c1 Mon Sep 17 00:00:00 2001 From: Gianluca Boiano <491117+M0Rf30@users.noreply.github.com> Date: Mon, 20 Jan 2025 10:40:46 +0100 Subject: [PATCH 059/679] chore(model gallery): update icons and add missing ones (#4639) * chore(model gallery): uniform github URLs for icons Signed-off-by: Gianluca Boiano * chore(model gallery): add icons to phi models Signed-off-by: Gianluca Boiano * chore(model gallery): add icons to QwenLM models Signed-off-by: Gianluca Boiano * chore(model gallery): update icon for Arcee org Signed-off-by: Gianluca Boiano * chore(model gallery): update icon for Meta org Signed-off-by: Gianluca Boiano * chore(model gallery): update icon url for OpenCoder org Signed-off-by: Gianluca Boiano * chore(model gallery): add icon for RWKV org Signed-off-by: Gianluca Boiano * chore(model gallery): add icon for IBM-granite org Signed-off-by: Gianluca Boiano * chore(model gallery): add icon for OpenBMB org Signed-off-by: Gianluca Boiano * chore(model gallery): add icon for KatanemoLabs org Signed-off-by: Gianluca Boiano * chore(model gallery): update icon for Meta-Llama-3.1-8B-Instruct-abliterated Signed-off-by: Gianluca Boiano * chore(model gallery): update icon for hermes-3-llama-3.1-8b-lorablated Signed-off-by: Gianluca Boiano * chore(model gallery): add icon for Google org Signed-off-by: Gianluca Boiano --------- Signed-off-by: Gianluca Boiano Signed-off-by: Ettore Di Giacinto Co-authored-by: Ettore Di Giacinto --- gallery/index.yaml | 53 
+++++++++++++++++++++++++++------------------- 1 file changed, 31 insertions(+), 22 deletions(-) diff --git a/gallery/index.yaml b/gallery/index.yaml index 1c170f99..fb5476f9 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -2,6 +2,7 @@ - &phi4 url: "github:mudler/LocalAI/gallery/phi-4-chat.yaml@master" name: "phi-4" + icon: https://avatars.githubusercontent.com/u/6154722 license: mit tags: - llm @@ -224,7 +225,7 @@ uri: huggingface://bartowski/INTELLECT-1-Instruct-GGUF/INTELLECT-1-Instruct-Q4_K_M.gguf - &llama33 url: "github:mudler/LocalAI/gallery/llama3.1-instruct.yaml@master" - icon: https://cdn-uploads.huggingface.co/production/uploads/642cc1c253e76b4c2286c58e/aJJxKus1wP5N-euvHEUq7.png + icon: https://avatars.githubusercontent.com/u/153379578 license: llama3.3 description: | The Meta Llama 3.3 multilingual large language model (LLM) is a pretrained and instruction tuned generative model in 70B (text in/text out). The Llama 3.3 instruction tuned text only model is optimized for multilingual dialogue use cases and outperform many of the available open source and closed chat models on common industry benchmarks. @@ -421,6 +422,7 @@ - &rwkv url: "github:mudler/LocalAI/gallery/rwkv.yaml@master" name: "rwkv-6-world-7b" + icon: https://avatars.githubusercontent.com/u/132652788 license: apache-2.0 urls: - https://huggingface.co/RWKV/rwkv-6-world-7b @@ -443,6 +445,7 @@ uri: huggingface://bartowski/rwkv-6-world-7b-GGUF/rwkv-6-world-7b-Q4_K_M.gguf - &qwen25coder name: "qwen2.5-coder-14b" + icon: https://avatars.githubusercontent.com/u/141221163 url: "github:mudler/LocalAI/gallery/chatml.yaml@master" license: apache-2.0 tags: @@ -628,7 +631,7 @@ uri: huggingface://mraWdermacher/Qwen2.5-Coder-32B-Instruct-Uncensored-i1-GGUF/Qwen2.5-Coder-32B-Instruct-Uncensored.i1-Q4_K_M.gguf - &opencoder name: "opencoder-8b-base" - icon: https://github.com/OpenCoder-llm/opencoder-llm.github.io/blob/main/static/images/opencoder_icon.jpg?raw=true + icon: https://avatars.githubusercontent.com/u/186387526 url: "github:mudler/LocalAI/gallery/codellama.yaml@master" urls: - https://huggingface.co/infly/OpenCoder-8B-Base @@ -694,6 +697,7 @@ uri: huggingface://QuantFactory/OpenCoder-1.5B-Instruct-GGUF/OpenCoder-1.5B-Instruct.Q4_K_M.gguf - &granite3 name: "granite-3.0-1b-a400m-instruct" + icon: https://avatars.githubusercontent.com/u/167822367 urls: - https://huggingface.co/ibm-granite/granite-3.0-1b-a400m-instruct - https://huggingface.co/QuantFactory/granite-3.0-1b-a400m-instruct-GGUF @@ -781,7 +785,7 @@ - &llama32 ## llama3.2 url: "github:mudler/LocalAI/gallery/llama3.1-instruct.yaml@master" - icon: https://cdn-uploads.huggingface.co/production/uploads/642cc1c253e76b4c2286c58e/aJJxKus1wP5N-euvHEUq7.png + icon: https://avatars.githubusercontent.com/u/153379578 license: llama3.2 description: | The Meta Llama 3.2 collection of multilingual large language models (LLMs) is a collection of pretrained and instruction-tuned generative models in 1B and 3B sizes (text in/text out). The Llama 3.2 instruction-tuned text only models are optimized for multilingual dialogue use cases, including agentic retrieval and summarization tasks. They outperform many of the available open source and closed chat models on common industry benchmarks. 
@@ -950,7 +954,6 @@ uri: huggingface://mradermacher/Llama-3.2-3B-Reasoning-Time-GGUF/Llama-3.2-3B-Reasoning-Time.Q4_K_M.gguf - !!merge <<: *llama32 name: "llama-3.2-sun-2.5b-chat" - icon: https://i.ibb.co/PF0TdMJ/imagine-image-9a56cee7-0f4f-4cc2-b265-a5b8d04f266b.png urls: - https://huggingface.co/meditsolutions/Llama-3.2-SUN-2.5B-chat - https://huggingface.co/mradermacher/Llama-3.2-SUN-2.5B-chat-GGUF @@ -982,7 +985,6 @@ uri: huggingface://mradermacher/Llama-3.2-SUN-2.5B-chat-GGUF/Llama-3.2-SUN-2.5B-chat.Q4_K_M.gguf - !!merge <<: *llama32 name: "llama-3.2-3b-instruct-uncensored" - icon: https://i.imgur.com/JOePyAN.png urls: - https://huggingface.co/bartowski/Llama-3.2-3B-Instruct-uncensored-GGUF - https://huggingface.co/chuanli11/Llama-3.2-3B-Instruct-uncensored @@ -1319,6 +1321,7 @@ - &qwen25 ## Qwen2.5 name: "qwen2.5-14b-instruct" + icon: https://avatars.githubusercontent.com/u/141221163 url: "github:mudler/LocalAI/gallery/chatml.yaml@master" license: apache-2.0 description: | @@ -1608,6 +1611,7 @@ uri: huggingface://bartowski/qwen2.5-7b-ins-v3-GGUF/qwen2.5-7b-ins-v3-Q4_K_M.gguf - !!merge <<: *qwen25 name: "supernova-medius" + icon: https://avatars.githubusercontent.com/u/126496414 urls: - https://huggingface.co/arcee-ai/SuperNova-Medius-GGUF description: | @@ -1762,7 +1766,7 @@ uri: huggingface://bartowski/TheBeagle-v2beta-32B-MGS-GGUF/TheBeagle-v2beta-32B-MGS-Q4_K_M.gguf - !!merge <<: *qwen25 name: "meraj-mini" - icon: https://i.ibb.co/CmPSSpq/Screenshot-2024-10-06-at-9-45-06-PM.png + icon: https://avatars.githubusercontent.com/u/126496414 urls: - https://huggingface.co/arcee-ai/Meraj-Mini - https://huggingface.co/QuantFactory/Meraj-Mini-GGUF @@ -2392,7 +2396,7 @@ uri: huggingface://QuantFactory/Math-IIO-7B-Instruct-GGUF/Math-IIO-7B-Instruct.Q4_K_M.gguf - !!merge <<: *qwen25 name: "virtuoso-small" - icon: https://i.ibb.co/pXD6Bcv/SW2-U-g-QQLSH1-ZAbxhs-Iu-A.webp + icon: https://avatars.githubusercontent.com/u/126496414 urls: - https://huggingface.co/arcee-ai/Virtuoso-Small-GGUF description: | @@ -2670,6 +2674,7 @@ - cpu - function-calling name: "arch-function-1.5b" + icon: https://avatars.githubusercontent.com/u/112724757 uri: "github:mudler/LocalAI/gallery/arch-function.yaml@master" urls: - https://huggingface.co/katanemolabs/Arch-Function-1.5B @@ -3109,7 +3114,7 @@ uri: huggingface://bartowski/Rombos-Qwen2.5-Writer-32b-GGUF/Rombos-Qwen2.5-Writer-32b-Q4_K_M.gguf - !!merge <<: *qwen25 name: "sky-t1-32b-preview" - icon: https://raw.githubusercontent.com/NovaSky-AI/novasky-ai.github.io/main/assets/images/blue-bird-wider.jpeg + icon: https://github.com/NovaSky-AI/novasky-ai.github.io/raw/main/assets/images/blue-bird-wider.jpeg urls: - https://huggingface.co/NovaSky-AI/Sky-T1-32B-Preview - https://huggingface.co/bartowski/Sky-T1-32B-Preview-GGUF @@ -3298,7 +3303,7 @@ - &llama31 ## LLama3.1 url: "github:mudler/LocalAI/gallery/llama3.1-instruct.yaml@master" - icon: https://cdn-uploads.huggingface.co/production/uploads/642cc1c253e76b4c2286c58e/aJJxKus1wP5N-euvHEUq7.png + icon: https://avatars.githubusercontent.com/u/153379578 name: "meta-llama-3.1-8b-instruct" license: llama3.1 description: | @@ -3387,7 +3392,7 @@ sha256: 6d175432f66d10dfed9737f73a5073d513d18e1ee7bd4b9cf2a59deb359f36ff - !!merge <<: *llama31 name: "meta-llama-3.1-8b-instruct-abliterated" - icon: https://i.imgur.com/KhorYYG.png + icon: https://cdn-uploads.huggingface.co/production/uploads/61b8e2ba285851687028d395/AsTgL8VCgMHgobq4cr46b.png urls: - https://huggingface.co/mlabonne/Meta-Llama-3.1-8B-Instruct-abliterated - 
https://huggingface.co/mlabonne/Meta-Llama-3.1-8B-Instruct-abliterated-GGUF @@ -3416,7 +3421,7 @@ uri: huggingface://mmnga/Llama-3.1-70B-Japanese-Instruct-2407-gguf/Llama-3.1-70B-Japanese-Instruct-2407-Q4_K_M.gguf - !!merge <<: *llama31 name: "openbuddy-llama3.1-8b-v22.1-131k" - icon: https://raw.githubusercontent.com/OpenBuddy/OpenBuddy/main/media/demo.png + icon: https://github.com/OpenBuddy/OpenBuddy/raw/main/media/demo.png urls: - https://huggingface.co/sunnyyy/openbuddy-llama3.1-8b-v22.1-131k-Q4_K_M-GGUF description: | @@ -3592,7 +3597,7 @@ sha256: 6557c5d5091f2507d19ab1f8bfb9ceb4e1536a755ab70f148b18aeb33741580f uri: huggingface://mradermacher/Llama-3.1-Techne-RP-8b-v1-GGUF/Llama-3.1-Techne-RP-8b-v1.Q4_K_M.gguf - !!merge <<: *llama31 - icon: https://i.ibb.co/9hwFrvL/BLMs-Wkx-NQf-W-46-FZDg-ILhg.jpg + icon: https://avatars.githubusercontent.com/u/126496414 name: "llama-spark" urls: - https://huggingface.co/arcee-ai/Llama-Spark @@ -3710,7 +3715,6 @@ - !!merge <<: *llama31 name: "llama-3.1-supernova-lite-reflection-v1.0-i1" url: "github:mudler/LocalAI/gallery/llama3.1-reflective.yaml@master" - icon: https://i.ibb.co/r072p7j/eopi-ZVu-SQ0-G-Cav78-Byq-Tg.png urls: - https://huggingface.co/SE6446/Llama-3.1-SuperNova-Lite-Reflection-V1.0 - https://huggingface.co/mradermacher/Llama-3.1-SuperNova-Lite-Reflection-V1.0-i1-GGUF @@ -3725,7 +3729,7 @@ uri: huggingface://mradermacher/Llama-3.1-SuperNova-Lite-Reflection-V1.0-i1-GGUF/Llama-3.1-SuperNova-Lite-Reflection-V1.0.i1-Q4_K_M.gguf - !!merge <<: *llama31 name: "llama-3.1-supernova-lite" - icon: https://i.ibb.co/r072p7j/eopi-ZVu-SQ0-G-Cav78-Byq-Tg.png + icon: https://avatars.githubusercontent.com/u/126496414 urls: - https://huggingface.co/arcee-ai/Llama-3.1-SuperNova-Lite - https://huggingface.co/arcee-ai/Llama-3.1-SuperNova-Lite-GGUF @@ -4239,6 +4243,7 @@ uri: huggingface://mradermacher/Hermes-3-Llama-3.1-70B-lorablated-GGUF/Hermes-3-Llama-3.1-70B-lorablated.Q4_K_M.gguf - !!merge <<: *llama31 name: "hermes-3-llama-3.1-8b-lorablated" + icon: https://cdn-uploads.huggingface.co/production/uploads/61b8e2ba285851687028d395/4Hbw5n68jKUSBQeTqQIeT.png urls: - https://huggingface.co/mlabonne/Hermes-3-Llama-3.1-8B-lorablated-GGUF description: | @@ -5254,6 +5259,7 @@ ## Start QWEN2 url: "github:mudler/LocalAI/gallery/chatml.yaml@master" name: "qwen2-7b-instruct" + icon: https://avatars.githubusercontent.com/u/141221163 license: apache-2.0 description: | Qwen2 is the new series of Qwen large language models. For Qwen2, we release a number of base language models and instruction-tuned language models ranging from 0.5 to 72 billion parameters, including a Mixture-of-Experts model. This repo contains the instruction-tuned 7B Qwen2 model. @@ -5360,7 +5366,7 @@ uri: huggingface://bartowski/Einstein-v7-Qwen2-7B-GGUF/Einstein-v7-Qwen2-7B-Q4_K_M.gguf - !!merge <<: *qwen2 name: "arcee-spark" - icon: https://i.ibb.co/80ssNWS/o-Vdk-Qx-ARNmzr-Pi1h-Efj-SA.webp + icon: https://avatars.githubusercontent.com/u/126496414 description: | Arcee Spark is a powerful 7B parameter language model that punches well above its weight class. 
Initialized from Qwen2, this model underwent a sophisticated training process: @@ -5398,7 +5404,7 @@ uri: huggingface://Hercules-5.0-Qwen2-7B-Q4_K_M.gguf/Hercules-5.0-Qwen2-7B-Q4_K_M.gguf - !!merge <<: *qwen2 name: "arcee-agent" - icon: https://i.ibb.co/CBHmTDn/136719a5-6d8a-4654-a618-46eabc788953.jpg + icon: https://avatars.githubusercontent.com/u/126496414 description: | Arcee Agent is a cutting-edge 7B parameter language model specifically designed for function calling and tool use. Initialized from Qwen2-7B, it rivals the performance of much larger models while maintaining efficiency and speed. This model is particularly suited for developers, researchers, and businesses looking to implement sophisticated AI-driven solutions without the computational overhead of larger language models. Compute for training Arcee-Agent was provided by CrusoeAI. Arcee-Agent was trained using Spectrum. urls: @@ -5586,7 +5592,7 @@ - !!merge <<: *qwen2 name: "minicpm-v-2_6" license: apache-2.0 - icon: https://raw.githubusercontent.com/OpenBMB/MiniCPM/main/assets/minicpm_logo.png + icon: https://avatars.githubusercontent.com/u/89920203 urls: - https://huggingface.co/openbmb/MiniCPM-V-2_6-gguf - https://huggingface.co/openbmb/MiniCPM-V-2_6 @@ -6321,6 +6327,7 @@ - &gemma url: "github:mudler/LocalAI/gallery/gemma.yaml@master" name: "gemma-2b" + icon: https://avatars.githubusercontent.com/u/1342004 license: gemma urls: - https://ai.google.dev/gemma/docs @@ -7036,7 +7043,7 @@ uri: huggingface://bartowski/GWQ-9B-Preview2-GGUF/GWQ-9B-Preview2-Q4_K_M.gguf - &llama3 url: "github:mudler/LocalAI/gallery/llama3-instruct.yaml@master" - icon: https://cdn-uploads.huggingface.co/production/uploads/642cc1c253e76b4c2286c58e/aJJxKus1wP5N-euvHEUq7.png + icon: https://avatars.githubusercontent.com/u/153379578 name: "llama3-8b-instruct" license: llama3 description: | @@ -8503,7 +8510,7 @@ urls: - https://huggingface.co/arcee-ai/Llama-3-SEC-Chat-GGUF - https://huggingface.co/arcee-ai/Llama-3-SEC-Chat - icon: https://i.ibb.co/kHtBmDN/w8m6-X4-HCQRa-IR86ar-Cm5gg.webp + icon: https://avatars.githubusercontent.com/u/126496414 tags: - llama3 - gguf @@ -8536,7 +8543,7 @@ - &yi-chat ### Start Yi url: "github:mudler/LocalAI/gallery/chatml.yaml@master" - icon: "https://raw.githubusercontent.com/01-ai/Yi/main/assets/img/Yi_logo_icon_light.svg" + icon: "https://github.com/01-ai/Yi/raw/main/assets/img/Yi_logo_icon_light.svg" name: "yi-1.5-9b-chat" license: apache-2.0 urls: @@ -9165,7 +9172,7 @@ urls: - https://huggingface.co/BAAI/Bunny-Llama-3-8B-V-gguf description: | - Bunny is a family of lightweight but powerful multimodal models. It offers multiple plug-and-play vision encoders, like EVA-CLIP, SigLIP and language backbones, including Llama-3-8B, Phi-1.5, StableLM-2, Qwen1.5, MiniCPM and Phi-2. To compensate for the decrease in model size, we construct more informative training data by curated selection from a broader data source. + Bunny is a family of lightweight but powerful multimodal models. It offers multiple plug-and-play vision encoders, like EVA-CLIP, SigLIP and language backbones, including Llama-3-8B, Phi-1.5, StableLM-2, Qwen1.5, and Phi-2. To compensate for the decrease in model size, we construct more informative training data by curated selection from a broader data source. We provide Bunny-Llama-3-8B-V, which is built upon SigLIP and Llama-3-8B-Instruct. More details about this model can be found in GitHub. 
icon: https://huggingface.co/BAAI/Bunny-Llama-3-8B-V-gguf/resolve/main/icon.png @@ -9214,7 +9221,7 @@ uri: huggingface://xtuner/llava-llama-3-8b-v1_1-gguf/llava-llama-3-8b-v1_1-mmproj-f16.gguf - !!merge <<: *llama3 name: "minicpm-llama3-v-2_5" - icon: https://raw.githubusercontent.com/OpenBMB/MiniCPM/main/assets/minicpm_logo.png + icon: https://avatars.githubusercontent.com/u/89920203 urls: - https://huggingface.co/openbmb/MiniCPM-Llama3-V-2_5-gguf - https://huggingface.co/openbmb/MiniCPM-Llama3-V-2_5 @@ -10054,6 +10061,7 @@ - llama2 - cpu name: "phi-2-chat:Q8_0" + icon: https://avatars.githubusercontent.com/u/6154722 overrides: parameters: model: phi-2-layla-v1-chatml-Q8_0.gguf @@ -10150,6 +10158,7 @@ ### START Phi-3 url: "github:mudler/LocalAI/gallery/phi-3-chat.yaml@master" name: "phi-3-mini-4k-instruct" + icon: https://avatars.githubusercontent.com/u/6154722 license: mit description: | The Phi-3-Mini-4K-Instruct is a 3.8B parameters, lightweight, state-of-the-art open model trained with the Phi-3 datasets that includes both synthetic data and the filtered publicly available websites data with a focus on high-quality and reasoning dense properties. The model belongs to the Phi-3 family with the Mini version in two variants 4K and 128K which is the context length (in tokens) it can support. The model has underwent a post-training process that incorporates both supervised fine-tuning and direct preference optimization to ensure precise instruction adherence and robust safety measures. When assessed against benchmarks testing common sense, language understanding, math, code, long context and logical reasoning, Phi-3 Mini-4K-Instruct showcased a robust and state-of-the-art performance among models with less than 13 billion parameters. From adebd557ce8446edbe097b3eeb54c524e6638e78 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Mon, 20 Jan 2025 10:45:10 +0100 Subject: [PATCH 060/679] chore(model gallery): add wayfarer-12b (#4641) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 105 +++++++++++++++++++++++---------------------- 1 file changed, 54 insertions(+), 51 deletions(-) diff --git a/gallery/index.yaml b/gallery/index.yaml index fb5476f9..0397bd75 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -190,7 +190,7 @@ - https://huggingface.co/Nitral-AI/NightWing3-10B-v0.1 - https://huggingface.co/bartowski/NightWing3-10B-v0.1-GGUF description: | - Base model: (Falcon3-10B) + Base model: (Falcon3-10B) overrides: parameters: model: NightWing3-10B-v0.1-Q4_K_M.gguf @@ -782,8 +782,7 @@ - filename: salamandra-7b-instruct.Q4_K_M-f32.gguf sha256: bac8e8c1d1d9d53cbdb148b8ff9ad378ddb392429207099e85b5aae3a43bff3d uri: huggingface://cstr/salamandra-7b-instruct-GGUF/salamandra-7b-instruct.Q4_K_M-f32.gguf -- &llama32 - ## llama3.2 +- &llama32 ## llama3.2 url: "github:mudler/LocalAI/gallery/llama3.1-instruct.yaml@master" icon: https://avatars.githubusercontent.com/u/153379578 license: llama3.2 @@ -1318,8 +1317,7 @@ - filename: FineMath-Llama-3B-Q4_K_M.gguf sha256: 16c73b5cf2a417a7e1608bcc9469f1461fc3e759ce04a3a337f48df977dc158c uri: huggingface://bartowski/FineMath-Llama-3B-GGUF/FineMath-Llama-3B-Q4_K_M.gguf -- &qwen25 - ## Qwen2.5 +- &qwen25 ## Qwen2.5 name: "qwen2.5-14b-instruct" icon: https://avatars.githubusercontent.com/u/141221163 url: "github:mudler/LocalAI/gallery/chatml.yaml@master" @@ -3241,8 +3239,7 @@ - filename: DRT-o1-14B-Q4_K_M.gguf sha256: 9619ca984cf4ce8e4f69bcde831de17b2ce05dd89536e3130608877521e3d328 uri: 
huggingface://bartowski/DRT-o1-14B-GGUF/DRT-o1-14B-Q4_K_M.gguf -- &smollm - ## SmolLM +- &smollm ## SmolLM url: "github:mudler/LocalAI/gallery/chatml.yaml@master" name: "smollm-1.7b-instruct" icon: https://huggingface.co/datasets/HuggingFaceTB/images/resolve/main/banner_smol.png @@ -3300,8 +3297,7 @@ - filename: Vikhr-Qwen-2.5-1.5B-Instruct.Q4_K_M.gguf sha256: eaeac314e30b461413bc1cc819cdc0cd6a79265711fd0b8268702960a082c7bd uri: huggingface://QuantFactory/Vikhr-Qwen-2.5-1.5B-Instruct-GGUF/Vikhr-Qwen-2.5-1.5B-Instruct.Q4_K_M.gguf -- &llama31 - ## LLama3.1 +- &llama31 ## LLama3.1 url: "github:mudler/LocalAI/gallery/llama3.1-instruct.yaml@master" icon: https://avatars.githubusercontent.com/u/153379578 name: "meta-llama-3.1-8b-instruct" @@ -5189,8 +5185,7 @@ - filename: Dolphin3.0-Llama3.1-8B-Q4_K_M.gguf sha256: 268390e07edd407ad93ea21a868b7ae995b5950e01cad0db9e1802ae5049d405 uri: huggingface://bartowski/Dolphin3.0-Llama3.1-8B-GGUF/Dolphin3.0-Llama3.1-8B-Q4_K_M.gguf -- &deepseek - ## Deepseek +- &deepseek ## Deepseek url: "github:mudler/LocalAI/gallery/deepseek.yaml@master" name: "deepseek-coder-v2-lite-instruct" icon: "https://github.com/deepseek-ai/DeepSeek-V2/blob/main/figures/logo.svg?raw=true" @@ -5255,8 +5250,7 @@ - filename: archangel_sft_pythia2-8b.Q4_K_M.gguf sha256: a47782c55ef2b39b19644213720a599d9849511a73c9ebb0c1de749383c0a0f8 uri: huggingface://RichardErkhov/ContextualAI_-_archangel_sft_pythia2-8b-gguf/archangel_sft_pythia2-8b.Q4_K_M.gguf -- &qwen2 - ## Start QWEN2 +- &qwen2 ## Start QWEN2 url: "github:mudler/LocalAI/gallery/chatml.yaml@master" name: "qwen2-7b-instruct" icon: https://avatars.githubusercontent.com/u/141221163 @@ -5616,8 +5610,7 @@ - filename: minicpm-v-2_6-mmproj-f16.gguf sha256: f8a805e9e62085805c69c427287acefc284932eb4abfe6e1b1ce431d27e2f4e0 uri: huggingface://openbmb/MiniCPM-V-2_6-gguf/mmproj-model-f16.gguf -- &mistral03 - ## START Mistral +- &mistral03 ## START Mistral url: "github:mudler/LocalAI/gallery/mistral-0.3.yaml@master" name: "mistral-7b-instruct-v0.3" icon: https://cdn-avatars.huggingface.co/v1/production/uploads/62dac1c7a8ead43d20e3e17a/wrLf5yaGC6ng4XME70w6Z.png @@ -6222,8 +6215,35 @@ - filename: Nera_Noctis-12B-Q4_K_M.gguf sha256: 0662a9a847adde046e6255c15d5a677ebf09ab00841547c8963668d14baf00ff uri: huggingface://bartowski/Nera_Noctis-12B-GGUF/Nera_Noctis-12B-Q4_K_M.gguf -- &mudler - ### START mudler's LocalAI specific-models +- !!merge <<: *mistral03 + name: "wayfarer-12b" + url: "github:mudler/LocalAI/gallery/chatml.yaml@master" + icon: https://huggingface.co/LatitudeGames/Wayfarer-12B/resolve/main/wayfarer.jpg + urls: + - https://huggingface.co/LatitudeGames/Wayfarer-12B + - https://huggingface.co/bartowski/Wayfarer-12B-GGUF + description: | + We’ve heard over and over from AI Dungeon players that modern AI models are too nice, never letting them fail or die. While it may be good for a chatbot to be nice and helpful, great stories and games aren’t all rainbows and unicorns. They have conflict, tension, and even death. These create real stakes and consequences for characters and the journeys they go on. + + Similarly, great games need opposition. You must be able to fail, die, and may even have to start over. This makes games more fun! + + However, the vast majority of AI models, through alignment RLHF, have been trained away from darkness, violence, or conflict, preventing them from fulfilling this role. To give our players better options, we decided to train our own model to fix these issues. 
+ + Wayfarer is an adventure role-play model specifically trained to give players a challenging and dangerous experience. We thought they would like it, but since releasing it on AI Dungeon, players have reacted even more positively than we expected. + + Because they loved it so much, we’ve decided to open-source the model so anyone can experience unforgivingly brutal AI adventures! Anyone can download the model to run locally. + + Or if you want to easily try this model for free, you can do so at https://aidungeon.com. + + We plan to continue improving and open-sourcing similar models, so please share any and all feedback on how we can improve model behavior. Below we share more details on how Wayfarer was created. + overrides: + parameters: + model: Wayfarer-12B-Q4_K_M.gguf + files: + - filename: Wayfarer-12B-Q4_K_M.gguf + sha256: 6cd9f290c820c64854fcdcfd312b066447acc2f63abe2e2e71af9bc4f1946c08 + uri: huggingface://bartowski/Wayfarer-12B-GGUF/Wayfarer-12B-Q4_K_M.gguf +- &mudler ### START mudler's LocalAI specific-models url: "github:mudler/LocalAI/gallery/mudler.yaml@master" name: "LocalAI-llama3-8b-function-call-v0.2" icon: "https://cdn-uploads.huggingface.co/production/uploads/647374aa7ff32a81ac6d35d4/us5JKi9z046p8K-cn_M0w.webp" @@ -6268,8 +6288,7 @@ - filename: Mirai-Nova-Llama3-LocalAI-8B-v0.1-q4_k_m.bin sha256: 579cbb229f9c11d0330759ff4733102d2491615a4c61289e26c09d1b3a583fec uri: huggingface://mudler/Mirai-Nova-Llama3-LocalAI-8B-v0.1-GGUF/Mirai-Nova-Llama3-LocalAI-8B-v0.1-q4_k_m.bin -- &parler-tts - ### START parler-tts +- &parler-tts ### START parler-tts url: "github:mudler/LocalAI/gallery/parler-tts.yaml@master" name: parler-tts-mini-v0.1 overrides: @@ -6286,8 +6305,7 @@ - cpu - text-to-speech - python -- &rerankers - ### START rerankers +- &rerankers ### START rerankers url: "github:mudler/LocalAI/gallery/rerankers.yaml@master" name: cross-encoder parameters: @@ -8540,8 +8558,7 @@ - filename: Copus-2x8B.i1-Q4_K_M.gguf sha256: 685da1ba49e203e8f491105585143d76044286d4b4687bed37d325f6b55501e5 uri: huggingface://mradermacher/Copus-2x8B-i1-GGUF/Copus-2x8B.i1-Q4_K_M.gguf -- &yi-chat - ### Start Yi +- &yi-chat ### Start Yi url: "github:mudler/LocalAI/gallery/chatml.yaml@master" icon: "https://github.com/01-ai/Yi/raw/main/assets/img/Yi_logo_icon_light.svg" name: "yi-1.5-9b-chat" @@ -8752,8 +8769,7 @@ - filename: Fimbulvetr-11B-v2-Q4_K_M-imat.gguf sha256: 3f309b59508342536a70edd6c4be6cf4f2cb97f2e32cbc79ad2ab3f4c02933a4 uri: huggingface://Lewdiculous/Fimbulvetr-11B-v2-GGUF-IQ-Imatrix/Fimbulvetr-11B-v2-Q4_K_M-imat.gguf -- &noromaid - ### Start noromaid +- &noromaid ### Start noromaid url: "github:mudler/LocalAI/gallery/noromaid.yaml@master" name: "noromaid-13b-0.4-DPO" icon: https://cdn-uploads.huggingface.co/production/uploads/630dfb008df86f1e5becadc3/VKX2Z2yjZX5J8kXzgeCYO.png @@ -8773,8 +8789,7 @@ - filename: Noromaid-13B-0.4-DPO.q4_k_m.gguf sha256: cb28e878d034fae3d0b43326c5fc1cfb4ab583b17c56e41d6ce023caec03c1c1 uri: huggingface://NeverSleep/Noromaid-13B-0.4-DPO-GGUF/Noromaid-13B-0.4-DPO.q4_k_m.gguf -- &wizardlm2 - ### START Vicuna based +- &wizardlm2 ### START Vicuna based url: "github:mudler/LocalAI/gallery/wizardlm2.yaml@master" name: "wizardlm2-7b" description: | @@ -8829,8 +8844,7 @@ - filename: moondream2-mmproj-f16.gguf sha256: 4cc1cb3660d87ff56432ebeb7884ad35d67c48c7b9f6b2856f305e39c38eed8f uri: huggingface://moondream/moondream2-gguf/moondream2-mmproj-f16.gguf -- &llava - ### START LLaVa +- &llava ### START LLaVa url: "github:mudler/LocalAI/gallery/llava.yaml@master" license: 
apache-2.0 description: | @@ -9688,8 +9702,7 @@ - filename: Freyja-v4.95-maldv-7b-NON-FICTION.i1-Q4_K_M.gguf sha256: cdc0f4de6df2ba120835fbd25c2a0ae2af8548f46d2c40c7a018c51c3d19e0c0 uri: huggingface://mradermacher/Freyja-v4.95-maldv-7b-NON-FICTION-i1-GGUF/Freyja-v4.95-maldv-7b-NON-FICTION.i1-Q4_K_M.gguf -- &chatml - ### ChatML +- &chatml ### ChatML url: "github:mudler/LocalAI/gallery/chatml.yaml@master" name: "una-thepitbull-21.4b-v2" license: afl-3.0 @@ -9975,8 +9988,7 @@ - filename: Triangulum-10B.Q4_K_M.gguf sha256: dd071f99edf6b166044bf229cdeec19419c4c348e3fc3d6587cfcc55e6fb85fa uri: huggingface://mradermacher/Triangulum-10B-GGUF/Triangulum-10B.Q4_K_M.gguf -- &command-R - ### START Command-r +- &command-R ### START Command-r url: "github:mudler/LocalAI/gallery/command-r.yaml@master" name: "command-r-v01:q1_s" license: "cc-by-nc-4.0" @@ -10031,8 +10043,7 @@ - filename: "aya-23-35B-Q4_K_M.gguf" sha256: "57824768c1a945e21e028c8e9a29b39adb4838d489f5865c82601ab9ad98065d" uri: "huggingface://bartowski/aya-23-35B-GGUF/aya-23-35B-Q4_K_M.gguf" -- &phi-2-chat - ### START Phi-2 +- &phi-2-chat ### START Phi-2 url: "github:mudler/LocalAI/gallery/phi-2-chat.yaml@master" license: mit description: | @@ -10154,8 +10165,7 @@ - filename: internlm3-8b-instruct-Q4_K_M.gguf uri: huggingface://bartowski/internlm3-8b-instruct-GGUF/internlm3-8b-instruct-Q4_K_M.gguf sha256: 2a9644687318e8659c9cf9b40730d5cc2f5af06f786a50439c7c51359b23896e -- &phi-3 - ### START Phi-3 +- &phi-3 ### START Phi-3 url: "github:mudler/LocalAI/gallery/phi-3-chat.yaml@master" name: "phi-3-mini-4k-instruct" icon: https://avatars.githubusercontent.com/u/6154722 @@ -10355,8 +10365,7 @@ - filename: Phi-3.5-MoE-instruct-Q4_K_M.gguf sha256: 43e91bb720869bd8a92d8eb86bc3c74a52c49cf61642ca709b3d7bb89644df36 uri: huggingface://bartowski/Phi-3.5-MoE-instruct-GGUF/Phi-3.5-MoE-instruct-Q4_K_M.gguf -- &hermes-2-pro-mistral - ### START Hermes +- &hermes-2-pro-mistral ### START Hermes url: "github:mudler/LocalAI/gallery/hermes-2-pro-mistral.yaml@master" name: "hermes-2-pro-mistral" icon: https://cdn-uploads.huggingface.co/production/uploads/6317aade83d8d2fd903192d9/ggO2sBDJ8Bhc6w-zwTx5j.png @@ -10692,8 +10701,7 @@ - filename: "galatolo-Q4_K.gguf" sha256: "ca0cfd5a9ad40dc16416aa3a277015d0299b62c0803b67f5709580042202c172" uri: "huggingface://galatolo/cerbero-7b-gguf/ggml-model-Q4_K.gguf" -- &codellama - ### START Codellama +- &codellama ### START Codellama url: "github:mudler/LocalAI/gallery/codellama.yaml@master" name: "codellama-7b" license: llama2 @@ -10824,8 +10832,7 @@ - filename: "llm-compiler-7b-ftd.Q4_K.gguf" uri: "huggingface://legraphista/llm-compiler-7b-ftd-IMat-GGUF/llm-compiler-7b-ftd.Q4_K.gguf" sha256: d862dd18ed335413787d0ad196522a9902a3c10a6456afdab8721822cb0ddde8 -- &openvino - ### START OpenVINO +- &openvino ### START OpenVINO url: "github:mudler/LocalAI/gallery/openvino.yaml@master" name: "openvino-llama-3-8b-instruct-ov-int8" license: llama3 @@ -10939,8 +10946,7 @@ - gpu - embedding - cpu -- &sentencentransformers - ### START Embeddings +- &sentencentransformers ### START Embeddings description: | This framework provides an easy method to compute dense vector representations for sentences, paragraphs, and images. The models are based on transformer networks like BERT / RoBERTa / XLM-RoBERTa etc. and achieve state-of-the-art performance in various tasks. Text is embedded in vector space such that similar text are closer and can efficiently be found using cosine similarity. 
urls: @@ -10955,8 +10961,7 @@ overrides: parameters: model: all-MiniLM-L6-v2 -- &dreamshaper - ### START Image generation +- &dreamshaper ### START Image generation name: dreamshaper icon: https://image.civitai.com/xG1nkqKTMzGDvpLrqFT7WA/dd9b038c-bd15-43ab-86ab-66e145ad7ff2/width=450/26072158-132340247-8k%20portrait%20of%20beautiful%20cyborg%20with%20brown%20hair,%20intricate,%20elegant,%20highly%20detailed,%20majestic,%20digital%20photography,%20art%20by%20artg_ed.jpeg license: other @@ -11068,8 +11073,7 @@ - filename: t5xxl_fp16.safetensors sha256: 6e480b09fae049a72d2a8c5fbccb8d3e92febeb233bbe9dfe7256958a9167635 uri: https://huggingface.co/comfyanonymous/flux_text_encoders/resolve/main/t5xxl_fp16.safetensors -- &whisper - ## Whisper +- &whisper ## Whisper url: "github:mudler/LocalAI/gallery/whisper-base.yaml@master" name: "whisper-1" license: "MIT" @@ -11249,8 +11253,7 @@ description: | Stable Diffusion in NCNN with c++, supported txt2img and img2img name: stablediffusion-cpp -- &piper - ## Piper TTS +- &piper ## Piper TTS url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-en-us-kathleen-low icon: https://github.com/rhasspy/piper/raw/master/etc/logo.png From 83a8d90c52816832bd3362d6455501d479ce16ab Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Mon, 20 Jan 2025 10:50:29 +0100 Subject: [PATCH 061/679] chore(model gallery): add l3.3-70b-magnum-v4-se (#4642) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index 0397bd75..d10cd32e 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -419,6 +419,22 @@ - filename: L3.3-MS-Nevoria-70b-Q4_K_M.gguf sha256: e8b0763f263089a19d4b112b7ed5085cc5f1ed9ca49c5085baa8d51f4ded1f94 uri: huggingface://bartowski/L3.3-MS-Nevoria-70b-GGUF/L3.3-MS-Nevoria-70b-Q4_K_M.gguf +- !!merge <<: *llama33 + name: "l3.3-70b-magnum-v4-se" + urls: + - https://huggingface.co/Doctor-Shotgun/L3.3-70B-Magnum-v4-SE + - https://huggingface.co/bartowski/L3.3-70B-Magnum-v4-SE-GGUF + description: | + The Magnum v4 series is complete, but here's something a little extra I wanted to tack on as I wasn't entirely satisfied with the results of v4 72B. "SE" for Special Edition - this model is finetuned from meta-llama/Llama-3.3-70B-Instruct as an rsLoRA adapter. The dataset is a slightly revised variant of the v4 data with some elements of the v2 data re-introduced. + + The objective, as with the other Magnum models, is to emulate the prose style and quality of the Claude 3 Sonnet/Opus series of models on a local scale, so don't be surprised to see "Claude-isms" in its output. 
+ overrides: + parameters: + model: L3.3-70B-Magnum-v4-SE-Q4_K_M.gguf + files: + - filename: L3.3-70B-Magnum-v4-SE-Q4_K_M.gguf + sha256: 9724a6364a42caa3d5a1687258eb329c9af6cbb2ce01c8dd556c1a222a2e0352 + uri: huggingface://bartowski/L3.3-70B-Magnum-v4-SE-GGUF/L3.3-70B-Magnum-v4-SE-Q4_K_M.gguf - &rwkv url: "github:mudler/LocalAI/gallery/rwkv.yaml@master" name: "rwkv-6-world-7b" From aeb1dca52ef940ec23f3ffddc7af2cc9afac69a7 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Mon, 20 Jan 2025 11:03:35 +0100 Subject: [PATCH 062/679] chore(model gallery): add l3.3-prikol-70b-v0.2 (#4643) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index d10cd32e..679ab002 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -435,6 +435,27 @@ - filename: L3.3-70B-Magnum-v4-SE-Q4_K_M.gguf sha256: 9724a6364a42caa3d5a1687258eb329c9af6cbb2ce01c8dd556c1a222a2e0352 uri: huggingface://bartowski/L3.3-70B-Magnum-v4-SE-GGUF/L3.3-70B-Magnum-v4-SE-Q4_K_M.gguf +- !!merge <<: *llama33 + name: "l3.3-prikol-70b-v0.2" + icon: https://files.catbox.moe/x9t3zo.png + urls: + - https://huggingface.co/Nohobby/L3.3-Prikol-70B-v0.2 + - https://huggingface.co/bartowski/L3.3-Prikol-70B-v0.2-GGUF + description: | + A merge of some Llama 3.3 models because um uh yeah + + Went extra schizo on the recipe, hoping for an extra fun result, and... Well, I guess it's an overall improvement over the previous revision. It's a tiny bit smarter, has even more distinct swipes and nice dialogues, but for some reason it's damn sloppy. + + I've published the second step of this merge as a separate model, and I'd say the results are more interesting, but not as usable as this one. https://huggingface.co/Nohobby/AbominationSnowPig + + Prompt format: Llama3 OR Llama3 Context and ChatML Instruct. 
It actually works a bit better this way + overrides: + parameters: + model: L3.3-Prikol-70B-v0.2-Q4_K_M.gguf + files: + - filename: L3.3-Prikol-70B-v0.2-Q4_K_M.gguf + sha256: fc0ff514efbc0b67981c2bf1423d5a2e1b8801e4266ba0c653ea148414fe5ffc + uri: huggingface://bartowski/L3.3-Prikol-70B-v0.2-GGUF/L3.3-Prikol-70B-v0.2-Q4_K_M.gguf - &rwkv url: "github:mudler/LocalAI/gallery/rwkv.yaml@master" name: "rwkv-6-world-7b" From a396040886fb5e2e13dee72811605956c7506ebc Mon Sep 17 00:00:00 2001 From: Gianluca Boiano <491117+M0Rf30@users.noreply.github.com> Date: Mon, 20 Jan 2025 16:13:19 +0100 Subject: [PATCH 063/679] chore(model gallery): remove dead icons and update LLAVA and DeepSeek ones (#4645) * chore(model gallery): update icons and add LLAVA ones Signed-off-by: Gianluca Boiano * chore(model gallery): fix all complains related to yamllint Signed-off-by: Gianluca Boiano --------- Signed-off-by: Gianluca Boiano --- gallery/index.yaml | 69 +++++++++++++++++++++------------------------- 1 file changed, 31 insertions(+), 38 deletions(-) diff --git a/gallery/index.yaml b/gallery/index.yaml index 679ab002..30687062 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -819,7 +819,7 @@ - filename: salamandra-7b-instruct.Q4_K_M-f32.gguf sha256: bac8e8c1d1d9d53cbdb148b8ff9ad378ddb392429207099e85b5aae3a43bff3d uri: huggingface://cstr/salamandra-7b-instruct-GGUF/salamandra-7b-instruct.Q4_K_M-f32.gguf -- &llama32 ## llama3.2 +- &llama32 ## llama3.2 url: "github:mudler/LocalAI/gallery/llama3.1-instruct.yaml@master" icon: https://avatars.githubusercontent.com/u/153379578 license: llama3.2 @@ -1354,7 +1354,7 @@ - filename: FineMath-Llama-3B-Q4_K_M.gguf sha256: 16c73b5cf2a417a7e1608bcc9469f1461fc3e759ce04a3a337f48df977dc158c uri: huggingface://bartowski/FineMath-Llama-3B-GGUF/FineMath-Llama-3B-Q4_K_M.gguf -- &qwen25 ## Qwen2.5 +- &qwen25 ## Qwen2.5 name: "qwen2.5-14b-instruct" icon: https://avatars.githubusercontent.com/u/141221163 url: "github:mudler/LocalAI/gallery/chatml.yaml@master" @@ -2181,7 +2181,6 @@ sha256: 42cf7a96784dc8f25c61c2404620c3e6548a024caa8dff6e435d7c86400d7ab8 uri: huggingface://mradermacher/Qwen2.5-7B-nerd-uncensored-v1.7-GGUF/Qwen2.5-7B-nerd-uncensored-v1.7.Q4_K_M.gguf - !!merge <<: *qwen25 - icon: https://i.imgur.com/OxX2Usi.png name: "evathene-v1.0" urls: - https://huggingface.co/sophosympatheia/Evathene-v1.0 @@ -2540,7 +2539,6 @@ sha256: 91907f29746625a62885793475956220b81d8a5a34b53686a1acd1d03fd403ea uri: huggingface://bartowski/72B-Qwen2.5-Kunou-v1-GGUF/72B-Qwen2.5-Kunou-v1-Q4_K_M.gguf - !!merge <<: *qwen25 - icon: https://i.imgur.com/OxX2Usi.png name: "evathene-v1.3" urls: - https://huggingface.co/sophosympatheia/Evathene-v1.3 @@ -3276,7 +3274,7 @@ - filename: DRT-o1-14B-Q4_K_M.gguf sha256: 9619ca984cf4ce8e4f69bcde831de17b2ce05dd89536e3130608877521e3d328 uri: huggingface://bartowski/DRT-o1-14B-GGUF/DRT-o1-14B-Q4_K_M.gguf -- &smollm ## SmolLM +- &smollm ## SmolLM url: "github:mudler/LocalAI/gallery/chatml.yaml@master" name: "smollm-1.7b-instruct" icon: https://huggingface.co/datasets/HuggingFaceTB/images/resolve/main/banner_smol.png @@ -3334,7 +3332,7 @@ - filename: Vikhr-Qwen-2.5-1.5B-Instruct.Q4_K_M.gguf sha256: eaeac314e30b461413bc1cc819cdc0cd6a79265711fd0b8268702960a082c7bd uri: huggingface://QuantFactory/Vikhr-Qwen-2.5-1.5B-Instruct-GGUF/Vikhr-Qwen-2.5-1.5B-Instruct.Q4_K_M.gguf -- &llama31 ## LLama3.1 +- &llama31 ## LLama3.1 url: "github:mudler/LocalAI/gallery/llama3.1-instruct.yaml@master" icon: https://avatars.githubusercontent.com/u/153379578 name: 
"meta-llama-3.1-8b-instruct" @@ -4485,7 +4483,6 @@ sha256: 27b10c3ca4507e8bf7d305d60e5313b54ef5fffdb43a03f36223d19d906e39f3 uri: huggingface://mradermacher/L3.1-70Blivion-v0.1-rc1-70B-i1-GGUF/L3.1-70Blivion-v0.1-rc1-70B.i1-Q4_K_M.gguf - !!merge <<: *llama31 - icon: https://i.imgur.com/sdN0Aqg.jpeg name: "llama-3.1-hawkish-8b" urls: - https://huggingface.co/mukaj/Llama-3.1-Hawkish-8B @@ -5222,10 +5219,10 @@ - filename: Dolphin3.0-Llama3.1-8B-Q4_K_M.gguf sha256: 268390e07edd407ad93ea21a868b7ae995b5950e01cad0db9e1802ae5049d405 uri: huggingface://bartowski/Dolphin3.0-Llama3.1-8B-GGUF/Dolphin3.0-Llama3.1-8B-Q4_K_M.gguf -- &deepseek ## Deepseek +- &deepseek ## Deepseek url: "github:mudler/LocalAI/gallery/deepseek.yaml@master" name: "deepseek-coder-v2-lite-instruct" - icon: "https://github.com/deepseek-ai/DeepSeek-V2/blob/main/figures/logo.svg?raw=true" + icon: "https://avatars.githubusercontent.com/u/148330874" license: deepseek description: | DeepSeek-Coder-V2, an open-source Mixture-of-Experts (MoE) code language model that achieves performance comparable to GPT4-Turbo in code-specific tasks. Specifically, DeepSeek-Coder-V2 is further pre-trained from DeepSeek-Coder-V2-Base with 6 trillion tokens sourced from a high-quality and multi-source corpus. Through this continued pre-training, DeepSeek-Coder-V2 substantially enhances the coding and mathematical reasoning capabilities of DeepSeek-Coder-V2-Base, while maintaining comparable performance in general language tasks. Compared to DeepSeek-Coder, DeepSeek-Coder-V2 demonstrates significant advancements in various aspects of code-related tasks, as well as reasoning and general capabilities. Additionally, DeepSeek-Coder-V2 expands its support for programming languages from 86 to 338, while extending the context length from 16K to 128K. 
@@ -5287,7 +5284,7 @@ - filename: archangel_sft_pythia2-8b.Q4_K_M.gguf sha256: a47782c55ef2b39b19644213720a599d9849511a73c9ebb0c1de749383c0a0f8 uri: huggingface://RichardErkhov/ContextualAI_-_archangel_sft_pythia2-8b-gguf/archangel_sft_pythia2-8b.Q4_K_M.gguf -- &qwen2 ## Start QWEN2 +- &qwen2 ## Start QWEN2 url: "github:mudler/LocalAI/gallery/chatml.yaml@master" name: "qwen2-7b-instruct" icon: https://avatars.githubusercontent.com/u/141221163 @@ -5647,7 +5644,7 @@ - filename: minicpm-v-2_6-mmproj-f16.gguf sha256: f8a805e9e62085805c69c427287acefc284932eb4abfe6e1b1ce431d27e2f4e0 uri: huggingface://openbmb/MiniCPM-V-2_6-gguf/mmproj-model-f16.gguf -- &mistral03 ## START Mistral +- &mistral03 ## START Mistral url: "github:mudler/LocalAI/gallery/mistral-0.3.yaml@master" name: "mistral-7b-instruct-v0.3" icon: https://cdn-avatars.huggingface.co/v1/production/uploads/62dac1c7a8ead43d20e3e17a/wrLf5yaGC6ng4XME70w6Z.png @@ -6155,7 +6152,6 @@ - !!merge <<: *mistral03 name: "mn-12b-mag-mell-r1-iq-arm-imatrix" url: "github:mudler/LocalAI/gallery/chatml.yaml@master" - icon: "https://i.imgur.com/wjyAaTO.png" urls: - https://huggingface.co/inflatebot/MN-12B-Mag-Mell-R1 - https://huggingface.co/Lewdiculous/MN-12B-Mag-Mell-R1-GGUF-IQ-ARM-Imatrix @@ -6280,7 +6276,7 @@ - filename: Wayfarer-12B-Q4_K_M.gguf sha256: 6cd9f290c820c64854fcdcfd312b066447acc2f63abe2e2e71af9bc4f1946c08 uri: huggingface://bartowski/Wayfarer-12B-GGUF/Wayfarer-12B-Q4_K_M.gguf -- &mudler ### START mudler's LocalAI specific-models +- &mudler ### START mudler's LocalAI specific-models url: "github:mudler/LocalAI/gallery/mudler.yaml@master" name: "LocalAI-llama3-8b-function-call-v0.2" icon: "https://cdn-uploads.huggingface.co/production/uploads/647374aa7ff32a81ac6d35d4/us5JKi9z046p8K-cn_M0w.webp" @@ -6325,7 +6321,7 @@ - filename: Mirai-Nova-Llama3-LocalAI-8B-v0.1-q4_k_m.bin sha256: 579cbb229f9c11d0330759ff4733102d2491615a4c61289e26c09d1b3a583fec uri: huggingface://mudler/Mirai-Nova-Llama3-LocalAI-8B-v0.1-GGUF/Mirai-Nova-Llama3-LocalAI-8B-v0.1-q4_k_m.bin -- &parler-tts ### START parler-tts +- &parler-tts ### START parler-tts url: "github:mudler/LocalAI/gallery/parler-tts.yaml@master" name: parler-tts-mini-v0.1 overrides: @@ -6342,7 +6338,7 @@ - cpu - text-to-speech - python -- &rerankers ### START rerankers +- &rerankers ### START rerankers url: "github:mudler/LocalAI/gallery/rerankers.yaml@master" name: cross-encoder parameters: @@ -7265,10 +7261,9 @@ name: "l3-8b-stheno-v3.1" urls: - https://huggingface.co/Sao10K/L3-8B-Stheno-v3.1 - icon: https://w.forfun.com/fetch/cb/cba2205390e517bea1ea60ca0b491af4.jpeg description: | - A model made for 1-on-1 Roleplay ideally, but one that is able to handle scenarios, RPGs and storywriting fine. - - Uncensored during actual roleplay scenarios. # I do not care for zero-shot prompting like what some people do. It is uncensored enough in actual usecases. + - Uncensored during actual roleplay scenarios. # I do not care for zero-shot prompting like what some people do. It is uncensored enough in actual usecases. - I quite like the prose and style for this model. overrides: parameters: @@ -8059,7 +8054,6 @@ urls: - https://huggingface.co/bartowski/New-Dawn-Llama-3-70B-32K-v1.0-GGUF - https://huggingface.co/sophosympatheia/New-Dawn-Llama-3-70B-32K-v1.0 - icon: https://imgur.com/tKzncGo.png description: | This model is a multi-level SLERP merge of several Llama 3 70B variants. See the merge recipe below for details. 
I extended the context window for this model out to 32K by snagging some layers from abacusai/Smaug-Llama-3-70B-Instruct-32K using a technique similar to what I used for Midnight Miqu, which was further honed by jukofyork. This model is uncensored. You are responsible for whatever you do with it. @@ -8411,7 +8405,8 @@ - filename: dolphin-2.9.2-Phi-3-Medium-abliterated-Q4_K_M.gguf sha256: 566331c2efe87725310aacb709ca15088a0063fa0ddc14a345bf20d69982156b uri: huggingface://bartowski/dolphin-2.9.2-Phi-3-Medium-abliterated-GGUF/dolphin-2.9.2-Phi-3-Medium-abliterated-Q4_K_M.gguf -- url: "github:mudler/LocalAI/gallery/chatml.yaml@master" +- !!merge <<: *llama3 + url: "github:mudler/LocalAI/gallery/chatml.yaml@master" name: "llama-3-8b-instruct-dpo-v0.3-32k" license: llama3 urls: @@ -8595,7 +8590,7 @@ - filename: Copus-2x8B.i1-Q4_K_M.gguf sha256: 685da1ba49e203e8f491105585143d76044286d4b4687bed37d325f6b55501e5 uri: huggingface://mradermacher/Copus-2x8B-i1-GGUF/Copus-2x8B.i1-Q4_K_M.gguf -- &yi-chat ### Start Yi +- &yi-chat ### Start Yi url: "github:mudler/LocalAI/gallery/chatml.yaml@master" icon: "https://github.com/01-ai/Yi/raw/main/assets/img/Yi_logo_icon_light.svg" name: "yi-1.5-9b-chat" @@ -8806,7 +8801,7 @@ - filename: Fimbulvetr-11B-v2-Q4_K_M-imat.gguf sha256: 3f309b59508342536a70edd6c4be6cf4f2cb97f2e32cbc79ad2ab3f4c02933a4 uri: huggingface://Lewdiculous/Fimbulvetr-11B-v2-GGUF-IQ-Imatrix/Fimbulvetr-11B-v2-Q4_K_M-imat.gguf -- &noromaid ### Start noromaid +- &noromaid ### Start noromaid url: "github:mudler/LocalAI/gallery/noromaid.yaml@master" name: "noromaid-13b-0.4-DPO" icon: https://cdn-uploads.huggingface.co/production/uploads/630dfb008df86f1e5becadc3/VKX2Z2yjZX5J8kXzgeCYO.png @@ -8826,7 +8821,7 @@ - filename: Noromaid-13B-0.4-DPO.q4_k_m.gguf sha256: cb28e878d034fae3d0b43326c5fc1cfb4ab583b17c56e41d6ce023caec03c1c1 uri: huggingface://NeverSleep/Noromaid-13B-0.4-DPO-GGUF/Noromaid-13B-0.4-DPO.q4_k_m.gguf -- &wizardlm2 ### START Vicuna based +- &wizardlm2 ### START Vicuna based url: "github:mudler/LocalAI/gallery/wizardlm2.yaml@master" name: "wizardlm2-7b" description: | @@ -8881,7 +8876,9 @@ - filename: moondream2-mmproj-f16.gguf sha256: 4cc1cb3660d87ff56432ebeb7884ad35d67c48c7b9f6b2856f305e39c38eed8f uri: huggingface://moondream/moondream2-gguf/moondream2-mmproj-f16.gguf -- &llava ### START LLaVa +- &llava ### START LLaVa + name: "llava-1.6-vicuna" + icon: https://github.com/lobehub/lobe-icons/raw/master/packages/static-png/dark/llava-color.png url: "github:mudler/LocalAI/gallery/llava.yaml@master" license: apache-2.0 description: | @@ -8895,7 +8892,6 @@ - gpu - llama2 - cpu - name: "llava-1.6-vicuna" overrides: mmproj: mmproj-vicuna7b-f16.gguf parameters: @@ -9363,7 +9359,6 @@ June 18, 2024 Update, After extensive testing of the intermediate checkpoints, significant progress has been made. The model is slowly — I mean, really slowly — unlearning its alignment. By significantly lowering the learning rate, I was able to visibly observe deep behavioral changes, this process is taking longer than anticipated, but it's going to be worth it. Estimated time to completion: 4 more days.. I'm pleased to report that in several tests, the model not only maintained its intelligence but actually showed a slight improvement, especially in terms of common sense. An intermediate checkpoint of this model was used to create invisietch/EtherealRainbow-v0.3-rc7, with promising results. Currently, it seems like I'm on the right track. 
I hope this model will serve as a solid foundation for further merges, whether for role-playing (RP) or for uncensoring. This approach also allows us to save on actual fine-tuning, thereby reducing our carbon footprint. The merge process takes just a few minutes of CPU time, instead of days of GPU work. June 20, 2024 Update, Unaligning was partially successful, and the results are decent, but I am not fully satisfied. I decided to bite the bullet, and do a full finetune, god have mercy on my GPUs. I am also releasing the intermediate checkpoint of this model. - icon: https://i.imgur.com/Kpk1PgZ.png overrides: parameters: model: LLAMA-3_8B_Unaligned_Alpha-Q4_K_M.gguf @@ -9389,7 +9384,6 @@ uri: huggingface://bartowski/L3-8B-Lunaris-v1-GGUF/L3-8B-Lunaris-v1-Q4_K_M.gguf - !!merge <<: *llama3 name: "llama-3_8b_unaligned_alpha_rp_soup-i1" - icon: https://i.imgur.com/pXcjpoV.png urls: - https://huggingface.co/SicariusSicariiStuff/LLAMA-3_8B_Unaligned_Alpha_RP_Soup - https://huggingface.co/mradermacher/LLAMA-3_8B_Unaligned_Alpha_RP_Soup-i1-GGUF @@ -9739,7 +9733,7 @@ - filename: Freyja-v4.95-maldv-7b-NON-FICTION.i1-Q4_K_M.gguf sha256: cdc0f4de6df2ba120835fbd25c2a0ae2af8548f46d2c40c7a018c51c3d19e0c0 uri: huggingface://mradermacher/Freyja-v4.95-maldv-7b-NON-FICTION-i1-GGUF/Freyja-v4.95-maldv-7b-NON-FICTION.i1-Q4_K_M.gguf -- &chatml ### ChatML +- &chatml ### ChatML url: "github:mudler/LocalAI/gallery/chatml.yaml@master" name: "una-thepitbull-21.4b-v2" license: afl-3.0 @@ -9787,7 +9781,6 @@ sha256: 9c90f3a65332a03a6cbb563eee19c7586d9544f646ff9f33f7f1904b3d415ae2 uri: huggingface://nold/HelpingAI-9B-GGUF/HelpingAI-9B_Q4_K_M.gguf - url: "github:mudler/LocalAI/gallery/chatml-hercules.yaml@master" - icon: "https://tse3.mm.bing.net/th/id/OIG1.vnrl3xpEcypR3McLW63q?pid=ImgGn" urls: - https://huggingface.co/Locutusque/Llama-3-Hercules-5.0-8B - https://huggingface.co/bartowski/Llama-3-Hercules-5.0-8B-GGUF @@ -10025,7 +10018,7 @@ - filename: Triangulum-10B.Q4_K_M.gguf sha256: dd071f99edf6b166044bf229cdeec19419c4c348e3fc3d6587cfcc55e6fb85fa uri: huggingface://mradermacher/Triangulum-10B-GGUF/Triangulum-10B.Q4_K_M.gguf -- &command-R ### START Command-r +- &command-R ### START Command-r url: "github:mudler/LocalAI/gallery/command-r.yaml@master" name: "command-r-v01:q1_s" license: "cc-by-nc-4.0" @@ -10080,7 +10073,7 @@ - filename: "aya-23-35B-Q4_K_M.gguf" sha256: "57824768c1a945e21e028c8e9a29b39adb4838d489f5865c82601ab9ad98065d" uri: "huggingface://bartowski/aya-23-35B-GGUF/aya-23-35B-Q4_K_M.gguf" -- &phi-2-chat ### START Phi-2 +- &phi-2-chat ### START Phi-2 url: "github:mudler/LocalAI/gallery/phi-2-chat.yaml@master" license: mit description: | @@ -10202,7 +10195,7 @@ - filename: internlm3-8b-instruct-Q4_K_M.gguf uri: huggingface://bartowski/internlm3-8b-instruct-GGUF/internlm3-8b-instruct-Q4_K_M.gguf sha256: 2a9644687318e8659c9cf9b40730d5cc2f5af06f786a50439c7c51359b23896e -- &phi-3 ### START Phi-3 +- &phi-3 ### START Phi-3 url: "github:mudler/LocalAI/gallery/phi-3-chat.yaml@master" name: "phi-3-mini-4k-instruct" icon: https://avatars.githubusercontent.com/u/6154722 @@ -10402,7 +10395,7 @@ - filename: Phi-3.5-MoE-instruct-Q4_K_M.gguf sha256: 43e91bb720869bd8a92d8eb86bc3c74a52c49cf61642ca709b3d7bb89644df36 uri: huggingface://bartowski/Phi-3.5-MoE-instruct-GGUF/Phi-3.5-MoE-instruct-Q4_K_M.gguf -- &hermes-2-pro-mistral ### START Hermes +- &hermes-2-pro-mistral ### START Hermes url: "github:mudler/LocalAI/gallery/hermes-2-pro-mistral.yaml@master" name: "hermes-2-pro-mistral" icon: 
https://cdn-uploads.huggingface.co/production/uploads/6317aade83d8d2fd903192d9/ggO2sBDJ8Bhc6w-zwTx5j.png @@ -10738,7 +10731,7 @@ - filename: "galatolo-Q4_K.gguf" sha256: "ca0cfd5a9ad40dc16416aa3a277015d0299b62c0803b67f5709580042202c172" uri: "huggingface://galatolo/cerbero-7b-gguf/ggml-model-Q4_K.gguf" -- &codellama ### START Codellama +- &codellama ### START Codellama url: "github:mudler/LocalAI/gallery/codellama.yaml@master" name: "codellama-7b" license: llama2 @@ -10869,7 +10862,7 @@ - filename: "llm-compiler-7b-ftd.Q4_K.gguf" uri: "huggingface://legraphista/llm-compiler-7b-ftd-IMat-GGUF/llm-compiler-7b-ftd.Q4_K.gguf" sha256: d862dd18ed335413787d0ad196522a9902a3c10a6456afdab8721822cb0ddde8 -- &openvino ### START OpenVINO +- &openvino ### START OpenVINO url: "github:mudler/LocalAI/gallery/openvino.yaml@master" name: "openvino-llama-3-8b-instruct-ov-int8" license: llama3 @@ -10983,7 +10976,7 @@ - gpu - embedding - cpu -- &sentencentransformers ### START Embeddings +- &sentencentransformers ### START Embeddings description: | This framework provides an easy method to compute dense vector representations for sentences, paragraphs, and images. The models are based on transformer networks like BERT / RoBERTa / XLM-RoBERTa etc. and achieve state-of-the-art performance in various tasks. Text is embedded in vector space such that similar text are closer and can efficiently be found using cosine similarity. urls: @@ -10998,7 +10991,7 @@ overrides: parameters: model: all-MiniLM-L6-v2 -- &dreamshaper ### START Image generation +- &dreamshaper ### START Image generation name: dreamshaper icon: https://image.civitai.com/xG1nkqKTMzGDvpLrqFT7WA/dd9b038c-bd15-43ab-86ab-66e145ad7ff2/width=450/26072158-132340247-8k%20portrait%20of%20beautiful%20cyborg%20with%20brown%20hair,%20intricate,%20elegant,%20highly%20detailed,%20majestic,%20digital%20photography,%20art%20by%20artg_ed.jpeg license: other @@ -11110,7 +11103,7 @@ - filename: t5xxl_fp16.safetensors sha256: 6e480b09fae049a72d2a8c5fbccb8d3e92febeb233bbe9dfe7256958a9167635 uri: https://huggingface.co/comfyanonymous/flux_text_encoders/resolve/main/t5xxl_fp16.safetensors -- &whisper ## Whisper +- &whisper ## Whisper url: "github:mudler/LocalAI/gallery/whisper-base.yaml@master" name: "whisper-1" license: "MIT" @@ -11290,7 +11283,7 @@ description: | Stable Diffusion in NCNN with c++, supported txt2img and img2img name: stablediffusion-cpp -- &piper ## Piper TTS +- &piper ## Piper TTS url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-en-us-kathleen-low icon: https://github.com/rhasspy/piper/raw/master/etc/logo.png From 2f09aa1b850535d2cb820a49c19c9159867c1f0b Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Mon, 20 Jan 2025 19:04:23 +0100 Subject: [PATCH 064/679] chore(model gallery): add sd-3.5-large-ggml (#4647) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 30 ++++++++++++++++++++++++++++++ gallery/sd-ggml.yaml | 12 ++++++++++++ 2 files changed, 42 insertions(+) create mode 100644 gallery/sd-ggml.yaml diff --git a/gallery/index.yaml b/gallery/index.yaml index 30687062..bcb7866a 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -11028,6 +11028,36 @@ - sd-3 - gpu url: "github:mudler/LocalAI/gallery/stablediffusion3.yaml@master" +- name: sd-3.5-large-ggml + license: stabilityai-ai-community + url: "github:mudler/LocalAI/gallery/sd-ggml.yaml@master" + description: | + Stable Diffusion 3.5 Large is a Multimodal Diffusion Transformer (MMDiT) text-to-image model that features improved performance in image quality, 
typography, complex prompt understanding, and resource-efficiency. + urls: + - https://huggingface.co/stabilityai/stable-diffusion-3.5-large + - https://huggingface.co/second-state/stable-diffusion-3.5-large-GGUF + tags: + - text-to-image + - flux + - gpu + - cpu + icon: https://huggingface.co/stabilityai/stable-diffusion-3.5-large/media/main/sd3.5_large_demo.png + overrides: + parameters: + model: sd3.5_large-Q4_0.gguf + files: + - filename: "sd3.5_large-Q4_0.gguf" + sha256: "c79ed6cdaa7decaca6b05ccc636b956b37c47de9b104c56315ca8ed086347b00" + uri: "huggingface://second-state/stable-diffusion-3.5-large-GGUF/sd3.5_large-Q4_0.gguf" + - filename: clip_g.safetensors + sha256: ec310df2af79c318e24d20511b601a591ca8cd4f1fce1d8dff822a356bcdb1f4 + uri: huggingface://second-state/stable-diffusion-3.5-large-GGUF/clip_g.safetensors + - filename: clip_l.safetensors + sha256: 660c6f5b1abae9dc498ac2d21e1347d2abdb0cf6c0c0c8576cd796491d9a6cdd + uri: huggingface://second-state/stable-diffusion-3.5-large-GGUF/clip_l.safetensors + - filename: t5xxl-Q5_0.gguf + sha256: f4df16c641a05c4a6ca717068ba3ee312875000f6fac0efbd152915553b5fc3e + uri: huggingface://second-state/stable-diffusion-3.5-large-GGUF/t5xxl-Q5_0.gguf - &flux name: flux.1-dev license: flux-1-dev-non-commercial-license diff --git a/gallery/sd-ggml.yaml b/gallery/sd-ggml.yaml new file mode 100644 index 00000000..d819eba8 --- /dev/null +++ b/gallery/sd-ggml.yaml @@ -0,0 +1,12 @@ +--- +name: "sd-ggml" + +config_file: | + backend: stablediffusion-ggml + step: 25 + cfg_scale: 4.5 + options: + - "clip_l_path:clip_l.safetensors" + - "clip_g_path:clip_g.safetensors" + - "t5xxl_path:t5xxl-Q5_0.gguf" + - "sampler:euler" From 14a1e02f4478cef20d723f9fa91f0645c856b7c8 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 20 Jan 2025 23:33:40 +0000 Subject: [PATCH 065/679] chore(deps): Bump docs/themes/hugo-theme-relearn from `80e448e` to `8dad5ee` (#4656) chore(deps): Bump docs/themes/hugo-theme-relearn Bumps [docs/themes/hugo-theme-relearn](https://github.com/McShelby/hugo-theme-relearn) from `80e448e` to `8dad5ee`. - [Release notes](https://github.com/McShelby/hugo-theme-relearn/releases) - [Commits](https://github.com/McShelby/hugo-theme-relearn/compare/80e448e5bdaa92c87ee0d0d86f1125c8606ebf5f...8dad5ee419e5bb2a0b380aa72d7a7389af4945f6) --- updated-dependencies: - dependency-name: docs/themes/hugo-theme-relearn dependency-type: direct:production ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- docs/themes/hugo-theme-relearn | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/themes/hugo-theme-relearn b/docs/themes/hugo-theme-relearn index 80e448e5..8dad5ee4 160000 --- a/docs/themes/hugo-theme-relearn +++ b/docs/themes/hugo-theme-relearn @@ -1 +1 @@ -Subproject commit 80e448e5bdaa92c87ee0d0d86f1125c8606ebf5f +Subproject commit 8dad5ee419e5bb2a0b380aa72d7a7389af4945f6 From 1a08948e63ce48dd32524cf4f7df88e6b69e639d Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Tue, 21 Jan 2025 08:37:13 +0100 Subject: [PATCH 066/679] chore: :arrow_up: Update ggerganov/llama.cpp to `aea8ddd5165d525a449e2fc3839db77a71f4a318` (#4657) :arrow_up: Update ggerganov/llama.cpp Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 7aaad492..53e5af7e 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ DETECT_LIBS?=true # llama.cpp versions GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be -CPPLLAMA_VERSION?=92bc493917d43b83e592349e138b54c90b1c3ea7 +CPPLLAMA_VERSION?=aea8ddd5165d525a449e2fc3839db77a71f4a318 # whisper.cpp version WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp From b264a91b3f24ed8b2ec4c3161a8405be4e7019ad Mon Sep 17 00:00:00 2001 From: Gianluca Boiano <491117+M0Rf30@users.noreply.github.com> Date: Tue, 21 Jan 2025 10:37:05 +0100 Subject: [PATCH 067/679] chore(model gallery): add Deepseek-R1-Distill models (#4646) * chore(model gallery): add Deepseek-R1-Distill-Llama-8b Signed-off-by: Gianluca Boiano * chore(model gallery): add Deepseek-R1-Distill-Qwen-1.5b Signed-off-by: Gianluca Boiano --------- Signed-off-by: Gianluca Boiano --- gallery/index.yaml | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index bcb7866a..126bd14a 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -2696,6 +2696,23 @@ - filename: Qwentile2.5-32B-Instruct-Q4_K_M.gguf sha256: e476d6e3c15c78fc3f986d7ae8fa35c16116843827f2e6243c05767cef2f3615 uri: huggingface://bartowski/Qwentile2.5-32B-Instruct-GGUF/Qwentile2.5-32B-Instruct-Q4_K_M.gguf +- !!merge <<: *qwen25 + name: "deepseek-r1-distill-qwen-1.5b" + icon: "https://avatars.githubusercontent.com/u/148330874" + urls: + - https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5b + - https://huggingface.co/unsloth/DeepSeek-R1-Distill-Qwen-1.5B-GGUF + description: | + DeepSeek-R1 is our advanced first-generation reasoning model designed to enhance performance in reasoning tasks. + Building on the foundation laid by its predecessor, DeepSeek-R1-Zero, which was trained using large-scale reinforcement learning (RL) without supervised fine-tuning, DeepSeek-R1 addresses the challenges faced by R1-Zero, such as endless repetition, poor readability, and language mixing. + By incorporating cold-start data prior to the RL phase,DeepSeek-R1 significantly improves reasoning capabilities and achieves performance levels comparable to OpenAI-o1 across a variety of domains, including mathematics, coding, and complex reasoning tasks. 
+ overrides: + parameters: + model: deepseek-r1-distill-qwen-1.5b-Q4_K_M.gguf + files: + - filename: deepseek-r1-distill-qwen-1.5b-Q4_K_M.gguf + sha256: c2c43b6018cf7700ce0ddee8807deb1a9a26758ef878232f3a142d16df81f0fe + uri: huggingface://unsloth/DeepSeek-R1-Distill-Qwen-1.5B-GGUF/DeepSeek-R1-Distill-Qwen-1.5B-Q4_K_M.gguf - &archfunct license: apache-2.0 tags: @@ -5219,6 +5236,23 @@ - filename: Dolphin3.0-Llama3.1-8B-Q4_K_M.gguf sha256: 268390e07edd407ad93ea21a868b7ae995b5950e01cad0db9e1802ae5049d405 uri: huggingface://bartowski/Dolphin3.0-Llama3.1-8B-GGUF/Dolphin3.0-Llama3.1-8B-Q4_K_M.gguf +- !!merge <<: *llama31 + name: "deepseek-r1-distill-llama-8b" + icon: "https://avatars.githubusercontent.com/u/148330874" + urls: + - https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Llama-8B + - https://huggingface.co/unsloth/DeepSeek-R1-Distill-Llama-8B-GGUF + description: | + DeepSeek-R1 is our advanced first-generation reasoning model designed to enhance performance in reasoning tasks. + Building on the foundation laid by its predecessor, DeepSeek-R1-Zero, which was trained using large-scale reinforcement learning (RL) without supervised fine-tuning, DeepSeek-R1 addresses the challenges faced by R1-Zero, such as endless repetition, poor readability, and language mixing. + By incorporating cold-start data prior to the RL phase,DeepSeek-R1 significantly improves reasoning capabilities and achieves performance levels comparable to OpenAI-o1 across a variety of domains, including mathematics, coding, and complex reasoning tasks. + overrides: + parameters: + model: deepseek-r1-distill-llama-8b-Q4_K_M.gguf + files: + - filename: deepseek-r1-distill-llama-8b-Q4_K_M.gguf + sha256: f8eba201522ab44b79bc54166126bfaf836111ff4cbf2d13c59c3b57da10573b + uri: huggingface://unsloth/DeepSeek-R1-Distill-Llama-8B-GGUF/DeepSeek-R1-Distill-Llama-8B-Q4_K_M.gguf - &deepseek ## Deepseek url: "github:mudler/LocalAI/gallery/deepseek.yaml@master" name: "deepseek-coder-v2-lite-instruct" From 6831719e1e74f5ed0f58c40999bce9a8f4066959 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Tue, 21 Jan 2025 15:09:36 +0100 Subject: [PATCH 068/679] chore(model gallery): add deepseek-r1-distill-qwen-7b (#4660) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index 126bd14a..c56e37b1 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -2713,6 +2713,22 @@ - filename: deepseek-r1-distill-qwen-1.5b-Q4_K_M.gguf sha256: c2c43b6018cf7700ce0ddee8807deb1a9a26758ef878232f3a142d16df81f0fe uri: huggingface://unsloth/DeepSeek-R1-Distill-Qwen-1.5B-GGUF/DeepSeek-R1-Distill-Qwen-1.5B-Q4_K_M.gguf +- !!merge <<: *qwen25 + name: "deepseek-r1-distill-qwen-7b" + urls: + - https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B + - https://huggingface.co/bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF + description: | + DeepSeek-R1 is our advanced first-generation reasoning model designed to enhance performance in reasoning tasks. + Building on the foundation laid by its predecessor, DeepSeek-R1-Zero, which was trained using large-scale reinforcement learning (RL) without supervised fine-tuning, DeepSeek-R1 addresses the challenges faced by R1-Zero, such as endless repetition, poor readability, and language mixing. 
+ By incorporating cold-start data prior to the RL phase,DeepSeek-R1 significantly improves reasoning capabilities and achieves performance levels comparable to OpenAI-o1 across a variety of domains, including mathematics, coding, and complex reasoning tasks. + overrides: + parameters: + model: DeepSeek-R1-Distill-Qwen-7B-Q4_K_M.gguf + files: + - filename: DeepSeek-R1-Distill-Qwen-7B-Q4_K_M.gguf + sha256: 731ece8d06dc7eda6f6572997feb9ee1258db0784827e642909d9b565641937b + uri: huggingface://bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF/DeepSeek-R1-Distill-Qwen-7B-Q4_K_M.gguf - &archfunct license: apache-2.0 tags: From e81ceff6812c43c401c110eafbcc140747266ea2 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Tue, 21 Jan 2025 23:04:29 +0100 Subject: [PATCH 069/679] chore: :arrow_up: Update ggerganov/llama.cpp to `6171c9d25820ccf676b243c172868819d882848f` (#4661) :arrow_up: Update ggerganov/llama.cpp Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 53e5af7e..44959fd3 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ DETECT_LIBS?=true # llama.cpp versions GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be -CPPLLAMA_VERSION?=aea8ddd5165d525a449e2fc3839db77a71f4a318 +CPPLLAMA_VERSION?=6171c9d25820ccf676b243c172868819d882848f # whisper.cpp version WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp From 0ec25b8b0743416a7ddd6f66f09dc1d1dd7fe07f Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Wed, 22 Jan 2025 16:37:20 +0100 Subject: [PATCH 070/679] chore(model gallery): add sd-1.5-ggml and sd-3.5-medium-ggml (#4664) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 58 +++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 57 insertions(+), 1 deletion(-) diff --git a/gallery/index.yaml b/gallery/index.yaml index c56e37b1..4ce19bb4 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -11078,6 +11078,62 @@ - sd-3 - gpu url: "github:mudler/LocalAI/gallery/stablediffusion3.yaml@master" +- name: sd-1.5-ggml + license: creativeml-openrail-m + url: "github:mudler/LocalAI/gallery/sd-ggml.yaml@master" + description: | + Stable Diffusion 1.5 + urls: + - https://huggingface.co/second-state/stable-diffusion-v1-5-GGUF + tags: + - text-to-image + - stablediffusion + - gpu + - cpu + overrides: + options: + - "sampler:euler" + parameters: + model: stable-diffusion-v1-5-pruned-emaonly-Q4_0.gguf + files: + - filename: "stable-diffusion-v1-5-pruned-emaonly-Q4_0.gguf" + sha256: "b8944e9fe0b69b36ae1b5bb0185b3a7b8ef14347fe0fa9af6c64c4829022261f" + uri: "huggingface://second-state/stable-diffusion-v1-5-GGUF/stable-diffusion-v1-5-pruned-emaonly-Q4_0.gguf" +- name: sd-3.5-medium-ggml + license: stabilityai-ai-community + url: "github:mudler/LocalAI/gallery/sd-ggml.yaml@master" + description: | + Stable Diffusion 3.5 Medium is a Multimodal Diffusion Transformer (MMDiT) text-to-image model that features improved performance in image quality, typography, complex prompt understanding, and resource-efficiency. 
+ urls: + - https://huggingface.co/stabilityai/stable-diffusion-3.5-medium + - https://huggingface.co/second-state/stable-diffusion-3.5-medium-GGUF + tags: + - text-to-image + - stablediffusion + - gpu + - cpu + icon: https://huggingface.co/stabilityai/stable-diffusion-3.5-medium/media/main/sd3.5_medium_demo.jpg + overrides: + options: + - "clip_l_path:clip_l-Q4_0.gguf" + - "clip_g_path:clip_g-Q4_0.gguf" + - "t5xxl_path:t5xxl-Q4_0.gguf" + - "sampler:euler" + parameters: + model: sd3.5_medium-Q4_0.gguf + files: + - filename: "sd3.5_medium-Q4_0.gguf" + sha256: "3bb8c5e9ab0a841117089ed4ed81d885bb85161df2a766b812f829bc55b31adf" + uri: "huggingface://second-state/stable-diffusion-3.5-medium-GGUF/sd3.5_medium-Q4_0.gguf" + - filename: clip_g-Q4_0.gguf + sha256: c142411147e16b7c4b9cc1f5d977cbe596104435d76fde47172d3d35c5e58bb8 + uri: huggingface://second-state/stable-diffusion-3.5-medium-GGUF/clip_g-Q4_0.gguf + - filename: clip_l-Q4_0.gguf + sha256: f5ad88ae2ac924eb4ac0298b77afa304b5e6014fc0c4128f0e3df40fdfcc0f8a + uri: huggingface://second-state/stable-diffusion-3.5-medium-GGUF/clip_l-Q4_0.gguf + - filename: t5xxl-Q4_0.gguf + sha256: 987ba47c158b890c274f78fd35324419f50941e846a49789f0977e9fe9d97ab7 + uri: huggingface://second-state/stable-diffusion-3.5-medium-GGUF/t5xxl-Q4_0.gguf - name: sd-3.5-large-ggml license: stabilityai-ai-community url: "github:mudler/LocalAI/gallery/sd-ggml.yaml@master" @@ -11088,7 +11144,7 @@ - https://huggingface.co/second-state/stable-diffusion-3.5-large-GGUF tags: - text-to-image - - flux + - stablediffusion - gpu - cpu icon: https://huggingface.co/stabilityai/stable-diffusion-3.5-large/media/main/sd3.5_large_demo.png From 10675ac28e80e990832c650174efec0e0d006838 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Wed, 22 Jan 2025 18:07:30 +0100 Subject: [PATCH 071/679] Update README.md Signed-off-by: Ettore Di Giacinto --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 4d415d16..78267e04 100644 --- a/README.md +++ b/README.md @@ -39,7 +39,7 @@

-go-skynet%2FLocalAI | Trendshift
+mudler%2FLocalAI | Trendshift

> :bulb: Get help - [ā“FAQ](https://localai.io/faq/) [šŸ’­Discussions](https://github.com/go-skynet/LocalAI/discussions) [:speech_balloon: Discord](https://discord.gg/uJAeKSAGDy) [:book: Documentation website](https://localai.io/) From e15d29aba2982d07cb2bfec9267c076d73eab2b5 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Wed, 22 Jan 2025 19:34:16 +0100 Subject: [PATCH 072/679] chore(stablediffusion-ncn): drop in favor of ggml implementation (#4652) * chore(stablediffusion-ncn): drop in favor of ggml implementation Signed-off-by: Ettore Di Giacinto * chore(ci): drop stablediffusion build Signed-off-by: Ettore Di Giacinto * chore(tests): add Signed-off-by: Ettore Di Giacinto * chore(tests): try to fixup current tests Signed-off-by: Ettore Di Giacinto * Try to fix tests Signed-off-by: Ettore Di Giacinto * Tests improvements Signed-off-by: Ettore Di Giacinto * chore(tests): use quality to specify step Signed-off-by: Ettore Di Giacinto * chore(tests): switch to sd-1.5 also increase prep time for downloading models Signed-off-by: Ettore Di Giacinto --------- Signed-off-by: Ettore Di Giacinto --- .devcontainer/docker-compose-devcontainer.yml | 2 +- .env | 6 +- .github/workflows/release.yaml | 35 +---------- .github/workflows/test.yml | 6 +- .vscode/launch.json | 2 +- Dockerfile | 38 +----------- Makefile | 36 +---------- aio/cpu/image-gen.yaml | 59 +++--------------- backend/go/image/stablediffusion/main.go | 21 ------- .../image/stablediffusion/stablediffusion.go | 33 ---------- core/config/backend_config.go | 2 +- core/config/config_test.go | 61 +++++++++++++++++++ core/http/app_test.go | 17 +++--- core/http/endpoints/openai/image.go | 6 +- core/http/endpoints/openai/request.go | 9 +++ core/schema/openai.go | 5 +- pkg/model/initializers.go | 9 +-- pkg/stablediffusion/generate.go | 35 ----------- pkg/stablediffusion/generate_unsupported.go | 10 --- pkg/stablediffusion/stablediffusion.go | 20 ------ tests/e2e-aio/e2e_suite_test.go | 2 +- tests/e2e-aio/e2e_test.go | 11 ++-- 22 files changed, 123 insertions(+), 302 deletions(-) delete mode 100644 backend/go/image/stablediffusion/main.go delete mode 100644 backend/go/image/stablediffusion/stablediffusion.go delete mode 100644 pkg/stablediffusion/generate.go delete mode 100644 pkg/stablediffusion/generate_unsupported.go delete mode 100644 pkg/stablediffusion/stablediffusion.go diff --git a/.devcontainer/docker-compose-devcontainer.yml b/.devcontainer/docker-compose-devcontainer.yml index 8795d64d..7ef22099 100644 --- a/.devcontainer/docker-compose-devcontainer.yml +++ b/.devcontainer/docker-compose-devcontainer.yml @@ -7,7 +7,7 @@ services: args: - FFMPEG=true - IMAGE_TYPE=extras - - GO_TAGS=stablediffusion p2p tts + - GO_TAGS=p2p tts env_file: - ../.env ports: diff --git a/.env b/.env index e92f7f3b..ee8db74e 100644 --- a/.env +++ b/.env @@ -38,12 +38,12 @@ ## Uncomment and set to true to enable rebuilding from source # REBUILD=true -## Enable go tags, available: stablediffusion, tts -## stablediffusion: image generation with stablediffusion +## Enable go tags, available: p2p, tts +## p2p: enable distributed inferencing ## tts: enables text-to-speech with go-piper ## (requires REBUILD=true) # -# GO_TAGS=stablediffusion +# GO_TAGS=p2p ## Path where to store generated images # LOCALAI_IMAGE_PATH=/tmp/generated/images diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index 47a69b0f..e133ecb6 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -237,40 +237,7 @@ jobs: detached: true 
connect-timeout-seconds: 180 limit-access-to-actor: true - build-stablediffusion: - runs-on: ubuntu-latest - steps: - - name: Clone - uses: actions/checkout@v4 - with: - submodules: true - - uses: actions/setup-go@v5 - with: - go-version: '1.21.x' - cache: false - - name: Dependencies - run: | - sudo apt-get update - sudo apt-get install -y --no-install-recommends libopencv-dev protobuf-compiler ccache upx-ucl - go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af - go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2 - - name: Build stablediffusion - run: | - export PATH=$PATH:$GOPATH/bin - make backend-assets/grpc/stablediffusion - mkdir -p release && cp backend-assets/grpc/stablediffusion release - env: - GO_TAGS: stablediffusion - - uses: actions/upload-artifact@v4 - with: - name: stablediffusion - path: release/ - - name: Release - uses: softprops/action-gh-release@v2 - if: startsWith(github.ref, 'refs/tags/') - with: - files: | - release/* + build-macOS-x86_64: runs-on: macos-13 diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 0ee93afa..444c89fb 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -105,9 +105,7 @@ jobs: # Pre-build piper before we start tests in order to have shared libraries in place make sources/go-piper && \ GO_TAGS="tts" make -C sources/go-piper piper.o && \ - sudo cp -rfv sources/go-piper/piper-phonemize/pi/lib/. /usr/lib/ && \ - # Pre-build stable diffusion before we install a newer version of abseil (not compatible with stablediffusion-ncn) - PATH="$PATH:/root/go/bin" GO_TAGS="stablediffusion tts" GRPC_BACKENDS=backend-assets/grpc/stablediffusion make build + sudo cp -rfv sources/go-piper/piper-phonemize/pi/lib/. /usr/lib/ env: CUDA_VERSION: 12-4 - name: Cache grpc @@ -129,7 +127,7 @@ jobs: cd grpc && cd cmake/build && sudo make --jobs 5 install - name: Test run: | - PATH="$PATH:/root/go/bin" GO_TAGS="stablediffusion tts" make --jobs 5 --output-sync=target test + PATH="$PATH:/root/go/bin" GO_TAGS="tts" make --jobs 5 --output-sync=target test - name: Setup tmate session if tests fail if: ${{ failure() }} uses: mxschmitt/action-tmate@v3.19 diff --git a/.vscode/launch.json b/.vscode/launch.json index 50493421..f5e91508 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -26,7 +26,7 @@ "LOCALAI_P2P": "true", "LOCALAI_FEDERATED": "true" }, - "buildFlags": ["-tags", "stablediffusion p2p tts", "-v"], + "buildFlags": ["-tags", "p2p tts", "-v"], "envFile": "${workspaceFolder}/.env", "cwd": "${workspaceRoot}" } diff --git a/Dockerfile b/Dockerfile index 4ddc921d..8594c2a1 100644 --- a/Dockerfile +++ b/Dockerfile @@ -69,14 +69,10 @@ ENV PATH=/opt/rocm/bin:${PATH} # OpenBLAS requirements and stable diffusion RUN apt-get update && \ apt-get install -y --no-install-recommends \ - libopenblas-dev \ - libopencv-dev && \ + libopenblas-dev && \ apt-get clean && \ rm -rf /var/lib/apt/lists/* -# Set up OpenCV -RUN ln -s /usr/include/opencv4/opencv2 /usr/include/opencv2 - WORKDIR /build ################################### @@ -251,7 +247,7 @@ RUN git clone --recurse-submodules --jobs 4 -b ${GRPC_VERSION} --depth 1 --shall FROM requirements-drivers AS builder-base -ARG GO_TAGS="stablediffusion tts p2p" +ARG GO_TAGS="tts p2p" ARG GRPC_BACKENDS ARG MAKEFLAGS ARG LD_FLAGS="-s -w" @@ -285,35 +281,12 @@ RUN <