chore(model gallery): add qwen2.5-omni-3b (#5606)

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-06-13 22:35:00 +00:00 · 2025-06-09 10:54:42 +02:00 · 2025-06-09 10:54:42 +02:00 · 6efa97ce0b
commit 6efa97ce0b
parent 41cde5468a
1 changed file with 33 additions and 0 deletions
--- a/gallery/index.yaml
+++ b/gallery/index.yaml
@@ -7813,6 +7813,39 @@
    - filename: mmproj-Qwen2.5-Omni-7B-Q8_0.gguf
      sha256: 4a7bc5478a2ec8c5d186d63532eb22e75b79ba75ec3c0ce821676157318ef4ad
      uri: https://huggingface.co/ggml-org/Qwen2.5-Omni-7B-GGUF/resolve/main/mmproj-Qwen2.5-Omni-7B-Q8_0.gguf
+- !!merge <<: *qwen25  # merge key: pulls in the fields of the qwen25 anchor (anchor is defined outside this hunk)
+  name: "qwen2.5-omni-3b"
+  tags:
+    - multimodal
+    - gguf
+    - gpu
+    - cpu
+    - qwen2.5
+    - audio-to-text
+    - image-to-text
+    - text-to-text
+  urls:  # original model repo first, then the GGUF repo the files below are fetched from
+    - https://huggingface.co/Qwen/Qwen2.5-Omni-3B
+    - https://huggingface.co/ggml-org/Qwen2.5-Omni-3B-GGUF
+  description: |
+    Qwen2.5-Omni is an end-to-end multimodal model designed to perceive diverse modalities, including text, images, audio, and video, while simultaneously generating text and natural speech responses in a streaming manner.
+    Modalities:
+    - ✅ Text input
+    - ✅ Audio input
+    - ✅ Image input
+    - ❌ Video input
+    - ❌ Audio generation
+  overrides:  # YAML merge is shallow: if *qwen25 also carries `overrides`, this mapping replaces it rather than deep-merging
+    mmproj: mmproj-Qwen2.5-Omni-3B-Q8_0.gguf  # same name as the second `files` entry below
+    parameters:
+      model: Qwen2.5-Omni-3B-Q4_K_M.gguf  # same name as the first `files` entry below
+  files:  # one item per artifact: local filename, sha256 checksum, source uri
+    - filename: Qwen2.5-Omni-3B-Q4_K_M.gguf
+      sha256: 4b0bd358c1e9ec55dd3055ef6d71c958c821533d85916a10cfa89c4552a86e29
+      uri: huggingface://ggml-org/Qwen2.5-Omni-3B-GGUF/Qwen2.5-Omni-3B-Q4_K_M.gguf  # huggingface:// shorthand; the mmproj entry uses a full https URL to the same repo
+    - filename: mmproj-Qwen2.5-Omni-3B-Q8_0.gguf
+      sha256: 4e6c816cd33f7298d07cb780c136a396631e50e62f6501660271f8c6e302e565
+      uri: https://huggingface.co/ggml-org/Qwen2.5-Omni-3B-GGUF/resolve/main/mmproj-Qwen2.5-Omni-3B-Q8_0.gguf
 - !!merge <<: *qwen25
  name: "open-thoughts_openthinker3-7b"
  icon: https://huggingface.co/datasets/open-thoughts/open-thoughts-114k/resolve/main/open_thoughts.png