From 68255283451955ca41712850850d75c0a4ff0701 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Tue, 20 May 2025 11:39:49 +0200
Subject: [PATCH] chore(model gallery): add smolvlm2-500m-video-instruct

Signed-off-by: Ettore Di Giacinto
---
Note: this section (between "---" and the diffstat) is ignored by git-am.
The patch adds one gallery entry that merges the *smolvlm anchor and
points at the f16 model and mmproj files hosted under
ggml-org/SmolVLM2-500M-Video-Instruct-GGUF.
NOTE(review): line breaks and YAML indentation were reconstructed from
a whitespace-collapsed copy of this patch; in particular `mmproj` is
assumed to sit under `overrides` next to `parameters` - confirm against
neighbouring entries in gallery/index.yaml before applying.

 gallery/index.yaml | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/gallery/index.yaml b/gallery/index.yaml
index 6a470e9b..077c1267 100644
--- a/gallery/index.yaml
+++ b/gallery/index.yaml
@@ -85,6 +85,27 @@
     - filename: mmproj-SmolVLM2-2.2B-Instruct-Q8_0.gguf
       sha256: ae07ea1facd07dd3230c4483b63e8cda96c6944ad2481f33d531f79e892dd024
       uri: huggingface://ggml-org/SmolVLM2-2.2B-Instruct-GGUF/mmproj-SmolVLM2-2.2B-Instruct-Q8_0.gguf
+- !!merge <<: *smolvlm
+  name: "smolvlm2-500m-video-instruct"
+  icon: https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/SmolVLM2_banner.png
+  urls:
+    - https://huggingface.co/HuggingFaceTB/SmolVLM2-500M-Video-Instruct
+    - https://huggingface.co/ggml-org/SmolVLM2-500M-Video-Instruct-GGUF
+  description: |
+    SmolVLM2-500M-Video is a lightweight multimodal model designed to analyze video content.
+    The model processes videos, images, and text inputs to generate text outputs - whether answering questions about media files, comparing visual content, or transcribing text from images. Despite its compact size, requiring only 1.8GB of GPU RAM for video inference, it delivers robust performance on complex multimodal tasks.
+    This efficiency makes it particularly well-suited for on-device applications where computational resources may be limited.
+  overrides:
+    parameters:
+      model: SmolVLM2-500M-Video-Instruct-f16.gguf
+    mmproj: mmproj-SmolVLM2-500M-Video-Instruct-f16.gguf
+  files:
+    - filename: SmolVLM2-500M-Video-Instruct-f16.gguf
+      sha256: 80f7e3f04bc2d3324ac1a9f52f5776fe13a69912adf74f8e7edacf773d140d77
+      uri: huggingface://ggml-org/SmolVLM2-500M-Video-Instruct-GGUF/SmolVLM2-500M-Video-Instruct-f16.gguf
+    - filename: mmproj-SmolVLM2-500M-Video-Instruct-f16.gguf
+      sha256: b5dc8ebe7cbeab66a5369693960a52515d7824f13d4063ceca78431f2a6b59b0
+      uri: huggingface://ggml-org/SmolVLM2-500M-Video-Instruct-GGUF/mmproj-SmolVLM2-500M-Video-Instruct-f16.gguf
 - &qwen3
   url: "github:mudler/LocalAI/gallery/qwen3.yaml@master"
   name: "qwen3-30b-a3b"