From 68255283451955ca41712850850d75c0a4ff0701 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto <mudler@localai.io>
Date: Tue, 20 May 2025 11:39:49 +0200
Subject: [PATCH] chore(model gallery): add smolvlm2-500m-video-instruct

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
---
 gallery/index.yaml | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/gallery/index.yaml b/gallery/index.yaml
index 6a470e9b..077c1267 100644
--- a/gallery/index.yaml
+++ b/gallery/index.yaml
@@ -85,6 +85,27 @@
     - filename: mmproj-SmolVLM2-2.2B-Instruct-Q8_0.gguf
       sha256: ae07ea1facd07dd3230c4483b63e8cda96c6944ad2481f33d531f79e892dd024
       uri: huggingface://ggml-org/SmolVLM2-2.2B-Instruct-GGUF/mmproj-SmolVLM2-2.2B-Instruct-Q8_0.gguf
+- !!merge <<: *smolvlm  # new gallery entry; merges in the keys of the smolvlm anchor (defined outside this hunk), keys set below take precedence
+  name: "smolvlm2-500m-video-instruct"
+  icon: https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/SmolVLM2_banner.png
+  urls:  # the second URL is the repository the files listed below are fetched from
+    - https://huggingface.co/HuggingFaceTB/SmolVLM2-500M-Video-Instruct
+    - https://huggingface.co/ggml-org/SmolVLM2-500M-Video-Instruct-GGUF
+  description: |
+    SmolVLM2-500M-Video is a lightweight multimodal model designed to analyze video content.
+    The model processes videos, images, and text inputs to generate text outputs - whether answering questions about media files, comparing visual content, or transcribing text from images. Despite its compact size, requiring only 1.8GB of GPU RAM for video inference, it delivers robust performance on complex multimodal tasks.
+    This efficiency makes it particularly well-suited for on-device applications where computational resources may be limited.
+  overrides:  # both filenames referenced here must match a filename under files: below
+    parameters:
+      model: SmolVLM2-500M-Video-Instruct-f16.gguf
+    mmproj: mmproj-SmolVLM2-500M-Video-Instruct-f16.gguf  # NOTE(review): presumably the multimodal projector paired with the model - confirm against the smolvlm base config
+  files:  # each entry gives a local filename, a sha256 value and a huggingface:// source uri
+    - filename: SmolVLM2-500M-Video-Instruct-f16.gguf
+      sha256: 80f7e3f04bc2d3324ac1a9f52f5776fe13a69912adf74f8e7edacf773d140d77
+      uri: huggingface://ggml-org/SmolVLM2-500M-Video-Instruct-GGUF/SmolVLM2-500M-Video-Instruct-f16.gguf
+    - filename: mmproj-SmolVLM2-500M-Video-Instruct-f16.gguf
+      sha256: b5dc8ebe7cbeab66a5369693960a52515d7824f13d4063ceca78431f2a6b59b0
+      uri: huggingface://ggml-org/SmolVLM2-500M-Video-Instruct-GGUF/mmproj-SmolVLM2-500M-Video-Instruct-f16.gguf
 - &qwen3
   url: "github:mudler/LocalAI/gallery/qwen3.yaml@master"
   name: "qwen3-30b-a3b"