chore(model gallery): add smolvlm-256m-instruct (#5412)

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
Ettore Di Giacinto 2025-05-20 11:15:09 +02:00 committed by GitHub
parent 996259b529
commit ec21b58008
2 changed files with 49 additions and 0 deletions

gallery/index.yaml

@@ -1,4 +1,34 @@
---
- &smolvlm
url: "github:mudler/LocalAI/gallery/smolvlm.yaml@master"
name: "smolvlm-256m-instruct"
icon: https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/SmolVLM_256_banner.png
urls:
- https://huggingface.co/HuggingFaceTB/SmolVLM-256M-Instruct
- https://huggingface.co/ggml-org/SmolVLM-256M-Instruct-GGUF
license: apache-2.0
description: |
SmolVLM-256M is the smallest multimodal model in the world. It accepts arbitrary sequences of image and text inputs to produce text outputs. It's designed for efficiency. SmolVLM can answer questions about images, describe visual content, or transcribe text. Its lightweight architecture makes it suitable for on-device applications while maintaining strong performance on multimodal tasks. It can run inference on one image with under 1GB of GPU RAM.
tags:
- llm
- gguf
- gpu
- cpu
- vision
- multimodal
- smolvlm
- image-to-text
overrides:
parameters:
model: SmolVLM-256M-Instruct-Q8_0.gguf
mmproj: mmproj-SmolVLM-256M-Instruct-Q8_0.gguf
files:
- filename: mmproj-SmolVLM-256M-Instruct-Q8_0.gguf
sha256: 7e943f7c53f0382a6fc41b6ee0c2def63ba4fded9ab8ed039cc9e2ab905e0edd
uri: huggingface://ggml-org/SmolVLM-256M-Instruct-GGUF/mmproj-SmolVLM-256M-Instruct-Q8_0.gguf
- filename: SmolVLM-256M-Instruct-Q8_0.gguf
sha256: 2a31195d3769c0b0fd0a4906201666108834848db768af11de1d2cef7cd35e65
uri: huggingface://ggml-org/SmolVLM-256M-Instruct-GGUF/SmolVLM-256M-Instruct-Q8_0.gguf
- &qwen3
url: "github:mudler/LocalAI/gallery/qwen3.yaml@master"
name: "qwen3-30b-a3b"

gallery/smolvlm.yaml (new file, 19 additions)

@@ -0,0 +1,19 @@
---
name: smolvlm
# yamllint disable-line rule:trailing-spaces
config_file: |
mmap: true
template:
chat_message: |
{{if eq .RoleName "assistant"}}Assistant{{else if eq .RoleName "system"}}System{{else if eq .RoleName "user"}}User{{end}}: {{.Content }}<end_of_utterance>
chat: "<|im_start|>\n{{.Input -}}\nAssistant: "
completion: |
{{.Input}}
f16: true
stopwords:
- '<|im_end|>'
- '<dummy32000>'
- '</s>'
- '<|'
- '<end_of_utterance>'
- '<|endoftext|>'
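
When this model is installed from the gallery, the installer downloads the two files entries (checking them against the sha256 values above) into the models directory and merges the index entry's overrides onto the config_file shipped in gallery/smolvlm.yaml. The sketch below shows roughly what the resulting per-model configuration looks like; it is illustrative only, and the top-level placement of mmproj is an assumption rather than something the diff spells out.

# Illustrative sketch, not part of the commit: the approximate per-model config
# produced by merging the gallery entry's overrides onto smolvlm.yaml's config_file.
# Placing mmproj at the top level is an assumption.
name: smolvlm-256m-instruct
mmap: true
f16: true
parameters:
  model: SmolVLM-256M-Instruct-Q8_0.gguf
mmproj: mmproj-SmolVLM-256M-Instruct-Q8_0.gguf
template:
  chat_message: |
    {{if eq .RoleName "assistant"}}Assistant{{else if eq .RoleName "system"}}System{{else if eq .RoleName "user"}}User{{end}}: {{.Content }}<end_of_utterance>
  chat: "<|im_start|>\n{{.Input -}}\nAssistant: "
  completion: |
    {{.Input}}
stopwords:
  - '<|im_end|>'
  - '<dummy32000>'
  - '</s>'
  - '<|'
  - '<end_of_utterance>'
  - '<|endoftext|>'

Because the chat templates and stopwords live in the shared smolvlm.yaml base file, additional SmolVLM variants can point at the same config_file and only override the GGUF filenames in their gallery entries.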