models(gallery): add phi-3 vision (#3890)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

parent 313ea2c4d2
commit 015835dba2

2 changed files with 36 additions and 0 deletions

@@ -6225,6 +6225,19 @@
     - filename: Phi-3.5-mini-TitanFusion-0.2.Q4_K_M.gguf
       sha256: 9579305712f2bca246914639c4873acdc1e7bc64ac2c7db0230df4f0ca0ef234
       uri: huggingface://mradermacher/Phi-3.5-mini-TitanFusion-0.2-GGUF/Phi-3.5-mini-TitanFusion-0.2.Q4_K_M.gguf
+- !!merge <<: *phi-3
+  name: "phi-3-vision:vllm"
+  url: "github:mudler/LocalAI/gallery/phi-3-vision.yaml@master"
+  description: |
+    Phi-3.5-vision is a lightweight, state-of-the-art open multimodal model built upon datasets which include synthetic data and filtered publicly available websites, with a focus on very high-quality, reasoning-dense data covering both text and vision. The model belongs to the Phi-3 model family, and the multimodal version supports a 128K context length (in tokens). The model underwent a rigorous enhancement process, incorporating both supervised fine-tuning and direct preference optimization, to ensure precise instruction adherence and robust safety measures.
+- !!merge <<: *phi-3
+  name: "phi-3.5-vision:vllm"
+  url: "github:mudler/LocalAI/gallery/phi-3-vision.yaml@master"
+  override:
+    parameters:
+      model: microsoft/Phi-3.5-vision-instruct
+  description: |
+    Phi-3.5-vision is a lightweight, state-of-the-art open multimodal model built upon datasets which include synthetic data and filtered publicly available websites, with a focus on very high-quality, reasoning-dense data covering both text and vision. The model belongs to the Phi-3 model family, and the multimodal version supports a 128K context length (in tokens). The model underwent a rigorous enhancement process, incorporating both supervised fine-tuning and direct preference optimization, to ensure precise instruction adherence and robust safety measures.
 - &hermes-2-pro-mistral
   ### START Hermes
   url: "github:mudler/LocalAI/gallery/hermes-2-pro-mistral.yaml@master"
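
Note on the "!!merge <<: *phi-3" entries above: the gallery index relies on YAML anchors and merge keys, so each new entry inherits every field of the phi-3 anchor defined earlier in the index and only spells out what differs (here the name, the URL of the backing config file, and an optional override). A minimal sketch of the pattern, with an illustrative anchor body rather than the real index contents:

- &phi-3                      # anchor: base entry whose fields later entries can reuse
  license: mit
  tags:
    - llm
    - multimodal
- !!merge <<: *phi-3          # merge key: inherit every field from the anchored entry
  name: "phi-3-vision:vllm"   # then add or override only what differs
  url: "github:mudler/LocalAI/gallery/phi-3-vision.yaml@master"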

gallery/phi-3-vision.yaml (new file, 23 lines)
@@ -0,0 +1,23 @@
+---
+name: "phi3-vision"
+
+config_file: |
+  name: phi3-vision
+  backend: vllm
+  parameters:
+    model: microsoft/Phi-3-vision-128k-instruct
+  trust_remote_code: true
+  max_model_len: 32768
+  template:
+    chat_message: |-
+      <|{{ .RoleName }}|>
+      {{.Content}}<|end|>
+    chat: >-
+      {{.Input}}
+
+      <|assistant|>
+
+    completion: |
+      {{.Input}}
+    use_tokenizer_template: false
+    image: "<|image_{{ add1 .ID }}|>\n{{.Text}}"
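
Two notes on how this file is consumed (inferences from the config above, not statements in the commit itself): the image template wraps each attached image in an <|image_N|> placeholder, where N is the 1-based image index produced by add1 .ID, before the surrounding chat template is applied; and the phi-3.5-vision:vllm gallery entry reuses this same file, applying its override block on top of the embedded config_file at install time. Assuming that override simply replaces parameters.model, the installed Phi-3.5 variant would end up with roughly this effective configuration (an illustrative sketch, not generated output):

name: phi3-vision                            # the install name chosen via the gallery may differ
backend: vllm
parameters:
  model: microsoft/Phi-3.5-vision-instruct   # swapped in by the gallery override
trust_remote_code: true
max_model_len: 32768
# template: section unchanged from gallery/phi-3-vision.yaml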