Merge branch 'master' into update/CPPLLAMA_VERSION

Ettore Di Giacinto, 2025-03-12 12:56:10 +01:00 (committed by GitHub)
commit 963b21ce82
10 changed files with 302 additions and 339 deletions


@@ -1,7 +1,7 @@
-name: text-embedding-ada-002
 embeddings: true
+name: text-embedding-ada-002
 parameters:
-  model: huggingface://hugging-quants/Llama-3.2-1B-Instruct-Q4_K_M-GGUF/llama-3.2-1b-instruct-q4_k_m.gguf
+  model: huggingface://bartowski/granite-embedding-107m-multilingual-GGUF/granite-embedding-107m-multilingual-f16.gguf
 usage: |
   You can test this model with curl like this:
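
The usage block is cut off at the hunk boundary above; for reference, a minimal request against LocalAI's OpenAI-compatible embeddings endpoint would look roughly like this (host and port assume a default local install):

    curl http://localhost:8080/embeddings -X POST -H "Content-Type: application/json" -d '{
      "input": "Your text string goes here",
      "model": "text-embedding-ada-002"
    }'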


@@ -1,101 +1,57 @@
-name: gpt-4
-mmap: true
-parameters:
-  model: huggingface://NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF/Hermes-2-Pro-Llama-3-8B-Q4_K_M.gguf
 context_size: 8192
-stopwords:
-- "<|im_end|>"
-- "<dummy32000>"
-- "</tool_call>"
-- "<|eot_id|>"
-- "<|end_of_text|>"
+f16: true
 function:
-  # disable injecting the "answer" tool
-  disable_no_action: true
   grammar:
-    # This allows the grammar to also return messages
-    mixed_mode: true
-    # Suffix to add to the grammar
-    #prefix: '<tool_call>\n'
-    # Force parallel calls in the grammar
-    # parallel_calls: true
-  return_name_in_function_response: true
-  # Without grammar uncomment the lines below
-  # Warning: this is relying only on the capability of the
-  # LLM model to generate the correct function call.
-  json_regex_match:
-  - "(?s)<tool_call>(.*?)</tool_call>"
-  - "(?s)<tool_call>(.*?)"
-  replace_llm_results:
-  # Drop the scratchpad content from responses
-  - key: "(?s)<scratchpad>.*</scratchpad>"
-    value: ""
-  replace_function_results:
-  # Replace everything that is not JSON array or object
-  #
-  - key: '(?s)^[^{\[]*'
-    value: ""
-  - key: '(?s)[^}\]]*$'
-    value: ""
-  - key: "'([^']*?)'"
-    value: "_DQUOTE_${1}_DQUOTE_"
-  - key: '\\"'
-    value: "__TEMP_QUOTE__"
-  - key: "\'"
-    value: "'"
-  - key: "_DQUOTE_"
-    value: '"'
-  - key: "__TEMP_QUOTE__"
-    value: '"'
-  # Drop the scratchpad content from responses
-  - key: "(?s)<scratchpad>.*</scratchpad>"
-    value: ""
+    no_mixed_free_string: true
+    schema_type: llama3.1 # or JSON is supported too (json)
+  response_regex:
+  - <function=(?P<name>\w+)>(?P<arguments>.*)</function>
+mmap: true
+name: gpt-4
+parameters:
+  model: Hermes-3-Llama-3.2-3B-Q4_K_M.gguf
+stopwords:
+- <|im_end|>
+- <dummy32000>
+- <|eot_id|>
+- <|end_of_text|>
 template:
   chat: |
-    {{.Input -}}
-    <|im_start|>assistant
+    <|begin_of_text|><|start_header_id|>system<|end_header_id|>
+    You are a helpful assistant<|eot_id|><|start_header_id|>user<|end_header_id|>
+    {{.Input }}
+    <|start_header_id|>assistant<|end_header_id|>
   chat_message: |
-    <|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}
-    {{- if .FunctionCall }}
-    <tool_call>
-    {{- else if eq .RoleName "tool" }}
-    <tool_response>
-    {{- end }}
-    {{- if .Content}}
-    {{.Content }}
-    {{- end }}
-    {{- if .FunctionCall}}
-    {{toJson .FunctionCall}}
-    {{- end }}
-    {{- if .FunctionCall }}
-    </tool_call>
-    {{- else if eq .RoleName "tool" }}
-    </tool_response>
-    {{- end }}<|im_end|>
+    <|start_header_id|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}<|end_header_id|>
+    {{ if .FunctionCall -}}
+    {{ else if eq .RoleName "tool" -}}
+    The Function was executed and the response was:
+    {{ end -}}
+    {{ if .Content -}}
+    {{.Content -}}
+    {{ else if .FunctionCall -}}
+    {{ range .FunctionCall }}
+    [{{.FunctionCall.Name}}({{.FunctionCall.Arguments}})]
+    {{ end }}
+    {{ end -}}
+    <|eot_id|>
   completion: |
     {{.Input}}
-  function: |-
-    <|im_start|>system
-    You are a function calling AI model.
-    Here are the available tools:
-    <tools>
-    {{range .Functions}}
-    {'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }}
-    {{end}}
-    </tools>
-    You should call the tools provided to you sequentially
-    Please use <scratchpad> XML tags to record your reasoning and planning before you call the functions as follows:
-    <scratchpad>
-    {step-by-step reasoning and plan in bullet points}
-    </scratchpad>
-    For each function call return a json object with function name and arguments within <tool_call> XML tags as follows:
-    <tool_call>
-    {"arguments": <args-dict>, "name": <function-name>}
-    </tool_call><|im_end|>
-    {{.Input -}}
-    <|im_start|>assistant
+  function: |
+    <|start_header_id|>system<|end_header_id|>
+    You are an expert in composing functions. You are given a question and a set of possible functions.
+    Based on the question, you will need to make one or more function/tool calls to achieve the purpose.
+    If none of the functions can be used, point it out. If the given question lacks the parameters required by the function, also point it out. You should only return the function call in tools call sections.
+    If you decide to invoke any of the function(s), you MUST put it in the format as follows:
+    [func_name1(params_name1=params_value1,params_name2=params_value2,...),func_name2(params_name1=params_value1,params_name2=params_value2,...)]
+    You SHOULD NOT include any other text in the response.
+    Here is a list of functions in JSON format that you can invoke.
+    {{toJson .Functions}}
+    <|eot_id|><|start_header_id|>user<|end_header_id|>
+    {{.Input}}
+    <|eot_id|><|start_header_id|>assistant<|end_header_id|>
+download_files:
+- filename: Hermes-3-Llama-3.2-3B-Q4_K_M.gguf
+  sha256: 2e220a14ba4328fee38cf36c2c068261560f999fadb5725ce5c6d977cb5126b5
+  uri: huggingface://bartowski/Hermes-3-Llama-3.2-3B-GGUF/Hermes-3-Llama-3.2-3B-Q4_K_M.gguf
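
As a point of reference for the new llama3.1-style grammar above: the function-calling machinery is driven through the standard OpenAI-compatible chat endpoint. A sketch of such a request follows; the tool name and schema are illustrative placeholders, not part of this commit:

    curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
      "model": "gpt-4",
      "messages": [{"role": "user", "content": "What is the weather like in Boston?"}],
      "tools": [{"type": "function", "function": {
        "name": "get_current_weather",
        "description": "Get the current weather in a given location",
        "parameters": {"type": "object", "properties": {"location": {"type": "string"}}, "required": ["location"]}
      }}]
    }'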


@@ -1,31 +1,49 @@
-backend: llama-cpp
 context_size: 4096
 f16: true
 mmap: true
+mmproj: minicpm-v-2_6-mmproj-f16.gguf
 name: gpt-4o
-roles:
-  user: "USER:"
-  assistant: "ASSISTANT:"
-  system: "SYSTEM:"
-mmproj: bakllava-mmproj.gguf
 parameters:
-  model: bakllava.gguf
+  model: minicpm-v-2_6-Q4_K_M.gguf
+stopwords:
+- <|im_end|>
+- <dummy32000>
+- </s>
+- <|endoftext|>
 template:
   chat: |
-    A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions.
-    {{.Input}}
-    ASSISTANT:
+    {{.Input -}}
+    <|im_start|>assistant
+  chat_message: |
+    <|im_start|>{{ .RoleName }}
+    {{ if .FunctionCall -}}
+    Function call:
+    {{ else if eq .RoleName "tool" -}}
+    Function response:
+    {{ end -}}
+    {{ if .Content -}}
+    {{.Content }}
+    {{ end -}}
+    {{ if .FunctionCall -}}
+    {{toJson .FunctionCall}}
+    {{ end -}}<|im_end|>
+  completion: |
+    {{.Input}}
+  function: |
+    <|im_start|>system
+    You are a function calling AI model. You are provided with functions to execute. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools:
+    {{range .Functions}}
+    {'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }}
+    {{end}}
+    For each function call return a json object with function name and arguments
+    <|im_end|>
+    {{.Input -}}
+    <|im_start|>assistant
 download_files:
-- filename: bakllava.gguf
-  uri: huggingface://mys/ggml_bakllava-1/ggml-model-q4_k.gguf
-- filename: bakllava-mmproj.gguf
-  uri: huggingface://mys/ggml_bakllava-1/mmproj-model-f16.gguf
-usage: |
-  curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
-    "model": "gpt-4-vision-preview",
-    "messages": [{"role": "user", "content": [{"type":"text", "text": "What is in the image?"}, {"type": "image_url", "image_url": {"url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg" }}], "temperature": 0.9}]}'
+- filename: minicpm-v-2_6-Q4_K_M.gguf
+  sha256: 3a4078d53b46f22989adbf998ce5a3fd090b6541f112d7e936eb4204a04100b1
+  uri: huggingface://openbmb/MiniCPM-V-2_6-gguf/ggml-model-Q4_K_M.gguf
+- filename: minicpm-v-2_6-mmproj-f16.gguf
+  uri: huggingface://openbmb/MiniCPM-V-2_6-gguf/mmproj-model-f16.gguf
+  sha256: 4485f68a0f1aa404c391e788ea88ea653c100d8e98fe572698f701e5809711fd


@@ -1,7 +1,7 @@
+embeddings: true
 name: text-embedding-ada-002
-backend: sentencetransformers
-embeddings: true
 parameters:
-  model: all-MiniLM-L6-v2
+  model: huggingface://bartowski/granite-embedding-107m-multilingual-GGUF/granite-embedding-107m-multilingual-f16.gguf
 usage: |
   You can test this model with curl like this:


@@ -1,101 +1,53 @@
-name: gpt-4
-mmap: true
-parameters:
-  model: huggingface://NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF/Hermes-2-Pro-Llama-3-8B-Q4_K_M.gguf
-context_size: 8192
-stopwords:
-- "<|im_end|>"
-- "<dummy32000>"
-- "</tool_call>"
-- "<|eot_id|>"
-- "<|end_of_text|>"
+context_size: 4096
+f16: true
 function:
-  # disable injecting the "answer" tool
-  disable_no_action: true
+  capture_llm_results:
+  - (?s)<Thought>(.*?)</Thought>
   grammar:
-    # This allows the grammar to also return messages
-    mixed_mode: true
-    # Suffix to add to the grammar
-    #prefix: '<tool_call>\n'
-    # Force parallel calls in the grammar
-    # parallel_calls: true
-  return_name_in_function_response: true
-  # Without grammar uncomment the lines below
-  # Warning: this is relying only on the capability of the
-  # LLM model to generate the correct function call.
-  json_regex_match:
-  - "(?s)<tool_call>(.*?)</tool_call>"
-  - "(?s)<tool_call>(.*?)"
+    properties_order: name,arguments
+  json_regex_match:
+  - (?s)<Output>(.*?)</Output>
   replace_llm_results:
-  # Drop the scratchpad content from responses
-  - key: "(?s)<scratchpad>.*</scratchpad>"
+  - key: (?s)<Thought>(.*?)</Thought>
     value: ""
-  replace_function_results:
-  # Replace everything that is not JSON array or object
-  #
-  - key: '(?s)^[^{\[]*'
-    value: ""
-  - key: '(?s)[^}\]]*$'
-    value: ""
-  - key: "'([^']*?)'"
-    value: "_DQUOTE_${1}_DQUOTE_"
-  - key: '\\"'
-    value: "__TEMP_QUOTE__"
-  - key: "\'"
-    value: "'"
-  - key: "_DQUOTE_"
-    value: '"'
-  - key: "__TEMP_QUOTE__"
-    value: '"'
-  # Drop the scratchpad content from responses
-  - key: "(?s)<scratchpad>.*</scratchpad>"
-    value: ""
+mmap: true
+name: gpt-4
+parameters:
+  model: localai-functioncall-qwen2.5-7b-v0.5-q4_k_m.gguf
+stopwords:
+- <|im_end|>
+- <dummy32000>
+- </s>
 template:
   chat: |
     {{.Input -}}
     <|im_start|>assistant
   chat_message: |
-    <|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}
-    {{- if .FunctionCall }}
-    <tool_call>
-    {{- else if eq .RoleName "tool" }}
-    <tool_response>
-    {{- end }}
-    {{- if .Content}}
+    <|im_start|>{{ .RoleName }}
+    {{ if .FunctionCall -}}
+    Function call:
+    {{ else if eq .RoleName "tool" -}}
+    Function response:
+    {{ end -}}
+    {{ if .Content -}}
     {{.Content }}
-    {{- end }}
-    {{- if .FunctionCall}}
+    {{ end -}}
+    {{ if .FunctionCall -}}
     {{toJson .FunctionCall}}
-    {{- end }}
-    {{- if .FunctionCall }}
-    </tool_call>
-    {{- else if eq .RoleName "tool" }}
-    </tool_response>
-    {{- end }}<|im_end|>
+    {{ end -}}<|im_end|>
   completion: |
     {{.Input}}
-  function: |-
+  function: |
     <|im_start|>system
-    You are a function calling AI model.
-    Here are the available tools:
-    <tools>
+    You are an AI assistant that executes function calls, and these are the tools at your disposal:
     {{range .Functions}}
     {'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }}
     {{end}}
-    </tools>
-    You should call the tools provided to you sequentially
-    Please use <scratchpad> XML tags to record your reasoning and planning before you call the functions as follows:
-    <scratchpad>
-    {step-by-step reasoning and plan in bullet points}
-    </scratchpad>
-    For each function call return a json object with function name and arguments within <tool_call> XML tags as follows:
-    <tool_call>
-    {"arguments": <args-dict>, "name": <function-name>}
-    </tool_call><|im_end|>
+    <|im_end|>
     {{.Input -}}
     <|im_start|>assistant
+download_files:
+- filename: localai-functioncall-phi-4-v0.3-q4_k_m.gguf
+  sha256: 23fee048ded2a6e2e1a7b6bbefa6cbf83068f194caa9552aecbaa00fec8a16d5
+  uri: huggingface://mudler/LocalAI-functioncall-phi-4-v0.3-Q4_K_M-GGUF/localai-functioncall-phi-4-v0.3-q4_k_m.gguf
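
The download_files entries introduced in this commit pin a sha256 for each artifact. Once LocalAI has fetched a file, the checksum can be re-verified by hand; the path below assumes a default models directory and is illustrative:

    sha256sum models/localai-functioncall-phi-4-v0.3-q4_k_m.gguf
    # should print 23fee048ded2a6e2e1a7b6bbefa6cbf83068f194caa9552aecbaa00fec8a16d5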


@@ -1,35 +1,49 @@
-backend: llama-cpp
 context_size: 4096
 f16: true
 mmap: true
+mmproj: minicpm-v-2_6-mmproj-f16.gguf
 name: gpt-4o
-roles:
-  user: "USER:"
-  assistant: "ASSISTANT:"
-  system: "SYSTEM:"
-mmproj: llava-v1.6-7b-mmproj-f16.gguf
 parameters:
-  model: llava-v1.6-mistral-7b.Q5_K_M.gguf
-  temperature: 0.2
-  top_k: 40
-  top_p: 0.95
-  seed: -1
+  model: minicpm-v-2_6-Q4_K_M.gguf
+stopwords:
+- <|im_end|>
+- <dummy32000>
+- </s>
+- <|endoftext|>
 template:
   chat: |
-    A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions.
-    {{.Input}}
-    ASSISTANT:
+    {{.Input -}}
+    <|im_start|>assistant
+  chat_message: |
+    <|im_start|>{{ .RoleName }}
+    {{ if .FunctionCall -}}
+    Function call:
+    {{ else if eq .RoleName "tool" -}}
+    Function response:
+    {{ end -}}
+    {{ if .Content -}}
+    {{.Content }}
+    {{ end -}}
+    {{ if .FunctionCall -}}
+    {{toJson .FunctionCall}}
+    {{ end -}}<|im_end|>
+  completion: |
+    {{.Input}}
+  function: |
+    <|im_start|>system
+    You are a function calling AI model. You are provided with functions to execute. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools:
+    {{range .Functions}}
+    {'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }}
+    {{end}}
+    For each function call return a json object with function name and arguments
+    <|im_end|>
+    {{.Input -}}
+    <|im_start|>assistant
 download_files:
-- filename: llava-v1.6-mistral-7b.Q5_K_M.gguf
-  uri: huggingface://cjpais/llava-1.6-mistral-7b-gguf/llava-v1.6-mistral-7b.Q5_K_M.gguf
-- filename: llava-v1.6-7b-mmproj-f16.gguf
-  uri: huggingface://cjpais/llava-1.6-mistral-7b-gguf/mmproj-model-f16.gguf
-usage: |
-  curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
-    "model": "gpt-4-vision-preview",
-    "messages": [{"role": "user", "content": [{"type":"text", "text": "What is in the image?"}, {"type": "image_url", "image_url": {"url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg" }}], "temperature": 0.9}]}'
+- filename: minicpm-v-2_6-Q4_K_M.gguf
+  sha256: 3a4078d53b46f22989adbf998ce5a3fd090b6541f112d7e936eb4204a04100b1
+  uri: huggingface://openbmb/MiniCPM-V-2_6-gguf/ggml-model-Q4_K_M.gguf
+- filename: minicpm-v-2_6-mmproj-f16.gguf
+  uri: huggingface://openbmb/MiniCPM-V-2_6-gguf/mmproj-model-f16.gguf
+  sha256: 4485f68a0f1aa404c391e788ea88ea653c100d8e98fe572698f701e5809711fd


@@ -1,7 +1,7 @@
+embeddings: true
 name: text-embedding-ada-002
-backend: sentencetransformers
-embeddings: true
 parameters:
-  model: all-MiniLM-L6-v2
+  model: huggingface://bartowski/granite-embedding-107m-multilingual-GGUF/granite-embedding-107m-multilingual-f16.gguf
 usage: |
   You can test this model with curl like this:


@@ -1,103 +1,53 @@
-name: gpt-4
-mmap: false
-context_size: 8192
-f16: false
-parameters:
-  model: huggingface://NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF/Hermes-2-Pro-Llama-3-8B-Q4_K_M.gguf
-stopwords:
-- "<|im_end|>"
-- "<dummy32000>"
-- "</tool_call>"
-- "<|eot_id|>"
-- "<|end_of_text|>"
+context_size: 4096
+f16: true
 function:
-  # disable injecting the "answer" tool
-  disable_no_action: true
+  capture_llm_results:
+  - (?s)<Thought>(.*?)</Thought>
   grammar:
-    # This allows the grammar to also return messages
-    mixed_mode: true
-    # Suffix to add to the grammar
-    #prefix: '<tool_call>\n'
-    # Force parallel calls in the grammar
-    # parallel_calls: true
-  return_name_in_function_response: true
-  # Without grammar uncomment the lines below
-  # Warning: this is relying only on the capability of the
-  # LLM model to generate the correct function call.
-  json_regex_match:
-  - "(?s)<tool_call>(.*?)</tool_call>"
-  - "(?s)<tool_call>(.*?)"
+    properties_order: name,arguments
+  json_regex_match:
+  - (?s)<Output>(.*?)</Output>
   replace_llm_results:
-  # Drop the scratchpad content from responses
-  - key: "(?s)<scratchpad>.*</scratchpad>"
+  - key: (?s)<Thought>(.*?)</Thought>
     value: ""
-  replace_function_results:
-  # Replace everything that is not JSON array or object
-  #
-  - key: '(?s)^[^{\[]*'
-    value: ""
-  - key: '(?s)[^}\]]*$'
-    value: ""
-  - key: "'([^']*?)'"
-    value: "_DQUOTE_${1}_DQUOTE_"
-  - key: '\\"'
-    value: "__TEMP_QUOTE__"
-  - key: "\'"
-    value: "'"
-  - key: "_DQUOTE_"
-    value: '"'
-  - key: "__TEMP_QUOTE__"
-    value: '"'
-  # Drop the scratchpad content from responses
-  - key: "(?s)<scratchpad>.*</scratchpad>"
-    value: ""
+mmap: true
+name: gpt-4
+parameters:
+  model: localai-functioncall-qwen2.5-7b-v0.5-q4_k_m.gguf
+stopwords:
+- <|im_end|>
+- <dummy32000>
+- </s>
 template:
   chat: |
     {{.Input -}}
     <|im_start|>assistant
   chat_message: |
-    <|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}
-    {{- if .FunctionCall }}
-    <tool_call>
-    {{- else if eq .RoleName "tool" }}
-    <tool_response>
-    {{- end }}
-    {{- if .Content}}
+    <|im_start|>{{ .RoleName }}
+    {{ if .FunctionCall -}}
+    Function call:
+    {{ else if eq .RoleName "tool" -}}
+    Function response:
+    {{ end -}}
+    {{ if .Content -}}
     {{.Content }}
-    {{- end }}
-    {{- if .FunctionCall}}
+    {{ end -}}
+    {{ if .FunctionCall -}}
     {{toJson .FunctionCall}}
-    {{- end }}
-    {{- if .FunctionCall }}
-    </tool_call>
-    {{- else if eq .RoleName "tool" }}
-    </tool_response>
-    {{- end }}<|im_end|>
+    {{ end -}}<|im_end|>
   completion: |
     {{.Input}}
-  function: |-
+  function: |
     <|im_start|>system
-    You are a function calling AI model.
-    Here are the available tools:
-    <tools>
+    You are an AI assistant that executes function calls, and these are the tools at your disposal:
     {{range .Functions}}
     {'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }}
     {{end}}
-    </tools>
-    You should call the tools provided to you sequentially
-    Please use <scratchpad> XML tags to record your reasoning and planning before you call the functions as follows:
-    <scratchpad>
-    {step-by-step reasoning and plan in bullet points}
-    </scratchpad>
-    For each function call return a json object with function name and arguments within <tool_call> XML tags as follows:
-    <tool_call>
-    {"arguments": <args-dict>, "name": <function-name>}
-    </tool_call><|im_end|>
+    <|im_end|>
     {{.Input -}}
     <|im_start|>assistant
+download_files:
+- filename: localai-functioncall-phi-4-v0.3-q4_k_m.gguf
+  sha256: 23fee048ded2a6e2e1a7b6bbefa6cbf83068f194caa9552aecbaa00fec8a16d5
+  uri: huggingface://mudler/LocalAI-functioncall-phi-4-v0.3-Q4_K_M-GGUF/localai-functioncall-phi-4-v0.3-q4_k_m.gguf


@@ -1,35 +1,50 @@
-backend: llama-cpp
 context_size: 4096
-mmap: false
-f16: false
+f16: true
+mmap: true
+mmproj: minicpm-v-2_6-mmproj-f16.gguf
 name: gpt-4o
-roles:
-  user: "USER:"
-  assistant: "ASSISTANT:"
-  system: "SYSTEM:"
-mmproj: llava-v1.6-7b-mmproj-f16.gguf
 parameters:
-  model: llava-v1.6-mistral-7b.Q5_K_M.gguf
-  temperature: 0.2
-  top_k: 40
-  top_p: 0.95
-  seed: -1
+  model: minicpm-v-2_6-Q4_K_M.gguf
+stopwords:
+- <|im_end|>
+- <dummy32000>
+- </s>
+- <|endoftext|>
 template:
   chat: |
-    A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions.
-    {{.Input}}
-    ASSISTANT:
+    {{.Input -}}
+    <|im_start|>assistant
+  chat_message: |
+    <|im_start|>{{ .RoleName }}
+    {{ if .FunctionCall -}}
+    Function call:
+    {{ else if eq .RoleName "tool" -}}
+    Function response:
+    {{ end -}}
+    {{ if .Content -}}
+    {{.Content }}
+    {{ end -}}
+    {{ if .FunctionCall -}}
+    {{toJson .FunctionCall}}
+    {{ end -}}<|im_end|>
+  completion: |
+    {{.Input}}
+  function: |
+    <|im_start|>system
+    You are a function calling AI model. You are provided with functions to execute. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools:
+    {{range .Functions}}
+    {'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }}
+    {{end}}
+    For each function call return a json object with function name and arguments
+    <|im_end|>
+    {{.Input -}}
+    <|im_start|>assistant
 download_files:
-- filename: llava-v1.6-mistral-7b.Q5_K_M.gguf
-  uri: huggingface://cjpais/llava-1.6-mistral-7b-gguf/llava-v1.6-mistral-7b.Q5_K_M.gguf
-- filename: llava-v1.6-7b-mmproj-f16.gguf
-  uri: huggingface://cjpais/llava-1.6-mistral-7b-gguf/mmproj-model-f16.gguf
-usage: |
-  curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
-    "model": "gpt-4-vision-preview",
-    "messages": [{"role": "user", "content": [{"type":"text", "text": "What is in the image?"}, {"type": "image_url", "image_url": {"url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg" }}], "temperature": 0.9}]}'
+- filename: minicpm-v-2_6-Q4_K_M.gguf
+  sha256: 3a4078d53b46f22989adbf998ce5a3fd090b6541f112d7e936eb4204a04100b1
+  uri: huggingface://openbmb/MiniCPM-V-2_6-gguf/ggml-model-Q4_K_M.gguf
+- filename: minicpm-v-2_6-mmproj-f16.gguf
+  uri: huggingface://openbmb/MiniCPM-V-2_6-gguf/mmproj-model-f16.gguf
+  sha256: 4485f68a0f1aa404c391e788ea88ea653c100d8e98fe572698f701e5809711fd


@@ -4527,6 +4527,49 @@
 - filename: OpenPipe_Deductive-Reasoning-Qwen-32B-Q4_K_M.gguf
   sha256: 53a8314e572c60c867da897721d366f183dc6d2193c83a41ff8ad46a2a0692c8
   uri: huggingface://bartowski/OpenPipe_Deductive-Reasoning-Qwen-32B-GGUF/OpenPipe_Deductive-Reasoning-Qwen-32B-Q4_K_M.gguf
+- !!merge <<: *qwen25
+  name: "open-r1_olympiccoder-32b"
+  urls:
+  - https://huggingface.co/open-r1/OlympicCoder-32B
+  - https://huggingface.co/bartowski/open-r1_OlympicCoder-32B-GGUF
+  description: |
+    OlympicCoder-32B is a code model that achieves very strong performance on competitive coding benchmarks such as LiveCodeBench and the 2024 International Olympiad in Informatics.
+  overrides:
+    parameters:
+      model: open-r1_OlympicCoder-32B-Q4_K_M.gguf
+  files:
+  - filename: open-r1_OlympicCoder-32B-Q4_K_M.gguf
+    sha256: bb82e4aa2219f655d37c7efad8985582cf3c32de0e0299ecd2f304d32ac39f12
+    uri: huggingface://bartowski/open-r1_OlympicCoder-32B-GGUF/open-r1_OlympicCoder-32B-Q4_K_M.gguf
+- !!merge <<: *qwen25
+  name: "open-r1_olympiccoder-7b"
+  urls:
+  - https://huggingface.co/open-r1/OlympicCoder-7B
+  - https://huggingface.co/bartowski/open-r1_OlympicCoder-7B-GGUF
+  description: |
+    OlympicCoder-7B is a code model that achieves strong performance on competitive coding benchmarks such as LiveCodeBench and the 2024 International Olympiad in Informatics.
+  overrides:
+    parameters:
+      model: open-r1_OlympicCoder-7B-Q4_K_M.gguf
+  files:
+  - filename: open-r1_OlympicCoder-7B-Q4_K_M.gguf
+    sha256: 21e18e7fd1fb244455a67d4dee538a4d86dc96d507c39a4ad16ef335fb9e6e2f
+    uri: huggingface://bartowski/open-r1_OlympicCoder-7B-GGUF/open-r1_OlympicCoder-7B-Q4_K_M.gguf
+- !!merge <<: *qwen25
+  name: "trashpanda-org_qwq-32b-snowdrop-v0"
+  icon: https://cdn-uploads.huggingface.co/production/uploads/675a77cf99ca23af9daacccc/Tdn0PJBFnG3J6UcjO9G94.png
+  urls:
+  - https://huggingface.co/trashpanda-org/QwQ-32B-Snowdrop-v0
+  - https://huggingface.co/bartowski/trashpanda-org_QwQ-32B-Snowdrop-v0-GGUF
+  description: |
+    R1 at home for RP, literally. Able to handle my cards with gimmicks and subtle tricks in them. With a good reasoning starter+prompt, I'm getting consistently-structured responses that have a good amount of variation across them still while rerolling. Char/scenario portrayal is good despite my focus on writing style, lorebooks are properly referenced at times. Slop doesn't seem to be too much of an issue with thinking enabled. Some user impersonation is rarely observed. Prose is refreshing if you take advantage of what I did (writing style fixation). I know I said Marigold would be my daily driver, but this one is that now, it's that good.
+  overrides:
+    parameters:
+      model: trashpanda-org_QwQ-32B-Snowdrop-v0-Q4_K_M.gguf
+  files:
+  - filename: trashpanda-org_QwQ-32B-Snowdrop-v0-Q4_K_M.gguf
+    sha256: 584d2f14f2f08ce499665c332bef30245b605ed2278e9075766237835f564c5f
+    uri: huggingface://bartowski/trashpanda-org_QwQ-32B-Snowdrop-v0-GGUF/trashpanda-org_QwQ-32B-Snowdrop-v0-Q4_K_M.gguf
 - &llama31
   url: "github:mudler/LocalAI/gallery/llama3.1-instruct.yaml@master" ## LLama3.1
   icon: https://avatars.githubusercontent.com/u/153379578
@@ -9056,6 +9099,21 @@
 - filename: TheDrummer_Gemmasutra-Pro-27B-v1.1-Q4_K_M.gguf
   sha256: 218a14f0bf8266f9e77d16b8b4f5cc1dc76e97eb582a2c97cca5a3a2c35de86b
   uri: huggingface://bartowski/TheDrummer_Gemmasutra-Pro-27B-v1.1-GGUF/TheDrummer_Gemmasutra-Pro-27B-v1.1-Q4_K_M.gguf
+- !!merge <<: *gemma
+  name: "thedrummer_gemmasutra-small-4b-v1"
+  icon: https://cdn-uploads.huggingface.co/production/uploads/65f2fd1c25b848bd061b5c2e/Cekk7d2UAKu7LPsw8SxV7.png
+  urls:
+  - https://huggingface.co/TheDrummer/Gemmasutra-Small-4B-v1
+  - https://huggingface.co/bartowski/TheDrummer_Gemmasutra-Small-4B-v1-GGUF
+  description: |
+    An upscaled Gemma 2B tune with modern techniques. Au Revoir, Gemma!
+  overrides:
+    parameters:
+      model: TheDrummer_Gemmasutra-Small-4B-v1-Q4_K_M.gguf
+  files:
+  - filename: TheDrummer_Gemmasutra-Small-4B-v1-Q4_K_M.gguf
+    sha256: 81dd2e2d9546f5dc2150c45c62acabc112068b801ca50b79feceabb1bd4d6f1a
+    uri: huggingface://bartowski/TheDrummer_Gemmasutra-Small-4B-v1-GGUF/TheDrummer_Gemmasutra-Small-4B-v1-Q4_K_M.gguf
 - &llama3
   url: "github:mudler/LocalAI/gallery/llama3-instruct.yaml@master"
   icon: https://avatars.githubusercontent.com/u/153379578
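
Once this index is published, the gallery entries added above become installable by name against a running instance. Roughly, and as a sketch only (the CLI subcommand and the "localai@" gallery id format reflect current LocalAI conventions and are assumptions here, not part of this diff):

    local-ai models install open-r1_olympiccoder-7b
    # or through the API:
    curl http://localhost:8080/models/apply -H "Content-Type: application/json" -d '{"id": "localai@open-r1_olympiccoder-7b"}'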