feat(functions/aio): all-in-one images, function template enhancements (#1862)

* feat(startup): allow to specify models from local files * feat(aio): add Dockerfile, make targets, aio profiles * feat(template): add Function and LastMessage * add hermes2-pro-mistral * update hermes2 definition * feat(template): add sprig * feat(template): expose FunctionCall * feat(aio): switch llm for text
2025-06-01 16:34:59 +00:00 · 2024-03-21 01:12:20 +01:00 · 2024-03-21 01:12:20 +01:00 · e533dcf506
commit e533dcf506
parent eeaf8c7ccd
20 changed files with 462 additions and 2 deletions
--- a/aio/cpu/vision.yaml
+++ b/aio/cpu/vision.yaml
@ -0,0 +1,40 @@
+backend: llama-cpp
+context_size: 4096
+f16: true
+
+gpu_layers: 90
+mmap: true
+name: llava
+
+roles:
+  user: "USER:"
+  assistant: "ASSISTANT:"
+  system: "SYSTEM:"
+
+mmproj: bakllava-mmproj.gguf
+parameters:
+  model: bakllava.gguf
+  temperature: 0.2
+  top_k: 40
+  top_p: 0.95
+  seed: -1
+mirostat: 2
+mirostat_eta: 1.0
+mirostat_tau: 1.0
+
+template:
+  chat: |
+    A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions.
+    {{.Input}}
+    ASSISTANT:
+
+download_files:
+- filename: bakllava.gguf
+  uri: huggingface://mys/ggml_bakllava-1/ggml-model-q4_k.gguf
+- filename: bakllava-mmproj.gguf
+  uri: huggingface://mys/ggml_bakllava-1/mmproj-model-f16.gguf
+
+usage: |
+    curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
+        "model": "llava",
+        "messages": [{"role": "user", "content": [{"type":"text", "text": "What is in the image?"}, {"type": "image_url", "image_url": {"url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg" }}], "temperature": 0.9}]}'