Add examples/config defaults

2025-05-25 13:04:59 +00:00 · 2023-11-10 18:34:19 +01:00 · 2023-11-10 18:34:19 +01:00 · f995f23042
commit f995f23042
parent 8254df3f4c
4 changed files with 58 additions and 1 deletions
--- a/.env
+++ b/.env
@ -66,4 +66,7 @@ MODELS_PATH=/models
 ### Python backends GRPC max workers
 ### Default number of workers for GRPC Python backends.
 ### This actually controls whether a backend can process multiple requests or not.
-# PYTHON_GRPC_MAX_WORKERS=1
+# PYTHON_GRPC_MAX_WORKERS=1
+
+### Define the number of parallel llama.cpp workers (defaults to 1)
+# LLAMACPP_PARALLEL=1
--- a/examples/configurations/llava/README.md
+++ b/examples/configurations/llava/README.md
@ -0,0 +1,17 @@
+
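+## Setup
+
+Download the model and multimodal projector weights into `models/`, copy `llava.yaml` and `chat-simple.tmpl` from this directory into the same folder, then start LocalAI:
+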
+```
+mkdir models
+wget https://huggingface.co/mys/ggml_bakllava-1/resolve/main/ggml-model-q4_k.gguf -O models/ggml-model-q4_k.gguf
+wget https://huggingface.co/mys/ggml_bakllava-1/resolve/main/mmproj-model-f16.gguf -O models/mmproj-model-f16.gguf
+docker run -p 8080:8080 -v $PWD/models:/models -ti --rm quay.io/go-skynet/local-ai:master --models-path /models --threads 4
+```
+
+## Try it out
+
+```
+curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
+     "model": "llava",
+     "messages": [{"role": "user", "content": [{"type":"text", "text": "What is in the image?"}, {"type": "image_url", "image_url": {"url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg" }}]}], "temperature": 0.9}'
+```
--- a/examples/configurations/llava/chat-simple.tmpl
+++ b/examples/configurations/llava/chat-simple.tmpl
@ -0,0 +1,3 @@
+A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions.
+{{.Input}}
+ASSISTANT:
--- a/examples/configurations/llava/llava.yaml
+++ b/examples/configurations/llava/llava.yaml
@ -0,0 +1,34 @@
+
+context_size: 4096
+#mirostat: 2
+#mirostat_tau: 5.0
+#mirostat_eta: 0.1
+f16: true
+#low_vram: true
+threads: 11
+gpu_layers: 90
+
+name: llava
+mmap: true
+backend: llama-cpp
+roles:
+  user: "USER:"
+  assistant: "ASSISTANT:"
+
+  system: "SYSTEM:"
+parameters:
+  #model: openbuddy-llama2-34b-v11.1-bf16.Q3_K_S.gguf
+  #model: openbuddy-mistral-7b-v13.Q6_K.gguf
+  model: ggml-model-q4_k.gguf
+  #model: openbuddy-llama2-13b-v11.1.Q6_K.gguf
+  #model: openbuddy-llama2-34b-v11.1-bf16.Q4_K_S.gguf
+  #model: llama2-22b-daydreamer-v3.ggmlv3.q6_K.bin
+
+  temperature: 0.2
+
+  top_k: 40
+  top_p: 0.95
+  #ngqa: 8
+template:
+  chat: chat-simple
+mmproj: mmproj-model-f16.gguf