diff --git a/.env b/.env
index c067f7b8..8e0b0500 100644
--- a/.env
+++ b/.env
@@ -66,4 +66,7 @@ MODELS_PATH=/models
 ### Python backends GRPC max workers
 ### Default number of workers for GRPC Python backends.
 ### This actually controls whether a backend can process multiple requests or not.
-# PYTHON_GRPC_MAX_WORKERS=1
\ No newline at end of file
+# PYTHON_GRPC_MAX_WORKERS=1
+
+### Define the number of parallel LLAMA.cpp workers (defaults to 1)
+# LLAMACPP_PARALLEL=1
\ No newline at end of file
diff --git a/examples/configurations/llava/README.md b/examples/configurations/llava/README.md
new file mode 100644
index 00000000..9e39a32c
--- /dev/null
+++ b/examples/configurations/llava/README.md
@@ -0,0 +1,17 @@
+## Setup
+
+```
+mkdir models
+cp llava.yaml chat-simple.tmpl models/
+wget https://huggingface.co/mys/ggml_bakllava-1/resolve/main/ggml-model-q4_k.gguf -O models/ggml-model-q4_k.gguf
+wget https://huggingface.co/mys/ggml_bakllava-1/resolve/main/mmproj-model-f16.gguf -O models/mmproj-model-f16.gguf
+docker run -p 8080:8080 -v $PWD/models:/models -ti --rm quay.io/go-skynet/local-ai:master --models-path /models --threads 4
+```
+
+## Try it out
+
+```
+curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
+  "model": "llava",
+  "messages": [{"role": "user", "content": [{"type": "text", "text": "What is in the image?"}, {"type": "image_url", "image_url": {"url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg"}}]}], "temperature": 0.9}'
+```
\ No newline at end of file
diff --git a/examples/configurations/llava/chat-simple.tmpl b/examples/configurations/llava/chat-simple.tmpl
new file mode 100644
index 00000000..5fe36767
--- /dev/null
+++ b/examples/configurations/llava/chat-simple.tmpl
@@ -0,0 +1,3 @@
+A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions.
+{{.Input}}
+ASSISTANT:
\ No newline at end of file
diff --git a/examples/configurations/llava/llava.yaml b/examples/configurations/llava/llava.yaml
new file mode 100644
index 00000000..dbff1a25
--- /dev/null
+++ b/examples/configurations/llava/llava.yaml
@@ -0,0 +1,24 @@
+context_size: 4096
+#mirostat: 2
+#mirostat_tau: 5.0
+#mirostat_eta: 0.1
+f16: true
+#low_vram: true
+threads: 4
+gpu_layers: 90
+
+name: llava
+mmap: true
+backend: llama-cpp
+roles:
+  user: "USER:"
+  assistant: "ASSISTANT:"
+  system: "SYSTEM:"
+parameters:
+  model: ggml-model-q4_k.gguf
+  temperature: 0.2
+  top_k: 40
+  top_p: 0.95
+template:
+  chat: chat-simple
+mmproj: mmproj-model-f16.gguf
\ No newline at end of file
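
Note on the new `.env` option: as a minimal sketch, the `LLAMACPP_PARALLEL` variable introduced above could be passed to the example's container via docker's `-e` flag. This assumes the variable is read from the process environment at startup, as the `.env` comment suggests; the value `4` is an arbitrary illustration, not a recommended setting.

```
# Sketch: the README's docker run invocation, plus the new variable.
# LLAMACPP_PARALLEL=4 is an illustrative value, not a recommendation.
docker run -p 8080:8080 -e LLAMACPP_PARALLEL=4 -v $PWD/models:/models -ti --rm \
  quay.io/go-skynet/local-ai:master --models-path /models --threads 4
```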
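
For reference, with the roles defined in `llava.yaml`, the README's single-turn request should render through `chat-simple.tmpl` roughly as below. This is a sketch that assumes `{{.Input}}` is filled with the role-prefixed message text; the image itself is handled separately via the `mmproj` projector model rather than through the text template.

```
A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions.
USER: What is in the image?
ASSISTANT:
```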