mirror of
https://github.com/mudler/LocalAI.git
synced 2025-05-20 02:24:59 +00:00
feat(functions/aio): all-in-one images, function template enhancements (#1862)
* feat(startup): allow to specify models from local files * feat(aio): add Dockerfile, make targets, aio profiles * feat(template): add Function and LastMessage * add hermes2-pro-mistral * update hermes2 definition * feat(template): add sprig * feat(template): expose FunctionCall * feat(aio): switch llm for text
This commit is contained in:
parent
eeaf8c7ccd
commit
e533dcf506
20 changed files with 462 additions and 2 deletions
13
aio/cpu/embeddings.yaml
Normal file
13
aio/cpu/embeddings.yaml
Normal file
|
@ -0,0 +1,13 @@
|
|||
name: all-minilm-l6-v2
|
||||
backend: sentencetransformers
|
||||
embeddings: true
|
||||
parameters:
|
||||
model: all-MiniLM-L6-v2
|
||||
|
||||
usage: |
|
||||
You can test this model with curl like this:
|
||||
|
||||
curl http://localhost:8080/embeddings -X POST -H "Content-Type: application/json" -d '{
|
||||
"input": "Your text string goes here",
|
||||
"model": "all-minilm-l6-v2"
|
||||
}'
|
53
aio/cpu/image-gen.yaml
Normal file
53
aio/cpu/image-gen.yaml
Normal file
|
@ -0,0 +1,53 @@
|
|||
name: stablediffusion
|
||||
backend: stablediffusion
|
||||
parameters:
|
||||
model: stablediffusion_assets
|
||||
|
||||
license: "BSD-3"
|
||||
urls:
|
||||
- https://github.com/EdVince/Stable-Diffusion-NCNN
|
||||
- https://github.com/EdVince/Stable-Diffusion-NCNN/blob/main/LICENSE
|
||||
|
||||
description: |
|
||||
Stable Diffusion in NCNN with c++, supported txt2img and img2img
|
||||
|
||||
download_files:
|
||||
- filename: "stablediffusion_assets/AutoencoderKL-256-256-fp16-opt.param"
|
||||
sha256: "18ca4b66685e21406bcf64c484b3b680b4949900415536d599cc876579c85c82"
|
||||
uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/AutoencoderKL-256-256-fp16-opt.param"
|
||||
- filename: "stablediffusion_assets/AutoencoderKL-512-512-fp16-opt.param"
|
||||
sha256: "cf45f63aacf3dbbab0f59ed92a6f2c14d9a1801314631cd3abe91e3c85639a20"
|
||||
uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/AutoencoderKL-512-512-fp16-opt.param"
|
||||
- filename: "stablediffusion_assets/AutoencoderKL-base-fp16.param"
|
||||
sha256: "0254a056dce61b0c27dc9ec1b78b53bcf55315c540f55f051eb841aa992701ba"
|
||||
uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/AutoencoderKL-base-fp16.param"
|
||||
- filename: "stablediffusion_assets/AutoencoderKL-encoder-512-512-fp16.bin"
|
||||
sha256: "ddcb79a9951b9f91e05e087739ed69da2c1c4ae30ba4168cce350b49d617c9fa"
|
||||
uri: "https://github.com/EdVince/Stable-Diffusion-NCNN/releases/download/naifu/AutoencoderKL-encoder-512-512-fp16.bin"
|
||||
- filename: "stablediffusion_assets/AutoencoderKL-fp16.bin"
|
||||
sha256: "f02e71f80e70252734724bbfaed5c4ddd3a8ed7e61bb2175ff5f53099f0e35dd"
|
||||
uri: "https://github.com/EdVince/Stable-Diffusion-NCNN/releases/download/naifu/AutoencoderKL-fp16.bin"
|
||||
- filename: "stablediffusion_assets/FrozenCLIPEmbedder-fp16.bin"
|
||||
sha256: "1c9a12f4e1dd1b295a388045f7f28a2352a4d70c3dc96a542189a3dd7051fdd6"
|
||||
uri: "https://github.com/EdVince/Stable-Diffusion-NCNN/releases/download/naifu/FrozenCLIPEmbedder-fp16.bin"
|
||||
- filename: "stablediffusion_assets/FrozenCLIPEmbedder-fp16.param"
|
||||
sha256: "471afbe678dd1fd3fe764ef9c6eccaccb0a7d7e601f27b462aa926b20eb368c9"
|
||||
uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/FrozenCLIPEmbedder-fp16.param"
|
||||
- filename: "stablediffusion_assets/log_sigmas.bin"
|
||||
sha256: "a2089f8aa4c61f9c200feaec541ab3f5c94233b28deb6d5e8bcd974fa79b68ac"
|
||||
uri: "https://github.com/EdVince/Stable-Diffusion-NCNN/raw/main/x86/linux/assets/log_sigmas.bin"
|
||||
- filename: "stablediffusion_assets/UNetModel-256-256-MHA-fp16-opt.param"
|
||||
sha256: "a58c380229f09491776df837b7aa7adffc0a87821dc4708b34535da2e36e3da1"
|
||||
uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/UNetModel-256-256-MHA-fp16-opt.param"
|
||||
- filename: "stablediffusion_assets/UNetModel-512-512-MHA-fp16-opt.param"
|
||||
sha256: "f12034067062827bd7f43d1d21888d1f03905401acf6c6eea22be23c259636fa"
|
||||
uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/UNetModel-512-512-MHA-fp16-opt.param"
|
||||
- filename: "stablediffusion_assets/UNetModel-base-MHA-fp16.param"
|
||||
sha256: "696f6975de49f4325b53ce32aff81861a6d6c07cd9ce3f0aae2cc405350af38d"
|
||||
uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/UNetModel-base-MHA-fp16.param"
|
||||
- filename: "stablediffusion_assets/UNetModel-MHA-fp16.bin"
|
||||
sha256: "d618918d011bfc1f644c0f2a33bf84931bd53b28a98492b0a8ed6f3a818852c3"
|
||||
uri: "https://github.com/EdVince/Stable-Diffusion-NCNN/releases/download/naifu/UNetModel-MHA-fp16.bin"
|
||||
- filename: "stablediffusion_assets/vocab.txt"
|
||||
sha256: "e30e57b6f1e47616982ef898d8922be24e535b4fa3d0110477b3a6f02ebbae7d"
|
||||
uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/vocab.txt"
|
18
aio/cpu/speech-to-text.yaml
Normal file
18
aio/cpu/speech-to-text.yaml
Normal file
|
@ -0,0 +1,18 @@
|
|||
name: whisper
|
||||
backend: whisper
|
||||
parameters:
|
||||
model: ggml-whisper-base.bin
|
||||
|
||||
usage: |
|
||||
## example audio file
|
||||
wget --quiet --show-progress -O gb1.ogg https://upload.wikimedia.org/wikipedia/commons/1/1f/George_W_Bush_Columbia_FINAL.ogg
|
||||
|
||||
## Send the example audio file to the transcriptions endpoint
|
||||
curl http://localhost:8080/v1/audio/transcriptions \
|
||||
-H "Content-Type: multipart/form-data" \
|
||||
-F file="@$PWD/gb1.ogg" -F model="whisper"
|
||||
|
||||
download_files:
|
||||
- filename: "ggml-whisper-base.bin"
|
||||
sha256: "60ed5bc3dd14eea856493d334349b405782ddcaf0028d4b5df4088345fba2efe"
|
||||
uri: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.bin"
|
15
aio/cpu/text-to-speech.yaml
Normal file
15
aio/cpu/text-to-speech.yaml
Normal file
|
@ -0,0 +1,15 @@
|
|||
name: voice-en-us-amy-low
|
||||
download_files:
|
||||
- filename: voice-en-us-amy-low.tar.gz
|
||||
uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-amy-low.tar.gz
|
||||
|
||||
parameters:
|
||||
model: en-us-amy-low.onnx
|
||||
|
||||
usage: |
|
||||
To test if this model works as expected, you can use the following curl command:
|
||||
|
||||
curl http://localhost:8080/tts -H "Content-Type: application/json" -d '{
|
||||
"model":"voice-en-us-amy-low",
|
||||
"input": "Hi, this is a test."
|
||||
}'
|
22
aio/cpu/text-to-text.yaml
Normal file
22
aio/cpu/text-to-text.yaml
Normal file
|
@ -0,0 +1,22 @@
|
|||
name: gpt-3.5-turbo
|
||||
context_size: 2048
|
||||
f16: true
|
||||
gpu_layers: 90
|
||||
mmap: true
|
||||
trimsuffix:
|
||||
- "\n"
|
||||
parameters:
|
||||
model: huggingface://TheBloke/phi-2-GGUF/phi-2.Q8_0.gguf
|
||||
|
||||
template:
|
||||
chat: &template |-
|
||||
Instruct: {{.Input}}
|
||||
Output:
|
||||
completion: *template
|
||||
|
||||
usage: |
|
||||
To use this model, interact with the API (in another terminal) with curl for instance:
|
||||
curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
|
||||
"model": "phi-2",
|
||||
"messages": [{"role": "user", "content": "How are you doing?", "temperature": 0.1}]
|
||||
}'
|
40
aio/cpu/vision.yaml
Normal file
40
aio/cpu/vision.yaml
Normal file
|
@ -0,0 +1,40 @@
|
|||
backend: llama-cpp
|
||||
context_size: 4096
|
||||
f16: true
|
||||
|
||||
gpu_layers: 90
|
||||
mmap: true
|
||||
name: llava
|
||||
|
||||
roles:
|
||||
user: "USER:"
|
||||
assistant: "ASSISTANT:"
|
||||
system: "SYSTEM:"
|
||||
|
||||
mmproj: bakllava-mmproj.gguf
|
||||
parameters:
|
||||
model: bakllava.gguf
|
||||
temperature: 0.2
|
||||
top_k: 40
|
||||
top_p: 0.95
|
||||
seed: -1
|
||||
mirostat: 2
|
||||
mirostat_eta: 1.0
|
||||
mirostat_tau: 1.0
|
||||
|
||||
template:
|
||||
chat: |
|
||||
A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions.
|
||||
{{.Input}}
|
||||
ASSISTANT:
|
||||
|
||||
download_files:
|
||||
- filename: bakllava.gguf
|
||||
uri: huggingface://mys/ggml_bakllava-1/ggml-model-q4_k.gguf
|
||||
- filename: bakllava-mmproj.gguf
|
||||
uri: huggingface://mys/ggml_bakllava-1/mmproj-model-f16.gguf
|
||||
|
||||
usage: |
|
||||
curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
|
||||
"model": "llava",
|
||||
"messages": [{"role": "user", "content": [{"type":"text", "text": "What is in the image?"}, {"type": "image_url", "image_url": {"url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg" }}], "temperature": 0.9}]}'
|
13
aio/gpu-8g/embeddings.yaml
Normal file
13
aio/gpu-8g/embeddings.yaml
Normal file
|
@ -0,0 +1,13 @@
|
|||
name: all-minilm-l6-v2
|
||||
backend: sentencetransformers
|
||||
embeddings: true
|
||||
parameters:
|
||||
model: all-MiniLM-L6-v2
|
||||
|
||||
usage: |
|
||||
You can test this model with curl like this:
|
||||
|
||||
curl http://localhost:8080/embeddings -X POST -H "Content-Type: application/json" -d '{
|
||||
"input": "Your text string goes here",
|
||||
"model": "all-minilm-l6-v2"
|
||||
}'
|
22
aio/gpu-8g/image-gen.yaml
Normal file
22
aio/gpu-8g/image-gen.yaml
Normal file
|
@ -0,0 +1,22 @@
|
|||
name: dreamshaper
|
||||
parameters:
|
||||
model: huggingface://Lykon/DreamShaper/DreamShaper_8_pruned.safetensors
|
||||
backend: diffusers
|
||||
step: 25
|
||||
f16: true
|
||||
cuda: true
|
||||
diffusers:
|
||||
pipeline_type: StableDiffusionPipeline
|
||||
cuda: true
|
||||
enable_parameters: "negative_prompt,num_inference_steps"
|
||||
scheduler_type: "k_dpmpp_2m"
|
||||
|
||||
usage: |
|
||||
curl http://localhost:8080/v1/images/generations \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{
|
||||
"prompt": "<positive prompt>|<negative prompt>",
|
||||
"model": "dreamshaper",
|
||||
"step": 25,
|
||||
"size": "512x512"
|
||||
}'
|
18
aio/gpu-8g/speech-to-text.yaml
Normal file
18
aio/gpu-8g/speech-to-text.yaml
Normal file
|
@ -0,0 +1,18 @@
|
|||
name: whisper
|
||||
backend: whisper
|
||||
parameters:
|
||||
model: ggml-whisper-base.bin
|
||||
|
||||
usage: |
|
||||
## example audio file
|
||||
wget --quiet --show-progress -O gb1.ogg https://upload.wikimedia.org/wikipedia/commons/1/1f/George_W_Bush_Columbia_FINAL.ogg
|
||||
|
||||
## Send the example audio file to the transcriptions endpoint
|
||||
curl http://localhost:8080/v1/audio/transcriptions \
|
||||
-H "Content-Type: multipart/form-data" \
|
||||
-F file="@$PWD/gb1.ogg" -F model="whisper"
|
||||
|
||||
download_files:
|
||||
- filename: "ggml-whisper-base.bin"
|
||||
sha256: "60ed5bc3dd14eea856493d334349b405782ddcaf0028d4b5df4088345fba2efe"
|
||||
uri: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.bin"
|
15
aio/gpu-8g/text-to-speech.yaml
Normal file
15
aio/gpu-8g/text-to-speech.yaml
Normal file
|
@ -0,0 +1,15 @@
|
|||
name: voice-en-us-amy-low
|
||||
download_files:
|
||||
- filename: voice-en-us-amy-low.tar.gz
|
||||
uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-amy-low.tar.gz
|
||||
|
||||
parameters:
|
||||
model: en-us-amy-low.onnx
|
||||
|
||||
usage: |
|
||||
To test if this model works as expected, you can use the following curl command:
|
||||
|
||||
curl http://localhost:8080/tts -H "Content-Type: application/json" -d '{
|
||||
"model":"voice-en-us-amy-low",
|
||||
"input": "Hi, this is a test."
|
||||
}'
|
51
aio/gpu-8g/text-to-text.yaml
Normal file
51
aio/gpu-8g/text-to-text.yaml
Normal file
|
@ -0,0 +1,51 @@
|
|||
name: gpt-3.5-turbo
|
||||
mmap: true
|
||||
parameters:
|
||||
model: huggingface://NousResearch/Hermes-2-Pro-Mistral-7B-GGUF/Hermes-2-Pro-Mistral-7B.Q6_K.gguf
|
||||
|
||||
roles:
|
||||
assistant_function_call: assistant
|
||||
function: tool
|
||||
template:
|
||||
chat_message: |
|
||||
<|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "function"}}{{.Role}}{{else if eq .RoleName "user"}}user{{end}}
|
||||
{{ if eq .RoleName "assistant_function_call" }}<tool_call>{{end}}
|
||||
{{ if eq .RoleName "function" }}<tool_result>{{end}}
|
||||
{{if .Content}}{{.Content}}{{end}}
|
||||
{{if .FunctionCall}}{{toJson .FunctionCall}}{{end}}
|
||||
{{ if eq .RoleName "assistant_function_call" }}</tool_call>{{end}}
|
||||
{{ if eq .RoleName "function" }}</tool_result>{{end}}
|
||||
<|im_end|>
|
||||
# https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF#prompt-format-for-function-calling
|
||||
function: |
|
||||
<|im_start|>system
|
||||
You are a function calling AI model. You are provided with function signatures within <tools></tools> XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools:
|
||||
<tools>
|
||||
{{range .Functions}}
|
||||
{'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }}
|
||||
{{end}}
|
||||
</tools>
|
||||
Use the following pydantic model json schema for each tool call you will make:
|
||||
{'title': 'FunctionCall', 'type': 'object', 'properties': {'arguments': {'title': 'Arguments', 'type': 'object'}, 'name': {'title': 'Name', 'type': 'string'}}, 'required': ['arguments', 'name']}
|
||||
For each function call return a json object with function name and arguments within <tool_call></tool_call> XML tags as follows:
|
||||
<tool_call>
|
||||
{'arguments': <args-dict>, 'name': <function-name>}
|
||||
</tool_call><|im_end|>
|
||||
{{.Input}}
|
||||
<|im_start|>assistant
|
||||
<tool_call>
|
||||
chat: |
|
||||
{{.Input}}
|
||||
<|im_start|>assistant
|
||||
completion: |
|
||||
{{.Input}}
|
||||
context_size: 4096
|
||||
f16: true
|
||||
stopwords:
|
||||
- <|im_end|>
|
||||
- <dummy32000>
|
||||
usage: |
|
||||
curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
|
||||
"model": "hermes-2-pro-mistral",
|
||||
"messages": [{"role": "user", "content": "How are you doing?", "temperature": 0.1}]
|
||||
}'
|
40
aio/gpu-8g/vision.yaml
Normal file
40
aio/gpu-8g/vision.yaml
Normal file
|
@ -0,0 +1,40 @@
|
|||
backend: llama-cpp
|
||||
context_size: 4096
|
||||
f16: true
|
||||
|
||||
gpu_layers: 90
|
||||
mmap: true
|
||||
name: llava
|
||||
|
||||
roles:
|
||||
user: "USER:"
|
||||
assistant: "ASSISTANT:"
|
||||
system: "SYSTEM:"
|
||||
|
||||
mmproj: bakllava-mmproj.gguf
|
||||
parameters:
|
||||
model: bakllava.gguf
|
||||
temperature: 0.2
|
||||
top_k: 40
|
||||
top_p: 0.95
|
||||
seed: -1
|
||||
mirostat: 2
|
||||
mirostat_eta: 1.0
|
||||
mirostat_tau: 1.0
|
||||
|
||||
template:
|
||||
chat: |
|
||||
A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions.
|
||||
{{.Input}}
|
||||
ASSISTANT:
|
||||
|
||||
download_files:
|
||||
- filename: bakllava.gguf
|
||||
uri: huggingface://mys/ggml_bakllava-1/ggml-model-q4_k.gguf
|
||||
- filename: bakllava-mmproj.gguf
|
||||
uri: huggingface://mys/ggml_bakllava-1/mmproj-model-f16.gguf
|
||||
|
||||
usage: |
|
||||
curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
|
||||
"model": "llava",
|
||||
"messages": [{"role": "user", "content": [{"type":"text", "text": "What is in the image?"}, {"type": "image_url", "image_url": {"url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg" }}], "temperature": 0.9}]}'
|
Loading…
Add table
Add a link
Reference in a new issue