Mirror of https://github.com/mudler/LocalAI.git (synced 2025-05-20 10:35:01 +00:00)
transformers: correctly load automodels (#1643)
* backends(transformers): use AutoModel with LLM types
* examples: animagine-xl
* Add codellama examples
This commit is contained in:
parent 3733250b3c
commit cb7512734d
27 changed files with 1144 additions and 569 deletions
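To illustrate the idea behind "use AutoModel with LLM types": a minimal Python sketch of picking the transformers auto-class from a config-level type field instead of always loading AutoModel. This is not LocalAI's actual backend code; the function and mapping names are hypothetical, only the transformers classes are real.

# Hypothetical sketch: map a YAML `type` value to a transformers auto-class,
# falling back to AutoModel when no type is configured.
from transformers import AutoModel, AutoModelForCausalLM, AutoTokenizer

AUTO_CLASSES = {
    "AutoModel": AutoModel,
    "AutoModelForCausalLM": AutoModelForCausalLM,
}

def load_automodel(model_id: str, model_type: str | None = None):
    auto_cls = AUTO_CLASSES.get(model_type or "AutoModel", AutoModel)
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    model = auto_cls.from_pretrained(model_id)
    return model, tokenizer

# e.g. the transformers-tinyllama config below sets type: AutoModelForCausalLM
model, tokenizer = load_automodel("TinyLlama/TinyLlama-1.1B-Chat-v1.0", "AutoModelForCausalLM")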
embedded/models/animagine-xl.yaml (new file, 17 additions)
@@ -0,0 +1,17 @@
name: animagine-xl
parameters:
  model: Linaqruf/animagine-xl
backend: diffusers
f16: true
diffusers:
  scheduler_type: euler_a

usage: |
    curl http://localhost:8080/v1/images/generations \
      -H "Content-Type: application/json" \
      -d '{
        "prompt": "<positive prompt>|<negative prompt>",
        "model": "animagine-xl",
        "step": 51,
        "size": "1024x1024"
      }'
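For reference, a hedged Python sketch of roughly what this config corresponds to when driving diffusers directly: fp16 weights and the euler_a (Euler Ancestral) scheduler. The pipeline class and device handling are assumptions, not LocalAI's exact backend wiring.

# Rough standalone equivalent of the animagine-xl config above
# (assumption: Linaqruf/animagine-xl is an SDXL checkpoint).
import torch
from diffusers import StableDiffusionXLPipeline, EulerAncestralDiscreteScheduler

pipe = StableDiffusionXLPipeline.from_pretrained(
    "Linaqruf/animagine-xl",
    torch_dtype=torch.float16,  # f16: true
)
# scheduler_type: euler_a
pipe.scheduler = EulerAncestralDiscreteScheduler.from_config(pipe.scheduler.config)
pipe = pipe.to("cuda")

image = pipe(
    prompt="<positive prompt>",
    negative_prompt="<negative prompt>",
    num_inference_steps=51,  # "step": 51
    width=1024,
    height=1024,             # "size": "1024x1024"
).images[0]
image.save("animagine-xl.png")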
embedded/models/codellama-7b-gguf.yaml (new file, 16 additions)
@ -0,0 +1,16 @@
|
|||
name: codellama-7b-gguf
|
||||
backend: transformers
|
||||
parameters:
|
||||
model: huggingface://TheBloke/CodeLlama-7B-GGUF/codellama-7b.Q4_K_M.gguf
|
||||
temperature: 0.2
|
||||
top_k: 40
|
||||
seed: -1
|
||||
top_p: 0.95
|
||||
context_size: 4096
|
||||
f16: true
|
||||
gpu_layers: 90
|
||||
usage: |
|
||||
curl http://localhost:8080/v1/completions -H "Content-Type: application/json" -d '{
|
||||
"model": "codellama-7b-gguf",
|
||||
"prompt": "import socket\n\ndef ping_exponential_backoff(host: str):"
|
||||
}'
|
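The same request as the curl line in the usage block, written with the openai Python client (v1.x API) pointed at a local LocalAI instance. The base URL matches the usage snippet; the api_key value is a placeholder, assuming no authentication is configured.

# Equivalent of the curl example above via the openai Python client.
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8080/v1", api_key="sk-placeholder")

resp = client.completions.create(
    model="codellama-7b-gguf",
    prompt="import socket\n\ndef ping_exponential_backoff(host: str):",
)
print(resp.choices[0].text)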
embedded/models/codellama-7b.yaml (new file, 14 additions)
@ -0,0 +1,14 @@
|
|||
name: codellama-7b
|
||||
backend: transformers
|
||||
parameters:
|
||||
model: codellama/CodeLlama-7b-hf
|
||||
temperature: 0.2
|
||||
top_k: 40
|
||||
seed: -1
|
||||
top_p: 0.95
|
||||
|
||||
usage: |
|
||||
curl http://localhost:8080/v1/completions -H "Content-Type: application/json" -d '{
|
||||
"model": "codellama-7b",
|
||||
"prompt": "import socket\n\ndef ping_exponential_backoff(host: str):"
|
||||
}'
|
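Unlike the GGUF variant above, this config points the transformers backend straight at the Hugging Face checkpoint. For comparison, a minimal standalone transformers sketch of the same generation using the sampling parameters from the config; it is an illustration of a causal-LM load and generate, not the backend's code.

# Standalone sketch: load codellama/CodeLlama-7b-hf as a causal LM and sample
# with the parameters from the config above (temperature 0.2, top_k 40, top_p 0.95).
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "codellama/CodeLlama-7b-hf"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float16)

prompt = "import socket\n\ndef ping_exponential_backoff(host: str):"
inputs = tokenizer(prompt, return_tensors="pt")
output = model.generate(
    **inputs,
    max_new_tokens=128,
    do_sample=True,
    temperature=0.2,
    top_k=40,
    top_p=0.95,
)
print(tokenizer.decode(output[0], skip_special_tokens=True))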
@@ -1,7 +1,7 @@
 name: dolphin-mixtral-8x7b
 mmap: true
 parameters:
-  model: huggingface://TheBloke/dolphin-2.5-mixtral-8x7b-GGUF/blob/main/dolphin-2.5-mixtral-8x7b.Q2_K.gguf
+  model: huggingface://TheBloke/dolphin-2.5-mixtral-8x7b-GGUF/dolphin-2.5-mixtral-8x7b.Q2_K.gguf
   temperature: 0.2
   top_k: 40
   top_p: 0.95
embedded/models/transformers-tinyllama.yaml (new file, 32 additions)
@ -0,0 +1,32 @@
|
|||
name: tinyllama-chat
|
||||
backend: transformers
|
||||
type: AutoModelForCausalLM
|
||||
|
||||
parameters:
|
||||
model: TinyLlama/TinyLlama-1.1B-Chat-v1.0
|
||||
temperature: 0.2
|
||||
top_k: 40
|
||||
seed: -1
|
||||
top_p: 0.95
|
||||
max_tokens: 4096
|
||||
|
||||
template:
|
||||
chat_message: |
|
||||
<|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "user"}}user{{end}}
|
||||
{{if .Content}}{{.Content}}{{end}}<|im_end|>
|
||||
chat: |
|
||||
{{.Input}}
|
||||
<|im_start|>assistant
|
||||
|
||||
completion: |
|
||||
{{.Input}}
|
||||
|
||||
stopwords:
|
||||
- <|im_end|>
|
||||
|
||||
usage: |
|
||||
curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
|
||||
"model": "tinyllama-chat",
|
||||
"messages": [{"role": "user", "content": "Say this is a test!"}],
|
||||
"temperature": 0.7
|
||||
}'
|
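The chat templates above follow the ChatML convention. A small Python sketch of the prompt they would roughly render for the single-message request in the usage example; the helper name is ours, and the exact whitespace produced by LocalAI's Go templates may differ slightly.

# Rough rendering of the ChatML-style templates above for one user message.
def render_chatml(messages):
    parts = []
    for m in messages:
        parts.append(f"<|im_start|>{m['role']}\n{m['content']}<|im_end|>")
    parts.append("<|im_start|>assistant")
    return "\n".join(parts)

print(render_chatml([{"role": "user", "content": "Say this is a test!"}]))
# <|im_start|>user
# Say this is a test!<|im_end|>
# <|im_start|>assistant

Generation then stops when the model emits the configured stopword <|im_end|>.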