feat(aio): add tests, update model definitions (#1880)

2025-05-20 02:24:59 +00:00 · 2024-03-22 21:13:11 +01:00 · 2024-03-22 21:13:11 +01:00 · 4b1ee0c170
commit 4b1ee0c170
parent 3bec467a91
16 changed files with 461 additions and 40 deletions
--- a/aio/cpu/speech-to-text.yaml
+++ b/aio/cpu/speech-to-text.yaml
@@ -1,4 +1,4 @@
-name: whisper
+name: whisper-1
 backend: whisper
 parameters:
  model: ggml-whisper-base.bin
@@ -10,7 +10,7 @@ usage: |
    ## Send the example audio file to the transcriptions endpoint
    curl http://localhost:8080/v1/audio/transcriptions \
         -H "Content-Type: multipart/form-data" \
-         -F file="@$PWD/gb1.ogg" -F model="whisper"
+         -F file="@$PWD/gb1.ogg" -F model="whisper-1"

 download_files:
 - filename: "ggml-whisper-base.bin"
--- a/aio/cpu/text-to-speech.yaml
+++ b/aio/cpu/text-to-speech.yaml
@@ -1,4 +1,4 @@
-name: voice-en-us-amy-low
+name: tts-1
 download_files:
  - filename: voice-en-us-amy-low.tar.gz
    uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-amy-low.tar.gz
--- a/aio/cpu/text-to-text.yaml
+++ b/aio/cpu/text-to-text.yaml
@@ -1,22 +1,25 @@
-name: gpt-3.5-turbo
-context_size: 2048
-f16: true
-gpu_layers: 90
+name: gpt-4
 mmap: true
-trimsuffix: 
- "\n"
 parameters:
-  model: huggingface://TheBloke/phi-2-GGUF/phi-2.Q8_0.gguf
+  model: huggingface://l3utterfly/phi-2-layla-v1-chatml-gguf/phi-2-layla-v1-chatml-Q8_0.gguf

 template:
-  chat: &template |-
-    Instruct: {{.Input}}
-    Output:
-  completion: *template
-
+  chat_message: |
+    <|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "user"}}user{{end}}
+    {{if .Content}}{{.Content}}{{end}}
+    <|im_end|>
+  chat: |
+    {{.Input}}
+    <|im_start|>assistant
+  completion: |
+    {{.Input}}
+context_size: 2048
+f16: true
+stopwords:
+- <|im_end|>
+- <dummy32000>
 usage: |
-      To use this model, interact with the API (in another terminal) with curl for instance:
      curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
-          "model": "phi-2",
+          "model": "gpt-4",
          "messages": [{"role": "user", "content": "How are you doing?", "temperature": 0.1}]
      }'
--- a/aio/cpu/vision.yaml
+++ b/aio/cpu/vision.yaml
@@ -4,7 +4,7 @@ f16: true

 gpu_layers: 90
 mmap: true
-name: llava
+name: gpt-4-vision-preview

 roles:
  user: "USER:"
@@ -36,5 +36,5 @@ download_files:

 usage: |
    curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
-        "model": "llava",
+        "model": "gpt-4-vision-preview",
        "messages": [{"role": "user", "content": [{"type":"text", "text": "What is in the image?"}, {"type": "image_url", "image_url": {"url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg" }}], "temperature": 0.9}]}'
--- a/aio/entrypoint.sh
+++ b/aio/entrypoint.sh
@@ -88,8 +88,8 @@ function check_vars() {
 detect_gpu
 detect_gpu_size

-SIZE=${SIZE:-$GPU_SIZE} # default to cpu
-MODELS=${MODELS:-/aio/${SIZE}/embeddings.yaml,/aio/${SIZE}/text-to-speech.yaml,/aio/${SIZE}/image-gen.yaml,/aio/${SIZE}/text-to-text.yaml,/aio/${SIZE}/speech-to-text.yaml,/aio/${SIZE}/vision.yaml}
+SIZE="${SIZE:-$GPU_SIZE}" # default to cpu
+export MODELS="${MODELS:-/aio/${SIZE}/embeddings.yaml,/aio/${SIZE}/text-to-speech.yaml,/aio/${SIZE}/image-gen.yaml,/aio/${SIZE}/text-to-text.yaml,/aio/${SIZE}/speech-to-text.yaml,/aio/${SIZE}/vision.yaml}"

 check_vars

--- a/aio/gpu-8g/image-gen.yaml
+++ b/aio/gpu-8g/image-gen.yaml
@@ -1,4 +1,4 @@
-name: dreamshaper
+name: stablediffusion
 parameters:
  model: huggingface://Lykon/DreamShaper/DreamShaper_8_pruned.safetensors
 backend: diffusers
--- a/aio/gpu-8g/speech-to-text.yaml
+++ b/aio/gpu-8g/speech-to-text.yaml
@@ -1,4 +1,4 @@
-name: whisper
+name: whisper-1
 backend: whisper
 parameters:
  model: ggml-whisper-base.bin
@@ -10,7 +10,7 @@ usage: |
    ## Send the example audio file to the transcriptions endpoint
    curl http://localhost:8080/v1/audio/transcriptions \
         -H "Content-Type: multipart/form-data" \
-         -F file="@$PWD/gb1.ogg" -F model="whisper"
+         -F file="@$PWD/gb1.ogg" -F model="whisper-1"

 download_files:
 - filename: "ggml-whisper-base.bin"
--- a/aio/gpu-8g/text-to-speech.yaml
+++ b/aio/gpu-8g/text-to-speech.yaml
@@ -1,4 +1,4 @@
-name: voice-en-us-amy-low
+name: tts-1
 download_files:
  - filename: voice-en-us-amy-low.tar.gz
    uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-amy-low.tar.gz
@@ -10,6 +10,6 @@ usage: |
    To test if this model works as expected, you can use the following curl command:

    curl http://localhost:8080/tts -H "Content-Type: application/json" -d '{
-      "model":"voice-en-us-amy-low",
+      "model":"tts-1",
      "input": "Hi, this is a test."
    }'
--- a/aio/gpu-8g/text-to-text.yaml
+++ b/aio/gpu-8g/text-to-text.yaml
@@ -1,4 +1,4 @@
-name: gpt-3.5-turbo
+name: gpt-4
 mmap: true
 parameters:
  model: huggingface://NousResearch/Hermes-2-Pro-Mistral-7B-GGUF/Hermes-2-Pro-Mistral-7B.Q6_K.gguf
@@ -46,6 +46,6 @@ stopwords:
 - <dummy32000>
 usage: |
      curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
-          "model": "hermes-2-pro-mistral",
+          "model": "gpt-4",
          "messages": [{"role": "user", "content": "How are you doing?", "temperature": 0.1}]
      }'
--- a/aio/gpu-8g/vision.yaml
+++ b/aio/gpu-8g/vision.yaml
@@ -4,23 +4,20 @@ f16: true

 gpu_layers: 90
 mmap: true
-name: llava
+name: gpt-4-vision-preview

 roles:
  user: "USER:"
  assistant: "ASSISTANT:"
  system: "SYSTEM:"

-mmproj: bakllava-mmproj.gguf
+mmproj: llava-v1.6-7b-mmproj-f16.gguf
 parameters:
-  model: bakllava.gguf
+  model: llava-v1.6-mistral-7b.Q5_K_M.gguf
  temperature: 0.2
  top_k: 40
  top_p: 0.95
  seed: -1
-mirostat: 2
-mirostat_eta: 1.0
-mirostat_tau: 1.0

 template:
  chat: |
@@ -29,12 +26,12 @@ template:
    ASSISTANT:

 download_files:
- filename: bakllava.gguf
-  uri: huggingface://mys/ggml_bakllava-1/ggml-model-q4_k.gguf
- filename: bakllava-mmproj.gguf
-  uri: huggingface://mys/ggml_bakllava-1/mmproj-model-f16.gguf
+- filename: llava-v1.6-mistral-7b.Q5_K_M.gguf
+  uri: huggingface://cjpais/llava-1.6-mistral-7b-gguf/llava-v1.6-mistral-7b.Q5_K_M.gguf
+- filename: llava-v1.6-7b-mmproj-f16.gguf
+  uri: huggingface://cjpais/llava-1.6-mistral-7b-gguf/mmproj-model-f16.gguf

 usage: |
    curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
-        "model": "llava",
+        "model": "gpt-4-vision-preview",
        "messages": [{"role": "user", "content": [{"type":"text", "text": "What is in the image?"}, {"type": "image_url", "image_url": {"url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg" }}], "temperature": 0.9}]}'