feat(aio): entrypoint, update workflows (#1872)

2025-05-30 23:44:59 +00:00 · 2024-03-21 22:09:04 +01:00 · 2024-03-21 22:09:04 +01:00 · abc9360dc6
commit abc9360dc6
parent 743095b7d8
9 changed files with 191 additions and 16 deletions
--- a/aio/cpu/README.md
+++ b/aio/cpu/README.md
@@ -0,0 +1,5 @@
+## AIO CPU size
+
+Use this image on CPU-only systems.
+
+Please keep using only C++ backends so the base image is as small as possible (without CUDA, cuDNN, Python, etc.).
--- a/aio/cpu/embeddings.yaml
+++ b/aio/cpu/embeddings.yaml
@@ -1,13 +1,18 @@
-name: all-minilm-l6-v2
-backend: sentencetransformers
+backend: bert-embeddings
 embeddings: true
+f16: true
+
+gpu_layers: 90
+mmap: true
+name: text-embedding-ada-002
+
 parameters:
-  model: all-MiniLM-L6-v2
+  model: huggingface://mudler/all-MiniLM-L6-v2/ggml-model-q4_0.bin

 usage: |
    You can test this model with curl like this:

    curl http://localhost:8080/embeddings -X POST -H "Content-Type: application/json" -d '{
      "input": "Your text string goes here",
-      "model": "all-minilm-l6-v2"
+      "model": "text-embedding-ada-002"
    }'