mirror of
https://github.com/mudler/LocalAI.git
synced 2025-06-03 01:15:00 +00:00
transformers: correctly load automodels (#1643)
* backends(transformers): use AutoModel with LLM types
* examples: animagine-xl
* Add codellama examples
This commit is contained in:
parent
3733250b3c
commit
cb7512734d
27 changed files with 1144 additions and 569 deletions
16	embedded/models/codellama-7b-gguf.yaml	Normal file
@@ -0,0 +1,16 @@
name: codellama-7b-gguf
backend: transformers
parameters:
  model: huggingface://TheBloke/CodeLlama-7B-GGUF/codellama-7b.Q4_K_M.gguf
  temperature: 0.2
  top_k: 40
  seed: -1
  top_p: 0.95
context_size: 4096
f16: true
gpu_layers: 90
usage: |
  curl http://localhost:8080/v1/completions -H "Content-Type: application/json" -d '{
      "model": "codellama-7b-gguf",
      "prompt": "import socket\n\ndef ping_exponential_backoff(host: str):"
  }'
Loading…
Add table
Add a link
Reference in a new issue