mirror of
https://github.com/mudler/LocalAI.git
synced 2025-05-27 22:15:00 +00:00
feat: embedded model configurations, add popular model examples, refactoring (#1532)
* move downloader out * separate startup functions for preloading configuration files * docs: add popular model examples Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * shorteners * Add llava * Add mistral-openorca * Better link to build section * docs: update * fixup * Drop code dups * Minor fixups * Apply suggestions from code review Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com> * ci: try to cache gRPC build during tests Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * ci: do not build all images for tests, just necessary * ci: cache gRPC also in release pipeline * fixes * Update model_preload_test.go Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com> --------- Signed-off-by: Ettore Di Giacinto <mudler@localai.io> Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
This commit is contained in:
parent
db926896bd
commit
09e5d9007b
26 changed files with 586 additions and 150 deletions
53
embedded/embedded.go
Normal file
53
embedded/embedded.go
Normal file
|
@ -0,0 +1,53 @@
|
|||
package embedded
|
||||
|
||||
import (
|
||||
"embed"
|
||||
"fmt"
|
||||
"slices"
|
||||
"strings"
|
||||
|
||||
"github.com/go-skynet/LocalAI/pkg/assets"
|
||||
"gopkg.in/yaml.v3"
|
||||
)
|
||||
|
||||
var modelShorteners map[string]string
|
||||
|
||||
//go:embed model_library.yaml
|
||||
var modelLibrary []byte
|
||||
|
||||
//go:embed models/*
|
||||
var embeddedModels embed.FS
|
||||
|
||||
func ModelShortURL(s string) string {
|
||||
if _, ok := modelShorteners[s]; ok {
|
||||
s = modelShorteners[s]
|
||||
}
|
||||
|
||||
return s
|
||||
}
|
||||
|
||||
func init() {
|
||||
yaml.Unmarshal(modelLibrary, &modelShorteners)
|
||||
}
|
||||
|
||||
// ExistsInModelsLibrary checks if a model exists in the embedded models library
|
||||
func ExistsInModelsLibrary(s string) bool {
|
||||
f := fmt.Sprintf("%s.yaml", s)
|
||||
|
||||
a := []string{}
|
||||
|
||||
for _, j := range assets.ListFiles(embeddedModels) {
|
||||
a = append(a, strings.TrimPrefix(j, "models/"))
|
||||
}
|
||||
|
||||
return slices.Contains(a, f)
|
||||
}
|
||||
|
||||
// ResolveContent returns the content in the embedded model library
|
||||
func ResolveContent(s string) ([]byte, error) {
|
||||
if ExistsInModelsLibrary(s) {
|
||||
return embeddedModels.ReadFile(fmt.Sprintf("models/%s.yaml", s))
|
||||
}
|
||||
|
||||
return nil, fmt.Errorf("cannot find model %s", s)
|
||||
}
|
9
embedded/model_library.yaml
Normal file
9
embedded/model_library.yaml
Normal file
|
@ -0,0 +1,9 @@
|
|||
###
|
||||
###
|
||||
### This file contains the list of models that are available in the library
|
||||
### The URLs are automatically expanded when local-ai is being called with the key as argument
|
||||
###
|
||||
### For models with an entire YAML file to be embedded, put the file inside the `models`
|
||||
### directory, it will be automatically available with the file name as key (without the .yaml extension)
|
||||
|
||||
phi-2: "github://mudler/LocalAI/examples/configurations/phi-2.yaml@master"
|
31
embedded/models/llava.yaml
Normal file
31
embedded/models/llava.yaml
Normal file
|
@ -0,0 +1,31 @@
|
|||
backend: llama-cpp
|
||||
context_size: 4096
|
||||
f16: true
|
||||
|
||||
gpu_layers: 90
|
||||
mmap: true
|
||||
name: llava
|
||||
|
||||
roles:
|
||||
user: "USER:"
|
||||
assistant: "ASSISTANT:"
|
||||
system: "SYSTEM:"
|
||||
|
||||
mmproj: bakllava-mmproj.gguf
|
||||
parameters:
|
||||
model: bakllava.gguf
|
||||
temperature: 0.2
|
||||
top_k: 40
|
||||
top_p: 0.95
|
||||
|
||||
template:
|
||||
chat: |
|
||||
A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions.
|
||||
{{.Input}}
|
||||
ASSISTANT:
|
||||
|
||||
download_files:
|
||||
- filename: bakllava.gguf
|
||||
uri: huggingface://mys/ggml_bakllava-1/ggml-model-q4_k.gguf
|
||||
- filename: bakllava-mmproj.gguf
|
||||
uri: huggingface://mys/ggml_bakllava-1/mmproj-model-f16.gguf
|
23
embedded/models/mistral-openorca.yaml
Normal file
23
embedded/models/mistral-openorca.yaml
Normal file
|
@ -0,0 +1,23 @@
|
|||
name: mistral-openorca
|
||||
mmap: true
|
||||
parameters:
|
||||
model: huggingface://TheBloke/Mistral-7B-OpenOrca-GGUF/mistral-7b-openorca.Q6_K.gguf
|
||||
temperature: 0.2
|
||||
top_k: 40
|
||||
top_p: 0.95
|
||||
template:
|
||||
chat_message: |
|
||||
<|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "user"}}user{{end}}
|
||||
{{if .Content}}{{.Content}}{{end}}
|
||||
<|im_end|>
|
||||
|
||||
chat: |
|
||||
{{.Input}}
|
||||
<|im_start|>assistant
|
||||
|
||||
completion: |
|
||||
{{.Input}}
|
||||
context_size: 4096
|
||||
f16: true
|
||||
stopwords:
|
||||
- <|im_end|>
|
Loading…
Add table
Add a link
Reference in a new issue