feat: Add backend gallery (#5607)

* feat: Add backend gallery

This PR adds support for managing backends, similar to models. There is
now a backend gallery available which can be used to install and remove
extra backends.
The backend gallery can be configured similarly to a model gallery, and
API calls allow installing and removing backends at runtime, as well
as during the startup phase of LocalAI.

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Add backends docs

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* wip: Backend Dockerfile for python backends

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* feat: drop extras images, build python backends separately

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* fixup on all backends

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* test CI

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Tweaks

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Drop old backends leftovers

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Fixup CI

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Move dockerfile upper

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Fix proto

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Feature dropped for consistency - we prefer model galleries

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Add missing packages in the build image

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* exllama is only available on cublas

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* pin torch on chatterbox

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Fixups to index

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* CI

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Debug CI

* Install accelerator deps

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Add target arch

* Add cuda minor version

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Use self-hosted runners

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* ci: use quay for test images

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* fixups for vllm and chatterbox

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Small fixups on CI

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* chatterbox is only available for nvidia

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Simplify CI builds

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Adapt test, use qwen3

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* chore(model gallery): add jina-reranker-v1-tiny-en-gguf

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* fix(gguf-parser): recover from potential panics that can happen while reading ggufs with gguf-parser

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Use reranker from llama.cpp in AIO images

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Limit concurrent jobs

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

---------

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
This commit is contained in:
Ettore Di Giacinto 2025-06-15 14:56:52 +02:00 committed by GitHub
parent a7a6020328
commit 2d64269763
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
114 changed files with 3996 additions and 1382 deletions

View file

@ -305,7 +305,7 @@ func (ml *ModelLoader) grpcModel(backend string, autodetect bool, o *Options) fu
}
// Check if the backend is provided as external
if uri, ok := o.externalBackends[backend]; ok {
if uri, ok := ml.GetAllExternalBackends(o)[backend]; ok {
log.Debug().Msgf("Loading external backend: %s", uri)
// check if uri is a file or a address
if fi, err := os.Stat(uri); err == nil {
@ -526,7 +526,7 @@ func (ml *ModelLoader) Load(opts ...Option) (grpc.Backend, error) {
}
// append externalBackends supplied by the user via the CLI
for _, b := range o.externalBackends {
for _, b := range ml.GetAllExternalBackends(o) {
autoLoadBackends = append(autoLoadBackends, b)
}

View file

@ -3,6 +3,7 @@ package model
import (
"context"
"fmt"
"maps"
"os"
"path/filepath"
"strings"
@ -18,19 +19,21 @@ import (
// TODO: Split ModelLoader and TemplateLoader? Just to keep things more organized. Left together to share a mutex until I look into that. Would split if we separate directories for .bin/.yaml and .tmpl
type ModelLoader struct {
ModelPath string
mu sync.Mutex
singletonLock sync.Mutex
singletonMode bool
models map[string]*Model
wd *WatchDog
ModelPath string
mu sync.Mutex
singletonLock sync.Mutex
singletonMode bool
models map[string]*Model
wd *WatchDog
externalBackends map[string]string
}
func NewModelLoader(modelPath string, singleActiveBackend bool) *ModelLoader {
nml := &ModelLoader{
ModelPath: modelPath,
models: make(map[string]*Model),
singletonMode: singleActiveBackend,
ModelPath: modelPath,
models: make(map[string]*Model),
singletonMode: singleActiveBackend,
externalBackends: make(map[string]string),
}
return nml
@ -44,6 +47,33 @@ func (ml *ModelLoader) ExistsInModelPath(s string) bool {
return utils.ExistsInPath(ml.ModelPath, s)
}
// SetExternalBackend registers (or replaces) the URI of the external
// backend identified by name. Safe for concurrent use.
func (ml *ModelLoader) SetExternalBackend(name, uri string) {
	ml.mu.Lock()
	ml.externalBackends[name] = uri
	ml.mu.Unlock()
}
// DeleteExternalBackend removes the external backend registered under
// name; it is a no-op when the name is unknown. Safe for concurrent use.
func (ml *ModelLoader) DeleteExternalBackend(name string) {
	ml.mu.Lock()
	delete(ml.externalBackends, name)
	ml.mu.Unlock()
}
// GetExternalBackend returns the URI registered for name, or the empty
// string when no such backend is registered. Safe for concurrent use.
func (ml *ModelLoader) GetExternalBackend(name string) string {
	ml.mu.Lock()
	uri := ml.externalBackends[name]
	ml.mu.Unlock()
	return uri
}
// GetAllExternalBackends returns a merged snapshot of the external
// backends registered on the loader plus any supplied via o; entries
// from o take precedence on name collisions. The returned map is a
// fresh copy that the caller may mutate freely. o may be nil.
func (ml *ModelLoader) GetAllExternalBackends(o *Options) map[string]string {
	// Read ml.externalBackends under ml.mu: SetExternalBackend,
	// DeleteExternalBackend and GetExternalBackend all access the map
	// under the same lock, so an unlocked read here would be a data race.
	ml.mu.Lock()
	backends := make(map[string]string, len(ml.externalBackends))
	maps.Copy(backends, ml.externalBackends)
	ml.mu.Unlock()
	if o != nil {
		maps.Copy(backends, o.externalBackends)
	}
	return backends
}
var knownFilesToSkip []string = []string{
"MODEL_CARD",
"README",

View file

@ -0,0 +1,32 @@
package startup
import (
"errors"
"fmt"
"strings"
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/gallery"
)
// InstallExternalBackends installs each of the given backends, either
// directly from an OCI image (names prefixed with "oci://") or from the
// configured galleries. downloadStatus is forwarded to the installer as
// a progress callback. Installation continues past individual failures;
// every error encountered is joined into the returned error (nil when
// all installs succeed).
func InstallExternalBackends(galleries []config.Gallery, backendPath string, downloadStatus func(string, string, string, float64), backends ...string) error {
	var errs error
	for _, backend := range backends {
		switch {
		case strings.HasPrefix(backend, "oci://"):
			name := strings.TrimPrefix(backend, "oci://")
			if err := gallery.InstallBackend(backendPath, &gallery.GalleryBackend{
				URI: name,
			}, downloadStatus); err != nil {
				// Join into errs: the original joined only the current
				// error, silently dropping failures from earlier
				// iterations. Wrap with %w so callers can errors.Is/As
				// the underlying cause.
				errs = errors.Join(errs, fmt.Errorf("error installing backend %s: %w", name, err))
			}
		default:
			if err := gallery.InstallBackendFromGallery(galleries, backend, backendPath, downloadStatus); err != nil {
				errs = errors.Join(errs, fmt.Errorf("error installing backend %s: %w", backend, err))
			}
		}
	}
	return errs
}

View file

@ -119,7 +119,7 @@ func installModel(galleries []config.Gallery, modelName, modelPath string, downl
return err, false
}
model := gallery.FindModel(models, modelName, modelPath)
model := gallery.FindGalleryElement(models, modelName, modelPath)
if model == nil {
return err, false
}