feat: Add backend gallery (#5607)

* feat: Add backend gallery

This PR adds support for managing backends, similar to models. There is
now a backend gallery available which can be used to install and remove
extra backends.
The backend gallery can be configured similarly to a model gallery, and
API calls allow installing and removing backends at runtime, as well
as during the startup phase of LocalAI.

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Add backends docs

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* wip: Backend Dockerfile for python backends

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* feat: drop extras images, build python backends separately

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* fixup on all backends

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* test CI

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Tweaks

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Drop old backends leftovers

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Fixup CI

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Move dockerfile upper

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Fix proto

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Feature dropped for consistency - we prefer model galleries

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Add missing packages in the build image

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* exllama is only available on cublas

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* pin torch on chatterbox

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Fixups to index

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* CI

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Debug CI

* Install accelerator deps

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Add target arch

* Add cuda minor version

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Use self-hosted runners

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* ci: use quay for test images

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* fixups for vllm and chatterbox

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Small fixups on CI

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* chatterbox is only available for nvidia

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Simplify CI builds

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Adapt test, use qwen3

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* chore(model gallery): add jina-reranker-v1-tiny-en-gguf

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* fix(gguf-parser): recover from potential panics that can happen while reading ggufs with gguf-parser

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Use reranker from llama.cpp in AIO images

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Limit concurrent jobs

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

---------

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
This commit is contained in:
Ettore Di Giacinto 2025-06-15 14:56:52 +02:00 committed by GitHub
parent a7a6020328
commit 2d64269763
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
114 changed files with 3996 additions and 1382 deletions

View file

@ -305,7 +305,7 @@ func (ml *ModelLoader) grpcModel(backend string, autodetect bool, o *Options) fu
}
// Check if the backend is provided as external
if uri, ok := o.externalBackends[backend]; ok {
if uri, ok := ml.GetAllExternalBackends(o)[backend]; ok {
log.Debug().Msgf("Loading external backend: %s", uri)
// check if uri is a file or a address
if fi, err := os.Stat(uri); err == nil {
@ -526,7 +526,7 @@ func (ml *ModelLoader) Load(opts ...Option) (grpc.Backend, error) {
}
// append externalBackends supplied by the user via the CLI
for _, b := range o.externalBackends {
for _, b := range ml.GetAllExternalBackends(o) {
autoLoadBackends = append(autoLoadBackends, b)
}

View file

@ -3,6 +3,7 @@ package model
import (
"context"
"fmt"
"maps"
"os"
"path/filepath"
"strings"
@ -18,19 +19,21 @@ import (
// TODO: Split ModelLoader and TemplateLoader? Just to keep things more organized. Left together to share a mutex until I look into that. Would split if we separate directories for .bin/.yaml and .tmpl
type ModelLoader struct {
ModelPath string
mu sync.Mutex
singletonLock sync.Mutex
singletonMode bool
models map[string]*Model
wd *WatchDog
ModelPath string
mu sync.Mutex
singletonLock sync.Mutex
singletonMode bool
models map[string]*Model
wd *WatchDog
externalBackends map[string]string
}
func NewModelLoader(modelPath string, singleActiveBackend bool) *ModelLoader {
nml := &ModelLoader{
ModelPath: modelPath,
models: make(map[string]*Model),
singletonMode: singleActiveBackend,
ModelPath: modelPath,
models: make(map[string]*Model),
singletonMode: singleActiveBackend,
externalBackends: make(map[string]string),
}
return nml
@ -44,6 +47,33 @@ func (ml *ModelLoader) ExistsInModelPath(s string) bool {
return utils.ExistsInPath(ml.ModelPath, s)
}
// SetExternalBackend registers (or replaces) the URI of the external
// backend identified by name. Safe for concurrent use.
func (ml *ModelLoader) SetExternalBackend(name, uri string) {
	ml.mu.Lock()
	ml.externalBackends[name] = uri
	ml.mu.Unlock()
}
// DeleteExternalBackend removes the external backend registered under
// name; it is a no-op when the name is unknown. Safe for concurrent use.
func (ml *ModelLoader) DeleteExternalBackend(name string) {
	ml.mu.Lock()
	delete(ml.externalBackends, name)
	ml.mu.Unlock()
}
// GetExternalBackend returns the URI registered for name, or the empty
// string when no such backend is registered. Safe for concurrent use.
func (ml *ModelLoader) GetExternalBackend(name string) string {
	ml.mu.Lock()
	uri := ml.externalBackends[name]
	ml.mu.Unlock()
	return uri
}
// GetAllExternalBackends returns a merged snapshot of the external
// backends registered on the loader plus any supplied via o; entries
// from o take precedence on name collisions. The returned map is a
// fresh copy that the caller may mutate freely. o may be nil.
func (ml *ModelLoader) GetAllExternalBackends(o *Options) map[string]string {
	// Read ml.externalBackends under ml.mu: SetExternalBackend,
	// DeleteExternalBackend and GetExternalBackend all access the map
	// under the same lock, so an unlocked read here would be a data race.
	ml.mu.Lock()
	backends := make(map[string]string, len(ml.externalBackends))
	maps.Copy(backends, ml.externalBackends)
	ml.mu.Unlock()
	if o != nil {
		maps.Copy(backends, o.externalBackends)
	}
	return backends
}
var knownFilesToSkip []string = []string{
"MODEL_CARD",
"README",

View file

@ -0,0 +1,32 @@
package startup
import (
"errors"
"fmt"
"strings"
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/gallery"
)
// InstallExternalBackends installs each of the given backends, either
// directly from an OCI image (names prefixed with "oci://") or from the
// configured galleries. downloadStatus is forwarded to the installer as
// a progress callback. Installation continues past individual failures;
// every error encountered is joined into the returned error (nil when
// all installs succeed).
func InstallExternalBackends(galleries []config.Gallery, backendPath string, downloadStatus func(string, string, string, float64), backends ...string) error {
	var errs error
	for _, backend := range backends {
		switch {
		case strings.HasPrefix(backend, "oci://"):
			name := strings.TrimPrefix(backend, "oci://")
			if err := gallery.InstallBackend(backendPath, &gallery.GalleryBackend{
				URI: name,
			}, downloadStatus); err != nil {
				// Join into errs: the original joined only the current
				// error, silently dropping failures from earlier
				// iterations. Wrap with %w so callers can errors.Is/As
				// the underlying cause.
				errs = errors.Join(errs, fmt.Errorf("error installing backend %s: %w", name, err))
			}
		default:
			if err := gallery.InstallBackendFromGallery(galleries, backend, backendPath, downloadStatus); err != nil {
				errs = errors.Join(errs, fmt.Errorf("error installing backend %s: %w", backend, err))
			}
		}
	}
	return errs
}

View file

@ -119,7 +119,7 @@ func installModel(galleries []config.Gallery, modelName, modelPath string, downl
return err, false
}
model := gallery.FindModel(models, modelName, modelPath)
model := gallery.FindGalleryElement(models, modelName, modelPath)
if model == nil {
return err, false
}