Mirror of https://github.com/mudler/LocalAI.git, synced 2025-06-17 08:15:00 +00:00
feat: Add backend gallery (#5607)
feat: Add backend gallery

This PR adds support for managing backends similarly to models: a backend gallery is now available and can be used to install and remove extra backends. The backend gallery can be configured in the same way as a model gallery, and API calls allow installing and removing backends at runtime as well as during LocalAI's startup phase.

* Add backends docs
* wip: Backend Dockerfile for python backends
* feat: drop extras images, build python backends separately
* fixup on all backends
* test CI
* Tweaks
* Drop old backends leftovers
* Fixup CI
* Move Dockerfile up
* Fix proto
* Feature dropped for consistency - we prefer model galleries
* Add missing packages in the build image
* exllama is only available on cublas
* pin torch on chatterbox
* Fixups to index
* CI
* Debug CI
* Install accelerator deps
* Add target arch
* Add cuda minor version
* Use self-hosted runners
* ci: use quay for test images
* fixups for vllm and chatterbox
* Small fixups on CI
* chatterbox is only available for nvidia
* Simplify CI builds
* Adapt test, use qwen3
* chore(model gallery): add jina-reranker-v1-tiny-en-gguf
* fix(gguf-parser): recover from potential panics that can happen while reading ggufs with gguf-parser
* Use reranker from llama.cpp in AIO images
* Limit concurrent jobs

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
This commit is contained in:
parent
a7a6020328
commit
2d64269763
114 changed files with 3996 additions and 1382 deletions
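The commit message notes that backends can now be installed and removed through API calls at runtime. The diff below does not include the HTTP routes themselves, so the following is only a hypothetical client sketch, assuming the backend gallery mirrors the existing model-gallery endpoints; the /backends/apply route and the payload shape are assumptions, not confirmed by this commit:

package main

import (
	"bytes"
	"fmt"
	"net/http"
)

func main() {
	// Hypothetical endpoint and payload, modeled on the model gallery's
	// /models/apply; neither is shown in this diff.
	payload := bytes.NewBufferString(`{"id": "localai@vllm"}`)
	resp, err := http.Post("http://localhost:8080/backends/apply", "application/json", payload)
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()
	fmt.Println(resp.Status)
}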
@@ -305,7 +305,7 @@ func (ml *ModelLoader) grpcModel(backend string, autodetect bool, o *Options) fu
 	}
 
 	// Check if the backend is provided as external
-	if uri, ok := o.externalBackends[backend]; ok {
+	if uri, ok := ml.GetAllExternalBackends(o)[backend]; ok {
 		log.Debug().Msgf("Loading external backend: %s", uri)
 		// check if uri is a file or a address
 		if fi, err := os.Stat(uri); err == nil {
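This hunk swaps a direct lookup in the request Options for the merged view returned by GetAllExternalBackends (added later in this diff), so backends registered on the ModelLoader at runtime are considered too. A minimal, self-contained sketch of the merge semantics; the backend names and URIs are illustrative, but the maps.Copy precedence matches the new helper:

package main

import (
	"fmt"
	"maps"
)

func main() {
	// Backends registered on the loader (e.g. installed via the backend gallery).
	loaderBackends := map[string]string{"vllm": "/backends/vllm/run.sh"}
	// Backends supplied per-request via Options (e.g. from the CLI).
	optionBackends := map[string]string{"vllm": "127.0.0.1:9090"}

	merged := make(map[string]string)
	maps.Copy(merged, loaderBackends)
	maps.Copy(merged, optionBackends) // copied last, so Options entries win on collisions

	fmt.Println(merged["vllm"]) // 127.0.0.1:9090
}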
@@ -526,7 +526,7 @@ func (ml *ModelLoader) Load(opts ...Option) (grpc.Backend, error) {
 	}
 
 	// append externalBackends supplied by the user via the CLI
-	for _, b := range o.externalBackends {
+	for _, b := range ml.GetAllExternalBackends(o) {
 		autoLoadBackends = append(autoLoadBackends, b)
 	}
 
@@ -3,6 +3,7 @@ package model
 
 import (
 	"context"
 	"fmt"
+	"maps"
 	"os"
 	"path/filepath"
 	"strings"
@@ -18,19 +19,21 @@ import (
 
 // TODO: Split ModelLoader and TemplateLoader? Just to keep things more organized. Left together to share a mutex until I look into that. Would split if we separate directories for .bin/.yaml and .tmpl
 type ModelLoader struct {
-	ModelPath     string
-	mu            sync.Mutex
-	singletonLock sync.Mutex
-	singletonMode bool
-	models        map[string]*Model
-	wd            *WatchDog
+	ModelPath        string
+	mu               sync.Mutex
+	singletonLock    sync.Mutex
+	singletonMode    bool
+	models           map[string]*Model
+	wd               *WatchDog
+	externalBackends map[string]string
 }
 
 func NewModelLoader(modelPath string, singleActiveBackend bool) *ModelLoader {
 	nml := &ModelLoader{
-		ModelPath:     modelPath,
-		models:        make(map[string]*Model),
-		singletonMode: singleActiveBackend,
+		ModelPath:        modelPath,
+		models:           make(map[string]*Model),
+		singletonMode:    singleActiveBackend,
+		externalBackends: make(map[string]string),
 	}
 
 	return nml
@@ -44,6 +47,33 @@ func (ml *ModelLoader) ExistsInModelPath(s string) bool {
 	return utils.ExistsInPath(ml.ModelPath, s)
 }
 
+func (ml *ModelLoader) SetExternalBackend(name, uri string) {
+	ml.mu.Lock()
+	defer ml.mu.Unlock()
+	ml.externalBackends[name] = uri
+}
+
+func (ml *ModelLoader) DeleteExternalBackend(name string) {
+	ml.mu.Lock()
+	defer ml.mu.Unlock()
+	delete(ml.externalBackends, name)
+}
+
+func (ml *ModelLoader) GetExternalBackend(name string) string {
+	ml.mu.Lock()
+	defer ml.mu.Unlock()
+	return ml.externalBackends[name]
+}
+
+func (ml *ModelLoader) GetAllExternalBackends(o *Options) map[string]string {
+	backends := make(map[string]string)
+	maps.Copy(backends, ml.externalBackends)
+	if o != nil {
+		maps.Copy(backends, o.externalBackends)
+	}
+	return backends
+}
+
 var knownFilesToSkip []string = []string{
 	"MODEL_CARD",
 	"README",
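Taken together, the new accessors form a small mutex-guarded registry of external backends keyed by name. A minimal usage sketch, assuming the import path follows the repository layout under pkg/model; the backend name and paths are illustrative:

package main

import (
	"fmt"

	"github.com/mudler/LocalAI/pkg/model"
)

func main() {
	// Arguments as in NewModelLoader's signature: modelPath, singleActiveBackend.
	ml := model.NewModelLoader("/models", false)

	// Register an external backend, look it up, then remove it.
	ml.SetExternalBackend("huggingface", "/backends/huggingface/run.sh")
	fmt.Println(ml.GetExternalBackend("huggingface")) // /backends/huggingface/run.sh
	ml.DeleteExternalBackend("huggingface")
}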
pkg/startup/backend_preload.go (new file, 32 lines)

@@ -0,0 +1,32 @@
+package startup
+
+import (
+	"errors"
+	"fmt"
+	"strings"
+
+	"github.com/mudler/LocalAI/core/config"
+	"github.com/mudler/LocalAI/core/gallery"
+)
+
+func InstallExternalBackends(galleries []config.Gallery, backendPath string, downloadStatus func(string, string, string, float64), backends ...string) error {
+	var errs error
+	for _, backend := range backends {
+		switch {
+		case strings.HasPrefix(backend, "oci://"):
+			backend = strings.TrimPrefix(backend, "oci://")
+
+			if err := gallery.InstallBackend(backendPath, &gallery.GalleryBackend{
+				URI: backend,
+			}, downloadStatus); err != nil {
+				errs = errors.Join(err, fmt.Errorf("error installing backend %s", backend))
+			}
+		default:
+			err := gallery.InstallBackendFromGallery(galleries, backend, backendPath, downloadStatus)
+			if err != nil {
+				errs = errors.Join(err, fmt.Errorf("error installing backend %s", backend))
+			}
+		}
+	}
+	return errs
+}
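A usage sketch for the new helper. The diff gives only the types of the downloadStatus callback's parameters, so the names below (fileName, current, total, percent) are guesses from context, and the OCI image reference and backend path are illustrative:

package main

import (
	"log"

	"github.com/mudler/LocalAI/core/config"
	"github.com/mudler/LocalAI/pkg/startup"
)

func main() {
	// Progress callback; parameter meanings are assumed, only the types are shown in the diff.
	progress := func(fileName, current, total string, percent float64) {
		log.Printf("downloading %s: %s/%s (%.1f%%)", fileName, current, total, percent)
	}

	// "oci://" URIs skip the galleries and install the image directly,
	// so an empty gallery list suffices here.
	err := startup.InstallExternalBackends(
		[]config.Gallery{},
		"/backends", // hypothetical backend path
		progress,
		"oci://quay.io/example/some-backend:latest", // hypothetical image
	)
	if err != nil {
		log.Fatal(err)
	}
}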
@@ -119,7 +119,7 @@ func installModel(galleries []config.Gallery, modelName, modelPath string, downl
 		return err, false
 	}
 
-	model := gallery.FindModel(models, modelName, modelPath)
+	model := gallery.FindGalleryElement(models, modelName, modelPath)
 	if model == nil {
 		return err, false
 	}