LocalAI/core/config/application_config.go
Ettore Di Giacinto 2d64269763
feat: Add backend gallery (#5607)
* feat: Add backend gallery

This PR add support to manage backends as similar to models. There is
now available a backend gallery which can be used to install and remove
extra backends.
The backend gallery can be configured similarly as a model gallery, and
API calls allows to install and remove new backends in runtime, and as
well during the startup phase of LocalAI.

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Add backends docs

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* wip: Backend Dockerfile for python backends

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* feat: drop extras images, build python backends separately

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* fixup on all backends

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* test CI

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Tweaks

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Drop old backends leftovers

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Fixup CI

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Move dockerfile upper

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Fix proto

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Feature dropped for consistency - we prefer model galleries

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Add missing packages in the build image

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* exllama is ponly available on cublas

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* pin torch on chatterbox

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Fixups to index

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* CI

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Debug CI

* Install accellerators deps

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Add target arch

* Add cuda minor version

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Use self-hosted runners

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* ci: use quay for test images

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* fixups for vllm and chatterbox

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Small fixups on CI

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* chatterbox is only available for nvidia

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Simplify CI builds

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Adapt test, use qwen3

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* chore(model gallery): add jina-reranker-v1-tiny-en-gguf

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* fix(gguf-parser): recover from potential panics that can happen while reading ggufs with gguf-parser

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Use reranker from llama.cpp in AIO images

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Limit concurrent jobs

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

---------

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2025-06-15 14:56:52 +02:00

416 lines
9.4 KiB
Go

package config
import (
"context"
"encoding/json"
"regexp"
"time"
rice "github.com/GeertJohan/go.rice"
"github.com/mudler/LocalAI/pkg/xsysinfo"
"github.com/rs/zerolog/log"
)
type ApplicationConfig struct {
Context context.Context
ConfigFile string
ModelPath string
BackendsPath string
ExternalBackends []string
LibPath string
UploadLimitMB, Threads, ContextSize int
F16 bool
Debug bool
GeneratedContentDir string
ConfigsDir string
UploadDir string
DynamicConfigsDir string
DynamicConfigsDirPollInterval time.Duration
CORS bool
CSRF bool
PreloadJSONModels string
PreloadModelsFromPath string
CORSAllowOrigins string
ApiKeys []string
P2PToken string
P2PNetworkID string
DisableWebUI bool
EnforcePredownloadScans bool
OpaqueErrors bool
UseSubtleKeyComparison bool
DisableApiKeyRequirementForHttpGet bool
DisableMetrics bool
HttpGetExemptedEndpoints []*regexp.Regexp
DisableGalleryEndpoint bool
LoadToMemory []string
Galleries []Gallery
BackendGalleries []Gallery
BackendAssets *rice.Box
AssetsDestination string
ExternalGRPCBackends map[string]string
AutoloadGalleries bool
SingleBackend bool
ParallelBackendRequests bool
WatchDogIdle bool
WatchDogBusy bool
WatchDog bool
ModelsURL []string
WatchDogBusyTimeout, WatchDogIdleTimeout time.Duration
MachineTag string
}
type AppOption func(*ApplicationConfig)
func NewApplicationConfig(o ...AppOption) *ApplicationConfig {
opt := &ApplicationConfig{
Context: context.Background(),
UploadLimitMB: 15,
ContextSize: 512,
Debug: true,
}
for _, oo := range o {
oo(opt)
}
return opt
}
func WithModelsURL(urls ...string) AppOption {
return func(o *ApplicationConfig) {
o.ModelsURL = urls
}
}
func WithModelPath(path string) AppOption {
return func(o *ApplicationConfig) {
o.ModelPath = path
}
}
func WithBackendsPath(path string) AppOption {
return func(o *ApplicationConfig) {
o.BackendsPath = path
}
}
func WithExternalBackends(backends ...string) AppOption {
return func(o *ApplicationConfig) {
o.ExternalBackends = backends
}
}
func WithMachineTag(tag string) AppOption {
return func(o *ApplicationConfig) {
o.MachineTag = tag
}
}
func WithCors(b bool) AppOption {
return func(o *ApplicationConfig) {
o.CORS = b
}
}
func WithP2PNetworkID(s string) AppOption {
return func(o *ApplicationConfig) {
o.P2PNetworkID = s
}
}
func WithCsrf(b bool) AppOption {
return func(o *ApplicationConfig) {
o.CSRF = b
}
}
func WithP2PToken(s string) AppOption {
return func(o *ApplicationConfig) {
o.P2PToken = s
}
}
func WithLibPath(path string) AppOption {
return func(o *ApplicationConfig) {
o.LibPath = path
}
}
var EnableWatchDog = func(o *ApplicationConfig) {
o.WatchDog = true
}
var EnableWatchDogIdleCheck = func(o *ApplicationConfig) {
o.WatchDog = true
o.WatchDogIdle = true
}
var DisableGalleryEndpoint = func(o *ApplicationConfig) {
o.DisableGalleryEndpoint = true
}
var EnableWatchDogBusyCheck = func(o *ApplicationConfig) {
o.WatchDog = true
o.WatchDogBusy = true
}
var DisableWebUI = func(o *ApplicationConfig) {
o.DisableWebUI = true
}
func SetWatchDogBusyTimeout(t time.Duration) AppOption {
return func(o *ApplicationConfig) {
o.WatchDogBusyTimeout = t
}
}
func SetWatchDogIdleTimeout(t time.Duration) AppOption {
return func(o *ApplicationConfig) {
o.WatchDogIdleTimeout = t
}
}
var EnableSingleBackend = func(o *ApplicationConfig) {
o.SingleBackend = true
}
var EnableParallelBackendRequests = func(o *ApplicationConfig) {
o.ParallelBackendRequests = true
}
var EnableGalleriesAutoload = func(o *ApplicationConfig) {
o.AutoloadGalleries = true
}
func WithExternalBackend(name string, uri string) AppOption {
return func(o *ApplicationConfig) {
if o.ExternalGRPCBackends == nil {
o.ExternalGRPCBackends = make(map[string]string)
}
o.ExternalGRPCBackends[name] = uri
}
}
func WithCorsAllowOrigins(b string) AppOption {
return func(o *ApplicationConfig) {
o.CORSAllowOrigins = b
}
}
func WithBackendAssetsOutput(out string) AppOption {
return func(o *ApplicationConfig) {
o.AssetsDestination = out
}
}
func WithBackendAssets(f *rice.Box) AppOption {
return func(o *ApplicationConfig) {
o.BackendAssets = f
}
}
func WithStringGalleries(galls string) AppOption {
return func(o *ApplicationConfig) {
if galls == "" {
o.Galleries = []Gallery{}
return
}
var galleries []Gallery
if err := json.Unmarshal([]byte(galls), &galleries); err != nil {
log.Error().Err(err).Msg("failed loading galleries")
}
o.Galleries = append(o.Galleries, galleries...)
}
}
func WithBackendGalleries(galls string) AppOption {
return func(o *ApplicationConfig) {
if galls == "" {
o.BackendGalleries = []Gallery{}
return
}
var galleries []Gallery
if err := json.Unmarshal([]byte(galls), &galleries); err != nil {
log.Error().Err(err).Msg("failed loading galleries")
}
o.BackendGalleries = append(o.BackendGalleries, galleries...)
}
}
func WithGalleries(galleries []Gallery) AppOption {
return func(o *ApplicationConfig) {
o.Galleries = append(o.Galleries, galleries...)
}
}
func WithContext(ctx context.Context) AppOption {
return func(o *ApplicationConfig) {
o.Context = ctx
}
}
func WithYAMLConfigPreload(configFile string) AppOption {
return func(o *ApplicationConfig) {
o.PreloadModelsFromPath = configFile
}
}
func WithJSONStringPreload(configFile string) AppOption {
return func(o *ApplicationConfig) {
o.PreloadJSONModels = configFile
}
}
func WithConfigFile(configFile string) AppOption {
return func(o *ApplicationConfig) {
o.ConfigFile = configFile
}
}
func WithUploadLimitMB(limit int) AppOption {
return func(o *ApplicationConfig) {
o.UploadLimitMB = limit
}
}
func WithThreads(threads int) AppOption {
return func(o *ApplicationConfig) {
if threads == 0 { // 0 is not allowed
threads = xsysinfo.CPUPhysicalCores()
}
o.Threads = threads
}
}
func WithContextSize(ctxSize int) AppOption {
return func(o *ApplicationConfig) {
o.ContextSize = ctxSize
}
}
func WithF16(f16 bool) AppOption {
return func(o *ApplicationConfig) {
o.F16 = f16
}
}
func WithDebug(debug bool) AppOption {
return func(o *ApplicationConfig) {
o.Debug = debug
}
}
func WithGeneratedContentDir(generatedContentDir string) AppOption {
return func(o *ApplicationConfig) {
o.GeneratedContentDir = generatedContentDir
}
}
func WithUploadDir(uploadDir string) AppOption {
return func(o *ApplicationConfig) {
o.UploadDir = uploadDir
}
}
func WithConfigsDir(configsDir string) AppOption {
return func(o *ApplicationConfig) {
o.ConfigsDir = configsDir
}
}
func WithDynamicConfigDir(dynamicConfigsDir string) AppOption {
return func(o *ApplicationConfig) {
o.DynamicConfigsDir = dynamicConfigsDir
}
}
func WithDynamicConfigDirPollInterval(interval time.Duration) AppOption {
return func(o *ApplicationConfig) {
o.DynamicConfigsDirPollInterval = interval
}
}
func WithApiKeys(apiKeys []string) AppOption {
return func(o *ApplicationConfig) {
o.ApiKeys = apiKeys
}
}
func WithEnforcedPredownloadScans(enforced bool) AppOption {
return func(o *ApplicationConfig) {
o.EnforcePredownloadScans = enforced
}
}
func WithOpaqueErrors(opaque bool) AppOption {
return func(o *ApplicationConfig) {
o.OpaqueErrors = opaque
}
}
func WithLoadToMemory(models []string) AppOption {
return func(o *ApplicationConfig) {
o.LoadToMemory = models
}
}
func WithSubtleKeyComparison(subtle bool) AppOption {
return func(o *ApplicationConfig) {
o.UseSubtleKeyComparison = subtle
}
}
func WithDisableApiKeyRequirementForHttpGet(required bool) AppOption {
return func(o *ApplicationConfig) {
o.DisableApiKeyRequirementForHttpGet = required
}
}
var DisableMetricsEndpoint AppOption = func(o *ApplicationConfig) {
o.DisableMetrics = true
}
func WithHttpGetExemptedEndpoints(endpoints []string) AppOption {
return func(o *ApplicationConfig) {
o.HttpGetExemptedEndpoints = []*regexp.Regexp{}
for _, epr := range endpoints {
r, err := regexp.Compile(epr)
if err == nil && r != nil {
o.HttpGetExemptedEndpoints = append(o.HttpGetExemptedEndpoints, r)
} else {
log.Warn().Err(err).Str("regex", epr).Msg("Error while compiling HTTP Get Exemption regex, skipping this entry.")
}
}
}
}
// ToConfigLoaderOptions returns a slice of ConfigLoader Option.
// Some options defined at the application level are going to be passed as defaults for
// all the configuration for the models.
// This includes for instance the context size or the number of threads.
// If a model doesn't set configs directly to the config model file
// it will use the defaults defined here.
func (o *ApplicationConfig) ToConfigLoaderOptions() []ConfigLoaderOption {
return []ConfigLoaderOption{
LoadOptionContextSize(o.ContextSize),
LoadOptionDebug(o.Debug),
LoadOptionF16(o.F16),
LoadOptionThreads(o.Threads),
ModelPath(o.ModelPath),
}
}
// func WithMetrics(meter *metrics.Metrics) AppOption {
// return func(o *StartupOptions) {
// o.Metrics = meter
// }
// }