Merge branch 'master' into default_miro

2025-06-28 05:35:00 +00:00 · 2025-02-06 11:42:22 +01:00 · 2025-02-06 11:42:22 +01:00 · ccc82ceb7e
commit ccc82ceb7e
parent 3a1727a4fe d35595372d
578 changed files with 15978 additions and 15598 deletions
--- a/core/config/backend_config.go
+++ b/core/config/backend_config.go
@ -3,11 +3,13 @@ package config
 import (
 	"os"
 	"regexp"
+	"slices"
 	"strings"

 	"github.com/mudler/LocalAI/core/schema"
 	"github.com/mudler/LocalAI/pkg/downloader"
 	"github.com/mudler/LocalAI/pkg/functions"
+	"gopkg.in/yaml.v3"
 )

 const (
@ -19,21 +21,22 @@ type TTSConfig struct {
 	// Voice wav path or id
 	Voice string `yaml:"voice"`

-	// Vall-e-x
-	VallE VallE `yaml:"vall-e"`
+	AudioPath string `yaml:"audio_path"`
 }

 type BackendConfig struct {
 	schema.PredictionOptions `yaml:"parameters"`
 	Name                     string `yaml:"name"`

-	F16            *bool             `yaml:"f16"`
-	Threads        *int              `yaml:"threads"`
-	Debug          *bool             `yaml:"debug"`
-	Roles          map[string]string `yaml:"roles"`
-	Embeddings     *bool             `yaml:"embeddings"`
-	Backend        string            `yaml:"backend"`
-	TemplateConfig TemplateConfig    `yaml:"template"`
+	F16                 *bool                  `yaml:"f16"`
+	Threads             *int                   `yaml:"threads"`
+	Debug               *bool                  `yaml:"debug"`
+	Roles               map[string]string      `yaml:"roles"`
+	Embeddings          *bool                  `yaml:"embeddings"`
+	Backend             string                 `yaml:"backend"`
+	TemplateConfig      TemplateConfig         `yaml:"template"`
+	KnownUsecaseStrings []string               `yaml:"known_usecases"`
+	KnownUsecases       *BackendConfigUsecases `yaml:"-"`

 	PromptStrings, InputStrings                []string               `yaml:"-"`
 	InputToken                                 [][]int                `yaml:"-"`
@ -68,6 +71,8 @@ type BackendConfig struct {

 	Description string `yaml:"description"`
 	Usage       string `yaml:"usage"`
+
+	Options []string `yaml:"options"`
 }

 type File struct {
@ -76,10 +81,6 @@ type File struct {
 	URI      downloader.URI `yaml:"uri" json:"uri"`
 }

-type VallE struct {
-	AudioPath string `yaml:"audio_path"`
-}
-
 type FeatureFlag map[string]*bool

 func (ff FeatureFlag) Enabled(s string) bool {
@ -93,16 +94,15 @@ type GRPC struct {
 }

 type Diffusers struct {
-	CUDA             bool    `yaml:"cuda"`
-	PipelineType     string  `yaml:"pipeline_type"`
-	SchedulerType    string  `yaml:"scheduler_type"`
-	EnableParameters string  `yaml:"enable_parameters"` // A list of comma separated parameters to specify
-	CFGScale         float32 `yaml:"cfg_scale"`         // Classifier-Free Guidance Scale
-	IMG2IMG          bool    `yaml:"img2img"`           // Image to Image Diffuser
-	ClipSkip         int     `yaml:"clip_skip"`         // Skip every N frames
-	ClipModel        string  `yaml:"clip_model"`        // Clip model to use
-	ClipSubFolder    string  `yaml:"clip_subfolder"`    // Subfolder to use for clip model
-	ControlNet       string  `yaml:"control_net"`
+	CUDA             bool   `yaml:"cuda"`
+	PipelineType     string `yaml:"pipeline_type"`
+	SchedulerType    string `yaml:"scheduler_type"`
+	EnableParameters string `yaml:"enable_parameters"` // A list of comma separated parameters to specify
+	IMG2IMG          bool   `yaml:"img2img"`           // Image to Image Diffuser
+	ClipSkip         int    `yaml:"clip_skip"`         // Skip every N frames
+	ClipModel        string `yaml:"clip_model"`        // Clip model to use
+	ClipSubFolder    string `yaml:"clip_subfolder"`    // Subfolder to use for clip model
+	ControlNet       string `yaml:"control_net"`
 }

 // LLMConfig is a struct that holds the configuration that are
@ -130,25 +130,30 @@ type LLMConfig struct {
 	TrimSpace       []string `yaml:"trimspace"`
 	TrimSuffix      []string `yaml:"trimsuffix"`

-	ContextSize          *int    `yaml:"context_size"`
-	NUMA                 bool    `yaml:"numa"`
-	LoraAdapter          string  `yaml:"lora_adapter"`
-	LoraBase             string  `yaml:"lora_base"`
-	LoraScale            float32 `yaml:"lora_scale"`
-	NoMulMatQ            bool    `yaml:"no_mulmatq"`
-	DraftModel           string  `yaml:"draft_model"`
-	NDraft               int32   `yaml:"n_draft"`
-	Quantization         string  `yaml:"quantization"`
-	GPUMemoryUtilization float32 `yaml:"gpu_memory_utilization"` // vLLM
-	TrustRemoteCode      bool    `yaml:"trust_remote_code"`      // vLLM
-	EnforceEager         bool    `yaml:"enforce_eager"`          // vLLM
-	SwapSpace            int     `yaml:"swap_space"`             // vLLM
-	MaxModelLen          int     `yaml:"max_model_len"`          // vLLM
-	TensorParallelSize   int     `yaml:"tensor_parallel_size"`   // vLLM
-	MMProj               string  `yaml:"mmproj"`
+	ContextSize          *int      `yaml:"context_size"`
+	NUMA                 bool      `yaml:"numa"`
+	LoraAdapter          string    `yaml:"lora_adapter"`
+	LoraBase             string    `yaml:"lora_base"`
+	LoraAdapters         []string  `yaml:"lora_adapters"`
+	LoraScales           []float32 `yaml:"lora_scales"`
+	LoraScale            float32   `yaml:"lora_scale"`
+	NoMulMatQ            bool      `yaml:"no_mulmatq"`
+	DraftModel           string    `yaml:"draft_model"`
+	NDraft               int32     `yaml:"n_draft"`
+	Quantization         string    `yaml:"quantization"`
+	LoadFormat           string    `yaml:"load_format"`
+	GPUMemoryUtilization float32   `yaml:"gpu_memory_utilization"` // vLLM
+	TrustRemoteCode      bool      `yaml:"trust_remote_code"`      // vLLM
+	EnforceEager         bool      `yaml:"enforce_eager"`          // vLLM
+	SwapSpace            int       `yaml:"swap_space"`             // vLLM
+	MaxModelLen          int       `yaml:"max_model_len"`          // vLLM
+	TensorParallelSize   int       `yaml:"tensor_parallel_size"`   // vLLM
+	MMProj               string    `yaml:"mmproj"`

-	FlashAttention bool `yaml:"flash_attention"`
-	NoKVOffloading bool `yaml:"no_kv_offloading"`
+	FlashAttention bool   `yaml:"flash_attention"`
+	NoKVOffloading bool   `yaml:"no_kv_offloading"`
+	CacheTypeK     string `yaml:"cache_type_k"`
+	CacheTypeV     string `yaml:"cache_type_v"`

 	RopeScaling string `yaml:"rope_scaling"`
 	ModelType   string `yaml:"type"`
@ -157,6 +162,8 @@ type LLMConfig struct {
 	YarnAttnFactor float32 `yaml:"yarn_attn_factor"`
 	YarnBetaFast   float32 `yaml:"yarn_beta_fast"`
 	YarnBetaSlow   float32 `yaml:"yarn_beta_slow"`
+
+	CFGScale float32 `yaml:"cfg_scale"` // Classifier-Free Guidance Scale
 }

 // AutoGPTQ is a struct that holds the configuration specific to the AutoGPTQ backend
@ -192,6 +199,21 @@ type TemplateConfig struct {
 	// JoinChatMessagesByCharacter is a string that will be used to join chat messages together.
 	// It defaults to \n
 	JoinChatMessagesByCharacter *string `yaml:"join_chat_messages_by_character"`
+
+	Multimodal string `yaml:"multimodal"`
+
+	JinjaTemplate bool `yaml:"jinja_template"`
+}
+
+func (c *BackendConfig) UnmarshalYAML(value *yaml.Node) error {
+	type BCAlias BackendConfig
+	var aux BCAlias
+	if err := value.Decode(&aux); err != nil {
+		return err
+	}
+	*c = BackendConfig(aux)
+	c.KnownUsecases = GetUsecasesFromYAML(c.KnownUsecaseStrings)
+	return nil
 }

 func (c *BackendConfig) SetFunctionCallString(s string) {
@ -411,3 +433,121 @@ func (c *BackendConfig) Validate() bool {
 func (c *BackendConfig) HasTemplate() bool {
 	return c.TemplateConfig.Completion != "" || c.TemplateConfig.Edit != "" || c.TemplateConfig.Chat != "" || c.TemplateConfig.ChatMessage != ""
 }
+
+type BackendConfigUsecases int
+
+const (
+	FLAG_ANY              BackendConfigUsecases = 0b000000000
+	FLAG_CHAT             BackendConfigUsecases = 0b000000001
+	FLAG_COMPLETION       BackendConfigUsecases = 0b000000010
+	FLAG_EDIT             BackendConfigUsecases = 0b000000100
+	FLAG_EMBEDDINGS       BackendConfigUsecases = 0b000001000
+	FLAG_RERANK           BackendConfigUsecases = 0b000010000
+	FLAG_IMAGE            BackendConfigUsecases = 0b000100000
+	FLAG_TRANSCRIPT       BackendConfigUsecases = 0b001000000
+	FLAG_TTS              BackendConfigUsecases = 0b010000000
+	FLAG_SOUND_GENERATION BackendConfigUsecases = 0b100000000
+
+	// Common Subsets
+	FLAG_LLM BackendConfigUsecases = FLAG_CHAT & FLAG_COMPLETION & FLAG_EDIT
+)
+
+func GetAllBackendConfigUsecases() map[string]BackendConfigUsecases {
+	return map[string]BackendConfigUsecases{
+		"FLAG_ANY":              FLAG_ANY,
+		"FLAG_CHAT":             FLAG_CHAT,
+		"FLAG_COMPLETION":       FLAG_COMPLETION,
+		"FLAG_EDIT":             FLAG_EDIT,
+		"FLAG_EMBEDDINGS":       FLAG_EMBEDDINGS,
+		"FLAG_RERANK":           FLAG_RERANK,
+		"FLAG_IMAGE":            FLAG_IMAGE,
+		"FLAG_TRANSCRIPT":       FLAG_TRANSCRIPT,
+		"FLAG_TTS":              FLAG_TTS,
+		"FLAG_SOUND_GENERATION": FLAG_SOUND_GENERATION,
+		"FLAG_LLM":              FLAG_LLM,
+	}
+}
+
+func GetUsecasesFromYAML(input []string) *BackendConfigUsecases {
+	if len(input) == 0 {
+		return nil
+	}
+	result := FLAG_ANY
+	flags := GetAllBackendConfigUsecases()
+	for _, str := range input {
+		flag, exists := flags["FLAG_"+strings.ToUpper(str)]
+		if exists {
+			result |= flag
+		}
+	}
+	return &result
+}
+
+// HasUsecases examines a BackendConfig and determines which endpoints have a chance of success.
+func (c *BackendConfig) HasUsecases(u BackendConfigUsecases) bool {
+	if (c.KnownUsecases != nil) && ((u & *c.KnownUsecases) == u) {
+		return true
+	}
+	return c.GuessUsecases(u)
+}
+
+// GuessUsecases is a **heuristic based** function, as the backend in question may not be loaded yet, and the config may not record what it's useful at.
+// In its current state, this function should ideally check for properties of the config like templates, rather than the direct backend name checks for the lower half.
+// This avoids the maintenance burden of updating this list for each new backend - but unfortunately, that's the best option for some services currently.
+func (c *BackendConfig) GuessUsecases(u BackendConfigUsecases) bool {
+	if (u & FLAG_CHAT) == FLAG_CHAT {
+		if c.TemplateConfig.Chat == "" && c.TemplateConfig.ChatMessage == "" {
+			return false
+		}
+	}
+	if (u & FLAG_COMPLETION) == FLAG_COMPLETION {
+		if c.TemplateConfig.Completion == "" {
+			return false
+		}
+	}
+	if (u & FLAG_EDIT) == FLAG_EDIT {
+		if c.TemplateConfig.Edit == "" {
+			return false
+		}
+	}
+	if (u & FLAG_EMBEDDINGS) == FLAG_EMBEDDINGS {
+		if c.Embeddings == nil || !*c.Embeddings {
+			return false
+		}
+	}
+	if (u & FLAG_IMAGE) == FLAG_IMAGE {
+		imageBackends := []string{"diffusers", "stablediffusion", "stablediffusion-ggml"}
+		if !slices.Contains(imageBackends, c.Backend) {
+			return false
+		}
+
+		if c.Backend == "diffusers" && c.Diffusers.PipelineType == "" {
+			return false
+		}
+
+	}
+	if (u & FLAG_RERANK) == FLAG_RERANK {
+		if c.Backend != "rerankers" {
+			return false
+		}
+	}
+	if (u & FLAG_TRANSCRIPT) == FLAG_TRANSCRIPT {
+		if c.Backend != "whisper" {
+			return false
+		}
+	}
+	if (u & FLAG_TTS) == FLAG_TTS {
+		ttsBackends := []string{"piper", "transformers-musicgen", "parler-tts"}
+		if !slices.Contains(ttsBackends, c.Backend) {
+			return false
+		}
+	}
+
+	if (u & FLAG_SOUND_GENERATION) == FLAG_SOUND_GENERATION {
+		if c.Backend != "transformers-musicgen" {
+			return false
+		}
+	}
+
+	return true
+}