Merge branch 'master' into default_miro

This commit is contained in:
Ettore Di Giacinto 2025-02-06 11:42:22 +01:00 committed by GitHub
commit ccc82ceb7e
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
578 changed files with 15978 additions and 15598 deletions

View file

@ -3,11 +3,13 @@ package config
import (
"os"
"regexp"
"slices"
"strings"
"github.com/mudler/LocalAI/core/schema"
"github.com/mudler/LocalAI/pkg/downloader"
"github.com/mudler/LocalAI/pkg/functions"
"gopkg.in/yaml.v3"
)
const (
@ -19,21 +21,22 @@ type TTSConfig struct {
// Voice wav path or id
Voice string `yaml:"voice"`
// Vall-e-x
VallE VallE `yaml:"vall-e"`
AudioPath string `yaml:"audio_path"`
}
type BackendConfig struct {
schema.PredictionOptions `yaml:"parameters"`
Name string `yaml:"name"`
F16 *bool `yaml:"f16"`
Threads *int `yaml:"threads"`
Debug *bool `yaml:"debug"`
Roles map[string]string `yaml:"roles"`
Embeddings *bool `yaml:"embeddings"`
Backend string `yaml:"backend"`
TemplateConfig TemplateConfig `yaml:"template"`
F16 *bool `yaml:"f16"`
Threads *int `yaml:"threads"`
Debug *bool `yaml:"debug"`
Roles map[string]string `yaml:"roles"`
Embeddings *bool `yaml:"embeddings"`
Backend string `yaml:"backend"`
TemplateConfig TemplateConfig `yaml:"template"`
KnownUsecaseStrings []string `yaml:"known_usecases"`
KnownUsecases *BackendConfigUsecases `yaml:"-"`
PromptStrings, InputStrings []string `yaml:"-"`
InputToken [][]int `yaml:"-"`
@ -68,6 +71,8 @@ type BackendConfig struct {
Description string `yaml:"description"`
Usage string `yaml:"usage"`
Options []string `yaml:"options"`
}
type File struct {
@ -76,10 +81,6 @@ type File struct {
URI downloader.URI `yaml:"uri" json:"uri"`
}
type VallE struct {
AudioPath string `yaml:"audio_path"`
}
type FeatureFlag map[string]*bool
func (ff FeatureFlag) Enabled(s string) bool {
@ -93,16 +94,15 @@ type GRPC struct {
}
type Diffusers struct {
CUDA bool `yaml:"cuda"`
PipelineType string `yaml:"pipeline_type"`
SchedulerType string `yaml:"scheduler_type"`
EnableParameters string `yaml:"enable_parameters"` // A list of comma separated parameters to specify
CFGScale float32 `yaml:"cfg_scale"` // Classifier-Free Guidance Scale
IMG2IMG bool `yaml:"img2img"` // Image to Image Diffuser
ClipSkip int `yaml:"clip_skip"` // Skip every N frames
ClipModel string `yaml:"clip_model"` // Clip model to use
ClipSubFolder string `yaml:"clip_subfolder"` // Subfolder to use for clip model
ControlNet string `yaml:"control_net"`
CUDA bool `yaml:"cuda"`
PipelineType string `yaml:"pipeline_type"`
SchedulerType string `yaml:"scheduler_type"`
EnableParameters string `yaml:"enable_parameters"` // A list of comma separated parameters to specify
IMG2IMG bool `yaml:"img2img"` // Image to Image Diffuser
ClipSkip int `yaml:"clip_skip"` // Skip every N frames
ClipModel string `yaml:"clip_model"` // Clip model to use
ClipSubFolder string `yaml:"clip_subfolder"` // Subfolder to use for clip model
ControlNet string `yaml:"control_net"`
}
// LLMConfig is a struct that holds the configuration that are
@ -130,25 +130,30 @@ type LLMConfig struct {
TrimSpace []string `yaml:"trimspace"`
TrimSuffix []string `yaml:"trimsuffix"`
ContextSize *int `yaml:"context_size"`
NUMA bool `yaml:"numa"`
LoraAdapter string `yaml:"lora_adapter"`
LoraBase string `yaml:"lora_base"`
LoraScale float32 `yaml:"lora_scale"`
NoMulMatQ bool `yaml:"no_mulmatq"`
DraftModel string `yaml:"draft_model"`
NDraft int32 `yaml:"n_draft"`
Quantization string `yaml:"quantization"`
GPUMemoryUtilization float32 `yaml:"gpu_memory_utilization"` // vLLM
TrustRemoteCode bool `yaml:"trust_remote_code"` // vLLM
EnforceEager bool `yaml:"enforce_eager"` // vLLM
SwapSpace int `yaml:"swap_space"` // vLLM
MaxModelLen int `yaml:"max_model_len"` // vLLM
TensorParallelSize int `yaml:"tensor_parallel_size"` // vLLM
MMProj string `yaml:"mmproj"`
ContextSize *int `yaml:"context_size"`
NUMA bool `yaml:"numa"`
LoraAdapter string `yaml:"lora_adapter"`
LoraBase string `yaml:"lora_base"`
LoraAdapters []string `yaml:"lora_adapters"`
LoraScales []float32 `yaml:"lora_scales"`
LoraScale float32 `yaml:"lora_scale"`
NoMulMatQ bool `yaml:"no_mulmatq"`
DraftModel string `yaml:"draft_model"`
NDraft int32 `yaml:"n_draft"`
Quantization string `yaml:"quantization"`
LoadFormat string `yaml:"load_format"`
GPUMemoryUtilization float32 `yaml:"gpu_memory_utilization"` // vLLM
TrustRemoteCode bool `yaml:"trust_remote_code"` // vLLM
EnforceEager bool `yaml:"enforce_eager"` // vLLM
SwapSpace int `yaml:"swap_space"` // vLLM
MaxModelLen int `yaml:"max_model_len"` // vLLM
TensorParallelSize int `yaml:"tensor_parallel_size"` // vLLM
MMProj string `yaml:"mmproj"`
FlashAttention bool `yaml:"flash_attention"`
NoKVOffloading bool `yaml:"no_kv_offloading"`
FlashAttention bool `yaml:"flash_attention"`
NoKVOffloading bool `yaml:"no_kv_offloading"`
CacheTypeK string `yaml:"cache_type_k"`
CacheTypeV string `yaml:"cache_type_v"`
RopeScaling string `yaml:"rope_scaling"`
ModelType string `yaml:"type"`
@ -157,6 +162,8 @@ type LLMConfig struct {
YarnAttnFactor float32 `yaml:"yarn_attn_factor"`
YarnBetaFast float32 `yaml:"yarn_beta_fast"`
YarnBetaSlow float32 `yaml:"yarn_beta_slow"`
CFGScale float32 `yaml:"cfg_scale"` // Classifier-Free Guidance Scale
}
// AutoGPTQ is a struct that holds the configuration specific to the AutoGPTQ backend
@ -192,6 +199,21 @@ type TemplateConfig struct {
// JoinChatMessagesByCharacter is a string that will be used to join chat messages together.
// It defaults to \n
JoinChatMessagesByCharacter *string `yaml:"join_chat_messages_by_character"`
Multimodal string `yaml:"multimodal"`
JinjaTemplate bool `yaml:"jinja_template"`
}
func (c *BackendConfig) UnmarshalYAML(value *yaml.Node) error {
type BCAlias BackendConfig
var aux BCAlias
if err := value.Decode(&aux); err != nil {
return err
}
*c = BackendConfig(aux)
c.KnownUsecases = GetUsecasesFromYAML(c.KnownUsecaseStrings)
return nil
}
func (c *BackendConfig) SetFunctionCallString(s string) {
@ -411,3 +433,121 @@ func (c *BackendConfig) Validate() bool {
func (c *BackendConfig) HasTemplate() bool {
return c.TemplateConfig.Completion != "" || c.TemplateConfig.Edit != "" || c.TemplateConfig.Chat != "" || c.TemplateConfig.ChatMessage != ""
}
type BackendConfigUsecases int
const (
FLAG_ANY BackendConfigUsecases = 0b000000000
FLAG_CHAT BackendConfigUsecases = 0b000000001
FLAG_COMPLETION BackendConfigUsecases = 0b000000010
FLAG_EDIT BackendConfigUsecases = 0b000000100
FLAG_EMBEDDINGS BackendConfigUsecases = 0b000001000
FLAG_RERANK BackendConfigUsecases = 0b000010000
FLAG_IMAGE BackendConfigUsecases = 0b000100000
FLAG_TRANSCRIPT BackendConfigUsecases = 0b001000000
FLAG_TTS BackendConfigUsecases = 0b010000000
FLAG_SOUND_GENERATION BackendConfigUsecases = 0b100000000
// Common Subsets
FLAG_LLM BackendConfigUsecases = FLAG_CHAT & FLAG_COMPLETION & FLAG_EDIT
)
func GetAllBackendConfigUsecases() map[string]BackendConfigUsecases {
return map[string]BackendConfigUsecases{
"FLAG_ANY": FLAG_ANY,
"FLAG_CHAT": FLAG_CHAT,
"FLAG_COMPLETION": FLAG_COMPLETION,
"FLAG_EDIT": FLAG_EDIT,
"FLAG_EMBEDDINGS": FLAG_EMBEDDINGS,
"FLAG_RERANK": FLAG_RERANK,
"FLAG_IMAGE": FLAG_IMAGE,
"FLAG_TRANSCRIPT": FLAG_TRANSCRIPT,
"FLAG_TTS": FLAG_TTS,
"FLAG_SOUND_GENERATION": FLAG_SOUND_GENERATION,
"FLAG_LLM": FLAG_LLM,
}
}
func GetUsecasesFromYAML(input []string) *BackendConfigUsecases {
if len(input) == 0 {
return nil
}
result := FLAG_ANY
flags := GetAllBackendConfigUsecases()
for _, str := range input {
flag, exists := flags["FLAG_"+strings.ToUpper(str)]
if exists {
result |= flag
}
}
return &result
}
// HasUsecases examines a BackendConfig and determines which endpoints have a chance of success.
func (c *BackendConfig) HasUsecases(u BackendConfigUsecases) bool {
if (c.KnownUsecases != nil) && ((u & *c.KnownUsecases) == u) {
return true
}
return c.GuessUsecases(u)
}
// GuessUsecases is a **heuristic based** function, as the backend in question may not be loaded yet, and the config may not record what it's useful at.
// In its current state, this function should ideally check for properties of the config like templates, rather than the direct backend name checks for the lower half.
// This avoids the maintenance burden of updating this list for each new backend - but unfortunately, that's the best option for some services currently.
func (c *BackendConfig) GuessUsecases(u BackendConfigUsecases) bool {
if (u & FLAG_CHAT) == FLAG_CHAT {
if c.TemplateConfig.Chat == "" && c.TemplateConfig.ChatMessage == "" {
return false
}
}
if (u & FLAG_COMPLETION) == FLAG_COMPLETION {
if c.TemplateConfig.Completion == "" {
return false
}
}
if (u & FLAG_EDIT) == FLAG_EDIT {
if c.TemplateConfig.Edit == "" {
return false
}
}
if (u & FLAG_EMBEDDINGS) == FLAG_EMBEDDINGS {
if c.Embeddings == nil || !*c.Embeddings {
return false
}
}
if (u & FLAG_IMAGE) == FLAG_IMAGE {
imageBackends := []string{"diffusers", "stablediffusion", "stablediffusion-ggml"}
if !slices.Contains(imageBackends, c.Backend) {
return false
}
if c.Backend == "diffusers" && c.Diffusers.PipelineType == "" {
return false
}
}
if (u & FLAG_RERANK) == FLAG_RERANK {
if c.Backend != "rerankers" {
return false
}
}
if (u & FLAG_TRANSCRIPT) == FLAG_TRANSCRIPT {
if c.Backend != "whisper" {
return false
}
}
if (u & FLAG_TTS) == FLAG_TTS {
ttsBackends := []string{"piper", "transformers-musicgen", "parler-tts"}
if !slices.Contains(ttsBackends, c.Backend) {
return false
}
}
if (u & FLAG_SOUND_GENERATION) == FLAG_SOUND_GENERATION {
if c.Backend != "transformers-musicgen" {
return false
}
}
return true
}