feat(diffusers): allow multiple lora adapters (#4081)

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-05-20 02:24:59 +00:00 · 2024-11-05 15:14:33 +01:00 · 2024-11-05 15:14:33 +01:00 · 947224b952
commit 947224b952
parent 20cd8814c1
4 changed files with 39 additions and 18 deletions
--- a/core/config/backend_config.go
+++ b/core/config/backend_config.go
@ -134,23 +134,25 @@ type LLMConfig struct {
 	TrimSpace       []string `yaml:"trimspace"`
 	TrimSuffix      []string `yaml:"trimsuffix"`

-	ContextSize          *int    `yaml:"context_size"`
-	NUMA                 bool    `yaml:"numa"`
-	LoraAdapter          string  `yaml:"lora_adapter"`
-	LoraBase             string  `yaml:"lora_base"`
-	LoraScale            float32 `yaml:"lora_scale"`
-	NoMulMatQ            bool    `yaml:"no_mulmatq"`
-	DraftModel           string  `yaml:"draft_model"`
-	NDraft               int32   `yaml:"n_draft"`
-	Quantization         string  `yaml:"quantization"`
-	LoadFormat           string  `yaml:"load_format"`
-	GPUMemoryUtilization float32 `yaml:"gpu_memory_utilization"` // vLLM
-	TrustRemoteCode      bool    `yaml:"trust_remote_code"`      // vLLM
-	EnforceEager         bool    `yaml:"enforce_eager"`          // vLLM
-	SwapSpace            int     `yaml:"swap_space"`             // vLLM
-	MaxModelLen          int     `yaml:"max_model_len"`          // vLLM
-	TensorParallelSize   int     `yaml:"tensor_parallel_size"`   // vLLM
-	MMProj               string  `yaml:"mmproj"`
+	ContextSize          *int      `yaml:"context_size"`
+	NUMA                 bool      `yaml:"numa"`
+	LoraAdapter          string    `yaml:"lora_adapter"`
+	LoraBase             string    `yaml:"lora_base"`
+	LoraAdapters         []string  `yaml:"lora_adapters"`
+	LoraScales           []float32 `yaml:"lora_scales"`
+	LoraScale            float32   `yaml:"lora_scale"`
+	NoMulMatQ            bool      `yaml:"no_mulmatq"`
+	DraftModel           string    `yaml:"draft_model"`
+	NDraft               int32     `yaml:"n_draft"`
+	Quantization         string    `yaml:"quantization"`
+	LoadFormat           string    `yaml:"load_format"`
+	GPUMemoryUtilization float32   `yaml:"gpu_memory_utilization"` // vLLM
+	TrustRemoteCode      bool      `yaml:"trust_remote_code"`      // vLLM
+	EnforceEager         bool      `yaml:"enforce_eager"`          // vLLM
+	SwapSpace            int       `yaml:"swap_space"`             // vLLM
+	MaxModelLen          int       `yaml:"max_model_len"`          // vLLM
+	TensorParallelSize   int       `yaml:"tensor_parallel_size"`   // vLLM
+	MMProj               string    `yaml:"mmproj"`

 	FlashAttention bool `yaml:"flash_attention"`
 	NoKVOffloading bool `yaml:"no_kv_offloading"`