mirror of
https://github.com/mudler/LocalAI.git
synced 2025-05-20 02:24:59 +00:00
feat(diffusers): allow multiple lora adapters (#4081)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
This commit is contained in:
parent
20cd8814c1
commit
947224b952
4 changed files with 39 additions and 18 deletions
|
@ -134,23 +134,25 @@ type LLMConfig struct {
|
|||
TrimSpace []string `yaml:"trimspace"`
|
||||
TrimSuffix []string `yaml:"trimsuffix"`
|
||||
|
||||
ContextSize *int `yaml:"context_size"`
|
||||
NUMA bool `yaml:"numa"`
|
||||
LoraAdapter string `yaml:"lora_adapter"`
|
||||
LoraBase string `yaml:"lora_base"`
|
||||
LoraScale float32 `yaml:"lora_scale"`
|
||||
NoMulMatQ bool `yaml:"no_mulmatq"`
|
||||
DraftModel string `yaml:"draft_model"`
|
||||
NDraft int32 `yaml:"n_draft"`
|
||||
Quantization string `yaml:"quantization"`
|
||||
LoadFormat string `yaml:"load_format"`
|
||||
GPUMemoryUtilization float32 `yaml:"gpu_memory_utilization"` // vLLM
|
||||
TrustRemoteCode bool `yaml:"trust_remote_code"` // vLLM
|
||||
EnforceEager bool `yaml:"enforce_eager"` // vLLM
|
||||
SwapSpace int `yaml:"swap_space"` // vLLM
|
||||
MaxModelLen int `yaml:"max_model_len"` // vLLM
|
||||
TensorParallelSize int `yaml:"tensor_parallel_size"` // vLLM
|
||||
MMProj string `yaml:"mmproj"`
|
||||
ContextSize *int `yaml:"context_size"`
|
||||
NUMA bool `yaml:"numa"`
|
||||
LoraAdapter string `yaml:"lora_adapter"`
|
||||
LoraBase string `yaml:"lora_base"`
|
||||
LoraAdapters []string `yaml:"lora_adapters"`
|
||||
LoraScales []float32 `yaml:"lora_scales"`
|
||||
LoraScale float32 `yaml:"lora_scale"`
|
||||
NoMulMatQ bool `yaml:"no_mulmatq"`
|
||||
DraftModel string `yaml:"draft_model"`
|
||||
NDraft int32 `yaml:"n_draft"`
|
||||
Quantization string `yaml:"quantization"`
|
||||
LoadFormat string `yaml:"load_format"`
|
||||
GPUMemoryUtilization float32 `yaml:"gpu_memory_utilization"` // vLLM
|
||||
TrustRemoteCode bool `yaml:"trust_remote_code"` // vLLM
|
||||
EnforceEager bool `yaml:"enforce_eager"` // vLLM
|
||||
SwapSpace int `yaml:"swap_space"` // vLLM
|
||||
MaxModelLen int `yaml:"max_model_len"` // vLLM
|
||||
TensorParallelSize int `yaml:"tensor_parallel_size"` // vLLM
|
||||
MMProj string `yaml:"mmproj"`
|
||||
|
||||
FlashAttention bool `yaml:"flash_attention"`
|
||||
NoKVOffloading bool `yaml:"no_kv_offloading"`
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue