mirror of
https://github.com/mudler/LocalAI.git
synced 2025-05-25 13:04:59 +00:00
feat: Add Diffusers (#874)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
This commit is contained in:
parent
93a4bec06b
commit
8c781a6a44
21 changed files with 741 additions and 217 deletions
|
@ -470,6 +470,9 @@ var _ = Describe("API test", func() {
|
|||
|
||||
response := postModelApplyRequest("http://127.0.0.1:9090/models/apply", modelApplyRequest{
|
||||
ID: "model-gallery@stablediffusion",
|
||||
Overrides: map[string]interface{}{
|
||||
"parameters": map[string]interface{}{"model": "stablediffusion_assets"},
|
||||
},
|
||||
})
|
||||
|
||||
Expect(response["uuid"]).ToNot(BeEmpty(), fmt.Sprint(response))
|
||||
|
|
|
@ -23,7 +23,7 @@ func ModelEmbedding(s string, tokens []int, loader *model.ModelLoader, c config.
|
|||
var err error
|
||||
|
||||
opts := []model.Option{
|
||||
model.WithLoadGRPCLLMModelOpts(grpcOpts),
|
||||
model.WithLoadGRPCLoadModelOpts(grpcOpts),
|
||||
model.WithThreads(uint32(c.Threads)),
|
||||
model.WithAssetDir(o.AssetsDestination),
|
||||
model.WithModel(modelFile),
|
||||
|
|
|
@ -1,7 +1,6 @@
|
|||
package backend
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"sync"
|
||||
|
||||
config "github.com/go-skynet/LocalAI/api/config"
|
||||
|
@ -11,16 +10,18 @@ import (
|
|||
)
|
||||
|
||||
func ImageGeneration(height, width, mode, step, seed int, positive_prompt, negative_prompt, dst string, loader *model.ModelLoader, c config.Config, o *options.Option) (func() error, error) {
|
||||
if c.Backend != model.StableDiffusionBackend {
|
||||
return nil, fmt.Errorf("endpoint only working with stablediffusion models")
|
||||
}
|
||||
|
||||
opts := []model.Option{
|
||||
model.WithBackendString(c.Backend),
|
||||
model.WithAssetDir(o.AssetsDestination),
|
||||
model.WithThreads(uint32(c.Threads)),
|
||||
model.WithContext(o.Context),
|
||||
model.WithModel(c.ImageGenerationAssets),
|
||||
model.WithModel(c.Model),
|
||||
model.WithLoadGRPCLoadModelOpts(&proto.ModelOptions{
|
||||
CUDA: c.Diffusers.CUDA,
|
||||
SchedulerType: c.Diffusers.SchedulerType,
|
||||
PipelineType: c.Diffusers.PipelineType,
|
||||
}),
|
||||
}
|
||||
|
||||
for k, v := range o.ExternalGRPCBackends {
|
||||
|
|
|
@ -24,7 +24,7 @@ func ModelInference(ctx context.Context, s string, loader *model.ModelLoader, c
|
|||
var err error
|
||||
|
||||
opts := []model.Option{
|
||||
model.WithLoadGRPCLLMModelOpts(grpcOpts),
|
||||
model.WithLoadGRPCLoadModelOpts(grpcOpts),
|
||||
model.WithThreads(uint32(c.Threads)), // some models uses this to allocate threads during startup
|
||||
model.WithAssetDir(o.AssetsDestination),
|
||||
model.WithModel(modelFile),
|
||||
|
|
|
@ -15,27 +15,29 @@ func gRPCModelOpts(c config.Config) *pb.ModelOptions {
|
|||
b = c.Batch
|
||||
}
|
||||
return &pb.ModelOptions{
|
||||
ContextSize: int32(c.ContextSize),
|
||||
Seed: int32(c.Seed),
|
||||
NBatch: int32(b),
|
||||
NGQA: c.NGQA,
|
||||
ModelBaseName: c.ModelBaseName,
|
||||
UseFastTokenizer: c.UseFastTokenizer,
|
||||
Device: c.Device,
|
||||
UseTriton: c.Triton,
|
||||
RMSNormEps: c.RMSNormEps,
|
||||
F16Memory: c.F16,
|
||||
MLock: c.MMlock,
|
||||
RopeFreqBase: c.RopeFreqBase,
|
||||
RopeFreqScale: c.RopeFreqScale,
|
||||
NUMA: c.NUMA,
|
||||
Embeddings: c.Embeddings,
|
||||
LowVRAM: c.LowVRAM,
|
||||
NGPULayers: int32(c.NGPULayers),
|
||||
MMap: c.MMap,
|
||||
MainGPU: c.MainGPU,
|
||||
Threads: int32(c.Threads),
|
||||
TensorSplit: c.TensorSplit,
|
||||
ContextSize: int32(c.ContextSize),
|
||||
Seed: int32(c.Seed),
|
||||
NBatch: int32(b),
|
||||
NGQA: c.NGQA,
|
||||
|
||||
RMSNormEps: c.RMSNormEps,
|
||||
F16Memory: c.F16,
|
||||
MLock: c.MMlock,
|
||||
RopeFreqBase: c.RopeFreqBase,
|
||||
RopeFreqScale: c.RopeFreqScale,
|
||||
NUMA: c.NUMA,
|
||||
Embeddings: c.Embeddings,
|
||||
LowVRAM: c.LowVRAM,
|
||||
NGPULayers: int32(c.NGPULayers),
|
||||
MMap: c.MMap,
|
||||
MainGPU: c.MainGPU,
|
||||
Threads: int32(c.Threads),
|
||||
TensorSplit: c.TensorSplit,
|
||||
// AutoGPTQ
|
||||
ModelBaseName: c.AutoGPTQ.ModelBaseName,
|
||||
Device: c.AutoGPTQ.Device,
|
||||
UseTriton: c.AutoGPTQ.Triton,
|
||||
UseFastTokenizer: c.AutoGPTQ.UseFastTokenizer,
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -62,9 +64,9 @@ func gRPCPredictOpts(c config.Config, modelPath string) *pb.PredictOptions {
|
|||
RopeFreqBase: c.RopeFreqBase,
|
||||
RopeFreqScale: c.RopeFreqScale,
|
||||
NegativePrompt: c.NegativePrompt,
|
||||
Mirostat: int32(c.Mirostat),
|
||||
MirostatETA: float32(c.MirostatETA),
|
||||
MirostatTAU: float32(c.MirostatTAU),
|
||||
Mirostat: int32(c.LLMConfig.Mirostat),
|
||||
MirostatETA: float32(c.LLMConfig.MirostatETA),
|
||||
MirostatTAU: float32(c.LLMConfig.MirostatTAU),
|
||||
Debug: c.Debug,
|
||||
StopPrompts: c.StopWords,
|
||||
Repeat: int32(c.RepeatPenalty),
|
||||
|
|
|
@ -13,49 +13,65 @@ import (
|
|||
|
||||
type Config struct {
|
||||
PredictionOptions `yaml:"parameters"`
|
||||
Name string `yaml:"name"`
|
||||
StopWords []string `yaml:"stopwords"`
|
||||
Cutstrings []string `yaml:"cutstrings"`
|
||||
TrimSpace []string `yaml:"trimspace"`
|
||||
ContextSize int `yaml:"context_size"`
|
||||
F16 bool `yaml:"f16"`
|
||||
NUMA bool `yaml:"numa"`
|
||||
Threads int `yaml:"threads"`
|
||||
Debug bool `yaml:"debug"`
|
||||
Roles map[string]string `yaml:"roles"`
|
||||
Embeddings bool `yaml:"embeddings"`
|
||||
Backend string `yaml:"backend"`
|
||||
TemplateConfig TemplateConfig `yaml:"template"`
|
||||
MirostatETA float64 `yaml:"mirostat_eta"`
|
||||
MirostatTAU float64 `yaml:"mirostat_tau"`
|
||||
Mirostat int `yaml:"mirostat"`
|
||||
NGPULayers int `yaml:"gpu_layers"`
|
||||
MMap bool `yaml:"mmap"`
|
||||
MMlock bool `yaml:"mmlock"`
|
||||
LowVRAM bool `yaml:"low_vram"`
|
||||
Name string `yaml:"name"`
|
||||
|
||||
TensorSplit string `yaml:"tensor_split"`
|
||||
MainGPU string `yaml:"main_gpu"`
|
||||
ImageGenerationAssets string `yaml:"asset_dir"`
|
||||
F16 bool `yaml:"f16"`
|
||||
Threads int `yaml:"threads"`
|
||||
Debug bool `yaml:"debug"`
|
||||
Roles map[string]string `yaml:"roles"`
|
||||
Embeddings bool `yaml:"embeddings"`
|
||||
Backend string `yaml:"backend"`
|
||||
TemplateConfig TemplateConfig `yaml:"template"`
|
||||
|
||||
PromptCachePath string `yaml:"prompt_cache_path"`
|
||||
PromptCacheAll bool `yaml:"prompt_cache_all"`
|
||||
PromptCacheRO bool `yaml:"prompt_cache_ro"`
|
||||
|
||||
Grammar string `yaml:"grammar"`
|
||||
|
||||
PromptStrings, InputStrings []string
|
||||
InputToken [][]int
|
||||
functionCallString, functionCallNameString string
|
||||
PromptStrings, InputStrings []string `yaml:"-"`
|
||||
InputToken [][]int `yaml:"-"`
|
||||
functionCallString, functionCallNameString string `yaml:"-"`
|
||||
|
||||
FunctionsConfig Functions `yaml:"function"`
|
||||
|
||||
SystemPrompt string `yaml:"system_prompt"`
|
||||
// LLM configs (GPT4ALL, Llama.cpp, ...)
|
||||
LLMConfig `yaml:",inline"`
|
||||
|
||||
RMSNormEps float32 `yaml:"rms_norm_eps"`
|
||||
NGQA int32 `yaml:"ngqa"`
|
||||
// AutoGPTQ specifics
|
||||
AutoGPTQ AutoGPTQ `yaml:"autogptq"`
|
||||
|
||||
// AutoGPTQ
|
||||
// Diffusers
|
||||
Diffusers Diffusers `yaml:"diffusers"`
|
||||
|
||||
Step int `yaml:"step"`
|
||||
}
|
||||
|
||||
// Diffusers holds the options read from the `diffusers` key of a model's
// YAML configuration (see the Diffusers field on Config).
// ImageGeneration copies all three fields into the proto.ModelOptions it
// passes to the backend via model.WithLoadGRPCLoadModelOpts.
type Diffusers struct {
|
||||
// PipelineType is forwarded as ModelOptions.PipelineType.
// NOTE(review): the set of accepted values is defined by the backend, not here — confirm.
PipelineType string `yaml:"pipeline_type"`
|
||||
// SchedulerType is forwarded as ModelOptions.SchedulerType.
// NOTE(review): the set of accepted values is defined by the backend, not here — confirm.
SchedulerType string `yaml:"scheduler_type"`
|
||||
// CUDA is forwarded as ModelOptions.CUDA.
// NOTE(review): presumably asks the backend to run on a CUDA device — confirm.
CUDA bool `yaml:"cuda"`
|
||||
}
|
||||
|
||||
// LLMConfig groups the options used by LLM backends. Config embeds it with
// `yaml:",inline"`, so these keys sit at the same YAML level as Config's own
// fields, and the Go fields are reachable both as c.LLMConfig.X and, through
// promotion, as c.X.
type LLMConfig struct {
|
||||
SystemPrompt string `yaml:"system_prompt"`
|
||||
// TensorSplit, MainGPU, RMSNormEps and NGQA are copied as-is into
// pb.ModelOptions by gRPCModelOpts.
TensorSplit string `yaml:"tensor_split"`
|
||||
MainGPU string `yaml:"main_gpu"`
|
||||
RMSNormEps float32 `yaml:"rms_norm_eps"`
|
||||
NGQA int32 `yaml:"ngqa"`
|
||||
PromptCachePath string `yaml:"prompt_cache_path"`
|
||||
PromptCacheAll bool `yaml:"prompt_cache_all"`
|
||||
PromptCacheRO bool `yaml:"prompt_cache_ro"`
|
||||
// The Mirostat* values are sent in pb.PredictOptions by gRPCPredictOpts;
// updateConfig overwrites them with the request's values when those are
// non-zero.
MirostatETA float64 `yaml:"mirostat_eta"`
|
||||
MirostatTAU float64 `yaml:"mirostat_tau"`
|
||||
Mirostat int `yaml:"mirostat"`
|
||||
// NGPULayers is read from the `gpu_layers` key and sent as an int32 in
// pb.ModelOptions.
NGPULayers int `yaml:"gpu_layers"`
|
||||
MMap bool `yaml:"mmap"`
|
||||
// MMlock is sent to the backend under the name MLock in pb.ModelOptions.
MMlock bool `yaml:"mmlock"`
|
||||
LowVRAM bool `yaml:"low_vram"`
|
||||
Grammar string `yaml:"grammar"`
|
||||
// StopWords is sent to the backend as PredictOptions.StopPrompts.
StopWords []string `yaml:"stopwords"`
|
||||
Cutstrings []string `yaml:"cutstrings"`
|
||||
TrimSpace []string `yaml:"trimspace"`
|
||||
// ContextSize is sent as an int32 in pb.ModelOptions.
ContextSize int `yaml:"context_size"`
|
||||
NUMA bool `yaml:"numa"`
|
||||
}
|
||||
|
||||
type AutoGPTQ struct {
|
||||
ModelBaseName string `yaml:"model_base_name"`
|
||||
Device string `yaml:"device"`
|
||||
Triton bool `yaml:"triton"`
|
||||
|
|
|
@ -89,7 +89,10 @@ func ImageEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx
|
|||
}
|
||||
|
||||
mode := 0
|
||||
step := 15
|
||||
step := config.Step
|
||||
if step == 0 {
|
||||
step = 15
|
||||
}
|
||||
|
||||
if input.Mode != 0 {
|
||||
mode = input.Mode
|
||||
|
|
|
@ -76,7 +76,7 @@ func updateConfig(config *config.Config, input *OpenAIRequest) {
|
|||
}
|
||||
|
||||
if input.ModelBaseName != "" {
|
||||
config.ModelBaseName = input.ModelBaseName
|
||||
config.AutoGPTQ.ModelBaseName = input.ModelBaseName
|
||||
}
|
||||
|
||||
if input.NegativePromptScale != 0 {
|
||||
|
@ -149,15 +149,15 @@ func updateConfig(config *config.Config, input *OpenAIRequest) {
|
|||
}
|
||||
|
||||
if input.Mirostat != 0 {
|
||||
config.Mirostat = input.Mirostat
|
||||
config.LLMConfig.Mirostat = input.Mirostat
|
||||
}
|
||||
|
||||
if input.MirostatETA != 0 {
|
||||
config.MirostatETA = input.MirostatETA
|
||||
config.LLMConfig.MirostatETA = input.MirostatETA
|
||||
}
|
||||
|
||||
if input.MirostatTAU != 0 {
|
||||
config.MirostatTAU = input.MirostatTAU
|
||||
config.LLMConfig.MirostatTAU = input.MirostatTAU
|
||||
}
|
||||
|
||||
if input.TypicalP != 0 {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue