feat: Add Diffusers (#874)

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-05-25 13:04:59 +00:00 · 2023-08-09 08:38:51 +02:00 · 2023-08-09 08:38:51 +02:00 · 8c781a6a44
commit 8c781a6a44
parent 93a4bec06b
21 changed files with 741 additions and 217 deletions
--- a/api/api_test.go
+++ b/api/api_test.go
@ -470,6 +470,9 @@ var _ = Describe("API test", func() {

 			response := postModelApplyRequest("http://127.0.0.1:9090/models/apply", modelApplyRequest{
 				ID: "model-gallery@stablediffusion",
+				Overrides: map[string]interface{}{
+					"parameters": map[string]interface{}{"model": "stablediffusion_assets"},
+				},
 			})

 			Expect(response["uuid"]).ToNot(BeEmpty(), fmt.Sprint(response))
--- a/api/backend/embeddings.go
+++ b/api/backend/embeddings.go
@ -23,7 +23,7 @@ func ModelEmbedding(s string, tokens []int, loader *model.ModelLoader, c config.
 	var err error

 	opts := []model.Option{
-		model.WithLoadGRPCLLMModelOpts(grpcOpts),
+		model.WithLoadGRPCLoadModelOpts(grpcOpts),
 		model.WithThreads(uint32(c.Threads)),
 		model.WithAssetDir(o.AssetsDestination),
 		model.WithModel(modelFile),
--- a/api/backend/image.go
+++ b/api/backend/image.go
@ -1,7 +1,6 @@
 package backend

 import (
-	"fmt"
 	"sync"

 	config "github.com/go-skynet/LocalAI/api/config"
@ -11,16 +10,18 @@ import (
 )

 func ImageGeneration(height, width, mode, step, seed int, positive_prompt, negative_prompt, dst string, loader *model.ModelLoader, c config.Config, o *options.Option) (func() error, error) {
-	if c.Backend != model.StableDiffusionBackend {
-		return nil, fmt.Errorf("endpoint only working with stablediffusion models")
-	}

 	opts := []model.Option{
 		model.WithBackendString(c.Backend),
 		model.WithAssetDir(o.AssetsDestination),
 		model.WithThreads(uint32(c.Threads)),
 		model.WithContext(o.Context),
-		model.WithModel(c.ImageGenerationAssets),
+		model.WithModel(c.Model),
+		model.WithLoadGRPCLoadModelOpts(&proto.ModelOptions{
+			CUDA:          c.Diffusers.CUDA,
+			SchedulerType: c.Diffusers.SchedulerType,
+			PipelineType:  c.Diffusers.PipelineType,
+		}),
 	}

 	for k, v := range o.ExternalGRPCBackends {
--- a/api/backend/llm.go
+++ b/api/backend/llm.go
@ -24,7 +24,7 @@ func ModelInference(ctx context.Context, s string, loader *model.ModelLoader, c
 	var err error

 	opts := []model.Option{
-		model.WithLoadGRPCLLMModelOpts(grpcOpts),
+		model.WithLoadGRPCLoadModelOpts(grpcOpts),
 		model.WithThreads(uint32(c.Threads)), // some models use this to allocate threads during startup
 		model.WithAssetDir(o.AssetsDestination),
 		model.WithModel(modelFile),
--- a/api/backend/options.go
+++ b/api/backend/options.go
@ -15,27 +15,29 @@ func gRPCModelOpts(c config.Config) *pb.ModelOptions {
 		b = c.Batch
 	}
 	return &pb.ModelOptions{
-		ContextSize:      int32(c.ContextSize),
-		Seed:             int32(c.Seed),
-		NBatch:           int32(b),
-		NGQA:             c.NGQA,
-		ModelBaseName:    c.ModelBaseName,
-		UseFastTokenizer: c.UseFastTokenizer,
-		Device:           c.Device,
-		UseTriton:        c.Triton,
-		RMSNormEps:       c.RMSNormEps,
-		F16Memory:        c.F16,
-		MLock:            c.MMlock,
-		RopeFreqBase:     c.RopeFreqBase,
-		RopeFreqScale:    c.RopeFreqScale,
-		NUMA:             c.NUMA,
-		Embeddings:       c.Embeddings,
-		LowVRAM:          c.LowVRAM,
-		NGPULayers:       int32(c.NGPULayers),
-		MMap:             c.MMap,
-		MainGPU:          c.MainGPU,
-		Threads:          int32(c.Threads),
-		TensorSplit:      c.TensorSplit,
+		ContextSize: int32(c.ContextSize),
+		Seed:        int32(c.Seed),
+		NBatch:      int32(b),
+		NGQA:        c.NGQA,
+
+		RMSNormEps:    c.RMSNormEps,
+		F16Memory:     c.F16,
+		MLock:         c.MMlock,
+		RopeFreqBase:  c.RopeFreqBase,
+		RopeFreqScale: c.RopeFreqScale,
+		NUMA:          c.NUMA,
+		Embeddings:    c.Embeddings,
+		LowVRAM:       c.LowVRAM,
+		NGPULayers:    int32(c.NGPULayers),
+		MMap:          c.MMap,
+		MainGPU:       c.MainGPU,
+		Threads:       int32(c.Threads),
+		TensorSplit:   c.TensorSplit,
+		// AutoGPTQ
+		ModelBaseName:    c.AutoGPTQ.ModelBaseName,
+		Device:           c.AutoGPTQ.Device,
+		UseTriton:        c.AutoGPTQ.Triton,
+		UseFastTokenizer: c.AutoGPTQ.UseFastTokenizer,
 	}
 }

@ -62,9 +64,9 @@ func gRPCPredictOpts(c config.Config, modelPath string) *pb.PredictOptions {
 		RopeFreqBase:        c.RopeFreqBase,
 		RopeFreqScale:       c.RopeFreqScale,
 		NegativePrompt:      c.NegativePrompt,
-		Mirostat:            int32(c.Mirostat),
-		MirostatETA:         float32(c.MirostatETA),
-		MirostatTAU:         float32(c.MirostatTAU),
+		Mirostat:            int32(c.LLMConfig.Mirostat),
+		MirostatETA:         float32(c.LLMConfig.MirostatETA),
+		MirostatTAU:         float32(c.LLMConfig.MirostatTAU),
 		Debug:               c.Debug,
 		StopPrompts:         c.StopWords,
 		Repeat:              int32(c.RepeatPenalty),
--- a/api/config/config.go
+++ b/api/config/config.go
@ -13,49 +13,65 @@ import (

 type Config struct {
 	PredictionOptions `yaml:"parameters"`
-	Name              string            `yaml:"name"`
-	StopWords         []string          `yaml:"stopwords"`
-	Cutstrings        []string          `yaml:"cutstrings"`
-	TrimSpace         []string          `yaml:"trimspace"`
-	ContextSize       int               `yaml:"context_size"`
-	F16               bool              `yaml:"f16"`
-	NUMA              bool              `yaml:"numa"`
-	Threads           int               `yaml:"threads"`
-	Debug             bool              `yaml:"debug"`
-	Roles             map[string]string `yaml:"roles"`
-	Embeddings        bool              `yaml:"embeddings"`
-	Backend           string            `yaml:"backend"`
-	TemplateConfig    TemplateConfig    `yaml:"template"`
-	MirostatETA       float64           `yaml:"mirostat_eta"`
-	MirostatTAU       float64           `yaml:"mirostat_tau"`
-	Mirostat          int               `yaml:"mirostat"`
-	NGPULayers        int               `yaml:"gpu_layers"`
-	MMap              bool              `yaml:"mmap"`
-	MMlock            bool              `yaml:"mmlock"`
-	LowVRAM           bool              `yaml:"low_vram"`
+	Name              string `yaml:"name"`

-	TensorSplit           string `yaml:"tensor_split"`
-	MainGPU               string `yaml:"main_gpu"`
-	ImageGenerationAssets string `yaml:"asset_dir"`
+	F16            bool              `yaml:"f16"`
+	Threads        int               `yaml:"threads"`
+	Debug          bool              `yaml:"debug"`
+	Roles          map[string]string `yaml:"roles"`
+	Embeddings     bool              `yaml:"embeddings"`
+	Backend        string            `yaml:"backend"`
+	TemplateConfig TemplateConfig    `yaml:"template"`

-	PromptCachePath string `yaml:"prompt_cache_path"`
-	PromptCacheAll  bool   `yaml:"prompt_cache_all"`
-	PromptCacheRO   bool   `yaml:"prompt_cache_ro"`
-
-	Grammar string `yaml:"grammar"`
-
-	PromptStrings, InputStrings                []string
-	InputToken                                 [][]int
-	functionCallString, functionCallNameString string
+	PromptStrings, InputStrings                []string `yaml:"-"`
+	InputToken                                 [][]int  `yaml:"-"`
+	functionCallString, functionCallNameString string   `yaml:"-"`

 	FunctionsConfig Functions `yaml:"function"`

-	SystemPrompt string `yaml:"system_prompt"`
+	// LLM configs (GPT4ALL, Llama.cpp, ...)
+	LLMConfig `yaml:",inline"`

-	RMSNormEps float32 `yaml:"rms_norm_eps"`
-	NGQA       int32   `yaml:"ngqa"`
+	// AutoGPTQ specifics
+	AutoGPTQ AutoGPTQ `yaml:"autogptq"`

-	// AutoGPTQ
+	// Diffusers
+	Diffusers Diffusers `yaml:"diffusers"`
+
+	Step int `yaml:"step"`
+}
+
+type Diffusers struct {
+	PipelineType  string `yaml:"pipeline_type"`
+	SchedulerType string `yaml:"scheduler_type"`
+	CUDA          bool   `yaml:"cuda"`
+}
+
+type LLMConfig struct {
+	SystemPrompt    string   `yaml:"system_prompt"`
+	TensorSplit     string   `yaml:"tensor_split"`
+	MainGPU         string   `yaml:"main_gpu"`
+	RMSNormEps      float32  `yaml:"rms_norm_eps"`
+	NGQA            int32    `yaml:"ngqa"`
+	PromptCachePath string   `yaml:"prompt_cache_path"`
+	PromptCacheAll  bool     `yaml:"prompt_cache_all"`
+	PromptCacheRO   bool     `yaml:"prompt_cache_ro"`
+	MirostatETA     float64  `yaml:"mirostat_eta"`
+	MirostatTAU     float64  `yaml:"mirostat_tau"`
+	Mirostat        int      `yaml:"mirostat"`
+	NGPULayers      int      `yaml:"gpu_layers"`
+	MMap            bool     `yaml:"mmap"`
+	MMlock          bool     `yaml:"mmlock"`
+	LowVRAM         bool     `yaml:"low_vram"`
+	Grammar         string   `yaml:"grammar"`
+	StopWords       []string `yaml:"stopwords"`
+	Cutstrings      []string `yaml:"cutstrings"`
+	TrimSpace       []string `yaml:"trimspace"`
+	ContextSize     int      `yaml:"context_size"`
+	NUMA            bool     `yaml:"numa"`
+}
+
+type AutoGPTQ struct {
 	ModelBaseName    string `yaml:"model_base_name"`
 	Device           string `yaml:"device"`
 	Triton           bool   `yaml:"triton"`
--- a/api/openai/image.go
+++ b/api/openai/image.go
@ -89,7 +89,10 @@ func ImageEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx
 				}

 				mode := 0
-				step := 15
+				step := config.Step
+				if step == 0 {
+					step = 15
+				}

 				if input.Mode != 0 {
 					mode = input.Mode
--- a/api/openai/request.go
+++ b/api/openai/request.go
@ -76,7 +76,7 @@ func updateConfig(config *config.Config, input *OpenAIRequest) {
 	}

 	if input.ModelBaseName != "" {
-		config.ModelBaseName = input.ModelBaseName
+		config.AutoGPTQ.ModelBaseName = input.ModelBaseName
 	}

 	if input.NegativePromptScale != 0 {
@ -149,15 +149,15 @@ func updateConfig(config *config.Config, input *OpenAIRequest) {
 	}

 	if input.Mirostat != 0 {
-		config.Mirostat = input.Mirostat
+		config.LLMConfig.Mirostat = input.Mirostat
 	}

 	if input.MirostatETA != 0 {
-		config.MirostatETA = input.MirostatETA
+		config.LLMConfig.MirostatETA = input.MirostatETA
 	}

 	if input.MirostatTAU != 0 {
-		config.MirostatTAU = input.MirostatTAU
+		config.LLMConfig.MirostatTAU = input.MirostatTAU
 	}

 	if input.TypicalP != 0 {