feat: add image generation with ncnn-stablediffusion (#272)

2025-05-28 22:44:59 +00:00 · 2023-05-16 19:32:53 +02:00 · 2023-05-16 19:32:53 +02:00 · 9d051c5d4f
commit 9d051c5d4f
parent acd03d15f2
17 changed files with 582 additions and 58 deletions
--- a/api/api.go
+++ b/api/api.go
@ -12,7 +12,7 @@ import (
 	"github.com/rs/zerolog/log"
 )

-func App(configFile string, loader *model.ModelLoader, uploadLimitMB, threads, ctxSize int, f16 bool, debug, disableMessage bool) *fiber.App {
+func App(configFile string, loader *model.ModelLoader, uploadLimitMB, threads, ctxSize int, f16 bool, debug, disableMessage bool, imageDir string) *fiber.App {
 	zerolog.SetGlobalLevel(zerolog.InfoLevel)
 	if debug {
 		zerolog.SetGlobalLevel(zerolog.DebugLevel)
@ -87,6 +87,12 @@ func App(configFile string, loader *model.ModelLoader, uploadLimitMB, threads, c

 	app.Post("/v1/audio/transcriptions", transcriptEndpoint(cm, debug, loader, threads, ctxSize, f16))

+	app.Post("/v1/images/generations", imageEndpoint(cm, debug, loader, imageDir))
+
+	if imageDir != "" {
+		app.Static("/generated-images", imageDir)
+	}
+
 	app.Get("/v1/models", listModels(loader, cm))
 	app.Get("/models", listModels(loader, cm))

--- a/api/api_test.go
+++ b/api/api_test.go
@ -25,7 +25,7 @@ var _ = Describe("API test", func() {
 	Context("API query", func() {
 		BeforeEach(func() {
 			modelLoader = model.NewModelLoader(os.Getenv("MODELS_PATH"))
-			app = App("", modelLoader, 15, 1, 512, false, true, true)
+			app = App("", modelLoader, 15, 1, 512, false, true, true, "")
 			go app.Listen("127.0.0.1:9090")

 			defaultConfig := openai.DefaultConfig("")
@ -140,7 +140,7 @@ var _ = Describe("API test", func() {
 	Context("Config file", func() {
 		BeforeEach(func() {
 			modelLoader = model.NewModelLoader(os.Getenv("MODELS_PATH"))
-			app = App(os.Getenv("CONFIG_FILE"), modelLoader, 5, 1, 512, false, true, true)
+			app = App(os.Getenv("CONFIG_FILE"), modelLoader, 5, 1, 512, false, true, true, "")
 			go app.Listen("127.0.0.1:9090")

 			defaultConfig := openai.DefaultConfig("")
--- a/api/config.go
+++ b/api/config.go
@ -32,6 +32,7 @@ type Config struct {
 	MirostatTAU                 float64           `yaml:"mirostat_tau"`
 	Mirostat                    int               `yaml:"mirostat"`
 	NGPULayers                  int               `yaml:"gpu_layers"`
+	ImageGenerationAssets       string            `yaml:"asset_dir"`
 	PromptStrings, InputStrings []string
 	InputToken                  [][]int
 }
@ -211,12 +212,11 @@ func updateConfig(config *Config, input *OpenAIRequest) {
 		}
 	}
 }
-
-func readConfig(cm ConfigMerger, c *fiber.Ctx, loader *model.ModelLoader, debug bool, threads, ctx int, f16 bool) (*Config, *OpenAIRequest, error) {
+func readInput(c *fiber.Ctx, loader *model.ModelLoader, randomModel bool) (string, *OpenAIRequest, error) {
 	input := new(OpenAIRequest)
 	// Get input data from the request body
 	if err := c.BodyParser(input); err != nil {
-		return nil, nil, err
+		return "", nil, err
 	}

 	modelFile := input.Model
@ -234,14 +234,14 @@ func readConfig(cm ConfigMerger, c *fiber.Ctx, loader *model.ModelLoader, debug
 	bearerExists := bearer != "" && loader.ExistsInModelPath(bearer)

 	// If no model was specified, take the first available
-	if modelFile == "" && !bearerExists {
+	if modelFile == "" && !bearerExists && randomModel {
 		models, _ := loader.ListModels()
 		if len(models) > 0 {
 			modelFile = models[0]
 			log.Debug().Msgf("No model specified, using: %s", modelFile)
 		} else {
 			log.Debug().Msgf("No model specified, returning error")
-			return nil, nil, fmt.Errorf("no model specified")
+			return "", nil, fmt.Errorf("no model specified")
 		}
 	}

@ -250,7 +250,10 @@ func readConfig(cm ConfigMerger, c *fiber.Ctx, loader *model.ModelLoader, debug
 		log.Debug().Msgf("Using model from bearer token: %s", bearer)
 		modelFile = bearer
 	}
+	return modelFile, input, nil
+}

+func readConfig(modelFile string, input *OpenAIRequest, cm ConfigMerger, loader *model.ModelLoader, debug bool, threads, ctx int, f16 bool) (*Config, *OpenAIRequest, error) {
 	// Load a config file if present after the model name
 	modelConfig := filepath.Join(loader.ModelPath, modelFile+".yaml")
 	if _, err := os.Stat(modelConfig); err == nil {
--- a/api/openai.go
+++ b/api/openai.go
@ -3,13 +3,16 @@ package api
 import (
 	"bufio"
 	"bytes"
+	"encoding/base64"
 	"encoding/json"
 	"fmt"
 	"io"
+	"io/ioutil"
 	"net/http"
 	"os"
 	"path"
 	"path/filepath"
+	"strconv"
 	"strings"

 	"github.com/ggerganov/whisper.cpp/bindings/go/pkg/whisper"
@ -43,6 +46,10 @@ type Item struct {
 	Embedding []float32 `json:"embedding"`
 	Index     int       `json:"index"`
 	Object    string    `json:"object,omitempty"`
+
+	// Images
+	URL     string `json:"url,omitempty"`
+	B64JSON string `json:"b64_json,omitempty"`
 }

 type OpenAIResponse struct {
@ -78,11 +85,13 @@ type OpenAIRequest struct {
 	Model string `json:"model" yaml:"model"`

 	// whisper
-	File           string `json:"file" validate:"required"`
+	File     string `json:"file" validate:"required"`
+	Language string `json:"language"`
+	// whisper/image
 	ResponseFormat string `json:"response_format"`
-	Language       string `json:"language"`
-
-	// Prompt is read only by completion API calls
+	// image
+	Size string `json:"size"`
+	// Prompt is read only by completion/image API calls
 	Prompt interface{} `json:"prompt" yaml:"prompt"`

 	// Edit endpoint
@ -116,6 +125,10 @@ type OpenAIRequest struct {
 	Mirostat    int     `json:"mirostat" yaml:"mirostat"`

 	Seed int `json:"seed" yaml:"seed"`
+
+	// Image-only parameters (LocalAI extensions, not part of the OpenAI API)
+	Mode int `json:"mode"`
+	Step int `json:"step"`
 }

 func defaultRequest(modelFile string) OpenAIRequest {
@ -131,7 +144,13 @@ func defaultRequest(modelFile string) OpenAIRequest {
 // https://platform.openai.com/docs/api-reference/completions
 func completionEndpoint(cm ConfigMerger, debug bool, loader *model.ModelLoader, threads, ctx int, f16 bool) func(c *fiber.Ctx) error {
 	return func(c *fiber.Ctx) error {
-		config, input, err := readConfig(cm, c, loader, debug, threads, ctx, f16)
+
+		model, input, err := readInput(c, loader, true)
+		if err != nil {
+			return fmt.Errorf("failed reading parameters from request:%w", err)
+		}
+
+		config, input, err := readConfig(model, input, cm, loader, debug, threads, ctx, f16)
 		if err != nil {
 			return fmt.Errorf("failed reading parameters from request:%w", err)
 		}
@ -182,7 +201,12 @@ func completionEndpoint(cm ConfigMerger, debug bool, loader *model.ModelLoader,
 // https://platform.openai.com/docs/api-reference/embeddings
 func embeddingsEndpoint(cm ConfigMerger, debug bool, loader *model.ModelLoader, threads, ctx int, f16 bool) func(c *fiber.Ctx) error {
 	return func(c *fiber.Ctx) error {
-		config, input, err := readConfig(cm, c, loader, debug, threads, ctx, f16)
+		model, input, err := readInput(c, loader, true)
+		if err != nil {
+			return fmt.Errorf("failed reading parameters from request:%w", err)
+		}
+
+		config, input, err := readConfig(model, input, cm, loader, debug, threads, ctx, f16)
 		if err != nil {
 			return fmt.Errorf("failed reading parameters from request:%w", err)
 		}
@ -249,7 +273,12 @@ func chatEndpoint(cm ConfigMerger, debug bool, loader *model.ModelLoader, thread
 		close(responses)
 	}
 	return func(c *fiber.Ctx) error {
-		config, input, err := readConfig(cm, c, loader, debug, threads, ctx, f16)
+		model, input, err := readInput(c, loader, true)
+		if err != nil {
+			return fmt.Errorf("failed reading parameters from request:%w", err)
+		}
+
+		config, input, err := readConfig(model, input, cm, loader, debug, threads, ctx, f16)
 		if err != nil {
 			return fmt.Errorf("failed reading parameters from request:%w", err)
 		}
@ -349,7 +378,12 @@ func chatEndpoint(cm ConfigMerger, debug bool, loader *model.ModelLoader, thread

 func editEndpoint(cm ConfigMerger, debug bool, loader *model.ModelLoader, threads, ctx int, f16 bool) func(c *fiber.Ctx) error {
 	return func(c *fiber.Ctx) error {
-		config, input, err := readConfig(cm, c, loader, debug, threads, ctx, f16)
+		model, input, err := readInput(c, loader, true)
+		if err != nil {
+			return fmt.Errorf("failed reading parameters from request:%w", err)
+		}
+
+		config, input, err := readConfig(model, input, cm, loader, debug, threads, ctx, f16)
 		if err != nil {
 			return fmt.Errorf("failed reading parameters from request:%w", err)
 		}
@ -398,14 +432,151 @@ func editEndpoint(cm ConfigMerger, debug bool, loader *model.ModelLoader, thread
 	}
 }

-// https://platform.openai.com/docs/api-reference/audio/create
-func transcriptEndpoint(cm ConfigMerger, debug bool, loader *model.ModelLoader, threads, ctx int, f16 bool) func(c *fiber.Ctx) error {
+// imageEndpoint returns the fiber handler for image generation requests.
+//
+// The handler produces one image per entry of the resolved prompt strings.
+// Inside a prompt, "|" separates the positive prompt from an optional
+// negative prompt. "size" must be "<width>x<height>" with positive integers.
+// With response_format "b64_json" the image is returned inline as base64 and
+// the file is removed afterwards; otherwise the file is stored in imageDir
+// and a URL below /generated-images is returned.
+//
+// https://platform.openai.com/docs/api-reference/images/create
+//
+// Example request:
+//
+//	curl http://localhost:8080/v1/images/generations \
+//	  -H "Content-Type: application/json" \
+//	  -d '{
+//	    "prompt": "A cute baby sea otter",
+//	    "n": 1,
+//	    "size": "512x512"
+//	  }'
+func imageEndpoint(cm ConfigMerger, debug bool, loader *model.ModelLoader, imageDir string) func(c *fiber.Ctx) error {
 	return func(c *fiber.Ctx) error {
-		config, input, err := readConfig(cm, c, loader, debug, threads, ctx, f16)
+		m, input, err := readInput(c, loader, false)
 		if err != nil {
 			return fmt.Errorf("failed reading parameters from request:%w", err)
 		}

+		// No model requested: fall back to the stablediffusion backend name.
+		if m == "" {
+			m = model.StableDiffusionBackend
+		}
+		log.Debug().Msgf("Loading model: %+v", m)
+
+		config, input, err := readConfig(m, input, cm, loader, debug, 0, 0, false)
+		if err != nil {
+			return fmt.Errorf("failed reading parameters from request:%w", err)
+		}
+
+		log.Debug().Msgf("Parameter Config: %+v", config)
+
+		// XXX: Only stablediffusion is supported for now
+		if config.Backend == "" {
+			config.Backend = model.StableDiffusionBackend
+		}
+
+		// "size" is "<width>x<height>"; reject anything that is not two
+		// positive integers before it reaches the backend.
+		sizeParts := strings.Split(input.Size, "x")
+		if len(sizeParts) != 2 {
+			return fmt.Errorf("Invalid value for 'size'")
+		}
+		width, err := strconv.Atoi(sizeParts[0])
+		if err != nil || width <= 0 {
+			return fmt.Errorf("Invalid value for 'size'")
+		}
+		height, err := strconv.Atoi(sizeParts[1])
+		if err != nil || height <= 0 {
+			return fmt.Errorf("Invalid value for 'size'")
+		}
+
+		b64JSON := input.ResponseFormat == "b64_json"
+
+		// URL responses are only reachable when an image directory is
+		// configured (the /generated-images route is registered only then).
+		// Fail early instead of returning a dead link and leaking a file in
+		// the default temp directory.
+		if !b64JSON && imageDir == "" {
+			return fmt.Errorf("no image directory configured: only response_format 'b64_json' is available")
+		}
+
+		// mode and step are request extensions; step falls back to 15 when
+		// unset, mode keeps its zero value.
+		mode := input.Mode
+		step := input.Step
+		if step == 0 {
+			step = 15
+		}
+
+		// Base64 responses only need a scratch file (default temp dir);
+		// URL responses must live in imageDir.
+		tempDir := ""
+		if !b64JSON {
+			tempDir = imageDir
+		}
+		baseURL := c.BaseURL()
+
+		var result []Item
+		for _, i := range config.PromptStrings {
+			prompts := strings.Split(i, "|")
+			positivePrompt := prompts[0]
+			negativePrompt := ""
+			if len(prompts) > 1 {
+				negativePrompt = prompts[1]
+			}
+
+			// Reserve a unique file name, then give it a .png extension.
+			outputFile, err := ioutil.TempFile(tempDir, "b64")
+			if err != nil {
+				return err
+			}
+			outputFile.Close()
+			output := outputFile.Name() + ".png"
+			if err := os.Rename(outputFile.Name(), output); err != nil {
+				// Best-effort cleanup of the placeholder file.
+				os.Remove(outputFile.Name())
+				return err
+			}
+
+			fn, err := ImageGeneration(height, width, mode, step, input.Seed, positivePrompt, negativePrompt, output, loader, *config)
+			if err != nil {
+				// Best-effort cleanup: do not leave an empty image behind.
+				os.Remove(output)
+				return err
+			}
+			if err := fn(); err != nil {
+				os.Remove(output)
+				return err
+			}
+
+			item := Item{}
+			if b64JSON {
+				data, err := os.ReadFile(output)
+				// The scratch file is no longer needed once read; remove it
+				// right away rather than deferring inside the loop.
+				os.Remove(output)
+				if err != nil {
+					return err
+				}
+				item.B64JSON = base64.StdEncoding.EncodeToString(data)
+			} else {
+				item.URL = baseURL + "/generated-images/" + filepath.Base(output)
+			}
+
+			result = append(result, item)
+		}
+
+		resp := &OpenAIResponse{
+			Data: result,
+		}
+
+		// Marshalled only for the debug log; a failure here is not fatal.
+		jsonResult, _ := json.Marshal(resp)
+		log.Debug().Msgf("Response: %s", jsonResult)
+
+		// Return the prediction in the response body
+		return c.JSON(resp)
+	}
+}
+
+// https://platform.openai.com/docs/api-reference/audio/create
+func transcriptEndpoint(cm ConfigMerger, debug bool, loader *model.ModelLoader, threads, ctx int, f16 bool) func(c *fiber.Ctx) error {
+	return func(c *fiber.Ctx) error {
+		m, input, err := readInput(c, loader, false)
+		if err != nil {
+			return fmt.Errorf("failed reading parameters from request:%w", err)
+		}
+
+		config, input, err := readConfig(m, input, cm, loader, debug, threads, ctx, f16)
+		if err != nil {
+			return fmt.Errorf("failed reading parameters from request:%w", err)
+		}
 		// retrieve the file data from the request
 		file, err := c.FormFile("file")
 		if err != nil {
--- a/api/prediction.go
+++ b/api/prediction.go
@ -8,11 +8,12 @@ import (

 	"github.com/donomii/go-rwkv.cpp"
 	model "github.com/go-skynet/LocalAI/pkg/model"
+	"github.com/go-skynet/LocalAI/pkg/stablediffusion"
 	"github.com/go-skynet/bloomz.cpp"
 	bert "github.com/go-skynet/go-bert.cpp"
 	gpt2 "github.com/go-skynet/go-gpt2.cpp"
 	llama "github.com/go-skynet/go-llama.cpp"
-	gpt4all "github.com/nomic/gpt4all/gpt4all-bindings/golang"
+	gpt4all "github.com/nomic-ai/gpt4all/gpt4all-bindings/golang"
 )

 // mutex still needed, see: https://github.com/ggerganov/llama.cpp/discussions/784
@ -38,6 +39,45 @@ func defaultLLamaOpts(c Config) []llama.ModelOption {
 	return llamaOpts
 }

+// ImageGeneration prepares an image generation job for the backend named in
+// c.Backend and returns it as a closure; nothing is generated until the
+// closure is called.
+//
+// It fails immediately when c.Backend is not the stablediffusion backend or
+// when the backend cannot be loaded. The returned closure takes the
+// per-backend mutex, then asks the model to generate an image from the given
+// dimensions, mode, step count, seed and prompts, passing dst as the
+// destination. If the loaded backend is not a stablediffusion model the
+// closure returns an error instead.
+func ImageGeneration(height, width, mode, step, seed int, positive_prompt, negative_prompt, dst string, loader *model.ModelLoader, c Config) (func() error, error) {
+	if c.Backend != model.StableDiffusionBackend {
+		return nil, fmt.Errorf("endpoint only working with stablediffusion models")
+	}
+
+	backend, err := loader.BackendLoader(c.Backend, c.ImageGenerationAssets, []llama.ModelOption{}, uint32(c.Threads))
+	if err != nil {
+		return nil, err
+	}
+
+	// Start from the "unsupported" job and replace it when the loaded backend
+	// really is a stablediffusion model.
+	generate := func() error {
+		return fmt.Errorf("creation of images not supported by the backend")
+	}
+	if sd, ok := backend.(*stablediffusion.StableDiffusion); ok {
+		generate = func() error {
+			return sd.GenerateImage(height, width, mode, step, seed, positive_prompt, negative_prompt, dst)
+		}
+	}
+
+	return func() error {
+		// This is still needed, see: https://github.com/ggerganov/llama.cpp/discussions/784
+		// Look up (or lazily create) the mutex for this backend while
+		// holding the map lock.
+		mutexMap.Lock()
+		backendLock, found := mutexes[c.Backend]
+		if !found {
+			backendLock = &sync.Mutex{}
+			mutexes[c.Backend] = backendLock
+		}
+		mutexMap.Unlock()
+
+		backendLock.Lock()
+		defer backendLock.Unlock()
+
+		return generate()
+	}, nil
+}
+
 func ModelEmbedding(s string, tokens []int, loader *model.ModelLoader, c Config) (func() ([]float32, error), error) {
 	if !c.Embeddings {
 		return nil, fmt.Errorf("endpoint disabled for this model by API configuration")