From b425a870b0e2f301d87f626a3c1c2da3bb150bb1 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto <mudler@users.noreply.github.com>
Date: Wed, 6 Nov 2024 08:53:02 +0100
Subject: [PATCH] fix(diffusers): correctly parse height and width request
 without parametrization (#4082)

* fix(diffusers): allow specifying width and height without enable-parameters

Let's simplify usage by not gating width and height behind EnableParameters

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* chore: use sane defaults

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

---------

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
---
 backend/python/diffusers/backend.py | 13 +++++++++----
 core/http/endpoints/openai/image.go |  5 +++++
 2 files changed, 14 insertions(+), 4 deletions(-)

diff --git a/backend/python/diffusers/backend.py b/backend/python/diffusers/backend.py
index 1469043b..f1b447b4 100755
--- a/backend/python/diffusers/backend.py
+++ b/backend/python/diffusers/backend.py
@@ -409,8 +409,6 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
         # create a dictionary of values for the parameters
         options = {
             "negative_prompt": request.negative_prompt,
-            "width": request.width,
-            "height": request.height,
             "num_inference_steps": steps,
         }
 
@@ -428,13 +426,13 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
         keys = options.keys()
 
         if request.EnableParameters != "":
-            keys = request.EnableParameters.split(",")
+            keys = [key.strip() for key in request.EnableParameters.split(",")]
 
         if request.EnableParameters == "none":
             keys = []
 
         # create a dictionary of parameters by using the keys from EnableParameters and the values from defaults
-        kwargs = {key: options[key] for key in keys}
+        kwargs = {key: options.get(key) for key in keys if key in options}
 
         # Set seed
         if request.seed > 0:
@@ -445,6 +443,12 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
         if self.PipelineType == "FluxPipeline":
             kwargs["max_sequence_length"] = 256
 
+        if request.width:
+            kwargs["width"] = request.width
+
+        if request.height:
+            kwargs["height"] = request.height
+
         if self.PipelineType == "FluxTransformer2DModel":
             kwargs["output_type"] = "pil"
             kwargs["generator"] = torch.Generator("cpu").manual_seed(0)
@@ -464,6 +468,7 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
             export_to_video(video_frames, request.dst)
             return backend_pb2.Result(message="Media generated successfully", success=True)
 
+        print(f"Generating image with {kwargs=}", file=sys.stderr)
         image = {}
         if COMPEL:
             conditioning, pooled = self.compel.build_conditioning_tensor(prompt)
diff --git a/core/http/endpoints/openai/image.go b/core/http/endpoints/openai/image.go
index 6c76ba84..3fdb64d4 100644
--- a/core/http/endpoints/openai/image.go
+++ b/core/http/endpoints/openai/image.go
@@ -136,6 +136,11 @@ func ImageEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appCon
 			config.Backend = model.StableDiffusionBackend
 		}
 
+		if !strings.Contains(input.Size, "x") {
+			input.Size = "512x512"
+			log.Warn().Msgf("Invalid size, using default 512x512")
+		}
+
 		sizeParts := strings.Split(input.Size, "x")
 		if len(sizeParts) != 2 {
 			return fmt.Errorf("invalid value for 'size'")