feat(intel): add diffusers/transformers support (#1746)

* feat(intel): add diffusers support
* try to consume upstream container image
* Debug
* Manually install deps
* Map transformers/hf cache dir to modelpath if not specified
* fix(compel): update initialization, pass by all gRPC options
* fix: add dependencies, implement transformers for xpu
* base it from the oneapi image
* Add pillow
* set threads if specified when launching the API
* Skip conda install if intel
* defaults to non-intel
* ci: add to pipelines
* prepare compel only if enabled
* Skip conda install if intel
* fix cleanup
* Disable compel by default
* Install torch 2.1.0 with Intel
* Skip conda on some setups
* Detect python
* Quiet output
* Do not override system python with conda
* Prefer python3
* Fixups
* exllama2: do not install without conda (overrides pytorch version)
* exllama/exllama2: do not install if not using cuda
* Add missing dataset dependency
* Small fixups, symlink to python, add requirements
* Add neural_speed to the deps
* correctly handle model offloading
* fix: device_map == xpu
* go back to calling python, fixed at dockerfile level
* Exllama2 restricted to only nvidia gpus
* Tokenizer to xpu
2025-05-27 22:15:00 +00:00 · 2024-03-07 14:37:45 +01:00 · 2024-03-07 14:37:45 +01:00 · 5d1018495f
commit 5d1018495f
parent ad6fd7a991
23 changed files with 250 additions and 81 deletions
--- a/core/backend/image.go
+++ b/core/backend/image.go
@@ -8,27 +8,18 @@ import (
 )

 func ImageGeneration(height, width, mode, step, seed int, positive_prompt, negative_prompt, src, dst string, loader *model.ModelLoader, backendConfig config.BackendConfig, appConfig *config.ApplicationConfig) (func() error, error) {
-
+	threads := backendConfig.Threads
+	if threads == 0 && appConfig.Threads != 0 {
+		threads = appConfig.Threads
+	}
+	gRPCOpts := gRPCModelOpts(backendConfig)
 	opts := modelOpts(backendConfig, appConfig, []model.Option{
 		model.WithBackendString(backendConfig.Backend),
 		model.WithAssetDir(appConfig.AssetsDestination),
-		model.WithThreads(uint32(backendConfig.Threads)),
+		model.WithThreads(uint32(threads)),
 		model.WithContext(appConfig.Context),
 		model.WithModel(backendConfig.Model),
-		model.WithLoadGRPCLoadModelOpts(&proto.ModelOptions{
-			CUDA:          backendConfig.CUDA || backendConfig.Diffusers.CUDA,
-			SchedulerType: backendConfig.Diffusers.SchedulerType,
-			PipelineType:  backendConfig.Diffusers.PipelineType,
-			CFGScale:      backendConfig.Diffusers.CFGScale,
-			LoraAdapter:   backendConfig.LoraAdapter,
-			LoraScale:     backendConfig.LoraScale,
-			LoraBase:      backendConfig.LoraBase,
-			IMG2IMG:       backendConfig.Diffusers.IMG2IMG,
-			CLIPModel:     backendConfig.Diffusers.ClipModel,
-			CLIPSubfolder: backendConfig.Diffusers.ClipSubFolder,
-			CLIPSkip:      int32(backendConfig.Diffusers.ClipSkip),
-			ControlNet:    backendConfig.Diffusers.ControlNet,
-		}),
+		model.WithLoadGRPCLoadModelOpts(gRPCOpts),
 	})

 	inferenceModel, err := loader.BackendLoader(