feat(intel): add diffusers/transformers support (#1746)

* feat(intel): add diffusers support
* try to consume upstream container image
* Debug
* Manually install deps
* Map transformers/hf cache dir to modelpath if not specified
* fix(compel): update initialization, pass by all gRPC options
* fix: add dependencies, implement transformers for xpu
* base it from the oneapi image
* Add pillow
* set threads if specified when launching the API
* Skip conda install if intel
* defaults to non-intel
* ci: add to pipelines
* prepare compel only if enabled
* Skip conda install if intel
* fix cleanup
* Disable compel by default
* Install torch 2.1.0 with Intel
* Skip conda on some setups
* Detect python
* Quiet output
* Do not override system python with conda
* Prefer python3
* Fixups
* exllama2: do not install without conda (overrides pytorch version)
* exllama/exllama2: do not install if not using cuda
* Add missing dataset dependency
* Small fixups, symlink to python, add requirements
* Add neural_speed to the deps
* correctly handle model offloading
* fix: device_map == xpu
* go back to calling python, fixed at dockerfile level
* Exllama2 restricted to only nvidia gpus
* Tokenizer to xpu
2025-05-25 13:04:59 +00:00 · 2024-03-07 14:37:45 +01:00 · 2024-03-07 14:37:45 +01:00 · 5d1018495f
commit 5d1018495f
parent ad6fd7a991
23 changed files with 250 additions and 81 deletions
--- a/core/backend/options.go
+++ b/core/backend/options.go
@@ -40,11 +40,23 @@ func gRPCModelOpts(c config.BackendConfig) *pb.ModelOptions {
 	}

 	return &pb.ModelOptions{
+		CUDA:                 c.CUDA || c.Diffusers.CUDA,
+		SchedulerType:        c.Diffusers.SchedulerType,
+		PipelineType:         c.Diffusers.PipelineType,
+		CFGScale:             c.Diffusers.CFGScale,
+		LoraAdapter:          c.LoraAdapter,
+		LoraScale:            c.LoraScale,
+		F16Memory:            c.F16,
+		LoraBase:             c.LoraBase,
+		IMG2IMG:              c.Diffusers.IMG2IMG,
+		CLIPModel:            c.Diffusers.ClipModel,
+		CLIPSubfolder:        c.Diffusers.ClipSubFolder,
+		CLIPSkip:             int32(c.Diffusers.ClipSkip),
+		ControlNet:           c.Diffusers.ControlNet,
 		ContextSize:          int32(c.ContextSize),
 		Seed:                 int32(c.Seed),
 		NBatch:               int32(b),
 		NoMulMatQ:            c.NoMulMatQ,
-		CUDA:                 c.CUDA, // diffusers, transformers
 		DraftModel:           c.DraftModel,
 		AudioPath:            c.VallE.AudioPath,
 		Quantization:         c.Quantization,
@@ -58,12 +70,8 @@ func gRPCModelOpts(c config.BackendConfig) *pb.ModelOptions {
 		YarnAttnFactor:       c.YarnAttnFactor,
 		YarnBetaFast:         c.YarnBetaFast,
 		YarnBetaSlow:         c.YarnBetaSlow,
-		LoraAdapter:          c.LoraAdapter,
-		LoraBase:             c.LoraBase,
-		LoraScale:            c.LoraScale,
 		NGQA:                 c.NGQA,
 		RMSNormEps:           c.RMSNormEps,
-		F16Memory:            c.F16,
 		MLock:                c.MMlock,
 		RopeFreqBase:         c.RopeFreqBase,
 		RopeScaling:          c.RopeScaling,