mirror of
https://github.com/mudler/LocalAI.git
synced 2025-05-20 10:35:01 +00:00
feat(intel): add diffusers/transformers support (#1746)
* feat(intel): add diffusers support * try to consume upstream container image * Debug * Manually install deps * Map transformers/hf cache dir to modelpath if not specified * fix(compel): update initialization, pass by all gRPC options * fix: add dependencies, implement transformers for xpu * base it from the oneapi image * Add pillow * set threads if specified when launching the API * Skip conda install if intel * defaults to non-intel * ci: add to pipelines * prepare compel only if enabled * Skip conda install if intel * fix cleanup * Disable compel by default * Install torch 2.1.0 with Intel * Skip conda on some setups * Detect python * Quiet output * Do not override system python with conda * Prefer python3 * Fixups * exllama2: do not install without conda (overrides pytorch version) * exllama/exllama2: do not install if not using cuda * Add missing dataset dependency * Small fixups, symlink to python, add requirements * Add neural_speed to the deps * correctly handle model offloading * fix: device_map == xpu * go back at calling python, fixed at dockerfile level * Exllama2 restricted to only nvidia gpus * Tokenizer to xpu
This commit is contained in:
parent
ad6fd7a991
commit
5d1018495f
23 changed files with 250 additions and 81 deletions
|
@ -8,6 +8,13 @@ ifeq ($(BUILD_TYPE), hipblas)
|
|||
CONDA_ENV_PATH = "transformers-rocm.yml"
|
||||
endif
|
||||
|
||||
# Intel GPU builds are expected to have their dependencies installed in the main Python
|
||||
# environment, so we skip conda installation for SYCL builds.
|
||||
# https://github.com/intel/intel-extension-for-pytorch/issues/538
|
||||
ifneq (,$(findstring sycl,$(BUILD_TYPE)))
|
||||
export SKIP_CONDA=1
|
||||
endif
|
||||
|
||||
.PHONY: transformers
|
||||
transformers:
|
||||
@echo "Installing $(CONDA_ENV_PATH)..."
|
||||
|
|
|
@ -1,24 +1,38 @@
|
|||
#!/bin/bash
|
||||
set -ex
|
||||
|
||||
SKIP_CONDA=${SKIP_CONDA:-0}
|
||||
|
||||
# Check whether the conda environment exists (note: returns success when it does NOT exist)
|
||||
conda_env_exists(){
|
||||
! conda list --name "${@}" >/dev/null 2>/dev/null
|
||||
}
|
||||
|
||||
if conda_env_exists "transformers" ; then
|
||||
echo "Creating virtual environment..."
|
||||
conda env create --name transformers --file $1
|
||||
echo "Virtual environment created."
|
||||
else
|
||||
echo "Virtual environment already exists."
|
||||
if [ $SKIP_CONDA -eq 1 ]; then
|
||||
echo "Skipping conda environment installation"
|
||||
else
|
||||
export PATH=$PATH:/opt/conda/bin
|
||||
if conda_env_exists "transformers" ; then
|
||||
echo "Creating virtual environment..."
|
||||
conda env create --name transformers --file $1
|
||||
echo "Virtual environment created."
|
||||
else
|
||||
echo "Virtual environment already exists."
|
||||
fi
|
||||
fi
|
||||
|
||||
if [ -d "/opt/intel" ]; then
|
||||
# Intel GPU: If the directory exists, we assume we are using the Intel image
|
||||
# (no conda env)
|
||||
# https://github.com/intel/intel-extension-for-pytorch/issues/538
|
||||
pip install intel-extension-for-transformers datasets sentencepiece tiktoken neural_speed
|
||||
fi
|
||||
|
||||
if [ "$PIP_CACHE_PURGE" = true ] ; then
|
||||
export PATH=$PATH:/opt/conda/bin
|
||||
|
||||
# Activate conda environment
|
||||
source activate transformers
|
||||
if [ $SKIP_CONDA -eq 0 ]; then
|
||||
# Activate conda environment
|
||||
source activate transformers
|
||||
fi
|
||||
|
||||
pip cache purge
|
||||
fi
|
|
@ -4,6 +4,13 @@ ifeq ($(BUILD_TYPE), hipblas)
|
|||
export CONDA_ENV_PATH = "diffusers-rocm.yml"
|
||||
endif
|
||||
|
||||
# Intel GPU builds are expected to have their dependencies installed in the main Python
|
||||
# environment, so we skip conda installation for SYCL builds.
|
||||
# https://github.com/intel/intel-extension-for-pytorch/issues/538
|
||||
ifneq (,$(findstring sycl,$(BUILD_TYPE)))
|
||||
export SKIP_CONDA=1
|
||||
endif
|
||||
|
||||
.PHONY: diffusers
|
||||
diffusers:
|
||||
@echo "Installing $(CONDA_ENV_PATH)..."
|
||||
|
|
|
@ -21,14 +21,15 @@ from diffusers import StableDiffusionXLPipeline, StableDiffusionDepth2ImgPipelin
|
|||
from diffusers import StableDiffusionImg2ImgPipeline, AutoPipelineForText2Image, ControlNetModel, StableVideoDiffusionPipeline
|
||||
from diffusers.pipelines.stable_diffusion import safety_checker
|
||||
from diffusers.utils import load_image,export_to_video
|
||||
from compel import Compel
|
||||
from compel import Compel, ReturnedEmbeddingsType
|
||||
|
||||
from transformers import CLIPTextModel
|
||||
from safetensors.torch import load_file
|
||||
|
||||
|
||||
_ONE_DAY_IN_SECONDS = 60 * 60 * 24
|
||||
COMPEL=os.environ.get("COMPEL", "1") == "1"
|
||||
COMPEL=os.environ.get("COMPEL", "0") == "1"
|
||||
XPU=os.environ.get("XPU", "0") == "1"
|
||||
CLIPSKIP=os.environ.get("CLIPSKIP", "1") == "1"
|
||||
SAFETENSORS=os.environ.get("SAFETENSORS", "1") == "1"
|
||||
CHUNK_SIZE=os.environ.get("CHUNK_SIZE", "8")
|
||||
|
@ -36,6 +37,10 @@ FPS=os.environ.get("FPS", "7")
|
|||
DISABLE_CPU_OFFLOAD=os.environ.get("DISABLE_CPU_OFFLOAD", "0") == "1"
|
||||
FRAMES=os.environ.get("FRAMES", "64")
|
||||
|
||||
if XPU:
|
||||
import intel_extension_for_pytorch as ipex
|
||||
print(ipex.xpu.get_device_name(0))
|
||||
|
||||
# If PYTHON_GRPC_MAX_WORKERS is specified in the environment, use it; otherwise default to 1
|
||||
MAX_WORKERS = int(os.environ.get('PYTHON_GRPC_MAX_WORKERS', '1'))
|
||||
|
||||
|
@ -231,8 +236,13 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
|
|||
if request.SchedulerType != "":
|
||||
self.pipe.scheduler = get_scheduler(request.SchedulerType, self.pipe.scheduler.config)
|
||||
|
||||
if not self.img2vid:
|
||||
self.compel = Compel(tokenizer=self.pipe.tokenizer, text_encoder=self.pipe.text_encoder)
|
||||
if COMPEL:
|
||||
self.compel = Compel(
|
||||
tokenizer=[self.pipe.tokenizer, self.pipe.tokenizer_2 ],
|
||||
text_encoder=[self.pipe.text_encoder, self.pipe.text_encoder_2],
|
||||
returned_embeddings_type=ReturnedEmbeddingsType.PENULTIMATE_HIDDEN_STATES_NON_NORMALIZED,
|
||||
requires_pooled=[False, True]
|
||||
)
|
||||
|
||||
|
||||
if request.ControlNet:
|
||||
|
@ -247,6 +257,8 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
|
|||
self.pipe.to('cuda')
|
||||
if self.controlnet:
|
||||
self.controlnet.to('cuda')
|
||||
if XPU:
|
||||
self.pipe = self.pipe.to("xpu")
|
||||
# Assume directory from request.ModelFile.
|
||||
# Only if request.LoraAdapter is not an absolute path
|
||||
if request.LoraAdapter and request.ModelFile != "" and not os.path.isabs(request.LoraAdapter) and request.LoraAdapter:
|
||||
|
@ -386,8 +398,9 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
|
|||
|
||||
image = {}
|
||||
if COMPEL:
|
||||
conditioning = self.compel.build_conditioning_tensor(prompt)
|
||||
kwargs["prompt_embeds"]= conditioning
|
||||
conditioning, pooled = self.compel.build_conditioning_tensor(prompt)
|
||||
kwargs["prompt_embeds"] = conditioning
|
||||
kwargs["pooled_prompt_embeds"] = pooled
|
||||
# pass the kwargs dictionary to the self.pipe method
|
||||
image = self.pipe(
|
||||
guidance_scale=self.cfg_scale,
|
||||
|
|
|
@ -1,24 +1,50 @@
|
|||
#!/bin/bash
|
||||
set -ex
|
||||
|
||||
SKIP_CONDA=${SKIP_CONDA:-0}
|
||||
|
||||
# Check whether the conda environment exists (note: returns success when it does NOT exist)
|
||||
conda_env_exists(){
|
||||
! conda list --name "${@}" >/dev/null 2>/dev/null
|
||||
}
|
||||
|
||||
if conda_env_exists "diffusers" ; then
|
||||
echo "Creating virtual environment..."
|
||||
conda env create --name diffusers --file $1
|
||||
echo "Virtual environment created."
|
||||
else
|
||||
echo "Virtual environment already exists."
|
||||
if [ $SKIP_CONDA -eq 1 ]; then
|
||||
echo "Skipping conda environment installation"
|
||||
else
|
||||
export PATH=$PATH:/opt/conda/bin
|
||||
if conda_env_exists "diffusers" ; then
|
||||
echo "Creating virtual environment..."
|
||||
conda env create --name diffusers --file $1
|
||||
echo "Virtual environment created."
|
||||
else
|
||||
echo "Virtual environment already exists."
|
||||
fi
|
||||
fi
|
||||
|
||||
if [ -d "/opt/intel" ]; then
|
||||
# Intel GPU: If the directory exists, we assume we are using the Intel image
|
||||
# https://github.com/intel/intel-extension-for-pytorch/issues/538
|
||||
pip install torch==2.1.0a0 \
|
||||
torchvision==0.16.0a0 \
|
||||
torchaudio==2.1.0a0 \
|
||||
intel-extension-for-pytorch==2.1.10+xpu \
|
||||
--extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
|
||||
|
||||
pip install google-api-python-client \
|
||||
grpcio \
|
||||
grpcio-tools \
|
||||
diffusers==0.24.0 \
|
||||
transformers>=4.25.1 \
|
||||
accelerate \
|
||||
compel==2.0.2 \
|
||||
Pillow
|
||||
fi
|
||||
|
||||
if [ "$PIP_CACHE_PURGE" = true ] ; then
|
||||
export PATH=$PATH:/opt/conda/bin
|
||||
|
||||
# Activate conda environment
|
||||
source activate diffusers
|
||||
if [ $SKIP_CONDA -ne 1 ]; then
|
||||
# Activate conda environment
|
||||
source activate diffusers
|
||||
fi
|
||||
|
||||
pip cache purge
|
||||
fi
|
|
@ -3,10 +3,15 @@
|
|||
##
|
||||
## A bash script wrapper that runs the diffusers server with conda
|
||||
|
||||
export PATH=$PATH:/opt/conda/bin
|
||||
|
||||
# Activate conda environment
|
||||
source activate diffusers
|
||||
if [ -d "/opt/intel" ]; then
|
||||
# Assumes we are using the Intel oneAPI container image
|
||||
# https://github.com/intel/intel-extension-for-pytorch/issues/538
|
||||
export XPU=1
|
||||
else
|
||||
export PATH=$PATH:/opt/conda/bin
|
||||
# Activate conda environment
|
||||
source activate diffusers
|
||||
fi
|
||||
|
||||
# get the directory where the bash script is located
|
||||
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
|
||||
|
|
|
@ -3,6 +3,11 @@ set -ex
|
|||
|
||||
export PATH=$PATH:/opt/conda/bin
|
||||
|
||||
if [ "$BUILD_TYPE" != "cublas" ]; then
|
||||
echo "[exllama] Attention!!! Nvidia GPU is required - skipping installation"
|
||||
exit 0
|
||||
fi
|
||||
|
||||
# Check whether the conda environment exists (note: returns success when it does NOT exist)
|
||||
conda_env_exists(){
|
||||
! conda list --name "${@}" >/dev/null 2>/dev/null
|
||||
|
|
|
@ -2,10 +2,14 @@
|
|||
set -e
|
||||
##
|
||||
## A bash script that installs the required dependencies of exllamav2 and prepares the environment
|
||||
export PATH=$PATH:/opt/conda/bin
|
||||
export SHA=c0ddebaaaf8ffd1b3529c2bb654e650bce2f790f
|
||||
|
||||
# Activate conda environment
|
||||
if [ "$BUILD_TYPE" != "cublas" ]; then
|
||||
echo "[exllamav2] Attention!!! Nvidia GPU is required - skipping installation"
|
||||
exit 0
|
||||
fi
|
||||
|
||||
export PATH=$PATH:/opt/conda/bin
|
||||
source activate transformers
|
||||
|
||||
echo $CONDA_PREFIX
|
||||
|
|
|
@ -2,13 +2,14 @@
|
|||
set -e
|
||||
##
|
||||
## A bash script that installs the required dependencies of mamba and prepares the environment
|
||||
export PATH=$PATH:/opt/conda/bin
|
||||
|
||||
if [ "$BUILD_TYPE" != "cublas" ]; then
|
||||
echo "[mamba] Attention!!! nvcc is required - skipping installation"
|
||||
exit 0
|
||||
fi
|
||||
|
||||
export PATH=$PATH:/opt/conda/bin
|
||||
|
||||
# Activate conda environment
|
||||
source activate transformers
|
||||
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
.PHONY: petals
|
||||
petals:
|
||||
@echo "Creating virtual environment..."
|
||||
@conda env create --name petals --file petals.yml
|
||||
bash install.sh "petals.yml"
|
||||
@echo "Virtual environment created."
|
||||
|
||||
.PHONY: run
|
||||
|
|
5
backend/python/petals/install.sh
Normal file
5
backend/python/petals/install.sh
Normal file
|
@ -0,0 +1,5 @@
|
|||
#!/bin/bash
|
||||
|
||||
export PATH=$PATH:/opt/conda/bin
|
||||
|
||||
conda env create --name petals --file $1
|
|
@ -3,10 +3,16 @@
|
|||
##
|
||||
## A bash script wrapper that runs the transformers server with conda
|
||||
|
||||
export PATH=$PATH:/opt/conda/bin
|
||||
|
||||
# Activate conda environment
|
||||
source activate transformers
|
||||
if [ -d "/opt/intel" ]; then
|
||||
# Assumes we are using the Intel oneAPI container image
|
||||
# https://github.com/intel/intel-extension-for-pytorch/issues/538
|
||||
export XPU=1
|
||||
else
|
||||
export PATH=$PATH:/opt/conda/bin
|
||||
# Activate conda environment
|
||||
source activate transformers
|
||||
fi
|
||||
|
||||
# get the directory where the bash script is located
|
||||
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
|
||||
|
|
|
@ -16,7 +16,15 @@ import backend_pb2_grpc
|
|||
import grpc
|
||||
import torch
|
||||
import torch.cuda
|
||||
from transformers import AutoTokenizer, AutoModel, AutoModelForCausalLM, set_seed
|
||||
|
||||
XPU=os.environ.get("XPU", "0") == "1"
|
||||
if XPU:
|
||||
import intel_extension_for_pytorch as ipex
|
||||
from intel_extension_for_transformers.transformers.modeling import AutoModelForCausalLM
|
||||
from transformers import AutoTokenizer, AutoModel, set_seed
|
||||
else:
|
||||
from transformers import AutoTokenizer, AutoModel, AutoModelForCausalLM, set_seed
|
||||
|
||||
|
||||
_ONE_DAY_IN_SECONDS = 60 * 60 * 24
|
||||
|
||||
|
@ -69,12 +77,25 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
|
|||
model_name = request.Model
|
||||
try:
|
||||
if request.Type == "AutoModelForCausalLM":
|
||||
self.model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=request.TrustRemoteCode)
|
||||
if XPU:
|
||||
self.model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=request.TrustRemoteCode,
|
||||
device_map="xpu", load_in_4bit=True)
|
||||
else:
|
||||
self.model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=request.TrustRemoteCode)
|
||||
else:
|
||||
self.model = AutoModel.from_pretrained(model_name, trust_remote_code=request.TrustRemoteCode)
|
||||
|
||||
self.tokenizer = AutoTokenizer.from_pretrained(model_name)
|
||||
self.CUDA = False
|
||||
self.XPU = False
|
||||
|
||||
if XPU:
|
||||
self.XPU = True
|
||||
try:
|
||||
print("Optimizing model", model_name, "to XPU.", file=sys.stderr)
|
||||
self.model = ipex.optimize_transformers(self.model, inplace=True, dtype=torch.float16, device="xpu")
|
||||
except Exception as err:
|
||||
print("Not using XPU:", err, file=sys.stderr)
|
||||
|
||||
if request.CUDA or torch.cuda.is_available():
|
||||
try:
|
||||
|
@ -139,6 +160,8 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
|
|||
inputs = self.tokenizer(request.Prompt, return_tensors="pt").input_ids
|
||||
if self.CUDA:
|
||||
inputs = inputs.to("cuda")
|
||||
if XPU:
|
||||
inputs = inputs.to("xpu")
|
||||
|
||||
outputs = self.model.generate(inputs,max_new_tokens=max_tokens, temperature=request.Temperature, top_p=request.TopP)
|
||||
|
||||
|
|
|
@ -1,3 +1,7 @@
|
|||
ifneq (,$(findstring sycl,$(BUILD_TYPE)))
|
||||
export SKIP_CONDA=1
|
||||
endif
|
||||
|
||||
.PHONY: ttsvalle
|
||||
ttsvalle:
|
||||
$(MAKE) -C ../common-env/transformers
|
||||
|
|
|
@ -2,13 +2,16 @@
|
|||
|
||||
##
|
||||
## A bash script that installs the required dependencies of VALL-E-X and prepares the environment
|
||||
export PATH=$PATH:/opt/conda/bin
|
||||
export SHA=3faaf8ccadb154d63b38070caf518ce9309ea0f4
|
||||
|
||||
# Activate conda environment
|
||||
source activate transformers
|
||||
SKIP_CONDA=${SKIP_CONDA:-0}
|
||||
|
||||
echo $CONDA_PREFIX
|
||||
if [ $SKIP_CONDA -ne 1 ]; then
|
||||
source activate transformers
|
||||
else
|
||||
export PATH=$PATH:/opt/conda/bin
|
||||
CONDA_PREFIX=$PWD
|
||||
fi
|
||||
|
||||
git clone https://github.com/Plachtaa/VALL-E-X.git $CONDA_PREFIX/vall-e-x && pushd $CONDA_PREFIX/vall-e-x && git checkout -b build $SHA && popd
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue