fix regression #1971 (#1972)

Fixes regression #1971 introduced by intel_extension_for_transformers==1.4: the Intel XPU and OpenVINO imports are moved from module scope into the code paths that use them, so the backend no longer fails at import time.
fakezeta 2024-04-08 22:33:51 +02:00 committed by GitHub
parent efcca15d3f
commit a38618db02

@@ -22,11 +22,7 @@ import torch.cuda
 XPU=os.environ.get("XPU", "0") == "1"
 if XPU:
-    import intel_extension_for_pytorch as ipex
-    from intel_extension_for_transformers.transformers.modeling import AutoModelForCausalLM
     from transformers import AutoTokenizer, AutoModel, set_seed, TextIteratorStreamer
-    from optimum.intel.openvino import OVModelForCausalLM
-    from openvino.runtime import Core
 else:
     from transformers import AutoTokenizer, AutoModel, AutoModelForCausalLM, set_seed, BitsAndBytesConfig, TextIteratorStreamer
@@ -115,6 +111,9 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
         try:
             if request.Type == "AutoModelForCausalLM":
                 if XPU:
+                    import intel_extension_for_pytorch as ipex
+                    from intel_extension_for_transformers.transformers.modeling import AutoModelForCausalLM
                     device_map="xpu"
                     compute=torch.float16
                     if request.Quantization == "xpu_4bit":
@@ -141,6 +140,9 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
                         device_map=device_map,
                         torch_dtype=compute)
             elif request.Type == "OVModelForCausalLM":
+                from optimum.intel.openvino import OVModelForCausalLM
+                from openvino.runtime import Core
                 if "GPU" in Core().available_devices:
                     device_map="GPU"
                 else:
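
The pattern applied here is lazy importing: the optional Intel and OpenVINO dependencies are imported inside the branch that actually uses them rather than at module load, so a broken intel_extension_for_transformers release only affects requests that select that backend. A minimal sketch of the idea, using a hypothetical load_model helper (not the backend's actual API; only the import statements are taken from the diff):

import os
import torch

XPU = os.environ.get("XPU", "0") == "1"

def load_model(model_type: str, model_id: str):
    # Hypothetical helper illustrating the lazy-import pattern from this commit.
    if model_type == "AutoModelForCausalLM" and XPU:
        # Imported only on the XPU path, so a broken
        # intel_extension_for_transformers install cannot break module import.
        import intel_extension_for_pytorch as ipex  # noqa: F401
        from intel_extension_for_transformers.transformers.modeling import AutoModelForCausalLM
        return AutoModelForCausalLM.from_pretrained(
            model_id, device_map="xpu", torch_dtype=torch.float16)
    if model_type == "OVModelForCausalLM":
        # Same idea for the OpenVINO stack: imported only when requested.
        from optimum.intel.openvino import OVModelForCausalLM
        from openvino.runtime import Core
        device = "GPU" if "GPU" in Core().available_devices else "CPU"
        return OVModelForCausalLM.from_pretrained(model_id, device=device)
    # Default path: plain transformers, always safe to import.
    from transformers import AutoModelForCausalLM
    return AutoModelForCausalLM.from_pretrained(model_id)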