Some adaptations for the MusicgenForConditionalGeneration type

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
Ettore Di Giacinto 2025-01-17 16:29:34 +01:00
parent 70376131cb
commit f6e309dd4d
2 changed files with 14 additions and 13 deletions


@@ -206,19 +206,22 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
                                                        torch_dtype=compute)
             if request.ContextSize > 0:
                 self.max_tokens = request.ContextSize
-            else:
+            elif request.Type != "MusicgenForConditionalGeneration":
                 self.max_tokens = self.model.config.max_position_embeddings
+            else:
+                self.max_tokens = 512
 
-            self.tokenizer = AutoTokenizer.from_pretrained(model_name, use_safetensors=True)
-            self.XPU = False
+            if request.Type != "MusicgenForConditionalGeneration":
+                self.tokenizer = AutoTokenizer.from_pretrained(model_name, use_safetensors=True)
+                self.XPU = False
 
-            if XPU and self.OV == False:
-                self.XPU = True
-                try:
-                    print("Optimizing model", model_name, "to XPU.", file=sys.stderr)
-                    self.model = ipex.optimize_transformers(self.model, inplace=True, dtype=torch.float16, device="xpu")
-                except Exception as err:
-                    print("Not using XPU:", err, file=sys.stderr)
+                if XPU and self.OV == False:
+                    self.XPU = True
+                    try:
+                        print("Optimizing model", model_name, "to XPU.", file=sys.stderr)
+                        self.model = ipex.optimize_transformers(self.model, inplace=True, dtype=torch.float16, device="xpu")
+                    except Exception as err:
+                        print("Not using XPU:", err, file=sys.stderr)
 
         except Exception as err:
             print("Error:", err, file=sys.stderr)
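
For context on why the tokenizer and XPU path are skipped for this model type: MusicGen checkpoints pair a text encoder with an audio decoder and are driven through AutoProcessor rather than AutoTokenizer, and generation length is capped with max_new_tokens instead of being derived from max_position_embeddings, hence the fixed fallback of 512 above. A minimal sketch of the standard transformers usage follows; the checkpoint name and prompt are illustrative and not taken from this commit:

from transformers import AutoProcessor, MusicgenForConditionalGeneration

# Illustrative checkpoint; the backend loads whatever model_name the request carries.
processor = AutoProcessor.from_pretrained("facebook/musicgen-small")
model = MusicgenForConditionalGeneration.from_pretrained("facebook/musicgen-small")

inputs = processor(
    text=["80s pop track with bassy drums and synth"],
    padding=True,
    return_tensors="pt",
)

# MusicGen is bounded by max_new_tokens (audio codebook tokens), not by the text
# model's max_position_embeddings; 512 tokens is roughly 10 seconds of audio.
audio_values = model.generate(**inputs, do_sample=True, guidance_scale=3, max_new_tokens=512)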


@@ -19,6 +19,7 @@ class TestBackendServicer(unittest.TestCase):
         This method sets up the gRPC service by starting the server
         """
         self.service = subprocess.Popen(["python3", "backend.py", "--addr", "localhost:50051"])
+        time.sleep(10)
 
     def tearDown(self) -> None:
         """
@@ -31,7 +32,6 @@ class TestBackendServicer(unittest.TestCase):
         """
         This method tests if the server starts up successfully
         """
-        time.sleep(10)
         try:
             self.setUp()
             with grpc.insecure_channel("localhost:50051") as channel:
@@ -48,7 +48,6 @@ class TestBackendServicer(unittest.TestCase):
         """
         This method tests if the model is loaded successfully
         """
-        time.sleep(10)
         try:
             self.setUp()
             with grpc.insecure_channel("localhost:50051") as channel:
@@ -66,7 +65,6 @@ class TestBackendServicer(unittest.TestCase):
         """
         This method tests if the embeddings are generated successfully
         """
-        time.sleep(10)
         try:
             self.setUp()
             with grpc.insecure_channel("localhost:50051") as channel:
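
With these hunks the 10-second startup wait lives only in setUp, so each test pays it exactly once, right after the backend subprocess is spawned. A rough sketch of the consolidated pattern under that change; the stub and Health call mirror LocalAI's backend proto but are reconstructed here, since the hunks cut off at the channel line:

import subprocess
import time
import unittest

import grpc
import backend_pb2        # generated gRPC stubs, assumed to sit next to the test
import backend_pb2_grpc


class TestBackendServicer(unittest.TestCase):
    def setUp(self):
        # Start the backend and wait for it here, instead of repeating
        # time.sleep(10) at the top of every test method.
        self.service = subprocess.Popen(["python3", "backend.py", "--addr", "localhost:50051"])
        time.sleep(10)

    def tearDown(self):
        self.service.terminate()
        self.service.wait()

    def test_server_startup(self):
        try:
            self.setUp()
            with grpc.insecure_channel("localhost:50051") as channel:
                stub = backend_pb2_grpc.BackendStub(channel)
                response = stub.Health(backend_pb2.HealthMessage())
                self.assertEqual(response.message, b"OK")
        except Exception as err:
            print(err)
            self.fail("Server failed to start")
        finally:
            self.tearDown()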