Some adaptations for the MusicgenForConditionalGeneration type

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
Author: Ettore Di Giacinto
Date:   2025-01-17 16:29:34 +01:00
parent 70376131cb
commit f6e309dd4d
2 changed files with 14 additions and 13 deletions


@@ -206,19 +206,22 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
                     torch_dtype=compute)
             if request.ContextSize > 0:
                 self.max_tokens = request.ContextSize
-            else:
+            elif request.Type != "MusicgenForConditionalGeneration":
                 self.max_tokens = self.model.config.max_position_embeddings
+            else:
+                self.max_tokens = 512
 
-            self.tokenizer = AutoTokenizer.from_pretrained(model_name, use_safetensors=True)
-            self.XPU = False
+            if request.Type != "MusicgenForConditionalGeneration":
+                self.tokenizer = AutoTokenizer.from_pretrained(model_name, use_safetensors=True)
 
+            self.XPU = False
             if XPU and self.OV == False:
                 self.XPU = True
                 try:
                     print("Optimizing model", model_name, "to XPU.", file=sys.stderr)
                     self.model = ipex.optimize_transformers(self.model, inplace=True, dtype=torch.float16, device="xpu")
                 except Exception as err:
                     print("Not using XPU:", err, file=sys.stderr)
 
         except Exception as err:
             print("Error:", err, file=sys.stderr)


@@ -19,6 +19,7 @@ class TestBackendServicer(unittest.TestCase):
         This method sets up the gRPC service by starting the server
         """
         self.service = subprocess.Popen(["python3", "backend.py", "--addr", "localhost:50051"])
+        time.sleep(10)
 
     def tearDown(self) -> None:
         """
@@ -31,7 +32,6 @@ class TestBackendServicer(unittest.TestCase):
         """
         This method tests if the server starts up successfully
         """
-        time.sleep(10)
         try:
             self.setUp()
             with grpc.insecure_channel("localhost:50051") as channel:
@@ -48,7 +48,6 @@ class TestBackendServicer(unittest.TestCase):
         """
         This method tests if the model is loaded successfully
         """
-        time.sleep(10)
         try:
             self.setUp()
             with grpc.insecure_channel("localhost:50051") as channel:
@@ -66,7 +65,6 @@ class TestBackendServicer(unittest.TestCase):
         """
         This method tests if the embeddings are generated successfully
         """
-        time.sleep(10)
         try:
             self.setUp()
             with grpc.insecure_channel("localhost:50051") as channel:
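
Moving the fixed 10-second wait out of the individual tests and into setUp() means every test, including any added later, gives the spawned gRPC server time to come up before connecting, instead of each test repeating the sleep. A sketch of the resulting fixture, assuming nothing else in it changes:

    import subprocess
    import time
    import unittest

    class TestBackendServicer(unittest.TestCase):
        def setUp(self):
            # Start the backend and wait once for the gRPC server to come up;
            # every test then runs against a ready server.
            self.service = subprocess.Popen(["python3", "backend.py", "--addr", "localhost:50051"])
            time.sleep(10)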