mirror of
https://github.com/mudler/LocalAI.git
synced 2025-06-29 22:20:43 +00:00
Some adaptations for the MusicgenForConditionalGeneration type
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
This commit is contained in:
parent
70376131cb
commit
f6e309dd4d
2 changed files with 14 additions and 13 deletions
|
@ -206,19 +206,22 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
|
||||||
torch_dtype=compute)
|
torch_dtype=compute)
|
||||||
if request.ContextSize > 0:
|
if request.ContextSize > 0:
|
||||||
self.max_tokens = request.ContextSize
|
self.max_tokens = request.ContextSize
|
||||||
else:
|
elif request.Type != "MusicgenForConditionalGeneration":
|
||||||
self.max_tokens = self.model.config.max_position_embeddings
|
self.max_tokens = self.model.config.max_position_embeddings
|
||||||
|
else:
|
||||||
|
self.max_tokens = 512
|
||||||
|
|
||||||
self.tokenizer = AutoTokenizer.from_pretrained(model_name, use_safetensors=True)
|
if request.Type != "MusicgenForConditionalGeneration":
|
||||||
self.XPU = False
|
self.tokenizer = AutoTokenizer.from_pretrained(model_name, use_safetensors=True)
|
||||||
|
self.XPU = False
|
||||||
|
|
||||||
if XPU and self.OV == False:
|
if XPU and self.OV == False:
|
||||||
self.XPU = True
|
self.XPU = True
|
||||||
try:
|
try:
|
||||||
print("Optimizing model", model_name, "to XPU.", file=sys.stderr)
|
print("Optimizing model", model_name, "to XPU.", file=sys.stderr)
|
||||||
self.model = ipex.optimize_transformers(self.model, inplace=True, dtype=torch.float16, device="xpu")
|
self.model = ipex.optimize_transformers(self.model, inplace=True, dtype=torch.float16, device="xpu")
|
||||||
except Exception as err:
|
except Exception as err:
|
||||||
print("Not using XPU:", err, file=sys.stderr)
|
print("Not using XPU:", err, file=sys.stderr)
|
||||||
|
|
||||||
except Exception as err:
|
except Exception as err:
|
||||||
print("Error:", err, file=sys.stderr)
|
print("Error:", err, file=sys.stderr)
|
||||||
|
|
|
@ -19,6 +19,7 @@ class TestBackendServicer(unittest.TestCase):
|
||||||
This method sets up the gRPC service by starting the server
|
This method sets up the gRPC service by starting the server
|
||||||
"""
|
"""
|
||||||
self.service = subprocess.Popen(["python3", "backend.py", "--addr", "localhost:50051"])
|
self.service = subprocess.Popen(["python3", "backend.py", "--addr", "localhost:50051"])
|
||||||
|
time.sleep(10)
|
||||||
|
|
||||||
def tearDown(self) -> None:
|
def tearDown(self) -> None:
|
||||||
"""
|
"""
|
||||||
|
@ -31,7 +32,6 @@ class TestBackendServicer(unittest.TestCase):
|
||||||
"""
|
"""
|
||||||
This method tests if the server starts up successfully
|
This method tests if the server starts up successfully
|
||||||
"""
|
"""
|
||||||
time.sleep(10)
|
|
||||||
try:
|
try:
|
||||||
self.setUp()
|
self.setUp()
|
||||||
with grpc.insecure_channel("localhost:50051") as channel:
|
with grpc.insecure_channel("localhost:50051") as channel:
|
||||||
|
@ -48,7 +48,6 @@ class TestBackendServicer(unittest.TestCase):
|
||||||
"""
|
"""
|
||||||
This method tests if the model is loaded successfully
|
This method tests if the model is loaded successfully
|
||||||
"""
|
"""
|
||||||
time.sleep(10)
|
|
||||||
try:
|
try:
|
||||||
self.setUp()
|
self.setUp()
|
||||||
with grpc.insecure_channel("localhost:50051") as channel:
|
with grpc.insecure_channel("localhost:50051") as channel:
|
||||||
|
@ -66,7 +65,6 @@ class TestBackendServicer(unittest.TestCase):
|
||||||
"""
|
"""
|
||||||
This method tests if the embeddings are generated successfully
|
This method tests if the embeddings are generated successfully
|
||||||
"""
|
"""
|
||||||
time.sleep(10)
|
|
||||||
try:
|
try:
|
||||||
self.setUp()
|
self.setUp()
|
||||||
with grpc.insecure_channel("localhost:50051") as channel:
|
with grpc.insecure_channel("localhost:50051") as channel:
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue