mirror of
https://github.com/mudler/LocalAI.git
synced 2025-05-20 10:35:01 +00:00
Bump oneapi-basekit, optimum and openvino (#2139)
* Bump oneapi-basekit, optimum and openvino
* Changed PERFORMANCE_HINT to CUMULATIVE_THROUGHPUT. Minor latency change for the first token, but about a 10-15% speedup in token generation.
This commit is contained in:
parent
006306b183
commit
c9451cb604
5 changed files with 15 additions and 14 deletions
|
@@ -150,7 +150,7 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
|
|||
self.model = OVModelForCausalLM.from_pretrained(model_name,
|
||||
compile=True,
|
||||
trust_remote_code=request.TrustRemoteCode,
|
||||
ov_config={"PERFORMANCE_HINT": "LATENCY"},
|
||||
ov_config={"PERFORMANCE_HINT": "CUMULATIVE_THROUGHPUT"},
|
||||
device=device_map)
|
||||
self.OV = True
|
||||
else:
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue