Bump oneapi-basekit, optimum and openvino (#2139)

* Bump oneapi-basekit, optimum and openvino

* Changed PERFORMANCE_HINT from LATENCY to CUMULATIVE_THROUGHPUT

This causes a minor change in first-token latency but speeds up token generation by about 10-15%.
This commit is contained in:
fakezeta 2024-04-26 16:20:43 +02:00 committed by GitHub
parent 006306b183
commit c9451cb604
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 15 additions and 14 deletions

View file

@@ -150,7 +150,7 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
self.model = OVModelForCausalLM.from_pretrained(model_name,
compile=True,
trust_remote_code=request.TrustRemoteCode,
ov_config={"PERFORMANCE_HINT": "LATENCY"},
ov_config={"PERFORMANCE_HINT": "CUMULATIVE_THROUGHPUT"},
device=device_map)
self.OV = True
else: