mirror of
https://github.com/mudler/LocalAI.git
synced 2025-05-20 10:35:01 +00:00
Bump oneapi-basekit, optimum and openvino (#2139)
* Bump oneapi-basekit, optimum and openvino * Changed PERFORMANCE HINT to CUMULATIVE_THROUGHPUT Minor latency change for first token but about 10-15% speedup on token generation.
This commit is contained in:
parent
006306b183
commit
c9451cb604
5 changed files with 15 additions and 14 deletions
|
@ -60,9 +60,10 @@ dependencies:
|
|||
- networkx
|
||||
- numpy==1.26.0
|
||||
- onnx==1.15.0
|
||||
- openvino==2024.0.0
|
||||
- openvino-telemetry==2023.2.1
|
||||
- optimum[openvino]==1.17.1
|
||||
- openvino==2024.1.0
|
||||
- openvino-telemetry==2024.1.0
|
||||
- optimum[openvino]==1.19.1
|
||||
- optimum-intel==1.16.1
|
||||
- packaging==23.2
|
||||
- pandas
|
||||
- peft==0.5.0
|
||||
|
|
|
@ -150,7 +150,7 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
|
|||
self.model = OVModelForCausalLM.from_pretrained(model_name,
|
||||
compile=True,
|
||||
trust_remote_code=request.TrustRemoteCode,
|
||||
ov_config={"PERFORMANCE_HINT": "LATENCY"},
|
||||
ov_config={"PERFORMANCE_HINT": "CUMULATIVE_THROUGHPUT"},
|
||||
device=device_map)
|
||||
self.OV = True
|
||||
else:
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue