feat: Token Stream support for Transformer, fix: missing package for OpenVINO (#1908)

* Streaming working
* Small fix for regression on CUDA and XPU
* use pip version of optimum[openvino]
* Update backend/python/transformers/transformers_server.py
  Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
* Token streaming support
  fix optimum[openvino] package in install.sh
* Token Streaming support

---------

Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
Co-authored-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2025-05-20 18:45:00 +00:00 · 2024-03-27 17:50:35 +01:00 · 2024-03-27 17:50:35 +01:00 · 8210ffcb6c
commit 8210ffcb6c
parent e7cbe32601
2 changed files with 72 additions and 48 deletions
--- a/backend/python/common-env/transformers/install.sh
+++ b/backend/python/common-env/transformers/install.sh
@@ -25,7 +25,7 @@ if [ -d "/opt/intel" ]; then
    # Intel GPU: If the directory exists, we assume we are using the intel image
    # (no conda env)
    # https://github.com/intel/intel-extension-for-pytorch/issues/538
-    pip install intel-extension-for-transformers datasets sentencepiece tiktoken neural_speed
+    pip install intel-extension-for-transformers datasets sentencepiece tiktoken neural_speed optimum[openvino]
 fi

 if [ "$PIP_CACHE_PURGE" = true ] ; then