Bump vLLM version + more options when loading models in vLLM (#1782)

* Bump vLLM version to 0.3.2

* Add vLLM model loading options

* Remove transformers-exllama

* Fix install exllama
This commit is contained in:
Ludovic Leroux 2024-03-01 16:48:53 -05:00 committed by GitHub
parent 1c312685aa
commit 939411300a
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
28 changed files with 736 additions and 641 deletions

View file

@ -71,7 +71,7 @@ dependencies:
- regex==2023.10.3
- requests==2.31.0
- rouge==1.0.1
- safetensors==0.3.3
- safetensors>=0.3.3
- six==1.16.0
- sympy==1.12
- tokenizers==0.14.0

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View file

@ -81,7 +81,7 @@ dependencies:
- requests==2.31.0
- rouge==1.0.1
- s3transfer==0.7.0
- safetensors==0.3.3
- safetensors>=0.4.1
- scipy==1.11.3
- six==1.16.0
- sympy==1.12
@ -113,7 +113,7 @@ dependencies:
- sudachipy
- sudachidict_core
- vocos
- vllm==0.2.7
- transformers>=4.36.0 # Required for Mixtral.
- vllm==0.3.2
- transformers>=4.38.0 # Required for Gemma.
- xformers==0.0.23.post1
prefix: /opt/conda/envs/transformers

View file

@ -71,7 +71,7 @@ dependencies:
- requests==2.31.0
- rouge==1.0.1
- s3transfer==0.7.0
- safetensors==0.3.3
- safetensors>=0.4.1
- scipy==1.11.3
- six==1.16.0
- sympy==1.12
@ -103,7 +103,7 @@ dependencies:
- sudachipy
- sudachidict_core
- vocos
- vllm==0.2.7
- transformers>=4.36.0 # Required for Mixtral.
- vllm==0.3.2
- transformers>=4.38.0 # Required for Gemma.
- xformers==0.0.23.post1
prefix: /opt/conda/envs/transformers

View file

@ -69,7 +69,7 @@ dependencies:
- requests==2.31.0
- rouge==1.0.1
- s3transfer==0.7.0
- safetensors==0.3.3
- safetensors>=0.4.1
- scipy==1.11.3
- six==1.16.0
- sympy==1.12
@ -101,7 +101,7 @@ dependencies:
- sudachipy
- sudachidict_core
- vocos
- vllm==0.2.7
- transformers>=4.36.0 # Required for Mixtral.
- vllm==0.3.2
- transformers>=4.38.0 # Required for Gemma.
- xformers==0.0.23.post1
prefix: /opt/conda/envs/transformers

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View file

@ -1,7 +1,8 @@
export CONDA_ENV_PATH = "exllama.yml"
.PHONY: exllama
exllama:
$(MAKE) -C ../common-env/transformers
bash install.sh
bash install.sh ${CONDA_ENV_PATH}
.PHONY: run
run:

File diff suppressed because one or more lines are too long

View file

@ -1,14 +1,22 @@
#!/bin/bash
set -ex
##
## A bash script that installs the required dependencies of exllama and prepares the environment
export PATH=$PATH:/opt/conda/bin
# Activate conda environment
source activate transformers
# Check whether the named conda environment is MISSING.
# NOTE: despite its name, this helper returns success (exit 0) when
# `conda list --name <args>` fails, and failure when it succeeds -- the leading
# `!` negates the exit status. Presumably `conda list` fails when the
# environment does not exist (confirm against the conda docs).
# The caller below relies on this inversion: a "true" result is the branch
# that creates the environment.
conda_env_exists(){
# stdout and stderr are both discarded; only the (negated) exit status is used.
! conda list --name "${@}" >/dev/null 2>/dev/null
}
echo $CONDA_PREFIX
if conda_env_exists "exllama" ; then
echo "Creating virtual environment..."
conda env create --name exllama --file $1
echo "Virtual environment created."
else
echo "Virtual environment already exists."
fi
source activate exllama
git clone https://github.com/turboderp/exllama $CONDA_PREFIX/exllama && pushd $CONDA_PREFIX/exllama && pip install -r requirements.txt && popd

View file

@ -2,11 +2,10 @@
##
## A bash script wrapper that runs the exllama server with conda
export PATH=$PATH:/opt/conda/bin
# Activate conda environment
source activate transformers
source activate exllama
# get the directory where the bash script is located
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View file

@ -79,7 +79,7 @@ dependencies:
- pypinyin==0.49.0
- python-multipart==0.0.6
- regex==2023.10.3
- safetensors==0.4.0
- safetensors>=0.4.0
- semantic-version==2.10.0
- soundfile==0.12.1
- starlette==0.27.0

File diff suppressed because one or more lines are too long

View file

@ -88,6 +88,16 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
if request.Quantization != "":
engine_args.quantization = request.Quantization
if request.GPUMemoryUtilization != 0:
engine_args.gpu_memory_utilization = request.GPUMemoryUtilization
if request.TrustRemoteCode:
engine_args.trust_remote_code = request.TrustRemoteCode
if request.EnforceEager:
engine_args.enforce_eager = request.EnforceEager
if request.SwapSpace != 0:
engine_args.swap_space = request.SwapSpace
if request.MaxModelLen != 0:
engine_args.max_model_len = request.MaxModelLen
try:
self.llm = AsyncLLMEngine.from_engine_args(engine_args)