mirror of
https://github.com/mudler/LocalAI.git
synced 2025-05-20 10:35:01 +00:00
Bump vLLM version + more options when loading models in vLLM (#1782)
* Bump vLLM version to 0.3.2 * Add vLLM model loading options * Remove transformers-exllama * Fix install exllama
This commit is contained in:
parent
1c312685aa
commit
939411300a
28 changed files with 736 additions and 641 deletions
|
@ -71,7 +71,7 @@ dependencies:
|
|||
- regex==2023.10.3
|
||||
- requests==2.31.0
|
||||
- rouge==1.0.1
|
||||
- safetensors==0.3.3
|
||||
- safetensors>=0.3.3
|
||||
- six==1.16.0
|
||||
- sympy==1.12
|
||||
- tokenizers==0.14.0
|
||||
|
|
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
|
@ -81,7 +81,7 @@ dependencies:
|
|||
- requests==2.31.0
|
||||
- rouge==1.0.1
|
||||
- s3transfer==0.7.0
|
||||
- safetensors==0.3.3
|
||||
- safetensors>=0.4.1
|
||||
- scipy==1.11.3
|
||||
- six==1.16.0
|
||||
- sympy==1.12
|
||||
|
@ -113,7 +113,7 @@ dependencies:
|
|||
- sudachipy
|
||||
- sudachidict_core
|
||||
- vocos
|
||||
- vllm==0.2.7
|
||||
- transformers>=4.36.0 # Required for Mixtral.
|
||||
- vllm==0.3.2
|
||||
- transformers>=4.38.0 # Required for Gemma.
|
||||
- xformers==0.0.23.post1
|
||||
prefix: /opt/conda/envs/transformers
|
||||
|
|
|
@ -71,7 +71,7 @@ dependencies:
|
|||
- requests==2.31.0
|
||||
- rouge==1.0.1
|
||||
- s3transfer==0.7.0
|
||||
- safetensors==0.3.3
|
||||
- safetensors>=0.4.1
|
||||
- scipy==1.11.3
|
||||
- six==1.16.0
|
||||
- sympy==1.12
|
||||
|
@ -103,7 +103,7 @@ dependencies:
|
|||
- sudachipy
|
||||
- sudachidict_core
|
||||
- vocos
|
||||
- vllm==0.2.7
|
||||
- transformers>=4.36.0 # Required for Mixtral.
|
||||
- vllm==0.3.2
|
||||
- transformers>=4.38.0 # Required for Gemma.
|
||||
- xformers==0.0.23.post1
|
||||
prefix: /opt/conda/envs/transformers
|
||||
|
|
|
@ -69,7 +69,7 @@ dependencies:
|
|||
- requests==2.31.0
|
||||
- rouge==1.0.1
|
||||
- s3transfer==0.7.0
|
||||
- safetensors==0.3.3
|
||||
- safetensors>=0.4.1
|
||||
- scipy==1.11.3
|
||||
- six==1.16.0
|
||||
- sympy==1.12
|
||||
|
@ -101,7 +101,7 @@ dependencies:
|
|||
- sudachipy
|
||||
- sudachidict_core
|
||||
- vocos
|
||||
- vllm==0.2.7
|
||||
- transformers>=4.36.0 # Required for Mixtral.
|
||||
- vllm==0.3.2
|
||||
- transformers>=4.38.0 # Required for Gemma.
|
||||
- xformers==0.0.23.post1
|
||||
prefix: /opt/conda/envs/transformers
|
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
|
@ -1,7 +1,8 @@
|
|||
export CONDA_ENV_PATH = "exllama.yml"
|
||||
|
||||
.PHONY: exllama
|
||||
exllama:
|
||||
$(MAKE) -C ../common-env/transformers
|
||||
bash install.sh
|
||||
bash install.sh ${CONDA_ENV_PATH}
|
||||
|
||||
.PHONY: run
|
||||
run:
|
||||
|
|
File diff suppressed because one or more lines are too long
|
@ -1,14 +1,22 @@
|
|||
#!/bin/bash
|
||||
set -ex
|
||||
|
||||
##
|
||||
## A bash script installs the required dependencies of VALL-E-X and prepares the environment
|
||||
export PATH=$PATH:/opt/conda/bin
|
||||
|
||||
# Activate conda environment
|
||||
source activate transformers
|
||||
# Check if environment exist
|
||||
conda_env_exists(){
|
||||
! conda list --name "${@}" >/dev/null 2>/dev/null
|
||||
}
|
||||
|
||||
echo $CONDA_PREFIX
|
||||
if conda_env_exists "exllama" ; then
|
||||
echo "Creating virtual environment..."
|
||||
conda env create --name exllama --file $1
|
||||
echo "Virtual environment created."
|
||||
else
|
||||
echo "Virtual environment already exists."
|
||||
fi
|
||||
|
||||
source activate exllama
|
||||
|
||||
git clone https://github.com/turboderp/exllama $CONDA_PREFIX/exllama && pushd $CONDA_PREFIX/exllama && pip install -r requirements.txt && popd
|
||||
|
||||
|
|
|
@ -2,11 +2,10 @@
|
|||
|
||||
##
|
||||
## A bash script wrapper that runs the exllama server with conda
|
||||
|
||||
export PATH=$PATH:/opt/conda/bin
|
||||
|
||||
# Activate conda environment
|
||||
source activate transformers
|
||||
source activate exllama
|
||||
|
||||
# get the directory where the bash script is located
|
||||
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
|
||||
|
|
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
|
@ -79,7 +79,7 @@ dependencies:
|
|||
- pypinyin==0.49.0
|
||||
- python-multipart==0.0.6
|
||||
- regex==2023.10.3
|
||||
- safetensors==0.4.0
|
||||
- safetensors>=0.4.0
|
||||
- semantic-version==2.10.0
|
||||
- soundfile==0.12.1
|
||||
- starlette==0.27.0
|
||||
|
|
File diff suppressed because one or more lines are too long
|
@ -88,6 +88,16 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
|
|||
|
||||
if request.Quantization != "":
|
||||
engine_args.quantization = request.Quantization
|
||||
if request.GPUMemoryUtilization != 0:
|
||||
engine_args.gpu_memory_utilization = request.GPUMemoryUtilization
|
||||
if request.TrustRemoteCode:
|
||||
engine_args.trust_remote_code = request.TrustRemoteCode
|
||||
if request.EnforceEager:
|
||||
engine_args.enforce_eager = request.EnforceEager
|
||||
if request.SwapSpace != 0:
|
||||
engine_args.swap_space = request.SwapSpace
|
||||
if request.MaxModelLen != 0:
|
||||
engine_args.max_model_len = request.MaxModelLen
|
||||
|
||||
try:
|
||||
self.llm = AsyncLLMEngine.from_engine_args(engine_args)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue