feat(vllm): Additional vLLM config options (Disable logging, dtype, and Per-Prompt media limits) (#4855)

* Adding the following vLLM config options: disable_log_status, dtype, limit_mm_per_prompt

Signed-off-by: TheDropZone <brandonbeiler@gmail.com>

* using " marks in the config.yaml file

Signed-off-by: TheDropZone <brandonbeiler@gmail.com>

* adding in missing colon

Signed-off-by: TheDropZone <brandonbeiler@gmail.com>

---------

Signed-off-by: TheDropZone <brandonbeiler@gmail.com>
This commit is contained in:
Brandon Beiler 2025-02-18 13:27:58 -05:00 committed by GitHub
parent 5b19af99ff
commit 6a6e1a0ea9
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 64 additions and 23 deletions

View file

@ -16,6 +16,8 @@ config_file: |
use_tokenizer_template: true
# Uncomment to specify a quantization method (optional)
# quantization: "awq"
# Uncomment to set dtype, choices are: "auto", "half", "float16", "bfloat16", "float", "float32". awq on vLLM does not support bfloat16
# dtype: "float16"
# Uncomment to limit the GPU memory utilization (vLLM default is 0.9 for 90%)
# gpu_memory_utilization: 0.5
# Uncomment to trust remote code from huggingface
@ -30,3 +32,10 @@ config_file: |
# Allows you to partition and run large models. Performance gains are limited.
# https://github.com/vllm-project/vllm/issues/1435
# tensor_parallel_size: 2
# Uncomment to disable log stats
# disable_log_stats: true
# Uncomment to specify Multi-Model limits per prompt, defaults to 1 per modality if not specified
# limit_mm_per_prompt:
# image: 2
# video: 2
# audio: 2