Adding the following vLLM config options: disable_log_status, dtype, limit_mm_per_prompt

Signed-off-by: TheDropZone <brandonbeiler@gmail.com>
This commit is contained in:
TheDropZone 2025-02-17 16:15:55 -05:00
parent 6424f0666d
commit f0f2c87553
No known key found for this signature in database
5 changed files with 64 additions and 23 deletions

View file

@ -159,6 +159,12 @@ func grpcModelOpts(c config.BackendConfig) *pb.ModelOptions {
SwapSpace: int32(c.SwapSpace),
MaxModelLen: int32(c.MaxModelLen),
TensorParallelSize: int32(c.TensorParallelSize),
DisableLogStatus c.DisableLogStatus,
DType: c.DType,
// LimitMMPerPrompt vLLM
LimitImagePerPrompt: int32(c.LimitMMPerPrompt.LimitImagePerPrompt),
LimitVideoPerPrompt: int32(c.LimitMMPerPrompt.LimitVideoPerPrompt),
LimitAudioPerPrompt: int32(c.LimitMMPerPrompt.LimitAudioPerPrompt),
MMProj: c.MMProj,
FlashAttention: c.FlashAttention,
CacheTypeKey: c.CacheTypeK,