feat: use tokenizer.apply_chat_template() in vLLM (#1990)

Use tokenizer.apply_chat_template() in the vLLM backend: the request's chat messages are now forwarded to the backend so the model's own chat template can render the prompt.

Signed-off-by: Ludovic LEROUX <ludovic@inpher.io>

Author: Ludovic Leroux
Date: 2024-04-11 13:20:22 -04:00 (committed by GitHub)
Parent: cbda06fb96
Commit: 12c0d9443e
34 changed files with 3088 additions and 989 deletions

@@ -29,7 +29,7 @@ func ComputeChoices(
 	}
 	// get the model function to call for the result
-	predFunc, err := backend.ModelInference(req.Context, predInput, images, loader, *config, o, tokenCallback)
+	predFunc, err := backend.ModelInference(req.Context, predInput, req.Messages, images, loader, *config, o, tokenCallback)
 	if err != nil {
 		return result, backend.TokenUsage{}, err
 	}
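
The Go change above forwards the request's chat messages to the backend alongside the pre-rendered prompt input. On the Python side, a Hugging Face tokenizer (as used by vLLM) can then format those messages with its built-in chat template. A minimal sketch of that call, assuming a transformers-style tokenizer; the model name and messages are illustrative, not taken from this commit:

```python
from transformers import AutoTokenizer

# Illustrative model; any model whose tokenizer ships a chat template works.
tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.2")

messages = [
    {"role": "user", "content": "Write a haiku about tokenizers."},
]

# Render the messages with the model's own chat template rather than a
# hand-maintained prompt template. add_generation_prompt=True appends the
# assistant turn marker so the model starts generating a reply.
prompt = tokenizer.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True,
)
print(prompt)
```

This lets the prompt format follow whatever chat template each model ships with, rather than a template maintained separately on the server side.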