disable streaming

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
Ettore Di Giacinto 2025-05-16 18:27:28 +02:00
parent 6b38c32a65
commit ef96c4f859


@@ -4088,6 +4088,7 @@ public:
         json data = parse_options(true, request);
         std::cout << "[DEBUG] Parsed request options" << std::endl;
+        data["stream"] = false;
         //Raise error if embeddings is set to true
         if (ctx_server.params_base.embedding) {
             std::cout << "[DEBUG] Error: Embedding mode not supported in streaming" << std::endl;
@@ -4241,6 +4242,8 @@ public:
         json body = parse_options(false, request);
+        body["stream"] = false;
         if (llama_pooling_type(ctx_server.ctx) == LLAMA_POOLING_TYPE_NONE) {
             return grpc::Status(grpc::StatusCode::INVALID_ARGUMENT, "Pooling type 'none' is not OAI compatible. Please use a different pooling type");
         }
@@ -4311,6 +4314,7 @@ public:
     grpc::Status TokenizeString(ServerContext* context, const backend::PredictOptions* request, backend::TokenizationResponse* response) {
         json body = parse_options(false, request);
+        body["stream"] = false;
         json tokens_response = json::array();
         if (body.count("prompt") != 0) {
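For context, here is a minimal standalone sketch of the technique each hunk applies, assuming nlohmann::json (which the server's json alias and calls like body.count("prompt") suggest); the request body and field values below are illustrative, not the backend's actual code:

    // Minimal sketch, not the actual gRPC server code: assumes nlohmann::json.
    #include <iostream>
    #include <nlohmann/json.hpp>

    using json = nlohmann::json;

    int main() {
        // Hypothetical request body as a client might send it, asking to stream.
        json body = json::parse(R"({"prompt": "hello", "stream": true})");

        // The commit's technique: unconditionally overwrite the field right
        // after parsing, so every downstream consumer sees streaming disabled.
        body["stream"] = false;

        // Downstream code reads the flag; value() supplies a default if the
        // key were missing entirely.
        bool stream = body.value("stream", false);
        std::cout << "stream = " << std::boolalpha << stream << std::endl;  // prints: stream = false
        return 0;
    }

Overwriting the flag immediately after parse_options means every later consumer of the body sees a consistent value, regardless of what the client requested.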