feat(python-grpc): allow to set max workers with PYTHON_GRPC_MAX_WORKERS (#1081)

**Description**

this allows to customize the maximum number of grpc workers for python
backends

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
This commit is contained in:
Ettore Di Giacinto 2023-09-19 21:30:39 +02:00 committed by GitHub
parent 453e9c5da9
commit bdf3f95346
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
8 changed files with 34 additions and 8 deletions

View file

@ -14,6 +14,9 @@ from vllm import LLM, SamplingParams
_ONE_DAY_IN_SECONDS = 60 * 60 * 24
# If MAX_WORKERS are specified in the environment use it, otherwise default to 1
MAX_WORKERS = int(os.environ.get('PYTHON_GRPC_MAX_WORKERS', '1'))
# Implement the BackendServicer class with the service methods
class BackendServicer(backend_pb2_grpc.BackendServicer):
def generate(self,prompt, max_new_tokens):
@ -70,7 +73,7 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
return self.Predict(request, context)
def serve(address):
server = grpc.server(futures.ThreadPoolExecutor(max_workers=1))
server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS))
backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server)
server.add_insecure_port(address)
server.start()