mirror of
https://github.com/mudler/LocalAI.git
synced 2025-05-20 18:45:00 +00:00
feat(python-grpc): allow to set max workers with PYTHON_GRPC_MAX_WORKERS (#1081)
**Description** This allows customizing the maximum number of gRPC workers for Python backends via the PYTHON_GRPC_MAX_WORKERS environment variable. Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
This commit is contained in:
parent
453e9c5da9
commit
bdf3f95346
8 changed files with 34 additions and 8 deletions
|
@ -19,6 +19,9 @@ from exllama.tokenizer import ExLlamaTokenizer
|
|||
|
||||
_ONE_DAY_IN_SECONDS = 60 * 60 * 24
|
||||
|
||||
# If PYTHON_GRPC_MAX_WORKERS is set in the environment, use it; otherwise default to 1
|
||||
MAX_WORKERS = int(os.environ.get('PYTHON_GRPC_MAX_WORKERS', '1'))
|
||||
|
||||
# Implement the BackendServicer class with the service methods
|
||||
class BackendServicer(backend_pb2_grpc.BackendServicer):
|
||||
def generate(self,prompt, max_new_tokens):
|
||||
|
@ -110,7 +113,7 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
|
|||
|
||||
|
||||
def serve(address):
|
||||
server = grpc.server(futures.ThreadPoolExecutor(max_workers=1))
|
||||
server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS))
|
||||
backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server)
|
||||
server.add_insecure_port(address)
|
||||
server.start()
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue