Mirror of https://github.com/mudler/LocalAI.git (synced 2025-05-23 20:14:59 +00:00)
feat(vllm): Allow setting quantization (#1094)
This is particularly useful for setting AWQ quantization.

**Description**

Follow-up of #1015.

**Notes for Reviewers**

**[Signed commits](../CONTRIBUTING.md#signing-off-on-commits-developer-certificate-of-origin)**
- [ ] Yes, I signed my commits.

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
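For illustration, a minimal client-side sketch of exercising the new field over the backend's gRPC interface. Only the `LoadModel` RPC, the `Model` and `Quantization` request fields, and the `Result` reply shape are confirmed by this diff; the `ModelOptions` message name, the address, and the model name are assumptions for the example (the `BackendStub` name follows from the `BackendServicer` class in the generated code):

```python
import grpc

# Modules generated from LocalAI's backend.proto; ModelOptions is an assumed
# message name, while the field names come straight from this PR's diff.
import backend_pb2
import backend_pb2_grpc

channel = grpc.insecure_channel("localhost:50051")  # assumed backend address
stub = backend_pb2_grpc.BackendStub(channel)

# Quantization is the field this PR adds; an empty string keeps the old behavior.
reply = stub.LoadModel(backend_pb2.ModelOptions(
    Model="TheBloke/Llama-2-7B-AWQ",  # placeholder AWQ checkpoint
    Quantization="awq",
))
print(reply.success, reply.message)
```

Leaving `Quantization` empty falls through to the unquantized `LLM(model=...)` path, so existing configurations keep working unchanged.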
parent: 048b81373d
commit: a28ab18987
13 changed files with 357 additions and 332 deletions
```diff
@@ -45,8 +45,10 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
         return backend_pb2.Reply(message=bytes("OK", 'utf-8'))
     def LoadModel(self, request, context):
         try:
-            # https://github.com/vllm-project/vllm/blob/main/examples/offline_inference.py
-            self.llm = LLM(model=request.Model)
+            if request.Quantization != "":
+                self.llm = LLM(model=request.Model, quantization=request.Quantization)
+            else:
+                self.llm = LLM(model=request.Model)
         except Exception as err:
             return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
         return backend_pb2.Result(message="Model loaded successfully", success=True)
```
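For context, this is roughly what the patched `LoadModel` does, as a standalone vLLM sketch (a minimal example assuming a vLLM install with GPU support; the model name and prompt are placeholders):

```python
from vllm import LLM, SamplingParams

model = "TheBloke/Llama-2-7B-AWQ"  # placeholder AWQ checkpoint
quantization = "awq"               # "" would mean: load unquantized

# Mirror the backend's branching: only pass quantization when it is set.
if quantization != "":
    llm = LLM(model=model, quantization=quantization)
else:
    llm = LLM(model=model)

outputs = llm.generate(["Hello, my name is"], SamplingParams(max_tokens=32))
print(outputs[0].outputs[0].text)
```

The same effect could be had without the branch by building the keyword arguments conditionally, e.g. `LLM(model=model, **({"quantization": quantization} if quantization else {}))`, but the explicit if/else mirrors the diff above.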