mirror of
https://github.com/mudler/LocalAI.git
synced 2025-05-20 18:45:00 +00:00
feat(vllm): add support for embeddings (#3440)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
This commit is contained in:
parent
56db715a91
commit
68fc014c6d
2 changed files with 43 additions and 0 deletions
|
@ -135,6 +135,26 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
|
||||||
res = await gen.__anext__()
|
res = await gen.__anext__()
|
||||||
return res
|
return res
|
||||||
|
|
||||||
|
def Embedding(self, request, context):
    """
    Compute the embedding vector for the text carried by the request.

    Args:
        request: The request message; its ``Embeddings`` field holds the text to encode.
        context: The grpc.ServicerContext of this RPC, used to report an error status.

    Returns:
        A backend_pb2.EmbeddingResult built from the embedding of the first
        encoder output, or an empty EmbeddingResult (with the RPC marked
        INVALID_ARGUMENT) when the encoder produced no output.
    """
    text = request.Embeddings
    # Diagnostics are written to stderr.
    print("Calculated embeddings for: " + text, file=sys.stderr)

    encoded = self.model.encode(text)

    # Success path: at least one output came back; only the first one is returned.
    if len(encoded) != 0:
        return backend_pb2.EmbeddingResult(embeddings=encoded[0].outputs.embedding)

    # Nothing was produced: flag the RPC and hand back an empty result.
    context.set_code(grpc.StatusCode.INVALID_ARGUMENT)
    context.set_details("No embeddings were calculated.")
    return backend_pb2.EmbeddingResult()
|
||||||
|
|
||||||
async def PredictStream(self, request, context):
|
async def PredictStream(self, request, context):
|
||||||
"""
|
"""
|
||||||
Generates text based on the given prompt and sampling parameters, and streams the results.
|
Generates text based on the given prompt and sampling parameters, and streams the results.
|
||||||
|
|
|
@ -74,3 +74,26 @@ class TestBackendServicer(unittest.TestCase):
|
||||||
self.fail("text service failed")
|
self.fail("text service failed")
|
||||||
finally:
|
finally:
|
||||||
self.tearDown()
|
self.tearDown()
|
||||||
|
|
||||||
|
def test_embedding(self):
    """
    Check that the Embedding RPC returns a non-empty vector of floats.

    Loads an embedding model through LoadModel over a local gRPC channel,
    requests the embedding of a fixed sentence and validates the response.
    Any exception (including a failed assertion) is printed and reported as
    a single "Embedding service failed" failure; tearDown is always invoked.
    """
    try:
        self.setUp()
        with grpc.insecure_channel("localhost:50051") as channel:
            stub = backend_pb2_grpc.BackendStub(channel)
            response = stub.LoadModel(backend_pb2.ModelOptions(Model="intfloat/e5-mistral-7b-instruct"))
            self.assertTrue(response.success)
            embedding_request = backend_pb2.PredictOptions(Embeddings="This is a test sentence.")
            embedding_response = stub.Embedding(embedding_request)
            self.assertIsNotNone(embedding_response.embeddings)
            # Copy into a plain list before type-checking. Presumably `embeddings`
            # is a protobuf repeated field, which is exposed as a list-like
            # container rather than a `list`, so assertIsInstance(..., list) on
            # the raw field would fail even for a correct response.
            embeddings = list(embedding_response.embeddings)
            # assert that the list is not empty
            self.assertGreater(len(embeddings), 0)
            # assert that every element is a float
            for value in embeddings:
                self.assertIsInstance(value, float)
    except Exception as err:
        print(err)
        self.fail("Embedding service failed")
    finally:
        self.tearDown()
|
Loading…
Add table
Add a link
Reference in a new issue