Mirror of https://github.com/mudler/LocalAI.git (synced 2025-06-29 14:14:59 +00:00)
Remove sentencetransformers
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
This commit is contained in:
parent 8bf096265e
commit 60b05a0d21
13 changed files with 0 additions and 289 deletions
@@ -1,31 +0,0 @@
.PHONY: sentencetransformers
sentencetransformers: protogen
	bash ./install.sh


.PHONY: run
run: protogen
	@echo "Running sentencetransformers..."
	bash run.sh
	@echo "sentencetransformers run."

# This does not work well from the command line; it only works from an IDE such as VSCode.
.PHONY: test
test: protogen
	@echo "Testing sentencetransformers..."
	bash test.sh
	@echo "sentencetransformers tested."

.PHONY: protogen
protogen: backend_pb2_grpc.py backend_pb2.py

.PHONY: protogen-clean
protogen-clean:
	$(RM) backend_pb2_grpc.py backend_pb2.py

backend_pb2_grpc.py backend_pb2.py:
	python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto

.PHONY: clean
clean: protogen-clean
	rm -rf venv __pycache__
@@ -1,5 +0,0 @@
# Creating a separate environment for the sentencetransformers project

```
make sentencetransformers
```
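For context, the environment this target installs provides the `sentence_transformers` package wrapped by `backend.py` below. A quick, purely illustrative sanity check of such an environment might look like this (the model name and sentence are borrowed from `test.py` further down, not a recommendation):

```
from sentence_transformers import SentenceTransformer

# Downloads the model weights on first use.
model = SentenceTransformer("bert-base-nli-mean-tokens")

# encode() accepts a string or a list of strings and returns numpy arrays.
embedding = model.encode("This is a test sentence.")
print(embedding.shape)  # expected: (768,) for this BERT-base model
```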
@@ -1,114 +0,0 @@
#!/usr/bin/env python3
"""
Extra gRPC server for HuggingFace SentenceTransformer models.
"""
from concurrent import futures

import argparse
import signal
import sys
import os

import time
import backend_pb2
import backend_pb2_grpc

import grpc

from sentence_transformers import SentenceTransformer

_ONE_DAY_IN_SECONDS = 60 * 60 * 24

# If MAX_WORKERS is specified in the environment, use it; otherwise default to 1
MAX_WORKERS = int(os.environ.get('PYTHON_GRPC_MAX_WORKERS', '1'))

# Implement the BackendServicer class with the service methods
class BackendServicer(backend_pb2_grpc.BackendServicer):
    """
    A gRPC servicer for the backend service.

    This class implements the gRPC methods for the backend service, including Health, LoadModel, and Embedding.
    """
    def Health(self, request, context):
        """
        A gRPC method that returns the health status of the backend service.

        Args:
            request: A HealthRequest object that contains the request parameters.
            context: A grpc.ServicerContext object that provides information about the RPC.

        Returns:
            A Reply object that contains the health status of the backend service.
        """
        return backend_pb2.Reply(message=bytes("OK", 'utf-8'))

    def LoadModel(self, request, context):
        """
        A gRPC method that loads a model into memory.

        Args:
            request: A LoadModelRequest object that contains the request parameters.
            context: A grpc.ServicerContext object that provides information about the RPC.

        Returns:
            A Result object that contains the result of the LoadModel operation.
        """
        model_name = request.Model
        try:
            self.model = SentenceTransformer(model_name, trust_remote_code=request.TrustRemoteCode)
        except Exception as err:
            return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")

        # Implement your logic here for the LoadModel service
        # Replace this with your desired response
        return backend_pb2.Result(message="Model loaded successfully", success=True)

    def Embedding(self, request, context):
        """
        A gRPC method that calculates embeddings for a given sentence.

        Args:
            request: An EmbeddingRequest object that contains the request parameters.
            context: A grpc.ServicerContext object that provides information about the RPC.

        Returns:
            An EmbeddingResult object that contains the calculated embeddings.
        """
        # Implement your logic here for the Embedding service
        # Replace this with your desired response
        print("Calculated embeddings for: " + request.Embeddings, file=sys.stderr)
        sentence_embeddings = self.model.encode(request.Embeddings)
        return backend_pb2.EmbeddingResult(embeddings=sentence_embeddings)


def serve(address):
    server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS))
    backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server)
    server.add_insecure_port(address)
    server.start()
    print("Server started. Listening on: " + address, file=sys.stderr)

    # Define the signal handler function
    def signal_handler(sig, frame):
        print("Received termination signal. Shutting down...")
        server.stop(0)
        sys.exit(0)

    # Set the signal handlers for SIGINT and SIGTERM
    signal.signal(signal.SIGINT, signal_handler)
    signal.signal(signal.SIGTERM, signal_handler)

    try:
        while True:
            time.sleep(_ONE_DAY_IN_SECONDS)
    except KeyboardInterrupt:
        server.stop(0)

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Run the gRPC server.")
    parser.add_argument(
        "--addr", default="localhost:50051", help="The address to bind the server to."
    )
    args = parser.parse_args()

    serve(args.addr)
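To make the RPC surface concrete, here is a minimal client sketch for this server. The message names (`HealthMessage`, `ModelOptions`, `PredictOptions`) are taken from `test.py` below; treat this as an illustration of the generated stubs rather than a documented API:

```
import grpc

import backend_pb2
import backend_pb2_grpc

# Assumes backend.py is already running on localhost:50051 (its default --addr).
with grpc.insecure_channel("localhost:50051") as channel:
    stub = backend_pb2_grpc.BackendStub(channel)

    # Health check: the servicer replies with the bytes b"OK".
    print(stub.Health(backend_pb2.HealthMessage()).message)

    # Load a model by name, then request embeddings for a single sentence.
    load = stub.LoadModel(backend_pb2.ModelOptions(Model="bert-base-nli-mean-tokens"))
    assert load.success, load.message

    reply = stub.Embedding(backend_pb2.PredictOptions(Embeddings="This is a test sentence."))
    print(len(reply.embeddings))  # length of the embedding vector
```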
@@ -1,14 +0,0 @@
#!/bin/bash
set -e

source $(dirname $0)/../common/libbackend.sh

# This is here because the Intel pip index is broken: it returns a 200 status code for every package name but never returns any package links.
# That makes uv think the package exists in the Intel pip index, and by default uv stops looking at other pip indexes once it finds a match.
# We need uv to keep falling through to the default PyPI index so it can find optimum[openvino] there.
# The --upgrade actually allows us to *downgrade* torch to the version provided in the Intel pip index.
if [ "x${BUILD_PROFILE}" == "xintel" ]; then
    EXTRA_PIP_INSTALL_FLAGS+=" --upgrade --index-strategy=unsafe-first-match"
fi
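# For illustration (not in the original script): with BUILD_PROFILE=intel, the
# flags above make the uv-driven install behave roughly like
#     uv pip install --upgrade --index-strategy=unsafe-first-match <packages>
# i.e. uv consults every configured index instead of stopping at the first
# (broken) match; the real invocation is owned by installRequirements below.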

installRequirements
@@ -1,6 +0,0 @@
torch==2.4.1
accelerate
transformers
bitsandbytes
sentence-transformers==3.3.1
transformers
@@ -1,5 +0,0 @@
--extra-index-url https://download.pytorch.org/whl/cu118
torch==2.4.1+cu118
accelerate
sentence-transformers==3.3.1
transformers
@@ -1,4 +0,0 @@
torch==2.4.1
accelerate
sentence-transformers==3.3.1
transformers
@@ -1,5 +0,0 @@
--extra-index-url https://download.pytorch.org/whl/rocm6.0
torch==2.4.1+rocm6.0
accelerate
sentence-transformers==3.3.1
transformers
@@ -1,9 +0,0 @@
--extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
intel-extension-for-pytorch==2.3.110+xpu
torch==2.3.1+cxx11.abi
oneccl_bind_pt==2.3.100+xpu
optimum[openvino]
setuptools
accelerate
sentence-transformers==3.3.1
transformers
@@ -1,5 +0,0 @@
grpcio==1.69.0
protobuf
certifi
datasets
einops
@@ -1,4 +0,0 @@
#!/bin/bash
source $(dirname $0)/../common/libbackend.sh

startBackend $@
@@ -1,81 +0,0 @@
"""
A test script to test the gRPC service
"""
import unittest
import subprocess
import time
import backend_pb2
import backend_pb2_grpc

import grpc


class TestBackendServicer(unittest.TestCase):
    """
    TestBackendServicer is the class that tests the gRPC service
    """
    def setUp(self):
        """
        This method sets up the gRPC service by starting the server
        """
        self.service = subprocess.Popen(["python3", "backend.py", "--addr", "localhost:50051"])
        time.sleep(10)

    def tearDown(self) -> None:
        """
        This method tears down the gRPC service by terminating the server
        """
        self.service.kill()
        self.service.wait()

    def test_server_startup(self):
        """
        This method tests if the server starts up successfully
        """
        try:
            self.setUp()
            with grpc.insecure_channel("localhost:50051") as channel:
                stub = backend_pb2_grpc.BackendStub(channel)
                response = stub.Health(backend_pb2.HealthMessage())
                self.assertEqual(response.message, b'OK')
        except Exception as err:
            print(err)
            self.fail("Server failed to start")
        finally:
            self.tearDown()

    def test_load_model(self):
        """
        This method tests if the model is loaded successfully
        """
        try:
            self.setUp()
            with grpc.insecure_channel("localhost:50051") as channel:
                stub = backend_pb2_grpc.BackendStub(channel)
                response = stub.LoadModel(backend_pb2.ModelOptions(Model="bert-base-nli-mean-tokens"))
                self.assertTrue(response.success)
                self.assertEqual(response.message, "Model loaded successfully")
        except Exception as err:
            print(err)
            self.fail("LoadModel service failed")
        finally:
            self.tearDown()

    def test_embedding(self):
        """
        This method tests if the embeddings are generated successfully
        """
        try:
            self.setUp()
            with grpc.insecure_channel("localhost:50051") as channel:
                stub = backend_pb2_grpc.BackendStub(channel)
                response = stub.LoadModel(backend_pb2.ModelOptions(Model="bert-base-nli-mean-tokens"))
                self.assertTrue(response.success)
                embedding_request = backend_pb2.PredictOptions(Embeddings="This is a test sentence.")
                embedding_response = stub.Embedding(embedding_request)
                self.assertIsNotNone(embedding_response.embeddings)
        except Exception as err:
            print(err)
            self.fail("Embedding service failed")
        finally:
            self.tearDown()
@@ -1,6 +0,0 @@
#!/bin/bash
set -e

source $(dirname $0)/../common/libbackend.sh

runUnittests