Mirror of https://github.com/mudler/LocalAI.git
Synced 2025-05-20 02:24:59 +00:00
feat(transformers): merge sentencetransformers backend (#4624)
* merge sentencetransformers
* Add alias to silently redirect sentencetransformers to transformers
* Add alias also for transformers-musicgen
* Drop from makefile
* Move tests from sentencetransformers
* Remove sentencetransformers
* Remove tests from CI (part of transformers)
* Do not always try to load the tokenizer
* Adapt tests
* Fix typo
* Tiny adjustments

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
This commit is contained in:
parent 4bd8434ae0
commit 1e9bf19c8d

27 changed files with 104 additions and 354 deletions
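In short: embedding models that used the standalone sentencetransformers backend are now served by backend/python/transformers with Type "SentenceTransformer", and the old backend names are silently redirected. A rough sketch of the merged behavior, mirroring the tests added in this commit (it assumes a backend process already listening on localhost:50051 and the generated backend_pb2 stubs on the import path):

```
# Sketch: exercise the merged transformers backend over gRPC, as the new
# tests in backend/python/transformers/test.py do. Assumes backend.py is
# already running on localhost:50051 and backend_pb2*.py have been generated.
import grpc
import backend_pb2
import backend_pb2_grpc

with grpc.insecure_channel("localhost:50051") as channel:
    stub = backend_pb2_grpc.BackendStub(channel)
    # Type="SentenceTransformer" routes to the sentence-transformers code path
    result = stub.LoadModel(backend_pb2.ModelOptions(
        Model="bert-base-nli-mean-tokens", Type="SentenceTransformer"))
    assert result.success, result.message
    reply = stub.Embedding(backend_pb2.PredictOptions(
        Embeddings="This is a test sentence."))
    print(len(reply.embeddings))
```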
.github/workflows/test-extra.yml (vendored, 24 changes)
@@ -35,30 +35,6 @@ jobs:
         run: |
           make --jobs=5 --output-sync=target -C backend/python/transformers
           make --jobs=5 --output-sync=target -C backend/python/transformers test
-
-  tests-sentencetransformers:
-    runs-on: ubuntu-latest
-    steps:
-      - name: Clone
-        uses: actions/checkout@v4
-        with:
-          submodules: true
-      - name: Dependencies
-        run: |
-          sudo apt-get update
-          sudo apt-get install build-essential ffmpeg
-          # Install UV
-          curl -LsSf https://astral.sh/uv/install.sh | sh
-          sudo apt-get install -y ca-certificates cmake curl patch python3-pip
-          sudo apt-get install -y libopencv-dev
-          pip install --user --no-cache-dir grpcio-tools==1.64.1
-
-      - name: Test sentencetransformers
-        run: |
-          make --jobs=5 --output-sync=target -C backend/python/sentencetransformers
-          make --jobs=5 --output-sync=target -C backend/python/sentencetransformers test
-
-
   tests-rerankers:
     runs-on: ubuntu-latest
     steps:
.github/workflows/test.yml (vendored, 3 changes)
@@ -100,8 +100,7 @@ jobs:
           # The python3-grpc-tools package in 22.04 is too old
           pip install --user grpcio-tools
 
-          sudo rm -rfv /usr/bin/conda || true
-          PATH=$PATH:/opt/conda/bin make -C backend/python/sentencetransformers
+          make -C backend/python/transformers
 
           # Pre-build piper before we start tests in order to have shared libraries in place
           make sources/go-piper && \
@@ -15,7 +15,7 @@ ARG TARGETARCH
 ARG TARGETVARIANT
 
 ENV DEBIAN_FRONTEND=noninteractive
-ENV EXTERNAL_GRPC_BACKENDS="coqui:/build/backend/python/coqui/run.sh,huggingface-embeddings:/build/backend/python/sentencetransformers/run.sh,transformers:/build/backend/python/transformers/run.sh,sentencetransformers:/build/backend/python/sentencetransformers/run.sh,rerankers:/build/backend/python/rerankers/run.sh,autogptq:/build/backend/python/autogptq/run.sh,bark:/build/backend/python/bark/run.sh,diffusers:/build/backend/python/diffusers/run.sh,openvoice:/build/backend/python/openvoice/run.sh,kokoro:/build/backend/python/kokoro/run.sh,vllm:/build/backend/python/vllm/run.sh,mamba:/build/backend/python/mamba/run.sh,exllama2:/build/backend/python/exllama2/run.sh,parler-tts:/build/backend/python/parler-tts/run.sh"
+ENV EXTERNAL_GRPC_BACKENDS="coqui:/build/backend/python/coqui/run.sh,transformers:/build/backend/python/transformers/run.sh,rerankers:/build/backend/python/rerankers/run.sh,autogptq:/build/backend/python/autogptq/run.sh,bark:/build/backend/python/bark/run.sh,diffusers:/build/backend/python/diffusers/run.sh,openvoice:/build/backend/python/openvoice/run.sh,kokoro:/build/backend/python/kokoro/run.sh,vllm:/build/backend/python/vllm/run.sh,mamba:/build/backend/python/mamba/run.sh,exllama2:/build/backend/python/exllama2/run.sh,parler-tts:/build/backend/python/parler-tts/run.sh"
 
 
 RUN apt-get update && \

@@ -456,9 +456,6 @@ RUN if [[ ( "${EXTRA_BACKENDS}" =~ "kokoro" || -z "${EXTRA_BACKENDS}" ) && "$IMA
     if [[ ( "${EXTRA_BACKENDS}" =~ "openvoice" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
         make -C backend/python/openvoice \
     ; fi && \
-    if [[ ( "${EXTRA_BACKENDS}" =~ "sentencetransformers" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
-        make -C backend/python/sentencetransformers \
-    ; fi && \
     if [[ ( "${EXTRA_BACKENDS}" =~ "exllama2" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
         make -C backend/python/exllama2 \
     ; fi && \
Makefile (15 changes)
@@ -497,7 +497,7 @@ test: prepare test-models/testmodel.ggml grpcs
	@echo 'Running tests'
	export GO_TAGS="tts stablediffusion debug"
	$(MAKE) prepare-test
-	HUGGINGFACE_GRPC=$(abspath ./)/backend/python/sentencetransformers/run.sh TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
+	HUGGINGFACE_GRPC=$(abspath ./)/backend/python/transformers/run.sh TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
	$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="!llama && !llama-gguf" --flake-attempts $(TEST_FLAKES) --fail-fast -v -r $(TEST_PATHS)
	$(MAKE) test-llama
	$(MAKE) test-llama-gguf

@@ -583,10 +583,10 @@ protogen-go-clean:
	$(RM) bin/*
 
 .PHONY: protogen-python
-protogen-python: autogptq-protogen bark-protogen coqui-protogen diffusers-protogen exllama2-protogen mamba-protogen rerankers-protogen sentencetransformers-protogen transformers-protogen parler-tts-protogen kokoro-protogen vllm-protogen openvoice-protogen
+protogen-python: autogptq-protogen bark-protogen coqui-protogen diffusers-protogen exllama2-protogen mamba-protogen rerankers-protogen transformers-protogen parler-tts-protogen kokoro-protogen vllm-protogen openvoice-protogen
 
 .PHONY: protogen-python-clean
-protogen-python-clean: autogptq-protogen-clean bark-protogen-clean coqui-protogen-clean diffusers-protogen-clean exllama2-protogen-clean mamba-protogen-clean sentencetransformers-protogen-clean rerankers-protogen-clean transformers-protogen-clean parler-tts-protogen-clean kokoro-protogen-clean vllm-protogen-clean openvoice-protogen-clean
+protogen-python-clean: autogptq-protogen-clean bark-protogen-clean coqui-protogen-clean diffusers-protogen-clean exllama2-protogen-clean mamba-protogen-clean rerankers-protogen-clean transformers-protogen-clean parler-tts-protogen-clean kokoro-protogen-clean vllm-protogen-clean openvoice-protogen-clean
 
 .PHONY: autogptq-protogen
 autogptq-protogen:

@@ -644,14 +644,6 @@ rerankers-protogen:
 rerankers-protogen-clean:
	$(MAKE) -C backend/python/rerankers protogen-clean
 
-.PHONY: sentencetransformers-protogen
-sentencetransformers-protogen:
-	$(MAKE) -C backend/python/sentencetransformers protogen
-
-.PHONY: sentencetransformers-protogen-clean
-sentencetransformers-protogen-clean:
-	$(MAKE) -C backend/python/sentencetransformers protogen-clean
-
 .PHONY: transformers-protogen
 transformers-protogen:
	$(MAKE) -C backend/python/transformers protogen

@@ -701,7 +693,6 @@ prepare-extra-conda-environments: protogen-python
	$(MAKE) -C backend/python/diffusers
	$(MAKE) -C backend/python/vllm
	$(MAKE) -C backend/python/mamba
-	$(MAKE) -C backend/python/sentencetransformers
	$(MAKE) -C backend/python/rerankers
	$(MAKE) -C backend/python/transformers
	$(MAKE) -C backend/python/parler-tts
@@ -1,31 +0,0 @@
-.PHONY: sentencetransformers
-sentencetransformers: protogen
-	bash ./install.sh
-
-
-.PHONY: run
-run: protogen
-	@echo "Running sentencetransformers..."
-	bash run.sh
-	@echo "sentencetransformers run."
-
-# It is not working well by using command line. It only works with an IDE like VSCode.
-.PHONY: test
-test: protogen
-	@echo "Testing sentencetransformers..."
-	bash test.sh
-	@echo "sentencetransformers tested."
-
-.PHONY: protogen
-protogen: backend_pb2_grpc.py backend_pb2.py
-
-.PHONY: protogen-clean
-protogen-clean:
-	$(RM) backend_pb2_grpc.py backend_pb2.py
-
-backend_pb2_grpc.py backend_pb2.py:
-	python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto
-
-.PHONY: clean
-clean: protogen-clean
-	rm -rf venv __pycache__
@@ -1,5 +0,0 @@
-# Creating a separate environment for the sentencetransformers project
-
-```
-make sentencetransformers
-```
@@ -1,114 +0,0 @@
-#!/usr/bin/env python3
-"""
-Extra gRPC server for HuggingFace SentenceTransformer models.
-"""
-from concurrent import futures
-
-import argparse
-import signal
-import sys
-import os
-
-import time
-import backend_pb2
-import backend_pb2_grpc
-
-import grpc
-
-from sentence_transformers import SentenceTransformer
-
-_ONE_DAY_IN_SECONDS = 60 * 60 * 24
-
-# If MAX_WORKERS are specified in the environment use it, otherwise default to 1
-MAX_WORKERS = int(os.environ.get('PYTHON_GRPC_MAX_WORKERS', '1'))
-
-# Implement the BackendServicer class with the service methods
-class BackendServicer(backend_pb2_grpc.BackendServicer):
-    """
-    A gRPC servicer for the backend service.
-
-    This class implements the gRPC methods for the backend service, including Health, LoadModel, and Embedding.
-    """
-    def Health(self, request, context):
-        """
-        A gRPC method that returns the health status of the backend service.
-
-        Args:
-            request: A HealthRequest object that contains the request parameters.
-            context: A grpc.ServicerContext object that provides information about the RPC.
-
-        Returns:
-            A Reply object that contains the health status of the backend service.
-        """
-        return backend_pb2.Reply(message=bytes("OK", 'utf-8'))
-
-    def LoadModel(self, request, context):
-        """
-        A gRPC method that loads a model into memory.
-
-        Args:
-            request: A LoadModelRequest object that contains the request parameters.
-            context: A grpc.ServicerContext object that provides information about the RPC.
-
-        Returns:
-            A Result object that contains the result of the LoadModel operation.
-        """
-        model_name = request.Model
-        try:
-            self.model = SentenceTransformer(model_name, trust_remote_code=request.TrustRemoteCode)
-        except Exception as err:
-            return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
-
-        # Implement your logic here for the LoadModel service
-        # Replace this with your desired response
-        return backend_pb2.Result(message="Model loaded successfully", success=True)
-
-    def Embedding(self, request, context):
-        """
-        A gRPC method that calculates embeddings for a given sentence.
-
-        Args:
-            request: An EmbeddingRequest object that contains the request parameters.
-            context: A grpc.ServicerContext object that provides information about the RPC.
-
-        Returns:
-            An EmbeddingResult object that contains the calculated embeddings.
-        """
-        # Implement your logic here for the Embedding service
-        # Replace this with your desired response
-        print("Calculated embeddings for: " + request.Embeddings, file=sys.stderr)
-        sentence_embeddings = self.model.encode(request.Embeddings)
-        return backend_pb2.EmbeddingResult(embeddings=sentence_embeddings)
-
-
-def serve(address):
-    server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS))
-    backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server)
-    server.add_insecure_port(address)
-    server.start()
-    print("Server started. Listening on: " + address, file=sys.stderr)
-
-    # Define the signal handler function
-    def signal_handler(sig, frame):
-        print("Received termination signal. Shutting down...")
-        server.stop(0)
-        sys.exit(0)
-
-    # Set the signal handlers for SIGINT and SIGTERM
-    signal.signal(signal.SIGINT, signal_handler)
-    signal.signal(signal.SIGTERM, signal_handler)
-
-    try:
-        while True:
-            time.sleep(_ONE_DAY_IN_SECONDS)
-    except KeyboardInterrupt:
-        server.stop(0)
-
-if __name__ == "__main__":
-    parser = argparse.ArgumentParser(description="Run the gRPC server.")
-    parser.add_argument(
-        "--addr", default="localhost:50051", help="The address to bind the server to."
-    )
-    args = parser.parse_args()
-
-    serve(args.addr)
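The deleted server above was a thin wrapper: aside from the gRPC plumbing, it only constructed a SentenceTransformer and called encode(). A minimal standalone sketch of those two library calls (assumes sentence-transformers is installed and the model can be fetched from the Hugging Face Hub):

```
# The two sentence-transformers calls the removed backend wrapped.
# Assumes `pip install sentence-transformers` and hub access.
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("bert-base-nli-mean-tokens")
embedding = model.encode("This is a test sentence.")
print(embedding.shape)  # a single vector, e.g. (768,) for this model
```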
@@ -1,14 +0,0 @@
-#!/bin/bash
-set -e
-
-source $(dirname $0)/../common/libbackend.sh
-
-# This is here because the Intel pip index is broken and returns 200 status codes for every package name, it just doesn't return any package links.
-# This makes uv think that the package exists in the Intel pip index, and by default it stops looking at other pip indexes once it finds a match.
-# We need uv to continue falling through to the pypi default index to find optimum[openvino] in the pypi index
-# the --upgrade actually allows us to *downgrade* torch to the version provided in the Intel pip index
-if [ "x${BUILD_PROFILE}" == "xintel" ]; then
-    EXTRA_PIP_INSTALL_FLAGS+=" --upgrade --index-strategy=unsafe-first-match"
-fi
-
-installRequirements
@@ -1,6 +0,0 @@
-torch==2.4.1
-accelerate
-transformers
-bitsandbytes
-sentence-transformers==3.3.1
-transformers

@@ -1,5 +0,0 @@
---extra-index-url https://download.pytorch.org/whl/cu118
-torch==2.4.1+cu118
-accelerate
-sentence-transformers==3.3.1
-transformers

@@ -1,4 +0,0 @@
-torch==2.4.1
-accelerate
-sentence-transformers==3.3.1
-transformers

@@ -1,5 +0,0 @@
---extra-index-url https://download.pytorch.org/whl/rocm6.0
-torch==2.4.1+rocm6.0
-accelerate
-sentence-transformers==3.3.1
-transformers

@@ -1,9 +0,0 @@
---extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
-intel-extension-for-pytorch==2.3.110+xpu
-torch==2.3.1+cxx11.abi
-oneccl_bind_pt==2.3.100+xpu
-optimum[openvino]
-setuptools
-accelerate
-sentence-transformers==3.3.1
-transformers

@@ -1,5 +0,0 @@
-grpcio==1.69.0
-protobuf
-certifi
-datasets
-einops
@@ -1,4 +0,0 @@
-#!/bin/bash
-source $(dirname $0)/../common/libbackend.sh
-
-startBackend $@
@@ -1,81 +0,0 @@
-"""
-A test script to test the gRPC service
-"""
-import unittest
-import subprocess
-import time
-import backend_pb2
-import backend_pb2_grpc
-
-import grpc
-
-
-class TestBackendServicer(unittest.TestCase):
-    """
-    TestBackendServicer is the class that tests the gRPC service
-    """
-    def setUp(self):
-        """
-        This method sets up the gRPC service by starting the server
-        """
-        self.service = subprocess.Popen(["python3", "backend.py", "--addr", "localhost:50051"])
-        time.sleep(10)
-
-    def tearDown(self) -> None:
-        """
-        This method tears down the gRPC service by terminating the server
-        """
-        self.service.kill()
-        self.service.wait()
-
-    def test_server_startup(self):
-        """
-        This method tests if the server starts up successfully
-        """
-        try:
-            self.setUp()
-            with grpc.insecure_channel("localhost:50051") as channel:
-                stub = backend_pb2_grpc.BackendStub(channel)
-                response = stub.Health(backend_pb2.HealthMessage())
-                self.assertEqual(response.message, b'OK')
-        except Exception as err:
-            print(err)
-            self.fail("Server failed to start")
-        finally:
-            self.tearDown()
-
-    def test_load_model(self):
-        """
-        This method tests if the model is loaded successfully
-        """
-        try:
-            self.setUp()
-            with grpc.insecure_channel("localhost:50051") as channel:
-                stub = backend_pb2_grpc.BackendStub(channel)
-                response = stub.LoadModel(backend_pb2.ModelOptions(Model="bert-base-nli-mean-tokens"))
-                self.assertTrue(response.success)
-                self.assertEqual(response.message, "Model loaded successfully")
-        except Exception as err:
-            print(err)
-            self.fail("LoadModel service failed")
-        finally:
-            self.tearDown()
-
-    def test_embedding(self):
-        """
-        This method tests if the embeddings are generated successfully
-        """
-        try:
-            self.setUp()
-            with grpc.insecure_channel("localhost:50051") as channel:
-                stub = backend_pb2_grpc.BackendStub(channel)
-                response = stub.LoadModel(backend_pb2.ModelOptions(Model="bert-base-nli-mean-tokens"))
-                self.assertTrue(response.success)
-                embedding_request = backend_pb2.PredictOptions(Embeddings="This is a test sentence.")
-                embedding_response = stub.Embedding(embedding_request)
-                self.assertIsNotNone(embedding_response.embeddings)
-        except Exception as err:
-            print(err)
-            self.fail("Embedding service failed")
-        finally:
-            self.tearDown()
@@ -1,6 +0,0 @@
-#!/bin/bash
-set -e
-
-source $(dirname $0)/../common/libbackend.sh
-
-runUnittests
@@ -25,6 +25,8 @@ from transformers import AutoTokenizer, AutoModel, set_seed, TextIteratorStreame
 from transformers import AutoProcessor, MusicgenForConditionalGeneration
 from scipy.io import wavfile
 import outetts
+from sentence_transformers import SentenceTransformer
+
 
 _ONE_DAY_IN_SECONDS = 60 * 60 * 24
 

@@ -88,10 +90,12 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
         self.CUDA = torch.cuda.is_available()
         self.OV=False
         self.OuteTTS=False
+        self.SentenceTransformer = False
 
         device_map="cpu"
 
         quantization = None
+        autoTokenizer = True
 
         if self.CUDA:
             from transformers import BitsAndBytesConfig, AutoModelForCausalLM

@@ -195,9 +199,11 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
                                                   device=device_map)
             self.OV = True
         elif request.Type == "MusicgenForConditionalGeneration":
+            autoTokenizer = False
             self.processor = AutoProcessor.from_pretrained(model_name)
             self.model = MusicgenForConditionalGeneration.from_pretrained(model_name)
         elif request.Type == "OuteTTS":
+            autoTokenizer = False
             options = request.Options
             MODELNAME = "OuteAI/OuteTTS-0.3-1B"
             TOKENIZER = "OuteAI/OuteTTS-0.3-1B"

@@ -235,6 +241,10 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
                 self.speaker = self.interface.create_speaker(audio_path=self.AudioPath)
             else:
                 self.speaker = self.interface.load_default_speaker(name=SPEAKER)
+        elif request.Type == "SentenceTransformer":
+            autoTokenizer = False
+            self.model = SentenceTransformer(model_name, trust_remote_code=request.TrustRemoteCode)
+            self.SentenceTransformer = True
         else:
             print("Automodel", file=sys.stderr)
             self.model = AutoModel.from_pretrained(model_name,

@@ -250,7 +260,7 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
         else:
             self.max_tokens = 512
 
-        if request.Type != "MusicgenForConditionalGeneration":
+        if autoTokenizer:
             self.tokenizer = AutoTokenizer.from_pretrained(model_name, use_safetensors=True)
             self.XPU = False
 

@@ -286,6 +296,13 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
         max_length = 512
         if request.Tokens != 0:
             max_length = request.Tokens
+
+        embeds = None
+
+        if self.SentenceTransformer:
+            print("Calculated embeddings for: " + request.Embeddings, file=sys.stderr)
+            embeds = self.model.encode(request.Embeddings)
+        else:
             encoded_input = self.tokenizer(request.Embeddings, padding=True, truncation=True, max_length=max_length, return_tensors="pt")
 
             # Create word embeddings

@@ -297,7 +314,8 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
 
             # Pool to get sentence embeddings; i.e. generate one 1024 vector for the entire sentence
             sentence_embeddings = mean_pooling(model_output, encoded_input['attention_mask'])
-        return backend_pb2.EmbeddingResult(embeddings=sentence_embeddings[0])
+            embeds = sentence_embeddings[0]
+        return backend_pb2.EmbeddingResult(embeddings=embeds)
 
     async def _predict(self, request, context, streaming=False):
         set_seed(request.Seed)
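With the new autoTokenizer flag and the SentenceTransformer branch, the Embedding path now dispatches in one place: sentence-transformers models embed via encode(), everything else keeps the tokenizer plus mean-pooling route. A condensed sketch of that dispatch, simplified from the hunks above (the real method lives in backend/python/transformers/backend.py and returns a gRPC EmbeddingResult; mean_pooling is the helper already present there):

```
# Condensed sketch of the merged embedding dispatch (simplified from the
# diff above; not the full gRPC servicer).
def embed(self, text, max_length=512):
    if self.SentenceTransformer:
        # sentence-transformers does tokenization and pooling internally
        return self.model.encode(text)
    # Plain transformers path: tokenize, forward pass, then mean-pool the
    # token embeddings into a single sentence vector.
    encoded = self.tokenizer(text, padding=True, truncation=True,
                             max_length=max_length, return_tensors="pt")
    output = self.model(**encoded)
    return mean_pooling(output, encoded["attention_mask"])[0]
```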
@@ -4,3 +4,4 @@ accelerate
 transformers
 bitsandbytes
 outetts
+sentence-transformers==3.3.1

@@ -5,3 +5,4 @@ accelerate
 transformers
 bitsandbytes
 outetts
+sentence-transformers==3.3.1

@@ -4,3 +4,4 @@ llvmlite==0.43.0
 transformers
 bitsandbytes
 outetts
+sentence-transformers==3.3.1

@@ -5,3 +5,5 @@ transformers
 llvmlite==0.43.0
 bitsandbytes
 outetts
+bitsandbytes
+sentence-transformers==3.3.1

@@ -7,3 +7,4 @@ llvmlite==0.43.0
 intel-extension-for-transformers
 bitsandbytes
 outetts
+sentence-transformers==3.3.1
@@ -135,3 +135,39 @@ class TestBackendServicer(unittest.TestCase):
             self.fail("SoundGeneration service failed")
         finally:
             self.tearDown()
+
+    def test_embed_load_model(self):
+        """
+        This method tests if the model is loaded successfully
+        """
+        try:
+            self.setUp()
+            with grpc.insecure_channel("localhost:50051") as channel:
+                stub = backend_pb2_grpc.BackendStub(channel)
+                response = stub.LoadModel(backend_pb2.ModelOptions(Model="bert-base-nli-mean-tokens",Type="SentenceTransformer"))
+                self.assertTrue(response.success)
+                self.assertEqual(response.message, "Model loaded successfully")
+        except Exception as err:
+            print(err)
+            self.fail("LoadModel service failed")
+        finally:
+            self.tearDown()
+
+    def test_sentencetransformers_embedding(self):
+        """
+        This method tests if the embeddings are generated successfully
+        """
+        try:
+            self.setUp()
+            with grpc.insecure_channel("localhost:50051") as channel:
+                stub = backend_pb2_grpc.BackendStub(channel)
+                response = stub.LoadModel(backend_pb2.ModelOptions(Model="bert-base-nli-mean-tokens",Type="SentenceTransformer"))
+                self.assertTrue(response.success)
+                embedding_request = backend_pb2.PredictOptions(Embeddings="This is a test sentence.")
+                embedding_response = stub.Embedding(embedding_request)
+                self.assertIsNotNone(embedding_response.embeddings)
+        except Exception as err:
+            print(err)
+            self.fail("Embedding service failed")
+        finally:
+            self.tearDown()
@@ -822,7 +822,7 @@ var _ = Describe("API test", func() {
 
 		application, err := application.New(
 			append(commonOpts,
-				config.WithExternalBackend("huggingface", os.Getenv("HUGGINGFACE_GRPC")),
+				config.WithExternalBackend("transformers", os.Getenv("HUGGINGFACE_GRPC")),
 				config.WithContext(c),
 				config.WithModelPath(modelPath),
 			)...)
@ -25,8 +25,16 @@ var Aliases map[string]string = map[string]string{
|
||||||
"go-llama": LLamaCPP,
|
"go-llama": LLamaCPP,
|
||||||
"llama": LLamaCPP,
|
"llama": LLamaCPP,
|
||||||
"embedded-store": LocalStoreBackend,
|
"embedded-store": LocalStoreBackend,
|
||||||
|
"huggingface-embeddings": TransformersBackend,
|
||||||
"langchain-huggingface": LCHuggingFaceBackend,
|
"langchain-huggingface": LCHuggingFaceBackend,
|
||||||
"transformers-musicgen": TransformersBackend,
|
"transformers-musicgen": TransformersBackend,
|
||||||
|
"sentencetransformers": TransformersBackend,
|
||||||
|
}
|
||||||
|
|
||||||
|
var TypeAlias map[string]string = map[string]string{
|
||||||
|
"sentencetransformers": "SentenceTransformer",
|
||||||
|
"huggingface-embeddings": "SentenceTransformer",
|
||||||
|
"transformers-musicgen": "MusicgenForConditionalGeneration",
|
||||||
}
|
}
|
||||||
|
|
||||||
var AutoDetect = os.Getenv("DISABLE_AUTODETECT") != "true"
|
var AutoDetect = os.Getenv("DISABLE_AUTODETECT") != "true"
|
||||||
|
@ -396,6 +404,7 @@ func (ml *ModelLoader) grpcModel(backend string, autodetect bool, o *Options) fu
|
||||||
}
|
}
|
||||||
|
|
||||||
log.Debug().Msgf("Wait for the service to start up")
|
log.Debug().Msgf("Wait for the service to start up")
|
||||||
|
log.Debug().Msgf("Options: %+v", o.gRPCOptions)
|
||||||
|
|
||||||
// Wait for the service to start up
|
// Wait for the service to start up
|
||||||
ready := false
|
ready := false
|
||||||
|
@ -460,8 +469,15 @@ func (ml *ModelLoader) backendLoader(opts ...Option) (client grpc.Backend, err e
|
||||||
|
|
||||||
backend := strings.ToLower(o.backendString)
|
backend := strings.ToLower(o.backendString)
|
||||||
if realBackend, exists := Aliases[backend]; exists {
|
if realBackend, exists := Aliases[backend]; exists {
|
||||||
|
typeAlias, exists := TypeAlias[backend]
|
||||||
|
if exists {
|
||||||
|
log.Debug().Msgf("'%s' is a type alias of '%s' (%s)", backend, realBackend, typeAlias)
|
||||||
|
o.gRPCOptions.Type = typeAlias
|
||||||
|
} else {
|
||||||
|
log.Debug().Msgf("'%s' is an alias of '%s'", backend, realBackend)
|
||||||
|
}
|
||||||
|
|
||||||
backend = realBackend
|
backend = realBackend
|
||||||
log.Debug().Msgf("%s is an alias of %s", backend, realBackend)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
ml.stopActiveBackends(o.modelID, o.singleActiveBackend)
|
ml.stopActiveBackends(o.modelID, o.singleActiveBackend)
|
||||||
|
|
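The two maps above do the redirection: Aliases rewrites the backend name, and TypeAlias additionally forces the model Type so the merged backend picks the right code path. An illustrative Python rendering of that lookup (the authoritative version is the Go diff above; this assumes the TransformersBackend constant resolves to the string "transformers"):

```
# Illustrative Python rendering of the alias resolution added in Go above.
ALIASES = {
    "huggingface-embeddings": "transformers",
    "sentencetransformers": "transformers",
    "transformers-musicgen": "transformers",
}
TYPE_ALIASES = {
    "sentencetransformers": "SentenceTransformer",
    "huggingface-embeddings": "SentenceTransformer",
    "transformers-musicgen": "MusicgenForConditionalGeneration",
}

def resolve(backend):
    """Return (real_backend, forced_type) for a possibly aliased name."""
    return ALIASES.get(backend, backend), TYPE_ALIASES.get(backend)

print(resolve("sentencetransformers"))  # ('transformers', 'SentenceTransformer')
```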
@@ -1,5 +1,5 @@
 name: code-search-ada-code-001
-backend: huggingface
+backend: sentencetransformers
 embeddings: true
 parameters:
   model: all-MiniLM-L6-v2
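Because of the alias, a model config that still declares backend: sentencetransformers (like the test config above) keeps working unchanged. A minimal sketch of querying such a model through LocalAI's OpenAI-compatible embeddings endpoint (assumes a LocalAI instance on localhost:8080 serving this config; standard library only):

```
# Sketch: request embeddings for the config above from a running LocalAI
# instance. Assumes it listens on http://localhost:8080 (stdlib only).
import json
import urllib.request

payload = json.dumps({
    "model": "code-search-ada-code-001",
    "input": "This is a test sentence.",
}).encode()
request = urllib.request.Request(
    "http://localhost:8080/v1/embeddings",
    data=payload,
    headers={"Content-Type": "application/json"},
)
with urllib.request.urlopen(request) as response:
    body = json.load(response)
print(len(body["data"][0]["embedding"]))  # embedding dimensionality
```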