diff --git a/.github/workflows/test-extra.yml b/.github/workflows/test-extra.yml index eacd3ab0..e99ea516 100644 --- a/.github/workflows/test-extra.yml +++ b/.github/workflows/test-extra.yml @@ -35,30 +35,6 @@ jobs: run: | make --jobs=5 --output-sync=target -C backend/python/transformers make --jobs=5 --output-sync=target -C backend/python/transformers test - - tests-sentencetransformers: - runs-on: ubuntu-latest - steps: - - name: Clone - uses: actions/checkout@v4 - with: - submodules: true - - name: Dependencies - run: | - sudo apt-get update - sudo apt-get install build-essential ffmpeg - # Install UV - curl -LsSf https://astral.sh/uv/install.sh | sh - sudo apt-get install -y ca-certificates cmake curl patch python3-pip - sudo apt-get install -y libopencv-dev - pip install --user --no-cache-dir grpcio-tools==1.64.1 - - - name: Test sentencetransformers - run: | - make --jobs=5 --output-sync=target -C backend/python/sentencetransformers - make --jobs=5 --output-sync=target -C backend/python/sentencetransformers test - - tests-rerankers: runs-on: ubuntu-latest steps: diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index ecef0569..0ee93afa 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -100,8 +100,7 @@ jobs: # The python3-grpc-tools package in 22.04 is too old pip install --user grpcio-tools - sudo rm -rfv /usr/bin/conda || true - PATH=$PATH:/opt/conda/bin make -C backend/python/sentencetransformers + make -C backend/python/transformers # Pre-build piper before we start tests in order to have shared libraries in place make sources/go-piper && \ diff --git a/Dockerfile b/Dockerfile index 9fb07516..4ddc921d 100644 --- a/Dockerfile +++ b/Dockerfile @@ -15,7 +15,7 @@ ARG TARGETARCH ARG TARGETVARIANT ENV DEBIAN_FRONTEND=noninteractive -ENV EXTERNAL_GRPC_BACKENDS="coqui:/build/backend/python/coqui/run.sh,huggingface-embeddings:/build/backend/python/sentencetransformers/run.sh,transformers:/build/backend/python/transformers/run.sh,sentencetransformers:/build/backend/python/sentencetransformers/run.sh,rerankers:/build/backend/python/rerankers/run.sh,autogptq:/build/backend/python/autogptq/run.sh,bark:/build/backend/python/bark/run.sh,diffusers:/build/backend/python/diffusers/run.sh,openvoice:/build/backend/python/openvoice/run.sh,kokoro:/build/backend/python/kokoro/run.sh,vllm:/build/backend/python/vllm/run.sh,mamba:/build/backend/python/mamba/run.sh,exllama2:/build/backend/python/exllama2/run.sh,parler-tts:/build/backend/python/parler-tts/run.sh" +ENV EXTERNAL_GRPC_BACKENDS="coqui:/build/backend/python/coqui/run.sh,transformers:/build/backend/python/transformers/run.sh,rerankers:/build/backend/python/rerankers/run.sh,autogptq:/build/backend/python/autogptq/run.sh,bark:/build/backend/python/bark/run.sh,diffusers:/build/backend/python/diffusers/run.sh,openvoice:/build/backend/python/openvoice/run.sh,kokoro:/build/backend/python/kokoro/run.sh,vllm:/build/backend/python/vllm/run.sh,mamba:/build/backend/python/mamba/run.sh,exllama2:/build/backend/python/exllama2/run.sh,parler-tts:/build/backend/python/parler-tts/run.sh" RUN apt-get update && \ @@ -456,9 +456,6 @@ RUN if [[ ( "${EXTRA_BACKENDS}" =~ "kokoro" || -z "${EXTRA_BACKENDS}" ) && "$IMA if [[ ( "${EXTRA_BACKENDS}" =~ "openvoice" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \ make -C backend/python/openvoice \ ; fi && \ - if [[ ( "${EXTRA_BACKENDS}" =~ "sentencetransformers" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \ - make -C 
backend/python/sentencetransformers \ - ; fi && \ if [[ ( "${EXTRA_BACKENDS}" =~ "exllama2" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \ make -C backend/python/exllama2 \ ; fi && \ diff --git a/Makefile b/Makefile index 03468ffb..944cad37 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ DETECT_LIBS?=true # llama.cpp versions GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be -CPPLLAMA_VERSION?=4dbc8b9cb71876e005724f4e8f73a3544646bcf5 +CPPLLAMA_VERSION?=3edfa7d3753c29e44b964c0ff424d2ea8d5fdee6 # whisper.cpp version WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp @@ -22,10 +22,6 @@ PIPER_VERSION?=e10ca041a885d4a8f3871d52924b47792d5e5aa0 STABLEDIFFUSION_REPO?=https://github.com/mudler/go-stable-diffusion STABLEDIFFUSION_VERSION?=4a3cd6aeae6f66ee57eae9a0075f8c58c3a6a38f -# tinydream version -TINYDREAM_REPO?=https://github.com/M0Rf30/go-tiny-dream -TINYDREAM_VERSION?=c04fa463ace9d9a6464313aa5f9cd0f953b6c057 - # bark.cpp BARKCPP_REPO?=https://github.com/PABannier/bark.cpp.git BARKCPP_VERSION?=v1.0.0 @@ -188,11 +184,6 @@ ifeq ($(findstring stablediffusion,$(GO_TAGS)),stablediffusion) OPTIONAL_GRPC+=backend-assets/grpc/stablediffusion endif -ifeq ($(findstring tinydream,$(GO_TAGS)),tinydream) -# OPTIONAL_TARGETS+=go-tiny-dream/libtinydream.a - OPTIONAL_GRPC+=backend-assets/grpc/tinydream -endif - ifeq ($(findstring tts,$(GO_TAGS)),tts) # OPTIONAL_TARGETS+=go-piper/libpiper_binding.a # OPTIONAL_TARGETS+=backend-assets/espeak-ng-data @@ -327,19 +318,6 @@ else mv backend-assets/lib/libonnxruntime.so.$(ONNX_VERSION) backend-assets/lib/libonnxruntime.so.1 endif -## tiny-dream -sources/go-tiny-dream: - mkdir -p sources/go-tiny-dream - cd sources/go-tiny-dream && \ - git init && \ - git remote add origin $(TINYDREAM_REPO) && \ - git fetch origin && \ - git checkout $(TINYDREAM_VERSION) && \ - git submodule update --init --recursive --depth 1 --single-branch - -sources/go-tiny-dream/libtinydream.a: sources/go-tiny-dream - $(MAKE) -C sources/go-tiny-dream libtinydream.a - ## whisper sources/whisper.cpp: mkdir -p sources/whisper.cpp @@ -353,12 +331,11 @@ sources/whisper.cpp: sources/whisper.cpp/libwhisper.a: sources/whisper.cpp cd sources/whisper.cpp && $(MAKE) libwhisper.a libggml.a -get-sources: sources/go-llama.cpp sources/go-piper sources/stablediffusion-ggml.cpp sources/bark.cpp sources/whisper.cpp sources/go-stable-diffusion sources/go-tiny-dream backend/cpp/llama/llama.cpp +get-sources: sources/go-llama.cpp sources/go-piper sources/stablediffusion-ggml.cpp sources/bark.cpp sources/whisper.cpp sources/go-stable-diffusion backend/cpp/llama/llama.cpp replace: $(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp=$(CURDIR)/sources/whisper.cpp $(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp/bindings/go=$(CURDIR)/sources/whisper.cpp/bindings/go - $(GOCMD) mod edit -replace github.com/M0Rf30/go-tiny-dream=$(CURDIR)/sources/go-tiny-dream $(GOCMD) mod edit -replace github.com/mudler/go-piper=$(CURDIR)/sources/go-piper $(GOCMD) mod edit -replace github.com/mudler/go-stable-diffusion=$(CURDIR)/sources/go-stable-diffusion $(GOCMD) mod edit -replace github.com/go-skynet/go-llama.cpp=$(CURDIR)/sources/go-llama.cpp @@ -366,7 +343,6 @@ replace: dropreplace: $(GOCMD) mod edit -dropreplace github.com/ggerganov/whisper.cpp $(GOCMD) mod edit -dropreplace github.com/ggerganov/whisper.cpp/bindings/go - $(GOCMD) mod edit -dropreplace github.com/M0Rf30/go-tiny-dream $(GOCMD) mod edit -dropreplace 
github.com/mudler/go-piper $(GOCMD) mod edit -dropreplace github.com/mudler/go-stable-diffusion $(GOCMD) mod edit -dropreplace github.com/go-skynet/go-llama.cpp @@ -381,7 +357,6 @@ rebuild: ## Rebuilds the project $(MAKE) -C sources/whisper.cpp clean $(MAKE) -C sources/go-stable-diffusion clean $(MAKE) -C sources/go-piper clean - $(MAKE) -C sources/go-tiny-dream clean $(MAKE) build prepare: prepare-sources $(OPTIONAL_TARGETS) @@ -497,7 +472,7 @@ test: prepare test-models/testmodel.ggml grpcs @echo 'Running tests' export GO_TAGS="tts stablediffusion debug" $(MAKE) prepare-test - HUGGINGFACE_GRPC=$(abspath ./)/backend/python/sentencetransformers/run.sh TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \ + HUGGINGFACE_GRPC=$(abspath ./)/backend/python/transformers/run.sh TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \ $(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="!llama && !llama-gguf" --flake-attempts $(TEST_FLAKES) --fail-fast -v -r $(TEST_PATHS) $(MAKE) test-llama $(MAKE) test-llama-gguf @@ -583,10 +558,10 @@ protogen-go-clean: $(RM) bin/* .PHONY: protogen-python -protogen-python: autogptq-protogen bark-protogen coqui-protogen diffusers-protogen exllama2-protogen mamba-protogen rerankers-protogen sentencetransformers-protogen transformers-protogen parler-tts-protogen kokoro-protogen vllm-protogen openvoice-protogen +protogen-python: autogptq-protogen bark-protogen coqui-protogen diffusers-protogen exllama2-protogen mamba-protogen rerankers-protogen transformers-protogen parler-tts-protogen kokoro-protogen vllm-protogen openvoice-protogen .PHONY: protogen-python-clean -protogen-python-clean: autogptq-protogen-clean bark-protogen-clean coqui-protogen-clean diffusers-protogen-clean exllama2-protogen-clean mamba-protogen-clean sentencetransformers-protogen-clean rerankers-protogen-clean transformers-protogen-clean parler-tts-protogen-clean kokoro-protogen-clean vllm-protogen-clean openvoice-protogen-clean +protogen-python-clean: autogptq-protogen-clean bark-protogen-clean coqui-protogen-clean diffusers-protogen-clean exllama2-protogen-clean mamba-protogen-clean rerankers-protogen-clean transformers-protogen-clean parler-tts-protogen-clean kokoro-protogen-clean vllm-protogen-clean openvoice-protogen-clean .PHONY: autogptq-protogen autogptq-protogen: @@ -644,14 +619,6 @@ rerankers-protogen: rerankers-protogen-clean: $(MAKE) -C backend/python/rerankers protogen-clean -.PHONY: sentencetransformers-protogen -sentencetransformers-protogen: - $(MAKE) -C backend/python/sentencetransformers protogen - -.PHONY: sentencetransformers-protogen-clean -sentencetransformers-protogen-clean: - $(MAKE) -C backend/python/sentencetransformers protogen-clean - .PHONY: transformers-protogen transformers-protogen: $(MAKE) -C backend/python/transformers protogen @@ -701,7 +668,6 @@ prepare-extra-conda-environments: protogen-python $(MAKE) -C backend/python/diffusers $(MAKE) -C backend/python/vllm $(MAKE) -C backend/python/mamba - $(MAKE) -C backend/python/sentencetransformers $(MAKE) -C backend/python/rerankers $(MAKE) -C backend/python/transformers $(MAKE) -C backend/python/parler-tts @@ -864,13 +830,6 @@ ifneq ($(UPX),) $(UPX) backend-assets/grpc/silero-vad endif -backend-assets/grpc/tinydream: sources/go-tiny-dream sources/go-tiny-dream/libtinydream.a backend-assets/grpc - 
CGO_LDFLAGS="$(CGO_LDFLAGS)" LIBRARY_PATH=$(CURDIR)/go-tiny-dream \ - $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/tinydream ./backend/go/image/tinydream -ifneq ($(UPX),) - $(UPX) backend-assets/grpc/tinydream -endif - backend-assets/grpc/whisper: sources/whisper.cpp sources/whisper.cpp/libwhisper.a backend-assets/grpc CGO_LDFLAGS="$(CGO_LDFLAGS) $(CGO_LDFLAGS_WHISPER)" C_INCLUDE_PATH="$(CURDIR)/sources/whisper.cpp/include:$(CURDIR)/sources/whisper.cpp/ggml/include" LIBRARY_PATH=$(CURDIR)/sources/whisper.cpp \ $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/whisper ./backend/go/transcribe/whisper diff --git a/backend/cpp/llama/grpc-server.cpp b/backend/cpp/llama/grpc-server.cpp index 4e75e7b0..9aeb34db 100644 --- a/backend/cpp/llama/grpc-server.cpp +++ b/backend/cpp/llama/grpc-server.cpp @@ -134,6 +134,32 @@ static std::string tokens_to_output_formatted_string(const llama_context *ctx, c return out; } +// Adds an RPC server +// https://github.com/ggerganov/llama.cpp/compare/4dbc8b9cb71876e005724f4e8f73a3544646bcf5..3edfa7d3753c29e44b964c0ff424d2ea8d5fdee6 +static void add_rpc_devices(std::string servers) { + auto rpc_servers = string_split(servers, ','); + if (rpc_servers.empty()) { + throw std::invalid_argument("no RPC servers specified"); + } + ggml_backend_reg_t rpc_reg = ggml_backend_reg_by_name("RPC"); + if (!rpc_reg) { + throw std::invalid_argument("failed to find RPC backend"); + } + typedef ggml_backend_dev_t (*ggml_backend_rpc_add_device_t)(const char * endpoint); + ggml_backend_rpc_add_device_t ggml_backend_rpc_add_device_fn = (ggml_backend_rpc_add_device_t) ggml_backend_reg_get_proc_address(rpc_reg, "ggml_backend_rpc_add_device"); + if (!ggml_backend_rpc_add_device_fn) { + throw std::invalid_argument("failed to find RPC device add function"); + } + for (const auto & server : rpc_servers) { + ggml_backend_dev_t dev = ggml_backend_rpc_add_device_fn(server.c_str()); + if (dev) { + ggml_backend_device_register(dev); + } else { + throw std::invalid_argument("failed to register RPC device"); + } + } +} + // convert a vector of completion_token_output to json static json probs_vector_to_json(const llama_context *ctx, const std::vector &probs) { @@ -2282,7 +2308,7 @@ static void params_parse(const backend::ModelOptions* request, const char *llama_grpc_servers = std::getenv("LLAMACPP_GRPC_SERVERS"); if (llama_grpc_servers != NULL) { - params.rpc_servers = std::string(llama_grpc_servers); + add_rpc_devices(std::string(llama_grpc_servers)); } // TODO: Add yarn diff --git a/backend/go/image/tinydream/main.go b/backend/go/image/tinydream/main.go deleted file mode 100644 index ae259fa7..00000000 --- a/backend/go/image/tinydream/main.go +++ /dev/null @@ -1,21 +0,0 @@ -package main - -// Note: this is started internally by LocalAI and a server is allocated for each model - -import ( - "flag" - - grpc "github.com/mudler/LocalAI/pkg/grpc" -) - -var ( - addr = flag.String("addr", "localhost:50051", "the address to connect to") -) - -func main() { - flag.Parse() - - if err := grpc.StartServer(*addr, &Image{}); err != nil { - panic(err) - } -} diff --git a/backend/go/image/tinydream/tinydream.go b/backend/go/image/tinydream/tinydream.go deleted file mode 100644 index ad364c47..00000000 --- a/backend/go/image/tinydream/tinydream.go +++ /dev/null @@ -1,32 +0,0 @@ -package main - -// This is a wrapper to statisfy the GRPC service interface -// It is meant to be used by the main executable that is the server for the specific backend type 
(falcon, gpt3, etc) -import ( - "github.com/mudler/LocalAI/pkg/grpc/base" - pb "github.com/mudler/LocalAI/pkg/grpc/proto" - "github.com/mudler/LocalAI/pkg/tinydream" -) - -type Image struct { - base.SingleThread - tinydream *tinydream.TinyDream -} - -func (image *Image) Load(opts *pb.ModelOptions) error { - var err error - // Note: the Model here is a path to a directory containing the model files - image.tinydream, err = tinydream.New(opts.ModelFile) - return err -} - -func (image *Image) GenerateImage(opts *pb.GenerateImageRequest) error { - return image.tinydream.GenerateImage( - int(opts.Height), - int(opts.Width), - int(opts.Step), - int(opts.Seed), - opts.PositivePrompt, - opts.NegativePrompt, - opts.Dst) -} diff --git a/backend/python/sentencetransformers/Makefile b/backend/python/sentencetransformers/Makefile deleted file mode 100644 index 8b18e943..00000000 --- a/backend/python/sentencetransformers/Makefile +++ /dev/null @@ -1,31 +0,0 @@ -.PHONY: sentencetransformers -sentencetransformers: protogen - bash ./install.sh - - -.PHONY: run -run: protogen - @echo "Running sentencetransformers..." - bash run.sh - @echo "sentencetransformers run." - -# It is not working well by using command line. It only6 works with IDE like VSCode. -.PHONY: test -test: protogen - @echo "Testing sentencetransformers..." - bash test.sh - @echo "sentencetransformers tested." - -.PHONY: protogen -protogen: backend_pb2_grpc.py backend_pb2.py - -.PHONY: protogen-clean -protogen-clean: - $(RM) backend_pb2_grpc.py backend_pb2.py - -backend_pb2_grpc.py backend_pb2.py: - python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto - -.PHONY: clean -clean: protogen-clean - rm -rf venv __pycache__ \ No newline at end of file diff --git a/backend/python/sentencetransformers/README.md b/backend/python/sentencetransformers/README.md deleted file mode 100644 index 829cf0d1..00000000 --- a/backend/python/sentencetransformers/README.md +++ /dev/null @@ -1,5 +0,0 @@ -# Creating a separate environment for the sentencetransformers project - -``` -make sentencetransformers -``` \ No newline at end of file diff --git a/backend/python/sentencetransformers/backend.py b/backend/python/sentencetransformers/backend.py deleted file mode 100755 index 2a20bf60..00000000 --- a/backend/python/sentencetransformers/backend.py +++ /dev/null @@ -1,114 +0,0 @@ -#!/usr/bin/env python3 -""" -Extra gRPC server for HuggingFace SentenceTransformer models. -""" -from concurrent import futures - -import argparse -import signal -import sys -import os - -import time -import backend_pb2 -import backend_pb2_grpc - -import grpc - -from sentence_transformers import SentenceTransformer - -_ONE_DAY_IN_SECONDS = 60 * 60 * 24 - -# If MAX_WORKERS are specified in the environment use it, otherwise default to 1 -MAX_WORKERS = int(os.environ.get('PYTHON_GRPC_MAX_WORKERS', '1')) - -# Implement the BackendServicer class with the service methods -class BackendServicer(backend_pb2_grpc.BackendServicer): - """ - A gRPC servicer for the backend service. - - This class implements the gRPC methods for the backend service, including Health, LoadModel, and Embedding. - """ - def Health(self, request, context): - """ - A gRPC method that returns the health status of the backend service. - - Args: - request: A HealthRequest object that contains the request parameters. - context: A grpc.ServicerContext object that provides information about the RPC. - - Returns: - A Reply object that contains the health status of the backend service. 
- """ - return backend_pb2.Reply(message=bytes("OK", 'utf-8')) - - def LoadModel(self, request, context): - """ - A gRPC method that loads a model into memory. - - Args: - request: A LoadModelRequest object that contains the request parameters. - context: A grpc.ServicerContext object that provides information about the RPC. - - Returns: - A Result object that contains the result of the LoadModel operation. - """ - model_name = request.Model - try: - self.model = SentenceTransformer(model_name, trust_remote_code=request.TrustRemoteCode) - except Exception as err: - return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}") - - # Implement your logic here for the LoadModel service - # Replace this with your desired response - return backend_pb2.Result(message="Model loaded successfully", success=True) - - def Embedding(self, request, context): - """ - A gRPC method that calculates embeddings for a given sentence. - - Args: - request: An EmbeddingRequest object that contains the request parameters. - context: A grpc.ServicerContext object that provides information about the RPC. - - Returns: - An EmbeddingResult object that contains the calculated embeddings. - """ - # Implement your logic here for the Embedding service - # Replace this with your desired response - print("Calculated embeddings for: " + request.Embeddings, file=sys.stderr) - sentence_embeddings = self.model.encode(request.Embeddings) - return backend_pb2.EmbeddingResult(embeddings=sentence_embeddings) - - -def serve(address): - server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS)) - backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server) - server.add_insecure_port(address) - server.start() - print("Server started. Listening on: " + address, file=sys.stderr) - - # Define the signal handler function - def signal_handler(sig, frame): - print("Received termination signal. Shutting down...") - server.stop(0) - sys.exit(0) - - # Set the signal handlers for SIGINT and SIGTERM - signal.signal(signal.SIGINT, signal_handler) - signal.signal(signal.SIGTERM, signal_handler) - - try: - while True: - time.sleep(_ONE_DAY_IN_SECONDS) - except KeyboardInterrupt: - server.stop(0) - -if __name__ == "__main__": - parser = argparse.ArgumentParser(description="Run the gRPC server.") - parser.add_argument( - "--addr", default="localhost:50051", help="The address to bind the server to." - ) - args = parser.parse_args() - - serve(args.addr) diff --git a/backend/python/sentencetransformers/install.sh b/backend/python/sentencetransformers/install.sh deleted file mode 100755 index 36443ef1..00000000 --- a/backend/python/sentencetransformers/install.sh +++ /dev/null @@ -1,14 +0,0 @@ -#!/bin/bash -set -e - -source $(dirname $0)/../common/libbackend.sh - -# This is here because the Intel pip index is broken and returns 200 status codes for every package name, it just doesn't return any package links. -# This makes uv think that the package exists in the Intel pip index, and by default it stops looking at other pip indexes once it finds a match. 
-# We need uv to continue falling through to the pypi default index to find optimum[openvino] in the pypi index -# the --upgrade actually allows us to *downgrade* torch to the version provided in the Intel pip index -if [ "x${BUILD_PROFILE}" == "xintel" ]; then - EXTRA_PIP_INSTALL_FLAGS+=" --upgrade --index-strategy=unsafe-first-match" -fi - -installRequirements diff --git a/backend/python/sentencetransformers/requirements-cpu.txt b/backend/python/sentencetransformers/requirements-cpu.txt deleted file mode 100644 index 1e23f68c..00000000 --- a/backend/python/sentencetransformers/requirements-cpu.txt +++ /dev/null @@ -1,6 +0,0 @@ -torch==2.4.1 -accelerate -transformers -bitsandbytes -sentence-transformers==3.3.1 -transformers \ No newline at end of file diff --git a/backend/python/sentencetransformers/requirements-cublas11.txt b/backend/python/sentencetransformers/requirements-cublas11.txt deleted file mode 100644 index 3900aba9..00000000 --- a/backend/python/sentencetransformers/requirements-cublas11.txt +++ /dev/null @@ -1,5 +0,0 @@ ---extra-index-url https://download.pytorch.org/whl/cu118 -torch==2.4.1+cu118 -accelerate -sentence-transformers==3.3.1 -transformers \ No newline at end of file diff --git a/backend/python/sentencetransformers/requirements-cublas12.txt b/backend/python/sentencetransformers/requirements-cublas12.txt deleted file mode 100644 index 2afd0520..00000000 --- a/backend/python/sentencetransformers/requirements-cublas12.txt +++ /dev/null @@ -1,4 +0,0 @@ -torch==2.4.1 -accelerate -sentence-transformers==3.3.1 -transformers \ No newline at end of file diff --git a/backend/python/sentencetransformers/requirements-hipblas.txt b/backend/python/sentencetransformers/requirements-hipblas.txt deleted file mode 100644 index b472d371..00000000 --- a/backend/python/sentencetransformers/requirements-hipblas.txt +++ /dev/null @@ -1,5 +0,0 @@ ---extra-index-url https://download.pytorch.org/whl/rocm6.0 -torch==2.4.1+rocm6.0 -accelerate -sentence-transformers==3.3.1 -transformers \ No newline at end of file diff --git a/backend/python/sentencetransformers/requirements-intel.txt b/backend/python/sentencetransformers/requirements-intel.txt deleted file mode 100644 index e9b72aab..00000000 --- a/backend/python/sentencetransformers/requirements-intel.txt +++ /dev/null @@ -1,9 +0,0 @@ ---extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/ -intel-extension-for-pytorch==2.3.110+xpu -torch==2.3.1+cxx11.abi -oneccl_bind_pt==2.3.100+xpu -optimum[openvino] -setuptools -accelerate -sentence-transformers==3.3.1 -transformers \ No newline at end of file diff --git a/backend/python/sentencetransformers/requirements.txt b/backend/python/sentencetransformers/requirements.txt deleted file mode 100644 index 6e03c63f..00000000 --- a/backend/python/sentencetransformers/requirements.txt +++ /dev/null @@ -1,5 +0,0 @@ -grpcio==1.69.0 -protobuf -certifi -datasets -einops \ No newline at end of file diff --git a/backend/python/sentencetransformers/run.sh b/backend/python/sentencetransformers/run.sh deleted file mode 100755 index 375c07e5..00000000 --- a/backend/python/sentencetransformers/run.sh +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -source $(dirname $0)/../common/libbackend.sh - -startBackend $@ \ No newline at end of file diff --git a/backend/python/sentencetransformers/test.py b/backend/python/sentencetransformers/test.py deleted file mode 100644 index 9df52b14..00000000 --- a/backend/python/sentencetransformers/test.py +++ /dev/null @@ -1,81 +0,0 @@ -""" -A test script to 
test the gRPC service -""" -import unittest -import subprocess -import time -import backend_pb2 -import backend_pb2_grpc - -import grpc - - -class TestBackendServicer(unittest.TestCase): - """ - TestBackendServicer is the class that tests the gRPC service - """ - def setUp(self): - """ - This method sets up the gRPC service by starting the server - """ - self.service = subprocess.Popen(["python3", "backend.py", "--addr", "localhost:50051"]) - time.sleep(10) - - def tearDown(self) -> None: - """ - This method tears down the gRPC service by terminating the server - """ - self.service.kill() - self.service.wait() - - def test_server_startup(self): - """ - This method tests if the server starts up successfully - """ - try: - self.setUp() - with grpc.insecure_channel("localhost:50051") as channel: - stub = backend_pb2_grpc.BackendStub(channel) - response = stub.Health(backend_pb2.HealthMessage()) - self.assertEqual(response.message, b'OK') - except Exception as err: - print(err) - self.fail("Server failed to start") - finally: - self.tearDown() - - def test_load_model(self): - """ - This method tests if the model is loaded successfully - """ - try: - self.setUp() - with grpc.insecure_channel("localhost:50051") as channel: - stub = backend_pb2_grpc.BackendStub(channel) - response = stub.LoadModel(backend_pb2.ModelOptions(Model="bert-base-nli-mean-tokens")) - self.assertTrue(response.success) - self.assertEqual(response.message, "Model loaded successfully") - except Exception as err: - print(err) - self.fail("LoadModel service failed") - finally: - self.tearDown() - - def test_embedding(self): - """ - This method tests if the embeddings are generated successfully - """ - try: - self.setUp() - with grpc.insecure_channel("localhost:50051") as channel: - stub = backend_pb2_grpc.BackendStub(channel) - response = stub.LoadModel(backend_pb2.ModelOptions(Model="bert-base-nli-mean-tokens")) - self.assertTrue(response.success) - embedding_request = backend_pb2.PredictOptions(Embeddings="This is a test sentence.") - embedding_response = stub.Embedding(embedding_request) - self.assertIsNotNone(embedding_response.embeddings) - except Exception as err: - print(err) - self.fail("Embedding service failed") - finally: - self.tearDown() \ No newline at end of file diff --git a/backend/python/sentencetransformers/test.sh b/backend/python/sentencetransformers/test.sh deleted file mode 100755 index 6940b066..00000000 --- a/backend/python/sentencetransformers/test.sh +++ /dev/null @@ -1,6 +0,0 @@ -#!/bin/bash -set -e - -source $(dirname $0)/../common/libbackend.sh - -runUnittests diff --git a/backend/python/transformers/backend.py b/backend/python/transformers/backend.py index 27257934..9b65c6db 100644 --- a/backend/python/transformers/backend.py +++ b/backend/python/transformers/backend.py @@ -25,6 +25,8 @@ from transformers import AutoTokenizer, AutoModel, set_seed, TextIteratorStreame from transformers import AutoProcessor, MusicgenForConditionalGeneration from scipy.io import wavfile import outetts +from sentence_transformers import SentenceTransformer + _ONE_DAY_IN_SECONDS = 60 * 60 * 24 @@ -88,10 +90,12 @@ class BackendServicer(backend_pb2_grpc.BackendServicer): self.CUDA = torch.cuda.is_available() self.OV=False self.OuteTTS=False + self.SentenceTransformer = False device_map="cpu" quantization = None + autoTokenizer = True if self.CUDA: from transformers import BitsAndBytesConfig, AutoModelForCausalLM @@ -195,9 +199,11 @@ class BackendServicer(backend_pb2_grpc.BackendServicer): device=device_map) self.OV = 
True elif request.Type == "MusicgenForConditionalGeneration": + autoTokenizer = False self.processor = AutoProcessor.from_pretrained(model_name) self.model = MusicgenForConditionalGeneration.from_pretrained(model_name) elif request.Type == "OuteTTS": + autoTokenizer = False options = request.Options MODELNAME = "OuteAI/OuteTTS-0.3-1B" TOKENIZER = "OuteAI/OuteTTS-0.3-1B" @@ -235,6 +241,10 @@ class BackendServicer(backend_pb2_grpc.BackendServicer): self.speaker = self.interface.create_speaker(audio_path=self.AudioPath) else: self.speaker = self.interface.load_default_speaker(name=SPEAKER) + elif request.Type == "SentenceTransformer": + autoTokenizer = False + self.model = SentenceTransformer(model_name, trust_remote_code=request.TrustRemoteCode) + self.SentenceTransformer = True else: print("Automodel", file=sys.stderr) self.model = AutoModel.from_pretrained(model_name, @@ -250,7 +260,7 @@ class BackendServicer(backend_pb2_grpc.BackendServicer): else: self.max_tokens = 512 - if request.Type != "MusicgenForConditionalGeneration": + if autoTokenizer: self.tokenizer = AutoTokenizer.from_pretrained(model_name, use_safetensors=True) self.XPU = False @@ -286,18 +296,26 @@ class BackendServicer(backend_pb2_grpc.BackendServicer): max_length = 512 if request.Tokens != 0: max_length = request.Tokens - encoded_input = self.tokenizer(request.Embeddings, padding=True, truncation=True, max_length=max_length, return_tensors="pt") - # Create word embeddings - if self.CUDA: - encoded_input = encoded_input.to("cuda") + embeds = None - with torch.no_grad(): - model_output = self.model(**encoded_input) + if self.SentenceTransformer: + print("Calculated embeddings for: " + request.Embeddings, file=sys.stderr) + embeds = self.model.encode(request.Embeddings) + else: + encoded_input = self.tokenizer(request.Embeddings, padding=True, truncation=True, max_length=max_length, return_tensors="pt") - # Pool to get sentence embeddings; i.e. generate one 1024 vector for the entire sentence - sentence_embeddings = mean_pooling(model_output, encoded_input['attention_mask']) - return backend_pb2.EmbeddingResult(embeddings=sentence_embeddings[0]) + # Create word embeddings + if self.CUDA: + encoded_input = encoded_input.to("cuda") + + with torch.no_grad(): + model_output = self.model(**encoded_input) + + # Pool to get sentence embeddings; i.e. 
generate one 1024 vector for the entire sentence + sentence_embeddings = mean_pooling(model_output, encoded_input['attention_mask']) + embeds = sentence_embeddings[0] + return backend_pb2.EmbeddingResult(embeddings=embeds) async def _predict(self, request, context, streaming=False): set_seed(request.Seed) diff --git a/backend/python/transformers/requirements-cpu.txt b/backend/python/transformers/requirements-cpu.txt index 56b77325..421c4b80 100644 --- a/backend/python/transformers/requirements-cpu.txt +++ b/backend/python/transformers/requirements-cpu.txt @@ -3,4 +3,5 @@ llvmlite==0.43.0 accelerate transformers bitsandbytes -outetts \ No newline at end of file +outetts +sentence-transformers==3.3.1 diff --git a/backend/python/transformers/requirements-cublas11.txt b/backend/python/transformers/requirements-cublas11.txt index 924b0086..c5d18d09 100644 --- a/backend/python/transformers/requirements-cublas11.txt +++ b/backend/python/transformers/requirements-cublas11.txt @@ -4,4 +4,5 @@ llvmlite==0.43.0 accelerate transformers bitsandbytes -outetts \ No newline at end of file +outetts +sentence-transformers==3.3.1 diff --git a/backend/python/transformers/requirements-cublas12.txt b/backend/python/transformers/requirements-cublas12.txt index 0feb3d81..c0bcfc87 100644 --- a/backend/python/transformers/requirements-cublas12.txt +++ b/backend/python/transformers/requirements-cublas12.txt @@ -3,4 +3,5 @@ accelerate llvmlite==0.43.0 transformers bitsandbytes -outetts \ No newline at end of file +outetts +sentence-transformers==3.3.1 diff --git a/backend/python/transformers/requirements-hipblas.txt b/backend/python/transformers/requirements-hipblas.txt index fa65fb8e..e7f53860 100644 --- a/backend/python/transformers/requirements-hipblas.txt +++ b/backend/python/transformers/requirements-hipblas.txt @@ -4,4 +4,6 @@ accelerate transformers llvmlite==0.43.0 bitsandbytes -outetts \ No newline at end of file +outetts +bitsandbytes +sentence-transformers==3.3.1 diff --git a/backend/python/transformers/requirements-intel.txt b/backend/python/transformers/requirements-intel.txt index 4a295599..aada6e00 100644 --- a/backend/python/transformers/requirements-intel.txt +++ b/backend/python/transformers/requirements-intel.txt @@ -6,4 +6,5 @@ optimum[openvino] llvmlite==0.43.0 intel-extension-for-transformers bitsandbytes -outetts \ No newline at end of file +outetts +sentence-transformers==3.3.1 diff --git a/backend/python/transformers/test.py b/backend/python/transformers/test.py index 305b0a93..14efa6a7 100644 --- a/backend/python/transformers/test.py +++ b/backend/python/transformers/test.py @@ -133,5 +133,41 @@ class TestBackendServicer(unittest.TestCase): except Exception as err: print(err) self.fail("SoundGeneration service failed") + finally: + self.tearDown() + + def test_embed_load_model(self): + """ + This method tests if the model is loaded successfully + """ + try: + self.setUp() + with grpc.insecure_channel("localhost:50051") as channel: + stub = backend_pb2_grpc.BackendStub(channel) + response = stub.LoadModel(backend_pb2.ModelOptions(Model="bert-base-nli-mean-tokens",Type="SentenceTransformer")) + self.assertTrue(response.success) + self.assertEqual(response.message, "Model loaded successfully") + except Exception as err: + print(err) + self.fail("LoadModel service failed") + finally: + self.tearDown() + + def test_sentencetransformers_embedding(self): + """ + This method tests if the embeddings are generated successfully + """ + try: + self.setUp() + with grpc.insecure_channel("localhost:50051") 
as channel: + stub = backend_pb2_grpc.BackendStub(channel) + response = stub.LoadModel(backend_pb2.ModelOptions(Model="bert-base-nli-mean-tokens",Type="SentenceTransformer")) + self.assertTrue(response.success) + embedding_request = backend_pb2.PredictOptions(Embeddings="This is a test sentence.") + embedding_response = stub.Embedding(embedding_request) + self.assertIsNotNone(embedding_response.embeddings) + except Exception as err: + print(err) + self.fail("Embedding service failed") finally: self.tearDown() \ No newline at end of file diff --git a/core/config/backend_config.go b/core/config/backend_config.go index bb2fa643..a488f2a0 100644 --- a/core/config/backend_config.go +++ b/core/config/backend_config.go @@ -515,7 +515,7 @@ func (c *BackendConfig) GuessUsecases(u BackendConfigUsecases) bool { } } if (u & FLAG_IMAGE) == FLAG_IMAGE { - imageBackends := []string{"diffusers", "tinydream", "stablediffusion"} + imageBackends := []string{"diffusers", "stablediffusion"} if !slices.Contains(imageBackends, c.Backend) { return false } diff --git a/core/http/app_test.go b/core/http/app_test.go index 6bf1806b..a2e2f758 100644 --- a/core/http/app_test.go +++ b/core/http/app_test.go @@ -822,7 +822,7 @@ var _ = Describe("API test", func() { application, err := application.New( append(commonOpts, - config.WithExternalBackend("huggingface", os.Getenv("HUGGINGFACE_GRPC")), + config.WithExternalBackend("transformers", os.Getenv("HUGGINGFACE_GRPC")), config.WithContext(c), config.WithModelPath(modelPath), )...) diff --git a/core/http/endpoints/openai/image.go b/core/http/endpoints/openai/image.go index 3fdb64d4..baaecd4e 100644 --- a/core/http/endpoints/openai/image.go +++ b/core/http/endpoints/openai/image.go @@ -130,8 +130,6 @@ func ImageEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appCon switch config.Backend { case "stablediffusion": config.Backend = model.StableDiffusionBackend - case "tinydream": - config.Backend = model.TinyDreamBackend case "": config.Backend = model.StableDiffusionBackend } diff --git a/docs/content/docs/getting-started/build.md b/docs/content/docs/getting-started/build.md index f21a5b48..9fff1989 100644 --- a/docs/content/docs/getting-started/build.md +++ b/docs/content/docs/getting-started/build.md @@ -88,7 +88,7 @@ Here is the list of the variables available that can be used to customize the bu | Variable | Default | Description | | ---------------------| ------- | ----------- | | `BUILD_TYPE` | None | Build type. Available: `cublas`, `openblas`, `clblas`, `metal`,`hipblas`, `sycl_f16`, `sycl_f32` | -| `GO_TAGS` | `tts stablediffusion` | Go tags. Available: `stablediffusion`, `tts`, `tinydream` | +| `GO_TAGS` | `tts stablediffusion` | Go tags. 
Available: `stablediffusion`, `tts` | | `CLBLAST_DIR` | | Specify a CLBlast directory | | `CUDA_LIBPATH` | | Specify a CUDA library path | | `BUILD_API_ONLY` | false | Set to true to build only the API (no backends will be built) | @@ -202,7 +202,7 @@ make build **Requirements**: OpenCV, Gomp -Image generation requires `GO_TAGS=stablediffusion` or `GO_TAGS=tinydream` to be set during build: +Image generation requires `GO_TAGS=stablediffusion` to be set during build: ``` make GO_TAGS=stablediffusion build diff --git a/docs/content/docs/getting-started/container-images.md b/docs/content/docs/getting-started/container-images.md index 25385f23..64f6dbc9 100644 --- a/docs/content/docs/getting-started/container-images.md +++ b/docs/content/docs/getting-started/container-images.md @@ -16,7 +16,7 @@ For GPU Acceleration support for Nvidia video graphic cards, use the Nvidia/CUDA **Available Images Types**: -- Images ending with `-core` are smaller images without predownload python dependencies. Use these images if you plan to use `llama.cpp`, `stablediffusion-ncn`, `tinydream` or `rwkv` backends - if you are not sure which one to use, do **not** use these images. +- Images ending with `-core` are smaller images without predownload python dependencies. Use these images if you plan to use `llama.cpp`, `stablediffusion-ncn` or `rwkv` backends - if you are not sure which one to use, do **not** use these images. - Images containing the `aio` tag are all-in-one images with all the features enabled, and come with an opinionated set of configuration. - FFMpeg is **not** included in the default images due to [its licensing](https://www.ffmpeg.org/legal.html). If you need FFMpeg, use the images ending with `-ffmpeg`. Note that `ffmpeg` is needed in case of using `audio-to-text` LocalAI's features. - If using old and outdated CPUs and no GPUs you might need to set `REBUILD` to `true` as environment variable along with options to disable the flags which your CPU does not support, however note that inference will perform poorly and slow. See also [flagset compatibility]({{%relref "docs/getting-started/build#cpu-flagset-compatibility" %}}). 
@@ -197,7 +197,7 @@ Images with `core` in the tag are smaller and do not contain any python dependen | --- | --- |-------------------------------------------------------------| | Latest images from the branch (development) | `quay.io/go-skynet/local-ai:master-sycl-f16` | `localai/localai:master-sycl-f16` | | Latest tag | `quay.io/go-skynet/local-ai:latest-gpu-intel-f16` | `localai/localai:latest-gpu-intel-f16` | -| Versioned image | `quay.io/go-skynet/local-ai:{{< version >}}-sycl-f16` | `localai/localai:{{< version >}}-sycl-f16` | +| Versioned image | `quay.io/go-skynet/local-ai:{{< version >}}-sycl-f16-core` | `localai/localai:{{< version >}}-sycl-f16-core` | | Versioned image including FFMpeg| `quay.io/go-skynet/local-ai:{{< version >}}-sycl-f16-ffmpeg` | `localai/localai:{{< version >}}-sycl-f16-ffmpeg` | | Versioned image including FFMpeg, no python | `quay.io/go-skynet/local-ai:{{< version >}}-sycl-f16-ffmpeg-core` | `localai/localai:{{< version >}}-sycl-f16-ffmpeg-core` | @@ -209,7 +209,7 @@ Images with `core` in the tag are smaller and do not contain any python dependen | --- | --- |-------------------------------------------------------------| | Latest images from the branch (development) | `quay.io/go-skynet/local-ai:master-sycl-f32` | `localai/localai:master-sycl-f32` | | Latest tag | `quay.io/go-skynet/local-ai:latest-gpu-intel-f32` | `localai/localai:latest-gpu-intel-f32` | -| Versioned image | `quay.io/go-skynet/local-ai:{{< version >}}-sycl-f32` | `localai/localai:{{< version >}}-sycl-f32` | +| Versioned image | `quay.io/go-skynet/local-ai:{{< version >}}-sycl-f32-core` | `localai/localai:{{< version >}}-sycl-f32-core` | | Versioned image including FFMpeg| `quay.io/go-skynet/local-ai:{{< version >}}-sycl-f32-ffmpeg` | `localai/localai:{{< version >}}-sycl-f32-ffmpeg` | | Versioned image including FFMpeg, no python | `quay.io/go-skynet/local-ai:{{< version >}}-sycl-f32-ffmpeg-core` | `localai/localai:{{< version >}}-sycl-f32-ffmpeg-core` | diff --git a/docs/content/docs/reference/compatibility-table.md b/docs/content/docs/reference/compatibility-table.md index 7056f4a5..d2f4d8ac 100644 --- a/docs/content/docs/reference/compatibility-table.md +++ b/docs/content/docs/reference/compatibility-table.md @@ -32,7 +32,7 @@ LocalAI will attempt to automatically load models which are not explicitly confi | `mamba` | Mamba models architecture | yes | GPT | no | no | CPU/CUDA | | `exllama2` | GPTQ | yes | GPT only | no | no | N/A | | `transformers-musicgen` | | no | Audio generation | no | no | N/A | -| [tinydream](https://github.com/symisc/tiny-dream#tiny-dreaman-embedded-header-only-stable-diffusion-inference-c-librarypixlabiotiny-dream) | stablediffusion | no | Image | no | no | N/A | +| stablediffusion | | no | Image | no | no | N/A | | `coqui` | Coqui | no | Audio generation and Voice cloning | no | no | CPU/CUDA | | `openvoice` | Open voice | no | Audio generation and Voice cloning | no | no | CPU/CUDA | | `parler-tts` | Open voice | no | Audio generation and Voice cloning | no | no | CPU/CUDA | diff --git a/gallery/index.yaml b/gallery/index.yaml index 349cd419..edd52725 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -9187,6 +9187,7 @@ uri: huggingface://xtuner/llava-llama-3-8b-v1_1-gguf/llava-llama-3-8b-v1_1-mmproj-f16.gguf - !!merge <<: *llama3 name: "minicpm-llama3-v-2_5" + icon: https://raw.githubusercontent.com/OpenBMB/MiniCPM/main/assets/minicpm_logo.png urls: - https://huggingface.co/openbmb/MiniCPM-Llama3-V-2_5-gguf -
https://huggingface.co/openbmb/MiniCPM-Llama3-V-2_5 @@ -9210,6 +9211,33 @@ - filename: minicpm-llama3-mmproj-f16.gguf sha256: 391d11736c3cd24a90417c47b0c88975e86918fcddb1b00494c4d715b08af13e uri: huggingface://openbmb/MiniCPM-Llama3-V-2_5-gguf/mmproj-model-f16.gguf +- !!merge <<: *llama3 + name: "minicpm-v-2_6" + license: apache-2.0 + icon: https://raw.githubusercontent.com/OpenBMB/MiniCPM/main/assets/minicpm_logo.png + urls: + - https://huggingface.co/openbmb/MiniCPM-V-2_6-gguf + - https://huggingface.co/openbmb/MiniCPM-V-2_6 + description: | + MiniCPM-V 2.6 is the latest and most capable model in the MiniCPM-V series. The model is built on SigLip-400M and Qwen2-7B with a total of 8B parameters + tags: + - llm + - multimodal + - gguf + - gpu + - llama3 + - cpu + overrides: + mmproj: minicpm-v-2_6-mmproj-f16.gguf + parameters: + model: minicpm-v-2_6-Q4_K_M.gguf + files: + - filename: minicpm-v-2_6-Q4_K_M.gguf + sha256: 3a4078d53b46f22989adbf998ce5a3fd090b6541f112d7e936eb4204a04100b1 + uri: huggingface://openbmb/MiniCPM-V-2_6-gguf/ggml-model-Q4_K_M.gguf + - filename: minicpm-v-2_6-mmproj-f16.gguf + sha256: f8a805e9e62085805c69c427287acefc284932eb4abfe6e1b1ce431d27e2f4e0 + uri: huggingface://openbmb/MiniCPM-V-2_6-gguf/mmproj-model-f16.gguf - !!merge <<: *llama3 name: "llama-3-cursedstock-v1.8-8b-iq-imatrix" urls: @@ -11187,15 +11215,6 @@ description: | Stable Diffusion in NCNN with c++, supported txt2img and img2img name: stablediffusion-cpp -## Tiny Dream -- url: github:mudler/LocalAI/gallery/tinydream.yaml@master - name: tinydream - license: "BSD-3" - urls: - - https://github.com/symisc/tiny-dream - - https://github.com/symisc/tiny-dream/blob/main/LICENSE - description: | - An embedded, Header Only, Stable Diffusion C++ implementation - &piper ## Piper TTS url: github:mudler/LocalAI/gallery/piper.yaml@master diff --git a/gallery/tinydream.yaml b/gallery/tinydream.yaml deleted file mode 100644 index e4a79ad7..00000000 --- a/gallery/tinydream.yaml +++ /dev/null @@ -1,37 +0,0 @@ ---- -name: "tinydream" - -config_file: | - name: tinydream - backend: tinydream - parameters: - model: tinydream_assets - -files: - - filename: "tinydream_assets/AutoencoderKL-fp16.bin" - sha256: "f02e71f80e70252734724bbfaed5c4ddd3a8ed7e61bb2175ff5f53099f0e35dd" - uri: "https://github.com/M0Rf30/tiny-dream-bins/releases/download/1.0/AutoencoderKL-fp16.bin" - - filename: "tinydream_assets/AutoencoderKL-fp16.param" - sha256: "0254a056dce61b0c27dc9ec1b78b53bcf55315c540f55f051eb841aa992701ba" - uri: "https://github.com/M0Rf30/tiny-dream-bins/releases/download/1.0/AutoencoderKL-fp16.param" - - filename: "tinydream_assets/FrozenCLIPEmbedder-fp16.bin" - sha256: "1c9a12f4e1dd1b295a388045f7f28a2352a4d70c3dc96a542189a3dd7051fdd6" - uri: "https://github.com/M0Rf30/tiny-dream-bins/releases/download/1.0/FrozenCLIPEmbedder-fp16.bin" - - filename: "tinydream_assets/FrozenCLIPEmbedder-fp16.param" - sha256: "471afbe678dd1fd3fe764ef9c6eccaccb0a7d7e601f27b462aa926b20eb368c9" - uri: "https://github.com/M0Rf30/tiny-dream-bins/releases/download/1.0/FrozenCLIPEmbedder-fp16.param" - - filename: "tinydream_assets/RealESRGAN_x4plus_anime.bin" - sha256: "fe01c269cfd10cdef8e018ab66ebe750cf79c7af4d1f9c16c737e1295229bacc" - uri: "https://github.com/M0Rf30/tiny-dream-bins/releases/download/1.0/RealESRGAN_x4plus_anime.bin" - - filename: "tinydream_assets/RealESRGAN_x4plus_anime.param" - sha256: "2b8fb6e0ae4d2d85704ca08c119a2f5ea40add4f2ecd512eb7f4cd44b6127ed4" - uri: 
"https://github.com/M0Rf30/tiny-dream-bins/releases/download/1.0/RealESRGAN_x4plus_anime.param" - - filename: "tinydream_assets/UNetModel-fp16.bin" - sha256: "d618918d011bfc1f644c0f2a33bf84931bd53b28a98492b0a8ed6f3a818852c3" - uri: "https://github.com/M0Rf30/tiny-dream-bins/releases/download/1.0/UNetModel-fp16.bin" - - filename: "tinydream_assets/UNetModel-fp16.param" - sha256: "696f6975de49f4325b53ce32aff81861a6d6c07cd9ce3f0aae2cc405350af38d" - uri: "https://github.com/M0Rf30/tiny-dream-bins/releases/download/1.0/UNetModel-fp16.param" - - filename: "tinydream_assets/vocab.txt" - sha256: "e30e57b6f1e47616982ef898d8922be24e535b4fa3d0110477b3a6f02ebbae7d" - uri: "https://github.com/M0Rf30/tiny-dream-bins/releases/download/1.0/vocab.txt" diff --git a/go.mod b/go.mod index 8aecf14d..adfa7357 100644 --- a/go.mod +++ b/go.mod @@ -6,7 +6,6 @@ toolchain go1.23.1 require ( dario.cat/mergo v1.0.1 - github.com/M0Rf30/go-tiny-dream v0.0.0-20240425104733-c04fa463ace9 github.com/Masterminds/sprig/v3 v3.3.0 github.com/alecthomas/kong v0.9.0 github.com/census-instrumentation/opencensus-proto v0.4.1 diff --git a/go.sum b/go.sum index a1a487b2..4a744ed8 100644 --- a/go.sum +++ b/go.sum @@ -27,8 +27,6 @@ github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03 github.com/BurntSushi/toml v1.2.1/go.mod h1:CxXYINrC8qIiEnFrOxCa7Jy5BFHlXnUU2pbicEuybxQ= github.com/KyleBanks/depth v1.2.1 h1:5h8fQADFrWtarTdtDudMmGsC7GPbOAu6RVB3ffsVFHc= github.com/KyleBanks/depth v1.2.1/go.mod h1:jzSb9d0L43HxTQfT+oSA1EEp2q+ne2uh6XgeJcm8brE= -github.com/M0Rf30/go-tiny-dream v0.0.0-20240425104733-c04fa463ace9 h1:ASsbvw7wQPldWpwKdmYRszJ2A8Cj3oJDr4zO0DiXvN4= -github.com/M0Rf30/go-tiny-dream v0.0.0-20240425104733-c04fa463ace9/go.mod h1:UOf2Mb/deUri5agct5OJ4SLWjhI+kZKbsUVUeRb24I0= github.com/Masterminds/goutils v1.1.1 h1:5nUrii3FMTL5diU80unEVvNevw1nH4+ZV4DSLVJLSYI= github.com/Masterminds/goutils v1.1.1/go.mod h1:8cTjp+g8YejhMuvIA5y2vz3BpJxksy863GQaJW2MFNU= github.com/Masterminds/semver/v3 v3.3.0 h1:B8LGeaivUe71a5qox1ICM/JLl0NqZSW5CHyL+hmvYS0= diff --git a/pkg/model/initializers.go b/pkg/model/initializers.go index f4675050..756deea7 100644 --- a/pkg/model/initializers.go +++ b/pkg/model/initializers.go @@ -22,11 +22,19 @@ import ( ) var Aliases map[string]string = map[string]string{ - "go-llama": LLamaCPP, - "llama": LLamaCPP, - "embedded-store": LocalStoreBackend, - "langchain-huggingface": LCHuggingFaceBackend, - "transformers-musicgen": TransformersBackend, + "go-llama": LLamaCPP, + "llama": LLamaCPP, + "embedded-store": LocalStoreBackend, + "huggingface-embeddings": TransformersBackend, + "langchain-huggingface": LCHuggingFaceBackend, + "transformers-musicgen": TransformersBackend, + "sentencetransformers": TransformersBackend, +} + +var TypeAlias map[string]string = map[string]string{ + "sentencetransformers": "SentenceTransformer", + "huggingface-embeddings": "SentenceTransformer", + "transformers-musicgen": "MusicgenForConditionalGeneration", } var AutoDetect = os.Getenv("DISABLE_AUTODETECT") != "true" @@ -48,7 +56,6 @@ const ( WhisperBackend = "whisper" StableDiffusionBackend = "stablediffusion" - TinyDreamBackend = "tinydream" PiperBackend = "piper" LCHuggingFaceBackend = "huggingface" @@ -396,6 +403,7 @@ func (ml *ModelLoader) grpcModel(backend string, autodetect bool, o *Options) fu } log.Debug().Msgf("Wait for the service to start up") + log.Debug().Msgf("Options: %+v", o.gRPCOptions) // Wait for the service to start up ready := false @@ -460,8 +468,15 @@ func (ml *ModelLoader) backendLoader(opts 
...Option) (client grpc.Backend, err e backend := strings.ToLower(o.backendString) if realBackend, exists := Aliases[backend]; exists { + typeAlias, exists := TypeAlias[backend] + if exists { + log.Debug().Msgf("'%s' is a type alias of '%s' (%s)", backend, realBackend, typeAlias) + o.gRPCOptions.Type = typeAlias + } else { + log.Debug().Msgf("'%s' is an alias of '%s'", backend, realBackend) + } + backend = realBackend - log.Debug().Msgf("%s is an alias of %s", backend, realBackend) } ml.stopActiveBackends(o.modelID, o.singleActiveBackend) diff --git a/pkg/tinydream/generate.go b/pkg/tinydream/generate.go deleted file mode 100644 index cfcd23cc..00000000 --- a/pkg/tinydream/generate.go +++ /dev/null @@ -1,36 +0,0 @@ -//go:build tinydream -// +build tinydream - -package tinydream - -import ( - "fmt" - "path/filepath" - - tinyDream "github.com/M0Rf30/go-tiny-dream" -) - -func GenerateImage(height, width, step, seed int, positive_prompt, negative_prompt, dst, asset_dir string) error { - fmt.Println(dst) - if height > 512 || width > 512 { - return tinyDream.GenerateImage( - 1, - step, - seed, - positive_prompt, - negative_prompt, - filepath.Dir(dst), - asset_dir, - ) - } - - return tinyDream.GenerateImage( - 0, - step, - seed, - positive_prompt, - negative_prompt, - filepath.Dir(dst), - asset_dir, - ) -} diff --git a/pkg/tinydream/generate_unsupported.go b/pkg/tinydream/generate_unsupported.go deleted file mode 100644 index 4ffd421a..00000000 --- a/pkg/tinydream/generate_unsupported.go +++ /dev/null @@ -1,10 +0,0 @@ -//go:build !tinydream -// +build !tinydream - -package tinydream - -import "fmt" - -func GenerateImage(height, width, step, seed int, positive_prompt, negative_prompt, dst, asset_dir string) error { - return fmt.Errorf("This version of LocalAI was built without the tinytts tag") -} diff --git a/pkg/tinydream/tinydream.go b/pkg/tinydream/tinydream.go deleted file mode 100644 index a316e641..00000000 --- a/pkg/tinydream/tinydream.go +++ /dev/null @@ -1,20 +0,0 @@ -package tinydream - -import "os" - -type TinyDream struct { - assetDir string -} - -func New(assetDir string) (*TinyDream, error) { - if _, err := os.Stat(assetDir); err != nil { - return nil, err - } - return &TinyDream{ - assetDir: assetDir, - }, nil -} - -func (td *TinyDream) GenerateImage(height, width, step, seed int, positive_prompt, negative_prompt, dst string) error { - return GenerateImage(height, width, step, seed, positive_prompt, negative_prompt, dst, td.assetDir) -} diff --git a/tests/models_fixtures/grpc.yaml b/tests/models_fixtures/grpc.yaml index 31c406ab..8c519920 100644 --- a/tests/models_fixtures/grpc.yaml +++ b/tests/models_fixtures/grpc.yaml @@ -1,5 +1,5 @@ name: code-search-ada-code-001 -backend: huggingface +backend: sentencetransformers embeddings: true parameters: model: all-MiniLM-L6-v2 \ No newline at end of file
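
With the standalone `sentencetransformers` backend removed, embedding models are served by the consolidated `transformers` backend: the `sentencetransformers` and `huggingface-embeddings` backend names remain as aliases that map to `transformers` with `Type` set to `SentenceTransformer`. A minimal client sketch of the new path, mirroring the added `test_sentencetransformers_embedding` test; it assumes a backend instance already listening on `localhost:50051` and gRPC stubs generated via `make protogen` in `backend/python/transformers`:

```python
# Hypothetical standalone client for the consolidated transformers backend.
# Assumes backend_pb2 / backend_pb2_grpc were generated from backend.proto
# and that the backend was started separately (e.g. with run.sh) on :50051.
import grpc

import backend_pb2
import backend_pb2_grpc

with grpc.insecure_channel("localhost:50051") as channel:
    stub = backend_pb2_grpc.BackendStub(channel)

    # Type="SentenceTransformer" selects the sentence-transformers code path
    # that replaces the deleted backend/python/sentencetransformers server.
    load = stub.LoadModel(
        backend_pb2.ModelOptions(Model="bert-base-nli-mean-tokens",
                                 Type="SentenceTransformer"))
    assert load.success, load.message

    reply = stub.Embedding(
        backend_pb2.PredictOptions(Embeddings="This is a test sentence."))
    print(f"got {len(reply.embeddings)} embedding dimensions")
```

Model configurations that still declare `backend: sentencetransformers` (as in the updated `tests/models_fixtures/grpc.yaml`) are routed the same way through the new `Aliases` and `TypeAlias` maps in `pkg/model/initializers.go`, so existing YAML configs keep working without changes.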
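Inside `Embedding()`, the new `self.SentenceTransformer` flag switches between letting the sentence-transformers library compute the sentence vector and the pre-existing tokenizer + `AutoModel` + mean-pooling path. The sketch below illustrates why the two paths yield comparable vectors for plain embedding models; the model name is only an example (`all-MiniLM-L6-v2` appears in the test fixtures), and the mask-weighted average stands in for the backend's `mean_pooling` helper, which is not part of this diff and is assumed to follow the standard formulation:

```python
# Rough equivalence sketch of the two embedding paths, not backend code.
import torch
from sentence_transformers import SentenceTransformer
from transformers import AutoModel, AutoTokenizer

text = "This is a test sentence."
model_name = "sentence-transformers/all-MiniLM-L6-v2"  # illustrative model

# Path taken when request.Type == "SentenceTransformer":
# the library tokenizes, runs the model and pools internally.
st_vector = SentenceTransformer(model_name).encode(text)

# Default AutoModel path: tokenize, forward pass, then mean-pool the token
# embeddings weighted by the attention mask (standard sentence pooling).
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModel.from_pretrained(model_name)
encoded = tokenizer(text, padding=True, truncation=True, return_tensors="pt")
with torch.no_grad():
    output = model(**encoded)
mask = encoded["attention_mask"].unsqueeze(-1).float()
pooled = (output[0] * mask).sum(dim=1) / mask.sum(dim=1)

print(st_vector.shape, pooled[0].shape)  # same dimensionality
```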