Mirror of https://github.com/mudler/LocalAI.git, synced 2025-05-20 10:35:01 +00:00

Merge branch 'master' into ci/public-runner

Commit 96c080cc64: 42 changed files with 168 additions and 567 deletions.

Taken together, the merged changes drop the standalone sentencetransformers Python backend (its SentenceTransformer embedding path moves into the transformers backend), remove the go-tiny-dream (tinydream) image backend and its gallery entry, bump the pinned llama.cpp revision, and start registering llama.cpp RPC workers as ggml devices for distributed inference.
.github/workflows/test-extra.yml (24 changes, vendored)

@@ -35,30 +35,6 @@ jobs:
         run: |
           make --jobs=5 --output-sync=target -C backend/python/transformers
           make --jobs=5 --output-sync=target -C backend/python/transformers test
-
-  tests-sentencetransformers:
-    runs-on: ubuntu-latest
-    steps:
-      - name: Clone
-        uses: actions/checkout@v4
-        with:
-          submodules: true
-      - name: Dependencies
-        run: |
-          sudo apt-get update
-          sudo apt-get install build-essential ffmpeg
-          # Install UV
-          curl -LsSf https://astral.sh/uv/install.sh | sh
-          sudo apt-get install -y ca-certificates cmake curl patch python3-pip
-          sudo apt-get install -y libopencv-dev
-          pip install --user --no-cache-dir grpcio-tools==1.64.1
-
-      - name: Test sentencetransformers
-        run: |
-          make --jobs=5 --output-sync=target -C backend/python/sentencetransformers
-          make --jobs=5 --output-sync=target -C backend/python/sentencetransformers test
-
-
   tests-rerankers:
     runs-on: ubuntu-latest
     steps:
.github/workflows/test.yml (3 changes, vendored)

@@ -100,8 +100,7 @@ jobs:
           # The python3-grpc-tools package in 22.04 is too old
           pip install --user grpcio-tools

           sudo rm -rfv /usr/bin/conda || true
-          PATH=$PATH:/opt/conda/bin make -C backend/python/sentencetransformers
-
+          make -C backend/python/transformers

           # Pre-build piper before we start tests in order to have shared libraries in place
           make sources/go-piper && \
Dockerfile

@@ -15,7 +15,7 @@ ARG TARGETARCH
 ARG TARGETVARIANT

 ENV DEBIAN_FRONTEND=noninteractive
-ENV EXTERNAL_GRPC_BACKENDS="coqui:/build/backend/python/coqui/run.sh,huggingface-embeddings:/build/backend/python/sentencetransformers/run.sh,transformers:/build/backend/python/transformers/run.sh,sentencetransformers:/build/backend/python/sentencetransformers/run.sh,rerankers:/build/backend/python/rerankers/run.sh,autogptq:/build/backend/python/autogptq/run.sh,bark:/build/backend/python/bark/run.sh,diffusers:/build/backend/python/diffusers/run.sh,openvoice:/build/backend/python/openvoice/run.sh,kokoro:/build/backend/python/kokoro/run.sh,vllm:/build/backend/python/vllm/run.sh,mamba:/build/backend/python/mamba/run.sh,exllama2:/build/backend/python/exllama2/run.sh,parler-tts:/build/backend/python/parler-tts/run.sh"
+ENV EXTERNAL_GRPC_BACKENDS="coqui:/build/backend/python/coqui/run.sh,transformers:/build/backend/python/transformers/run.sh,rerankers:/build/backend/python/rerankers/run.sh,autogptq:/build/backend/python/autogptq/run.sh,bark:/build/backend/python/bark/run.sh,diffusers:/build/backend/python/diffusers/run.sh,openvoice:/build/backend/python/openvoice/run.sh,kokoro:/build/backend/python/kokoro/run.sh,vllm:/build/backend/python/vllm/run.sh,mamba:/build/backend/python/mamba/run.sh,exllama2:/build/backend/python/exllama2/run.sh,parler-tts:/build/backend/python/parler-tts/run.sh"


 RUN apt-get update && \

@@ -456,9 +456,6 @@ RUN if [[ ( "${EXTRA_BACKENDS}" =~ "kokoro" || -z "${EXTRA_BACKENDS}" ) && "$IMA
     if [[ ( "${EXTRA_BACKENDS}" =~ "openvoice" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
         make -C backend/python/openvoice \
     ; fi && \
-    if [[ ( "${EXTRA_BACKENDS}" =~ "sentencetransformers" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
-        make -C backend/python/sentencetransformers \
-    ; fi && \
     if [[ ( "${EXTRA_BACKENDS}" =~ "exllama2" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
        make -C backend/python/exllama2 \
     ; fi && \
Makefile (51 changes)

@@ -8,7 +8,7 @@ DETECT_LIBS?=true
 # llama.cpp versions
 GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp
 GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
-CPPLLAMA_VERSION?=4dbc8b9cb71876e005724f4e8f73a3544646bcf5
+CPPLLAMA_VERSION?=3edfa7d3753c29e44b964c0ff424d2ea8d5fdee6

 # whisper.cpp version
 WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp

@@ -22,10 +22,6 @@ PIPER_VERSION?=e10ca041a885d4a8f3871d52924b47792d5e5aa0
 STABLEDIFFUSION_REPO?=https://github.com/mudler/go-stable-diffusion
 STABLEDIFFUSION_VERSION?=4a3cd6aeae6f66ee57eae9a0075f8c58c3a6a38f

-# tinydream version
-TINYDREAM_REPO?=https://github.com/M0Rf30/go-tiny-dream
-TINYDREAM_VERSION?=c04fa463ace9d9a6464313aa5f9cd0f953b6c057
-
 # bark.cpp
 BARKCPP_REPO?=https://github.com/PABannier/bark.cpp.git
 BARKCPP_VERSION?=v1.0.0

@@ -188,11 +184,6 @@ ifeq ($(findstring stablediffusion,$(GO_TAGS)),stablediffusion)
 	OPTIONAL_GRPC+=backend-assets/grpc/stablediffusion
 endif

-ifeq ($(findstring tinydream,$(GO_TAGS)),tinydream)
-#	OPTIONAL_TARGETS+=go-tiny-dream/libtinydream.a
-	OPTIONAL_GRPC+=backend-assets/grpc/tinydream
-endif
-
 ifeq ($(findstring tts,$(GO_TAGS)),tts)
 #	OPTIONAL_TARGETS+=go-piper/libpiper_binding.a
 #	OPTIONAL_TARGETS+=backend-assets/espeak-ng-data

@@ -327,19 +318,6 @@ else
 	mv backend-assets/lib/libonnxruntime.so.$(ONNX_VERSION) backend-assets/lib/libonnxruntime.so.1
 endif

-## tiny-dream
-sources/go-tiny-dream:
-	mkdir -p sources/go-tiny-dream
-	cd sources/go-tiny-dream && \
-	git init && \
-	git remote add origin $(TINYDREAM_REPO) && \
-	git fetch origin && \
-	git checkout $(TINYDREAM_VERSION) && \
-	git submodule update --init --recursive --depth 1 --single-branch
-
-sources/go-tiny-dream/libtinydream.a: sources/go-tiny-dream
-	$(MAKE) -C sources/go-tiny-dream libtinydream.a
-
 ## whisper
 sources/whisper.cpp:
 	mkdir -p sources/whisper.cpp

@@ -353,12 +331,11 @@ sources/whisper.cpp:
 sources/whisper.cpp/libwhisper.a: sources/whisper.cpp
 	cd sources/whisper.cpp && $(MAKE) libwhisper.a libggml.a

-get-sources: sources/go-llama.cpp sources/go-piper sources/stablediffusion-ggml.cpp sources/bark.cpp sources/whisper.cpp sources/go-stable-diffusion sources/go-tiny-dream backend/cpp/llama/llama.cpp
+get-sources: sources/go-llama.cpp sources/go-piper sources/stablediffusion-ggml.cpp sources/bark.cpp sources/whisper.cpp sources/go-stable-diffusion backend/cpp/llama/llama.cpp

 replace:
 	$(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp=$(CURDIR)/sources/whisper.cpp
 	$(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp/bindings/go=$(CURDIR)/sources/whisper.cpp/bindings/go
-	$(GOCMD) mod edit -replace github.com/M0Rf30/go-tiny-dream=$(CURDIR)/sources/go-tiny-dream
 	$(GOCMD) mod edit -replace github.com/mudler/go-piper=$(CURDIR)/sources/go-piper
 	$(GOCMD) mod edit -replace github.com/mudler/go-stable-diffusion=$(CURDIR)/sources/go-stable-diffusion
 	$(GOCMD) mod edit -replace github.com/go-skynet/go-llama.cpp=$(CURDIR)/sources/go-llama.cpp

@@ -366,7 +343,6 @@ replace:
 dropreplace:
 	$(GOCMD) mod edit -dropreplace github.com/ggerganov/whisper.cpp
 	$(GOCMD) mod edit -dropreplace github.com/ggerganov/whisper.cpp/bindings/go
-	$(GOCMD) mod edit -dropreplace github.com/M0Rf30/go-tiny-dream
 	$(GOCMD) mod edit -dropreplace github.com/mudler/go-piper
 	$(GOCMD) mod edit -dropreplace github.com/mudler/go-stable-diffusion
 	$(GOCMD) mod edit -dropreplace github.com/go-skynet/go-llama.cpp

@@ -381,7 +357,6 @@ rebuild: ## Rebuilds the project
 	$(MAKE) -C sources/whisper.cpp clean
 	$(MAKE) -C sources/go-stable-diffusion clean
 	$(MAKE) -C sources/go-piper clean
-	$(MAKE) -C sources/go-tiny-dream clean
 	$(MAKE) build

 prepare: prepare-sources $(OPTIONAL_TARGETS)

@@ -497,7 +472,7 @@ test: prepare test-models/testmodel.ggml grpcs
 	@echo 'Running tests'
 	export GO_TAGS="tts stablediffusion debug"
 	$(MAKE) prepare-test
-	HUGGINGFACE_GRPC=$(abspath ./)/backend/python/sentencetransformers/run.sh TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
+	HUGGINGFACE_GRPC=$(abspath ./)/backend/python/transformers/run.sh TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
 	$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="!llama && !llama-gguf" --flake-attempts $(TEST_FLAKES) --fail-fast -v -r $(TEST_PATHS)
 	$(MAKE) test-llama
 	$(MAKE) test-llama-gguf

@@ -583,10 +558,10 @@ protogen-go-clean:
 	$(RM) bin/*

 .PHONY: protogen-python
-protogen-python: autogptq-protogen bark-protogen coqui-protogen diffusers-protogen exllama2-protogen mamba-protogen rerankers-protogen sentencetransformers-protogen transformers-protogen parler-tts-protogen kokoro-protogen vllm-protogen openvoice-protogen
+protogen-python: autogptq-protogen bark-protogen coqui-protogen diffusers-protogen exllama2-protogen mamba-protogen rerankers-protogen transformers-protogen parler-tts-protogen kokoro-protogen vllm-protogen openvoice-protogen

 .PHONY: protogen-python-clean
-protogen-python-clean: autogptq-protogen-clean bark-protogen-clean coqui-protogen-clean diffusers-protogen-clean exllama2-protogen-clean mamba-protogen-clean sentencetransformers-protogen-clean rerankers-protogen-clean transformers-protogen-clean parler-tts-protogen-clean kokoro-protogen-clean vllm-protogen-clean openvoice-protogen-clean
+protogen-python-clean: autogptq-protogen-clean bark-protogen-clean coqui-protogen-clean diffusers-protogen-clean exllama2-protogen-clean mamba-protogen-clean rerankers-protogen-clean transformers-protogen-clean parler-tts-protogen-clean kokoro-protogen-clean vllm-protogen-clean openvoice-protogen-clean

 .PHONY: autogptq-protogen
 autogptq-protogen:

@@ -644,14 +619,6 @@ rerankers-protogen:
 rerankers-protogen-clean:
 	$(MAKE) -C backend/python/rerankers protogen-clean

-.PHONY: sentencetransformers-protogen
-sentencetransformers-protogen:
-	$(MAKE) -C backend/python/sentencetransformers protogen
-
-.PHONY: sentencetransformers-protogen-clean
-sentencetransformers-protogen-clean:
-	$(MAKE) -C backend/python/sentencetransformers protogen-clean
-
 .PHONY: transformers-protogen
 transformers-protogen:
 	$(MAKE) -C backend/python/transformers protogen

@@ -701,7 +668,6 @@ prepare-extra-conda-environments: protogen-python
 	$(MAKE) -C backend/python/diffusers
 	$(MAKE) -C backend/python/vllm
 	$(MAKE) -C backend/python/mamba
-	$(MAKE) -C backend/python/sentencetransformers
 	$(MAKE) -C backend/python/rerankers
 	$(MAKE) -C backend/python/transformers
 	$(MAKE) -C backend/python/parler-tts

@@ -864,13 +830,6 @@ ifneq ($(UPX),)
 	$(UPX) backend-assets/grpc/silero-vad
 endif

-backend-assets/grpc/tinydream: sources/go-tiny-dream sources/go-tiny-dream/libtinydream.a backend-assets/grpc
-	CGO_LDFLAGS="$(CGO_LDFLAGS)" LIBRARY_PATH=$(CURDIR)/go-tiny-dream \
-	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/tinydream ./backend/go/image/tinydream
-ifneq ($(UPX),)
-	$(UPX) backend-assets/grpc/tinydream
-endif
-
 backend-assets/grpc/whisper: sources/whisper.cpp sources/whisper.cpp/libwhisper.a backend-assets/grpc
 	CGO_LDFLAGS="$(CGO_LDFLAGS) $(CGO_LDFLAGS_WHISPER)" C_INCLUDE_PATH="$(CURDIR)/sources/whisper.cpp/include:$(CURDIR)/sources/whisper.cpp/ggml/include" LIBRARY_PATH=$(CURDIR)/sources/whisper.cpp \
 	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/whisper ./backend/go/transcribe/whisper
backend/cpp/llama/grpc-server.cpp

@@ -134,6 +134,32 @@ static std::string tokens_to_output_formatted_string(const llama_context *ctx, c
     return out;
 }

+// Adds an RPC server
+// https://github.com/ggerganov/llama.cpp/compare/4dbc8b9cb71876e005724f4e8f73a3544646bcf5..3edfa7d3753c29e44b964c0ff424d2ea8d5fdee6
+static void add_rpc_devices(std::string servers) {
+    auto rpc_servers = string_split<std::string>(servers, ',');
+    if (rpc_servers.empty()) {
+        throw std::invalid_argument("no RPC servers specified");
+    }
+    ggml_backend_reg_t rpc_reg = ggml_backend_reg_by_name("RPC");
+    if (!rpc_reg) {
+        throw std::invalid_argument("failed to find RPC backend");
+    }
+    typedef ggml_backend_dev_t (*ggml_backend_rpc_add_device_t)(const char * endpoint);
+    ggml_backend_rpc_add_device_t ggml_backend_rpc_add_device_fn = (ggml_backend_rpc_add_device_t) ggml_backend_reg_get_proc_address(rpc_reg, "ggml_backend_rpc_add_device");
+    if (!ggml_backend_rpc_add_device_fn) {
+        throw std::invalid_argument("failed to find RPC device add function");
+    }
+    for (const auto & server : rpc_servers) {
+        ggml_backend_dev_t dev = ggml_backend_rpc_add_device_fn(server.c_str());
+        if (dev) {
+            ggml_backend_device_register(dev);
+        } else {
+            throw std::invalid_argument("failed to register RPC device");
+        }
+    }
+}
+
 // convert a vector of completion_token_output to json
 static json probs_vector_to_json(const llama_context *ctx, const std::vector<completion_token_output> &probs)
 {

@@ -2282,7 +2308,7 @@ static void params_parse(const backend::ModelOptions* request,

     const char *llama_grpc_servers = std::getenv("LLAMACPP_GRPC_SERVERS");
     if (llama_grpc_servers != NULL) {
-        params.rpc_servers = std::string(llama_grpc_servers);
+        add_rpc_devices(std::string(llama_grpc_servers));
     }

     // TODO: Add yarn
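The new helper is what lets this backend fan work out to remote llama.cpp RPC workers: `LLAMACPP_GRPC_SERVERS` carries a comma-separated list of `host:port` endpoints, and each endpoint is now registered as a ggml device rather than passed through as a raw string. A minimal Go sketch of that same parsing contract, for illustration only (`registerRPCDevice` is a hypothetical stand-in for the `ggml_backend_rpc_add_device`/`ggml_backend_device_register` pair, not a LocalAI API):

```go
package main

import (
	"fmt"
	"os"
	"strings"
)

// registerRPCDevice is a hypothetical stand-in for looking up the ggml "RPC"
// backend and registering one device per remote worker endpoint.
func registerRPCDevice(endpoint string) error {
	if endpoint == "" {
		return fmt.Errorf("empty RPC endpoint")
	}
	fmt.Println("registering RPC worker:", endpoint)
	return nil
}

func main() {
	// Same contract as add_rpc_devices: a comma-separated list of host:port pairs.
	servers := os.Getenv("LLAMACPP_GRPC_SERVERS") // e.g. "192.168.1.10:50052,192.168.1.11:50052"
	if servers == "" {
		return // no distributed workers configured
	}
	for _, ep := range strings.Split(servers, ",") {
		if err := registerRPCDevice(strings.TrimSpace(ep)); err != nil {
			panic(err)
		}
	}
}
```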
backend/go/image/tinydream (deleted gRPC entrypoint)

@@ -1,21 +0,0 @@
-package main
-
-// Note: this is started internally by LocalAI and a server is allocated for each model
-
-import (
-	"flag"
-
-	grpc "github.com/mudler/LocalAI/pkg/grpc"
-)
-
-var (
-	addr = flag.String("addr", "localhost:50051", "the address to connect to")
-)
-
-func main() {
-	flag.Parse()
-
-	if err := grpc.StartServer(*addr, &Image{}); err != nil {
-		panic(err)
-	}
-}
backend/go/image/tinydream (deleted backend wrapper)

@@ -1,32 +0,0 @@
-package main
-
-// This is a wrapper to statisfy the GRPC service interface
-// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
-import (
-	"github.com/mudler/LocalAI/pkg/grpc/base"
-	pb "github.com/mudler/LocalAI/pkg/grpc/proto"
-	"github.com/mudler/LocalAI/pkg/tinydream"
-)
-
-type Image struct {
-	base.SingleThread
-	tinydream *tinydream.TinyDream
-}
-
-func (image *Image) Load(opts *pb.ModelOptions) error {
-	var err error
-	// Note: the Model here is a path to a directory containing the model files
-	image.tinydream, err = tinydream.New(opts.ModelFile)
-	return err
-}
-
-func (image *Image) GenerateImage(opts *pb.GenerateImageRequest) error {
-	return image.tinydream.GenerateImage(
-		int(opts.Height),
-		int(opts.Width),
-		int(opts.Step),
-		int(opts.Seed),
-		opts.PositivePrompt,
-		opts.NegativePrompt,
-		opts.Dst)
-}
backend/python/sentencetransformers/Makefile (deleted)

@@ -1,31 +0,0 @@
-.PHONY: sentencetransformers
-sentencetransformers: protogen
-	bash ./install.sh
-
-
-.PHONY: run
-run: protogen
-	@echo "Running sentencetransformers..."
-	bash run.sh
-	@echo "sentencetransformers run."
-
-# It is not working well by using command line. It only6 works with IDE like VSCode.
-.PHONY: test
-test: protogen
-	@echo "Testing sentencetransformers..."
-	bash test.sh
-	@echo "sentencetransformers tested."
-
-.PHONY: protogen
-protogen: backend_pb2_grpc.py backend_pb2.py
-
-.PHONY: protogen-clean
-protogen-clean:
-	$(RM) backend_pb2_grpc.py backend_pb2.py
-
-backend_pb2_grpc.py backend_pb2.py:
-	python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto
-
-.PHONY: clean
-clean: protogen-clean
-	rm -rf venv __pycache__
backend/python/sentencetransformers/README.md (deleted)

@@ -1,5 +0,0 @@
-# Creating a separate environment for the sentencetransformers project
-
-```
-make sentencetransformers
-```
backend/python/sentencetransformers/backend.py (deleted)

@@ -1,114 +0,0 @@
-#!/usr/bin/env python3
-"""
-Extra gRPC server for HuggingFace SentenceTransformer models.
-"""
-from concurrent import futures
-
-import argparse
-import signal
-import sys
-import os
-
-import time
-import backend_pb2
-import backend_pb2_grpc
-
-import grpc
-
-from sentence_transformers import SentenceTransformer
-
-_ONE_DAY_IN_SECONDS = 60 * 60 * 24
-
-# If MAX_WORKERS are specified in the environment use it, otherwise default to 1
-MAX_WORKERS = int(os.environ.get('PYTHON_GRPC_MAX_WORKERS', '1'))
-
-# Implement the BackendServicer class with the service methods
-class BackendServicer(backend_pb2_grpc.BackendServicer):
-    """
-    A gRPC servicer for the backend service.
-
-    This class implements the gRPC methods for the backend service, including Health, LoadModel, and Embedding.
-    """
-    def Health(self, request, context):
-        """
-        A gRPC method that returns the health status of the backend service.
-
-        Args:
-            request: A HealthRequest object that contains the request parameters.
-            context: A grpc.ServicerContext object that provides information about the RPC.
-
-        Returns:
-            A Reply object that contains the health status of the backend service.
-        """
-        return backend_pb2.Reply(message=bytes("OK", 'utf-8'))
-
-    def LoadModel(self, request, context):
-        """
-        A gRPC method that loads a model into memory.
-
-        Args:
-            request: A LoadModelRequest object that contains the request parameters.
-            context: A grpc.ServicerContext object that provides information about the RPC.
-
-        Returns:
-            A Result object that contains the result of the LoadModel operation.
-        """
-        model_name = request.Model
-        try:
-            self.model = SentenceTransformer(model_name, trust_remote_code=request.TrustRemoteCode)
-        except Exception as err:
-            return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
-
-        # Implement your logic here for the LoadModel service
-        # Replace this with your desired response
-        return backend_pb2.Result(message="Model loaded successfully", success=True)
-
-    def Embedding(self, request, context):
-        """
-        A gRPC method that calculates embeddings for a given sentence.
-
-        Args:
-            request: An EmbeddingRequest object that contains the request parameters.
-            context: A grpc.ServicerContext object that provides information about the RPC.
-
-        Returns:
-            An EmbeddingResult object that contains the calculated embeddings.
-        """
-        # Implement your logic here for the Embedding service
-        # Replace this with your desired response
-        print("Calculated embeddings for: " + request.Embeddings, file=sys.stderr)
-        sentence_embeddings = self.model.encode(request.Embeddings)
-        return backend_pb2.EmbeddingResult(embeddings=sentence_embeddings)
-
-
-def serve(address):
-    server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS))
-    backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server)
-    server.add_insecure_port(address)
-    server.start()
-    print("Server started. Listening on: " + address, file=sys.stderr)
-
-    # Define the signal handler function
-    def signal_handler(sig, frame):
-        print("Received termination signal. Shutting down...")
-        server.stop(0)
-        sys.exit(0)
-
-    # Set the signal handlers for SIGINT and SIGTERM
-    signal.signal(signal.SIGINT, signal_handler)
-    signal.signal(signal.SIGTERM, signal_handler)
-
-    try:
-        while True:
-            time.sleep(_ONE_DAY_IN_SECONDS)
-    except KeyboardInterrupt:
-        server.stop(0)
-
-if __name__ == "__main__":
-    parser = argparse.ArgumentParser(description="Run the gRPC server.")
-    parser.add_argument(
-        "--addr", default="localhost:50051", help="The address to bind the server to."
-    )
-    args = parser.parse_args()
-
-    serve(args.addr)
backend/python/sentencetransformers/install.sh (deleted)

@@ -1,14 +0,0 @@
-#!/bin/bash
-set -e
-
-source $(dirname $0)/../common/libbackend.sh
-
-# This is here because the Intel pip index is broken and returns 200 status codes for every package name, it just doesn't return any package links.
-# This makes uv think that the package exists in the Intel pip index, and by default it stops looking at other pip indexes once it finds a match.
-# We need uv to continue falling through to the pypi default index to find optimum[openvino] in the pypi index
-# the --upgrade actually allows us to *downgrade* torch to the version provided in the Intel pip index
-if [ "x${BUILD_PROFILE}" == "xintel" ]; then
-    EXTRA_PIP_INSTALL_FLAGS+=" --upgrade --index-strategy=unsafe-first-match"
-fi
-
-installRequirements
backend/python/sentencetransformers: deleted per-accelerator requirements files (variant labels below are inferred from the torch builds; exact file names were lost in extraction) and run.sh.

Default CPU variant:

@@ -1,6 +0,0 @@
-torch==2.4.1
-accelerate
-transformers
-bitsandbytes
-sentence-transformers==3.3.1
-transformers

CUDA 11.8 variant:

@@ -1,5 +0,0 @@
---extra-index-url https://download.pytorch.org/whl/cu118
-torch==2.4.1+cu118
-accelerate
-sentence-transformers==3.3.1
-transformers

Default-index torch variant:

@@ -1,4 +0,0 @@
-torch==2.4.1
-accelerate
-sentence-transformers==3.3.1
-transformers

ROCm 6.0 variant:

@@ -1,5 +0,0 @@
---extra-index-url https://download.pytorch.org/whl/rocm6.0
-torch==2.4.1+rocm6.0
-accelerate
-sentence-transformers==3.3.1
-transformers

Intel XPU variant:

@@ -1,9 +0,0 @@
---extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
-intel-extension-for-pytorch==2.3.110+xpu
-torch==2.3.1+cxx11.abi
-oneccl_bind_pt==2.3.100+xpu
-optimum[openvino]
-setuptools
-accelerate
-sentence-transformers==3.3.1
-transformers

Common requirements:

@@ -1,5 +0,0 @@
-grpcio==1.69.0
-protobuf
-certifi
-datasets
-einops

backend/python/sentencetransformers/run.sh (deleted)

@@ -1,4 +0,0 @@
-#!/bin/bash
-source $(dirname $0)/../common/libbackend.sh
-
-startBackend $@
backend/python/sentencetransformers/test.py (deleted)

@@ -1,81 +0,0 @@
-"""
-A test script to test the gRPC service
-"""
-import unittest
-import subprocess
-import time
-import backend_pb2
-import backend_pb2_grpc
-
-import grpc
-
-
-class TestBackendServicer(unittest.TestCase):
-    """
-    TestBackendServicer is the class that tests the gRPC service
-    """
-    def setUp(self):
-        """
-        This method sets up the gRPC service by starting the server
-        """
-        self.service = subprocess.Popen(["python3", "backend.py", "--addr", "localhost:50051"])
-        time.sleep(10)
-
-    def tearDown(self) -> None:
-        """
-        This method tears down the gRPC service by terminating the server
-        """
-        self.service.kill()
-        self.service.wait()
-
-    def test_server_startup(self):
-        """
-        This method tests if the server starts up successfully
-        """
-        try:
-            self.setUp()
-            with grpc.insecure_channel("localhost:50051") as channel:
-                stub = backend_pb2_grpc.BackendStub(channel)
-                response = stub.Health(backend_pb2.HealthMessage())
-                self.assertEqual(response.message, b'OK')
-        except Exception as err:
-            print(err)
-            self.fail("Server failed to start")
-        finally:
-            self.tearDown()
-
-    def test_load_model(self):
-        """
-        This method tests if the model is loaded successfully
-        """
-        try:
-            self.setUp()
-            with grpc.insecure_channel("localhost:50051") as channel:
-                stub = backend_pb2_grpc.BackendStub(channel)
-                response = stub.LoadModel(backend_pb2.ModelOptions(Model="bert-base-nli-mean-tokens"))
-                self.assertTrue(response.success)
-                self.assertEqual(response.message, "Model loaded successfully")
-        except Exception as err:
-            print(err)
-            self.fail("LoadModel service failed")
-        finally:
-            self.tearDown()
-
-    def test_embedding(self):
-        """
-        This method tests if the embeddings are generated successfully
-        """
-        try:
-            self.setUp()
-            with grpc.insecure_channel("localhost:50051") as channel:
-                stub = backend_pb2_grpc.BackendStub(channel)
-                response = stub.LoadModel(backend_pb2.ModelOptions(Model="bert-base-nli-mean-tokens"))
-                self.assertTrue(response.success)
-                embedding_request = backend_pb2.PredictOptions(Embeddings="This is a test sentence.")
-                embedding_response = stub.Embedding(embedding_request)
-                self.assertIsNotNone(embedding_response.embeddings)
-        except Exception as err:
-            print(err)
-            self.fail("Embedding service failed")
-        finally:
-            self.tearDown()
backend/python/sentencetransformers/test.sh (deleted)

@@ -1,6 +0,0 @@
-#!/bin/bash
-set -e
-
-source $(dirname $0)/../common/libbackend.sh
-
-runUnittests
backend/python/transformers/backend.py

@@ -25,6 +25,8 @@ from transformers import AutoTokenizer, AutoModel, set_seed, TextIteratorStreame
 from transformers import AutoProcessor, MusicgenForConditionalGeneration
 from scipy.io import wavfile
 import outetts
+from sentence_transformers import SentenceTransformer
+

 _ONE_DAY_IN_SECONDS = 60 * 60 * 24

@@ -88,10 +90,12 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
         self.CUDA = torch.cuda.is_available()
         self.OV=False
         self.OuteTTS=False
+        self.SentenceTransformer = False

         device_map="cpu"

         quantization = None
+        autoTokenizer = True

         if self.CUDA:
             from transformers import BitsAndBytesConfig, AutoModelForCausalLM

@@ -195,9 +199,11 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
                                                            device=device_map)
             self.OV = True
         elif request.Type == "MusicgenForConditionalGeneration":
+            autoTokenizer = False
             self.processor = AutoProcessor.from_pretrained(model_name)
             self.model = MusicgenForConditionalGeneration.from_pretrained(model_name)
         elif request.Type == "OuteTTS":
+            autoTokenizer = False
             options = request.Options
             MODELNAME = "OuteAI/OuteTTS-0.3-1B"
             TOKENIZER = "OuteAI/OuteTTS-0.3-1B"

@@ -235,6 +241,10 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
                 self.speaker = self.interface.create_speaker(audio_path=self.AudioPath)
             else:
                 self.speaker = self.interface.load_default_speaker(name=SPEAKER)
+        elif request.Type == "SentenceTransformer":
+            autoTokenizer = False
+            self.model = SentenceTransformer(model_name, trust_remote_code=request.TrustRemoteCode)
+            self.SentenceTransformer = True
         else:
             print("Automodel", file=sys.stderr)
             self.model = AutoModel.from_pretrained(model_name,

@@ -250,7 +260,7 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
         else:
             self.max_tokens = 512

-        if request.Type != "MusicgenForConditionalGeneration":
+        if autoTokenizer:
             self.tokenizer = AutoTokenizer.from_pretrained(model_name, use_safetensors=True)
             self.XPU = False

@@ -286,18 +296,26 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
         max_length = 512
         if request.Tokens != 0:
             max_length = request.Tokens
-        encoded_input = self.tokenizer(request.Embeddings, padding=True, truncation=True, max_length=max_length, return_tensors="pt")

-        # Create word embeddings
-        if self.CUDA:
-            encoded_input = encoded_input.to("cuda")
+        embeds = None

-        with torch.no_grad():
-            model_output = self.model(**encoded_input)
+        if self.SentenceTransformer:
+            print("Calculated embeddings for: " + request.Embeddings, file=sys.stderr)
+            embeds = self.model.encode(request.Embeddings)
+        else:
+            encoded_input = self.tokenizer(request.Embeddings, padding=True, truncation=True, max_length=max_length, return_tensors="pt")

-        # Pool to get sentence embeddings; i.e. generate one 1024 vector for the entire sentence
-        sentence_embeddings = mean_pooling(model_output, encoded_input['attention_mask'])
-        return backend_pb2.EmbeddingResult(embeddings=sentence_embeddings[0])
+            # Create word embeddings
+            if self.CUDA:
+                encoded_input = encoded_input.to("cuda")
+
+            with torch.no_grad():
+                model_output = self.model(**encoded_input)
+
+            # Pool to get sentence embeddings; i.e. generate one 1024 vector for the entire sentence
+            sentence_embeddings = mean_pooling(model_output, encoded_input['attention_mask'])
+            embeds = sentence_embeddings[0]
+        return backend_pb2.EmbeddingResult(embeddings=embeds)

     async def _predict(self, request, context, streaming=False):
         set_seed(request.Seed)
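With the SentenceTransformer path folded into the transformers backend, embeddings are still consumed through LocalAI's OpenAI-compatible `/v1/embeddings` endpoint. A hedged Go sketch of a client call, assuming a local instance on the default port and a model configured with `embeddings: true` (the model name here is a placeholder):

```go
package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"net/http"
)

type embeddingRequest struct {
	Model string `json:"model"`
	Input string `json:"input"`
}

func main() {
	// Placeholder model name; any embeddings-enabled model served by the
	// transformers backend (Type: SentenceTransformer) would work here.
	body, _ := json.Marshal(embeddingRequest{
		Model: "all-MiniLM-L6-v2",
		Input: "This is a test sentence.",
	})

	resp, err := http.Post("http://localhost:8080/v1/embeddings",
		"application/json", bytes.NewReader(body))
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	var out map[string]any
	if err := json.NewDecoder(resp.Body).Decode(&out); err != nil {
		panic(err)
	}
	fmt.Println(out["data"]) // list of embedding objects with vectors
}
```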
backend/python/transformers: per-accelerator requirements variants (file names not visible in this view); each gains the sentence-transformers pin.

@@ -3,4 +3,5 @@ llvmlite==0.43.0
 accelerate
 transformers
 bitsandbytes
 outetts
+sentence-transformers==3.3.1

@@ -4,4 +4,5 @@ llvmlite==0.43.0
 accelerate
 transformers
 bitsandbytes
 outetts
+sentence-transformers==3.3.1

@@ -3,4 +3,5 @@ accelerate
 llvmlite==0.43.0
 transformers
 bitsandbytes
 outetts
+sentence-transformers==3.3.1

@@ -4,4 +4,6 @@ accelerate
 transformers
 llvmlite==0.43.0
 bitsandbytes
 outetts
+bitsandbytes
+sentence-transformers==3.3.1

@@ -6,4 +6,5 @@ optimum[openvino]
 llvmlite==0.43.0
 intel-extension-for-transformers
 bitsandbytes
 outetts
+sentence-transformers==3.3.1
backend/python/transformers/test.py

@@ -133,5 +133,41 @@ class TestBackendServicer(unittest.TestCase):
         except Exception as err:
             print(err)
             self.fail("SoundGeneration service failed")
         finally:
             self.tearDown()
+
+    def test_embed_load_model(self):
+        """
+        This method tests if the model is loaded successfully
+        """
+        try:
+            self.setUp()
+            with grpc.insecure_channel("localhost:50051") as channel:
+                stub = backend_pb2_grpc.BackendStub(channel)
+                response = stub.LoadModel(backend_pb2.ModelOptions(Model="bert-base-nli-mean-tokens",Type="SentenceTransformer"))
+                self.assertTrue(response.success)
+                self.assertEqual(response.message, "Model loaded successfully")
+        except Exception as err:
+            print(err)
+            self.fail("LoadModel service failed")
+        finally:
+            self.tearDown()
+
+    def test_sentencetransformers_embedding(self):
+        """
+        This method tests if the embeddings are generated successfully
+        """
+        try:
+            self.setUp()
+            with grpc.insecure_channel("localhost:50051") as channel:
+                stub = backend_pb2_grpc.BackendStub(channel)
+                response = stub.LoadModel(backend_pb2.ModelOptions(Model="bert-base-nli-mean-tokens",Type="SentenceTransformer"))
+                self.assertTrue(response.success)
+                embedding_request = backend_pb2.PredictOptions(Embeddings="This is a test sentence.")
+                embedding_response = stub.Embedding(embedding_request)
+                self.assertIsNotNone(embedding_response.embeddings)
+        except Exception as err:
+            print(err)
+            self.fail("Embedding service failed")
+        finally:
+            self.tearDown()
core/config/backend_config.go

@@ -515,7 +515,7 @@ func (c *BackendConfig) GuessUsecases(u BackendConfigUsecases) bool {
 		}
 	}
 	if (u & FLAG_IMAGE) == FLAG_IMAGE {
-		imageBackends := []string{"diffusers", "tinydream", "stablediffusion"}
+		imageBackends := []string{"diffusers", "stablediffusion"}
 		if !slices.Contains(imageBackends, c.Backend) {
 			return false
 		}
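`GuessUsecases` gates backends with bit flags: the usecase mask is AND-ed against the flag of interest, and only whitelisted backends pass. A self-contained sketch of that pattern (the flag values below are illustrative, not the real constants from LocalAI's config package):

```go
package main

import (
	"fmt"
	"slices"
)

// Illustrative bit flags; the real values live in LocalAI's config package.
const (
	FLAG_CHAT  uint = 1 << iota // 1
	FLAG_IMAGE                  // 2
)

func supportsUsecases(backend string, u uint) bool {
	if (u & FLAG_IMAGE) == FLAG_IMAGE {
		// After this change, tinydream is no longer a valid image backend.
		imageBackends := []string{"diffusers", "stablediffusion"}
		if !slices.Contains(imageBackends, backend) {
			return false
		}
	}
	return true
}

func main() {
	fmt.Println(supportsUsecases("diffusers", FLAG_IMAGE)) // true
	fmt.Println(supportsUsecases("tinydream", FLAG_IMAGE)) // false
}
```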
core/http/app_test.go

@@ -822,7 +822,7 @@ var _ = Describe("API test", func() {

 			application, err := application.New(
 				append(commonOpts,
-					config.WithExternalBackend("huggingface", os.Getenv("HUGGINGFACE_GRPC")),
+					config.WithExternalBackend("transformers", os.Getenv("HUGGINGFACE_GRPC")),
 					config.WithContext(c),
 					config.WithModelPath(modelPath),
 				)...)
|
|||
switch config.Backend {
|
||||
case "stablediffusion":
|
||||
config.Backend = model.StableDiffusionBackend
|
||||
case "tinydream":
|
||||
config.Backend = model.TinyDreamBackend
|
||||
case "":
|
||||
config.Backend = model.StableDiffusionBackend
|
||||
}
|
||||
|
|
|
@ -88,7 +88,7 @@ Here is the list of the variables available that can be used to customize the bu
|
|||
| Variable | Default | Description |
|
||||
| ---------------------| ------- | ----------- |
|
||||
| `BUILD_TYPE` | None | Build type. Available: `cublas`, `openblas`, `clblas`, `metal`,`hipblas`, `sycl_f16`, `sycl_f32` |
|
||||
| `GO_TAGS` | `tts stablediffusion` | Go tags. Available: `stablediffusion`, `tts`, `tinydream` |
|
||||
| `GO_TAGS` | `tts stablediffusion` | Go tags. Available: `stablediffusion`, `tts` |
|
||||
| `CLBLAST_DIR` | | Specify a CLBlast directory |
|
||||
| `CUDA_LIBPATH` | | Specify a CUDA library path |
|
||||
| `BUILD_API_ONLY` | false | Set to true to build only the API (no backends will be built) |
|
||||
|
@ -202,7 +202,7 @@ make build
|
|||
|
||||
**Requirements**: OpenCV, Gomp
|
||||
|
||||
Image generation requires `GO_TAGS=stablediffusion` or `GO_TAGS=tinydream` to be set during build:
|
||||
Image generation requires `GO_TAGS=stablediffusion` to be set during build:
|
||||
|
||||
```
|
||||
make GO_TAGS=stablediffusion build
|
||||
|
|
|
Documentation: container images page

@@ -16,7 +16,7 @@ For GPU Acceleration support for Nvidia video graphic cards, use the Nvidia/CUDA images

 **Available Images Types**:

-- Images ending with `-core` are smaller images without predownload python dependencies. Use these images if you plan to use `llama.cpp`, `stablediffusion-ncn`, `tinydream` or `rwkv` backends - if you are not sure which one to use, do **not** use these images.
+- Images ending with `-core` are smaller images without predownload python dependencies. Use these images if you plan to use `llama.cpp`, `stablediffusion-ncn` or `rwkv` backends - if you are not sure which one to use, do **not** use these images.
 - Images containing the `aio` tag are all-in-one images with all the features enabled, and come with an opinionated set of configuration.
 - FFMpeg is **not** included in the default images due to [its licensing](https://www.ffmpeg.org/legal.html). If you need FFMpeg, use the images ending with `-ffmpeg`. Note that `ffmpeg` is needed in case of using `audio-to-text` LocalAI's features.
 - If using old and outdated CPUs and no GPUs you might need to set `REBUILD` to `true` as environment variable along with options to disable the flags which your CPU does not support, however note that inference will perform poorly and slow. See also [flagset compatibility]({{%relref "docs/getting-started/build#cpu-flagset-compatibility" %}}).
|
|||
| --- | --- |-------------------------------------------------------------|
|
||||
| Latest images from the branch (development) | `quay.io/go-skynet/local-ai:master-sycl-f16` | `localai/localai:master-sycl-f16` |
|
||||
| Latest tag | `quay.io/go-skynet/local-ai:latest-gpu-intel-f16` | `localai/localai:latest-gpu-intel-f16` |
|
||||
| Versioned image | `quay.io/go-skynet/local-ai:{{< version >}}-sycl-f16` | `localai/localai:{{< version >}}-sycl-f16` |
|
||||
| Versioned image | `quay.io/go-skynet/local-ai:{{< version >}}-sycl-f16-core` | `localai/localai:{{< version >}}-sycl-f16-core` |
|
||||
| Versioned image including FFMpeg| `quay.io/go-skynet/local-ai:{{< version >}}-sycl-f16-ffmpeg` | `localai/localai:{{< version >}}-sycl-f16-ffmpeg` |
|
||||
| Versioned image including FFMpeg, no python | `quay.io/go-skynet/local-ai:{{< version >}}-sycl-f16-ffmpeg-core` | `localai/localai:{{< version >}}-sycl-f16-ffmpeg-core` |
|
||||
|
||||
|
@ -209,7 +209,7 @@ Images with `core` in the tag are smaller and do not contain any python dependen
|
|||
| --- | --- |-------------------------------------------------------------|
|
||||
| Latest images from the branch (development) | `quay.io/go-skynet/local-ai:master-sycl-f32` | `localai/localai:master-sycl-f32` |
|
||||
| Latest tag | `quay.io/go-skynet/local-ai:latest-gpu-intel-f32` | `localai/localai:latest-gpu-intel-f32` |
|
||||
| Versioned image | `quay.io/go-skynet/local-ai:{{< version >}}-sycl-f32` | `localai/localai:{{< version >}}-sycl-f32` |
|
||||
| Versioned image | `quay.io/go-skynet/local-ai:{{< version >}}-sycl-f32-core` | `localai/localai:{{< version >}}-sycl-f32-core` |
|
||||
| Versioned image including FFMpeg| `quay.io/go-skynet/local-ai:{{< version >}}-sycl-f32-ffmpeg` | `localai/localai:{{< version >}}-sycl-f32-ffmpeg` |
|
||||
| Versioned image including FFMpeg, no python | `quay.io/go-skynet/local-ai:{{< version >}}-sycl-f32-ffmpeg-core` | `localai/localai:{{< version >}}-sycl-f32-ffmpeg-core` |
|
||||
|
||||
|
|
|
@ -32,7 +32,7 @@ LocalAI will attempt to automatically load models which are not explicitly confi
|
|||
| `mamba` | Mamba models architecture | yes | GPT | no | no | CPU/CUDA |
|
||||
| `exllama2` | GPTQ | yes | GPT only | no | no | N/A |
|
||||
| `transformers-musicgen` | | no | Audio generation | no | no | N/A |
|
||||
| [tinydream](https://github.com/symisc/tiny-dream#tiny-dreaman-embedded-header-only-stable-diffusion-inference-c-librarypixlabiotiny-dream) | stablediffusion | no | Image | no | no | N/A |
|
||||
| stablediffusion | no | Image | no | no | N/A |
|
||||
| `coqui` | Coqui | no | Audio generation and Voice cloning | no | no | CPU/CUDA |
|
||||
| `openvoice` | Open voice | no | Audio generation and Voice cloning | no | no | CPU/CUDA |
|
||||
| `parler-tts` | Open voice | no | Audio generation and Voice cloning | no | no | CPU/CUDA |
|
||||
|
|
|
gallery/index.yaml

@@ -9187,6 +9187,7 @@
       uri: huggingface://xtuner/llava-llama-3-8b-v1_1-gguf/llava-llama-3-8b-v1_1-mmproj-f16.gguf
 - !!merge <<: *llama3
   name: "minicpm-llama3-v-2_5"
+  icon: https://raw.githubusercontent.com/OpenBMB/MiniCPM/main/assets/minicpm_logo.png
   urls:
     - https://huggingface.co/openbmb/MiniCPM-Llama3-V-2_5-gguf
     - https://huggingface.co/openbmb/MiniCPM-Llama3-V-2_5

@@ -9210,6 +9211,33 @@
     - filename: minicpm-llama3-mmproj-f16.gguf
       sha256: 391d11736c3cd24a90417c47b0c88975e86918fcddb1b00494c4d715b08af13e
       uri: huggingface://openbmb/MiniCPM-Llama3-V-2_5-gguf/mmproj-model-f16.gguf
+- !!merge <<: *llama3
+  name: "minicpm-v-2_6"
+  license: apache-2.0
+  icon: https://raw.githubusercontent.com/OpenBMB/MiniCPM/main/assets/minicpm_logo.png
+  urls:
+    - https://huggingface.co/openbmb/MiniCPM-V-2_6-gguf
+    - https://huggingface.co/openbmb/MiniCPM-V-2_6
+  description: |
+    MiniCPM-V 2.6 is the latest and most capable model in the MiniCPM-V series. The model is built on SigLip-400M and Qwen2-7B with a total of 8B parameters
+  tags:
+    - llm
+    - multimodal
+    - gguf
+    - gpu
+    - llama3
+    - cpu
+  overrides:
+    mmproj: minicpm-v-2_6-mmproj-f16.gguf
+    parameters:
+      model: minicpm-v-2_6-Q4_K_M.gguf
+  files:
+    - filename: minicpm-v-2_6-Q4_K_M.gguf
+      sha256: 3a4078d53b46f22989adbf998ce5a3fd090b6541f112d7e936eb4204a04100b1
+      uri: huggingface://openbmb/MiniCPM-V-2_6-gguf/ggml-model-Q4_K_M.gguf
+    - filename: minicpm-v-2_6-mmproj-f16.gguf
+      sha256: f8a805e9e62085805c69c427287acefc284932eb4abfe6e1b1ce431d27e2f4e0
+      uri: huggingface://openbmb/MiniCPM-V-2_6-gguf/mmproj-model-f16.gguf
 - !!merge <<: *llama3
   name: "llama-3-cursedstock-v1.8-8b-iq-imatrix"
   urls:

@@ -11187,15 +11215,6 @@
   description: |
     Stable Diffusion in NCNN with c++, supported txt2img and img2img
   name: stablediffusion-cpp
-## Tiny Dream
-- url: github:mudler/LocalAI/gallery/tinydream.yaml@master
-  name: tinydream
-  license: "BSD-3"
-  urls:
-    - https://github.com/symisc/tiny-dream
-    - https://github.com/symisc/tiny-dream/blob/main/LICENSE
-  description: |
-    An embedded, Header Only, Stable Diffusion C++ implementation
 - &piper
   ## Piper TTS
   url: github:mudler/LocalAI/gallery/piper.yaml@master
gallery/tinydream.yaml (deleted)

@@ -1,37 +0,0 @@
----
-name: "tinydream"
-
-config_file: |
-  name: tinydream
-  backend: tinydream
-  parameters:
-    model: tinydream_assets
-
-files:
-  - filename: "tinydream_assets/AutoencoderKL-fp16.bin"
-    sha256: "f02e71f80e70252734724bbfaed5c4ddd3a8ed7e61bb2175ff5f53099f0e35dd"
-    uri: "https://github.com/M0Rf30/tiny-dream-bins/releases/download/1.0/AutoencoderKL-fp16.bin"
-  - filename: "tinydream_assets/AutoencoderKL-fp16.param"
-    sha256: "0254a056dce61b0c27dc9ec1b78b53bcf55315c540f55f051eb841aa992701ba"
-    uri: "https://github.com/M0Rf30/tiny-dream-bins/releases/download/1.0/AutoencoderKL-fp16.param"
-  - filename: "tinydream_assets/FrozenCLIPEmbedder-fp16.bin"
-    sha256: "1c9a12f4e1dd1b295a388045f7f28a2352a4d70c3dc96a542189a3dd7051fdd6"
-    uri: "https://github.com/M0Rf30/tiny-dream-bins/releases/download/1.0/FrozenCLIPEmbedder-fp16.bin"
-  - filename: "tinydream_assets/FrozenCLIPEmbedder-fp16.param"
-    sha256: "471afbe678dd1fd3fe764ef9c6eccaccb0a7d7e601f27b462aa926b20eb368c9"
-    uri: "https://github.com/M0Rf30/tiny-dream-bins/releases/download/1.0/FrozenCLIPEmbedder-fp16.param"
-  - filename: "tinydream_assets/RealESRGAN_x4plus_anime.bin"
-    sha256: "fe01c269cfd10cdef8e018ab66ebe750cf79c7af4d1f9c16c737e1295229bacc"
-    uri: "https://github.com/M0Rf30/tiny-dream-bins/releases/download/1.0/RealESRGAN_x4plus_anime.bin"
-  - filename: "tinydream_assets/RealESRGAN_x4plus_anime.param"
-    sha256: "2b8fb6e0ae4d2d85704ca08c119a2f5ea40add4f2ecd512eb7f4cd44b6127ed4"
-    uri: "https://github.com/M0Rf30/tiny-dream-bins/releases/download/1.0/RealESRGAN_x4plus_anime.param"
-  - filename: "tinydream_assets/UNetModel-fp16.bin"
-    sha256: "d618918d011bfc1f644c0f2a33bf84931bd53b28a98492b0a8ed6f3a818852c3"
-    uri: "https://github.com/M0Rf30/tiny-dream-bins/releases/download/1.0/UNetModel-fp16.bin"
-  - filename: "tinydream_assets/UNetModel-fp16.param"
-    sha256: "696f6975de49f4325b53ce32aff81861a6d6c07cd9ce3f0aae2cc405350af38d"
-    uri: "https://github.com/M0Rf30/tiny-dream-bins/releases/download/1.0/UNetModel-fp16.param"
-  - filename: "tinydream_assets/vocab.txt"
-    sha256: "e30e57b6f1e47616982ef898d8922be24e535b4fa3d0110477b3a6f02ebbae7d"
-    uri: "https://github.com/M0Rf30/tiny-dream-bins/releases/download/1.0/vocab.txt"
go.mod (1 change)

@@ -6,7 +6,6 @@ toolchain go1.23.1

 require (
 	dario.cat/mergo v1.0.1
-	github.com/M0Rf30/go-tiny-dream v0.0.0-20240425104733-c04fa463ace9
 	github.com/Masterminds/sprig/v3 v3.3.0
 	github.com/alecthomas/kong v0.9.0
 	github.com/census-instrumentation/opencensus-proto v0.4.1
go.sum (2 changes)

@@ -27,8 +27,6 @@ github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03
 github.com/BurntSushi/toml v1.2.1/go.mod h1:CxXYINrC8qIiEnFrOxCa7Jy5BFHlXnUU2pbicEuybxQ=
 github.com/KyleBanks/depth v1.2.1 h1:5h8fQADFrWtarTdtDudMmGsC7GPbOAu6RVB3ffsVFHc=
 github.com/KyleBanks/depth v1.2.1/go.mod h1:jzSb9d0L43HxTQfT+oSA1EEp2q+ne2uh6XgeJcm8brE=
-github.com/M0Rf30/go-tiny-dream v0.0.0-20240425104733-c04fa463ace9 h1:ASsbvw7wQPldWpwKdmYRszJ2A8Cj3oJDr4zO0DiXvN4=
-github.com/M0Rf30/go-tiny-dream v0.0.0-20240425104733-c04fa463ace9/go.mod h1:UOf2Mb/deUri5agct5OJ4SLWjhI+kZKbsUVUeRb24I0=
 github.com/Masterminds/goutils v1.1.1 h1:5nUrii3FMTL5diU80unEVvNevw1nH4+ZV4DSLVJLSYI=
 github.com/Masterminds/goutils v1.1.1/go.mod h1:8cTjp+g8YejhMuvIA5y2vz3BpJxksy863GQaJW2MFNU=
 github.com/Masterminds/semver/v3 v3.3.0 h1:B8LGeaivUe71a5qox1ICM/JLl0NqZSW5CHyL+hmvYS0=
pkg/model (backend aliases and loader)

@@ -22,11 +22,19 @@ import (
 )

 var Aliases map[string]string = map[string]string{
-	"go-llama":              LLamaCPP,
-	"llama":                 LLamaCPP,
-	"embedded-store":        LocalStoreBackend,
-	"langchain-huggingface": LCHuggingFaceBackend,
-	"transformers-musicgen": TransformersBackend,
+	"go-llama":               LLamaCPP,
+	"llama":                  LLamaCPP,
+	"embedded-store":         LocalStoreBackend,
+	"huggingface-embeddings": TransformersBackend,
+	"langchain-huggingface":  LCHuggingFaceBackend,
+	"transformers-musicgen":  TransformersBackend,
+	"sentencetransformers":   TransformersBackend,
+}
+
+var TypeAlias map[string]string = map[string]string{
+	"sentencetransformers":   "SentenceTransformer",
+	"huggingface-embeddings": "SentenceTransformer",
+	"transformers-musicgen":  "MusicgenForConditionalGeneration",
 }

 var AutoDetect = os.Getenv("DISABLE_AUTODETECT") != "true"

@@ -48,7 +56,6 @@ const (

 	WhisperBackend         = "whisper"
 	StableDiffusionBackend = "stablediffusion"
-	TinyDreamBackend       = "tinydream"
 	PiperBackend           = "piper"
 	LCHuggingFaceBackend   = "huggingface"

@@ -396,6 +403,7 @@ func (ml *ModelLoader) grpcModel(backend string, autodetect bool, o *Options) fu
 	}

 	log.Debug().Msgf("Wait for the service to start up")
+	log.Debug().Msgf("Options: %+v", o.gRPCOptions)

 	// Wait for the service to start up
 	ready := false

@@ -460,8 +468,15 @@ func (ml *ModelLoader) backendLoader(opts ...Option) (client grpc.Backend, err e

 	backend := strings.ToLower(o.backendString)
 	if realBackend, exists := Aliases[backend]; exists {
+		typeAlias, exists := TypeAlias[backend]
+		if exists {
+			log.Debug().Msgf("'%s' is a type alias of '%s' (%s)", backend, realBackend, typeAlias)
+			o.gRPCOptions.Type = typeAlias
+		} else {
+			log.Debug().Msgf("'%s' is an alias of '%s'", backend, realBackend)
+		}
+
 		backend = realBackend
-		log.Debug().Msgf("%s is an alias of %s", backend, realBackend)
 	}

 	ml.stopActiveBackends(o.modelID, o.singleActiveBackend)
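The loader change means a backend string is now resolved in two steps: `Aliases` rewrites it to the real backend, and `TypeAlias` optionally pins the gRPC model `Type` so the transformers backend knows to take the SentenceTransformer code path. A standalone sketch of that resolution order (the maps are trimmed to the entries relevant here):

```go
package main

import (
	"fmt"
	"strings"
)

var aliases = map[string]string{
	"sentencetransformers":   "transformers",
	"huggingface-embeddings": "transformers",
}

var typeAlias = map[string]string{
	"sentencetransformers":   "SentenceTransformer",
	"huggingface-embeddings": "SentenceTransformer",
}

// resolve returns the real backend name and, if set, the forced model type.
func resolve(backend string) (string, string) {
	b := strings.ToLower(backend)
	real, ok := aliases[b]
	if !ok {
		return b, "" // not an alias: use the string as-is
	}
	return real, typeAlias[b] // type may be "" when only the name is aliased
}

func main() {
	name, typ := resolve("sentencetransformers")
	fmt.Println(name, typ) // transformers SentenceTransformer
}
```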
pkg/tinydream/generate.go (deleted)

@@ -1,36 +0,0 @@
-//go:build tinydream
-// +build tinydream
-
-package tinydream
-
-import (
-	"fmt"
-	"path/filepath"
-
-	tinyDream "github.com/M0Rf30/go-tiny-dream"
-)
-
-func GenerateImage(height, width, step, seed int, positive_prompt, negative_prompt, dst, asset_dir string) error {
-	fmt.Println(dst)
-	if height > 512 || width > 512 {
-		return tinyDream.GenerateImage(
-			1,
-			step,
-			seed,
-			positive_prompt,
-			negative_prompt,
-			filepath.Dir(dst),
-			asset_dir,
-		)
-	}
-
-	return tinyDream.GenerateImage(
-		0,
-		step,
-		seed,
-		positive_prompt,
-		negative_prompt,
-		filepath.Dir(dst),
-		asset_dir,
-	)
-}
pkg/tinydream/generate_unsupported.go (deleted)

@@ -1,10 +0,0 @@
-//go:build !tinydream
-// +build !tinydream
-
-package tinydream
-
-import "fmt"
-
-func GenerateImage(height, width, step, seed int, positive_prompt, negative_prompt, dst, asset_dir string) error {
-	return fmt.Errorf("This version of LocalAI was built without the tinytts tag")
-}
pkg/tinydream/tinydream.go (deleted)

@@ -1,20 +0,0 @@
-package tinydream
-
-import "os"
-
-type TinyDream struct {
-	assetDir string
-}
-
-func New(assetDir string) (*TinyDream, error) {
-	if _, err := os.Stat(assetDir); err != nil {
-		return nil, err
-	}
-	return &TinyDream{
-		assetDir: assetDir,
-	}, nil
-}
-
-func (td *TinyDream) GenerateImage(height, width, step, seed int, positive_prompt, negative_prompt, dst string) error {
-	return GenerateImage(height, width, step, seed, positive_prompt, negative_prompt, dst, td.assetDir)
-}
Embedded model config (code-search-ada-code-001)

@@ -1,5 +1,5 @@
 name: code-search-ada-code-001
-backend: huggingface
+backend: sentencetransformers
 embeddings: true
 parameters:
   model: all-MiniLM-L6-v2